From c7a589a2c4e2db496d732821a8dba59508326250 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 24 Dec 2021 08:16:33 -0500 Subject: [PATCH 001/992] [Clang][OpenMP] Add the support for atomic compare in parser This patch adds the support for `atomic compare` in parser. The support in Sema and CodeGen will come soon. For now, it simply eimits an error when it is encountered. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D115561 --- clang/include/clang/AST/OpenMPClause.h | 41 +++++++++++++++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 5 +++ .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/include/clang/Sema/Sema.h | 3 ++ clang/lib/AST/OpenMPClause.cpp | 6 +++ clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 2 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 3 ++ clang/lib/Parse/ParseOpenMP.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 39 ++++++++++++++---- clang/lib/Sema/TreeTransform.h | 7 ++++ clang/lib/Serialization/ASTReader.cpp | 5 +++ clang/lib/Serialization/ASTWriter.cpp | 2 + clang/test/OpenMP/atomic_messages.cpp | 29 +++++++++---- clang/tools/libclang/CIndex.cpp | 2 + flang/lib/Semantics/check-omp-structure.cpp | 1 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 2 + 17 files changed, 134 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 565eb0c9cf99..3fd1b6d30080 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2224,6 +2224,47 @@ class OMPCaptureClause : public OMPClause { } }; +/// This represents 'compare' clause in the '#pragma omp atomic' +/// directive. +/// +/// \code +/// #pragma omp atomic compare +/// \endcode +/// In this example directive '#pragma omp atomic' has 'compare' clause. +class OMPCompareClause final : public OMPClause { +public: + /// Build 'compare' clause. + /// + /// \param StartLoc Starting location of the clause. 
+ /// \param EndLoc Ending location of the clause. + OMPCompareClause(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_compare, StartLoc, EndLoc) {} + + /// Build an empty clause. + OMPCompareClause() + : OMPClause(llvm::omp::OMPC_compare, SourceLocation(), SourceLocation()) { + } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_compare; + } +}; + /// This represents 'seq_cst' clause in the '#pragma omp atomic' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 8bcee8790e7b..f62dc36de556 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3234,6 +3234,11 @@ bool RecursiveASTVisitor::VisitOMPCaptureClause(OMPCaptureClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPCompareClause(OMPCompareClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPSeqCstClause(OMPSeqCstClause *) { return true; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3b6341d2232d..f2089bfda04d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10509,7 +10509,7 @@ def err_omp_atomic_capture_not_compound_statement : Error< def note_omp_atomic_capture: Note< "%select{expected assignment expression|expected compound statement|expected exactly two expression statements|expected in right 
hand side of the first expression}0">; def err_omp_atomic_several_clauses : Error< - "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause">; + "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< "directive '#pragma omp %0' cannot contain more than one %select{'seq_cst', 'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">; def err_omp_atomic_incompatible_mem_order_clause : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 55171767da10..79834554a50d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11190,6 +11190,9 @@ class Sema final { /// Called on well-formed 'capture' clause. OMPClause *ActOnOpenMPCaptureClause(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed 'compare' clause. + OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed 'seq_cst' clause. 
OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 56e140f10710..1bd049b88005 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -126,6 +126,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -217,6 +218,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -1792,6 +1794,10 @@ void OMPClausePrinter::VisitOMPCaptureClause(OMPCaptureClause *) { OS << "capture"; } +void OMPClausePrinter::VisitOMPCompareClause(OMPCompareClause *) { + OS << "compare"; +} + void OMPClausePrinter::VisitOMPSeqCstClause(OMPSeqCstClause *) { OS << "seq_cst"; } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 4339c249e027..09853e0f0e49 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -551,6 +551,8 @@ void OMPClauseProfiler::VisitOMPUpdateClause(const OMPUpdateClause *) {} void OMPClauseProfiler::VisitOMPCaptureClause(const OMPCaptureClause *) {} +void OMPClauseProfiler::VisitOMPCompareClause(const OMPCompareClause *) {} + void OMPClauseProfiler::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseProfiler::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 9e74e05bd863..1761c6d3d89b 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -163,6 +163,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case 
OMPC_acq_rel: case OMPC_acquire: @@ -428,6 +429,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ba0ced7d8f97..564c3a591f16 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5967,6 +5967,9 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; + case OMPC_compare: + // Do nothing here as we already emit an error. + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 613ad742c93f..300b022d83b9 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3192,6 +3192,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a5962ec3b1d9..ba0481874577 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6354,6 +6354,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -10939,7 +10940,8 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, case OMPC_read: case OMPC_write: case OMPC_update: - case OMPC_capture: { + case OMPC_capture: + case OMPC_compare: { if (AtomicKind != OMPC_unknown) { Diag(C->getBeginLoc(), diag::err_omp_atomic_several_clauses) << SourceRange(C->getBeginLoc(), C->getEndLoc()); @@ -11383,15 +11385,21 @@ StmtResult 
Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, SourceRange(Body->getBeginLoc(), Body->getBeginLoc()); ErrorFound = NotACompoundStatement; } - if (ErrorFound != NoError) { - Diag(ErrorLoc, diag::err_omp_atomic_capture_not_compound_statement) - << ErrorRange; - Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; - return StmtError(); - } - if (CurContext->isDependentContext()) - UE = V = E = X = nullptr; } + if (ErrorFound != NoError) { + Diag(ErrorLoc, diag::err_omp_atomic_capture_not_compound_statement) + << ErrorRange; + Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; + return StmtError(); + } + if (CurContext->isDependentContext()) + UE = V = E = X = nullptr; + } else if (AtomicKind == OMPC_compare) { + // TODO: For now we emit an error here and in emitOMPAtomicExpr we ignore + // code gen. + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "atomic compare is not supported for now"); + Diag(AtomicKindLoc, DiagID); } setFunctionHasBranchProtectedScope(); @@ -13472,6 +13480,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -14303,6 +14312,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -14764,6 +14774,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -15069,6 +15080,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -15257,6 +15269,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_capture: 
Res = ActOnOpenMPCaptureClause(StartLoc, EndLoc); break; + case OMPC_compare: + Res = ActOnOpenMPCompareClause(StartLoc, EndLoc); + break; case OMPC_seq_cst: Res = ActOnOpenMPSeqCstClause(StartLoc, EndLoc); break; @@ -15403,6 +15418,11 @@ OMPClause *Sema::ActOnOpenMPCaptureClause(SourceLocation StartLoc, return new (Context) OMPCaptureClause(StartLoc, EndLoc); } +OMPClause *Sema::ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (Context) OMPCompareClause(StartLoc, EndLoc); +} + OMPClause *Sema::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc) { return new (Context) OMPSeqCstClause(StartLoc, EndLoc); @@ -15871,6 +15891,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 39b659753dfa..298a3f7a83d8 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9460,6 +9460,13 @@ TreeTransform::TransformOMPCaptureClause(OMPCaptureClause *C) { return C; } +template +OMPClause * +TreeTransform::TransformOMPCompareClause(OMPCompareClause *C) { + // No need to rebuild this clause, no template-dependent parameters. 
+ return C; +} + template OMPClause * TreeTransform::TransformOMPSeqCstClause(OMPSeqCstClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index d16697b48ca9..f93e0d2ed1c4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11765,6 +11765,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_capture: C = new (Context) OMPCaptureClause(); break; + case llvm::omp::OMPC_compare: + C = new (Context) OMPCompareClause(); + break; case llvm::omp::OMPC_seq_cst: C = new (Context) OMPSeqCstClause(); break; @@ -12123,6 +12126,8 @@ void OMPClauseReader::VisitOMPUpdateClause(OMPUpdateClause *C) { void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {} +void OMPClauseReader::VisitOMPCompareClause(OMPCompareClause *) {} + void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3e4153b3b612..65a780e67510 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6252,6 +6252,8 @@ void OMPClauseWriter::VisitOMPUpdateClause(OMPUpdateClause *C) { void OMPClauseWriter::VisitOMPCaptureClause(OMPCaptureClause *) {} +void OMPClauseWriter::VisitOMPCompareClause(OMPCompareClause *) {} + void OMPClauseWriter::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseWriter::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/test/OpenMP/atomic_messages.cpp b/clang/test/OpenMP/atomic_messages.cpp index 4a8ca7ba82ce..608847f87dad 100644 --- a/clang/test/OpenMP/atomic_messages.cpp +++ b/clang/test/OpenMP/atomic_messages.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -ferror-limit 150 %s -Wuninitialized +// RUN: %clang_cc1 
-verify=expected,omp50,omp51 -fopenmp -fopenmp-version=51 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -ferror-limit 150 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50,omp51 -fopenmp-simd -fopenmp-version=51 -ferror-limit 150 %s -Wuninitialized int foo() { L1: @@ -896,19 +898,19 @@ int relaxed() { template T mixed() { T a, b = T(); -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'read' clause used here}} #pragma omp atomic read write a = b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'write' clause used here}} #pragma omp atomic write read a = b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'update' clause used here}} #pragma omp atomic update read a += b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'capture' clause used here}} #pragma omp atomic capture read a = ++b; @@ 
-917,19 +919,19 @@ T mixed() { int mixed() { int a, b = 0; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'read' clause used here}} #pragma omp atomic read write a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write read a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write update a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write capture a = b; @@ -937,3 +939,14 @@ int mixed() { return mixed(); } +#if _OPENMP >= 202011 +int compare() { + int a, b, c; +// omp51-error@+1 {{atomic compare is not supported for now}} +#pragma omp atomic compare + { + if (a == b) + a = c; + } +} +#endif diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index c59c5709b6fa..4722bece7a1d 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2275,6 +2275,8 @@ void 
OMPClauseEnqueue::VisitOMPUpdateClause(const OMPUpdateClause *) {} void OMPClauseEnqueue::VisitOMPCaptureClause(const OMPCaptureClause *) {} +void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} + void OMPClauseEnqueue::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseEnqueue::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2698cafca8eb..16efa1edf8f9 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1482,6 +1482,7 @@ CHECK_SIMPLE_CLAUSE(AppendArgs, OMPC_append_args) CHECK_SIMPLE_CLAUSE(MemoryOrder, OMPC_memory_order) CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind) CHECK_SIMPLE_CLAUSE(Align, OMPC_align) +CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index eab8d03f7316..18d577dff497 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -180,6 +180,7 @@ def OMPC_Read : Clause<"read"> { let clangClass = "OMPReadClause"; } def OMPC_Write : Clause<"write"> { let clangClass = "OMPWriteClause"; } def OMPC_Update : Clause<"update"> { let clangClass = "OMPUpdateClause"; } def OMPC_Capture : Clause<"capture"> { let clangClass = "OMPCaptureClause"; } +def OMPC_Compare : Clause<"compare"> { let clangClass = "OMPCompareClause"; } def OMPC_SeqCst : Clause<"seq_cst"> { let clangClass = "OMPSeqCstClause"; } def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; } def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; } @@ -536,6 +537,7 @@ def OMP_Atomic : Directive<"atomic"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause ]; let allowedOnceClauses = [ VersionedClause, From 
159da567378ab5a4bf9b62162d16caccf3db16f9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Dec 2021 18:53:06 +0000 Subject: [PATCH 002/992] [X86] Enable v32i16 ISD::ROTL/ROTR lowering on AVX512BW targets --- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++---- llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 52 +++++++-------- llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 68 +++++++++----------- llvm/test/CodeGen/X86/vector-rotate-512.ll | 28 ++++---- 4 files changed, 76 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 34ea8901fb3a..6f6361b6757b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1654,6 +1654,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use @@ -1668,21 +1670,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } - // With BWI, expanding (and promoting the shifts) is the better. - if (!Subtarget.useBWIRegs()) { - setOperationAction(ISD::ROTL, MVT::v32i16, Custom); - setOperationAction(ISD::ROTR, MVT::v32i16, Custom); - } - - setOperationAction(ISD::ROTL, MVT::v64i8, Custom); - setOperationAction(ISD::ROTR, MVT::v64i8, Custom); - for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::ABS, VT, HasBWI ? 
Legal : Custom); setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); @@ -29894,12 +29885,12 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (VT.is512BitVector() && !Subtarget.useBWIRegs()) return splitVectorIntBinary(Op, DAG); - assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || - ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && - Subtarget.hasAVX2()) || - (VT == MVT::v32i16 && !Subtarget.useBWIRegs()) || - (VT == MVT::v64i8 && Subtarget.useBWIRegs())) && - "Only vXi32/vXi16/vXi8 vector rotates supported"); + assert( + (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || + ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && + Subtarget.hasAVX2()) || + ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) && + "Only vXi32/vXi16/vXi8 vector rotates supported"); MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits); MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2); diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index df579191c87f..0e8cceb4db3f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512BW-LABEL: var_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_funnnel_v32i16: @@ -334,30 +330,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, 
%zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index bd458426f114..d7ace82e7f08 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512BW-LABEL: var_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_funnnel_v32i16: @@ -336,30 +332,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: 
vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: @@ -536,15 +528,15 @@ define <32 x i16> 
@constant_funnnel_v32i16(<32 x i16> %x) nounwind { ; ; AVX512BW-LABEL: constant_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; @@ -706,15 +698,15 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1 +; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index a54988c0870d..4427d3b2c79f 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -91,20 +91,22 @@ define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; ; AVX512BW-LABEL: var_rotate_v32i16: ; 
AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_rotate_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_rotate_v32i16: @@ -341,22 +343,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind ; ; AVX512BW-LABEL: splatvar_rotate_v32i16: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; 
AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v32i16: ; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq From 286237962ad35bc4756947c32310838ad4c76451 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 16 Dec 2021 13:47:57 -0500 Subject: [PATCH 003/992] InstCombine: Gracefully handle more allocas in the wrong address space Officially this is currently required to always use the datalayout's alloca address space. This may change in the future, and it's cleaner to propagate the existing alloca's addrspace anyway. This is a triple fix. Initially the change in simplifyAllocaArraySize would drop the address space, but produce output. Fixing this hit an assertion in the cast combine. This patch also makes the changes to handle this situation from a33e12801279a947c74fdee2655b24480941fb39 dead, so eliminate it. InstCombine should not take it upon itself to introduce addrspacecasts, and preserve the original address space instead. 
--- llvm/include/llvm/IR/Instructions.h | 5 ++++ .../InstCombine/InstCombineCasts.cpp | 2 +- .../InstCombineLoadStoreAlloca.cpp | 17 ++++-------- .../InstCombine/alloca-in-non-alloca-as.ll | 27 +++++++++++++++---- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 046e9b5e809e..ccf17628e265 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -105,6 +105,11 @@ class AllocaInst : public UnaryInstruction { return cast(Instruction::getType()); } + /// Return the address space for the allocation. + unsigned getAddressSpace() const { + return getType()->getAddressSpace(); + } + /// Get allocation size in bits. Returns None if size can't be determined, /// e.g. in case of a VLA. Optional getAllocationSizeInBits(const DataLayout &DL) const; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 18eb245779bf..8df4a4529f47 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -157,7 +157,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, Amt = Builder.CreateAdd(Amt, Off); } - AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt); + AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt); New->setAlignment(AI.getAlign()); New->takeName(&AI); New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 286a947fc603..0dbfdba353c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -183,7 +183,8 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { if 
(C->getValue().getActiveBits() <= 64) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); + AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(), + nullptr, AI.getName()); New->setAlignment(AI.getAlign()); // Scan to the end of the allocation instructions, to skip over a block of @@ -199,21 +200,13 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; - Instruction *NewI = GetElementPtrInst::CreateInBounds( + Instruction *GEP = GetElementPtrInst::CreateInBounds( NewTy, New, Idx, New->getName() + ".sub"); - IC.InsertNewInstBefore(NewI, *It); - - // Gracefully handle allocas in other address spaces. - if (AI.getType()->getPointerAddressSpace() != - NewI->getType()->getPointerAddressSpace()) { - NewI = - CastInst::CreatePointerBitCastOrAddrSpaceCast(NewI, AI.getType()); - IC.InsertNewInstBefore(NewI, *It); - } + IC.InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original // allocation. 
- return IC.replaceInstUsesWith(AI, NewI); + return IC.replaceInstUsesWith(AI, GEP); } } diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll index c1c45b8e5ab3..c8987e11a723 100644 --- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll +++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll @@ -11,11 +11,9 @@ declare void @use(i8*, i32**) define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { ; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8( ; CHECK-NEXT: .master: -; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1, addrspace(5) -; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32* addrspace(5)* [[TMP0]] to i8 addrspace(5)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(5)* [[DOTSUB]] to i8* -; CHECK-NEXT: [[A_ON_STACK:%.*]] = addrspacecast i32* addrspace(5)* [[TMP0]] to i32** -; CHECK-NEXT: call void @use(i8* [[TMP1]], i32** [[A_ON_STACK]]) +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1 +; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32** [[TMP0]] to i8* +; CHECK-NEXT: call void @use(i8* [[DOTSUB]], i32** [[TMP0]]) ; CHECK-NEXT: ret void ; .master: @@ -25,3 +23,22 @@ define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { call void @use(i8* %0, i32** %a_on_stack) ret void } + +%struct.widget = type { [8 x i8] } + +define void @spam(i64* %arg1) { +; CHECK-LABEL: @spam( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [0 x [30 x %struct.widget]], [0 x [30 x %struct.widget]]* [[ALLOCA1]], i64 0, i64 0, i64 0 +; CHECK-NEXT: call void @zot(%struct.widget* [[GEP]]) +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca [30 x %struct.widget], i32 0, align 16 + %gep = getelementptr inbounds [30 x %struct.widget], [30 x %struct.widget]* %alloca, i64 0, i64 0 + call void @zot(%struct.widget* %gep) + ret void +} + +declare hidden 
void @zot(%struct.widget*) From 8020458c5dc2be841c07d26ff75b5471314e6631 Mon Sep 17 00:00:00 2001 From: alex-t Date: Fri, 24 Dec 2021 01:01:07 +0300 Subject: [PATCH 004/992] [AMDGPU] Changing S_AND_B32 to V_AND_B32_e64 in the divergent 'trunc' to i1 pattern In 'trunc' i16/32/64 to i1 pattern the 'and $src, 1' node supply operand to 'setcc'. The latter is selected to S_CMP_EQ/V_CMP_EQ dependent on the divergence. In case the 'and' is scalar and 'setcc' is divergent, we need VGPR to SGPR copy to adjust input operand for V_CMP_EQ. This patch changes the S_AND_B32 to V_AND_B32_e64 in the 'trunc to i1' divergent patterns. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D116241 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 12 ++-- llvm/lib/Target/AMDGPU/VOPInstructions.td | 12 ++++ .../AMDGPU/divergence-driven-trunc-to-i1.ll | 59 +++++++++++++++++++ 3 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 95744b6390c5..636337ede000 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2212,18 +2212,18 @@ def : GCNPat < >; def : GCNPat < - (i1 (trunc i32:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) + (i1 (DivergentUnaryFrag i32:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; def : GCNPat < - (i1 (trunc i16:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) + (i1 (DivergentUnaryFrag i16:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; def : GCNPat < - (i1 (trunc i64:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), + (i1 (DivergentUnaryFrag i64:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) >; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index a3eccf13cd71..a8368892c565 100644 
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -794,6 +794,18 @@ class VOPPatGen { list ret = [!con(Outs, (set Ins))]; } +class DivergentUnaryFrag : PatFrag < + (ops node:$src0), + (Op $src0), + [{ return N->isDivergent(); }]> { + // This check is unnecessary as it's captured by the result register + // bank constraint. + // + // FIXME: Should add a way for the emitter to recognize this is a + // trivially true predicate to eliminate the check. + let GISelPredicateCode = [{return true;}]; +} + class VOPPatOrNull { list ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen.ret, []); } diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll new file mode 100644 index 000000000000..4429ee6f3ba6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -0,0 +1,59 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: name: uniform_trunc_i16_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { + %setcc = icmp slt i16 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i16_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { + %setcc = icmp slt i16 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + +; GCN-LABEL: name: uniform_trunc_i32_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) { + %setcc = icmp slt i32 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i32_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: 
V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) { + %setcc = icmp slt i32 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + +; GCN-LABEL: name: uniform_trunc_i64_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { + %setcc = icmp slt i64 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i64_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { + %setcc = icmp slt i64 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + From d5dc3964a7417a34fe581d93ff9642923f8a634d Mon Sep 17 00:00:00 2001 From: Alexey Zhikhartsev Date: Wed, 15 Dec 2021 12:14:13 -0500 Subject: [PATCH 005/992] [DFAJumpThreading] Determinator BB should precede switch-defining BB Otherwise, it is possible that the state defined in the determinator block defines the state for the next iteration of the loop, rather than for the current one. 
Fixes llvm-test-suite's SingleSource/Regression/C/gcc-c-torture/execute/pr80421.c Differential Revision: https://reviews.llvm.org/D115832 --- .../Transforms/Scalar/DFAJumpThreading.cpp | 58 ++++++++++++++++++- .../dfa-jump-threading-analysis.ll | 14 +++-- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 8c4523206070..dda1a2f08076 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -588,7 +588,7 @@ struct AllSwitchPaths { PrevBB = BB; } - if (TPath.isExitValueSet()) + if (TPath.isExitValueSet() && isSupported(TPath)) TPaths.push_back(TPath); } } @@ -683,6 +683,62 @@ struct AllSwitchPaths { return Res; } + /// The determinator BB should precede the switch-defining BB. + /// + /// Otherwise, it is possible that the state defined in the determinator block + /// defines the state for the next iteration of the loop, rather than for the + /// current one. 
+ /// + /// Currently supported paths: + /// \code + /// < switch bb1 determ def > [ 42, determ ] + /// < switch_and_def bb1 determ > [ 42, determ ] + /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ] + /// \endcode + /// + /// Unsupported paths: + /// \code + /// < switch bb1 def determ > [ 43, determ ] + /// < switch_and_determ bb1 def > [ 43, switch_and_determ ] + /// \endcode + bool isSupported(const ThreadingPath &TPath) { + Instruction *SwitchCondI = dyn_cast(Switch->getCondition()); + assert(SwitchCondI); + if (!SwitchCondI) + return false; + + const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent(); + const BasicBlock *SwitchCondUseBB = Switch->getParent(); + const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB(); + + assert( + SwitchCondUseBB == TPath.getPath().front() && + "The first BB in a threading path should have the switch instruction"); + if (SwitchCondUseBB != TPath.getPath().front()) + return false; + + // Make DeterminatorBB the first element in Path. 
+ PathType Path = TPath.getPath(); + auto ItDet = std::find(Path.begin(), Path.end(), DeterminatorBB); + std::rotate(Path.begin(), ItDet, Path.end()); + + bool IsDetBBSeen = false; + bool IsDefBBSeen = false; + bool IsUseBBSeen = false; + for (BasicBlock *BB : Path) { + if (BB == DeterminatorBB) + IsDetBBSeen = true; + if (BB == SwitchCondDefBB) + IsDefBBSeen = true; + if (BB == SwitchCondUseBB) + IsUseBBSeen = true; + if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen) + return false; + } + + return true; + } + SwitchInst *Switch; BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index 7dfa1fcc3a77..ccc9b38759c4 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -109,10 +109,16 @@ exit: declare void @baz() -; Verify that having the switch block as a determinator is handled correctly. -define i32 @main() { -; CHECK: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ] -; CHECK-NEXT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ] +; Do not jump-thread those paths where the determinator basic block does not +; precede the basic block that defines the switch condition. +; +; Otherwise, it is possible that the state defined in the determinator block +; defines the state for the next iteration of the loop, rather than for the +; current one. 
+define i32 @wrong_bb_order() { +; CHECK-LABEL: DFA Jump threading: wrong_bb_order +; CHECK-NOT: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ] +; CHECK-NOT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ] bb: %i = alloca [420 x i8], align 1 %i2 = getelementptr inbounds [420 x i8], [420 x i8]* %i, i64 0, i64 390 From dc8f9fb196dab8ca31361928bd6a361dc80d8ade Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Fri, 24 Dec 2021 16:21:34 +0000 Subject: [PATCH 006/992] [Docs] Minor fix in clang user manual --- clang/docs/UsersManual.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 3f9947afc29b..26da5a0ff255 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3537,7 +3537,7 @@ should be built or installed. Please refer to `the following instructions `_ for more details. Clang will expects the ``llvm-spirv`` executable to be present in the ``PATH`` environment variable. Clang uses ``llvm-spirv`` -with `the conformant assembly syntax package +with `the widely adopted assembly syntax package `_. `The versioning From 6c0eaefaf832745f509841afe4dd8a698671b86e Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sat, 25 Dec 2021 00:11:35 +0530 Subject: [PATCH 007/992] [MLIR][FlatAffineConstraints][NFC] Move some static functions to be available to Presburger/ This patch moves some static functions from AffineStructures.cpp to Presburger/Utils.cpp and some to be private members of FlatAffineConstraints (which will later be moved to IntegerPolyhedron) to allow for a smoother transition for moving FlatAffineConstraints math functionality to Presburger/IntegerPolyhedron. This patch is part of a series of patches for moving math functionality to Presburger directory. 
Reviewed By: arjunp, bondhugula Differential Revision: https://reviews.llvm.org/D115869 --- mlir/include/mlir/Analysis/AffineStructures.h | 10 + mlir/include/mlir/Analysis/Presburger/Utils.h | 40 ++++ mlir/lib/Analysis/AffineStructures.cpp | 177 ++---------------- mlir/lib/Analysis/Presburger/CMakeLists.txt | 1 + mlir/lib/Analysis/Presburger/Utils.cpp | 154 +++++++++++++++ 5 files changed, 223 insertions(+), 159 deletions(-) create mode 100644 mlir/include/mlir/Analysis/Presburger/Utils.h create mode 100644 mlir/lib/Analysis/Presburger/Utils.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 9e7ffb125f7e..089d8a9f1eeb 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -419,6 +419,16 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); + /// Searches for a constraint with a non-zero coefficient at `colIdx` in + /// equality (isEq=true) or inequality (isEq=false) constraints. + /// Returns true and sets row found in search in `rowIdx`, false otherwise. + bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, + unsigned *rowIdx) const; + + /// Returns true if the pos^th column is all zero for both inequalities and + /// equalities. + bool isColZero(unsigned pos) const; + /// A parameter that controls detection of an unrealistic number of /// constraints. 
If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h new file mode 100644 index 000000000000..6a72471d9697 --- /dev/null +++ b/mlir/include/mlir/Analysis/Presburger/Utils.h @@ -0,0 +1,40 @@ +//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H +#define MLIR_ANALYSIS_PRESBURGER_UTILS_H + +#include "mlir/Support/LLVM.h" + +namespace mlir { + +class FlatAffineConstraints; + +namespace presburger_utils { + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv +/// can be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. 
+Optional> +computeSingleVarRepr(const FlatAffineConstraints &cst, ArrayRef foundRepr, + unsigned pos, SmallVector ÷nd, + unsigned &divisor); + +} // namespace presburger_utils +} // namespace mlir + +#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index f4d857479cde..8fee0c8f4f3e 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,6 +13,7 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -700,14 +701,13 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, - unsigned colIdx, bool isEq, - unsigned *rowIdx) { - assert(colIdx < cst.getNumCols() && "position out of bounds"); +bool FlatAffineConstraints::findConstraintWithNonZeroAt( + unsigned colIdx, bool isEq, unsigned *rowIdx) const { + assert(colIdx < getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); + return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? cst.getNumEqualities() : cst.getNumInequalities(); + unsigned e = isEq ? 
getNumEqualities() : getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,145 +1203,6 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. -/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. 
-static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. - // Set `expr` to the dividend of the division except the constant term, which - // is set below. - expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. 
- expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -static Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, - const SmallVector &foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. - unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. - // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. 
- if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} - void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1378,8 +1239,9 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, - dividends[i], denominators[i])) { + if (auto res = presburger_utils::computeSingleVarRepr( + *this, foundRepr, divOffset + i, dividends[i], + denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1437,11 +1299,9 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1670,7 +1530,8 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); + auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, + dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -2109,7 +1970,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. 
unsigned idx; - if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { continue; } @@ -3447,12 +3308,10 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -/// Returns true if the pos^th column is all zero for both inequalities and -/// equalities.. -static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { +bool FlatAffineConstraints::isColZero(unsigned pos) const { unsigned rowPos; - return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3471,7 +3330,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(*this, /*pos=*/i)) + if (!memo[i] && !isColZero(/*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index dd8c8d96d872..0b84f031b4c0 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,6 +2,7 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp + Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp new file mode 100644 index 000000000000..14d04d36c24e --- /dev/null +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -0,0 +1,154 @@ +//===- Utils.cpp - General utilities for 
Presburger library ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/Analysis/AffineStructures.h" + +using namespace mlir; + +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. +/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. 
For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that that the +/// constraints are of the form: +/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to dividend of the division and `divisor` is +/// set to the denominator of the division. +static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + + // Extract divisor from the lower bound. + divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term. + unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implictly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. 
+ // Set `expr` to the dividend of the division except the constant term, which + // is set below. + expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. + expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +Optional> presburger_utils::computeSingleVarRepr( + const FlatAffineConstraints &cst, ArrayRef foundRepr, unsigned pos, + SmallVector ÷nd, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get divison representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. 
+ unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient. + if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} From 27a0718ad0a4a566720fc11a080a47752725e747 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sat, 25 Dec 2021 00:39:27 +0530 Subject: [PATCH 008/992] Revert "[MLIR][FlatAffineConstraints][NFC] Move some static functions to be available to Presburger/" This reverts commit 6c0eaefaf832745f509841afe4dd8a698671b86e. --- mlir/include/mlir/Analysis/AffineStructures.h | 10 - mlir/include/mlir/Analysis/Presburger/Utils.h | 40 ---- mlir/lib/Analysis/AffineStructures.cpp | 177 ++++++++++++++++-- mlir/lib/Analysis/Presburger/CMakeLists.txt | 1 - mlir/lib/Analysis/Presburger/Utils.cpp | 154 --------------- 5 files changed, 159 insertions(+), 223 deletions(-) delete mode 100644 mlir/include/mlir/Analysis/Presburger/Utils.h delete mode 100644 mlir/lib/Analysis/Presburger/Utils.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 089d8a9f1eeb..9e7ffb125f7e 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -419,16 +419,6 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); - /// Searches for a constraint with a non-zero coefficient at `colIdx` in - /// equality (isEq=true) or inequality (isEq=false) constraints. - /// Returns true and sets row found in search in `rowIdx`, false otherwise. 
- bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, - unsigned *rowIdx) const; - - /// Returns true if the pos^th column is all zero for both inequalities and - /// equalities. - bool isColZero(unsigned pos) const; - /// A parameter that controls detection of an unrealistic number of /// constraints. If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h deleted file mode 100644 index 6a72471d9697..000000000000 --- a/mlir/include/mlir/Analysis/Presburger/Utils.h +++ /dev/null @@ -1,40 +0,0 @@ -//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utility functions required by the Presburger Library. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H -#define MLIR_ANALYSIS_PRESBURGER_UTILS_H - -#include "mlir/Support/LLVM.h" - -namespace mlir { - -class FlatAffineConstraints; - -namespace presburger_utils { - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv -/// can be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. 
-Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, ArrayRef foundRepr, - unsigned pos, SmallVector ÷nd, - unsigned &divisor); - -} // namespace presburger_utils -} // namespace mlir - -#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 8fee0c8f4f3e..f4d857479cde 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,7 +13,6 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" -#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -701,13 +700,14 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -bool FlatAffineConstraints::findConstraintWithNonZeroAt( - unsigned colIdx, bool isEq, unsigned *rowIdx) const { - assert(colIdx < getNumCols() && "position out of bounds"); +static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, + unsigned colIdx, bool isEq, + unsigned *rowIdx) { + assert(colIdx < cst.getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); + return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? getNumEqualities() : getNumInequalities(); + unsigned e = isEq ? 
cst.getNumEqualities() : cst.getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,6 +1203,145 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. +/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that that the +/// constraints are of the form: +/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to dividend of the division and `divisor` is +/// set to the denominator of the division. 
+static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + + // Extract divisor from the lower bound. + divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term. + unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implictly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. + // Set `expr` to the dividend of the division except the constant term, which + // is set below. + expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. 
+ expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +static Optional> +computeSingleVarRepr(const FlatAffineConstraints &cst, + const SmallVector &foundRepr, unsigned pos, + SmallVector ÷nd, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get divison representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. + unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient. 
+ if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} + void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1239,9 +1378,8 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = presburger_utils::computeSingleVarRepr( - *this, foundRepr, divOffset + i, dividends[i], - denominators[i])) { + if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, + dividends[i], denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1299,9 +1437,11 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { + if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, + &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. - if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { + if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, + &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1530,8 +1670,7 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, - dividend, divisor); + auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -1970,7 +2109,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. 
unsigned idx; - if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { continue; } @@ -3308,10 +3447,12 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -bool FlatAffineConstraints::isColZero(unsigned pos) const { +/// Returns true if the pos^th column is all zero for both inequalities and +/// equalities.. +static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { unsigned rowPos; - return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3330,7 +3471,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(/*pos=*/i)) + if (!memo[i] && !isColZero(*this, /*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index 0b84f031b4c0..dd8c8d96d872 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,7 +2,6 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp - Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp deleted file mode 100644 index 14d04d36c24e..000000000000 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ /dev/null @@ -1,154 +0,0 @@ -//===- Utils.cpp - General utilities for 
Presburger library ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utility functions required by the Presburger Library. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Analysis/Presburger/Utils.h" -#include "mlir/Analysis/AffineStructures.h" - -using namespace mlir; - -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. -/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. 
For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. -static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. 
- // Set `expr` to the dividend of the division except the constant term, which - // is set below. - expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. - expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -Optional> presburger_utils::computeSingleVarRepr( - const FlatAffineConstraints &cst, ArrayRef foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. 
- unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. - // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. - if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} From 8ea64d5585ec3a0a52db20c9e57ac9bed9e80fc2 Mon Sep 17 00:00:00 2001 From: Gabriel Smith Date: Fri, 24 Dec 2021 11:38:55 -0800 Subject: [PATCH 009/992] [clang-format] Fix short enums getting wrapped even when denied Single-variant enums were still getting placed on a single line even when AllowShortEnumsOnASingleLine was false. This fixes that by checking that setting when looking to merge lines. Differential Revision: https://reviews.llvm.org/D116188 --- clang/lib/Format/UnwrappedLineFormatter.cpp | 9 ++++++++- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 3d4c1a4f903b..f652a4e7088f 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -393,11 +393,18 @@ class LineJoiner { // Try to merge a block with left brace wrapped that wasn't yet covered if (TheLine->Last->is(tok::l_brace)) { + const FormatToken *Tok = TheLine->First; bool ShouldMerge = false; - if (TheLine->First->isOneOf(tok::kw_class, tok::kw_struct)) { + if (Tok->is(tok::kw_typedef)) { + Tok = Tok->getNextNonComment(); + assert(Tok); + } + if (Tok->isOneOf(tok::kw_class, tok::kw_struct)) { ShouldMerge = !Style.BraceWrapping.AfterClass || (I[1]->First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyRecord); + } else if (Tok->is(tok::kw_enum)) { + ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else { ShouldMerge = 
!Style.BraceWrapping.AfterFunction || (I[1]->First->is(tok::r_brace) && diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ee486f452194..374f3865acc3 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2504,6 +2504,7 @@ TEST_F(FormatTest, ShortEnums) { FormatStyle Style = getLLVMStyle(); Style.AllowShortEnumsOnASingleLine = true; verifyFormat("enum { A, B, C } ShortEnum1, ShortEnum2;", Style); + verifyFormat("typedef enum { A, B, C } ShortEnum1, ShortEnum2;", Style); Style.AllowShortEnumsOnASingleLine = false; verifyFormat("enum {\n" " A,\n" @@ -2511,6 +2512,20 @@ TEST_F(FormatTest, ShortEnums) { " C\n" "} ShortEnum1, ShortEnum2;", Style); + verifyFormat("typedef enum {\n" + " A,\n" + " B,\n" + " C\n" + "} ShortEnum1, ShortEnum2;", + Style); + verifyFormat("enum {\n" + " A,\n" + "} ShortEnum1, ShortEnum2;", + Style); + verifyFormat("typedef enum {\n" + " A,\n" + "} ShortEnum1, ShortEnum2;", + Style); Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.AfterEnum = true; verifyFormat("enum\n" From e1b6b5be462ee2f197737162fc2a7d23e9a2eab6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 12:09:48 -0800 Subject: [PATCH 010/992] [ELF] Avoid referencing SectionBase::repl after ICF It is fairly easy to forget SectionBase::repl after ICF. Let ICF rewrite a Defined symbol's `section` field to avoid references to SectionBase::repl in subsequent passes. This slightly improves the --icf=none performance due to less indirection (maybe for --icf={safe,all} as well if most symbols are Defined). With this change, there is only one reference to `repl` (--gdb-index D89751). We can undo f4fb5fd7523f8e3c3b3966d43c0a28457b59d1d8 (`Move Repl to SectionBase.`) but move `repl` to `InputSection` instead. 
Reviewed By: ikudrin Differential Revision: https://reviews.llvm.org/D116093 --- lld/ELF/CallGraphSort.cpp | 4 ++-- lld/ELF/ICF.cpp | 16 ++++++++++++++++ lld/ELF/InputSection.cpp | 13 ++++++------- lld/ELF/InputSection.h | 17 +++++++---------- lld/ELF/Relocations.cpp | 4 ++-- lld/ELF/Symbols.cpp | 3 +-- lld/ELF/Symbols.h | 8 ++++++-- lld/ELF/SyntheticSections.cpp | 2 +- lld/ELF/Writer.cpp | 5 ++--- 9 files changed, 43 insertions(+), 29 deletions(-) diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index aa00d6eadbf9..5b07f0e18c8a 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -114,8 +114,8 @@ CallGraphSort::CallGraphSort() { // Create the graph. for (std::pair &c : profile) { - const auto *fromSB = cast(c.first.first->repl); - const auto *toSB = cast(c.first.second->repl); + const auto *fromSB = cast(c.first.first); + const auto *toSB = cast(c.first.second); uint64_t weight = c.second; // Ignore edges between input sections belonging to different output diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 0ec748e8f990..ec63d2ef4d6f 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -550,6 +550,22 @@ template void ICF::run() { } }); + // Change Defined symbol's section field to the canonical one. + auto fold = [](Symbol *sym) { + if (auto *d = dyn_cast(sym)) + if (auto *sec = dyn_cast_or_null(d->section)) + if (sec->repl != d->section) { + d->section = sec->repl; + d->folded = true; + } + }; + for (Symbol *sym : symtab->symbols()) + fold(sym); + parallelForEach(objectFiles, [&](ELFFileBase *file) { + for (Symbol *sym : file->getLocalSymbols()) + fold(sym); + }); + // InputSectionDescription::sections is populated by processSectionCommands(). // ICF may fold some input sections assigned to output sections. Remove them. 
for (SectionCommand *cmd : script->sectionCommands) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index df8cc5f221a0..e3871260fe5b 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -442,7 +442,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { p->setSymbolAndType(0, 0, false); continue; } - SectionBase *section = d->section->repl; + SectionBase *section = d->section; if (!section->isLive()) { p->setSymbolAndType(0, 0, false); continue; @@ -948,10 +948,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // // If the referenced symbol is discarded (made Undefined), or the // section defining the referenced symbol is garbage collected, - // sym.getOutputSection() is nullptr. `ds->section->repl != ds->section` - // catches the ICF folded case. However, resolving a relocation in - // .debug_line to -1 would stop debugger users from setting breakpoints on - // the folded-in function, so exclude .debug_line. + // sym.getOutputSection() is nullptr. `ds->folded` catches the ICF folded + // case. However, resolving a relocation in .debug_line to -1 would stop + // debugger users from setting breakpoints on the folded-in function, so + // exclude .debug_line. // // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value // (base address selection entry), use 1 (which is used by GNU ld for @@ -960,8 +960,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // TODO To reduce disruption, we use 0 instead of -1 as the tombstone // value. Enable -1 in a future release. auto *ds = dyn_cast(&sym); - if (!sym.getOutputSection() || - (ds && ds->section->repl != ds->section && !isDebugLine)) { + if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. const uint64_t value = tombstone ? SignExtend64(*tombstone) : (isDebugLocOrRanges ? 
1 : 0); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 5a0dd78f0e55..5319830b5d80 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -52,13 +52,6 @@ class SectionBase { StringRef name; - // This pointer points to the "real" instance of this instance. - // Usually Repl == this. However, if ICF merges two sections, - // Repl pointer of one section points to another section. So, - // if you need to get a pointer to this instance, do not use - // this but instead this->Repl. - SectionBase *repl; - uint8_t sectionKind : 3; // The next two bit fields are only used by InputSectionBase, but we @@ -102,9 +95,9 @@ class SectionBase { constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, uint32_t entsize, uint32_t alignment, uint32_t type, uint32_t info, uint32_t link) - : name(name), repl(this), sectionKind(sectionKind), bss(false), - keepUnique(false), partition(0), alignment(alignment), flags(flags), - entsize(entsize), type(type), link(link), info(info) {} + : name(name), sectionKind(sectionKind), bss(false), keepUnique(false), + partition(0), alignment(alignment), flags(flags), entsize(entsize), + type(type), link(link), info(info) {} }; // This corresponds to a section of an input file. @@ -367,6 +360,10 @@ class InputSection : public InputSectionBase { template void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef rels); + // Points to the canonical section. If ICF folds two sections, repl pointer of + // one section points to the other. + InputSection *repl = this; + // Used by ICF. uint32_t eqClass[2] = {0, 0}; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index c4438be0cb59..38e0d84e6271 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -2004,8 +2004,8 @@ std::pair ThunkCreator::getThunk(InputSection *isec, // non-Thunk target, so we cannot fold offset + addend. 
if (auto *d = dyn_cast(rel.sym)) if (!d->isInPlt() && d->section) - thunkVec = &thunkedSymbolsBySectionAndAddend[{ - {d->section->repl, d->value}, keyAddend}]; + thunkVec = &thunkedSymbolsBySectionAndAddend[{{d->section, d->value}, + keyAddend}]; if (!thunkVec) thunkVec = &thunkedSymbols[{rel.sym, keyAddend}]; diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index f6f0ad0087d7..23f8d3ef545e 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -78,7 +78,6 @@ static uint64_t getSymVA(const Symbol &sym, int64_t addend) { return d.value; assert(isec != &InputSection::discarded); - isec = isec->repl; uint64_t offset = d.value; @@ -348,7 +347,7 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { report(": unable to order absolute symbol: "); else if (d && isa(d->section)) report(": unable to order synthetic symbol: "); - else if (d && !d->section->repl->isLive()) + else if (d && !d->section->isLive()) report(": unable to order discarded symbol: "); } diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index beb45ec14147..e5fe53c6c496 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -248,8 +248,9 @@ class Symbol { exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false), canInline(false), referenced(false), traced(false), isInIplt(false), gotInIgot(false), isPreemptible(false), used(!config->gcSections), - needsTocRestore(false), scriptDefined(false), needsCopy(false), - needsGot(false), needsPlt(false), hasDirectReloc(false) {} + folded(false), needsTocRestore(false), scriptDefined(false), + needsCopy(false), needsGot(false), needsPlt(false), + hasDirectReloc(false) {} public: // True if this symbol is in the Iplt sub-section of the Plt and the Igot @@ -269,6 +270,9 @@ class Symbol { // which are referenced by relocations when -r or --emit-relocs is given. uint8_t used : 1; + // True if defined relative to a section discarded by ICF. 
+ uint8_t folded : 1; + // True if a call to this symbol needs to be followed by a restore of the // PPC64 toc pointer. uint8_t needsTocRestore : 1; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 872dd0e612af..e480118f5ae9 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -391,7 +391,7 @@ Defined *EhFrameSection::isFdeLive(EhSectionPiece &fde, ArrayRef rels) { // FDEs for garbage-collected or merged-by-ICF sections, or sections in // another partition, are dead. if (auto *d = dyn_cast(&b)) - if (d->section && d->section->partition == partition) + if (!d->folded && d->section && d->section->partition == partition) return d; return nullptr; } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 221601d15c2f..497e56886b72 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -679,7 +679,6 @@ static bool includeInSymtab(const Symbol &b) { SectionBase *sec = d->section; if (!sec) return true; - sec = sec->repl; // Exclude symbols pointing to garbage-collected sections. 
if (isa(sec) && !sec->isLive()) @@ -1302,7 +1301,7 @@ static DenseMap buildSectionOrder() { if (auto *d = dyn_cast(&sym)) { if (auto *sec = dyn_cast_or_null(d->section)) { - int &priority = sectionOrder[cast(sec->repl)]; + int &priority = sectionOrder[cast(sec)]; priority = std::min(priority, ent.priority); } } @@ -1725,7 +1724,7 @@ static void fixSymbolsAfterShrinking() { if (!sec) return; - const InputSectionBase *inputSec = dyn_cast(sec->repl); + const InputSectionBase *inputSec = dyn_cast(sec); if (!inputSec || !inputSec->bytesDropped) return; From e694180033d1e9d6e215bbc2f956092d30c1e3cd Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 12:28:59 -0800 Subject: [PATCH 011/992] [ELF] Optimize --wrap to only check non-local symbols --- lld/ELF/Driver.cpp | 9 ++++----- lld/ELF/InputFiles.h | 8 +++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7d01b7f33dec..e4d3e1d50b0f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2120,11 +2120,10 @@ static void redirectSymbols(ArrayRef wrapped) { return; // Update pointers in input files. - parallelForEach(objectFiles, [&](InputFile *file) { - MutableArrayRef syms = file->getMutableSymbols(); - for (size_t i = 0, e = syms.size(); i != e; ++i) - if (Symbol *s = map.lookup(syms[i])) - syms[i] = s; + parallelForEach(objectFiles, [&](ELFFileBase *file) { + for (Symbol *&sym : file->getMutableGlobalSymbols()) + if (Symbol *s = map.lookup(sym)) + sym = s; }); // Update pointers in the symbol table. diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index a031ef94c14d..6111df521840 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -91,9 +91,7 @@ class InputFile { // Returns object file symbols. It is a runtime error to call this // function on files of other types. 
- ArrayRef getSymbols() { return getMutableSymbols(); } - - MutableArrayRef getMutableSymbols() { + ArrayRef getSymbols() const { assert(fileKind == BinaryKind || fileKind == ObjKind || fileKind == BitcodeKind); return symbols; @@ -186,6 +184,10 @@ class ELFFileBase : public InputFile { ArrayRef getGlobalSymbols() { return llvm::makeArrayRef(symbols).slice(firstGlobal); } + MutableArrayRef getMutableGlobalSymbols() { + return llvm::makeMutableArrayRef(symbols.data(), symbols.size()) + .slice(firstGlobal); + } template typename ELFT::SymRange getELFSyms() const { return typename ELFT::SymRange( From 5e3403bd22039d043d3ffc8ab23255f8b9207b2b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 13:16:34 -0800 Subject: [PATCH 012/992] [ELF] parseLazy: skip local symbols --- lld/ELF/InputFiles.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f2128c84f453..964898fb790e 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1800,7 +1800,7 @@ template void ObjFile::parseLazy() { // resolve() may trigger this->extract() if an existing symbol is an undefined // symbol. If that happens, this function has served its purpose, and we can // exit from the loop early. 
- for (Symbol *sym : symbols) + for (Symbol *sym : makeArrayRef(symbols).slice(firstGlobal)) if (sym) { sym->resolve(LazyObject{*this, sym->getName()}); if (!lazy) From a9e8b1ee7fd44b53c555a7823ae8fd1a8209c520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Fri, 24 Dec 2021 22:25:32 +0100 Subject: [PATCH 013/992] [mlir] Fully qualify default types used in parser code --- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 2b5767e32567..adbd8407af99 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -1291,7 +1291,7 @@ void OperationFormat::genElementParser(Element *element, MethodBody &body, llvm::raw_string_ostream os(attrTypeStr); os << tgfmt(*typeBuilder, &attrTypeCtx); } else { - attrTypeStr = "Type{}"; + attrTypeStr = "::mlir::Type{}"; } if (var->attr.isOptional()) { body << formatv(optionalAttrParserCode, var->name, attrTypeStr); @@ -1375,7 +1375,7 @@ void OperationFormat::genElementParser(Element *element, MethodBody &body, listName); }) .Default([&](auto operand) { - body << formatv(typeParserCode, "Type", listName); + body << formatv(typeParserCode, "::mlir::Type", listName); }); } } else if (auto *dir = dyn_cast(element)) { @@ -1517,7 +1517,7 @@ void OperationFormat::genParserOperandTypeResolution( // once. Use llvm::concat to perform the merge. llvm::concat does not allow // the case of a single range, so guard it here. 
if (op.getNumOperands() > 1) { - body << "::llvm::concat("; + body << "::llvm::concat("; llvm::interleaveComma( llvm::seq(0, op.getNumOperands()), body, [&](int i) { body << "::llvm::ArrayRef<::mlir::Type>("; From d63016a86548e8231002a760bbe9eb817cd1eb00 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 15:41:56 -0800 Subject: [PATCH 014/992] [CMake] Revert -Wl,-O3 This reverts 8cb7876cb366b5803ca35d92313ea00eadf29b78 and follow-ups. GNU ld/gold/ld.lld -O has nothing to do with any code related linker optimizations. It has very small benefit (save 144Ki (.hash, .gnu_hash) with GNU ld, save 0.7% .debug_str with gold/ld.lld) while it makes gold/ld.lld significantly slower when linking RelWithDebInfo clang (gold: 16.437 vs 19.488; ld.lld: 1.882 vs 4.881). --- llvm/cmake/modules/AddLLVM.cmake | 8 -------- 1 file changed, 8 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 1d97626d69cc..327b8e0ba2e7 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -254,14 +254,6 @@ function(add_link_opts target_name) # Don't use linker optimizations in debug builds since it slows down the # linker in a context where the optimizations are not important. if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - - # Pass -O3 to the linker. This enabled different optimizations on different - # linkers. - if(NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin|SunOS|AIX|OS390" OR WIN32) AND in_distribution) - set_property(TARGET ${target_name} APPEND_STRING PROPERTY - LINK_FLAGS " -Wl,-O3") - endif() - if(NOT LLVM_NO_DEAD_STRIP) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # ld64's implementation of -dead_strip breaks tools that use plugins. From 2709fd1520bca98667db9c10b3156cac892949bc Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 24 Dec 2021 16:51:54 -0500 Subject: [PATCH 015/992] [MLIR][LLVM] Add MemmoveOp to LLVM Dialect LLVM Dialect in MLIR doesn't have a memmove op. This adds one. 
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116274 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 ++++ mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index eaa1c0b42bac..f671c3ca1dae 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1447,6 +1447,10 @@ def LLVM_MemcpyInlineOp : LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1, 2]> { let arguments = (ins LLVM_Type:$dst, LLVM_Type:$src, LLVM_Type:$len, LLVM_Type:$isVolatile); } +def LLVM_MemmoveOp : LLVM_ZeroResultIntrOp<"memmove", [0, 1, 2]> { + let arguments = (ins LLVM_Type:$dst, LLVM_Type:$src, LLVM_Type:$len, + LLVM_Type:$isVolatile); +} def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2]> { let arguments = (ins LLVM_Type:$dst, LLVM_Type:$val, LLVM_Type:$len, diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index ebb59aee530e..e1eff69a4251 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -350,6 +350,14 @@ llvm.func @memcpy_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { llvm.return } +// CHECK-LABEL: @memmove_test +llvm.func @memmove_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { + %i1 = llvm.mlir.constant(false) : i1 + // CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 {{.*}}) + "llvm.intr.memmove"(%arg2, %arg3, %arg0, %i1) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + llvm.return +} + // CHECK-LABEL: @memset_test llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) { %i1 = llvm.mlir.constant(false) : i1 From b5a0f0f397c778cc7db71754c1b9c939f669568e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 17:10:38 -0800 Subject: [PATCH 016/992] [ELF] Add 
ELFFileBase::{elfShdrs,numELFShdrs} to avoid duplicate llvm::object::ELFFile::sections() This mainly avoid `relsOrRelas` cost in `InputSectionBase::relocate`. `llvm::object::ELFFile::sections()` has redundant and expensive checks. --- lld/ELF/DWARF.cpp | 3 +-- lld/ELF/Driver.cpp | 9 ++++----- lld/ELF/InputFiles.cpp | 9 +++++---- lld/ELF/InputFiles.h | 6 ++++++ lld/ELF/InputSection.cpp | 11 +++++------ lld/ELF/Relocations.cpp | 4 ++-- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 4d84c09a0185..789820ba7a8e 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -27,8 +27,7 @@ using namespace lld::elf; template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { // Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below. - ArrayRef objSections = - CHECK(obj->getObj().sections(), obj); + ArrayRef objSections = obj->template getELFShdrs(); assert(objSections.size() == obj->getSections().size()); for (auto it : llvm::enumerate(obj->getSections())) { InputSectionBase *sec = it.value(); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index e4d3e1d50b0f..19266cb280b9 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -846,14 +846,13 @@ static bool processCallGraphRelocations(SmallVector &symbolIndices, ArrayRef &cgProfile, ObjFile *inputObj) { - symbolIndices.clear(); - const ELFFile &obj = inputObj->getObj(); - ArrayRef> objSections = - CHECK(obj.sections(), "could not retrieve object sections"); - if (inputObj->cgProfileSectionIndex == SHN_UNDEF) return false; + ArrayRef> objSections = + inputObj->template getELFShdrs(); + symbolIndices.clear(); + const ELFFile &obj = inputObj->getObj(); cgProfile = check(obj.template getSectionContentsAsArray( objSections[inputObj->cgProfileSectionIndex])); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 964898fb790e..0badf2c55e5b 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -370,6 +370,8 @@ template void 
ELFFileBase::init() { abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; ArrayRef sections = CHECK(obj.sections(), this); + elfShdrs = sections.data(); + numELFShdrs = sections.size(); // Find a symbol table. bool isDSO = @@ -477,8 +479,7 @@ bool ObjFile::shouldMerge(const Elf_Shdr &sec, StringRef name) { // When the option is given, we link "just symbols". The section table is // initialized with null pointers. template void ObjFile::initializeJustSymbols() { - ArrayRef sections = CHECK(this->getObj().sections(), this); - this->sections.resize(sections.size()); + sections.resize(numELFShdrs); } // An ELF object file may contain a `.deplibs` section. If it exists, the @@ -544,7 +545,7 @@ template void ObjFile::initializeSections(bool ignoreComdats) { const ELFFile &obj = this->getObj(); - ArrayRef objSections = CHECK(obj.sections(), this); + ArrayRef objSections = getELFShdrs(); StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); uint64_t size = objSections.size(); this->sections.resize(size); @@ -1410,7 +1411,7 @@ template void SharedFile::parse() { ArrayRef dynamicTags; const ELFFile obj = this->getObj(); - ArrayRef sections = CHECK(obj.sections(), this); + ArrayRef sections = getELFShdrs(); const Elf_Shdr *versymSec = nullptr; const Elf_Shdr *verdefSec = nullptr; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 6111df521840..f58e76e48433 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -189,6 +189,10 @@ class ELFFileBase : public InputFile { .slice(firstGlobal); } + template typename ELFT::ShdrRange getELFShdrs() const { + return typename ELFT::ShdrRange( + reinterpret_cast(elfShdrs), numELFShdrs); + } template typename ELFT::SymRange getELFSyms() const { return typename ELFT::SymRange( reinterpret_cast(elfSyms), numELFSyms); @@ -201,7 +205,9 @@ class ELFFileBase : public InputFile { // Initializes this class's member variables. 
template void init(); + const void *elfShdrs = nullptr; const void *elfSyms = nullptr; + uint32_t numELFShdrs = 0; uint32_t numELFSyms = 0; uint32_t firstGlobal = 0; StringRef stringTable; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index e3871260fe5b..33a42ff3f4a5 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -163,16 +163,16 @@ template RelsOrRelas InputSectionBase::relsOrRelas() const { if (relSecIdx == 0) return {}; RelsOrRelas ret; - const ELFFile obj = cast(file)->getObj(); - typename ELFT::Shdr shdr = cantFail(obj.sections())[relSecIdx]; + typename ELFT::Shdr shdr = + cast(file)->getELFShdrs()[relSecIdx]; if (shdr.sh_type == SHT_REL) { ret.rels = makeArrayRef(reinterpret_cast( - obj.base() + shdr.sh_offset), + file->mb.getBufferStart() + shdr.sh_offset), shdr.sh_size / sizeof(typename ELFT::Rel)); } else { assert(shdr.sh_type == SHT_RELA); ret.relas = makeArrayRef(reinterpret_cast( - obj.base() + shdr.sh_offset), + file->mb.getBufferStart() + shdr.sh_offset), shdr.sh_size / sizeof(typename ELFT::Rela)); } return ret; @@ -433,8 +433,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { sec->name != ".gcc_except_table" && sec->name != ".got2" && sec->name != ".toc") { uint32_t secIdx = cast(sym).discardedSecIdx; - Elf_Shdr_Impl sec = - CHECK(file->getObj().sections(), file)[secIdx]; + Elf_Shdr_Impl sec = file->template getELFShdrs()[secIdx]; warn("relocation refers to a discarded section: " + CHECK(file->getObj().getSectionName(sec), file) + "\n>>> referenced by " + getObjMsg(p->r_offset)); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 38e0d84e6271..23612ec48ded 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -472,8 +472,8 @@ static std::string maybeReportDiscarded(Undefined &sym) { if (!file || !sym.discardedSecIdx || file->getSections()[sym.discardedSecIdx] != &InputSection::discarded) return ""; - ArrayRef> objSections = - 
CHECK(file->getObj().sections(), file); + ArrayRef objSections = + file->template getELFShdrs(); std::string msg; if (sym.type == ELF::STT_SECTION) { From 745420d3f4b050282e65cdf7893050fc90bf9c8a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 17:54:12 -0800 Subject: [PATCH 017/992] [ELF] Cache global variable `target` in relocate* This avoid repeated load of the unique_ptr in hot paths. --- lld/ELF/InputSection.cpp | 45 +++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 33a42ff3f4a5..e1ee3def89f3 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -395,6 +395,7 @@ InputSectionBase *InputSection::getRelocatedSection() const { // for each relocation. So we copy relocations one by one. template void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { + const TargetInfo &target = *elf::target; InputSectionBase *sec = getRelocatedSection(); for (const RelTy &rel : rels) { @@ -450,10 +451,10 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { int64_t addend = getAddend(rel); const uint8_t *bufLoc = sec->data().begin() + rel.r_offset; if (!RelTy::IsRela) - addend = target->getImplicitAddend(bufLoc, type); + addend = target.getImplicitAddend(bufLoc, type); if (config->emachine == EM_MIPS && - target->getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { + target.getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. By default, // this value has 0x7ff0 offset from a .got section. 
But // relocatable files produced by a compiler or a linker @@ -470,7 +471,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { if (RelTy::IsRela) p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; - else if (config->relocatable && type != target->noneRel) + else if (config->relocatable && type != target.noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && p->r_addend >= 0x8000 && sec->file->ppc32Got2) { @@ -864,6 +865,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, template void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; + const TargetInfo &target = *elf::target; const bool isDebug = isDebugSection(*this); const bool isDebugLocOrRanges = isDebug && (name == ".debug_loc" || name == ".debug_ranges"); @@ -889,16 +891,16 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { uint8_t *bufLoc = buf + offset; int64_t addend = getAddend(rel); if (!RelTy::IsRela) - addend += target->getImplicitAddend(bufLoc, type); + addend += target.getImplicitAddend(bufLoc, type); Symbol &sym = getFile()->getRelocTargetSym(rel); - RelExpr expr = target->getRelExpr(type, sym, bufLoc); + RelExpr expr = target.getRelExpr(type, sym, bufLoc); if (expr == R_NONE) continue; if (expr == R_SIZE) { - target->relocateNoSym(bufLoc, type, - SignExtend64(sym.getSize() + addend)); + target.relocateNoSym(bufLoc, type, + SignExtend64(sym.getSize() + addend)); continue; } @@ -922,14 +924,14 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // address 0. For bug-compatibilty, we accept them with warnings. We // know Steel Bank Common Lisp as of 2018 have this bug. 
warn(msg); - target->relocateNoSym( + target.relocateNoSym( bufLoc, type, SignExtend64(sym.getVA(addend - offset - outSecOff))); continue; } if (tombstone || - (isDebug && (type == target->symbolicRel || expr == R_DTPREL))) { + (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) { // Resolve relocations in .debug_* referencing (discarded symbols or ICF // folded section symbols) to a tombstone value. Resolving to addend is // unsatisfactory because the result address range may collide with a @@ -963,11 +965,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // If -z dead-reloc-in-nonalloc= is specified, respect it. const uint64_t value = tombstone ? SignExtend64(*tombstone) : (isDebugLocOrRanges ? 1 : 0); - target->relocateNoSym(bufLoc, type, value); + target.relocateNoSym(bufLoc, type, value); continue; } } - target->relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); + target.relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); } } @@ -1013,6 +1015,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { assert(flags & SHF_ALLOC); const unsigned bits = config->wordsize * 8; + const TargetInfo &target = *elf::target; uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1); for (const Relocation &rel : relocations) { @@ -1031,7 +1034,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { switch (rel.expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - target->relaxGot(bufLoc, rel, targetVA); + target.relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after @@ -1044,7 +1047,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { lastPPCRelaxedRelocOff = offset; if (rel.type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff) break; - target->relaxGot(bufLoc, rel, targetVA); + target.relaxGot(bufLoc, rel, targetVA); 
break; } case R_PPC64_RELAX_TOC: @@ -1055,25 +1058,25 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { // opportunities but is safe. if (ppc64noTocRelax.count({rel.sym, rel.addend}) || !tryRelaxPPC64TocIndirection(rel, bufLoc)) - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: - target->relaxTlsIeToLe(bufLoc, rel, targetVA); + target.relaxTlsIeToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_LD_TO_LE_ABS: - target->relaxTlsLdToLe(bufLoc, rel, targetVA); + target.relaxTlsLdToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: - target->relaxTlsGdToLe(bufLoc, rel, targetVA); + target.relaxTlsGdToLe(bufLoc, rel, targetVA); break; case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOTPLT: - target->relaxTlsGdToIe(bufLoc, rel, targetVA); + target.relaxTlsGdToIe(bufLoc, rel, targetVA); break; case R_PPC64_CALL: // If this is a call to __tls_get_addr, it may be part of a TLS @@ -1098,10 +1101,10 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { } write32(bufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; default: - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; } } @@ -1114,7 +1117,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { for (const JumpInstrMod &jumpMod : jumpInstrMods) { uint64_t offset = jumpMod.offset; uint8_t *bufLoc = buf + offset; - target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); + target.applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); } } } From 40fae4d8fcbd6224f16fdf3ba84a0d89b713e7d5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 19:01:50 -0800 Subject: [PATCH 
018/992] [ELF] Optimize replaceCommonSymbols This decreases the 0.2% time (no debug info) to nearly no. --- lld/ELF/Driver.cpp | 22 +++++++++++++--------- lld/ELF/InputFiles.cpp | 1 + lld/ELF/InputFiles.h | 5 ++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 19266cb280b9..6b689f50cce7 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1821,17 +1821,21 @@ static void writeDependencyFile() { // symbols of type CommonSymbol. static void replaceCommonSymbols() { llvm::TimeTraceScope timeScope("Replace common symbols"); - for (Symbol *sym : symtab->symbols()) { - auto *s = dyn_cast(sym); - if (!s) + for (ELFFileBase *file : objectFiles) { + if (!file->hasCommonSyms) continue; + for (Symbol *sym : file->getGlobalSymbols()) { + auto *s = dyn_cast(sym); + if (!s) + continue; - auto *bss = make("COMMON", s->size, s->alignment); - bss->file = s->file; - bss->markDead(); - inputSections.push_back(bss); - s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, - /*value=*/0, s->size, bss}); + auto *bss = make("COMMON", s->size, s->alignment); + bss->file = s->file; + bss->markDead(); + inputSections.push_back(bss); + s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, + /*value=*/0, s->size, bss}); + } } } diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 0badf2c55e5b..e321b0d82920 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1123,6 +1123,7 @@ template void ObjFile::initializeSymbols() { if (value == 0 || value >= UINT32_MAX) fatal(toString(this) + ": common symbol '" + name + "' has invalid alignment: " + Twine(value)); + hasCommonSyms = true; sym->resolve( CommonSymbol{this, name, binding, stOther, type, value, size}); continue; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index f58e76e48433..d622390fcade 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -205,12 +205,15 @@ class ELFFileBase : 
public InputFile { // Initializes this class's member variables. template void init(); + StringRef stringTable; const void *elfShdrs = nullptr; const void *elfSyms = nullptr; uint32_t numELFShdrs = 0; uint32_t numELFSyms = 0; uint32_t firstGlobal = 0; - StringRef stringTable; + +public: + bool hasCommonSyms = false; }; // .o file. From a8cbddc99411f9734958f974263aed7bf0113ec9 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 19:51:10 -0800 Subject: [PATCH 019/992] [CodeGen] Fix a memory leak --- polly/lib/CodeGen/IslAst.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index ab0bcdaf8ef5..6497275df610 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -671,7 +671,7 @@ IslAstInfo IslAstAnalysis::run(Scop &S, ScopAnalysisManager &SAM, return SAM.getResult(S, SAR).getDependences(Lvl); }; - return std::move(*runIslAst(S, GetDeps).release()); + return std::move(*runIslAst(S, GetDeps)); } static __isl_give isl_printer *cbPrintUser(__isl_take isl_printer *P, From 9c0a4227a9ca7a2e4dc63ae27ee3868efdedb7ea Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 20:57:40 -0800 Subject: [PATCH 020/992] Use Optional::getValueOr (NFC) --- clang/include/clang/APINotes/Types.h | 10 +++++----- clang/include/clang/AST/AbstractBasicReader.h | 2 +- clang/include/clang/AST/DeclTemplate.h | 11 +++++------ clang/lib/ASTMatchers/Dynamic/Parser.cpp | 2 +- clang/lib/CodeGen/CGObjC.cpp | 8 ++++---- lld/ELF/LinkerScript.cpp | 2 +- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 2 +- .../ctf/CommandObjectThreadTraceExportCTF.cpp | 3 +-- llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 2 +- llvm/lib/IR/Instructions.cpp | 2 +- llvm/lib/MC/MachObjectWriter.cpp | 4 ++-- 11 files changed, 23 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index d9bf2f07291f..0d97e9ad8623 100644 --- 
a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -240,7 +240,7 @@ class ObjCContextInfo : public CommonTypeInfo { } void setSwiftImportAsNonGeneric(llvm::Optional Value) { SwiftImportAsNonGenericSpecified = Value.hasValue(); - SwiftImportAsNonGeneric = Value.hasValue() ? *Value : false; + SwiftImportAsNonGeneric = Value.getValueOr(false); } llvm::Optional getSwiftObjCMembers() const { @@ -249,7 +249,7 @@ class ObjCContextInfo : public CommonTypeInfo { } void setSwiftObjCMembers(llvm::Optional Value) { SwiftObjCMembersSpecified = Value.hasValue(); - SwiftObjCMembers = Value.hasValue() ? *Value : false; + SwiftObjCMembers = Value.getValueOr(false); } /// Strip off any information within the class information structure that is @@ -368,7 +368,7 @@ class ObjCPropertyInfo : public VariableInfo { } void setSwiftImportAsAccessors(llvm::Optional Value) { SwiftImportAsAccessorsSpecified = Value.hasValue(); - SwiftImportAsAccessors = Value.hasValue() ? *Value : false; + SwiftImportAsAccessors = Value.getValueOr(false); } friend bool operator==(const ObjCPropertyInfo &, const ObjCPropertyInfo &); @@ -433,7 +433,7 @@ class ParamInfo : public VariableInfo { } void setNoEscape(llvm::Optional Value) { NoEscapeSpecified = Value.hasValue(); - NoEscape = Value.hasValue() ? *Value : false; + NoEscape = Value.getValueOr(false); } llvm::Optional getRetainCountConvention() const { @@ -671,7 +671,7 @@ class TagInfo : public CommonTypeInfo { } void setFlagEnum(llvm::Optional Value) { HasFlagEnum = Value.hasValue(); - IsFlagEnum = Value.hasValue() ? 
*Value : false; + IsFlagEnum = Value.getValueOr(false); } TagInfo &operator|=(const TagInfo &RHS) { diff --git a/clang/include/clang/AST/AbstractBasicReader.h b/clang/include/clang/AST/AbstractBasicReader.h index 5505d661b44e..442039044cfe 100644 --- a/clang/include/clang/AST/AbstractBasicReader.h +++ b/clang/include/clang/AST/AbstractBasicReader.h @@ -21,7 +21,7 @@ inline T makeNullableFromOptional(const Optional &value) { template inline T *makePointerFromOptional(Optional value) { - return (value ? *value : nullptr); + return value.getValueOr(nullptr); } // PropertyReader is a class concept that requires the following method: diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index d33babef958e..f7a2e3146d06 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1211,13 +1211,12 @@ class TemplateTypeParmDecl final : public TypeDecl, DefArgStorage DefaultArgument; TemplateTypeParmDecl(DeclContext *DC, SourceLocation KeyLoc, - SourceLocation IdLoc, IdentifierInfo *Id, - bool Typename, bool HasTypeConstraint, - Optional NumExpanded) + SourceLocation IdLoc, IdentifierInfo *Id, bool Typename, + bool HasTypeConstraint, Optional NumExpanded) : TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename), - HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false), - ExpandedParameterPack(NumExpanded), - NumExpanded(NumExpanded ? 
*NumExpanded : 0) {} + HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false), + ExpandedParameterPack(NumExpanded), + NumExpanded(NumExpanded.getValueOr(0)) {} public: static TemplateTypeParmDecl *Create(const ASTContext &C, DeclContext *DC, diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp index c6a77bb6c2e0..cab1476acf94 100644 --- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp @@ -645,7 +645,7 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, Tokenizer->SkipNewlines(); { - ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr); + ScopedContextEntry SCE(this, Ctor.getValueOr(nullptr)); while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index ac26f0d4232c..b5bcf157036d 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -3915,8 +3915,8 @@ static llvm::Value *emitIsPlatformVersionAtLeast(CodeGenFunction &CGF, Args.push_back( llvm::ConstantInt::get(CGM.Int32Ty, getBaseMachOPlatformID(TT))); Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor())); - Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Min ? *Min : 0)); - Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, SMin ? *SMin : 0)); + Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Min.getValueOr(0))); + Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, SMin.getValueOr(0))); }; assert(!Version.empty() && "unexpected empty version"); @@ -3952,8 +3952,8 @@ CodeGenFunction::EmitBuiltinAvailable(const VersionTuple &Version) { Optional Min = Version.getMinor(), SMin = Version.getSubminor(); llvm::Value *Args[] = { llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor()), - llvm::ConstantInt::get(CGM.Int32Ty, Min ? *Min : 0), - llvm::ConstantInt::get(CGM.Int32Ty, SMin ? 
*SMin : 0), + llvm::ConstantInt::get(CGM.Int32Ty, Min.getValueOr(0)), + llvm::ConstantInt::get(CGM.Int32Ty, SMin.getValueOr(0)) }; llvm::Value *CallRes = diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 999bb94d6416..e8f2ce4fdf1f 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -1338,7 +1338,7 @@ std::vector LinkerScript::createPhdrs() { // Process PHDRS and FILEHDR keywords because they are not // real output sections and cannot be added in the following loop. for (const PhdrsCommand &cmd : phdrsCommands) { - PhdrEntry *phdr = make(cmd.type, cmd.flags ? *cmd.flags : PF_R); + PhdrEntry *phdr = make(cmd.type, cmd.flags.getValueOr(PF_R)); if (cmd.hasFilehdr) phdr->add(Out::elfHeader); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index cece29dcf9ac..71d4c1e6c52f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -153,7 +153,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFAbbreviationDeclarationSet *GetAbbreviations() const; dw_offset_t GetAbbrevOffset() const; uint8_t GetAddressByteSize() const { return m_header.GetAddressByteSize(); } - dw_addr_t GetAddrBase() const { return m_addr_base ? 
*m_addr_base : 0; } + dw_addr_t GetAddrBase() const { return m_addr_base.getValueOr(0); } dw_addr_t GetBaseAddress() const { return m_base_addr; } dw_offset_t GetLineTableOffset(); dw_addr_t GetRangesBase() const { return m_ranges_base; } diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp index 919cdf46a5c0..a72e46a0b703 100644 --- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp +++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp @@ -73,8 +73,7 @@ bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command, if (thread == nullptr) { const uint32_t num_threads = process->GetThreadList().GetSize(); - size_t tid = m_options.m_thread_index ? *m_options.m_thread_index - : LLDB_INVALID_THREAD_ID; + size_t tid = m_options.m_thread_index.getValueOr(LLDB_INVALID_THREAD_ID); result.AppendErrorWithFormatv( "Thread index {0} is out of range (valid values are 1 - {1}).\n", tid, num_threads); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 9b7ffed2ca67..eed0a60ec75e 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -607,7 +607,7 @@ bool DWARFUnit::parseDWO() { DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase); if (getVersion() == 4) { auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWO->setRangesSection(RangeSection, DWORangesBase ? 
*DWORangesBase : 0); + DWO->setRangesSection(RangeSection, DWORangesBase.getValueOr(0)); } return true; diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index ce349f66c916..fb6105712d1a 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -4407,7 +4407,7 @@ void SwitchInstProfUpdateWrapper::addCase( Weights.getValue()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { Changed = true; - Weights.getValue().push_back(W ? *W : 0); + Weights.getValue().push_back(W.getValueOr(0)); } if (Weights) assert(SI.getNumSuccessors() == Weights->size() && diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index d18579ad4bfc..16941b1cb727 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -877,8 +877,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, [&](const MCAssembler::VersionInfoType &VersionInfo) { auto EncodeVersion = [](VersionTuple V) -> uint32_t { assert(!V.empty() && "empty version"); - unsigned Update = V.getSubminor() ? *V.getSubminor() : 0; - unsigned Minor = V.getMinor() ? 
*V.getMinor() : 0; + unsigned Update = V.getSubminor().getValueOr(0); + unsigned Minor = V.getMinor().getValueOr(0); assert(Update < 256 && "unencodable update target version"); assert(Minor < 256 && "unencodable minor target version"); assert(V.getMajor() < 65536 && "unencodable major target version"); From 62e48ed10f9d2328331378f7c070487e58346a7e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 21:22:27 -0800 Subject: [PATCH 021/992] Use isa instead of dyn_cast (NFC) --- clang-tools-extra/clang-doc/Mapper.cpp | 2 +- clang-tools-extra/clang-doc/Serialize.cpp | 6 +++--- clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp | 2 +- clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h | 2 +- clang/lib/Sema/SemaExprCXX.cpp | 2 +- lld/COFF/Driver.cpp | 2 +- lld/ELF/Symbols.cpp | 4 ++-- .../Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp | 2 +- .../Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp | 2 +- lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp | 2 +- lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 6 +++--- lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp index 790f11bb69c5..de7e4c341086 100644 --- a/clang-tools-extra/clang-doc/Mapper.cpp +++ b/clang-tools-extra/clang-doc/Mapper.cpp @@ -68,7 +68,7 @@ bool MapASTVisitor::VisitCXXMethodDecl(const CXXMethodDecl *D) { bool MapASTVisitor::VisitFunctionDecl(const FunctionDecl *D) { // Don't visit CXXMethodDecls twice - if (dyn_cast(D)) + if (isa(D)) return true; return mapDecl(D); } diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp index e132c56cb000..29762b6b54b1 100644 --- a/clang-tools-extra/clang-doc/Serialize.cpp +++ b/clang-tools-extra/clang-doc/Serialize.cpp @@ -382,7 +382,7 @@ populateParentNamespaces(llvm::SmallVector &Namespaces, // corresponds to 
a Record and if it doesn't have any namespace (because this // means it's in the global namespace). Also if its outermost namespace is a // record because that record matches the previous condition mentioned. - if ((Namespaces.empty() && dyn_cast(D)) || + if ((Namespaces.empty() && isa(D)) || (!Namespaces.empty() && Namespaces.back().RefType == InfoType::IT_record)) Namespaces.emplace_back(SymbolID(), "GlobalNamespace", InfoType::IT_namespace); @@ -419,10 +419,10 @@ static void populateFunctionInfo(FunctionInfo &I, const FunctionDecl *D, populateSymbolInfo(I, D, FC, LineNumber, Filename, IsFileInRootDir, IsInAnonymousNamespace); if (const auto *T = getDeclForType(D->getReturnType())) { - if (dyn_cast(T)) + if (isa(T)) I.ReturnType = TypeInfo(getUSRForDecl(T), T->getNameAsString(), InfoType::IT_enum, getInfoRelativePath(T)); - else if (dyn_cast(T)) + else if (isa(T)) I.ReturnType = TypeInfo(getUSRForDecl(T), T->getNameAsString(), InfoType::IT_record, getInfoRelativePath(T)); } else { diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 432d929057d1..b0b882be1a6c 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -829,7 +829,7 @@ bool LoopConvertCheck::isConvertible(ASTContext *Context, } else if (FixerKind == LFK_PseudoArray) { // This call is required to obtain the container. 
const auto *EndCall = Nodes.getNodeAs(EndCallName); - if (!EndCall || !dyn_cast(EndCall->getCallee())) + if (!EndCall || !isa(EndCall->getCallee())) return false; } return true; diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index 4a58fe870944..a0ae44131b45 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -155,7 +155,7 @@ class CFGWalker { return false; // Ignore anonymous functions. - if (!dyn_cast_or_null(AC.getDecl())) + if (!isa_and_nonnull(AC.getDecl())) return false; SortedGraph = AC.getAnalysis(); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d25f329f85e4..54f0242d2ca1 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1346,7 +1346,7 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, // implicitly capturing the *enclosing object* by reference (see loop // above)). 
assert((!ByCopy || - dyn_cast(FunctionScopes[MaxFunctionScopesIndex])) && + isa(FunctionScopes[MaxFunctionScopesIndex])) && "Only a lambda can capture the enclosing object (referred to by " "*this) by copy"); QualType ThisTy = getCurrentThisType(); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index f1b0c5c0707d..07b60673577e 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2085,7 +2085,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) - if (dyn_cast_or_null(ctx.symtab.find(arg->getValue()))) + if (isa_and_nonnull(ctx.symtab.find(arg->getValue()))) addUndefined(arg->getValue()); while (run()); } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 23f8d3ef545e..20301497a059 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -550,7 +550,7 @@ void Symbol::resolveUndefined(const Undefined &other) { } // Undefined symbols in a SharedFile do not change the binding. 
- if (dyn_cast_or_null(other.file)) + if (isa_and_nonnull(other.file)) return; if (isUndefined() || isShared()) { @@ -608,7 +608,7 @@ int Symbol::compare(const Symbol *other) const { auto *oldSym = cast(this); auto *newSym = cast(other); - if (dyn_cast_or_null(other->file)) + if (isa_and_nonnull(other->file)) return 0; if (!oldSym->section && !newSym->section && oldSym->value == newSym->value && diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp index 85e2fcfc838c..7844f27139cf 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp @@ -211,7 +211,7 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body, Stmt **last_stmt_ptr = Body->body_end() - 1; Stmt *last_stmt = *last_stmt_ptr; - while (dyn_cast(last_stmt)) { + while (isa(last_stmt)) { if (last_stmt_ptr != Body->body_begin()) { last_stmt_ptr--; last_stmt = *last_stmt_ptr; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp index a6e36d81b950..8b132b54b7e6 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp @@ -353,7 +353,7 @@ class ValidPointerChecker : public Instrumenter { } bool InspectInstruction(llvm::Instruction &i) override { - if (dyn_cast(&i) || dyn_cast(&i)) + if (isa(&i) || isa(&i)) RegisterInstruction(i); return true; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp index f80dc2b14467..e0e41925f7ef 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp @@ -1255,7 +1255,7 @@ bool IRForTarget::MaybeHandleVariable(Value *llvm_value_ptr) { 
m_decl_map->AddValueToStruct(named_decl, lldb_private::ConstString(name), llvm_value_ptr, *value_size, value_alignment); - } else if (dyn_cast(llvm_value_ptr)) { + } else if (isa(llvm_value_ptr)) { LLDB_LOG(log, "Function pointers aren't handled right now"); return false; diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 7ce2f1451580..9473befa6cc3 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -1280,15 +1280,15 @@ clang::QualType PdbAstBuilder::CreateFunctionType( } static bool isTagDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } static bool isFunctionDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } static bool isBlockDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } void PdbAstBuilder::ParseAllNamespacesPlusChildrenOf( diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index db0ae241be7e..c0547936b666 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -402,7 +402,7 @@ static size_t ParseFunctionBlocksForPDBSymbol( block = parent_block; else break; - } else if (llvm::dyn_cast(pdb_symbol)) { + } else if (llvm::isa(pdb_symbol)) { auto uid = pdb_symbol->getSymIndexId(); if (parent_block->FindBlockByID(uid)) break; From 76f0f1cc5c52359da5dd6dffff0c444400a1bca1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 21:43:06 -0800 Subject: [PATCH 022/992] Use {DenseSet,SetVector,SmallPtrSet}::contains (NFC) --- clang/lib/AST/ASTContext.cpp | 2 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 12 ++++++------ 
.../ExpressionParser/Clang/ClangASTImporter.cpp | 2 +- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +- .../Plugins/Process/Linux/NativeProcessLinux.cpp | 2 +- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- polly/lib/Support/SCEVValidator.cpp | 2 +- polly/lib/Transform/ScopInliner.cpp | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 58bd7b6a4a8c..008b703d4c1a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9272,7 +9272,7 @@ void getIntersectionOfProtocols(ASTContext &Context, // Remove any implied protocols from the list of inherited protocols. if (!ImpliedProtocols.empty()) { llvm::erase_if(IntersectionSet, [&](ObjCProtocolDecl *proto) -> bool { - return ImpliedProtocols.count(proto) > 0; + return ImpliedProtocols.contains(proto); }); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 0c71fee14025..e35c15421520 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -12788,7 +12788,7 @@ void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); for (const auto &Pair : It->DeclToUniqueName) { const auto *VD = cast(Pair.first->getCanonicalDecl()); - if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) + if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) continue; auto I = LPCI->getSecond().find(Pair.first); assert(I != LPCI->getSecond().end() && diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 1432607204bd..31e7ea3d243d 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1154,12 +1154,12 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, // Remove any macro definitions that are explicitly ignored by the module. 
// They aren't supposed to affect how the module is built anyway. HeaderSearchOptions &HSOpts = Invocation->getHeaderSearchOpts(); - llvm::erase_if( - PPOpts.Macros, [&HSOpts](const std::pair &def) { - StringRef MacroDef = def.first; - return HSOpts.ModulesIgnoreMacros.count( - llvm::CachedHashString(MacroDef.split('=').first)) > 0; - }); + llvm::erase_if(PPOpts.Macros, + [&HSOpts](const std::pair &def) { + StringRef MacroDef = def.first; + return HSOpts.ModulesIgnoreMacros.contains( + llvm::CachedHashString(MacroDef.split('=').first)); + }); // If the original compiler invocation had -fmodule-name, pass it through. Invocation->getLangOpts()->ModuleName = diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 80469e292580..719b35689feb 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -293,7 +293,7 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { NamedDecl *to_named_decl = dyn_cast(to); // Check if we already completed this type. - if (m_decls_already_completed.count(to_named_decl) != 0) + if (m_decls_already_completed.contains(to_named_decl)) return; // Queue this type to be completed. m_decls_to_complete.insert(to_named_decl); diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 7668b68650b4..e72d55dd2aba 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4647,7 +4647,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { // Add symbols from the trie to the symbol table. 
for (auto &e : external_sym_trie_entries) { - if (symbols_added.find(e.entry.address) != symbols_added.end()) + if (symbols_added.contains(e.entry.address)) continue; // Find the section that this trie address is in, use that to annotate diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index 279ffb1f2a9c..d7651ce71da0 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -874,7 +874,7 @@ void NativeProcessLinux::MonitorSignal(const siginfo_t &info, // Check if debugger should stop at this signal or just ignore it and resume // the inferior. - if (m_signals_to_ignore.find(signo) != m_signals_to_ignore.end()) { + if (m_signals_to_ignore.contains(signo)) { ResumeThread(thread, thread.GetState(), signo); return; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b3ba2aa13d37..4747f34fcc62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2963,7 +2963,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized // instruction could feed a poison value to the base address of the widen // load/store. - if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0) + if (State.MayGeneratePoisonRecipes.contains(RepRecipe)) Cloned->dropPoisonGeneratingFlags(); State.Builder.SetInsertPoint(Builder.GetInsertBlock(), @@ -9533,7 +9533,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { // exact, etc.). The control flow has been linearized and the // instruction is no longer guarded by the predicate, which could make // the flag properties to no longer hold. 
- if (State.MayGeneratePoisonRecipes.count(this) > 0) + if (State.MayGeneratePoisonRecipes.contains(this)) VecOp->dropPoisonGeneratingFlags(); } diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 002674375df0..8f175596d711 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -472,7 +472,7 @@ class SCEVInRegionDependences { // are strictly not necessary by tracking the invariant load as a // scalar. LoadInst *LI = dyn_cast(Inst); - if (LI && ILS.count(LI) > 0) + if (LI && ILS.contains(LI)) return false; } diff --git a/polly/lib/Transform/ScopInliner.cpp b/polly/lib/Transform/ScopInliner.cpp index ed54731c6b2a..b35d3518e72d 100644 --- a/polly/lib/Transform/ScopInliner.cpp +++ b/polly/lib/Transform/ScopInliner.cpp @@ -84,7 +84,7 @@ class ScopInliner : public CallGraphSCCPass { ScopDetection &SD = FAM.getResult(*F); const bool HasScopAsTopLevelRegion = - SD.ValidRegions.count(RI.getTopLevelRegion()) > 0; + SD.ValidRegions.contains(RI.getTopLevelRegion()); bool Changed = false; if (HasScopAsTopLevelRegion) { From 3cfe375ae43139839af01e29c3ec03654e98186b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 22:05:34 -0800 Subject: [PATCH 023/992] Use StringRef::contains (NFC) --- clang-tools-extra/clang-tidy/android/CloexecCheck.cpp | 2 +- .../clang-tidy/bugprone/ReservedIdentifierCheck.cpp | 2 +- .../clang-tidy/modernize/RawStringLiteralCheck.cpp | 2 +- .../clang-tidy/readability/BracesAroundStatementsCheck.cpp | 2 +- .../clang-tidy/readability/NamedParameterCheck.cpp | 2 +- clang/tools/driver/driver.cpp | 2 +- llvm/lib/Support/RISCVISAInfo.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp index 64c8797934d2..d373877713f1 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp 
@@ -87,7 +87,7 @@ void CloexecCheck::insertStringFlag( // Check if the may be in the mode string. const auto *ModeStr = dyn_cast(ModeArg->IgnoreParenCasts()); - if (!ModeStr || (ModeStr->getString().find(Mode) != StringRef::npos)) + if (!ModeStr || ModeStr->getString().contains(Mode)) return; std::string ReplacementText = buildFixMsgForStringFlag( diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp index 8da046955425..4bf841648f94 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp @@ -64,7 +64,7 @@ static std::string collapseConsecutive(StringRef Str, char C) { static bool hasReservedDoubleUnderscore(StringRef Name, const LangOptions &LangOpts) { if (LangOpts.CPlusPlus) - return Name.find("__") != StringRef::npos; + return Name.contains("__"); return Name.startswith("__"); } diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp index 26b1d8ecdc31..40dda98b1e49 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -25,7 +25,7 @@ bool containsEscapes(StringRef HayStack, StringRef Escapes) { return false; while (BackSlash != StringRef::npos) { - if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos) + if (!Escapes.contains(HayStack[BackSlash + 1])) return false; BackSlash = HayStack.find('\\', BackSlash + 2); } diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp index 7dc519c15282..07e962a07e84 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp @@ -81,7 +81,7 @@ 
static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM, SourceRange TokRange(Loc, TokEndLoc); StringRef Comment = Lexer::getSourceText( CharSourceRange::getTokenRange(TokRange), SM, Context->getLangOpts()); - if (Comment.startswith("/*") && Comment.find('\n') != StringRef::npos) { + if (Comment.startswith("/*") && Comment.contains('\n')) { // Multi-line block comment, insert brace before. break; } diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp index 4f81dc49ded7..c8a8edf67884 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp @@ -71,7 +71,7 @@ void NamedParameterCheck::check(const MatchFinder::MatchResult &Result) { const char *Begin = SM.getCharacterData(Parm->getBeginLoc()); const char *End = SM.getCharacterData(Parm->getLocation()); StringRef Data(Begin, End - Begin); - if (Data.find("/*") != StringRef::npos) + if (Data.contains("/*")) continue; UnnamedParams.push_back(std::make_pair(Function, I)); diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index c9129ee9e502..a7bfb07e002b 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -120,7 +120,7 @@ static void ApplyOneQAOverride(raw_ostream &OS, OS << "### Adding argument " << Str << " at end\n"; Args.push_back(Str); } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { + Edit.slice(2, Edit.size() - 1).contains('/')) { StringRef MatchPattern = Edit.substr(2).split('/').first; StringRef ReplPattern = Edit.substr(2).split('/').second; ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 15a249e6177e..e2e4340f44e9 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ 
b/llvm/lib/Support/RISCVISAInfo.cpp @@ -565,7 +565,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // TODO: Use version number when setting target features // Currently LLVM supports only "mafdcbv". StringRef SupportedStandardExtension = "mafdcbv"; - if (SupportedStandardExtension.find(C) == StringRef::npos) + if (!SupportedStandardExtension.contains(C)) return createStringError(errc::invalid_argument, "unsupported standard user-level extension '%c'", C); From cde37a7e5a1fd3917ab8f66c8b61d86220afc581 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 22:24:15 -0800 Subject: [PATCH 024/992] [ELF][test] Add tests for mixed GD-to-IE and IE, mixed TLSDESC and GD Note: mixed TLSDESC and GD currently does not work. --- lld/test/ELF/x86-64-tls-ie.s | 37 +++++++++++++++++--------- lld/test/ELF/x86-64-tlsdesc-gd-mixed.s | 25 +++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 lld/test/ELF/x86-64-tlsdesc-gd-mixed.s diff --git a/lld/test/ELF/x86-64-tls-ie.s b/lld/test/ELF/x86-64-tls-ie.s index e5510f84d5f3..8190c569f0fe 100644 --- a/lld/test/ELF/x86-64-tls-ie.s +++ b/lld/test/ELF/x86-64-tls-ie.s @@ -4,7 +4,7 @@ // RUN: ld.lld -shared %t2.o -soname=so -o %t2.so // RUN: ld.lld -e main %t1.o %t2.so -o %t3 // RUN: llvm-readobj -S -r %t3 | FileCheck %s -// RUN: llvm-objdump -d %t3 | FileCheck --check-prefix=DISASM %s +// RUN: llvm-objdump -d --no-show-raw-insn %t3 | FileCheck --check-prefix=DISASM %s // CHECK: Section { // CHECK: Index: 9 @@ -15,7 +15,7 @@ // CHECK-NEXT: SHF_WRITE // CHECK-NEXT: ] // CHECK-NEXT: Address: [[ADDR:.*]] -// CHECK-NEXT: Offset: 0x3B0 +// CHECK-NEXT: Offset: 0x3F0 // CHECK-NEXT: Size: 16 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 @@ -26,23 +26,27 @@ // CHECK: Relocations [ // CHECK-NEXT: Section (5) .rela.dyn { // CHECK-NEXT: [[ADDR]] R_X86_64_TPOFF64 tls1 0x0 -// CHECK-NEXT: 0x2023B8 R_X86_64_TPOFF64 tls0 0x0 +// CHECK-NEXT: 0x2023F8 R_X86_64_TPOFF64 tls0 0x0 // 
CHECK-NEXT: } // CHECK-NEXT: ] -// 0x2012d0 + 4313 + 7 = 0x2023B0 -// 0x2012dA + 4311 + 7 = 0x2023B8 -// 0x2012e4 + 4301 + 7 = 0x2023B8 +/// 0x2023F0 - 0x201307 = 4329 +/// 0x2023F8 - 0x201311 = 4327 +/// 0x2023F8 - 0x20131b = 4317 // DISASM: Disassembly of section .text: // DISASM-EMPTY: // DISASM-NEXT:
: -// DISASM-NEXT: 2012d0: {{.*}} movq 4313(%rip), %rax -// DISASM-NEXT: 2012d7: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012da: {{.*}} movq 4311(%rip), %rax -// DISASM-NEXT: 2012e1: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012e4: {{.*}} movq 4301(%rip), %rax -// DISASM-NEXT: 2012eb: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012ee: {{.*}} retq +// DISASM-NEXT: movq 4329(%rip), %rax +// DISASM-NEXT: 201307: movl %fs:(%rax), %eax +// DISASM-NEXT: movq 4327(%rip), %rax +// DISASM-NEXT: 201311: movl %fs:(%rax), %eax +// DISASM-NEXT: movq 4317(%rip), %rax +// DISASM-NEXT: 20131b: movl %fs:(%rax), %eax + +/// 0x2023F0 - 0x20132e = 4290 +// DISASM-NEXT: movq %fs:0, %rax +// DISASM-NEXT: addq 4290(%rip), %rax +// DISASM-NEXT: 20132e: retq .section .tdata,"awT",@progbits @@ -57,4 +61,11 @@ main: movl %fs:0(%rax), %eax movq tls0@GOTTPOFF(%rip), %rax movl %fs:0(%rax), %eax + +## Relaxed to TLS IE. Share the GOT entry with GOTTPOFF. + .byte 0x66 + leaq tls1@tlsgd(%rip), %rdi + .value 0x6666 + rex64 + call __tls_get_addr@PLT ret diff --git a/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s b/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s new file mode 100644 index 000000000000..f6bfe5bbba1c --- /dev/null +++ b/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s @@ -0,0 +1,25 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readobj -r %t.so | FileCheck %s --check-prefix=RELA + +## FIXME Both TLSDESC and DTPMOD64/DTPOFF64 should be present. 
+# RELA: .rela.dyn { +# RELA-NEXT: 0x2430 R_X86_64_TLSDESC a 0x0 +# RELA-NEXT: } + +leaq a@tlsdesc(%rip), %rax +call *a@tlscall(%rax) +movl %fs:(%rax), %eax + +.byte 0x66 +leaq a@tlsgd(%rip), %rdi +.word 0x6666 +rex64 +call __tls_get_addr@PLT + +.section .tbss +.globl a +.zero 8 +a: +.zero 4 From 70912420bbc39e0cf486a933182d910bfd835063 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 22:36:49 -0800 Subject: [PATCH 025/992] [ELF] Move TLS dynamic relocations to postScanRelocations This temporarily increases sizeof(SymbolUnion), but allows us to mov GOT/PLT/etc index members outside Symbol in the future. Then, we can make TLSDESC and TLSGD use different indexes and support mixed TLSDESC and TLSGD (tested by x86-64-tlsdesc-gd-mixed.s). Note: needsTlsGd and needsTlsGdToIe may optionally be combined. Test updates are due to reordered GOT entries. --- lld/ELF/Relocations.cpp | 111 ++++++++++++++++++------------- lld/ELF/Symbols.h | 15 +++-- lld/test/ELF/i386-tls-dynamic.s | 22 +++--- lld/test/ELF/i386-tlsdesc-gd.s | 20 +++--- lld/test/ELF/riscv-tls-ld.s | 34 +++++----- lld/test/ELF/x86-64-tlsdesc-gd.s | 20 +++--- 6 files changed, 122 insertions(+), 100 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 23612ec48ded..cfe49007b814 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1158,13 +1158,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof(expr) && config->shared) { - if (in.got->addDynTlsEntry(sym)) { - uint64_t off = in.got->getGlobalDynOffset(sym); - mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( - target->tlsDescRel, *in.got, off, sym, target->tlsDescRel); - } - if (expr != R_TLSDESC_CALL) + if (expr != R_TLSDESC_CALL) { + sym.needsTlsDesc = true; c.relocations.push_back({expr, type, offset, addend, &sym}); + } return 1; } @@ -1200,14 +1197,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, } if (expr == R_TLSLD_HINT) return 1; - if 
(in.got->addTlsIndex()) { - if (isLocalInExecutable) - in.got->relocations.push_back( - {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); - else - mainPart->relaDyn->addReloc( - {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); - } + sym.needsTlsLd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1223,12 +1213,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // Local-Dynamic sequence where offset of tls variable relative to dynamic // thread pointer is stored in the got. This cannot be relaxed to Local-Exec. if (expr == R_TLSLD_GOT_OFF) { - if (!sym.isInGot()) { - in.got->addEntry(sym); - uint64_t off = sym.getGotOffset(); - in.got->relocations.push_back( - {R_ABS, target->tlsOffsetRel, off, 0, &sym}); - } + sym.needsGotDtprel = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1236,27 +1221,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof(expr)) { if (!toExecRelax) { - if (in.got->addDynTlsEntry(sym)) { - uint64_t off = in.got->getGlobalDynOffset(sym); - - if (isLocalInExecutable) - // Write one to the GOT slot. - in.got->relocations.push_back( - {R_ADDEND, target->symbolicRel, off, 1, &sym}); - else - mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *in.got, - off, sym); - - // If the symbol is preemptible we need the dynamic linker to write - // the offset too. 
- uint64_t offsetOff = off + config->wordsize; - if (sym.isPreemptible) - mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *in.got, - offsetOff, sym); - else - in.got->relocations.push_back( - {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); - } + sym.needsTlsGd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1264,14 +1229,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. if (sym.isPreemptible) { + sym.needsTlsGdToIe = true; c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset, addend, &sym}); - if (!sym.isInGot()) { - in.got->addEntry(sym); - mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, - sym.getGotOffset(), sym); - } } else { c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_LE), type, offset, @@ -1288,8 +1249,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, c.relocations.push_back( {R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { - if (!sym.isInGot()) - addTpOffsetGotEntry(sym); + sym.needsTlsIe = true; // R_GOT needs a relative relocation for PIC on i386 and Hexagon. if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type)) addRelativeReloc(c, offset, sym, addend, expr, type); @@ -1638,6 +1598,61 @@ void elf::postScanRelocations() { } } } + + if (!sym.isTls()) + return; + bool isLocalInExecutable = !sym.isPreemptible && !config->shared; + + if (sym.needsTlsDesc) { + in.got->addDynTlsEntry(sym); + mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( + target->tlsDescRel, *in.got, in.got->getGlobalDynOffset(sym), sym, + target->tlsDescRel); + } + if (sym.needsTlsGd && !sym.needsTlsDesc) { + // TODO Support mixed TLSDESC and TLS GD. 
+ in.got->addDynTlsEntry(sym); + uint64_t off = in.got->getGlobalDynOffset(sym); + if (isLocalInExecutable) + // Write one to the GOT slot. + in.got->relocations.push_back( + {R_ADDEND, target->symbolicRel, off, 1, &sym}); + else + mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *in.got, + off, sym); + + // If the symbol is preemptible we need the dynamic linker to write + // the offset too. + uint64_t offsetOff = off + config->wordsize; + if (sym.isPreemptible) + mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *in.got, + offsetOff, sym); + else + in.got->relocations.push_back( + {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); + } + if (sym.needsTlsGdToIe) { + in.got->addEntry(sym); + mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, + sym.getGotOffset(), sym); + } + + if (sym.needsTlsLd && in.got->addTlsIndex()) { + if (isLocalInExecutable) + in.got->relocations.push_back( + {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); + else + mainPart->relaDyn->addReloc( + {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); + } + if (sym.needsGotDtprel) { + in.got->addEntry(sym); + in.got->relocations.push_back( + {R_ABS, target->tlsOffsetRel, sym.getGotOffset(), 0, &sym}); + } + + if (sym.needsTlsIe && !sym.needsTlsGdToIe) + addTpOffsetGotEntry(sym); }; for (Symbol *sym : symtab->symbols()) fn(*sym); diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index e5fe53c6c496..27c36eedce80 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -249,8 +249,9 @@ class Symbol { canInline(false), referenced(false), traced(false), isInIplt(false), gotInIgot(false), isPreemptible(false), used(!config->gcSections), folded(false), needsTocRestore(false), scriptDefined(false), - needsCopy(false), needsGot(false), needsPlt(false), - hasDirectReloc(false) {} + needsCopy(false), needsGot(false), needsPlt(false), needsTlsDesc(false), + needsTlsGd(false), needsTlsGdToIe(false), needsTlsLd(false), + needsGotDtprel(false), 
needsTlsIe(false), hasDirectReloc(false) {} public: // True if this symbol is in the Iplt sub-section of the Plt and the Igot @@ -288,6 +289,12 @@ class Symbol { // entries during postScanRelocations(); uint8_t needsGot : 1; uint8_t needsPlt : 1; + uint8_t needsTlsDesc : 1; + uint8_t needsTlsGd : 1; + uint8_t needsTlsGdToIe : 1; + uint8_t needsTlsLd : 1; + uint8_t needsGotDtprel : 1; + uint8_t needsTlsIe : 1; uint8_t hasDirectReloc : 1; // The partition whose dynamic symbol table contains this symbol's definition. @@ -493,9 +500,9 @@ union SymbolUnion { }; // It is important to keep the size of SymbolUnion small for performance and -// memory usage reasons. 72 bytes is a soft limit based on the size of Defined +// memory usage reasons. 80 bytes is a soft limit based on the size of Defined // on a 64-bit system. -static_assert(sizeof(SymbolUnion) <= 72, "SymbolUnion too large"); +static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large"); template struct AssertSymbol { static_assert(std::is_trivially_destructible(), diff --git a/lld/test/ELF/i386-tls-dynamic.s b/lld/test/ELF/i386-tls-dynamic.s index 07e894795cc0..d61ee5526bb1 100644 --- a/lld/test/ELF/i386-tls-dynamic.s +++ b/lld/test/ELF/i386-tls-dynamic.s @@ -66,35 +66,35 @@ addl tls1@gotntpoff(%ebx),%eax # CHECK: Relocations [ # CHECK: Section ({{.+}}) .rel.dyn { -# CHECK-NEXT: 0x2368 R_386_TLS_DTPMOD32 - +# CHECK-NEXT: 0x2370 R_386_TLS_DTPMOD32 - # CHECK-NEXT: 0x2358 R_386_TLS_DTPMOD32 tls0 # CHECK-NEXT: 0x235C R_386_TLS_DTPOFF32 tls0 -# CHECK-NEXT: 0x2370 R_386_TLS_TPOFF tls0 -# CHECK-NEXT: 0x2360 R_386_TLS_DTPMOD32 tls1 -# CHECK-NEXT: 0x2364 R_386_TLS_DTPOFF32 tls1 -# CHECK-NEXT: 0x2374 R_386_TLS_TPOFF tls1 +# CHECK-NEXT: 0x2360 R_386_TLS_TPOFF tls0 +# CHECK-NEXT: 0x2364 R_386_TLS_DTPMOD32 tls1 +# CHECK-NEXT: 0x2368 R_386_TLS_DTPOFF32 tls1 +# CHECK-NEXT: 0x236C R_386_TLS_TPOFF tls1 # CHECK-NEXT: } # DIS: Disassembly of section .text: # DIS-EMPTY: # DIS-NEXT: <_start>: ## General dynamic model: -## -4128 
and -4120 are first and second GOT entries offsets. +## -4128 and -4116 are first and second GOT entries offsets. ## Each one is a pair of records. # DIS-NEXT: 1260: leal -4128(,%ebx), %eax # DIS-NEXT: 1267: calll 0x12d0 -# DIS-NEXT: 126c: leal -4120(,%ebx), %eax +# DIS-NEXT: 126c: leal -4116(,%ebx), %eax # DIS-NEXT: 1273: calll 0x12d0 ## Local dynamic model: ## -16 is a local module tls index offset. -# DIS-NEXT: 1278: leal -4112(%ebx), %eax +# DIS-NEXT: 1278: leal -4104(%ebx), %eax # DIS-NEXT: 127e: calll 0x12d0 # DIS-NEXT: 1283: leal 8(%eax), %edx -# DIS-NEXT: 1289: leal -4112(%ebx), %eax +# DIS-NEXT: 1289: leal -4104(%ebx), %eax # DIS-NEXT: 128f: calll 0x12d0 # DIS-NEXT: 1294: leal 12(%eax), %edx ## Initial exec model: # DIS-NEXT: 129a: movl %gs:0, %eax -# DIS-NEXT: 12a0: addl -4104(%ebx), %eax +# DIS-NEXT: 12a0: addl -4120(%ebx), %eax # DIS-NEXT: 12a6: movl %gs:0, %eax -# DIS-NEXT: 12ac: addl -4100(%ebx), %eax +# DIS-NEXT: 12ac: addl -4108(%ebx), %eax diff --git a/lld/test/ELF/i386-tlsdesc-gd.s b/lld/test/ELF/i386-tlsdesc-gd.s index a2fc0f8f6645..132febed2feb 100644 --- a/lld/test/ELF/i386-tlsdesc-gd.s +++ b/lld/test/ELF/i386-tlsdesc-gd.s @@ -19,18 +19,18 @@ # RUN: llvm-objdump -h -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s # GD-REL: .rel.dyn { -# GD-REL-NEXT: 0x2250 R_386_TLS_DESC - +# GD-REL-NEXT: 0x2258 R_386_TLS_DESC - # GD-REL-NEXT: 0x2248 R_386_TLS_DESC a -# GD-REL-NEXT: 0x2258 R_386_TLS_DESC c +# GD-REL-NEXT: 0x2250 R_386_TLS_DESC c # GD-REL-NEXT: } # GD-REL: Hex dump of section '.got': -# GD-REL-NEXT: 0x00002248 00000000 00000000 00000000 0b000000 -# GD-REL-NEXT: 0x00002258 00000000 00000000 +# GD-REL-NEXT: 0x00002248 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x00002258 00000000 0b000000 # GD-RELA: .rela.dyn { -# GD-RELA-NEXT: 0x225C R_386_TLS_DESC - 0xB +# GD-RELA-NEXT: 0x2264 R_386_TLS_DESC - 0xB # GD-RELA-NEXT: 0x2254 R_386_TLS_DESC a 0x0 -# GD-RELA-NEXT: 0x2264 R_386_TLS_DESC c 0x0 +# GD-RELA-NEXT: 0x225C R_386_TLS_DESC c 
0x0 # GD-RELA-NEXT: } # GD-RELA: Hex dump of section '.got': # GD-RELA-NEXT: 0x00002254 00000000 00000000 00000000 00000000 @@ -44,14 +44,14 @@ # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax -# &.rel.dyn[b]-.got.plt = 0x2250-0x2260 = -16 -# GD-NEXT: leal -16(%ebx), %eax +# &.rel.dyn[b]-.got.plt = 0x2258-0x2260 = -8 +# GD-NEXT: leal -8(%ebx), %eax # GD-NEXT: movl %edx, %ebx # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax -# &.rel.dyn[c]-.got.plt = 0x2258-0x2260 = -8 -# GD-NEXT: leal -8(%ebx), %eax +# &.rel.dyn[c]-.got.plt = 0x2250-0x2260 = -16 +# GD-NEXT: leal -16(%ebx), %eax # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax diff --git a/lld/test/ELF/riscv-tls-ld.s b/lld/test/ELF/riscv-tls-ld.s index f47964ef3e26..bc9a601a74ec 100644 --- a/lld/test/ELF/riscv-tls-ld.s +++ b/lld/test/ELF/riscv-tls-ld.s @@ -35,26 +35,26 @@ ## a@dtprel = st_value(a)-0x800 = 0xfffff808 is a link-time constant. # LD32-REL: .rela.dyn { -# LD32-REL-NEXT: 0x22B4 -# LD32-REL-NEXT: 0x22AC R_RISCV_TLS_DTPMOD32 - 0x0 +# LD32-REL-NEXT: 0x22AC +# LD32-REL-NEXT: 0x22B0 R_RISCV_TLS_DTPMOD32 - 0x0 # LD32-REL-NEXT: } # LD32-GOT: section '.got': -# LD32-GOT-NEXT: 0x000022a8 30220000 00000000 00f8ffff 00000000 +# LD32-GOT-NEXT: 0x000022a8 30220000 00000000 00000000 00f8ffff # LD64-REL: .rela.dyn { -# LD64-REL-NEXT: 0x2458 -# LD64-REL-NEXT: 0x2448 R_RISCV_TLS_DTPMOD64 - 0x0 +# LD64-REL-NEXT: 0x2448 +# LD64-REL-NEXT: 0x2450 R_RISCV_TLS_DTPMOD64 - 0x0 # LD64-REL-NEXT: } # LD64-GOT: section '.got': # LD64-GOT-NEXT: 0x00002440 50230000 00000000 00000000 00000000 -# LD64-GOT-NEXT: 0x00002450 00f8ffff ffffffff 00000000 00000000 +# LD64-GOT-NEXT: 0x00002450 00000000 00000000 00f8ffff ffffffff -## rv32: &DTPMOD(a) - . = 0x22ac - 0x11d8 = 4096*1+212 -## rv64: &DTPMOD(a) - . = 0x2448 - 0x12f8 = 4096*1+336 +## rv32: &DTPMOD(a) - . = 0x22b0 - 0x11d8 = 4096*1+216 +## rv64: &DTPMOD(a) - . 
= 0x2450 - 0x12f8 = 4096*1+344 # LD32: 11d8: auipc a0, 1 -# LD32-NEXT: addi a0, a0, 212 +# LD32-NEXT: addi a0, a0, 216 # LD64: 12f8: auipc a0, 1 -# LD64-NEXT: addi a0, a0, 336 +# LD64-NEXT: addi a0, a0, 344 # LD-NEXT: auipc ra, 0 # LD-NEXT: jalr 64(ra) @@ -63,18 +63,18 @@ ## a is local - its DTPMOD/DTPREL slots are link-time constants. ## a@dtpmod = 1 (main module) # LE32-GOT: section '.got': -# LE32-GOT-NEXT: 0x00012134 00000000 01000000 00f8ffff 34210100 +# LE32-GOT-NEXT: 0x00012134 00000000 34210100 01000000 00f8ffff # LE64-GOT: section '.got': -# LE64-GOT-NEXT: 0x000121e8 00000000 00000000 01000000 00000000 -# LE64-GOT-NEXT: 0x000121f8 00f8ffff ffffffff e8210100 00000000 +# LE64-GOT-NEXT: 0x000121e8 00000000 00000000 e8210100 00000000 +# LE64-GOT-NEXT: 0x000121f8 01000000 00000000 00f8ffff ffffffff -## rv32: DTPMOD(.LANCHOR0) - . = 0x12138 - 0x11114 = 4096*1+36 -## rv64: DTPMOD(.LANCHOR0) - . = 0x121f0 - 0x111c8 = 4096*1+40 +## rv32: DTPMOD(.LANCHOR0) - . = 0x1213c - 0x11114 = 4096*1+40 +## rv64: DTPMOD(.LANCHOR0) - . 
= 0x121f8 - 0x111c8 = 4096*1+48 # LE32: 11114: auipc a0, 1 -# LE32-NEXT: addi a0, a0, 36 +# LE32-NEXT: addi a0, a0, 40 # LE64: 111c8: auipc a0, 1 -# LE64-NEXT: addi a0, a0, 40 +# LE64-NEXT: addi a0, a0, 48 # LE-NEXT: auipc ra, 0 # LE-NEXT: jalr 24(ra) diff --git a/lld/test/ELF/x86-64-tlsdesc-gd.s b/lld/test/ELF/x86-64-tlsdesc-gd.s index f0cdf08040f3..436216e7d3d5 100644 --- a/lld/test/ELF/x86-64-tlsdesc-gd.s +++ b/lld/test/ELF/x86-64-tlsdesc-gd.s @@ -19,9 +19,9 @@ # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s # GD-RELA: .rela.dyn { -# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC - 0xB +# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC - 0xB # GD-RELA-NEXT: 0x23B0 R_X86_64_TLSDESC a 0x0 -# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC c 0x0 +# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC c 0x0 # GD-RELA-NEXT: } # GD-RELA: Hex dump of section '.got': # GD-RELA-NEXT: 0x000023b0 00000000 00000000 00000000 00000000 @@ -29,28 +29,28 @@ # GD-RELA-NEXT: 0x000023d0 00000000 00000000 00000000 00000000 # GD-REL: .rel.dyn { -# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC - +# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC - # GD-REL-NEXT: 0x2398 R_X86_64_TLSDESC a -# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC c +# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC c # GD-REL-NEXT: } # GD-REL: Hex dump of section '.got': # GD-REL-NEXT: 0x00002398 00000000 00000000 00000000 00000000 -# GD-REL-NEXT: 0x000023a8 00000000 00000000 0b000000 00000000 -# GD-REL-NEXT: 0x000023b8 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x000023a8 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x000023b8 00000000 00000000 0b000000 00000000 ## &.rela.dyn[a]-pc = 0x23B0-0x12e7 = 4297 # GD: leaq 4297(%rip), %rax # GD-NEXT: 12e7: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax -## &.rela.dyn[b]-pc = 0x23C0-0x12f3 = 4301 -# GD-NEXT: leaq 4301(%rip), %rcx +## &.rela.dyn[b]-pc = 0x23D0-0x12f3 = 4317 +# GD-NEXT: leaq 4317(%rip), %rcx # GD-NEXT: 12f3: movq %rcx, %rax # GD-NEXT: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax -## 
&.rela.dyn[c]-pc = 0x23D0-0x1302 = 4302 -# GD-NEXT: leaq 4302(%rip), %r15 +## &.rela.dyn[c]-pc = 0x23C0-0x1302 = 4286 +# GD-NEXT: leaq 4286(%rip), %r15 # GD-NEXT: 1302: movq %r15, %rax # GD-NEXT: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax From 2d303e678152fddb88dea4199c8872223232b406 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 23:17:53 -0800 Subject: [PATCH 026/992] Remove redundant return and continue statements (NFC) Identified with readability-redundant-control-flow. --- .../abseil/DurationFactoryScaleCheck.cpp | 1 - .../llvmlibc/ImplementationInNamespaceCheck.cpp | 1 - clang/lib/ASTMatchers/Dynamic/Marshallers.h | 1 - clang/lib/Analysis/CFG.cpp | 2 -- clang/lib/Basic/Targets/PPC.cpp | 5 +---- clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 ++++---- clang/lib/CodeGen/CodeGenAction.cpp | 1 - clang/lib/Driver/ToolChains/HIPAMD.cpp | 1 - clang/lib/Format/UnwrappedLineParser.cpp | 1 - clang/lib/Sema/SemaCUDA.cpp | 1 - clang/lib/Sema/SemaCodeComplete.cpp | 1 - clang/lib/Sema/SemaType.cpp | 1 - .../clang-fuzzer/handle-llvm/handle_llvm.cpp | 2 -- lldb/include/lldb/Core/ValueObject.h | 2 +- lldb/include/lldb/Target/LanguageRuntime.h | 2 +- lldb/include/lldb/Utility/RangeMap.h | 1 - lldb/source/Core/IOHandlerCursesGUI.cpp | 15 +++++---------- lldb/source/Expression/IRExecutionUnit.cpp | 2 -- lldb/source/Expression/IRMemoryMap.cpp | 4 ---- .../Host/posix/ProcessLauncherPosixFork.cpp | 1 - lldb/source/Interpreter/CommandInterpreter.cpp | 3 --- .../MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp | 1 - .../ExpressionParser/Clang/ClangASTImporter.cpp | 2 +- .../ExpressionParser/Clang/ClangASTSource.cpp | 2 -- .../ExpressionParser/Clang/ClangASTSource.h | 2 +- .../Clang/ClangExpressionDeclMap.cpp | 2 -- .../ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp | 3 --- .../SymbolFile/NativePDB/SymbolFileNativePDB.cpp | 2 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 1 - lldb/source/Symbol/SymbolFile.cpp | 4 +--- lldb/source/Target/ThreadPlanStack.cpp | 1 - 
lldb/source/Target/UnwindLLDB.cpp | 1 - .../OrcV2CBindingsVeryLazy.c | 1 - llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1 - llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 2 -- llvm/lib/IR/Instructions.cpp | 1 - llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 1 - llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 -- llvm/lib/Target/AVR/AVRInstrInfo.cpp | 2 -- polly/lib/External/isl/isl_int_sioimath.h | 1 - polly/lib/Transform/ManualOptimizer.cpp | 1 - 41 files changed, 16 insertions(+), 73 deletions(-) diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp index aa839beddac6..c9f3a7db0346 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp @@ -221,7 +221,6 @@ void DurationFactoryScaleCheck::check(const MatchFinder::MatchResult &Result) { tooling::fixit::getText(*Remainder, *Result.Context) + ")") .str()); } - return; } } // namespace abseil diff --git a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp index 42b697076b35..842bf43b2c45 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp @@ -41,7 +41,6 @@ void ImplementationInNamespaceCheck::check( diag(MatchedDecl->getLocation(), "declaration must be declared within the '%0' namespace") << RequiredNamespace; - return; } } // namespace llvm_libc diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/clang/lib/ASTMatchers/Dynamic/Marshallers.h index 783fb203c408..fa9d42247e24 100644 --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.h +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.h @@ -1035,7 +1035,6 @@ class MapAnyOfBuilderDescriptor : public MatcherDescriptor { void getArgKinds(ASTNodeKind ThisKind, unsigned, std::vector 
&ArgKinds) const override { ArgKinds.push_back(ArgKind::MakeNodeArg(ThisKind)); - return; } bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity = nullptr, ASTNodeKind *LeastDerivedKind = nullptr) const override { diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index abf65e3efce9..9ef3b5b6277a 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1820,8 +1820,6 @@ void CFGBuilder::addScopesEnd(LocalScope::const_iterator B, for (VarDecl *VD : llvm::reverse(DeclsWithEndedScope)) appendScopeEnd(Block, VD, S); - - return; } /// addAutomaticObjDtors - Add to current block automatic objects destructors diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index c3c61ed443ca..7f7b44b658eb 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -623,14 +623,11 @@ void PPCTargetInfo::addP10SpecificFeatures( Features["pcrelative-memops"] = true; Features["prefix-instrs"] = true; Features["isa-v31-instructions"] = true; - return; } // Add features specific to the "Future" CPU. 
void PPCTargetInfo::addFutureSpecificFeatures( - llvm::StringMap &Features) const { - return; -} + llvm::StringMap &Features) const {} bool PPCTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 564c3a591f16..4c11f7d67534 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4301,10 +4301,10 @@ class CheckVarsEscapingUntiedTaskDeclContext final PrivateDecls.push_back(VD); } } - void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } - void VisitCapturedStmt(const CapturedStmt *) { return; } - void VisitLambdaExpr(const LambdaExpr *) { return; } - void VisitBlockExpr(const BlockExpr *) { return; } + void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} + void VisitCapturedStmt(const CapturedStmt *) {} + void VisitLambdaExpr(const LambdaExpr *) {} + void VisitBlockExpr(const BlockExpr *) {} void VisitStmt(const Stmt *S) { if (!S) return; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 52c54d3c7a72..b72b16cf2b5f 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -571,7 +571,6 @@ void BackendConsumer::SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &DI) { // If Loc is invalid, we still need to report the issue, it just gets no // location info. 
Diags.Report(Loc, DiagID).AddString(Message); - return; } bool diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index c08f825e0cb9..6d553791b394 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -365,5 +365,4 @@ void HIPAMDToolChain::checkTargetID( getDriver().Diag(clang::diag::err_drv_bad_target_id) << PTID.OptionalTargetID.getValue(); } - return; } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 4f300bb63a42..b6e55aab708f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1138,7 +1138,6 @@ void UnwrappedLineParser::parseModuleImport() { } addUnwrappedLine(); - return; } // readTokenWithJavaScriptASI reads the next token and terminates the current diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 840b3daae63c..59601c5ce79d 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -886,7 +886,6 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, diag::warn_maybe_capture_bad_target_this_ptr, Callee, *this); } - return; } void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 083a67db7a91..93c07ccc891f 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -569,7 +569,6 @@ void PreferredTypeBuilder::enterMemAccess(Sema &S, SourceLocation Tok, return; // Keep the expected type, only update the location. 
ExpectedLoc = Tok; - return; } void PreferredTypeBuilder::enterUnary(Sema &S, SourceLocation Tok, diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0607d3a774aa..7a038301a249 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6541,7 +6541,6 @@ static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr, StringRef BTFTypeTag = StrLiteral->getString(); Type = State.getAttributedType( ::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type, Type); - return; } /// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp index bf694e575a9d..aa38b5e4aa26 100644 --- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp +++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp @@ -227,6 +227,4 @@ void clang_fuzzer::HandleLLVM(const std::string &IR, if (memcmp(OptArrays, UnoptArrays, kTotalSize)) ErrorAndExit("!!!BUG!!!"); - - return; } diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 5f1cbc65b320..192149f05436 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -1000,7 +1000,7 @@ class ValueObject { void SetPreferredDisplayLanguageIfNeeded(lldb::LanguageType); protected: - virtual void DoUpdateChildrenAddressType(ValueObject &valobj) { return; }; + virtual void DoUpdateChildrenAddressType(ValueObject &valobj){}; private: virtual CompilerType MaybeCalculateCompleteType(); diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h index 2f95c2643318..ba96d080f908 100644 --- a/lldb/include/lldb/Target/LanguageRuntime.h +++ b/lldb/include/lldb/Target/LanguageRuntime.h @@ -141,7 +141,7 @@ class LanguageRuntime : public Runtime, public PluginInterface { return false; } - virtual void SymbolsDidLoad(const ModuleList &module_list) { return; 
} + virtual void SymbolsDidLoad(const ModuleList &module_list) {} virtual lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread, bool stop_others) = 0; diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 118fdfd85fa9..422f90d807a7 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -360,7 +360,6 @@ template class RangeVector { m_entries.erase(next); } } - return; } Collection m_entries; diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 9122117ef5ff..60207f75b7df 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1034,7 +1034,7 @@ class FieldDelegate { // navigates to the next or the previous field. This is particularly useful to // do in-field validation and error setting. Fields with internal navigation // should call this method on their fields. - virtual void FieldDelegateExitCallback() { return; } + virtual void FieldDelegateExitCallback() {} // Fields may have internal navigation, for instance, a List Field have // multiple internal elements, which needs to be navigated. To allow for this @@ -1055,10 +1055,10 @@ class FieldDelegate { virtual bool FieldDelegateOnLastOrOnlyElement() { return true; } // Select the first element in the field if multiple elements exists. - virtual void FieldDelegateSelectFirstElement() { return; } + virtual void FieldDelegateSelectFirstElement() {} // Select the last element in the field if multiple elements exists. - virtual void FieldDelegateSelectLastElement() { return; } + virtual void FieldDelegateSelectLastElement() {} // Returns true if the field has an error, false otherwise. 
virtual bool FieldDelegateHasError() { return false; } @@ -2000,7 +2000,6 @@ template class ListFieldDelegate : public FieldDelegate { void FieldDelegateSelectLastElement() override { m_selection_type = SelectionType::NewButton; - return; } int GetNumberOfFields() { return m_fields.size(); } @@ -2292,7 +2291,7 @@ class FormDelegate { virtual std::string GetName() = 0; - virtual void UpdateFieldsVisibility() { return; } + virtual void UpdateFieldsVisibility() {} FieldDelegate *GetField(uint32_t field_index) { if (field_index < m_fields.size()) @@ -3765,13 +3764,11 @@ class SearcherWindowDelegate : public WindowDelegate { void SelectNext() { if (m_selected_match != m_delegate_sp->GetNumberOfMatches() - 1) m_selected_match++; - return; } void SelectPrevious() { if (m_selected_match != 0) m_selected_match--; - return; } void ExecuteCallback(Window &window) { @@ -4608,9 +4605,7 @@ class TreeDelegate { virtual void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) = 0; virtual void TreeDelegateGenerateChildren(TreeItem &item) = 0; virtual void TreeDelegateUpdateSelection(TreeItem &root, int &selection_index, - TreeItem *&selected_item) { - return; - } + TreeItem *&selected_item) {} // This is invoked when a tree item is selected. If true is returned, the // views are updated. 
virtual bool TreeDelegateItemSelected(TreeItem &item) = 0; diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index f2d22f7ed9cc..3c4a87c27e20 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -483,8 +483,6 @@ void IRExecutionUnit::GetRunnableInfo(Status &error, lldb::addr_t &func_addr, func_addr = m_function_load_addr; func_end = m_function_end_load_addr; - - return; } IRExecutionUnit::~IRExecutionUnit() { diff --git a/lldb/source/Expression/IRMemoryMap.cpp b/lldb/source/Expression/IRMemoryMap.cpp index 4ae2724d4dd8..9eee5cf5b9a2 100644 --- a/lldb/source/Expression/IRMemoryMap.cpp +++ b/lldb/source/Expression/IRMemoryMap.cpp @@ -609,7 +609,6 @@ void IRMemoryMap::WriteScalarToMemory(lldb::addr_t process_address, error.SetErrorToGenericError(); error.SetErrorString("Couldn't write scalar: its size was zero"); } - return; } void IRMemoryMap::WritePointerToMemory(lldb::addr_t process_address, @@ -757,7 +756,6 @@ void IRMemoryMap::ReadScalarFromMemory(Scalar &scalar, error.SetErrorToGenericError(); error.SetErrorString("Couldn't read scalar: its size was zero"); } - return; } void IRMemoryMap::ReadPointerFromMemory(lldb::addr_t *address, @@ -773,8 +771,6 @@ void IRMemoryMap::ReadPointerFromMemory(lldb::addr_t *address, return; *address = pointer_scalar.ULongLong(); - - return; } void IRMemoryMap::GetMemoryData(DataExtractor &extractor, diff --git a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp index 63178e6c8a7a..2f08b9fa8857 100644 --- a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp +++ b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp @@ -86,7 +86,6 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, ExitWithError(error_fd, "DupDescriptor-dup2"); ::close(target_fd); - return; } [[noreturn]] static void ChildFunc(int error_fd, diff --git 
a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 301bf949feef..bd03f18b47c0 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2216,7 +2216,6 @@ void CommandInterpreter::BuildAliasCommandArgs(CommandObject *alias_cmd_obj, } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return; } int CommandInterpreter::GetOptionArgumentPosition(const char *in_string) { @@ -2563,8 +2562,6 @@ void CommandInterpreter::HandleCommands(const StringList &commands, result.SetStatus(eReturnStatusSuccessFinishResult); m_debugger.SetAsyncExecution(old_async_execution); - - return; } // Make flags that we can pass into the IOHandler so our delegates can do the diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp index c3cfd0afe551..866b89f532ac 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp @@ -242,7 +242,6 @@ void DynamicLoaderMacOSXDYLD::DoInitialImageFetch() { ReadDYLDInfoFromMemoryAndSetNotificationCallback(0x8fe00000); } } - return; } // Assume that dyld is in memory at ADDR and try to parse it's load commands diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 719b35689feb..6ed3cc9384f0 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -806,7 +806,7 @@ void ClangASTImporter::ForgetSource(clang::ASTContext *dst_ast, md->removeOriginsWithContext(src_ast); } -ClangASTImporter::MapCompleter::~MapCompleter() { return; } +ClangASTImporter::MapCompleter::~MapCompleter() {} llvm::Expected ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl 
*From) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 410d8a95cb12..510352e8c173 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -513,8 +513,6 @@ void ClangASTSource::FindExternalLexicalDecls( // is consulted again when a clang::DeclContext::lookup is called. const_cast(decl_context)->setMustBuildLookupTable(); } - - return; } void ClangASTSource::FindExternalVisibleDecls(NameSearchContext &context) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h index 3afd1fd5f2d1..f3fec3f944a1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h @@ -59,7 +59,7 @@ class ClangASTSource : public clang::ExternalASTSource, GetExternalCXXBaseSpecifiers(uint64_t Offset) override { return nullptr; } - void MaterializeVisibleDecls(const clang::DeclContext *DC) { return; } + void MaterializeVisibleDecls(const clang::DeclContext *DC) {} void InstallASTContext(TypeSystemClang &ast_context); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 846c1597292b..4af5d41a5921 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1954,8 +1954,6 @@ void ClangExpressionDeclMap::AddContextClassType(NameSearchContext &context, return; context.AddNamedDecl(typedef_decl); - - return; } void ClangExpressionDeclMap::AddOneType(NameSearchContext &context, diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp 
b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp index 9bc40c16e5d0..af11109ae45d 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp @@ -80,8 +80,6 @@ class lldb_private::AppleObjCExternalASTSource LLDB_LOG(log, " AOEAS::CT Before:\n{1}", ClangUtil::DumpDecl(tag_decl)); LLDB_LOG(log, " AOEAS::CT After:{1}", ClangUtil::DumpDecl(tag_decl)); - - return; } void CompleteType(clang::ObjCInterfaceDecl *interface_decl) override { @@ -107,7 +105,6 @@ class lldb_private::AppleObjCExternalASTSource LLDB_LOGF(log, " [CT] After:"); LLDB_LOG(log, " [CT] {0}", ClangUtil::DumpDecl(interface_decl)); } - return; } bool layoutRecordType( diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index bf101ac1acf1..e859b1d5a86c 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -900,7 +900,7 @@ lldb::LanguageType SymbolFileNativePDB::ParseLanguage(CompileUnit &comp_unit) { return TranslateLanguage(item->m_compile_opts->getLanguage()); } -void SymbolFileNativePDB::AddSymbols(Symtab &symtab) { return; } +void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {} size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) { std::lock_guard guard(GetModuleMutex()); diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index c0547936b666..a40b6ec9a635 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -239,7 +239,6 @@ void SymbolFilePDB::GetCompileUnitIndex( } } index = UINT32_MAX; - return; } std::unique_ptr diff --git a/lldb/source/Symbol/SymbolFile.cpp b/lldb/source/Symbol/SymbolFile.cpp index 
557c55699137..b85901af4d67 100644 --- a/lldb/source/Symbol/SymbolFile.cpp +++ b/lldb/source/Symbol/SymbolFile.cpp @@ -125,9 +125,7 @@ void SymbolFile::FindFunctions(const RegularExpression ®ex, void SymbolFile::GetMangledNamesForFunction( const std::string &scope_qualified_name, - std::vector &mangled_names) { - return; -} + std::vector &mangled_names) {} void SymbolFile::FindTypes( ConstString name, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Target/ThreadPlanStack.cpp b/lldb/source/Target/ThreadPlanStack.cpp index f09583cc50cc..80634647f9e0 100644 --- a/lldb/source/Target/ThreadPlanStack.cpp +++ b/lldb/source/Target/ThreadPlanStack.cpp @@ -210,7 +210,6 @@ void ThreadPlanStack::DiscardAllPlans() { for (int i = stack_size - 1; i > 0; i--) { DiscardPlan(); } - return; } void ThreadPlanStack::DiscardConsultingControllingPlans() { diff --git a/lldb/source/Target/UnwindLLDB.cpp b/lldb/source/Target/UnwindLLDB.cpp index 047147112f3b..77dd19b04ebd 100644 --- a/lldb/source/Target/UnwindLLDB.cpp +++ b/lldb/source/Target/UnwindLLDB.cpp @@ -312,7 +312,6 @@ void UnwindLLDB::UpdateUnwindPlanForFirstFrameIfInvalid(ABI *abi) { // Restore status after calling AddOneMoreFrame m_unwind_complete = old_m_unwind_complete; m_candidate_frame = old_m_candidate_frame; - return; } bool UnwindLLDB::AddOneMoreFrame(ABI *abi) { diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c b/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c index 93d36a40b12f..c557fc7795e5 100644 --- a/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c @@ -159,7 +159,6 @@ void Materialize(void *Ctx, LLVMOrcMaterializationResponsibilityRef MR) { LLVMOrcIRTransformLayerRef IRLayer = LLVMOrcLLJITGetIRTransformLayer(J); LLVMOrcIRTransformLayerEmit(IRLayer, MR, TSM); } - return; } int main(int argc, char *argv[]) { diff --git 
a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 262dc16f44ab..e8a8efd5dad4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -302,7 +302,6 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg, appendVectorElts(AllElts, Leftover); MIRBuilder.buildMerge(DstReg, AllElts); - return; } /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 906a48d81840..5421b2d59a1b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -89,7 +89,6 @@ static void dumpLocationList(raw_ostream &OS, const DWARFFormValue &FormValue, U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(), MRI, Ctx.getDWARFObj(), U, DumpOpts, Indent); - return; } static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, @@ -105,7 +104,6 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, Ctx.isLittleEndian(), 0); DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format) .print(OS, DumpOpts, MRI, U); - return; } static DWARFDie resolveReferencedType(DWARFDie D, diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index fb6105712d1a..b40fe0cfc860 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2324,7 +2324,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef Mask, } Src1Elts.setBit(i); Src1Identity &= (M == (i + NumSrcElts)); - continue; } assert((Src0Elts | Src1Elts | UndefElts).isAllOnes() && "unknown shuffle elements"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 301e6f6d6f42..e79ff9b597c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ 
-378,5 +378,4 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI, } MI.eraseFromParent(); - return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ce2c8a02194..1755b93538ce 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2395,8 +2395,6 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); auto *ShAmt = MCConstantExpr::create(32, MCCtx); OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); - - return; } unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 798d08393eae..51060018a5ca 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -571,8 +571,6 @@ void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // See lib/CodeGen/RegisterRelaxation.cpp for details. // We end up here when a jump is too long for a RJMP instruction. BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); - - return; } } // end of namespace llvm diff --git a/polly/lib/External/isl/isl_int_sioimath.h b/polly/lib/External/isl/isl_int_sioimath.h index a2112cd8e2fb..dc691b8a0b1e 100644 --- a/polly/lib/External/isl/isl_int_sioimath.h +++ b/polly/lib/External/isl/isl_int_sioimath.h @@ -868,7 +868,6 @@ inline void isl_sioimath_tdiv_q(isl_sioimath_ptr dst, isl_sioimath_src lhs, isl_sioimath_bigarg_src(rhs, &rhsscratch), isl_sioimath_reinit_big(dst), NULL); isl_sioimath_try_demote(dst); - return; } /* Divide lhs by an unsigned long rhs, rounding to zero (Truncate). 
diff --git a/polly/lib/Transform/ManualOptimizer.cpp b/polly/lib/Transform/ManualOptimizer.cpp index 2c05927582e2..ec4c584b27e2 100644 --- a/polly/lib/Transform/ManualOptimizer.cpp +++ b/polly/lib/Transform/ManualOptimizer.cpp @@ -271,7 +271,6 @@ class SearchTransformVisitor } // not a loop transformation; look for next property - continue; } } From 34558b039b3baf057851b8d39f53402608da4927 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 25 Dec 2021 00:35:41 -0800 Subject: [PATCH 027/992] [StaticAnalyzer] Remove redundant declaration isStdSmartPtr (NFC) An identical declaration is present just a couple of lines above the line being removed in this patch. Identified with readability-redundant-declaration. --- clang/lib/StaticAnalyzer/Checkers/SmartPtr.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h index 6a40f8eda5fa..b4352b450c7f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h @@ -25,8 +25,6 @@ bool isStdSmartPtrCall(const CallEvent &Call); bool isStdSmartPtr(const CXXRecordDecl *RD); bool isStdSmartPtr(const Expr *E); -bool isStdSmartPtr(const CXXRecordDecl *RD); - /// Returns whether the smart pointer is null or not. bool isNullSmartPtr(const ProgramStateRef State, const MemRegion *ThisRegion); From d86e2cc2e37c9051a5cf5a4b9b3ae4b9a961ef11 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Mon, 20 Sep 2021 00:12:21 +0700 Subject: [PATCH 028/992] [NFC] Method for evaluation of FCmpInst for constant operands New method `FCmpInst::compare` is added, which evaluates the given compare predicate for constant operands. Interface is made similar to `ICmpInst::compare`. 
Differential Revision: https://reviews.llvm.org/D116168 --- llvm/include/llvm/IR/Instructions.h | 4 +++ llvm/lib/IR/ConstantFold.cpp | 42 ++--------------------------- llvm/lib/IR/Instructions.cpp | 41 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index ccf17628e265..84ebb461ebef 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1456,6 +1456,10 @@ class FCmpInst: public CmpInst { /// static auto predicates() { return FCmpPredicates(); } + /// Return result of `LHS Pred RHS` comparison. + static bool compare(const APFloat &LHS, const APFloat &RHS, + FCmpInst::Predicate Pred); + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index ae48d0333d67..8668fe82601c 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1801,46 +1801,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } else if (isa(C1) && isa(C2)) { const APFloat &C1V = cast(C1)->getValueAPF(); const APFloat &C2V = cast(C2)->getValueAPF(); - APFloat::cmpResult R = C1V.compare(C2V); - switch (pred) { - default: llvm_unreachable("Invalid FCmp Predicate"); - case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy); - case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy); - case FCmpInst::FCMP_UNO: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered); - case FCmpInst::FCMP_ORD: - return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered); - case FCmpInst::FCMP_UEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_OEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpEqual); - case FCmpInst::FCMP_UNE: - return 
ConstantInt::get(ResultTy, R!=APFloat::cmpEqual); - case FCmpInst::FCMP_ONE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan); - case FCmpInst::FCMP_OLT: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan); - case FCmpInst::FCMP_UGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OLE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_UGE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan); - case FCmpInst::FCMP_OGE: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual); - } + CmpInst::Predicate Predicate = CmpInst::Predicate(pred); + return ConstantInt::get(ResultTy, FCmpInst::compare(C1V, C2V, Predicate)); } else if (auto *C1VTy = dyn_cast(C1->getType())) { // Fast path for splatted constants. 
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index b40fe0cfc860..7798af3b19b9 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -4160,6 +4160,47 @@ bool ICmpInst::compare(const APInt &LHS, const APInt &RHS, }; } +bool FCmpInst::compare(const APFloat &LHS, const APFloat &RHS, + FCmpInst::Predicate Pred) { + APFloat::cmpResult R = LHS.compare(RHS); + switch (Pred) { + default: + llvm_unreachable("Invalid FCmp Predicate"); + case FCmpInst::FCMP_FALSE: + return false; + case FCmpInst::FCMP_TRUE: + return true; + case FCmpInst::FCMP_UNO: + return R == APFloat::cmpUnordered; + case FCmpInst::FCMP_ORD: + return R != APFloat::cmpUnordered; + case FCmpInst::FCMP_UEQ: + return R == APFloat::cmpUnordered || R == APFloat::cmpEqual; + case FCmpInst::FCMP_OEQ: + return R == APFloat::cmpEqual; + case FCmpInst::FCMP_UNE: + return R != APFloat::cmpEqual; + case FCmpInst::FCMP_ONE: + return R == APFloat::cmpLessThan || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULT: + return R == APFloat::cmpUnordered || R == APFloat::cmpLessThan; + case FCmpInst::FCMP_OLT: + return R == APFloat::cmpLessThan; + case FCmpInst::FCMP_UGT: + return R == APFloat::cmpUnordered || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OGT: + return R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULE: + return R != APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OLE: + return R == APFloat::cmpLessThan || R == APFloat::cmpEqual; + case FCmpInst::FCMP_UGE: + return R != APFloat::cmpLessThan; + case FCmpInst::FCMP_OGE: + return R == APFloat::cmpGreaterThan || R == APFloat::cmpEqual; + } +} + CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) { assert(CmpInst::isRelational(pred) && "Call only with non-equality predicates!"); From 5b2e611b734cb64c5600c1c8d0a6b4881a05f874 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sat, 25 Dec 2021 22:35:57 +0530 Subject: [PATCH 029/992] [MLIR][FlatAffineConstraints][NFC] Move some static 
functions to be available to Presburger/ This patch moves some static functions from AffineStructures.cpp to Presburger/Utils.cpp and some to be private members of FlatAffineConstraints (which will later be moved to IntegerPolyhedron) to allow for a smoother transition for moving FlatAffineConstraints math functionality to Presburger/IntegerPolyhedron. This patch is part of a series of patches for moving math functionality to Presburger directory. Reviewed By: arjunp, bondhugula Differential Revision: https://reviews.llvm.org/D115869 --- mlir/include/mlir/Analysis/AffineStructures.h | 20 +- .../Analysis/Presburger/IntegerPolyhedron.h | 10 + mlir/include/mlir/Analysis/Presburger/Utils.h | 40 +++ mlir/lib/Analysis/AffineStructures.cpp | 231 ++---------------- mlir/lib/Analysis/Presburger/CMakeLists.txt | 2 + .../Analysis/Presburger/IntegerPolyhedron.cpp | 54 ++++ mlir/lib/Analysis/Presburger/Utils.cpp | 155 ++++++++++++ 7 files changed, 289 insertions(+), 223 deletions(-) create mode 100644 mlir/include/mlir/Analysis/Presburger/Utils.h create mode 100644 mlir/lib/Analysis/Presburger/Utils.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 9e7ffb125f7e..ea20edd1036f 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -292,16 +292,6 @@ class FlatAffineConstraints : public IntegerPolyhedron { unsigned symStartPos, ArrayRef localExprs, MLIRContext *context) const; - /// Gather positions of all lower and upper bounds of the identifier at `pos`, - /// and optionally any equalities on it. In addition, the bounds are to be - /// independent of identifiers in position range [`offset`, `offset` + `num`). 
- void - getLowerAndUpperBoundIndices(unsigned pos, - SmallVectorImpl *lbIndices, - SmallVectorImpl *ubIndices, - SmallVectorImpl *eqIndices = nullptr, - unsigned offset = 0, unsigned num = 0) const; - /// Removes constraints that are independent of (i.e., do not have a /// coefficient) identifiers in the range [pos, pos + num). void removeIndependentConstraints(unsigned pos, unsigned num); @@ -419,6 +409,16 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); + /// Searches for a constraint with a non-zero coefficient at `colIdx` in + /// equality (isEq=true) or inequality (isEq=false) constraints. + /// Returns true and sets row found in search in `rowIdx`, false otherwise. + bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, + unsigned *rowIdx) const; + + /// Returns true if the pos^th column is all zero for both inequalities and + /// equalities. + bool isColZero(unsigned pos) const; + /// A parameter that controls detection of an unrealistic number of /// constraints. If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h index 933f20e44122..c7eae0cd29ee 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h @@ -185,6 +185,16 @@ class IntegerPolyhedron { /// Removes all equalities and inequalities. void clearConstraints(); + /// Gather positions of all lower and upper bounds of the identifier at `pos`, + /// and optionally any equalities on it. In addition, the bounds are to be + /// independent of identifiers in position range [`offset`, `offset` + `num`). 
+ void + getLowerAndUpperBoundIndices(unsigned pos, + SmallVectorImpl *lbIndices, + SmallVectorImpl *ubIndices, + SmallVectorImpl *eqIndices = nullptr, + unsigned offset = 0, unsigned num = 0) const; + protected: /// Return the index at which the specified kind of id starts. unsigned getIdKindOffset(IdKind kind) const; diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h new file mode 100644 index 000000000000..0b9c2c707d3e --- /dev/null +++ b/mlir/include/mlir/Analysis/Presburger/Utils.h @@ -0,0 +1,40 @@ +//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H +#define MLIR_ANALYSIS_PRESBURGER_UTILS_H + +#include "mlir/Support/LLVM.h" + +namespace mlir { + +class IntegerPolyhedron; + +namespace presburger_utils { + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv +/// can be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. 
+Optional> +computeSingleVarRepr(const IntegerPolyhedron &cst, ArrayRef foundRepr, + unsigned pos, SmallVector ÷nd, + unsigned &divisor); + +} // namespace presburger_utils +} // namespace mlir + +#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index f4d857479cde..520262d6fddc 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,6 +13,7 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -700,14 +701,13 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, - unsigned colIdx, bool isEq, - unsigned *rowIdx) { - assert(colIdx < cst.getNumCols() && "position out of bounds"); +bool FlatAffineConstraints::findConstraintWithNonZeroAt( + unsigned colIdx, bool isEq, unsigned *rowIdx) const { + assert(colIdx < getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); + return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? cst.getNumEqualities() : cst.getNumInequalities(); + unsigned e = isEq ? 
getNumEqualities() : getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,145 +1203,6 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. -/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. 
-static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. - // Set `expr` to the dividend of the division except the constant term, which - // is set below. - expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. 
- expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -static Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, - const SmallVector &foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. - unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. - // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. 
- if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} - void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1378,8 +1239,9 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, - dividends[i], denominators[i])) { + if (auto res = presburger_utils::computeSingleVarRepr( + *this, foundRepr, divOffset + i, dividends[i], + denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1437,11 +1299,9 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1596,60 +1456,6 @@ static bool detectAsMod(const FlatAffineConstraints &cst, unsigned pos, return false; } -/// Gather all lower and upper bounds of the identifier at `pos`, and -/// optionally any equalities on it. In addition, the bounds are to be -/// independent of identifiers in position range [`offset`, `offset` + `num`). 
-void FlatAffineConstraints::getLowerAndUpperBoundIndices( - unsigned pos, SmallVectorImpl *lbIndices, - SmallVectorImpl *ubIndices, SmallVectorImpl *eqIndices, - unsigned offset, unsigned num) const { - assert(pos < getNumIds() && "invalid position"); - assert(offset + num < getNumCols() && "invalid range"); - - // Checks for a constraint that has a non-zero coeff for the identifiers in - // the position range [offset, offset + num) while ignoring `pos`. - auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) { - unsigned c, f; - auto cst = isEq ? getEquality(r) : getInequality(r); - for (c = offset, f = offset + num; c < f; ++c) { - if (c == pos) - continue; - if (cst[c] != 0) - break; - } - return c < f; - }; - - // Gather all lower bounds and upper bounds of the variable. Since the - // canonical form c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a lower - // bound for x_i if c_i >= 1, and an upper bound if c_i <= -1. - for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { - // The bounds are to be independent of [offset, offset + num) columns. - if (containsConstraintDependentOnRange(r, /*isEq=*/false)) - continue; - if (atIneq(r, pos) >= 1) { - // Lower bound. - lbIndices->push_back(r); - } else if (atIneq(r, pos) <= -1) { - // Upper bound. - ubIndices->push_back(r); - } - } - - // An equality is both a lower and upper bound. Record any equalities - // involving the pos^th identifier. - if (!eqIndices) - return; - - for (unsigned r = 0, e = getNumEqualities(); r < e; r++) { - if (atEq(r, pos) == 0) - continue; - if (containsConstraintDependentOnRange(r, /*isEq=*/true)) - continue; - eqIndices->push_back(r); - } -} - /// Check if the pos^th identifier can be expressed as a floordiv of an affine /// function of other identifiers (where the divisor is a positive constant) /// given the initial set of expressions in `exprs`. 
If it can be, the @@ -1670,7 +1476,8 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); + auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, + dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -2109,7 +1916,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. unsigned idx; - if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { continue; } @@ -3447,12 +3254,10 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -/// Returns true if the pos^th column is all zero for both inequalities and -/// equalities.. -static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { +bool FlatAffineConstraints::isColZero(unsigned pos) const { unsigned rowPos; - return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3471,7 +3276,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(*this, /*pos=*/i)) + if (!memo[i] && !isColZero(/*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index 
dd8c8d96d872..d52d4ccdb1c2 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,10 +2,12 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp + Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen LINK_LIBS PUBLIC MLIRIR + MLIRSupport ) diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 3c4f512bf4fb..958f52e2625e 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -217,3 +217,57 @@ void IntegerPolyhedron::clearConstraints() { equalities.resizeVertically(0); inequalities.resizeVertically(0); } + +/// Gather all lower and upper bounds of the identifier at `pos`, and +/// optionally any equalities on it. In addition, the bounds are to be +/// independent of identifiers in position range [`offset`, `offset` + `num`). +void IntegerPolyhedron::getLowerAndUpperBoundIndices( + unsigned pos, SmallVectorImpl *lbIndices, + SmallVectorImpl *ubIndices, SmallVectorImpl *eqIndices, + unsigned offset, unsigned num) const { + assert(pos < getNumIds() && "invalid position"); + assert(offset + num < getNumCols() && "invalid range"); + + // Checks for a constraint that has a non-zero coeff for the identifiers in + // the position range [offset, offset + num) while ignoring `pos`. + auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) { + unsigned c, f; + auto cst = isEq ? getEquality(r) : getInequality(r); + for (c = offset, f = offset + num; c < f; ++c) { + if (c == pos) + continue; + if (cst[c] != 0) + break; + } + return c < f; + }; + + // Gather all lower bounds and upper bounds of the variable. Since the + // canonical form c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a lower + // bound for x_i if c_i >= 1, and an upper bound if c_i <= -1. 
+ for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { + // The bounds are to be independent of [offset, offset + num) columns. + if (containsConstraintDependentOnRange(r, /*isEq=*/false)) + continue; + if (atIneq(r, pos) >= 1) { + // Lower bound. + lbIndices->push_back(r); + } else if (atIneq(r, pos) <= -1) { + // Upper bound. + ubIndices->push_back(r); + } + } + + // An equality is both a lower and upper bound. Record any equalities + // involving the pos^th identifier. + if (!eqIndices) + return; + + for (unsigned r = 0, e = getNumEqualities(); r < e; r++) { + if (atEq(r, pos) == 0) + continue; + if (containsConstraintDependentOnRange(r, /*isEq=*/true)) + continue; + eqIndices->push_back(r); + } +} diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp new file mode 100644 index 000000000000..8fb9390a440e --- /dev/null +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -0,0 +1,155 @@ +//===- Utils.cpp - General utilities for Presburger library ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/Analysis/Presburger/IntegerPolyhedron.h" +#include "mlir/Support/LogicalResult.h" + +using namespace mlir; + +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. 
+/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that that the +/// constraints are of the form: +/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to dividend of the division and `divisor` is +/// set to the denominator of the division. +static LogicalResult getDivRepr(const IntegerPolyhedron &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + + // Extract divisor from the lower bound. 
+ divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term. + unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implictly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. + // Set `expr` to the dividend of the division except the constant term, which + // is set below. + expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. + expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. 
If the representation could not be computed, +/// `llvm::None` is returned. +Optional> presburger_utils::computeSingleVarRepr( + const IntegerPolyhedron &cst, ArrayRef foundRepr, unsigned pos, + SmallVector ÷nd, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get divison representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. + unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient. + if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} From 46cdcf08730012128173cd261767a7d12898c8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Sat, 25 Dec 2021 19:37:45 +0000 Subject: [PATCH 030/992] [lldb] Add support for UTF-8 unicode formatting This patch adds missing formatting for UTF-8 unicode. 
Cross-referencing https://reviews.llvm.org/D66447 Reviewed By: labath Differential Revision: https://reviews.llvm.org/D112564 --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 3 +++ .../data-formatter/builtin-formats/TestBuiltinFormats.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 0df95594eea2..88c3aedb4c6b 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -5149,6 +5149,8 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { case clang::BuiltinType::UChar: case clang::BuiltinType::WChar_U: return lldb::eFormatChar; + case clang::BuiltinType::Char8: + return lldb::eFormatUnicode8; case clang::BuiltinType::Char16: return lldb::eFormatUnicode16; case clang::BuiltinType::Char32: @@ -8957,6 +8959,7 @@ bool TypeSystemClang::DumpTypeValue( case eFormatCharPrintable: case eFormatCharArray: case eFormatBytes: + case eFormatUnicode8: case eFormatBytesWithASCII: item_count = byte_size; byte_size = 1; diff --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py index c894b80228cf..7763305b58db 100644 --- a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py +++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py @@ -115,8 +115,7 @@ def test(self): self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character array", string_expr)) self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character", string_expr)) self.assertIn('= ..90zaZA....... \n', self.getFormatted("printable character", string_expr)) - # FIXME: This should probably print the characters in the uint128_t. 
- self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("unicode8", string_expr)) + self.assertIn('= 0x00 0x1b 0x39 0x30 0x7a 0x61 0x5a 0x41 0x0b 0x09 0x0d 0x0a 0x0c 0x08 0x07 0x20\n', self.getFormatted("unicode8", string_expr)) # OSType ostype_expr = "(__UINT64_TYPE__)0x" @@ -137,6 +136,9 @@ def test(self): # bytes with ASCII self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring")) + # unicode8 + self.assertIn('= 0x78 0x56 0x34 0x12\n', self.getFormatted("unicode8", "0x12345678")) + # unicode16 self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678")) From 0c553cc1af2e4c14100df6cf4a6fc91987e778e6 Mon Sep 17 00:00:00 2001 From: Arjun P Date: Sun, 26 Dec 2021 01:48:19 +0530 Subject: [PATCH 031/992] [MLIR] Add forgotten directory Support to unittests cmake The Support directory was removed from the unittests cmake when the directory was removed in 204c3b551626a925dfdc3822a6f240bdc8ef5d3a. Subsequent commits added the directory back but seem to have missed adding it back to the cmake. This patch also removes MLIRSupportIndentedStream from the list of linked libraries to avoid an ODR violation (it's already part of MLIRSupport which is also being linked here). 
Otherwise ASAN complains: ``` ================================================================= ==102592==ERROR: AddressSanitizer: odr-violation (0x7fbdf214eee0): [1] size=120 'vtable for mlir::raw_indented_ostream' /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp [2] size=120 'vtable for mlir::raw_indented_ostream' /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp These globals were registered at these points: [1]: #0 0x28a71d in __asan_register_globals (/home/arjun/llvm-project/build/tools/mlir/unittests/Support/MLIRSupportTests+0x28a71d) #1 0x7fbdf214a61b in asan.module_ctor (/home/arjun/llvm-project/build/lib/libMLIRSupportIndentedOstream.so.14git+0x661b) [2]: #0 0x28a71d in __asan_register_globals (/home/arjun/llvm-project/build/tools/mlir/unittests/Support/MLIRSupportTests+0x28a71d) #1 0x7fbdf2061c4b in asan.module_ctor (/home/arjun/llvm-project/build/lib/libMLIRSupport.so.14git+0x11bc4b) ==102592==HINT: if you don't care about these errors you may set ASAN_OPTIONS=detect_odr_violation=0 SUMMARY AddressSanitizer: odr-violation: global 'vtable for mlir::raw_indented_ostream' at /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp ==102592==ABORTING ``` Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D116027 --- mlir/include/mlir/Support/DebugAction.h | 3 +-- mlir/unittests/CMakeLists.txt | 1 + mlir/unittests/Support/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h index 227d213b0dda..4dc04476b583 100644 --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -205,8 +205,7 @@ template class DebugAction { /// Provide classof to allow casting between handler types. 
static bool classof(const DebugActionManager::HandlerBase *handler) { - return handler->getHandlerID() == - TypeID::get::Handler>(); + return handler->getHandlerID() == TypeID::get(); } }; diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 21506862a302..2798c443cf98 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(ExecutionEngine) add_subdirectory(Interfaces) add_subdirectory(IR) add_subdirectory(Pass) +add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Transforms) diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt index 6616a793ec12..fd1e66205c07 100644 --- a/mlir/unittests/Support/CMakeLists.txt +++ b/mlir/unittests/Support/CMakeLists.txt @@ -7,4 +7,4 @@ add_mlir_unittest(MLIRSupportTests ) target_link_libraries(MLIRSupportTests - PRIVATE MLIRSupportIndentedOstream MLIRSupport) + PRIVATE MLIRSupport) From dd4f5d4ae57703a9b79e466e027674fbeac80d41 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 14:23:12 -0800 Subject: [PATCH 032/992] [ELF] De-template handleTlsRelocation. NFC --- lld/ELF/Relocations.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index cfe49007b814..e0dc9a9fa3ea 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1145,10 +1145,9 @@ static unsigned handleMipsTlsRelocation(RelType type, Symbol &sym, // symbol in TLS block. // // Returns the number of relocations processed. 
-template -static unsigned -handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, - typename ELFT::uint offset, int64_t addend, RelExpr expr) { +static unsigned handleTlsRelocation(RelType type, Symbol &sym, + InputSectionBase &c, uint64_t offset, + int64_t addend, RelExpr expr) { if (!sym.isTls()) return 0; @@ -1354,8 +1353,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, getLocation(sec, sym, offset)); return; } - } else if (unsigned processed = handleTlsRelocation( - type, sym, sec, offset, addend, expr)) { + } else if (unsigned processed = + handleTlsRelocation(type, sym, sec, offset, addend, expr)) { i += (processed - 1); return; } From a00f480fe8ee5236abfe6ba89ec17f113b6fe132 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 14:34:05 -0800 Subject: [PATCH 033/992] [ELF] scanReloc: remove unused start parameter. NFC This was once used as a workaround for detecting missing PPC64 TLSGD/TLSLD relocations produced by ancient IBM XL C/C++. --- lld/ELF/Relocations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e0dc9a9fa3ea..719386e2e71f 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1263,7 +1263,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, template static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, - RelTy *start, RelTy *end) { + RelTy *end) { const RelTy &rel = *i; uint32_t symIndex = rel.getSymbol(config->isMips64EL); Symbol &sym = sec.getFile()->getSymbol(symIndex); @@ -1470,7 +1470,7 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { rels = sortRels(rels, storage); for (auto i = rels.begin(), end = rels.end(); i != end;) - scanReloc(sec, getOffset, i, rels.begin(), end); + scanReloc(sec, getOffset, i, end); // Sort relocations by offset for more efficient searching for // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. 
From 2bddab25dba8d4b0932dc2b6cacef13fcf8a0694 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 25 Dec 2021 15:59:29 -0800 Subject: [PATCH 034/992] DebugInfo: Don't hash DIE offsets before they're computed Instead of hashing DIE offsets, hash DIE references the same as they would be when used outside of a loclist - that is, deep hash the type on first use, and hash the numbering on subsequent uses. This does produce different hashes for different type references, where it did not before (because we were hashing zero all the time - so it didn't matter what type was referenced, the hash would be identical). This also allows us to enforce that the DIE offset (& size) is not queried before it is used (which came up while investigating another bug recently). --- llvm/include/llvm/CodeGen/DIE.h | 12 ++++++++++-- llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 21 +++++++++++++++++---- llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 12 ++++++++++++ llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 2 ++ llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9 +-------- llvm/test/DebugInfo/X86/convert-loclist.ll | 2 +- 6 files changed, 43 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h index 9e94c401bfae..51320aea0327 100644 --- a/llvm/include/llvm/CodeGen/DIE.h +++ b/llvm/include/llvm/CodeGen/DIE.h @@ -774,8 +774,16 @@ class DIE : IntrusiveBackListNode, public DIEValueList { unsigned getAbbrevNumber() const { return AbbrevNumber; } dwarf::Tag getTag() const { return Tag; } /// Get the compile/type unit relative offset of this DIE. - unsigned getOffset() const { return Offset; } - unsigned getSize() const { return Size; } + unsigned getOffset() const { + // A real Offset can't be zero because the unit headers are at offset zero. + assert(Offset && "Offset being queried before it's been computed."); + return Offset; + } + unsigned getSize() const { + // A real Size can't be zero because it includes the non-empty abbrev code. 
+ assert(Size && "Size being queried before it's been ocmputed."); + return Size; + } bool hasChildren() const { return ForceChildren || !Children.empty(); } void setForceChildren(bool B) { ForceChildren = B; } diff --git a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 5e7db1f2f76c..7525e5865282 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -33,6 +33,7 @@ class ByteStreamer { virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0; + virtual void emitDIERef(const DIE &D) = 0; }; class APByteStreamer final : public ByteStreamer { @@ -54,15 +55,21 @@ class APByteStreamer final : public ByteStreamer { AP.OutStreamer->AddComment(Comment); AP.emitULEB128(DWord, nullptr, PadTo); } + void emitDIERef(const DIE &D) override { + uint64_t Offset = D.getOffset(); + static constexpr unsigned ULEB128PadSize = 4; + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + emitULEB128(Offset, "", ULEB128PadSize); + } }; class HashingByteStreamer final : public ByteStreamer { private: DIEHash &Hash; public: - HashingByteStreamer(DIEHash &H) : Hash(H) {} - void emitInt8(uint8_t Byte, const Twine &Comment) override { - Hash.update(Byte); + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void emitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); } void emitSLEB128(uint64_t DWord, const Twine &Comment) override { Hash.addSLEB128(DWord); @@ -71,6 +78,7 @@ class HashingByteStreamer final : public ByteStreamer { unsigned PadTo) override { Hash.addULEB128(DWord); } + void emitDIERef(const DIE &D) override { Hash.hashRawTypeReference(D); } }; class BufferByteStreamer final : public ByteStreamer { @@ -115,9 +123,14 @@ class BufferByteStreamer final : public ByteStreamer { // with each other. 
for (size_t i = 1; i < Length; ++i) Comments.push_back(""); - } } + void emitDIERef(const DIE &D) override { + uint64_t Offset = D.getOffset(); + static constexpr unsigned ULEB128PadSize = 4; + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + emitULEB128(Offset, "", ULEB128PadSize); + } }; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 5f4ee747fcca..b7b26199956a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -207,6 +207,18 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, computeHash(Entry); } +void DIEHash::hashRawTypeReference(const DIE &Entry) { + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + addULEB128('R'); + addULEB128(DieNumber); + return; + } + DieNumber = Numbering.size(); + addULEB128('T'); + computeHash(Entry); +} + // Hash all of the values in a block like set of values. This assumes that // all of the data is going to be added as integers. void DIEHash::hashBlockData(const DIE::const_value_range &Values) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 29e1da4c5d60..24a973b39271 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -62,6 +62,8 @@ class DIEHash { /// Encodes and adds \param Value to the hash as a SLEB128. void addSLEB128(int64_t Value); + void hashRawTypeReference(const DIE &Entry); + private: /// Adds \param Str to the hash and includes a NULL byte. 
void addString(StringRef Str); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 48134f1fd774..b129aa171669 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2539,14 +2539,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Op.getDescription().Op[I] == Encoding::SizeNA) continue; if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) { - uint64_t Offset = - CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); - assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); - Streamer.emitULEB128(Offset, "", ULEB128PadSize); - // Make sure comments stay aligned. - for (unsigned J = 0; J < ULEB128PadSize; ++J) - if (Comment != End) - Comment++; + Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die); } else { for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); diff --git a/llvm/test/DebugInfo/X86/convert-loclist.ll b/llvm/test/DebugInfo/X86/convert-loclist.ll index d732840d4924..56dede02c51d 100644 --- a/llvm/test/DebugInfo/X86/convert-loclist.ll +++ b/llvm/test/DebugInfo/X86/convert-loclist.ll @@ -13,7 +13,7 @@ ; often - add another IR file with a different DW_OP_convert that's otherwise ; identical and demonstrate that they have different DWO IDs. -; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xecf2563326b0bdd3 +; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xa6edbf487b0a7acf ; Regression testing a fairly quirky bug where instead of hashing (see above), ; extra bytes would be emitted into the output assembly in no From dabfefa490570e720249070db68cc48cdddab82c Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 26 Dec 2021 02:13:54 +0000 Subject: [PATCH 035/992] Fix clang-tidy performance-move-const-arg in DLTI Dialect (NFC) The const loop iterator was inhibiting the std::move(). 
--- mlir/lib/Dialect/DLTI/DLTI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/DLTI/DLTI.cpp b/mlir/lib/Dialect/DLTI/DLTI.cpp index c35572bbf1d4..1449a2c4691f 100644 --- a/mlir/lib/Dialect/DLTI/DLTI.cpp +++ b/mlir/lib/Dialect/DLTI/DLTI.cpp @@ -201,7 +201,7 @@ combineOneSpec(DataLayoutSpecInterface spec, spec.bucketEntriesByType(newEntriesForType, newEntriesForID); // Try overwriting the old entries with the new ones. - for (const auto &kvp : newEntriesForType) { + for (auto &kvp : newEntriesForType) { if (!entriesForType.count(kvp.first)) { entriesForType[kvp.first] = std::move(kvp.second); continue; From d5e310b154351e28f56fbf3c5401da63caef2f98 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 22:05:20 -0800 Subject: [PATCH 036/992] [ELF][test] Make some TLS tests less sensitive to addresses --- lld/test/ELF/aarch64-tls-le.s | 24 +++++++++---------- lld/test/ELF/aarch64-tlsdesc-zrel.s | 18 +++++++-------- lld/test/ELF/i386-tls-opt.s | 36 ++++++++++++++--------------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/lld/test/ELF/aarch64-tls-le.s b/lld/test/ELF/aarch64-tls-le.s index c43345e8e254..803818eb9e7f 100644 --- a/lld/test/ELF/aarch64-tls-le.s +++ b/lld/test/ELF/aarch64-tls-le.s @@ -1,15 +1,15 @@ # REQUIRES: aarch64 -# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %tmain.o -# RUN: ld.lld %tmain.o -o %tout -# RUN: llvm-objdump -d %tout | FileCheck %s -# RUN: llvm-readobj -S -r %tout | FileCheck -check-prefix=RELOC %s +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +# RUN: llvm-readobj -S -r %t | FileCheck -check-prefix=RELOC %s #Local-Dynamic to Local-Exec relax creates no #RELOC: Relocations [ #RELOC-NEXT: ] ## Reject local-exec TLS relocations for -shared. 
-# RUN: not ld.lld -shared %tmain.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: +# RUN: not ld.lld -shared %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: # ERR: error: relocation R_AARCH64_TLSLE_ADD_TPREL_HI12 against v1 cannot be used with -shared # ERR: error: relocation R_AARCH64_TLSLE_ADD_TPREL_LO12_NC against v1 cannot be used with -shared @@ -27,13 +27,13 @@ _start: # TCB size = 0x16 and foo is first element from TLS register. #CHECK: Disassembly of section .text: -#CHECK: <_start>: -#CHECK: 210158: 40 d0 3b d5 mrs x0, TPIDR_EL0 -#CHECK: 21015c: 00 00 40 91 add x0, x0, #0, lsl #12 -#CHECK: 210160: 00 40 00 91 add x0, x0, #16 -#CHECK: 210164: 40 d0 3b d5 mrs x0, TPIDR_EL0 -#CHECK: 210168: 00 fc 7f 91 add x0, x0, #4095, lsl #12 -#CHECK: 21016c: 00 e0 3f 91 add x0, x0, #4088 +#CHECK: <_start>: +#CHECK-NEXT: mrs x0, TPIDR_EL0 +#CHECK-NEXT: add x0, x0, #0, lsl #12 +#CHECK-NEXT: add x0, x0, #16 +#CHECK-NEXT: mrs x0, TPIDR_EL0 +#CHECK-NEXT: add x0, x0, #4095, lsl #12 +#CHECK-NEXT: add x0, x0, #4088 .section .tbss,"awT",@nobits diff --git a/lld/test/ELF/aarch64-tlsdesc-zrel.s b/lld/test/ELF/aarch64-tlsdesc-zrel.s index f9f69c43d1a7..1b35e0d29a26 100644 --- a/lld/test/ELF/aarch64-tlsdesc-zrel.s +++ b/lld/test/ELF/aarch64-tlsdesc-zrel.s @@ -9,14 +9,14 @@ // RELA: Relocations [ // RELA-NEXT: Section (5) .rela.dyn { -// RELA-NEXT: 0x20340 R_AARCH64_TLSDESC - 0x0 -// RELA-NEXT: 0x20350 R_AARCH64_TLSDESC - 0x4 +// RELA-NEXT: 0x[[#%x,ADDR:]] R_AARCH64_TLSDESC - 0x0 +// RELA-NEXT: 0x[[#ADDR+16]] R_AARCH64_TLSDESC - 0x4 // RELA-NEXT: } // RELA-NEXT: ] // RELA-NEXT: Hex dump of section '.got': -// RELA-NEXT: 0x00020340 00000000 00000000 00000000 00000000 -// RELA-NO-ADDENDS-NEXT: 0x00020350 00000000 00000000 00000000 00000000 -// RELA-WITH-ADDENDS-NEXT: 0x00020350 00000000 00000000 04000000 00000000 +// RELA-NEXT: 0x000[[#ADDR]] 00000000 00000000 00000000 00000000 +// RELA-NO-ADDENDS-NEXT: 
0x000[[#ADDR+16]] 00000000 00000000 00000000 00000000 +// RELA-WITH-ADDENDS-NEXT: 0x000[[#ADDR+16]] 00000000 00000000 04000000 00000000 /// Addend 0x4 for R_AARCH64_TLSDESC -----^ // RELA-EMPTY: @@ -24,13 +24,13 @@ // RUN: llvm-readobj -W -r -x .got %t-rel.so | FileCheck %s --check-prefix=REL // REL: Relocations [ // REL-NEXT: Section (5) .rel.dyn { -// REL-NEXT: 0x20330 R_AARCH64_TLSDESC -{{$}} -// REL-NEXT: 0x20340 R_AARCH64_TLSDESC -{{$}} +// REL-NEXT: 0x[[#%x,ADDR:]] R_AARCH64_TLSDESC -{{$}} +// REL-NEXT: 0x[[#ADDR+16]] R_AARCH64_TLSDESC -{{$}} // REL-NEXT: } // REL-NEXT: ] // REL-NEXT: Hex dump of section '.got': -// REL-NEXT: 0x00020330 00000000 00000000 00000000 00000000 -// REL-NEXT: 0x00020340 00000000 00000000 04000000 00000000 +// REL-NEXT: 0x000[[#ADDR]] 00000000 00000000 00000000 00000000 +// REL-NEXT: 0x000[[#ADDR+16]] 00000000 00000000 04000000 00000000 /// Addend 0x4 for R_AARCH64_TLSDESC -----^ // REL-EMPTY: diff --git a/lld/test/ELF/i386-tls-opt.s b/lld/test/ELF/i386-tls-opt.s index d6838cab7397..a5aad4c40936 100644 --- a/lld/test/ELF/i386-tls-opt.s +++ b/lld/test/ELF/i386-tls-opt.s @@ -1,8 +1,8 @@ // REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o +// RUN: llvm-mc -filetype=obj -triple=i686 %s -o %t.o // RUN: ld.lld %t.o -o %t1 // RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s -// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s +// RUN: llvm-objdump -d --no-show-raw-insn %t1 | FileCheck --check-prefix=DISASM %s // NORELOC: Relocations [ // NORELOC-NEXT: ] @@ -11,25 +11,25 @@ // DISASM-EMPTY: // DISASM-NEXT: <_start>: // LD -> LE: -// DISASM-NEXT: 4010f4: 65 a1 00 00 00 00 movl %gs:0, %eax -// DISASM-NEXT: 4010fa: 90 nop -// DISASM-NEXT: 4010fb: 8d 74 26 00 leal (%esi,%eiz), %esi -// DISASM-NEXT: 4010ff: 8d 90 f8 ff ff ff leal -8(%eax), %edx -// DISASM-NEXT: 401105: 65 a1 00 00 00 00 movl %gs:0, %eax -// DISASM-NEXT: 40110b: 90 nop -// DISASM-NEXT: 40110c: 8d 74 26 00 leal (%esi,%eiz), 
%esi -// DISASM-NEXT: 401110: 8d 90 fc ff ff ff leal -4(%eax), %edx +// DISASM-NEXT: movl %gs:0, %eax +// DISASM-NEXT: nop +// DISASM-NEXT: leal (%esi,%eiz), %esi +// DISASM-NEXT: leal -8(%eax), %edx +// DISASM-NEXT: movl %gs:0, %eax +// DISASM-NEXT: nop +// DISASM-NEXT: leal (%esi,%eiz), %esi +// DISASM-NEXT: leal -4(%eax), %edx // IE -> LE: // 4294967288 == 0xFFFFFFF8 // 4294967292 == 0xFFFFFFFC -// DISASM-NEXT: 401116: 65 a1 00 00 00 00 movl %gs:0, %eax -// DISASM-NEXT: 40111c: c7 c0 f8 ff ff ff movl $4294967288, %eax -// DISASM-NEXT: 401122: 65 a1 00 00 00 00 movl %gs:0, %eax -// DISASM-NEXT: 401128: c7 c0 fc ff ff ff movl $4294967292, %eax -// DISASM-NEXT: 40112e: 65 a1 00 00 00 00 movl %gs:0, %eax -// DISASM-NEXT: 401134: 8d 80 f8 ff ff ff leal -8(%eax), %eax -// DISASM-NEXT: 40113a: 65 a1 00 00 00 00 movl %gs:0, %eax -// DISASM-NEXT: 401140: 8d 80 fc ff ff ff leal -4(%eax), %eax +// DISASM-NEXT: movl %gs:0, %eax +// DISASM-NEXT: movl $4294967288, %eax +// DISASM-NEXT: movl %gs:0, %eax +// DISASM-NEXT: movl $4294967292, %eax +// DISASM-NEXT: movl %gs:0, %eax +// DISASM-NEXT: leal -8(%eax), %eax +// DISASM-NEXT: movl %gs:0, %eax +// DISASM-NEXT: leal -4(%eax), %eax .type tls0,@object .section .tbss,"awT",@nobits .globl tls0 From 2c8ebab32eadbc749e669a6529d6a40929ae5d14 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 23:16:26 -0800 Subject: [PATCH 037/992] [ELF] sortSymTabSymbols: change vector to SmallVector This function may take ~1% time. SmallVector is smaller (16 bytes instead of 24) and more efficient. --- lld/ELF/SyntheticSections.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index e480118f5ae9..b8775097f1fc 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2127,12 +2127,12 @@ void SymbolTableBaseSection::sortSymTabSymbols() { // symbols, they are already naturally placed first in each group. 
That // happens because STT_FILE is always the first symbol in the object and hence // precede all other local symbols we add for a file. - MapVector> arr; + MapVector> arr; for (const SymbolTableEntry &s : llvm::make_range(symbols.begin(), e)) arr[s.sym->file].push_back(s); auto i = symbols.begin(); - for (std::pair> &p : arr) + for (auto &p : arr) for (SymbolTableEntry &entry : p.second) *i++ = entry; } From 20b4704da315f569d417a55313c61b792e568a07 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 23:46:47 -0800 Subject: [PATCH 038/992] [ELF] reportRangeError: mention symbol name for non-STT_SECTION local symbols like non-global symbols --- lld/ELF/Relocations.cpp | 4 ++-- lld/test/ELF/aarch64-movw-error.s | 6 +++--- lld/test/ELF/arm-adr-err-long.s | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 719386e2e71f..5841c509aa67 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -100,11 +100,11 @@ void elf::reportRangeError(uint8_t *loc, const Relocation &rel, const Twine &v, int64_t min, uint64_t max) { ErrorPlace errPlace = getErrorPlace(loc); std::string hint; - if (rel.sym && !rel.sym->isLocal()) + if (rel.sym && !rel.sym->isSection()) hint = "; references " + lld::toString(*rel.sym); if (!errPlace.srcLoc.empty()) hint += "\n>>> referenced by " + errPlace.srcLoc; - if (rel.sym && !rel.sym->isLocal()) + if (rel.sym && !rel.sym->isSection()) hint += getDefinedLocation(*rel.sym); if (errPlace.isec && errPlace.isec->name.startswith(".debug")) diff --git a/lld/test/ELF/aarch64-movw-error.s b/lld/test/ELF/aarch64-movw-error.s index 03575b1fce26..a5bd2c9d8855 100644 --- a/lld/test/ELF/aarch64-movw-error.s +++ b/lld/test/ELF/aarch64-movw-error.s @@ -36,11 +36,11 @@ movn x0, #:prel_g1:.-0x100010000 movn x0, #:prel_g2:.-0x1000100000000 movz x0, #:tprel_g0: v1 -# CHECK: relocation R_AARCH64_TLSLE_MOVW_TPREL_G0 out of range: 65552 is not in [-65536, 65535] +# CHECK: 
relocation R_AARCH64_TLSLE_MOVW_TPREL_G0 out of range: 65552 is not in [-65536, 65535]; references v1 movz x0, #:tprel_g1: v2 -# CHECK: relocation R_AARCH64_TLSLE_MOVW_TPREL_G1 out of range: 4295032848 is not in [-4294967296, 4294967295] +# CHECK: relocation R_AARCH64_TLSLE_MOVW_TPREL_G1 out of range: 4295032848 is not in [-4294967296, 4294967295]; references v2 movz x0, #:tprel_g2: v3 -# CHECK: relocation R_AARCH64_TLSLE_MOVW_TPREL_G2 out of range: 281479271743496 is not in [-281474976710656, 281474976710655] +# CHECK: relocation R_AARCH64_TLSLE_MOVW_TPREL_G2 out of range: 281479271743496 is not in [-281474976710656, 281474976710655]; references v3 .section .tbss,"awT",@nobits .balign 16 diff --git a/lld/test/ELF/arm-adr-err-long.s b/lld/test/ELF/arm-adr-err-long.s index a4aa86b6eb96..0fd6beaba512 100644 --- a/lld/test/ELF/arm-adr-err-long.s +++ b/lld/test/ELF/arm-adr-err-long.s @@ -49,7 +49,7 @@ _start: .inst 0xe1c000d0 // ldrd r0, r1, [r0, #0] .reloc 32, R_ARM_ALU_PC_G0_NC, dat2 .reloc 36, R_ARM_ALU_PC_G1_NC, dat2 -// CHECK: {{.*}}.s.tmp.o:(.text.1+0x28): relocation R_ARM_LDRS_PC_G2 out of range: 4056 is not in [0, 255] +// CHECK: {{.*}}.s.tmp.o:(.text.1+0x28): relocation R_ARM_LDRS_PC_G2 out of range: 4056 is not in [0, 255]; references dat2 .reloc 40, R_ARM_LDRS_PC_G2, dat2 .section .text.2, "ax", %progbits From aabe901d57d6df4cd2786163359a7b2a7aae8c32 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 23:59:27 -0800 Subject: [PATCH 039/992] [ELF] Remove one redundant computeBinding This does resolve the redundancy in includeInDynsym(). 
--- lld/ELF/SyntheticSections.cpp | 13 ++++--------- lld/ELF/Writer.cpp | 1 + 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index b8775097f1fc..86b2f33196ec 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2116,9 +2116,8 @@ void SymbolTableBaseSection::finalizeContents() { void SymbolTableBaseSection::sortSymTabSymbols() { // Move all local symbols before global symbols. auto e = std::stable_partition( - symbols.begin(), symbols.end(), [](const SymbolTableEntry &s) { - return s.sym->isLocal() || s.sym->computeBinding() == STB_LOCAL; - }); + symbols.begin(), symbols.end(), + [](const SymbolTableEntry &s) { return s.sym->isLocal(); }); size_t numLocals = e - symbols.begin(); getParent()->info = numLocals + 1; @@ -2208,12 +2207,8 @@ template void SymbolTableSection::writeTo(uint8_t *buf) { // Set st_info and st_other. eSym->st_other = 0; - if (sym->isLocal()) { - eSym->setBindingAndType(STB_LOCAL, sym->type); - } else { - eSym->setBindingAndType(sym->computeBinding(), sym->type); - eSym->setVisibility(sym->visibility); - } + eSym->setBindingAndType(sym->binding, sym->type); + eSym->setVisibility(sym->visibility); // The 3 most significant bits of st_other are used by OpenPOWER ABI. // See getPPC64GlobalEntryToLocalEntryOffset() for more details. 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 497e56886b72..6fbb3f7bf471 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1973,6 +1973,7 @@ template void Writer::finalizeSections() { for (Symbol *sym : symtab->symbols()) { if (!sym->isUsedInRegularObj || !includeInSymtab(*sym)) continue; + sym->binding = sym->computeBinding(); if (in.symTab) in.symTab->addSymbol(sym); From 2132906836cf0618e76485c67a60305bf1557ffc Mon Sep 17 00:00:00 2001 From: Arjun P Date: Sun, 26 Dec 2021 13:59:23 +0530 Subject: [PATCH 040/992] Revert "[MLIR] Add forgotten directory Support to unittests cmake" This reverts commit 0c553cc1af2e4c14100df6cf4a6fc91987e778e6. This caused a buildbot failure (https://lab.llvm.org/buildbot#builders/197/builds/888). ``` ******************** TEST 'ScudoStandalone-Unit :: ./ScudoUnitTest-aarch64-Test/ScudoCommonTest.ResidentMemorySize' FAILED ******************** Script: -- /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/projects/compiler-rt/lib/scudo/standalone/tests/./ScudoUnitTest-aarch64-Test --gtest_filter=ScudoCommonTest.ResidentMemorySize -- Note: Google Test filter = ScudoCommonTest.ResidentMemorySize [==========] Running 1 test from 1 test suite. [----------] Global test environment set-up. [----------] 1 test from ScudoCommonTest [ RUN ] ScudoCommonTest.ResidentMemorySize /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/lib/scudo/standalone/tests/common_test.cpp:49: Failure Expected: (getResidentMemorySize()) > (OnStart + Size - Threshold), actual: 707358720 vs 943153152 [ FAILED ] ScudoCommonTest.ResidentMemorySize (21709 ms) [----------] 1 test from ScudoCommonTest (21709 ms total) [----------] Global test environment tear-down [==========] 1 test from 1 test suite ran. (21709 ms total) [ PASSED ] 0 tests. 
[ FAILED ] 1 test, listed below: [ FAILED ] ScudoCommonTest.ResidentMemorySize 1 FAILED TEST ******************** ``` --- mlir/include/mlir/Support/DebugAction.h | 3 ++- mlir/unittests/CMakeLists.txt | 1 - mlir/unittests/Support/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h index 4dc04476b583..227d213b0dda 100644 --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -205,7 +205,8 @@ template class DebugAction { /// Provide classof to allow casting between handler types. static bool classof(const DebugActionManager::HandlerBase *handler) { - return handler->getHandlerID() == TypeID::get(); + return handler->getHandlerID() == + TypeID::get::Handler>(); } }; diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 2798c443cf98..21506862a302 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -11,7 +11,6 @@ add_subdirectory(ExecutionEngine) add_subdirectory(Interfaces) add_subdirectory(IR) add_subdirectory(Pass) -add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Transforms) diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt index fd1e66205c07..6616a793ec12 100644 --- a/mlir/unittests/Support/CMakeLists.txt +++ b/mlir/unittests/Support/CMakeLists.txt @@ -7,4 +7,4 @@ add_mlir_unittest(MLIRSupportTests ) target_link_libraries(MLIRSupportTests - PRIVATE MLIRSupport) + PRIVATE MLIRSupportIndentedOstream MLIRSupport) From fc15fc57e61590998984a08f3fc47145d0ef7521 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 26 Dec 2021 01:01:38 -0800 Subject: [PATCH 041/992] [Orc] Remove a redundant declaration (NFC) DebugUtils.h contains an identical declaration with a correct comment, namely: /// Render a LookupKind. 
raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K); Identified with readability-redundant-declaration. --- llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h index 4b4472e0ac4d..7eb98dfc741e 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h @@ -73,9 +73,6 @@ raw_ostream &operator<<(raw_ostream &OS, /// Rendar a SymbolLookupFlags instance. raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags); -/// Render a JITDylibLookupFlags instance. -raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K); - /// Render a SymbolLookupSet entry. raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet::value_type &KV); From 7006d34ce753a4e7f3caf71539008176ceaf6e43 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 26 Dec 2021 08:51:06 -0800 Subject: [PATCH 042/992] Ensure newlines at the end of files (NFC) --- libcxx/src/chrono_system_time_init.h | 2 +- libcxx/src/experimental/memory_resource_init_helper.h | 2 +- libcxx/src/iostream_init.h | 2 +- third-party/benchmark/src/CMakeLists.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/src/chrono_system_time_init.h b/libcxx/src/chrono_system_time_init.h index 3c5a0c33a56a..b1bdc691b385 100644 --- a/libcxx/src/chrono_system_time_init.h +++ b/libcxx/src/chrono_system_time_init.h @@ -1,2 +1,2 @@ #pragma GCC system_header -GetSystemTimeInit GetSystemTimeAsFileTimeFunc _LIBCPP_INIT_PRIORITY_MAX; \ No newline at end of file +GetSystemTimeInit GetSystemTimeAsFileTimeFunc _LIBCPP_INIT_PRIORITY_MAX; diff --git a/libcxx/src/experimental/memory_resource_init_helper.h b/libcxx/src/experimental/memory_resource_init_helper.h index 2e1cae5ecc60..56b9da685878 100644 --- a/libcxx/src/experimental/memory_resource_init_helper.h +++ 
b/libcxx/src/experimental/memory_resource_init_helper.h @@ -1,2 +1,2 @@ #pragma GCC system_header -_LIBCPP_SAFE_STATIC ResourceInitHelper res_init _LIBCPP_INIT_PRIORITY_MAX; \ No newline at end of file +_LIBCPP_SAFE_STATIC ResourceInitHelper res_init _LIBCPP_INIT_PRIORITY_MAX; diff --git a/libcxx/src/iostream_init.h b/libcxx/src/iostream_init.h index b0a60f42a67c..7d1bb5c2d7d8 100644 --- a/libcxx/src/iostream_init.h +++ b/libcxx/src/iostream_init.h @@ -1,2 +1,2 @@ #pragma GCC system_header -_LIBCPP_HIDDEN ios_base::Init __start_std_streams _LIBCPP_INIT_PRIORITY_MAX; \ No newline at end of file +_LIBCPP_HIDDEN ios_base::Init __start_std_streams _LIBCPP_INIT_PRIORITY_MAX; diff --git a/third-party/benchmark/src/CMakeLists.txt b/third-party/benchmark/src/CMakeLists.txt index dd82e9761bd4..e814a4e00f7c 100644 --- a/third-party/benchmark/src/CMakeLists.txt +++ b/third-party/benchmark/src/CMakeLists.txt @@ -156,4 +156,4 @@ else() DIRECTORY "${PROJECT_SOURCE_DIR}/docs/" DESTINATION ${CMAKE_INSTALL_DOCDIR}) endif() -endif() \ No newline at end of file +endif() From 0542d15211cb1fb45bcd79d485d122c69cd23f28 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 26 Dec 2021 09:39:26 -0800 Subject: [PATCH 043/992] Remove redundant string initialization (NFC) Identified with readability-redundant-string-init. 
--- .../clang-tidy/cert/LimitedRandomnessCheck.cpp | 2 +- clang/lib/Basic/Targets/M68k.cpp | 2 +- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 ++-- clang/lib/Format/NamespaceEndCommentsFixer.cpp | 4 ++-- clang/lib/Frontend/InitHeaderSearch.cpp | 2 +- clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp | 2 +- clang/tools/clang-scan-deps/ClangScanDeps.cpp | 2 +- clang/utils/TableGen/NeonEmitter.cpp | 2 +- lldb/source/Commands/CommandCompletions.cpp | 2 +- .../source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp | 2 +- .../Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp | 2 +- .../Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp | 2 +- .../InstrumentationRuntimeMainThreadChecker.cpp | 6 +++--- .../TSan/InstrumentationRuntimeTSan.cpp | 6 +++--- .../Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp | 2 +- .../Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp | 2 +- lldb/source/Target/RegisterContextUnwind.cpp | 2 +- llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 2 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 6 +++--- 20 files changed, 28 insertions(+), 28 deletions(-) diff --git a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp index 9733a4e7c1f5..0691787f1a90 100644 --- a/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp @@ -24,7 +24,7 @@ void LimitedRandomnessCheck::registerMatchers(MatchFinder *Finder) { } void LimitedRandomnessCheck::check(const MatchFinder::MatchResult &Result) { - std::string Msg = ""; + std::string Msg; if (getLangOpts().CPlusPlus) Msg = "; use C++11 random library instead"; diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp index c0cd8fa90ed6..ada5b97ed66d 100644 --- a/clang/lib/Basic/Targets/M68k.cpp +++ b/clang/lib/Basic/Targets/M68k.cpp @@ -29,7 +29,7 @@ M68kTargetInfo::M68kTargetInfo(const 
llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { - std::string Layout = ""; + std::string Layout; // M68k is Big Endian Layout += "E"; diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 60701072ac4b..ac52eb219f54 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -414,7 +414,7 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo { LongWidth = LongAlign = PointerWidth = PointerAlign = 64; IntMaxType = SignedLong; Int64Type = SignedLong; - std::string DataLayout = ""; + std::string DataLayout; if (Triple.isOSAIX()) { // TODO: Set appropriate ABI for AIX platform. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e35c15421520..b23100d435b4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1434,7 +1434,7 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, Loc.isInvalid()) { SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); } else { - std::string FunctionName = ""; + std::string FunctionName; if (const auto *FD = dyn_cast_or_null(CGF.CurFuncDecl)) FunctionName = FD->getQualifiedNameAsString(); PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); @@ -9540,7 +9540,7 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, Loc = MapExprs.getMapDecl()->getLocation(); } - std::string ExprName = ""; + std::string ExprName; if (MapExprs.getMapExpr()) { PrintingPolicy P(CGF.getContext().getLangOpts()); llvm::raw_string_ostream OS(ExprName); diff --git a/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/clang/lib/Format/NamespaceEndCommentsFixer.cpp index 38ab5b9df76d..9c00d243f34a 100644 --- a/clang/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/clang/lib/Format/NamespaceEndCommentsFixer.cpp @@ -28,7 +28,7 @@ std::string computeName(const FormatToken *NamespaceTok) { assert(NamespaceTok && 
NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && "expecting a namespace token"); - std::string name = ""; + std::string name; const FormatToken *Tok = NamespaceTok->getNextNonComment(); if (NamespaceTok->is(TT_NamespaceMacro)) { // Collects all the non-comment tokens between opening parenthesis @@ -224,7 +224,7 @@ std::pair NamespaceEndCommentsFixer::analyze( return {Fixes, 0}; } - std::string AllNamespaceNames = ""; + std::string AllNamespaceNames; size_t StartLineIndex = SIZE_MAX; StringRef NamespaceTokenText; unsigned int CompactedNamespacesCount = 0; diff --git a/clang/lib/Frontend/InitHeaderSearch.cpp b/clang/lib/Frontend/InitHeaderSearch.cpp index ed1314f3b03d..94ea7baa99c5 100644 --- a/clang/lib/Frontend/InitHeaderSearch.cpp +++ b/clang/lib/Frontend/InitHeaderSearch.cpp @@ -354,7 +354,7 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, break; case llvm::Triple::PS4: { // gets prepended later in AddPath(). - std::string BaseSDKPath = ""; + std::string BaseSDKPath; if (!HasSysroot) { const char *envValue = getenv("SCE_ORBIS_SDK_DIR"); if (envValue) diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp index cd502241ef61..cf97439a468d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp @@ -59,7 +59,7 @@ class ReturnValueChecker : public Checker { } // namespace static std::string getName(const CallEvent &Call) { - std::string Name = ""; + std::string Name; if (const auto *MD = dyn_cast(Call.getDecl())) if (const CXXRecordDecl *RD = MD->getParent()) Name += RD->getNameAsString() + "::"; diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 86e95a75e429..4d61e37db29b 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -470,7 +470,7 @@ int main(int 
argc, const char **argv) { AdjustingCompilations->appendArgumentsAdjuster( [&ResourceDirCache](const tooling::CommandLineArguments &Args, StringRef FileName) { - std::string LastO = ""; + std::string LastO; bool HasResourceDir = false; bool ClangCLMode = false; auto FlagsEnd = llvm::find(Args, "--"); diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index ff552b66c0e2..af0544b54b17 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -1473,7 +1473,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { Intr.Dependencies.insert(&Callee); // Now create the call itself. - std::string S = ""; + std::string S; if (!Callee.isBigEndianSafe()) S += CallPrefix.str(); S += Callee.getMangledName(true) + "("; diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp index 42b0bac717bd..ff825cce813e 100644 --- a/lldb/source/Commands/CommandCompletions.cpp +++ b/lldb/source/Commands/CommandCompletions.cpp @@ -600,7 +600,7 @@ void CommandCompletions::VariablePath(CommandInterpreter &interpreter, void CommandCompletions::Registers(CommandInterpreter &interpreter, CompletionRequest &request, SearchFilter *searcher) { - std::string reg_prefix = ""; + std::string reg_prefix; if (request.GetCursorArgumentPrefix().startswith("$")) reg_prefix = "$"; diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index 2cf32bdd3800..8c54219f0a14 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -1111,7 +1111,7 @@ DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, triple.getSubArch() == llvm::Triple::NoSubArch) triple.setArchName("armv8.7a"); - std::string features_str = ""; + std::string features_str; const char *triple_str = triple.getTriple().c_str(); // ARM Cortex 
M0-M7 devices only execute thumb instructions diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp index ea9c95c55cbb..4ef0a034b6dd 100644 --- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp +++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp @@ -137,7 +137,7 @@ EmulateInstructionMIPS::EmulateInstructionMIPS( break; } - std::string features = ""; + std::string features; uint32_t arch_flags = arch.GetFlags(); if (arch_flags & ArchSpec::eMIPSAse_msa) features += "+msa,"; diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp index e5732a50f3f2..26736f4c58ba 100644 --- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp +++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp @@ -137,7 +137,7 @@ EmulateInstructionMIPS64::EmulateInstructionMIPS64( break; } - std::string features = ""; + std::string features; uint32_t arch_flags = arch.GetFlags(); if (arch_flags & ArchSpec::eMIPSAse_msa) features += "+msa,"; diff --git a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp index dc8c7c96aa11..a5c23615309d 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp @@ -100,14 +100,14 @@ InstrumentationRuntimeMainThreadChecker::RetrieveReportData( if (!apiname_ptr) return StructuredData::ObjectSP(); - std::string apiName = ""; + std::string apiName; Status read_error; target.ReadCStringFromMemory(apiname_ptr, apiName, read_error); if (read_error.Fail()) return StructuredData::ObjectSP(); - 
std::string className = ""; - std::string selector = ""; + std::string className; + std::string selector; if (apiName.substr(0, 2) == "-[") { size_t spacePos = apiName.find(' '); if (spacePos != std::string::npos) { diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp index aef10bb2a778..977d8e4dbe07 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp @@ -711,7 +711,7 @@ addr_t InstrumentationRuntimeTSan::GetMainRacyAddress( std::string InstrumentationRuntimeTSan::GetLocationDescription( StructuredData::ObjectSP report, addr_t &global_addr, std::string &global_name, std::string &filename, uint32_t &line) { - std::string result = ""; + std::string result; ProcessSP process_sp = GetProcessSP(); @@ -820,8 +820,8 @@ bool InstrumentationRuntimeTSan::NotifyBreakpointHit( report->GetAsDictionary()->AddIntegerItem("memory_address", main_address); addr_t global_addr = 0; - std::string global_name = ""; - std::string location_filename = ""; + std::string global_name; + std::string location_filename; uint32_t location_line = 0; std::string location_description = instance->GetLocationDescription( report, global_addr, global_name, location_filename, location_line); diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index a70e6a079f76..ef419d9a89e8 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -115,7 +115,7 @@ Status MinidumpFileBuilder::AddSystemInfo(const llvm::Triple &target_triple) { sys_info.PlatformId = platform_id; m_data.AppendData(&sys_info, sizeof(llvm::minidump::SystemInfo)); - std::string csd_string = ""; + std::string csd_string; 
error = WriteString(csd_string, &m_data); if (error.Fail()) { diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index 3535a5ad739d..66090c2ee1a0 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -693,7 +693,7 @@ const UnixSignalsSP &PlatformRemoteGDBServer::GetRemoteUnixSignals() { if (object_sp && object_sp->IsValid()) notify = object_sp->GetBooleanValue(); - std::string description{""}; + std::string description; object_sp = dict->GetValueForKey("description"); if (object_sp && object_sp->IsValid()) description = std::string(object_sp->GetStringValue()); diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index 96b69640a3a3..315ccea65d1f 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -1509,7 +1509,7 @@ RegisterContextUnwind::SavedLocationForRegister( regnum.GetName(), regnum.GetAsKind(eRegisterKindLLDB)); return UnwindLLDB::RegisterSearchResult::eRegisterFound; } else { - std::string unwindplan_name(""); + std::string unwindplan_name; if (m_full_unwind_plan_sp) { unwindplan_name += "via '"; unwindplan_name += m_full_unwind_plan_sp->GetSourceName().AsCString(); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp index d91a630256d6..ee54fc754803 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -74,7 +74,7 @@ std::string DWARFAbbreviationDeclarationSet::getCodeRange() const { for (const auto &Decl : Decls) Codes.push_back(Decl.getCode()); - std::string Buffer = ""; + std::string Buffer; raw_string_ostream Stream(Buffer); // Each iteration through this loop represents a single contiguous range in // the set of codes. 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d20eaaaa65e8..98f1a91b494b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -410,7 +410,7 @@ void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { } std::string AMDGPUTargetID::toString() const { - std::string StringRep = ""; + std::string StringRep; raw_string_ostream StreamRep(StringRep); auto TargetTriple = STI.getTargetTriple(); @@ -421,7 +421,7 @@ std::string AMDGPUTargetID::toString() const { << TargetTriple.getOSName() << '-' << TargetTriple.getEnvironmentName() << '-'; - std::string Processor = ""; + std::string Processor; // TODO: Following else statement is present here because we used various // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). // Remove once all aliases are removed from GCNProcessors.td. @@ -432,7 +432,7 @@ std::string AMDGPUTargetID::toString() const { Twine(Version.Stepping)) .str(); - std::string Features = ""; + std::string Features; if (Optional HsaAbiVersion = getHsaAbiVersion(&STI)) { switch (*HsaAbiVersion) { case ELF::ELFABIVERSION_AMDGPU_HSA_V2: From bf7f3dd74ee3d6c8a219ff5434df12451c9aad95 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 12:11:40 -0800 Subject: [PATCH 044/992] [ELF] Move outSecOff addition from InputSection::writeTo to the caller Simplify the code a bit and improve consistency with SyntheticSection::writeTo. 
--- lld/ELF/InputSection.cpp | 21 ++++++++++----------- lld/ELF/OutputSections.cpp | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index e1ee3def89f3..6d26e19aac48 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -1227,7 +1227,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *buf, template void InputSection::writeTo(uint8_t *buf) { if (auto *s = dyn_cast(this)) { - s->writeTo(buf + outSecOff); + s->writeTo(buf); return; } @@ -1236,17 +1236,17 @@ template void InputSection::writeTo(uint8_t *buf) { // If -r or --emit-relocs is given, then an InputSection // may be a relocation section. if (LLVM_UNLIKELY(type == SHT_RELA)) { - copyRelocations(buf + outSecOff, getDataAs()); + copyRelocations(buf, getDataAs()); return; } if (LLVM_UNLIKELY(type == SHT_REL)) { - copyRelocations(buf + outSecOff, getDataAs()); + copyRelocations(buf, getDataAs()); return; } // If -r is given, we may have a SHT_GROUP section. if (LLVM_UNLIKELY(type == SHT_GROUP)) { - copyShtGroup(buf + outSecOff); + copyShtGroup(buf); return; } @@ -1254,20 +1254,19 @@ template void InputSection::writeTo(uint8_t *buf) { // to the buffer. if (uncompressedSize >= 0) { size_t size = uncompressedSize; - if (Error e = zlib::uncompress(toStringRef(rawData), - (char *)(buf + outSecOff), size)) + if (Error e = zlib::uncompress(toStringRef(rawData), (char *)buf, size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); - uint8_t *bufEnd = buf + outSecOff + size; - relocate(buf + outSecOff, bufEnd); + uint8_t *bufEnd = buf + size; + relocate(buf, bufEnd); return; } // Copy section contents from source object file to output file // and then apply relocations. 
- memcpy(buf + outSecOff, data().data(), data().size()); - uint8_t *bufEnd = buf + outSecOff + data().size(); - relocate(buf + outSecOff, bufEnd); + memcpy(buf, data().data(), data().size()); + uint8_t *bufEnd = buf + data().size(); + relocate(buf, bufEnd); } void InputSection::replace(InputSection *other) { diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 4a03ac387814..cd53eecd8756 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -355,7 +355,7 @@ template void OutputSection::writeTo(uint8_t *buf) { parallelForEachN(0, sections.size(), [&](size_t i) { InputSection *isec = sections[i]; - isec->writeTo(buf); + isec->writeTo(buf + isec->outSecOff); // Fill gaps between sections. if (nonZeroFiller) { From 511726c64d3b6cca66f7c54d457d586aa3129f67 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 26 Dec 2021 21:17:25 +0100 Subject: [PATCH 045/992] [LV] Move getStepVector out of ILV (NFC). First step to split up induction handling and move it outside ILV. Used in D116123 and following. --- .../Transforms/Vectorize/LoopVectorize.cpp | 147 +++++++++--------- 1 file changed, 70 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4747f34fcc62..83f9e3f58993 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -606,14 +606,6 @@ class InnerLoopVectorizer { /// represented as. void truncateToMinimalBitwidths(VPTransformState &State); - /// This function adds - /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) - /// to each vector element of Val. The sequence starts at StartIndex. - /// \p Opcode is relevant for FP induction variable. - virtual Value * - getStepVector(Value *Val, Value *StartIdx, Value *Step, - Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd); - /// Compute scalar induction steps. 
\p ScalarIV is the scalar induction /// variable on which to base the steps, \p Step is the size of the step, and /// \p EntryVal is the value from the original loop that maps to the steps. @@ -856,9 +848,6 @@ class InnerLoopUnroller : public InnerLoopVectorizer { private: Value *getBroadcastInstrs(Value *V) override; - Value *getStepVector( - Value *Val, Value *StartIdx, Value *Step, - Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd) override; Value *reverseVector(Value *Vec) override; }; @@ -2335,6 +2324,72 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { return Shuf; } +/// This function adds +/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) +/// to each vector element of Val. The sequence starts at StartIndex. +/// \p Opcode is relevant for FP induction variable. +static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, + Instruction::BinaryOps BinOp, ElementCount VF, + IRBuilder<> &Builder) { + if (VF.isScalar()) { + // When unrolling and the VF is 1, we only need to add a simple scalar. + Type *Ty = Val->getType(); + assert(!Ty->isVectorTy() && "Val must be a scalar"); + + if (Ty->isFloatingPointTy()) { + // Floating-point operations inherit FMF via the builder's flags. + Value *MulOp = Builder.CreateFMul(StartIdx, Step); + return Builder.CreateBinOp(BinOp, Val, MulOp); + } + return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step), + "induction"); + } + + // Create and check the types. + auto *ValVTy = cast(Val->getType()); + ElementCount VLen = ValVTy->getElementCount(); + + Type *STy = Val->getType()->getScalarType(); + assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && + "Induction Step must be an integer or FP"); + assert(Step->getType() == STy && "Step has wrong type"); + + SmallVector Indices; + + // Create a vector of consecutive numbers from zero to VF. 
+ VectorType *InitVecValVTy = ValVTy; + Type *InitVecValSTy = STy; + if (STy->isFloatingPointTy()) { + InitVecValSTy = + IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); + InitVecValVTy = VectorType::get(InitVecValSTy, VLen); + } + Value *InitVec = Builder.CreateStepVector(InitVecValVTy); + + // Splat the StartIdx + Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx); + + if (STy->isIntegerTy()) { + InitVec = Builder.CreateAdd(InitVec, StartIdxSplat); + Step = Builder.CreateVectorSplat(VLen, Step); + assert(Step->getType() == Val->getType() && "Invalid step vec"); + // FIXME: The newly created binary instructions should contain nsw/nuw + // flags, which can be found from the original scalar operations. + Step = Builder.CreateMul(InitVec, Step); + return Builder.CreateAdd(Val, Step, "induction"); + } + + // Floating point induction. + assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && + "Binary Opcode should be specified for FP induction"); + InitVec = Builder.CreateUIToFP(InitVec, ValVTy); + InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat); + + Step = Builder.CreateVectorSplat(VLen, Step); + Value *MulOp = Builder.CreateFMul(InitVec, Step); + return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); +} + void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( const InductionDescriptor &II, Value *Step, Value *Start, Instruction *EntryVal, VPValue *Def, VPTransformState &State) { @@ -2355,8 +2410,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); - Value *SteppedStart = - getStepVector(SplatStart, Zero, Step, II.getInductionOpcode()); + Value *SteppedStart = getStepVector( + SplatStart, Zero, Step, II.getInductionOpcode(), State.VF, State.Builder); // We create vector phi nodes for both integer and floating-point induction // variables. 
Here, we determine the kind of arithmetic we will perform. @@ -2502,7 +2557,8 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part); Value *EntryPart = - getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode()); + getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(), + State.VF, State.Builder); State.set(Def, EntryPart, Part); if (Trunc) addMetadata(EntryPart, Trunc); @@ -2554,54 +2610,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); } -Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx, - Value *Step, - Instruction::BinaryOps BinOp) { - // Create and check the types. - auto *ValVTy = cast(Val->getType()); - ElementCount VLen = ValVTy->getElementCount(); - - Type *STy = Val->getType()->getScalarType(); - assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && - "Induction Step must be an integer or FP"); - assert(Step->getType() == STy && "Step has wrong type"); - - SmallVector Indices; - - // Create a vector of consecutive numbers from zero to VF. - VectorType *InitVecValVTy = ValVTy; - Type *InitVecValSTy = STy; - if (STy->isFloatingPointTy()) { - InitVecValSTy = - IntegerType::get(STy->getContext(), STy->getScalarSizeInBits()); - InitVecValVTy = VectorType::get(InitVecValSTy, VLen); - } - Value *InitVec = Builder.CreateStepVector(InitVecValVTy); - - // Splat the StartIdx - Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx); - - if (STy->isIntegerTy()) { - InitVec = Builder.CreateAdd(InitVec, StartIdxSplat); - Step = Builder.CreateVectorSplat(VLen, Step); - assert(Step->getType() == Val->getType() && "Invalid step vec"); - // FIXME: The newly created binary instructions should contain nsw/nuw flags, - // which can be found from the original scalar operations. 
- Step = Builder.CreateMul(InitVec, Step); - return Builder.CreateAdd(Val, Step, "induction"); - } - - // Floating point induction. - assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && - "Binary Opcode should be specified for FP induction"); - InitVec = Builder.CreateUIToFP(InitVec, ValVTy); - InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat); - - Step = Builder.CreateVectorSplat(VLen, Step); - Value *MulOp = Builder.CreateFMul(InitVec, Step); - return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); -} - void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, Instruction *EntryVal, const InductionDescriptor &ID, @@ -8036,21 +8044,6 @@ Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; } Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } -Value *InnerLoopUnroller::getStepVector(Value *Val, Value *StartIdx, - Value *Step, - Instruction::BinaryOps BinOp) { - // When unrolling and the VF is 1, we only need to add a simple scalar. - Type *Ty = Val->getType(); - assert(!Ty->isVectorTy() && "Val must be a scalar"); - - if (Ty->isFloatingPointTy()) { - // Floating-point operations inherit FMF via the builder's flags. - Value *MulOp = Builder.CreateFMul(StartIdx, Step); - return Builder.CreateBinOp(BinOp, Val, MulOp); - } - return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step), "induction"); -} - static void AddRuntimeUnrollDisableMetaData(Loop *L) { SmallVector MDs; // Reserve first location for self reference to the LoopID metadata node. From 10316a6f94d47e859a8c2e5b42905f40b9c5c44e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 13:06:54 -0800 Subject: [PATCH 046/992] [ELF] Change InputSectionDescription members from vector to SmallVector This decreases sizeof(lld::elf::InputSectionDescription) from 264 to 232. 
--- lld/ELF/AArch64ErrataFix.cpp | 2 +- lld/ELF/ARMErrataFix.cpp | 2 +- lld/ELF/LinkerScript.cpp | 21 +++++++++------------ lld/ELF/LinkerScript.h | 22 +++++++++++----------- lld/ELF/Relocations.cpp | 2 +- 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 50d4c237778b..a1e276ea9c77 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -512,7 +512,7 @@ void AArch64Err843419Patcher::insertPatches( // determine the insertion point. This is ok as we only merge into an // InputSectionDescription once per pass, and at the end of the pass // assignAddresses() will recalculate all the outSecOff values. - std::vector tmp; + SmallVector tmp; tmp.reserve(isd.sections.size() + patches.size()); auto mergeCmp = [](const InputSection *a, const InputSection *b) { if (a->outSecOff != b->outSecOff) diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 5ad55f1326b3..cfaa3109afe8 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -395,7 +395,7 @@ void ARMErr657417Patcher::insertPatches( // determine the insertion point. This is ok as we only merge into an // InputSectionDescription once per pass, and at the end of the pass // assignAddresses() will recalculate all the outSecOff values. - std::vector tmp; + SmallVector tmp; tmp.reserve(isd.sections.size() + patches.size()); auto mergeCmp = [](const InputSection *a, const InputSection *b) { if (a->outSecOff != b->outSecOff) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index e8f2ce4fdf1f..9a95256a15af 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -270,8 +270,8 @@ using SymbolAssignmentMap = // Collect section/value pairs of linker-script-defined symbols. This is used to // check whether symbol values converge. 
-static SymbolAssignmentMap getSymbolAssignmentValues( - const std::vector §ionCommands) { +static SymbolAssignmentMap +getSymbolAssignmentValues(ArrayRef sectionCommands) { SymbolAssignmentMap ret; for (SectionCommand *cmd : sectionCommands) { if (auto *assign = dyn_cast(cmd)) { @@ -486,10 +486,10 @@ static void sortInputSections(MutableArrayRef vec, } // Compute and remember which sections the InputSectionDescription matches. -std::vector +SmallVector LinkerScript::computeInputSections(const InputSectionDescription *cmd, ArrayRef sections) { - std::vector ret; + SmallVector ret; std::vector indexes; DenseSet seen; auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) { @@ -585,18 +585,15 @@ void LinkerScript::discardSynthetic(OutputSection &outCmd) { std::vector secs(part.armExidx->exidxSections.begin(), part.armExidx->exidxSections.end()); for (SectionCommand *cmd : outCmd.commands) - if (auto *isd = dyn_cast(cmd)) { - std::vector matches = - computeInputSections(isd, secs); - for (InputSectionBase *s : matches) + if (auto *isd = dyn_cast(cmd)) + for (InputSectionBase *s : computeInputSections(isd, secs)) discard(*s); - } } } -std::vector +SmallVector LinkerScript::createInputSectionList(OutputSection &outCmd) { - std::vector ret; + SmallVector ret; for (SectionCommand *cmd : outCmd.commands) { if (auto *isd = dyn_cast(cmd)) { @@ -612,7 +609,7 @@ LinkerScript::createInputSectionList(OutputSection &outCmd) { // Create output sections described by SECTIONS commands. void LinkerScript::processSectionCommands() { auto process = [this](OutputSection *osec) { - std::vector v = createInputSectionList(*osec); + SmallVector v = createInputSectionList(*osec); // The output section name `/DISCARD/' is special. // Any input section assigned to it is discarded. 
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index f385c8320978..9fafdb64bfaf 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -203,20 +203,20 @@ class InputSectionDescription : public SectionCommand { // Input sections that matches at least one of SectionPatterns // will be associated with this InputSectionDescription. - std::vector sectionPatterns; + SmallVector sectionPatterns; // Includes InputSections and MergeInputSections. Used temporarily during // assignment of input sections to output sections. - std::vector sectionBases; + SmallVector sectionBases; // Used after the finalizeInputSections() pass. MergeInputSections have been // merged into MergeSyntheticSections. - std::vector sections; + SmallVector sections; // Temporary record of synthetic ThunkSection instances and the pass that // they were created in. This is used to insert newly created ThunkSections // into Sections at the end of a createThunks() pass. - std::vector> thunkSections; + SmallVector, 0> thunkSections; // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. uint64_t withFlags; @@ -279,11 +279,11 @@ class LinkerScript final { void expandOutputSection(uint64_t size); void expandMemoryRegions(uint64_t size); - std::vector + SmallVector computeInputSections(const InputSectionDescription *, ArrayRef); - std::vector createInputSectionList(OutputSection &cmd); + SmallVector createInputSectionList(OutputSection &cmd); void discardSynthetic(OutputSection &); @@ -347,23 +347,23 @@ class LinkerScript final { // List of section patterns specified with KEEP commands. They will // be kept even if they are unused and --gc-sections is specified. - std::vector keptSections; + SmallVector keptSections; // A map from memory region name to a memory region descriptor. llvm::MapVector memoryRegions; // A list of symbols referenced by the script. - std::vector referencedSymbols; + SmallVector referencedSymbols; // Used to implement INSERT [AFTER|BEFORE]. 
Contains output sections that need // to be reordered. - std::vector insertCommands; + SmallVector insertCommands; // OutputSections specified by OVERWRITE_SECTIONS. - std::vector overwriteSections; + SmallVector overwriteSections; // Sections that will be warned/errored by --orphan-handling. - std::vector orphanSections; + SmallVector orphanSections; }; extern std::unique_ptr script; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 5841c509aa67..33227bd2447b 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1816,7 +1816,7 @@ void ThunkCreator::mergeThunks(ArrayRef outputSections) { }); // Merge sorted vectors of Thunks and InputSections by outSecOff - std::vector tmp; + SmallVector tmp; tmp.reserve(isd->sections.size() + newThunks.size()); std::merge(isd->sections.begin(), isd->sections.end(), From 31cfb3f4f6446512fa0170092af46783a0de9139 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 26 Dec 2021 13:31:40 -0800 Subject: [PATCH 047/992] [clang] Remove redundant calls to c_str() (NFC) Identified with readability-redundant-string-cstr. 
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 16 ++++++++-------- .../clang-nvlink-wrapper/ClangNvlinkWrapper.cpp | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b23100d435b4..c314044c66dd 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9551,7 +9551,7 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, } PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), + return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, PLoc.getLine(), PLoc.getColumn()); } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 407f81a2ae09..ad50c66cb6c1 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1724,7 +1724,7 @@ bool tools::GetSDLFromOffloadArchive( std::string OutputLib = D.GetTemporaryPath( Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a"); - C.addTempFile(C.getArgs().MakeArgString(OutputLib.c_str())); + C.addTempFile(C.getArgs().MakeArgString(OutputLib)); ArgStringList CmdArgs; SmallString<128> DeviceTriple; @@ -1747,20 +1747,20 @@ bool tools::GetSDLFromOffloadArchive( T.getToolChain().GetProgramPath("clang-offload-bundler")); ArgStringList UBArgs; - UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg.c_str())); - UBArgs.push_back(C.getArgs().MakeArgString(TypeArg.c_str())); - UBArgs.push_back(C.getArgs().MakeArgString(InputArg.c_str())); - UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg.c_str())); - UBArgs.push_back(C.getArgs().MakeArgString(OutputArg.c_str())); + UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg)); + UBArgs.push_back(C.getArgs().MakeArgString(TypeArg)); + 
UBArgs.push_back(C.getArgs().MakeArgString(InputArg)); + UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg)); + UBArgs.push_back(C.getArgs().MakeArgString(OutputArg)); // Add this flag to not exit from clang-offload-bundler if no compatible // code object is found in heterogenous archive library. std::string AdditionalArgs("-allow-missing-bundles"); - UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs.c_str())); + UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs)); C.addCommand(std::make_unique( JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs, - InputInfo(&JA, C.getArgs().MakeArgString(OutputLib.c_str())))); + InputInfo(&JA, C.getArgs().MakeArgString(OutputLib)))); if (postClangLink) CC1Args.push_back("-mlink-builtin-bitcode"); diff --git a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp index bc5b9a9f1fde..46a4f30ba881 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp @@ -63,7 +63,7 @@ static Error runNVLink(std::string NVLinkPath, NVLArgs.push_back(Arg); } - if (sys::ExecuteAndWait(NVLinkPath.c_str(), NVLArgs)) + if (sys::ExecuteAndWait(NVLinkPath, NVLArgs)) return createStringError(inconvertibleErrorCode(), "'nvlink' failed"); return Error::success(); } From a1c2ee01470e00c0e191606e7391e9ee14d0a113 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 13:53:47 -0800 Subject: [PATCH 048/992] [ELF] LinkerScript/OutputSection: change other std::vector members to SmallVector 11+KiB smaller .text with both libc++ and libstdc++ builds. 
--- lld/ELF/LinkerScript.cpp | 22 ++++++++++----------- lld/ELF/LinkerScript.h | 12 ++++++------ lld/ELF/OutputSections.cpp | 10 +++++----- lld/ELF/OutputSections.h | 10 +++++----- lld/ELF/ScriptParser.cpp | 8 ++++---- lld/ELF/SyntheticSections.h | 2 +- lld/ELF/Writer.cpp | 39 ++++++++++++++++++------------------- 7 files changed, 51 insertions(+), 52 deletions(-) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 9a95256a15af..7c67ee53adba 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -306,7 +306,7 @@ getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { // Process INSERT [AFTER|BEFORE] commands. For each command, we move the // specified output section to the designated place. void LinkerScript::processInsertCommands() { - std::vector moves; + SmallVector moves; for (const InsertCommand &cmd : insertCommands) { for (StringRef name : cmd.names) { // If base is empty, it may have been discarded by @@ -490,7 +490,7 @@ SmallVector LinkerScript::computeInputSections(const InputSectionDescription *cmd, ArrayRef sections) { SmallVector ret; - std::vector indexes; + SmallVector indexes; DenseSet seen; auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) { llvm::sort(MutableArrayRef(indexes).slice(begin, end - begin)); @@ -827,7 +827,7 @@ addInputSec(StringMap> &map, // Add sections that didn't match any sections command. void LinkerScript::addOrphanSections() { StringMap> map; - std::vector v; + SmallVector v; std::function add; add = [&](InputSectionBase *s) { @@ -1110,7 +1110,7 @@ bool LinkerScript::isDiscarded(const OutputSection *sec) const { } static void maybePropagatePhdrs(OutputSection &sec, - std::vector &phdrs) { + SmallVector &phdrs) { if (sec.phdrs.empty()) { // To match the bfd linker script behaviour, only propagate program // headers to sections that are allocated. @@ -1144,7 +1144,7 @@ void LinkerScript::adjustSectionsBeforeSorting() { // the previous sections. 
Only a few flags are needed to keep the impact low. uint64_t flags = SHF_ALLOC; - std::vector defPhdrs; + SmallVector defPhdrs; for (SectionCommand *&cmd : sectionCommands) { auto *sec = dyn_cast(cmd); if (!sec) @@ -1215,7 +1215,7 @@ void LinkerScript::adjustSectionsAfterSorting() { // Below is an example of such linker script: // PHDRS { seg PT_LOAD; } // SECTIONS { .aaa : { *(.aaa) } } - std::vector defPhdrs; + SmallVector defPhdrs; auto firstPtLoad = llvm::find_if(phdrsCommands, [](const PhdrsCommand &cmd) { return cmd.type == PT_LOAD; }); @@ -1245,7 +1245,7 @@ static uint64_t computeBase(uint64_t min, bool allocateHeaders) { // We check if the headers fit below the first allocated section. If there isn't // enough space for these sections, we'll remove them from the PT_LOAD segment, // and we'll also remove the PT_PHDR segment. -void LinkerScript::allocateHeaders(std::vector &phdrs) { +void LinkerScript::allocateHeaders(SmallVector &phdrs) { uint64_t min = std::numeric_limits::max(); for (OutputSection *sec : outputSections) if (sec->flags & SHF_ALLOC) @@ -1329,8 +1329,8 @@ const Defined *LinkerScript::assignAddresses() { } // Creates program headers as instructed by PHDRS linker script command. -std::vector LinkerScript::createPhdrs() { - std::vector ret; +SmallVector LinkerScript::createPhdrs() { + SmallVector ret; // Process PHDRS and FILEHDR keywords because they are not // real output sections and cannot be added in the following loop. @@ -1412,8 +1412,8 @@ static Optional getPhdrIndex(ArrayRef vec, // Returns indices of ELF headers containing specific section. Each index is a // zero based number of ELF header listed within PHDRS {} script block. 
-std::vector LinkerScript::getPhdrIndices(OutputSection *cmd) { - std::vector ret; +SmallVector LinkerScript::getPhdrIndices(OutputSection *cmd) { + SmallVector ret; for (StringRef s : cmd->phdrs) { if (Optional idx = getPhdrIndex(phdrsCommands, s)) diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 9fafdb64bfaf..c415186f6de9 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -244,7 +244,7 @@ struct ByteCommand : SectionCommand { }; struct InsertCommand { - std::vector names; + SmallVector names; bool isAfter; StringRef where; }; @@ -287,7 +287,7 @@ class LinkerScript final { void discardSynthetic(OutputSection &); - std::vector getPhdrIndices(OutputSection *sec); + SmallVector getPhdrIndices(OutputSection *sec); std::pair findMemoryRegion(OutputSection *sec, MemoryRegion *hint); @@ -321,12 +321,12 @@ class LinkerScript final { void adjustSectionsBeforeSorting(); void adjustSectionsAfterSorting(); - std::vector createPhdrs(); + SmallVector createPhdrs(); bool needsInterpSection(); bool shouldKeep(InputSectionBase *s); const Defined *assignAddresses(); - void allocateHeaders(std::vector &phdrs); + void allocateHeaders(SmallVector &phdrs); void processSectionCommands(); void processSymbolAssignments(); void declareSymbols(); @@ -337,10 +337,10 @@ class LinkerScript final { void processInsertCommands(); // SECTIONS command list. - std::vector sectionCommands; + SmallVector sectionCommands; // PHDRS command list. - std::vector phdrsCommands; + SmallVector phdrsCommands; bool hasSectionsCommand = false; bool errorOnMissingSection = false; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index cd53eecd8756..07ee7d84a2cd 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -347,7 +347,7 @@ template void OutputSection::writeTo(uint8_t *buf) { } // Write leading padding. 
- std::vector sections = getInputSections(this); + SmallVector sections = getInputSections(*this); std::array filler = getFiller(); bool nonZeroFiller = read32(filler.data()) != 0; if (nonZeroFiller) @@ -520,9 +520,9 @@ InputSection *elf::getFirstInputSection(const OutputSection *os) { return nullptr; } -std::vector elf::getInputSections(const OutputSection *os) { - std::vector ret; - for (SectionCommand *cmd : os->commands) +SmallVector elf::getInputSections(const OutputSection &os) { + SmallVector ret; + for (SectionCommand *cmd : os.commands) if (auto *isd = dyn_cast(cmd)) ret.insert(ret.end(), isd->sections.begin(), isd->sections.end()); return ret; @@ -550,7 +550,7 @@ std::array OutputSection::getFiller() { void OutputSection::checkDynRelAddends(const uint8_t *bufStart) { assert(config->writeAddends && config->checkDynamicRelocs); assert(type == SHT_REL || type == SHT_RELA); - std::vector sections = getInputSections(this); + SmallVector sections = getInputSections(*this); parallelForEachN(0, sections.size(), [&](size_t i) { // When linking with -r or --emit-relocs we might also call this function // for input .rel[a]. sections which we simply pass through to the diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index fb3eb0059909..4f589d8432e4 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -82,8 +82,8 @@ class OutputSection final : public SectionCommand, public SectionBase { Expr alignExpr; Expr lmaExpr; Expr subalignExpr; - std::vector commands; - std::vector phdrs; + SmallVector commands; + SmallVector phdrs; llvm::Optional> filler; ConstraintKind constraint = ConstraintKind::NoConstraint; std::string location; @@ -112,8 +112,8 @@ class OutputSection final : public SectionCommand, public SectionBase { private: // Used for implementation of --compress-debug-sections option. 
- std::vector zDebugHeader; - llvm::SmallVector compressedData; + SmallVector zDebugHeader; + SmallVector compressedData; std::array getFiller(); }; @@ -121,7 +121,7 @@ class OutputSection final : public SectionCommand, public SectionBase { int getPriority(StringRef s); InputSection *getFirstInputSection(const OutputSection *os); -std::vector getInputSections(const OutputSection *os); +SmallVector getInputSections(const OutputSection &os); // All output sections that are handled by the linker specially are // globally accessible. Writer initializes them, so don't use them diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index d3b0296acab0..49da94e9c52f 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -94,7 +94,7 @@ class ScriptParser final : ScriptLexer { OutputSection *readOverlaySectionDescription(); OutputSection *readOutputSectionDescription(StringRef outSec); std::vector readOverlay(); - std::vector readOutputSectionPhdrs(); + SmallVector readOutputSectionPhdrs(); std::pair readInputSectionFlags(); InputSectionDescription *readInputSectionDescription(StringRef tok); StringMatcher readFilePatterns(); @@ -597,7 +597,7 @@ void ScriptParser::readSections() { else if (!consume("BEFORE")) setError("expected AFTER/BEFORE, but got '" + next() + "'"); StringRef where = next(); - std::vector names; + SmallVector names; for (SectionCommand *cmd : v) if (auto *os = dyn_cast(cmd)) names.push_back(os->name); @@ -1452,8 +1452,8 @@ Expr ScriptParser::readParenExpr() { return e; } -std::vector ScriptParser::readOutputSectionPhdrs() { - std::vector phdrs; +SmallVector ScriptParser::readOutputSectionPhdrs() { + SmallVector phdrs; while (!errorCount() && peek().startswith(":")) { StringRef tok = next(); phdrs.push_back((tok.size() == 1) ? 
next() : tok.substr(1)); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index c35e19cf2fb4..c785d5b48b33 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1205,7 +1205,7 @@ struct Partition { SyntheticSection *elfHeader; SyntheticSection *programHeaders; - std::vector phdrs; + SmallVector phdrs; ARMExidxSyntheticSection *armExidx; BuildIdSection *buildId; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6fbb3f7bf471..acc78dd06734 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -65,7 +65,7 @@ template class Writer { void checkExecuteOnly(); void setReservedSymbolSections(); - std::vector createPhdrs(Partition &part); + SmallVector createPhdrs(Partition &part); void addPhdrForSection(Partition &part, unsigned shType, unsigned pType, unsigned pFlags); void assignFileOffsets(); @@ -100,7 +100,7 @@ template void elf::writeResult() { Writer().run(); } -static void removeEmptyPTLoad(std::vector &phdrs) { +static void removeEmptyPTLoad(SmallVector &phdrs) { auto it = std::stable_partition( phdrs.begin(), phdrs.end(), [&](const PhdrEntry *p) { if (p->p_type != PT_LOAD) @@ -1170,9 +1170,9 @@ static bool shouldSkip(SectionCommand *cmd) { // We want to place orphan sections so that they share as much // characteristics with their neighbors as possible. For example, if // both are rw, or both are tls. -static std::vector::iterator -findOrphanPos(std::vector::iterator b, - std::vector::iterator e) { +static SmallVectorImpl::iterator +findOrphanPos(SmallVectorImpl::iterator b, + SmallVectorImpl::iterator e) { OutputSection *sec = cast(*e); // Find the first element that has as close a rank as possible. 
@@ -1332,8 +1332,8 @@ static DenseMap buildSectionOrder() { static void sortISDBySectionOrder(InputSectionDescription *isd, const DenseMap &order) { - std::vector unorderedSections; - std::vector> orderedSections; + SmallVector unorderedSections; + SmallVector, 0> orderedSections; uint64_t unorderedSize = 0; for (InputSection *isec : isd->sections) { @@ -1766,10 +1766,10 @@ template void Writer::optimizeBasicBlockJumps() { // jump to the following section as it is not required. // 2. If there are two consecutive jump instructions, it checks // if they can be flipped and one can be deleted. - for (OutputSection *os : outputSections) { - if (!(os->flags & SHF_EXECINSTR)) + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) continue; - std::vector sections = getInputSections(os); + SmallVector sections = getInputSections(*osec); std::vector result(sections.size()); // Delete all fall through jump instructions. Also, check if two // consecutive jump instructions can be flipped so that a fall @@ -1790,11 +1790,9 @@ template void Writer::optimizeBasicBlockJumps() { fixSymbolsAfterShrinking(); - for (OutputSection *os : outputSections) { - std::vector sections = getInputSections(os); - for (InputSection *is : sections) + for (OutputSection *osec : outputSections) + for (InputSection *is : getInputSections(*osec)) is->trim(); - } } // In order to allow users to manipulate linker-synthesized sections, @@ -2165,11 +2163,12 @@ template void Writer::checkExecuteOnly() { if (!config->executeOnly) return; - for (OutputSection *os : outputSections) - if (os->flags & SHF_EXECINSTR) - for (InputSection *isec : getInputSections(os)) + for (OutputSection *osec : outputSections) + if (osec->flags & SHF_EXECINSTR) + for (InputSection *isec : getInputSections(*osec)) if (!(isec->flags & SHF_EXECINSTR)) - error("cannot place " + toString(isec) + " into " + toString(os->name) + + error("cannot place " + toString(isec) + " into " + + toString(osec->name) + 
": -execute-only does not support intermingling data and code"); } @@ -2259,8 +2258,8 @@ static uint64_t computeFlags(uint64_t flags) { // Decide which program headers to create and which sections to include in each // one. template -std::vector Writer::createPhdrs(Partition &part) { - std::vector ret; +SmallVector Writer::createPhdrs(Partition &part) { + SmallVector ret; auto addHdr = [&](unsigned type, unsigned flags) -> PhdrEntry * { ret.push_back(make(type, flags)); return ret.back(); From e7774f499bb2a393145d7e696905048d131a3091 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 26 Dec 2021 14:26:44 -0800 Subject: [PATCH 049/992] Use static_assert instead of assert (NFC) Identified with misc-static-assert. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- llvm/lib/Transforms/Utils/SampleProfileInference.cpp | 7 ++++--- polly/lib/CodeGen/IslExprBuilder.cpp | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4f5512e6fb37..39dd139953b0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2774,7 +2774,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, // We define our scalable vector types for lmul=1 to use a 64 bit known // minimum size. e.g. . VLENB is in bytes so we calculate // vscale as VLENB / 8. - assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!"); + static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); if (isa(Op.getOperand(0))) { // We assume VLENB is a multiple of 8. We manually choose the best shift // here because SimplifyDemandedBits isn't always able to simplify it. 
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 2f2dff6b5f0b..01b54c3d88c9 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -455,9 +455,10 @@ class FlowAdjuster { /// Rebalance unknown subgraphs so as each branch splits with probabilities /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability void rebalanceUnknownSubgraphs() { - assert(UnknownFirstSuccProbability >= 0.0 && - UnknownFirstSuccProbability <= 1.0 && - "the share of the unknown successor should be between 0 and 1"); + static_assert( + UnknownFirstSuccProbability >= 0.0 && + UnknownFirstSuccProbability <= 1.0, + "the share of the unknown successor should be between 0 and 1"); // Try to find unknown subgraphs from each non-unknown block for (uint64_t I = 0; I < Func.Blocks.size(); I++) { auto SrcBlock = &Func.Blocks[I]; diff --git a/polly/lib/CodeGen/IslExprBuilder.cpp b/polly/lib/CodeGen/IslExprBuilder.cpp index db6680e258a3..4d1094cf3eff 100644 --- a/polly/lib/CodeGen/IslExprBuilder.cpp +++ b/polly/lib/CodeGen/IslExprBuilder.cpp @@ -526,8 +526,8 @@ Value *IslExprBuilder::createOpICmp(__isl_take isl_ast_expr *Expr) { isl_ast_op_type OpType = isl_ast_expr_get_op_type(Expr); assert(OpType >= isl_ast_op_eq && OpType <= isl_ast_op_gt && "Unsupported ICmp isl ast expression"); - assert(isl_ast_op_eq + 4 == isl_ast_op_gt && - "Isl ast op type interface changed"); + static_assert(isl_ast_op_eq + 4 == isl_ast_op_gt, + "Isl ast op type interface changed"); CmpInst::Predicate Predicates[5][2] = { {CmpInst::ICMP_EQ, CmpInst::ICMP_EQ}, From 213896bc5aea019c0c776a069f54d23febe4e807 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 15:18:56 -0800 Subject: [PATCH 050/992] [ELF] Remove unused InputSection::getOffsetInFile --- lld/ELF/InputSection.cpp | 6 ------ lld/ELF/InputSection.h | 2 -- 2 files changed, 8 deletions(-) diff --git 
a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 6d26e19aac48..76a29b034146 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -153,12 +153,6 @@ void InputSectionBase::uncompress() const { uncompressedSize = -1; } -uint64_t InputSectionBase::getOffsetInFile() const { - const uint8_t *fileStart = (const uint8_t *)file->mb.getBufferStart(); - const uint8_t *secStart = data().begin(); - return secStart - fileStart; -} - template RelsOrRelas InputSectionBase::relsOrRelas() const { if (relSecIdx == 0) return {}; diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 5319830b5d80..cbb060fd2740 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -156,8 +156,6 @@ class InputSectionBase : public SectionBase { return rawData; } - uint64_t getOffsetInFile() const; - // Input sections are part of an output section. Special sections // like .eh_frame and merge sections are first combined into a // synthetic section that is then added to an output section. In all From 469144ffa37f065a6bcd00e8bd837bc943f632e3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 15:21:22 -0800 Subject: [PATCH 051/992] [ELF] De-template InputSectionBase::getEnclosingFunction --- lld/ELF/InputSection.cpp | 5 ++--- lld/ELF/InputSection.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 76a29b034146..9a2b52cf568d 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -268,7 +268,6 @@ InputSection *InputSectionBase::getLinkOrderDep() const { } // Find a function symbol that encloses a given location. 
-template Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) { for (Symbol *b : file->getSymbols()) if (Defined *d = dyn_cast(b)) @@ -289,7 +288,7 @@ std::string InputSectionBase::getLocation(uint64_t offset) { return (config->outputFile + ":(" + secAndOffset).str(); std::string file = toString(getFile()); - if (Defined *d = getEnclosingFunction(offset)) + if (Defined *d = getEnclosingFunction(offset)) return file + ":(function " + toString(*d) + ": " + secAndOffset; return file + ":(" + secAndOffset; @@ -1203,7 +1202,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *buf, if (enclosingPrologueAttempted(rel.offset, prologues)) continue; - if (Defined *f = getEnclosingFunction(rel.offset)) { + if (Defined *f = getEnclosingFunction(rel.offset)) { prologues.insert(f); if (target->adjustPrologueForCrossSplitStack(buf + f->value, end, f->stOther)) diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index cbb060fd2740..3bd180be7d43 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -178,7 +178,6 @@ class InputSectionBase : public SectionBase { // Get the function symbol that encloses this offset from within the // section. - template Defined *getEnclosingFunction(uint64_t offset); // Returns a source location string. Used to construct an error message. From 7924b3814f40747cafff7aec24e6b16fda02af44 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 17:25:54 -0800 Subject: [PATCH 052/992] [ELF] Add Symbol::hasVersionSuffix "Process symbol versions" may take 2+% time. "Redirect symbols" may take 0.6% time. This change speeds up the two passes and makes `*sym.getVersionSuffix() == '@'` in the `undefined reference` diagnostic cleaner. Linking chrome (no debug info) and another large program is 1.5% faster. For empty-ver2.s: the behavior now matches GNU ld, though I'd consider the input invalid and the exact behavior does not matter. 
--- lld/ELF/Driver.cpp | 11 +++++++---- lld/ELF/Relocations.cpp | 2 +- lld/ELF/SymbolTable.cpp | 9 +++++++-- lld/ELF/Symbols.cpp | 5 +++-- lld/ELF/Symbols.h | 13 +++++++++---- lld/test/ELF/empty-ver2.s | 2 +- 6 files changed, 28 insertions(+), 14 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 6b689f50cce7..2923d45018c4 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2006,7 +2006,8 @@ template void LinkerDriver::compileBitcodeFiles() { // Parse '@' in symbol names for non-relocatable output. if (!config->relocatable) for (Symbol *sym : obj->getGlobalSymbols()) - sym->parseSymbolVersion(); + if (sym->hasVersionSuffix) + sym->parseSymbolVersion(); objectFiles.push_back(obj); } } @@ -2080,8 +2081,10 @@ static void redirectSymbols(ArrayRef wrapped) { map[w.real] = w.sym; } for (Symbol *sym : symtab->symbols()) { - // Enumerate symbols with a non-default version (foo@v1). - StringRef name = sym->getName(); + // Enumerate symbols with a non-default version (foo@v1). hasVersionSuffix + // filters out most symbols but is not sufficient. + if (!sym->hasVersionSuffix) + continue; const char *suffix1 = sym->getVersionSuffix(); if (suffix1[0] != '@' || suffix1[1] == '@') continue; @@ -2090,7 +2093,7 @@ static void redirectSymbols(ArrayRef wrapped) { // // * There is a definition of foo@v1 and foo@@v1. // * There is a definition of foo@v1 and foo. 
- Defined *sym2 = dyn_cast_or_null(symtab->find(name)); + Defined *sym2 = dyn_cast_or_null(symtab->find(sym->getName())); if (!sym2) continue; const char *suffix2 = sym2->getVersionSuffix(); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 33227bd2447b..d7eef68800c5 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -741,7 +741,7 @@ static bool maybeReportUndefined(Symbol &sym, InputSectionBase &sec, uint64_t offset) { // If versioned, issue an error (even if the symbol is weak) because we don't // know the defining filename which is required to construct a Verneed entry. - if (*sym.getVersionSuffix() == '@') { + if (sym.hasVersionSuffix) { undefs.push_back({&sym, {{&sec, offset}}, false}); return true; } diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index a12c5f22c4fe..c93a166daa6e 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -72,8 +72,10 @@ Symbol *SymbolTable::insert(StringRef name) { auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()}); if (!p.second) { Symbol *sym = symVector[p.first->second]; - if (stem.size() != name.size()) + if (stem.size() != name.size()) { sym->setName(name); + sym->hasVersionSuffix = true; + } return sym; } @@ -93,6 +95,8 @@ Symbol *SymbolTable::insert(StringRef name) { sym->referenced = false; sym->traced = false; sym->scriptDefined = false; + if (pos != StringRef::npos) + sym->hasVersionSuffix = true; sym->partition = 1; return sym; } @@ -316,7 +320,8 @@ void SymbolTable::scanVersionScript() { // can contain versions in the form of @. // Let them parse and update their names to exclude version suffix. for (Symbol *sym : symVector) - sym->parseSymbolVersion(); + if (sym->hasVersionSuffix) + sym->parseSymbolVersion(); // isPreemptible is false at this point. 
To correctly compute the binding of a // Defined (which is used by includeInDynsym()), we need to know if it is diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 20301497a059..9d8ff8aa4c19 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -216,12 +216,13 @@ void Symbol::parseSymbolVersion() { if (pos == StringRef::npos) return; StringRef verstr = s.substr(pos + 1); - if (verstr.empty()) - return; // Truncate the symbol name so that it doesn't include the version string. nameSize = pos; + if (verstr.empty()) + return; + // If this is not in this DSO, it is not a definition. if (!isDefined()) return; diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index 27c36eedce80..e2dc76f576af 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -144,6 +144,9 @@ class Symbol { // True if this symbol is specified by --trace-symbol option. uint8_t traced : 1; + // True if the name contains '@'. + uint8_t hasVersionSuffix : 1; + inline void replace(const Symbol &newSym); bool includeInDynsym() const; @@ -246,10 +249,11 @@ class Symbol { type(type), stOther(stOther), symbolKind(k), visibility(stOther & 3), isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind), exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false), - canInline(false), referenced(false), traced(false), isInIplt(false), - gotInIgot(false), isPreemptible(false), used(!config->gcSections), - folded(false), needsTocRestore(false), scriptDefined(false), - needsCopy(false), needsGot(false), needsPlt(false), needsTlsDesc(false), + canInline(false), referenced(false), traced(false), + hasVersionSuffix(false), isInIplt(false), gotInIgot(false), + isPreemptible(false), used(!config->gcSections), folded(false), + needsTocRestore(false), scriptDefined(false), needsCopy(false), + needsGot(false), needsPlt(false), needsTlsDesc(false), needsTlsGd(false), needsTlsGdToIe(false), needsTlsLd(false), needsGotDtprel(false), needsTlsIe(false), hasDirectReloc(false) {} @@ -575,6 +579,7 @@ 
void Symbol::replace(const Symbol &newSym) { canInline = old.canInline; referenced = old.referenced; traced = old.traced; + hasVersionSuffix = old.hasVersionSuffix; isPreemptible = old.isPreemptible; scriptDefined = old.scriptDefined; partition = old.partition; diff --git a/lld/test/ELF/empty-ver2.s b/lld/test/ELF/empty-ver2.s index 8692e049c947..c28b3ae83b2f 100644 --- a/lld/test/ELF/empty-ver2.s +++ b/lld/test/ELF/empty-ver2.s @@ -12,7 +12,7 @@ # CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Version: 1 -# CHECK-NEXT: Name: bar@ +# CHECK-NEXT: Name: bar{{$}} # CHECK-NEXT: } # CHECK-NEXT: ] From 70a98008eaf723ce2300fab4c3b60b344ce52672 Mon Sep 17 00:00:00 2001 From: Shao-Ce SUN Date: Fri, 24 Dec 2021 09:22:28 +0800 Subject: [PATCH 053/992] [RISCV] Reduce repetitive codes in flw, fsw Trying to improve code reuse in F,D,Zfh *.td files. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116089 --- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 13 ++-------- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 28 ++++++++++++++-------- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 12 ++-------- 3 files changed, 22 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index d6c31c4804db..6bfc9bbdc0a3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -30,21 +30,12 @@ def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtD] in { - -let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in -def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), - (ins GPR:$rs1, simm12:$imm12), - "fld", "$rd, ${imm12}(${rs1})">, - Sched<[WriteFLD64, ReadFMemBase]>; +def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are 
specified in the instruction // encoding. -let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in -def FSD : RVInstS<0b011, OPC_STORE_FP, (outs), - (ins FPR64:$rs2, GPR:$rs1, simm12:$imm12), - "fsd", "$rs2, ${imm12}(${rs1})">, - Sched<[WriteFST64, ReadStoreData, ReadFMemBase]>; +def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>; let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in { def FMADD_D : FPFMA_rrr_frm; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index bb45ed859442..5dbdc428d372 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -73,6 +73,22 @@ def frmarg : Operand { // Instruction class templates //===----------------------------------------------------------------------===// +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class FPLoad_r funct3, string opcodestr, RegisterClass rty, + SchedWrite sw> + : RVInstI, + Sched<[sw, ReadFMemBase]>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class FPStore_r funct3, string opcodestr, RegisterClass rty, + SchedWrite sw> + : RVInstS, + Sched<[sw, ReadStoreData, ReadFMemBase]>; + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, UseNamedOperandTable = 1, hasPostISelHook = 1 in class FPFMA_rrr_frm funct2, string opcodestr, @@ -138,20 +154,12 @@ class FPCmp_rr funct7, bits<3> funct3, string opcodestr, //===----------------------------------------------------------------------===// let Predicates = [HasStdExtF] in { -let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in -def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), - (ins GPR:$rs1, simm12:$imm12), - "flw", "$rd, ${imm12}(${rs1})">, - Sched<[WriteFLD32, ReadFMemBase]>; +def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
-let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in -def FSW : RVInstS<0b010, OPC_STORE_FP, (outs), - (ins FPR32:$rs2, GPR:$rs1, simm12:$imm12), - "fsw", "$rs2, ${imm12}(${rs1})">, - Sched<[WriteFST32, ReadStoreData, ReadFMemBase]>; +def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>; let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in { def FMADD_S : FPFMA_rrr_frm; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 663e44813899..fa2eaa13ec57 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -32,20 +32,12 @@ def riscv_fmv_x_anyexth //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZfhmin] in { -let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in -def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd), - (ins GPR:$rs1, simm12:$imm12), - "flh", "$rd, ${imm12}(${rs1})">, - Sched<[WriteFLD16, ReadFMemBase]>; +def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. -let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in -def FSH : RVInstS<0b001, OPC_STORE_FP, (outs), - (ins FPR16:$rs2, GPR:$rs1, simm12:$imm12), - "fsh", "$rs2, ${imm12}(${rs1})">, - Sched<[WriteFST16, ReadStoreData, ReadFMemBase]>; +def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; } // Predicates = [HasStdExtZfhmin] let Predicates = [HasStdExtZfh] in { From e9262edf0d11a907763098d8e101219ccd9c43e9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 18:11:45 -0800 Subject: [PATCH 054/992] [ELF] SymbolTable::symbols: don't filter out PlaceholderKind Placeholders (-y and redirectSymbols removed versioned symbols) are very rare and the check just makes symbol table iteration slower. 
Most iterations filter out placeholders anyway, so this change just drops the filter behavior. For "Add symbols to symtabs", we need to ensure that redirectSymbols sets isUsedInRegularObj to false when making a symbol placeholder, to avoid an assertion failure in SymbolTableSection::writeTo. My .text is 2KiB smaller. The speed-up linking chrome is 0.x%. --- lld/ELF/Driver.cpp | 4 +++- lld/ELF/LTO.cpp | 2 ++ lld/ELF/SymbolTable.h | 11 +---------- lld/ELF/Symbols.cpp | 2 +- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 2923d45018c4..939a40ef5886 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1724,7 +1724,7 @@ static void handleUndefinedGlob(StringRef arg) { // symbols to the symbol table, invalidating the current iterator. std::vector syms; for (Symbol *sym : symtab->symbols()) - if (pat->match(sym->getName())) + if (!sym->isPlaceholder() && pat->match(sym->getName())) syms.push_back(sym); for (Symbol *sym : syms) @@ -2106,6 +2106,7 @@ static void redirectSymbols(ArrayRef wrapped) { sym2->resolve(*sym); // Eliminate foo@v1 from the symbol table. sym->symbolKind = Symbol::PlaceholderKind; + sym->isUsedInRegularObj = false; } else if (auto *sym1 = dyn_cast(sym)) { if (sym2->versionId > VER_NDX_GLOBAL ? config->versionDefinitions[sym2->versionId].name == suffix1 + 1 @@ -2118,6 +2119,7 @@ static void redirectSymbols(ArrayRef wrapped) { // defined in the same place. 
map.try_emplace(sym2, sym); sym2->symbolKind = Symbol::PlaceholderKind; + sym2->isUsedInRegularObj = false; } } } diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 65b943c4a54c..5b7ac6a5e925 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -207,6 +207,8 @@ BitcodeCompiler::BitcodeCompiler() { if (bitcodeFiles.empty()) return; for (Symbol *sym : symtab->symbols()) { + if (sym->isPlaceholder()) + continue; StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) if (s.startswith(prefix)) diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 84d93a3dc786..fd9fc0735fd5 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -32,17 +32,8 @@ namespace elf { // add*() functions, which are called by input files as they are parsed. There // is one add* function per symbol type. class SymbolTable { - struct FilterOutPlaceholder { - bool operator()(Symbol *S) const { return !S->isPlaceholder(); } - }; - using iterator = - llvm::filter_iterator::const_iterator, - FilterOutPlaceholder>; - public: - llvm::iterator_range symbols() const { - return llvm::make_filter_range(symVector, FilterOutPlaceholder()); - } + ArrayRef symbols() const { return symVector; } void wrap(Symbol *sym, Symbol *real, Symbol *wrap); diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 9d8ff8aa4c19..acb0dd27d0ab 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -355,7 +355,7 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. bool elf::computeIsPreemptible(const Symbol &sym) { - assert(!sym.isLocal()); + assert(!sym.isLocal() || sym.isPlaceholder()); // Only symbols with default visibility that appear in dynsym can be // preempted. Symbols with protected visibility cannot be preempted. 
From 64038ef8c3fedf28d437f59405c62e125d405600 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 20:12:55 -0800 Subject: [PATCH 055/992] [ELF] ScriptParser: change std::vector to SmallVector --- lld/ELF/Config.h | 4 ++-- lld/ELF/ScriptParser.cpp | 46 ++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index b3d5219ff57b..47bbed125cb1 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -87,8 +87,8 @@ struct SymbolVersion { struct VersionDefinition { llvm::StringRef name; uint16_t id; - std::vector nonLocalPatterns; - std::vector localPatterns; + SmallVector nonLocalPatterns; + SmallVector localPatterns; }; // This struct contains the global configuration for the linker. diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 49da94e9c52f..f26b6c41adf2 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -93,12 +93,12 @@ class ScriptParser final : ScriptLexer { void readSectionAddressType(OutputSection *cmd); OutputSection *readOverlaySectionDescription(); OutputSection *readOutputSectionDescription(StringRef outSec); - std::vector readOverlay(); + SmallVector readOverlay(); SmallVector readOutputSectionPhdrs(); std::pair readInputSectionFlags(); InputSectionDescription *readInputSectionDescription(StringRef tok); StringMatcher readFilePatterns(); - std::vector readInputSectionsList(); + SmallVector readInputSectionsList(); InputSectionDescription *readInputSectionRules(StringRef filePattern, uint64_t withFlags, uint64_t withoutFlags); @@ -125,11 +125,11 @@ class ScriptParser final : ScriptLexer { Expr readParenExpr(); // For parsing version script. - std::vector readVersionExtern(); + SmallVector readVersionExtern(); void readAnonymousDeclaration(); void readVersionDeclaration(StringRef verStr); - std::pair, std::vector> + std::pair, SmallVector> readSymbols(); // True if a script being read is in the --sysroot directory. 
@@ -181,8 +181,8 @@ static ExprValue bitOr(ExprValue a, ExprValue b) { void ScriptParser::readDynamicList() { expect("{"); - std::vector locals; - std::vector globals; + SmallVector locals; + SmallVector globals; std::tie(locals, globals) = readSymbols(); expect(";"); @@ -519,7 +519,7 @@ void ScriptParser::readSearchDir() { // sections that use the same virtual memory range and normally would trigger // linker's sections sanity check failures. // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description -std::vector ScriptParser::readOverlay() { +SmallVector ScriptParser::readOverlay() { // VA and LMA expressions are optional, though for simplicity of // implementation we assume they are not. That is what OVERLAY was designed // for first of all: to allow sections with overlapping VAs at different LMAs. @@ -529,7 +529,7 @@ std::vector ScriptParser::readOverlay() { Expr lmaExpr = readParenExpr(); expect("{"); - std::vector v; + SmallVector v; OutputSection *prev = nullptr; while (!errorCount() && !consume("}")) { // VA is the same for all sections. The LMAs are consecutive in memory @@ -566,7 +566,7 @@ void ScriptParser::readOverwriteSections() { void ScriptParser::readSections() { expect("{"); - std::vector v; + SmallVector v; while (!errorCount() && !consume("}")) { StringRef tok = next(); if (tok == "OVERLAY") { @@ -672,8 +672,8 @@ SortSectionPolicy ScriptParser::readSortKind() { // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". // The semantics of that is section .foo in any file, section .bar in // any file but a.o, and section .baz in any file but b.o. 
-std::vector ScriptParser::readInputSectionsList() { - std::vector ret; +SmallVector ScriptParser::readInputSectionsList() { + SmallVector ret; while (!errorCount() && peek() != ")") { StringMatcher excludeFilePat; if (consume("EXCLUDE_FILE")) { @@ -718,7 +718,7 @@ ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags, while (!errorCount() && !consume(")")) { SortSectionPolicy outer = readSortKind(); SortSectionPolicy inner = SortSectionPolicy::Default; - std::vector v; + SmallVector v; if (outer != SortSectionPolicy::Default) { expect("("); inner = readSortKind(); @@ -1494,8 +1494,8 @@ unsigned ScriptParser::readPhdrType() { // Reads an anonymous version declaration. void ScriptParser::readAnonymousDeclaration() { - std::vector locals; - std::vector globals; + SmallVector locals; + SmallVector globals; std::tie(locals, globals) = readSymbols(); for (const SymbolVersion &pat : locals) config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat); @@ -1509,8 +1509,8 @@ void ScriptParser::readAnonymousDeclaration() { // e.g. "VerStr { global: foo; bar; local: *; };". void ScriptParser::readVersionDeclaration(StringRef verStr) { // Read a symbol list. - std::vector locals; - std::vector globals; + SmallVector locals; + SmallVector globals; std::tie(locals, globals) = readSymbols(); // Create a new version definition and add that to the global symbols. @@ -1535,11 +1535,11 @@ bool elf::hasWildcard(StringRef s) { } // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
-std::pair, std::vector> +std::pair, SmallVector> ScriptParser::readSymbols() { - std::vector locals; - std::vector globals; - std::vector *v = &globals; + SmallVector locals; + SmallVector globals; + SmallVector *v = &globals; while (!errorCount()) { if (consume("}")) @@ -1554,7 +1554,7 @@ ScriptParser::readSymbols() { } if (consume("extern")) { - std::vector ext = readVersionExtern(); + SmallVector ext = readVersionExtern(); v->insert(v->end(), ext.begin(), ext.end()); } else { StringRef tok = next(); @@ -1570,14 +1570,14 @@ ScriptParser::readSymbols() { // // The last semicolon is optional. E.g. this is OK: // "extern "C++" { ns::*; "f(int, double)" };" -std::vector ScriptParser::readVersionExtern() { +SmallVector ScriptParser::readVersionExtern() { StringRef tok = next(); bool isCXX = tok == "\"C++\""; if (!isCXX && tok != "\"C\"") setError("Unknown language"); expect("{"); - std::vector ret; + SmallVector ret; while (!errorCount() && peek() != "}") { StringRef tok = next(); ret.push_back( From 6441536c27cfac46e4a6c5801dc01d5bdcf200c2 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 27 Dec 2021 13:52:42 +0800 Subject: [PATCH 056/992] [libcxx] [Coroutines] Support noop_coroutine for GCC We didn't support noop_coroutine for GCC in previous conforming patch. So that GCC couldn't use noop_coroutine() defined in . And after this patch, GCC should be able to compile the whole header. 
Reviewed By: Quuxplusone Differential Revision: https://reviews.llvm.org/D116144 --- libcxx/docs/Status/Cxx2bIssues.csv | 2 +- .../__coroutine/noop_coroutine_handle.h | 30 +++++++++++++++++-- .../noop_coroutine.pass.cpp | 23 +++++++------- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv index 902a18228c12..025a0c12d331 100644 --- a/libcxx/docs/Status/Cxx2bIssues.csv +++ b/libcxx/docs/Status/Cxx2bIssues.csv @@ -37,7 +37,7 @@ "`3449 `__","``take_view`` and ``take_while_view``'s ``sentinel`` not comparable with their ``const iterator``","November 2020","","","|ranges|" "`3453 `__","Generic code cannot call ``ranges::advance(i, s)``","November 2020","","","|ranges|" "`3455 `__","Incorrect Postconditions on ``unique_ptr`` move assignment","November 2020","|Nothing To Do|","" -"`3460 `__","Unimplementable ``noop_coroutine_handle`` guarantees","November 2020","","" +"`3460 `__","Unimplementable ``noop_coroutine_handle`` guarantees","November 2020","|Complete|","14.0" "`3461 `__","``convertible_to``'s description mishandles cv-qualified ``void``","November 2020","","" "`3465 `__","``compare_partial_order_fallback`` requires ``F < E``","November 2020","","","|spaceship|" "`3466 `__","Specify the requirements for ``promise``/``future``/``shared_future`` consistently","November 2020","","" diff --git a/libcxx/include/__coroutine/noop_coroutine_handle.h b/libcxx/include/__coroutine/noop_coroutine_handle.h index 9dbf21aac5e6..a29e202f4e4f 100644 --- a/libcxx/include/__coroutine/noop_coroutine_handle.h +++ b/libcxx/include/__coroutine/noop_coroutine_handle.h @@ -20,7 +20,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_builtin(__builtin_coro_noop) +#if __has_builtin(__builtin_coro_noop) || defined(_LIBCPP_COMPILER_GCC) + // [coroutine.noop] // [coroutine.promise.noop] struct noop_coroutine_promise {}; @@ -64,20 +65,45 @@ struct _LIBCPP_TEMPLATE_VIS coroutine_handle { _LIBCPP_HIDE_FROM_ABI friend 
coroutine_handle noop_coroutine() noexcept; +#if __has_builtin(__builtin_coro_noop) _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept { this->__handle_ = __builtin_coro_noop(); } void* __handle_ = nullptr; + +#elif defined(_LIBCPP_COMPILER_GCC) + // GCC doesn't implement __builtin_coro_noop(). + // Construct the coroutine frame manually instead. + struct __noop_coroutine_frame_ty_ { + static void __dummy_resume_destroy_func() { } + + void (*__resume_)() = __dummy_resume_destroy_func; + void (*__destroy_)() = __dummy_resume_destroy_func; + struct noop_coroutine_promise __promise_; + }; + + static __noop_coroutine_frame_ty_ __noop_coroutine_frame_; + + void* __handle_ = &__noop_coroutine_frame_; + + _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept = default; + +#endif // __has_builtin(__builtin_coro_noop) }; using noop_coroutine_handle = coroutine_handle; +#if defined(_LIBCPP_COMPILER_GCC) +inline noop_coroutine_handle::__noop_coroutine_frame_ty_ + noop_coroutine_handle::__noop_coroutine_frame_{}; +#endif + // [coroutine.noop.coroutine] inline _LIBCPP_HIDE_FROM_ABI noop_coroutine_handle noop_coroutine() noexcept { return noop_coroutine_handle(); } -#endif // __has_builtin(__builtin_coro_noop) +#endif // __has_builtin(__builtin_coro_noop) || defined(_LIBCPP_COMPILER_GCC) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp b/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp index 36a567983b28..215f44953c5a 100644 --- a/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp +++ b/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp @@ -21,8 +21,6 @@ #include "test_macros.h" -#if __has_builtin(__builtin_coro_noop) - static_assert(std::is_same, std::noop_coroutine_handle>::value, ""); 
static_assert(std::is_same::value, ""); @@ -57,20 +55,25 @@ int main(int, char**) h.resume(); h.destroy(); h(); - static_assert(h.done() == false, ""); static_assert(h, ""); + static_assert(h.done() == false, ""); + + // [coroutine.handle.noop.resumption]p2 + // Remarks: If noop_­coroutine_­handle is converted to + // coroutine_­handle<>, calls to operator(), resume and + // destroy on that handle will also have no observable + // effects. + base.resume(); + base.destroy(); + base(); + assert(base); + assert(base.done() == false); h.promise(); assert(h.address() == base.address()); - assert(h==base); + assert(h == base); assert(h.address() != nullptr); assert(std::coroutine_handle<>::from_address(h.address()) == base); return 0; } - -#else - -int main(int, char**) { return 0; } - -#endif // __has_builtin(__builtin_coro_noop) From 5d47e7d768951a7616ee19f12ef69cd50e02d230 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Thu, 16 Dec 2021 18:12:38 +0800 Subject: [PATCH 057/992] [RISCV] Convert whole register copies as the source defined explicitly. The implicit defines may come from a partial define in an instruction. It does not mean the defining instruction and the COPY instruction have the same vl and vtype. When the source comes from the implicit defines, do not convert the whole register copies to vmv.v.v. Differential Revision: https://reviews.llvm.org/D115866 --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 5 +++-- llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 2e2e00886d57..dee1ce635c73 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -201,8 +201,9 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI, if (MBBI->modifiesRegister(RISCV::VL)) return false; - // Go through all defined operands, including implicit defines. 
- for (const MachineOperand &MO : MBBI->operands()) { + // Only converting whole register copies to vmv.v.v when the defining + // value appears in the explicit operands. + for (const MachineOperand &MO : MBBI->explicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; if (!FoundDef && TRI->isSubRegisterEq(MO.getReg(), SrcReg)) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir index 19eeea2aa472..72460189b085 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir @@ -313,14 +313,14 @@ body: | ; CHECK-NEXT: $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 killed $x12, $noreg, 5, implicit $vl, implicit $vtype ; CHECK-NEXT: $x0 = PseudoVSETIVLI 10, 80, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: $v15 = PseudoVLE32_V_M1 killed $x16, $noreg, 5, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15 - ; CHECK-NEXT: $v24 = PseudoVMV_V_V_M1 killed $v8, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v25 = PseudoVMV_V_V_M1 killed $v9, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v26 = PseudoVMV_V_V_M1 killed $v10, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v27 = PseudoVMV_V_V_M1 killed $v11, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v28 = PseudoVMV_V_V_M1 killed $v12, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v29 = PseudoVMV_V_V_M1 killed $v13, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v30 = PseudoVMV_V_V_M1 killed $v14, $noreg, 5, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v31 = PseudoVMV_V_V_M1 killed $v15, $noreg, 5, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v24 = PseudoVMV1R_V killed $v8 + ; CHECK-NEXT: $v25 = PseudoVMV1R_V killed $v9 + ; CHECK-NEXT: $v26 = PseudoVMV1R_V killed $v10 + ; CHECK-NEXT: $v27 = PseudoVMV1R_V killed $v11 + ; CHECK-NEXT: $v28 = PseudoVMV1R_V killed $v12 + ; CHECK-NEXT: 
$v29 = PseudoVMV1R_V killed $v13 + ; CHECK-NEXT: $v30 = PseudoVMV1R_V killed $v14 + ; CHECK-NEXT: $v31 = PseudoVMV1R_V killed $v15 $x0 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 killed $x12, $noreg, 5, implicit $vl, implicit $vtype $x0 = PseudoVSETIVLI 10, 80, implicit-def $vl, implicit-def $vtype From ec501f15a8b8ace2b283732740d6d65d40d82e09 Mon Sep 17 00:00:00 2001 From: Shao-Ce SUN Date: Mon, 27 Dec 2021 14:15:30 +0800 Subject: [PATCH 058/992] [clang][CodeGen] Remove the signed version of createExpression Fix a TODO. Remove the callers of this signed version and delete. Reviewed By: CodaFi Differential Revision: https://reviews.llvm.org/D116014 --- clang/lib/CodeGen/CGDebugInfo.cpp | 12 ++++++------ clang/lib/CodeGen/CGDebugInfo.h | 2 +- llvm/bindings/ocaml/debuginfo/debuginfo_ocaml.c | 2 +- llvm/include/llvm-c/DebugInfo.h | 4 ++-- llvm/include/llvm/IR/DIBuilder.h | 1 - llvm/lib/IR/DIBuilder.cpp | 6 ------ llvm/lib/IR/DebugInfo.cpp | 8 ++++---- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 6e189a61dd20..b976dcb3058e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -722,7 +722,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { auto *LowerBound = llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( llvm::Type::getInt64Ty(CGM.getLLVMContext()), 0)); - SmallVector Expr( + SmallVector Expr( {llvm::dwarf::DW_OP_constu, NumElemsPerVG, llvm::dwarf::DW_OP_bregx, /* AArch64::VG */ 46, 0, llvm::dwarf::DW_OP_mul, llvm::dwarf::DW_OP_constu, 1, llvm::dwarf::DW_OP_minus}); @@ -768,7 +768,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { } // Element count = (VLENB / SEW) x LMUL - SmallVector Expr( + SmallVector Expr( // The DW_OP_bregx operation has two operands: a register which is // specified by an unsigned LEB128 number, 
followed by a signed LEB128 // offset. @@ -4325,7 +4325,7 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { } void CGDebugInfo::AppendAddressSpaceXDeref( - unsigned AddressSpace, SmallVectorImpl &Expr) const { + unsigned AddressSpace, SmallVectorImpl &Expr) const { Optional DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(AddressSpace); if (!DWARFAddressSpace) @@ -4494,7 +4494,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, Line = getLineNumber(VD->getLocation()); Column = getColumnNumber(VD->getLocation()); } - SmallVector Expr; + SmallVector Expr; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; if (VD->isImplicit()) Flags |= llvm::DINode::FlagArtificial; @@ -4720,7 +4720,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( target.getStructLayout(blockInfo.StructureType) ->getElementOffset(blockInfo.getCapture(VD).getIndex())); - SmallVector addr; + SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); addr.push_back(llvm::dwarf::DW_OP_plus_uconst); addr.push_back(offset.getQuantity()); @@ -5191,7 +5191,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } else { auto Align = getDeclAlignIfRequired(D, CGM.getContext()); - SmallVector Expr; + SmallVector Expr; unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(D->getType()); if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 14ff0eeabd21..d782bd97f590 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -363,7 +363,7 @@ class CGDebugInfo { /// Extended dereferencing mechanism is has the following format: /// DW_OP_constu DW_OP_swap DW_OP_xderef void AppendAddressSpaceXDeref(unsigned AddressSpace, - SmallVectorImpl &Expr) const; + SmallVectorImpl &Expr) const; /// A helper function to collect debug info for the default elements of a /// block. 
diff --git a/llvm/bindings/ocaml/debuginfo/debuginfo_ocaml.c b/llvm/bindings/ocaml/debuginfo/debuginfo_ocaml.c index 794fa6b06ab6..81f4748c5518 100644 --- a/llvm/bindings/ocaml/debuginfo/debuginfo_ocaml.c +++ b/llvm/bindings/ocaml/debuginfo/debuginfo_ocaml.c @@ -865,7 +865,7 @@ value llvm_instr_set_debug_loc(LLVMValueRef Inst, LLVMMetadataRef Loc) { LLVMMetadataRef llvm_dibuild_create_constant_value_expression(value Builder, value Value) { return LLVMDIBuilderCreateConstantValueExpression(DIBuilder_val(Builder), - (int64_t)Int_val(Value)); + (uint64_t)Int_val(Value)); } LLVMMetadataRef llvm_dibuild_create_global_variable_expression_native( diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index d7fb898b60d2..a515533f38e2 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -1102,7 +1102,7 @@ LLVMMetadataRef LLVMDIBuilderGetOrCreateArray(LLVMDIBuilderRef Builder, * \param Length Length of the address operation array. */ LLVMMetadataRef LLVMDIBuilderCreateExpression(LLVMDIBuilderRef Builder, - int64_t *Addr, size_t Length); + uint64_t *Addr, size_t Length); /** * Create a new descriptor for the specified variable that does not have an @@ -1112,7 +1112,7 @@ LLVMMetadataRef LLVMDIBuilderCreateExpression(LLVMDIBuilderRef Builder, */ LLVMMetadataRef LLVMDIBuilderCreateConstantValueExpression(LLVMDIBuilderRef Builder, - int64_t Value); + uint64_t Value); /** * Create a new descriptor for the specified variable. diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index 61c6dd885980..d8861c61fcb2 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -698,7 +698,6 @@ namespace llvm { /// variable which has a complex address expression for its address. /// \param Addr An array of complex address operations. 
DIExpression *createExpression(ArrayRef Addr = None); - DIExpression *createExpression(ArrayRef Addr); /// Create an expression for a variable that does not have an address, but /// does have a constant value. diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index 35af22034a12..16429e08382f 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -821,12 +821,6 @@ DIExpression *DIBuilder::createExpression(ArrayRef Addr) { return DIExpression::get(VMContext, Addr); } -DIExpression *DIBuilder::createExpression(ArrayRef Signed) { - // TODO: Remove the callers of this signed version and delete. - SmallVector Addr(Signed.begin(), Signed.end()); - return createExpression(Addr); -} - template static DISubprogram *getSubprogram(bool IsDistinct, Ts &&...Args) { if (IsDistinct) diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 7c69fbf7085d..98f25b035157 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1436,14 +1436,14 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder, } LLVMMetadataRef LLVMDIBuilderCreateExpression(LLVMDIBuilderRef Builder, - int64_t *Addr, size_t Length) { - return wrap(unwrap(Builder)->createExpression(ArrayRef(Addr, - Length))); + uint64_t *Addr, size_t Length) { + return wrap( + unwrap(Builder)->createExpression(ArrayRef(Addr, Length))); } LLVMMetadataRef LLVMDIBuilderCreateConstantValueExpression(LLVMDIBuilderRef Builder, - int64_t Value) { + uint64_t Value) { return wrap(unwrap(Builder)->createConstantValueExpression(Value)); } From e90c8c042214c1cfacd3ffc38ad2927390cb6fe1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 22:17:30 -0800 Subject: [PATCH 059/992] [ELF] Optimize basic block section bytesDropped/jumpInstrMods and make them more space efficient. This decreases sizeof(InputSection) from 176 to 160, and decreases peak memory usage by 0.3% when linking Chrome. 
--- lld/ELF/Arch/X86_64.cpp | 3 ++- lld/ELF/InputSection.cpp | 9 +++------ lld/ELF/InputSection.h | 13 ++++++++----- lld/ELF/Relocations.h | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 614b5ed59218..08591d8e5f06 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -304,7 +304,8 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB); if (jInvert == J_UNKNOWN) return false; - is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4}); + is.jumpInstrMod = make(); + *is.jumpInstrMod = {rB.offset - 1, jInvert, 4}; // Move R's values to rB except the offset. rB = {r.expr, r.type, rB.offset, r.addend, r.sym}; // Cancel R diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 9a2b52cf568d..844388330d6f 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -1106,12 +1106,9 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { // a jmp insn must be modified to shrink the jmp insn or to flip the jmp // insn. This is primarily used to relax and optimize jumps created with // basic block sections. - if (isa(this)) { - for (const JumpInstrMod &jumpMod : jumpInstrMods) { - uint64_t offset = jumpMod.offset; - uint8_t *bufLoc = buf + offset; - target.applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); - } + if (jumpInstrMod) { + target.applyJumpInstrMod(buf + jumpInstrMod->offset, jumpInstrMod->original, + jumpInstrMod->size); } } diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 3bd180be7d43..32c9ed26c1fd 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -130,13 +130,16 @@ class InputSectionBase : public SectionBase { // one or two jump instructions at the end that could be relaxed to a smaller // instruction. The members below help trimming the trailing jump instruction // and shrinking a section. 
- unsigned bytesDropped = 0; + uint8_t bytesDropped = 0; // Whether the section needs to be padded with a NOP filler due to // deleteFallThruJmpInsn. bool nopFiller = false; - void drop_back(uint64_t num) { bytesDropped += num; } + void drop_back(unsigned num) { + assert(bytesDropped + num < 256); + bytesDropped += num; + } void push_back(uint64_t num) { assert(bytesDropped >= num); @@ -203,7 +206,7 @@ class InputSectionBase : public SectionBase { // block sections are enabled. Basic block sections creates opportunities to // relax jump instructions at basic block boundaries after reordering the // basic blocks. - SmallVector jumpInstrMods; + JumpInstrMod *jumpInstrMod = nullptr; // A function compiled with -fsplit-stack calling a function // compiled without -fsplit-stack needs its prologue adjusted. Find @@ -377,9 +380,9 @@ class InputSection : public InputSectionBase { }; #ifdef _WIN32 -static_assert(sizeof(InputSection) <= 184, "InputSection is too big"); +static_assert(sizeof(InputSection) <= 168, "InputSection is too big"); #else -static_assert(sizeof(InputSection) <= 176, "InputSection is too big"); +static_assert(sizeof(InputSection) <= 160, "InputSection is too big"); #endif inline bool isDebugSection(const InputSectionBase &sec) { diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index c652c0a5f70f..f9909f236d12 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -117,8 +117,8 @@ struct Relocation { // jump instruction opcodes at basic block boundaries and are particularly // useful when basic block sections are enabled. struct JumpInstrMod { - JumpModType original; uint64_t offset; + JumpModType original; unsigned size; }; From 315554e873a583a4b9297e72256f87ff13539993 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 23:02:24 -0800 Subject: [PATCH 060/992] [ELF] Unify sizeof(InputSection) limits for _WIN32 and others Windows sizeof(InputSection) seems to match non-Windows now. 
--- lld/ELF/InputSection.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 32c9ed26c1fd..016f2523b4a8 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -379,11 +379,7 @@ class InputSection : public InputSectionBase { template void copyShtGroup(uint8_t *buf); }; -#ifdef _WIN32 -static_assert(sizeof(InputSection) <= 168, "InputSection is too big"); -#else static_assert(sizeof(InputSection) <= 160, "InputSection is too big"); -#endif inline bool isDebugSection(const InputSectionBase &sec) { return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && From b07292f77a1e82c27fd98105d69f351ef41ac29f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 26 Dec 2021 23:26:13 -0800 Subject: [PATCH 061/992] [ELF] Serialize deleteFallThruJmpInsn to fix concurrency issue New deleteFallThruJmpInsn calls `make` which cannot be called concurrently. Losing parallelism is unfortunate but we can think of a better approach if parallelism here justifies itself. --- lld/ELF/Writer.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index acc78dd06734..986cca27fdf8 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1774,12 +1774,11 @@ template void Writer::optimizeBasicBlockJumps() { // Delete all fall through jump instructions. Also, check if two // consecutive jump instructions can be flipped so that a fall // through jmp instruction can be deleted. - parallelForEachN(0, sections.size(), [&](size_t i) { + for (size_t i = 0, e = sections.size(); i != e; ++i) { InputSection *next = i + 1 < sections.size() ? sections[i + 1] : nullptr; - InputSection &is = *sections[i]; - result[i] = - target->deleteFallThruJmpInsn(is, is.getFile(), next) ? 1 : 0; - }); + InputSection &sec = *sections[i]; + result[i] = target->deleteFallThruJmpInsn(sec, sec.file, next) ? 
1 : 0; + } size_t numDeleted = std::count(result.begin(), result.end(), 1); if (numDeleted > 0) { script->assignAddresses(); From 0a5788ab57464f1ec102c263aef2961175b084d0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 26 Dec 2021 23:49:38 -0800 Subject: [PATCH 062/992] [Target] Use range-based for loops (NFC) --- .../Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 3 +-- .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 6 ++---- .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 4 ++-- llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 5 ++--- llvm/lib/Target/Hexagon/BitTracker.cpp | 4 ++-- .../lib/Target/Hexagon/HexagonBitSimplify.cpp | 7 +++---- .../Target/Hexagon/HexagonCFGOptimizer.cpp | 19 ++++++++---------- .../Hexagon/HexagonConstPropagation.cpp | 8 ++++---- .../lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 20 +++++++++---------- .../Target/Hexagon/HexagonExpandCondsets.cpp | 11 +++++----- .../Target/Hexagon/HexagonFrameLowering.cpp | 4 ++-- 11 files changed, 41 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 851acea94022..fd79116e8009 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -1123,9 +1123,8 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( DenseMap RegOffsets; int FloatRegCount = 0; // Process each .cfi directive and build up compact unwind info. 
- for (size_t i = 0, e = Instrs.size(); i != e; ++i) { + for (const MCCFIInstruction &Inst : Instrs) { unsigned Reg; - const MCCFIInstruction &Inst = Instrs[i]; switch (Inst.getOperation()) { case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa CFARegisterOffset = Inst.getOffset(); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index e060e59e3759..0de5bf5d2d49 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -264,10 +264,8 @@ void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, const SmallVectorImpl &Opcodes) { OS << "\t.unwind_raw " << Offset; - for (SmallVectorImpl::const_iterator OCI = Opcodes.begin(), - OCE = Opcodes.end(); - OCI != OCE; ++OCI) - OS << ", 0x" << Twine::utohexstr(*OCI); + for (uint8_t Opcode : Opcodes) + OS << ", 0x" << Twine::utohexstr(Opcode); OS << '\n'; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 05e5a473a3c6..17ca1866cf95 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -338,8 +338,8 @@ void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { {codeview::RegisterId::ARM_NQ14, ARM::Q14}, {codeview::RegisterId::ARM_NQ15, ARM::Q15}, }; - for (unsigned I = 0; I < array_lengthof(RegMap); ++I) - MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast(RegMap[I].CVReg)); + for (const auto &I : RegMap) + MRI->mapLLVMRegToCVReg(I.Reg, static_cast(I.CVReg)); } static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) { diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 1164b6ebbac3..1cc5422523f1 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -1147,9 
+1147,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { // predecessors. ReversePostOrderTraversal RPOT(&MF); bool Modified = false; - for (ReversePostOrderTraversal::rpo_iterator - I = RPOT.begin(), E = RPOT.end(); I != E; ++I) - Modified |= ReduceMBB(**I); + for (MachineBasicBlock *MBB : RPOT) + Modified |= ReduceMBB(*MBB); return Modified; } diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp index 685bafd785df..17adf32750db 100644 --- a/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -940,8 +940,8 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { // If evaluated successfully add the targets to the cumulative list. if (Trace) { dbgs() << " adding targets:"; - for (unsigned i = 0, n = BTs.size(); i < n; ++i) - dbgs() << " " << printMBBReference(*BTs[i]); + for (const MachineBasicBlock *BT : BTs) + dbgs() << " " << printMBBReference(*BT); if (FallsThrough) dbgs() << "\n falls through\n"; else diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 428d25da6dbc..b2a842233bb8 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -3260,13 +3260,12 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { dbgs() << "Group[" << i << "] inp: " << printReg(G.Inp.Reg, HRI, G.Inp.Sub) << " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n"; - for (unsigned j = 0, m = G.Ins.size(); j < m; ++j) - dbgs() << " " << *G.Ins[j]; + for (const MachineInstr *MI : G.Ins) + dbgs() << " " << MI; } }); - for (unsigned i = 0, n = Groups.size(); i < n; ++i) { - InstrGroup &G = Groups[i]; + for (InstrGroup &G : Groups) { if (!isShuffleOf(G.Out.Reg, G.Inp.Reg)) continue; auto LoopInpEq = [G] (const PhiInfo &P) -> bool { diff --git a/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index b456cf139c55..a31ad45f4bb0 
100644 --- a/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -118,13 +118,10 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { return false; // Loop over all of the basic blocks. - for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock *MBB = &*MBBb; - + for (MachineBasicBlock &MBB : Fn) { // Traverse the basic block. - MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); - if (MII != MBB->end()) { + MachineBasicBlock::iterator MII = MBB.getFirstTerminator(); + if (MII != MBB.end()) { MachineInstr &MI = *MII; int Opc = MI.getOpcode(); if (IsConditionalBranch(Opc)) { @@ -155,17 +152,17 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Remove BB2 // BB3: ... // BB4: ... - unsigned NumSuccs = MBB->succ_size(); - MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); + unsigned NumSuccs = MBB.succ_size(); + MachineBasicBlock::succ_iterator SI = MBB.succ_begin(); MachineBasicBlock* FirstSucc = *SI; MachineBasicBlock* SecondSucc = *(++SI); MachineBasicBlock* LayoutSucc = nullptr; MachineBasicBlock* JumpAroundTarget = nullptr; - if (MBB->isLayoutSuccessor(FirstSucc)) { + if (MBB.isLayoutSuccessor(FirstSucc)) { LayoutSucc = FirstSucc; JumpAroundTarget = SecondSucc; - } else if (MBB->isLayoutSuccessor(SecondSucc)) { + } else if (MBB.isLayoutSuccessor(SecondSucc)) { LayoutSucc = SecondSucc; JumpAroundTarget = FirstSucc; } else { @@ -201,7 +198,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { if (case1 || case2) { InvertAndChangeJumpTarget(MI, UncondTarget); - MBB->replaceSuccessor(JumpAroundTarget, UncondTarget); + MBB.replaceSuccessor(JumpAroundTarget, UncondTarget); // Remove the unconditional branch in LayoutSucc. 
LayoutSucc->erase(LayoutSucc->begin()); diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp index daf311fc49d4..105bf2811a20 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -125,8 +125,8 @@ namespace { }; LatticeCell() : Kind(Top), Size(0), IsSpecial(false) { - for (unsigned i = 0; i < MaxCellSize; ++i) - Values[i] = nullptr; + for (const Constant *&Value : Values) + Value = nullptr; } bool meet(const LatticeCell &L); @@ -1029,8 +1029,8 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) { ToRemove.push_back(const_cast(SB)); Targets.remove(SB); } - for (unsigned i = 0, n = ToRemove.size(); i < n; ++i) - removeCFGEdge(B, ToRemove[i]); + for (MachineBasicBlock *MBB : ToRemove) + removeCFGEdge(B, MBB); // If there are any blocks left in the computed targets, it means that // we think that the block could go somewhere, but the CFG does not. // This could legitimately happen in blocks that have non-returning diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 9a3feb5b6af1..2207925ceeba 100644 --- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -612,8 +612,8 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, // Simply keep a list of children of B, and traverse that list. 
using DTNodeVectType = SmallVector; DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); - for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { - MachineBasicBlock *SB = (*I)->getBlock(); + for (auto &I : Cn) { + MachineBasicBlock *SB = I->getBlock(); if (!Deleted.count(SB)) Changed |= visitBlock(SB, L); } @@ -648,8 +648,8 @@ bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { << "\n"); bool Changed = false; if (L) { - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= visitLoop(*I); + for (MachineLoop *I : *L) + Changed |= visitLoop(I); } MachineBasicBlock *EntryB = GraphTraits::getEntryNode(MFN); @@ -964,8 +964,8 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { using DTNodeVectType = SmallVector; DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); - for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { - MachineBasicBlock *SB = (*I)->getBlock(); + for (auto &I : Cn) { + MachineBasicBlock *SB = I->getBlock(); MDT->changeImmediateDominator(SB, IDB); } } @@ -973,8 +973,8 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { while (!B->succ_empty()) B->removeSuccessor(B->succ_begin()); - for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I) - (*I)->removeSuccessor(B, true); + for (MachineBasicBlock *Pred : B->predecessors()) + Pred->removeSuccessor(B, true); Deleted.insert(B); MDT->eraseNode(B); @@ -1064,8 +1064,8 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { Deleted.clear(); bool Changed = false; - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) - Changed |= visitLoop(*I); + for (MachineLoop *L : *MLI) + Changed |= visitLoop(L); Changed |= visitLoop(nullptr); return Changed; diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp index c444cf557c21..2693940bb1e9 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -1106,8 +1106,7 @@ bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { } bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { - for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - LiveRange::Segment &LR = *I; + for (LiveRange::Segment &LR : LI) { // Range must start at a register... if (!LR.start.isRegister()) return false; @@ -1160,16 +1159,16 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { // Move all live segments from L2 to L1. using ValueInfoMap = DenseMap; ValueInfoMap VM; - for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { - VNInfo *NewVN, *OldVN = I->valno; + for (LiveRange::Segment &I : L2) { + VNInfo *NewVN, *OldVN = I.valno; ValueInfoMap::iterator F = VM.find(OldVN); if (F == VM.end()) { - NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + NewVN = L1.getNextValue(I.valno->def, LIS->getVNInfoAllocator()); VM.insert(std::make_pair(OldVN, NewVN)); } else { NewVN = F->second; } - L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + L1.addSegment(LiveRange::Segment(I.start, I.end, NewVN)); } while (!L2.empty()) L2.removeSegment(*L2.begin()); diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 12ceac545e9d..a9520dcde88e 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -416,8 +416,8 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, UnsignedMap RPO; RPOTType RPOT(&MF); unsigned RPON = 0; - for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) - RPO[(*I)->getNumber()] = RPON++; + for (auto &I : RPOT) + RPO[I->getNumber()] = RPON++; // Don't process functions that have loops, at least for now. 
Placement // of prolog and epilog must take loop structure into account. For simpli- From 66c550f8de675f1d9138855c2d83faf2dbd3afbf Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 00:03:53 -0800 Subject: [PATCH 063/992] [ELF] Delete unused LazyObjKind --- lld/ELF/InputFiles.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index d622390fcade..7bf5423bed44 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -66,7 +66,6 @@ class InputFile { enum Kind : uint8_t { ObjKind, SharedKind, - LazyObjKind, ArchiveKind, BitcodeKind, BinaryKind, From abc388ed3cf0ef7e617ebe243d3b0b32d29e69a5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 00:31:54 -0800 Subject: [PATCH 064/992] [ELF] Move excludeLibs/redirectSymbols/replaceCommonSymbols adjacent Make post-thinlto-index symbol resolution passes closer. --- lld/ELF/Driver.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 939a40ef5886..66bf7d0f3848 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2369,12 +2369,6 @@ template void LinkerDriver::link(opt::InputArgList &args) { // except a few linker-synthesized ones will be added to the symbol table. compileBitcodeFiles(); - // Handle --exclude-libs again because lto.tmp may reference additional - // libcalls symbols defined in an excluded archive. This may override - // versionId set by scanVersionScript(). - if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); - // Symbol resolution finished. Report backward reference problems. reportBackrefs(); if (errorCount()) @@ -2392,9 +2386,18 @@ template void LinkerDriver::link(opt::InputArgList &args) { !config->thinLTOModulesToCompile.empty()) return; + // Handle --exclude-libs again because lto.tmp may reference additional + // libcalls symbols defined in an excluded archive. This may override + // versionId set by scanVersionScript(). 
+ if (args.hasArg(OPT_exclude_libs)) + excludeLibs(args); + // Apply symbol renames for --wrap and combine foo@v1 and foo@@v1. redirectSymbols(wrapped); + // Replace common symbols with regular symbols. + replaceCommonSymbols(); + { llvm::TimeTraceScope timeScope("Aggregate sections"); // Now that we have a complete list of input files. @@ -2479,9 +2482,6 @@ template void LinkerDriver::link(opt::InputArgList &args) { if (!config->relocatable) inputSections.push_back(createCommentSection()); - // Replace common symbols with regular symbols. - replaceCommonSymbols(); - // Split SHF_MERGE and .eh_frame sections into pieces in preparation for garbage collection. splitSections(); From 7c3cf4c2c0689be1a08b8a1326703ec5770de471 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 15 Dec 2021 17:04:01 +0100 Subject: [PATCH 065/992] [Inline][X86] Avoid inlining if it would create ABI-incompatible calls (PR52660) X86 allows inlining functions if the callee target features are a subset of the caller target features. This ensures that we don't inline something into a caller that does not support it. However, this does not account for possible call ABI mismatches as a result of inlining. If a call passing a vector argument was originally in a -avx function, calling another -avx function, the vector is passed in xmm. If we now inline it into a +avx function, then it will be passed in ymm, even though the callee expects it in xmm. Fix this by scanning over all calls in the function and checking whether ABI incompatibility is possible. Calls that only pass scalar types are excluded, as I believe those always use the same ABI independent of target features. Fixes https://github.com/llvm/llvm-project/issues/52660. 
Differential Revision: https://reviews.llvm.org/D116036 --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 42 ++++++++++++++++++- .../Inline/X86/call-abi-compatibility.ll | 14 +++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d8cd7311a0d5..c459445fc0ca 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" @@ -5187,9 +5188,48 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, const FeatureBitset &CalleeBits = TM.getSubtargetImpl(*Callee)->getFeatureBits(); + // Check whether features are the same (apart from the ignore list). FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; - return (RealCallerBits & RealCalleeBits) == RealCalleeBits; + if (RealCallerBits == RealCalleeBits) + return true; + + // If the features are a subset, we need to additionally check for calls + // that may become ABI-incompatible as a result of inlining. + if ((RealCallerBits & RealCalleeBits) != RealCalleeBits) + return false; + + for (const Instruction &I : instructions(Callee)) { + if (const auto *CB = dyn_cast(&I)) { + SmallVector Types; + for (Value *Arg : CB->args()) + Types.push_back(Arg->getType()); + if (!CB->getType()->isVoidTy()) + Types.push_back(CB->getType()); + + // Simple types are always ABI compatible. + auto IsSimpleTy = [](Type *Ty) { + return !Ty->isVectorTy() && !Ty->isAggregateType(); + }; + if (all_of(Types, IsSimpleTy)) + continue; + + if (Function *NestedCallee = CB->getCalledFunction()) { + // Assume that intrinsics are always ABI compatible. 
+ if (NestedCallee->isIntrinsic()) + continue; + + // Do a precise compatibility check. + if (!areTypesABICompatible(Caller, NestedCallee, Types)) + return false; + } else { + // We don't know the target features of the callee, + // assume it is incompatible. + return false; + } + } + } + return true; } bool X86TTIImpl::areTypesABICompatible(const Function *Caller, diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll index 0254de9df411..d5a3aa1aa9e3 100644 --- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll +++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll @@ -5,11 +5,10 @@ ; This call should not get inlined, because it would make the callee_not_avx ; call ABI incompatible. -; TODO: Currently gets inlined. define void @caller_avx() "target-features"="+avx" { ; CHECK-LABEL: define {{[^@]+}}@caller_avx ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_not_avx(<4 x i64> ) +; CHECK-NEXT: call void @caller_not_avx() ; CHECK-NEXT: ret void ; call void @caller_not_avx() @@ -17,6 +16,10 @@ define void @caller_avx() "target-features"="+avx" { } define internal void @caller_not_avx() { +; CHECK-LABEL: define {{[^@]+}}@caller_not_avx() { +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_not_avx(<4 x i64> ) +; CHECK-NEXT: ret void +; call i64 @callee_not_avx(<4 x i64> ) ret void } @@ -33,11 +36,10 @@ define i64 @callee_not_avx(<4 x i64> %arg) noinline { ; This call also shouldn't be inlined, as we don't know whether callee_unknown ; is ABI compatible or not. -; TODO: Currently gets inlined. 
define void @caller_avx2() "target-features"="+avx" { ; CHECK-LABEL: define {{[^@]+}}@caller_avx2 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> ) +; CHECK-NEXT: call void @caller_not_avx2() ; CHECK-NEXT: ret void ; call void @caller_not_avx2() @@ -45,6 +47,10 @@ define void @caller_avx2() "target-features"="+avx" { } define internal void @caller_not_avx2() { +; CHECK-LABEL: define {{[^@]+}}@caller_not_avx2() { +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> ) +; CHECK-NEXT: ret void +; call i64 @callee_unknown(<4 x i64> ) ret void } From b5fef6dbfd6b4c84d61155fd2221064bcc1ecdcf Mon Sep 17 00:00:00 2001 From: Justas Janickas Date: Tue, 30 Nov 2021 09:17:09 +0000 Subject: [PATCH 066/992] [OpenCL] Allow optional __generic in __remove_address_space utility --- clang/lib/Headers/opencl-c-base.h | 2 ++ .../remove-address-space.clcpp | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) rename clang/test/{CodeGenOpenCLCXX => SemaOpenCLCXX}/remove-address-space.clcpp (75%) diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index 9c81ddb5e2a7..7485386c8234 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -600,9 +600,11 @@ typedef struct { // C++ for OpenCL - __remove_address_space #if defined(__OPENCL_CPP_VERSION__) template struct __remove_address_space { using type = _Tp; }; +#if defined(__opencl_c_generic_address_space) template struct __remove_address_space<__generic _Tp> { using type = _Tp; }; +#endif template struct __remove_address_space<__global _Tp> { using type = _Tp; }; diff --git a/clang/test/CodeGenOpenCLCXX/remove-address-space.clcpp b/clang/test/SemaOpenCLCXX/remove-address-space.clcpp similarity index 75% rename from clang/test/CodeGenOpenCLCXX/remove-address-space.clcpp rename to clang/test/SemaOpenCLCXX/remove-address-space.clcpp index f6f0c3290aa6..e6b2924eab5f 100644 --- 
a/clang/test/CodeGenOpenCLCXX/remove-address-space.clcpp +++ b/clang/test/SemaOpenCLCXX/remove-address-space.clcpp @@ -1,4 +1,8 @@ -// RUN: %clang_cc1 %s -cl-std=clc++ -fdeclare-opencl-builtins -finclude-default-header +// RUN: %clang_cc1 %s -cl-std=clc++1.0 -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify +// RUN: %clang_cc1 %s -cl-std=clc++2021 -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify +// RUN: %clang_cc1 %s -cl-std=clc++2021 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify + +// expected-no-diagnostics template struct is_same { @@ -19,8 +23,10 @@ void test_is_same() { void test_remove_address_space() { static_assert(is_same<__remove_address_space::type, int>::value, "type without an address space unexpectedly modified by __remove_address_space"); +#if defined(__opencl_c_generic_address_space) static_assert(is_same<__remove_address_space<__generic int>::type, int>::value, "__generic address space not removed by __remove_address_space"); +#endif static_assert(is_same<__remove_address_space<__global char>::type, char>::value, "__global address space not removed by __remove_address_space"); static_assert(is_same<__remove_address_space<__private ulong>::type, ulong>::value, From 4fe5cfe53e86dbfb4ad4ad05f7c19c0929433e16 Mon Sep 17 00:00:00 2001 From: Arjun P Date: Mon, 27 Dec 2021 14:37:47 +0530 Subject: [PATCH 067/992] [MLIR] Add forgotten directory Support to unittests cmake The Support directory was removed from the unittests cmake when the directory was removed in 204c3b551626a925dfdc3822a6f240bdc8ef5d3a. Subsequent commits added the directory back but seem to have missed adding it back to the cmake. This patch also removes MLIRSupportIndentedStream from the list of linked libraries to avoid an ODR violation (it's already part of MLIRSupport which is also being linked here). 
Otherwise ASAN complains: ``` ================================================================= ==102592==ERROR: AddressSanitizer: odr-violation (0x7fbdf214eee0): [1] size=120 'vtable for mlir::raw_indented_ostream' /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp [2] size=120 'vtable for mlir::raw_indented_ostream' /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp These globals were registered at these points: [1]: #0 0x28a71d in __asan_register_globals (/home/arjun/llvm-project/build/tools/mlir/unittests/Support/MLIRSupportTests+0x28a71d) #1 0x7fbdf214a61b in asan.module_ctor (/home/arjun/llvm-project/build/lib/libMLIRSupportIndentedOstream.so.14git+0x661b) [2]: #0 0x28a71d in __asan_register_globals (/home/arjun/llvm-project/build/tools/mlir/unittests/Support/MLIRSupportTests+0x28a71d) #1 0x7fbdf2061c4b in asan.module_ctor (/home/arjun/llvm-project/build/lib/libMLIRSupport.so.14git+0x11bc4b) ==102592==HINT: if you don't care about these errors you may set ASAN_OPTIONS=detect_odr_violation=0 SUMMARY AddressSanitizer: odr-violation: global 'vtable for mlir::raw_indented_ostream' at /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp ==102592==ABORTING ``` This patch also fixes a build issue with `DebugAction::classof` under Windows. This commit re-lands this patch, which was previously reverted in 2132906836cf0618e76485c67a60305bf1557ffc due to a buildbot failure that turned out to be because of a flaky test. 
Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D116027 --- mlir/include/mlir/Support/DebugAction.h | 3 +-- mlir/unittests/CMakeLists.txt | 1 + mlir/unittests/Support/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h index 227d213b0dda..4dc04476b583 100644 --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -205,8 +205,7 @@ template class DebugAction { /// Provide classof to allow casting between handler types. static bool classof(const DebugActionManager::HandlerBase *handler) { - return handler->getHandlerID() == - TypeID::get::Handler>(); + return handler->getHandlerID() == TypeID::get(); } }; diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 21506862a302..2798c443cf98 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(ExecutionEngine) add_subdirectory(Interfaces) add_subdirectory(IR) add_subdirectory(Pass) +add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Transforms) diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt index 6616a793ec12..fd1e66205c07 100644 --- a/mlir/unittests/Support/CMakeLists.txt +++ b/mlir/unittests/Support/CMakeLists.txt @@ -7,4 +7,4 @@ add_mlir_unittest(MLIRSupportTests ) target_link_libraries(MLIRSupportTests - PRIVATE MLIRSupportIndentedOstream MLIRSupport) + PRIVATE MLIRSupport) From 21aa4d5d5ef947d824c50a22d15fb93d7df0b711 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 27 Dec 2021 17:26:44 +0800 Subject: [PATCH 068/992] [NFC] [Coroutines] Add a test for icmp use of coro.suspend to prevent musttail call converting Add a test to show the false negative optimization oppotunity to not convert a resume call to musttail call. It should could be. 
--- .../Coroutines/coro-split-musttail4.ll | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail4.ll diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll new file mode 100644 index 000000000000..4cba73c5e6ff --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll @@ -0,0 +1,58 @@ +; Tests that coro-split will convert a call before coro.suspend to a musttail call +; while the user of the coro.suspend is a icmpinst. +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @fakeresume1(i8*) { +entry: + ret void; +} + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume1(i8* null) + + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + %switch = icmp ult i8 %suspend, 2 + br i1 %switch, label %cleanup, label %coro.end + +cleanup: + %free.handle = call i8* @llvm.coro.free(token %id, i8* %vFrame) + %.not = icmp eq i8* %free.handle, null + br i1 %.not, label %coro.end, label %coro.free + +coro.free: + call void @delete(i8* nonnull %free.handle) #2 + br label %coro.end + +coro.end: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; FIXME: The fakerresume here should be musttail call. 
+; CHECK-LABEL: @f.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1( + + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 +declare token @llvm.coro.save(i8*) #2 +declare i8* @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 +declare i1 @llvm.coro.end(i8*, i1) #2 +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 +declare i8* @malloc(i64) +declare void @delete(i8* nonnull) #2 + +attributes #0 = { "coroutine.presplit"="1" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } \ No newline at end of file From 2e630eabd32989d9ef31472d9470dc577a0d77e4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 27 Dec 2021 11:25:45 +0100 Subject: [PATCH 069/992] [LV] Sink BTC creation to actual use (NFC). Suggested separately in D116123. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 83f9e3f58993..e713925e3b80 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8457,7 +8457,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi(); Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint); - VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); bool TailFolded = !CM.isScalarEpilogueAllowed(); if (TailFolded && CM.TTI.emitGetActiveLaneMask()) { @@ -8467,6 +8466,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { // happen. 
BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV}); } else { + VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC}); } return BlockMaskCache[BB] = BlockMask; From 1f07a4a5699b73582461880e716e6692cbe3d6a6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 14:01:54 +0100 Subject: [PATCH 070/992] [CodeGen] Avoid more pointer element type accesses --- clang/lib/CodeGen/CGClass.cpp | 4 ++-- clang/lib/CodeGen/CGExprAgg.cpp | 9 +++++---- clang/lib/CodeGen/CodeGenFunction.h | 5 +++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 4 ++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 8f99ff0d50ff..d84956c2653e 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -390,7 +390,7 @@ Address CodeGenFunction::GetAddressOfBaseClass( llvm::PHINode *PHI = Builder.CreatePHI(BasePtrTy, 2, "cast.result"); PHI->addIncoming(Value.getPointer(), notNullBB); PHI->addIncoming(llvm::Constant::getNullValue(BasePtrTy), origBB); - Value = Address(PHI, Value.getAlignment()); + Value = Value.withPointer(PHI); } return Value; @@ -1983,7 +1983,7 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, CharUnits eltAlignment = arrayBase.getAlignment() .alignmentOfArrayElement(getContext().getTypeSizeInChars(type)); - Address curAddr = Address(cur, eltAlignment); + Address curAddr = Address(cur, elementType, eltAlignment); // Zero initialize the storage, if requested. 
if (zeroInitialize) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 3b996b89a1d7..0968afd82064 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -614,8 +614,8 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, // every temporary created in a default argument is sequenced before // the construction of the next array element, if any CodeGenFunction::RunCleanupsScope CleanupsScope(CGF); - LValue elementLV = - CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType); + LValue elementLV = CGF.MakeAddrLValue( + Address(currentElement, llvmElementType, elementAlign), elementType); if (filler) EmitInitializationToLValue(filler, elementLV); else @@ -1801,6 +1801,7 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, CharUnits elementSize = CGF.getContext().getTypeSizeInChars(elementType); CharUnits elementAlign = destPtr.getAlignment().alignmentOfArrayElement(elementSize); + llvm::Type *llvmElementType = CGF.ConvertTypeForMem(elementType); llvm::BasicBlock *entryBB = Builder.GetInsertBlock(); llvm::BasicBlock *bodyBB = CGF.createBasicBlock("arrayinit.body"); @@ -1810,8 +1811,8 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, llvm::PHINode *index = Builder.CreatePHI(zero->getType(), 2, "arrayinit.index"); index->addIncoming(zero, entryBB); - llvm::Value *element = Builder.CreateInBoundsGEP( - begin->getType()->getPointerElementType(), begin, index); + llvm::Value *element = + Builder.CreateInBoundsGEP(llvmElementType, begin, index); // Prepare for a cleanup. QualType::DestructionKind dtorKind = elementType.isDestructedType(); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index f76ce8a6400d..ece863ad1077 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -182,6 +182,7 @@ template <> struct DominatingValue
{ struct saved_type { DominatingLLVMValue::saved_type SavedValue; + llvm::Type *ElementType; CharUnits Alignment; }; @@ -190,11 +191,11 @@ template <> struct DominatingValue
{ } static saved_type save(CodeGenFunction &CGF, type value) { return { DominatingLLVMValue::save(CGF, value.getPointer()), - value.getAlignment() }; + value.getElementType(), value.getAlignment() }; } static type restore(CodeGenFunction &CGF, saved_type value) { return Address(DominatingLLVMValue::restore(CGF, value.SavedValue), - value.Alignment); + value.ElementType, value.Alignment); } }; diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 1a15b09c7b2b..ce84430dd743 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -697,8 +697,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CharUnits VTablePtrAlign = CGF.CGM.getDynamicOffsetAlignment(ThisAddr.getAlignment(), RD, CGF.getPointerAlign()); - llvm::Value *VTable = - CGF.GetVTablePtr(Address(This, VTablePtrAlign), VTableTy, RD); + llvm::Value *VTable = CGF.GetVTablePtr( + Address(This, ThisAddr.getElementType(), VTablePtrAlign), VTableTy, RD); // Apply the offset. // On ARM64, to reserve extra space in virtual member function pointers, From ca4d2c368d1a139c18942ac5581d1aab7e1d4b67 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 27 Dec 2021 19:05:22 +0800 Subject: [PATCH 071/992] Revert "[NFC] [Coroutines] Add a test for icmp use of coro.suspend to prevent musttail call converting" This reverts commit 21aa4d5d5ef947d824c50a22d15fb93d7df0b711. The test added is not proper. It would be passed all the time since it is in the ramp function. 
--- .../Coroutines/coro-split-musttail4.ll | 58 ------------------- 1 file changed, 58 deletions(-) delete mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail4.ll diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll deleted file mode 100644 index 4cba73c5e6ff..000000000000 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll +++ /dev/null @@ -1,58 +0,0 @@ -; Tests that coro-split will convert a call before coro.suspend to a musttail call -; while the user of the coro.suspend is a icmpinst. -; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s - -define void @fakeresume1(i8*) { -entry: - ret void; -} - -define void @f() #0 { -entry: - %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) - %alloc = call i8* @malloc(i64 16) #3 - %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) - - %save = call token @llvm.coro.save(i8* null) - call fastcc void @fakeresume1(i8* null) - - %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) - %switch = icmp ult i8 %suspend, 2 - br i1 %switch, label %cleanup, label %coro.end - -cleanup: - %free.handle = call i8* @llvm.coro.free(token %id, i8* %vFrame) - %.not = icmp eq i8* %free.handle, null - br i1 %.not, label %coro.end, label %coro.free - -coro.free: - call void @delete(i8* nonnull %free.handle) #2 - br label %coro.end - -coro.end: - call i1 @llvm.coro.end(i8* null, i1 false) - ret void -} - -; FIXME: The fakerresume here should be musttail call. 
-; CHECK-LABEL: @f.resume( -; CHECK-NOT: musttail call fastcc void @fakeresume1( - - -declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 -declare i1 @llvm.coro.alloc(token) #2 -declare i64 @llvm.coro.size.i64() #3 -declare i8* @llvm.coro.begin(token, i8* writeonly) #2 -declare token @llvm.coro.save(i8*) #2 -declare i8* @llvm.coro.frame() #3 -declare i8 @llvm.coro.suspend(token, i1) #2 -declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 -declare i1 @llvm.coro.end(i8*, i1) #2 -declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 -declare i8* @malloc(i64) -declare void @delete(i8* nonnull) #2 - -attributes #0 = { "coroutine.presplit"="1" } -attributes #1 = { argmemonly nounwind readonly } -attributes #2 = { nounwind } -attributes #3 = { nounwind readnone } \ No newline at end of file From 508e39afe012e3197be1149812c8e5a47a955fe4 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 27 Dec 2021 12:13:41 +0100 Subject: [PATCH 072/992] GlobalISel: remove redundant line added in D114198. NFC --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e8a8efd5dad4..6867597a10f0 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3655,7 +3655,6 @@ static bool hasSameNumEltsOnAllVectorOperands( if (!Ty.isVector()) { if (!is_contained(NonVecOpIndices, OpIdx)) return false; - is_contained(NonVecOpIndices, OpIdx); continue; } From daf32b13d7009e4c53cad71132564f49bac61cb7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 27 Dec 2021 12:31:02 +0100 Subject: [PATCH 073/992] [IndVars] Support opaque pointers in LFTR Remove the assertion about the pointer element type, only check that the stride is one. 
Ultimately, the actual pointer type here doesn't matter, because SCEVExpander would insert appropriate casts if necessary. --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 9 +--- .../IndVarSimplify/lftr-opaque-pointers.ll | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/IndVarSimplify/lftr-opaque-pointers.ll diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 7001d330fce0..0027e7db055f 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -982,6 +982,7 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, assert(isLoopCounter(IndVar, L, SE)); const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar)); const SCEV *IVInit = AR->getStart(); + assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter // finds a valid pointer IV. Sign extend ExitCount in order to materialize a @@ -1004,13 +1005,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, assert(SE->isLoopInvariant(IVOffset, L) && "Computed iteration count is not loop invariant!"); - // We could handle pointer IVs other than i8*, but we need to compensate for - // gep index scaling. - assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), - cast(IndVar->getType()) - ->getElementType())->isOne() && - "unit stride pointer IV must be i8*"); - const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); BranchInst *BI = cast(ExitingBB->getTerminator()); return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); @@ -1026,7 +1020,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, // IVInit integer and ExitCount pointer would only occur if a canonical IV // were generated on top of case #2, which is not expected. 
- assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); // For unit stride, IVCount = Start + ExitCount with 2's complement // overflow. diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-opaque-pointers.ll b/llvm/test/Transforms/IndVarSimplify/lftr-opaque-pointers.ll new file mode 100644 index 000000000000..94e0288e534b --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/lftr-opaque-pointers.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -indvars -opaque-pointers < %s | FileCheck %s + +target datalayout = "n8:16:32:64" + +@data = common global [240 x i8] zeroinitializer, align 16 + +; Based on the test from lftr.ll +define void @test_zext(ptr %a) { +; CHECK-LABEL: @test_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ getelementptr inbounds ([240 x i8], ptr @data, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[T3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY]] ], [ [[T:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[T]] = getelementptr inbounds i8, ptr [[DOT0]], i64 1 +; CHECK-NEXT: [[T2:%.*]] = load i8, ptr [[DOT0]], align 1 +; CHECK-NEXT: [[T3]] = getelementptr inbounds i8, ptr [[P_0]], i64 1 +; CHECK-NEXT: store i8 [[T2]], ptr [[P_0]], align 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[P_0]], getelementptr (i8, ptr @data, i64 239) +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %i.0 = phi i8 [ 0, %entry ], [ %t4, %loop ] + %p.0 = phi ptr [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), %entry ], [ %t3, %loop ] + %.0 = phi ptr [ %a, %entry ], [ %t, %loop ] + %t = getelementptr inbounds i8, ptr %.0, i64 1 + %t2 = load i8, ptr %.0, align 1 + %t3 = getelementptr inbounds i8, ptr %p.0, i64 1 + store i8 %t2, ptr %p.0, align 1 + %t4 = add i8 %i.0, 1 + %t5 = icmp ult i8 %t4, 
-16 + br i1 %t5, label %loop, label %exit + +exit: + ret void +} From 948ae472a6109884c92179cff84b4f5ee7c2385c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 24 Dec 2021 22:46:20 +0000 Subject: [PATCH 074/992] [MCA][X86] Add AVX512 vector move instruction test coverage --- .../llvm-mca/X86/Generic/resources-avx512.s | 202 ++++++++- .../llvm-mca/X86/Generic/resources-avx512bw.s | 52 ++- .../X86/Generic/resources-avx512bwvl.s | 102 ++++- .../llvm-mca/X86/Generic/resources-avx512vl.s | 402 +++++++++++++++++- .../X86/IceLakeServer/resources-avx512.s | 202 ++++++++- .../X86/IceLakeServer/resources-avx512bw.s | 52 ++- .../X86/IceLakeServer/resources-avx512bwvl.s | 102 ++++- .../X86/IceLakeServer/resources-avx512vl.s | 402 +++++++++++++++++- .../X86/SkylakeServer/resources-avx512.s | 202 ++++++++- .../X86/SkylakeServer/resources-avx512bw.s | 52 ++- .../X86/SkylakeServer/resources-avx512bwvl.s | 102 ++++- .../X86/SkylakeServer/resources-avx512vl.s | 402 +++++++++++++++++- 12 files changed, 2262 insertions(+), 12 deletions(-) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index 0eeee89246ea..e500f8a91f6a 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -212,6 +212,24 @@ vminps %zmm16, %zmm17, %zmm19 {z}{k1} vminps (%rax), %zmm17, %zmm19 {z}{k1} vminps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vmovapd %zmm16, %zmm19 +vmovapd (%rax), %zmm19 +vmovapd %zmm16, (%rax) +vmovapd %zmm16, %zmm19 {k1} +vmovapd (%rax), %zmm19 {k1} +vmovapd %zmm16, (%rax) {k1} +vmovapd %zmm16, %zmm19 {z}{k1} +vmovapd (%rax), %zmm19 {z}{k1} + +vmovaps %zmm16, %zmm19 +vmovaps (%rax), %zmm19 +vmovaps %zmm16, (%rax) +vmovaps %zmm16, %zmm19 {k1} +vmovaps (%rax), %zmm19 {k1} +vmovaps %zmm16, (%rax) {k1} +vmovaps %zmm16, %zmm19 {z}{k1} +vmovaps (%rax), %zmm19 {z}{k1} + vmovddup %zmm16, %zmm19 vmovddup (%rax), %zmm19 vmovddup %zmm16, %zmm19 
{k1} @@ -219,6 +237,42 @@ vmovddup (%rax), %zmm19 {k1} vmovddup %zmm16, %zmm19 {z}{k1} vmovddup (%rax), %zmm19 {z}{k1} +vmovdqa32 %zmm16, %zmm19 +vmovdqa32 (%rax), %zmm19 +vmovdqa32 %zmm16, (%rax) +vmovdqa32 %zmm16, %zmm19 {k1} +vmovdqa32 (%rax), %zmm19 {k1} +vmovdqa32 %zmm16, (%rax) {k1} +vmovdqa32 %zmm16, %zmm19 {z}{k1} +vmovdqa32 (%rax), %zmm19 {z}{k1} + +vmovdqa64 %zmm16, %zmm19 +vmovdqa64 (%rax), %zmm19 +vmovdqa64 %zmm16, (%rax) +vmovdqa64 %zmm16, %zmm19 {k1} +vmovdqa64 (%rax), %zmm19 {k1} +vmovdqa64 %zmm16, (%rax) {k1} +vmovdqa64 %zmm16, %zmm19 {z}{k1} +vmovdqa64 (%rax), %zmm19 {z}{k1} + +vmovdqu32 %zmm16, %zmm19 +vmovdqu32 (%rax), %zmm19 +vmovdqu32 %zmm16, (%rax) +vmovdqu32 %zmm16, %zmm19 {k1} +vmovdqu32 (%rax), %zmm19 {k1} +vmovdqu32 %zmm16, (%rax) {k1} +vmovdqu32 %zmm16, %zmm19 {z}{k1} +vmovdqu32 (%rax), %zmm19 {z}{k1} + +vmovdqu64 %zmm16, %zmm19 +vmovdqu64 (%rax), %zmm19 +vmovdqu64 %zmm16, (%rax) +vmovdqu64 %zmm16, %zmm19 {k1} +vmovdqu64 (%rax), %zmm19 {k1} +vmovdqu64 %zmm16, (%rax) {k1} +vmovdqu64 %zmm16, %zmm19 {z}{k1} +vmovdqu64 (%rax), %zmm19 {z}{k1} + vmovshdup %zmm16, %zmm19 vmovshdup (%rax), %zmm19 vmovshdup %zmm16, %zmm19 {k1} @@ -233,6 +287,24 @@ vmovsldup (%rax), %zmm19 {k1} vmovsldup %zmm16, %zmm19 {z}{k1} vmovsldup (%rax), %zmm19 {z}{k1} +vmovupd %zmm16, %zmm19 +vmovupd (%rax), %zmm19 +vmovupd %zmm16, (%rax) +vmovupd %zmm16, %zmm19 {k1} +vmovupd (%rax), %zmm19 {k1} +vmovupd %zmm16, (%rax) {k1} +vmovupd %zmm16, %zmm19 {z}{k1} +vmovupd (%rax), %zmm19 {z}{k1} + +vmovups %zmm16, %zmm19 +vmovups (%rax), %zmm19 +vmovups %zmm16, (%rax) +vmovups %zmm16, %zmm19 {k1} +vmovups (%rax), %zmm19 {k1} +vmovups %zmm16, (%rax) {k1} +vmovups %zmm16, %zmm19 {z}{k1} +vmovups (%rax), %zmm19 {z}{k1} + vmulpd %zmm16, %zmm17, %zmm19 vmulpd (%rax), %zmm17, %zmm19 vmulpd (%rax){1to8}, %zmm17, %zmm19 @@ -996,12 +1068,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vminps 
(%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovapd %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovapd %zmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovapd %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovaps %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovaps %zmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovaps %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vmovddup (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 1.00 * vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqa32 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqa64 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 
1.00 * vmovdqa64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu32 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu64 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vmovshdup (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 {%k1} @@ -1014,6 +1134,22 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovupd %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovupd (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovupd %zmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovupd %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovups %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovups %zmm16, 
(%rax) +# CHECK-NEXT: 1 1 1.00 vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovups %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 5 1.00 vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 12 1.00 * vmulpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 12 1.00 * vmulpd (%rax){1to8}, %zmm17, %zmm19 @@ -1531,7 +1667,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 138.67 201.67 - 438.67 225.50 225.50 +# CHECK-NEXT: - 1506.00 144.67 201.67 16.00 456.67 245.50 245.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1719,12 +1855,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %zmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %zmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %zmm19 {%k1} +# 
CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vmovddup %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vmovddup %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa32 %zmm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa64 %zmm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu32 %zmm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %zmm19 {%k1} 
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu64 %zmm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vmovshdup %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovshdup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vmovshdup %zmm16, %zmm19 {%k1} @@ -1737,6 +1921,22 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %zmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovups %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %zmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - 
- - - - - 0.50 0.50 vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - - - vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s index a1ea3bb5e5e6..da27526f87e3 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s @@ -20,6 +20,24 @@ kshiftrq $2, %k1, %k2 kunpckdq %k0, %k1, %k2 kunpckwd %k0, %k1, %k2 +vmovdqu8 %zmm16, %zmm19 +vmovdqu8 (%rax), %zmm19 +vmovdqu8 %zmm16, (%rax) +vmovdqu8 %zmm16, %zmm19 {k1} +vmovdqu8 (%rax), %zmm19 {k1} +vmovdqu8 %zmm16, (%rax) {k1} +vmovdqu8 %zmm16, %zmm19 {z}{k1} +vmovdqu8 (%rax), %zmm19 {z}{k1} + +vmovdqu16 %zmm16, %zmm19 +vmovdqu16 (%rax), %zmm19 +vmovdqu16 %zmm16, (%rax) +vmovdqu16 %zmm16, %zmm19 {k1} +vmovdqu16 (%rax), %zmm19 {k1} +vmovdqu16 %zmm16, (%rax) {k1} +vmovdqu16 %zmm16, %zmm19 {z}{k1} +vmovdqu16 (%rax), %zmm19 {z}{k1} + vpabsb %zmm16, %zmm19 vpabsb (%rax), %zmm19 vpabsb %zmm16, %zmm19 {k1} @@ -280,6 +298,22 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 1 1 1.00 kshiftrq $2, %k1, %k2 # CHECK-NEXT: 1 1 1.00 kunpckdq %k0, %k1, %k2 # CHECK-NEXT: 1 1 1.00 kunpckwd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu8 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} 
{z} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu16 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vpabsb (%rax), %zmm19 # CHECK-NEXT: 1 1 0.50 vpabsb %zmm16, %zmm19 {%k1} @@ -487,7 +521,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 10.67 47.67 - 153.67 47.50 47.50 +# CHECK-NEXT: - - 13.67 47.67 4.00 156.67 52.50 52.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -509,6 +543,22 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - 1.00 - - kshiftrq $2, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - kunpckdq %k0, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - kunpckwd %k0, %k1, %k2 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu8 %zmm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu16 %zmm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %zmm19 {%k1} 
+# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu16 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpabsb (%rax), %zmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %zmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s index d0eeeabcdb25..ad32d0707077 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s @@ -1,6 +1,42 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s +vmovdqu8 %xmm16, %xmm19 +vmovdqu8 (%rax), %xmm19 +vmovdqu8 %xmm16, (%rax) +vmovdqu8 %xmm16, %xmm19 {k1} +vmovdqu8 (%rax), %xmm19 {k1} +vmovdqu8 %xmm16, (%rax) {k1} +vmovdqu8 %xmm16, %xmm19 {z}{k1} +vmovdqu8 (%rax), %xmm19 {z}{k1} + +vmovdqu8 %ymm16, %ymm19 +vmovdqu8 (%rax), %ymm19 +vmovdqu8 %ymm16, (%rax) +vmovdqu8 %ymm16, %ymm19 {k1} +vmovdqu8 (%rax), %ymm19 {k1} +vmovdqu8 %ymm16, (%rax) {k1} +vmovdqu8 %ymm16, %ymm19 {z}{k1} +vmovdqu8 (%rax), %ymm19 {z}{k1} + +vmovdqu16 %xmm16, %xmm19 +vmovdqu16 (%rax), %xmm19 +vmovdqu16 %xmm16, (%rax) +vmovdqu16 %xmm16, %xmm19 {k1} +vmovdqu16 (%rax), %xmm19 {k1} +vmovdqu16 %xmm16, (%rax) {k1} +vmovdqu16 %xmm16, %xmm19 {z}{k1} +vmovdqu16 (%rax), %xmm19 {z}{k1} + +vmovdqu16 %ymm16, %ymm19 +vmovdqu16 (%rax), %ymm19 +vmovdqu16 %ymm16, (%rax) +vmovdqu16 %ymm16, %ymm19 {k1} +vmovdqu16 (%rax), %ymm19 {k1} +vmovdqu16 %ymm16, (%rax) {k1} +vmovdqu16 %ymm16, %ymm19 {z}{k1} +vmovdqu16 (%rax), %ymm19 {z}{k1} + vpabsb %xmm16, %xmm19 vpabsb (%rax), %xmm19 vpabsb %xmm16, %xmm19 {k1} @@ -476,6 +512,38 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: [6]: HasSideEffects 
(U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu8 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu8 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu8 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu8 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu16 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu16 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu16 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu16 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpabsb %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpabsb (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpabsb %xmm16, %xmm19 {%k1} @@ -877,10 +945,42 @@ vpmovw2m %ymm0, %k0 # CHECK: Resource 
pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 13.33 131.33 - 243.33 95.00 95.00 +# CHECK-NEXT: - - 18.33 133.33 8.00 248.33 105.00 105.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu8 %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu8 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu8 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu8 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu8 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu8 %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu8 %ymm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu8 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu8 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu8 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu16 %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu16 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu16 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu16 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu16 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu16 %ymm16, %ymm19 +# CHECK-NEXT: - - - - 
- - 0.50 0.50 vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu16 %ymm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu16 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu16 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu16 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %xmm16, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpabsb (%rax), %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index b78ce71df344..20dc9e2fca61 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -308,6 +308,42 @@ vminps %ymm16, %ymm17, %ymm19 {z}{k1} vminps (%rax), %ymm17, %ymm19 {z}{k1} vminps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vmovapd %xmm16, %xmm19 +vmovapd (%rax), %xmm19 +vmovapd %xmm16, (%rax) +vmovapd %xmm16, %xmm19 {k1} +vmovapd (%rax), %xmm19 {k1} +vmovapd %xmm16, (%rax) {k1} +vmovapd %xmm16, %xmm19 {z}{k1} +vmovapd (%rax), %xmm19 {z}{k1} + +vmovapd %ymm16, %ymm19 +vmovapd (%rax), %ymm19 +vmovapd %ymm16, (%rax) +vmovapd %ymm16, %ymm19 {k1} +vmovapd (%rax), %ymm19 {k1} +vmovapd %ymm16, (%rax) {k1} +vmovapd %ymm16, %ymm19 {z}{k1} +vmovapd (%rax), %ymm19 {z}{k1} + +vmovaps %xmm16, %xmm19 +vmovaps (%rax), %xmm19 +vmovaps %xmm16, (%rax) +vmovaps %xmm16, %xmm19 {k1} +vmovaps (%rax), %xmm19 {k1} +vmovaps %xmm16, (%rax) {k1} +vmovaps %xmm16, %xmm19 {z}{k1} +vmovaps (%rax), %xmm19 {z}{k1} + +vmovaps %ymm16, %ymm19 +vmovaps (%rax), %ymm19 +vmovaps %ymm16, (%rax) +vmovaps %ymm16, %ymm19 {k1} +vmovaps (%rax), %ymm19 {k1} +vmovaps %ymm16, (%rax) {k1} +vmovaps %ymm16, %ymm19 {z}{k1} +vmovaps (%rax), %ymm19 {z}{k1} + vmovddup %xmm16, 
%xmm19 vmovddup (%rax), %xmm19 vmovddup %xmm16, %xmm19 {k1} @@ -315,6 +351,78 @@ vmovddup (%rax), %xmm19 {k1} vmovddup %xmm16, %xmm19 {z}{k1} vmovddup (%rax), %xmm19 {z}{k1} +vmovdqa32 %xmm16, %xmm19 +vmovdqa32 (%rax), %xmm19 +vmovdqa32 %xmm16, (%rax) +vmovdqa32 %xmm16, %xmm19 {k1} +vmovdqa32 (%rax), %xmm19 {k1} +vmovdqa32 %xmm16, (%rax) {k1} +vmovdqa32 %xmm16, %xmm19 {z}{k1} +vmovdqa32 (%rax), %xmm19 {z}{k1} + +vmovdqa32 %ymm16, %ymm19 +vmovdqa32 (%rax), %ymm19 +vmovdqa32 %ymm16, (%rax) +vmovdqa32 %ymm16, %ymm19 {k1} +vmovdqa32 (%rax), %ymm19 {k1} +vmovdqa32 %ymm16, (%rax) {k1} +vmovdqa32 %ymm16, %ymm19 {z}{k1} +vmovdqa32 (%rax), %ymm19 {z}{k1} + +vmovdqa64 %xmm16, %xmm19 +vmovdqa64 (%rax), %xmm19 +vmovdqa64 %xmm16, (%rax) +vmovdqa64 %xmm16, %xmm19 {k1} +vmovdqa64 (%rax), %xmm19 {k1} +vmovdqa64 %xmm16, (%rax) {k1} +vmovdqa64 %xmm16, %xmm19 {z}{k1} +vmovdqa64 (%rax), %xmm19 {z}{k1} + +vmovdqa64 %ymm16, %ymm19 +vmovdqa64 (%rax), %ymm19 +vmovdqa64 %ymm16, (%rax) +vmovdqa64 %ymm16, %ymm19 {k1} +vmovdqa64 (%rax), %ymm19 {k1} +vmovdqa64 %ymm16, (%rax) {k1} +vmovdqa64 %ymm16, %ymm19 {z}{k1} +vmovdqa64 (%rax), %ymm19 {z}{k1} + +vmovdqu32 %xmm16, %xmm19 +vmovdqu32 (%rax), %xmm19 +vmovdqu32 %xmm16, (%rax) +vmovdqu32 %xmm16, %xmm19 {k1} +vmovdqu32 (%rax), %xmm19 {k1} +vmovdqu32 %xmm16, (%rax) {k1} +vmovdqu32 %xmm16, %xmm19 {z}{k1} +vmovdqu32 (%rax), %xmm19 {z}{k1} + +vmovdqu32 %ymm16, %ymm19 +vmovdqu32 (%rax), %ymm19 +vmovdqu32 %ymm16, (%rax) +vmovdqu32 %ymm16, %ymm19 {k1} +vmovdqu32 (%rax), %ymm19 {k1} +vmovdqu32 %ymm16, (%rax) {k1} +vmovdqu32 %ymm16, %ymm19 {z}{k1} +vmovdqu32 (%rax), %ymm19 {z}{k1} + +vmovdqu64 %xmm16, %xmm19 +vmovdqu64 (%rax), %xmm19 +vmovdqu64 %xmm16, (%rax) +vmovdqu64 %xmm16, %xmm19 {k1} +vmovdqu64 (%rax), %xmm19 {k1} +vmovdqu64 %xmm16, (%rax) {k1} +vmovdqu64 %xmm16, %xmm19 {z}{k1} +vmovdqu64 (%rax), %xmm19 {z}{k1} + +vmovdqu64 %ymm16, %ymm19 +vmovdqu64 (%rax), %ymm19 +vmovdqu64 %ymm16, (%rax) +vmovdqu64 %ymm16, %ymm19 {k1} +vmovdqu64 (%rax), %ymm19 
{k1} +vmovdqu64 %ymm16, (%rax) {k1} +vmovdqu64 %ymm16, %ymm19 {z}{k1} +vmovdqu64 (%rax), %ymm19 {z}{k1} + vmovddup %ymm16, %ymm19 vmovddup (%rax), %ymm19 vmovddup %ymm16, %ymm19 {k1} @@ -350,6 +458,42 @@ vmovsldup (%rax), %ymm19 {k1} vmovsldup %ymm16, %ymm19 {z}{k1} vmovsldup (%rax), %ymm19 {z}{k1} +vmovupd %xmm16, %xmm19 +vmovupd (%rax), %xmm19 +vmovupd %xmm16, (%rax) +vmovupd %xmm16, %xmm19 {k1} +vmovupd (%rax), %xmm19 {k1} +vmovupd %xmm16, (%rax) {k1} +vmovupd %xmm16, %xmm19 {z}{k1} +vmovupd (%rax), %xmm19 {z}{k1} + +vmovupd %ymm16, %ymm19 +vmovupd (%rax), %ymm19 +vmovupd %ymm16, (%rax) +vmovupd %ymm16, %ymm19 {k1} +vmovupd (%rax), %ymm19 {k1} +vmovupd %ymm16, (%rax) {k1} +vmovupd %ymm16, %ymm19 {z}{k1} +vmovupd (%rax), %ymm19 {z}{k1} + +vmovups %xmm16, %xmm19 +vmovups (%rax), %xmm19 +vmovups %xmm16, (%rax) +vmovups %xmm16, %xmm19 {k1} +vmovups (%rax), %xmm19 {k1} +vmovups %xmm16, (%rax) {k1} +vmovups %xmm16, %xmm19 {z}{k1} +vmovups (%rax), %xmm19 {z}{k1} + +vmovups %ymm16, %ymm19 +vmovups (%rax), %ymm19 +vmovups %ymm16, (%rax) +vmovups %ymm16, %ymm19 {k1} +vmovups (%rax), %ymm19 {k1} +vmovups %ymm16, (%rax) {k1} +vmovups %ymm16, %ymm19 {z}{k1} +vmovups (%rax), %ymm19 {z}{k1} + vmulpd %xmm16, %xmm17, %xmm19 vmulpd (%rax), %xmm17, %xmm19 vmulpd (%rax){1to2}, %xmm17, %xmm19 @@ -1517,12 +1661,108 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vminps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vminps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovapd %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovapd (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovapd %xmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovapd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovapd %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovapd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovapd (%rax), %xmm19 {%k1} {z} +# 
CHECK-NEXT: 1 1 1.00 vmovapd %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovapd %ymm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovapd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovapd %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovapd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovaps %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovaps (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovaps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovaps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovaps %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovaps %ymm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovaps %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovaps %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovaps %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 # CHECK-NEXT: 2 7 1.00 * vmovddup (%rax), %xmm19 # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 {%k1} # CHECK-NEXT: 2 7 1.00 * vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 7 1.00 * vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqa32 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqa32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * 
vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqa32 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqa32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqa64 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqa64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqa64 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqa64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu32 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu32 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %ymm16, %ymm19 
{%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu64 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovdqu64 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovdqu64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %ymm16, %ymm19 # CHECK-NEXT: 2 8 1.00 * vmovddup (%rax), %ymm19 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm16, %ymm19 {%k1} @@ -1553,6 +1793,38 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vmovsldup (%rax), %ymm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovupd %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovupd (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovupd %xmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovupd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovupd %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovupd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovupd %ymm16, %ymm19 +# CHECK-NEXT: 
1 7 0.50 * vmovupd (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovupd %ymm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovupd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovupd %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovupd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovups %xmm16, %xmm19 +# CHECK-NEXT: 1 6 0.50 * vmovups (%rax), %xmm19 +# CHECK-NEXT: 1 1 1.00 * vmovups %xmm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovups %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 1 6 0.50 * vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovups %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovups %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 6 0.50 * vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 1.00 vmovups %ymm16, %ymm19 +# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm19 +# CHECK-NEXT: 1 1 1.00 * vmovups %ymm16, (%rax) +# CHECK-NEXT: 1 1 1.00 vmovups %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1 1 1.00 * vmovups %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 1.00 vmovups %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 5 1.00 vmulpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 11 1.00 * vmulpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 11 1.00 * vmulpd (%rax){1to2}, %xmm17, %xmm19 @@ -2348,7 +2620,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 196.00 359.50 - 608.50 350.50 350.50 +# CHECK-NEXT: - 1935.00 206.00 363.50 32.00 642.50 390.50 390.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2624,12 +2896,108 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminps (%rax), %ymm17, %ymm19 {%k1} 
{z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %xmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %ymm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovapd %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovapd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %xmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %ymm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovaps %ymm16, 
(%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovaps %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vmovddup %xmm16, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - vmovddup %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - vmovddup %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa32 %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa32 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa32 %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa32 %ymm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa64 %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa64 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 
0.50 vmovdqa64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa64 %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa64 %ymm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu32 %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu32 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu32 %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu32 %ymm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu64 %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu64 %xmm16, 
(%rax) +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu64 %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu64 %ymm16, (%rax) +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - vmovddup %ymm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovddup (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - vmovddup %ymm16, %ymm19 {%k1} @@ -2660,6 +3028,38 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovsldup (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - vmovsldup %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %xmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), 
%ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %ymm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovupd %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovupd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm16, %xmm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %xmm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %xmm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - 1.00 - - vmovups %ymm16, %ymm19 +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %ymm19 +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %ymm16, (%rax) +# CHECK-NEXT: - - - - - 1.00 - - vmovups %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovups %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - - - - 1.00 - - vmovups %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - - - vmulpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vmulpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index bbdfba5f3b5f..ebf3dd4eac97 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -212,6 +212,24 @@ vminps %zmm16, 
%zmm17, %zmm19 {z}{k1} vminps (%rax), %zmm17, %zmm19 {z}{k1} vminps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vmovapd %zmm16, %zmm19 +vmovapd (%rax), %zmm19 +vmovapd %zmm16, (%rax) +vmovapd %zmm16, %zmm19 {k1} +vmovapd (%rax), %zmm19 {k1} +vmovapd %zmm16, (%rax) {k1} +vmovapd %zmm16, %zmm19 {z}{k1} +vmovapd (%rax), %zmm19 {z}{k1} + +vmovaps %zmm16, %zmm19 +vmovaps (%rax), %zmm19 +vmovaps %zmm16, (%rax) +vmovaps %zmm16, %zmm19 {k1} +vmovaps (%rax), %zmm19 {k1} +vmovaps %zmm16, (%rax) {k1} +vmovaps %zmm16, %zmm19 {z}{k1} +vmovaps (%rax), %zmm19 {z}{k1} + vmovddup %zmm16, %zmm19 vmovddup (%rax), %zmm19 vmovddup %zmm16, %zmm19 {k1} @@ -219,6 +237,42 @@ vmovddup (%rax), %zmm19 {k1} vmovddup %zmm16, %zmm19 {z}{k1} vmovddup (%rax), %zmm19 {z}{k1} +vmovdqa32 %zmm16, %zmm19 +vmovdqa32 (%rax), %zmm19 +vmovdqa32 %zmm16, (%rax) +vmovdqa32 %zmm16, %zmm19 {k1} +vmovdqa32 (%rax), %zmm19 {k1} +vmovdqa32 %zmm16, (%rax) {k1} +vmovdqa32 %zmm16, %zmm19 {z}{k1} +vmovdqa32 (%rax), %zmm19 {z}{k1} + +vmovdqa64 %zmm16, %zmm19 +vmovdqa64 (%rax), %zmm19 +vmovdqa64 %zmm16, (%rax) +vmovdqa64 %zmm16, %zmm19 {k1} +vmovdqa64 (%rax), %zmm19 {k1} +vmovdqa64 %zmm16, (%rax) {k1} +vmovdqa64 %zmm16, %zmm19 {z}{k1} +vmovdqa64 (%rax), %zmm19 {z}{k1} + +vmovdqu32 %zmm16, %zmm19 +vmovdqu32 (%rax), %zmm19 +vmovdqu32 %zmm16, (%rax) +vmovdqu32 %zmm16, %zmm19 {k1} +vmovdqu32 (%rax), %zmm19 {k1} +vmovdqu32 %zmm16, (%rax) {k1} +vmovdqu32 %zmm16, %zmm19 {z}{k1} +vmovdqu32 (%rax), %zmm19 {z}{k1} + +vmovdqu64 %zmm16, %zmm19 +vmovdqu64 (%rax), %zmm19 +vmovdqu64 %zmm16, (%rax) +vmovdqu64 %zmm16, %zmm19 {k1} +vmovdqu64 (%rax), %zmm19 {k1} +vmovdqu64 %zmm16, (%rax) {k1} +vmovdqu64 %zmm16, %zmm19 {z}{k1} +vmovdqu64 (%rax), %zmm19 {z}{k1} + vmovshdup %zmm16, %zmm19 vmovshdup (%rax), %zmm19 vmovshdup %zmm16, %zmm19 {k1} @@ -233,6 +287,24 @@ vmovsldup (%rax), %zmm19 {k1} vmovsldup %zmm16, %zmm19 {z}{k1} vmovsldup (%rax), %zmm19 {z}{k1} +vmovupd %zmm16, %zmm19 +vmovupd (%rax), %zmm19 +vmovupd %zmm16, (%rax) +vmovupd %zmm16, 
%zmm19 {k1} +vmovupd (%rax), %zmm19 {k1} +vmovupd %zmm16, (%rax) {k1} +vmovupd %zmm16, %zmm19 {z}{k1} +vmovupd (%rax), %zmm19 {z}{k1} + +vmovups %zmm16, %zmm19 +vmovups (%rax), %zmm19 +vmovups %zmm16, (%rax) +vmovups %zmm16, %zmm19 {k1} +vmovups (%rax), %zmm19 {k1} +vmovups %zmm16, (%rax) {k1} +vmovups %zmm16, %zmm19 {z}{k1} +vmovups (%rax), %zmm19 {z}{k1} + vmulpd %zmm16, %zmm17, %zmm19 vmulpd (%rax), %zmm17, %zmm19 vmulpd (%rax){1to8}, %zmm17, %zmm19 @@ -996,12 +1068,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 
+# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovshdup (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 {%k1} @@ -1014,6 +1134,22 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z} # 
CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax){1to8}, %zmm17, %zmm19 @@ -1535,7 +1671,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 220.67 41.67 261.50 261.50 - 539.67 2.00 - - - +# CHECK-NEXT: - 612.00 236.67 57.67 278.83 278.83 16.00 555.67 2.00 5.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1723,12 +1859,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd 
(%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 
0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %zmm16, (%rax) {%k1} +# 
CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovshdup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovshdup (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovshdup %zmm16, %zmm19 {%k1} @@ -1741,6 +1925,22 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 
- - - - vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vmulpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vmulpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s index 12ee08425c2d..d98738fc9352 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s @@ -20,6 +20,24 @@ kshiftrq $2, %k1, %k2 kunpckdq %k0, %k1, %k2 kunpckwd %k0, %k1, %k2 +vmovdqu8 %zmm16, %zmm19 +vmovdqu8 (%rax), %zmm19 +vmovdqu8 %zmm16, (%rax) +vmovdqu8 %zmm16, %zmm19 {k1} +vmovdqu8 (%rax), %zmm19 {k1} +vmovdqu8 %zmm16, (%rax) {k1} +vmovdqu8 %zmm16, %zmm19 {z}{k1} +vmovdqu8 (%rax), %zmm19 {z}{k1} + +vmovdqu16 %zmm16, %zmm19 +vmovdqu16 (%rax), %zmm19 +vmovdqu16 %zmm16, (%rax) +vmovdqu16 %zmm16, %zmm19 {k1} +vmovdqu16 (%rax), %zmm19 {k1} +vmovdqu16 %zmm16, (%rax) {k1} +vmovdqu16 %zmm16, %zmm19 {z}{k1} +vmovdqu16 (%rax), %zmm19 {z}{k1} + vpabsb %zmm16, %zmm19 vpabsb (%rax), %zmm19 vpabsb %zmm16, %zmm19 {k1} @@ -280,6 +298,22 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 1 4 1.00 kshiftrq $2, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckdq %k0, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckwd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 
* vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 {%k1} @@ -491,7 +525,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 32.50 8.50 47.50 47.50 - 176.50 0.50 - - - +# CHECK-NEXT: - - 36.50 13.50 52.50 52.50 6.00 181.50 0.50 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -513,6 +547,22 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - - - 1.00 - - - - kshiftrq $2, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - - - kunpckdq %k0, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - - - kunpckwd %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 - - vmovdqu8 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 - - vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - 
vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - - - - - - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vpabsb (%rax), %zmm19 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpabsb %zmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s index d8bb3edc6ed7..0539e352c380 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s @@ -1,6 +1,42 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=icelake-server -instruction-tables < %s | FileCheck %s +vmovdqu8 %xmm16, %xmm19 +vmovdqu8 (%rax), %xmm19 +vmovdqu8 %xmm16, (%rax) +vmovdqu8 %xmm16, %xmm19 {k1} +vmovdqu8 (%rax), %xmm19 {k1} +vmovdqu8 %xmm16, (%rax) {k1} +vmovdqu8 %xmm16, %xmm19 {z}{k1} +vmovdqu8 (%rax), %xmm19 {z}{k1} + +vmovdqu8 %ymm16, %ymm19 +vmovdqu8 (%rax), %ymm19 +vmovdqu8 %ymm16, (%rax) +vmovdqu8 %ymm16, %ymm19 {k1} +vmovdqu8 (%rax), %ymm19 {k1} +vmovdqu8 %ymm16, (%rax) {k1} +vmovdqu8 %ymm16, %ymm19 {z}{k1} +vmovdqu8 (%rax), %ymm19 {z}{k1} + +vmovdqu16 %xmm16, %xmm19 +vmovdqu16 (%rax), %xmm19 +vmovdqu16 %xmm16, (%rax) +vmovdqu16 %xmm16, %xmm19 {k1} +vmovdqu16 (%rax), %xmm19 {k1} +vmovdqu16 %xmm16, (%rax) {k1} +vmovdqu16 %xmm16, %xmm19 {z}{k1} +vmovdqu16 (%rax), %xmm19 {z}{k1} + +vmovdqu16 %ymm16, %ymm19 +vmovdqu16 (%rax), %ymm19 +vmovdqu16 %ymm16, (%rax) +vmovdqu16 %ymm16, %ymm19 {k1} +vmovdqu16 (%rax), %ymm19 {k1} +vmovdqu16 %ymm16, (%rax) {k1} +vmovdqu16 %ymm16, %ymm19 {z}{k1} +vmovdqu16 (%rax), %ymm19 {z}{k1} + vpabsb %xmm16, %xmm19 vpabsb 
(%rax), %xmm19 vpabsb %xmm16, %xmm19 {k1} @@ -476,6 +512,38 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpabsb %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpabsb (%rax), 
%xmm19 # CHECK-NEXT: 1 1 0.50 vpabsb %xmm16, %xmm19 {%k1} @@ -881,10 +949,42 @@ vpmovw2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 33.00 81.00 95.00 95.00 - 285.00 1.00 - - - +# CHECK-NEXT: - - 41.00 89.00 103.67 103.67 8.00 293.00 1.00 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu8 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu8 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu8 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu8 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 
0.33 - - - - vmovdqu16 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vpabsb %xmm16, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vpabsb (%rax), %xmm19 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vpabsb %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s index 796a716f1fa1..35ed47fb69f9 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s @@ -308,6 +308,42 @@ vminps %ymm16, %ymm17, %ymm19 {z}{k1} vminps (%rax), %ymm17, %ymm19 {z}{k1} vminps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vmovapd %xmm16, %xmm19 +vmovapd (%rax), %xmm19 +vmovapd %xmm16, (%rax) +vmovapd %xmm16, %xmm19 {k1} +vmovapd (%rax), %xmm19 {k1} +vmovapd %xmm16, (%rax) {k1} +vmovapd %xmm16, %xmm19 {z}{k1} +vmovapd (%rax), %xmm19 {z}{k1} + +vmovapd %ymm16, %ymm19 +vmovapd (%rax), %ymm19 +vmovapd %ymm16, (%rax) 
+vmovapd %ymm16, %ymm19 {k1} +vmovapd (%rax), %ymm19 {k1} +vmovapd %ymm16, (%rax) {k1} +vmovapd %ymm16, %ymm19 {z}{k1} +vmovapd (%rax), %ymm19 {z}{k1} + +vmovaps %xmm16, %xmm19 +vmovaps (%rax), %xmm19 +vmovaps %xmm16, (%rax) +vmovaps %xmm16, %xmm19 {k1} +vmovaps (%rax), %xmm19 {k1} +vmovaps %xmm16, (%rax) {k1} +vmovaps %xmm16, %xmm19 {z}{k1} +vmovaps (%rax), %xmm19 {z}{k1} + +vmovaps %ymm16, %ymm19 +vmovaps (%rax), %ymm19 +vmovaps %ymm16, (%rax) +vmovaps %ymm16, %ymm19 {k1} +vmovaps (%rax), %ymm19 {k1} +vmovaps %ymm16, (%rax) {k1} +vmovaps %ymm16, %ymm19 {z}{k1} +vmovaps (%rax), %ymm19 {z}{k1} + vmovddup %xmm16, %xmm19 vmovddup (%rax), %xmm19 vmovddup %xmm16, %xmm19 {k1} @@ -315,6 +351,78 @@ vmovddup (%rax), %xmm19 {k1} vmovddup %xmm16, %xmm19 {z}{k1} vmovddup (%rax), %xmm19 {z}{k1} +vmovdqa32 %xmm16, %xmm19 +vmovdqa32 (%rax), %xmm19 +vmovdqa32 %xmm16, (%rax) +vmovdqa32 %xmm16, %xmm19 {k1} +vmovdqa32 (%rax), %xmm19 {k1} +vmovdqa32 %xmm16, (%rax) {k1} +vmovdqa32 %xmm16, %xmm19 {z}{k1} +vmovdqa32 (%rax), %xmm19 {z}{k1} + +vmovdqa32 %ymm16, %ymm19 +vmovdqa32 (%rax), %ymm19 +vmovdqa32 %ymm16, (%rax) +vmovdqa32 %ymm16, %ymm19 {k1} +vmovdqa32 (%rax), %ymm19 {k1} +vmovdqa32 %ymm16, (%rax) {k1} +vmovdqa32 %ymm16, %ymm19 {z}{k1} +vmovdqa32 (%rax), %ymm19 {z}{k1} + +vmovdqa64 %xmm16, %xmm19 +vmovdqa64 (%rax), %xmm19 +vmovdqa64 %xmm16, (%rax) +vmovdqa64 %xmm16, %xmm19 {k1} +vmovdqa64 (%rax), %xmm19 {k1} +vmovdqa64 %xmm16, (%rax) {k1} +vmovdqa64 %xmm16, %xmm19 {z}{k1} +vmovdqa64 (%rax), %xmm19 {z}{k1} + +vmovdqa64 %ymm16, %ymm19 +vmovdqa64 (%rax), %ymm19 +vmovdqa64 %ymm16, (%rax) +vmovdqa64 %ymm16, %ymm19 {k1} +vmovdqa64 (%rax), %ymm19 {k1} +vmovdqa64 %ymm16, (%rax) {k1} +vmovdqa64 %ymm16, %ymm19 {z}{k1} +vmovdqa64 (%rax), %ymm19 {z}{k1} + +vmovdqu32 %xmm16, %xmm19 +vmovdqu32 (%rax), %xmm19 +vmovdqu32 %xmm16, (%rax) +vmovdqu32 %xmm16, %xmm19 {k1} +vmovdqu32 (%rax), %xmm19 {k1} +vmovdqu32 %xmm16, (%rax) {k1} +vmovdqu32 %xmm16, %xmm19 {z}{k1} +vmovdqu32 (%rax), %xmm19 {z}{k1} + 
+vmovdqu32 %ymm16, %ymm19 +vmovdqu32 (%rax), %ymm19 +vmovdqu32 %ymm16, (%rax) +vmovdqu32 %ymm16, %ymm19 {k1} +vmovdqu32 (%rax), %ymm19 {k1} +vmovdqu32 %ymm16, (%rax) {k1} +vmovdqu32 %ymm16, %ymm19 {z}{k1} +vmovdqu32 (%rax), %ymm19 {z}{k1} + +vmovdqu64 %xmm16, %xmm19 +vmovdqu64 (%rax), %xmm19 +vmovdqu64 %xmm16, (%rax) +vmovdqu64 %xmm16, %xmm19 {k1} +vmovdqu64 (%rax), %xmm19 {k1} +vmovdqu64 %xmm16, (%rax) {k1} +vmovdqu64 %xmm16, %xmm19 {z}{k1} +vmovdqu64 (%rax), %xmm19 {z}{k1} + +vmovdqu64 %ymm16, %ymm19 +vmovdqu64 (%rax), %ymm19 +vmovdqu64 %ymm16, (%rax) +vmovdqu64 %ymm16, %ymm19 {k1} +vmovdqu64 (%rax), %ymm19 {k1} +vmovdqu64 %ymm16, (%rax) {k1} +vmovdqu64 %ymm16, %ymm19 {z}{k1} +vmovdqu64 (%rax), %ymm19 {z}{k1} + vmovddup %ymm16, %ymm19 vmovddup (%rax), %ymm19 vmovddup %ymm16, %ymm19 {k1} @@ -350,6 +458,42 @@ vmovsldup (%rax), %ymm19 {k1} vmovsldup %ymm16, %ymm19 {z}{k1} vmovsldup (%rax), %ymm19 {z}{k1} +vmovupd %xmm16, %xmm19 +vmovupd (%rax), %xmm19 +vmovupd %xmm16, (%rax) +vmovupd %xmm16, %xmm19 {k1} +vmovupd (%rax), %xmm19 {k1} +vmovupd %xmm16, (%rax) {k1} +vmovupd %xmm16, %xmm19 {z}{k1} +vmovupd (%rax), %xmm19 {z}{k1} + +vmovupd %ymm16, %ymm19 +vmovupd (%rax), %ymm19 +vmovupd %ymm16, (%rax) +vmovupd %ymm16, %ymm19 {k1} +vmovupd (%rax), %ymm19 {k1} +vmovupd %ymm16, (%rax) {k1} +vmovupd %ymm16, %ymm19 {z}{k1} +vmovupd (%rax), %ymm19 {z}{k1} + +vmovups %xmm16, %xmm19 +vmovups (%rax), %xmm19 +vmovups %xmm16, (%rax) +vmovups %xmm16, %xmm19 {k1} +vmovups (%rax), %xmm19 {k1} +vmovups %xmm16, (%rax) {k1} +vmovups %xmm16, %xmm19 {z}{k1} +vmovups (%rax), %xmm19 {z}{k1} + +vmovups %ymm16, %ymm19 +vmovups (%rax), %ymm19 +vmovups %ymm16, (%rax) +vmovups %ymm16, %ymm19 {k1} +vmovups (%rax), %ymm19 {k1} +vmovups %ymm16, (%rax) {k1} +vmovups %ymm16, %ymm19 {z}{k1} +vmovups (%rax), %ymm19 {z}{k1} + vmulpd %xmm16, %xmm17, %xmm19 vmulpd (%rax), %xmm17, %xmm19 vmulpd (%rax){1to2}, %xmm17, %xmm19 @@ -1517,12 +1661,108 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 
1 4 0.50 vminps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovapd %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovapd (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovapd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovapd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovapd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovapd %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovapd %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovapd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovapd %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovapd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovaps %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovaps (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovaps %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovaps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovaps %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovaps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovaps %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovaps %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovaps %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovaps %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovaps %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vmovddup (%rax), %xmm19 # CHECK-NEXT: 1 1 
1.00 vmovddup %xmm16, %xmm19 {%k1} # CHECK-NEXT: 2 7 0.50 * vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 7 0.50 * vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 
%xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %ymm16, %ymm19 # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %ymm19 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm16, %ymm19 {%k1} @@ -1553,6 +1793,38 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %ymm19 
{%k1} # CHECK-NEXT: 1 1 0.50 vmovsldup %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovupd %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovupd (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovupd %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovupd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovupd %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovupd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovupd %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovupd %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovupd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovupd %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovupd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovups %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovups (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovups %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovups %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovups %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovups %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovups %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovups %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovups %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovups %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovups %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vmulpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax){1to2}, %xmm17, %xmm19 @@ 
-2352,7 +2624,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 423.00 257.33 232.33 372.50 372.50 - 652.33 4.00 - - - +# CHECK-NEXT: - 423.00 289.33 264.33 407.17 407.17 32.00 684.33 4.00 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2628,12 +2900,108 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vminps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 
- 0.33 - - - - vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %xmm16, %xmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %xmm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %xmm16, (%rax) +# 
CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 
%ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 
0.33 - - - - vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %ymm16, %ymm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %ymm19 # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %ymm16, %ymm19 {%k1} @@ -2664,6 +3032,38 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vmovsldup %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd 
%xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %ymm19 {%k1} 
{z} # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vmulpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vmulpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index b664dda42699..919129d30f99 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -212,6 +212,24 @@ vminps %zmm16, %zmm17, %zmm19 {z}{k1} vminps (%rax), %zmm17, %zmm19 {z}{k1} vminps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vmovapd %zmm16, %zmm19 +vmovapd (%rax), %zmm19 +vmovapd %zmm16, (%rax) +vmovapd %zmm16, %zmm19 {k1} +vmovapd (%rax), %zmm19 {k1} +vmovapd %zmm16, (%rax) {k1} +vmovapd %zmm16, %zmm19 {z}{k1} +vmovapd (%rax), %zmm19 {z}{k1} + +vmovaps %zmm16, %zmm19 +vmovaps (%rax), %zmm19 +vmovaps %zmm16, (%rax) +vmovaps %zmm16, %zmm19 {k1} +vmovaps (%rax), %zmm19 {k1} +vmovaps %zmm16, (%rax) {k1} +vmovaps %zmm16, %zmm19 {z}{k1} +vmovaps (%rax), %zmm19 {z}{k1} + vmovddup %zmm16, %zmm19 vmovddup (%rax), %zmm19 vmovddup %zmm16, %zmm19 {k1} @@ -219,6 +237,42 @@ vmovddup (%rax), %zmm19 {k1} vmovddup %zmm16, %zmm19 {z}{k1} vmovddup (%rax), %zmm19 {z}{k1} +vmovdqa32 %zmm16, %zmm19 +vmovdqa32 (%rax), %zmm19 +vmovdqa32 %zmm16, (%rax) +vmovdqa32 %zmm16, %zmm19 {k1} +vmovdqa32 (%rax), %zmm19 {k1} +vmovdqa32 %zmm16, (%rax) {k1} +vmovdqa32 %zmm16, %zmm19 {z}{k1} +vmovdqa32 (%rax), %zmm19 {z}{k1} + +vmovdqa64 %zmm16, %zmm19 +vmovdqa64 (%rax), %zmm19 +vmovdqa64 %zmm16, (%rax) +vmovdqa64 %zmm16, %zmm19 {k1} +vmovdqa64 (%rax), %zmm19 {k1} +vmovdqa64 %zmm16, (%rax) {k1} +vmovdqa64 %zmm16, %zmm19 {z}{k1} +vmovdqa64 (%rax), %zmm19 {z}{k1} + +vmovdqu32 %zmm16, %zmm19 +vmovdqu32 (%rax), %zmm19 +vmovdqu32 %zmm16, (%rax) +vmovdqu32 %zmm16, %zmm19 {k1} +vmovdqu32 (%rax), %zmm19 {k1} +vmovdqu32 %zmm16, (%rax) {k1} 
+vmovdqu32 %zmm16, %zmm19 {z}{k1} +vmovdqu32 (%rax), %zmm19 {z}{k1} + +vmovdqu64 %zmm16, %zmm19 +vmovdqu64 (%rax), %zmm19 +vmovdqu64 %zmm16, (%rax) +vmovdqu64 %zmm16, %zmm19 {k1} +vmovdqu64 (%rax), %zmm19 {k1} +vmovdqu64 %zmm16, (%rax) {k1} +vmovdqu64 %zmm16, %zmm19 {z}{k1} +vmovdqu64 (%rax), %zmm19 {z}{k1} + vmovshdup %zmm16, %zmm19 vmovshdup (%rax), %zmm19 vmovshdup %zmm16, %zmm19 {k1} @@ -233,6 +287,24 @@ vmovsldup (%rax), %zmm19 {k1} vmovsldup %zmm16, %zmm19 {z}{k1} vmovsldup (%rax), %zmm19 {z}{k1} +vmovupd %zmm16, %zmm19 +vmovupd (%rax), %zmm19 +vmovupd %zmm16, (%rax) +vmovupd %zmm16, %zmm19 {k1} +vmovupd (%rax), %zmm19 {k1} +vmovupd %zmm16, (%rax) {k1} +vmovupd %zmm16, %zmm19 {z}{k1} +vmovupd (%rax), %zmm19 {z}{k1} + +vmovups %zmm16, %zmm19 +vmovups (%rax), %zmm19 +vmovups %zmm16, (%rax) +vmovups %zmm16, %zmm19 {k1} +vmovups (%rax), %zmm19 {k1} +vmovups %zmm16, (%rax) {k1} +vmovups %zmm16, %zmm19 {z}{k1} +vmovups (%rax), %zmm19 {z}{k1} + vmulpd %zmm16, %zmm17, %zmm19 vmulpd (%rax), %zmm17, %zmm19 vmulpd (%rax){1to8}, %zmm17, %zmm19 @@ -996,12 +1068,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 
{%k1} +# CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 
vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovshdup (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 {%k1} @@ -1014,6 +1134,22 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax){1to8}, %zmm17, %zmm19 @@ -1533,7 +1669,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 220.67 41.67 261.50 261.50 - 539.67 2.00 - +# CHECK-NEXT: - 612.00 236.67 57.67 278.83 278.83 16.00 555.67 2.00 5.33 # CHECK: 
Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1721,12 +1857,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 
0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 
0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vmovshdup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovshdup (%rax), %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vmovshdup %zmm16, %zmm19 {%k1} @@ -1739,6 +1923,22 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 {%k1} +# 
CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmulpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s index 389f51f937f9..9e4d4cd654f7 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s @@ -20,6 +20,24 @@ kshiftrq $2, %k1, %k2 kunpckdq %k0, %k1, %k2 kunpckwd %k0, %k1, %k2 +vmovdqu8 %zmm16, %zmm19 +vmovdqu8 (%rax), %zmm19 +vmovdqu8 %zmm16, (%rax) +vmovdqu8 %zmm16, %zmm19 {k1} +vmovdqu8 (%rax), %zmm19 {k1} +vmovdqu8 %zmm16, (%rax) {k1} +vmovdqu8 %zmm16, %zmm19 {z}{k1} +vmovdqu8 (%rax), %zmm19 {z}{k1} + +vmovdqu16 %zmm16, %zmm19 +vmovdqu16 (%rax), %zmm19 +vmovdqu16 %zmm16, (%rax) +vmovdqu16 %zmm16, %zmm19 {k1} +vmovdqu16 (%rax), %zmm19 {k1} +vmovdqu16 %zmm16, (%rax) {k1} +vmovdqu16 %zmm16, %zmm19 {z}{k1} +vmovdqu16 (%rax), %zmm19 {z}{k1} + vpabsb %zmm16, %zmm19 vpabsb (%rax), %zmm19 vpabsb %zmm16, %zmm19 {k1} @@ -280,6 +298,22 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 1 4 1.00 kshiftrq $2, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckdq %k0, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckwd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * 
vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 {%k1} @@ -489,7 +523,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 32.50 8.50 47.50 47.50 - 176.50 0.50 - +# CHECK-NEXT: - - 36.50 13.50 52.50 52.50 6.00 181.50 0.50 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -511,6 +545,22 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - - - 1.00 - - kshiftrq $2, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - kunpckdq %k0, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - kunpckwd %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 vmovdqu8 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: - - - - 0.33 
0.33 1.00 - - 0.33 vmovdqu16 %zmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %zmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - - - - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpabsb (%rax), %zmm19 # CHECK-NEXT: - - 1.00 - - - - - - - vpabsb %zmm16, %zmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s index 042108d1c784..2744d2894f0e 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s @@ -1,6 +1,42 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -instruction-tables < %s | FileCheck %s +vmovdqu8 %xmm16, %xmm19 +vmovdqu8 (%rax), %xmm19 +vmovdqu8 %xmm16, (%rax) +vmovdqu8 %xmm16, %xmm19 {k1} +vmovdqu8 (%rax), %xmm19 {k1} +vmovdqu8 %xmm16, (%rax) {k1} +vmovdqu8 %xmm16, %xmm19 {z}{k1} +vmovdqu8 (%rax), %xmm19 {z}{k1} + +vmovdqu8 %ymm16, %ymm19 +vmovdqu8 (%rax), %ymm19 +vmovdqu8 %ymm16, (%rax) +vmovdqu8 %ymm16, %ymm19 {k1} +vmovdqu8 (%rax), %ymm19 {k1} +vmovdqu8 %ymm16, (%rax) {k1} +vmovdqu8 %ymm16, %ymm19 {z}{k1} +vmovdqu8 (%rax), %ymm19 {z}{k1} + +vmovdqu16 %xmm16, %xmm19 +vmovdqu16 (%rax), %xmm19 +vmovdqu16 %xmm16, (%rax) +vmovdqu16 %xmm16, %xmm19 {k1} +vmovdqu16 (%rax), %xmm19 {k1} +vmovdqu16 %xmm16, (%rax) {k1} +vmovdqu16 %xmm16, %xmm19 {z}{k1} +vmovdqu16 (%rax), %xmm19 {z}{k1} + +vmovdqu16 %ymm16, %ymm19 +vmovdqu16 (%rax), %ymm19 +vmovdqu16 %ymm16, (%rax) +vmovdqu16 %ymm16, %ymm19 {k1} +vmovdqu16 
(%rax), %ymm19 {k1} +vmovdqu16 %ymm16, (%rax) {k1} +vmovdqu16 %ymm16, %ymm19 {z}{k1} +vmovdqu16 (%rax), %ymm19 {z}{k1} + vpabsb %xmm16, %xmm19 vpabsb (%rax), %xmm19 vpabsb %xmm16, %xmm19 {k1} @@ -476,6 +512,38 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu8 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu8 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu16 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu16 %ymm16, %ymm19 {%k1} 
{z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 0.50 vpabsb %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpabsb (%rax), %xmm19 # CHECK-NEXT: 1 1 0.50 vpabsb %xmm16, %xmm19 {%k1} @@ -879,10 +947,42 @@ vpmovw2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 33.00 29.00 95.00 95.00 - 337.00 1.00 - +# CHECK-NEXT: - - 41.00 37.00 103.67 103.67 8.00 345.00 1.00 2.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu8 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu8 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu8 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu8 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 
%xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vpabsb %xmm16, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpabsb (%rax), %xmm19 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vpabsb %xmm16, %xmm19 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s index 36878e8295ee..819737e433a1 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -308,6 +308,42 @@ vminps %ymm16, %ymm17, %ymm19 {z}{k1} vminps (%rax), %ymm17, %ymm19 {z}{k1} vminps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vmovapd %xmm16, %xmm19 +vmovapd (%rax), %xmm19 +vmovapd %xmm16, (%rax) +vmovapd %xmm16, %xmm19 {k1} +vmovapd (%rax), %xmm19 {k1} +vmovapd %xmm16, (%rax) {k1} +vmovapd %xmm16, %xmm19 {z}{k1} +vmovapd (%rax), %xmm19 {z}{k1} + +vmovapd %ymm16, %ymm19 +vmovapd (%rax), %ymm19 +vmovapd %ymm16, (%rax) +vmovapd 
%ymm16, %ymm19 {k1} +vmovapd (%rax), %ymm19 {k1} +vmovapd %ymm16, (%rax) {k1} +vmovapd %ymm16, %ymm19 {z}{k1} +vmovapd (%rax), %ymm19 {z}{k1} + +vmovaps %xmm16, %xmm19 +vmovaps (%rax), %xmm19 +vmovaps %xmm16, (%rax) +vmovaps %xmm16, %xmm19 {k1} +vmovaps (%rax), %xmm19 {k1} +vmovaps %xmm16, (%rax) {k1} +vmovaps %xmm16, %xmm19 {z}{k1} +vmovaps (%rax), %xmm19 {z}{k1} + +vmovaps %ymm16, %ymm19 +vmovaps (%rax), %ymm19 +vmovaps %ymm16, (%rax) +vmovaps %ymm16, %ymm19 {k1} +vmovaps (%rax), %ymm19 {k1} +vmovaps %ymm16, (%rax) {k1} +vmovaps %ymm16, %ymm19 {z}{k1} +vmovaps (%rax), %ymm19 {z}{k1} + vmovddup %xmm16, %xmm19 vmovddup (%rax), %xmm19 vmovddup %xmm16, %xmm19 {k1} @@ -315,6 +351,78 @@ vmovddup (%rax), %xmm19 {k1} vmovddup %xmm16, %xmm19 {z}{k1} vmovddup (%rax), %xmm19 {z}{k1} +vmovdqa32 %xmm16, %xmm19 +vmovdqa32 (%rax), %xmm19 +vmovdqa32 %xmm16, (%rax) +vmovdqa32 %xmm16, %xmm19 {k1} +vmovdqa32 (%rax), %xmm19 {k1} +vmovdqa32 %xmm16, (%rax) {k1} +vmovdqa32 %xmm16, %xmm19 {z}{k1} +vmovdqa32 (%rax), %xmm19 {z}{k1} + +vmovdqa32 %ymm16, %ymm19 +vmovdqa32 (%rax), %ymm19 +vmovdqa32 %ymm16, (%rax) +vmovdqa32 %ymm16, %ymm19 {k1} +vmovdqa32 (%rax), %ymm19 {k1} +vmovdqa32 %ymm16, (%rax) {k1} +vmovdqa32 %ymm16, %ymm19 {z}{k1} +vmovdqa32 (%rax), %ymm19 {z}{k1} + +vmovdqa64 %xmm16, %xmm19 +vmovdqa64 (%rax), %xmm19 +vmovdqa64 %xmm16, (%rax) +vmovdqa64 %xmm16, %xmm19 {k1} +vmovdqa64 (%rax), %xmm19 {k1} +vmovdqa64 %xmm16, (%rax) {k1} +vmovdqa64 %xmm16, %xmm19 {z}{k1} +vmovdqa64 (%rax), %xmm19 {z}{k1} + +vmovdqa64 %ymm16, %ymm19 +vmovdqa64 (%rax), %ymm19 +vmovdqa64 %ymm16, (%rax) +vmovdqa64 %ymm16, %ymm19 {k1} +vmovdqa64 (%rax), %ymm19 {k1} +vmovdqa64 %ymm16, (%rax) {k1} +vmovdqa64 %ymm16, %ymm19 {z}{k1} +vmovdqa64 (%rax), %ymm19 {z}{k1} + +vmovdqu32 %xmm16, %xmm19 +vmovdqu32 (%rax), %xmm19 +vmovdqu32 %xmm16, (%rax) +vmovdqu32 %xmm16, %xmm19 {k1} +vmovdqu32 (%rax), %xmm19 {k1} +vmovdqu32 %xmm16, (%rax) {k1} +vmovdqu32 %xmm16, %xmm19 {z}{k1} +vmovdqu32 (%rax), %xmm19 {z}{k1} + 
+vmovdqu32 %ymm16, %ymm19 +vmovdqu32 (%rax), %ymm19 +vmovdqu32 %ymm16, (%rax) +vmovdqu32 %ymm16, %ymm19 {k1} +vmovdqu32 (%rax), %ymm19 {k1} +vmovdqu32 %ymm16, (%rax) {k1} +vmovdqu32 %ymm16, %ymm19 {z}{k1} +vmovdqu32 (%rax), %ymm19 {z}{k1} + +vmovdqu64 %xmm16, %xmm19 +vmovdqu64 (%rax), %xmm19 +vmovdqu64 %xmm16, (%rax) +vmovdqu64 %xmm16, %xmm19 {k1} +vmovdqu64 (%rax), %xmm19 {k1} +vmovdqu64 %xmm16, (%rax) {k1} +vmovdqu64 %xmm16, %xmm19 {z}{k1} +vmovdqu64 (%rax), %xmm19 {z}{k1} + +vmovdqu64 %ymm16, %ymm19 +vmovdqu64 (%rax), %ymm19 +vmovdqu64 %ymm16, (%rax) +vmovdqu64 %ymm16, %ymm19 {k1} +vmovdqu64 (%rax), %ymm19 {k1} +vmovdqu64 %ymm16, (%rax) {k1} +vmovdqu64 %ymm16, %ymm19 {z}{k1} +vmovdqu64 (%rax), %ymm19 {z}{k1} + vmovddup %ymm16, %ymm19 vmovddup (%rax), %ymm19 vmovddup %ymm16, %ymm19 {k1} @@ -350,6 +458,42 @@ vmovsldup (%rax), %ymm19 {k1} vmovsldup %ymm16, %ymm19 {z}{k1} vmovsldup (%rax), %ymm19 {z}{k1} +vmovupd %xmm16, %xmm19 +vmovupd (%rax), %xmm19 +vmovupd %xmm16, (%rax) +vmovupd %xmm16, %xmm19 {k1} +vmovupd (%rax), %xmm19 {k1} +vmovupd %xmm16, (%rax) {k1} +vmovupd %xmm16, %xmm19 {z}{k1} +vmovupd (%rax), %xmm19 {z}{k1} + +vmovupd %ymm16, %ymm19 +vmovupd (%rax), %ymm19 +vmovupd %ymm16, (%rax) +vmovupd %ymm16, %ymm19 {k1} +vmovupd (%rax), %ymm19 {k1} +vmovupd %ymm16, (%rax) {k1} +vmovupd %ymm16, %ymm19 {z}{k1} +vmovupd (%rax), %ymm19 {z}{k1} + +vmovups %xmm16, %xmm19 +vmovups (%rax), %xmm19 +vmovups %xmm16, (%rax) +vmovups %xmm16, %xmm19 {k1} +vmovups (%rax), %xmm19 {k1} +vmovups %xmm16, (%rax) {k1} +vmovups %xmm16, %xmm19 {z}{k1} +vmovups (%rax), %xmm19 {z}{k1} + +vmovups %ymm16, %ymm19 +vmovups (%rax), %ymm19 +vmovups %ymm16, (%rax) +vmovups %ymm16, %ymm19 {k1} +vmovups (%rax), %ymm19 {k1} +vmovups %ymm16, (%rax) {k1} +vmovups %ymm16, %ymm19 {z}{k1} +vmovups (%rax), %ymm19 {z}{k1} + vmulpd %xmm16, %xmm17, %xmm19 vmulpd (%rax), %xmm17, %xmm19 vmulpd (%rax){1to2}, %xmm17, %xmm19 @@ -1517,12 +1661,108 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 
1 4 0.50 vminps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovapd %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovapd (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovapd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovapd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovapd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovapd %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovapd %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovapd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovapd %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovapd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovaps %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovaps (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovaps %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovaps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovaps %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovaps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovaps %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovaps %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovaps %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovaps %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovaps %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 # CHECK-NEXT: 2 7 0.50 * vmovddup (%rax), %xmm19 # CHECK-NEXT: 1 1 
1.00 vmovddup %xmm16, %xmm19 {%k1} # CHECK-NEXT: 2 7 0.50 * vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: 2 7 0.50 * vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqa64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqa64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 
%xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovdqu64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovdqu64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %ymm16, %ymm19 # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %ymm19 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm16, %ymm19 {%k1} @@ -1553,6 +1793,38 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %ymm19 
{%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovupd %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovupd (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovupd %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovupd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovupd %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovupd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovupd %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovupd %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovupd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovupd %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovupd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovups %xmm16, %xmm19 +# CHECK-NEXT: 2 7 0.50 * vmovups (%rax), %xmm19 +# CHECK-NEXT: 2 1 1.00 * vmovups %xmm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovups %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 0.50 * vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovups %xmm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovups %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 0.50 * vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vmovups %ymm16, %ymm19 +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %ymm19 +# CHECK-NEXT: 2 1 1.00 * vmovups %ymm16, (%rax) +# CHECK-NEXT: 1 1 0.33 vmovups %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 1 1.00 * vmovups %ymm16, (%rax) {%k1} +# CHECK-NEXT: 1 1 0.33 vmovups %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vmulpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax){1to2}, %xmm17, %xmm19 @@ 
-2350,7 +2622,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 257.33 169.33 372.50 372.50 - 715.33 4.00 - +# CHECK-NEXT: - 423.00 289.33 201.33 407.17 407.17 32.00 747.33 4.00 10.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2626,12 +2898,108 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps 
%xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %xmm16, %xmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %xmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %xmm16, %xmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 
vmovdqa32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - 
- vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - 
vmovdqu64 (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %ymm16, %ymm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %ymm19 # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %ymm16, %ymm19 {%k1} @@ -2662,6 +3030,38 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vmovsldup %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 
- - 0.33 vmovupd %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %xmm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %xmm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %xmm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %ymm19 +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %ymm16, (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %ymm16, (%rax) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax){1to2}, %xmm17, %xmm19 From 3e6586113157c164d39120139c14295058e88ccb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 27 Dec 2021 12:42:42 +0100 Subject: [PATCH 075/992] [CodeGen] Avoid one more pointer element type access The number of elements is always a SizeTy here. 
--- clang/lib/CodeGen/CGDecl.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index e09279c1d455..36185faf942f 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1392,9 +1392,11 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( else { // Create an artificial VarDecl to generate debug info for. IdentifierInfo *NameIdent = VLAExprNames[NameIdx++]; - auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType(); + assert(cast(VlaSize.NumElts->getType()) + ->isOpaqueOrPointeeTypeMatches(SizeTy) && + "Number of VLA elements must be SizeTy"); auto QT = getContext().getIntTypeForBitwidth( - VlaExprTy->getScalarSizeInBits(), false); + SizeTy->getScalarSizeInBits(), false); auto *ArtificialDecl = VarDecl::Create( getContext(), const_cast(D.getDeclContext()), D.getLocation(), D.getLocation(), NameIdent, QT, From 29475e02865b0ecad5f184f413382a68b9040047 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 27 Dec 2021 12:13:17 +0000 Subject: [PATCH 076/992] [X86] Add scheduler classes for zmm vector reg-reg move instructions Basic zmm reg-reg moves (with predication) are more port limited than xmm/ymm moves, so we need to add a separate class for them. We still appear to be missing move-elimination patterns for most of the intel models, which looks to be one of the main diffs for basic codegen analysis between llvm-mca and uops.info Load/stores are a bit messier and might be better handled as overrides. 
--- llvm/lib/Target/X86/X86SchedBroadwell.td | 2 + llvm/lib/Target/X86/X86SchedHaswell.td | 2 + llvm/lib/Target/X86/X86SchedIceLake.td | 2 + llvm/lib/Target/X86/X86SchedSandyBridge.td | 2 + llvm/lib/Target/X86/X86SchedSkylakeClient.td | 2 + llvm/lib/Target/X86/X86SchedSkylakeServer.td | 2 + llvm/lib/Target/X86/X86Schedule.td | 10 +- llvm/lib/Target/X86/X86ScheduleAtom.td | 2 + llvm/lib/Target/X86/X86ScheduleBdVer2.td | 2 + llvm/lib/Target/X86/X86ScheduleBtVer2.td | 2 + llvm/lib/Target/X86/X86ScheduleSLM.td | 2 + llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 + llvm/lib/Target/X86/X86ScheduleZnver2.td | 2 + llvm/lib/Target/X86/X86ScheduleZnver3.td | 2 + .../X86/IceLakeServer/resources-avx512.s | 98 +++++++++---------- .../X86/IceLakeServer/resources-avx512bw.s | 26 ++--- .../X86/SkylakeServer/resources-avx512.s | 98 +++++++++---------- .../X86/SkylakeServer/resources-avx512bw.s | 26 ++--- 18 files changed, 158 insertions(+), 126 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index a6ff472aac6f..a47abf1e83a0 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -255,6 +255,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : BWWriteResPair; // Floating point add/sub. 
@@ -418,6 +419,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 371a9571ae39..278e11dfa727 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -257,6 +257,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : HWWriteResPair; @@ -416,6 +417,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 789de9eb5751..7a6bedf3d748 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -252,6 +252,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : ICXWriteResPair; // Floating point add/sub. 
@@ -367,6 +368,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index af5c0540deb5..e2599e04d635 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -223,6 +223,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : SBWriteResPair; @@ -380,6 +381,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index b3c13c72dd01..d7d18da7dde9 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -244,6 +244,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : SKLWriteResPair; // Floating point add/sub. @@ -359,6 +360,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 74f9da158353..f8d5f484dca8 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -244,6 +244,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : SKXWriteResPair; // Floating point add/sub. 
@@ -359,6 +360,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 1cb48175260a..d57e14715a4e 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -239,6 +239,7 @@ def WriteFMaskedStore64Y : SchedWrite; def WriteFMove : SchedWrite; def WriteFMoveX : SchedWrite; def WriteFMoveY : SchedWrite; +def WriteFMoveZ : SchedWrite; defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). @@ -354,6 +355,7 @@ def WriteVecMaskedStore64Y : SchedWrite; def WriteVecMove : SchedWrite; def WriteVecMoveX : SchedWrite; def WriteVecMoveY : SchedWrite; +def WriteVecMoveZ : SchedWrite; def WriteVecMoveToGpr : SchedWrite; def WriteVecMoveFromGpr : SchedWrite; @@ -516,9 +518,11 @@ def WriteFMoveLSX : X86SchedWriteMoveLS; def WriteFMoveLSY : X86SchedWriteMoveLS; +def WriteFMoveLSZ + : X86SchedWriteMoveLS; def SchedWriteFMoveLS : X86SchedWriteMoveLSWidths; + WriteFMoveLSY, WriteFMoveLSZ>; def WriteFMoveLSNT : X86SchedWriteMoveLS; @@ -536,9 +540,11 @@ def WriteVecMoveLSX : X86SchedWriteMoveLS; def WriteVecMoveLSY : X86SchedWriteMoveLS; +def WriteVecMoveLSZ + : X86SchedWriteMoveLS; def SchedWriteVecMoveLS : X86SchedWriteMoveLSWidths; + WriteVecMoveLSY, WriteVecMoveLSZ>; def WriteVecMoveLSNT : X86SchedWriteMoveLS; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 0fedfc01092c..8ae8e574f87a 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -229,6 +229,7 @@ defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : X86WriteRes; @@ -382,6 +383,7 @@ defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; defm : 
X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td index 0f6f24f9f1fe..cb75c3660728 100644 --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -772,6 +772,7 @@ defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; +defm : X86WriteResUnsupported; defm : PdWriteRes; @@ -1107,6 +1108,7 @@ defm : X86WriteResUnsupported; defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; +defm : X86WriteResUnsupported; def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> { } diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index a070da34cab5..4b2fa87a25b5 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -525,6 +525,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; @@ -682,6 +683,7 @@ defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 36e5b55a4194..43cf5c9f98a4 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -200,6 +200,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : SLMWriteResPair; @@ -345,6 +346,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; +defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 4343e1ed45d1..9655ed987733 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -286,6 +286,7 @@ defm : 
X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; @@ -404,6 +405,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 96d2837880c7..249cbfff5dc6 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -274,6 +274,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -388,6 +389,7 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td index f4e03ac11f0b..02f7f8376fdb 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver3.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td @@ -1446,10 +1446,12 @@ defm : Zn3WriteResInt; // Compare+Exc defm : Zn3WriteResXMM; // Empty sched class defm : Zn3WriteResXMM; defm : Zn3WriteResYMM; +defm : X86WriteResUnsupported; defm : Zn3WriteResXMM; // MMX defm : Zn3WriteResXMM; defm : Zn3WriteResYMM; +defm : X86WriteResUnsupported; def : IsOptimizableRegisterMove<[ InstructionEquivalenceClass<[ diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index ebf3dd4eac97..13327794d2b4 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -1068,21 +1068,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # 
CHECK-NEXT: 2 11 0.50 * vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 @@ -1090,37 +1090,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} # 
CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * 
vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovshdup (%rax), %zmm19 @@ -1134,21 +1134,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %zmm17, %zmm19 @@ -1671,7 +1671,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 236.67 57.67 278.83 278.83 16.00 555.67 2.00 5.33 - - +# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 - - # CHECK: Resource pressure 
by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1859,21 +1859,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovapd %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovapd %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovapd %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovapd %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovapd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovaps %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovaps %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovaps %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 
- - - - vmovaps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 @@ -1881,37 +1881,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovddup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa32 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa64 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 {%k1} # 
CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqa64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqa64 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu32 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu64 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 
0.33 0.50 0.50 - 0.33 - - - - vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovshdup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovshdup (%rax), %zmm19 @@ -1925,21 +1925,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovsldup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovupd %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovupd %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovupd %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovupd %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovupd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovups %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovups %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovups %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 
0.33 - - - 0.33 - - - - vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovups %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vmulpd (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s index d98738fc9352..9a41974014ff 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s @@ -298,21 +298,21 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 1 4 1.00 kshiftrq $2, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckdq %k0, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckwd %k0, %k1, %k2 -# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 # CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} # CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} 
{z} # CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19 @@ -525,7 +525,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 36.50 13.50 52.50 52.50 6.00 181.50 0.50 2.00 - - +# CHECK-NEXT: - - 37.50 11.50 52.50 52.50 6.00 182.50 0.50 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -547,21 +547,21 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - - - 1.00 - - - - kshiftrq $2, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - - - kunpckdq %k0, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - - - kunpckwd %k0, %k1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu8 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 # CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 - - vmovdqu8 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 - - vmovdqu8 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu16 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - 
- - - vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovdqu16 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - - - - - - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vpabsb (%rax), %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index 919129d30f99..127be91c0dea 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -1068,21 +1068,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovapd %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovapd %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovapd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * 
vmovaps %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovaps %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovaps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 @@ -1090,37 +1090,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovddup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqa32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqa32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqa64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqa64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqa64 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 +# 
CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqu32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqu32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqu64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vmovshdup %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovshdup (%rax), %zmm19 @@ -1134,21 +1134,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovsldup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovupd %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovupd %zmm16, %zmm19 
{%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovupd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovups %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovups %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %zmm17, %zmm19 @@ -1669,7 +1669,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 236.67 57.67 278.83 278.83 16.00 555.67 2.00 5.33 +# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1857,21 +1857,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vminps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovapd %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovapd %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 
{%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovapd %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovapd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovaps %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovaps %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovaps %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovaps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovaps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 @@ -1879,37 +1879,37 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vmovddup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovddup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa32 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 
0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa64 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqa64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu32 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu32 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu32 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu32 
(%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu64 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu64 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu64 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - 1.00 - - vmovshdup %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovshdup (%rax), %zmm19 @@ -1923,21 +1923,21 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vmovsldup %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovupd %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovupd %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovupd %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovupd %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovupd %zmm16, 
%zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovupd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovups %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovups %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovups %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovups %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovups %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmulpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmulpd (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s index 9e4d4cd654f7..a7899881a135 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s @@ -298,21 +298,21 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 1 4 1.00 kshiftrq $2, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckdq %k0, %k1, %k2 # CHECK-NEXT: 1 4 1.00 kunpckwd %k0, %k1, %k2 -# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 # CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} # CHECK-NEXT: 5 2 2.00 * vmovdqu8 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu8 
%zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu8 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqu8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 # CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) -# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} # CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} # CHECK-NEXT: 2 1 1.00 * vmovdqu16 %zmm16, (%rax) {%k1} -# CHECK-NEXT: 1 1 0.33 vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vmovdqu16 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19 @@ -523,7 +523,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 36.50 13.50 52.50 52.50 6.00 181.50 0.50 2.00 +# CHECK-NEXT: - - 37.50 11.50 52.50 52.50 6.00 182.50 0.50 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -545,21 +545,21 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - - - 1.00 - - kshiftrq $2, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - kunpckdq %k0, %k1, %k2 # CHECK-NEXT: - - - - - - - 1.00 - - kunpckwd %k0, %k1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu8 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 # CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 vmovdqu8 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu8 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - 0.50 0.67 0.67 2.00 0.50 - 0.67 vmovdqu8 
%zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu16 %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %zmm16, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu16 %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovdqu16 %zmm16, (%rax) {%k1} -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - - - - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpabsb (%rax), %zmm19 From 1ef3f83ef242d51ddce5d881e99d71bd8494a3e3 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 27 Dec 2021 20:24:37 +0800 Subject: [PATCH 077/992] [NFC] [Coroutines] Add tests to address the problem for converting to musttail call Add two tests to address the problem for missing oppotunities to convert calls to musttail call. 
--- .../Coroutines/coro-split-musttail4.ll | 65 +++++++++++++++++++ .../Coroutines/coro-split-musttail5.ll | 62 ++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail4.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail5.ll diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll new file mode 100644 index 000000000000..9fd801799620 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll @@ -0,0 +1,65 @@ +; Tests that coro-split will convert a call before coro.suspend to a musttail call +; while the user of the coro.suspend is a icmpinst. +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define void @fakeresume1(i8*) { +entry: + ret void; +} + +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + + %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + switch i8 %init_suspend, label %coro.end [ + i8 0, label %await.ready + i8 1, label %coro.end + ] +await.ready: + %save2 = call token @llvm.coro.save(i8* null) + + call fastcc void @fakeresume1(i8* align 8 null) + %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true) + %switch = icmp ult i8 %suspend, 2 + br i1 %switch, label %cleanup, label %coro.end + +cleanup: + %free.handle = call i8* @llvm.coro.free(token %id, i8* %vFrame) + %.not = icmp eq i8* %free.handle, null + br i1 %.not, label %coro.end, label %coro.free + +coro.free: + call void @delete(i8* nonnull %free.handle) #2 + br label %coro.end + +coro.end: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; FIXME: The fakerresume1 here should be musttail call. 
+; CHECK-LABEL: @f.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1( + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 +declare token @llvm.coro.save(i8*) #2 +declare i8* @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 +declare i1 @llvm.coro.end(i8*, i1) #2 +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 +declare i8* @malloc(i64) +declare void @delete(i8* nonnull) #2 + +attributes #0 = { "coroutine.presplit"="1" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll new file mode 100644 index 000000000000..84a52e47f939 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll @@ -0,0 +1,62 @@ +; Tests that sinked lifetime markers wouldn't provent optimization +; to convert a resuming call to a musttail call. 
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(i64* align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %alloc.var = alloca i8 + call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloc.var) + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume1(i64* align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(i8* %alloc.var) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloc.var) + br label %exit +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @g.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1(i64* align 8 null) + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 +declare token @llvm.coro.save(i8*) #2 +declare i8* @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 +declare i1 @llvm.coro.end(i8*, i1) #2 +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 +declare i8* @malloc(i64) +declare void @consume(i8*) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +attributes #0 = { "coroutine.presplit"="1" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } From ba89c6d5056975c046275ce9614eb96eb7ec01f4 Mon Sep 17 00:00:00 2001 From: Anton Daubert Date: Mon, 27 Dec 2021 13:50:02 +0100 Subject: [PATCH 078/992] Fix forward for "signed version of createExpression" Fix forward for "signed version of createExpression" change in https://github.com/llvm/llvm-project/commit/ec501f15a8b8ace2b283732740d6d65d40d82e09 Reviewed By: bgraur, achieveartificialintelligence Differential Revision: https://reviews.llvm.org/D116301 --- llvm/bindings/go/llvm/dibuilder.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/bindings/go/llvm/dibuilder.go b/llvm/bindings/go/llvm/dibuilder.go index aeaf49e539b3..b260f8e3d316 100644 --- a/llvm/bindings/go/llvm/dibuilder.go +++ b/llvm/bindings/go/llvm/dibuilder.go @@ -563,10 +563,10 @@ func (d *DIBuilder) getOrCreateTypeArray(values []Metadata) Metadata { // CreateExpression creates a new descriptor for the specified // variable which has a complex address expression for its address. 
-func (d *DIBuilder) CreateExpression(addr []int64) Metadata { - var data *C.int64_t +func (d *DIBuilder) CreateExpression(addr []uint64) Metadata { + var data *C.uint64_t if len(addr) > 0 { - data = (*C.int64_t)(unsafe.Pointer(&addr[0])) + data = (*C.uint64_t)(unsafe.Pointer(&addr[0])) } result := C.LLVMDIBuilderCreateExpression(d.ref, data, C.size_t(len(addr))) return Metadata{C: result} From 3f22d492ac3c84e9856eff7280ac51a6baa268c6 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Mon, 27 Dec 2021 18:39:57 +0530 Subject: [PATCH 079/992] [MLIR] Move `print()` and `dump()` from FlatAffineConstraints to IntegerPolyhedron. This patch moves `FlatAffineConstraints::print` and `FlatAffineConstraints::dump()` to IntegerPolyhedron. Reviewed By: arjunp Differential Revision: https://reviews.llvm.org/D116289 --- mlir/include/mlir/Analysis/AffineStructures.h | 13 +++---- .../Analysis/Presburger/IntegerPolyhedron.h | 12 ++++++ mlir/lib/Analysis/AffineStructures.cpp | 35 +---------------- .../Analysis/Presburger/IntegerPolyhedron.cpp | 39 +++++++++++++++++++ 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index ea20edd1036f..3f632dcc96a8 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -335,15 +335,7 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// match. void mergeLocalIds(FlatAffineConstraints &other); - void print(raw_ostream &os) const; - void dump() const; - protected: - /// Returns false if the fields corresponding to various identifier counts, or - /// equality/inequality buffer sizes aren't consistent; true otherwise. This - /// is meant to be used within an assert internally. - virtual bool hasConsistentState() const; - /// Checks all rows of equality/inequality constraints for trivial /// contradictions (for example: 1 == 0, 0 >= 1), which may have surfaced /// after elimination. 
Returns true if an invalid constraint is found; @@ -419,6 +411,11 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// equalities. bool isColZero(unsigned pos) const; + /// Prints the number of constraints, dimensions, symbols and locals in the + /// FlatAffineConstraints. Also, prints for each identifier whether there is + /// an SSA Value attached to it. + void printSpace(raw_ostream &os) const override; + /// A parameter that controls detection of an unrealistic number of /// constraints. If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h index c7eae0cd29ee..b46874fb8072 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h @@ -195,7 +195,19 @@ class IntegerPolyhedron { SmallVectorImpl *eqIndices = nullptr, unsigned offset = 0, unsigned num = 0) const; + void print(raw_ostream &os) const; + void dump() const; + protected: + /// Returns false if the fields corresponding to various identifier counts, or + /// equality/inequality buffer sizes aren't consistent; true otherwise. This + /// is meant to be used within an assert internally. + virtual bool hasConsistentState() const; + + /// Prints the number of constraints, dimensions, symbols and locals in the + /// IntegerPolyhedron. + virtual void printSpace(raw_ostream &os) const; + /// Return the index at which the specified kind of id starts. 
unsigned getIdKindOffset(IdKind kind) const; diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 520262d6fddc..205abe280d9b 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -747,19 +747,6 @@ void FlatAffineConstraints::normalizeConstraintsByGCD() { } } -bool FlatAffineConstraints::hasConsistentState() const { - if (!inequalities.hasConsistentState()) - return false; - if (!equalities.hasConsistentState()) - return false; - - // Catches errors where numDims, numSymbols, numIds aren't consistent. - if (numDims > numIds || numSymbols > numIds || numDims + numSymbols > numIds) - return false; - - return true; -} - bool FlatAffineValueConstraints::hasConsistentState() const { return FlatAffineConstraints::hasConsistentState() && values.size() == getNumIds(); @@ -2587,11 +2574,8 @@ bool FlatAffineConstraints::isHyperRectangular(unsigned pos, return true; } -void FlatAffineConstraints::print(raw_ostream &os) const { - assert(hasConsistentState()); - os << "\nConstraints (" << getNumDimIds() << " dims, " << getNumSymbolIds() - << " symbols, " << getNumLocalIds() << " locals), (" << getNumConstraints() - << " constraints)\n"; +void FlatAffineConstraints::printSpace(raw_ostream &os) const { + IntegerPolyhedron::printSpace(os); os << "("; for (unsigned i = 0, e = getNumIds(); i < e; i++) { if (auto *valueCstr = dyn_cast(this)) { @@ -2604,23 +2588,8 @@ void FlatAffineConstraints::print(raw_ostream &os) const { } } os << " const)\n"; - for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) { - for (unsigned j = 0, f = getNumCols(); j < f; ++j) { - os << atEq(i, j) << " "; - } - os << "= 0\n"; - } - for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) { - for (unsigned j = 0, f = getNumCols(); j < f; ++j) { - os << atIneq(i, j) << " "; - } - os << ">= 0\n"; - } - os << '\n'; } -void FlatAffineConstraints::dump() const { print(llvm::errs()); } - /// Removes duplicate 
constraints, trivially true constraints, and constraints /// that can be detected as redundant as a result of differing only in their /// constant term part. A constraint of the form >= 0 is diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 958f52e2625e..4eed7ca91dd4 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -271,3 +271,42 @@ void IntegerPolyhedron::getLowerAndUpperBoundIndices( eqIndices->push_back(r); } } + +bool IntegerPolyhedron::hasConsistentState() const { + if (!inequalities.hasConsistentState()) + return false; + if (!equalities.hasConsistentState()) + return false; + + // Catches errors where numDims, numSymbols, numIds aren't consistent. + if (numDims > numIds || numSymbols > numIds || numDims + numSymbols > numIds) + return false; + + return true; +} + +void IntegerPolyhedron::printSpace(raw_ostream &os) const { + os << "\nConstraints (" << getNumDimIds() << " dims, " << getNumSymbolIds() + << " symbols, " << getNumLocalIds() << " locals), (" << getNumConstraints() + << " constraints)\n"; +} + +void IntegerPolyhedron::print(raw_ostream &os) const { + assert(hasConsistentState()); + printSpace(os); + for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) { + for (unsigned j = 0, f = getNumCols(); j < f; ++j) { + os << atEq(i, j) << " "; + } + os << "= 0\n"; + } + for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) { + for (unsigned j = 0, f = getNumCols(); j < f; ++j) { + os << atIneq(i, j) << " "; + } + os << ">= 0\n"; + } + os << '\n'; +} + +void IntegerPolyhedron::dump() const { print(llvm::errs()); } From a0a0eb192eff7750f386a066660d5bdefffe2bcb Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 27 Dec 2021 13:21:20 +0000 Subject: [PATCH 080/992] [X86] Use WriteVecMove scheduler classes for VPMOVM2* instructions These match the port behaviour of reg-reg predicated xmm/ymm/zmm moves 
Fixes #34958 --- llvm/lib/Target/X86/X86InstrAVX512.td | 7 +++---- .../llvm-mca/X86/Generic/resources-avx512bw.s | 10 +++++----- .../X86/Generic/resources-avx512bwvl.s | 10 +++++----- .../llvm-mca/X86/Generic/resources-avx512dq.s | 10 +++++----- .../X86/Generic/resources-avx512dqvl.s | 10 +++++----- .../X86/IceLakeServer/resources-avx512bw.s | 10 +++++----- .../X86/IceLakeServer/resources-avx512bwvl.s | 18 +++++++++--------- .../X86/IceLakeServer/resources-avx512dq.s | 10 +++++----- .../X86/IceLakeServer/resources-avx512dqvl.s | 18 +++++++++--------- .../X86/SkylakeServer/resources-avx512bw.s | 10 +++++----- .../X86/SkylakeServer/resources-avx512bwvl.s | 18 +++++++++--------- .../X86/SkylakeServer/resources-avx512dq.s | 10 +++++----- .../X86/SkylakeServer/resources-avx512dqvl.s | 18 +++++++++--------- 13 files changed, 79 insertions(+), 80 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ecd4777c3533..bc67d1f89d7f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10537,13 +10537,12 @@ def rr : AVX512XS8I opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { -// TODO - Replace WriteMove with WriteVecTrunc? 
let Predicates = [prd] in - defm Z : cvt_by_vec_width, EVEX_V512; + defm Z : cvt_by_vec_width, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : cvt_by_vec_width, EVEX_V256; - defm Z128 : cvt_by_vec_width, EVEX_V128; + defm Z256 : cvt_by_vec_width, EVEX_V256; + defm Z128 : cvt_by_vec_width, EVEX_V128; } } diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s index da27526f87e3..73016947de70 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s @@ -504,8 +504,8 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %zmm0 -# CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2b %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2w %k0, %zmm0 # CHECK-NEXT: 1 1 0.33 vpmovb2m %zmm0, %k0 # CHECK-NEXT: 1 1 0.33 vpmovw2m %zmm0, %k0 @@ -521,7 +521,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 13.67 47.67 4.00 156.67 52.50 52.50 +# CHECK-NEXT: - - 14.00 47.00 4.00 157.00 52.50 52.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -749,7 +749,7 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2b %k0, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2w %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2b %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2w %k0, %zmm0 # CHECK-NEXT: - - 
0.33 0.33 - 0.33 - - vpmovb2m %zmm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovw2m %zmm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s index ad32d0707077..32c285623cf7 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s @@ -926,8 +926,8 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %xmm0 # CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %xmm0 -# CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %ymm0 -# CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %ymm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2b %k0, %ymm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2w %k0, %ymm0 # CHECK-NEXT: 1 1 0.33 vpmovb2m %xmm0, %k0 # CHECK-NEXT: 1 1 0.33 vpmovw2m %xmm0, %k0 # CHECK-NEXT: 1 1 0.33 vpmovb2m %ymm0, %k0 @@ -945,7 +945,7 @@ vpmovw2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 18.33 133.33 8.00 248.33 105.00 105.00 +# CHECK-NEXT: - - 18.67 132.67 8.00 248.67 105.00 105.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1363,8 +1363,8 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2b %k0, %xmm0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2w %k0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2b %k0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2w %k0, %ymm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2b %k0, %ymm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2w %k0, %ymm0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovb2m %xmm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovw2m %xmm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovb2m %ymm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s 
b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s index 9d08164bba70..5849220ddc9d 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s @@ -336,8 +336,8 @@ vpmovq2m %zmm0, %k0 # CHECK-NEXT: 1 1 1.00 vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vxorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %zmm0 -# CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2d %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2q %k0, %zmm0 # CHECK-NEXT: 1 1 0.33 vpmovd2m %zmm0, %k0 # CHECK-NEXT: 1 1 0.33 vpmovq2m %zmm0, %k0 @@ -353,7 +353,7 @@ vpmovq2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 9.67 41.67 - 110.67 46.50 46.50 +# CHECK-NEXT: - - 10.00 41.00 - 111.00 46.50 46.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -506,7 +506,7 @@ vpmovq2m %zmm0, %k0 # CHECK-NEXT: - - - - - 1.00 - - vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2d %k0, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2q %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2d %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2q %k0, %zmm0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovd2m %zmm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovq2m %zmm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s index 7ab33c319e80..d7d0c3a733fc 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s +++ 
b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s @@ -459,8 +459,8 @@ vpmovq2m %ymm0, %k0 # CHECK-NEXT: 2 8 1.00 * vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %xmm0 # CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %xmm0 -# CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %ymm0 -# CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %ymm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2d %k0, %ymm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2q %k0, %ymm0 # CHECK-NEXT: 1 1 0.33 vpmovd2m %xmm0, %k0 # CHECK-NEXT: 1 1 0.33 vpmovq2m %xmm0, %k0 # CHECK-NEXT: 1 1 0.33 vpmovd2m %ymm0, %k0 @@ -478,7 +478,7 @@ vpmovq2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 14.67 46.17 - 169.17 66.50 66.50 +# CHECK-NEXT: - - 15.00 45.50 - 169.50 66.50 66.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -688,8 +688,8 @@ vpmovq2m %ymm0, %k0 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2d %k0, %xmm0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2q %k0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2d %k0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovm2q %k0, %ymm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2d %k0, %ymm0 +# CHECK-NEXT: - - 0.50 - - 0.50 - - vpmovm2q %k0, %ymm0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovd2m %xmm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovq2m %xmm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vpmovd2m %ymm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s index 9a41974014ff..02a5152f9831 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s @@ -504,8 +504,8 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # 
CHECK-NEXT: 1 1 1.00 vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2b %k0, %zmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2w %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2b %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2w %k0, %zmm0 # CHECK-NEXT: 1 1 1.00 vpmovb2m %zmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovw2m %zmm0, %k0 @@ -525,7 +525,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 37.50 11.50 52.50 52.50 6.00 182.50 0.50 2.00 - - +# CHECK-NEXT: - - 38.00 11.00 52.50 52.50 6.00 183.00 - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -753,7 +753,7 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2b %k0, %zmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2w %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vpmovm2b %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vpmovm2w %k0, %zmm0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovb2m %zmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovw2m %zmm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s index 0539e352c380..6e106c9f0e58 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bwvl.s @@ -924,10 +924,10 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: 2 8 0.50 * vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 0.50 vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} {z} 
# CHECK-NEXT: 2 8 0.50 * vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2b %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2w %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2b %k0, %ymm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2w %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %ymm0 # CHECK-NEXT: 1 1 1.00 vpmovb2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovw2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovb2m %ymm0, %k0 @@ -949,7 +949,7 @@ vpmovw2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 41.00 89.00 103.67 103.67 8.00 293.00 1.00 2.67 - - +# CHECK-NEXT: - - 41.33 89.33 103.67 103.67 8.00 293.33 - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1365,10 +1365,10 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2b %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2w %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2b %k0, %ymm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2w %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2b %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2w %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2b %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2w %k0, %ymm0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovb2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovw2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovb2m 
%ymm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dq.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dq.s index 102602b837e8..b2b9a933a42c 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dq.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dq.s @@ -336,8 +336,8 @@ vpmovq2m %zmm0, %k0 # CHECK-NEXT: 1 1 0.50 vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2d %k0, %zmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2q %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2d %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2q %k0, %zmm0 # CHECK-NEXT: 1 1 1.00 vpmovd2m %zmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovq2m %zmm0, %k0 @@ -357,7 +357,7 @@ vpmovq2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 65.50 10.50 46.50 46.50 - 95.50 0.50 - - - +# CHECK-NEXT: - - 66.00 10.00 46.50 46.50 - 96.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -510,7 +510,7 @@ vpmovq2m %zmm0, %k0 # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vxorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2d %k0, %zmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2q %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vpmovm2d %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vpmovm2q %k0, %zmm0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovd2m %zmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovq2m %zmm0, %k0 diff --git 
a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dqvl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dqvl.s index 647a5cff6f51..9c7e93f92ec1 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dqvl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512dqvl.s @@ -457,10 +457,10 @@ vpmovq2m %ymm0, %k0 # CHECK-NEXT: 1 1 0.33 vxorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2d %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2q %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2d %k0, %ymm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2q %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %ymm0 # CHECK-NEXT: 1 1 1.00 vpmovd2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovq2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovd2m %ymm0, %k0 @@ -482,7 +482,7 @@ vpmovq2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - - 77.00 73.00 66.50 66.50 - 85.00 1.00 - - - +# CHECK-NEXT: - - 77.33 73.33 66.50 66.50 - 85.33 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -690,10 +690,10 @@ vpmovq2m %ymm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vxorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vxorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2d %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2q %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - - - vpmovm2d %k0, %ymm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 
0.25 0.25 - - - vpmovm2q %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2d %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2q %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2d %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vpmovm2q %k0, %ymm0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovd2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovq2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - - - vpmovd2m %ymm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s index a7899881a135..fed721e37175 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s @@ -504,8 +504,8 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1 1 1.00 vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2b %k0, %zmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2w %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2b %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2w %k0, %zmm0 # CHECK-NEXT: 1 1 1.00 vpmovb2m %zmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovw2m %zmm0, %k0 @@ -523,7 +523,7 @@ vpmovw2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 37.50 11.50 52.50 52.50 6.00 182.50 0.50 2.00 +# CHECK-NEXT: - - 38.00 11.00 52.50 52.50 6.00 183.00 - 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -751,7 +751,7 @@ vpmovw2m %zmm0, %k0 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - 
- 0.25 0.25 - - - 0.25 0.25 - vpmovm2b %k0, %zmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2w %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vpmovm2b %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vpmovm2w %k0, %zmm0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovb2m %zmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovw2m %zmm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s index 2744d2894f0e..cbdeb14f6284 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s @@ -924,10 +924,10 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 1.00 vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2b %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2w %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2b %k0, %ymm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2w %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2b %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2w %k0, %ymm0 # CHECK-NEXT: 1 1 1.00 vpmovb2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovw2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovb2m %ymm0, %k0 @@ -947,7 +947,7 @@ vpmovw2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 41.00 37.00 103.67 103.67 8.00 345.00 1.00 2.67 +# CHECK-NEXT: - - 41.33 37.33 103.67 103.67 8.00 345.33 - 2.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1363,10 +1363,10 @@ vpmovw2m %ymm0, %k0 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} {z} # 
CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2b %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2w %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2b %k0, %ymm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2w %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2b %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2w %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2b %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2w %k0, %ymm0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovb2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovw2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovb2m %ymm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s index 18750b04b00c..046bc8e123a7 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s @@ -336,8 +336,8 @@ vpmovq2m %zmm0, %k0 # CHECK-NEXT: 1 1 0.50 vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2d %k0, %zmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2q %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2d %k0, %zmm0 +# CHECK-NEXT: 1 1 0.50 vpmovm2q %k0, %zmm0 # CHECK-NEXT: 1 1 1.00 vpmovd2m %zmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovq2m %zmm0, %k0 @@ -355,7 +355,7 @@ vpmovq2m %zmm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 65.50 10.50 46.50 46.50 - 95.50 0.50 - +# CHECK-NEXT: - - 66.00 10.00 46.50 46.50 - 96.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -508,7 +508,7 @@ vpmovq2m 
%zmm0, %k0 # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vxorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2d %k0, %zmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2q %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vpmovm2d %k0, %zmm0 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vpmovm2q %k0, %zmm0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovd2m %zmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovq2m %zmm0, %k0 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s index 4f2b0d9f1f33..6233999fbba9 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s @@ -457,10 +457,10 @@ vpmovq2m %ymm0, %k0 # CHECK-NEXT: 1 1 0.33 vxorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1 1 0.25 vpmovm2d %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2q %k0, %xmm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2d %k0, %ymm0 -# CHECK-NEXT: 1 1 0.25 vpmovm2q %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %xmm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2d %k0, %ymm0 +# CHECK-NEXT: 1 1 0.33 vpmovm2q %k0, %ymm0 # CHECK-NEXT: 1 1 1.00 vpmovd2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovq2m %xmm0, %k0 # CHECK-NEXT: 1 1 1.00 vpmovd2m %ymm0, %k0 @@ -480,7 +480,7 @@ vpmovq2m %ymm0, %k0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 77.00 73.00 66.50 66.50 - 85.00 1.00 - +# CHECK-NEXT: - - 77.33 73.33 66.50 66.50 - 85.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: 
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -688,10 +688,10 @@ vpmovq2m %ymm0, %k0 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vxorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vxorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2d %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2q %k0, %xmm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2d %k0, %ymm0 -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - vpmovm2q %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2d %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2q %k0, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2d %k0, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmovm2q %k0, %ymm0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovd2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovq2m %xmm0, %k0 # CHECK-NEXT: - - 1.00 - - - - - - - vpmovd2m %ymm0, %k0 From 5f22f248d85726f6dd0a056189424949020f28b2 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Mon, 27 Dec 2021 19:06:32 +0530 Subject: [PATCH 081/992] [MLIR] Use IntegerPolyhedron in Simplex instead of FlatAffineConstraints This patch replaces usage of FlatAffineConstraints in Simplex with IntegerPolyhedron. This removes dependency of Simplex on FlatAffineConstraints and puts it on IntegerPolyhedron, which is part of Presburger library. 
Reviewed By: arjunp Differential Revision: https://reviews.llvm.org/D116287 --- .../mlir/Analysis/Presburger/Simplex.h | 16 +++++----- mlir/lib/Analysis/Presburger/Simplex.cpp | 29 +++++++++---------- mlir/lib/Analysis/PresburgerSet.cpp | 2 +- .../Analysis/Presburger/SimplexTest.cpp | 19 ++++++------ 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h index a04f0b0a485b..074353aa3a02 100644 --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Functionality to perform analysis on FlatAffineConstraints. In particular, +// Functionality to perform analysis on an IntegerPolyhedron. In particular, // support for performing emptiness checks and redundancy checks. // //===----------------------------------------------------------------------===// @@ -14,8 +14,8 @@ #ifndef MLIR_ANALYSIS_PRESBURGER_SIMPLEX_H #define MLIR_ANALYSIS_PRESBURGER_SIMPLEX_H -#include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/Presburger/Fraction.h" +#include "mlir/Analysis/Presburger/IntegerPolyhedron.h" #include "mlir/Analysis/Presburger/Matrix.h" #include "mlir/IR/Location.h" #include "mlir/Support/LogicalResult.h" @@ -39,7 +39,7 @@ class GBRSimplex; /// sets. Furthermore, it can find a subset of these constraints that are /// redundant, i.e. a subset of constraints that doesn't constrain the affine /// set further after adding the non-redundant constraints. Simplex can also be -/// constructed from a FlatAffineConstraints object. +/// constructed from an IntegerPolyhedron object. 
/// /// The implementation of the Simplex and SimplexBase classes, other than the /// functionality for sampling, is based on the paper @@ -146,7 +146,7 @@ class SimplexBase { SimplexBase() = delete; explicit SimplexBase(unsigned nVar); - explicit SimplexBase(const FlatAffineConstraints &constraints); + explicit SimplexBase(const IntegerPolyhedron &constraints); /// Returns true if the tableau is empty (has conflicting constraints), /// false otherwise. @@ -180,8 +180,8 @@ class SimplexBase { /// Rollback to a snapshot. This invalidates all later snapshots. void rollback(unsigned snapshot); - /// Add all the constraints from the given FlatAffineConstraints. - void intersectFlatAffineConstraints(const FlatAffineConstraints &fac); + /// Add all the constraints from the given IntegerPolyhedron. + void intersectIntegerPolyhedron(const IntegerPolyhedron &fac); /// Returns a rational sample point. This should not be called when Simplex is /// empty. @@ -330,7 +330,7 @@ class Simplex : public SimplexBase { public: Simplex() = delete; explicit Simplex(unsigned nVar) : SimplexBase(nVar) {} - explicit Simplex(const FlatAffineConstraints &constraints) + explicit Simplex(const IntegerPolyhedron &constraints) : SimplexBase(constraints) {} /// Compute the maximum or minimum value of the given row, depending on @@ -389,7 +389,7 @@ class Simplex : public SimplexBase { /// Returns true if this Simplex's polytope is a rational subset of `fac`. /// Otherwise, returns false. 
- bool isRationalSubsetOf(const FlatAffineConstraints &fac); + bool isRationalSubsetOf(const IntegerPolyhedron &fac); private: friend class GBRSimplex; diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp index ed3232d6fc5b..1e881aa018a2 100644 --- a/mlir/lib/Analysis/Presburger/Simplex.cpp +++ b/mlir/lib/Analysis/Presburger/Simplex.cpp @@ -28,7 +28,7 @@ SimplexBase::SimplexBase(unsigned nVar) } } -SimplexBase::SimplexBase(const FlatAffineConstraints &constraints) +SimplexBase::SimplexBase(const IntegerPolyhedron &constraints) : SimplexBase(constraints.getNumIds()) { for (unsigned i = 0, numIneqs = constraints.getNumInequalities(); i < numIneqs; ++i) @@ -502,15 +502,14 @@ void SimplexBase::appendVariable(unsigned count) { undoLog.insert(undoLog.end(), count, UndoLogEntry::RemoveLastVariable); } -/// Add all the constraints from the given FlatAffineConstraints. -void SimplexBase::intersectFlatAffineConstraints( - const FlatAffineConstraints &fac) { - assert(fac.getNumIds() == getNumVariables() && - "FlatAffineConstraints must have same dimensionality as simplex"); - for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) - addInequality(fac.getInequality(i)); - for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) - addEquality(fac.getEquality(i)); +/// Add all the constraints from the given IntegerPolyhedron. 
+void SimplexBase::intersectIntegerPolyhedron(const IntegerPolyhedron &poly) { + assert(poly.getNumIds() == getNumVariables() && + "IntegerPolyhedron must have same dimensionality as simplex"); + for (unsigned i = 0, e = poly.getNumInequalities(); i < e; ++i) + addInequality(poly.getInequality(i)); + for (unsigned i = 0, e = poly.getNumEqualities(); i < e; ++i) + addEquality(poly.getEquality(i)); } Optional Simplex::computeRowOptimum(Direction direction, @@ -1285,16 +1284,16 @@ void SimplexBase::print(raw_ostream &os) const { void SimplexBase::dump() const { print(llvm::errs()); } -bool Simplex::isRationalSubsetOf(const FlatAffineConstraints &fac) { +bool Simplex::isRationalSubsetOf(const IntegerPolyhedron &poly) { if (isEmpty()) return true; - for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) - if (!isRedundantInequality(fac.getInequality(i))) + for (unsigned i = 0, e = poly.getNumInequalities(); i < e; ++i) + if (!isRedundantInequality(poly.getInequality(i))) return false; - for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) - if (!isRedundantEquality(fac.getEquality(i))) + for (unsigned i = 0, e = poly.getNumEqualities(); i < e; ++i) + if (!isRedundantEquality(poly.getEquality(i))) return false; return true; diff --git a/mlir/lib/Analysis/PresburgerSet.cpp b/mlir/lib/Analysis/PresburgerSet.cpp index aa1b8e70c3dd..84be397a221d 100644 --- a/mlir/lib/Analysis/PresburgerSet.cpp +++ b/mlir/lib/Analysis/PresburgerSet.cpp @@ -242,7 +242,7 @@ static void subtractRecursively(FlatAffineConstraints &b, Simplex &simplex, simplex.appendVariable(numLocalsAdded); unsigned snapshotBeforeIntersect = simplex.getSnapshot(); - simplex.intersectFlatAffineConstraints(sI); + simplex.intersectIntegerPolyhedron(sI); if (simplex.isEmpty()) { /// b ^ s_i is empty, so b \ s_i = b. We move directly to i + 1. 
diff --git a/mlir/unittests/Analysis/Presburger/SimplexTest.cpp b/mlir/unittests/Analysis/Presburger/SimplexTest.cpp index df36992792c5..fd5750dd3756 100644 --- a/mlir/unittests/Analysis/Presburger/SimplexTest.cpp +++ b/mlir/unittests/Analysis/Presburger/SimplexTest.cpp @@ -476,24 +476,23 @@ TEST(SimplexTest, isRedundantEquality) { EXPECT_TRUE(simplex.isRedundantEquality({-1, 0, 2})); // x = 2. } -static FlatAffineConstraints parseFAC(StringRef str, MLIRContext *context) { - FailureOr fac = parseIntegerSetToFAC(str, context); +static IntegerPolyhedron parsePoly(StringRef str, MLIRContext *context) { + FailureOr poly = parseIntegerSetToFAC(str, context); - EXPECT_TRUE(succeeded(fac)); + EXPECT_TRUE(succeeded(poly)); - return *fac; + return *poly; } TEST(SimplexTest, IsRationalSubsetOf) { MLIRContext context; - FlatAffineConstraints univ = FlatAffineConstraints::getUniverse(1, 0); - FlatAffineConstraints empty = - parseFAC("(x) : (x + 0 >= 0, -x - 1 >= 0)", &context); - FlatAffineConstraints s1 = parseFAC("(x) : ( x >= 0, -x + 4 >= 0)", &context); - FlatAffineConstraints s2 = - parseFAC("(x) : (x - 1 >= 0, -x + 3 >= 0)", &context); + IntegerPolyhedron univ = parsePoly("(x) : ()", &context); + IntegerPolyhedron empty = + parsePoly("(x) : (x + 0 >= 0, -x - 1 >= 0)", &context); + IntegerPolyhedron s1 = parsePoly("(x) : ( x >= 0, -x + 4 >= 0)", &context); + IntegerPolyhedron s2 = parsePoly("(x) : (x - 1 >= 0, -x + 3 >= 0)", &context); Simplex simUniv(univ); Simplex simEmpty(empty); From de2ed8e38e73eeda5d13904467fbf263586cd75d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 27 Dec 2021 14:50:04 +0100 Subject: [PATCH 082/992] [InstCombine] Extract GEP of GEP fold into separate function This change may not be entirely NFC, because a number of early returns will now only early return from this particular fold, rather than the whole visitGetElementPtr() implementation. This is also the reason why I'm doing this change, as I don't think this was intended. 
--- .../InstCombine/InstCombineInternal.h | 1 + .../InstCombine/InstructionCombining.cpp | 259 +++++++++--------- 2 files changed, 134 insertions(+), 126 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 39b55b028110..f92ee31a3de2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); + Instruction *visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src); Instruction *visitAllocaInst(AllocaInst &AI); Instruction *visitAllocSite(Instruction &FI); Instruction *visitFree(CallInst &FI); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index eb5eadba194d..9bc32e407eee 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1884,6 +1884,136 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel); } +Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, + GEPOperator *Src) { + // Combine Indices - If the source pointer to this getelementptr instruction + // is a getelementptr instruction, combine the indices of the two + // getelementptr instructions into a single instruction. + if (!shouldMergeGEPs(*cast(&GEP), *Src)) + return nullptr; + + if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && + Src->hasOneUse()) { + Value *GO1 = GEP.getOperand(1); + Value *SO1 = Src->getOperand(1); + + if (LI) { + // Try to reassociate loop invariant GEP chains to enable LICM. 
+ if (Loop *L = LI->getLoopFor(GEP.getParent())) { + // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is + // invariant: this breaks the dependence between GEPs and allows LICM + // to hoist the invariant part out of the loop. + if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { + // We have to be careful here. + // We have something like: + // %src = getelementptr , * %base, %idx + // %gep = getelementptr , * %src, %idx2 + // If we just swap idx & idx2 then we could inadvertantly + // change %src from a vector to a scalar, or vice versa. + // Cases: + // 1) %base a scalar & idx a scalar & idx2 a vector + // => Swapping idx & idx2 turns %src into a vector type. + // 2) %base a scalar & idx a vector & idx2 a scalar + // => Swapping idx & idx2 turns %src in a scalar type + // 3) %base, %idx, and %idx2 are scalars + // => %src & %gep are scalars + // => swapping idx & idx2 is safe + // 4) %base a vector + // => %src is a vector + // => swapping idx & idx2 is safe. + auto *SO0 = Src->getOperand(0); + auto *SO0Ty = SO0->getType(); + if (!isa(GEP.getType()) || // case 3 + isa(SO0Ty)) { // case 4 + Src->setOperand(1, GO1); + GEP.setOperand(1, SO1); + return &GEP; + } else { + // Case 1 or 2 + // -- have to recreate %src & %gep + // put NewSrc at same location as %src + Builder.SetInsertPoint(cast(Src)); + Value *NewSrc = Builder.CreateGEP( + GEP.getSourceElementType(), SO0, GO1, Src->getName()); + // Propagate 'inbounds' if the new source was not constant-folded. + if (auto *NewSrcGEPI = dyn_cast(NewSrc)) + NewSrcGEPI->setIsInBounds(Src->isInBounds()); + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + GEP.getSourceElementType(), NewSrc, {SO1}); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; + } + } + } + } + } + + // Note that if our source is a gep chain itself then we wait for that + // chain to be resolved before we perform this transformation. This + // avoids us creating a TON of code in some cases. 
+ if (auto *SrcGEP = dyn_cast(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) + return nullptr; // Wait until our source is folded to completion. + + SmallVector Indices; + + // Find out whether the last index in the source GEP is a sequential idx. + bool EndsWithSequential = false; + for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); + I != E; ++I) + EndsWithSequential = I.isSequential(); + + // Can we combine the two pointer arithmetics offsets? + if (EndsWithSequential) { + // Replace: gep (gep %P, long B), long A, ... + // With: T = long A+B; gep %P, T, ... + Value *SO1 = Src->getOperand(Src->getNumOperands()-1); + Value *GO1 = GEP.getOperand(1); + + // If they aren't the same type, then the input hasn't been processed + // by the loop above yet (which canonicalizes sequential index types to + // intptr_t). Just avoid transforming this until the input has been + // normalized. + if (SO1->getType() != GO1->getType()) + return nullptr; + + Value *Sum = + SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); + // Only do the combine when we are sure the cost after the + // merge is never more than that before the merge. + if (Sum == nullptr) + return nullptr; + + // Update the GEP in place if possible. 
+ if (Src->getNumOperands() == 2) { + GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast(&GEP))); + replaceOperand(GEP, 0, Src->getOperand(0)); + replaceOperand(GEP, 1, Sum); + return &GEP; + } + Indices.append(Src->op_begin()+1, Src->op_end()-1); + Indices.push_back(Sum); + Indices.append(GEP.op_begin()+2, GEP.op_end()); + } else if (isa(*GEP.idx_begin()) && + cast(*GEP.idx_begin())->isNullValue() && + Src->getNumOperands() != 1) { + // Otherwise we can do the fold if the first index of the GEP is a zero + Indices.append(Src->op_begin()+1, Src->op_end()); + Indices.append(GEP.idx_begin()+1, GEP.idx_end()); + } + + if (!Indices.empty()) + return isMergedGEPInBounds(*Src, *cast(&GEP)) + ? GetElementPtrInst::CreateInBounds( + Src->getSourceElementType(), Src->getOperand(0), Indices, + GEP.getName()) + : GetElementPtrInst::Create(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()); + + return nullptr; +} + Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.operands()); Type *GEPType = GEP.getType(); @@ -2063,132 +2193,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { PtrOp = NewGEP; } - // Combine Indices - If the source pointer to this getelementptr instruction - // is a getelementptr instruction, combine the indices of the two - // getelementptr instructions into a single instruction. - if (auto *Src = dyn_cast(PtrOp)) { - if (!shouldMergeGEPs(*cast(&GEP), *Src)) - return nullptr; - - if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && - Src->hasOneUse()) { - Value *GO1 = GEP.getOperand(1); - Value *SO1 = Src->getOperand(1); - - if (LI) { - // Try to reassociate loop invariant GEP chains to enable LICM. - if (Loop *L = LI->getLoopFor(GEP.getParent())) { - // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is - // invariant: this breaks the dependence between GEPs and allows LICM - // to hoist the invariant part out of the loop. 
- if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) { - // We have to be careful here. - // We have something like: - // %src = getelementptr , * %base, %idx - // %gep = getelementptr , * %src, %idx2 - // If we just swap idx & idx2 then we could inadvertantly - // change %src from a vector to a scalar, or vice versa. - // Cases: - // 1) %base a scalar & idx a scalar & idx2 a vector - // => Swapping idx & idx2 turns %src into a vector type. - // 2) %base a scalar & idx a vector & idx2 a scalar - // => Swapping idx & idx2 turns %src in a scalar type - // 3) %base, %idx, and %idx2 are scalars - // => %src & %gep are scalars - // => swapping idx & idx2 is safe - // 4) %base a vector - // => %src is a vector - // => swapping idx & idx2 is safe. - auto *SO0 = Src->getOperand(0); - auto *SO0Ty = SO0->getType(); - if (!isa(GEPType) || // case 3 - isa(SO0Ty)) { // case 4 - Src->setOperand(1, GO1); - GEP.setOperand(1, SO1); - return &GEP; - } else { - // Case 1 or 2 - // -- have to recreate %src & %gep - // put NewSrc at same location as %src - Builder.SetInsertPoint(cast(PtrOp)); - Value *NewSrc = - Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()); - // Propagate 'inbounds' if the new source was not constant-folded. - if (auto *NewSrcGEPI = dyn_cast(NewSrc)) - NewSrcGEPI->setIsInBounds(Src->isInBounds()); - GetElementPtrInst *NewGEP = - GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1}); - NewGEP->setIsInBounds(GEP.isInBounds()); - return NewGEP; - } - } - } - } - } - - // Note that if our source is a gep chain itself then we wait for that - // chain to be resolved before we perform this transformation. This - // avoids us creating a TON of code in some cases. - if (auto *SrcGEP = dyn_cast(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. - - SmallVector Indices; - - // Find out whether the last index in the source GEP is a sequential idx. 
- bool EndsWithSequential = false; - for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); - I != E; ++I) - EndsWithSequential = I.isSequential(); - - // Can we combine the two pointer arithmetics offsets? - if (EndsWithSequential) { - // Replace: gep (gep %P, long B), long A, ... - // With: T = long A+B; gep %P, T, ... - Value *SO1 = Src->getOperand(Src->getNumOperands()-1); - Value *GO1 = GEP.getOperand(1); - - // If they aren't the same type, then the input hasn't been processed - // by the loop above yet (which canonicalizes sequential index types to - // intptr_t). Just avoid transforming this until the input has been - // normalized. - if (SO1->getType() != GO1->getType()) - return nullptr; - - Value *Sum = - SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); - // Only do the combine when we are sure the cost after the - // merge is never more than that before the merge. - if (Sum == nullptr) - return nullptr; - - // Update the GEP in place if possible. - if (Src->getNumOperands() == 2) { - GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast(&GEP))); - replaceOperand(GEP, 0, Src->getOperand(0)); - replaceOperand(GEP, 1, Sum); - return &GEP; - } - Indices.append(Src->op_begin()+1, Src->op_end()-1); - Indices.push_back(Sum); - Indices.append(GEP.op_begin()+2, GEP.op_end()); - } else if (isa(*GEP.idx_begin()) && - cast(*GEP.idx_begin())->isNullValue() && - Src->getNumOperands() != 1) { - // Otherwise we can do the fold if the first index of the GEP is a zero - Indices.append(Src->op_begin()+1, Src->op_end()); - Indices.append(GEP.idx_begin()+1, GEP.idx_end()); - } - - if (!Indices.empty()) - return isMergedGEPInBounds(*Src, *cast(&GEP)) - ? 
GetElementPtrInst::CreateInBounds( - Src->getSourceElementType(), Src->getOperand(0), Indices, - GEP.getName()) - : GetElementPtrInst::Create(Src->getSourceElementType(), - Src->getOperand(0), Indices, - GEP.getName()); - } + if (auto *Src = dyn_cast(PtrOp)) + if (Instruction *I = visitGEPOfGEP(GEP, Src)) + return I; // Skip if GEP source element type is scalable. The type alloc size is unknown // at compile-time. From d122d91e37848d47bf0b1851ce0f1793521323cd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 27 Dec 2021 14:53:38 +0100 Subject: [PATCH 083/992] [InstCombine] Fix GEP of GEP fold with opaque pointers We need to check that result and source element types match, as this is no longer automatically enforced with opaque pointers. --- .../InstCombine/InstructionCombining.cpp | 7 +++++-- .../test/Transforms/InstCombine/opaque-ptr.ll | 21 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 9bc32e407eee..aaf07f25e474 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1887,8 +1887,11 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src) { // Combine Indices - If the source pointer to this getelementptr instruction - // is a getelementptr instruction, combine the indices of the two - // getelementptr instructions into a single instruction. + // is a getelementptr instruction with matching element type, combine the + // indices of the two getelementptr instructions into a single instruction. 
+ if (Src->getResultElementType() != GEP.getSourceElementType()) + return nullptr; + if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll index 558aa7c48a38..ab2a109f6103 100644 --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -161,3 +161,24 @@ define void @varargs_cast_opaque_to_typed(ptr %a) { call void (...) @varargs(i8* byval(i8) %b) ret void } + +define ptr @geps_combinable(ptr %a) { +; CHECK-LABEL: @geps_combinable( +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, { i32, i32 } }, ptr [[A:%.*]], i64 0, i32 1, i32 1 +; CHECK-NEXT: ret ptr [[A3]] +; + %a2 = getelementptr { i32, { i32, i32 } }, ptr %a, i32 0, i32 1 + %a3 = getelementptr { i32, i32 }, ptr %a2, i32 0, i32 1 + ret ptr %a3 +} + +define ptr @geps_not_combinable(ptr %a) { +; CHECK-LABEL: @geps_not_combinable( +; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A2]], i64 0, i32 1 +; CHECK-NEXT: ret ptr [[A3]] +; + %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 + %a3 = getelementptr { i32, i32 }, ptr %a2, i32 0, i32 1 + ret ptr %a3 +} From 2ec3ca747732e9cb9b051c1f979902c0291091c4 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 27 Dec 2021 14:15:03 +0000 Subject: [PATCH 084/992] [ARM] Extend IsCMPZCSINC to handle CMOV A 'CMOV 1, 0, CC, %cpsr, Cmp' is the same as a 'CSINC 0, 0, CC, Cmp', and can be treated the same in IsCMPZCSINC added in D114013. This allows us to remove the unnecessary CMOV in the same way that we could remove a CSINC. 
Differential Revision: https://reviews.llvm.org/D115188 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 28 +- llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 4 +- llvm/test/CodeGen/ARM/fpclamptosat_vec.ll | 898 ++++++------------ llvm/test/CodeGen/Thumb2/active_lane_mask.ll | 22 +- .../CodeGen/Thumb2/mve-fpclamptosat_vec.ll | 284 ++---- llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll | 188 ++-- llvm/test/CodeGen/Thumb2/mve-masked-load.ll | 32 +- llvm/test/CodeGen/Thumb2/mve-masked-store.ll | 176 ++-- llvm/test/CodeGen/Thumb2/mve-minmax.ll | 40 +- llvm/test/CodeGen/Thumb2/mve-minmaxi.ll | 162 +--- llvm/test/CodeGen/Thumb2/mve-pred-ext.ll | 99 +- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll | 351 +++---- .../CodeGen/Thumb2/mve-saturating-arith.ll | 186 ++-- llvm/test/CodeGen/Thumb2/mve-vcmp.ll | 12 +- llvm/test/CodeGen/Thumb2/mve-vcmpf.ll | 68 +- llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll | 116 +-- llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll | 120 +-- llvm/test/CodeGen/Thumb2/mve-vcmpr.ll | 8 - llvm/test/CodeGen/Thumb2/mve-vqmovn.ll | 52 +- llvm/test/CodeGen/Thumb2/mve-vqshrn.ll | 136 ++- 20 files changed, 1030 insertions(+), 1952 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 3d45db349644..31c6234f02d6 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14682,7 +14682,9 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid. +// Check that N is CMPZ(CSINC(0, 0, CC, X)), +// or CMPZ(CMOV(1, 0, CC, $cpsr, X)) +// return X if valid. 
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1))) return SDValue(); @@ -14696,12 +14698,24 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse()) CSInc = CSInc.getOperand(0); - if (CSInc.getOpcode() != ARMISD::CSINC || - !isNullConstant(CSInc.getOperand(0)) || - !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse()) - return SDValue(); - CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); - return CSInc.getOperand(3); + if (CSInc.getOpcode() == ARMISD::CSINC && + isNullConstant(CSInc.getOperand(0)) && + isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) { + CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); + return CSInc.getOperand(3); + } + if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) && + isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) { + CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); + return CSInc.getOperand(4); + } + if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) && + isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) { + CC = ARMCC::getOppositeCondition( + (ARMCC::CondCodes)CSInc.getConstantOperandVal(2)); + return CSInc.getOperand(4); + } + return SDValue(); } static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) { diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 0ff7e0635450..c6db4d3ae47e 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -482,11 +482,9 @@ define void @test_copysign(half* %p, half* %q) { ; CHECK-NEXT: vstr.16 s0, [sp] ; CHECK-NEXT: vldr.16 s0, [r0] ; CHECK-NEXT: ldrb r1, [sp, #1] -; CHECK-NEXT: ands r1, r1, #128 ; CHECK-NEXT: vabs.f16 s0, s0 -; CHECK-NEXT: movwne r1, #1 +; CHECK-NEXT: tst r1, #128 ; CHECK-NEXT: vneg.f16 s2, s0 -; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: 
vseleq.f16 s0, s0, s2 ; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: add sp, sp, #4 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 74dd9fe154aa..a4d470b72d4e 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -2379,67 +2379,42 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r2, r1 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: clz r7, r2 -; CHECK-NEXT: movwmi r3, #1 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: vmov r0, r2, d9 +; CHECK-NEXT: cmn r4, #-2147483647 ; CHECK-NEXT: mvn r3, #-2147483648 +; CHECK-NEXT: movlo r3, r4 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movne r3, r4 -; CHECK-NEXT: cmn r4, #-2147483647 -; CHECK-NEXT: movhs r4, r5 -; CHECK-NEXT: lsr r7, r7, #5 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movpl r4, r5 +; CHECK-NEXT: movpl r1, r6 ; CHECK-NEXT: moveq r4, r3 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movpl r2, r6 -; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: add r2, r2, #1 -; CHECK-NEXT: movwgt r3, #1 -; CHECK-NEXT: clz r2, r2 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: cmn r1, #1 ; CHECK-NEXT: mov r3, #-2147483648 -; CHECK-NEXT: movne r3, r4 ; CHECK-NEXT: mov r7, #-2147483648 +; CHECK-NEXT: movgt r3, r4 ; CHECK-NEXT: cmp r4, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 ; CHECK-NEXT: movls r4, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r4, r3 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r4, r3 +; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: vmov.32 d0[0], r4 -; CHECK-NEXT: movne r2, r0 ; CHECK-NEXT: cmn r0, 
#-2147483647 -; CHECK-NEXT: movlo r5, r0 -; CHECK-NEXT: clz r0, r1 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: movlo r2, r0 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r1, r6 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: add r1, r1, #1 -; CHECK-NEXT: movwgt r6, #1 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: movmi r5, r0 +; CHECK-NEXT: movmi r6, r1 +; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: cmn r6, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: movne r0, r5 +; CHECK-NEXT: vmov.32 d0[0], r4 +; CHECK-NEXT: movgt r0, r5 ; CHECK-NEXT: cmp r5, #-2147483648 ; CHECK-NEXT: movls r5, r7 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: movne r5, r0 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} @@ -2485,63 +2460,45 @@ entry: define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: movwmi r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: clz r3, r4 -; CHECK-NEXT: mvn r8, #0 -; CHECK-NEXT: movne r8, r0 +; CHECK-NEXT: vmov r2, r12, d9 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvn r3, #0 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: lsr r3, r3, #5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movne r8, r0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movpl r4, r5 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: movmi r3, r0 +; CHECK-NEXT: 
movpl r1, r5 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: mvn r7, #0 +; CHECK-NEXT: mvn r4, #0 ; CHECK-NEXT: movwgt r6, #1 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r8 +; CHECK-NEXT: movne r6, r3 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r6, r3 ; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r12 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 d0[0], r6 +; CHECK-NEXT: movmi r4, r0 ; CHECK-NEXT: movpl r1, r5 -; CHECK-NEXT: clz r0, r1 +; CHECK-NEXT: moveq r4, r0 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: movwgt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: clz r0, r4 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r6, r8 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: movne r5, r4 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r5, r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x double> %x to <2 x i64> %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -2560,129 +2517,78 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: mov r8, #-2147483648 ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: mov r9, #0 ; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r5, s18 +; CHECK-NEXT: vmov r5, s16 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movwmi r0, #1 -; CHECK-NEXT: cmp r0, #0 +; 
CHECK-NEXT: cmn r0, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: movne r0, r4 -; CHECK-NEXT: cmn r4, #-2147483647 -; CHECK-NEXT: movhs r4, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: movlo r0, r4 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movpl r4, r7 ; CHECK-NEXT: movpl r1, r9 +; CHECK-NEXT: moveq r4, r0 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: add r1, r1, #1 -; CHECK-NEXT: movwgt r0, #1 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: movne r0, r4 +; CHECK-NEXT: movgt r0, r4 ; CHECK-NEXT: cmp r4, #-2147483648 ; CHECK-NEXT: movls r4, r8 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r4, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movwmi r0, #1 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cmn r0, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: movne r0, r5 -; CHECK-NEXT: cmn r5, #-2147483647 -; CHECK-NEXT: movhs r5, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: mov r2, #-2147483648 +; CHECK-NEXT: movlo r0, r5 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movpl r5, r7 ; CHECK-NEXT: movpl r1, r9 +; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #-2147483648 -; CHECK-NEXT: movwgt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: add r0, r1, #1 -; CHECK-NEXT: movne r2, r5 -; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: movgt r2, r5 ; CHECK-NEXT: cmp r5, #-2147483648 ; CHECK-NEXT: movls r5, r8 -; CHECK-NEXT: lsr r1, r0, #5 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r5, 
r2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movwmi r0, #1 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cmn r0, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: movne r0, r6 -; CHECK-NEXT: cmn r6, #-2147483647 -; CHECK-NEXT: movhs r6, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: mov r2, #-2147483648 +; CHECK-NEXT: movlo r0, r6 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movpl r6, r7 ; CHECK-NEXT: movpl r1, r9 +; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #-2147483648 -; CHECK-NEXT: movwgt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: add r0, r1, #1 -; CHECK-NEXT: movne r2, r6 -; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: movgt r2, r6 ; CHECK-NEXT: cmp r6, #-2147483648 ; CHECK-NEXT: movls r6, r8 -; CHECK-NEXT: lsr r1, r0, #5 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r6, r2 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r6, r2 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: vmov.32 d0[0], r6 -; CHECK-NEXT: movne r2, r0 ; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: movlo r7, r0 -; CHECK-NEXT: clz r0, r1 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r7, r2 +; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: movlo r2, r0 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r1, r9 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: add r1, r1, #1 -; CHECK-NEXT: movwgt r9, #1 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movmi r7, r0 +; CHECK-NEXT: movmi r9, r1 +; CHECK-NEXT: moveq r7, r2 +; CHECK-NEXT: cmn r9, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: vmov.32 d1[1], r4 -; 
CHECK-NEXT: movne r0, r7 +; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: movgt r0, r7 ; CHECK-NEXT: cmp r7, #-2147483648 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: movls r7, r8 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r7, r0 +; CHECK-NEXT: cmn r9, #1 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -2743,115 +2649,75 @@ entry: define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: vmov r8, s18 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: movwmi r3, #1 -; CHECK-NEXT: clz r6, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvn r3, #0 -; CHECK-NEXT: movne r3, r2 -; CHECK-NEXT: lsr r6, r6, #5 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mvn r2, #0 +; CHECK-NEXT: movmi r2, r0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movne r3, r2 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r2, r0 ; CHECK-NEXT: movpl r1, r7 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r11, #0 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: movwgt r11, #1 -; CHECK-NEXT: cmp r11, #0 -; CHECK-NEXT: movne r11, r3 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: vmov r8, s16 -; CHECK-NEXT: movne r11, r3 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: cmp r1, #0 -; 
CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: mvn r10, #0 -; CHECK-NEXT: movwmi r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: clz r1, r4 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movpl r4, r7 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwgt r4, #1 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movwgt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: movne r6, r10 +; CHECK-NEXT: movne r4, r2 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: moveq r4, r2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: clz r3, r1 -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvn r2, #0 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: lsr r3, r3, #5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movmi r2, r0 ; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: clz r0, r1 +; CHECK-NEXT: moveq r2, r0 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: movwgt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: movne r5, r2 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: movne r5, r2 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r5, r2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movne r9, r0 -; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r9, r0 +; CHECK-NEXT: mvn r2, #0 +; CHECK-NEXT: movmi r2, r0 +; CHECK-NEXT: movpl r1, r7 +; CHECK-NEXT: moveq r2, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwgt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: movne r6, r2 +; 
CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r6, r2 +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: movmi r9, r0 ; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: clz r0, r1 +; CHECK-NEXT: moveq r9, r0 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: movwgt r7, #1 ; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: movne r7, r9 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: clz r0, r4 -; CHECK-NEXT: movne r7, r9 -; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r6, r10 -; CHECK-NEXT: vmov.32 d1[1], r11 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: moveq r7, r9 +; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2868,136 +2734,85 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s20, s2 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 -; CHECK-NEON-NEXT: vmov.f32 s18, s0 +; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s20 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: clz r3, r1 -; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: cmn r4, #-2147483647 ; CHECK-NEON-NEXT: mvn r2, #-2147483648 -; CHECK-NEON-NEXT: movne r2, r4 +; CHECK-NEON-NEXT: movlo r2, r4 ; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: cmn r4, #-2147483647 -; 
CHECK-NEON-NEXT: lsr r3, r3, #5 -; CHECK-NEON-NEXT: movhs r4, r7 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: moveq r4, r2 -; CHECK-NEON-NEXT: mov r9, #0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: mov r9, #0 +; CHECK-NEON-NEXT: movpl r4, r7 ; CHECK-NEON-NEXT: movpl r1, r9 +; CHECK-NEON-NEXT: moveq r4, r2 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movwgt r2, #1 -; CHECK-NEON-NEXT: add r1, r1, #1 -; CHECK-NEON-NEXT: clz r1, r1 -; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: mov r2, #-2147483648 ; CHECK-NEON-NEXT: mov r8, #-2147483648 -; CHECK-NEON-NEXT: movne r2, r4 +; CHECK-NEON-NEXT: movgt r2, r4 ; CHECK-NEON-NEXT: cmp r4, #-2147483648 ; CHECK-NEON-NEXT: movls r4, r8 -; CHECK-NEON-NEXT: lsr r1, r1, #5 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r4, r2 +; CHECK-NEON-NEXT: cmn r1, #1 +; CHECK-NEON-NEXT: movne r4, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: clz r2, r1 -; CHECK-NEON-NEXT: movwmi r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: cmn r0, #-2147483647 ; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: lsr r2, r2, #5 -; CHECK-NEON-NEXT: movne r0, r5 -; CHECK-NEON-NEXT: cmn r5, #-2147483647 -; CHECK-NEON-NEXT: movhs r5, r7 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: mov r2, #-2147483648 +; CHECK-NEON-NEXT: movlo r0, r5 ; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: movpl r5, r7 ; CHECK-NEON-NEXT: movpl r1, r9 +; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: vmov r0, s18 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: movwgt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: add r0, r1, #1 -; CHECK-NEON-NEXT: movne r2, r5 -; CHECK-NEON-NEXT: clz r0, r0 +; CHECK-NEON-NEXT: movgt r2, r5 ; CHECK-NEON-NEXT: cmp r5, 
#-2147483648 ; CHECK-NEON-NEXT: movls r5, r8 -; CHECK-NEON-NEXT: lsr r1, r0, #5 -; CHECK-NEON-NEXT: vmov r0, s18 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r5, r2 +; CHECK-NEON-NEXT: cmn r1, #1 +; CHECK-NEON-NEXT: movne r5, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: clz r2, r1 -; CHECK-NEON-NEXT: movwmi r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: cmn r0, #-2147483647 ; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: lsr r2, r2, #5 -; CHECK-NEON-NEXT: movne r0, r6 -; CHECK-NEON-NEXT: cmn r6, #-2147483647 -; CHECK-NEON-NEXT: movhs r6, r7 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r6, r0 +; CHECK-NEON-NEXT: mov r2, #-2147483648 +; CHECK-NEON-NEXT: movlo r0, r6 ; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: movpl r6, r7 ; CHECK-NEON-NEXT: movpl r1, r9 +; CHECK-NEON-NEXT: moveq r6, r0 +; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: movwgt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: add r0, r1, #1 -; CHECK-NEON-NEXT: movne r2, r6 -; CHECK-NEON-NEXT: clz r0, r0 +; CHECK-NEON-NEXT: movgt r2, r6 ; CHECK-NEON-NEXT: cmp r6, #-2147483648 ; CHECK-NEON-NEXT: movls r6, r8 -; CHECK-NEON-NEXT: lsr r1, r0, #5 -; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r6, r2 +; CHECK-NEON-NEXT: cmn r1, #1 +; CHECK-NEON-NEXT: movne r6, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: mvn r2, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d0[0], r6 -; CHECK-NEON-NEXT: movne r2, r0 ; CHECK-NEON-NEXT: cmn r0, #-2147483647 -; CHECK-NEON-NEXT: movlo r7, r0 -; CHECK-NEON-NEXT: clz r0, r1 
-; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r7, r2 +; CHECK-NEON-NEXT: mvn r2, #-2147483648 +; CHECK-NEON-NEXT: movlo r2, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r9 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: add r1, r1, #1 -; CHECK-NEON-NEXT: movwgt r9, #1 -; CHECK-NEON-NEXT: clz r1, r1 -; CHECK-NEON-NEXT: cmp r9, #0 +; CHECK-NEON-NEXT: movmi r7, r0 +; CHECK-NEON-NEXT: movmi r9, r1 +; CHECK-NEON-NEXT: moveq r7, r2 +; CHECK-NEON-NEXT: cmn r9, #1 ; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movne r0, r7 +; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: movgt r0, r7 ; CHECK-NEON-NEXT: cmp r7, #-2147483648 +; CHECK-NEON-NEXT: vmov.32 d0[0], r5 ; CHECK-NEON-NEXT: movls r7, r8 -; CHECK-NEON-NEXT: lsr r1, r1, #5 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r7, r0 +; CHECK-NEON-NEXT: cmn r9, #1 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: movne r7, r0 ; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -3013,131 +2828,80 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mov r4, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] -; CHECK-FP16-NEXT: vmov.u16 r2, d8[2] -; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] +; CHECK-FP16-NEXT: vmov.u16 r2, d8[0] +; CHECK-FP16-NEXT: cmn r4, #-2147483647 ; CHECK-FP16-NEXT: mvn r7, #-2147483648 ; CHECK-FP16-NEXT: mov r9, #0 ; CHECK-FP16-NEXT: mov r8, #-2147483648 ; CHECK-FP16-NEXT: vmov s18, r0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movwmi r0, #1 -; CHECK-FP16-NEXT: vmov s0, r2 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r4 -; CHECK-FP16-NEXT: cmn r4, 
#-2147483647 -; CHECK-FP16-NEXT: movhs r4, r7 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r4, r0 +; CHECK-FP16-NEXT: movlo r0, r4 ; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: movpl r4, r7 ; CHECK-FP16-NEXT: movpl r1, r9 +; CHECK-FP16-NEXT: moveq r4, r0 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: movwgt r0, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r4 +; CHECK-FP16-NEXT: vmov s0, r2 +; CHECK-FP16-NEXT: movgt r0, r4 ; CHECK-FP16-NEXT: cmp r4, #-2147483648 ; CHECK-FP16-NEXT: movls r4, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r4, r0 +; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: movne r4, r0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: movwmi r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: cmn r0, #-2147483647 ; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: vmov.f32 s0, s18 -; CHECK-FP16-NEXT: movne r0, r5 -; CHECK-FP16-NEXT: cmn r5, #-2147483647 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: movhs r5, r7 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: movlo r0, r5 ; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: movpl r5, r7 ; CHECK-FP16-NEXT: movpl r1, r9 +; CHECK-FP16-NEXT: moveq r5, r0 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: movwgt r0, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r5 +; CHECK-FP16-NEXT: movgt r0, r5 ; CHECK-FP16-NEXT: cmp r5, #-2147483648 ; CHECK-FP16-NEXT: movls r5, r8 -; 
CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: movne r5, r0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] ; CHECK-FP16-NEXT: mov r6, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: movwmi r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: cmn r0, #-2147483647 ; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: movne r0, r6 -; CHECK-FP16-NEXT: cmn r6, #-2147483647 -; CHECK-FP16-NEXT: movhs r6, r7 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] -; CHECK-FP16-NEXT: moveq r6, r0 +; CHECK-FP16-NEXT: movlo r0, r6 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: movpl r6, r7 ; CHECK-FP16-NEXT: movpl r1, r9 +; CHECK-FP16-NEXT: moveq r6, r0 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movwgt r0, #1 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r6 +; CHECK-FP16-NEXT: movgt r0, r6 ; CHECK-FP16-NEXT: cmp r6, #-2147483648 ; CHECK-FP16-NEXT: movls r6, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r6, r0 +; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: vmov s0, r2 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvn r2, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d0[0], r6 -; CHECK-FP16-NEXT: movne r2, r0 ; CHECK-FP16-NEXT: cmn r0, #-2147483647 -; CHECK-FP16-NEXT: movlo r7, r0 -; CHECK-FP16-NEXT: clz r0, r1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r7, r2 +; 
CHECK-FP16-NEXT: mvn r2, #-2147483648 +; CHECK-FP16-NEXT: movlo r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r9 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: movwgt r9, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: movmi r7, r0 +; CHECK-FP16-NEXT: movmi r9, r1 +; CHECK-FP16-NEXT: moveq r7, r2 +; CHECK-FP16-NEXT: cmn r9, #1 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: movne r0, r7 +; CHECK-FP16-NEXT: vmov.32 d1[0], r6 +; CHECK-FP16-NEXT: movgt r0, r7 ; CHECK-FP16-NEXT: cmp r7, #-2147483648 +; CHECK-FP16-NEXT: vmov.32 d0[0], r5 ; CHECK-FP16-NEXT: movls r7, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r7, r0 +; CHECK-FP16-NEXT: cmn r9, #1 +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: vmov.32 d0[1], r7 ; CHECK-FP16-NEXT: vpop {d8, d9} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -3247,233 +3011,157 @@ entry: define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: .pad #4 -; CHECK-NEON-NEXT: sub sp, sp, #4 +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s16, s2 -; CHECK-NEON-NEXT: vmov.f32 s18, s1 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r2, r0 -; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: vmov r2, s20 ; CHECK-NEON-NEXT: cmp r1, 
#0 -; CHECK-NEON-NEXT: mov r3, #0 -; CHECK-NEON-NEXT: movwmi r3, #1 -; CHECK-NEON-NEXT: clz r6, r1 -; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: mvn r3, #0 -; CHECK-NEON-NEXT: movne r3, r2 -; CHECK-NEON-NEXT: lsr r6, r6, #5 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movne r3, r2 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r7 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: movmi r3, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r3, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r11, #0 -; CHECK-NEON-NEXT: clz r1, r1 -; CHECK-NEON-NEXT: movwgt r11, #1 -; CHECK-NEON-NEXT: cmp r11, #0 -; CHECK-NEON-NEXT: movne r11, r3 -; CHECK-NEON-NEXT: lsr r1, r1, #5 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: vmov r8, s18 +; CHECK-NEON-NEXT: movwgt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: movne r7, r3 ; CHECK-NEON-NEXT: cmp r1, #0 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: vmov r8, s20 -; CHECK-NEON-NEXT: movne r11, r3 +; CHECK-NEON-NEXT: moveq r7, r3 +; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mvn r10, #0 -; CHECK-NEON-NEXT: movwmi r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: clz r1, r4 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: lsr r1, r1, #5 +; CHECK-NEON-NEXT: mvn r2, #0 +; CHECK-NEON-NEXT: movmi r2, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r2, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movpl r4, r7 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movwgt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: mov r0, r8 -; CHECK-NEON-NEXT: movne r6, r10 +; CHECK-NEON-NEXT: movwgt r4, #1 +; CHECK-NEON-NEXT: cmp r4, 
#0 +; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r4, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: clz r3, r1 -; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: mvn r2, #0 -; CHECK-NEON-NEXT: movne r2, r0 -; CHECK-NEON-NEXT: lsr r3, r3, #5 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movne r2, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r7 -; CHECK-NEON-NEXT: clz r0, r1 +; CHECK-NEON-NEXT: movmi r2, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r2, r0 +; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: movwgt r5, #1 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: movne r5, r2 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: movne r5, r2 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r5, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: clz r2, r1 -; CHECK-NEON-NEXT: movne r9, r0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r5 -; CHECK-NEON-NEXT: lsr r2, r2, #5 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r9, r0 +; CHECK-NEON-NEXT: vmov.32 d1[0], r5 +; CHECK-NEON-NEXT: movmi r9, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r9, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r7 -; CHECK-NEON-NEXT: clz r0, r1 +; CHECK-NEON-NEXT: movwgt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r4 +; CHECK-NEON-NEXT: movne r6, r9 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movwgt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movne r7, r9 -; 
CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: clz r0, r4 -; CHECK-NEON-NEXT: movne r7, r9 -; CHECK-NEON-NEXT: vmov.32 d1[0], r7 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: movne r6, r10 -; CHECK-NEON-NEXT: vmov.32 d1[1], r11 +; CHECK-NEON-NEXT: vmov.32 d1[1], r7 +; CHECK-NEON-NEXT: moveq r6, r9 ; CHECK-NEON-NEXT: vmov.32 d0[1], r6 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} -; CHECK-NEON-NEXT: add sp, sp, #4 -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-FP16-LABEL: ustest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-FP16-NEXT: .vsave {d8, d9} ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r4, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.u16 r2, d8[0] +; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: clz r3, r1 +; CHECK-FP16-NEXT: vmov.u16 r7, d8[0] +; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: vmov.u16 r3, d8[2] +; CHECK-FP16-NEXT: movpl r1, r5 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: mov r10, #0 -; CHECK-FP16-NEXT: vmov s0, r4 -; CHECK-FP16-NEXT: lsr r3, r3, #5 ; CHECK-FP16-NEXT: mvn r8, #0 -; CHECK-FP16-NEXT: vmov s18, r2 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: vmov s16, r2 ; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movne r2, r0 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movne r2, r0 +; CHECK-FP16-NEXT: movmi r2, r0 +; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: moveq r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 
-; CHECK-FP16-NEXT: movpl r1, r6 -; CHECK-FP16-NEXT: clz r0, r1 +; CHECK-FP16-NEXT: movwgt r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: movne r6, r2 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r2 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r10, r2 +; CHECK-FP16-NEXT: vmov s18, r3 +; CHECK-FP16-NEXT: moveq r6, r2 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: vmov.f32 s0, s18 -; CHECK-FP16-NEXT: movwmi r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: clz r1, r4 -; CHECK-FP16-NEXT: mvn r9, #0 -; CHECK-FP16-NEXT: movne r9, r0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: lsr r1, r1, #5 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movne r9, r0 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movpl r4, r6 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movwgt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: movne r5, r9 -; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: clz r3, r1 -; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movne r2, r0 -; CHECK-FP16-NEXT: lsr r3, r3, #5 -; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: movmi r2, r0 ; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: movne r2, r0 +; CHECK-FP16-NEXT: moveq r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r6 -; CHECK-FP16-NEXT: clz r0, r1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: vmov.u16 r1, d8[2] ; CHECK-FP16-NEXT: movwgt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: movne r7, r2 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r7, r2 -; CHECK-FP16-NEXT: vmov s0, r1 +; CHECK-FP16-NEXT: cmp 
r1, #0 +; CHECK-FP16-NEXT: moveq r7, r2 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: movne r8, r0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r7 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: movne r8, r0 +; CHECK-FP16-NEXT: mvn r2, #0 +; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: movmi r2, r0 +; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: moveq r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r6 -; CHECK-FP16-NEXT: clz r0, r1 +; CHECK-FP16-NEXT: movwgt r4, #1 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: movne r4, r2 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: movne r6, r8 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: clz r0, r4 -; CHECK-FP16-NEXT: movne r6, r8 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r5, r9 -; CHECK-FP16-NEXT: vmov.32 d1[1], r10 +; CHECK-FP16-NEXT: moveq r4, r2 +; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: vmov.32 d1[0], r4 +; CHECK-FP16-NEXT: movmi r8, r0 +; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: moveq r8, r0 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: movwgt r5, #1 +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: vmov.32 d0[0], r7 +; CHECK-FP16-NEXT: movne r5, r8 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: vmov.32 d1[1], r6 +; CHECK-FP16-NEXT: moveq r5, r8 ; CHECK-FP16-NEXT: vmov.32 d0[1], r5 ; CHECK-FP16-NEXT: vpop {d8, d9} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> 
@llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index f63477e03826..a5f9c511e068 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -15,19 +15,15 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { ; CHECK-NEXT: adds r6, r0, #1 ; CHECK-NEXT: adc r4, r4, #0 ; CHECK-NEXT: subs.w r0, lr, #-1 -; CHECK-NEXT: sbcs r0, r12, #0 ; CHECK-NEXT: vmov q1[2], q1[0], lr, r6 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: sbcs r0, r12, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r12, r4 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r12, ne +; CHECK-NEXT: csetm r12, lo ; CHECK-NEXT: subs.w r6, r6, #-1 -; CHECK-NEXT: sbcs r6, r4, #0 ; CHECK-NEXT: bfi r5, r12, #0, #8 -; CHECK-NEXT: cset r6, lo -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csetm r6, ne +; CHECK-NEXT: sbcs r6, r4, #0 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: csetm r6, lo ; CHECK-NEXT: bfi r5, r6, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q1, q1, q0 @@ -38,17 +34,13 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { ; CHECK-NEXT: subs r1, r6, r1 ; CHECK-NEXT: sbcs.w r1, r5, r4 ; CHECK-NEXT: vmov r5, r4, d1 -; CHECK-NEXT: cset r1, lo +; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: vldr d1, [sp, #16] -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r6, d3 ; CHECK-NEXT: subs r1, r1, r5 ; CHECK-NEXT: sbcs.w r1, r6, r4 -; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: add r0, sp, #24 diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 35b418f69226..08bcba9b5cd7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -23,37 +23,29 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NEXT: subs.w r3, r4, r12 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: mov.w r12, #-1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: adr r4, .LCPI0_1 ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: adr r4, .LCPI0_1 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #0, #8 ; CHECK-NEXT: rsbs.w r0, r3, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r5 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -99,18 +91,14 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, r3, #0, #8 -; CHECK-NEXT: 
cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vpop {d8, d9} @@ -141,35 +129,27 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r2, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: rsbs r1, r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r5 ; CHECK-NEXT: bfi r2, r0, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -220,21 +200,17 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, ne +; 
CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov r0, r4, d8 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 ; CHECK-NEXT: vmsr p0, r3 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r5, r0 @@ -245,15 +221,11 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r7, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r7, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r7 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -368,20 +340,16 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.i32 q5, #0x0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: vmov.i32 q5, #0x0 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: bl __fixhfdi @@ 
-393,15 +361,11 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r6, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r6, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -450,16 +414,12 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 ; CHECK-NEXT: adr r4, .LCPI9_1 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -468,15 +428,11 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r1, lr, r1 ; CHECK-NEXT: sbcs.w r1, r12, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: subs.w r1, lr, r3 ; CHECK-NEXT: sbcs.w r1, r12, r5 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -527,15 +483,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs r0, r0, r4 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lo 
-; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #0, #8 ; CHECK-NEXT: subs r0, r2, r4 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -573,15 +525,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -590,15 +538,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: sbcs.w r1, r0, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: rsbs r2, r3, #0 ; CHECK-NEXT: sbcs.w r2, r0, r5 ; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1299,37 +1243,29 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: subs.w r3, r4, r12 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: vmov q1[3], 
q1[1], r5, r1 +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: mov.w r12, #-1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: adr r4, .LCPI27_1 ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: adr r4, .LCPI27_1 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #0, #8 ; CHECK-NEXT: rsbs.w r0, r3, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r5 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1373,18 +1309,14 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, r3, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vpop {d8, d9} @@ -1414,35 +1346,27 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; 
CHECK-NEXT: cset r3, lt -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r2, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: rsbs r1, r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r5 ; CHECK-NEXT: bfi r2, r0, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1489,21 +1413,17 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov r0, r4, d8 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 ; CHECK-NEXT: vmsr p0, r3 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: 
bl __aeabi_f2ulz ; CHECK-NEXT: mov r5, r0 @@ -1514,15 +1434,11 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r7, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r7, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r7 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -1631,20 +1547,16 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.i32 q5, #0x0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: vmov.i32 q5, #0x0 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: bl __fixhfdi @@ -1656,15 +1568,11 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r6, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; 
CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r6, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -1711,16 +1619,12 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 ; CHECK-NEXT: adr r4, .LCPI36_1 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1729,15 +1633,11 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r1, lr, r1 ; CHECK-NEXT: sbcs.w r1, r12, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: subs.w r1, lr, r3 ; CHECK-NEXT: sbcs.w r1, r12, r5 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1786,15 +1686,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs r0, r0, r4 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #0, #8 ; CHECK-NEXT: subs r0, r2, r4 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1831,15 +1727,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { 
; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1848,15 +1740,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: sbcs.w r1, r0, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: rsbs r2, r3, #0 ; CHECK-NEXT: sbcs.w r2, r0, r5 ; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll index f92c575b7d22..c3d4276c712c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -95,55 +95,47 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: ldrd r12, lr, [r1] ; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: @ implicit-def: $q0 +; CHECK-LE-NEXT: @ implicit-def: $q1 ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 -; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr +; CHECK-LE-NEXT: vmov q0[2], q0[0], r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; 
CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne ; CHECK-LE-NEXT: ldrne r1, [r2] -; CHECK-LE-NEXT: vmovne.32 q0[0], r1 +; CHECK-LE-NEXT: vmovne.32 q1[0], r1 ; CHECK-LE-NEXT: lsls r1, r3, #30 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r1 -; CHECK-LE-NEXT: vmov r2, s2 +; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 +; CHECK-LE-NEXT: vmov r2, s6 ; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov r4, s0 -; CHECK-LE-NEXT: vmov q0[2], q0[0], r4, r2 +; CHECK-LE-NEXT: vmov r3, s0 +; CHECK-LE-NEXT: vmov r4, s4 +; CHECK-LE-NEXT: vmov q1[2], q1[0], r4, r2 ; CHECK-LE-NEXT: rsbs r5, r3, #0 ; CHECK-LE-NEXT: asr.w r12, r2, #31 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3, asr #31 -; CHECK-LE-NEXT: vmov r3, s6 -; CHECK-LE-NEXT: cset r2, lt +; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: asr.w lr, r4, #31 -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: vmov q1[3], q1[1], lr, r12 ; CHECK-LE-NEXT: rsbs r5, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vstrne d0, [r0] +; CHECK-LE-NEXT: vstrne d2, [r0] ; CHECK-LE-NEXT: lsls r1, r1, #30 ; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi d1, [r0, #8] +; CHECK-LE-NEXT: vstrmi d3, [r0, #8] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: pop {r4, r5, r7, pc} ; @@ -157,17 +149,13 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, 
r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt ; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q2 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q2 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB5_2 @@ -198,16 +186,12 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: sbcs.w r4, r1, r4, asr #31 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-BE-NEXT: vmov r3, s9 -; CHECK-BE-NEXT: cset r2, lt +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne ; CHECK-BE-NEXT: rsbs r5, r3, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -239,15 +223,11 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne @@ -266,17 +246,13 @@ 
define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: asr.w r12, r2, #31 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3, asr #31 ; CHECK-LE-NEXT: vmov r3, s6 -; CHECK-LE-NEXT: cset r2, lt +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: asr.w lr, r4, #31 -; CHECK-LE-NEXT: cmp r2, #0 ; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12 -; CHECK-LE-NEXT: csetm r2, ne ; CHECK-LE-NEXT: rsbs r5, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: itt ne @@ -299,17 +275,13 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt ; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q2 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q2 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB6_2 @@ -340,16 +312,12 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: sbcs.w r4, r1, r4, asr #31 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-BE-NEXT: vmov r3, s9 -; CHECK-BE-NEXT: cset r2, lt +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne ; CHECK-BE-NEXT: rsbs r5, r3, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; 
CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: itt mi @@ -384,15 +352,11 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne @@ -408,15 +372,11 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: rsbs r3, r2, #0 ; CHECK-LE-NEXT: vmov r3, s6 ; CHECK-LE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs r4, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne @@ -437,51 +397,43 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: vmov q1[3], q1[1], r12, lr +; CHECK-BE-NEXT: csetm 
lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB7_2 ; CHECK-BE-NEXT: @ %bb.1: @ %cond.load ; CHECK-BE-NEXT: ldr r3, [r2] ; CHECK-BE-NEXT: vmov.32 q2[1], r3 -; CHECK-BE-NEXT: vrev64.32 q1, q2 +; CHECK-BE-NEXT: vrev64.32 q0, q2 ; CHECK-BE-NEXT: .LBB7_2: @ %else -; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vrev64.32 q2, q1 ; CHECK-BE-NEXT: lsls r1, r1, #31 ; CHECK-BE-NEXT: beq .LBB7_4 ; CHECK-BE-NEXT: @ %bb.3: @ %cond.load1 ; CHECK-BE-NEXT: ldr r1, [r2, #4] -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: vmov.32 q0[3], r1 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.32 q1[3], r1 +; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: .LBB7_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vmov r2, s15 -; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-BE-NEXT: vand q0, q0, q1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: vmov r3, s13 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r12, ne +; CHECK-BE-NEXT: csetm r12, lt ; CHECK-BE-NEXT: rsbs r2, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r12, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -514,15 +466,11 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr ; 
CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne @@ -538,15 +486,11 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: rsbs r3, r2, #0 ; CHECK-LE-NEXT: vmov r3, s6 ; CHECK-LE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs r4, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: itt ne @@ -569,51 +513,43 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: vmov q1[3], q1[1], r12, lr +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB8_2 ; 
CHECK-BE-NEXT: @ %bb.1: @ %cond.load ; CHECK-BE-NEXT: ldr r3, [r2] ; CHECK-BE-NEXT: vmov.32 q2[1], r3 -; CHECK-BE-NEXT: vrev64.32 q1, q2 +; CHECK-BE-NEXT: vrev64.32 q0, q2 ; CHECK-BE-NEXT: .LBB8_2: @ %else -; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vrev64.32 q2, q1 ; CHECK-BE-NEXT: lsls r1, r1, #31 ; CHECK-BE-NEXT: beq .LBB8_4 ; CHECK-BE-NEXT: @ %bb.3: @ %cond.load1 ; CHECK-BE-NEXT: ldr r1, [r2, #4] -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: vmov.32 q0[3], r1 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.32 q1[3], r1 +; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: .LBB8_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vmov r2, s15 -; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-BE-NEXT: vand q0, q0, q1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: vmov r3, s13 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r12, ne +; CHECK-BE-NEXT: csetm r12, lt ; CHECK-BE-NEXT: rsbs r2, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r12, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: itt mi diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll index 2adaf7cf577a..afcea7901ccf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -1759,15 +1759,11 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 ; CHECK-LE-NEXT: vmov r12, lr, d1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt 
; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: beq .LBB49_2 @@ -1801,15 +1797,11 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 ; CHECK-BE-NEXT: vmov r12, lr, d2 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: bpl .LBB49_2 @@ -1848,15 +1840,11 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des ; CHECK-LE-NEXT: vmov r12, lr, d3 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: beq .LBB50_2 @@ -1890,15 +1878,11 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des ; CHECK-BE-NEXT: vmov r12, lr, d0 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, 
#0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: bpl .LBB50_2 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll index 6c598cf71b2e..29b29859e862 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll @@ -944,15 +944,11 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) { ; CHECK-LE-NEXT: vmov r12, lr, d1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne @@ -975,15 +971,11 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) { ; CHECK-BE-NEXT: vmov r12, lr, d2 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -1011,15 +1003,11 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, ; CHECK-LE-NEXT: vmov r12, lr, d3 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; 
CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne @@ -1042,15 +1030,11 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, ; CHECK-BE-NEXT: vmov r12, lr, d4 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -1216,33 +1200,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 -; CHECK-LE-NEXT: cset r1, gt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #0, #1 ; CHECK-LE-NEXT: vcmp.f32 s2, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne 
+; CHECK-LE-NEXT: bfi r1, r2, #0, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: vcmp.f32 s3, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #1, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: bfi r1, r2, #2, #1 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB25_5 @@ -1282,6 +1258,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1289,27 +1266,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 -; CHECK-BE-NEXT: cset r1, gt -; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #0, #1 ; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #0, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #1, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 
-; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #28 ; CHECK-BE-NEXT: bmi .LBB25_5 @@ -1356,33 +1324,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 -; CHECK-LE-NEXT: cset r1, gt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #0, #1 ; CHECK-LE-NEXT: vcmp.f32 s2, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #0, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: vcmp.f32 s3, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #1, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: bfi r1, r2, #2, #1 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB26_5 @@ -1422,6 +1382,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ 
-1429,27 +1390,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 -; CHECK-BE-NEXT: cset r1, gt -; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #0, #1 ; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #0, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #1, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #28 ; CHECK-BE-NEXT: bmi .LBB26_5 @@ -1496,33 +1448,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: .pad #20 ; CHECK-LE-NEXT: sub sp, #20 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 -; CHECK-LE-NEXT: cset r1, gt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #0, #1 ; CHECK-LE-NEXT: vcmp.f32 s2, #0 -; 
CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #0, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: vcmp.f32 s3, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #1, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: bfi r1, r2, #2, #1 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB27_5 @@ -1570,6 +1514,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: .pad #20 ; CHECK-BE-NEXT: sub sp, #20 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1577,27 +1522,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 -; CHECK-BE-NEXT: cset r1, gt -; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #0, #1 ; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #0, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #1, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #28 ; CHECK-BE-NEXT: bmi .LBB27_5 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll index 92355a8256eb..d536e6b72ac9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -47,15 +47,11 @@ define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -111,15 +107,11 @@ define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -176,15 +168,11 @@ define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #0, #8 ; 
CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -240,15 +228,11 @@ define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -329,18 +313,12 @@ define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x do ; CHECK-NEXT: vmov r12, r1, d9 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: vmov r2, r3, d11 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: bfi r4, r0, #0, #8 ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r4, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r4 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll index 2b4f3d66fe64..892be9a43307 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll @@ -153,21 +153,17 @@ define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs.w r1, lr, r3, asr #31 ; CHECK-NEXT: asr.w r5, r3, #31 -; CHECK-NEXT: cset r1, lt ; CHECK-NEXT: asr.w r12, r0, #31 -; CHECK-NEXT: cmp r1, #0 -; 
CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31 ; CHECK-NEXT: bfi r3, r1, #0, #8 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: asrs r4, r2, #31 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov q1[3], q1[1], lr, r12 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: vmov q0[3], q0[1], r5, r4 -; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -233,17 +229,13 @@ define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -265,17 +257,13 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d3 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d7 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d0 ; CHECK-NEXT: vmsr p0, r3 @@ -285,16 +273,12 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> 
%a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d5 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d1 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -453,17 +437,13 @@ define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -530,17 +510,13 @@ define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -562,17 +538,13 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; 
CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d3 -; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d7 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d0 ; CHECK-NEXT: vmsr p0, r3 @@ -582,16 +554,12 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d5 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d1 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -747,29 +715,25 @@ define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmov r0, s6 ; CHECK-NEXT: vmov r1, s4 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov lr, s2 ; CHECK-NEXT: asrs r2, r0, #31 ; CHECK-NEXT: asrs r3, r1, #31 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov q0[2], q0[0], r3, lr -; CHECK-NEXT: asr.w r12, lr, #31 -; CHECK-NEXT: asrs r2, r3, #31 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 +; CHECK-NEXT: asr.w lr, r3, #31 ; CHECK-NEXT: subs r3, r3, r1 -; CHECK-NEXT: sbcs.w r1, r2, r1, asr #31 -; CHECK-NEXT: vmov q0[3], q0[1], r2, r12 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r2, r1, #0, #8 -; 
CHECK-NEXT: subs.w r1, lr, r0 +; CHECK-NEXT: sbcs.w r1, lr, r1, asr #31 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: asr.w r12, r2, #31 +; CHECK-NEXT: bfi r3, r1, #0, #8 +; CHECK-NEXT: subs r1, r2, r0 ; CHECK-NEXT: sbcs.w r0, r12, r0, asr #31 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r2, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r2 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r12 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r0, #8, #8 +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: pop {r7, pc} %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) @@ -834,17 +798,13 @@ define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -866,17 +826,13 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d7 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d3 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d4 ; CHECK-NEXT: vmsr p0, r3 
@@ -886,16 +842,12 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d1 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d5 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -1054,17 +1006,13 @@ define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1131,17 +1079,13 @@ define arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1163,17 +1107,13 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x 
i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d7 -; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d3 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d4 ; CHECK-NEXT: vmsr p0, r3 @@ -1183,16 +1123,12 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d5 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll index e919891c446e..ea7a26ee3a9e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -79,14 +79,10 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) { ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: sbcs.w r1, r12, r3 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 ; CHECK-NEXT: 
vmov q0[3], q0[1], r1, r0 ; CHECK-NEXT: bx lr @@ -105,24 +101,22 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI6_0 -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: vmov r4, r5, d0 ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: clz r1, r6 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: lsrs r1, r1, #5 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csetm r1, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, eq +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 3 @@ -218,14 +212,10 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) { ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r12, r1 ; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: sbcs.w r1, r12, r3 ; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 @@ -247,27 +237,24 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2f64(<2 x double> %src) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI13_0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: vmov r4, r5, d0 ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq -; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: 
clz r0, r0 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: vldr s17, .LCPI13_1 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: cset r6, ne -; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov s18, r6 -; CHECK-NEXT: vmov.f32 s19, s17 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: cset r0, ne -; CHECK-NEXT: vmov s16, r0 -; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vldr s1, .LCPI13_1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov.f32 s3, s1 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 3 @@ -472,21 +459,18 @@ define arm_aapcs_vfpcc <2 x double> @uitofp_v2i1_v2f64(<2 x i64> %src) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: sbcs.w r0, r12, r3 +; CHECK-NEXT: sbcs.w r0, r4, r1 ; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: bl __aeabi_ui2d -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: cset r2, ne +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_ui2d ; CHECK-NEXT: vmov d8, r0, r1 @@ -506,21 +490,18 @@ define arm_aapcs_vfpcc <2 x double> @sitofp_v2i1_v2f64(<2 x i64> %src) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: mov.w r12, #0 -; 
CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: sbcs.w r0, r12, r3 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: sbcs.w r0, r4, r1 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bl __aeabi_i2d -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_i2d ; CHECK-NEXT: vmov d8, r0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 20112715a0a4..251b187e7bcf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -6,8 +6,8 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB0_8 ; CHECK-NEXT: @ %bb.1: @ %entry @@ -16,64 +16,57 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: mov r11, r2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r10, r2 ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: @ %vector.ph -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: bic r3, r3, #1 -; CHECK-NEXT: subs r7, r3, #2 +; CHECK-NEXT: bic r5, r3, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 +; CHECK-NEXT: subs r7, r5, #2 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; 
CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: add.w r3, r1, r5, lsl #2 ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI0_1 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r11, r2, r3, lsl #2 -; CHECK-NEXT: add.w r10, r1, r3, lsl #2 -; CHECK-NEXT: add.w r12, r0, r3, lsl #2 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r5, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r10, r2, r5, lsl #2 +; CHECK-NEXT: add.w r12, r0, r5, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r5, [r0], #8 -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: ldrd r4, r6, [r0], #8 +; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: ldrd r7, r8, [r1], #8 -; CHECK-NEXT: smull r8, r5, r8, r5 -; CHECK-NEXT: smull r4, r7, r7, r4 -; CHECK-NEXT: asrl r8, r5, #31 -; CHECK-NEXT: asrl r4, r7, #31 +; CHECK-NEXT: smull r4, r11, r7, r4 +; CHECK-NEXT: asrl r4, r11, #31 ; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 -; CHECK-NEXT: vmov q2[2], q2[0], r4, r8 ; CHECK-NEXT: mov.w r9, #-1 -; CHECK-NEXT: sbcs.w r3, r9, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: vmov q2[3], q2[1], r7, r5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #0, #8 -; CHECK-NEXT: rsbs.w r3, r8, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r9, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r6 +; CHECK-NEXT: sbcs.w r3, r9, r11 +; CHECK-NEXT: csetm r3, lt +; CHECK-NEXT: bfi r5, r3, #0, #8 +; CHECK-NEXT: smull r6, r3, r8, r6 +; CHECK-NEXT: asrl r6, r3, #31 +; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 +; CHECK-NEXT: vmov q2[2], q2[0], r4, r6 +; CHECK-NEXT: sbcs.w r7, r9, r3 +; CHECK-NEXT: vmov q2[3], q2[1], r11, r3 +; CHECK-NEXT: csetm r7, lt ; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: bfi r5, r7, #8, #8 +; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q2, q2, q0 ; 
CHECK-NEXT: vmov r3, r4, d4 ; CHECK-NEXT: subs r3, r3, r6 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d5 ; CHECK-NEXT: subs r3, r3, r6 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vpsel q2, q2, q1 @@ -83,6 +76,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r7, r3 ; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader @@ -93,7 +87,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r3, [r12], #4 -; CHECK-NEXT: ldr r4, [r10], #4 +; CHECK-NEXT: ldr r4, [r6], #4 ; CHECK-NEXT: smull r4, r3, r4, r3 ; CHECK-NEXT: asrl r4, r3, #31 ; CHECK-NEXT: subs r5, r1, r4 @@ -105,10 +99,10 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: subs r5, r4, r2 ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: csel r3, r4, r2, lt -; CHECK-NEXT: str r3, [r11], #4 +; CHECK-NEXT: str r3, [r10], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: add sp, #12 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.9: @@ -212,122 +206,108 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB1_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: 
movs r7, #0 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r9, r5 ; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: b .LBB1_6 ; CHECK-NEXT: .LBB1_3: @ %vector.ph -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: subs r7, r3, #4 +; CHECK-NEXT: bic r1, r3, #3 ; CHECK-NEXT: adr r4, .LCPI1_0 +; CHECK-NEXT: subs r7, r1, #4 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: add.w r7, r1, r3, lsl #2 -; CHECK-NEXT: strd r7, r3, [sp, #4] @ 8-byte Folded Spill ; CHECK-NEXT: adr r4, .LCPI1_1 -; CHECK-NEXT: add.w r11, r2, r3, lsl #2 -; CHECK-NEXT: add.w r12, r0, r3, lsl #2 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r11, r2, r1, lsl #2 +; CHECK-NEXT: add.w r9, r5, r1, lsl #2 +; CHECK-NEXT: add.w r12, r0, r1, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: mov.w r9, #-1 ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q4, [r5], #16 ; CHECK-NEXT: vldrw.u32 q3, [r0], #16 -; CHECK-NEXT: vldrw.u32 q4, [r1], #16 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: vmov.f32 s8, s14 -; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s20, s18 +; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: vmov.f32 s10, s15 ; CHECK-NEXT: vmov.f32 s22, s19 ; CHECK-NEXT: vmullb.s32 q6, q5, q2 -; CHECK-NEXT: vmov.f32 s14, s13 +; CHECK-NEXT: vmov.f32 s18, s17 ; CHECK-NEXT: vmov r4, r7, d12 ; CHECK-NEXT: asrl r4, r7, #31 -; CHECK-NEXT: vmov.f32 s18, s17 +; CHECK-NEXT: vmov.f32 s14, s13 ; CHECK-NEXT: rsbs.w r5, r4, #-2147483648 -; CHECK-NEXT: sbcs.w r5, r3, 
r7 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: sbcs.w r5, r2, r7 +; CHECK-NEXT: csetm r5, lt ; CHECK-NEXT: bfi r8, r5, #0, #8 ; CHECK-NEXT: vmov r10, r5, d13 ; CHECK-NEXT: asrl r10, r5, #31 +; CHECK-NEXT: vmov r6, s18 ; CHECK-NEXT: rsbs.w r3, r10, #-2147483648 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r10 -; CHECK-NEXT: sbcs.w r3, r6, r5 +; CHECK-NEXT: sbcs.w r3, r2, r5 ; CHECK-NEXT: vmov q2[3], q2[1], r7, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: mvn r10, #-2147483648 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r8, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r8 +; CHECK-NEXT: mvn r8, #-2147483648 ; CHECK-NEXT: vpsel q2, q2, q0 ; CHECK-NEXT: vmov r3, r4, d4 -; CHECK-NEXT: subs.w r3, r3, r10 +; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d5 -; CHECK-NEXT: subs.w r3, r3, r10 +; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: vmov r5, s18 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmov r3, s12 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vmov r4, s16 ; CHECK-NEXT: vpsel q2, q2, q1 -; CHECK-NEXT: smull r8, r7, r4, r3 -; CHECK-NEXT: asrl r8, r7, #31 -; CHECK-NEXT: rsbs.w r3, r8, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r9, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #0, #8 -; CHECK-NEXT: vmov r3, s14 -; CHECK-NEXT: smull r4, r5, r5, r3 -; CHECK-NEXT: asrl r4, r5, #31 +; CHECK-NEXT: smull r4, r7, r4, r3 +; CHECK-NEXT: asrl r4, r7, #31 ; CHECK-NEXT: rsbs.w r3, r4, #-2147483648 -; CHECK-NEXT: vmov q3[2], q3[0], r8, r4 
-; CHECK-NEXT: sbcs.w r3, r9, r5 -; CHECK-NEXT: vmov q3[3], q3[1], r7, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r6 +; CHECK-NEXT: sbcs.w r3, r2, r7 +; CHECK-NEXT: csetm r3, lt +; CHECK-NEXT: bfi r5, r3, #0, #8 +; CHECK-NEXT: vmov r3, s14 +; CHECK-NEXT: smull r6, r3, r6, r3 +; CHECK-NEXT: asrl r6, r3, #31 +; CHECK-NEXT: rsbs.w r1, r6, #-2147483648 +; CHECK-NEXT: vmov q3[2], q3[0], r4, r6 +; CHECK-NEXT: sbcs.w r1, r2, r3 +; CHECK-NEXT: vmov q3[3], q3[1], r7, r3 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: bfi r5, r1, #8, #8 +; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload ; CHECK-NEXT: vpsel q3, q3, q0 -; CHECK-NEXT: vmov r3, r4, d6 -; CHECK-NEXT: subs.w r3, r3, r10 -; CHECK-NEXT: sbcs r3, r4, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d7 -; CHECK-NEXT: subs.w r3, r3, r10 -; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r4, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r4 +; CHECK-NEXT: vmov r1, r3, d6 +; CHECK-NEXT: subs.w r1, r1, r8 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: bfi r3, r1, #0, #8 +; CHECK-NEXT: vmov r1, r4, d7 +; CHECK-NEXT: subs.w r1, r1, r8 +; CHECK-NEXT: sbcs r1, r4, #0 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: bfi r3, r1, #8, #8 +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q3, q3, q1 ; CHECK-NEXT: vmov.f32 s13, s14 ; CHECK-NEXT: vmov.f32 s14, s8 @@ -335,31 +315,30 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vstrb.8 q3, [r2], #16 ; CHECK-NEXT: le lr, .LBB1_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: ldrd r7, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload 
-; CHECK-NEXT: cmp r7, r3 +; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: cmp r1, r3 ; CHECK-NEXT: beq .LBB1_8 ; CHECK-NEXT: .LBB1_6: @ %for.body.preheader21 -; CHECK-NEXT: sub.w lr, r3, r7 -; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: sub.w lr, r3, r1 +; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: mov.w r3, #-2147483648 ; CHECK-NEXT: mvn r2, #-2147483648 ; CHECK-NEXT: .LBB1_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r4, [r12], #4 -; CHECK-NEXT: ldr r5, [r0], #4 -; CHECK-NEXT: smull r4, r5, r5, r4 -; CHECK-NEXT: asrl r4, r5, #31 -; CHECK-NEXT: subs r6, r3, r4 -; CHECK-NEXT: sbcs.w r6, r1, r5 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: ldr r1, [r12], #4 +; CHECK-NEXT: ldr r4, [r9], #4 +; CHECK-NEXT: smull r4, r1, r4, r1 +; CHECK-NEXT: asrl r4, r1, #31 +; CHECK-NEXT: subs r5, r3, r4 +; CHECK-NEXT: sbcs.w r5, r0, r1 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: csel r4, r4, r3, ne -; CHECK-NEXT: csel r5, r5, r1, ne -; CHECK-NEXT: subs r6, r4, r2 -; CHECK-NEXT: sbcs r5, r5, #0 -; CHECK-NEXT: csel r4, r4, r2, lt -; CHECK-NEXT: str r4, [r11], #4 +; CHECK-NEXT: csel r1, r1, r0, ne +; CHECK-NEXT: subs r5, r4, r2 +; CHECK-NEXT: sbcs r1, r1, #0 +; CHECK-NEXT: csel r1, r4, r2, lt +; CHECK-NEXT: str r1, [r11], #4 ; CHECK-NEXT: le lr, .LBB1_7 ; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #16 @@ -468,21 +447,21 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB2_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: adds r7, r3, #3 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: bic r7, r7, #3 +; CHECK-NEXT: adds r6, r3, #3 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: bic r6, r6, #3 ; CHECK-NEXT: adr r4, .LCPI2_1 -; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: adr r5, .LCPI2_2 +; CHECK-NEXT: subs r6, #4 ; CHECK-NEXT: vldrw.u32 q2, [r4] -; CHECK-NEXT: vldrw.u32 q3, [r5] -; CHECK-NEXT: add.w lr, 
r6, r7, lsr #2 -; CHECK-NEXT: adr r6, .LCPI2_0 -; CHECK-NEXT: subs r7, r3, #1 -; CHECK-NEXT: vldrw.u32 q0, [r6] ; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: vdup.32 q1, r7 ; CHECK-NEXT: mov.w r12, #-1 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 +; CHECK-NEXT: adr r5, .LCPI2_0 +; CHECK-NEXT: vldrw.u32 q0, [r5] +; CHECK-NEXT: adr r5, .LCPI2_2 +; CHECK-NEXT: subs r6, r3, #1 +; CHECK-NEXT: vldrw.u32 q3, [r5] +; CHECK-NEXT: vdup.32 q1, r6 ; CHECK-NEXT: mvn r8, #-2147483648 ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: .LBB2_2: @ %vector.body @@ -502,14 +481,12 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov.f32 s28, s26 ; CHECK-NEXT: vmov.f32 s30, s27 ; CHECK-NEXT: vmullb.s32 q0, q7, q4 -; CHECK-NEXT: vmov.f32 s22, s21 +; CHECK-NEXT: vmov.f32 s22, s25 ; CHECK-NEXT: vmov r10, r5, d0 ; CHECK-NEXT: asrl r10, r5, #31 ; CHECK-NEXT: rsbs.w r7, r10, #-2147483648 ; CHECK-NEXT: sbcs.w r7, r12, r5 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csetm r7, ne +; CHECK-NEXT: csetm r7, lt ; CHECK-NEXT: bfi r4, r7, #0, #8 ; CHECK-NEXT: vmov r6, r7, d1 ; CHECK-NEXT: asrl r6, r7, #31 @@ -517,72 +494,58 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q0[2], q0[0], r10, r6 ; CHECK-NEXT: sbcs.w r3, r12, r7 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt +; CHECK-NEXT: vmov r7, s22 ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vpsel q4, q0, q2 -; CHECK-NEXT: vmov.f32 s2, s25 -; CHECK-NEXT: vmov r3, r4, d8 -; CHECK-NEXT: vmov r7, s2 +; CHECK-NEXT: vpsel q0, q0, q2 +; CHECK-NEXT: vmov r3, r4, d0 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; 
CHECK-NEXT: vmov r3, r5, d9 +; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmov r3, s20 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vpsel q4, q4, q3 +; CHECK-NEXT: vpsel q4, q0, q3 +; CHECK-NEXT: vmov.f32 s2, s21 ; CHECK-NEXT: smull r10, r5, r4, r3 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: asrl r10, r5, #31 ; CHECK-NEXT: rsbs.w r3, r10, #-2147483648 ; CHECK-NEXT: sbcs.w r3, r12, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, s22 -; CHECK-NEXT: smull r6, r7, r7, r3 -; CHECK-NEXT: asrl r6, r7, #31 -; CHECK-NEXT: rsbs.w r3, r6, #-2147483648 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: smull r6, r3, r7, r3 +; CHECK-NEXT: asrl r6, r3, #31 +; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 ; CHECK-NEXT: vmov q0[2], q0[0], r10, r6 -; CHECK-NEXT: sbcs.w r3, r12, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r5, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r4, r3, #8, #8 +; CHECK-NEXT: sbcs.w r7, r12, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r3 +; CHECK-NEXT: csetm r7, lt +; CHECK-NEXT: bfi r4, r7, #8, #8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vpsel q5, q0, q2 -; CHECK-NEXT: vmov r3, r4, d10 +; CHECK-NEXT: vpsel q0, q0, q2 +; CHECK-NEXT: vmov r3, r4, d0 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d11 +; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; 
CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vpsel q0, q5, q3 +; CHECK-NEXT: vpsel q0, q0, q3 ; CHECK-NEXT: vldr p0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: vmov.f32 s1, s2 ; CHECK-NEXT: vmov.f32 s2, s16 @@ -693,9 +656,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: lsrl r4, r9, #31 ; CHECK-NEXT: subs.w r5, r4, #-1 ; CHECK-NEXT: sbcs r5, r9, #0 -; CHECK-NEXT: cset r5, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r8, r5, #0, #8 ; CHECK-NEXT: umull r6, r5, r3, r6 ; CHECK-NEXT: lsrl r6, r5, #31 @@ -703,9 +664,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q1[2], q1[0], r4, r6 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r9, r5 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: bfi r8, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r8 ; CHECK-NEXT: vpsel q1, q1, q0 @@ -858,9 +817,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: subs.w r5, r4, #-1 ; CHECK-NEXT: vmullb.u32 q4, q3, q1 ; CHECK-NEXT: sbcs r5, r9, #0 -; CHECK-NEXT: cset r5, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r8, r5, d11 ; CHECK-NEXT: lsrl r8, r5, #31 @@ -868,9 +825,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q2[2], q2[0], r4, r8 ; CHECK-NEXT: sbcs r7, r5, #0 ; CHECK-NEXT: vmov q2[3], q2[1], r9, r5 -; CHECK-NEXT: cset r7, lo -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csetm r7, ne +; CHECK-NEXT: csetm r7, lo ; CHECK-NEXT: bfi r6, r7, #8, #8 ; CHECK-NEXT: vmov r4, r7, d8 ; CHECK-NEXT: lsrl r4, r7, #31 @@ -879,9 +834,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: 
mov.w r6, #0 ; CHECK-NEXT: sbcs r5, r7, #0 ; CHECK-NEXT: vpsel q2, q2, q0 -; CHECK-NEXT: cset r5, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r2, r5, d9 ; CHECK-NEXT: lsrl r2, r5, #31 @@ -889,9 +842,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q1[2], q1[0], r4, r2 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r7, r5 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: bfi r6, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll index 10ecdc01918d..bbc0ff9bd1be 100644 --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -36,21 +36,19 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r0, r2, d2 -; CHECK-NEXT: vmov r3, r1, d0 -; CHECK-NEXT: adds.w r12, r3, r0 +; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: adds.w r12, r2, r0 ; CHECK-NEXT: vmov r0, r4, d1 -; CHECK-NEXT: adc.w lr, r1, r2 -; CHECK-NEXT: subs.w r3, r12, r3 -; CHECK-NEXT: sbcs.w r1, lr, r1 -; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: adc.w lr, r3, r1 +; CHECK-NEXT: subs.w r2, r12, r2 +; CHECK-NEXT: sbcs.w r2, lr, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r1, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it mi -; CHECK-NEXT: eormi r1, r1, #1 +; CHECK-NEXT: eormi r2, r2, #1 +; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: vmov r1, r3, d3 ; CHECK-NEXT: adds r1, r1, r0 @@ -59,14 
+57,12 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: sbcs.w r0, r5, r4 ; CHECK-NEXT: vmov q0[2], q0[0], r12, r1 ; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: asr.w r1, lr, #31 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 -; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: eormi r0, r0, #1 +; CHECK-NEXT: asr.w r1, lr, #31 ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: asrs r0, r5, #31 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 @@ -122,32 +118,28 @@ entry: define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: uadd_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: adds.w lr, r2, r0 -; CHECK-NEXT: vmov r0, r4, d0 -; CHECK-NEXT: adc.w r12, r3, r1 -; CHECK-NEXT: subs.w r2, lr, r2 -; CHECK-NEXT: sbcs.w r2, r12, r3 -; CHECK-NEXT: vmov r3, r1, d2 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: adds r3, r3, r0 -; CHECK-NEXT: adcs r1, r4 -; CHECK-NEXT: subs r0, r3, r0 -; CHECK-NEXT: sbcs.w r0, r1, r4 -; CHECK-NEXT: vmov q1[2], q1[0], r3, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: vmov q1[3], q1[1], r1, r12 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: adds r5, r2, r0 +; CHECK-NEXT: adc.w lr, r3, r1 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, lr, r3 +; CHECK-NEXT: vmov r3, r12, d2 +; CHECK-NEXT: vmov r1, r4, d0 +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: adds r3, r3, r1 +; CHECK-NEXT: adc.w r0, r4, r12 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: sbcs.w r1, r0, r4 +; CHECK-NEXT: vmov q1[2], q1[0], r3, r5 +; 
CHECK-NEXT: csetm r1, lo +; CHECK-NEXT: vmov q1[3], q1[1], r0, lr +; CHECK-NEXT: vmov q0[2], q0[0], r1, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 ; CHECK-NEXT: vorr q0, q1, q0 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 @@ -187,55 +179,47 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: ssub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r1, r3, d2 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: rsbs r2, r1, #0 -; CHECK-NEXT: sbcs.w r2, r0, r3 -; CHECK-NEXT: vmov r2, r4, d0 -; CHECK-NEXT: cset lr, lt -; CHECK-NEXT: subs.w r12, r2, r1 -; CHECK-NEXT: sbc.w r5, r4, r3 -; CHECK-NEXT: subs.w r2, r12, r2 -; CHECK-NEXT: sbcs.w r2, r5, r4 -; CHECK-NEXT: vmov r3, r4, d3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne -; CHECK-NEXT: cmp.w lr, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r2, r2, #1 +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: vmov r2, r3, d2 +; CHECK-NEXT: vmov r1, r0, d0 +; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: subs.w r12, r1, r2 +; CHECK-NEXT: sbc.w lr, r0, r3 +; CHECK-NEXT: subs.w r1, r12, r1 +; CHECK-NEXT: sbcs.w r0, lr, r0 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: rsbs r1, r3, #0 -; CHECK-NEXT: sbcs.w r1, r0, r4 -; CHECK-NEXT: bfi r0, r2, #0, #8 -; CHECK-NEXT: vmov r2, r1, d1 -; CHECK-NEXT: cset lr, lt -; CHECK-NEXT: subs r3, r2, r3 -; CHECK-NEXT: sbc.w r4, r1, r4 -; CHECK-NEXT: subs r2, r3, r2 -; CHECK-NEXT: sbcs.w r1, r4, r1 -; CHECK-NEXT: vmov q0[2], q0[0], r12, r3 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: vmov q0[3], q0[1], r5, r4 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r1, ne -; CHECK-NEXT: cmp.w lr, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne 
r1, r1, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r0, r1, #8, #8 -; CHECK-NEXT: asrs r1, r5, #31 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: asrs r0, r4, #31 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: it lt +; CHECK-NEXT: eorlt r0, r0, #1 +; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: subs r6, r4, r2 +; CHECK-NEXT: sbc.w r7, r5, r3 +; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: sbcs.w r4, r7, r5 +; CHECK-NEXT: vmov q0[2], q0[0], r12, r6 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: it lt +; CHECK-NEXT: eorlt r4, r4, #1 +; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: bfi r1, r0, #8, #8 +; CHECK-NEXT: asrs r0, r7, #31 +; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: asr.w r1, lr, #31 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r7 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: adr r0, .LCPI11_0 ; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: veor q1, q1, q2 ; CHECK-NEXT: vpsel q0, q1, q0 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI11_0: @@ -281,32 +265,28 @@ entry: define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: usub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: subs.w lr, r2, r0 -; CHECK-NEXT: vmov r0, r4, d0 -; CHECK-NEXT: sbc.w r12, r3, r1 -; CHECK-NEXT: subs.w r2, r2, lr -; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: vmov r3, r1, d2 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: subs r3, r0, r3 -; CHECK-NEXT: sbc.w r1, r4, r1 -; CHECK-NEXT: subs r0, r0, r3 -; CHECK-NEXT: sbcs.w r0, r4, r1 -; CHECK-NEXT: vmov 
q1[2], q1[0], r3, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: vmov q1[3], q1[1], r1, r12 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: subs r5, r2, r0 +; CHECK-NEXT: sbc.w lr, r3, r1 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs.w r2, r3, lr +; CHECK-NEXT: vmov r3, r12, d2 +; CHECK-NEXT: vmov r1, r4, d0 +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: subs r3, r1, r3 +; CHECK-NEXT: sbc.w r0, r4, r12 +; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sbcs.w r1, r4, r0 +; CHECK-NEXT: vmov q1[2], q1[0], r3, r5 +; CHECK-NEXT: csetm r1, lo +; CHECK-NEXT: vmov q1[3], q1[1], r0, lr +; CHECK-NEXT: vmov q0[2], q0[0], r1, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 ; CHECK-NEXT: vbic q0, q1, q0 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll index fae8e393ea94..f8e0a493b403 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll @@ -399,17 +399,13 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_slt_v2i64(<2 x i64> %src, <2 x i64> %srcb ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q2, q3 @@ -470,8 +466,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s4 ; 
CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -487,8 +481,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll index d5cddc6fcfeb..5802b0073f29 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll @@ -279,25 +279,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1122,69 +1114,53 @@ entry: define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %src2, <8 x half> %a, <8 x half> %b) { ; 
CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .vsave {d8, d9} -; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 ; CHECK-MVE-NEXT: vcmp.f16 s18, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: vmovx.f16 s20, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s22, s12 ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: vmovx.f16 s4, s5 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vseleq.f16 s16, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s1 ; CHECK-MVE-NEXT: vcmp.f16 s8, s4 -; CHECK-MVE-NEXT: vmovx.f16 s4, s9 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s13 -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: vins.f16 s0, s16 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vmovx.f16 s16, s13 ; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 +; CHECK-MVE-NEXT: vseleq.f16 s4, s16, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vcmp.f16 s8, s4 -; CHECK-MVE-NEXT: vmovx.f16 s4, s10 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s8, s14 ; CHECK-MVE-NEXT: vmovx.f16 
s6, s3 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s5, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s4, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s6, s15 ; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s4 -; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: vcmp_une_v8f16: diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll index 37225a44b365..de6e85a8f588 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -300,25 +300,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 -; CHECK-MVE-NEXT: cmp 
r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1137,59 +1129,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half %src2, < ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: vmovx.f16 s5, s8 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s8 +; CHECK-MVE-NEXT: vmovx.f16 s7, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s7, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s13 +; CHECK-MVE-NEXT: vmovx.f16 s8, s9 ; CHECK-MVE-NEXT: vins.f16 s0, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 +; CHECK-MVE-NEXT: vmovx.f16 s12, s13 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s14 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s10 +; CHECK-MVE-NEXT: vmovx.f16 s12, s14 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: 
vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s15 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s10, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s6 ; CHECK-MVE-NEXT: bx lr @@ -1961,25 +1937,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-MVE-NEXT: vcmp.f32 s4, s0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2798,59 +2766,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half %src2, ; CHECK-MVE-LABEL: vcmp_r_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: vmovx.f16 s5, s8 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s8 +; CHECK-MVE-NEXT: vmovx.f16 s7, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s7, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s13 +; CHECK-MVE-NEXT: vmovx.f16 s8, s9 ; CHECK-MVE-NEXT: vins.f16 s0, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 +; CHECK-MVE-NEXT: vmovx.f16 s12, s13 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s14 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s10 +; CHECK-MVE-NEXT: vmovx.f16 s12, s14 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s2 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; 
CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s15 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s3 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s10, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s6 ; CHECK-MVE-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll index 722a5313b1d6..809bf664fc95 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -279,25 +279,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1074,59 +1066,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: vcmp.f16 s12, #0 -; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmovx.f16 s13, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s5 +; CHECK-MVE-NEXT: vins.f16 s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 ; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vins.f16 s0, s12 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s10 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s9, s5 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; 
CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s10, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmovx.f16 s6, s7 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s11, s7 ; CHECK-MVE-NEXT: vins.f16 s3, s4 ; CHECK-MVE-NEXT: bx lr @@ -1856,25 +1832,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; 
CHECK-MVE-NEXT: bx lr ; @@ -2651,59 +2619,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, <8 x half> ; CHECK-MVE-LABEL: vcmp_r_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: vcmp.f16 s12, #0 -; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmovx.f16 s13, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s5 +; CHECK-MVE-NEXT: vins.f16 s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 ; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vins.f16 s0, s12 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s10 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s9, s5 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; 
CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s10, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmovx.f16 s6, s7 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s11, s7 ; CHECK-MVE-NEXT: vins.f16 s3, s4 ; CHECK-MVE-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll index 9b765e8ac938..707290f4f66c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll @@ -507,8 +507,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -524,8 +522,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -1056,8 +1052,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -1073,8 +1067,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, 
; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll index a3b1cc0a24a8..75f7350fcd5b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll @@ -169,17 +169,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) { ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: subs.w r0, r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bfi r3, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: subs.w r1, r1, r12 ; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #8, #8 ; CHECK-NEXT: adr r1, .LCPI12_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] @@ -189,16 +185,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) { ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 ; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 ; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: adr r0, .LCPI12_1 @@ -233,17 +225,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) { ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: 
cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bfi r3, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 ; CHECK-NEXT: sbcs.w r1, r12, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #8, #8 ; CHECK-NEXT: adr r1, .LCPI13_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] @@ -253,16 +241,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) { ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: adr r0, .LCPI13_1 @@ -297,16 +281,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) { ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r2, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -325,16 +305,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) { ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: 
bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r2, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll index 1220ca2f6070..f78d36222c31 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll @@ -182,49 +182,41 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r2, r1, d1 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: mvn r12, #-2147483648 -; CHECK-NEXT: vmov r0, r3, d0 -; CHECK-NEXT: asrl r2, r1, #3 -; CHECK-NEXT: asrl r0, r3, #3 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: subs.w r0, r0, r12 -; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: asrl r0, r1, #3 +; CHECK-NEXT: asrl r2, r3, #3 +; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 +; CHECK-NEXT: subs.w r2, r2, r12 +; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm lr, lt +; CHECK-NEXT: subs.w r0, r0, r12 +; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: bfi r2, lr, #0, #8 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: adr r0, .LCPI12_0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmsr p0, r2 +; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm lr, ne -; CHECK-NEXT: subs.w r2, r2, r12 -; CHECK-NEXT: sbcs r1, r1, #0 -; CHECK-NEXT: bfi r3, lr, #0, #8 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r3, r1, #8, #8 -; CHECK-NEXT: adr r1, .LCPI12_0 -; CHECK-NEXT: vldrw.u32 q1, 
[r1] -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: vpsel q0, q0, q1 -; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 -; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: vmov r1, r2, d1 -; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 -; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r0, r1, #8, #8 -; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 +; CHECK-NEXT: sbcs.w r0, r2, r1 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r0, #0, #8 +; CHECK-NEXT: vmov r0, r1, d1 +; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 +; CHECK-NEXT: sbcs.w r0, r2, r1 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI12_1 ; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: pop {r7, pc} ; CHECK-NEXT: .p2align 4 @@ -251,53 +243,45 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) { ; CHECK-LABEL: vqshrni64_sminmax: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r2, r1, d0 ; CHECK-NEXT: mov.w r12, #-1 ; CHECK-NEXT: asrl r2, r1, #3 -; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: vmov r4, r5, d1 ; CHECK-NEXT: rsbs.w r0, r2, #-2147483648 +; CHECK-NEXT: asrl r4, r5, #3 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r0, r3, #0, #8 -; CHECK-NEXT: vmov r4, r3, d1 -; CHECK-NEXT: asrl r4, r3, #3 -; CHECK-NEXT: rsbs.w r5, r4, #-2147483648 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm lr, lt +; CHECK-NEXT: rsbs.w r0, r4, #-2147483648 +; 
CHECK-NEXT: sbcs.w r0, r12, r5 +; CHECK-NEXT: bfi r3, lr, #0, #8 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov q0[2], q0[0], r2, r4 -; CHECK-NEXT: sbcs.w r5, r12, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne -; CHECK-NEXT: bfi r0, r5, #8, #8 -; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI13_0 ; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmsr p0, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: mvn r2, #-2147483648 ; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: subs r0, r0, r2 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi lr, r0, #0, #8 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r6, r0, #0, #8 ; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: subs r0, r0, r2 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi lr, r0, #8, #8 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r6, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI13_1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vmsr p0, lr +; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q0, q0, q1 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI13_0: @@ -331,16 +315,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) { ; CHECK-NEXT: subs.w r2, r2, #-1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: 
csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -364,16 +344,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) { ; CHECK-NEXT: subs.w r2, r2, #-1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 From 7171af744543433ac75b232eb7dfdaef7efd4d7a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 27 Dec 2021 07:35:35 -0800 Subject: [PATCH 085/992] [SLP][NFC]Add a test for shuffled entries with different vector sizes, NFC. 
--- .../X86/shuffled-gathers-diff-size.ll | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll new file mode 100644 index 000000000000..5e70a05ee5b2 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-2 | FileCheck %s + +define void @foo(i32* noalias nocapture writeonly %B, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %C, i32 %n, i32 %m) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], [[N:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C:%.*]], align 4 +; CHECK-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP1]], [[M:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL2]], [[MUL]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[MUL4:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: store i32 [[MUL4]], i32* [[B:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP3]], [[M]] +; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[MUL]] +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[MUL12:%.*]] = mul nsw i32 [[ADD10]], [[TMP4]] +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* 
[[B]], i64 1 +; CHECK-NEXT: store i32 [[MUL12]], i32* [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP2]], [[N]] +; CHECK-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP4]], [[M]] +; CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[MUL17]], [[MUL15]] +; CHECK-NEXT: [[MUL20:%.*]] = mul nsw i32 [[ADD18]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 +; CHECK-NEXT: store i32 [[MUL20]], i32* [[ARRAYIDX21]], align 4 +; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX24]], align 4 +; CHECK-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP5]], [[M]] +; CHECK-NEXT: [[ADD26:%.*]] = add nsw i32 [[MUL25]], [[MUL15]] +; CHECK-NEXT: [[MUL28:%.*]] = mul nsw i32 [[ADD26]], [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 +; CHECK-NEXT: store i32 [[MUL28]], i32* [[ARRAYIDX29]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %A, align 4 + %mul = mul nsw i32 %0, %n + %1 = load i32, i32* %C, align 4 + %mul2 = mul nsw i32 %1, %m + %add = add nsw i32 %mul2, %mul + %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1 + %2 = load i32, i32* %arrayidx3, align 4 + %mul4 = mul nsw i32 %add, %2 + store i32 %mul4, i32* %B, align 4 + %arrayidx8 = getelementptr inbounds i32, i32* %C, i64 1 + %3 = load i32, i32* %arrayidx8, align 4 + %mul9 = mul nsw i32 %3, %m + %add10 = add nsw i32 %mul9, %mul + %arrayidx11 = getelementptr inbounds i32, i32* %C, i64 2 + %4 = load i32, i32* %arrayidx11, align 4 + %mul12 = mul nsw i32 %add10, %4 + %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 1 + store i32 %mul12, i32* %arrayidx13, align 4 + %mul15 = mul nsw i32 %2, %n + %mul17 = mul nsw i32 %4, %m + %add18 = add nsw i32 %mul17, %mul15 + %mul20 = mul nsw i32 %add18, %0 + %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 2 + store i32 %mul20, i32* %arrayidx21, align 4 + %arrayidx24 = getelementptr inbounds i32, 
i32* %C, i64 3 + %5 = load i32, i32* %arrayidx24, align 4 + %mul25 = mul nsw i32 %5, %m + %add26 = add nsw i32 %mul25, %mul15 + %mul28 = mul nsw i32 %add26, %1 + %arrayidx29 = getelementptr inbounds i32, i32* %B, i64 3 + store i32 %mul28, i32* %arrayidx29, align 4 + ret void +} From a697a0a4b669a2b99cc973fe5f5164df309d285c Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Mon, 27 Dec 2021 11:31:59 -0500 Subject: [PATCH 086/992] [OpenMP][Plugin] Introduce generic resource pool Currently CUDA streams are managed by `StreamManagerTy`. It works very well. Now we have the need that some resources, such as CUDA stream and event, will be hold by `libomptarget`. It is always good to buffer those resources. What's more important, given the way that `libomptarget` and plugins are connected, we cannot make sure whether plugins are still alive when `libomptarget` is destroyed. That leads to an issue that those resouces hold by `libomptarget` might not be released correctly. As a result, we need an unified management of all the resources that can be shared between `libomptarget` and plugins. `ResourcePoolTy` is designed to manage the type of resource for one device. It has to work with an allocator which is supposed to provide `create` and `destroy`. In this way, when the plugin is destroyed, we can make sure that all resources allocated from native runtime library will be released correctly, no matter whether `libomptarget` starts its destroy. 
Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D111954 --- openmp/libomptarget/plugins/cuda/src/rtl.cpp | 248 ++++++++++--------- 1 file changed, 125 insertions(+), 123 deletions(-) diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 90b6281e3e13..ed26f2f7731f 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -187,137 +187,125 @@ struct DeviceDataTy { int NumThreads = 0; }; -class StreamManagerTy { - int NumberOfDevices; - // The initial size of stream pool - int EnvNumInitialStreams; - // Per-device stream mutex - std::vector> StreamMtx; - // Per-device stream Id indicates the next available stream in the pool - std::vector NextStreamId; - // Per-device stream pool - std::vector> StreamPool; - // Reference to per-device data - std::vector &DeviceData; - - // If there is no CUstream left in the pool, we will resize the pool to - // allocate more CUstream. This function should be called with device mutex, - // and we do not resize to smaller one. - void resizeStreamPool(const int DeviceId, const size_t NewSize) { - std::vector &Pool = StreamPool[DeviceId]; - const size_t CurrentSize = Pool.size(); - assert(NewSize > CurrentSize && "new size is not larger than current size"); - - CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); - if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n")) { - // We will return if cannot switch to the right context in case of - // creating bunch of streams that are not corresponding to the right - // device. The offloading will fail later because selected CUstream is - // nullptr. - return; - } - - Pool.resize(NewSize, nullptr); +/// Resource allocator where \p T is the resource type. +/// Functions \p create and \p destroy return OFFLOAD_SUCCESS and OFFLOAD_FAIL +/// accordingly. The implementation should not raise any exception. 
+template class AllocatorTy { +public: + /// Create a resource and assign to R. + int create(T &R) noexcept; + /// Destroy the resource. + int destroy(T) noexcept; +}; - for (size_t I = CurrentSize; I < NewSize; ++I) { - checkResult(cuStreamCreate(&Pool[I], CU_STREAM_NON_BLOCKING), - "Error returned from cuStreamCreate\n"); - } - } +/// Allocator for CUstream. +template <> class AllocatorTy { + CUcontext Context; public: - StreamManagerTy(const int NumberOfDevices, - std::vector &DeviceData) - : NumberOfDevices(NumberOfDevices), EnvNumInitialStreams(32), - DeviceData(DeviceData) { - StreamPool.resize(NumberOfDevices); - NextStreamId.resize(NumberOfDevices); - StreamMtx.resize(NumberOfDevices); + AllocatorTy(CUcontext C) noexcept : Context(C) {} - if (const char *EnvStr = getenv("LIBOMPTARGET_NUM_INITIAL_STREAMS")) - EnvNumInitialStreams = std::stoi(EnvStr); + /// See AllocatorTy::create. + int create(CUstream &Stream) noexcept { + if (!checkResult(cuCtxSetCurrent(Context), + "Error returned from cuCtxSetCurrent\n")) + return OFFLOAD_FAIL; - // Initialize the next stream id - std::fill(NextStreamId.begin(), NextStreamId.end(), 0); + if (!checkResult(cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING), + "Error returned from cuStreamCreate\n")) + return OFFLOAD_FAIL; - // Initialize stream mutex - for (std::unique_ptr &Ptr : StreamMtx) - Ptr = std::make_unique(); + return OFFLOAD_SUCCESS; } - ~StreamManagerTy() { - // Destroy streams - for (int I = 0; I < NumberOfDevices; ++I) { - checkResult(cuCtxSetCurrent(DeviceData[I].Context), - "Error returned from cuCtxSetCurrent\n"); + /// See AllocatorTy::destroy. 
+ int destroy(CUstream Stream) noexcept { + if (!checkResult(cuCtxSetCurrent(Context), + "Error returned from cuCtxSetCurrent\n")) + return OFFLOAD_FAIL; + if (!checkResult(cuStreamDestroy(Stream), + "Error returned from cuStreamDestroy\n")) + return OFFLOAD_FAIL; - for (CUstream &S : StreamPool[I]) { - if (S) - checkResult(cuStreamDestroy(S), - "Error returned from cuStreamDestroy\n"); - } - } + return OFFLOAD_SUCCESS; } +}; - // Get a CUstream from pool. Per-device next stream id always points to the - // next available CUstream. That means, CUstreams [0, id-1] have been - // assigned, and [id,] are still available. If there is no CUstream left, we - // will ask more CUstreams from CUDA RT. Each time a CUstream is assigned, - // the id will increase one. - // xxxxxs+++++++++ - // ^ - // id - // After assignment, the pool becomes the following and s is assigned. - // xxxxxs+++++++++ - // ^ - // id - CUstream getStream(const int DeviceId) { - const std::lock_guard Lock(*StreamMtx[DeviceId]); - int &Id = NextStreamId[DeviceId]; - // No CUstream left in the pool, we need to request from CUDA RT - if (Id == static_cast(StreamPool[DeviceId].size())) { - // By default we double the stream pool every time - resizeStreamPool(DeviceId, Id * 2); +/// A generic pool of resources where \p T is the resource type. +/// \p T should be copyable as the object is stored in \p std::vector . +template class ResourcePoolTy { + /// Index of the next available resource. + size_t Next = 0; + /// Mutex to guard the pool. + std::mutex Mutex; + /// Pool of resources. + std::vector Resources; + /// A reference to the corresponding allocator. + AllocatorTy Allocator; + + /// If `Resources` is used up, we will fill in more resources. It assumes that + /// the new size `Size` should be always larger than the current size. 
+ bool resize(size_t Size) { + auto CurSize = Resources.size(); + assert(Size > CurSize && "Unexpected smaller size"); + Resources.reserve(Size); + for (auto I = CurSize; I < Size; ++I) { + T NewItem; + int Ret = Allocator.create(NewItem); + if (Ret != OFFLOAD_SUCCESS) + return false; + Resources.push_back(NewItem); } - return StreamPool[DeviceId][Id++]; + return true; } - // Return a CUstream back to pool. As mentioned above, per-device next - // stream is always points to the next available CUstream, so when we return - // a CUstream, we need to first decrease the id, and then copy the CUstream - // back. - // It is worth noting that, the order of streams return might be different - // from that they're assigned, that saying, at some point, there might be - // two identical CUstreams. - // xxax+a+++++ - // ^ - // id - // However, it doesn't matter, because they're always on the two sides of - // id. The left one will in the end be overwritten by another CUstream. - // Therefore, after several execution, the order of pool might be different - // from its initial state. - void returnStream(const int DeviceId, CUstream Stream) { - const std::lock_guard Lock(*StreamMtx[DeviceId]); - int &Id = NextStreamId[DeviceId]; - assert(Id > 0 && "Wrong stream ID"); - StreamPool[DeviceId][--Id] = Stream; +public: + ResourcePoolTy(AllocatorTy &&A, size_t Size = 0) noexcept + : Allocator(std::move(A)) { + (void)resize(Size); } - bool initializeDeviceStreamPool(const int DeviceId) { - assert(StreamPool[DeviceId].empty() && "stream pool has been initialized"); + ~ResourcePoolTy() noexcept { + for (auto &R : Resources) + (void)Allocator.destroy(R); + } - resizeStreamPool(DeviceId, EnvNumInitialStreams); + /// Get a resource from pool. `Next` always points to the next available + /// resource. That means, `[0, next-1]` have been assigned, and `[id,]` are + /// still available. If there is no resource left, we will ask for more. 
Each + /// time a resource is assigned, the id will increase one. + /// xxxxxs+++++++++ + /// ^ + /// Next + /// After assignment, the pool becomes the following and s is assigned. + /// xxxxxs+++++++++ + /// ^ + /// Next + int acquire(T &R) noexcept { + std::lock_guard LG(Mutex); + if (Next == Resources.size() && !resize(Resources.size() * 2)) + return OFFLOAD_FAIL; - // Check the size of stream pool - if (static_cast(StreamPool[DeviceId].size()) != EnvNumInitialStreams) - return false; + R = Resources[Next++]; - // Check whether each stream is valid - for (CUstream &S : StreamPool[DeviceId]) - if (!S) - return false; + return OFFLOAD_SUCCESS; + } - return true; + /// Return the resource back to the pool. When we return a resource, we need + /// to first decrease `Next`, and then copy the resource back. It is worth + /// noting that, the order of resources return might be different from that + /// they're assigned, that saying, at some point, there might be two identical + /// resources. + /// xxax+a+++++ + /// ^ + /// Next + /// However, it doesn't matter, because they're always on the two sides of + /// `Next`. The left one will in the end be overwritten by another resource. + /// Therefore, after several execution, the order of pool might be different + /// from its initial state. + void release(T R) noexcept { + std::lock_guard LG(Mutex); + Resources[--Next] = R; } }; @@ -331,13 +319,18 @@ class DeviceRTLTy { int64_t RequiresFlags; // Amount of dynamic shared memory to use at launch. uint64_t DynamicMemorySize; + // Number of initial streams for each device. 
+ int NumInitialStreams = 32; static constexpr const int HardTeamLimit = 1U << 16U; // 64k static constexpr const int HardThreadLimit = 1024; static constexpr const int DefaultNumTeams = 128; static constexpr const int DefaultNumThreads = 128; - std::unique_ptr StreamManager; + using StreamPoolTy = ResourcePoolTy; + using StreamAllocatorTy = AllocatorTy; + std::vector> StreamPool; + std::vector DeviceData; std::vector Modules; @@ -471,8 +464,13 @@ class DeviceRTLTy { CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfo) const { assert(AsyncInfo && "AsyncInfo is nullptr"); - if (!AsyncInfo->Queue) - AsyncInfo->Queue = StreamManager->getStream(DeviceId); + if (!AsyncInfo->Queue) { + CUstream S; + if (StreamPool[DeviceId]->acquire(S) != OFFLOAD_SUCCESS) + return nullptr; + + AsyncInfo->Queue = S; + } return reinterpret_cast(AsyncInfo->Queue); } @@ -509,6 +507,7 @@ class DeviceRTLTy { } DeviceData.resize(NumberOfDevices); + StreamPool.resize(NumberOfDevices); // Get environment variables regarding teams if (const char *EnvStr = getenv("OMP_TEAM_LIMIT")) { @@ -532,9 +531,11 @@ class DeviceRTLTy { DP("Parsed LIBOMPTARGET_SHARED_MEMORY_SIZE = %" PRIu64 "\n", DynamicMemorySize); } - - StreamManager = - std::make_unique(NumberOfDevices, DeviceData); + if (const char *EnvStr = getenv("LIBOMPTARGET_NUM_INITIAL_STREAMS")) { + // LIBOMPTARGET_NUM_INITIAL_STREAMS has been set + NumInitialStreams = std::stoi(EnvStr); + DP("Parsed LIBOMPTARGET_NUM_INITIAL_STREAMS=%d\n", NumInitialStreams); + } for (int I = 0; I < NumberOfDevices; ++I) DeviceAllocators.emplace_back(I, DeviceData); @@ -556,13 +557,14 @@ class DeviceRTLTy { for (auto &M : MemoryManagers) M.release(); - StreamManager = nullptr; - for (CUmodule &M : Modules) // Close module if (M) checkResult(cuModuleUnload(M), "Error returned from cuModuleUnload\n"); + for (auto &S : StreamPool) + S = nullptr; + for (DeviceDataTy &D : DeviceData) { // Destroy context if (D.Context) { @@ -627,8 +629,9 @@ class DeviceRTLTy 
{ return OFFLOAD_FAIL; // Initialize stream pool - if (!StreamManager->initializeDeviceStreamPool(DeviceId)) - return OFFLOAD_FAIL; + if (!StreamPool[DeviceId]) + StreamPool[DeviceId] = std::make_unique( + StreamAllocatorTy(DeviceData[DeviceId].Context), NumInitialStreams); // Query attributes to determine number of threads/block and blocks/grid. int MaxGridDimX; @@ -1195,8 +1198,7 @@ class DeviceRTLTy { // Once the stream is synchronized, return it to stream pool and reset // AsyncInfo. This is to make sure the synchronization only works for its // own tasks. - StreamManager->returnStream(DeviceId, - reinterpret_cast(AsyncInfo->Queue)); + StreamPool[DeviceId]->release(reinterpret_cast(AsyncInfo->Queue)); AsyncInfo->Queue = nullptr; if (Err != CUDA_SUCCESS) { From c49dcb4830df1ef113bfa1c42132f47b0979789c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 21 Dec 2021 13:50:25 -0500 Subject: [PATCH 087/992] [InstCombine] add tests for shuffle + binop; NFC --- .../Transforms/InstCombine/shuffle_select.ll | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index c3edde8d70c8..986dbeef1aab 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -1060,15 +1060,30 @@ define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) { define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @or_2_vars( ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], +; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], -; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %t1 = or <4 x i32> %v0, + call void @use_v4i32(<4 x i32> %t1) %t2 = or <4 x i32> %v1, %t3 = shufflevector <4 x i32> 
%t1, <4 x i32> %t2, <4 x i32> + ret <4 x i32> %t3 +} + +define <4 x i32> @or_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { +; CHECK-LABEL: @or_2_vars_undef_mask_elt( +; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], +; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; + %t1 = or <4 x i32> %v0, call void @use_v4i32(<4 x i32> %t1) + %t2 = or <4 x i32> %v1, + %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> ret <4 x i32> %t3 } @@ -1077,17 +1092,17 @@ define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @xor_2_vars( ; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = xor <4 x i32> %v0, - %t2 = xor <4 x i32> %v1, - %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> call void @use_v4i32(<4 x i32> %t1) + %t2 = xor <4 x i32> %v1, call void @use_v4i32(<4 x i32> %t2) + %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> ret <4 x i32> %t3 } From 773ab3c6655f4d2beec25bb3516b4d4fe2eea990 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 27 Dec 2021 13:38:11 -0500 Subject: [PATCH 088/992] [Analysis] remove unneeded casts; NFC The callee does the casting too; this matches a plain call later in the same function for 'shl'. 
--- llvm/lib/Analysis/ValueTracking.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index fc378f97de0b..7a1caed0420a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6762,11 +6762,13 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, switch (BO.getOpcode()) { case Instruction::Add: if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { + bool HasNSW = IIQ.hasNoSignedWrap(&BO); + bool HasNUW = IIQ.hasNoUnsignedWrap(&BO); // FIXME: If we have both nuw and nsw, we should reduce the range further. - if (IIQ.hasNoUnsignedWrap(cast(&BO))) { + if (HasNUW) { // 'add nuw x, C' produces [C, UINT_MAX]. Lower = *C; - } else if (IIQ.hasNoSignedWrap(cast(&BO))) { + } else if (HasNSW) { if (C->isNegative()) { // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. Lower = APInt::getSignedMinValue(Width); From f261e258ecc0fc5b8e8a70dbe45752d1bb3c2d69 Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Wed, 15 Dec 2021 21:51:33 +0000 Subject: [PATCH 089/992] [profile] Fix writing binary id into profiles This patch adds support to read all the PT_NOTE segments in the executable to find the binary ids. Previously, it was only reading the first PT_NOTE segment, and this was missing the cases where binary id is in the following segments. 
As a result, binary-id.c and binary-id-padding.c test were failing in the following cases: 1) sanitizer-x86_64-linux bot https://lab.llvm.org/staging/#/builders/97 2) OpenSuse Tumbleweed https://github.com/llvm/llvm-project/issues/52695 Differential Revision: https://reviews.llvm.org/D115830 --- .../lib/profile/InstrProfilingPlatformLinux.c | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index e61f90b2cef9..ac2ab4a6a8f5 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -125,11 +125,9 @@ static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen, static int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) { int BinaryIdSize = 0; - const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr)); if (Note->n_type == NT_GNU_BUILD_ID && Note->n_namesz == 4 && memcmp(NoteName, "GNU\0", 4) == 0) { - uint64_t BinaryIdLen = Note->n_descsz; const uint8_t *BinaryIdData = (const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4)); @@ -151,12 +149,12 @@ static int WriteBinaryIdForNote(ProfDataWriter *Writer, */ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note, const ElfW(Nhdr) * NotesEnd) { - int TotalBinaryIdsSize = 0; + int BinaryIdsSize = 0; while (Note < NotesEnd) { - int Result = WriteBinaryIdForNote(Writer, Note); - if (Result == -1) + int OneBinaryIdSize = WriteBinaryIdForNote(Writer, Note); + if (OneBinaryIdSize == -1) return -1; - TotalBinaryIdsSize += Result; + BinaryIdsSize += OneBinaryIdSize; /* Calculate the offset of the next note in notes section. 
*/ size_t NoteOffset = sizeof(ElfW(Nhdr)) + RoundUp(Note->n_namesz, 4) + @@ -164,7 +162,7 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note, Note = (const ElfW(Nhdr) *)((const char *)(Note) + NoteOffset); } - return TotalBinaryIdsSize; + return BinaryIdsSize; } /* @@ -178,21 +176,46 @@ COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { const ElfW(Phdr) *ProgramHeader = (const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff); + int TotalBinaryIdsSize = 0; uint32_t I; /* Iterate through entries in the program header. */ for (I = 0; I < ElfHeader->e_phnum; I++) { - /* Look for the notes section in program header entries. */ + /* Look for the notes segment in program header entries. */ if (ProgramHeader[I].p_type != PT_NOTE) continue; - const ElfW(Nhdr) *Note = - (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_offset); - const ElfW(Nhdr) *NotesEnd = - (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_filesz); - return WriteBinaryIds(Writer, Note, NotesEnd); + /* There can be multiple notes segment, and examine each of them. */ + const ElfW(Nhdr) * Note; + const ElfW(Nhdr) * NotesEnd; + /* + * When examining notes in file, use p_offset, which is the offset within + * the elf file, to find the start of notes. + */ + if (ProgramHeader[I].p_memsz == 0 || + ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) { + Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + + ProgramHeader[I].p_offset); + NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) + + ProgramHeader[I].p_filesz); + } else { + /* + * When examining notes in memory, use p_vaddr, which is the address of + * section after loaded to memory, to find the start of notes. 
+ */ + Note = + (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr); + NotesEnd = + (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz); + } + + int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd); + if (TotalBinaryIdsSize == -1) + return -1; + + TotalBinaryIdsSize += BinaryIdsSize; } - return 0; + return TotalBinaryIdsSize; } #else /* !NT_GNU_BUILD_ID */ /* From e590c9bc7331f7351528369e512b3659f7f16e70 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 12:10:23 -0800 Subject: [PATCH 090/992] [ELF] -r: move zero OutputSection::addr code into finalizeAddressDependentContent Ensure addresses are unchanged after finalizeAddressDependentContent. --- lld/ELF/Writer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 986cca27fdf8..4b33563e8a79 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -557,9 +557,6 @@ template void Writer::run() { for (Partition &part : partitions) setPhdrs(part); - if (config->relocatable) - for (OutputSection *sec : outputSections) - sec->addr = 0; // Handle --print-map(-M)/--Map, --why-extract=, --cref and // --print-archive-stats=. Dump them before checkSections() because the files @@ -1699,6 +1696,10 @@ template void Writer::finalizeAddressDependentContent() { } } + if (config->relocatable) + for (OutputSection *sec : outputSections) + sec->addr = 0; + // If addrExpr is set, the address may not be a multiple of the alignment. // Warn because this is error-prone. for (SectionCommand *cmd : script->sectionCommands) From 80c14dcc0e2987338fbd27148a3214c3bba4e847 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 12:56:38 -0800 Subject: [PATCH 091/992] [ELF] Delete stale declaration. 
NFC --- lld/ELF/SyntheticSections.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index c785d5b48b33..547fa5f4056d 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -34,7 +34,6 @@ namespace elf { class Defined; struct PhdrEntry; class SymbolTableBaseSection; -class VersionNeedBaseSection; class SyntheticSection : public InputSection { public: From 38fc89623b3ea274ba45be20a19de217e45e1563 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 17 Dec 2021 14:28:36 -0500 Subject: [PATCH 092/992] [Attributor][Fix] Add alignment return attribute to HeapToStack This patch changes the HeapToStack optimization to attach the return alignment attribute information to the created alloca instruction. This would cause problems when replacing the heap allocation with an alloca did not respect the alignment of the original heap allocation, which would typically be aligned on an 8 or 16 byte boundary. Malloc calls now contain alignment attributes, so we can use that information here. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D115888 --- .../Transforms/IPO/AttributorAttributes.cpp | 2 ++ .../Transforms/Attributor/heap_to_stack.ll | 18 +++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b977821bcaa6..468ae1c45c88 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5929,6 +5929,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { } Align Alignment(1); + if (MaybeAlign RetAlign = AI.CB->getRetAlign()) + Alignment = max(Alignment, RetAlign); if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) { Optional AlignmentAPI = getAPInt(A, *this, *AI.CB->getArgOperand(0)); diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 9979e0e1d9ca..b9965b0d0f87 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -34,7 +34,7 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) { ; IS________OPM-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction ; IS________OPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree readnone [[A:%.*]]) { ; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: [[M:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________OPM-NEXT: [[M:%.*]] = tail call noalias align 16 i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS________OPM: t: ; IS________OPM-NEXT: br i1 false, label [[DEAD:%.*]], label [[F2:%.*]] @@ -43,41 +43,41 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) { ; IS________OPM: f2: ; IS________OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M]] to i32* ; IS________OPM-NEXT: [[C2:%.*]] = bitcast i32* [[C1]] to i8* -; IS________OPM-NEXT: [[L:%.*]] = load i8, i8* [[C2]], align 1 +; 
IS________OPM-NEXT: [[L:%.*]] = load i8, i8* [[C2]], align 16 ; IS________OPM-NEXT: call void @usei8(i8 [[L]]) -; IS________OPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef [[C2]]) #[[ATTR5:[0-9]+]] +; IS________OPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef align 16 [[C2]]) #[[ATTR5:[0-9]+]] ; IS________OPM-NEXT: br label [[J]] ; IS________OPM: dead: ; IS________OPM-NEXT: unreachable ; IS________OPM: j: ; IS________OPM-NEXT: [[PHI:%.*]] = phi i8* [ [[M]], [[F]] ], [ null, [[F2]] ] -; IS________OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[PHI]]) #[[ATTR5]] +; IS________OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef align 16 [[PHI]]) #[[ATTR5]] ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction ; IS________NPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree readnone [[A:%.*]]) { ; IS________NPM-NEXT: entry: -; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 16 ; IS________NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS________NPM: t: ; IS________NPM-NEXT: br i1 false, label [[DEAD:%.*]], label [[F2:%.*]] ; IS________NPM: f: ; IS________NPM-NEXT: br label [[J:%.*]] ; IS________NPM: f2: -; IS________NPM-NEXT: [[L:%.*]] = load i8, i8* [[TMP0]], align 1 +; IS________NPM-NEXT: [[L:%.*]] = load i8, i8* [[TMP0]], align 16 ; IS________NPM-NEXT: call void @usei8(i8 [[L]]) -; IS________NPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef [[TMP0]]) #[[ATTR6:[0-9]+]] +; IS________NPM-NEXT: call void @no_sync_func(i8* nocapture nofree noundef align 16 [[TMP0]]) #[[ATTR6:[0-9]+]] ; IS________NPM-NEXT: br label [[J]] ; IS________NPM: dead: ; IS________NPM-NEXT: unreachable ; IS________NPM: j: ; IS________NPM-NEXT: [[PHI:%.*]] = phi i8* [ [[TMP0]], [[F]] ], [ null, [[F2]] ] -; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[PHI]]) 
#[[ATTR6]] +; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef align 16 [[PHI]]) #[[ATTR6]] ; IS________NPM-NEXT: ret void ; entry: %add = add i64 2, 2 - %m = tail call noalias i8* @malloc(i64 %add) + %m = tail call align 16 noalias i8* @malloc(i64 %add) br i1 %c, label %t, label %f t: br i1 false, label %dead, label %f2 From 7cdaa5a94ed74a7f31bb16b722833b54bc21b0be Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 17 Dec 2021 17:00:13 -0500 Subject: [PATCH 093/992] [OpenMP][FIX] Change globalization alignment to 16 This patch changes the default alignment from 8 to 16, and encodes this information in the `__kmpc_alloc_shared` runtime call to communicate it to the HeapToStack pass. The previous alignment of 8 was not sufficient for the maximum size of primitive types on 64-bit systems, and needs to be increased. This reduces the amount of space available in the data sharing stack, so this implementation will need to be improved later to include the alignment requirements in the allocation call, and use it properly in the data sharing stack in the runtime. 
Depends on D115888 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D115971 --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 11 +- .../declare_target_codegen_globalization.cpp | 8 +- clang/test/OpenMP/nvptx_data_sharing.cpp | 4 +- ...stribute_parallel_generic_mode_codegen.cpp | 6 +- clang/test/OpenMP/nvptx_parallel_codegen.cpp | 4 +- .../OpenMP/nvptx_parallel_for_codegen.cpp | 2 +- clang/test/OpenMP/nvptx_target_codegen.cpp | 6 +- ...arallel_reduction_codegen_tbaa_PR46146.cpp | 960 +++++++++--------- .../nvptx_target_teams_distribute_codegen.cpp | 6 +- ..._teams_distribute_parallel_for_codegen.cpp | 8 +- ...s_distribute_parallel_for_simd_codegen.cpp | 6 +- clang/test/OpenMP/nvptx_teams_codegen.cpp | 16 +- .../OpenMP/nvptx_teams_reduction_codegen.cpp | 32 +- openmp/libomptarget/DeviceRTL/src/State.cpp | 4 +- 14 files changed, 541 insertions(+), 532 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 866454ddeaed..594c7fef36a7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1402,10 +1402,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, // Allocate space for the variable to be globalized llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())}; - llvm::Instruction *VoidPtr = + llvm::CallBase *VoidPtr = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_alloc_shared), AllocArgs, VD->getName()); + // FIXME: We should use the variables actual alignment as an argument. + VoidPtr->addRetAttr(llvm::Attribute::get( + CGM.getLLVMContext(), llvm::Attribute::Alignment, + CGM.getContext().getTargetInfo().getNewAlign() / 8)); // Cast the void pointer and get the address of the globalized variable. 
llvm::PointerType *VarPtrTy = CGF.ConvertTypeForMem(VarTy)->getPointerTo(); @@ -1438,10 +1442,13 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, // Allocate space for this VLA object to be globalized. llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())}; - llvm::Instruction *VoidPtr = + llvm::CallBase *VoidPtr = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_alloc_shared), AllocArgs, VD->getName()); + VoidPtr->addRetAttr( + llvm::Attribute::get(CGM.getLLVMContext(), llvm::Attribute::Alignment, + CGM.getContext().getTargetInfo().getNewAlign())); I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back( std::pair( diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index 022a88c60a48..aaf03634f3d7 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -58,8 +58,8 @@ int maini1() { // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[CALL1:%.*]] = call i32 @_Z3barv() #[[ATTR4]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[CALL1:%.*]] = call i32 @_Z3barv() #[[ATTR6]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 // CHECK1-NEXT: ret void @@ -78,9 +78,9 @@ int maini1() { // CHECK1-LABEL: define {{[^@]+}}@_Z3barv // CHECK1-SAME: () #[[ATTR2]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[A:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[A:%.*]] = call align 8 i8* 
@__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR4]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) // CHECK1-NEXT: ret i32 [[CALL]] // diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index f6317077d94d..290051ef7d42 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -397,9 +397,9 @@ void test_ds(){ // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: -// CHECK-NEXT: [[A:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK-NEXT: [[A:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* -// CHECK-NEXT: [[B:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: store i32 10, i32* [[A_ON_STACK]], align 4 diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index c8e95951a61f..6791a32c79e5 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -3046,7 +3046,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP3:%.*]] = 
load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK4-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i64 40) +// CHECK4-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) // CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3377,7 +3377,7 @@ int main(int argc, char **argv) { // CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 40) +// CHECK5-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 40) // CHECK5-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3700,7 +3700,7 @@ int main(int argc, char **argv) { // CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 40) +// CHECK6-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 40) // CHECK6-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* // CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 5c33af387296..ef7fe4d4d2a0 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1633,7 +1633,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[A1:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[A1:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[A_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1905,7 +1905,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[A1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[A1:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[A_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp index b0738928a013..790f85320dfe 100644 --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -469,7 +469,7 @@ int bar(int n){ // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: -// CHECK-NEXT: [[D:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK-NEXT: [[D:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to i32* // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 diff --git 
a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index c3a90d523110..e38b26a2a690 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -418,7 +418,7 @@ void unreachable_call() { // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[F:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[F:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* // CHECK1-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -802,7 +802,7 @@ void unreachable_call() { // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: [[F:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[F:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* // CHECK2-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK2-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 @@ -1185,7 +1185,7 @@ void unreachable_call() { // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK3-NEXT: [[F:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[F:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* // CHECK3-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // 
CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 90229cae452f..58594d315673 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -66,11 +66,11 @@ void test() { // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* -// CHECK1-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* -// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] @@ -123,7 +123,7 @@ void test() { // CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) 
[[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR10:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -188,7 +188,7 @@ void test() { // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR10]] // CHECK1-NEXT: ret void // // @@ -278,7 +278,7 @@ void test() { // CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] +// CHECK1-NEXT: call void 
@_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR10]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -340,8 +340,8 @@ void test() { // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR10]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR10]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* 
[[REF_TMP15]] to i8* @@ -382,7 +382,7 @@ void test() { // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR10]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: @@ -420,13 +420,13 @@ void test() { // CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR10]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK1-NEXT: store float [[ADD]], float* 
[[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR10]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -461,49 +461,49 @@ void test() { // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK1-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP28:%.*]] = 
and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK1-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK1-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK1-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 +// CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] +// CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] +// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] +// CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR5]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to %"class.std::complex"* -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to %"class.std::complex"* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP44]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !21 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] @@ -520,53 +520,53 @@ void test() { // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: 
[[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* // CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK1-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 +// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 
[[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x 
i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: -// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void @@ -633,11 +633,11 @@ void test() { // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* -// CHECK1-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* -// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 16) // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* // 
CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] @@ -690,7 +690,7 @@ void test() { // CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR10]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -755,7 +755,7 @@ void test() { // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR10]] // CHECK1-NEXT: ret void // // @@ 
-845,7 +845,7 @@ void test() { // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR10]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -907,8 +907,8 @@ void test() { // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 
dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR10]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR10]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* @@ -949,7 +949,7 @@ void test() { // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR10]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: @@ -987,13 +987,13 @@ void test() { // CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa 
[[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR10]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR10]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -1029,8 +1029,8 @@ void test() { // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK1: .shuffle.pre_cond: -// CHECK1-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP28:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK1-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN]] ] +// CHECK1-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// CHECK1-NEXT: 
[[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP17]] to i8* // CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 // CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 @@ -1040,51 +1040,51 @@ void test() { // CHECK1-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] // CHECK1: .shuffle.then: // CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP26:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK1-NEXT: [[TMP27:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP26]]) -// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP28]] = getelementptr i64, i64* [[TMP17]], i64 1 -// CHECK1-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP18]], i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK1-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 +// CHECK1-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK1: .shuffle.exit: -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] -// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP36:%.*]] = and i16 [[TMP6]], 1 -// 
CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP36]], 0 -// CHECK1-NEXT: [[TMP38:%.*]] = and i1 [[TMP35]], [[TMP37]] -// CHECK1-NEXT: [[TMP39:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP31]], [[TMP34]] -// CHECK1-NEXT: [[TMP42:%.*]] = or i1 [[TMP41]], [[TMP40]] -// CHECK1-NEXT: br i1 [[TMP42]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK1-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] +// CHECK1-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR5]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: 
else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP45:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP46:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP47:%.*]] = and i1 [[TMP45]], [[TMP46]] -// CHECK1-NEXT: br i1 [[TMP47]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP49:%.*]] = load i8*, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP51:%.*]] = load i8*, i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.0"* -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.0"* -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP52]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP55:%.*]] = 
bitcast %"class.std::complex.0"* [[TMP54]] to i8* +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] @@ -1101,53 +1101,53 @@ void test() { // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* // CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 4 -// 
CHECK1-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 +// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: 
br label [[IFCONT4:%.*]] // CHECK1: else3: -// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void @@ -1300,11 +1300,11 @@ void test() { // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* -// CHECK2-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* -// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] @@ -1357,7 +1357,7 @@ void test() { // CHECK2-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void 
@_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR10:[0-9]+]] // CHECK2-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -1422,7 +1422,7 @@ void test() { // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR10]] // CHECK2-NEXT: ret void // // @@ -1512,7 +1512,7 @@ void test() { // CHECK2-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* 
nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR10]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -1574,8 +1574,8 @@ void test() { // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK2-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR10]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR10]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* // CHECK2-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* @@ -1616,7 +1616,7 @@ void test() { // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR10]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: @@ -1654,13 +1654,13 @@ void test() { // CHECK2-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR10]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // 
CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK2-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR10]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -1695,49 +1695,49 @@ void test() { // CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* // CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK2-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK2-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 -// CHECK2-NEXT: [[TMP22:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP26:%.*]] = and i1 
[[TMP24]], [[TMP25]] -// CHECK2-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK2-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK2-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK2-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 +// CHECK2-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) +// CHECK2-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK2-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] +// CHECK2-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] +// CHECK2-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] +// CHECK2-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// 
CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] +// CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK2-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR5]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: -// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 8 -// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to %"class.std::complex"* -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to %"class.std::complex"* -// CHECK2-NEXT: [[TMP46:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP44]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct 
!21 +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* +// CHECK2-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: // CHECK2-NEXT: br label [[IFCONT6]] @@ -1754,53 +1754,53 @@ void test() { // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// 
CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* // CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND:%.*]] // CHECK2: precond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK2-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 +// CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK2: body: // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast 
i8* [[TMP11]] to i32* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: br label [[IFCONT6:%.*]] +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* 
@__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: -// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2-NEXT: br label [[IFCONT4]] // CHECK2: ifcont4: -// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND]] // CHECK2: exit: // CHECK2-NEXT: ret void @@ -1867,11 +1867,11 @@ void test() { // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* -// CHECK2-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* -// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) +// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* 
@__kmpc_alloc_shared(i64 16) // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] @@ -1924,7 +1924,7 @@ void test() { // CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR10]] // CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -1989,7 +1989,7 @@ void test() { // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull 
align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR10]] // CHECK2-NEXT: ret void // // @@ -2079,7 +2079,7 @@ void test() { // CHECK2-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR10]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -2141,8 +2141,8 @@ void test() { // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK2-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] +// CHECK2-NEXT: call 
void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR10]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR10]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* @@ -2183,7 +2183,7 @@ void test() { // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR10]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: @@ -2221,13 +2221,13 @@ void test() { // CHECK2-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** 
[[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR10]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK2-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR10]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -2263,8 +2263,8 @@ void test() { // CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK2: .shuffle.pre_cond: -// CHECK2-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP28:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK2-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN]] ] +// 
CHECK2-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// CHECK2-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP17]] to i8* // CHECK2-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 // CHECK2-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 @@ -2274,51 +2274,51 @@ void test() { // CHECK2-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] // CHECK2: .shuffle.then: // CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP26:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK2-NEXT: [[TMP27:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP26]]) -// CHECK2-NEXT: store i64 [[TMP27]], i64* [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP28]] = getelementptr i64, i64* [[TMP17]], i64 1 -// CHECK2-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP18]], i64 1 +// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK2-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) +// CHECK2-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 +// CHECK2-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 +// CHECK2-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK2: .shuffle.exit: -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP33:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP34:%.*]] = and i1 
[[TMP32]], [[TMP33]] -// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP36:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP36]], 0 -// CHECK2-NEXT: [[TMP38:%.*]] = and i1 [[TMP35]], [[TMP37]] -// CHECK2-NEXT: [[TMP39:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP31]], [[TMP34]] -// CHECK2-NEXT: [[TMP42:%.*]] = or i1 [[TMP41]], [[TMP40]] -// CHECK2-NEXT: br i1 [[TMP42]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK2-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK2-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 +// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] +// CHECK2-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] +// CHECK2-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] +// CHECK2-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK2-NEXT: call 
void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR5]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP45:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP46:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP47:%.*]] = and i1 [[TMP45]], [[TMP46]] -// CHECK2-NEXT: br i1 [[TMP47]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: -// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP49:%.*]] = load i8*, i8** [[TMP48]], align 8 -// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP51:%.*]] = load i8*, i8** [[TMP50]], align 8 -// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.0"* -// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.0"* -// CHECK2-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* -// CHECK2-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP52]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to 
%"class.std::complex.0"* +// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* +// CHECK2-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* +// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: // CHECK2-NEXT: br label [[IFCONT6]] @@ -2335,53 +2335,53 @@ void test() { // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* // CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND:%.*]] // CHECK2: precond: -// CHECK2-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 4 -// CHECK2-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 +// CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK2: body: // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 // 
CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: br label [[IFCONT6:%.*]] +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, i32 
addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: -// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2-NEXT: br label [[IFCONT4]] // CHECK2: ifcont4: -// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND]] // CHECK2: exit: // CHECK2-NEXT: ret void @@ -2534,11 +2534,11 @@ void test() { // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* -// CHECK3-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* -// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] @@ -2591,7 +2591,7 @@ void test() { // CHECK3-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) 
#[[ATTR5]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR10:[0-9]+]] // CHECK3-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -2656,7 +2656,7 @@ void test() { // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR10]] // CHECK3-NEXT: ret void // // @@ -2746,7 +2746,7 @@ void test() { // CHECK3-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void 
@_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR10]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -2808,8 +2808,8 @@ void test() { // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK3-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR10]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull 
align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR10]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* @@ -2850,7 +2850,7 @@ void test() { // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR10]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: @@ -2888,13 +2888,13 @@ void test() { // CHECK3-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR10]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds 
%"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK3-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR10]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -2929,49 +2929,49 @@ void test() { // CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* // CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK3-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK3-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 
-// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK3-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK3-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK3-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 +// CHECK3-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) +// CHECK3-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK3-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] +// CHECK3-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] +// 
CHECK3-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] +// CHECK3-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] +// CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK3-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR5]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 8 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 -// CHECK3-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to %"class.std::complex"* -// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to %"class.std::complex"* -// CHECK3-NEXT: [[TMP46:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* -// CHECK3-NEXT: [[TMP47:%.*]] = bitcast 
%"class.std::complex"* [[TMP44]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !21 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* +// CHECK3-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* +// CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* +// CHECK3-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: // CHECK3-NEXT: br label [[IFCONT6]] @@ -2988,53 +2988,53 @@ void test() { // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_get_hardware_thread_id_in_block() +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* // CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND:%.*]] // CHECK3: precond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK3-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 +// CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK3: body: // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], 
align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: -// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3-NEXT: br label [[IFCONT4]] // CHECK3: ifcont4: -// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND]] // CHECK3: exit: // CHECK3-NEXT: ret void @@ -3101,11 +3101,11 @@ void test() { // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* -// CHECK3-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[IEND_ON_STACK:%.*]] = bitcast 
i8* [[IEND]] to i32* -// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) +// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 16) // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] @@ -3158,7 +3158,7 @@ void test() { // CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR10]] // CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -3223,7 +3223,7 @@ void test() { // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) 
#[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR10]] // CHECK3-NEXT: ret void // // @@ -3313,7 +3313,7 @@ void test() { // CHECK3-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR10]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -3375,8 +3375,8 @@ void test() { // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK3-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull 
align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR10]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR10]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* @@ -3417,7 +3417,7 @@ void test() { // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR10]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: @@ -3455,13 +3455,13 @@ void test() { // CHECK3-NEXT: store %"class.std::complex.0"* [[__C]], 
%"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR10]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK3-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR10]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -3497,8 +3497,8 @@ void test() { // CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK3: .shuffle.pre_cond: -// CHECK3-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], 
[ [[TMP28:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK3-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN]] ] +// CHECK3-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// CHECK3-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP17]] to i8* // CHECK3-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 // CHECK3-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 @@ -3508,51 +3508,51 @@ void test() { // CHECK3-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] // CHECK3: .shuffle.then: // CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP26:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK3-NEXT: [[TMP27:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP26]]) -// CHECK3-NEXT: store i64 [[TMP27]], i64* [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP28]] = getelementptr i64, i64* [[TMP17]], i64 1 -// CHECK3-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP18]], i64 1 +// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK3-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) +// CHECK3-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 +// CHECK3-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK3: .shuffle.exit: -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP8]], 0 -// 
CHECK3-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] -// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP36:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP36]], 0 -// CHECK3-NEXT: [[TMP38:%.*]] = and i1 [[TMP35]], [[TMP37]] -// CHECK3-NEXT: [[TMP39:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP31]], [[TMP34]] -// CHECK3-NEXT: [[TMP42:%.*]] = or i1 [[TMP41]], [[TMP40]] -// CHECK3-NEXT: br i1 [[TMP42]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK3-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK3-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] +// CHECK3-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +// CHECK3-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] +// CHECK3-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK3-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] +// CHECK3-NEXT: [[TMP44:%.*]] 
= bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR5]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP45:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP46:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP47:%.*]] = and i1 [[TMP45]], [[TMP46]] -// CHECK3-NEXT: br i1 [[TMP47]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] +// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP49:%.*]] = load i8*, i8** [[TMP48]], align 8 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP51:%.*]] = load i8*, i8** [[TMP50]], align 8 -// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.0"* -// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.0"* -// CHECK3-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* -// CHECK3-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP52]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* 
[[TMP5]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 +// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* +// CHECK3-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: // CHECK3-NEXT: br label [[IFCONT6]] @@ -3569,53 +3569,53 @@ void test() { // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* // 
CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND:%.*]] // CHECK3: precond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 4 -// CHECK3-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 +// CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK3: body: // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to 
i32* +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: -// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3-NEXT: br label [[IFCONT4]] // CHECK3: ifcont4: -// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND]] // CHECK3: exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp index 6bf279444adb..272cf8ec6777 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -607,7 +607,7 @@ int bar(int n){ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[I:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[I:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 @@ -729,7 +729,7 @@ int bar(int n){ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: [[I:%.*]] = 
call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[I:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 @@ -851,7 +851,7 @@ int bar(int n){ // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: [[I:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[I:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 0cd88fb9b12c..37f003abdb35 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18534,7 +18534,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -20128,7 +20128,7 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x 
i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -21711,7 +21711,7 @@ int bar(int n){ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -23245,7 +23245,7 @@ int bar(int n){ // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK4-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 0339a6d21899..a3b5248ba8de 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ 
b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9447,7 +9447,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -10525,7 +10525,7 @@ int bar(int n){ // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -11563,7 +11563,7 @@ int bar(int n){ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index 
52d94ab9b682..8b51f6ed0949 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -903,7 +903,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -943,7 +943,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -983,7 +983,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1023,7 +1023,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1070,7 +1070,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK3-NEXT: [[ARGC3:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1116,7 +1116,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[ARGC2:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8*** // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1160,7 +1160,7 @@ int main (int 
argc, char **argv) { // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1204,7 +1204,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index f584b2b4676c..08471750e425 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4219,7 +4219,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[CONV]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK1-NEXT: store double [[TMP1]], 
double* [[E_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -4244,7 +4244,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK1-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 @@ -4521,10 +4521,10 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[C2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) // CHECK1-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 -// CHECK1-NEXT: [[D3:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[D3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -4553,8 +4553,8 @@ int bar(int n){ // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i64 1) -// CHECK1-NEXT: [[D2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// 
CHECK1-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[D2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* // CHECK1-NEXT: store i8 0, i8* [[C1]], align 1 // CHECK1-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 @@ -5563,7 +5563,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK2-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) // CHECK2-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK2-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 @@ -5840,10 +5840,10 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK2-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[C2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) // CHECK2-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 -// CHECK2-NEXT: [[D3:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[D3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK2-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -5872,8 +5872,8 @@ int bar(int n){ // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 // 
CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK2-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) -// CHECK2-NEXT: [[D2:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[D2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* // CHECK2-NEXT: store i8 0, i8* [[C1]], align 1 // CHECK2-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 @@ -6881,7 +6881,7 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) // CHECK3-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK3-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 @@ -7158,10 +7158,10 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: [[C2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) // CHECK3-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 -// CHECK3-NEXT: [[D3:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[D3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK3-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -7190,8 +7190,8 @@ int bar(int n){ // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: [[D2:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: [[D2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* // CHECK3-NEXT: store i8 0, i8* [[C1]], align 1 // CHECK3-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index d6ae00b1850d..ee6295fdcadc 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -26,7 +26,9 @@ using namespace _OMP; ///{ /// Add worst-case padding so that future allocations are properly aligned. -constexpr const uint32_t Alignment = 8; +/// FIXME: The stack shouldn't require worst-case padding. Alignment needs to be +/// passed in as an argument and the stack rewritten to support it. +constexpr const uint32_t Alignment = 16; /// External symbol to access dynamic shared memory. extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment))); From 6e220296d7d13b8255c2c26baf59f5433a135475 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 27 Dec 2021 15:02:32 -0500 Subject: [PATCH 094/992] [OpenMP] Use alignment information in HeapToShared This patch uses the return alignment attribute now present in the `__kmpc_alloc_shared` runtime call to set the alignment of the shared memory global created to replace it. 
Depends on D115971 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D116319 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 5 ++- .../OpenMP/replace_globalization.ll | 36 +++++++++---------- llvm/test/Transforms/OpenMP/spmdization.ll | 30 ++++++++-------- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index f289e3ecc979..c24ca90ce1ba 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2786,7 +2786,10 @@ struct AAHeapToSharedFunction : public AAHeapToShared { }; A.emitRemark(CB, "OMP111", Remark); - SharedMem->setAlignment(MaybeAlign(32)); + MaybeAlign Alignment = CB->getRetAlign(); + assert(Alignment && + "HeapToShared on allocation without alignment attribute"); + SharedMem->setAlignment(MaybeAlign(Alignment)); A.changeValueAfterManifest(*CB, *NewBuffer); A.deleteAfterManifest(*CB); diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index d1a04de1f2f6..56dd9dd20893 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -20,7 +20,7 @@ target triple = "nvptx64" define dso_local void @foo() { entry: %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) - %x = call i8* @__kmpc_alloc_shared(i64 4) + %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) call void @unknown_no_openmp() %x_on_stack = bitcast i8* %x to i32* %0 = bitcast i32* %x_on_stack to i8* @@ -36,7 +36,7 @@ define void @bar() { %cmp = icmp eq i32 %c, -1 br i1 %cmp, label %master1, label %exit master1: - %x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !11 + %x = call align 4 i8* @__kmpc_alloc_shared(i64 16), !dbg !11 %x_on_stack = bitcast i8* %x to [4 x i32]* %a0 = bitcast [4 x i32]* %x_on_stack to i8* call void @use(i8* %a0) @@ -47,7 +47,7 @@ next: %b0 = icmp eq i32 %c, -1 br i1 %b0, 
label %master2, label %exit master2: - %y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 + %y = call align 4 i8* @__kmpc_alloc_shared(i64 4), !dbg !12 %y_on_stack = bitcast i8* %y to [4 x i32]* %b1 = bitcast [4 x i32]* %y_on_stack to i8* call void @use(i8* %b1) @@ -64,7 +64,7 @@ define void @baz_spmd() { %c0 = icmp eq i32 %c, -1 br i1 %c0, label %master3, label %exit master3: - %z = call i8* @__kmpc_alloc_shared(i64 24), !dbg !12 + %z = call align 4 i8* @__kmpc_alloc_shared(i64 24), !dbg !12 %z_on_stack = bitcast i8* %z to [6 x i32]* %c1 = bitcast [6 x i32]* %z_on_stack to i8* call void @use(i8* %c1) @@ -119,15 +119,15 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 32 -; CHECK: @[[Y:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4 +; CHECK: @[[Y:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ;. 
; CHECK-LABEL: define {{[^@]+}}@foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly align 4 [[X]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR5]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void @@ -135,17 +135,17 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; ; CHECK-LABEL: define {{[^@]+}}@bar() { ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR6]] +; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: -; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4]] ; CHECK-NEXT: br label [[MASTER2:%.*]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds 
([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR6]] +; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -154,12 +154,12 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; ; CHECK-LABEL: define {{[^@]+}}@baz_spmd() { ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true, i1 true) -; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4]] ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call i8* @__kmpc_alloc_shared(i64 24) #[[ATTR5]], !dbg [[DBG9:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR5]], !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[Z]]) #[[ATTR6]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR5]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -169,7 +169,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; ; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly ; CHECK-LABEL: define {{[^@]+}}@use.internalized -; CHECK-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i8* nofree writeonly align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 ; CHECK-NEXT: ret void @@ -186,7 +186,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind 
readnone speculatable } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" } ; CHECK: attributes #[[ATTR5]] = { nounwind } ; CHECK: attributes #[[ATTR6]] = { nounwind writeonly } ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 95fc0deb7418..e686dd6f09f4 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -113,8 +113,8 @@ ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 -; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 +; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. 
; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" @@ -126,8 +126,8 @@ ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 -; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 +; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. 
; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" @@ -139,8 +139,8 @@ ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; AMDGPU-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 -; AMDGPU-DISABLED: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; AMDGPU-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 +; AMDGPU-DISABLED: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -156,8 +156,8 @@ ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: 
@[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; NVPTX-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 -; NVPTX-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32 +; NVPTX-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 +; NVPTX-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -688,7 +688,7 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 ; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* ; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -710,7 
+710,7 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* ; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; NVPTX-NEXT: br label [[FOR_COND:%.*]] @@ -732,7 +732,7 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 ; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* ; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -754,7 +754,7 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, 
align 1 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* ; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -774,7 +774,7 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %x = call i8* @__kmpc_alloc_shared(i64 4) + %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) %x_on_stack = bitcast i8* %x to i32* call void @use(i32* nocapture %x_on_stack) #10 br label %for.cond @@ -1141,7 +1141,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias ; entry: %captured_vars_addrs = alloca [1 x i8*], align 8 - %x = call i8* @__kmpc_alloc_shared(i64 4) + %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) %x_on_stack = bitcast i8* %x to i32* br label %for.cond @@ -1573,7 +1573,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; entry: %captured_vars_addrs = alloca [1 x i8*], align 8 - %x = call i8* @__kmpc_alloc_shared(i64 4) + %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) %x_on_stack = bitcast i8* %x to i32* store i32 42, i32* %x_on_stack, align 4, !tbaa !18 br label %for.cond From 5c38b584e55904ddabc0ba887f7bdc1dad5e71be Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 27 Dec 2021 14:44:34 -0800 Subject: [PATCH 095/992] [RISCV] Rename two tests to reflect extension being tested. 
NFC --- ...v64f-half-convert-strict.ll => rv64zfh-half-convert-strict.ll} | 0 .../RISCV/{rv64f-half-convert.ll => rv64zfh-half-convert.ll} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/RISCV/{rv64f-half-convert-strict.ll => rv64zfh-half-convert-strict.ll} (100%) rename llvm/test/CodeGen/RISCV/{rv64f-half-convert.ll => rv64zfh-half-convert.ll} (100%) diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert-strict.ll similarity index 100% rename from llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll rename to llvm/test/CodeGen/RISCV/rv64zfh-half-convert-strict.ll diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll similarity index 100% rename from llvm/test/CodeGen/RISCV/rv64f-half-convert.ll rename to llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll From 10129fe86102a1178a0f4f0dcdf861c568445900 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 27 Dec 2021 14:59:21 -0800 Subject: [PATCH 096/992] [RISCV] Fix type in f16 and f64 version of lrint/llrint/lround/llround test cases. NFC Due to a copy/paste mistake we were always testing float. This required splitting up the f16 tests into separate files since we don't have an appropriate libcall to use when the types involved aren't legal. 
--- llvm/test/CodeGen/RISCV/double-intrinsics.ll | 76 +++---- llvm/test/CodeGen/RISCV/half-intrinsics.ll | 192 ------------------ .../CodeGen/RISCV/rv64zfh-half-intrinsics.ll | 41 ++++ .../test/CodeGen/RISCV/zfh-half-intrinsics.ll | 67 ++++++ 4 files changed, 150 insertions(+), 226 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics.ll create mode 100644 llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll index 553e8fbeec37..5b1be180c1b8 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -1146,26 +1146,30 @@ define double @roundeven_f64(double %a) nounwind { ret double %1 } -declare iXLen @llvm.lrint.iXLen.f64(float) +declare iXLen @llvm.lrint.iXLen.f64(double) -define iXLen @lrint_f64(float %a) nounwind { +define iXLen @lrint_f64(double %a) nounwind { ; RV32IFD-LABEL: lrint_f64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: fmv.w.x ft0, a0 -; RV32IFD-NEXT: fcvt.w.s a0, ft0 +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0 +; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: lrint_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.w.x ft0, a0 -; RV64IFD-NEXT: fcvt.l.s a0, ft0 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0 ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: lrint_f64: ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call lrintf@plt +; RV32I-NEXT: call lrint@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -1174,34 +1178,38 @@ define iXLen @lrint_f64(float %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call lrintf@plt +; RV64I-NEXT: call lrint@plt 
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret - %1 = call iXLen @llvm.lrint.iXLen.f64(float %a) + %1 = call iXLen @llvm.lrint.iXLen.f64(double %a) ret iXLen %1 } -declare iXLen @llvm.lround.iXLen.f64(float) +declare iXLen @llvm.lround.iXLen.f64(double) -define iXLen @lround_f64(float %a) nounwind { +define iXLen @lround_f64(double %a) nounwind { ; RV32IFD-LABEL: lround_f64: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: fmv.w.x ft0, a0 -; RV32IFD-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rmm +; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: lround_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.w.x ft0, a0 -; RV64IFD-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rmm ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: lround_f64: ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call lroundf@plt +; RV32I-NEXT: call lround@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -1210,37 +1218,37 @@ define iXLen @lround_f64(float %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call lroundf@plt +; RV64I-NEXT: call lround@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret - %1 = call iXLen @llvm.lround.iXLen.f64(float %a) + %1 = call iXLen @llvm.lround.iXLen.f64(double %a) ret iXLen %1 } -declare i64 @llvm.llrint.i64.f64(float) +declare i64 @llvm.llrint.i64.f64(double) -define i64 @llrint_f64(float %a) nounwind { +define i64 @llrint_f64(double %a) nounwind { ; RV32IFD-LABEL: llrint_f64: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 
-; RV32IFD-NEXT: call llrintf@plt +; RV32IFD-NEXT: call llrint@plt ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: llrint_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.w.x ft0, a0 -; RV64IFD-NEXT: fcvt.l.s a0, ft0 +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0 ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: llrint_f64: ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call llrintf@plt +; RV32I-NEXT: call llrint@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -1249,37 +1257,37 @@ define i64 @llrint_f64(float %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call llrintf@plt +; RV64I-NEXT: call llrint@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret - %1 = call i64 @llvm.llrint.i64.f64(float %a) + %1 = call i64 @llvm.llrint.i64.f64(double %a) ret i64 %1 } -declare i64 @llvm.llround.i64.f64(float) +declare i64 @llvm.llround.i64.f64(double) -define i64 @llround_f64(float %a) nounwind { +define i64 @llround_f64(double %a) nounwind { ; RV32IFD-LABEL: llround_f64: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: call llroundf@plt +; RV32IFD-NEXT: call llround@plt ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: llround_f64: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.w.x ft0, a0 -; RV64IFD-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rmm ; RV64IFD-NEXT: ret ; ; RV32I-LABEL: llround_f64: ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call llroundf@plt +; RV32I-NEXT: call llround@plt 
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -1288,10 +1296,10 @@ define i64 @llround_f64(float %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call llroundf@plt +; RV64I-NEXT: call llround@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret - %1 = call i64 @llvm.llround.i64.f64(float %a) + %1 = call i64 @llvm.llround.i64.f64(double %a) ret i64 %1 } diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 128c61947cb0..88d05e6ad9eb 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -1967,195 +1967,3 @@ define half @roundeven_f16(half %a) nounwind { %1 = call half @llvm.roundeven.f16(half %a) ret half %1 } - -declare iXLen @llvm.lrint.iXLen.f16(float) - -define iXLen @lrint_f16(float %a) nounwind { -; RV32IZFH-LABEL: lrint_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: fcvt.w.s a0, fa0 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: lrint_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.s a0, fa0 -; RV64IZFH-NEXT: ret -; -; RV32IDZFH-LABEL: lrint_f16: -; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: fcvt.w.s a0, fa0 -; RV32IDZFH-NEXT: ret -; -; RV64IDZFH-LABEL: lrint_f16: -; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: fcvt.l.s a0, fa0 -; RV64IDZFH-NEXT: ret -; -; RV32I-LABEL: lrint_f16: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call lrintf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV64I-LABEL: lrint_f16: -; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call lrintf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret - %1 = call iXLen 
@llvm.lrint.iXLen.f16(float %a) - ret iXLen %1 -} - -declare iXLen @llvm.lround.iXLen.f16(float) - -define iXLen @lround_f16(float %a) nounwind { -; RV32IZFH-LABEL: lround_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: fcvt.w.s a0, fa0, rmm -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: lround_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.s a0, fa0, rmm -; RV64IZFH-NEXT: ret -; -; RV32IDZFH-LABEL: lround_f16: -; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: fcvt.w.s a0, fa0, rmm -; RV32IDZFH-NEXT: ret -; -; RV64IDZFH-LABEL: lround_f16: -; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: fcvt.l.s a0, fa0, rmm -; RV64IDZFH-NEXT: ret -; -; RV32I-LABEL: lround_f16: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call lroundf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV64I-LABEL: lround_f16: -; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call lroundf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret - %1 = call iXLen @llvm.lround.iXLen.f16(float %a) - ret iXLen %1 -} - -declare i64 @llvm.llrint.i64.f16(float) - -define i64 @llrint_f16(float %a) nounwind { -; RV32IZFH-LABEL: llrint_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: call llrintf@plt -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: llrint_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.s a0, fa0 -; RV64IZFH-NEXT: ret -; -; RV32IDZFH-LABEL: llrint_f16: -; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: call llrintf@plt -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 -; 
RV32IDZFH-NEXT: ret -; -; RV64IDZFH-LABEL: llrint_f16: -; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: fcvt.l.s a0, fa0 -; RV64IDZFH-NEXT: ret -; -; RV32I-LABEL: llrint_f16: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call llrintf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV64I-LABEL: llrint_f16: -; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call llrintf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret - %1 = call i64 @llvm.llrint.i64.f16(float %a) - ret i64 %1 -} - -declare i64 @llvm.llround.i64.f16(float) - -define i64 @llround_f16(float %a) nounwind { -; RV32IZFH-LABEL: llround_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: call llroundf@plt -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: llround_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.s a0, fa0, rmm -; RV64IZFH-NEXT: ret -; -; RV32IDZFH-LABEL: llround_f16: -; RV32IDZFH: # %bb.0: -; RV32IDZFH-NEXT: addi sp, sp, -16 -; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: call llroundf@plt -; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 16 -; RV32IDZFH-NEXT: ret -; -; RV64IDZFH-LABEL: llround_f16: -; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: fcvt.l.s a0, fa0, rmm -; RV64IDZFH-NEXT: ret -; -; RV32I-LABEL: llround_f16: -; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call llroundf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV64I-LABEL: llround_f16: -; RV64I: # %bb.0: -; RV64I-NEXT: addi 
sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call llroundf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret - %1 = call i64 @llvm.llround.i64.f16(float %a) - ret i64 %1 -} diff --git a/llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics.ll new file mode 100644 index 000000000000..4091c52d6c8a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-zfh \ +; RUN: -verify-machineinstrs -target-abi lp64f | \ +; RUN: FileCheck -check-prefix=RV64IZFH %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+experimental-zfh -verify-machineinstrs -target-abi lp64d | \ +; RUN: FileCheck -check-prefix=RV64IDZFH %s + +; These intrinsics require half and i64 to be legal types. + +declare i64 @llvm.llrint.i64.f16(half) + +define i64 @llrint_f16(half %a) nounwind { +; RV64IZFH-LABEL: llrint_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: llrint_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.llrint.i64.f16(half %a) + ret i64 %1 +} + +declare i64 @llvm.llround.i64.f16(half) + +define i64 @llround_f16(half %a) nounwind { +; RV64IZFH-LABEL: llround_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: llround_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.llround.i64.f16(half %a) + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll new file mode 100644 index 000000000000..f6f011ecfe8b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics.ll @@ -0,0 +1,67 
@@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+experimental-zfh \ +; RUN: -verify-machineinstrs -target-abi ilp32f | \ +; RUN: FileCheck -check-prefix=RV32IZFH %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+experimental-zfh \ +; RUN: -verify-machineinstrs -target-abi lp64f | \ +; RUN: FileCheck -check-prefix=RV64IZFH %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -mattr=+experimental-zfh -verify-machineinstrs -target-abi ilp32d | \ +; RUN: FileCheck -check-prefix=RV32IDZFH %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+experimental-zfh -verify-machineinstrs -target-abi lp64d | \ +; RUN: FileCheck -check-prefix=RV64IDZFH %s + +; These intrinsics require half to be a legal type. + +declare iXLen @llvm.lrint.iXLen.f16(half) + +define iXLen @lrint_f16(half %a) nounwind { +; RV32IZFH-LABEL: lrint_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: lrint_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: lrint_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: lrint_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call iXLen @llvm.lrint.iXLen.f16(half %a) + ret iXLen %1 +} + +declare iXLen @llvm.lround.iXLen.f16(half) + +define iXLen @lround_f16(half %a) nounwind { +; RV32IZFH-LABEL: lround_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: lround_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: lround_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: lround_f16: +; RV64IDZFH: # %bb.0: +; 
RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IDZFH-NEXT: ret + %1 = call iXLen @llvm.lround.iXLen.f16(half %a) + ret iXLen %1 +} From b8a47800321de538a39b92a2d539e1082e034e91 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 15:16:14 -0800 Subject: [PATCH 097/992] [ELF] Simplify and optimize SymbolTableSection::writeTo --- lld/ELF/SyntheticSections.cpp | 55 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 86b2f33196ec..b3da9c7881be 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2182,8 +2182,6 @@ static BssSection *getCommonSec(Symbol *sym) { } static uint32_t getSymSectionIndex(Symbol *sym) { - if (getCommonSec(sym)) - return SHN_COMMON; assert(!(sym->needsCopy && sym->isObject())); if (!isa(sym) || sym->needsCopy) return SHN_UNDEF; @@ -2205,10 +2203,10 @@ template void SymbolTableSection::writeTo(uint8_t *buf) { Symbol *sym = ent.sym; bool isDefinedHere = type == SHT_SYMTAB || sym->partition == partition; - // Set st_info and st_other. - eSym->st_other = 0; + // Set st_name, st_info and st_other. + eSym->st_name = ent.strTabOffset; eSym->setBindingAndType(sym->binding, sym->type); - eSym->setVisibility(sym->visibility); + eSym->st_other = sym->visibility; // The 3 most significant bits of st_other are used by OpenPOWER ABI. // See getPPC64GlobalEntryToLocalEntryOffset() for more details. @@ -2219,30 +2217,29 @@ template void SymbolTableSection::writeTo(uint8_t *buf) { else if (config->emachine == EM_AARCH64) eSym->st_other |= sym->stOther & STO_AARCH64_VARIANT_PCS; - eSym->st_name = ent.strTabOffset; - if (isDefinedHere) - eSym->st_shndx = getSymSectionIndex(ent.sym); - else - eSym->st_shndx = 0; - - // Copy symbol size if it is a defined symbol. st_size is not significant - // for undefined symbols, so whether copying it or not is up to us if that's - // the case. 
We'll leave it as zero because by not setting a value, we can - // get the exact same outputs for two sets of input files that differ only - // in undefined symbol size in DSOs. - if (eSym->st_shndx == SHN_UNDEF || !isDefinedHere) - eSym->st_size = 0; - else - eSym->st_size = sym->getSize(); - - // st_value is usually an address of a symbol, but that has a special - // meaning for uninstantiated common symbols (--no-define-common). - if (BssSection *commonSec = getCommonSec(ent.sym)) + if (BssSection *commonSec = getCommonSec(sym)) { + // st_value is usually an address of a symbol, but that has a special + // meaning for uninstantiated common symbols (--no-define-common). + eSym->st_shndx = SHN_COMMON; eSym->st_value = commonSec->alignment; - else if (isDefinedHere) - eSym->st_value = sym->getVA(); - else - eSym->st_value = 0; + eSym->st_size = cast(sym)->size; + } else { + const uint32_t shndx = getSymSectionIndex(sym); + if (isDefinedHere) { + eSym->st_shndx = shndx; + eSym->st_value = sym->getVA(); + // Copy symbol size if it is a defined symbol. st_size is not + // significant for undefined symbols, so whether copying it or not is up + // to us if that's the case. We'll leave it as zero because by not + // setting a value, we can get the exact same outputs for two sets of + // input files that differ only in undefined symbol size in DSOs. + eSym->st_size = shndx != SHN_UNDEF ? cast(sym)->size : 0; + } else { + eSym->st_shndx = 0; + eSym->st_value = 0; + eSym->st_size = 0; + } + } ++eSym; } @@ -2293,7 +2290,7 @@ void SymtabShndxSection::writeTo(uint8_t *buf) { // we need to write actual index, otherwise, we must write SHN_UNDEF(0). buf += 4; // Ignore .symtab[0] entry. 
for (const SymbolTableEntry &entry : in.symTab->getSymbols()) { - if (getSymSectionIndex(entry.sym) == SHN_XINDEX) + if (!getCommonSec(entry.sym) && getSymSectionIndex(entry.sym) == SHN_XINDEX) write32(buf, entry.sym->getOutputSection()->sectionIndex); buf += 4; } From 5161060559965fc5f69c46853358bea8ab504b4c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 27 Dec 2021 15:20:12 -0800 Subject: [PATCH 098/992] [llvm] Fix header guards (NFC) Identified with llvm-header-guard. --- llvm/include/llvm/Debuginfod/HTTPClient.h | 6 +++--- .../llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h | 6 +++--- llvm/include/llvm/Support/DivisionByConstantInfo.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h index 51de66629544..e8f0e7ef8f78 100644 --- a/llvm/include/llvm/Debuginfod/HTTPClient.h +++ b/llvm/include/llvm/Debuginfod/HTTPClient.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_HTTP_CLIENT_H -#define LLVM_SUPPORT_HTTP_CLIENT_H +#ifndef LLVM_DEBUGINFOD_HTTPCLIENT_H +#define LLVM_DEBUGINFOD_HTTPCLIENT_H #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" @@ -116,4 +116,4 @@ class HTTPClient { } // end namespace llvm -#endif // LLVM_SUPPORT_HTTP_CLIENT_H +#endif // LLVM_DEBUGINFOD_HTTPCLIENT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h index af092b3287d3..d2bf8330695f 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H -#define LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H +#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORTPLUGIN_H +#define 
LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORTPLUGIN_H #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" @@ -61,4 +61,4 @@ class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin { } // namespace orc } // namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H +#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORTPLUGIN_H diff --git a/llvm/include/llvm/Support/DivisionByConstantInfo.h b/llvm/include/llvm/Support/DivisionByConstantInfo.h index 5bb326178c3e..896bc679885e 100644 --- a/llvm/include/llvm/Support/DivisionByConstantInfo.h +++ b/llvm/include/llvm/Support/DivisionByConstantInfo.h @@ -10,8 +10,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H -#define LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H +#ifndef LLVM_SUPPORT_DIVISIONBYCONSTANTINFO_H +#define LLVM_SUPPORT_DIVISIONBYCONSTANTINFO_H #include "llvm/ADT/APInt.h" From 0c9a4da497c44d2c88b22ee52868fe37359c7a76 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 27 Dec 2021 15:32:51 -0800 Subject: [PATCH 099/992] [ASTMatchers] Simplify string comparisons (NFC) Identified with readability-string-compare. 
--- clang/include/clang/ASTMatchers/ASTMatchers.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index 55cce324b436..599ab407c442 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -3725,10 +3725,9 @@ AST_MATCHER_P(ObjCMessageExpr, hasReceiver, internal::Matcher, /// \endcode AST_MATCHER_P(ObjCMessageExpr, hasSelector, std::string, BaseName) { Selector Sel = Node.getSelector(); - return BaseName.compare(Sel.getAsString()) == 0; + return BaseName == Sel.getAsString(); } - /// Matches when at least one of the supplied string equals to the /// Selector.getAsString() /// From fb28d6fb5ffc3c58b30467ef3f59cd2891be7a65 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 27 Dec 2021 15:42:52 -0800 Subject: [PATCH 100/992] [DebugInfo] Drop unnecessary const from return types (NFC) Identified with readability-const-return-type. 
--- llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 555d29fe184b..e29968d113bd 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -33,8 +33,8 @@ namespace symbolize { class SourceCode { std::unique_ptr MemBuf; - const Optional load(StringRef FileName, - const Optional &EmbeddedSource) { + Optional load(StringRef FileName, + const Optional &EmbeddedSource) { if (Lines <= 0) return None; @@ -50,7 +50,7 @@ class SourceCode { } } - const Optional pruneSource(const Optional &Source) { + Optional pruneSource(const Optional &Source) { if (!Source) return None; size_t FirstLinePos = StringRef::npos, Pos = 0; From 3c94d5d9d233a8e20efd2b4d4f883d7482303b60 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 15:57:38 -0800 Subject: [PATCH 101/992] [ELF] addOrphanSections: avoid std::function --- lld/ELF/LinkerScript.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 7c67ee53adba..db20dd36241c 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -829,8 +829,7 @@ void LinkerScript::addOrphanSections() { StringMap> map; SmallVector v; - std::function add; - add = [&](InputSectionBase *s) { + auto add = [&](InputSectionBase *s) { if (s->isLive() && !s->parent) { orphanSections.push_back(s); @@ -846,11 +845,6 @@ void LinkerScript::addOrphanSections() { s->getOutputSection()->sectionIndex == UINT32_MAX); } } - - if (config->relocatable) - for (InputSectionBase *depSec : s->dependentSections) - if (depSec->flags & SHF_LINK_ORDER) - add(depSec); }; // For further --emit-reloc handling code we need target output section @@ -869,6 +863,10 @@ void LinkerScript::addOrphanSections() { if (auto *relIS = dyn_cast_or_null(rel->parent)) add(relIS); 
add(isec); + if (config->relocatable) + for (InputSectionBase *depSec : isec->dependentSections) + if (depSec->flags & SHF_LINK_ORDER) + add(depSec); } // If no SECTIONS command was given, we should insert sections commands From 8445883327b1383dc3c404922815b116a8d7dcb2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 27 Dec 2021 15:58:03 -0800 Subject: [PATCH 102/992] [llvm] Drop unnecessary const from return types (NFC) Identified with readability-const-return-type. --- llvm/include/llvm/Target/TargetLoweringObjectFile.h | 2 +- llvm/lib/Target/ARM/ARMTargetObjectFile.cpp | 4 +--- llvm/lib/Target/ARM/ARMTargetObjectFile.h | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 752032d3d04d..392ee4334cb5 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -221,7 +221,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { } /// Returns the register used as static base in RWPI variants. - virtual const MCRegister getStaticBase() const { return MCRegister::NoRegister; } + virtual MCRegister getStaticBase() const { return MCRegister::NoRegister; } /// Get the target specific RWPI relocation. 
virtual const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const { diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index 8c5438f7093b..936cae17f004 100644 --- a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -54,9 +54,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, } } -const MCRegister ARMElfTargetObjectFile::getStaticBase() const { - return ARM::R9; -} +MCRegister ARMElfTargetObjectFile::getStaticBase() const { return ARM::R9; } const MCExpr *ARMElfTargetObjectFile:: getIndirectSymViaRWPI(const MCSymbol *Sym) const { diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/llvm/lib/Target/ARM/ARMTargetObjectFile.h index 8b13198fe144..f86774beb397 100644 --- a/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -24,7 +24,7 @@ class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - const MCRegister getStaticBase() const override; + MCRegister getStaticBase() const override; const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const override; From 049cd480a0ce2f23c85ea12d24cc6ab6b535e764 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 17:05:48 -0800 Subject: [PATCH 103/992] [ELF] Use const reference. NFC --- lld/ELF/SyntheticSections.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index b3da9c7881be..797f38188e99 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1265,11 +1265,11 @@ DynamicSection::DynamicSection() // .rela.dyn // // DT_RELASZ is the total size of the included sections. 
-static uint64_t addRelaSz(RelocationBaseSection *relaDyn) { - size_t size = relaDyn->getSize(); - if (in.relaIplt->getParent() == relaDyn->getParent()) +static uint64_t addRelaSz(const RelocationBaseSection &relaDyn) { + size_t size = relaDyn.getSize(); + if (in.relaIplt->getParent() == relaDyn.getParent()) size += in.relaIplt->getSize(); - if (in.relaPlt->getParent() == relaDyn->getParent()) + if (in.relaPlt->getParent() == relaDyn.getParent()) size += in.relaPlt->getSize(); return size; } @@ -1375,7 +1375,8 @@ DynamicSection::computeContents() { (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { addInSec(part.relaDyn->dynamicTag, *part.relaDyn); - entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)); + entries.emplace_back(part.relaDyn->sizeDynamicTag, + addRelaSz(*part.relaDyn)); bool isRela = config->isRela; addInt(isRela ? DT_RELAENT : DT_RELENT, From a1c7ddf9264e21a1f8ef6e118d7041a0166076d1 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Mon, 27 Dec 2021 22:35:27 +0800 Subject: [PATCH 104/992] [RISCV] Support passing scalable vectur values through the stack. After consuming all vector registers, the scalable vector values will be passed indirectly. The pointer values will be saved in general registers. If all general registers are used up, we will report an error to notify users the compiler does not support passing scalable vector values through the stack. In this patch, we remove the restriction. After all general registers are used up, we use the stack to save the pointers which point to the indirect passed scalable vector values. 
Differential Revision: https://reviews.llvm.org/D116310 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++- .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 56 +++++++++++++++++++ .../RISCV/rvv/unsupported-calling-conv.ll | 12 ---- 3 files changed, 64 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll delete mode 100644 llvm/test/CodeGen/RISCV/rvv/unsupported-calling-conv.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 39dd139953b0..9d4f3b28f5b3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8393,7 +8393,8 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, LocVT = XLenVT; LocInfo = CCValAssign::Indirect; } else if (ValVT.isScalableVector()) { - report_fatal_error("Unable to pass scalable vector types on the stack"); + LocVT = XLenVT; + LocInfo = CCValAssign::Indirect; } else { // Pass fixed-length vectors on the stack. LocVT = ValVT; @@ -8592,6 +8593,12 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, EVT LocVT = VA.getLocVT(); EVT ValVT = VA.getValVT(); EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); + if (ValVT.isScalableVector()) { + // When the value is a scalable vector, we save the pointer which points to + // the scalable vector value in the stack. The ValVT will be the pointer + // type, instead of the scalable vector type. 
+ ValVT = LocVT; + } int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), /*Immutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll new file mode 100644 index 000000000000..af0e5a07862f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s 2>&1 | FileCheck %s + +define @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %w, %x, %y, %z) { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a0, 0(sp) +; CHECK-NEXT: ld a1, 8(sp) +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vl8re32.v v0, (a1) +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vv v16, v16, v0 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %s0 = add %w, %y + %s1 = add %x, %z + %s = add %s0, %s1 + ret %s +} + +define @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 4 +; CHECK-NEXT: sub sp, sp, t0 +; CHECK-NEXT: addi t0, sp, 40 +; CHECK-NEXT: sd t0, 8(sp) +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 40 +; CHECK-NEXT: sd t0, 0(sp) +; CHECK-NEXT: addi t0, sp, 40 +; CHECK-NEXT: vs8r.v v8, (t0) +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 40 +; CHECK-NEXT: vs8r.v v8, (t0) +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: call bar@plt +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add 
sp, sp, a0 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %ret = call @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x, %x, %x, %x) + ret %ret +} diff --git a/llvm/test/CodeGen/RISCV/rvv/unsupported-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/unsupported-calling-conv.ll deleted file mode 100644 index b2272c9eae34..000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/unsupported-calling-conv.ll +++ /dev/null @@ -1,12 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: not --crash llc -mtriple=riscv64 -mattr=+experimental-v < %s 2>&1 | FileCheck %s - -; A rather pathological test case in which we exhaust all vector registers and -; all scalar registers, forcing %z to go through the stack. This is not yet -; supported, so check that a reasonable error message is produced rather than -; hitting an assertion or producing incorrect code. -; CHECK: LLVM ERROR: Unable to pass scalable vector types on the stack -define @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x, %y, %z) { - %s = add %x, %z - ret %s -} From cb203f3f92e13000712c2ee8d8b0576102804fb1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 18:15:23 -0800 Subject: [PATCH 105/992] [ELF] Change InStruct/Partition pointers to unique_ptr and remove associated make calls. gnuHash and sysvHash are unchanged, otherwise LinkerScript::discard would destroy the objects which may be referenced by input section descriptions. My x86-64 lld executable is 121+KiB smaller. 
--- lld/ELF/Driver.cpp | 8 +- lld/ELF/InputFiles.cpp | 8 +- lld/ELF/LinkerScript.cpp | 2 +- lld/ELF/Relocations.cpp | 8 +- lld/ELF/SyntheticSections.cpp | 49 ++++++++--- lld/ELF/SyntheticSections.h | 72 ++++++++-------- lld/ELF/Writer.cpp | 158 ++++++++++++++++++---------------- 7 files changed, 170 insertions(+), 135 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 66bf7d0f3848..dab6a537a2ee 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -97,9 +97,10 @@ bool elf::link(ArrayRef args, bool canExitEarly, whyExtract.clear(); tar = nullptr; - memset(&in, 0, sizeof(in)); + in.reset(); - partitions = {Partition()}; + partitions.clear(); + partitions.emplace_back(); SharedFile::vernauxNum = 0; }; @@ -116,7 +117,8 @@ bool elf::link(ArrayRef args, bool canExitEarly, script = std::make_unique(); symtab = std::make_unique(); - partitions = {Partition()}; + partitions.clear(); + partitions.emplace_back(); config->progName = args[0]; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index e321b0d82920..f1e29547de12 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -879,8 +879,8 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // to work. In a full implementation we would merge all attribute // sections. if (in.attributes == nullptr) { - in.attributes = make(*this, sec, name); - return in.attributes; + in.attributes = std::make_unique(*this, sec, name); + return in.attributes.get(); } return &InputSection::discarded; } @@ -901,8 +901,8 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // standard extensions to enable. In a full implementation we would merge // all attribute sections. 
if (in.attributes == nullptr) { - in.attributes = make(*this, sec, name); - return in.attributes; + in.attributes = std::make_unique(*this, sec, name); + return in.attributes.get(); } return &InputSection::discarded; } diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index db20dd36241c..22c3a5c0c91e 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -561,7 +561,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, } void LinkerScript::discard(InputSectionBase &s) { - if (&s == in.shStrTab || &s == mainPart->relrDyn) + if (&s == in.shStrTab.get() || &s == mainPart->relrDyn.get()) error("discarding " + s.name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index d7eef68800c5..bd5043274f9f 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -870,7 +870,7 @@ static void addGotEntry(Symbol &sym) { // If preemptible, emit a GLOB_DAT relocation. if (sym.isPreemptible) { - mainPart->relaDyn->addReloc({target->gotRel, in.got, off, + mainPart->relaDyn->addReloc({target->gotRel, in.got.get(), off, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); return; } @@ -1551,7 +1551,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym) { if (sym.hasDirectReloc) { // Change the value to the IPLT and redirect all references to it. 
auto &d = cast(sym); - d.section = in.iplt; + d.section = in.iplt.get(); d.value = sym.pltIndex * target->ipltEntrySize; d.size = 0; // It's important to set the symbol type here so that dynamic loaders @@ -1641,8 +1641,8 @@ void elf::postScanRelocations() { in.got->relocations.push_back( {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); else - mainPart->relaDyn->addReloc( - {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); + mainPart->relaDyn->addReloc({target->tlsModuleIndexRel, in.got.get(), + in.got->getTlsIndexOff()}); } if (sym.needsGotDtprel) { in.got->addEntry(sym); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 797f38188e99..f93c09b95d75 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1627,7 +1627,7 @@ void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { } void RelocationBaseSection::finalizeContents() { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE // relocations due to IFUNC (e.g. strcpy). 
sh_link will be set to 0 in that @@ -1637,11 +1637,11 @@ void RelocationBaseSection::finalizeContents() { else getParent()->link = 0; - if (in.relaPlt == this && in.gotPlt->getParent()) { + if (in.relaPlt.get() == this && in.gotPlt->getParent()) { getParent()->flags |= ELF::SHF_INFO_LINK; getParent()->info = in.gotPlt->getParent()->sectionIndex; } - if (in.relaIplt == this && in.igotPlt->getParent()) { + if (in.relaIplt.get() == this && in.igotPlt->getParent()) { getParent()->flags |= ELF::SHF_INFO_LINK; getParent()->info = in.igotPlt->getParent()->sectionIndex; } @@ -1678,7 +1678,7 @@ RelocationSection::RelocationSection(StringRef name, bool sort) } template void RelocationSection::writeTo(uint8_t *buf) { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); parallelForEach(relocs, [symTab](DynamicReloc &rel) { rel.computeRaw(symTab); }); @@ -1773,8 +1773,8 @@ bool AndroidPackedRelocationSection::updateAllocSize() { for (const DynamicReloc &rel : relocs) { Elf_Rela r; r.r_offset = rel.getOffset(); - r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab), rel.type, - false); + r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab.get()), + rel.type, false); if (config->isRela) r.r_addend = rel.computeAddend(); @@ -2100,7 +2100,7 @@ void SymbolTableBaseSection::finalizeContents() { // Only the main partition's dynsym indexes are stored in the symbols // themselves. All other partitions use a lookup table. - if (this == mainPart->dynSymTab) { + if (this == mainPart->dynSymTab.get()) { size_t i = 0; for (const SymbolTableEntry &s : symbols) s.sym->dynsymIndex = ++i; @@ -2146,7 +2146,7 @@ void SymbolTableBaseSection::addSymbol(Symbol *b) { } size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) { - if (this == mainPart->dynSymTab) + if (this == mainPart->dynSymTab.get()) return sym->dynsymIndex; // Initializes symbol lookup tables lazily. 
This is used only for -r, @@ -2474,7 +2474,7 @@ HashTableSection::HashTableSection() } void HashTableSection::finalizeContents() { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); if (OutputSection *sec = symTab->getParent()) getParent()->link = sec->sectionIndex; @@ -2488,7 +2488,7 @@ void HashTableSection::finalizeContents() { } void HashTableSection::writeTo(uint8_t *buf) { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); // See comment in GnuHashTableSection::writeTo. memset(buf, 0, size); @@ -3792,8 +3792,9 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { write32(buf, mainPart->dynStrTab->getVA() + partitions[i].nameStrTab - va); write32(buf + 4, partitions[i].elfHeader->getVA() - (va + 4)); - SyntheticSection *next = - i == partitions.size() - 1 ? in.partEnd : partitions[i + 1].elfHeader; + SyntheticSection *next = i == partitions.size() - 1 + ? 
in.partEnd.get() + : partitions[i + 1].elfHeader.get(); write32(buf + 8, next->getVA() - partitions[i].elfHeader->getVA()); va += 12; @@ -3801,6 +3802,30 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { } } +void InStruct::reset() { + attributes.reset(); + bss.reset(); + bssRelRo.reset(); + got.reset(); + gotPlt.reset(); + igotPlt.reset(); + ppc64LongBranchTarget.reset(); + mipsGot.reset(); + mipsRldMap.reset(); + partEnd.reset(); + partIndex.reset(); + plt.reset(); + iplt.reset(); + ppc32Got2.reset(); + ibtPlt.reset(); + relaPlt.reset(); + relaIplt.reset(); + shStrTab.reset(); + strTab.reset(); + symTab.reset(); + symTabShndx.reset(); +} + InStruct elf::in; std::vector elf::partitions; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 547fa5f4056d..7f4562a92017 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1202,24 +1202,24 @@ struct Partition { StringRef name; uint64_t nameStrTab; - SyntheticSection *elfHeader; - SyntheticSection *programHeaders; + std::unique_ptr elfHeader; + std::unique_ptr programHeaders; SmallVector phdrs; - ARMExidxSyntheticSection *armExidx; - BuildIdSection *buildId; - SyntheticSection *dynamic; - StringTableSection *dynStrTab; - SymbolTableBaseSection *dynSymTab; - EhFrameHeader *ehFrameHdr; - EhFrameSection *ehFrame; + std::unique_ptr armExidx; + std::unique_ptr buildId; + std::unique_ptr dynamic; + std::unique_ptr dynStrTab; + std::unique_ptr dynSymTab; + std::unique_ptr ehFrameHdr; + std::unique_ptr ehFrame; GnuHashTableSection *gnuHashTab; HashTableSection *hashTab; - RelocationBaseSection *relaDyn; - RelrBaseSection *relrDyn; - VersionDefinitionSection *verDef; - SyntheticSection *verNeed; - VersionTableSection *verSym; + std::unique_ptr relaDyn; + std::unique_ptr relrDyn; + std::unique_ptr verDef; + std::unique_ptr verNeed; + std::unique_ptr verSym; unsigned getNumber() const { return this - &partitions[0] + 1; } }; @@ -1234,27 +1234,29 @@ inline Partition 
&SectionBase::getPartition() const { // Linker generated sections which can be used as inputs and are not specific to // a partition. struct InStruct { - InputSection *attributes; - BssSection *bss; - BssSection *bssRelRo; - GotSection *got; - GotPltSection *gotPlt; - IgotPltSection *igotPlt; - PPC64LongBranchTargetSection *ppc64LongBranchTarget; - MipsGotSection *mipsGot; - MipsRldMapSection *mipsRldMap; - SyntheticSection *partEnd; - SyntheticSection *partIndex; - PltSection *plt; - IpltSection *iplt; - PPC32Got2Section *ppc32Got2; - IBTPltSection *ibtPlt; - RelocationBaseSection *relaPlt; - RelocationBaseSection *relaIplt; - StringTableSection *shStrTab; - StringTableSection *strTab; - SymbolTableBaseSection *symTab; - SymtabShndxSection *symTabShndx; + std::unique_ptr attributes; + std::unique_ptr bss; + std::unique_ptr bssRelRo; + std::unique_ptr got; + std::unique_ptr gotPlt; + std::unique_ptr igotPlt; + std::unique_ptr ppc64LongBranchTarget; + std::unique_ptr mipsGot; + std::unique_ptr mipsRldMap; + std::unique_ptr partEnd; + std::unique_ptr partIndex; + std::unique_ptr plt; + std::unique_ptr iplt; + std::unique_ptr ppc32Got2; + std::unique_ptr ibtPlt; + std::unique_ptr relaPlt; + std::unique_ptr relaIplt; + std::unique_ptr shStrTab; + std::unique_ptr strTab; + std::unique_ptr symTab; + std::unique_ptr symTabShndx; + + void reset(); }; extern InStruct in; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 4b33563e8a79..f62f6bf68ed7 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -299,18 +299,18 @@ template void elf::createSyntheticSections() { auto add = [](SyntheticSection &sec) { inputSections.push_back(&sec); }; - in.shStrTab = make(".shstrtab", false); + in.shStrTab = std::make_unique(".shstrtab", false); Out::programHeaders = make("", 0, SHF_ALLOC); Out::programHeaders->alignment = config->wordsize; if (config->strip != StripPolicy::All) { - in.strTab = make(".strtab", false); - in.symTab = make>(*in.strTab); - in.symTabShndx = 
make(); + in.strTab = std::make_unique(".strtab", false); + in.symTab = std::make_unique>(*in.strTab); + in.symTabShndx = std::make_unique(); } - in.bss = make(".bss", 0, 1); + in.bss = std::make_unique(".bss", 0, 1); add(*in.bss); // If there is a SECTIONS command and a .data.rel.ro section name use name @@ -318,14 +318,14 @@ template void elf::createSyntheticSections() { // This makes sure our relro is contiguous. bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro", 0); - in.bssRelRo = - make(hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); + in.bssRelRo = std::make_unique( + hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); add(*in.bssRelRo); // Add MIPS-specific sections. if (config->emachine == EM_MIPS) { if (!config->shared && config->hasDynSymTab) { - in.mipsRldMap = make(); + in.mipsRldMap = std::make_unique(); add(*in.mipsRldMap); } if (auto *sec = MipsAbiFlagsSection::create()) @@ -345,40 +345,43 @@ template void elf::createSyntheticSections() { }; if (!part.name.empty()) { - part.elfHeader = make>(); + part.elfHeader = std::make_unique>(); part.elfHeader->name = part.name; add(*part.elfHeader); - part.programHeaders = make>(); + part.programHeaders = + std::make_unique>(); add(*part.programHeaders); } if (config->buildId != BuildIdKind::None) { - part.buildId = make(); + part.buildId = std::make_unique(); add(*part.buildId); } - part.dynStrTab = make(".dynstr", true); - part.dynSymTab = make>(*part.dynStrTab); - part.dynamic = make>(); + part.dynStrTab = std::make_unique(".dynstr", true); + part.dynSymTab = + std::make_unique>(*part.dynStrTab); + part.dynamic = std::make_unique>(); if (config->androidPackDynRelocs) - part.relaDyn = make>(relaDynName); - else part.relaDyn = - make>(relaDynName, config->zCombreloc); + std::make_unique>(relaDynName); + else + part.relaDyn = std::make_unique>( + relaDynName, config->zCombreloc); if (config->hasDynSymTab) { add(*part.dynSymTab); - part.verSym = make(); + part.verSym = 
std::make_unique(); add(*part.verSym); if (!namedVersionDefs().empty()) { - part.verDef = make(); + part.verDef = std::make_unique(); add(*part.verDef); } - part.verNeed = make>(); + part.verNeed = std::make_unique>(); add(*part.verNeed); if (config->gnuHash) { @@ -397,23 +400,23 @@ template void elf::createSyntheticSections() { } if (config->relrPackDynRelocs) { - part.relrDyn = make>(); + part.relrDyn = std::make_unique>(); add(*part.relrDyn); } if (!config->relocatable) { if (config->ehFrameHdr) { - part.ehFrameHdr = make(); + part.ehFrameHdr = std::make_unique(); add(*part.ehFrameHdr); } - part.ehFrame = make(); + part.ehFrame = std::make_unique(); add(*part.ehFrame); } if (config->emachine == EM_ARM && !config->relocatable) { // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx // InputSections. - part.armExidx = make(); + part.armExidx = std::make_unique(); add(*part.armExidx); } } @@ -422,13 +425,14 @@ template void elf::createSyntheticSections() { // Create the partition end marker. This needs to be in partition number 255 // so that it is sorted after all other partitions. It also has other // special handling (see createPhdrs() and combineEhSections()). - in.partEnd = make(".part.end", config->maxPageSize, 1); + in.partEnd = + std::make_unique(".part.end", config->maxPageSize, 1); in.partEnd->partition = 255; add(*in.partEnd); - in.partIndex = make(); - addOptionalRegular("__part_index_begin", in.partIndex, 0); - addOptionalRegular("__part_index_end", in.partIndex, + in.partIndex = std::make_unique(); + addOptionalRegular("__part_index_begin", in.partIndex.get(), 0); + addOptionalRegular("__part_index_end", in.partIndex.get(), in.partIndex->getSize()); add(*in.partIndex); } @@ -436,26 +440,26 @@ template void elf::createSyntheticSections() { // Add .got. MIPS' .got is so different from the other archs, // it has its own class. 
if (config->emachine == EM_MIPS) { - in.mipsGot = make(); + in.mipsGot = std::make_unique(); add(*in.mipsGot); } else { - in.got = make(); + in.got = std::make_unique(); add(*in.got); } if (config->emachine == EM_PPC) { - in.ppc32Got2 = make(); + in.ppc32Got2 = std::make_unique(); add(*in.ppc32Got2); } if (config->emachine == EM_PPC64) { - in.ppc64LongBranchTarget = make(); + in.ppc64LongBranchTarget = std::make_unique(); add(*in.ppc64LongBranchTarget); } - in.gotPlt = make(); + in.gotPlt = std::make_unique(); add(*in.gotPlt); - in.igotPlt = make(); + in.igotPlt = std::make_unique(); add(*in.igotPlt); // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat @@ -472,7 +476,7 @@ template void elf::createSyntheticSections() { // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. - in.relaPlt = make>( + in.relaPlt = std::make_unique>( config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false); add(*in.relaPlt); @@ -482,21 +486,23 @@ template void elf::createSyntheticSections() { // that would cause a section type mismatch. However, because the Android // dynamic loader reads .rel.plt after .rel.dyn, we can get the desired // behaviour by placing the iplt section in .rel.plt. - in.relaIplt = make>( + in.relaIplt = std::make_unique>( config->androidPackDynRelocs ? in.relaPlt->name : relaDynName, /*sort=*/false); add(*in.relaIplt); if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { - in.ibtPlt = make(); + in.ibtPlt = std::make_unique(); add(*in.ibtPlt); } - in.plt = config->emachine == EM_PPC ? 
make() - : make(); + if (config->emachine == EM_PPC) + in.plt = std::make_unique(); + else + in.plt = std::make_unique(); add(*in.plt); - in.iplt = make(); + in.iplt = std::make_unique(); add(*in.iplt); if (config->andFeatures) @@ -1056,17 +1062,17 @@ template void Writer::setReservedSymbolSections() { if (ElfSym::globalOffsetTable) { // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually // to the start of the .got or .got.plt section. - InputSection *gotSection = in.gotPlt; + InputSection *sec = in.gotPlt.get(); if (!target->gotBaseSymInGotPlt) - gotSection = in.mipsGot ? cast(in.mipsGot) - : cast(in.got); - ElfSym::globalOffsetTable->section = gotSection; + sec = in.mipsGot.get() ? cast(in.mipsGot.get()) + : cast(in.got.get()); + ElfSym::globalOffsetTable->section = sec; } // .rela_iplt_{start,end} mark the start and the end of in.relaIplt. if (ElfSym::relaIpltStart && in.relaIplt->isNeeded()) { - ElfSym::relaIpltStart->section = in.relaIplt; - ElfSym::relaIpltEnd->section = in.relaIplt; + ElfSym::relaIpltStart->section = in.relaIplt.get(); + ElfSym::relaIpltEnd->section = in.relaIplt.get(); ElfSym::relaIpltEnd->value = in.relaIplt->getSize(); } @@ -1643,7 +1649,7 @@ template void Writer::finalizeAddressDependentContent() { // can assign Virtual Addresses to OutputSections that are not monotonically // increasing. for (Partition &part : partitions) - finalizeSynthetic(part.armExidx); + finalizeSynthetic(part.armExidx.get()); resolveShfLinkOrder(); // Converts call x@GDPLT to call __tls_get_addr @@ -1864,9 +1870,9 @@ template void Writer::finalizeSections() { // Even the author of gold doesn't remember why gold behaves that way. 
// https://sourceware.org/ml/binutils/2002-03/msg00360.html if (mainPart->dynamic->parent) - symtab->addSymbol(Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK, - STV_HIDDEN, STT_NOTYPE, - /*value=*/0, /*size=*/0, mainPart->dynamic}); + symtab->addSymbol( + Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK, STV_HIDDEN, STT_NOTYPE, + /*value=*/0, /*size=*/0, mainPart->dynamic.get()}); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); @@ -1909,7 +1915,7 @@ template void Writer::finalizeSections() { // pieces. The relocation scan uses those pieces, so this has to be // earlier. for (Partition &part : partitions) - finalizeSynthetic(part.ehFrame); + finalizeSynthetic(part.ehFrame.get()); } for (Symbol *sym : symtab->symbols()) @@ -2075,35 +2081,35 @@ template void Writer::finalizeSections() { { llvm::TimeTraceScope timeScope("Finalize synthetic sections"); - finalizeSynthetic(in.bss); - finalizeSynthetic(in.bssRelRo); - finalizeSynthetic(in.symTabShndx); - finalizeSynthetic(in.shStrTab); - finalizeSynthetic(in.strTab); - finalizeSynthetic(in.got); - finalizeSynthetic(in.mipsGot); - finalizeSynthetic(in.igotPlt); - finalizeSynthetic(in.gotPlt); - finalizeSynthetic(in.relaIplt); - finalizeSynthetic(in.relaPlt); - finalizeSynthetic(in.plt); - finalizeSynthetic(in.iplt); - finalizeSynthetic(in.ppc32Got2); - finalizeSynthetic(in.partIndex); + finalizeSynthetic(in.bss.get()); + finalizeSynthetic(in.bssRelRo.get()); + finalizeSynthetic(in.symTabShndx.get()); + finalizeSynthetic(in.shStrTab.get()); + finalizeSynthetic(in.strTab.get()); + finalizeSynthetic(in.got.get()); + finalizeSynthetic(in.mipsGot.get()); + finalizeSynthetic(in.igotPlt.get()); + finalizeSynthetic(in.gotPlt.get()); + finalizeSynthetic(in.relaIplt.get()); + finalizeSynthetic(in.relaPlt.get()); + finalizeSynthetic(in.plt.get()); + finalizeSynthetic(in.iplt.get()); + finalizeSynthetic(in.ppc32Got2.get()); + finalizeSynthetic(in.partIndex.get()); // Dynamic section must be the last one in 
this list and dynamic // symbol table section (dynSymTab) must be the first one. for (Partition &part : partitions) { - finalizeSynthetic(part.dynSymTab); + finalizeSynthetic(part.dynSymTab.get()); finalizeSynthetic(part.gnuHashTab); finalizeSynthetic(part.hashTab); - finalizeSynthetic(part.verDef); - finalizeSynthetic(part.relaDyn); - finalizeSynthetic(part.relrDyn); - finalizeSynthetic(part.ehFrameHdr); - finalizeSynthetic(part.verSym); - finalizeSynthetic(part.verNeed); - finalizeSynthetic(part.dynamic); + finalizeSynthetic(part.verDef.get()); + finalizeSynthetic(part.relaDyn.get()); + finalizeSynthetic(part.relrDyn.get()); + finalizeSynthetic(part.ehFrameHdr.get()); + finalizeSynthetic(part.verSym.get()); + finalizeSynthetic(part.verNeed.get()); + finalizeSynthetic(part.dynamic.get()); } } @@ -2139,8 +2145,8 @@ template void Writer::finalizeSections() { llvm::TimeTraceScope timeScope("Finalize synthetic sections"); // finalizeAddressDependentContent may have added local symbols to the // static symbol table. - finalizeSynthetic(in.symTab); - finalizeSynthetic(in.ppc64LongBranchTarget); + finalizeSynthetic(in.symTab.get()); + finalizeSynthetic(in.ppc64LongBranchTarget.get()); } // Relaxation to delete inter-basic block jumps created by basic block From 6c335b1a452f48c563bc503c81af297803885b59 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 27 Dec 2021 20:48:21 -0800 Subject: [PATCH 106/992] [clang] Remove unused "using" (NFC) Identified by misc-unused-using-decls. 
--- clang/lib/Sema/SemaExpr.cpp | 1 - clang/lib/Serialization/ASTReader.cpp | 1 - clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp | 1 - clang/lib/Tooling/Transformer/Parsing.cpp | 1 - 4 files changed, 4 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index d32b3f217aa0..736e76152fe4 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -55,7 +55,6 @@ using namespace clang; using namespace sema; -using llvm::RoundingMode; /// Determine whether the use of this declaration is valid, without /// emitting diagnostics. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f93e0d2ed1c4..732f07c1e0b3 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -142,7 +142,6 @@ using namespace clang; using namespace clang::serialization; using namespace clang::serialization::reader; using llvm::BitstreamCursor; -using llvm::RoundingMode; //===----------------------------------------------------------------------===// // ChainedASTReaderListener implementation diff --git a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp index 517a5d78271b..aa70db041c76 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp @@ -23,7 +23,6 @@ using namespace clang; using namespace ento; -using llvm::APSInt; namespace { class MmapWriteExecChecker : public Checker { diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp index 242db2a16b43..4f41e2e90def 100644 --- a/clang/lib/Tooling/Transformer/Parsing.cpp +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -33,7 +33,6 @@ using namespace transformer; // much as possible with the AST Matchers parsing. 
namespace { -using llvm::Error; using llvm::Expected; template using RangeSelectorOp = RangeSelector (*)(Ts...); From 49f646a9ede6301fd47a4b2ed19d0630fa90c7b4 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 27 Dec 2021 21:34:38 -0800 Subject: [PATCH 107/992] [ELF] Change EhInputSection::pieces to SmallVector The decreased size does not matter that much as one file contributes at most one EhInputSection. --- lld/ELF/InputSection.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 016f2523b4a8..87b748fdf99e 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -324,7 +324,7 @@ class EhInputSection : public InputSectionBase { // Splittable sections are handled as a sequence of data // rather than a single large blob of data. - std::vector pieces; + SmallVector pieces; SyntheticSection *getParent() const; }; From b5c5d8912e07218e3a6a0f8d1d9f7b436046c5bc Mon Sep 17 00:00:00 2001 From: Stanislav Gatev Date: Mon, 20 Dec 2021 09:56:25 +0000 Subject: [PATCH 108/992] [clang][dataflow] Add support for noreturn destructor calls This is part of the implementation of the dataflow analysis framework. See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev. 
Reviewed By: xazax.hun, gribozavr2 Differential Revision: https://reviews.llvm.org/D116022 --- .../FlowSensitive/ControlFlowContext.h | 57 +++++ .../Analysis/FlowSensitive/DataflowAnalysis.h | 10 +- .../TypeErasedDataflowAnalysis.h | 9 +- .../lib/Analysis/FlowSensitive/CMakeLists.txt | 1 + .../FlowSensitive/ControlFlowContext.cpp | 68 ++++++ .../TypeErasedDataflowAnalysis.cpp | 51 ++++- .../Analysis/FlowSensitive/TestingSupport.cpp | 23 --- .../Analysis/FlowSensitive/TestingSupport.h | 36 ++-- .../TypeErasedDataflowAnalysisTest.cpp | 194 ++++++++++++++++-- 9 files changed, 376 insertions(+), 73 deletions(-) create mode 100644 clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h create mode 100644 clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h new file mode 100644 index 000000000000..e6ceb3a89131 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -0,0 +1,57 @@ +//===-- ControlFlowContext.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a ControlFlowContext class that is used by dataflow +// analyses that run over Control-Flow Graphs (CFGs). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CONTROLFLOWCONTEXT_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CONTROLFLOWCONTEXT_H + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace clang { +namespace dataflow { + +/// Holds CFG and other derived context that is needed to perform dataflow +/// analysis. +class ControlFlowContext { +public: + /// Builds a ControlFlowContext from an AST node. + static llvm::Expected build(const Decl *D, Stmt *S, + ASTContext *C); + + /// Returns the CFG that is stored in this context. + const CFG &getCFG() const { return *Cfg; } + + /// Returns a mapping from statements to basic blocks that contain them. + const llvm::DenseMap &getStmtToBlock() const { + return StmtToBlock; + } + +private: + ControlFlowContext(std::unique_ptr Cfg, + llvm::DenseMap StmtToBlock) + : Cfg(std::move(Cfg)), StmtToBlock(std::move(StmtToBlock)) {} + + std::unique_ptr Cfg; + llvm::DenseMap StmtToBlock; +}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CONTROLFLOWCONTEXT_H diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h index a5d4a5d6ba40..a96ed0437a43 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h @@ -21,6 +21,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "llvm/ADT/Any.h" @@ -101,17 +102,12 @@ template struct 
DataflowAnalysisState { /// Performs dataflow analysis and returns a mapping from basic block IDs to /// dataflow analysis states that model the respective basic blocks. Indices /// of the returned vector correspond to basic block IDs. -/// -/// Requirements: -/// -/// `Cfg` must have been built with `CFG::BuildOptions::setAllAlwaysAdd()` to -/// ensure that all sub-expressions in a basic block are evaluated. template std::vector>> -runDataflowAnalysis(const CFG &Cfg, AnalysisT &Analysis, +runDataflowAnalysis(const ControlFlowContext &CFCtx, AnalysisT &Analysis, const Environment &InitEnv) { auto TypeErasedBlockStates = - runTypeErasedDataflowAnalysis(Cfg, Analysis, InitEnv); + runTypeErasedDataflowAnalysis(CFCtx, Analysis, InitEnv); std::vector< llvm::Optional>> BlockStates; diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h index 6193b9860d33..9290609068d7 100644 --- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -19,6 +19,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "llvm/ADT/Any.h" @@ -87,6 +88,7 @@ struct TypeErasedDataflowAnalysisState { /// already been transferred. States in `BlockStates` that are set to /// `llvm::None` represent basic blocks that are not evaluated yet. 
TypeErasedDataflowAnalysisState transferBlock( + const ControlFlowContext &CFCtx, std::vector> &BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis, @@ -97,13 +99,8 @@ TypeErasedDataflowAnalysisState transferBlock( /// Performs dataflow analysis and returns a mapping from basic block IDs to /// dataflow analysis states that model the respective basic blocks. Indices /// of the returned vector correspond to basic block IDs. -/// -/// Requirements: -/// -/// `Cfg` must have been built with `CFG::BuildOptions::setAllAlwaysAdd()` to -/// ensure that all sub-expressions in a basic block are evaluated. std::vector> -runTypeErasedDataflowAnalysis(const CFG &Cfg, +runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv); diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt index 34e97df2182e..e5a8f73c961d 100644 --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -1,4 +1,5 @@ add_clang_library(clangAnalysisFlowSensitive + ControlFlowContext.cpp TypeErasedDataflowAnalysis.cpp LINK_LIBS diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp new file mode 100644 index 000000000000..a1817687bd68 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -0,0 +1,68 @@ +//===- ControlFlowContext.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a ControlFlowContext class that is used by dataflow +// analyses that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Error.h" +#include + +namespace clang { +namespace dataflow { + +/// Returns a map from statements to basic blocks that contain them. +static llvm::DenseMap +buildStmtToBasicBlockMap(const CFG &Cfg) { + llvm::DenseMap StmtToBlock; + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) { + auto Stmt = Element.getAs(); + if (!Stmt.hasValue()) + continue; + + StmtToBlock[Stmt.getValue().getStmt()] = Block; + } + } + return StmtToBlock; +} + +llvm::Expected +ControlFlowContext::build(const Decl *D, Stmt *S, ASTContext *C) { + CFG::BuildOptions Options; + Options.PruneTriviallyFalseEdges = false; + Options.AddImplicitDtors = true; + Options.AddTemporaryDtors = true; + Options.AddInitializers = true; + + // Ensure that all sub-expressions in basic blocks are evaluated. 
+ Options.setAllAlwaysAdd(); + + auto Cfg = CFG::buildCFG(D, S, C, Options); + if (Cfg == nullptr) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "CFG::buildCFG failed"); + + llvm::DenseMap StmtToBlock = + buildStmtToBasicBlockMap(*Cfg); + return ControlFlowContext(std::move(Cfg), std::move(StmtToBlock)); +} + +} // namespace dataflow +} // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 413e8d14bf0a..ef967c501bf7 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -35,6 +35,7 @@ namespace dataflow { /// already been transferred. States in `BlockStates` that are set to /// `llvm::None` represent basic blocks that are not evaluated yet. static TypeErasedDataflowAnalysisState computeBlockInputState( + const ControlFlowContext &CFCtx, std::vector> &BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis) { @@ -43,7 +44,40 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( // the state of each basic block differently. TypeErasedDataflowAnalysisState State = {Analysis.typeErasedInitialElement(), InitEnv}; - for (const CFGBlock *Pred : Block.preds()) { + + llvm::DenseSet Preds; + Preds.insert(Block.pred_begin(), Block.pred_end()); + if (Block.getTerminator().isTemporaryDtorsBranch()) { + // This handles a special case where the code that produced the CFG includes + // a conditional operator with a branch that constructs a temporary and + // calls a destructor annotated as noreturn. 
The CFG models this as follows: + // + // B1 (contains the condition of the conditional operator) - succs: B2, B3 + // B2 (contains code that does not call a noreturn destructor) - succs: B4 + // B3 (contains code that calls a noreturn destructor) - succs: B4 + // B4 (has temporary destructor terminator) - succs: B5, B6 + // B5 (noreturn block that is associated with the noreturn destructor call) + // B6 (contains code that follows the conditional operator statement) + // + // The first successor (B5 above) of a basic block with a temporary + // destructor terminator (B4 above) is the block that evaluates the + // destructor. If that block has a noreturn element then the predecessor + // block that constructed the temporary object (B3 above) is effectively a + // noreturn block and its state should not be used as input for the state + // of the block that has a temporary destructor terminator (B4 above). This + // holds regardless of which branch of the ternary operator calls the + // noreturn destructor. However, it doesn't handle cases where a nested ternary + // operator includes a branch that contains a noreturn destructor call. + // + // See `NoreturnDestructorTest` for concrete examples. + if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { + auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt()); + assert(StmtBlock != CFCtx.getStmtToBlock().end()); + Preds.erase(StmtBlock->getSecond()); + } + } + + for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. 
if (!Pred || Pred->hasNoReturnElement()) continue; @@ -64,6 +98,7 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( } TypeErasedDataflowAnalysisState transferBlock( + const ControlFlowContext &CFCtx, std::vector> &BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis, @@ -71,7 +106,7 @@ TypeErasedDataflowAnalysisState transferBlock( const TypeErasedDataflowAnalysisState &)> HandleTransferredStmt) { TypeErasedDataflowAnalysisState State = - computeBlockInputState(BlockStates, Block, InitEnv, Analysis); + computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); for (const CFGElement &Element : Block) { // FIXME: Evaluate other kinds of `CFGElement`. const llvm::Optional Stmt = Element.getAs(); @@ -89,21 +124,21 @@ TypeErasedDataflowAnalysisState transferBlock( } std::vector> -runTypeErasedDataflowAnalysis(const CFG &Cfg, +runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv) { // FIXME: Consider enforcing that `Cfg` meets the requirements that // are specified in the header. This could be done by remembering // what options were used to build `Cfg` and asserting on them here. - PostOrderCFGView POV(&Cfg); - ForwardDataflowWorklist Worklist(Cfg, &POV); + PostOrderCFGView POV(&CFCtx.getCFG()); + ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); std::vector> BlockStates; - BlockStates.resize(Cfg.size(), llvm::None); + BlockStates.resize(CFCtx.getCFG().size(), llvm::None); // The entry basic block doesn't contain statements so it can be skipped. 
- const CFGBlock &Entry = Cfg.getEntry(); + const CFGBlock &Entry = CFCtx.getCFG().getEntry(); BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), InitEnv}; Worklist.enqueueSuccessors(&Entry); @@ -125,7 +160,7 @@ runTypeErasedDataflowAnalysis(const CFG &Cfg, const llvm::Optional &OldBlockState = BlockStates[Block->getBlockID()]; TypeErasedDataflowAnalysisState NewBlockState = - transferBlock(BlockStates, *Block, InitEnv, Analysis); + transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis); if (OldBlockState.hasValue() && Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice, diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp index 8558290e5e4a..dfc9175f9fe1 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp @@ -144,26 +144,3 @@ test::buildStatementToAnnotationMapping(const FunctionDecl *Func, return Result; } - -std::pair> -test::buildCFG(ASTContext &Context, - ast_matchers::internal::Matcher FuncMatcher) { - CFG::BuildOptions Options; - Options.PruneTriviallyFalseEdges = false; - Options.AddInitializers = true; - Options.AddImplicitDtors = true; - Options.AddTemporaryDtors = true; - Options.setAllAlwaysAdd(); - - const FunctionDecl *F = ast_matchers::selectFirst( - "target", - ast_matchers::match( - ast_matchers::functionDecl(ast_matchers::isDefinition(), FuncMatcher) - .bind("target"), - Context)); - if (F == nullptr) - return std::make_pair(nullptr, nullptr); - - return std::make_pair( - F, clang::CFG::buildCFG(F, F->getBody(), &Context, Options)); -} diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index af2e70729d96..632fe73b26b0 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -20,9 +20,12 @@ #include 
"clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersInternal.h" #include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Basic/LLVM.h" +#include "clang/Serialization/PCHContainerOperations.h" +#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -56,12 +59,6 @@ llvm::Expected> buildStatementToAnnotationMapping(const FunctionDecl *Func, llvm::Annotations AnnotatedCode); -// Creates a CFG from the body of the function that matches `func_matcher`, -// suitable to testing a dataflow analysis. -std::pair> -buildCFG(ASTContext &Context, - ast_matchers::internal::Matcher FuncMatcher); - // Runs dataflow on the body of the function that matches `func_matcher` in code // snippet `code`. Requires: `Analysis` contains a type `Lattice`. 
template @@ -79,7 +76,10 @@ void checkDataflow( using StateT = DataflowAnalysisState; llvm::Annotations AnnotatedCode(Code); - auto Unit = tooling::buildASTFromCodeWithArgs(AnnotatedCode.code(), Args); + auto Unit = tooling::buildASTFromCodeWithArgs( + AnnotatedCode.code(), Args, "input.cc", "clang-dataflow-test", + std::make_shared(), + tooling::getClangStripDependencyFileAdjuster(), VirtualMappedFiles); auto &Context = Unit->getASTContext(); if (Context.getDiagnostics().getClient()->getNumErrors() != 0) { @@ -87,12 +87,16 @@ void checkDataflow( "the test log"; } - std::pair> CFGResult = - buildCFG(Context, FuncMatcher); - const auto *F = CFGResult.first; - auto Cfg = std::move(CFGResult.second); - ASSERT_TRUE(F != nullptr) << "Could not find target function"; - ASSERT_TRUE(Cfg != nullptr) << "Could not build control flow graph."; + const FunctionDecl *F = ast_matchers::selectFirst( + "target", + ast_matchers::match( + ast_matchers::functionDecl(ast_matchers::isDefinition(), FuncMatcher) + .bind("target"), + Context)); + ASSERT_TRUE(F != nullptr) << "Could not find target function."; + + auto CFCtx = ControlFlowContext::build(F, F->getBody(), &F->getASTContext()); + ASSERT_TRUE((bool)CFCtx) << "Could not build ControlFlowContext."; Environment Env; auto Analysis = MakeAnalysis(Context, Env); @@ -107,7 +111,7 @@ void checkDataflow( auto &Annotations = *StmtToAnnotations; std::vector> BlockStates = - runTypeErasedDataflowAnalysis(*Cfg, Analysis, Env); + runTypeErasedDataflowAnalysis(*CFCtx, Analysis, Env); if (BlockStates.empty()) { Expectations({}, Context); @@ -117,13 +121,13 @@ void checkDataflow( // Compute a map from statement annotations to the state computed for // the program point immediately after the annotated statement. std::vector> Results; - for (const CFGBlock *Block : *Cfg) { + for (const CFGBlock *Block : CFCtx->getCFG()) { // Skip blocks that were not evaluated. 
if (!BlockStates[Block->getBlockID()].hasValue()) continue; transferBlock( - BlockStates, *Block, Env, Analysis, + *CFCtx, BlockStates, *Block, Env, Analysis, [&Results, &Annotations](const clang::CFGStmt &Stmt, const TypeErasedDataflowAnalysisState &State) { auto It = Annotations.find(Stmt.getStmt()); diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 61765eb1404a..bbd840ca6743 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "TestingSupport.h" #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -14,15 +15,24 @@ #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include #include +#include +#include +#include #include using namespace clang; using namespace dataflow; +using ::testing::IsEmpty; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; template class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback { @@ -36,21 +46,12 @@ class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback { Stmt *Body = Func->getBody(); assert(Body != nullptr); - // FIXME: Consider providing a utility that returns a `CFG::BuildOptions` - // which is a good default for most clients or a utility that directly - // builds the `CFG` using default `CFG::BuildOptions`. 
- CFG::BuildOptions Options; - Options.AddImplicitDtors = true; - Options.AddTemporaryDtors = true; - Options.setAllAlwaysAdd(); - - std::unique_ptr Cfg = - CFG::buildCFG(nullptr, Body, Result.Context, Options); - assert(Cfg != nullptr); + auto CFCtx = llvm::cantFail( + ControlFlowContext::build(nullptr, Body, Result.Context)); AnalysisT Analysis(*Result.Context); Environment Env; - BlockStates = runDataflowAnalysis(*Cfg, Analysis, Env); + BlockStates = runDataflowAnalysis(CFCtx, Analysis, Env); } std::vector< @@ -141,8 +142,175 @@ TEST(DataflowAnalysisTest, NonConvergingAnalysis) { } )"); EXPECT_EQ(BlockStates.size(), 4u); - EXPECT_FALSE(BlockStates[0].hasValue()); + EXPECT_TRUE(BlockStates[0].hasValue()); EXPECT_TRUE(BlockStates[1].hasValue()); EXPECT_TRUE(BlockStates[2].hasValue()); EXPECT_TRUE(BlockStates[3].hasValue()); } + +struct FunctionCallLattice { + llvm::SmallSet CalledFunctions; + + bool operator==(const FunctionCallLattice &Other) const { + return CalledFunctions == Other.CalledFunctions; + } + + LatticeJoinEffect join(const FunctionCallLattice &Other) { + if (Other.CalledFunctions.empty()) + return LatticeJoinEffect::Unchanged; + const size_t size_before = CalledFunctions.size(); + CalledFunctions.insert(Other.CalledFunctions.begin(), + Other.CalledFunctions.end()); + return CalledFunctions.size() == size_before ? 
LatticeJoinEffect::Unchanged + : LatticeJoinEffect::Changed; + } +}; + +std::ostream &operator<<(std::ostream &OS, const FunctionCallLattice &L) { + std::string S; + llvm::raw_string_ostream ROS(S); + llvm::interleaveComma(L.CalledFunctions, ROS); + return OS << "{" << S << "}"; +} + +class FunctionCallAnalysis + : public DataflowAnalysis { +public: + explicit FunctionCallAnalysis(ASTContext &Context) + : DataflowAnalysis(Context) {} + + static FunctionCallLattice initialElement() { return {}; } + + FunctionCallLattice transfer(const Stmt *S, const FunctionCallLattice &E, + Environment &Env) { + FunctionCallLattice R = E; + if (auto *C = dyn_cast(S)) { + if (auto *F = dyn_cast(C->getCalleeDecl())) { + R.CalledFunctions.insert(F->getNameInfo().getAsString()); + } + } + return R; + } +}; + +class NoreturnDestructorTest : public ::testing::Test { +protected: + template + void runDataflow(llvm::StringRef Code, Matcher Expectations) { + tooling::FileContentMappings FilesContents; + FilesContents.push_back(std::make_pair( + "noreturn_destructor_test_defs.h", R"( + int foo(); + + class Fatal { + public: + ~Fatal() __attribute__((noreturn)); + int bar(); + int baz(); + }; + + class NonFatal { + public: + ~NonFatal(); + int bar(); + }; + )")); + + test::checkDataflow( + Code, "target", + [](ASTContext &C, Environment &) { return FunctionCallAnalysis(C); }, + [&Expectations]( + llvm::ArrayRef>> + Results, + ASTContext &) { EXPECT_THAT(Results, Expectations); }, + {"-fsyntax-only", "-std=c++17"}, FilesContents); + } +}; + +MATCHER_P(HoldsFunctionCallLattice, m, + ((negation ? 
"doesn't hold" : "holds") + + llvm::StringRef(" a lattice element that ") + + ::testing::DescribeMatcher(m, negation)) + .str()) { + return ExplainMatchResult(m, arg.Lattice, result_listener); +} + +MATCHER_P(HasCalledFunctions, m, "") { + return ExplainMatchResult(m, arg.CalledFunctions, result_listener); +} + +TEST_F(NoreturnDestructorTest, ConditionalOperatorBothBranchesReturn) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target(bool b) { + int value = b ? foo() : NonFatal().bar(); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsFunctionCallLattice(HasCalledFunctions( + UnorderedElementsAre("foo", "bar")))))); +} + +TEST_F(NoreturnDestructorTest, ConditionalOperatorLeftBranchReturns) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target(bool b) { + int value = b ? foo() : Fatal().bar(); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsFunctionCallLattice(HasCalledFunctions( + UnorderedElementsAre("foo")))))); +} + +TEST_F(NoreturnDestructorTest, ConditionalOperatorRightBranchReturns) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target(bool b) { + int value = b ? Fatal().bar() : foo(); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsFunctionCallLattice(HasCalledFunctions( + UnorderedElementsAre("foo")))))); +} + +TEST_F(NoreturnDestructorTest, ConditionalOperatorNestedBranchesDoNotReturn) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target(bool b1, bool b2) { + int value = b1 ? foo() : (b2 ? Fatal().bar() : Fatal().baz()); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, IsEmpty()); + // FIXME: Called functions at point `p` should contain "foo". 
+} + +TEST_F(NoreturnDestructorTest, ConditionalOperatorNestedBranchReturns) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target(bool b1, bool b2) { + int value = b1 ? Fatal().bar() : (b2 ? Fatal().baz() : foo()); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsFunctionCallLattice(HasCalledFunctions( + UnorderedElementsAre("baz", "foo")))))); + // FIXME: Called functions at point `p` should contain only "foo". +} From 08192340335e640dd7cb8f136bda783e441a789d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 28 Dec 2021 07:59:27 +0000 Subject: [PATCH 109/992] [gn build] Port b5c5d8912e07 --- .../gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn index ac71eae37525..26b620ea3f39 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn @@ -5,5 +5,8 @@ static_library("FlowSensitive") { "//clang/lib/AST", "//clang/lib/Analysis", ] - sources = [ "TypeErasedDataflowAnalysis.cpp" ] + sources = [ + "ControlFlowContext.cpp", + "TypeErasedDataflowAnalysis.cpp", + ] } From 7c9fb58cacd3527924e2c9bc8e529c073c8504f8 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 28 Dec 2021 16:20:06 +0800 Subject: [PATCH 110/992] [NFC] [Coroutines] Add tests for coro-split-musttail Add two tests to address the problems during marking coro.resume calls as musttail. The two problems are bitcast instruction and unused instruciton respectively. 
--- .../Coroutines/coro-split-musttail6.ll | 116 +++++++++++++++++ .../Coroutines/coro-split-musttail7.ll | 118 ++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail6.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail7.ll diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll new file mode 100644 index 000000000000..f139b8fb5884 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll @@ -0,0 +1,116 @@ +; Tests that sinked lifetime markers wouldn't provent optimization +; to convert a resuming call to a musttail call. +; The difference between this and coro-split-musttail5.ll is that there is +; an extra bitcast instruction in the path, which makes it harder to +; optimize. +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(i64* align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %alloc.var = alloca i64 + %alloca.var.i8 = bitcast i64* %alloc.var to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloca.var.i8) + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume1(i64* align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(i64* %alloc.var) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloca.var.i8) + br label %exit +exit: + call i1 @llvm.coro.end(i8* 
null, i1 false) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. +; CHECK-LABEL: @g.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1(i64* align 8 null) + +; It has a cleanup bb. +define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %alloc.var = alloca i64 + %alloca.var.i8 = bitcast i64* %alloc.var to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloca.var.i8) + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume1(i64* align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %cleanup + ] +await.ready: + call void @consume(i64* %alloc.var) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloca.var.i8) + br label %exit + +cleanup: + %free.handle = call i8* @llvm.coro.free(token %id, i8* %vFrame) + %.not = icmp eq i8* %free.handle, null + br i1 %.not, label %exit, label %coro.free + +coro.free: + call void @delete(i8* nonnull %free.handle) #2 + br label %exit + +exit: + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @f.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1(i64* align 8 null) + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 +declare token @llvm.coro.save(i8*) #2 +declare i8* @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 +declare i1 @llvm.coro.end(i8*, i1) #2 +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 +declare i8* @malloc(i64) +declare void @delete(i8* nonnull) #2 +declare void @consume(i64*) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +attributes #0 = { "coroutine.presplit"="1" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll new file mode 100644 index 000000000000..ce1f7203a0c7 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll @@ -0,0 +1,118 @@ +; Tests that sinked lifetime markers wouldn't provent optimization +; to convert a resuming call to a musttail call. +; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll +; is that this contains dead instruction generated during the transformation, +; which makes the optimization harder. 
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +declare void @fakeresume1(i64* align 8) + +define void @g() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %alloc.var = alloca i64 + %alloca.var.i8 = bitcast i64* %alloc.var to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloca.var.i8) + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume1(i64* align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %exit + ] +await.ready: + call void @consume(i64* %alloc.var) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloca.var.i8) + br label %exit +exit: + %.unused = getelementptr inbounds i8, i8* %vFrame, i32 0 + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. +; CHECK-LABEL: @g.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1(i64* align 8 null) + +; It has a cleanup bb. 
+define void @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %alloc = call i8* @malloc(i64 16) #3 + %alloc.var = alloca i64 + %alloca.var.i8 = bitcast i64* %alloc.var to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloca.var.i8) + %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) + + %save = call token @llvm.coro.save(i8* null) + %suspend = call i8 @llvm.coro.suspend(token %save, i1 false) + + switch i8 %suspend, label %exit [ + i8 0, label %await.suspend + i8 1, label %exit + ] +await.suspend: + %save2 = call token @llvm.coro.save(i8* null) + call fastcc void @fakeresume1(i64* align 8 null) + %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false) + switch i8 %suspend2, label %exit [ + i8 0, label %await.ready + i8 1, label %cleanup + ] +await.ready: + call void @consume(i64* %alloc.var) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloca.var.i8) + br label %exit + +cleanup: + %free.handle = call i8* @llvm.coro.free(token %id, i8* %vFrame) + %.not = icmp eq i8* %free.handle, null + br i1 %.not, label %exit, label %coro.free + +coro.free: + call void @delete(i8* nonnull %free.handle) #2 + br label %exit + +exit: + %.unused = getelementptr inbounds i8, i8* %vFrame, i32 0 + call i1 @llvm.coro.end(i8* null, i1 false) + ret void +} + +; FIXME: The fakeresume1 here should be marked as musttail. +; Verify that in the resume part resume call is marked with musttail. 
+; CHECK-LABEL: @f.resume( +; CHECK-NOT: musttail call fastcc void @fakeresume1(i64* align 8 null) + +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 +declare i1 @llvm.coro.alloc(token) #2 +declare i64 @llvm.coro.size.i64() #3 +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 +declare token @llvm.coro.save(i8*) #2 +declare i8* @llvm.coro.frame() #3 +declare i8 @llvm.coro.suspend(token, i1) #2 +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 +declare i1 @llvm.coro.end(i8*, i1) #2 +declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1 +declare i8* @malloc(i64) +declare void @delete(i8* nonnull) #2 +declare void @consume(i64*) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +attributes #0 = { "coroutine.presplit"="1" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nounwind readnone } From 30a12f3f6322399185fdceffe176152a58bb84ae Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 09:14:27 +0100 Subject: [PATCH 111/992] [InstCombine] Fix GEP with same index comparison with opaque pointers We need to also check that the source element type is the same, otherwise the indices may have different meaning. The added addrspacecast demonstrates that we do still need to check the pointer type. 
--- .../InstCombine/InstCombineCompares.cpp | 7 +++--- .../Transforms/InstCombine/getelementptr.ll | 16 ++++++++++++- .../test/Transforms/InstCombine/opaque-ptr.ll | 24 +++++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index ed53b88aed61..62a43b00773a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -894,9 +894,10 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // If the base pointers are different, but the indices are the same, just // compare the base pointer. if (PtrBase != GEPRHS->getOperand(0)) { - bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); - IndicesTheSame &= GEPLHS->getOperand(0)->getType() == - GEPRHS->getOperand(0)->getType(); + bool IndicesTheSame = + GEPLHS->getNumOperands() == GEPRHS->getNumOperands() && + GEPLHS->getType() == GEPRHS->getType() && + GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType(); if (IndicesTheSame) for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index e57eccc84b5f..be3a7b1ac804 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -151,11 +151,25 @@ define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) { ; %t1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 %t3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1 - ;; seteq x, y %t4 = icmp eq i32* %t1, %t3 ret i1 %t4 } +define i1 @test10_addrspacecast({ i32, i32 }* %x, { i32, i32 } addrspace(3)* %y) { +; CHECK-LABEL: @test10_addrspacecast( +; CHECK-NEXT: [[T1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* 
[[X:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[T3:%.*]] = getelementptr { i32, i32 }, { i32, i32 } addrspace(3)* [[Y:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[T3_C:%.*]] = addrspacecast i32 addrspace(3)* [[T3]] to i32* +; CHECK-NEXT: [[T4:%.*]] = icmp eq i32* [[T1]], [[T3_C]] +; CHECK-NEXT: ret i1 [[T4]] +; + %t1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %t3 = getelementptr { i32, i32 }, { i32, i32 } addrspace(3)* %y, i32 0, i32 1 + %t3.c = addrspacecast i32 addrspace(3)* %t3 to i32* + %t4 = icmp eq i32* %t1, %t3.c + ret i1 %t4 +} + define i1 @test11({ i32, i32 }* %X) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[Q:%.*]] = icmp eq { i32, i32 }* [[X:%.*]], null diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll index ab2a109f6103..4ebf68f918e1 100644 --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -182,3 +182,27 @@ define ptr @geps_not_combinable(ptr %a) { %a3 = getelementptr { i32, i32 }, ptr %a2, i32 0, i32 1 ret ptr %a3 } + +define i1 @compare_geps_same_indices(ptr %a, ptr %b, i64 %idx) { +; CHECK-LABEL: @compare_geps_same_indices( +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %a2 = getelementptr i32, ptr %a, i64 %idx + %b2 = getelementptr i32, ptr %b, i64 %idx + %c = icmp eq ptr %a2, %b2 + ret i1 %c +} + +define i1 @compare_geps_same_indices_different_types(ptr %a, ptr %b, i64 %idx) { +; CHECK-LABEL: @compare_geps_same_indices_different_types( +; CHECK-NEXT: [[A2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[B2:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[A2]], [[B2]] +; CHECK-NEXT: ret i1 [[C]] +; + %a2 = getelementptr i32, ptr %a, i64 %idx + %b2 = getelementptr i64, ptr %b, i64 %idx + %c = icmp eq ptr %a2, %b2 + ret i1 %c +} From d09a284dfbd17424ebfb7806e830a9b64b8e70a7 Mon Sep 17 00:00:00 2001 From: Kazu Hirata 
Date: Tue, 28 Dec 2021 00:38:11 -0800 Subject: [PATCH 112/992] [CodeGen] Drop unnecessary const from return types (NFC) Identified with readability-const-return-type. --- llvm/lib/CodeGen/TargetPassConfig.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 402e21d3708b..6d9d22699273 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -328,7 +328,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, // Find the FSProfile file name. The internal option takes the precedence // before getting from TargetMachine. -static const std::string getFSProfileFile(const TargetMachine *TM) { +static std::string getFSProfileFile(const TargetMachine *TM) { if (!FSProfileFile.empty()) return FSProfileFile.getValue(); const Optional &PGOOpt = TM->getPGOOption(); @@ -339,7 +339,7 @@ static const std::string getFSProfileFile(const TargetMachine *TM) { // Find the Profile remapping file name. The internal option takes the // precedence before getting from TargetMachine. -static const std::string getFSRemappingFile(const TargetMachine *TM) { +static std::string getFSRemappingFile(const TargetMachine *TM) { if (!FSRemappingFile.empty()) return FSRemappingFile.getValue(); const Optional &PGOOpt = TM->getPGOOption(); From c66286ed5924a6b1535f0eb9af4d19f8cf676be0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 Dec 2021 10:53:56 +0100 Subject: [PATCH 113/992] [LV] Use specific first-order recurrence recipe as arg type (NFC). Required for further refactoring in D116304. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e713925e3b80..261f9317f260 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -582,7 +582,8 @@ class InnerLoopVectorizer { /// Create the exit value of first order recurrences in the middle block and /// update their users. - void fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, VPTransformState &State); + void fixFirstOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR, + VPTransformState &State); /// Create code for the loop exit value of the reduction. void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State); @@ -4096,8 +4097,8 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { } } -void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, - VPTransformState &State) { +void InnerLoopVectorizer::fixFirstOrderRecurrence( + VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) { // This is the second phase of vectorizing first-order recurrences. An // overview of the transformation is described below. Suppose we have the // following loop. From 7d850a0c4d26591fadb26d82a8ffac530c217e9c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 24 Jun 2021 22:31:06 +0200 Subject: [PATCH 114/992] [InstCombine] Make indexed compare fold opaque ptr compatible We need to make sure that the GEP source element types match. A caveat here is that the used GEP source element type can be arbitrary if no offset is stripped from the original GEP -- the transform is somewhat inconsistent in that it always starts from a GEP, but might not actually look through it if it has multiple indices. 
--- .../InstCombine/InstCombineCompares.cpp | 47 +++++++-------- .../InstCombine/indexed-gep-compares.ll | 25 ++++++++ .../test/Transforms/InstCombine/opaque-ptr.ll | 57 +++++++++++++++++++ 3 files changed, 101 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 62a43b00773a..59e131bd3b6a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -503,7 +503,7 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC, /// Returns true if we can rewrite Start as a GEP with pointer Base /// and some integer offset. The nodes that need to be re-written /// for this transformation will be added to Explored. -static bool canRewriteGEPAsOffset(Value *Start, Value *Base, +static bool canRewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, const DataLayout &DL, SetVector &Explored) { SmallVector WorkList(1, Start); @@ -551,7 +551,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base, // the original pointer type. We could handle more cases in the // future. if (GEP->getNumIndices() != 1 || !GEP->isInBounds() || - GEP->getType() != Start->getType()) + GEP->getSourceElementType() != ElemTy) return false; if (!Explored.contains(GEP->getOperand(0))) @@ -627,7 +627,7 @@ static void setInsertionPoint(IRBuilder<> &Builder, Value *V, /// Returns a re-written value of Start as an indexed GEP using Base as a /// pointer. -static Value *rewriteGEPAsOffset(Value *Start, Value *Base, +static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base, const DataLayout &DL, SetVector &Explored) { // Perform all the substitutions. 
This is a bit tricky because we can @@ -714,6 +714,8 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, } } + PointerType *PtrTy = + ElemTy->getPointerTo(Start->getType()->getPointerAddressSpace()); for (Value *Val : Explored) { if (Val == Base) continue; @@ -722,22 +724,14 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, // a GEP or a GEP + ptrtoint. setInsertionPoint(Builder, Val, false); - // If required, create an inttoptr instruction for Base. - Value *NewBase = Base; - if (!Base->getType()->isPointerTy()) - NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(), - Start->getName() + "to.ptr"); - - Value *GEP = Builder.CreateInBoundsGEP( - Start->getType()->getPointerElementType(), NewBase, - makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr"); - - if (!Val->getType()->isPointerTy()) { - Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(), - Val->getName() + ".conv"); - GEP = Cast; - } - Val->replaceAllUsesWith(GEP); + // Cast base to the expected type. + Value *NewVal = Builder.CreateBitOrPointerCast( + Base, PtrTy, Start->getName() + "to.ptr"); + NewVal = Builder.CreateInBoundsGEP( + ElemTy, NewVal, makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr"); + NewVal = Builder.CreateBitOrPointerCast( + NewVal, Val->getType(), Val->getName() + ".conv"); + Val->replaceAllUsesWith(NewVal); } return NewInsts[Start]; @@ -747,7 +741,7 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, /// the input Value as a constant indexed GEP. Returns a pair containing /// the GEPs Pointer and Index. 
static std::pair -getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { +getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) { Type *IndexType = IntegerType::get(V->getContext(), DL.getIndexTypeSizeInBits(V->getType())); @@ -759,7 +753,7 @@ getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { if (!GEP->isInBounds()) break; if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 && - GEP->getType() == V->getType()) { + GEP->getSourceElementType() == ElemTy) { V = GEP->getOperand(0); Constant *GEPIndex = static_cast(GEP->getOperand(1)); Index = ConstantExpr::getAdd( @@ -798,17 +792,14 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, if (!GEPLHS->hasAllConstantIndices()) return nullptr; - // Make sure the pointers have the same type. - if (GEPLHS->getType() != RHS->getType()) - return nullptr; - + Type *ElemTy = GEPLHS->getSourceElementType(); Value *PtrBase, *Index; - std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL); + std::tie(PtrBase, Index) = getAsConstantIndexedAddress(ElemTy, GEPLHS, DL); // The set of nodes that will take part in this transformation. SetVector Nodes; - if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes)) + if (!canRewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes)) return nullptr; // We know we can re-write this as @@ -817,7 +808,7 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, // can't have overflow on either side. 
We can therefore re-write // this as: // OFFSET1 cmp OFFSET2 - Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes); + Value *NewRHS = rewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes); // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written // GEP having PtrBase as the pointer base, and has returned in NewRHS the diff --git a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll index b8180603f11f..c00fd5be06e4 100644 --- a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll +++ b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll @@ -300,3 +300,28 @@ entry: %cmp = icmp eq i32** %gepi32, %cast ret i1 %cmp } + +define void @test_zero_offset_cycle({ i64, i64 }* %arg) { +; CHECK-LABEL: @test_zero_offset_cycle( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[LOOP_CONT:%.*]] +; CHECK: loop.cont: +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %gep = getelementptr inbounds { i64, i64 }, { i64, i64 }* %arg, i32 0, i32 1 + %gep.int = ptrtoint i64* %gep to i32 + br label %loop + +loop: + %phi = phi i32 [ %gep.int, %entry ], [ %gep.int2, %loop.cont ], [ %phi, %loop ] + %phi.ptr = inttoptr i32 %phi to i64* + %cmp = icmp eq i64* %gep, %phi.ptr + br i1 %cmp, label %loop, label %loop.cont + +loop.cont: + %gep.int2 = ptrtoint i64* %gep to i32 + br label %loop +} diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll index 4ebf68f918e1..035b50588f54 100644 --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -206,3 +206,60 @@ define i1 @compare_geps_same_indices_different_types(ptr %a, ptr %b, i64 %idx) { %c = icmp eq ptr %a2, %b2 ret i1 %c } + +define ptr @indexed_compare(ptr %A, i64 %offset) { +; CHECK-LABEL: @indexed_compare( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: 
bb: +; CHECK-NEXT: [[RHS_IDX:%.*]] = phi i64 [ [[RHS_ADD:%.*]], [[BB]] ], [ [[OFFSET:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RHS_ADD]] = add nsw i64 [[RHS_IDX]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[RHS_IDX]], 100 +; CHECK-NEXT: br i1 [[COND]], label [[BB2:%.*]], label [[BB]] +; CHECK: bb2: +; CHECK-NEXT: [[RHS_PTR:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[RHS_IDX]] +; CHECK-NEXT: ret ptr [[RHS_PTR]] +; +entry: + %tmp = getelementptr inbounds i32, ptr %A, i64 %offset + br label %bb + +bb: + %RHS = phi ptr [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, ptr %A, i32 100 + %RHS.next = getelementptr inbounds i32, ptr %RHS, i64 1 + %cond = icmp ult ptr %LHS, %RHS + br i1 %cond, label %bb2, label %bb + +bb2: + ret ptr %RHS +} + +define ptr @indexed_compare_different_types(ptr %A, i64 %offset) { +; CHECK-LABEL: @indexed_compare_different_types( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET:%.*]] +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[RHS:%.*]] = phi ptr [ [[RHS_NEXT:%.*]], [[BB]] ], [ [[TMP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LHS:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 100 +; CHECK-NEXT: [[RHS_NEXT]] = getelementptr inbounds i32, ptr [[RHS]], i64 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ult ptr [[LHS]], [[RHS]] +; CHECK-NEXT: br i1 [[COND]], label [[BB2:%.*]], label [[BB]] +; CHECK: bb2: +; CHECK-NEXT: ret ptr [[RHS]] +; +entry: + %tmp = getelementptr inbounds i32, ptr %A, i64 %offset + br label %bb + +bb: + %RHS = phi ptr [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i64, ptr %A, i32 100 + %RHS.next = getelementptr inbounds i32, ptr %RHS, i64 1 + %cond = icmp ult ptr %LHS, %RHS + br i1 %cond, label %bb2, label %bb + +bb2: + ret ptr %RHS +} From 1bd11d34feecde09958669f6c507b9a10cc6b2ab Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 14:06:07 +0100 Subject: [PATCH 115/992] [ConstFold] Add 
additional icmp of gep of global tests (NFC) The fold is incorrect for the sgt case, as gep inbounds is allowed to cross the sign boundary. --- .../{icmp-null.ll => icmp-global.ll} | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) rename llvm/test/Transforms/InstSimplify/ConstProp/{icmp-null.ll => icmp-global.ll} (90%) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-null.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll similarity index 90% rename from llvm/test/Transforms/InstSimplify/ConstProp/icmp-null.ll rename to llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index 8698132b87f9..9e5a54a8a65c 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-null.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -215,3 +215,24 @@ define i1 @global_gep_sgt_global_neg_offset() { %cmp = icmp sgt [2 x i32]* %gep, @g ret i1 %cmp } + +define i1 @global_gep_ugt_global_gep() { +; CHECK-LABEL: @global_gep_ugt_global_gep( +; CHECK-NEXT: ret i1 true +; + %gep1 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 + %gep2 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 1 + %cmp = icmp ugt i32* %gep2, %gep1 + ret i1 %cmp +} + +; TODO: Should not fold due to signed comparison. +define i1 @global_gep_sgt_global_gep() { +; CHECK-LABEL: @global_gep_sgt_global_gep( +; CHECK-NEXT: ret i1 true +; + %gep1 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 + %gep2 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 1 + %cmp = icmp sgt i32* %gep2, %gep1 + ret i1 %cmp +} From 23de66d1636b53ff4e52be91f6b84f014a6ae279 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 14:09:34 +0100 Subject: [PATCH 116/992] [ConstFold] Don't fold signed comparison of gep of global An inbounds GEP may still cross the sign boundary, so signed icmps cannot be folded (https://alive2.llvm.org/ce/z/XSgi4D). 
This was previously fixed for other folds in this function, but this one was missed. --- llvm/lib/IR/ConstantFold.cpp | 8 ++++---- llvm/test/Assembler/ConstantExprFold.ll | 2 +- .../test/Transforms/InstSimplify/ConstProp/icmp-global.ll | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 8668fe82601c..ae926f95cefe 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1668,8 +1668,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, ++i, ++GTI) switch (IdxCompare(CE1->getOperand(i), CE2->getOperand(i), GTI.getIndexedType())) { - case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT; - case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT; + case -1: return ICmpInst::ICMP_ULT; + case 1: return ICmpInst::ICMP_UGT; case -2: return ICmpInst::BAD_ICMP_PREDICATE; } @@ -1678,7 +1678,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, for (; i < CE1->getNumOperands(); ++i) if (!CE1->getOperand(i)->isNullValue()) { if (isa(CE1->getOperand(i))) - return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + return ICmpInst::ICMP_UGT; else return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. } @@ -1686,7 +1686,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, for (; i < CE2->getNumOperands(); ++i) if (!CE2->getOperand(i)->isNullValue()) { if (isa(CE2->getOperand(i))) - return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + return ICmpInst::ICMP_ULT; else return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. 
} diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll index 96b3e54fcffe..523edd94bd05 100644 --- a/llvm/test/Assembler/ConstantExprFold.ll +++ b/llvm/test/Assembler/ConstantExprFold.ll @@ -21,7 +21,7 @@ @9 = global i1 icmp ult (i64* @A, i64* getelementptr (i64, i64* @A, i64 1)) ; true @10 = global i1 icmp slt (i64* @A, i64* getelementptr (i64, i64* @A, i64 0)) ; false -@11 = global i1 icmp slt (i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 0), +@11 = global i1 icmp ult (i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 0), i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 1)) ; true ;global i1 icmp ne (i64* @A, i64* bitcast (%Ty* @B to i64*)) ; true diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index 9e5a54a8a65c..ee33201a8a22 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -226,10 +226,10 @@ define i1 @global_gep_ugt_global_gep() { ret i1 %cmp } -; TODO: Should not fold due to signed comparison. +; Should not fold due to signed comparison. define i1 @global_gep_sgt_global_gep() { ; CHECK-LABEL: @global_gep_sgt_global_gep( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp sgt (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 1), i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 0)) ; %gep1 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 %gep2 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 1 From 054f8d86fc0c7292ece8341a984553a903fcf37b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 14:20:29 +0100 Subject: [PATCH 117/992] [Assembler] Regenerate test checks (NFC) Switch this Assembler test to use utc by adding a dummy function and opt run line. 
--- llvm/test/Assembler/ConstantExprFold.ll | 41 ++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll index 523edd94bd05..ab85fa525ae3 100644 --- a/llvm/test/Assembler/ConstantExprFold.ll +++ b/llvm/test/Assembler/ConstantExprFold.ll @@ -1,7 +1,9 @@ -; This test checks to make sure that constant exprs fold in some simple +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; This test checks to make sure that constant exprs fold in some simple ; situations -; RUN: llvm-as < %s | llvm-dis | not grep "(" +; RUN: opt -S < %s | FileCheck %s +; RUN: llvm-as < %s | llvm-dis | FileCheck %s ; RUN: verify-uselistorder %s @A = global i64 0 @@ -17,12 +19,12 @@ @8 = global i64* inttoptr (i64 xor (i64 ptrtoint (i64* @A to i64), i64 0) to i64*) ; X ^ 0 == X %Ty = type { i32, i32 } -@B = external global %Ty +@B = external global %Ty @9 = global i1 icmp ult (i64* @A, i64* getelementptr (i64, i64* @A, i64 1)) ; true @10 = global i1 icmp slt (i64* @A, i64* getelementptr (i64, i64* @A, i64 0)) ; false @11 = global i1 icmp ult (i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 0), - i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 1)) ; true + i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 1)) ; true ;global i1 icmp ne (i64* @A, i64* bitcast (%Ty* @B to i64*)) ; true ; PR2206 @@ -33,3 +35,34 @@ @14 = global <2 x i8*> getelementptr({ i8 }, <2 x { i8 }*> undef, <2 x i64> , <2 x i32> ) @15 = global <2 x i8*> getelementptr(i8, <2 x i8*> zeroinitializer, <2 x i64> ) @16 = global <2 x i8*> getelementptr({ i8 }, <2 x { i8 }*> zeroinitializer, <2 x i64> , <2 x i32> ) + + +; Need a function to make update_test_checks.py work. +;. 
+; CHECK: @[[A:[a-zA-Z0-9_$"\\.-]+]] = global i64 0 +; CHECK: @[[GLOB0:[0-9]+]] = global i64* @A +; CHECK: @[[GLOB1:[0-9]+]] = global i64* @A +; CHECK: @[[GLOB2:[0-9]+]] = global i64* null +; CHECK: @[[GLOB3:[0-9]+]] = global i64* @A +; CHECK: @[[GLOB4:[0-9]+]] = global i64* null +; CHECK: @[[GLOB5:[0-9]+]] = global i64* null +; CHECK: @[[GLOB6:[0-9]+]] = global i64* @A +; CHECK: @[[GLOB7:[0-9]+]] = global i64 -1 +; CHECK: @[[GLOB8:[0-9]+]] = global i64* @A +; CHECK: @[[B:[a-zA-Z0-9_$"\\.-]+]] = external global [[TY:%.*]] +; CHECK: @[[GLOB9:[0-9]+]] = global i1 true +; CHECK: @[[GLOB10:[0-9]+]] = global i1 false +; CHECK: @[[GLOB11:[0-9]+]] = global i1 true +; CHECK: @[[CONS:[a-zA-Z0-9_$"\\.-]+]] = weak global i32 0, align 8 +; CHECK: @[[GLOB12:[0-9]+]] = global i64 0 +; CHECK: @[[GLOB13:[0-9]+]] = global <2 x i8*> undef +; CHECK: @[[GLOB14:[0-9]+]] = global <2 x i8*> undef +; CHECK: @[[GLOB15:[0-9]+]] = global <2 x i8*> zeroinitializer +; CHECK: @[[GLOB16:[0-9]+]] = global <2 x i8*> zeroinitializer +;. +define void @dummy() { +; CHECK-LABEL: @dummy( +; CHECK-NEXT: ret void +; + ret void +} From 3bfe0962bac6147a34aa5d2dc7b6cd4968ac975a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 14:28:28 +0100 Subject: [PATCH 118/992] [ConstFold] Add another icmp of gep of global test (NFC) This time with some complex arithmetic involving bitcasts. 
--- .../InstSimplify/ConstProp/icmp-global.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index ee33201a8a22..d22317a18924 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -236,3 +236,16 @@ define i1 @global_gep_sgt_global_gep() { %cmp = icmp sgt i32* %gep2, %gep1 ret i1 %cmp } + +define i1 @global_gep_ugt_global_gep_complex() { +; CHECK-LABEL: @global_gep_ugt_global_gep_complex( +; CHECK-NEXT: ret i1 icmp ugt (i32* bitcast (i8* getelementptr inbounds (i8, i8* bitcast ([2 x i32]* @g to i8*), i64 2) to i32*), i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 0)) +; + %gep1 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 + %gep2 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 + %gep2.cast = bitcast i32* %gep2 to i8* + %gep3 = getelementptr inbounds i8, i8* %gep2.cast, i64 2 + %gep3.cast = bitcast i8* %gep3 to i32* + %cmp = icmp ugt i32* %gep3.cast, %gep1 + ret i1 %cmp +} From e6f31f4e51df484e2f34c6c9cfd8d791cdee2eb0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 14:57:43 +0100 Subject: [PATCH 119/992] [InstCombine] Use GEP type instead of pointee type The GEP source type is independent of whether it is a scalar or vector GEP, as such we can simply preserve it. 
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index c6a4602e59e3..736cf9c825d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -495,8 +495,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { } GetElementPtrInst *NewGEP = GetElementPtrInst::Create( - cast(NewPtr->getType())->getElementType(), NewPtr, - NewOps); + GEP->getSourceElementType(), NewPtr, NewOps); NewGEP->setIsInBounds(GEP->isInBounds()); return NewGEP; } From 693b1f1e1bd1bc2060ea8cfd2129cb0123397a8c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 15:30:01 +0100 Subject: [PATCH 120/992] [InstCombine] Skip some GEP folds under opaque pointers In their current form, these folds are fundamentally incompatible with opaque pointers. We should add a separate set of folds for the canonicalization of the GEP source type. For now, skip these folds. 
--- .../Transforms/InstCombine/InstructionCombining.cpp | 6 +++++- llvm/test/Transforms/InstCombine/opaque-ptr.ll | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index aaf07f25e474..c66b39fc7927 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2244,7 +2244,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *StrippedPtr = PtrOp->stripPointerCasts(); PointerType *StrippedPtrTy = cast(StrippedPtr->getType()); - if (StrippedPtr != PtrOp) { + // TODO: The basic approach of these folds is not compatible with opaque + // pointers, because we can't use bitcasts as a hint for a desirable GEP + // type. Instead, we should perform canonicalization directly on the GEP + // type. For now, skip these. + if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) { bool HasZeroPointerIndex = false; Type *StrippedPtrEltTy = StrippedPtrTy->getElementType(); diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll index 035b50588f54..1d73ab168939 100644 --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -263,3 +263,14 @@ bb: bb2: ret ptr %RHS } + +define ptr addrspace(1) @gep_of_addrspace_cast(ptr %ptr) { +; CHECK-LABEL: @gep_of_addrspace_cast( +; CHECK-NEXT: [[CAST1:%.*]] = addrspacecast ptr [[PTR:%.*]] to ptr addrspace(1) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[CAST1]], i64 1 +; CHECK-NEXT: ret ptr addrspace(1) [[GEP]] +; + %cast1 = addrspacecast ptr %ptr to ptr addrspace(1) + %gep = getelementptr inbounds i32, ptr addrspace(1) %cast1, i64 1 + ret ptr addrspace(1) %gep +} From c2275278c693c4e0d6947c6f1ae95bf147f29125 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 
28 Dec 2021 15:37:28 +0100 Subject: [PATCH 121/992] [VPlan] Add abstract base class for header phi recipes (NFC). Not all header phis widen the phi, e.g. like the new VPCanonicalIVPHIRecipe in D113223. To let those recipes also inherit from a phi-like base class, add a more generic VPHeaderPHIRecipe abstract base class. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D116304 --- .../Transforms/Vectorize/LoopVectorize.cpp | 4 +- .../Transforms/Vectorize/VPRecipeBuilder.h | 2 +- llvm/lib/Transforms/Vectorize/VPlan.cpp | 4 +- llvm/lib/Transforms/Vectorize/VPlan.h | 94 ++++++++++++------- 4 files changed, 63 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 261f9317f260..ef49ae529cec 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8713,7 +8713,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, void VPRecipeBuilder::fixHeaderPhis() { BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); - for (VPWidenPHIRecipe *R : PhisToFix) { + for (VPHeaderPHIRecipe *R : PhisToFix) { auto *PN = cast(R->getUnderlyingValue()); VPRecipeBase *IncR = getRecipe(cast(PN->getIncomingValueForBlock(OrigLatch))); @@ -8855,7 +8855,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands))) return toVPRecipeResult(Recipe); - VPWidenPHIRecipe *PhiRecipe = nullptr; + VPHeaderPHIRecipe *PhiRecipe = nullptr; if (Legal->isReductionVariable(Phi) || Legal->isFirstOrderRecurrence(Phi)) { VPValue *StartV = Operands[0]; if (Legal->isReductionVariable(Phi)) { diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 65857f034210..e5dded3c0f1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -59,7 +59,7 @@ class VPRecipeBuilder { 
/// Cross-iteration reduction & first-order recurrence phis for which we need /// to add the incoming value from the backedge after all recipes have been /// created. - SmallVector PhisToFix; + SmallVector PhisToFix; /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 1d9e71663cd2..bebc6bbdd4a7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -838,7 +838,7 @@ void VPlan::execute(VPTransformState *State) { // vector loop. VPBasicBlock *Header = Entry->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { - auto *PhiR = dyn_cast(&R); + auto *PhiR = dyn_cast(&R); if (!PhiR || !(isa(&R) || isa(&R))) continue; @@ -1461,7 +1461,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, InterleavedAccessInfo &IAI) { if (VPBasicBlock *VPBB = dyn_cast(Block)) { for (VPRecipeBase &VPI : *VPBB) { - if (isa(&VPI)) + if (isa(&VPI)) continue; assert(isa(&VPI) && "Can only handle VPInstructions"); auto *VPInst = cast(&VPI); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f4a1883e35d5..7fa3c1defaca 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1059,34 +1059,21 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } }; -/// A recipe for handling first order recurrences and pointer inductions. For -/// first-order recurrences, the start value is the first operand of the recipe -/// and the incoming value from the backedge is the second operand. It also -/// serves as base class for VPReductionPHIRecipe. 
In the VPlan native path, all -/// incoming VPValues & VPBasicBlock pairs are managed in the recipe directly. -class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { - /// List of incoming blocks. Only used in the VPlan native path. - SmallVector IncomingBlocks; - +/// A pure virtual base class for all recipes modeling header phis, including +/// phis for first order recurrences, pointer inductions and reductions. The +/// start value is the first operand of the recipe and the incoming value from +/// the backedge is the second operand. +class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue { protected: - VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi, - VPValue *Start = nullptr) + VPHeaderPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi, + VPValue *Start = nullptr) : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) { if (Start) addOperand(Start); } public: - /// Create a VPWidenPHIRecipe for \p Phi - VPWidenPHIRecipe(PHINode *Phi) - : VPWidenPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) {} - - /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. - VPWidenPHIRecipe(PHINode *Phi, VPValue &Start) : VPWidenPHIRecipe(Phi) { - addOperand(&Start); - } - - ~VPWidenPHIRecipe() override = default; + ~VPHeaderPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *B) { @@ -1100,23 +1087,21 @@ class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { V->getVPValueID() == VPValue::VPVReductionPHISC; } - /// Generate the phi/select nodes. - void execute(VPTransformState &State) override; + /// Generate the phi nodes. + void execute(VPTransformState &State) override = 0; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; + VPSlotTracker &SlotTracker) const override = 0; #endif - /// Returns the start value of the phi, if it is a reduction or first-order - /// recurrence. + /// Returns the start value of the phi, if one is set. VPValue *getStartValue() { return getNumOperands() == 0 ? nullptr : getOperand(0); } - /// Returns the incoming value from the loop backedge, if it is a reduction or - /// first-order recurrence. + /// Returns the incoming value from the loop backedge. VPValue *getBackedgeValue() { return getOperand(1); } @@ -1126,6 +1111,43 @@ class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { VPRecipeBase *getBackedgeRecipe() { return cast(getBackedgeValue()->getDef()); } +}; + +/// A recipe for handling header phis that are widened in the vector loop. +/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are +/// managed in the recipe directly. +class VPWidenPHIRecipe : public VPHeaderPHIRecipe { + /// List of incoming blocks. Only used in the VPlan native path. + SmallVector IncomingBlocks; + +public: + /// Create a VPWidenPHIRecipe for \p Phi + VPWidenPHIRecipe(PHINode *Phi) + : VPHeaderPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) {} + + /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. + VPWidenPHIRecipe(PHINode *Phi, VPValue &Start) : VPWidenPHIRecipe(Phi) { + addOperand(&Start); + } + + ~VPWidenPHIRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *B) { + return B->getVPDefID() == VPRecipeBase::VPWidenPHISC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVWidenPHISC; + } + + /// Generate the phi/select nodes. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. 
+ void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { @@ -1133,20 +1155,20 @@ class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { IncomingBlocks.push_back(IncomingBlock); } - /// Returns the \p I th incoming VPValue. - VPValue *getIncomingValue(unsigned I) { return getOperand(I); } - /// Returns the \p I th incoming VPBasicBlock. VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } + + /// Returns the \p I th incoming VPValue. + VPValue *getIncomingValue(unsigned I) { return getOperand(I); } }; /// A recipe for handling first-order recurrence phis. The start value is the /// first operand of the recipe and the incoming value from the backedge is the /// second operand. -struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe { +struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) - : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC, - VPFirstOrderRecurrencePHISC, Phi, &Start) {} + : VPHeaderPHIRecipe(VPVFirstOrderRecurrencePHISC, + VPFirstOrderRecurrencePHISC, Phi, &Start) {} /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *R) { @@ -1171,7 +1193,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe { /// A recipe for handling reduction phis. The start value is the first operand /// of the recipe and the incoming value from the backedge is the second /// operand. -class VPReductionPHIRecipe : public VPWidenPHIRecipe { +class VPReductionPHIRecipe : public VPHeaderPHIRecipe { /// Descriptor for the reduction. 
const RecurrenceDescriptor &RdxDesc; @@ -1187,7 +1209,7 @@ class VPReductionPHIRecipe : public VPWidenPHIRecipe { VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop = false, bool IsOrdered = false) - : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start), + : VPHeaderPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start), RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) { assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); } From aaeae842ef821063c5c6ce5fada4e62967c14b96 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 27 Dec 2021 15:44:59 -0500 Subject: [PATCH 122/992] [InstSimplify] add tests for icmp with no-wrap add operand; NFC --- .../Transforms/InstSimplify/icmp-constant.ll | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll index 77d4a388c055..0784510a7ae4 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll @@ -623,6 +623,72 @@ define i1 @add_nsw_neg_const1(i32 %x) { ret i1 %cmp } +define i1 @add_nsw_sgt(i8 %x) { +; CHECK-LABEL: @add_nsw_sgt( +; CHECK-NEXT: ret i1 true +; + %add = add nsw i8 %x, 5 + %cmp = icmp sgt i8 %add, -124 + ret i1 %cmp +} + +define i1 @add_nsw_nuw_sgt(i8 %x) { +; CHECK-LABEL: @add_nsw_nuw_sgt( +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[X:%.*]], 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[ADD]], -124 +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = add nsw nuw i8 %x, 5 + %cmp = icmp sgt i8 %add, -124 + ret i1 %cmp +} + +; minimum x is -128, so add could be -124. 
+ +define i1 @add_nsw_sgt_limit(i8 %x) { +; CHECK-LABEL: @add_nsw_sgt_limit( +; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[ADD]], -124 +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = add nsw i8 %x, 4 + %cmp = icmp sgt i8 %add, -124 + ret i1 %cmp +} + +define i1 @add_nsw_slt(i8 %x) { +; CHECK-LABEL: @add_nsw_slt( +; CHECK-NEXT: ret i1 false +; + %add = add nsw i8 %x, 5 + %cmp = icmp slt i8 %add, -123 + ret i1 %cmp +} + +define i1 @add_nsw_nuw_slt(i8 %x) { +; CHECK-LABEL: @add_nsw_nuw_slt( +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[X:%.*]], 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ADD]], -123 +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = add nsw nuw i8 %x, 5 + %cmp = icmp slt i8 %add, -123 + ret i1 %cmp +} + +; minimum x is -128, so add could be -123. + +define i1 @add_nsw_slt_limit(i8 %x) { +; CHECK-LABEL: @add_nsw_slt_limit( +; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], 5 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ADD]], -122 +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = add nsw i8 %x, 5 + %cmp = icmp slt i8 %add, -122 + ret i1 %cmp +} + ; InstCombine can fold this, but not InstSimplify. define i1 @add_nsw_neg_const2(i32 %x) { From 0edf99950e6234159c99710838f21d3629d756af Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 28 Dec 2021 09:25:16 -0500 Subject: [PATCH 123/992] [Analysis] allow caller to choose signed/unsigned when computing constant range We should not lose analysis precision if an 'add' has both no-wrap flags (nsw and nuw) compared to just one or the other. This patch is modeled on a similar construct that was added with D59386. I don't think it is possible to expose a problem with an unsigned compare because of the way this was coded (nuw is handled first). InstCombine has an assert that fires with the example from: https://github.com/llvm/llvm-project/issues/52884 ...because it was expecting InstSimplify to handle this kind of pattern with an smax. 
Fixes #52884 Differential Revision: https://reviews.llvm.org/D116322 --- llvm/include/llvm/Analysis/ValueTracking.h | 3 ++- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 4 ++-- llvm/lib/Analysis/InstructionSimplify.cpp | 3 ++- llvm/lib/Analysis/ValueTracking.cpp | 24 +++++++++++++------ .../Transforms/Vectorize/VectorCombine.cpp | 3 ++- .../InstCombine/minmax-intrinsics.ll | 12 ++++++++++ .../Transforms/InstSimplify/icmp-constant.ll | 16 ++++++------- llvm/unittests/Analysis/ValueTrackingTest.cpp | 20 ++++++++-------- 8 files changed, 55 insertions(+), 30 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index b4f38a3e976f..f0f78c0eaed4 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -555,7 +555,8 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// Determine the possible constant range of an integer or vector of integer /// value. This is intended as a cheap, non-recursive check. 
- ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true, + ConstantRange computeConstantRange(const Value *V, bool ForSigned, + bool UseInstrInfo = true, AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 5f1bf2001d47..6f0da2cf18fa 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1248,8 +1248,8 @@ AliasResult BasicAAResult::aliasGEP( else GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs()); - ConstantRange CR = - computeConstantRange(Index.Val.V, true, &AC, Index.CxtI); + ConstantRange CR = computeConstantRange(Index.Val.V, /* ForSigned */ false, + true, &AC, Index.CxtI); KnownBits Known = computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT); CR = CR.intersectWith( diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4831b22b1d46..1c26ab361908 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2890,7 +2890,8 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, if (RHS_CR.isFullSet()) return ConstantInt::getTrue(ITy); - ConstantRange LHS_CR = computeConstantRange(LHS, IIQ.UseInstrInfo); + ConstantRange LHS_CR = + computeConstantRange(LHS, CmpInst::isSigned(Pred), IIQ.UseInstrInfo); if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(ITy); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 7a1caed0420a..7876e209acc6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6756,7 +6756,8 @@ Optional llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, } static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, - APInt &Upper, const InstrInfoQuery &IIQ) { + APInt &Upper, 
const InstrInfoQuery &IIQ, + bool PreferSignedRange) { unsigned Width = Lower.getBitWidth(); const APInt *C; switch (BO.getOpcode()) { @@ -6764,7 +6765,14 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { bool HasNSW = IIQ.hasNoSignedWrap(&BO); bool HasNUW = IIQ.hasNoUnsignedWrap(&BO); - // FIXME: If we have both nuw and nsw, we should reduce the range further. + + // If the caller expects a signed compare, then try to use a signed range. + // Otherwise if both no-wraps are set, use the unsigned range because it + // is never larger than the signed range. Example: + // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125]. + if (PreferSignedRange && HasNSW && HasNUW) + HasNUW = false; + if (HasNUW) { // 'add nuw x, C' produces [C, UINT_MAX]. Lower = *C; @@ -7085,8 +7093,8 @@ static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) { } } -ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo, - AssumptionCache *AC, +ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, + bool UseInstrInfo, AssumptionCache *AC, const Instruction *CtxI, const DominatorTree *DT, unsigned Depth) { @@ -7104,7 +7112,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo, APInt Lower = APInt(BitWidth, 0); APInt Upper = APInt(BitWidth, 0); if (auto *BO = dyn_cast(V)) - setLimitsForBinOp(*BO, Lower, Upper, IIQ); + setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); else if (auto *II = dyn_cast(V)) setLimitsForIntrinsic(*II, Lower, Upper); else if (auto *SI = dyn_cast(V)) @@ -7136,8 +7144,10 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo, // Currently we just use information from comparisons. 
if (!Cmp || Cmp->getOperand(0) != V) continue; - ConstantRange RHS = computeConstantRange(Cmp->getOperand(1), UseInstrInfo, - AC, I, DT, Depth + 1); + // TODO: Set "ForSigned" parameter via Cmp->isSigned()? + ConstantRange RHS = + computeConstantRange(Cmp->getOperand(1), UseInstrInfo, + /* ForSigned */ false, AC, I, DT, Depth + 1); CR = CR.intersectWith( ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS)); } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index c0aedab2fed0..620d388199e0 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -881,7 +881,8 @@ static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy, ConstantRange IdxRange(IntWidth, true); if (isGuaranteedNotToBePoison(Idx, &AC)) { - if (ValidIndices.contains(computeConstantRange(Idx, true, &AC, CtxI, &DT))) + if (ValidIndices.contains(computeConstantRange(Idx, /* ForSigned */ false, + true, &AC, CtxI, &DT))) return ScalarizationResult::safe(); return ScalarizationResult::unsafe(); } diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index 7b76c3ad7422..194a693e5580 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -2129,3 +2129,15 @@ define <3 x i8> @umax_vector_splat_undef(<3 x i8> %x) { %r = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %a, <3 x i8> ) ret <3 x i8> %r } + +; Issue #52884 - this would assert because of a failure to simplify. 
+ +define i8 @smax_offset_simplify(i8 %x) { +; CHECK-LABEL: @smax_offset_simplify( +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[X:%.*]], 50 +; CHECK-NEXT: ret i8 [[TMP1]] +; + %1 = add nuw nsw i8 50, %x + %m = call i8 @llvm.smax.i8(i8 %1, i8 -124) + ret i8 %m +} diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll index 0784510a7ae4..46ac6f6b9dac 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll @@ -632,18 +632,18 @@ define i1 @add_nsw_sgt(i8 %x) { ret i1 %cmp } +; nuw should not inhibit the fold. + define i1 @add_nsw_nuw_sgt(i8 %x) { ; CHECK-LABEL: @add_nsw_nuw_sgt( -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[X:%.*]], 5 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[ADD]], -124 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %add = add nsw nuw i8 %x, 5 %cmp = icmp sgt i8 %add, -124 ret i1 %cmp } -; minimum x is -128, so add could be -124. +; negative test - minimum x is -128, so add could be -124. define i1 @add_nsw_sgt_limit(i8 %x) { ; CHECK-LABEL: @add_nsw_sgt_limit( @@ -665,18 +665,18 @@ define i1 @add_nsw_slt(i8 %x) { ret i1 %cmp } +; nuw should not inhibit the fold. + define i1 @add_nsw_nuw_slt(i8 %x) { ; CHECK-LABEL: @add_nsw_nuw_slt( -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[X:%.*]], 5 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ADD]], -123 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %add = add nsw nuw i8 %x, 5 %cmp = icmp slt i8 %add, -123 ret i1 %cmp } -; minimum x is -128, so add could be -123. +; negative test - minimum x is -128, so add could be -123. 
define i1 @add_nsw_slt_limit(i8 %x) { ; CHECK-LABEL: @add_nsw_slt_limit( diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 0104d321f2c9..f4b4d345a054 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2000,11 +2000,11 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { AssumptionCache AC(*F); Value *Stride = &*F->arg_begin(); - ConstantRange CR1 = computeConstantRange(Stride, true, &AC, nullptr); + ConstantRange CR1 = computeConstantRange(Stride, false, true, &AC, nullptr); EXPECT_TRUE(CR1.isFullSet()); Instruction *I = &findInstructionByName(F, "stride.plus.one"); - ConstantRange CR2 = computeConstantRange(Stride, true, &AC, I); + ConstantRange CR2 = computeConstantRange(Stride, false, true, &AC, I); EXPECT_EQ(5, CR2.getLower()); EXPECT_EQ(10, CR2.getUpper()); } @@ -2034,7 +2034,7 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { AssumptionCache AC(*F); Value *Stride = &*F->arg_begin(); Instruction *I = &findInstructionByName(F, "stride.plus.one"); - ConstantRange CR = computeConstantRange(Stride, true, &AC, I); + ConstantRange CR = computeConstantRange(Stride, false, true, &AC, I); EXPECT_EQ(99, *CR.getSingleElement()); } @@ -2072,12 +2072,12 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { AssumptionCache AC(*F); Value *Stride = &*F->arg_begin(); Instruction *GT2 = &findInstructionByName(F, "gt.2"); - ConstantRange CR = computeConstantRange(Stride, true, &AC, GT2); + ConstantRange CR = computeConstantRange(Stride, false, true, &AC, GT2); EXPECT_EQ(5, CR.getLower()); EXPECT_EQ(0, CR.getUpper()); Instruction *I = &findInstructionByName(F, "stride.plus.one"); - ConstantRange CR2 = computeConstantRange(Stride, true, &AC, I); + ConstantRange CR2 = computeConstantRange(Stride, false, true, &AC, I); EXPECT_EQ(50, CR2.getLower()); EXPECT_EQ(100, CR2.getUpper()); } @@ -2105,7 +2105,7 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { 
Value *Stride = &*F->arg_begin(); Instruction *I = &findInstructionByName(F, "stride.plus.one"); - ConstantRange CR = computeConstantRange(Stride, true, &AC, I); + ConstantRange CR = computeConstantRange(Stride, false, true, &AC, I); EXPECT_TRUE(CR.isEmptySet()); } @@ -2133,8 +2133,8 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { Value *X2 = &*std::next(F->arg_begin()); Instruction *I = &findInstructionByName(F, "stride.plus.one"); - ConstantRange CR1 = computeConstantRange(X1, true, &AC, I); - ConstantRange CR2 = computeConstantRange(X2, true, &AC, I); + ConstantRange CR1 = computeConstantRange(X1, false, true, &AC, I); + ConstantRange CR2 = computeConstantRange(X2, false, true, &AC, I); EXPECT_EQ(5, CR1.getLower()); EXPECT_EQ(0, CR1.getUpper()); @@ -2144,7 +2144,7 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { // Check the depth cutoff results in a conservative result (full set) by // passing Depth == MaxDepth == 6. - ConstantRange CR3 = computeConstantRange(X2, true, &AC, I, nullptr, 6); + ConstantRange CR3 = computeConstantRange(X2, false, true, &AC, I, nullptr, 6); EXPECT_TRUE(CR3.isFullSet()); } { @@ -2165,7 +2165,7 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { Value *X2 = &*std::next(F->arg_begin()); Instruction *I = &findInstructionByName(F, "stride.plus.one"); - ConstantRange CR1 = computeConstantRange(X2, true, &AC, I); + ConstantRange CR1 = computeConstantRange(X2, false, true, &AC, I); // If we don't know the value of x.2, we don't know the value of x.1. EXPECT_TRUE(CR1.isFullSet()); } From 648246cce6a36732dfcbc727d088888dffa18b9c Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 28 Dec 2021 08:37:08 -0800 Subject: [PATCH 124/992] [Hexagon] Remove isPredicateRegister in favor of isPredReg, NFC HexagonMCChecker has its own function isPredicateRegister, which does the same thing as HexagonMCInstrInfo::isPredReg. 
--- .../Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 11 ++++++----- .../Target/Hexagon/MCTargetDesc/HexagonMCChecker.h | 5 ----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 96c2965296ca..b9233618e5fd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -65,7 +65,8 @@ void HexagonMCChecker::init() { void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg, bool &isTrue) { - if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) { + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && + HexagonMCInstrInfo::isPredReg(RI, R)) { // Note an used predicate register. PredReg = R; isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI); @@ -123,7 +124,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { // same packet with an instruction that modifies is explicitly. Deal // with such situations individually. SoftDefs.insert(R); - else if (isPredicateRegister(R) && + else if (HexagonMCInstrInfo::isPredReg(RI, R) && HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) // Include implicit late predicates. LatePreds.insert(R); @@ -167,7 +168,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { // side-effect, then note as a soft definition. SoftDefs.insert(*SRI); else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && - isPredicateRegister(*SRI)) + HexagonMCInstrInfo::isPredReg(RI, *SRI)) // Some insns produce predicates too late to be used in the same packet. 
LatePreds.insert(*SRI); else if (i == 0 && HexagonMCInstrInfo::getType(MCII, MCI) == @@ -193,7 +194,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { if (MCI.getOperand(i).isReg()) { unsigned P = MCI.getOperand(i).getReg(); - if (isPredicateRegister(P)) + if (HexagonMCInstrInfo::isPredReg(RI, P)) NewPreds.insert(P); } } @@ -599,7 +600,7 @@ bool HexagonMCChecker::checkRegisters() { reportErrorRegisters(BadR); return false; } - if (!isPredicateRegister(R) && Defs[R].size() > 1) { + if (!HexagonMCInstrInfo::isPredReg(RI, R) && Defs[R].size() > 1) { // Check for multiple register definitions. PredSet &PM = Defs[R]; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index dbd3d8ae45e6..160d452ab917 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -103,11 +103,6 @@ class HexagonMCChecker { static void compoundRegisterMap(unsigned &); - bool isPredicateRegister(unsigned R) const { - return (Hexagon::P0 == R || Hexagon::P1 == R || Hexagon::P2 == R || - Hexagon::P3 == R); - } - bool isLoopRegister(unsigned R) const { return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R || Hexagon::LC1 == R); From 9d297c7894ecae3d0ab18b6dac4c2df0bc489951 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 Dec 2021 17:49:46 +0100 Subject: [PATCH 125/992] [VPlan] Add prepareToExecute to set up live-ins (NFC). This patch adds a new prepareToExecute helper to set up live-ins, so VPTransformState doesn't need to hold values like TripCount. This also requires making the trip count operand for ActiveLaneMask explicit in VPlan. 
Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D116320 --- .../Transforms/Vectorize/LoopVectorize.cpp | 9 ++---- llvm/lib/Transforms/Vectorize/VPlan.cpp | 32 ++++++++++++------- llvm/lib/Transforms/Vectorize/VPlan.h | 21 +++++++++--- 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ef49ae529cec..112e697c7f5e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7962,7 +7962,6 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, // 1. Create a new empty loop. Unlink the old loop and connect the new one. VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); - State.TripCount = ILV.getOrCreateTripCount(nullptr); State.CanonicalIV = ILV.Induction; ILV.collectPoisonGeneratingRecipes(State); @@ -7977,6 +7976,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. + BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), State); BestVPlan.execute(&State); // 3. Fix the vectorized code: take care of header phi's, live-outs, @@ -8461,11 +8461,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { bool TailFolded = !CM.isScalarEpilogueAllowed(); if (TailFolded && CM.TTI.emitGetActiveLaneMask()) { - // While ActiveLaneMask is a binary op that consumes the loop tripcount - // as a second argument, we only pass the IV here and extract the - // tripcount from the transform state where codegen of the VP instructions - // happen. 
- BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV}); + VPValue *TC = Plan->getOrCreateTripCount(); + BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}); } else { VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC}); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index bebc6bbdd4a7..e4517a47e7c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -677,7 +677,7 @@ void VPInstruction::generateInstruction(VPTransformState &State, // Get first lane of vector induction variable. Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); // Get the original loop tripcount. - Value *ScalarTC = State.TripCount; + Value *ScalarTC = State.get(getOperand(1), Part); auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue()); @@ -786,23 +786,31 @@ void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { FMF = FMFNew; } -/// Generate the code inside the body of the vectorized loop. Assumes a single -/// LoopVectorBody basic-block was created for this. Introduce additional -/// basic-blocks as needed, and fill them all. -void VPlan::execute(VPTransformState *State) { - // -1. Check if the backedge taken count is needed, and if so build it. +void VPlan::prepareToExecute(Value *TripCountV, VPTransformState &State) { + // Check if the trip count is needed, and if so build it. + if (TripCount && TripCount->getNumUsers()) { + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) + State.set(TripCount, TripCountV, Part); + } + + // Check if the backedge taken count is needed, and if so build it. 
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { - Value *TC = State->TripCount; - IRBuilder<> Builder(State->CFG.PrevBB->getTerminator()); - auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1), + IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); + auto *TCMO = Builder.CreateSub(TripCountV, + ConstantInt::get(TripCountV->getType(), 1), "trip.count.minus.1"); - auto VF = State->VF; + auto VF = State.VF; Value *VTCMO = VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast"); - for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) - State->set(BackedgeTakenCount, VTCMO, Part); + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) + State.set(BackedgeTakenCount, VTCMO, Part); } +} +/// Generate the code inside the body of the vectorized loop. Assumes a single +/// LoopVectorBody basic-block was created for this. Introduce additional +/// basic-blocks as needed, and fill them all. +void VPlan::execute(VPTransformState *State) { // 0. Set the reverse mapping from VPValues to Values for code generation. for (auto &Entry : Value2VPValue) State->VPValue2Value[Entry.second] = Entry.first; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 7fa3c1defaca..96de9114b618 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -341,9 +341,6 @@ struct VPTransformState { /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF). Value *CanonicalIV = nullptr; - /// Hold the trip count of the scalar loop. - Value *TripCount = nullptr; - /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. InnerLoopVectorizer *ILV; @@ -2134,8 +2131,12 @@ class VPlan { // (operators '==' and '<'). SetVector VPExternalDefs; - /// Represents the backedge taken count of the original loop, for folding + /// Represents the trip count of the original loop, for folding /// the tail. 
+ VPValue *TripCount = nullptr; + + /// Represents the backedge taken count of the original loop, for folding + /// the tail. It equals TripCount - 1. VPValue *BackedgeTakenCount = nullptr; /// Holds a mapping between Values and their corresponding VPValue inside @@ -2169,12 +2170,17 @@ class VPlan { } for (VPValue *VPV : VPValuesToFree) delete VPV; + if (TripCount) + delete TripCount; if (BackedgeTakenCount) delete BackedgeTakenCount; for (VPValue *Def : VPExternalDefs) delete Def; } + /// Prepare the plan for execution, setting up the required live-in values. + void prepareToExecute(Value *TripCount, VPTransformState &State); + /// Generate the IR code for this VPlan. void execute(struct VPTransformState *State); @@ -2187,6 +2193,13 @@ class VPlan { return Entry; } + /// The trip count of the original loop. + VPValue *getOrCreateTripCount() { + if (!TripCount) + TripCount = new VPValue(); + return TripCount; + } + /// The backedge taken count of the original loop. VPValue *getOrCreateBackedgeTakenCount() { if (!BackedgeTakenCount) From 5a667c0e741e5a895161b7a14376c59632fc5aa1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 28 Dec 2021 08:52:25 -0800 Subject: [PATCH 126/992] [llvm] Use nullptr instead of 0 (NFC) Identified with modernize-use-nullptr. 
--- llvm/include/llvm/ADT/ArrayRef.h | 2 +- llvm/include/llvm/ADT/ImmutableMap.h | 4 ++-- llvm/include/llvm/ADT/ilist.h | 4 ++-- .../llvm/ExecutionEngine/RuntimeDyldChecker.h | 2 +- llvm/include/llvm/LTO/legacy/LTOModule.h | 2 +- llvm/include/llvm/Support/CommandLine.h | 10 ++++++---- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 +- .../CodeGen/InterleavedLoadCombinePass.cpp | 6 +++--- llvm/lib/Debuginfod/Debuginfod.cpp | 2 +- .../ExecutionEngine/Orc/OrcV2CBindings.cpp | 12 ++++++------ .../SimpleExecutorMemoryManager.cpp | 2 +- .../ExecutionEngine/SectionMemoryManager.cpp | 6 +++--- llvm/lib/IR/Core.cpp | 4 ++-- llvm/lib/InterfaceStub/IFSHandler.cpp | 2 +- llvm/lib/ObjectYAML/MachOEmitter.cpp | 6 +++--- llvm/lib/Remarks/Remark.cpp | 8 ++++---- llvm/lib/Support/Host.cpp | 8 ++++---- llvm/lib/Support/Unix/Path.inc | 2 +- .../Target/AArch64/AArch64SIMDInstrOpt.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 19 +++++++++++-------- .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 2 +- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 3 ++- llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 2 +- .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- llvm/lib/Target/AVR/AVRISelLowering.cpp | 5 +++-- llvm/lib/Target/AVR/AVRInstrInfo.cpp | 4 ++-- llvm/lib/Target/AVR/AVRRegisterInfo.h | 4 ++-- .../lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 2 +- .../Target/BPF/BPFAbstractMemberAccess.cpp | 2 +- llvm/lib/Target/BPF/BPFISelLowering.cpp | 2 +- llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 6 +++--- .../BPF/MCTargetDesc/BPFInstPrinter.cpp | 2 +- .../Lanai/MCTargetDesc/LanaiInstPrinter.cpp | 2 +- .../PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 5 ++--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 7 ++++--- .../RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 2 +- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 +- .../WindowsManifest/WindowsManifestMerger.cpp | 2 +- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 4 ++-- llvm/tools/llvm-objdump/MachODump.cpp | 4 ++-- llvm/tools/llvm-readobj/COFFDumper.cpp | 2 
+- .../llvm-reduce/deltas/ReduceMetadata.cpp | 8 ++++---- .../TableGen/GlobalISel/GIMatchDagInstr.h | 2 +- 43 files changed, 94 insertions(+), 87 deletions(-) diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h index 61f85cfc812b..b6896395dae8 100644 --- a/llvm/include/llvm/ADT/ArrayRef.h +++ b/llvm/include/llvm/ADT/ArrayRef.h @@ -141,7 +141,7 @@ namespace llvm { template ArrayRef(const std::vector &Vec, std::enable_if_t::value> - * = 0) + * = nullptr) : Data(Vec.data()), Length(Vec.size()) {} /// @} diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h index 81b21a7319a7..cf6fb870897a 100644 --- a/llvm/include/llvm/ADT/ImmutableMap.h +++ b/llvm/include/llvm/ADT/ImmutableMap.h @@ -264,7 +264,7 @@ class ImmutableMapRef { : Root(X.getRootWithoutRetain()), Factory(F.getTreeFactory()) {} static inline ImmutableMapRef getEmptyMap(FactoryTy *F) { - return ImmutableMapRef(0, F); + return ImmutableMapRef(nullptr, F); } void manualRetain() { @@ -345,7 +345,7 @@ class ImmutableMapRef { /// which key is the highest in the ordering of keys in the map. This /// method returns NULL if the map is empty. value_type* getMaxElement() const { - return Root ? &(Root->getMaxElement()->getValue()) : 0; + return Root ? 
&(Root->getMaxElement()->getValue()) : nullptr; } //===--------------------------------------------------===// diff --git a/llvm/include/llvm/ADT/ilist.h b/llvm/include/llvm/ADT/ilist.h index d5a1f286b177..b3aa26f2454d 100644 --- a/llvm/include/llvm/ADT/ilist.h +++ b/llvm/include/llvm/ADT/ilist.h @@ -103,7 +103,7 @@ template struct HasGetNext { template struct SFINAE {}; template - static Yes &test(U *I, decltype(I->getNext(&make())) * = 0); + static Yes &test(U *I, decltype(I->getNext(&make())) * = nullptr); template static No &test(...); public: @@ -117,7 +117,7 @@ template struct HasCreateSentinel { typedef char No[2]; template - static Yes &test(U *I, decltype(I->createSentinel()) * = 0); + static Yes &test(U *I, decltype(I->createSentinel()) * = nullptr); template static No &test(...); public: diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h index 153ffef14e6f..37fe44d5fa69 100644 --- a/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h +++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h @@ -127,7 +127,7 @@ class RuntimeDyldChecker { JITTargetAddress getTargetAddress() const { return TargetAddress; } private: - const char *ContentPtr = 0; + const char *ContentPtr = nullptr; uint64_t Size = 0; JITTargetAddress TargetAddress = 0; }; diff --git a/llvm/include/llvm/LTO/legacy/LTOModule.h b/llvm/include/llvm/LTO/legacy/LTOModule.h index 01e63db4bab3..e4ec18067327 100644 --- a/llvm/include/llvm/LTO/legacy/LTOModule.h +++ b/llvm/include/llvm/LTO/legacy/LTOModule.h @@ -41,7 +41,7 @@ struct LTOModule { StringRef name; uint32_t attributes = 0; bool isFunction = 0; - const GlobalValue *symbol = 0; + const GlobalValue *symbol = nullptr; }; std::unique_ptr OwnedContext; diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 2ee02010ff1d..2c3edd858a3f 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ 
b/llvm/include/llvm/Support/CommandLine.h @@ -1550,8 +1550,9 @@ template class list_storage { } template void addValue(const T &V) { - assert(Location != 0 && "cl::location(...) not specified for a command " - "line option with external storage!"); + assert(Location != nullptr && + "cl::location(...) not specified for a command " + "line option with external storage!"); Location->push_back(V); } }; @@ -1754,8 +1755,9 @@ template class bits_storage { } template void addValue(const T &V) { - assert(Location != 0 && "cl::location(...) not specified for a command " - "line option with external storage!"); + assert(Location != nullptr && + "cl::location(...) not specified for a command " + "line option with external storage!"); *Location |= Bit(V); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 6b6d63f14f87..0d656707615c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1189,7 +1189,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, DefinitionArgs = SP->getType()->getTypeArray(); if (DeclArgs.size() && DefinitionArgs.size()) - if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0]) + if (DefinitionArgs[0] != nullptr && DeclArgs[0] != DefinitionArgs[0]) addType(SPDie, DefinitionArgs[0]); DeclDie = getDIE(SPDecl); diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 9fabcfb1f326..7e43a0cbbe73 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -195,12 +195,12 @@ class Polynomial { } Polynomial(const APInt &A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(A) {} Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(BitWidth, A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), 
B(), A(BitWidth, A) {} - Polynomial() : ErrorMSBs((unsigned)-1), V(NULL), B(), A() {} + Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr), B(), A() {} /// Increment and clamp the number of undefined bits. void incErrorMSBs(unsigned amt) { diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp index 389b18fd62ac..48d0e4d1b763 100644 --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -36,7 +36,7 @@ static std::string buildIDToString(BuildIDRef ID) { Expected> getDefaultDebuginfodUrls() { const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS"); - if (DebuginfodUrlsEnv == NULL) + if (DebuginfodUrlsEnv == nullptr) return SmallVector(); SmallVector DebuginfodUrls; diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index 77a8f5af8ba0..71be8dfdc004 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -611,7 +611,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess( DynamicLibrarySearchGenerator::GetForCurrentProcess(GlobalPrefix, Pred); if (!ProcessSymsGenerator) { - *Result = 0; + *Result = nullptr; return wrap(ProcessSymsGenerator.takeError()); } @@ -637,7 +637,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath( DynamicLibrarySearchGenerator::Load(FileName, GlobalPrefix, Pred); if (!LibrarySymsGenerator) { - *Result = 0; + *Result = nullptr; return wrap(LibrarySymsGenerator.takeError()); } @@ -657,7 +657,7 @@ LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath( auto LibrarySymsGenerator = StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName, TT); if (!LibrarySymsGenerator) { - *Result = 0; + *Result = nullptr; return wrap(LibrarySymsGenerator.takeError()); } *Result = wrap(LibrarySymsGenerator->release()); @@ -666,7 +666,7 @@ LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath( auto LibrarySymsGenerator = 
StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName); if (!LibrarySymsGenerator) { - *Result = 0; + *Result = nullptr; return wrap(LibrarySymsGenerator.takeError()); } *Result = wrap(LibrarySymsGenerator->release()); @@ -712,7 +712,7 @@ LLVMErrorRef LLVMOrcJITTargetMachineBuilderDetectHost( auto JTMB = JITTargetMachineBuilder::detectHost(); if (!JTMB) { - Result = 0; + Result = nullptr; return wrap(JTMB.takeError()); } @@ -876,7 +876,7 @@ LLVMErrorRef LLVMOrcCreateLLJIT(LLVMOrcLLJITRef *Result, LLVMOrcDisposeLLJITBuilder(Builder); if (!J) { - Result = 0; + Result = nullptr; return wrap(J.takeError()); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp index 232340c22a32..1198b81d80fc 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp @@ -24,7 +24,7 @@ SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() { Expected SimpleExecutorMemoryManager::allocate(uint64_t Size) { std::error_code EC; auto MB = sys::Memory::allocateMappedMemory( - Size, 0, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); + Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); if (EC) return errorCodeToError(EC); std::lock_guard Lock(M); diff --git a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp index 6690dd07d99b..56b232b9dbcd 100644 --- a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -114,11 +114,11 @@ uint8_t *SectionMemoryManager::allocateSection( // Copy the address to all the other groups, if they have not // been initialized. 
- if (CodeMem.Near.base() == 0) + if (CodeMem.Near.base() == nullptr) CodeMem.Near = MB; - if (RODataMem.Near.base() == 0) + if (RODataMem.Near.base() == nullptr) RODataMem.Near = MB; - if (RWDataMem.Near.base() == 0) + if (RWDataMem.Near.base() == nullptr) RWDataMem.Near = MB; // Remember that we allocated this memory diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index a263d2536541..85271ddd57f2 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -142,12 +142,12 @@ LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID, if (AttrKind == Attribute::AttrKind::ByVal) { // After r362128, byval attributes need to have a type attribute. Provide a // NULL one until a proper API is added for this. - return wrap(Attribute::getWithByValType(Ctx, NULL)); + return wrap(Attribute::getWithByValType(Ctx, nullptr)); } if (AttrKind == Attribute::AttrKind::StructRet) { // Same as byval. - return wrap(Attribute::getWithStructRetType(Ctx, NULL)); + return wrap(Attribute::getWithStructRetType(Ctx, nullptr)); } return wrap(Attribute::get(Ctx, AttrKind, Val)); diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp index e6bf09232ce2..4ccbb18ca04a 100644 --- a/llvm/lib/InterfaceStub/IFSHandler.cpp +++ b/llvm/lib/InterfaceStub/IFSHandler.cpp @@ -195,7 +195,7 @@ Expected> ifs::readIFSFromBuffer(StringRef Buf) { } Error ifs::writeIFSToOutputStream(raw_ostream &OS, const IFSStub &Stub) { - yaml::Output YamlOut(OS, NULL, /*WrapColumn =*/0); + yaml::Output YamlOut(OS, nullptr, /*WrapColumn =*/0); std::unique_ptr CopyStub(new IFSStubTriple(Stub)); if (Stub.Target.Arch) { CopyStub->Target.ArchString = std::string( diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index e5ffb12df434..b9fad2982828 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -481,9 +481,9 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) { typedef std::pair 
writeOperation; std::vector WriteQueue; - MachO::dyld_info_command *DyldInfoOnlyCmd = 0; - MachO::symtab_command *SymtabCmd = 0; - MachO::dysymtab_command *DSymtabCmd = 0; + MachO::dyld_info_command *DyldInfoOnlyCmd = nullptr; + MachO::symtab_command *SymtabCmd = nullptr; + MachO::dysymtab_command *DSymtabCmd = nullptr; for (auto &LC : Obj.LoadCommands) { switch (LC.Data.load_command_data.cmd) { case MachO::LC_SYMTAB: diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp index 057d1a378599..e6b7de1a2cf5 100644 --- a/llvm/lib/Remarks/Remark.cpp +++ b/llvm/lib/Remarks/Remark.cpp @@ -111,7 +111,7 @@ LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark) { ArrayRef Args = unwrap(Remark)->Args; // No arguments to iterate on. if (Args.empty()) - return NULL; + return nullptr; return reinterpret_cast( const_cast(Args.begin())); } @@ -119,13 +119,13 @@ LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark) { extern "C" LLVMRemarkArgRef LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef ArgIt, LLVMRemarkEntryRef Remark) { // No more arguments to iterate on. - if (ArgIt == NULL) - return NULL; + if (ArgIt == nullptr) + return nullptr; auto It = (ArrayRef::const_iterator)ArgIt; auto Next = std::next(It); if (Next == unwrap(Remark)->Args.end()) - return NULL; + return nullptr; return reinterpret_cast(const_cast(Next)); } diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 7b14616f6fea..66d21a6ec859 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -83,12 +83,12 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { StringRef::const_iterator CIP = CPUInfoStart; - StringRef::const_iterator CPUStart = 0; + StringRef::const_iterator CPUStart = nullptr; size_t CPULen = 0; // We need to find the first line which starts with cpu, spaces, and a colon. // After the colon, there may be some additional spaces and then the cpu type. 
- while (CIP < CPUInfoEnd && CPUStart == 0) { + while (CIP < CPUInfoEnd && CPUStart == nullptr) { if (CIP < CPUInfoEnd && *CIP == '\n') ++CIP; @@ -118,12 +118,12 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { } } - if (CPUStart == 0) + if (CPUStart == nullptr) while (CIP < CPUInfoEnd && *CIP != '\n') ++CIP; } - if (CPUStart == 0) + if (CPUStart == nullptr) return generic; return StringSwitch(StringRef(CPUStart, CPULen)) diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index f5cb5895d95d..c0712e0a0681 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -273,7 +273,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { // the program, and not the eventual binary file. Therefore, call realpath // so this behaves the same on all platforms. #if _POSIX_VERSION >= 200112 || defined(__GLIBC__) - if (char *real_path = realpath(exe_path, NULL)) { + if (char *real_path = realpath(exe_path, nullptr)) { std::string ret = std::string(real_path); free(real_path); return ret; diff --git a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp index 80d98d17e1d6..2ef7bc83003a 100644 --- a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp @@ -633,7 +633,7 @@ bool AArch64SIMDInstrOpt::optimizeLdStInterleave(MachineInstr &MI) { /// Return true when the instruction is processed successfully. 
bool AArch64SIMDInstrOpt::processSeqRegInst(MachineInstr *DefiningMI, unsigned* StReg, unsigned* StRegKill, unsigned NumArg) const { - assert (DefiningMI != NULL); + assert(DefiningMI != nullptr); if (DefiningMI->getOpcode() != AArch64::REG_SEQUENCE) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 49cf6db5197f..c221b55d9c70 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -623,7 +623,8 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { Function *Callee = CI->getCalledFunction(); // Ignore indirect calls. - if (Callee == 0) return false; + if (Callee == nullptr) + return false; BasicBlock *BB = CI->getParent(); LLVMContext &Context = CI->getParent()->getContext(); @@ -1402,8 +1403,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, Function *UCallee = UI->getCalledFunction(); Type *RetType = UCallee->getReturnType(); B.SetInsertPoint(&*ItNew); - AllocaInst *Alloc = B.CreateAlloca(RetType, 0, - std::string(prefix) + UI->getName()); + AllocaInst *Alloc = + B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName()); Alloc->setAlignment( Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); return Alloc; @@ -1724,7 +1725,8 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { // Ignore indirect calls. Function *Callee = CI->getCalledFunction(); - if (Callee == 0) continue; + if (Callee == nullptr) + continue; LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; dbgs().flush()); @@ -1757,7 +1759,7 @@ PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, // Ignore indirect calls. Function *Callee = CI->getCalledFunction(); - if (Callee == 0) + if (Callee == nullptr) continue; LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; @@ -1783,9 +1785,10 @@ bool AMDGPUUseNativeCalls::runOnFunction(Function &F) { // Ignore indirect calls. 
Function *Callee = CI->getCalledFunction(); - if (Callee == 0) continue; + if (Callee == nullptr) + continue; - if(Simplifier.useNative(CI)) + if (Simplifier.useNative(CI)) Changed = true; } } @@ -1811,7 +1814,7 @@ PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, // Ignore indirect calls. Function *Callee = CI->getCalledFunction(); - if (Callee == 0) + if (Callee == nullptr) continue; if (Simplifier.useNative(CI)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index 7c4eb71882c7..f91f31508ad2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -463,7 +463,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) { WhatToStore.push_back(Arg); } } else if (isa(ArgType)) { - Type *IType = NULL; + Type *IType = nullptr; uint32_t EleCount = cast(ArgType)->getNumElements(); uint32_t EleSize = ArgType->getScalarSizeInBits(); uint32_t TotalSize = EleCount * EleSize; diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f4d9002e930e..4f807cca00eb 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1847,7 +1847,8 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0) return false; - if (MI.mayLoad() && TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL) + if (MI.mayLoad() && + TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr) return false; if (AnchorList.count(&MI)) diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp index 69eab762f05c..24a8879b5684 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -188,7 +188,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned 
Offset = countTrailingZeros(InstrMode.Mask); unsigned Width = countTrailingOnes(InstrMode.Mask >> Offset); unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); - BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32)) + BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32)) .addImm(Value) .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) | (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) | diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bfe078b06861..c8cec88d6e11 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1870,7 +1870,7 @@ class ARMOperand : public MCParsedAsmOperand { } template bool isMemRegRQOffset() const { - if (!isMVEMem() || Memory.OffsetImm != 0 || Memory.Alignment != 0) + if (!isMVEMem() || Memory.OffsetImm != nullptr || Memory.Alignment != 0) return false; if (!ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains( diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index a6f2afb87102..1fb71ab205e7 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -874,7 +874,8 @@ bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL, // Allow reg+<6bit> offset. 
if (Offs < 0) Offs = -Offs; - if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isUInt<6>(Offs)) { + if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 && + isUInt<6>(Offs)) { return true; } @@ -2012,7 +2013,7 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result(nullptr, 0); SDLoc DL(Op); EVT Ty = Op.getValueType(); diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 51060018a5ca..ac52c47f93d5 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -304,11 +304,11 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } Cond.clear(); - FBB = 0; + FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); UnCondBrIter = MBB.end(); diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h index fa27d9283209..2c5647b52c1c 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.h +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h @@ -27,7 +27,7 @@ class AVRRegisterInfo : public AVRGenRegisterInfo { public: const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; @@ -39,7 +39,7 @@ class AVRRegisterInfo : public AVRGenRegisterInfo { /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; Register getFrameRegister(const MachineFunction &MF) const override; diff --git 
a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 95ecd28200ba..95c737aa272e 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -281,7 +281,7 @@ bool AVRAsmParser::invalidOperand(SMLoc const &Loc, OperandVector const &Operands, uint64_t const &ErrorInfo) { SMLoc ErrorLoc = Loc; - char const *Diag = 0; + char const *Diag = nullptr; if (ErrorInfo != ~0U) { if (ErrorInfo >= Operands.size()) { diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index ab7e848409d9..46141e69d9d4 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -1002,7 +1002,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(CallInst *Call, VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr or enum value GV = new GlobalVariable(*M, VarType, false, GlobalVariable::ExternalLinkage, - NULL, AccessKey); + nullptr, AccessKey); GV->addAttribute(BPFCoreSharedInfo::AmaAttr); GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); GEPGlobals[AccessKey] = GV; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 90723ac04f64..0587cb0e16e3 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -325,7 +325,7 @@ SDValue BPFTargetLowering::LowerFormalArguments( default: { errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getEVTString() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } case MVT::i32: case MVT::i64: diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index 36237b2fc4fd..6dfb7dc39922 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -105,10 +105,10 @@ static bool BPFPreserveDITypeImpl(Function &F) { BasicBlock *BB = 
Call->getParent(); IntegerType *VarType = Type::getInt64Ty(BB->getContext()); - std::string GVName = BaseName + std::to_string(Count) + "$" + - std::to_string(Reloc); + std::string GVName = + BaseName + std::to_string(Count) + "$" + std::to_string(Reloc); GlobalVariable *GV = new GlobalVariable( - *M, VarType, false, GlobalVariable::ExternalLinkage, NULL, GVName); + *M, VarType, false, GlobalVariable::ExternalLinkage, nullptr, GVName); GV->addAttribute(BPFCoreSharedInfo::TypeIdAttr); GV->setMetadata(LLVMContext::MD_preserve_access_index, MD); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp index e0aeec989879..200c72a07ed6 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp @@ -50,7 +50,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &O) { void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported"); const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { O << getRegisterName(Op.getReg()); diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp index 7027d18126bb..d8a66bc8a0da 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp @@ -148,7 +148,7 @@ void LanaiInstPrinter::printInst(const MCInst *MI, uint64_t Address, void LanaiInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS, const char *Modifier) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported"); const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) OS << "%" << 
getRegisterName(Op.getReg()); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index 0ca8587ba483..b92b0fc342ec 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -40,9 +40,8 @@ PPCELFStreamer::PPCELFStreamer(MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, std::unique_ptr Emitter) - : MCELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter)), LastLabel(NULL) { -} + : MCELFStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)), + LastLabel(nullptr) {} void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index a0fd2111de11..61a6ed9e3438 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -5266,7 +5266,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); - if (SrcMI != NULL) + if (SrcMI != nullptr) return isSignOrZeroExtended(*SrcMI, SignExt, Depth); return false; @@ -5290,7 +5290,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); - if (SrcMI != NULL) + if (SrcMI != nullptr) return isSignOrZeroExtended(*SrcMI, SignExt, Depth); return false; @@ -5319,7 +5319,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); - if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) + if (SrcMI == nullptr || + !isSignOrZeroExtended(*SrcMI, SignExt, Depth + 1)) return false; } else diff 
--git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 89a7d54f60f8..3268740849f0 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -85,7 +85,7 @@ void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported"); const MCOperand &MO = MI->getOperand(OpNo); if (MO.isReg()) { diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 893928fb0560..022d9c7abc8c 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1142,7 +1142,7 @@ static LoopUnrollResult tryToUnrollLoop( // automatic unrolling from interfering with the user requested // transformation. 
Loop *ParentL = L->getParentLoop(); - if (ParentL != NULL && + if (ParentL != nullptr && hasUnrollAndJamTransformation(ParentL) == TM_ForcedByUser && hasUnrollTransformation(L) != TM_ForcedByUser) { LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has" diff --git a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp index 1be1d34417eb..40c03f7b0de7 100644 --- a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp +++ b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp @@ -669,7 +669,7 @@ WindowsManifestMerger::WindowsManifestMergerImpl::getMergedManifest() { std::unique_ptr OutputDoc( xmlNewDoc((const unsigned char *)"1.0")); xmlDocSetRootElement(OutputDoc.get(), CombinedRoot); - assert(0 == xmlDocGetRootElement(CombinedDoc)); + assert(nullptr == xmlDocGetRootElement(CombinedDoc)); xmlKeepBlanksDefault(0); xmlChar *Buff = nullptr; diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 3638569343ba..e6588090625e 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -476,7 +476,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { return; } - char *AllocBase = 0; + char *AllocBase = nullptr; { std::lock_guard Lock(SlabMutex); @@ -1883,7 +1883,7 @@ int main(int argc, char *argv[]) { if (ShowInitialExecutionSessionState) S->ES.dump(outs()); - JITEvaluatedSymbol EntryPoint = 0; + JITEvaluatedSymbol EntryPoint = nullptr; { TimeRegion TR(Timers ? 
&Timers->LinkTimer : nullptr); // Find the entry-point function unconditionally, since we want to force diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index b0cf1f775ced..193ba2da1941 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -917,10 +917,10 @@ static void PrintRelocationEntries(const MachOObjectFile *O, else { SymbolRef Symbol = *O->getSymbolByIndex(r_symbolnum); Expected SymNameNext = Symbol.getName(); - const char *name = NULL; + const char *name = nullptr; if (SymNameNext) name = SymNameNext->data(); - if (name == NULL) + if (name == nullptr) outs() << format("?(%d)\n", r_symbolnum); else outs() << name << "\n"; diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index b235398e7a45..e1b28e3ce745 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -126,7 +126,7 @@ class COFFDumper : public ObjDumper { void printCOFFTLSDirectory(const coff_tls_directory *TlsTable); typedef void (*PrintExtraCB)(raw_ostream &, const uint8_t *); void printRVATable(uint64_t TableVA, uint64_t Count, uint64_t EntrySize, - PrintExtraCB PrintExtra = 0); + PrintExtraCB PrintExtra = nullptr); void printCodeViewSymbolSection(StringRef SectionName, const SectionRef &Section); void printCodeViewTypeSection(StringRef SectionName, const SectionRef &Section); diff --git a/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp index 9ef03d4c8536..078230e80954 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp @@ -31,7 +31,7 @@ static void extractMetadataFromModule(Oracle &O, Module &Program) { for (NamedMDNode *NN : NamedNodesToDelete) { for (auto I : seq(0, NN->getNumOperands())) - NN->setOperand(I, NULL); + NN->setOperand(I, nullptr); NN->eraseFromParent(); } @@ -41,7 +41,7 @@ static void 
extractMetadataFromModule(Oracle &O, Module &Program) { GV.getAllMetadata(MDs); for (std::pair &MD : MDs) if (!O.shouldKeep()) - GV.setMetadata(MD.first, NULL); + GV.setMetadata(MD.first, nullptr); } for (Function &F : Program) { @@ -51,7 +51,7 @@ static void extractMetadataFromModule(Oracle &O, Module &Program) { F.getAllMetadata(MDs); for (std::pair &MD : MDs) if (!O.shouldKeep()) - F.setMetadata(MD.first, NULL); + F.setMetadata(MD.first, nullptr); } // Delete out-of-chunk metadata attached to instructions. @@ -60,7 +60,7 @@ static void extractMetadataFromModule(Oracle &O, Module &Program) { I.getAllMetadata(MDs); for (std::pair &MD : MDs) if (!O.shouldKeep()) - I.setMetadata(MD.first, NULL); + I.setMetadata(MD.first, nullptr); } } } diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h index 4a07767a2e19..0c39b50442b4 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h +++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h @@ -61,7 +61,7 @@ class GIMatchDagInstr { /// For debugging purposes, it's helpful to have access to a description of /// the Opcode. However, this object shouldn't use it for more than debugging /// output since predicates are expected to be handled outside the DAG. - CodeGenInstruction *OpcodeAnnotation = 0; + CodeGenInstruction *OpcodeAnnotation = nullptr; /// When true, this instruction will be a starting point for a match attempt. bool IsMatchRoot = false; From 4360207a219ca7a6cd53f6ef866b58bfb01c9c69 Mon Sep 17 00:00:00 2001 From: Tony Tye Date: Fri, 24 Dec 2021 08:30:06 +0000 Subject: [PATCH 127/992] [AMDGPU][NFC] Update DWARF extension allowing locations on stack Add changes to the DWARF Version 5 standard to the DWARF extension to allow locations on the evaluation stack documentation. 
Reviewed By: kzhuravl Differential Revision: https://reviews.llvm.org/D116265 --- ...ionDescriptionOnTheDwarfExpressionStack.md | 2883 ++++++++++++++++- 1 file changed, 2817 insertions(+), 66 deletions(-) diff --git a/llvm/docs/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack.md b/llvm/docs/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack.md index 53ce8cb8b3a9..36b57a22980d 100644 --- a/llvm/docs/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack.md +++ b/llvm/docs/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack.md @@ -1,33 +1,88 @@ # Allow Location Descriptions on the DWARF Expression Stack -- [Extension](#extension) -- [Heterogeneous Computing Devices](#heterogeneous-computing-devices) -- [DWARF 5](#dwarf-5) - - [How DWARF Maps Source Language To Hardware](#how-dwarf-maps-source-language-to-hardware) - - [Examples](#examples) - - [Dynamic Array Size](#dynamic-array-size) - - [Variable Location in Register](#variable-location-in-register) - - [Variable Location in Memory](#variable-location-in-memory) - - [Variable Spread Across Different Locations](#variable-spread-across-different-locations) - - [Offsetting a Composite Location](#offsetting-a-composite-location) - - [Limitations](#limitations) -- [Extension Solution](#extension-solution) - - [Location Description](#location-description) - - [Stack Location Description Operations](#stack-location-description-operations) - - [Examples](#examples-1) - - [Source Language Variable Spilled to Part of a Vector Register](#source-language-variable-spilled-to-part-of-a-vector-register) - - [Source Language Variable Spread Across Multiple Vector 
Registers](#source-language-variable-spread-across-multiple-vector-registers) - - [Source Language Variable Spread Across Multiple Kinds of Locations](#source-language-variable-spread-across-multiple-kinds-of-locations) - - [Address Spaces](#address-spaces) - - [Bit Offsets](#bit-offsets) - - [Call Frame Information (CFI)](#call-frame-information-cfi) - - [Objects Not In Byte Aligned Global Memory](#objects-not-in-byte-aligned-global-memory) - - [Higher Order Operations](#higher-order-operations) - - [Objects In Multiple Places](#objects-in-multiple-places) -- [Conclusion](#conclusion) -- [Further Information](#further-information) - -# Extension +- [1. Extension](#extension) +- [2. Heterogeneous Computing Devices](#heterogeneous-computing-devices) +- [3. DWARF 5](#dwarf-5) + - [3.1 How DWARF Maps Source Language To Hardware](#how-dwarf-maps-source-language-to-hardware) + - [3.2 Examples](#examples) + - [3.2.1 Dynamic Array Size](#dynamic-array-size) + - [3.2.2 Variable Location in Register](#variable-location-in-register) + - [3.2.3 Variable Location in Memory](#variable-location-in-memory) + - [3.2.4 Variable Spread Across Different Locations](#variable-spread-across-different-locations) + - [3.2.5 Offsetting a Composite Location](#offsetting-a-composite-location) + - [3.3 Limitations](#limitations) +- [4. 
Extension Solution](#extension-solution) + - [4.1 Location Description](#location-description) + - [4.2 Stack Location Description Operations](#stack-location-description-operations) + - [4.3 Examples](#examples-1) + - [4.3.1 Source Language Variable Spilled to Part of a Vector Register](#source-language-variable-spilled-to-part-of-a-vector-register) + - [4.3.2 Source Language Variable Spread Across Multiple Vector Registers](#source-language-variable-spread-across-multiple-vector-registers) + - [4.3.3 Source Language Variable Spread Across Multiple Kinds of Locations](#source-language-variable-spread-across-multiple-kinds-of-locations) + - [4.3.4 Address Spaces](#address-spaces) + - [4.3.5 Bit Offsets](#bit-offsets) + - [4.4 Call Frame Information (CFI)](#call-frame-information-cfi) + - [4.5 Objects Not In Byte Aligned Global Memory](#objects-not-in-byte-aligned-global-memory) + - [4.6 Higher Order Operations](#higher-order-operations) + - [4.7 Objects In Multiple Places](#objects-in-multiple-places) +- [5. Conclusion](#conclusion) +- [A. 
Changes to DWARF Debugging Information Format Version 5](#a-changes-to-dwarf-debugging-information-format-version-5) + - [A.2 General Description](#a-2-general-description) + - [A.2.5 DWARF Expressions](#a-2-5-dwarf-expressions) + - [A.2.5.1 DWARF Expression Evaluation Context](#a-2-5-1-dwarf-expression-evaluation-context) + - [A.2.5.2 DWARF Expression Value](#a-2-5-2-dwarf-expression-value) + - [A.2.5.3 DWARF Location Description](#a-2-5-3-dwarf-location-description) + - [A.2.5.4 DWARF Operation Expressions](#a-2-5-4-dwarf-operation-expressions) + - [A.2.5.4.1 Stack Operations](#a-2-5-4-1-stack-operations) + - [A.2.5.4.2 Control Flow Operations](#a-2-5-4-2-control-flow-operations) + - [A.2.5.4.3 Value Operations](#a-2-5-4-3-value-operations) + - [A.2.5.4.3.1 Literal Operations](#a-2-5-4-3-1-literal-operations) + - [A.2.5.4.3.2 Arithmetic and Logical Operations](#a-2-5-4-3-2-arithmetic-and-logical-operations) + - [A.2.5.4.3.3 Type Conversion Operations](#a-2-5-4-3-3-type-conversion-operations) + - [A.2.5.4.3.4 Special Value Operations](#a-2-5-4-3-4-special-value-operations) + - [A.2.5.4.4 Location Description Operations](#a-2-5-4-4-location-description-operations) + - [A.2.5.4.4.1 General Location Description Operations](#a-2-5-4-4-1-general-location-description-operations) + - [A.2.5.4.4.2 Undefined Location Description Operations](#a-2-5-4-4-2-undefined-location-description-operations) + - [A.2.5.4.4.3 Memory Location Description Operations](#a-2-5-4-4-3-memory-location-description-operations) + - [A.2.5.4.4.4 Register Location Description Operations](#a-2-5-4-4-4-register-location-description-operations) + - [A.2.5.4.4.5 Implicit Location Description Operations](#a-2-5-4-4-5-implicit-location-description-operations) + - [A.2.5.4.4.6 Composite Location Description Operations](#a-2-5-4-4-6-composite-location-description-operations) + - [A.2.5.5 DWARF Location List Expressions](#a-2-5-5-dwarf-location-list-expressions) + - [A.3 Program Scope 
Entries](#a-3-program-scope-entries) + - [A.3.3 Subroutine and Entry Point Entries](#a-3-3-subroutine-and-entry-point-entries) + - [A.3.3.5 Low-Level Information](#a-3-3-5-low-level-information) + - [A.3.4 Call Site Entries and Parameters](#a-3-4-call-site-entries-and-parameters) + - [A.3.4.2 Call Site Parameters](#a-3-4-2-call-site-parameters) + - [A.3.5 Lexical Block Entries](#a-3-5-lexical-block-entries) + - [A.4 Data Object and Object List Entries](#a-4-data-object-and-object-list-entries) + - [A.4.1 Data Object Entries](#a-4-1-data-object-entries) + - [A.5 Type Entries](#a-5-type-entries) + - [A.5.7 Structure, Union, Class and Interface Type Entries](#a-5-7-structure-union-class-and-interface-type-entries) + - [A.5.7.3 Derived or Extended Structures, Classes and Interfaces](#a-5-7-3-derived-or-extended-structures-classes-and-interfaces) + - [A.5.7.8 Member Function Entries](#a-5-7-8-member-function-entries) + - [A.5.14 Pointer to Member Type Entries](#a-5-14-pointer-to-member-type-entries) + - [A.5.16 Dynamic Type Entries](#a-5-16-dynamic-type-entries) + - [A.6 Other Debugging Information](#a-6-other-debugging-information) + - [A.6.2 Line Number Information](#a-6-2-line-number-information) + - [A.6.4 Call Frame Information](#a-6-4-call-frame-information) + - [A.6.4.1 Structure of Call Frame Information](#a-6-4-1-structure-of-call-frame-information) + - [A.6.4.2 Call Frame Instructions](#a-6-4-2-call-frame-instructions) + - [A.6.4.2.1 Row Creation Instructions](#a-6-4-2-1-row-creation-instructions) + - [A.6.4.2.2 CFA Definition Instructions](#a-6-4-2-2-cfa-definition-instructions) + - [A.6.4.2.3 Register Rule Instructions](#a-6-4-2-3-register-rule-instructions) + - [A.6.4.2.4 Row State Instructions](#a-6-4-2-4-row-state-instructions) + - [A.6.4.2.5 Padding Instruction](#a-6-4-2-5-padding-instruction) + - [A.6.4.3 Call Frame Instruction Usage](#a-6-4-3-call-frame-instruction-usage) + - [A.6.4.4 Call Frame Calling Address](#a-6-4-4-call-frame-calling-address) + - 
[A.7 Data Representation](#a-7-data-representation) + - [A.7.4 32-Bit and 64-Bit DWARF Formats](#a-7-4-32-bit-and-64-bit-dwarf-formats) + - [A.7.5 Format of Debugging Information](#a-7-5-format-of-debugging-information) + - [A.7.5.5 Classes and Forms](#a-7-5-5-classes-and-forms) + - [A.7.7 DWARF Expressions](#a-7-7-dwarf-expressions) + - [A.7.7.1 Operation Expressions](#a-7-7-1-operation-expressions) + - [A.7.7.3 Location List Expressions](#a-7-7-3-location-list-expressions) +- [B. Further Information](#b-further-information) + +# 1. Extension In DWARF 5, expressions are evaluated using a typed value stack, a separate location area, and an independent loclist mechanism. This extension unifies all @@ -54,15 +109,22 @@ specialized context sensitive operations are harder for both produces and consumers than a smaller number of general composable operations that have consistent semantics regardless of context. -The following sections first describe heterogeneous devices and the features -they have that are not addressed by DWARF 5. Then a brief simplified overview of -the DWARF 5 expression evaluation model is presented that highlights the -difficulties for supporting the heterogeneous features. Finally, an overview of -the extension is presented, using simplified examples to illustrate how it can -address the issues of heterogeneous devices and also benefit non-heterogeneous -devices. References to further information are provided. - -# Heterogeneous Computing Devices +First, section [2. Heterogeneous Computing +Devices](#heterogeneous-computing-devices) describes heterogeneous devices and +the features they have that are not addressed by DWARF 5. Then section [3. DWARF +5](#dwarf-5) presents a brief simplified overview of the DWARF 5 expression +evaluation model that highlights the difficulties for supporting the +heterogeneous features. Next, section [4. 
Extension +Solution](#extension-solution) provides an overview of the proposal, using +simplified examples to illustrate how it can address the issues of heterogeneous +devices and also benefit non-heterogeneous devices. Then overall conclusions are +covered in section [5. Conclusion](#conclusion). Appendix [A. Changes to DWARF +Debugging Information Format Version +5](#a-changes-to-dwarf-debugging-information-format-version-5) gives changes +relative to the DWARF Version 5 standard. Finally, appendix [B. Further +Information](#b-further-information) has references to further information. + +# 2. Heterogeneous Computing Devices GPUs and other heterogeneous computing devices have features not common to CPU computing devices. @@ -101,13 +163,13 @@ of runtime defined pieces of vector registers. With the more complex locations, there is a benefit to be able to factorize their calculation which requires all location kinds to be supported uniformly, otherwise duplication is necessary. -# DWARF 5 +# 3. DWARF 5 Before presenting the proposed solution to supporting heterogeneous devices, a brief overview of the DWARF 5 expression evaluation model will be given to highlight the aspects being addressed by the extension. -## How DWARF Maps Source Language To Hardware +## 3.1 How DWARF Maps Source Language To Hardware DWARF is a standardized way to specify debug information. It describes source language entities such as compilation units, functions, types, variables, etc. @@ -158,13 +220,13 @@ may include: value or with the location of a base object that is available using the DW_OP_push_object_address operation. -## Examples +## 3.2 Examples The following examples illustrate how DWARF expressions involving operations are evaluated in DWARF 5. DWARF also has expressions involving location lists that are not covered in these examples. 
-### Dynamic Array Size +### 3.2.1 Dynamic Array Size The first example is for an operation expression associated with a DIE attribute that provides the number of elements in a dynamic array type. Such an attribute @@ -211,7 +273,7 @@ The evaluation stops when it reaches the end of the expression. The result of an expression that is evaluated with a value result kind context is the top element of the stack, which provides the value and its type. -### Variable Location in Register +### 3.2.2 Variable Location in Register This example is for an operation expression associated with a DIE attribute that provides the location of a source language variable. Such an attribute dictates @@ -244,7 +306,7 @@ Again, evaluation stops when it reaches the end of the expression. The result of an expression that is evaluated with a location result kind context is the location description in the location area. -### Variable Location in Memory +### 3.2.3 Variable Location in Memory The next example is for an operation expression associated with a DIE attribute that provides the location of a source language variable that is allocated in a @@ -285,7 +347,7 @@ location area. ![Variable Location in Memory Example: Step 4](images/03-memory.example.frame.4.png) -### Variable Spread Across Different Locations +### 3.2.4 Variable Spread Across Different Locations This example is for a source variable that is partly in a register, partly undefined, and partly in memory. @@ -349,11 +411,12 @@ of the expression is the location description in the location area. ![Variable Spread Across Different Locations Example: Step 7](images/04-composite.example.frame.7.png) -### Offsetting a Composite Location +### 3.2.5 Offsetting a Composite Location This example attempts to extend the previous example to offset the composite -location description it created. The *Variable Location in Memory* example -conveniently used the DW_OP_plus operation to offset a memory address. +location description it created. 
The [3.2.3 Variable Location in +Memory](#variable-location-in-memory) example conveniently used the DW_OP_plus +operation to offset a memory address. DW_OP_regx SGPR3 DW_OP_piece 4 @@ -380,7 +443,7 @@ the offset. For example: This illustrates that operations on stack values are not composable with operations on location descriptions. -## Limitations +## 3.3 Limitations DWARF 5 is unable to describe variables in runtime indexed parts of registers. This is required to describe a source variable that is located in a lane of a @@ -406,7 +469,7 @@ Bit field offsets are only supported in a limited way for register locations. Supporting them in a uniform manner for all location kinds is required to support languages with bit sized entities. -# Extension Solution +# 4. Extension Solution This section outlines the extension to generalize the DWARF expression evaluation model to allow location descriptions to be manipulated on the stack. It presents @@ -414,7 +477,7 @@ a number of simplified examples to demonstrate the benefits and how the extensio solves the issues of heterogeneous devices. It presents how this is done in a manner that is backwards compatible with DWARF 5. -## Location Description +## 4.1 Location Description In order to have consistent, composable operations that act on location descriptions, the extension defines a uniform way to handle all location kinds. @@ -437,7 +500,7 @@ storage). undefined. - For composite, it is a linear stream of bytes defined by the composite's parts. -## Stack Location Description Operations +## 4.2 Stack Location Description Operations The DWARF expression stack is extended to allow each stack entry to either be a value or a location description. @@ -475,12 +538,12 @@ A DW_OP_undefined operation can be defined that explicitly creates the undefined location description. Currently this is only possible as a piece of a composite when the stack is empty. 
-## Examples +## 4.3 Examples This section provides some motivating examples to illustrate the benefits that result from allowing location descriptions on the stack. -### Source Language Variable Spilled to Part of a Vector Register +### 4.3.1 Source Language Variable Spilled to Part of a Vector Register A compiler generating code for a GPU may allocate a source language variable that it proves has the same value for every lane of a SIMT thread in a scalar @@ -522,7 +585,7 @@ GPUs due to the sheer number of registers that would have to be defined. It would also not permit a runtime index into part of the whole register to be used as shown in the next example. -### Source Language Variable Spread Across Multiple Vector Registers +### 4.3.2 Source Language Variable Spread Across Multiple Vector Registers A compiler may generate SIMT code for a GPU. Each source language thread of execution is mapped to a single lane of the GPU thread. Source language @@ -623,7 +686,7 @@ description and returned as the result. ![Source Language Variable Spread Across Multiple Vector Registers Example: Step 14](images/07-extension-multi-lane-vgpr.example.frame.14.png) -### Source Language Variable Spread Across Multiple Kinds of Locations +### 4.3.3 Source Language Variable Spread Across Multiple Kinds of Locations This example is the same as the previous one, except the first 2 bytes of the second vector register have been spilled to memory, and the last 2 bytes have @@ -683,7 +746,7 @@ beneficial to factor the incrementally creation of location descriptions. ![Source Language Variable Spread Across Multiple Kinds of Locations Example: Step 12](images/08-extension-mixed-composite.example.frame.7.png) -### Address Spaces +### 4.3.4 Address Spaces Heterogeneous devices can have multiple hardware supported address spaces which use specific hardware instructions to access them. @@ -752,7 +815,7 @@ address spaces. 
For example, this can happen when parts of a source variable allocated in a register are spilled to a stack frame that resides in the non-global address space. -### Bit Offsets +### 4.3.5 Bit Offsets With the generalization of location descriptions on the stack, it is possible to define a DW_OP_bit_offset operation that adjusts the offset of any kind of @@ -792,7 +855,7 @@ The ordering of bits within a byte, like byte ordering, is defined by the target architecture. A base type could be extended to specify bit ordering in addition to byte ordering. -## Call Frame Information (CFI) +## 4.4 Call Frame Information (CFI) DWARF defines call frame information (CFI) that can be used to virtually unwind the subprogram call stack. This involves determining the location where register @@ -804,7 +867,7 @@ spaces, or even a composite of different location kinds. Therefore, the extension extends the CFI rules to support any kind of location description, and operations to create locations in address spaces. -## Objects Not In Byte Aligned Global Memory +## 4.5 Objects Not In Byte Aligned Global Memory DWARF 5 only effectively supports byte aligned memory locations on the stack by using a global memory address as a proxy for a memory location description. This @@ -842,7 +905,7 @@ expressions that support all of these. Full general support for bit fields and implicit locations benefits optimizations on any target. -## Higher Order Operations +## 4.6 Higher Order Operations The generalization allows an elegant way to add higher order operations that create location descriptions out of other location descriptions in a general @@ -876,7 +939,7 @@ to efficiently express a source language array that has had a set of elements promoted into a vector register when executing a set of iterations of a loop in a SIMD manner. 
-## Objects In Multiple Places +## 4.7 Objects In Multiple Places A compiler may allocate a source variable in stack frame memory, but for some range of code may promote it to a register. If the generated code does not @@ -922,7 +985,7 @@ evaluation of a DWARF expression results in multiple single location descriptions, the consumer can ensure any updates are done to all of them, and any reads can use any one of them. -# Conclusion +# 5. Conclusion A strength of DWARF is that it has generally sought to provide generalized composable solutions that address many problems, rather than solutions that only @@ -932,17 +995,2705 @@ significant family of issues. It addresses the specific issues present for heterogeneous computing devices, provides benefits for non-heterogeneous devices, and can help address a number of other previously reported issues. -# Further Information +# A. Changes to DWARF Debugging Information Format Version 5 + +> NOTE: This appendix provides changes relative to DWARF Version 5. It has been +> defined such that it is backwards compatible with DWARF Version 5. +> Non-normative text is shown in italics. The section numbers generally +> correspond to those in the DWARF Version 5 standard unless specified +> otherwise. Definitions are given to clarify how existing expression +> operations, CFI operations, and attributes behave with respect to generalized +> location descriptions that support multiple places. +> +> > NOTE: Notes are included to describe how the changes are to be applied to +> > the DWARF Version 5 standard. They also describe rational and issues that +> > may need further consideration. + +## A.2 General Description + +### A.2.5 DWARF Expressions + +> NOTE: This section, and its nested sections, replaces DWARF Version 5 section +> 2.5 and section 2.6. It is based on the text of the existing DWARF Version 5 +> standard. + +DWARF expressions describe how to compute a value or specify a location. 
+ +The evaluation of a DWARF expression can provide the location of an object, +the value of an array bound, the length of a dynamic string, the desired value +itself, and so on. + +If the evaluation of a DWARF expression does not encounter an error, then it can +either result in a value (see [2.5.2 DWARF Expression +Value](#dwarf-expression-value)) or a location description (see [2.5.3 DWARF +Location Description](#dwarf-location-description)). When a DWARF expression +is evaluated, it may be specified whether a value or location description is +required as the result kind. + +If a result kind is specified, and the result of the evaluation does not match +the specified result kind, then the implicit conversions described in [2.5.4.4.3 +Memory Location Description +Operations](#memory-location-description-operations) are performed if +valid. Otherwise, the DWARF expression is ill-formed. + +If the evaluation of a DWARF expression encounters an evaluation error, then the +result is an evaluation error. + +> NOTE: Decided to define the concept of an evaluation error. An alternative is +> to introduce an undefined value base type in a similar way to location +> descriptions having an undefined location description. Then operations that +> encounter an evaluation error can return the undefined location description or +> value with an undefined base type. +> +> All operations that act on values would return an undefined entity if given an +> undefined value. The expression would then always evaluate to completion, and +> can be tested to determine if it is an undefined entity. +> +> However, this would add considerable additional complexity and does not match +> that GDB throws an exception when these evaluation errors occur. + +If a DWARF expression is ill-formed, then the result is undefined. + +The following sections detail the rules for when a DWARF expression is +ill-formed or results in an evaluation error. 
+ +A DWARF expression can either be encoded as an operation expression (see [2.5.4 +DWARF Operation Expressions](#dwarf-operation-expressions)), or as a +location list expression (see [2.5.5 DWARF Location List +Expressions](#dwarf-location-list-expressions)). + +#### A.2.5.1 DWARF Expression Evaluation Context + +A DWARF expression is evaluated in a context that can include a number of +context elements. If multiple context elements are specified then they must be +self consistent or the result of the evaluation is undefined. The context +elements that can be specified are: + +1. A current result kind + + The kind of result required by the DWARF expression evaluation. If specified + it can be a location description or a value. + +2. A current thread + + The target architecture thread identifier of the source program thread of + execution for which a user presented expression is currently being + evaluated. + + It is required for operations that are related to target architecture + threads. + + For example, the `DW_OP_regval_type` operation. + +3. A current call frame + + The target architecture call frame identifier. It identifies a call frame + that corresponds to an active invocation of a subprogram in the current + thread. It is identified by its address on the call stack. The address is + referred to as the Canonical Frame Address (CFA). The call frame information + is used to determine the CFA for the call frames of the current thread's + call stack (see [6.4 Call Frame Information](#call-frame-information)). + + It is required for operations that specify target architecture registers to + support virtual unwinding of the call stack. + + For example, the `DW_OP_*reg*` operations. + + If specified, it must be an active call frame in the current thread. + Otherwise the result is undefined. + + If it is the currently executing call frame, then it is termed the top call + frame. + +4. 
A current program location + + The target architecture program location corresponding to the current call + frame of the current thread. + + The program location of the top call frame is the target architecture + program counter for the current thread. The call frame information is used + to obtain the value of the return address register to determine the program + location of the other call frames (see [6.4 Call Frame + Information](#call-frame-information)). + + It is required for the evaluation of location list expressions to select + amongst multiple program location ranges. It is required for operations that + specify target architecture registers to support virtual unwinding of the + call stack (see [6.4 Call Frame Information](#call-frame-information)). + + If specified: + + - If the current call frame is the top call frame, it must be the current + target architecture program location. + - If the current call frame F is not the top call frame, it must be the + program location associated with the call site in the current caller frame + F that invoked the callee frame. + - Otherwise the result is undefined. + +5. A current compilation unit + + The compilation unit debug information entry that contains the DWARF + expression being evaluated. + + It is required for operations that reference debug information associated + with the same compilation unit, including indicating if such references use + the 32-bit or 64-bit DWARF format. It can also provide the default address + space address size if no current target architecture is specified. + + For example, the `DW_OP_constx` and `DW_OP_addrx` operations. + + Note that this compilation unit may not be the same as the compilation + unit determined from the loaded code object corresponding to the current + program location. 
For example, the evaluation of the expression E associated + with a `DW_AT_location` attribute of the debug information entry operand of + the `DW_OP_call*` operations is evaluated with the compilation unit that + contains E and not the one that contains the `DW_OP_call*` operation + expression. + +6. A current target architecture + + The target architecture. + + It is required for operations that specify target architecture specific + entities. + + For example, target architecture specific entities include DWARF register + identifiers, DWARF address space identifiers, the default address space, and + the address space address sizes. + + If specified: + + - If the current thread is specified, then the current target architecture + must be the same as the target architecture of the current thread. + - If the current compilation unit is specified, then the current target + architecture default address space address size must be the same as the + `address_size` field in the header of the current compilation unit and any + associated entry in the `.debug_aranges` section. + - If the current program location is specified, then the current target + architecture must be the same as the target architecture of any line + number information entry (see [6.2 Line Number + Information](#line-number-information)) corresponding to the current + program location. + - If the current program location is specified, then the current target + architecture default address space address size must be the same as the + `address_size` field in the header of any entry corresponding to the + current program location in the `.debug_addr`, `.debug_line`, + `.debug_rnglists`, `.debug_rnglists.dwo`, `.debug_loclists`, and + `.debug_loclists.dwo` sections. + - Otherwise the result is undefined. + +7. A current object + + The location description of a program object. + + It is required for the `DW_OP_push_object_address` operation. 
+
+   For example, the `DW_AT_data_location` attribute on type debug
+   information entries specifies the program object corresponding to a runtime
+   descriptor as the current object when it evaluates its associated
+   expression.
+
+   The result is undefined if the location descriptor is invalid (see [2.5.3
+   DWARF Location Description](#dwarf-location-description)).
+
+8. An initial stack
+
+   This is a list of values or location descriptions that will be pushed on the
+   operation expression evaluation stack in the order provided before
+   evaluation of an operation expression starts.
+
+   Some debugger information entries have attributes that evaluate their DWARF
+   expression value with initial stack entries. In all other cases the initial
+   stack is empty.
+
+   The result is undefined if any location descriptors are invalid (see [2.5.3
+   DWARF Location Description](#dwarf-location-description)).
+
+If the evaluation requires a context element that is not specified, then the
+result of the evaluation is an error.
+
+A DWARF expression for a location description may be able to be evaluated
+without a thread, call frame, program location, or architecture context. For
+example, the location of a global variable may be able to be evaluated without
+such context. If the expression evaluates with an error then it may indicate the
+variable has been optimized and so requires more context.
+
+The DWARF expression operations for call frame information (see [6.4 Call Frame
+Information](#call-frame-information)) are restricted to those
+that do not require the compilation unit context to be specified.
+
+The DWARF is ill-formed if all the `address_size` fields in the headers of all
+the entries in the `.debug_info`, `.debug_addr`, `.debug_line`,
+`.debug_rnglists`, `.debug_rnglists.dwo`, `.debug_loclists`, and
+`.debug_loclists.dwo` sections corresponding to any given program location do
+not match.
+
+#### A.2.5.2 DWARF Expression Value
+
+A value has a type and a literal value. It can represent a literal value of any
+supported base type of the target architecture. The base type specifies the
+size, encoding, and endianity of the literal value.
+
+> NOTE: It may be desirable to add an implicit pointer base type encoding. It
+> would be used for the type of the value that is produced when the
+> `DW_OP_deref*` operation retrieves the full contents of an implicit pointer
+> location storage created by the `DW_OP_implicit_pointer` operation. The
+> literal value would record the debugging information entry and byte
+> displacement specified by the associated `DW_OP_implicit_pointer` operation.
+
+There is a distinguished base type termed the generic type, which is an integral
+type that has the size of an address in the target architecture default address
+space, a target architecture defined endianity, and unspecified signedness.
+
+The generic type is the same as the unspecified type used for stack
+operations defined in DWARF Version 4 and before.
+
+An integral type is a base type that has an encoding of `DW_ATE_signed`,
+`DW_ATE_signed_char`, `DW_ATE_unsigned`, `DW_ATE_unsigned_char`,
+`DW_ATE_boolean`, or any target architecture defined integral encoding in the
+inclusive range `DW_ATE_lo_user` to `DW_ATE_hi_user`.
+
+> NOTE: It is unclear if `DW_ATE_address` is an integral type. GDB does not seem
+> to consider it as integral.
+
+#### A.2.5.3 DWARF Location Description
+
+Debugging information must provide consumers a way to find the location of
+program variables, determine the bounds of dynamic arrays and strings, and
+possibly to find the base address of a subprogram's call frame or the return
+address of a subprogram. Furthermore, to meet the needs of recent computer
+architectures and optimization techniques, debugging information must be able to
+describe the location of an object whose location changes over the object's
+lifetime, and may reside at multiple locations simultaneously during parts of an
+object's lifetime.
+ +Information about the location of program objects is provided by location +descriptions. + +Location descriptions can consist of one or more single location descriptions. + +A single location description specifies the location storage that holds a +program object and a position within the location storage where the program +object starts. The position within the location storage is expressed as a bit +offset relative to the start of the location storage. + +A location storage is a linear stream of bits that can hold values. Each +location storage has a size in bits and can be accessed using a zero-based bit +offset. The ordering of bits within a location storage uses the bit numbering +and direction conventions that are appropriate to the current language on the +target architecture. + +There are five kinds of location storage: + +1. memory location storage + + Corresponds to the target architecture memory address spaces. + +2. register location storage + + Corresponds to the target architecture registers. + +3. implicit location storage + + Corresponds to fixed values that can only be read. + +4. undefined location storage + + Indicates no value is available and therefore cannot be read or written. + +5. composite location storage + + Allows a mixture of these where some bits come from one location storage and + some from another location storage, or from disjoint parts of the same + location storage. + +> NOTE: It may be better to add an implicit pointer location storage kind used +> by the `DW_OP_implicit_pointer` operation. It would specify the debugger +> information entry and byte offset provided by the operations. + +Location descriptions are a language independent representation of addressing +rules. + +- They can be the result of evaluating a debugger information entry attribute + that specifies an operation expression of arbitrary complexity. 
In this usage + they can describe the location of an object as long as its lifetime is either + static or the same as the lexical block (see [3.5 Lexical Block + Entries](#lexical-block-entries)) that owns it, and it does not move during + its lifetime. + +- They can be the result of evaluating a debugger information entry attribute + that specifies a location list expression. In this usage they can describe the + location of an object that has a limited lifetime, changes its location during + its lifetime, or has multiple locations over part or all of its lifetime. + +If a location description has more than one single location description, the +DWARF expression is ill-formed if the object value held in each single location +description's position within the associated location storage is not the same +value, except for the parts of the value that are uninitialized. + +A location description that has more than one single location description can +only be created by a location list expression that has overlapping program +location ranges, or certain expression operations that act on a location +description that has more than one single location description. There are no +operation expression operations that can directly create a location description +with more than one single location description. + +A location description with more than one single location description can be +used to describe objects that reside in more than one piece of storage at the +same time. An object may have more than one location as a result of +optimization. For example, a value that is only read may be promoted from memory +to a register for some region of code, but later code may revert to reading the +value from memory as the register may be used for other purposes. For the code +region where the value is in a register, any change to the object value must be +made in both the register and the memory so both regions of code will read the +updated value. 
+
+A consumer of a location description with more than one single location
+description can read the object's value from any of the single location
+descriptions (since they all refer to location storage that has the same value),
+but must write any changed value to all the single location descriptions.
+
+Updating a location description L by a bit offset B is defined as adding the
+value of B to the bit offset of each single location description SL of L. It is
+an evaluation error if the updated bit offset of any SL is less than 0 or
+greater than or equal to the size of the location storage specified by SL.
+
+The evaluation of an expression may require context elements to create a
+location description. If such a location description is accessed, the storage it
+denotes is that associated with the context element values specified when the
+location description was created, which may differ from the context at the time
+it is accessed.
+
+For example, creating a register location description requires the thread
+context: the location storage is for the specified register of that thread.
+Creating a memory location description for an address space may require a
+thread context: the location storage is the memory associated with that
+thread.
+
+If any of the context elements required to create a location description change,
+the location description becomes invalid and accessing it is undefined.
+
+Examples of context that can invalidate a location description are:
+
+- The thread context is required and execution causes the thread to
+  terminate.
+- The call frame context is required and further execution causes the call
+  frame to return to the calling frame.
+- The program location is required and further execution of the thread
+  occurs. That could change the location list entry or call frame information
+  entry that applies.
+- An operation uses call frame information:
+  - Any of the frames used in the virtual call frame unwinding return.
+ - The top call frame is used, the program location is used to select the + call frame information entry, and further execution of the thread + occurs. + +A DWARF expression can be used to compute a location description for an +object. A subsequent DWARF expression evaluation can be given the object +location description as the object context or initial stack context to compute a +component of the object. The final result is undefined if the object location +description becomes invalid between the two expression evaluations. + +A change of a thread's program location may not make a location description +invalid, yet may still render it as no longer meaningful. Accessing such a +location description, or using it as the object context or initial stack context +of an expression evaluation, may produce an undefined result. + +For example, a location description may specify a register that no longer +holds the intended program object after a program location change. One way to +avoid such problems is to recompute location descriptions associated with +threads when their program locations change. + +#### A.2.5.4 DWARF Operation Expressions + +An operation expression is comprised of a stream of operations, each consisting +of an opcode followed by zero or more operands. The number of operands is +implied by the opcode. + +Operations represent a postfix operation on a simple stack machine. Each stack +entry can hold either a value or a location description. Operations can act on +entries on the stack, including adding entries and removing entries. If the kind +of a stack entry does not match the kind required by the operation and is not +implicitly convertible to the required kind (see [2.5.4.4.3 Memory Location +Description Operations](#memory-location-description-operations)), then +the DWARF operation expression is ill-formed. 
+ +Evaluation of an operation expression starts with an empty stack on which the +entries from the initial stack provided by the context are pushed in the order +provided. Then the operations are evaluated, starting with the first operation +of the stream. Evaluation continues until either an operation has an evaluation +error, or until one past the last operation of the stream is reached. + +The result of the evaluation is: + +- If an operation has an evaluation error, or an operation evaluates an + expression that has an evaluation error, then the result is an evaluation + error. +- If the current result kind specifies a location description, then: + - If the stack is empty, the result is a location description with one + undefined location description. + + This rule is for backwards compatibility with DWARF Version 5 which uses + an empty operation expression for this purpose. + + - If the top stack entry is a location description, or can be converted to one + (see [2.5.4.4.3 Memory Location Description + Operations](#memory-location-description-operations)), then the result + is that, possibly converted, location description. Any other entries on the + stack are discarded. + - Otherwise the DWARF expression is ill-formed. + + > NOTE: Could define this case as returning an implicit location description + > as if the `DW_OP_implicit` operation is performed. + +- If the current result kind specifies a value, then: + - If the top stack entry is a value, or can be converted to one (see + [2.5.4.4.3 Memory Location Description + Operations](#memory-location-description-operations)), then the result is + that, possibly converted, value. Any other entries on the stack are + discarded. + - Otherwise the DWARF expression is ill-formed. +- If the current result kind is not specified, then: + - If the stack is empty, the result is a location description with one + undefined location description. 
+ + This rule is for backwards compatibility with DWARF Version 5 which uses + an empty operation expression for this purpose. + + > NOTE: This rule is consistent with the rule above for when a location + > description is requested. However, GDB appears to report this as an error + > and no GDB tests appear to cause an empty stack for this case. + + - Otherwise, the top stack entry is returned. Any other entries on the stack + are discarded. + +An operation expression is encoded as a byte block with some form of prefix that +specifies the byte count. It can be used: + +- as the value of a debugging information entry attribute that is encoded using + class `exprloc` (see [7.5.5 Classes and Forms](#classes-and-forms)), +- as the operand to certain operation expression operations, +- as the operand to certain call frame information operations (see [6.4 Call + Frame Information](#call-frame-information)), +- and in location list entries (see [2.5.5 DWARF Location List + Expressions](#dwarf-location-list-expressions)). + +##### A.2.5.4.1 Stack Operations + +> NOTE: This section replaces DWARF Version 5 section 2.5.1.3. + +The following operations manipulate the DWARF stack. Operations that index the +stack assume that the top of the stack (most recently added entry) has index 0. +They allow the stack entries to be either a value or location description. + +If any stack entry accessed by a stack operation is an incomplete composite +location description (see [2.5.4.4.6 Composite Location Description +Operations](#composite-location-description-operations)), then the DWARF +expression is ill-formed. + +> NOTE: These operations now support stack entries that are values and location +> descriptions. + +> NOTE: If it is desired to also make them work with incomplete composite +> location descriptions, then would need to define that the composite location +> storage specified by the incomplete composite location description is also +> replicated when a copy is pushed. 
This ensures that each copy of the +> incomplete composite location description can update the composite location +> storage they specify independently. + +1. `DW_OP_dup` + + `DW_OP_dup` duplicates the stack entry at the top of the stack. + +2. `DW_OP_drop` + + `DW_OP_drop` pops the stack entry at the top of the stack and discards it. + +3. `DW_OP_pick` + + `DW_OP_pick` has a single unsigned 1-byte operand that represents an index + I. A copy of the stack entry with index I is pushed onto the stack. + +4. `DW_OP_over` + + `DW_OP_over` pushes a copy of the entry with index 1. + + This is equivalent to a `DW_OP_pick 1` operation. + +5. `DW_OP_swap` + + `DW_OP_swap` swaps the top two stack entries. The entry at the top of the + stack becomes the second stack entry, and the second stack entry becomes the + top of the stack. + +6. `DW_OP_rot` + + `DW_OP_rot` rotates the first three stack entries. The entry at the top of + the stack becomes the third stack entry, the second entry becomes the top of + the stack, and the third entry becomes the second entry. + +##### A.2.5.4.2 Control Flow Operations + +> NOTE: This section replaces DWARF Version 5 section 2.5.1.5. + +The following operations provide simple control of the flow of a DWARF operation +expression. + +1. `DW_OP_nop` + + `DW_OP_nop` is a place holder. It has no effect on the DWARF stack entries. + +2. `DW_OP_le`, `DW_OP_ge`, `DW_OP_eq`, `DW_OP_lt`, `DW_OP_gt`, + `DW_OP_ne` + + > NOTE: The same as in DWARF Version 5 section 2.5.1.5. + +3. `DW_OP_skip` + + `DW_OP_skip` is an unconditional branch. Its single operand is a 2-byte + signed integer constant. The 2-byte constant is the number of bytes of the + DWARF expression to skip forward or backward from the current operation, + beginning after the 2-byte constant. + + If the updated position is at one past the end of the last operation, then + the operation expression evaluation is complete. 
+ + Otherwise, the DWARF expression is ill-formed if the updated operation + position is not in the range of the first to last operation inclusive, or + not at the start of an operation. + +4. `DW_OP_bra` + + `DW_OP_bra` is a conditional branch. Its single operand is a 2-byte signed + integer constant. This operation pops the top of stack. If the value popped + is not the constant 0, the 2-byte constant operand is the number of bytes of + the DWARF operation expression to skip forward or backward from the current + operation, beginning after the 2-byte constant. + + If the updated position is at one past the end of the last operation, then + the operation expression evaluation is complete. + + Otherwise, the DWARF expression is ill-formed if the updated operation + position is not in the range of the first to last operation inclusive, or + not at the start of an operation. + +5. `DW_OP_call2, DW_OP_call4, DW_OP_call_ref` + + `DW_OP_call2`, `DW_OP_call4`, and `DW_OP_call_ref` perform DWARF procedure + calls during evaluation of a DWARF expression. + + `DW_OP_call2` and `DW_OP_call4`, have one operand that is, respectively, a + 2-byte or 4-byte unsigned offset DR that represents the byte offset of a + debugging information entry D relative to the beginning of the current + compilation unit. + + `DW_OP_call_ref` has one operand that is a 4-byte unsigned value in the + 32-bit DWARF format, or an 8-byte unsigned value in the 64-bit DWARF format, + that represents the byte offset DR of a debugging information entry D + relative to the beginning of the `.debug_info` section that contains the + current compilation unit. D may not be in the current compilation unit. + + > NOTE: DWARF Version 5 states that DR can be an offset in a `.debug_info` + > section other than the one that contains the current compilation unit. It + > states that relocation of references from one executable or shared object + > file to another must be performed by the consumer. 
But given that DR is
+ > defined as an offset in a `.debug_info` section this seems impossible. If
+ > DR was defined as an implementation defined value, then the consumer could
+ > choose to interpret the value in an implementation defined manner to
+ > reference debugging information in another executable or shared object.
+ >
+ > In ELF the `.debug_info` section is in a non-`PT_LOAD` segment so standard
+ > dynamic relocations cannot be used. But even if they were loaded segments
+ > and dynamic relocations were used, DR would need to be the address of D,
+ > not an offset in a `.debug_info` section. That would also need DR to be
+ > the size of a global address. So it would not be possible to use the
+ > 32-bit DWARF format in a 64-bit global address space. In addition, the
+ > consumer would need to determine what executable or shared object the
+ > relocated address was in so it could determine the containing compilation
+ > unit.
+ >
+ > GDB only interprets DR as an offset in the `.debug_info` section that
+ > contains the current compilation unit.
+ >
+ > This comment also applies to `DW_OP_implicit_pointer`.
+
+ Operand interpretation of `DW_OP_call2`, `DW_OP_call4`, and
+ `DW_OP_call_ref` is exactly like that for `DW_FORM_ref2`, `DW_FORM_ref4`,
+ and `DW_FORM_ref_addr`, respectively.
+
+ The call operation is evaluated by:
+
+ - If D has a `DW_AT_location` attribute that is encoded as a `exprloc` that
+ specifies an operation expression E, then execution of the current
+ operation expression continues from the first operation of E. Execution
+ continues until one past the last operation of E is reached, at which
+ point execution continues with the operation following the call operation.
+ The operations of E are evaluated with the same current context, except
+ current compilation unit is the one that contains D and the stack is the
+ same as that being used by the call operation. 
After the call operation + has been evaluated, the stack is therefore as it is left by the evaluation + of the operations of E. Since E is evaluated on the same stack as the call + operation, E can use, and/or remove entries already on the stack, and can + add new entries to the stack. + + Values on the stack at the time of the call may be used as parameters + by the called expression and values left on the stack by the called + expression may be used as return values by prior agreement between the + calling and called expressions. + + - If D has a `DW_AT_location` attribute that is encoded as a `loclist` or + `loclistsptr`, then the specified location list expression E is evaluated. + The evaluation of E uses the current context, except the result kind is a + location description, the compilation unit is the one that contains D, and + the initial stack is empty. The location description result is pushed on + the stack. + + > NOTE: This rule avoids having to define how to execute a matched + > location list entry operation expression on the same stack as the call + > when there are multiple matches. But it allows the call to obtain the + > location description for a variable or formal parameter which may use a + > location list expression. + > + > An alternative is to treat the case when D has a `DW_AT_location` + > attribute that is encoded as a `loclist` or `loclistsptr`, and the + > specified location list expression E' matches a single location list + > entry with operation expression E, the same as the `exprloc` case and + > evaluate on the same stack. + > + > But this is not attractive as if the attribute is for a variable that + > happens to end with a non-singleton stack, it will not simply put a + > location description on the stack. Presumably the intent of using + > `DW_OP_call*` on a variable or formal parameter debugger information + > entry is to push just one location description on the stack. 
That + > location description may have more than one single location description. + > + > The previous rule for `exprloc` also has the same problem, as normally a + > variable or formal parameter location expression may leave multiple + > entries on the stack and only return the top entry. + > + > GDB implements `DW_OP_call*` by always executing E on the same stack. If + > the location list has multiple matching entries, it simply picks the + > first one and ignores the rest. This seems fundamentally at odds with + > the desire to support multiple places for variables. + > + > So, it feels like `DW_OP_call*` should both support pushing a location + > description on the stack for a variable or formal parameter, and also + > support being able to execute an operation expression on the same stack. + > Being able to specify a different operation expression for different + > program locations seems a desirable feature to retain. + > + > A solution to that is to have a distinct `DW_AT_proc` attribute for the + > `DW_TAG_dwarf_procedure` debugging information entry. Then the + > `DW_AT_location` attribute expression is always executed separately and + > pushes a location description (that may have multiple single location + > descriptions), and the `DW_AT_proc` attribute expression is always + > executed on the same stack and can leave anything on the stack. + > + > The `DW_AT_proc` attribute could have the new classes `exprproc`, + > `loclistproc`, and `loclistsptrproc` to indicate that the expression is + > executed on the same stack. `exprproc` is the same encoding as + > `exprloc`. `loclistproc` and `loclistsptrproc` are the same encoding as + > their non-`proc` counterparts, except the DWARF is ill-formed if the + > location list does not match exactly one location list entry and a + > default entry is required. These forms indicate explicitly that the + > matched single operation expression must be executed on the same stack. 
+ > This is better than ad hoc special rules for `loclistproc` and + > `loclistsptrproc` which are currently clearly defined to always return a + > location description. The producer then explicitly indicates the intent + > through the attribute classes. + > + > Such a change would be a breaking change for how GDB implements + > `DW_OP_call*`. However, are the breaking cases actually occurring in + > practice? GDB could implement the current approach for DWARF Version 5, + > and the new semantics for DWARF Version 6 which has been done for some + > other features. + > + > Another option is to limit the execution to be on the same stack only to + > the evaluation of an expression E that is the value of a + > `DW_AT_location` attribute of a `DW_TAG_dwarf_procedure` debugging + > information entry. The DWARF would be ill-formed if E is a location list + > expression that does not match exactly one location list entry. In all + > other cases the evaluation of an expression E that is the value of a + > `DW_AT_location` attribute would evaluate E with the current context, + > except the result kind is a location description, the compilation unit + > is the one that contains D, and the initial stack is empty. The location + > description result is pushed on the stack. + + - If D has a `DW_AT_const_value` attribute with a value V, then it is as if + a `DW_OP_implicit_value V` operation was executed. + + This allows a call operation to be used to compute the location + description for any variable or formal parameter regardless of whether the + producer has optimized it to a constant. This is consistent with the + `DW_OP_implicit_pointer` operation. + + > NOTE: Alternatively, could deprecate using `DW_AT_const_value` for + > `DW_TAG_variable` and `DW_TAG_formal_parameter` debugger information + > entries that are constants and instead use `DW_AT_location` with an + > operation expression that results in a location description with one + > implicit location description. 
Then this rule would not be required. + + - Otherwise, there is no effect and no changes are made to the stack. + + > NOTE: In DWARF Version 5, if D does not have a `DW_AT_location` then + > `DW_OP_call*` is defined to have no effect. It is unclear that this is + > the right definition as a producer should be able to rely on using + > `DW_OP_call*` to get a location description for any + > non-`DW_TAG_dwarf_procedure` debugging information entries. Also, the + > producer should not be creating DWARF with `DW_OP_call*` to a + > `DW_TAG_dwarf_procedure` that does not have a `DW_AT_location` + > attribute. So, should this case be defined as an ill-formed DWARF + > expression? + + The `DW_TAG_dwarf_procedure` debugging information entry can be used to + define DWARF procedures that can be called. + +##### A.2.5.4.3 Value Operations + +This section describes the operations that push values on the stack. + +Each value stack entry has a type and a literal value. It can represent a +literal value of any supported base type of the target architecture. The base +type specifies the size, encoding, and endianity of the literal value. + +The base type of value stack entries can be the distinguished generic type. + +###### A.2.5.4.3.1 Literal Operations + +> NOTE: This section replaces DWARF Version 5 section 2.5.1.1. + +The following operations all push a literal value onto the DWARF stack. + +Operations other than `DW_OP_const_type` push a value V with the generic type. +If V is larger than the generic type, then V is truncated to the generic type +size and the low-order bits used. + +1. `DW_OP_lit0`, `DW_OP_lit1`, ..., `DW_OP_lit31` + + `DW_OP_lit` operations encode an unsigned literal value N from 0 through + 31, inclusive. They push the value N with the generic type. + +2. `DW_OP_const1u`, `DW_OP_const2u`, `DW_OP_const4u`, `DW_OP_const8u` + + `DW_OP_constu` operations have a single operand that is a 1, 2, 4, or + 8-byte unsigned integer constant U, respectively. 
They push the value U with + the generic type. + +3. `DW_OP_const1s`, `DW_OP_const2s`, `DW_OP_const4s`, `DW_OP_const8s` + + `DW_OP_consts` operations have a single operand that is a 1, 2, 4, or + 8-byte signed integer constant S, respectively. They push the value S with + the generic type. + +4. `DW_OP_constu` + + `DW_OP_constu` has a single unsigned LEB128 integer operand N. It pushes the + value N with the generic type. + +5. `DW_OP_consts` + + `DW_OP_consts` has a single signed LEB128 integer operand N. It pushes the + value N with the generic type. + +6. `DW_OP_constx` + + `DW_OP_constx` has a single unsigned LEB128 integer operand that represents + a zero-based index into the `.debug_addr` section relative to the value of + the `DW_AT_addr_base` attribute of the associated compilation unit. The + value N in the `.debug_addr` section has the size of the generic type. It + pushes the value N with the generic type. + + The `DW_OP_constx` operation is provided for constants that require + link-time relocation but should not be interpreted by the consumer as a + relocatable address (for example, offsets to thread-local storage). + +7. `DW_OP_const_type` + + `DW_OP_const_type` has three operands. The first is an unsigned LEB128 + integer DR that represents the byte offset of a debugging information entry + D relative to the beginning of the current compilation unit, that provides + the type T of the constant value. The second is a 1-byte unsigned integral + constant S. The third is a block of bytes B, with a length equal to S. + + TS is the bit size of the type T. The least significant TS bits of B are + interpreted as a value V of the type D. It pushes the value V with the type + D. + + The DWARF is ill-formed if D is not a `DW_TAG_base_type` debugging + information entry in the current compilation unit, or if TS divided by 8 + (the byte size) and rounded up to a whole number is not equal to S. 
+
+ While the size of the byte block B can be inferred from the type D
+ definition, it is encoded explicitly into the operation so that the
+ operation can be parsed easily without reference to the `.debug_info`
+ section.
+
+###### A.2.5.4.3.2 Arithmetic and Logical Operations
+
+> NOTE: This section is the same as DWARF Version 5 section 2.5.1.4.
+
+###### A.2.5.4.3.3 Type Conversion Operations
+
+> NOTE: This section is the same as DWARF Version 5 section 2.5.1.6.
+
+###### A.2.5.4.3.4 Special Value Operations
+
+> NOTE: This section replaces parts of DWARF Version 5 sections 2.5.1.2,
+ 2.5.1.3, and 2.5.1.7.
+
+There are these special value operations currently defined:
+
+1. `DW_OP_regval_type`
+
+ `DW_OP_regval_type` has two operands. The first is an unsigned LEB128
+ integer that represents a register number R. The second is an unsigned
+ LEB128 integer DR that represents the byte offset of a debugging information
+ entry D relative to the beginning of the current compilation unit, that
+ provides the type T of the register value.
+
+ The operation is equivalent to performing `DW_OP_regx R; DW_OP_deref_type
+ DR`.
+
+ > NOTE: Should DWARF allow the type T to be a larger size than the size of
+ > the register R? Restricting a larger bit size avoids any issue of
+ > conversion as the, possibly truncated, bit contents of the register is
+ > simply interpreted as a value of T. If a conversion is wanted it can be
+ > done explicitly using a `DW_OP_convert` operation.
+ >
+ > GDB has a per register hook that allows a target specific conversion on a
+ > register by register basis. It defaults to truncation of bigger registers.
+ > Removing use of the target hook does not cause any test failures in common
+ > architectures. If the compiler for a target architecture did want some
+ > form of conversion, including a larger result type, it could always
+ > explicitly use the `DW_OP_convert` operation. 
+ > + > If T is a larger type than the register size, then the default GDB + > register hook reads bytes from the next register (or reads out of bounds + > for the last register!). Removing use of the target hook does not cause + > any test failures in common architectures (except an illegal hand written + > assembly test). If a target architecture requires this behavior, these + > extensions allow a composite location description to be used to combine + > multiple registers. + +2. `DW_OP_deref` + + S is the bit size of the generic type divided by 8 (the byte size) and + rounded up to a whole number. DR is the offset of a hypothetical debug + information entry D in the current compilation unit for a base type of the + generic type. + + The operation is equivalent to performing `DW_OP_deref_type S, DR`. + +3. `DW_OP_deref_size` + + `DW_OP_deref_size` has a single 1-byte unsigned integral constant that + represents a byte result size S. + + TS is the smaller of the generic type bit size and S scaled by 8 (the byte + size). If TS is smaller than the generic type bit size then T is an unsigned + integral type of bit size TS, otherwise T is the generic type. DR is the + offset of a hypothetical debug information entry D in the current + compilation unit for a base type T. + + > NOTE: Truncating the value when S is larger than the generic type matches + > what GDB does. This allows the generic type size to not be an integral + > byte size. It does allow S to be arbitrarily large. Should S be restricted + > to the size of the generic type rounded up to a multiple of 8? + + The operation is equivalent to performing `DW_OP_deref_type S, DR`, except + if T is not the generic type, the value V pushed is zero-extended to the + generic type bit size and its type changed to the generic type. + +4. `DW_OP_deref_type` + + `DW_OP_deref_type` has two operands. The first is a 1-byte unsigned integral + constant S. 
The second is an unsigned LEB128 integer DR that represents the + byte offset of a debugging information entry D relative to the beginning of + the current compilation unit, that provides the type T of the result value. + + TS is the bit size of the type T. + + While the size of the pushed value V can be inferred from the type T, it + is encoded explicitly as the operand S so that the operation can be parsed + easily without reference to the `.debug_info` section. + + > NOTE: It is unclear why the operand S is needed. Unlike + > `DW_OP_const_type`, the size is not needed for parsing. Any evaluation + > needs to get the base type T to push with the value to know its encoding + > and bit size. + + It pops one stack entry that must be a location description L. + + A value V of TS bits is retrieved from the location storage LS specified by + one of the single location descriptions SL of L. + + If L, or the location description of any composite location description + part that is a subcomponent of L, has more than one single location + description, then any one of them can be selected as they are required to + all have the same value. For any single location description SL, bits are + retrieved from the associated storage location starting at the bit offset + specified by SL. For a composite location description, the retrieved bits + are the concatenation of the N bits from each composite location part PL, + where N is limited to the size of PL. + + V is pushed on the stack with the type T. + + > NOTE: This definition makes it an evaluation error if L is a register + > location description that has less than TS bits remaining in the register + > storage. Particularly since these extensions extend location descriptions + > to have a bit offset, it would be odd to define this as performing sign + > extension based on the type, or be target architecture dependent, as the + > number of remaining bits could be any number. 
This matches the GDB + > implementation for `DW_OP_deref_type`. + > + > These extensions define `DW_OP_*breg*` in terms of `DW_OP_regval_type`. + > `DW_OP_regval_type` is defined in terms of `DW_OP_regx`, which uses a 0 + > bit offset, and `DW_OP_deref_type`. Therefore, it requires the register + > size to be greater or equal to the address size of the address space. This + > matches the GDB implementation for `DW_OP_*breg*`. + + The DWARF is ill-formed if D is not in the current compilation unit, D is + not a `DW_TAG_base_type` debugging information entry, or if TS divided by 8 + (the byte size) and rounded up to a whole number is not equal to S. + + > NOTE: This definition allows the base type to be a bit size since there + > seems no reason to restrict it. + + It is an evaluation error if any bit of the value is retrieved from the + undefined location storage or the offset of any bit exceeds the size of the + location storage LS specified by any single location description SL of L. + + See [2.5.4.4.5 Implicit Location Description + Operations](#implicit-location-description-operations) for special + rules concerning implicit location descriptions created by the + `DW_OP_implicit_pointer` operation. + +5. `DW_OP_xderef` + + `DW_OP_xderef` pops two stack entries. The first must be an integral type + value that represents an address A. The second must be an integral type + value that represents a target architecture specific address space + identifier AS. + + The address size S is defined as the address bit size of the target + architecture specific address space that corresponds to AS. + + A is adjusted to S bits by zero extending if necessary, and then treating + the least significant S bits as an unsigned value A'. + + It creates a location description L with one memory location description SL. + SL specifies the memory location storage LS that corresponds to AS with a + bit offset equal to A' scaled by 8 (the byte size). 
+ + If AS is an address space that is specific to context elements, then LS + corresponds to the location storage associated with the current context. + + For example, if AS is for per thread storage then LS is the location + storage for the current thread. Therefore, if L is accessed by an operation, + the location storage selected when the location description was created is + accessed, and not the location storage associated with the current context + of the access operation. + + The DWARF expression is ill-formed if AS is not one of the values defined by + the target architecture specific `DW_ASPACE_*` values. + + The operation is equivalent to popping A and AS, pushing L, and then + performing `DW_OP_deref`. The value V retrieved is left on the stack with + the generic type. + +6. `DW_OP_xderef_size` + + `DW_OP_xderef_size` has a single 1-byte unsigned integral constant that + represents a byte result size S. + + It pops two stack entries. The first must be an integral type value + that represents an address A. The second must be an integral type + value that represents a target architecture specific address space + identifier AS. + + It creates a location description L as described for `DW_OP_xderef`. + + The operation is equivalent to popping A and AS, pushing L, and then + performing `DW_OP_deref_size S` . The zero-extended value V retrieved is + left on the stack with the generic type. + +7. `DW_OP_xderef_type` + + `DW_OP_xderef_type` has two operands. The first is a 1-byte unsigned + integral constant S. The second operand is an unsigned LEB128 integer DR + that represents the byte offset of a debugging information entry D relative + to the beginning of the current compilation unit, that provides the type T + of the result value. + + It pops two stack entries. The first must be an integral type value that + represents an address A. The second must be an integral type value that + represents a target architecture specific address space identifier AS. 
+
+ It creates a location description L as described for `DW_OP_xderef`.
+
+ The operation is equivalent to popping A and AS, pushing L, and then
+ performing `DW_OP_deref_type DR`. The value V retrieved is left on the
+ stack with the type T.
+
+8. `DW_OP_entry_value` Deprecated
+
+ `DW_OP_entry_value` pushes the value of an expression that is evaluated in
+ the context of the calling frame.
+
+ It may be used to determine the value of arguments on entry to the
+ current call frame provided they are not clobbered.
+
+ It has two operands. The first is an unsigned LEB128 integer S. The second
+ is a block of bytes, with a length equal to S, interpreted as a DWARF
+ operation expression E.
+
+ E is evaluated with the current context, except the result kind is
+ unspecified, the call frame is the one that called the current frame, the
+ program location is the call site in the calling frame, the object is
+ unspecified, and the initial stack is empty. The calling frame information
+ is obtained by virtually unwinding the current call frame using the call
+ frame information (see [6.4 Call Frame
+ Information](#call-frame-information)).
+
+ If the result of E is a location description L (see [2.5.4.4.4 Register
+ Location Description
+ Operations](#register-location-description-operations)), and the last
+ operation executed by E is a `DW_OP_reg*` for register R with a target
+ architecture specific base type of T, then the contents of the register are
+ retrieved as if a `DW_OP_deref_type DR` operation was performed where DR is
+ the offset of a hypothetical debug information entry in the current
+ compilation unit for T. The resulting value V is pushed on the stack.
+
+ Using `DW_OP_reg*` provides a more compact form for the case where the
+ value was in a register on entry to the subprogram.
+
+ > NOTE: It is unclear how this provides a more compact expression, as
+ > `DW_OP_regval_type` could be used which is marginally larger. 
+ + If the result of E is a value V, then V is pushed on the stack. + + Otherwise, the DWARF expression is ill-formed. + + The `DW_OP_entry_value` operation is deprecated as its main usage is + provided by other means. DWARF Version 5 added the + `DW_TAG_call_site_parameter` debugger information entry for call sites that + has `DW_AT_call_value`, `DW_AT_call_data_location`, and + `DW_AT_call_data_value` attributes that provide DWARF expressions to compute + actual parameter values at the time of the call, and requires the producer + to ensure the expressions are valid to evaluate even when virtually + unwound. + + > NOTE: GDB only implements `DW_OP_entry_value` when E is exactly + > `DW_OP_reg*` or `DW_OP_breg*; DW_OP_deref*`. + +##### A.2.5.4.4 Location Description Operations + +This section describes the operations that push location descriptions on the +stack. + +###### A.2.5.4.4.1 General Location Description Operations + +> NOTE: This section replaces part of DWARF Version 5 section 2.5.1.3. + +1. `DW_OP_push_object_address` + + `DW_OP_push_object_address` pushes the location description L of the current + object. + + This object may correspond to an independent variable that is part of a + user presented expression that is being evaluated. The object location + description may be determined from the variable's own debugging information + entry or it may be a component of an array, structure, or class whose + address has been dynamically determined by an earlier step during user + expression evaluation. + + This operation provides explicit functionality (especially for arrays + involving descriptors) that is analogous to the implicit push of the base + location description of a structure prior to evaluation of a + `DW_AT_data_member_location` to access a data member of a structure. + + > NOTE: This operation could be removed and the object location description + > specified as the initial stack as for `DW_AT_data_member_location`. 
+ >
+ > Or this operation could be used instead of needing to specify an initial
+ > stack. The latter approach is more composable as access to the object may
+ > be needed at any point of the expression, and passing it as the initial
+ > stack requires the entire expression to be aware where on the stack it is.
+ > If this were done, `DW_AT_use_location` would require a
+ > `DW_OP_push_object2_address` operation for the second object.
+ >
+ > Or a more general way to pass an arbitrary number of arguments in and an
+ > operation to get the Nth one such as `DW_OP_arg N`. A vector of
+ > arguments would then be passed in the expression context rather than an
+ > initial stack. This could also resolve the issues with `DW_OP_call*` by
+ > allowing a specific number of arguments passed in and returned to be
+ > specified. The `DW_OP_call*` operation could then always execute on a
+ > separate stack: the number of arguments would be specified in a new call
+ > operation and taken from the caller's stack, and similarly the number of
+ > return results specified and copied from the called stack back to the
+ > callee stack when the called expression was complete.
+ >
+ > The only attribute that specifies a current object is
+ > `DW_AT_data_location` so the non-normative text seems to overstate how
+ > this is being used. Or are there other attributes that need to state they
+ > pass an object?
+
+###### A.2.5.4.4.2 Undefined Location Description Operations
+
+> NOTE: This section replaces DWARF Version 5 section 2.6.1.1.1.
+
+The undefined location storage represents a piece or all of an object that is
+present in the source but not in the object code (perhaps due to optimization).
+Neither reading nor writing to the undefined location storage is meaningful.
+
+An undefined location description specifies the undefined location storage.
+There is no concept of the size of the undefined location storage, nor of a bit
+offset for an undefined location description. 
The `DW_OP_*piece` operations can +implicitly specify an undefined location description, allowing any size and +offset to be specified, and results in a part with all undefined bits. + +###### A.2.5.4.4.3 Memory Location Description Operations + +> NOTE: This section replaces parts of DWARF Version 5 section 2.5.1.1, 2.5.1.2, +> 2.5.1.3, and 2.6.1.1.2. + +Each of the target architecture specific address spaces has a corresponding +memory location storage that denotes the linear addressable memory of that +address space. The size of each memory location storage corresponds to the range +of the addresses in the corresponding address space. + +It is target architecture defined how address space location storage maps to +target architecture physical memory. For example, they may be independent +memory, or more than one location storage may alias the same physical memory +possibly at different offsets and with different interleaving. The mapping may +also be dictated by the source language address classes. + +A memory location description specifies a memory location storage. The bit +offset corresponds to a bit position within a byte of the memory. Bits accessed +using a memory location description, access the corresponding target +architecture memory starting at the bit position within the byte specified by +the bit offset. + +A memory location description that has a bit offset that is a multiple of 8 (the +byte size) is defined to be a byte address memory location description. It has a +memory byte address A that is equal to the bit offset divided by 8. + +A memory location description that does not have a bit offset that is a multiple +of 8 (the byte size) is defined to be a bit field memory location description. +It has a bit position B equal to the bit offset modulo 8, and a memory byte +address A equal to the bit offset minus B that is then divided by 8. 
+
+The address space AS of a memory location description is defined to be the
+address space that corresponds to the memory location storage associated with
+the memory location description.
+
+A location description that is comprised of one byte address memory location
+description SL is defined to be a memory byte address location description. It
+has a byte address equal to A and an address space equal to AS of the
+corresponding SL.
+
+`DW_ASPACE_none` is defined as the target architecture default address space.
+
+If a stack entry is required to be a location description, but it is a value V
+with the generic type, then it is implicitly converted to a location description
+L with one memory location description SL. SL specifies the memory location
+storage that corresponds to the target architecture default address space with a
+bit offset equal to V scaled by 8 (the byte size).
+
+> NOTE: If it is wanted to allow any integral type value to be implicitly
+> converted to a memory location description in the target architecture default
+> address space:
+>
+> > If a stack entry is required to be a location description, but is a value V
+> > with an integral type, then it is implicitly converted to a location
+> > description L with one memory location description SL. If the type size of
+> > V is less than the generic type size, then the value V is zero extended to
+> > the size of the generic type. The least significant generic type size bits
+> > are treated as an unsigned value to be used as an address A. SL specifies
+> > memory location storage corresponding to the target architecture default
+> > address space with a bit offset equal to A scaled by 8 (the byte size).
+>
+> The implicit conversion could also be defined as target architecture specific.
+> For example, GDB checks if V is an integral type. If it is not, it gives an
+> error. Otherwise, GDB zero-extends V to 64 bits. If the GDB target defines a
+> hook function, then it is called. 
The target specific hook function can modify +> the 64-bit value, possibly sign extending based on the original value type. +> Finally, GDB treats the 64-bit value V as a memory location address. + +If a stack entry is required to be a location description, but it is an implicit +pointer value IPV with the target architecture default address space, then it is +implicitly converted to a location description with one single location +description specified by IPV. See [2.5.4.4.5 Implicit Location Description +Operations](#implicit-location-description-operations). + +If a stack entry is required to be a value, but it is a location description L +with one memory location description SL in the target architecture default +address space with a bit offset B that is a multiple of 8, then it is implicitly +converted to a value equal to B divided by 8 (the byte size) with the generic +type. + +1. `DW_OP_addr` + + `DW_OP_addr` has a single byte constant value operand, which has the size of + the generic type, that represents an address A. + + It pushes a location description L with one memory location description SL + on the stack. SL specifies the memory location storage corresponding to the + target architecture default address space with a bit offset equal to A + scaled by 8 (the byte size). + + If the DWARF is part of a code object, then A may need to be relocated. + For example, in the ELF code object format, A must be adjusted by the + difference between the ELF segment virtual address and the virtual address + at which the segment is loaded. + +2. `DW_OP_addrx` + + `DW_OP_addrx` has a single unsigned LEB128 integer operand that represents a + zero-based index into the `.debug_addr` section relative to the value of the + `DW_AT_addr_base` attribute of the associated compilation unit. The address + value A in the `.debug_addr` section has the size of the generic type. + + It pushes a location description L with one memory location description SL + on the stack. 
SL specifies the memory location storage corresponding to the + target architecture default address space with a bit offset equal to A + scaled by 8 (the byte size). + + If the DWARF is part of a code object, then A may need to be relocated. + For example, in the ELF code object format, A must be adjusted by the + difference between the ELF segment virtual address and the virtual address + at which the segment is loaded. + +3. `DW_OP_form_tls_address` + + `DW_OP_form_tls_address` pops one stack entry that must be an integral type + value and treats it as a thread-local storage address TA. + + It pushes a location description L with one memory location description SL + on the stack. SL is the target architecture specific memory location + description that corresponds to the thread-local storage address TA. + + The meaning of the thread-local storage address TA is defined by the + run-time environment. If the run-time environment supports multiple + thread-local storage blocks for a single thread, then the block + corresponding to the executable or shared library containing this DWARF + expression is used. + + Some implementations of C, C++, Fortran, and other languages support a + thread-local storage class. Variables with this storage class have distinct + values and addresses in distinct threads, much as automatic variables have + distinct values and addresses in each subprogram invocation. Typically, + there is a single block of storage containing all thread-local variables + declared in the main executable, and a separate block for the variables + declared in each shared library. Each thread-local variable can then be + accessed in its block using an identifier. This identifier is typically a + byte offset into the block and pushed onto the DWARF stack by one of the + `DW_OP_const*` operations prior to the `DW_OP_form_tls_address` operation. 
+ Computing the address of the appropriate block can be complex (in some + cases, the compiler emits a function call to do it), and difficult to + describe using ordinary DWARF location descriptions. Instead of forcing + complex thread-local storage calculations into the DWARF expressions, the + `DW_OP_form_tls_address` allows the consumer to perform the computation + based on the target architecture specific run-time environment. + +4. `DW_OP_call_frame_cfa` + + `DW_OP_call_frame_cfa` pushes the location description L of the Canonical + Frame Address (CFA) of the current subprogram, obtained from the call frame + information on the stack. See [6.4 Call Frame + Information](#call-frame-information). + + Although the value of the `DW_AT_frame_base` attribute of the debugger + information entry corresponding to the current subprogram can be computed + using a location list expression, in some cases this would require an + extensive location list because the values of the registers used in + computing the CFA change during a subprogram execution. If the call frame + information is present, then it already encodes such changes, and it is + space efficient to reference that using the `DW_OP_call_frame_cfa` + operation. + +5. `DW_OP_fbreg` + + `DW_OP_fbreg` has a single signed LEB128 integer operand that represents a + byte displacement B. + + The location description L for the frame base of the current + subprogram is obtained from the `DW_AT_frame_base` attribute of the debugger + information entry corresponding to the current subprogram as described in + [3.3.5 Low-Level Information](#low-level-information). + + The location description L is updated by bit offset B scaled by 8 (the byte + size) and pushed on the stack. + +6. `DW_OP_breg0`, `DW_OP_breg1`, ..., `DW_OP_breg31` + + The `DW_OP_breg` operations encode the numbers of up to 32 registers, + numbered from 0 through 31, inclusive. The register number R corresponds to + the N in the operation name. 
+ + They have a single signed LEB128 integer operand that represents a byte + displacement B. + + The address space identifier AS is defined as the one corresponding to the + target architecture specific default address space. + + The address size S is defined as the address bit size of the target + architecture specific address space corresponding to AS. + + The contents of the register specified by R are retrieved as if a + `DW_OP_regval_type R, DR` operation was performed where DR is the offset of + a hypothetical debug information entry in the current compilation unit for + an unsigned integral base type of size S bits. B is added and the least + significant S bits are treated as an unsigned value to be used as an address + A. + + They push a location description L comprising one memory location + description LS on the stack. LS specifies the memory location storage that + corresponds to AS with a bit offset equal to A scaled by 8 (the byte size). + +7. `DW_OP_bregx` + + `DW_OP_bregx` has two operands. The first is an unsigned LEB128 integer that + represents a register number R. The second is a signed LEB128 integer that + represents a byte displacement B. + + The action is the same as for `DW_OP_breg`, except that R is used as the + register number and B is used as the byte displacement. + +###### A.2.5.4.4.4 Register Location Description Operations + +> NOTE: This section replaces DWARF Version 5 section 2.6.1.1.3. + +There is a register location storage that corresponds to each of the target +architecture registers. The size of each register location storage corresponds +to the size of the corresponding target architecture register. + +A register location description specifies a register location storage. The bit +offset corresponds to a bit position within the register. Bits accessed using a +register location description access the corresponding target architecture +register starting at the specified bit offset. + +1. 
`DW_OP_reg0`, `DW_OP_reg1`, ..., `DW_OP_reg31` + + `DW_OP_reg` operations encode the numbers of up to 32 registers, numbered + from 0 through 31, inclusive. The target architecture register number R + corresponds to the N in the operation name. + + The operation is equivalent to performing `DW_OP_regx R`. + +2. `DW_OP_regx` + + `DW_OP_regx` has a single unsigned LEB128 integer operand that represents a + target architecture register number R. + + If the current call frame is the top call frame, it pushes a location + description L that specifies one register location description SL on the + stack. SL specifies the register location storage that corresponds to R with + a bit offset of 0 for the current thread. + + If the current call frame is not the top call frame, call frame information + (see [6.4 Call Frame Information](#call-frame-information)) is used to + determine the location description that holds the register for the current + call frame and current program location of the current thread. The resulting + location description L is pushed. + + Note that if call frame information is used, the resulting location + description may be register, memory, or undefined. + + An implementation may evaluate the call frame information immediately, or + may defer evaluation until L is accessed by an operation. If evaluation is + deferred, R and the current context can be recorded in L. When accessed, the + recorded context is used to evaluate the call frame information, not the + current context of the access operation. + +These operations obtain a register location. To fetch the contents of a +register, it is necessary to use `DW_OP_regval_type`, use one of the +`DW_OP_breg*` register-based addressing operations, or use `DW_OP_deref*` on a +register location description. + +###### A.2.5.4.4.5 Implicit Location Description Operations + +> NOTE: This section replaces DWARF Version 5 section 2.6.1.1.4. 
+
+Implicit location storage represents a piece or all of an object which has no
+actual location in the program but whose contents are nonetheless known, either
+as a constant or can be computed from other locations and values in the program.
+
+An implicit location description specifies an implicit location storage. The bit
+offset corresponds to a bit position within the implicit location storage. Bits
+accessed using an implicit location description, access the corresponding
+implicit storage value starting at the bit offset.
+
+1. `DW_OP_implicit_value`
+
+   `DW_OP_implicit_value` has two operands. The first is an unsigned LEB128
+   integer that represents a byte size S. The second is a block of bytes with a
+   length equal to S treated as a literal value V.
+
+   An implicit location storage LS is created with the literal value V and a
+   size of S.
+
+   It pushes location description L with one implicit location description SL
+   on the stack. SL specifies LS with a bit offset of 0.
+
+2. `DW_OP_stack_value`
+
+   `DW_OP_stack_value` pops one stack entry that must be a value V.
+
+   An implicit location storage LS is created with the literal value V using
+   the size, encoding, and endianity specified by V's base type.
+
+   It pushes a location description L with one implicit location description SL
+   on the stack. SL specifies LS with a bit offset of 0.
+
+   The `DW_OP_stack_value` operation specifies that the object does not
+   exist in memory, but its value is nonetheless known. In this form, the
+   location description specifies the actual value of the object, rather than
+   specifying the memory or register storage that holds the value.
+
+   See [2.5.4.4.5 Implicit Location Description
+   Operations](#implicit-location-description-operations) for special
+   rules concerning implicit pointer values produced by dereferencing implicit
+   location descriptions created by the `DW_OP_implicit_pointer` operation.
+ + > NOTE: Since location descriptions are allowed on the stack, the + > `DW_OP_stack_value` operation no longer terminates the DWARF operation + > expression execution as in DWARF Version 5. + +3. `DW_OP_implicit_pointer` + + An optimizing compiler may eliminate a pointer, while still retaining the + value that the pointer addressed. `DW_OP_implicit_pointer` allows a producer + to describe this value. + + `DW_OP_implicit_pointer` specifies an object is a pointer to the target + architecture default address space that cannot be represented as a real + pointer, even though the value it would point to can be described. In this + form, the location description specifies a debugging information entry that + represents the actual location description of the object to which the + pointer would point. Thus, a consumer of the debug information would be able + to access the dereferenced pointer, even when it cannot access the pointer + itself. + + `DW_OP_implicit_pointer` has two operands. The first operand is a 4-byte + unsigned value in the 32-bit DWARF format, or an 8-byte unsigned value in + the 64-bit DWARF format, that represents the byte offset DR of a debugging + information entry D relative to the beginning of the `.debug_info` section + that contains the current compilation unit. The second operand is a signed + LEB128 integer that represents a byte displacement B. + + Note that D may not be in the current compilation unit. + + The first operand interpretation is exactly like that for + `DW_FORM_ref_addr`. + + The address space identifier AS is defined as the one corresponding to the + target architecture specific default address space. + + The address size S is defined as the address bit size of the target + architecture specific address space corresponding to AS. + + An implicit location storage LS is created with the debugging information + entry D, address space AS, and size of S. 
+
+   It pushes a location description L that comprises one implicit location
+   description SL on the stack. SL specifies LS with a bit offset of 0.
+
+   It is an evaluation error if a `DW_OP_deref*` operation pops a location
+   description L', and retrieves S bits, such that any retrieved bits come from
+   an implicit location storage that is the same as LS, unless both the
+   following conditions are met:
+
+   1. All retrieved bits come from an implicit location description that
+      refers to an implicit location storage that is the same as LS.
+
+      Note that all bits do not have to come from the same implicit
+      location description, as L' may involve composite location
+      descriptions.
+
+   2. The bits come from consecutive ascending offsets within their respective
+      implicit location storage.
+
+   These rules are equivalent to retrieving the complete contents of LS.
+
+   If both the above conditions are met, then the value V pushed by the
+   `DW_OP_deref*` operation is an implicit pointer value IPV with a target
+   architecture specific address space of AS, a debugging information entry of
+   D, and a base type of T. If AS is the target architecture default address
+   space, then T is the generic type. Otherwise, T is a target architecture
+   specific integral type with a bit size equal to S.
+
+   If IPV is implicitly converted to a location description (only done
+   if AS is the target architecture default address space), then the resulting
+   location description RL is:
+
+   - If D has a `DW_AT_location` attribute, the DWARF expression E from the
+     `DW_AT_location` attribute is evaluated with the current context, except
+     that the result kind is a location description, the compilation unit is
+     the one that contains D, the object is unspecified, and the initial stack
+     is empty. RL is the expression result.
+ + Note that E is evaluated with the context of the expression accessing + IPV, and not the context of the expression that contained the + `DW_OP_implicit_pointer` operation that created L. + + - If D has a `DW_AT_const_value` attribute, then an implicit location + storage RLS is created from the `DW_AT_const_value` attribute's value with + a size matching the size of the `DW_AT_const_value` attribute's value. RL + comprises one implicit location description SRL. SRL specifies RLS with a + bit offset of 0. + + > NOTE: If using `DW_AT_const_value` for variables and formal parameters + > is deprecated and instead `DW_AT_location` is used with an implicit + > location description, then this rule would not be required. + + - Otherwise, it is an evaluation error. + + The location description RL is updated by bit offset B scaled by 8 (the byte + size). + + If a `DW_OP_stack_value` operation pops a value that is the same as IPV, + then it pushes a location description that is the same as L. + + It is an evaluation error if LS or IPV is accessed in any other manner. + + The restrictions on how an implicit pointer location description created + by `DW_OP_implicit_pointer` can be used are to simplify the DWARF consumer. + Similarly, for an implicit pointer value created by `DW_OP_deref*` and + `DW_OP_stack_value`. + +Typically a `DW_OP_implicit_pointer` operation is used in a DWARF expression +E1 of a `DW_TAG_variable` or `DW_TAG_formal_parameter` debugging +information entry D1's `DW_AT_location` attribute. The debugging +information entry referenced by the `DW_OP_implicit_pointer` operation is +typically itself a `DW_TAG_variable` or `DW_TAG_formal_parameter` debugging +information entry D2 whose `DW_AT_location` attribute gives a second +DWARF expression E2. + +D1 and E1 are describing the location of a pointer type +object. D2 and E2 are describing the location of the +object pointed to by that pointer object. 
+ +However, D2 may be any debugging information entry that contains a +`DW_AT_location` or `DW_AT_const_value` attribute (for example, +`DW_TAG_dwarf_procedure`). By using E2, a consumer can reconstruct +the value of the object when asked to dereference the pointer described by +E1 which contains the `DW_OP_implicit_pointer` operation. + +###### A.2.5.4.4.6 Composite Location Description Operations + +> NOTE: This section replaces DWARF Version 5 section 2.6.1.2. + +A composite location storage represents an object or value which may be +contained in part of another location storage or contained in parts of more than +one location storage. + +Each part has a part location description L and a part bit size S. L can have +one or more single location descriptions SL. If there are more than one SL then +that indicates that part is located in more than one place. The bits of each +place of the part comprise S contiguous bits from the location storage LS +specified by SL starting at the bit offset specified by SL. All the bits must be +within the size of LS or the DWARF expression is ill-formed. + +A composite location storage can have zero or more parts. The parts are +contiguous such that the zero-based location storage bit index will range over +each part with no gaps between them. Therefore, the size of a composite location +storage is the sum of the size of its parts. The DWARF expression is ill-formed +if the size of the contiguous location storage is larger than the size of the +memory location storage corresponding to the largest target architecture +specific address space. + +A composite location description specifies a composite location storage. The bit +offset corresponds to a bit position within the composite location storage. + +There are operations that create a composite location storage. + +There are other operations that allow a composite location storage to be +incrementally created. Each part is created by a separate operation. 
There may +be one or more operations to create the final composite location storage. A +series of such operations describes the parts of the composite location storage +that are in the order that the associated part operations are executed. + +To support incremental creation, a composite location storage can be in an +incomplete state. When an incremental operation operates on an incomplete +composite location storage, it adds a new part. + +A composite location description that specifies a composite location storage +that is incomplete is termed an incomplete composite location description. A +composite location description that specifies a composite location storage that +is complete is termed a complete composite location description. + +If the top stack entry is a location description that has one incomplete +composite location description SL after the execution of an operation expression +has completed, SL is converted to a complete composite location description. + +Note that this conversion does not happen after the completion of an +operation expression that is evaluated on the same stack by the `DW_OP_call*` +operations. Such executions are not a separate evaluation of an operation +expression, but rather the continued evaluation of the same operation expression +that contains the `DW_OP_call*` operation. + +If a stack entry is required to be a location description L, but L has an +incomplete composite location description, then the DWARF expression is +ill-formed. The exception is for the operations involved in incrementally +creating a composite location description as described below. + +Note that a DWARF operation expression may arbitrarily compose composite +location descriptions from any other location description, including those that +have multiple single location descriptions, and those that have composite +location descriptions. 
+ +The incremental composite location description operations are defined to be +compatible with the definitions in DWARF Version 5. + +1. `DW_OP_piece` + + `DW_OP_piece` has a single unsigned LEB128 integer that represents a byte + size S. + + The action is based on the context: + + - If the stack is empty, then a location description L comprised of one + incomplete composite location description SL is pushed on the stack. + + An incomplete composite location storage LS is created with a single part + P. P specifies a location description PL and has a bit size of S scaled by + 8 (the byte size). PL is comprised of one undefined location description + PSL. + + SL specifies LS with a bit offset of 0. + + - Otherwise, if the top stack entry is a location description L comprised of + one incomplete composite location description SL, then the incomplete + composite location storage LS that SL specifies is updated to append a new + part P. P specifies a location description PL and has a bit size of S + scaled by 8 (the byte size). PL is comprised of one undefined location + description PSL. L is left on the stack. + - Otherwise, if the top stack entry is a location description or can be + converted to one, then it is popped and treated as a part location + description PL. Then: + + - If the top stack entry (after popping PL) is a location description L + comprised of one incomplete composite location description SL, then the + incomplete composite location storage LS that SL specifies is updated to + append a new part P. P specifies the location description PL and has a + bit size of S scaled by 8 (the byte size). L is left on the stack. + - Otherwise, a location description L comprised of one + incomplete composite location description SL is pushed on + the stack. + + An incomplete composite location storage LS is created with a single + part P. P specifies the location description PL and has a bit size of S + scaled by 8 (the byte size). 
+
+       SL specifies LS with a bit offset of 0.
+
+   - Otherwise, the DWARF expression is ill-formed.
+
+   Many compilers store a single variable in sets of registers or store a
+   variable partially in memory and partially in registers. `DW_OP_piece`
+   provides a way of describing where a part of a variable is located.
+
+   The evaluation rules for the `DW_OP_piece` operation allow it to be
+   compatible with the DWARF Version 5 definition.
+
+   > NOTE: Since these extensions allow location descriptions to be entries on
+   > the stack, a simpler operation to create composite location descriptions
+   > could be defined. For example, just one operation that specifies how many
+   > parts, and pops pairs of stack entries for the part size and location
+   > description. Not only would this be a simpler operation and avoid the
+   > complexities of incomplete composite location descriptions, but it may
+   > also have a smaller encoding in practice. However, the desire for
+   > compatibility with DWARF Version 5 is likely a stronger consideration.
+
+2. `DW_OP_bit_piece`
+
+   `DW_OP_bit_piece` has two operands. The first is an unsigned LEB128 integer
+   that represents the part bit size S. The second is an unsigned LEB128
+   integer that represents a bit displacement B.
+
+   The action is the same as for `DW_OP_piece`, except that any part created
+   has the bit size S, and the location description PL of any created part is
+   updated by a bit offset B.
+
+   `DW_OP_bit_piece` is used instead of `DW_OP_piece` when the piece to be
+   assembled is not byte-sized or is not at the start of the part location
+   description.
+
+#### A.2.5.5 DWARF Location List Expressions
+
+> NOTE: This section replaces DWARF Version 5 section 2.6.2.
+ +To meet the needs of recent computer architectures and optimization +techniques, debugging information must be able to describe the location of an +object whose location changes over the object's lifetime, and may reside at +multiple locations during parts of an object's lifetime. Location list +expressions are used in place of operation expressions whenever the object whose +location is being described has these requirements. + +A location list expression consists of a series of location list entries. Each +location list entry is one of the following kinds: + +1. Bounded location description + + This kind of location list entry provides an operation expression that + evaluates to the location description of an object that is valid over a + lifetime bounded by a starting and ending address. The starting address is + the lowest address of the address range over which the location is valid. + The ending address is the address of the first location past the highest + address of the address range. + + The location list entry matches when the current program location is within + the given range. + + There are several kinds of bounded location description entries which differ + in the way that they specify the starting and ending addresses. + +2. Default location description + + This kind of location list entry provides an operation expression that + evaluates to the location description of an object that is valid when no + bounded location description entry applies. + + The location list entry matches when the current program location is not + within the range of any bounded location description entry. + +3. Base address + + This kind of location list entry provides an address to be used as the base + address for beginning and ending address offsets given in certain kinds of + bounded location description entries. 
The applicable base address of a + bounded location description entry is the address specified by the closest + preceding base address entry in the same location list. If there is no + preceding base address entry, then the applicable base address defaults to + the base address of the compilation unit (see DWARF Version 5 section + 3.1.1). + + In the case of a compilation unit where all of the machine code is contained + in a single contiguous section, no base address entry is needed. + +4. End-of-list + + This kind of location list entry marks the end of the location list + expression. + +The address ranges defined by the bounded location description entries of a +location list expression may overlap. When they do, they describe a situation in +which an object exists simultaneously in more than one place. + +If all of the address ranges in a given location list expression do not +collectively cover the entire range over which the object in question is +defined, and there is no following default location description entry, it is +assumed that the object is not available for the portion of the range that is +not covered. + +The result of the evaluation of a DWARF location list expression is: + +- If the current program location is not specified, then it is an evaluation + error. + + > NOTE: If the location list only has a single default entry, should that be + > considered a match if there is no program location? If there are non-default + > entries then it seems it has to be an evaluation error when there is no + > program location as that indicates the location depends on the program + > location which is not known. + +- If there are no matching location list entries, then the result is a location + description that comprises one undefined location description. 
+- Otherwise, the operation expression E of each matching location list entry is + evaluated with the current context, except that the result kind is a location + description, the object is unspecified, and the initial stack is empty. The + location list entry result is the location description returned by the + evaluation of E. + + The result is a location description that is comprised of the union of the + single location descriptions of the location description result of each + matching location list entry. + +A location list expression can only be used as the value of a debugger +information entry attribute that is encoded using class `loclist` or +`loclistsptr` (see [7.5.5 Classes and Forms](#classes-and-forms)). The value of +the attribute provides an index into a separate object file section called +`.debug_loclists` or `.debug_loclists.dwo` (for split DWARF object files) that +contains the location list entries. + +A `DW_OP_call*` and `DW_OP_implicit_pointer` operation can be used to specify a +debugger information entry attribute that has a location list expression. +Several debugger information entry attributes allow DWARF expressions that are +evaluated with an initial stack that includes a location description that may +originate from the evaluation of a location list expression. + +This location list representation, the `loclist` and `loclistsptr` class, and +the related `DW_AT_loclists_base` attribute are new in DWARF Version 5. Together +they eliminate most, or all of the code object relocations previously needed for +location list expressions. + +> NOTE: The rest of this section is the same as DWARF Version 5 section 2.6.2. + +## A.3 Program Scope Entries + +> NOTE: This section provides changes to existing debugger information entry +> attributes. These would be incorporated into the corresponding DWARF Version 5 +> chapter 3 sections. + +### A.3.3 Subroutine and Entry Point Entries + +#### A.3.3.5 Low-Level Information + +1. 
A `DW_TAG_subprogram`, `DW_TAG_inlined_subroutine`, or `DW_TAG_entry_point` + debugger information entry may have a `DW_AT_return_addr` attribute, whose + value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. The result of the evaluation is the location + description L of the place where the return address for the current call + frame's subprogram or entry point is stored. + + The DWARF is ill-formed if L is not comprised of one memory location + description for one of the target architecture specific address spaces. + + > NOTE: It is unclear why `DW_TAG_inlined_subroutine` has a + > `DW_AT_return_addr` attribute but not a `DW_AT_frame_base` or + > `DW_AT_static_link` attribute. Seems it would either have all of them or + > none. Since inlined subprograms do not have a call frame it seems they + > would have none of these attributes. + +2. A `DW_TAG_subprogram` or `DW_TAG_entry_point` debugger information entry may + have a `DW_AT_frame_base` attribute, whose value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. + + The DWARF is ill-formed if E contains an `DW_OP_fbreg` operation, or the + resulting location description L is not comprised of one single location + description SL. + + If SL is a register location description for register R, then L is replaced + with the result of evaluating a `DW_OP_bregx R, 0` operation. 
This computes
+ the frame base memory location description in the target architecture
+ default address space.
+
+ This allows the more compact `DW_OP_reg*` to be used instead of
+ `DW_OP_breg* 0`.
+
+ > NOTE: This rule could be removed and require the producer to create the
+ > required location description directly using `DW_OP_call_frame_cfa` or
+ > `DW_OP_breg*`. This would also then allow a target to implement the call
+ > frames within a large register.
+
+ Otherwise, the DWARF is ill-formed if SL is not a memory location
+ description in any of the target architecture specific address spaces.
+
+ The resulting L is the frame base for the subprogram or entry point.
+
+ Typically, E will use the `DW_OP_call_frame_cfa` operation or be a stack
+ pointer register plus or minus some offset.
+
+3. If a `DW_TAG_subprogram` or `DW_TAG_entry_point` debugger information entry
+ is lexically nested, it may have a `DW_AT_static_link` attribute, whose
+ value is a DWARF expression E.
+
+ The result of the attribute is obtained by evaluating E with a context that
+ has a result kind of a location description, an unspecified object, the
+ compilation unit that contains E, an empty initial stack, and other context
+ elements corresponding to the source language thread of execution upon which
+ the user is focused, if any. The result of the evaluation is the location
+ description L of the canonical frame address (see [6.4 Call Frame
+ Information](#call-frame-information)) of the relevant call frame of the
+ subprogram instance that immediately lexically encloses the current call
+ frame's subprogram or entry point.
+
+ The DWARF is ill-formed if L is not comprised of one memory location
+ description for one of the target architecture specific address spaces.
+
+### A.3.4 Call Site Entries and Parameters
+
+#### A.3.4.2 Call Site Parameters
+
+1. 
A `DW_TAG_call_site_parameter` debugger information entry may have a
+ `DW_AT_call_value` attribute, whose value is a DWARF operation expression
+ E1.
+
+ The result of the `DW_AT_call_value` attribute is obtained by evaluating
+ E1 with a context that has a result kind of a value, an unspecified
+ object, the compilation unit that contains E1, an empty initial stack, and other
+ context elements corresponding to the source language thread of execution upon
+ which the user is focused, if any. The resulting value V1 is the
+ value of the parameter at the time of the call made by the call site.
+
+ For parameters passed by reference, where the code passes a pointer to a
+ location which contains the parameter, or for reference type parameters, the
+ `DW_TAG_call_site_parameter` debugger information entry may also have a
+ `DW_AT_call_data_location` attribute whose value is a DWARF operation expression
+ E2, and a `DW_AT_call_data_value` attribute whose value is a DWARF
+ operation expression E3.
+
+ The value of the `DW_AT_call_data_location` attribute is obtained by evaluating
+ E2 with a context that has a result kind of a location description,
+ an unspecified object, the compilation unit that contains E2, an empty initial
+ stack, and other context elements corresponding to the source language thread of
+ execution upon which the user is focused, if any. The resulting location
+ description L2 is the location where the referenced parameter lives
+ during the call made by the call site. If E2 would just be a
+ `DW_OP_push_object_address`, then the `DW_AT_call_data_location` attribute may
+ be omitted.
+
+ > NOTE: The DWARF Version 5 implies that `DW_OP_push_object_address` may be
+ > used but does not state what object must be specified in the context.
+ > Either `DW_OP_push_object_address` cannot be used, or the object to be
+ > passed in the context must be defined. 
+
+ The value of the `DW_AT_call_data_value` attribute is obtained by evaluating
+ E3 with a context that has a result kind of a value, an unspecified
+ object, the compilation unit that contains E3, an empty initial stack, and other
+ context elements corresponding to the source language thread of execution upon
+ which the user is focused, if any. The resulting value V3 is the
+ value in L2 at the time of the call made by the call site.
+
+ The result of these attributes is undefined if the current call frame is not for
+ the subprogram containing the `DW_TAG_call_site_parameter` debugger information
+ entry or the current program location is not for the call site containing the
+ `DW_TAG_call_site_parameter` debugger information entry in the current call
+ frame.
+
+ The consumer may have to virtually unwind to the call site (see [6.4 Call
+ Frame Information](#call-frame-information)) in order to evaluate these
+ attributes. This will ensure the source language thread of execution upon which
+ the user is focused corresponds to the call site needed to evaluate the
+ expression.
+
+ If it is not possible to prevent the expressions of these attributes from
+ accessing registers or memory locations that might be clobbered by the
+ subprogram being called by the call site, then the associated attribute should
+ not be provided.
+
+ The reason for the restriction is that the parameter may need to be accessed
+ during the execution of the callee. The consumer may virtually unwind from the
+ called subprogram back to the caller and then evaluate the attribute
+ expressions. The call frame information (see [6.4 Call Frame
+ Information](#call-frame-information)) will not be able to restore registers
+ that have been clobbered, and clobbered memory will no longer have the value at
+ the time of the call.
+
+### A.3.5 Lexical Block Entries
+
+> NOTE: This section is the same as DWARF Version 5 section 3.5. 
+
+## A.4 Data Object and Object List Entries
+
+> NOTE: This section provides changes to existing debugger information entry
+> attributes. These would be incorporated into the corresponding DWARF Version 5
+> chapter 4 sections.
+
+### A.4.1 Data Object Entries
+
+1. Any debugging information entry describing a data object (which includes
+ variables and parameters) or common blocks may have a `DW_AT_location`
+ attribute, whose value is a DWARF expression E.
+
+ The result of the attribute is obtained by evaluating E with a context that
+ has a result kind of a location description, an unspecified object, the
+ compilation unit that contains E, an empty initial stack, and other context
+ elements corresponding to the source language thread of execution upon which
+ the user is focused, if any. The result of the evaluation is the location
+ description of the base of the data object.
+
+ See [2.5.4.2 Control Flow Operations](#control-flow-operations) for special
+ evaluation rules used by the `DW_OP_call*` operations.
+
+ > NOTE: Delete the description of how the `DW_OP_call*` operations evaluate
+ > a `DW_AT_location` attribute as that is now described in the operations.
+
+ > NOTE: See the discussion about the `DW_AT_location` attribute in the
+ > `DW_OP_call*` operation. Having each attribute only have a single purpose
+ > and single execution semantics seems desirable. It makes it easier for the
+ > consumer that no longer has to track the context. It makes it easier for
+ > the producer as it can rely on a single semantics for each attribute.
+ >
+ > For that reason, limiting the `DW_AT_location` attribute to only
+ > supporting evaluating the location description of an object, and using a
+ > different attribute and encoding class for the evaluation of DWARF
+ > expression procedures on the same operation expression stack seems
+ > desirable.
+
+2. 
`DW_AT_const_value` + + > NOTE: Could deprecate using the `DW_AT_const_value` attribute for + > `DW_TAG_variable` or `DW_TAG_formal_parameter` debugger information + > entries that have been optimized to a constant. Instead, `DW_AT_location` + > could be used with a DWARF expression that produces an implicit location + > description now that any location description can be used within a DWARF + > expression. This allows the `DW_OP_call*` operations to be used to push + > the location description of any variable regardless of how it is + > optimized. + +## A.5 Type Entries + +> NOTE: This section provides changes to existing debugger information entry +> attributes. These would be incorporated into the corresponding DWARF Version 5 +> chapter 5 sections. + +### A.5.7 Structure, Union, Class and Interface Type Entries + +#### A.5.7.3 Derived or Extended Structures, Classes and Interfaces + +1. For a `DW_AT_data_member_location` attribute there are two cases: + + 1. If the attribute is an integer constant B, it provides the offset in + bytes from the beginning of the containing entity. + + The result of the attribute is obtained by updating the bit offset of + the location description of the beginning of the containing entity by B + scaled by 8 (the byte size). The result is the location description of + the base of the member entry. + + If the beginning of the containing entity is not byte aligned, then + the beginning of the member entry has the same bit displacement within a + byte. + + 2. Otherwise, the attribute must be a DWARF expression E which is evaluated + with a context that has a result kind of a location description, an + unspecified object, the compilation unit that contains E, an initial + stack comprising the location description of the beginning of the + containing entity, and other context elements corresponding to the + source language thread of execution upon which the user is focused, if + any. 
The result of the evaluation is the location description of the + base of the member entry. + + > NOTE: The beginning of the containing entity can now be any location + > description, including those with more than one single location + > description, and those with single location descriptions that are of any + > kind and have any bit offset. + +#### A.5.7.8 Member Function Entries + +1. An entry for a virtual function also has a `DW_AT_vtable_elem_location` + attribute whose value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an initial stack comprising the location + description of the object of the enclosing type, and other context elements + corresponding to the source language thread of execution upon which the user + is focused, if any. The result of the evaluation is the location description + of the slot for the function within the virtual function table for the + enclosing class. + +### A.5.14 Pointer to Member Type Entries + +1. The `DW_TAG_ptr_to_member_type` debugging information entry has a + `DW_AT_use_location` attribute whose value is a DWARF expression E. It is used + to compute the location description of the member of the class to which the + pointer to member entry points. + + The method used to find the location description of a given member of a + class, structure, or union is common to any instance of that class, structure, + or union and to any instance of the pointer to member type. The method is thus + associated with the pointer to member type, rather than with each object that + has a pointer to member type. + + The `DW_AT_use_location` DWARF expression is used in conjunction with the + location description for a particular object of the given pointer to member type + and for a particular structure or class instance. 
+ + The result of the attribute is obtained by evaluating E with a context that has + a result kind of a location description, an unspecified object, the compilation + unit that contains E, an initial stack comprising two entries, and other context + elements corresponding to the source language thread of execution upon which the + user is focused, if any. The first stack entry is the value of the pointer to + member object itself. The second stack entry is the location description of the + base of the entire class, structure, or union instance containing the member + whose location is being calculated. The result of the evaluation is the location + description of the member of the class to which the pointer to member entry + points. + +### A.5.16 Dynamic Type Entries + +1. The `DW_AT_data_location` attribute may be used with any type that provides one + or more levels of hidden indirection and/or run-time parameters in its + representation. Its value is a DWARF operation expression E which computes the + location description of the data for an object. When this attribute is omitted, + the location description of the data is the same as the location description of + the object. + + The result of the attribute is obtained by evaluating E with a context that has + a result kind of a location description, an object that is the location + description of the data descriptor, the compilation unit that contains E, an + empty initial stack, and other context elements corresponding to the source + language thread of execution upon which the user is focused, if any. The result + of the evaluation is the location description of the base of the member entry. + + E will typically involve an operation expression that begins with a + `DW_OP_push_object_address` operation which loads the location description + of the object which can then serve as a descriptor in subsequent + calculation. 
+ + > NOTE: Since `DW_AT_data_member_location`, `DW_AT_use_location`, and + > `DW_AT_vtable_elem_location` allow both operation expressions and location + > list expressions, why does `DW_AT_data_location` not allow both? In all cases + > they apply to data objects so less likely that optimization would cause + > different operation expressions for different program location ranges. But if + > supporting for some then should be for all. + > + > It seems odd this attribute is not the same as `DW_AT_data_member_location` in + > having an initial stack with the location description of the object since the + > expression has to need it. + +## A.6 Other Debugging Information + +> NOTE: This section provides changes to existing debugger information entry +> attributes. These would be incorporated into the corresponding DWARF Version 5 +> chapter 6 sections. + +### A.6.2 Line Number Information + +> NOTE: This section is the same as DWARF Version 5 section 6.2. + +### A.6.4 Call Frame Information + +> NOTE: This section provides changes to DWARF Version 5 section 6.4. Register +> unwind DWARF expressions are generalized to allow any location description, +> including those with composite and implicit location descriptions. + +#### A.6.4.1 Structure of Call Frame Information + +The register rules are: + +1. undefined + + A register that has this rule has no recoverable value in the previous + frame. The previous value of this register is the undefined location + description (see [2.5.4.4.2 Undefined Location Description + Operations](#undefined-location-description-operations)). + + By convention, the register is not preserved by a callee. + +2. same value + + This register has not been modified from the previous caller frame. + + If the current frame is the top frame, then the previous value of this + register is the location description L that specifies one register location + description SL. 
SL specifies the register location storage that corresponds + to the register with a bit offset of 0 for the current thread. + + If the current frame is not the top frame, then the previous value of this + register is the location description obtained using the call frame + information for the callee frame and callee program location invoked by the + current caller frame for the same register. + + By convention, the register is preserved by the callee, but the callee + has not modified it. + +3. offset(N) + + N is a signed byte offset. The previous value of this register is saved at + the location description L. Where L is the location description of the + current CFA (see [2.5.4 DWARF Operation + Expressions](#dwarf-operation-expressions)) updated with the bit offset N + scaled by 8 (the byte size). + +4. val_offset(N) + + N is a signed byte offset. The previous value of this register is the memory + byte address of the location description L. Where L is the location + description of the current CFA (see [2.5.4 DWARF Operation + Expressions](#dwarf-operation-expressions)) updated with the bit offset N + scaled by 8 (the byte size). + + The DWARF is ill-formed if the CFA location description is not a memory byte + address location description, or if the register size does not match the + size of an address in the target architecture default address space. + + Since the CFA location description is required to be a memory byte + address location description, the value of val_offset(N) will also be a + memory byte address location description since it is offsetting the CFA + location description by N bytes. Furthermore, the value of val_offset(N) + will be a memory byte address in the target architecture default address + space. + + > NOTE: Should DWARF allow the address size to be a different size to the + > size of the register? 
Requiring them to be the same bit size avoids any + > issue of conversion as the bit contents of the register is simply + > interpreted as a value of the address. + > + > GDB has a per register hook that allows a target specific conversion on a + > register by register basis. It defaults to truncation of bigger registers, + > and to actually reading bytes from the next register (or reads out of + > bounds for the last register) for smaller registers. There are no GDB + > tests that read a register out of bounds (except an illegal hand written + > assembly test). + +5. register(R) + + This register has been stored in another register numbered R. + + The previous value of this register is the location description obtained + using the call frame information for the current frame and current program + location for register R. + + The DWARF is ill-formed if the size of this register does not match the size + of register R or if there is a cyclic dependency in the call frame + information. + + > NOTE: Should this also allow R to be larger than this register? If so is + > the value stored in the low order bits and it is undefined what is stored + > in the extra upper bits? + +6. expression(E) + + The previous value of this register is located at the location description + produced by evaluating the DWARF operation expression E (see [2.5.4 DWARF + Operation Expressions](#dwarf-operation-expressions)). + + E is evaluated with the current context, except the result kind is a + location description, the compilation unit is unspecified, the object is + unspecified, and an initial stack comprising the location description of the + current CFA (see [2.5.4 DWARF Operation + Expressions](#dwarf-operation-expressions)). + +7. val_expression(E) + + The previous value of this register is the value produced by evaluating the + DWARF operation expression E (see [2.5.4 DWARF Operation + Expressions](#dwarf-operation-expressions)). 
+ + E is evaluated with the current context, except the result kind is a value, + the compilation unit is unspecified, the object is unspecified, and an + initial stack comprising the location description of the current CFA (see + [2.5.4 DWARF Operation Expressions](#dwarf-operation-expressions)). + + The DWARF is ill-formed if the resulting value type size does not match the + register size. + + > NOTE: This has limited usefulness as the DWARF expression E can only + > produce values up to the size of the generic type. This is due to not + > allowing any operations that specify a type in a CFI operation expression. + > This makes it unusable for registers that are larger than the generic + > type. However, expression(E) can be used to create an implicit + > location description of any size. + +8. architectural + + The rule is defined externally to this specification by the augmenter. + +A Common Information Entry (CIE) holds information that is shared among many +Frame Description Entries (FDE). There is at least one CIE in every non-empty +`.debug_frame` section. A CIE contains the following fields, in order: + +1. `length` (initial length) + + A constant that gives the number of bytes of the CIE structure, not + including the length field itself. The size of the length field plus the + value of length must be an integral multiple of the address size specified + in the `address_size` field. + +2. `CIE_id` (4 or 8 bytes, see [7.4 32-Bit and 64-Bit DWARF + Formats](#32-bit-and-64-bit-dwarf-formats)) + + A constant that is used to distinguish CIEs from FDEs. + + In the 32-bit DWARF format, the value of the CIE id in the CIE header is + 0xffffffff; in the 64-bit DWARF format, the value is 0xffffffffffffffff. + +3. `version` (ubyte) + + A version number. This number is specific to the call frame information and + is independent of the DWARF version number. + + The value of the CIE version number is 4. 
+ + > NOTE: Would this be increased to 5 to reflect the changes in these + > extensions? + +4. `augmentation` (sequence of UTF-8 characters) + + A null-terminated UTF-8 string that identifies the augmentation to this CIE + or to the FDEs that use it. If a reader encounters an augmentation string + that is unexpected, then only the following fields can be read: + + - CIE: length, CIE_id, version, augmentation + - FDE: length, CIE_pointer, initial_location, address_range + + If there is no augmentation, this value is a zero byte. + + The augmentation string allows users to indicate that there is additional + vendor and target architecture specific information in the CIE or FDE which + is needed to virtually unwind a stack frame. For example, this might be + information about dynamically allocated data which needs to be freed on exit + from the routine. + + Because the `.debug_frame` section is useful independently of any + `.debug_info` section, the augmentation string always uses UTF-8 + encoding. + +5. `address_size` (ubyte) + + The size of a target address in this CIE and any FDEs that use it, in bytes. + If a compilation unit exists for this frame, its address size must match the + address size here. + +6. `segment_selector_size` (ubyte) + + The size of a segment selector in this CIE and any FDEs that use it, in + bytes. + +7. `code_alignment_factor` (unsigned LEB128) + + A constant that is factored out of all advance location instructions (see + [6.4.2.1 Row Creation Instructions](#row-creation-instructions)). The + resulting value is `(operand * code_alignment_factor)`. + +8. `data_alignment_factor` (signed LEB128) + + A constant that is factored out of certain offset instructions (see [6.4.2.2 + CFA Definition Instructions](#cfa-definition-instructions) and [6.4.2.3 + Register Rule Instructions](#register-rule-instructions)). The + resulting value is `(operand * data_alignment_factor)`. + +9. 
`return_address_register` (unsigned LEB128) + + An unsigned LEB128 constant that indicates which column in the rule table + represents the return address of the subprogram. Note that this column might + not correspond to an actual machine register. + + The value of the return address register is used to determine the program + location of the caller frame. The program location of the top frame is the + target architecture program counter value of the current thread. + +10. `initial_instructions` (array of ubyte) + + A sequence of rules that are interpreted to create the initial setting of + each column in the table. + + The default rule for all columns before interpretation of the initial + instructions is the undefined rule. However, an ABI authoring body or a + compilation system authoring body may specify an alternate default value for + any or all columns. + +11. `padding` (array of ubyte) + + Enough `DW_CFA_nop` instructions to make the size of this entry match the + length value above. + +An FDE contains the following fields, in order: + +1. `length` (initial length) + + A constant that gives the number of bytes of the header and instruction + stream for this subprogram, not including the length field itself. The size + of the length field plus the value of length must be an integral multiple of + the address size. + +2. `CIE_pointer` (4 or 8 bytes, see [7.4 32-Bit and 64-Bit DWARF + Formats](#32-bit-and-64-bit-dwarf-formats)) + + A constant offset into the `.debug_frame` section that denotes the CIE that + is associated with this FDE. + +3. `initial_location` (segment selector and target address) + + The address of the first location associated with this table entry. If the + segment_selector_size field of this FDE's CIE is non-zero, the initial + location is preceded by a segment selector of the given length. + +4. `address_range` (target address) + + The number of bytes of program instructions described by this entry. + +5. 
`instructions` (array of ubyte) + + A sequence of table defining instructions that are described in [6.4.2 Call + Frame Instructions](#call-frame-instructions). + +6. `padding` (array of ubyte) + + Enough `DW_CFA_nop` instructions to make the size of this entry match the + length value above. + +#### A.6.4.2 Call Frame Instructions + +Some call frame instructions have operands that are encoded as DWARF operation +expressions E (see [2.5.4 DWARF Operation +Expressions](#dwarf-operation-expressions)). The DWARF operations that can be +used in E have the following restrictions: + +- `DW_OP_addrx`, `DW_OP_call2`, `DW_OP_call4`, `DW_OP_call_ref`, + `DW_OP_const_type`, `DW_OP_constx`, `DW_OP_convert`, `DW_OP_deref_type`, + `DW_OP_fbreg`, `DW_OP_implicit_pointer`, `DW_OP_regval_type`, + `DW_OP_reinterpret`, and `DW_OP_xderef_type` operations are not allowed + because the call frame information must not depend on other debug sections. +- `DW_OP_push_object_address` is not allowed because there is no object context + to provide a value to push. +- `DW_OP_call_frame_cfa` and `DW_OP_entry_value` are not allowed because their + use would be circular. + +Call frame instructions to which these restrictions apply include +`DW_CFA_def_cfa_expression`, `DW_CFA_expression`, and +`DW_CFA_val_expression`. + +##### A.6.4.2.1 Row Creation Instructions + +> NOTE: These instructions are the same as in DWARF Version 5 section 6.4.2.1. + +##### A.6.4.2.2 CFA Definition Instructions + +1. `DW_CFA_def_cfa` + + The `DW_CFA_def_cfa` instruction takes two unsigned LEB128 operands + representing a register number R and a (non-factored) byte displacement B. + The required action is to define the current CFA rule to be the result of + evaluating the DWARF operation expression `DW_OP_bregx R, B` as a location + description. + +2. 
`DW_CFA_def_cfa_sf` + + The `DW_CFA_def_cfa_sf` instruction takes two operands: an unsigned LEB128 + value representing a register number R and a signed LEB128 factored byte + displacement B. The required action is to define the current CFA rule to be + the result of evaluating the DWARF operation expression `DW_OP_bregx R, B * + data_alignment_factor` as a location description. + + The action is the same as `DW_CFA_def_cfa`, except that the second + operand is signed and factored. + +3. `DW_CFA_def_cfa_register` + + The `DW_CFA_def_cfa_register` instruction takes a single unsigned LEB128 + operand representing a register number R. The required action is to define + the current CFA rule to be the result of evaluating the DWARF operation + expression `DW_OP_bregx R, B` as a location description. B is the old CFA + byte displacement. + + If the subprogram has no current CFA rule, or the rule was defined by a + `DW_CFA_def_cfa_expression` instruction, then the DWARF is ill-formed. + +4. `DW_CFA_def_cfa_offset` + + The `DW_CFA_def_cfa_offset` instruction takes a single unsigned LEB128 + operand representing a (non-factored) byte displacement B. The required + action is to define the current CFA rule to be the result of evaluating the + DWARF operation expression `DW_OP_bregx R, B` as a location description. R + is the old CFA register number. + + If the subprogram has no current CFA rule, or the rule was defined by a + `DW_CFA_def_cfa_expression` instruction, then the DWARF is ill-formed. + +5. `DW_CFA_def_cfa_offset_sf` + + The `DW_CFA_def_cfa_offset_sf` instruction takes a signed LEB128 operand + representing a factored byte displacement B. The required action is to + define the current CFA rule to be the result of evaluating the DWARF + operation expression `DW_OP_bregx R, B * data_alignment_factor` as a + location description. R is the old CFA register number. 
+ + If the subprogram has no current CFA rule, or the rule was defined by a + `DW_CFA_def_cfa_expression` instruction, then the DWARF is ill-formed. + + The action is the same as `DW_CFA_def_cfa_offset`, except that the + operand is signed and factored. + +6. `DW_CFA_def_cfa_expression` + + The `DW_CFA_def_cfa_expression` instruction takes a single operand encoded + as a `DW_FORM_exprloc` value representing a DWARF operation expression E. + The required action is to define the current CFA rule to be the result of + evaluating E with the current context, except the result kind is a location + description, the compilation unit is unspecified, the object is unspecified, + and an empty initial stack. + + See [6.4.2 Call Frame Instructions](#call-frame-instructions) regarding + restrictions on the DWARF expression operations that can be used in E. + + The DWARF is ill-formed if the result of evaluating E is not a memory byte + address location description. + +##### A.6.4.2.3 Register Rule Instructions + +1. `DW_CFA_undefined` + + The `DW_CFA_undefined` instruction takes a single unsigned LEB128 operand + that represents a register number R. The required action is to set the rule + for the register specified by R to `undefined`. + +2. `DW_CFA_same_value` + + The `DW_CFA_same_value` instruction takes a single unsigned LEB128 operand + that represents a register number R. The required action is to set the rule + for the register specified by R to `same value`. + +3. `DW_CFA_offset` + + The `DW_CFA_offset` instruction takes two operands: a register number R + (encoded with the opcode) and an unsigned LEB128 constant representing a + factored displacement B. The required action is to change the rule for the + register specified by R to be an offset(B * data_alignment_factor) + rule. + + > NOTE: Seems this should be named `DW_CFA_offset_uf` since the offset is + > unsigned factored. + +4. 
`DW_CFA_offset_extended` + + The `DW_CFA_offset_extended` instruction takes two unsigned LEB128 operands + representing a register number R and a factored displacement B. This + instruction is identical to `DW_CFA_offset`, except for the encoding and + size of the register operand. + + > NOTE: Seems this should be named `DW_CFA_offset_extended_uf` since the + > displacement is unsigned factored. + +5. `DW_CFA_offset_extended_sf` + + The `DW_CFA_offset_extended_sf` instruction takes two operands: an unsigned + LEB128 value representing a register number R and a signed LEB128 factored + displacement B. This instruction is identical to `DW_CFA_offset_extended`, + except that B is signed. + +6. `DW_CFA_val_offset` + + The `DW_CFA_val_offset` instruction takes two unsigned LEB128 operands + representing a register number R and a factored displacement B. The required + action is to change the rule for the register indicated by R to be a + val_offset(B * data_alignment_factor) rule. + + > NOTE: Seems this should be named `DW_CFA_val_offset_uf` since the + displacement is unsigned factored. + +7. `DW_CFA_val_offset_sf` + + The `DW_CFA_val_offset_sf` instruction takes two operands: an unsigned + LEB128 value representing a register number R and a signed LEB128 factored + displacement B. This instruction is identical to `DW_CFA_val_offset`, except + that B is signed. + +8. `DW_CFA_register` + + The `DW_CFA_register` instruction takes two unsigned LEB128 operands + representing register numbers R1 and R2 respectively. The required action is + to set the rule for the register specified by R1 to be a register(R2) + rule. + +9. `DW_CFA_expression` + + The `DW_CFA_expression` instruction takes two operands: an unsigned LEB128 + value representing a register number R, and a `DW_FORM_block` value + representing a DWARF operation expression E. The required action is to + change the rule for the register specified by R to be an + expression(E) rule. 
+ + That is, E computes the location description where the register value can + be retrieved. + + See [6.4.2 Call Frame Instructions](#call-frame-instructions) regarding + restrictions on the DWARF expression operations that can be used in E. + +10. `DW_CFA_val_expression` + + The `DW_CFA_val_expression` instruction takes two operands: an unsigned + LEB128 value representing a register number R, and a `DW_FORM_block` value + representing a DWARF operation expression E. The required action is to + change the rule for the register specified by R to be a + val_expression(E) rule. + + That is, E computes the value of register R. + + See [6.4.2 Call Frame Instructions](#call-frame-instructions) regarding + restrictions on the DWARF expression operations that can be used in E. + + If the result of evaluating E is not a value with a base type size that + matches the register size, then the DWARF is ill-formed. + +11. `DW_CFA_restore` + + The `DW_CFA_restore` instruction takes a single operand (encoded with the + opcode) that represents a register number R. The required action is to + change the rule for the register specified by R to the rule assigned it by + the `initial_instructions` in the CIE. + +12. `DW_CFA_restore_extended` + + The `DW_CFA_restore_extended` instruction takes a single unsigned LEB128 + operand that represents a register number R. This instruction is identical + to `DW_CFA_restore`, except for the encoding and size of the register + operand. + +##### A.6.4.2.4 Row State Instructions + +> NOTE: These instructions are the same as in DWARF Version 5 section 6.4.2.4. + +##### A.6.4.2.5 Padding Instruction + +> NOTE: These instructions are the same as in DWARF Version 5 section 6.4.2.5. + +#### A.6.4.3 Call Frame Instruction Usage + +> NOTE: The same as in DWARF Version 5 section 6.4.3. + +#### A.6.4.4 Call Frame Calling Address + +> NOTE: The same as in DWARF Version 5 section 6.4.4. 
+ +## A.7 Data Representation + +> NOTE: This section provides changes to existing debugger information entry +> attributes. These would be incorporated into the corresponding DWARF Version 5 +> chapter 7 sections. + +### A.7.4 32-Bit and 64-Bit DWARF Formats + +> NOTE: This augments DWARF Version 5 section 7.4 list item 3's table. + + Form Role + ------------------------ -------------------------------------- + DW_OP_implicit_pointer offset in `.debug_info` + +### A.7.5 Format of Debugging Information + +#### A.7.5.5 Classes and Forms + +> NOTE: The same as in DWARF Version 5 section 7.5.5. + +### A.7.7 DWARF Expressions + +> NOTE: Rename DWARF Version 5 section 7.7 to reflect the unification of +> location descriptions into DWARF expressions. + +#### A.7.7.1 Operation Expressions + +> NOTE: Rename DWARF Version 5 section 7.7.1 and delete section 7.7.2 to reflect +> the unification of location descriptions into DWARF expressions. + +#### A.7.7.3 Location List Expressions + +> NOTE: Rename DWARF Version 5 section 7.7.3 to reflect that location lists are +> a kind of DWARF expression. + +# B. Further Information The following references provide additional information on the extension. +A reference to the DWARF standard is provided. + +A formatted version of this extension is available on the LLVM site. It includes +many figures that help illustrate the textual description, especially of the +example DWARF expression evaluations. + Slides and a video of a presentation at the Linux Plumbers Conference 2021 related to this extension are available. -The LLVM compiler extension includes possible normative text changes for this -extension as well as the operations mentioned in the motivating examples. It -also covers other extensions needed for heterogeneous devices. +The LLVM compiler extension includes the operations mentioned in the motivating +examples. It also covers other extensions needed for heterogeneous devices. 
+- [DWARF Debugging Information Format](https://dwarfstd.org/) + - [DWARF Debugging Information Format Version 5](https://dwarfstd.org/Dwarf5Std.php) +- [Allow Location Descriptions on the DWARF Expression Stack](https://llvm.org/docs/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack.html) - DWARF extensions for optimized SIMT/SIMD (GPU) debugging - Linux Plumbers Conference 2021 - [Video](https://www.youtube.com/watch?v=QiR0ra0ymEY&t=10015s) - [Slides](https://linuxplumbersconf.org/event/11/contributions/1012/attachments/798/1505/DWARF_Extensions_for_Optimized_SIMT-SIMD_GPU_Debugging-LPC2021.pdf) From 0ac939f3e249b46643411143cc1275717d0a3eee Mon Sep 17 00:00:00 2001 From: Tony Tye Date: Sat, 25 Dec 2021 00:49:23 +0000 Subject: [PATCH 128/992] [AMDGPU][NFC] Update to DWARF extension for heterogeneous debugging - Update documentation on the DWARF extension for heterogeneous debugging to better reference the DWARF Version 5 standard. - Numerous other corrections. Reviewed By: kzhuravl Differential Revision: https://reviews.llvm.org/D116275 --- ...arfExtensionsForHeterogeneousDebugging.rst | 2003 ++++++++++------- llvm/docs/AMDGPUUsage.rst | 7 +- 2 files changed, 1151 insertions(+), 859 deletions(-) diff --git a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst index 7027b371f432..d72001566a85 100644 --- a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst +++ b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst @@ -15,425 +15,583 @@ DWARF Extensions For Heterogeneous Debugging .. _amdgpu-dwarf-introduction: -Introduction -============ +1. Introduction +=============== AMD [:ref:`AMD `] has been working on supporting heterogeneous -computing through the AMD Radeon Open Compute Platform (ROCm) [:ref:`AMD-ROCm -`]. 
A heterogeneous computing program can be written in a -high level language such as C++ or Fortran with OpenMP pragmas, OpenCL, or HIP -(a portable C++ programming environment for heterogeneous computing [:ref:`HIP +computing. A heterogeneous computing program can be written in a high level +language such as C++ or Fortran with OpenMP pragmas, OpenCL, or HIP (a portable +C++ programming environment for heterogeneous computing [:ref:`HIP `]). A heterogeneous compiler and runtime allows a program to execute on multiple devices within the same native process. Devices could include CPUs, GPUs, DSPs, FPGAs, or other special purpose accelerators. Currently HIP programs execute on systems with CPUs and GPUs. -ROCm is fully open sourced and includes contributions to open source projects -such as LLVM for compilation [:ref:`LLVM `] and GDB for -debugging [:ref:`GDB `], as well as collaboration with other -third party projects such as the GCC compiler [:ref:`GCC `] -and the Perforce TotalView HPC debugger [:ref:`Perforce-TotalView +The AMD [:ref:`AMD `] ROCm platform [:ref:`AMD-ROCm +`] is an implementation of the industry standard for +heterogeneous computing devices defined by the Heterogeneous System Architecture +(HSA) Foundation [:ref:`HSA `]. It is open sourced and +includes contributions to open source projects such as LLVM [:ref:`LLVM +`] for compilation and GDB for debugging [:ref:`GDB +`]. + +The LLVM compiler has upstream support for commercially available AMD GPU +hardware (AMDGPU) [:ref:`AMDGPU-LLVM `]. The open +source ROCgdb [:ref:`AMD-ROCgdb `] GDB based debugger +also has support for AMDGPU which is being upstreamed. Support for AMDGPU is +also being added by third parties to the GCC [:ref:`GCC `] +compiler and the Perforce TotalView HPC Debugger [:ref:`Perforce-TotalView `]. To support debugging heterogeneous programs several features that are not provided by current DWARF Version 5 [:ref:`DWARF `] have -been identified. 
This document contains a collection of extensions to address -providing those features. - -The :ref:`amdgpu-dwarf-motivation` section describes the issues that are being -addressed for heterogeneous computing. That is followed by the -:ref:`amdgpu-dwarf-changes-relative-to-dwarf-version-5` section containing the +been identified. The :ref:`amdgpu-dwarf-extensions` section gives an overview of +the extensions devised to address the missing features. The extensions seek to +be general in nature and backwards compatible with DWARF Version 5. Their goal +is to be applicable to meeting the needs of any heterogeneous system and not be +vendor or architecture specific. That is followed by appendix +:ref:`amdgpu-dwarf-changes-relative-to-dwarf-version-5` which contains the textual changes for the extensions relative to the DWARF Version 5 standard. -Then there is an :ref:`amdgpu-dwarf-examples` section that links to the AMD GPU -specific usage of the extensions that includes an example. Finally, there is a -:ref:`amdgpu-dwarf-references` section. There are a number of notes included -that raise open questions, or provide alternative approaches considered. The -extensions seek to be general in nature and backwards compatible with DWARF -Version 5. The goal is to be applicable to meeting the needs of any -heterogeneous system and not be vendor or architecture specific. - -A fundamental aspect of the extensions is that it allows DWARF expression -location descriptions as stack elements. The extensions are based on DWARF -Version 5 and maintains compatibility with DWARF Version 5. After attempting -several alternatives, the current thinking is that such extensions to DWARF -Version 5 are the simplest and cleanest ways to support debugging optimized GPU -code. It also appears to be generally useful and may be able to address other -reported DWARF issues, as well as being helpful in providing better optimization -support for non-GPU code. 
- -General feedback on these extensions is sought, together with suggestions on how -to clarify, simplify, or organize them. If their is general interest then some -or all of these extensions could be submitted as future DWARF proposals. - -We are in the process of modifying LLVM and GDB to support these extensions -which is providing experience and insights. We plan to upstream the changes to -those projects for any final form of the extensions. - -The author very much appreciates the input provided so far by many others which -has been incorporated into this current version. - -.. _amdgpu-dwarf-motivation: - -Motivation -========== - -This document presents a set of backwards compatible extensions to DWARF Version -5 [:ref:`DWARF `] to support heterogeneous debugging. - -The remainder of this section provides motivation for each extension in -terms of heterogeneous debugging on commercially available AMD GPU hardware -(AMDGPU). The goal is to add support to the AMD [:ref:`AMD `] -open source Radeon Open Compute Platform (ROCm) [:ref:`AMD-ROCm -`] which is an implementation of the industry standard -for heterogeneous computing devices defined by the Heterogeneous System -Architecture (HSA) Foundation [:ref:`HSA `]. ROCm includes the -LLVM compiler [:ref:`LLVM `] with upstreamed support for -AMDGPU [:ref:`AMDGPU-LLVM `]. The goal is to also add -the GDB debugger [:ref:`GDB `] with upstreamed support for -AMDGPU [:ref:`AMD-ROCgdb `]. In addition, the goal is -to work with third parties to enable support for AMDGPU debugging in the GCC -compiler [:ref:`GCC `] and the Perforce TotalView HPC debugger -[:ref:`Perforce-TotalView `]. - -However, the extensions are intended to be vendor and architecture neutral. They -are believed to apply to other heterogeneous hardware devices including GPUs, -DSPs, FPGAs, and other specialized hardware. These collectively include similar -characteristics and requirements as AMDGPU devices. 
Some of the extension can -also apply to traditional CPU hardware that supports large vector registers. -Compilers can map source languages and extensions that describe large scale -parallel execution onto the lanes of the vector registers. This is common in -programming languages used in ML and HPC. The extensions also include improved -support for optimized code on any architecture. Some of the generalizations may -also benefit other issues that have been raised. - -The extensions have evolved through collaboration with many individuals and +There are a number of notes included that raise open questions, or provide +alternative approaches that may be worth considering. Then appendix +:ref:`amdgpu-dwarf-examples` links to the AMD GPU specific usage of the +extensions that includes an example. Finally, appendix +:ref:`amdgpu-dwarf-references` provides references to further information. + +.. _amdgpu-dwarf-extensions: + +1. Extensions +============= + +The extensions continue to evolve through collaboration with many individuals and active prototyping within the GDB debugger and LLVM compiler. Input has also been very much appreciated from the developers working on the Perforce TotalView HPC Debugger and GCC compiler. -The AMDGPU has several features that require additional DWARF functionality in -order to support optimized code. +The inputs provided and insights gained so far have been incorporated into this +current version. The plan is to participate in upstreaming the work and +addressing any feedback. If there is general interest then some or all of these +extensions could be submitted as future DWARF standard proposals. -AMDGPU optimized code may spill vector registers to non-global address space -memory, and this spilling may be done only for lanes that are active on entry -to the subprogram. To support this, a location description that can be created -as a masked select is required. See ``DW_OP_LLVM_select_bit_piece``. 
+The general principles in designing the extensions have been: -Since the active lane mask may be held in a register, a way to get the value -of a register on entry to a subprogram is required. To support this an -operation that returns the caller value of a register as specified by the Call -Frame Information (CFI) is required. See ``DW_OP_LLVM_call_frame_entry_reg`` -and :ref:`amdgpu-dwarf-call-frame-information`. +1. Be backwards compatible with the DWARF Version 5 [:ref:`DWARF + `] standard. -Current DWARF uses an empty expression to indicate an undefined location -description. Since the masked select composite location description operation -takes more than one location description, it is necessary to have an explicit -way to specify an undefined location description. Otherwise it is not possible -to specify that a particular one of the input location descriptions is -undefined. See ``DW_OP_LLVM_undefined``. +2. Be vendor and architecture neutral. They are intended to apply to other + heterogeneous hardware devices including GPUs, DSPs, FPGAs, and other + specialized hardware. These collectively include similar characteristics and + requirements as AMDGPU devices. + +3. Provide improved optimization support for non-GPU code. For example, some + extensions apply to traditional CPU hardware that supports large vector + registers. Compilers can map source languages, and source language + extensions, that describe large scale parallel execution, onto the lanes of + the vector registers. This is common in programming languages used in ML and + HPC. + +4. Fully define well-formed DWARF in a consistent style based on the DWARF + Version 5 specification. + +It is possible that some of the generalizations may also benefit other DWARF +issues that have been raised. + +The remainder of this section enumerates the extensions and provides motivation +for each in terms of heterogeneous debugging. + +.. 
_amdgpu-dwarf-allow-location-description-on-the-dwarf-evaluation-stack: + +2.1 Allow Location Description on the DWARF Expression Stack +------------------------------------------------------------ + +DWARF Version 5 does not allow location descriptions to be entries on the DWARF +expression stack. They can only be the final result of the evaluation of a DWARF +expression. However, by allowing a location description to be a first-class +entry on the DWARF expression stack it becomes possible to compose expressions +containing both values and location descriptions naturally. It allows objects to +be located in any kind of memory address space, in registers, be implicit +values, be undefined, or a composite of any of these. + +By extending DWARF carefully, all existing DWARF expressions can retain their +current semantic meaning. DWARF has implicit conversions that convert from a +value that represents an address in the default address space to a memory +location description. This can be extended to allow a default address space +memory location description to be implicitly converted back to its address +value. This allows all DWARF Version 5 expressions to retain their same meaning, +while enabling the ability to explicitly create memory location descriptions in +non-default address spaces and generalizing the power of composite location +descriptions to any kind of location description. + +For those familiar with the definition of location descriptions in DWARF Version +5, the definitions in these extensions are presented differently, but does in +fact define the same concept with the same fundamental semantics. However, it +does so in a way that allows the concept to extend to support address spaces, +bit addressing, the ability for composite location descriptions to be composed +of any kind of location description, and the ability to support objects located +at multiple places. 
Collectively these changes expand the set of architectures +that can be supported and improves support for optimized code. + +Several approaches were considered, and the one presented, together with the +extensions it enables, appears to be the simplest and cleanest one that offers +the greatest improvement of DWARF's ability to support debugging optimized GPU +and non-GPU code. Examining the GDB debugger and LLVM compiler, it appears only +to require modest changes as they both already have to support general use of +location descriptions. It is anticipated that will also be the case for other +debuggers and compilers. + +GDB has been modified to evaluate DWARF Version 5 expressions with location +descriptions as stack entries and with implicit conversions. All GDB tests have +passed, except one that turned out to be an invalid test case by DWARF Version 5 +rules. The code in GDB actually became simpler as all evaluation is done on a +single stack and there was no longer a need to maintain a separate structure for +the location description results. This gives confidence in backwards +compatibility. + +See :ref:`amdgpu-dwarf-expressions` and nested sections. + +This extension is separately described at *Allow Location Descriptions on the +DWARF Expression Stack* [:ref:`AMDGPU-DWARF-LOC +`]. + +2.2 Generalize CFI to Allow Any Location Description Kind +--------------------------------------------------------- CFI describes restoring callee saved registers that are spilled. Currently CFI only allows a location description that is a register, memory address, or -implicit location description. AMDGPU optimized code may spill scalar -registers into portions of vector registers. This requires extending CFI to -allow any location description. See -:ref:`amdgpu-dwarf-call-frame-information`. +implicit location description. AMDGPU optimized code may spill scalar registers +into portions of vector registers. 
This requires extending CFI to allow any +location description kind to be supported. -The vector registers of the AMDGPU are represented as their full wavefront -size, meaning the wavefront size times the dword size. This reflects the -actual hardware and allows the compiler to generate DWARF for languages that -map a thread to the complete wavefront. It also allows more efficient DWARF to -be generated to describe the CFI as only a single expression is required for -the whole vector register, rather than a separate expression for each lane's -dword of the vector register. It also allows the compiler to produce DWARF -that indexes the vector register if it spills scalar registers into portions -of a vector register. +See :ref:`amdgpu-dwarf-call-frame-information`. -Since DWARF stack value entries have a base type and AMDGPU registers are a -vector of dwords, the ability to specify that a base type is a vector is -required. See ``DW_AT_LLVM_vector_size``. +2.3 Generalize DWARF Operation Expressions to Support Multiple Places +--------------------------------------------------------------------- -If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner, -then the variable DWARF location expressions must compute the location for a -single lane of the wavefront. Therefore, a DWARF operation is required to denote -the current lane, much like ``DW_OP_push_object_address`` denotes the current -object. The ``DW_OP_*piece`` operations only allow literal indices. Therefore, a -way to use a computed offset of an arbitrary location description (such as a -vector register) is required. See ``DW_OP_LLVM_push_lane``, -``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and -``DW_OP_LLVM_bit_offset``. - -If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner -the compiler can use the AMDGPU execution mask register to control which lanes -are active. 
To describe the conceptual location of non-active lanes a DWARF -expression is needed that can compute a per lane PC. For efficiency, this is -done for the wavefront as a whole. This expression benefits by having a masked -select composite location description operation. This requires an attribute -for source location of each lane. The AMDGPU may update the execution mask for -whole wavefront operations and so needs an attribute that computes the current -active lane mask. See ``DW_OP_LLVM_select_bit_piece``, ``DW_OP_LLVM_extend``, -``DW_AT_LLVM_lane_pc``, and ``DW_AT_LLVM_active_lane``. +In DWARF Version 5 a location description is defined as a single location +description or a location list. A location list is defined as either +effectively an undefined location description or as one or more single +location descriptions to describe an object with multiple places. + +With +:ref:`amdgpu-dwarf-allow-location-description-on-the-dwarf-evaluation-stack`, +the ``DW_OP_push_object_address`` and ``DW_OP_call*`` operations can put a +location description on the stack. Furthermore, debugger information entry +attributes such as ``DW_AT_data_member_location``, ``DW_AT_use_location``, and +``DW_AT_vtable_elem_location`` are defined as pushing a location description on +the expression stack before evaluating the expression. + +DWARF Version 5 only allows the stack to contain values and so only a single +memory address can be on the stack. This makes these operations and attributes +incapable of handling location descriptions with multiple places, or places +other than memory. + +Since +:ref:`amdgpu-dwarf-allow-location-description-on-the-dwarf-evaluation-stack` +allows the stack to contain location descriptions, the operations are +generalized to support location descriptions that can have multiple places. This +is backwards compatible with DWARF Version 5 and allows objects with multiple +places to be supported. 
For example, the expression that describes how to access +the field of an object can be evaluated with a location description that has +multiple places and will result in a location description with multiple places. + +With this change, the separate DWARF Version 5 sections that described DWARF +expressions and location lists are unified into a single section that describes +DWARF expressions in general. This unification is a natural consequence of, and +a necessity of, allowing location descriptions to be part of the evaluation +stack. + +See :ref:`amdgpu-dwarf-location-description`. + +2.4 Generalize Offsetting of Location Descriptions +-------------------------------------------------- + +The ``DW_OP_plus`` and ``DW_OP_minus`` operations can be defined to operate on a +memory location description in the default target architecture specific address +space and a generic type value to produce an updated memory location +description. This allows them to continue to be used to offset an address. + +To generalize offsetting to any location description, including location +descriptions that describe when bytes are in registers, are implicit, or a +composite of these, the ``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and +``DW_OP_LLVM_bit_offset`` offset operations are added. + +The offset operations can operate on location storage of any size. For example, +implicit location storage could be any number of bits in size. It is simpler to +define offsets that exceed the size of the location storage as being an +evaluation error, than having to force an implementation to support potentially +infinite precision offsets to allow it to correctly track a series of positive +and negative offsets that may transiently overflow or underflow, but end up in +range. This is simple for the arithmetic operations as they are defined in terms +of two's compliment arithmetic on a base type of a fixed size. Therefore, the +offset operation define that integer overflow is ill-formed. 
This is in contrast +to the ``DW_OP_plus``, ``DW_OP_plus_uconst``, and ``DW_OP_minus`` arithmetic +operations which define that it causes wrap-around. + +Having the offset operations allows ``DW_OP_push_object_address`` to push a +location description that may be in a register, or be an implicit value. The +DWARF expression of ``DW_TAG_ptr_to_member_type`` can use the offset operations +without regard to what kind of location description was pushed. + +Since +:ref:`amdgpu-dwarf-allow-location-description-on-the-dwarf-evaluation-stack` has +generalized location storage to be bit indexable, ``DW_OP_LLVM_bit_offset`` +generalizes DWARF to work with bit fields. This is generally not possible in +DWARF Version 5. + +The ``DW_OP_*piece`` operations only allow literal indices. A way to use a +computed offset of an arbitrary location description (such as a vector register) +is required. The offset operations provide this ability since they can be used +to compute a location description on the stack. + +See ``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and +``DW_OP_LLVM_bit_offset`` in +:ref:`amdgpu-dwarf-general-location-description-operations`. + +2.5 Generalize Creation of Undefined Location Descriptions +---------------------------------------------------------- + +Current DWARF uses an empty expression to indicate an undefined location +description. Since +:ref:`amdgpu-dwarf-allow-location-description-on-the-dwarf-evaluation-stack` +allows location descriptions to be created on the stack, it is necessary to have +an explicit way to specify an undefined location description. + +For example, the ``DW_OP_LLVM_select_bit_piece`` (see +:ref:`amdgpu-dwarf-support-for-divergent-control-flow-of-simt-hardware`) +operation takes more than one location description on the stack. Without this +ability, it is not possible to specify that a particular one of the input +location descriptions is undefined. 
+ +See the ``DW_OP_LLVM_undefined`` operation in +:ref:`amdgpu-dwarf-undefined-location-description-operations`. + +2.6 Generalize Creation of Composite Location Descriptions +---------------------------------------------------------- + +To allow composition of composite location descriptions, an explicit operation +that indicates the end of the definition of a composite location description is +required. This can be implied if the end of a DWARF expression is reached, +allowing current DWARF expressions to remain legal. + +See ``DW_OP_LLVM_piece_end`` in +:ref:`amdgpu-dwarf-composite-location-description-operations`. + +2.7 Generalize DWARF Base Objects to Allow Any Location Description Kind +------------------------------------------------------------------------ + +The number of registers and the cost of memory operations is much higher for +AMDGPU than a typical CPU. The compiler attempts to optimize whole variables and +arrays into registers. + +Currently DWARF only allows ``DW_OP_push_object_address`` and related operations +to work with a global memory location. To support AMDGPU optimized code it is +required to generalize DWARF to allow any location description to be used. This +allows registers, or composite location descriptions that may be a mixture of +memory, registers, or even implicit values. + +See ``DW_OP_push_object_address`` in +:ref:`amdgpu-dwarf-general-location-description-operations`. + +2.8 General Support for Address Spaces +-------------------------------------- AMDGPU needs to be able to describe addresses that are in different kinds of memory. Optimized code may need to describe a variable that resides in pieces that are in different kinds of storage which may include parts of registers, memory that is in a mixture of memory kinds, implicit values, or be undefined. + DWARF has the concept of segment addresses. 
However, the segment cannot be specified within a DWARF expression, which is only able to specify the offset portion of a segment address. The segment index is only provided by the entity -that specifies the DWARF expression. Therefore, the segment index is a -property that can only be put on complete objects, such as a variable. That -makes it only suitable for describing an entity (such as variable or -subprogram code) that is in a single kind of memory. Therefore, AMDGPU uses -the DWARF concept of address spaces. For example, a variable may be allocated -in a register that is partially spilled to the call stack which is in the -private address space, and partially spilled to the local address space. +that specifies the DWARF expression. Therefore, the segment index is a property +that can only be put on complete objects, such as a variable. That makes it only +suitable for describing an entity (such as variable or subprogram code) that is +in a single kind of memory. + +Therefore, AMDGPU uses the DWARF concept of address spaces. For example, a +variable may be allocated in a register that is partially spilled to the call +stack which is in the private address space, and partially spilled to the local +address space. DWARF uses the concept of an address in many expression operations but does not define how it relates to address spaces. For example, ``DW_OP_push_object_address`` pushes the address of an object. Other contexts implicitly push an address on the stack before evaluating an expression. For example, the ``DW_AT_use_location`` attribute of the -``DW_TAG_ptr_to_member_type``. The expression that uses the address needs to -do so in a general way and not need to be dependent on the address space of -the address. For example, a pointer to member value may want to be applied to -an object that may reside in any address space. - -The number of registers and the cost of memory operations is much higher for -AMDGPU than a typical CPU. 
The compiler attempts to optimize whole variables -and arrays into registers. Currently DWARF only allows -``DW_OP_push_object_address`` and related operations to work with a global -memory location. To support AMDGPU optimized code it is required to generalize -DWARF to allow any location description to be used. This allows registers, or -composite location descriptions that may be a mixture of memory, registers, or -even implicit values. - -DWARF Version 5 does not allow location descriptions to be entries on the -DWARF stack. They can only be the final result of the evaluation of a DWARF -expression. However, by allowing a location description to be a first-class -entry on the DWARF stack it becomes possible to compose expressions containing -both values and location descriptions naturally. It allows objects to be -located in any kind of memory address space, in registers, be implicit values, -be undefined, or a composite of any of these. By extending DWARF carefully, -all existing DWARF expressions can retain their current semantic meaning. -DWARF has implicit conversions that convert from a value that represents an -address in the default address space to a memory location description. This -can be extended to allow a default address space memory location description -to be implicitly converted back to its address value. This allows all DWARF -Version 5 expressions to retain their same meaning, while adding the ability -to explicitly create memory location descriptions in non-default address -spaces and generalizing the power of composite location descriptions to any -kind of location description. See :ref:`amdgpu-dwarf-operation-expressions`. - -To allow composition of composite location descriptions, an explicit operation -that indicates the end of the definition of a composite location description -is required. This can be implied if the end of a DWARF expression is reached, -allowing current DWARF expressions to remain legal. See -``DW_OP_LLVM_piece_end``. 
- -The ``DW_OP_plus`` and ``DW_OP_minus`` can be defined to operate on a memory -location description in the default target architecture specific address space -and a generic type value to produce an updated memory location description. This -allows them to continue to be used to offset an address. To generalize -offsetting to any location description, including location descriptions that -describe when bytes are in registers, are implicit, or a composite of these, the -``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and -``DW_OP_LLVM_bit_offset`` offset operations are added. Unlike ``DW_OP_plus``, -``DW_OP_plus_uconst``, and ``DW_OP_minus`` arithmetic operations, these do not -define that integer overflow causes wrap-around. The offset operations can -operate on location storage of any size. For example, implicit location storage -could be any number of bits in size. It is simpler to define offsets that exceed -the size of the location storage as being an evaluation error, than having to -force an implementation to support potentially infinite precision offsets to -allow it to correctly track a series of positive and negative offsets that may -transiently overflow or underflow, but end up in range. This is simple for the -arithmetic operations as they are defined in terms of two's compliment -arithmetic on a base type of a fixed size. - -Having the offset operations allows ``DW_OP_push_object_address`` to push a -location description that may be in a register, or be an implicit value, and the -DWARF expression of ``DW_TAG_ptr_to_member_type`` can contain them to offset -within it. ``DW_OP_LLVM_bit_offset`` generalizes DWARF to work with bit fields -which is not possible in DWARF Version 5. +``DW_TAG_ptr_to_member_type``. The expression belongs to a source language type +which may apply to objects allocated in different kinds of storage. 
Therefore, +it is desirable that the expression that uses the address can do so without +regard to what kind of storage it specifies, including the address space of a +memory location description. For example, a pointer to member value may want to +be applied to an object that may reside in any address space. The DWARF ``DW_OP_xderef*`` operations allow a value to be converted into an address of a specified address space which is then read. But it provides no way to create a memory location description for an address in the non-default address space. For example, AMDGPU variables can be allocated in the local -address space at a fixed address. It is required to have an operation to -create an address in a specific address space that can be used to define the -location description of the variable. Defining this operation to produce a -location description allows the size of addresses in an address space to be -larger than the generic type. See ``DW_OP_LLVM_form_aspace_address``. - -If the ``DW_OP_LLVM_form_aspace_address`` operation had to produce a value -that can be implicitly converted to a memory location description, then it -would be limited to the size of the generic type which matches the size of the -default address space. Its value would be undefined and likely not match any -value in the actual program. By making the result a location description, it -allows a consumer great freedom in how it implements it. The implicit -conversion back to a value can be limited only to the default address space to -maintain compatibility with DWARF Version 5. For other address spaces the -producer can use the new operations that explicitly specify the address space. +address space at a fixed address. + +The ``DW_OP_LLVM_form_aspace_address`` (see +:ref:`amdgpu-dwarf-memory-location-description-operations`) operation is defined +to create a memory location description from an address and address space. 
It +can be used to specify the location of a variable that is allocated in a +specific address space. This allows the size of addresses in an address space to +be larger than the generic type. It also allows a consumer great implementation +freedom. It allows the implicit conversion back to a value to be limited only to +the default address space to maintain compatibility with DWARF Version 5. For +other address spaces the producer can use the new operations that explicitly +specify the address space. + +In contrast, if the ``DW_OP_LLVM_form_aspace_address`` operation had been +defined to produce a value, and an implicit conversion to a memory location +description was defined, then it would be limited to the size of the generic +type (which matches the size of the default address space). An implementation +would likely have to use *reserved ranges* of value to represent different +address spaces. Such a value would likely not match any address value in the +actual hardware. That would require the consumer to have special treatment for +such values. ``DW_OP_breg*`` treats the register as containing an address in the default -address space. It is required to be able to specify the address space of the -register value. See ``DW_OP_LLVM_aspace_bregx``. +address space. A ``DW_OP_LLVM_aspace_bregx`` (see +:ref:`amdgpu-dwarf-memory-location-description-operations`) operation is added +to allow the address space of the address held in a register to be specified. -Similarly, ``DW_OP_implicit_pointer`` treats its implicit pointer value as -being in the default address space. It is required to be able to specify the -address space of the pointer value. See -``DW_OP_LLVM_aspace_implicit_pointer``. +Similarly, ``DW_OP_implicit_pointer`` treats its implicit pointer value as being +in the default address space. A ``DW_OP_LLVM_aspace_implicit_pointer`` +(:ref:`amdgpu-dwarf-implicit-location-description-operations`) operation is +added to allow the address space to be specified. 
Almost all uses of addresses in DWARF are limited to defining location descriptions, or to be dereferenced to read memory. The exception is -``DW_CFA_val_offset`` which uses the address to set the value of a register. -By defining the CFA DWARF expression as being a memory location description, -it can maintain what address space it is, and that can be used to convert the -offset address back to an address in that address space. See +``DW_CFA_val_offset`` which uses the address to set the value of a register. In +order to support address spaces, the CFA DWARF expression is defined to be a +memory location description. This allows it to specify an address space which is +used to convert the offset address back to an address in that address space. See :ref:`amdgpu-dwarf-call-frame-information`. -This approach allows all existing DWARF to have the identical semantics. It -allows the compiler to explicitly specify the address space it is using. For -example, a compiler could choose to access private memory in a swizzled manner -when mapping a source language to a wavefront in a SIMT manner, or to access -it in an unswizzled manner if mapping the same language with the wavefront -being the thread. It also allows the compiler to mix the address space it uses -to access private memory. For example, for SIMT it can still spill entire -vector registers in an unswizzled manner, while using a swizzled private -memory for SIMT variable access. This approach allows memory location -descriptions for different address spaces to be combined using the regular -``DW_OP_*piece`` operations. - -Location descriptions are an abstraction of storage, they give freedom to the +This approach of extending memory location descriptions to support address +spaces, allows all existing DWARF Version 5 expressions to have the identical +semantics. It allows the compiler to explicitly specify the address space it is +using. 
For example, a compiler could choose to access private memory in a +swizzled manner when mapping a source language thread to the lane of a wavefront +in a SIMT manner. Or a compiler could choose to access it in an unswizzled +manner if mapping the same language with the wavefront being the thread. + +It also allows the compiler to mix the address space it uses to access private +memory. For example, for SIMT it can still spill entire vector registers in an +unswizzled manner, while using a swizzled private memory for SIMT variable +access. + +This approach also allows memory location descriptions for different address +spaces to be combined using the regular ``DW_OP_*piece`` operations. + +Location descriptions are an abstraction of storage. They give freedom to the consumer on how to implement them. They allow the address space to encode lane -information so they can be used to read memory with only the memory -description and no extra arguments. The same set of operations can operate on +information so they can be used to read memory with only the memory location +description and no extra information. The same set of operations can operate on locations independent of their kind of storage. The ``DW_OP_deref*`` therefore -can be used on any storage kind. ``DW_OP_xderef*`` is unnecessary, except to -become a more compact way to convert a non-default address space address -followed by dereferencing it. +can be used on any storage kind, including memory location descriptions of +different address spaces. Therefore, the ``DW_OP_xderef*`` operations are +unnecessary, except to become a more compact way to encode a non-default address +space address followed by dereferencing it. See +:ref:`amdgpu-dwarf-general-operations`. -In DWARF Version 5 a location description is defined as a single location -description or a location list. 
A location list is defined as either -effectively an undefined location description or as one or more single -location descriptions to describe an object with multiple places. The -``DW_OP_push_object_address`` and ``DW_OP_call*`` operations can put a -location description on the stack. Furthermore, debugger information entry -attributes such as ``DW_AT_data_member_location``, ``DW_AT_use_location``, and -``DW_AT_vtable_elem_location`` are defined as pushing a location description -on the expression stack before evaluating the expression. However, DWARF -Version 5 only allows the stack to contain values and so only a single memory -address can be on the stack which makes these incapable of handling location -descriptions with multiple places, or places other than memory. Since these -extensions allow the stack to contain location descriptions, the operations are -generalized to support location descriptions that can have multiple places. -This is backwards compatible with DWARF Version 5 and allows objects with -multiple places to be supported. For example, the expression that describes -how to access the field of an object can be evaluated with a location -description that has multiple places and will result in a location description -with multiple places as expected. With this change, the separate DWARF Version -5 sections that described DWARF expressions and location lists have been -unified into a single section that describes DWARF expressions in general. -This unification seems to be a natural consequence and a necessity of allowing -location descriptions to be part of the evaluation stack. +2.9 Support for Vector Base Types +--------------------------------- -For those familiar with the definition of location descriptions in DWARF Version -5, the definitions in these extensions are presented differently, but does -in fact define the same concept with the same fundamental semantics. 
However, -it does so in a way that allows the concept to extend to support address -spaces, bit addressing, the ability for composite location descriptions to be -composed of any kind of location description, and the ability to support -objects located at multiple places. Collectively these changes expand the set -of processors that can be supported and improves support for optimized code. - -Several approaches were considered, and the one presented appears to be the -cleanest and offers the greatest improvement of DWARF's ability to support -optimized code. Examining the GDB debugger and LLVM compiler, it appears only -to require modest changes as they both already have to support general use of -location descriptions. It is anticipated that will also be the case for other -debuggers and compilers. +The vector registers of the AMDGPU are represented as their full wavefront +size, meaning the wavefront size times the dword size. This reflects the +actual hardware and allows the compiler to generate DWARF for languages that +map a thread to the complete wavefront. It also allows more efficient DWARF to +be generated to describe the CFI as only a single expression is required for +the whole vector register, rather than a separate expression for each lane's +dword of the vector register. It also allows the compiler to produce DWARF +that indexes the vector register if it spills scalar registers into portions +of a vector register. -As an experiment, GDB was modified to evaluate DWARF Version 5 expressions -with location descriptions as stack entries and implicit conversions. All GDB -tests have passed, except one that turned out to be an invalid test by DWARF -Version 5 rules. The code in GDB actually became simpler as all evaluation was -on the stack and there was no longer a need to maintain a separate structure -for the location description result. This gives confidence of the backwards -compatibility. 
+Since DWARF stack value entries have a base type and AMDGPU registers are a +vector of dwords, the ability to specify that a base type is a vector is +required. + +See ``DW_AT_LLVM_vector_size`` in :ref:`amdgpu-dwarf-literal-operations`. + +.. _amdgpu-dwarf-operation-to-create-vector-composite-location-descriptions: + +2.10 DWARF Operations to Create Vector Composite Location Descriptions +---------------------------------------------------------------------- + +AMDGPU optimized code may spill vector registers to non-global address space +memory, and this spilling may be done only for SIMT lanes that are active on +entry to the subprogram. + +To support this, a composite location description that can be created as a +masked select is required. In addition, an operation that creates a composite +location description that is a vector on another location description is needed. + +An example that uses these operations is referenced in the +:ref:`amdgpu-dwarf-examples` appendix. + +See ``DW_OP_LLVM_select_bit_piece`` and ``DW_OP_LLVM_extend`` in +:ref:`amdgpu-dwarf-composite-location-description-operations`. + +2.11 DWARF Operation to Access Call Frame Entry Registers +--------------------------------------------------------- + +As described in +:ref:`amdgpu-dwarf-operation-to-create-vector-composite-location-descriptions`, +a DWARF expression involving the set of SIMT lanes active on entry to a +subprogram is required. The SIMT active lane mask may be held in a register that +is modified as the subprogram executes. However, its value may be saved on entry +to the subprogram. + +The Call Frame Information (CFI) already encodes such register saving, so it is +more efficient to provide an operation to return the location of a saved +register than have to generate a loclist to describe the same information. This +is now possible since +:ref:`amdgpu-dwarf-allow-location-description-on-the-dwarf-evaluation-stack` +allows location descriptions on the stack. 
+ +See ``DW_OP_LLVM_call_frame_entry_reg`` in +:ref:`amdgpu-dwarf-general-location-description-operations` and +:ref:`amdgpu-dwarf-call-frame-information`. + +2.12 Support for Source Languages Mapped to SIMT Hardware +--------------------------------------------------------- + +If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner, +then the variable DWARF location expressions must compute the location for a +single lane of the wavefront. Therefore, a DWARF operation is required to denote +the current lane, much like ``DW_OP_push_object_address`` denotes the current +object. + +See ``DW_OP_LLVM_push_lane`` in :ref:`amdgpu-dwarf-base-type-entries`. + +.. _amdgpu-dwarf-support-for-divergent-control-flow-of-simt-hardware: + +2.13 Support for Divergent Control Flow of SIMT Hardware +-------------------------------------------------------- + +If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner the +compiler can use the AMDGPU execution mask register to control which lanes are +active. To describe the conceptual location of non-active lanes requires an +attribute that has an expression that computes the source location PC for each +lane. + +For efficiency, the expression calculates the source location the wavefront as a +whole. This can be done using the ``DW_OP_LLVM_select_bit_piece`` (see +:ref:`amdgpu-dwarf-operation-to-create-vector-composite-location-descriptions`) +operation. + +The AMDGPU may update the execution mask to perform whole wavefront operations. +Therefore, there is a need for an attribute that computes the current active +lane mask. This can have an expression that may evaluate to the SIMT active lane +mask register or to a saved mask when in whole wavefront execution mode. + +An example that uses these attributes is referenced in the +:ref:`amdgpu-dwarf-examples` appendix. + +See ``DW_AT_LLVM_lane_pc`` and ``DW_AT_LLVM_active_lane`` in +:ref:`amdgpu-dwarf-composite-location-description-operations`. 
+ +2.14 Define Source Language Address Classes +------------------------------------------- + +AMDGPU supports languages, such as OpenCL [:ref:`OpenCL `], +that define source language address classes. Support is added to define language +specific address classes so they can be used in a consistent way by consumers. + +It would also be desirable to add support for using address classes in defining +source language types. DWARF Version 5 only supports using target architecture +specific address spaces. + +See :ref:`amdgpu-dwarf-segment_addresses`. + +2.15 Define Augmentation Strings to Support Multiple Extensions +--------------------------------------------------------------- + +A ``DW_AT_LLVM_augmentation`` attribute is added to a compilation unit debugger +information entry to indicate that there is additional target architecture +specific information in the debugging information entries of that compilation +unit. This allows a consumer to know what extensions are present in the debugger +information entries as is possible with the augmentation string of other +sections. See . + +The format that should be used for an augmentation string is also recommended. +This allows a consumer to parse the string when it contains information from +multiple vendors. Augmentation strings occur in the ``DW_AT_LLVM_augmentation`` +attribute, in the lookup by name table, and in the CFI Common Information Entry +(CIE). -Since the AMDGPU supports languages such as OpenCL [:ref:`OpenCL -`], there is a need to define source language address -classes so they can be used in a consistent way by consumers. It would also be -desirable to add support for using them in defining language types rather than -the current target architecture specific address spaces. See -:ref:`amdgpu-dwarf-segment_addresses`. 
- -A ``DW_AT_LLVM_augmentation`` attribute is added to a compilation unit -debugger information entry to indicate that there is additional target -architecture specific information in the debugging information entries of that -compilation unit. This allows a consumer to know what extensions are present -in the debugger information entries as is possible with the augmentation -string of other sections. The format that should be used for the augmentation -string in the lookup by name table and CFI Common Information Entry is also -recommended to allow a consumer to parse the string when it contains -information from multiple vendors. - -The AMDGPU supports programming languages that include online compilation -where the source text may be created at runtime. Therefore, a way to embed the -source text in the debug information is required. For example, the OpenCL -language runtime supports online compilation. See -:ref:`amdgpu-dwarf-line-number-information`. - -Support to allow MD5 checksums to be optionally present in the line table is -added. This allows linking together compilation units where some have MD5 -checksums and some do not. In DWARF Version 5 the file timestamp and file size -can be optional, but if the MD5 checksum is present it must be valid for all -files. See :ref:`amdgpu-dwarf-line-number-information`. - -Support is added for the HIP programming language [:ref:`HIP -`] which is supported by the AMDGPU. See -:ref:`amdgpu-dwarf-language-names`. - -The following sections provide the definitions for the additional operations, -as well as clarifying how existing expression operations, CFI operations, and -attributes behave with respect to generalized location descriptions that -support address spaces and location descriptions that support multiple places. -It has been defined such that it is backwards compatible with DWARF Version 5. 
-The definitions are intended to fully define well-formed DWARF in a consistent -style based on the DWARF Version 5 specification. Non-normative text is shown -in *italics*. - -The names for the new operations, attributes, and constants include "\ -``LLVM``\ " and are encoded with vendor specific codes so these extensions can -be implemented as an LLVM vendor extension to DWARF Version 5. If accepted these -names would not include the "\ ``LLVM``\ " and would not use encodings in the -vendor range. - -The extensions are described in -:ref:`amdgpu-dwarf-changes-relative-to-dwarf-version-5` and are -organized to follow the section ordering of DWARF Version 5. It includes notes -to indicate the corresponding DWARF Version 5 sections to which they pertain. -Other notes describe additional changes that may be worth considering, and to -raise questions. +See :ref:`amdgpu-dwarf-full-and-partial-compilation-unit-entries`, +:ref:`amdgpu-dwarf-name-index-section-header`, and +:ref:`amdgpu-dwarf-structure_of-call-frame-information`. + +2.16 Support Embedding Source Text for Online Compilation +--------------------------------------------------------- + +AMDGPU supports programming languages that include online compilation where the +source text may be created at runtime. For example, the OpenCL and HIP language +runtimes support online compilation. To support this, a way to embed the source +text in the debug information is provided. + +See :ref:`amdgpu-dwarf-line-number-information`. + +2.17 Allow MD5 Checksums to be Optionally Present +------------------------------------------------- + +In DWARF Version 5 the file timestamp and file size can be optional, but if the +MD5 checksum is present it must be valid for all files. This is a problem if +using link time optimization to combine compilation units where some have MD5 +checksums and some do not. Therefore, support to allow MD5 checksums to be +optionally present in the line table is added. 
+ +See :ref:`amdgpu-dwarf-line-number-information`. + +2.18 Add the HIP Programming Language +------------------------------------- + +The HIP programming language [:ref:`HIP `], which is supported +by the AMDGPU, is added. + +See :ref:`amdgpu-dwarf-language-names-table`. .. _amdgpu-dwarf-changes-relative-to-dwarf-version-5: -Changes Relative to DWARF Version 5 -=================================== +A. Changes Relative to DWARF Version 5 +====================================== + +.. note:: + + This appendix provides changes relative to DWARF Version 5. It has been + defined such that it is backwards compatible with DWARF Version 5. + Non-normative text is shown in *italics*. The section numbers generally + correspond to those in the DWARF Version 5 standard unless specified + otherwise. Definitions are given for the additional operations, as well as + clarifying how existing expression operations, CFI operations, and attributes + behave with respect to generalized location descriptions that support address + spaces and multiple places. + + The names for the new operations, attributes, and constants include "\ + ``LLVM``\ " and are encoded with vendor specific codes so these extensions can + be implemented as an LLVM vendor extension to DWARF Version 5. + + .. note:: + + Notes are included to describe how the changes are to be applied to the + DWARF Version 5 standard. They also describe rationale and issues that may + need further consideration. -General Description -------------------- +A.2 General Description +----------------------- -Attribute Types -~~~~~~~~~~~~~~~ +A.2.2 Attribute Types +~~~~~~~~~~~~~~~~~~~~~ .. note:: This augments DWARF Version 5 section 2.2 and Table 2.2. -The following table provides the additional attributes. See -:ref:`amdgpu-dwarf-debugging-information-entry-attributes`. +The following table provides the additional attributes. .. 
table:: Attribute names :name: amdgpu-dwarf-attribute-names-table @@ -441,17 +599,17 @@ The following table provides the additional attributes. See =========================== ==================================== Attribute Usage =========================== ==================================== - ``DW_AT_LLVM_active_lane`` SIMD or SIMT active lanes - ``DW_AT_LLVM_augmentation`` Compilation unit augmentation string - ``DW_AT_LLVM_lane_pc`` SIMD or SIMT lane program location - ``DW_AT_LLVM_lanes`` SIMD or SIMT thread lane count - ``DW_AT_LLVM_vector_size`` Base type vector size + ``DW_AT_LLVM_active_lane`` SIMD or SIMT active lanes (see :ref:`amdgpu-dwarf-low-level-information`) + ``DW_AT_LLVM_augmentation`` Compilation unit augmentation string (see :ref:`amdgpu-dwarf-full-and-partial-compilation-unit-entries`) + ``DW_AT_LLVM_lane_pc`` SIMD or SIMT lane program location (see :ref:`amdgpu-dwarf-low-level-information`) + ``DW_AT_LLVM_lanes`` SIMD or SIMT thread lane count (see :ref:`amdgpu-dwarf-low-level-information`) + ``DW_AT_LLVM_vector_size`` Base type vector size (see :ref:`amdgpu-dwarf-base-type-entries`) =========================== ==================================== .. _amdgpu-dwarf-expressions: -DWARF Expressions -~~~~~~~~~~~~~~~~~ +A.2.5 DWARF Expressions +~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -506,8 +664,8 @@ A DWARF expression can either be encoded as an operation expression (see .. _amdgpu-dwarf-expression-evaluation-context: -DWARF Expression Evaluation Context -+++++++++++++++++++++++++++++++++++ +A.2.5.1 DWARF Expression Evaluation Context ++++++++++++++++++++++++++++++++++++++++++++ A DWARF expression is evaluated in a context that can include a number of context elements. If multiple context elements are specified then they must be @@ -526,9 +684,9 @@ elements that can be specified are: It is required for operations that are related to target architecture threads. 
- *For example, the* ``DW_OP_form_tls_address`` *operation and* - ``DW_OP_LLVM_form_aspace_address`` *operation when given an address space that - is thread specific.* + *For example, the* ``DW_OP_regval_type`` *operation, or the* + ``DW_OP_form_tls_address`` *and* ``DW_OP_LLVM_form_aspace_address`` + *operations when given an address space that is thread specific.* *A current lane* @@ -618,10 +776,10 @@ elements that can be specified are: *Note that this compilation unit may not be the same as the compilation unit determined from the loaded code object corresponding to the current program - location. For example, the evaluation of the expression E associated with a - ``DW_AT_location`` attribute of the debug information entry operand of the - ``DW_OP_call*`` operations is evaluated with the compilation unit that - contains E and not the one that contains the ``DW_OP_call*`` operation + location. For example, the evaluation of the expression E associated with a* + ``DW_AT_location`` *attribute of the debug information entry operand of the* + ``DW_OP_call*`` *operations is evaluated with the compilation unit that + contains E and not the one that contains the* ``DW_OP_call*`` *operation expression.* *A current target architecture* @@ -641,7 +799,7 @@ elements that can be specified are: must be the same as the target architecture of the current thread. * If the current compilation unit is specified, then the current target - architecture default address space address size must be the same as he + architecture default address space address size must be the same as the ``address_size`` field in the header of the current compilation unit and any associated entry in the ``.debug_aranges`` section. @@ -651,7 +809,7 @@ elements that can be specified are: corresponding to the current program location. 
* If the current program location is specified, then the current target - architecture default address space address size must be the same as he + architecture default address space address size must be the same as the ``address_size`` field in the header of any entry corresponding to the current program location in the ``.debug_addr``, ``.debug_line``, ``.debug_rnglists``, ``.debug_rnglists.dwo``, ``.debug_loclists``, and @@ -666,9 +824,8 @@ elements that can be specified are: It is required for the ``DW_OP_push_object_address`` operation. *For example, the* ``DW_AT_data_location`` *attribute on type debug - information entries specifies the the program object corresponding to a - runtime descriptor as the current object when it evaluates its associated - expression.* + information entries specifies the program object corresponding to a runtime + descriptor as the current object when it evaluates its associated expression.* The result is undefined if the location descriptor is invalid (see :ref:`amdgpu-dwarf-location-description`). @@ -689,7 +846,7 @@ elements that can be specified are: If the evaluation requires a context element that is not specified, then the result of the evaluation is an error. -*A DWARF expression for the location description may be able to be evaluated +*A DWARF expression for a location description may be able to be evaluated without a thread, lane, call frame, program location, or architecture context. For example, the location of a global variable may be able to be evaluated without such context. If the expression evaluates with an error then it may @@ -707,8 +864,8 @@ not match. .. _amdgpu-dwarf-expression-value: -DWARF Expression Value -++++++++++++++++++++++ +A.2.5.2 DWARF Expression Value +++++++++++++++++++++++++++++++ A value has a type and a literal value. It can represent a literal value of any supported base type of the target architecture. 
The base type specifies the @@ -744,8 +901,8 @@ inclusive range ``DW_ATE_lo_user`` to ``DW_ATE_hi_user``. .. _amdgpu-dwarf-location-description: -DWARF Location Description -++++++++++++++++++++++++++ +A.2.5.3 DWARF Location Description +++++++++++++++++++++++++++++++++++ *Debugging information must provide consumers a way to find the location of program variables, determine the bounds of dynamic arrays and strings, and @@ -799,16 +956,19 @@ There are five kinds of location storage: provided by the operations. *Location descriptions are a language independent representation of addressing -rules. They are created using DWARF operation expressions of arbitrary -complexity. They can be the result of evaluating a debugger information entry -attribute that specifies an operation expression. In this usage they can -describe the location of an object as long as its lifetime is either static or -the same as the lexical block (see DWARF Version 5 section 3.5) that owns it, -and it does not move during its lifetime. They can be the result of evaluating a -debugger information entry attribute that specifies a location list expression. -In this usage they can describe the location of an object that has a limited -lifetime, changes its location during its lifetime, or has multiple locations -over part or all of its lifetime.* +rules.* + +* *They can be the result of evaluating a debugger information entry attribute + that specifies an operation expression of arbitrary complexity. In this usage + they can describe the location of an object as long as its lifetime is either + static or the same as the lexical block (see + :ref:`amdgpu-dwarf-lexical-block-entries`) that owns it, and it does not move + during its lifetime.* + +* *They can be the result of evaluating a debugger information entry attribute + that specifies a location list expression. 
In this usage they can describe the + location of an object that has a limited lifetime, changes its location during + its lifetime, or has multiple locations over part or all of its lifetime.* If a location description has more than one single location description, the DWARF expression is ill-formed if the object value held in each single location @@ -884,8 +1044,8 @@ their program locations change.* .. _amdgpu-dwarf-operation-expressions: -DWARF Operation Expressions -+++++++++++++++++++++++++++ +A.2.5.4 DWARF Operation Expressions ++++++++++++++++++++++++++++++++++++ An operation expression is comprised of a stream of operations, each consisting of an opcode followed by zero or more operands. The number of operands is @@ -963,7 +1123,7 @@ An operation expression is encoded as a byte block with some form of prefix that specifies the byte count. It can be used: * as the value of a debugging information entry attribute that is encoded using - class ``exprloc`` (see DWARF Version 5 section 7.5.5), + class ``exprloc`` (see :ref:`amdgpu-dwarf-classes-and-forms`), * as the operand to certain operation expression operations, @@ -975,8 +1135,12 @@ specifies the byte count. It can be used: .. _amdgpu-dwarf-stack-operations: -Stack Operations -################ +A.2.5.4.1 Stack Operations +########################## + +.. note:: + + This section replaces DWARF Version 5 section 2.5.1.3. The following operations manipulate the DWARF stack. Operations that index the stack assume that the top of the stack (most recently added entry) has index 0. @@ -1018,7 +1182,7 @@ expression is ill-formed. ``DW_OP_over`` pushes a copy of the entry with index 1. - *This is equivalent to a ``DW_OP_pick 1`` operation.* + *This is equivalent to a* ``DW_OP_pick 1`` *operation.* 5. ``DW_OP_swap`` @@ -1034,8 +1198,12 @@ expression is ill-formed. .. 
_amdgpu-dwarf-control-flow-operations: -Control Flow Operations -####################### +A.2.5.4.2 Control Flow Operations +################################# + +.. note:: + + This section replaces DWARF Version 5 section 2.5.1.5. The following operations provide simple control of the flow of a DWARF operation expression. @@ -1097,7 +1265,7 @@ expression. relative to the beginning of the ``.debug_info`` section that contains the current compilation unit. D may not be in the current compilation unit. - .. note: + .. note:: DWARF Version 5 states that DR can be an offset in a ``.debug_info`` section other than the one that contains the current compilation unit. It @@ -1176,14 +1344,14 @@ expression. entry is to push just one location description on the stack. That location description may have more than one single location description. - The previous rule for ``exprloc`` also has the same problem as normally + The previous rule for ``exprloc`` also has the same problem, as normally a variable or formal parameter location expression may leave multiple entries on the stack and only return the top entry. GDB implements ``DW_OP_call*`` by always executing E on the same stack. If the location list has multiple matching entries, it simply picks the first one and ignores the rest. This seems fundamentally at odds with - the desire to supporting multiple places for variables. + the desire to support multiple places for variables. So, it feels like ``DW_OP_call*`` should both support pushing a location description on the stack for a variable or formal parameter, and also @@ -1234,8 +1402,8 @@ expression. *This allows a call operation to be used to compute the location description for any variable or formal parameter regardless of whether the - producer has optimized it to a constant. This is consistent with the - ``DW_OP_implicit_pointer`` operation.* + producer has optimized it to a constant. This is consistent with the* + ``DW_OP_implicit_pointer`` *operation.* .. 
note:: @@ -1264,12 +1432,12 @@ expression. .. _amdgpu-dwarf-value-operations: -Value Operations -################ +A.2.5.4.3 Value Operations +########################## This section describes the operations that push values on the stack. -Each value stack entry has a type and a literal value and can represent a +Each value stack entry has a type and a literal value. It can represent a literal value of any supported base type of the target architecture. The base type specifies the size, encoding, and endianity of the literal value. @@ -1277,8 +1445,12 @@ The base type of value stack entries can be the distinguished generic type. .. _amdgpu-dwarf-literal-operations: -Literal Operations -^^^^^^^^^^^^^^^^^^ +A.2.5.4.3.1 Literal Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces DWARF Version 5 section 2.5.1.1. The following operations all push a literal value onto the DWARF stack. @@ -1325,7 +1497,7 @@ size and the low-order bits used. link-time relocation but should not be interpreted by the consumer as a relocatable address (for example, offsets to thread-local storage).* -9. ``DW_OP_const_type`` +7. ``DW_OP_const_type`` ``DW_OP_const_type`` has three operands. The first is an unsigned LEB128 integer DR that represents the byte offset of a debugging information entry @@ -1346,7 +1518,7 @@ size and the low-order bits used. operation can be parsed easily without reference to the* ``.debug_info`` *section.* -10. ``DW_OP_LLVM_push_lane`` *New* +8. ``DW_OP_LLVM_push_lane`` *New* ``DW_OP_LLVM_push_lane`` pushes the target architecture lane identifier of the current lane as a value with the generic type. @@ -1357,8 +1529,8 @@ size and the low-order bits used. .. _amdgpu-dwarf-arithmetic-logical-operations: -Arithmetic and Logical Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.3.2 Arithmetic and Logical Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. note:: @@ -1366,8 +1538,8 @@ Arithmetic and Logical Operations .. 
_amdgpu-dwarf-type-conversions-operations: -Type Conversion Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.3.3 Type Conversion Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. note:: @@ -1375,8 +1547,13 @@ Type Conversion Operations .. _amdgpu-dwarf-general-operations: -Special Value Operations -^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.3.4 Special Value Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces parts of DWARF Version 5 sections 2.5.1.2, 2.5.1.3, and + 2.5.1.7. There are these special value operations currently defined: @@ -1511,8 +1688,8 @@ There are these special value operations currently defined: undefined location storage or the offset of any bit exceeds the size of the location storage LS specified by any single location description SL of L. - See :ref:`amdgpu-dwarf-implicit-location-descriptions` for special rules - concerning implicit location descriptions created by the + See :ref:`amdgpu-dwarf-implicit-location-description-operations` for special + rules concerning implicit location descriptions created by the ``DW_OP_implicit_pointer`` and ``DW_OP_LLVM_implicit_aspace_pointer`` operations. @@ -1559,8 +1736,8 @@ There are these special value operations currently defined: represents a target architecture specific address space identifier AS. The operation is equivalent to performing ``DW_OP_swap; - DW_OP_LLVM_form_aspace_address; DW_OP_deref_type S R``. The value V - retrieved is left on the stack with the type D. + DW_OP_LLVM_form_aspace_address; DW_OP_deref_type S DR``. The value V + retrieved is left on the stack with the type T. *This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` *operation can be used and provides greater expressiveness.* @@ -1585,17 +1762,17 @@ There are these special value operations currently defined: frame information (see :ref:`amdgpu-dwarf-call-frame-information`). 
If the result of E is a location description L (see - :ref:`amdgpu-dwarf-register-location-descriptions`), and the last operation - executed by E is a ``DW_OP_reg*`` for register R with a target architecture - specific base type of T, then the contents of the register are retrieved as - if a ``DW_OP_deref_type DR`` operation was performed where DR is the offset - of a hypothetical debug information entry in the current compilation unit - for T. The resulting value V s pushed on the stack. + :ref:`amdgpu-dwarf-register-location-description-operations`), and the last + operation executed by E is a ``DW_OP_reg*`` for register R with a target + architecture specific base type of T, then the contents of the register are + retrieved as if a ``DW_OP_deref_type DR`` operation was performed where DR + is the offset of a hypothetical debug information entry in the current + compilation unit for T. The resulting value V is pushed on the stack. *Using* ``DW_OP_reg*`` *provides a more compact form for the case where the value was in a register on entry to the subprogram.* - .. note: + .. note:: It is unclear how this provides a more compact expression, as ``DW_OP_regval_type`` could be used which is marginally larger. @@ -1621,14 +1798,20 @@ There are these special value operations currently defined: .. _amdgpu-dwarf-location-description-operations: -Location Description Operations -############################### +A.2.5.4.4 Location Description Operations +######################################### This section describes the operations that push location descriptions on the stack. -General Location Description Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. _amdgpu-dwarf-general-location-description-operations: + +A.2.5.4.4.1 General Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces part of DWARF Version 5 section 2.5.1.3. 1. 
``DW_OP_LLVM_offset`` *New* @@ -1687,15 +1870,33 @@ General Location Description Operations expression evaluation.* *This operation provides explicit functionality (especially for arrays - involving descriptions) that is analogous to the implicit push of the base - location description of a structure prior to evaluation of a - ``DW_AT_data_member_location`` to access a data member of a structure.* + involving descriptors) that is analogous to the implicit push of the base + location description of a structure prior to evaluation of a* + ``DW_AT_data_member_location`` *to access a data member of a structure.* .. note:: This operation could be removed and the object location description specified as the initial stack as for ``DW_AT_data_member_location``. + Or this operation could be used instead of needing to specify an initial + stack. The latter approach is more composable as access to the object may + be needed at any point of the expression, and passing it as the initial + stack requires the entire expression to be aware where on the stack it is. + If this were done, ``DW_AT_use_location`` would require a + ``DW_OP_push_object2_address`` operation for the second object. + + Or a more general way to pass an arbitrary number of arguments in and an + operation to get the Nth one such as ``DW_OP_arg N``. A vector of + arguments would then be passed in the expression context rather than an + initial stack. This could also resolve the issues with ``DW_OP_call*`` by + allowing a specific number of arguments passed in and returned to be + specified. The ``DW_OP_call*`` operation could then always execute on a + separate stack: the number of arguments would be specified in a new call + operation and taken from the callers stack, and similarly the number of + return results specified and copied from the called stack back to the + callee stack when the called expression was complete. 
+ The only attribute that specifies a current object is ``DW_AT_data_location`` so the non-normative text seems to overstate how this is being used. Or are there other attributes that need to state they @@ -1717,8 +1918,12 @@ General Location Description Operations .. _amdgpu-dwarf-undefined-location-description-operations: -Undefined Location Description Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.4.2 Undefined Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces DWARF Version 5 section 2.6.1.1.1. *The undefined location storage represents a piece or all of an object that is present in the source but not in the object code (perhaps due to optimization). @@ -1739,8 +1944,13 @@ in a part with all undefined bits. .. _amdgpu-dwarf-memory-location-description-operations: -Memory Location Description Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.4.3 Memory Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces parts of DWARF Version 5 section 2.5.1.1, 2.5.1.2, + 2.5.1.3, and 2.6.1.1.2. Each of the target architecture specific address spaces has a corresponding memory location storage that denotes the linear addressable memory of that @@ -1796,10 +2006,9 @@ bit offset equal to V scaled by 8 (the byte size). description L with a one memory location description SL. If the type size of V is less than the generic type size, then the value V is zero extended to the size of the generic type. The least significant generic type size bits - are treated as a twos-complement unsigned value to be used as an address A. - SL specifies memory location storage corresponding to the target - architecture default address space with a bit offset equal to A scaled by 8 - (the byte size). + are treated as an unsigned value to be used as an address A. 
SL specifies + memory location storage corresponding to the target architecture default + address space with a bit offset equal to A scaled by 8 (the byte size). The implicit conversion could also be defined as target architecture specific. For example, GDB checks if V is an integral type. If it is not it gives an @@ -1812,7 +2021,7 @@ If a stack entry is required to be a location description, but it is an implicit pointer value IPV with the target architecture default address space, then it is implicitly converted to a location description with one single location description specified by IPV. See -:ref:`amdgpu-dwarf-implicit-location-descriptions`. +:ref:`amdgpu-dwarf-implicit-location-description-operations`. .. note:: @@ -1869,8 +2078,8 @@ type. The address size S is defined as the address bit size of the target architecture specific address space that corresponds to AS. - A is adjusted to S bits by zero extending if necessary, and then treating the - least significant S bits as a twos-complement unsigned value A'. + A is adjusted to S bits by zero extending if necessary, and then treating + the least significant S bits as an unsigned value A'. It pushes a location description L with one memory location description SL on the stack. SL specifies the memory location storage LS that corresponds @@ -1890,8 +2099,8 @@ type. The DWARF expression is ill-formed if AS is not one of the values defined by the target architecture specific ``DW_ASPACE_*`` values. - See :ref:`amdgpu-dwarf-implicit-location-descriptions` for special rules - concerning implicit pointer values produced by dereferencing implicit + See :ref:`amdgpu-dwarf-implicit-location-description-operations` for special + rules concerning implicit pointer values produced by dereferencing implicit location descriptions created by the ``DW_OP_implicit_pointer`` and ``DW_OP_LLVM_implicit_aspace_pointer`` operations. @@ -1950,7 +2159,7 @@ type. 
The location description L for the *frame base* of the current subprogram is obtained from the ``DW_AT_frame_base`` attribute of the debugger information entry corresponding to the current subprogram as described in - :ref:`amdgpu-dwarf-debugging-information-entry-attributes`. + :ref:`amdgpu-dwarf-low-level-information`. The location description L is updated as if the ``DW_OP_LLVM_offset_uconst B`` operation was applied. The updated L is pushed on the stack. @@ -2010,10 +2219,14 @@ type. Could also consider adding ``DW_OP_aspace_breg0, DW_OP_aspace_breg1, ..., DW_OP_aspace_bref31`` which would save encoding size. -.. _amdgpu-dwarf-register-location-descriptions: +.. _amdgpu-dwarf-register-location-description-operations: + +A.2.5.4.4.4 Register Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: -Register Location Description Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + This section replaces DWARF Version 5 section 2.6.1.1.3. There is a register location storage that corresponds to each of the target architecture registers. The size of each register location storage corresponds @@ -2062,10 +2275,14 @@ register, it is necessary to use* ``DW_OP_regval_type``\ *, use one of the* ``DW_OP_breg*`` *register-based addressing operations, or use* ``DW_OP_deref*`` *on a register location description.* -.. _amdgpu-dwarf-implicit-location-descriptions: +.. _amdgpu-dwarf-implicit-location-description-operations: -Implicit Location Description Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.4.5 Implicit Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces DWARF Version 5 section 2.6.1.1.4. Implicit location storage represents a piece or all of an object which has no actual location in the program but whose contents are nonetheless known, either @@ -2103,8 +2320,8 @@ implicit storage value starting at the bit offset. 
location description specifies the actual value of the object, rather than specifying the memory or register storage that holds the value.* - See :ref:`amdgpu-dwarf-implicit-location-descriptions` for special rules - concerning implicit pointer values produced by dereferencing implicit + See :ref:`amdgpu-dwarf-implicit-location-description-operations` for special + rules concerning implicit pointer values produced by dereferencing implicit location descriptions created by the ``DW_OP_implicit_pointer`` and ``DW_OP_LLVM_implicit_aspace_pointer`` operations. @@ -2218,7 +2435,7 @@ implicit storage value starting at the bit offset. *The restrictions on how an implicit pointer location description created by* ``DW_OP_implicit_pointer`` *and* ``DW_OP_LLVM_aspace_implicit_pointer`` *can be used are to simplify the DWARF consumer. Similarly, for an implicit - pointer value created by* ``DW_OP_deref*`` *and* ``DW_OP_stack_value``\ .* + pointer value created by* ``DW_OP_deref*`` *and* ``DW_OP_stack_value``\ *.* 4. ``DW_OP_LLVM_aspace_implicit_pointer`` *New* @@ -2259,13 +2476,17 @@ object pointed to by that pointer object.* ``DW_AT_location`` *or* ``DW_AT_const_value`` *attribute (for example,* ``DW_TAG_dwarf_procedure``\ *). By using E*\ :sub:`2`\ *, a consumer can reconstruct the value of the object when asked to dereference the pointer -described by E*\ :sub:`1` *which contains the* ``DW_OP_implicit_pointer`` or +described by E*\ :sub:`1` *which contains the* ``DW_OP_implicit_pointer`` *or* ``DW_OP_LLVM_aspace_implicit_pointer`` *operation.* .. _amdgpu-dwarf-composite-location-description-operations: -Composite Location Description Operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A.2.5.4.4.6 Composite Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section replaces DWARF Version 5 section 2.6.1.2. 
A composite location storage represents an object or value which may be contained in part of another location storage or contained in parts of more @@ -2480,8 +2701,12 @@ compatible with the definitions in DWARF Version 5.* .. _amdgpu-dwarf-location-list-expressions: -DWARF Location List Expressions -+++++++++++++++++++++++++++++++ +A.2.5.5 DWARF Location List Expressions ++++++++++++++++++++++++++++++++++++++++ + +.. note:: + + This section replaces DWARF Version 5 section 2.6.2. *To meet the needs of recent computer architectures and optimization techniques, debugging information must be able to describe the location of an object whose @@ -2573,10 +2798,10 @@ The result of the evaluation of a DWARF location list expression is: A location list expression can only be used as the value of a debugger information entry attribute that is encoded using class ``loclist`` or -``loclistsptr`` (see DWARF Version 5 section 7.5.5). The value of the attribute -provides an index into a separate object file section called ``.debug_loclists`` -or ``.debug_loclists.dwo`` (for split DWARF object files) that contains the -location list entries. +``loclistsptr`` (see :ref:`amdgpu-dwarf-classes-and-forms`). The value of the +attribute provides an index into a separate object file section called +``.debug_loclists`` or ``.debug_loclists.dwo`` (for split DWARF object files) +that contains the location list entries. A ``DW_OP_call*`` and ``DW_OP_implicit_pointer`` operation can be used to specify a debugger information entry attribute that has a location list @@ -2596,8 +2821,8 @@ previously needed for location list expressions.* .. _amdgpu-dwarf-segment_addresses: -Segmented Addresses -~~~~~~~~~~~~~~~~~~~ +A.2.12 Segmented Addresses +~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -2798,69 +3023,106 @@ DWARF address space identifiers are used by: operations may be needed. The legal casts between address classes may need to be defined on a per language address class basis. -.. 
_amdgpu-dwarf-debugging-information-entry-attributes: - -Debugging Information Entry Attributes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A.3 Program Scope Entries +------------------------- .. note:: This section provides changes to existing debugger information entry - attributes and defines attributes added by these extensions. These would be - incorporated into the appropriate DWARF Version 5 chapter 2 sections. + attributes. These would be incorporated into the corresponding DWARF Version 5 + chapter 3 sections. -1. ``DW_AT_location`` +A.3.1 Unit Entries +~~~~~~~~~~~~~~~~~~ - Any debugging information entry describing a data object (which includes - variables and parameters) or common blocks may have a ``DW_AT_location`` - attribute, whose value is a DWARF expression E. +.. _amdgpu-dwarf-full-and-partial-compilation-unit-entries: - The result of the attribute is obtained by evaluating E with a context that - has a result kind of a location description, an unspecified object, the - compilation unit that contains E, an empty initial stack, and other context - elements corresponding to the source language thread of execution upon which - the user is focused, if any. The result of the evaluation is the location - description of the base of the data object. +A.3.1.1 Full and Partial Compilation Unit Entries ++++++++++++++++++++++++++++++++++++++++++++++++++ - See :ref:`amdgpu-dwarf-control-flow-operations` for special evaluation rules - used by the ``DW_OP_call*`` operations. +.. note:: - .. note:: + This augments DWARF Version 5 section 3.1.1 and Table 3.1. - Delete the description of how the ``DW_OP_call*`` operations evaluate a - ``DW_AT_location`` attribute as that is now described in the operations. +Additional language codes defined for use with the ``DW_AT_language`` attribute +are defined in :ref:`amdgpu-dwarf-language-names-table`. - .. note:: +.. 
table:: Language Names + :name: amdgpu-dwarf-language-names-table - See the discussion about the ``DW_AT_location`` attribute in the - ``DW_OP_call*`` operation. Having each attribute only have a single - purpose and single execution semantics seems desirable. It makes it easier - for the consumer that no longer have to track the context. It makes it - easier for the producer as it can rely on a single semantics for each - attribute. + ==================== ============================= + Language Name Meaning + ==================== ============================= + ``DW_LANG_LLVM_HIP`` HIP Language. + ==================== ============================= - For that reason, limiting the ``DW_AT_location`` attribute to only - supporting evaluating the location description of an object, and using a - different attribute and encoding class for the evaluation of DWARF - expression *procedures* on the same operation expression stack seems - desirable. +The HIP language [:ref:`HIP `] can be supported by extending +the C++ language. -2. ``DW_AT_const_value`` +.. note:: - .. note:: + The following new attribute is added. - Could deprecate using the ``DW_AT_const_value`` attribute for - ``DW_TAG_variable`` or ``DW_TAG_formal_parameter`` debugger information - entries that have been optimized to a constant. Instead, - ``DW_AT_location`` could be used with a DWARF expression that produces an - implicit location description now that any location description can be - used within a DWARF expression. This allows the ``DW_OP_call*`` operations - to be used to push the location description of any variable regardless of - how it is optimized. +1. A ``DW_TAG_compile_unit`` debugger information entry for a compilation unit + may have a ``DW_AT_LLVM_augmentation`` attribute, whose value is an + augmentation string. -3. 
``DW_AT_frame_base`` + *The augmentation string allows producers to indicate that there is + additional vendor or target specific information in the debugging + information entries. For example, this might be information about the + version of vendor specific extensions that are being used.* - A ``DW_TAG_subprogram`` or ``DW_TAG_entry_point`` debugger information entry + If not present, or if the string is empty, then the compilation unit has no + augmentation string. + + The format for the augmentation string is: + + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + + Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y + version number of the extensions used, and *options* is an optional string + providing additional information about the extensions. The version number + must conform to semantic versioning [:ref:`SEMVER `]. + The *options* string must not contain the "\ ``]``\ " character. + + For example: + + :: + + [abc:v0.0][def:v1.2:feature-a=on,feature-b=3] + +A.3.3 Subroutine and Entry Point Entries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _amdgpu-dwarf-low-level-information: + +A.3.3.5 Low-Level Information ++++++++++++++++++++++++++++++ + +1. A ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugger information entry may have a + ``DW_AT_return_addr`` attribute, whose value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. The result of the evaluation is the location + description L of the place where the return address for the current call + frame's subprogram or entry point is stored. 
+ + The DWARF is ill-formed if L is not comprised of one memory location + description for one of the target architecture specific address spaces. + + .. note:: + + It is unclear why ``DW_TAG_inlined_subroutine`` has a + ``DW_AT_return_addr`` attribute but not a ``DW_AT_frame_base`` or + ``DW_AT_static_link`` attribute. Seems it would either have all of them or + none. Since inlined subprograms do not have a call frame it seems they + would have none of these attributes. + +2. A ``DW_TAG_subprogram`` or ``DW_TAG_entry_point`` debugger information entry may have a ``DW_AT_frame_base`` attribute, whose value is a DWARF expression E. @@ -2874,12 +3136,12 @@ Debugging Information Entry Attributes resulting location description L is not comprised of one single location description SL. - If SL a register location description for register R, then L is replaced + If SL is a register location description for register R, then L is replaced with the result of evaluating a ``DW_OP_bregx R, 0`` operation. This computes the frame base memory location description in the target architecture default address space. - *This allows the more compact* ``DW_OPreg*`` *to be used instead of* + *This allows the more compact* ``DW_OP_reg*`` *to be used instead of* ``DW_OP_breg* 0``\ *.* .. note:: @@ -2897,120 +3159,7 @@ Debugging Information Entry Attributes *Typically, E will use the* ``DW_OP_call_frame_cfa`` *operation or be a stack pointer register plus or minus some offset.* -4. ``DW_AT_data_member_location`` - - For a ``DW_AT_data_member_location`` attribute there are two cases: - - 1. If the attribute is an integer constant B, it provides the offset in - bytes from the beginning of the containing entity. - - The result of the attribute is obtained by evaluating a - ``DW_OP_LLVM_offset B`` operation with an initial stack comprising the - location description of the beginning of the containing entity. 
The - result of the evaluation is the location description of the base of the - member entry. - - *If the beginning of the containing entity is not byte aligned, then the - beginning of the member entry has the same bit displacement within a - byte.* - - 2. Otherwise, the attribute must be a DWARF expression E which is evaluated - with a context that has a result kind of a location description, an - unspecified object, the compilation unit that contains E, an initial - stack comprising the location description of the beginning of the - containing entity, and other context elements corresponding to the - source language thread of execution upon which the user is focused, if - any. The result of the evaluation is the location description of the - base of the member entry. - - .. note:: - - The beginning of the containing entity can now be any location - description, including those with more than one single location - description, and those with single location descriptions that are of any - kind and have any bit offset. - -5. ``DW_AT_use_location`` - - The ``DW_TAG_ptr_to_member_type`` debugging information entry has a - ``DW_AT_use_location`` attribute whose value is a DWARF expression E. It is - used to compute the location description of the member of the class to which - the pointer to member entry points. - - *The method used to find the location description of a given member of a - class, structure, or union is common to any instance of that class, - structure, or union and to any instance of the pointer to member type. The - method is thus associated with the pointer to member type, rather than with - each object that has a pointer to member type.* - - The ``DW_AT_use_location`` DWARF expression is used in conjunction with the - location description for a particular object of the given pointer to member - type and for a particular structure or class instance. 
- - The result of the attribute is obtained by evaluating E with a context that - has a result kind of a location description, an unspecified object, the - compilation unit that contains E, an initial stack comprising two entries, - and other context elements corresponding to the source language thread of - execution upon which the user is focused, if any. The first stack entry is - the value of the pointer to member object itself. The second stack entry is - the location description of the base of the entire class, structure, or - union instance containing the member whose location is being calculated. The - result of the evaluation is the location description of the member of the - class to which the pointer to member entry points. - -6. ``DW_AT_data_location`` - - The ``DW_AT_data_location`` attribute may be used with any type that - provides one or more levels of hidden indirection and/or run-time parameters - in its representation. Its value is a DWARF operation expression E which - computes the location description of the data for an object. When this - attribute is omitted, the location description of the data is the same as - the location description of the object. - - The result of the attribute is obtained by evaluating E with a context that - has a result kind of a location description, an object that is the location - description of the data descriptor, the compilation unit that contains E, an - empty initial stack, and other context elements corresponding to the source - language thread of execution upon which the user is focused, if any. The - result of the evaluation is the location description of the base of the - member entry. - - *E will typically involve an operation expression that begins with a* - ``DW_OP_push_object_address`` *operation which loads the location - description of the object which can then serve as a description in - subsequent calculation.* - - .. 
note:: - - Since ``DW_AT_data_member_location``, ``DW_AT_use_location``, and - ``DW_AT_vtable_elem_location`` allow both operation expressions and - location list expressions, why does ``DW_AT_data_location`` not allow - both? In all cases they apply to data objects so less likely that - optimization would cause different operation expressions for different - program location ranges. But if supporting for some then should be for - all. - - It seems odd this attribute is not the same as - ``DW_AT_data_member_location`` in having an initial stack with the - location description of the object since the expression has to need it. - -7. ``DW_AT_vtable_elem_location`` - - An entry for a virtual function also has a ``DW_AT_vtable_elem_location`` - attribute whose value is a DWARF expression E. - - The result of the attribute is obtained by evaluating E with a context that - has a result kind of a location description, an unspecified object, the - compilation unit that contains E, an initial stack comprising the location - description of the object of the enclosing type, and other context elements - corresponding to the source language thread of execution upon which the user - is focused, if any. The result of the evaluation is the location description - of the slot for the function within the virtual function table for the - enclosing class. - -8. ``DW_AT_static_link`` - - If a ``DW_TAG_subprogram`` or ``DW_TAG_entry_point`` debugger information +3. If a ``DW_TAG_subprogram`` or ``DW_TAG_entry_point`` debugger information entry is lexically nested, it may have a ``DW_AT_static_link`` attribute, whose value is a DWARF expression E. @@ -3027,35 +3176,86 @@ Debugging Information Entry Attributes The DWARF is ill-formed if L is is not comprised of one memory location description for one of the target architecture specific address spaces. -9. ``DW_AT_return_addr`` + .. note:: + + The following new attributes are added. 
- A ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or +4. For languages that are implemented using a SIMD or SIMT execution model, a + ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or ``DW_TAG_entry_point`` debugger information entry may have a - ``DW_AT_return_addr`` attribute, whose value is a DWARF expression E. + ``DW_AT_LLVM_lanes`` attribute whose value is an integer constant that is + the number of lanes per thread. This is the static number of lanes per + thread. It is not the dynamic number of lanes with which the thread was + initiated, for example, due to smaller or partial work-groups. + + If not present, the default value of 1 is used. + + The DWARF is ill-formed if the value is 0. + +5. For languages that are implemented using a SIMD or SIMT execution model, a + ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugging information entry may have a + ``DW_AT_LLVM_lane_pc`` attribute whose value is a DWARF expression E. The result of the attribute is obtained by evaluating E with a context that has a result kind of a location description, an unspecified object, the compilation unit that contains E, an empty initial stack, and other context elements corresponding to the source language thread of execution upon which - the user is focused, if any. The result of the evaluation is the location - description L of the place where the return address for the current call - frame's subprogram or entry point is stored. + the user is focused, if any. - The DWARF is ill-formed if L is not comprised of one memory location - description for one of the target architecture specific address spaces. + The resulting location description L is for a thread lane count sized vector + of generic type elements. The thread lane count is the value of the + ``DW_AT_LLVM_lanes`` attribute. 
Each element holds the conceptual program + location of the corresponding lane, where the least significant element + corresponds to the first target architecture specific lane identifier and so + forth. If the lane was not active when the current subprogram was called, + its element is an undefined location description. - .. note:: + ``DW_AT_LLVM_lane_pc`` *allows the compiler to indicate conceptually where + each lane of a SIMT thread is positioned even when it is in divergent + control flow that is not active.* - It is unclear why ``DW_TAG_inlined_subroutine`` has a - ``DW_AT_return_addr`` attribute but not a ``DW_AT_frame_base`` or - ``DW_AT_static_link`` attribute. Seems it would either have all of them or - none. Since inlined subprograms do not have a call frame it seems they - would have none of these attributes. + *Typically, the result is a location description with one composite location + description with each part being a location description with either one + undefined location description or one memory location description.* + + If not present, the thread is not being used in a SIMT manner, and the + thread's current program location is used. + +6. For languages that are implemented using a SIMD or SIMT execution model, a + ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugger information entry may have a + ``DW_AT_LLVM_active_lane`` attribute whose value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a value, an unspecified object, the compilation unit + that contains E, an empty initial stack, and other context elements + corresponding to the source language thread of execution upon which the user + is focused, if any. -10. ``DW_AT_call_value``, ``DW_AT_call_data_location``, and - ``DW_AT_call_data_value`` + The DWARF is ill-formed if the resulting value V is not an integral value. 
- A ``DW_TAG_call_site_parameter`` debugger information entry may have a + The resulting V is a bit mask of active lanes for the current program + location. The N\ :sup:`th` least significant bit of the mask corresponds to + the N\ :sup:`th` lane. If the bit is 1 the lane is active, otherwise it is + inactive. + + *Some targets may update the target architecture execution mask for regions + of code that must execute with different sets of lanes than the current + active lanes. For example, some code must execute with all lanes made + temporarily active.* ``DW_AT_LLVM_active_lane`` *allows the compiler to + provide the means to determine the source language active lanes.* + + If not present and ``DW_AT_LLVM_lanes`` is greater than 1, then the target + architecture execution mask is used. + +A.3.4 Call Site Entries and Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A.3.4.2 Call Site Parameters +++++++++++++++++++++++++++++ + +1. A ``DW_TAG_call_site_parameter`` debugger information entry may have a ``DW_AT_call_value`` attribute, whose value is a DWARF operation expression E\ :sub:`1`\ . @@ -3084,6 +3284,13 @@ Debugging Information Entry Attributes :sub:`2` would just be a ``DW_OP_push_object_address``, then the ``DW_AT_call_data_location`` attribute may be omitted. + .. note:: + + DWARF Version 5 implies that ``DW_OP_push_object_address`` may be used + but does not state what object must be specified in the context. Either + ``DW_OP_push_object_address`` cannot be used, or the object to be passed in + the context must be defined. + The value of the ``DW_AT_call_data_value`` attribute is obtained by evaluating E\ :sub:`3` with a context that has a result kind of a value, an unspecified object, the compilation unit that contains E, an empty initial @@ -3092,11 +3299,11 @@ Debugging Information Entry Attributes value V\ :sub:`3` is the value in L\ :sub:`2` at the time of the call made by the call site. 
- The result of these attributes is undefined if the current call frame is - not for the subprogram containing the ``DW_TAG_call_site_parameter`` - debugger information entry or the current program location is not for the - call site containing the ``DW_TAG_call_site_parameter`` debugger information - entry in the current call frame. + The result of these attributes is undefined if the current call frame is not + for the subprogram containing the ``DW_TAG_call_site_parameter`` debugger + information entry or the current program location is not for the call site + containing the ``DW_TAG_call_site_parameter`` debugger information entry in + the current call frame. *The consumer may have to virtually unwind to the call site (see* :ref:`amdgpu-dwarf-call-frame-information`\ *) in order to evaluate these @@ -3117,84 +3324,93 @@ Debugging Information Entry Attributes registers that have been clobbered, and clobbered memory will no longer have the value at the time of the call.* -11. ``DW_AT_LLVM_lanes`` *New* +.. _amdgpu-dwarf-lexical-block-entries: - For languages that are implemented using a SIMD or SIMT execution model, a - ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or - ``DW_TAG_entry_point`` debugger information entry may have a - ``DW_AT_LLVM_lanes`` attribute whose value is an integer constant that is - the number of lanes per thread. This is the static number of lanes per - thread. It is not the dynamic number of lanes with which the thread was - initiated, for example, due to smaller or partial work-groups. +A.3.5 Lexical Block Entries +~~~~~~~~~~~~~~~~~~~~~~~~~~~ - If not present, the default value of 1 is used. +.. note:: - The DWARF is ill-formed if the value is 0. + This section is the same as DWARF Version 5 section 3.5. -12. 
``DW_AT_LLVM_lane_pc`` *New* +A.4 Data Object and Object List Entries +--------------------------------------- - For languages that are implemented using a SIMD or SIMT execution model, a - ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or - ``DW_TAG_entry_point`` debugging information entry may have a - ``DW_AT_LLVM_lane_pc`` attribute whose value is a DWARF expression E. +.. note:: + + This section provides changes to existing debugger information entry + attributes. These would be incorporated into the corresponding DWARF Version 5 + chapter 4 sections. + +A.4.1 Data Object Entries +~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Any debugging information entry describing a data object (which includes + variables and parameters) or common blocks may have a ``DW_AT_location`` + attribute, whose value is a DWARF expression E. The result of the attribute is obtained by evaluating E with a context that has a result kind of a location description, an unspecified object, the compilation unit that contains E, an empty initial stack, and other context elements corresponding to the source language thread of execution upon which - the user is focused, if any. + the user is focused, if any. The result of the evaluation is the location + description of the base of the data object. - The resulting location description L is for a thread lane count sized vector - of generic type elements. The thread lane count is the value of the - ``DW_AT_LLVM_lanes`` attribute. Each element holds the conceptual program - location of the corresponding lane, where the least significant element - corresponds to the first target architecture specific lane identifier and so - forth. If the lane was not active when the current subprogram was called, - its element is an undefined location description. + See :ref:`amdgpu-dwarf-control-flow-operations` for special evaluation rules + used by the ``DW_OP_call*`` operations. 
- ``DW_AT_LLVM_lane_pc`` *allows the compiler to indicate conceptually where - each lane of a SIMT thread is positioned even when it is in divergent - control flow that is not active.* + .. note:: - *Typically, the result is a location description with one composite location - description with each part being a location description with either one - undefined location description or one memory location description.* + Delete the description of how the ``DW_OP_call*`` operations evaluate a + ``DW_AT_location`` attribute as that is now described in the operations. - If not present, the thread is not being used in a SIMT manner, and the - thread's current program location is used. + .. note:: -13. ``DW_AT_LLVM_active_lane`` *New* + See the discussion about the ``DW_AT_location`` attribute in the + ``DW_OP_call*`` operation. Having each attribute only have a single + purpose and single execution semantics seems desirable. It makes it easier + for the consumer that no longer has to track the context. It makes it + easier for the producer as it can rely on a single semantics for each + attribute. - For languages that are implemented using a SIMD or SIMT execution model, a - ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or - ``DW_TAG_entry_point`` debugger information entry may have a - ``DW_AT_LLVM_active_lane`` attribute whose value is a DWARF expression E. + For that reason, limiting the ``DW_AT_location`` attribute to only + supporting evaluating the location description of an object, and using a + different attribute and encoding class for the evaluation of DWARF + expression *procedures* on the same operation expression stack seems + desirable. 
- The result of the attribute is obtained by evaluating E with a context that - has a result kind of a value, an unspecified object, the compilation unit - that contains E, an empty initial stack, and other context elements - corresponding to the source language thread of execution upon which the user - is focused, if any. +2. ``DW_AT_const_value`` - The DWARF is ill-formed if the resulting value V is not an integral value. + .. note:: - The resulting V is a bit mask of active lanes for the current program - location. The N\ :sup:`th` least significant bit of the mask corresponds to - the N\ :sup:`th` lane. If the bit is 1 the lane is active, otherwise it is - inactive. + Could deprecate using the ``DW_AT_const_value`` attribute for + ``DW_TAG_variable`` or ``DW_TAG_formal_parameter`` debugger information + entries that have been optimized to a constant. Instead, + ``DW_AT_location`` could be used with a DWARF expression that produces an + implicit location description now that any location description can be + used within a DWARF expression. This allows the ``DW_OP_call*`` operations + to be used to push the location description of any variable regardless of + how it is optimized. - *Some targets may update the target architecture execution mask for regions - of code that must execute with different sets of lanes than the current - active lanes. For example, some code must execute with all lanes made - temporarily active.* ``DW_AT_LLVM_active_lane`` *allows the compiler to - provide the means to determine the source language active lanes.* +A.5 Type Entries +---------------- - If not present and ``DW_AT_LLVM_lanes`` is greater than 1, then the target - architecture execution mask is used. +.. note:: + + This section provides changes to existing debugger information entry + attributes. These would be incorporated into the corresponding DWARF Version 5 + chapter 5 sections. + +.. 
_amdgpu-dwarf-base-type-entries: + +A.5.1 Base Type Entries +~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: -14. ``DW_AT_LLVM_vector_size`` *New* + The following new attribute is added. - A ``DW_TAG_base_type`` debugger information entry for a base type T may have +1. A ``DW_TAG_base_type`` debugger information entry for a base type T may have a ``DW_AT_LLVM_vector_size`` attribute whose value is an integer constant that is the vector type size N. @@ -3215,76 +3431,143 @@ Debugging Information Entry Attributes would not be suitable as the type of a stack value entry. But perhaps that could be replaced by using this attribute. -15. ``DW_AT_LLVM_augmentation`` *New* +A.5.7 Structure, Union, Class and Interface Type Entries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - A ``DW_TAG_compile_unit`` debugger information entry for a compilation unit - may have a ``DW_AT_LLVM_augmentation`` attribute, whose value is an - augmentation string. +A.5.7.3 Derived or Extended Structures, Classes and Interfaces +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - *The augmentation string allows producers to indicate that there is - additional vendor or target specific information in the debugging - information entries. For example, this might be information about the - version of vendor specific extensions that are being used.* +1. For a ``DW_AT_data_member_location`` attribute there are two cases: - If not present, or if the string is empty, then the compilation unit has no - augmentation string. + 1. If the attribute is an integer constant B, it provides the offset in + bytes from the beginning of the containing entity. - The format for the augmentation string is: + The result of the attribute is obtained by evaluating a + ``DW_OP_LLVM_offset B`` operation with an initial stack comprising the + location description of the beginning of the containing entity. The + result of the evaluation is the location description of the base of the + member entry. 
- | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + *If the beginning of the containing entity is not byte aligned, then the + beginning of the member entry has the same bit displacement within a + byte.* - Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y - version number of the extensions used, and *options* is an optional string - providing additional information about the extensions. The version number - must conform to semantic versioning [:ref:`SEMVER `]. - The *options* string must not contain the "\ ``]``\ " character. + 2. Otherwise, the attribute must be a DWARF expression E which is evaluated + with a context that has a result kind of a location description, an + unspecified object, the compilation unit that contains E, an initial + stack comprising the location description of the beginning of the + containing entity, and other context elements corresponding to the + source language thread of execution upon which the user is focused, if + any. The result of the evaluation is the location description of the + base of the member entry. - For example: + .. note:: - :: + The beginning of the containing entity can now be any location + description, including those with more than one single location + description, and those with single location descriptions that are of any + kind and have any bit offset. - [abc:v0.0][def:v1.2:feature-a=on,feature-b=3] +A.5.7.8 Member Function Entries ++++++++++++++++++++++++++++++++ -Program Scope Entities ----------------------- +1. An entry for a virtual function also has a ``DW_AT_vtable_elem_location`` + attribute whose value is a DWARF expression E. -.. 
_amdgpu-dwarf-language-names: + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an initial stack comprising the location + description of the object of the enclosing type, and other context elements + corresponding to the source language thread of execution upon which the user + is focused, if any. The result of the evaluation is the location description + of the slot for the function within the virtual function table for the + enclosing class. -Unit Entities -~~~~~~~~~~~~~ +A.5.14 Pointer to Member Type Entries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. note:: +1. The ``DW_TAG_ptr_to_member_type`` debugging information entry has a + ``DW_AT_use_location`` attribute whose value is a DWARF expression E. It is + used to compute the location description of the member of the class to which + the pointer to member entry points. - This augments DWARF Version 5 section 3.1.1 and Table 3.1. + *The method used to find the location description of a given member of a + class, structure, or union is common to any instance of that class, + structure, or union and to any instance of the pointer to member type. The + method is thus associated with the pointer to member type, rather than with + each object that has a pointer to member type.* -Additional language codes defined for use with the ``DW_AT_language`` attribute -are defined in :ref:`amdgpu-dwarf-language-names-table`. + The ``DW_AT_use_location`` DWARF expression is used in conjunction with the + location description for a particular object of the given pointer to member + type and for a particular structure or class instance. -.. 
table:: Language Names - :name: amdgpu-dwarf-language-names-table + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an initial stack comprising two entries, + and other context elements corresponding to the source language thread of + execution upon which the user is focused, if any. The first stack entry is + the value of the pointer to member object itself. The second stack entry is + the location description of the base of the entire class, structure, or + union instance containing the member whose location is being calculated. The + result of the evaluation is the location description of the member of the + class to which the pointer to member entry points. - ==================== ============================= - Language Name Meaning - ==================== ============================= - ``DW_LANG_LLVM_HIP`` HIP Language. - ==================== ============================= +A.5.16 Dynamic Type Entries +~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The HIP language [:ref:`HIP `] can be supported by extending -the C++ language. +1. The ``DW_AT_data_location`` attribute may be used with any type that + provides one or more levels of hidden indirection and/or run-time parameters + in its representation. Its value is a DWARF operation expression E which + computes the location description of the data for an object. When this + attribute is omitted, the location description of the data is the same as + the location description of the object. 
-Other Debugger Information --------------------------- + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an object that is the location + description of the data descriptor, the compilation unit that contains E, an + empty initial stack, and other context elements corresponding to the source + language thread of execution upon which the user is focused, if any. The + result of the evaluation is the location description of the base of the + member entry. -Accelerated Access -~~~~~~~~~~~~~~~~~~ + *E will typically involve an operation expression that begins with a* + ``DW_OP_push_object_address`` *operation which loads the location + description of the object which can then serve as a descriptor in subsequent + calculation.* + + .. note:: + + Since ``DW_AT_data_member_location``, ``DW_AT_use_location``, and + ``DW_AT_vtable_elem_location`` allow both operation expressions and + location list expressions, why does ``DW_AT_data_location`` not allow + both? In all cases they apply to data objects so less likely that + optimization would cause different operation expressions for different + program location ranges. But if supporting for some then should be for + all. + + It seems odd this attribute is not the same as + ``DW_AT_data_member_location`` in having an initial stack with the + location description of the object since the expression has to need it. + +A.6 Other Debugging Information +------------------------------- + +.. note:: + + This section provides changes to existing debugger information entry + attributes. These would be incorporated into the corresponding DWARF Version 5 + chapter 6 sections. + +A.6.1 Accelerated Access +~~~~~~~~~~~~~~~~~~~~~~~~ .. 
_amdgpu-dwarf-lookup-by-name: -Lookup By Name -++++++++++++++ +A.6.1.1 Lookup By Name +++++++++++++++++++++++ -Contents of the Name Index -########################## +A.6.1.1.1 Contents of the Name Index +#################################### .. note:: @@ -3304,11 +3587,14 @@ following rules: or ``DW_OP_form_tls_address`` operation are included; otherwise, they are excluded. -Data Representation of the Name Index -##################################### +A.6.1.1.4 Data Representation of the Name Index +############################################### -Section Header -^^^^^^^^^^^^^^ +.. _amdgpu-dwarf-name-index-section-header: + + +A.6.1.1.4.1 Section Header +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. note:: @@ -3342,14 +3628,14 @@ For example: .. _amdgpu-dwarf-line-number-information: -Line Number Information -~~~~~~~~~~~~~~~~~~~~~~~ +A.6.2 Line Number Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Line Number Program Header -++++++++++++++++++++++++++++++ +A.6.2.4 The Line Number Program Header +++++++++++++++++++++++++++++++++++++++ -Standard Content Descriptions -############################# +A.6.2.4.1 Standard Content Descriptions +####################################### .. note:: @@ -3392,8 +3678,8 @@ Standard Content Descriptions .. _amdgpu-dwarf-call-frame-information: -Call Frame Information -~~~~~~~~~~~~~~~~~~~~~~ +A.6.4 Call Frame Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -3403,12 +3689,12 @@ Call Frame Information location description, including those with composite and implicit location descriptions. - These changes would be incorporated into the DWARF Version 5 section 6.1. + These changes would be incorporated into the DWARF Version 5 section 6.4. .. 
_amdgpu-dwarf-structure_of-call-frame-information: -Structure of Call Frame Information -+++++++++++++++++++++++++++++++++++ +A.6.4.1 Structure of Call Frame Information ++++++++++++++++++++++++++++++++++++++++++++ The register rules are: @@ -3682,8 +3968,8 @@ An FDE contains the following fields, in order: .. _amdgpu-dwarf-call-frame-instructions: -Call Frame Instructions -+++++++++++++++++++++++ +A.6.4.2 Call Frame Instructions ++++++++++++++++++++++++++++++++ Some call frame instructions have operands that are encoded as DWARF operation expressions E (see :ref:`amdgpu-dwarf-operation-expressions`). The DWARF @@ -3720,8 +4006,8 @@ operations that can be used in E have the following restrictions: .. _amdgpu-dwarf-row-creation-instructions: -Row Creation Instructions -######################### +A.6.4.2.1 Row Creation Instructions +################################### .. note:: @@ -3729,8 +4015,8 @@ Row Creation Instructions .. _amdgpu-dwarf-cfa-definition-instructions: -CFA Definition Instructions -########################### +A.6.4.2.2 CFA Definition Instructions +##################################### 1. ``DW_CFA_def_cfa`` @@ -3748,7 +4034,7 @@ CFA Definition Instructions displacement B. AS is set to the target architecture default address space identifier. The required action is to define the current CFA rule to be the result of evaluating the DWARF operation expression ``DW_OP_constu AS; - DW_OP_aspace_bregx R, B*data_alignment_factor`` as a location description. + DW_OP_aspace_bregx R, B * data_alignment_factor`` as a location description. *The action is the same as* ``DW_CFA_def_cfa``\ *, except that the second operand is signed and factored.* @@ -3773,7 +4059,7 @@ CFA Definition Instructions architecture specific address space identifier AS. The required action is to define the current CFA rule to be the result of evaluating the DWARF operation expression ``DW_OP_constu AS; DW_OP_aspace_bregx R, - B*data_alignment_factor`` as a location description. 
+ B * data_alignment_factor`` as a location description. If AS is not one of the values defined by the target architecture specific ``DW_ASPACE_*`` values, then the DWARF expression is ill-formed. @@ -3810,9 +4096,9 @@ CFA Definition Instructions The ``DW_CFA_def_cfa_offset_sf`` instruction takes a signed LEB128 operand representing a factored byte displacement B. The required action is to define the current CFA rule to be the result of evaluating the DWARF - operation expression ``DW_OP_constu AS; DW_OP_aspace_bregx R, - B*data_alignment_factor`` as a location description. R and AS are the old - CFA register number and address space respectively. + operation expression ``DW_OP_constu AS; DW_OP_aspace_bregx R, B * + data_alignment_factor`` as a location description. R and AS are the old CFA + register number and address space respectively. If the subprogram has no current CFA rule, or the rule was defined by a ``DW_CFA_def_cfa_expression`` instruction, then the DWARF is ill-formed. @@ -3837,8 +4123,8 @@ CFA Definition Instructions .. _amdgpu-dwarf-register-rule-instructions: -Register Rule Instructions -########################## +A.6.4.2.3 Register Rule Instructions +#################################### 1. ``DW_CFA_undefined`` @@ -3857,7 +4143,7 @@ Register Rule Instructions The ``DW_CFA_offset`` instruction takes two operands: a register number R (encoded with the opcode) and an unsigned LEB128 constant representing a factored displacement B. The required action is to change the rule for the - register specified by R to be an *offset(B\*data_alignment_factor)* rule. + register specified by R to be an *offset(B \* data_alignment_factor)* rule. .. note:: @@ -3888,7 +4174,7 @@ Register Rule Instructions The ``DW_CFA_val_offset`` instruction takes two unsigned LEB128 operands representing a register number R and a factored displacement B. The required action is to change the rule for the register indicated by R to be a - *val_offset(B\*data_alignment_factor)* rule. 
+ *val_offset(B \* data_alignment_factor)* rule. .. note:: @@ -3958,22 +4244,22 @@ Register Rule Instructions to ``DW_CFA_restore``, except for the encoding and size of the register operand. -Row State Instructions -###################### +A.6.4.2.4 Row State Instructions +################################ .. note:: These instructions are the same as in DWARF Version 5 section 6.4.2.4. -Padding Instruction -################### +A.6.4.2.5 Padding Instruction +############################# .. note:: These instructions are the same as in DWARF Version 5 section 6.4.2.5. -Call Frame Instruction Usage -++++++++++++++++++++++++++++ +A.6.4.3 Call Frame Instruction Usage +++++++++++++++++++++++++++++++++++++ .. note:: @@ -3981,53 +4267,45 @@ Call Frame Instruction Usage .. _amdgpu-dwarf-call-frame-calling-address: -Call Frame Calling Address -++++++++++++++++++++++++++ +A.6.4.4 Call Frame Calling Address +++++++++++++++++++++++++++++++++++ .. note:: The same as in DWARF Version 5 section 6.4.4. -Data Representation -------------------- +A.7 Data Representation +----------------------- + +.. note:: + + This section provides changes to existing debugger information entry + attributes. These would be incorporated into the corresponding DWARF Version 5 + chapter 7 sections. .. _amdgpu-dwarf-32-bit-and-64-bit-dwarf-formats: -32-Bit and 64-Bit DWARF Formats -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A.7.4 32-Bit and 64-Bit DWARF Formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: - This augments DWARF Version 5 section 7.4. - -1. Within the body of the ``.debug_info`` section, certain forms of attribute - value depend on the choice of DWARF format as follows. For the 32-bit DWARF - format, the value is a 4-byte unsigned integer; for the 64-bit DWARF format, - the value is an 8-byte unsigned integer. - - .. 
table:: ``.debug_info`` section attribute form roles - :name: amdgpu-dwarf-debug-info-section-attribute-form-roles-table - - ================================== =================================== - Form Role - ================================== =================================== - DW_FORM_line_strp offset in ``.debug_line_str`` - DW_FORM_ref_addr offset in ``.debug_info`` - DW_FORM_sec_offset offset in a section other than - ``.debug_info`` or ``.debug_str`` - DW_FORM_strp offset in ``.debug_str`` - DW_FORM_strp_sup offset in ``.debug_str`` section of - supplementary object file - DW_OP_call_ref offset in ``.debug_info`` - DW_OP_implicit_pointer offset in ``.debug_info`` - DW_OP_LLVM_aspace_implicit_pointer offset in ``.debug_info`` - ================================== =================================== - -Format of Debugging Information -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Attribute Encodings -+++++++++++++++++++ + This augments DWARF Version 5 section 7.4 list item 3's table. + +.. table:: ``.debug_info`` section attribute form roles + :name: amdgpu-dwarf-debug-info-section-attribute-form-roles-table + + ================================== =================================== + Form Role + ================================== =================================== + DW_OP_LLVM_aspace_implicit_pointer offset in ``.debug_info`` + ================================== =================================== + +A.7.5 Format of Debugging Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A.7.5.4 Attribute Encodings ++++++++++++++++++++++++++++ .. note:: @@ -4049,16 +4327,25 @@ entry attributes. DW_AT_LLVM_vector_size 0x3e0c constant ================================== ====== =================================== -DWARF Expressions -~~~~~~~~~~~~~~~~~ +.. _amdgpu-dwarf-classes-and-forms: + +A.7.5.5 Classes and Forms ++++++++++++++++++++++++++ + +.. note:: + + The same as in DWARF Version 5 section 7.5.5. + +A.7.7 DWARF Expressions +~~~~~~~~~~~~~~~~~~~~~~~ .. 
note:: Rename DWARF Version 5 section 7.7 to reflect the unification of location descriptions into DWARF expressions. -Operation Expressions -+++++++++++++++++++++ +A.7.7.1 Operation Expressions ++++++++++++++++++++++++++++++ .. note:: @@ -4096,16 +4383,16 @@ operations. ULEB128 count ================================== ===== ======== =============================== -Location List Expressions -+++++++++++++++++++++++++ +A.7.7.3 Location List Expressions ++++++++++++++++++++++++++++++++++ .. note:: Rename DWARF Version 5 section 7.7.3 to reflect that location lists are a kind of DWARF expression. -Source Languages -~~~~~~~~~~~~~~~~ +A.7.12 Source Languages +~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -4122,8 +4409,8 @@ The following table gives the encoding of the additional DWARF languages. ``DW_LANG_LLVM_HIP`` 0x8100 0 ==================== ====== =================== -Address Class and Address Space Encodings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A.7.13 Address Class and Address Space Encodings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -4147,8 +4434,8 @@ are given in :ref:`amdgpu-dwarf-address-class-encodings-table`. ``DW_ADDR_LLVM_hi_user`` 0xffff ========================== ====== -Line Number Information -~~~~~~~~~~~~~~~~~~~~~~~ +A.7.22 Line Number Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -4167,8 +4454,8 @@ entry formats. ``DW_LNCT_LLVM_is_MD5`` 0x2002 ==================================== ==================== -Call Frame Information -~~~~~~~~~~~~~~~~~~~~~~ +A.7.24 Call Frame Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: @@ -4188,8 +4475,8 @@ instructions. DW_CFA_LLVM_def_aspace_cfa_sf 0 0x31 ULEB128 register SLEB128 offset ULEB128 address space ============================= ====== ====== ================ ================ ===================== -Attributes by Tag Value (Informative) -------------------------------------- +A. Attributes by Tag Value (Informative) +---------------------------------------- .. 
note:: @@ -4219,8 +4506,8 @@ debugger information entries. .. _amdgpu-dwarf-examples: -Examples -======== +B. Examples +=========== The AMD GPU specific usage of the features in these extensions, including examples, is available at *User Guide for AMDGPU Backend* section @@ -4235,65 +4522,69 @@ examples, is available at *User Guide for AMDGPU Backend* section .. _amdgpu-dwarf-references: -References -========== +C. References +============= .. _amdgpu-dwarf-AMD: 1. [AMD] `Advanced Micro Devices `__ + .. _amdgpu-dwarf-AMD-ROCgdb: + +2. [AMD-ROCgdb] `AMD ROCm Debugger (ROCgdb) `__ + .. _amdgpu-dwarf-AMD-ROCm: -2. [AMD-ROCm] `AMD ROCm Platform `__ +3. [AMD-ROCm] `AMD ROCm Platform `__ - .. _amdgpu-dwarf-AMD-ROCgdb: + .. _amdgpu-dwarf-AMDGPU-DWARF-LOC: -3. [AMD-ROCgdb] `AMD ROCm Debugger (ROCgdb) `__ +4. [AMDGPU-DWARF-LOC] `Allow Location Descriptions on the DWARF Expression Stack `__ .. _amdgpu-dwarf-AMDGPU-LLVM: -4. [AMDGPU-LLVM] `User Guide for AMDGPU LLVM Backend `__ +5. [AMDGPU-LLVM] `User Guide for AMDGPU LLVM Backend `__ .. _amdgpu-dwarf-CUDA: -5. [CUDA] `Nvidia CUDA Language `__ +6. [CUDA] `Nvidia CUDA Language `__ .. _amdgpu-dwarf-DWARF: -6. [DWARF] `DWARF Debugging Information Format `__ +7. [DWARF] `DWARF Debugging Information Format `__ .. _amdgpu-dwarf-ELF: -7. [ELF] `Executable and Linkable Format (ELF) `__ +8. [ELF] `Executable and Linkable Format (ELF) `__ .. _amdgpu-dwarf-GCC: -8. [GCC] `GCC: The GNU Compiler Collection `__ +9. [GCC] `GCC: The GNU Compiler Collection `__ .. _amdgpu-dwarf-GDB: -9. [GDB] `GDB: The GNU Project Debugger `__ +10. [GDB] `GDB: The GNU Project Debugger `__ .. _amdgpu-dwarf-HIP: -10. [HIP] `HIP Programming Guide `__ +11. [HIP] `HIP Programming Guide `__ .. _amdgpu-dwarf-HSA: -11. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ +12. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ .. _amdgpu-dwarf-LLVM: -12. [LLVM] `The LLVM Compiler Infrastructure `__ +13. 
[LLVM] `The LLVM Compiler Infrastructure `__ .. _amdgpu-dwarf-OpenCL: -13. [OpenCL] `The OpenCL Specification Version 2.0 `__ +14. [OpenCL] `The OpenCL Specification Version 2.0 `__ .. _amdgpu-dwarf-Perforce-TotalView: -14. [Perforce-TotalView] `Perforce TotalView HPC Debugging Software `__ +15. [Perforce-TotalView] `Perforce TotalView HPC Debugging Software `__ .. _amdgpu-dwarf-SEMVER: -15. [SEMVER] `Semantic Versioning `__ +16. [SEMVER] `Semantic Versioning `__ diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 23d614fad109..8875d178015b 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2016,9 +2016,10 @@ Debugger Information Entry Attributes ------------------------------------- This section describes how certain debugger information entry attributes are -used by AMDGPU. See the sections in DWARF Version 5 section 2 which are updated -by *DWARF Extensions For Heterogeneous Debugging* section -:ref:`amdgpu-dwarf-debugging-information-entry-attributes`. +used by AMDGPU. See the sections in DWARF Version 5 section 3.3.5 and 3.1.1 +which are updated by *DWARF Extensions For Heterogeneous Debugging* section +:ref:`amdgpu-dwarf-low-level-information` and +:ref:`amdgpu-dwarf-full-and-partial-compilation-unit-entries`. .. _amdgpu-dwarf-dw-at-llvm-lane-pc: From c5327137df04ba4754547483d140aec8f8a954a9 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Thu, 6 Sep 2018 11:53:00 -0500 Subject: [PATCH 129/992] [Hexagon] Fix for producer operands search w/z-reg Z-register does not show up in defs, so checks searching for the def operand must look for a different def index than they would normally. 
--- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 101 ++++++++++++------ .../Hexagon/MCTargetDesc/HexagonMCChecker.h | 4 + llvm/test/DebugInfo/Hexagon/zreg-post-inc.s | 8 ++ 3 files changed, 78 insertions(+), 35 deletions(-) create mode 100644 llvm/test/DebugInfo/Hexagon/zreg-post-inc.s diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index b9233618e5fd..ca8adcb773a9 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" + #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -425,81 +426,109 @@ bool HexagonMCChecker::checkPredicates() { // Check legal use of new values. bool HexagonMCChecker::checkNewValues() { - for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { - if (!HexagonMCInstrInfo::isNewValue(MCII, I)) + for (auto const &ConsumerInst : + HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (!HexagonMCInstrInfo::isNewValue(MCII, ConsumerInst)) continue; - auto Consumer = HexagonMCInstrInfo::predicateInfo(MCII, I); - bool Branch = HexagonMCInstrInfo::getDesc(MCII, I).isBranch(); - MCOperand const &Op = HexagonMCInstrInfo::getNewValueOperand(MCII, I); + + const HexagonMCInstrInfo::PredicateInfo ConsumerPredInfo = + HexagonMCInstrInfo::predicateInfo(MCII, ConsumerInst); + + bool Branch = HexagonMCInstrInfo::getDesc(MCII, ConsumerInst).isBranch(); + MCOperand const &Op = + HexagonMCInstrInfo::getNewValueOperand(MCII, ConsumerInst); assert(Op.isReg()); - auto Producer = registerProducer(Op.getReg(), Consumer); - if (std::get<0>(Producer) == nullptr) { - reportError(I.getLoc(), "New value register consumer has no producer"); + + auto Producer = registerProducer(Op.getReg(), 
ConsumerPredInfo); + const MCInst *const ProducerInst = std::get<0>(Producer); + const HexagonMCInstrInfo::PredicateInfo ProducerPredInfo = + std::get<2>(Producer); + + if (ProducerInst == nullptr) { + reportError(ConsumerInst.getLoc(), + "New value register consumer has no producer"); return false; } if (!RelaxNVChecks) { // Checks that statically prove correct new value consumption - if (std::get<2>(Producer).isPredicated() && - (!Consumer.isPredicated() || - llvm::HexagonMCInstrInfo::getType(MCII, I) == HexagonII::TypeNCJ)) { + if (ProducerPredInfo.isPredicated() && + (!ConsumerPredInfo.isPredicated() || + llvm::HexagonMCInstrInfo::getType(MCII, ConsumerInst) == + HexagonII::TypeNCJ)) { reportNote( - std::get<0>(Producer)->getLoc(), + ProducerInst->getLoc(), "Register producer is predicated and consumer is unconditional"); - reportError(I.getLoc(), + reportError(ConsumerInst.getLoc(), "Instruction does not have a valid new register producer"); return false; } - if (std::get<2>(Producer).Register != Hexagon::NoRegister && - std::get<2>(Producer).Register != Consumer.Register) { - reportNote(std::get<0>(Producer)->getLoc(), + if (ProducerPredInfo.Register != Hexagon::NoRegister && + ProducerPredInfo.Register != ConsumerPredInfo.Register) { + reportNote(ProducerInst->getLoc(), "Register producer does not use the same predicate " "register as the consumer"); - reportError(I.getLoc(), + reportError(ConsumerInst.getLoc(), "Instruction does not have a valid new register producer"); return false; } } - if (std::get<2>(Producer).Register == Consumer.Register && - Consumer.PredicatedTrue != std::get<2>(Producer).PredicatedTrue) { + if (ProducerPredInfo.Register == ConsumerPredInfo.Register && + ConsumerPredInfo.PredicatedTrue != ProducerPredInfo.PredicatedTrue) { reportNote( - std::get<0>(Producer)->getLoc(), + ProducerInst->getLoc(), "Register producer has the opposite predicate sense as consumer"); - reportError(I.getLoc(), + reportError(ConsumerInst.getLoc(), 
"Instruction does not have a valid new register producer"); return false; } - MCInstrDesc const &Desc = - HexagonMCInstrInfo::getDesc(MCII, *std::get<0>(Producer)); - if (Desc.OpInfo[std::get<1>(Producer)].RegClass == + + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, *ProducerInst); + const unsigned ProducerOpIndex = std::get<1>(Producer); + + if (Desc.OpInfo[ProducerOpIndex].RegClass == Hexagon::DoubleRegsRegClassID) { - reportNote(std::get<0>(Producer)->getLoc(), + reportNote(ProducerInst->getLoc(), "Double registers cannot be new-value producers"); - reportError(I.getLoc(), + reportError(ConsumerInst.getLoc(), "Instruction does not have a valid new register producer"); return false; } - if ((Desc.mayLoad() && std::get<1>(Producer) == 1) || - (Desc.mayStore() && std::get<1>(Producer) == 0)) { - unsigned Mode = - HexagonMCInstrInfo::getAddrMode(MCII, *std::get<0>(Producer)); + + // The ProducerOpIsMemIndex logic checks for the index of the producer + // register operand. Z-reg load instructions have an implicit operand + // that's not encoded, so the producer won't appear as the 1-th def, it + // will be at the 0-th. + const unsigned ProducerOpSearchIndex = + (HexagonMCInstrInfo::getType(MCII, *ProducerInst) == + HexagonII::TypeCVI_ZW) + ? 
0 + : 1; + + const bool ProducerOpIsMemIndex = + ((Desc.mayLoad() && ProducerOpIndex == ProducerOpSearchIndex) || + (Desc.mayStore() && ProducerOpIndex == 0)); + + if (ProducerOpIsMemIndex) { + unsigned Mode = HexagonMCInstrInfo::getAddrMode(MCII, *ProducerInst); + StringRef ModeError; if (Mode == HexagonII::AbsoluteSet) ModeError = "Absolute-set"; if (Mode == HexagonII::PostInc) ModeError = "Auto-increment"; if (!ModeError.empty()) { - reportNote(std::get<0>(Producer)->getLoc(), + reportNote(ProducerInst->getLoc(), ModeError + " registers cannot be a new-value " "producer"); - reportError(I.getLoc(), + reportError(ConsumerInst.getLoc(), "Instruction does not have a valid new register producer"); return false; } } - if (Branch && HexagonMCInstrInfo::isFloat(MCII, *std::get<0>(Producer))) { - reportNote(std::get<0>(Producer)->getLoc(), + if (Branch && HexagonMCInstrInfo::isFloat(MCII, *ProducerInst)) { + reportNote(ProducerInst->getLoc(), "FPU instructions cannot be new-value producers for jumps"); - reportError(I.getLoc(), + reportError(ConsumerInst.getLoc(), "Instruction does not have a valid new register producer"); return false; } @@ -542,9 +571,11 @@ HexagonMCChecker::registerProducer( unsigned Register, HexagonMCInstrInfo::PredicateInfo ConsumerPredicate) { std::tuple WrongSense; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); auto ProducerPredicate = HexagonMCInstrInfo::predicateInfo(MCII, I); + for (unsigned J = 0, N = Desc.getNumDefs(); J < N; ++J) for (auto K = MCRegAliasIterator(I.getOperand(J).getReg(), &RI, true); K.isValid(); ++K) diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index 160d452ab917..4d3a0f0c4cbd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -81,6 +81,10 @@ class HexagonMCChecker { 
void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue); bool registerUsed(unsigned Register); + + /// \return a tuple of: pointer to the producer instruction or nullptr if + /// none was found, the operand index, and the PredicateInfo for the + /// producer. std::tuple registerProducer(unsigned Register, HexagonMCInstrInfo::PredicateInfo Predicated); diff --git a/llvm/test/DebugInfo/Hexagon/zreg-post-inc.s b/llvm/test/DebugInfo/Hexagon/zreg-post-inc.s new file mode 100644 index 000000000000..346bec1b0fdd --- /dev/null +++ b/llvm/test/DebugInfo/Hexagon/zreg-post-inc.s @@ -0,0 +1,8 @@ +# RUN: not llvm-mc -arch=hexagon -filetype=obj -mhvx -mcpu=hexagonv66 %s 2> %t; FileCheck --implicit-check-not=error %s <%t + +{ + if (p0) memb(r14+#8)=r4.new + if (p0) z=vmem(r4++#0) +} + +# CHECK: error: Instruction does not have a valid new register producer From 1e7bd93ff2cc55965e8f31670a459b11e679a4ad Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Tue, 28 Dec 2021 09:51:27 -0800 Subject: [PATCH 130/992] [Hexagon] Add HexagonMCInstrInfo::IsABranchingInst, NFC --- .../lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 9 +++------ .../Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 8 ++++++++ .../lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 2 ++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index ca8adcb773a9..5f094dfeb95c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -317,8 +317,7 @@ bool HexagonMCChecker::checkAXOK() { void HexagonMCChecker::reportBranchErrors() { for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { - MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); - if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) + if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, I)) 
reportNote(I.getLoc(), "Branching instruction"); } } @@ -328,8 +327,7 @@ bool HexagonMCChecker::checkHWLoop() { !HexagonMCInstrInfo::isOuterLoop(MCB)) return true; for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { - MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); - if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) { + if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, I)) { reportError(MCB.getLoc(), "Branches cannot be in a packet with hardware loops"); reportBranchErrors(); @@ -342,8 +340,7 @@ bool HexagonMCChecker::checkHWLoop() { bool HexagonMCChecker::checkCOFMax1() { SmallVector BranchLocations; for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { - MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); - if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) + if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, I)) BranchLocations.push_back(&I); } for (unsigned J = 0, N = BranchLocations.size(); J < N; ++J) { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 68ccb20f4f15..589363d3e9a3 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -1030,3 +1030,11 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer, return Consumer == Producer; return 0; } + +bool HexagonMCInstrInfo::IsABranchingInst(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCInst const &I) { + assert(!HexagonMCInstrInfo::isBundle(I)); + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + return (Desc.isBranch() || Desc.isCall() || Desc.isReturn()); +} diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 5c56db14798f..2e8c3ddbc55f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ 
b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -95,6 +95,8 @@ bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &MCB, HexagonMCChecker *Checker, bool AttemptCompatibility = false); +bool IsABranchingInst(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst const &I); // Create a duplex instruction given the two subinsts MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, From e6e7bdd6a90ca8ba896fb92f6e0e642d42c84efc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 28 Dec 2021 10:01:39 -0800 Subject: [PATCH 131/992] Drop unnecessary const from return types (NFC) Identified with readability-const-return-type. --- clang/lib/Frontend/CompilerInvocation.cpp | 4 ++-- llvm/include/llvm/Passes/StandardInstrumentations.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index b71addd84bfd..7727d70adfb1 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -438,7 +438,7 @@ static T extractMaskValue(T KeyPath) { }(EXTRACTOR(KEYPATH)); \ } -static const StringRef GetInputKindName(InputKind IK); +static StringRef GetInputKindName(InputKind IK); static bool FixupInvocation(CompilerInvocation &Invocation, DiagnosticsEngine &Diags, const ArgList &Args, @@ -3291,7 +3291,7 @@ static bool IsInputCompatibleWithStandard(InputKind IK, } /// Get language name for given input kind. 
-static const StringRef GetInputKindName(InputKind IK) { +static StringRef GetInputKindName(InputKind IK) { switch (IK.getLanguage()) { case Language::C: return "C"; diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 6cab4ce7d138..9eb754a4d824 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -432,7 +432,7 @@ class DCData { } // Return the label of the basic block reached on a transition on \p S. - const StringRef getSuccessorLabel(StringRef S) const { + StringRef getSuccessorLabel(StringRef S) const { assert(Successors.count(S) == 1 && "Expected to find successor."); return Successors.find(S)->getValue(); } From b5d3bbcc9433193351a22b738c7ff4b007cb1e68 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 28 Dec 2021 10:49:52 -0800 Subject: [PATCH 132/992] [CMake] Remove unneeded CLANG_DEFAULT_PIE_ON_LINUX canonicalization after D115751 --- clang/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 00243d8b13b9..69d639fcec1b 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -228,9 +228,6 @@ set(CLANG_SPAWN_CC1 OFF CACHE BOOL "Whether clang should use a new process for the CC1 invocation") option(CLANG_DEFAULT_PIE_ON_LINUX "Default to -fPIE and -pie on Linux" OFF) -if(CLANG_DEFAULT_PIE_ON_LINUX) - set(CLANG_DEFAULT_PIE_ON_LINUX 1) -endif() # TODO: verify the values against LangStandards.def? set(CLANG_DEFAULT_STD_C "" CACHE STRING From a2154b19515304f42000160bed820630c3780db8 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 28 Dec 2021 10:49:59 -0800 Subject: [PATCH 133/992] Cache the manual DWARF index out to the LLDB cache directory when the LLDB index cache is enabled. This patch add the ability to cache the manual DWARF indexing results to disk for faster subsequent debug sessions. 
Manual DWARF indexing is time consuming and causes all DWARF to be fully parsed and indexed each time you debug a binary that doesn't have an acceptable accelerator table. Acceptable accelerator tables include .debug_names in DWARF5 or Apple accelerator tables. This patch breaks up testing by testing all of the encoding and decoding of required C++ objects in a gtest unit test, and then has a test to verify the debug info cache is generated correctly. This patch also adds the ability to track when a symbol table or DWARF index is loaded or saved to the cache in the "statistics dump" command. This is essential to know in statistics as it can help explain why a debug session was slower or faster than expected. Reviewed By: labath, wallace Differential Revision: https://reviews.llvm.org/D115951 --- lldb/include/lldb/Symbol/SymbolFile.h | 32 +- lldb/include/lldb/Symbol/Symtab.h | 27 +- lldb/include/lldb/Target/Statistics.h | 4 + .../Plugins/SymbolFile/DWARF/DIERef.cpp | 37 + lldb/source/Plugins/SymbolFile/DWARF/DIERef.h | 31 + .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 232 ++++- .../SymbolFile/DWARF/ManualDWARFIndex.h | 98 +- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 50 ++ .../Plugins/SymbolFile/DWARF/NameToDIE.h | 38 + lldb/source/Symbol/Symtab.cpp | 8 +- lldb/source/Target/Statistics.cpp | 31 + .../commands/statistics/basic/TestStats.py | 20 + .../debug_index/TestDebugIndexCache.py | 141 +++ .../module_cache/debug_index/exe.yaml | 844 ++++++++++++++++++ .../simple_exe/TestModuleCacheSimple.py | 2 +- .../unittests/SymbolFile/DWARF/CMakeLists.txt | 1 + .../DWARF/DWARFIndexCachingTest.cpp | 198 ++++ 17 files changed, 1776 insertions(+), 18 deletions(-) create mode 100644 lldb/test/API/functionalities/module_cache/debug_index/TestDebugIndexCache.py create mode 100644 lldb/test/API/functionalities/module_cache/debug_index/exe.yaml create mode 100644 lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp diff --git a/lldb/include/lldb/Symbol/SymbolFile.h 
b/lldb/include/lldb/Symbol/SymbolFile.h index 7c0365483c12..288576b978a7 100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -67,8 +67,7 @@ class SymbolFile : public PluginInterface { // Constructors and Destructors SymbolFile(lldb::ObjectFileSP objfile_sp) - : m_objfile_sp(std::move(objfile_sp)), m_abilities(0), - m_calculated_abilities(false) {} + : m_objfile_sp(std::move(objfile_sp)) {} ~SymbolFile() override = default; @@ -326,6 +325,29 @@ class SymbolFile : public PluginInterface { /// hasn't been indexed yet, or a valid duration if it has. virtual StatsDuration GetDebugInfoIndexTime() { return StatsDuration(0.0); } + /// Accessors for the bool that indicates if the debug info index was loaded + /// from, or saved to the module index cache. + /// + /// In statistics it is handy to know if a module's debug info was loaded from + /// or saved to the cache. When the debug info index is loaded from the cache + /// startup times can be faster. When the cache is enabled and the debug info + /// index is saved to the cache, debug sessions can be slower. These accessors + /// can be accessed by the statistics and emitted to help track these costs. 
+ /// \{ + bool GetDebugInfoIndexWasLoadedFromCache() const { + return m_index_was_loaded_from_cache; + } + void SetDebugInfoIndexWasLoadedFromCache() { + m_index_was_loaded_from_cache = true; + } + bool GetDebugInfoIndexWasSavedToCache() const { + return m_index_was_saved_to_cache; + } + void SetDebugInfoIndexWasSavedToCache() { + m_index_was_saved_to_cache = true; + } + /// \} + protected: void AssertModuleLock(); virtual uint32_t CalculateNumCompileUnits() = 0; @@ -341,8 +363,10 @@ class SymbolFile : public PluginInterface { llvm::Optional> m_compile_units; TypeList m_type_list; Symtab *m_symtab = nullptr; - uint32_t m_abilities; - bool m_calculated_abilities; + uint32_t m_abilities = 0; + bool m_calculated_abilities = false; + bool m_index_was_loaded_from_cache = false; + bool m_index_was_saved_to_cache = false; private: SymbolFile(const SymbolFile &) = delete; diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index fe0a82306c4f..504b49c02674 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -212,6 +212,30 @@ class Symtab { /// false if the symbol table wasn't cached or was out of date. bool LoadFromCache(); + + /// Accessors for the bool that indicates if the debug info index was loaded + /// from, or saved to the module index cache. + /// + /// In statistics it is handy to know if a module's debug info was loaded from + /// or saved to the cache. When the debug info index is loaded from the cache + /// startup times can be faster. When the cache is enabled and the debug info + /// index is saved to the cache, debug sessions can be slower. These accessors + /// can be accessed by the statistics and emitted to help track these costs. 
+ /// \{ + bool GetWasLoadedFromCache() const { + return m_loaded_from_cache; + } + void SetWasLoadedFromCache() { + m_loaded_from_cache = true; + } + bool GetWasSavedToCache() const { + return m_saved_to_cache; + } + void SetWasSavedToCache() { + m_saved_to_cache = true; + } + /// \} + protected: typedef std::vector collection; typedef collection::iterator iterator; @@ -252,7 +276,8 @@ class Symtab { m_name_to_symbol_indices; mutable std::recursive_mutex m_mutex; // Provide thread safety for this symbol table - bool m_file_addr_to_index_computed : 1, m_name_indexes_computed : 1; + bool m_file_addr_to_index_computed : 1, m_name_indexes_computed : 1, + m_loaded_from_cache : 1, m_saved_to_cache : 1; private: UniqueCStringMap & diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h index 087fbee26328..cf4fb83c816e 100644 --- a/lldb/include/lldb/Target/Statistics.h +++ b/lldb/include/lldb/Target/Statistics.h @@ -84,6 +84,10 @@ struct ModuleStats { double debug_parse_time = 0.0; double debug_index_time = 0.0; uint64_t debug_info_size = 0; + bool symtab_loaded_from_cache = false; + bool symtab_saved_to_cache = false; + bool debug_info_index_loaded_from_cache = false; + bool debug_info_index_saved_to_cache = false; }; /// A class that represents statistics for a since lldb_private::Target. 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp index 7a8ab9c9bcfd..25cb368763c1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp @@ -7,8 +7,13 @@ //===----------------------------------------------------------------------===// #include "DIERef.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" #include "llvm/Support/Format.h" +using namespace lldb; +using namespace lldb_private; + void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, StringRef Style) { if (ref.dwo_num()) @@ -16,3 +21,35 @@ void llvm::format_provider::format(const DIERef &ref, raw_ostream &OS, OS << (ref.section() == DIERef::DebugInfo ? "INFO" : "TYPE"); OS << "/" << format_hex_no_prefix(ref.die_offset(), 8); } + +constexpr uint32_t k_dwo_num_mask = 0x3FFFFFFF; +constexpr uint32_t k_dwo_num_valid_bitmask = (1u << 30); +constexpr uint32_t k_section_bitmask = (1u << 31); + +llvm::Optional DIERef::Decode(const DataExtractor &data, + lldb::offset_t *offset_ptr) { + const uint32_t bitfield_storage = data.GetU32(offset_ptr); + uint32_t dwo_num = bitfield_storage & k_dwo_num_mask; + bool dwo_num_valid = (bitfield_storage & (k_dwo_num_valid_bitmask)) != 0; + Section section = (Section)((bitfield_storage & (k_section_bitmask)) != 0); + // DIE offsets can't be zero and if we fail to decode something from data, + // it will return 0 + dw_offset_t die_offset = data.GetU32(offset_ptr); + if (die_offset == 0) + return llvm::None; + if (dwo_num_valid) + return DIERef(dwo_num, section, die_offset); + else + return DIERef(llvm::None, section, die_offset); +} + +void DIERef::Encode(DataEncoder &encoder) const { + uint32_t bitfield_storage = m_dwo_num; + if (m_dwo_num_valid) + bitfield_storage |= k_dwo_num_valid_bitmask; + if (m_section) + bitfield_storage |= k_section_bitmask; + encoder.AppendU32(bitfield_storage); + 
static_assert(sizeof(m_die_offset) == 4, "m_die_offset must be 4 bytes"); + encoder.AppendU32(m_die_offset); +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index f7e09ee17283..23e1eec26ec3 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -54,6 +54,37 @@ class DIERef { return m_die_offset < other.m_die_offset; } + bool operator==(const DIERef &rhs) const { + return dwo_num() == rhs.dwo_num() && m_section == rhs.m_section && + m_die_offset == rhs.m_die_offset; + } + + bool operator!=(const DIERef &rhs) const { return !(*this == rhs); } + + /// Decode a serialized version of this object from data. + /// + /// \param data + /// The decoder object that references the serialized data. + /// + /// \param offset_ptr + /// A pointer that contains the offset from which the data will be decoded + /// from that gets updated as data gets decoded. + /// + /// \return + /// Returns a valid DIERef if decoding succeeded, llvm::None if there was + /// unsufficient or invalid values that were decoded. + static llvm::Optional Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr); + + /// Encode this object into a data encoder object. + /// + /// This allows this object to be serialized to disk. + /// + /// \param encoder + /// A data encoder object that serialized bytes will be encoded into. 
+ /// + void Encode(lldb_private::DataEncoder &encoder) const; + private: uint32_t m_dwo_num : 30; uint32_t m_dwo_num_valid : 1; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index ab10e9ca98f9..e15a22affcb2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -12,9 +12,12 @@ #include "Plugins/SymbolFile/DWARF/DWARFDeclContext.h" #include "Plugins/SymbolFile/DWARF/LogChannelDWARF.h" #include "Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h" +#include "lldb/Core/DataFileCache.h" #include "lldb/Core/Module.h" #include "lldb/Core/Progress.h" #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/Timer.h" #include "llvm/Support/FormatVariadic.h" @@ -24,17 +27,19 @@ using namespace lldb_private; using namespace lldb; void ManualDWARFIndex::Index() { - if (!m_dwarf) + if (m_indexed) return; - - SymbolFileDWARF &main_dwarf = *m_dwarf; - m_dwarf = nullptr; + m_indexed = true; ElapsedTime elapsed(m_index_time); - LLDB_SCOPED_TIMERF("%p", static_cast(&main_dwarf)); + LLDB_SCOPED_TIMERF("%p", static_cast(m_dwarf)); + if (LoadFromCache()) { + m_dwarf->SetDebugInfoIndexWasLoadedFromCache(); + return; + } - DWARFDebugInfo &main_info = main_dwarf.DebugInfo(); - SymbolFileDWARFDwo *dwp_dwarf = main_dwarf.GetDwpSymbolFile().get(); + DWARFDebugInfo &main_info = m_dwarf->DebugInfo(); + SymbolFileDWARFDwo *dwp_dwarf = m_dwarf->GetDwpSymbolFile().get(); DWARFDebugInfo *dwp_info = dwp_dwarf ? 
&dwp_dwarf->DebugInfo() : nullptr; std::vector units_to_index; @@ -125,6 +130,8 @@ void ManualDWARFIndex::Index() { pool.async(finalize_fn, &IndexSet::types); pool.async(finalize_fn, &IndexSet::namespaces); pool.wait(); + + SaveToCache(); } void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, @@ -480,3 +487,214 @@ void ManualDWARFIndex::Dump(Stream &s) { s.Printf("\nNamespaces:\n"); m_set.namespaces.Dump(&s); } + +constexpr llvm::StringLiteral kIdentifierManualDWARFIndex("DIDX"); +// Define IDs for the different tables when encoding and decoding the +// ManualDWARFIndex NameToDIE objects so we can avoid saving any empty maps. +enum DataID { + kDataIDFunctionBasenames = 1u, + kDataIDFunctionFullnames, + kDataIDFunctionMethods, + kDataIDFunctionSelectors, + kDataIDFunctionObjcClassSelectors, + kDataIDGlobals, + kDataIDTypes, + kDataIDNamespaces, + kDataIDEnd = 255u, + +}; +constexpr uint32_t CURRENT_CACHE_VERSION = 1; + +bool ManualDWARFIndex::IndexSet::Decode(const DataExtractor &data, + lldb::offset_t *offset_ptr) { + StringTableReader strtab; + // We now decode the string table for all strings in the data cache file. + if (!strtab.Decode(data, offset_ptr)) + return false; + + llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); + if (identifier != kIdentifierManualDWARFIndex) + return false; + const uint32_t version = data.GetU32(offset_ptr); + if (version != CURRENT_CACHE_VERSION) + return false; + + bool done = false; + while (!done) { + switch (data.GetU8(offset_ptr)) { + default: + // If we got here, this is not expected, we expect the data IDs to match + // one of the values from the DataID enumeration. 
+ return false; + case kDataIDFunctionBasenames: + if (!function_basenames.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDFunctionFullnames: + if (!function_fullnames.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDFunctionMethods: + if (!function_methods.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDFunctionSelectors: + if (!function_selectors.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDFunctionObjcClassSelectors: + if (!objc_class_selectors.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDGlobals: + if (!globals.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDTypes: + if (!types.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDNamespaces: + if (!namespaces.Decode(data, offset_ptr, strtab)) + return false; + break; + case kDataIDEnd: + // We got to the end of our NameToDIE encodings. + done = true; + break; + } + } + // Success! + return true; +} + +void ManualDWARFIndex::IndexSet::Encode(DataEncoder &encoder) const { + ConstStringTable strtab; + + // Encode the DWARF index into a separate encoder first. This allows us + // to gather all of the strings we will need in "strtab" as we will need to + // write the string table out before the symbol table. + DataEncoder index_encoder(encoder.GetByteOrder(), + encoder.GetAddressByteSize()); + + index_encoder.AppendData(kIdentifierManualDWARFIndex); + // Encode the data version. 
+ index_encoder.AppendU32(CURRENT_CACHE_VERSION); + + if (!function_basenames.IsEmpty()) { + index_encoder.AppendU8(kDataIDFunctionBasenames); + function_basenames.Encode(index_encoder, strtab); + } + if (!function_fullnames.IsEmpty()) { + index_encoder.AppendU8(kDataIDFunctionFullnames); + function_fullnames.Encode(index_encoder, strtab); + } + if (!function_methods.IsEmpty()) { + index_encoder.AppendU8(kDataIDFunctionMethods); + function_methods.Encode(index_encoder, strtab); + } + if (!function_selectors.IsEmpty()) { + index_encoder.AppendU8(kDataIDFunctionSelectors); + function_selectors.Encode(index_encoder, strtab); + } + if (!objc_class_selectors.IsEmpty()) { + index_encoder.AppendU8(kDataIDFunctionObjcClassSelectors); + objc_class_selectors.Encode(index_encoder, strtab); + } + if (!globals.IsEmpty()) { + index_encoder.AppendU8(kDataIDGlobals); + globals.Encode(index_encoder, strtab); + } + if (!types.IsEmpty()) { + index_encoder.AppendU8(kDataIDTypes); + types.Encode(index_encoder, strtab); + } + if (!namespaces.IsEmpty()) { + index_encoder.AppendU8(kDataIDNamespaces); + namespaces.Encode(index_encoder, strtab); + } + index_encoder.AppendU8(kDataIDEnd); + + // Now that all strings have been gathered, we will emit the string table. + strtab.Encode(encoder); + // Followed by the symbol table data. 
+ encoder.AppendData(index_encoder.GetData()); +} + +bool ManualDWARFIndex::Decode(const DataExtractor &data, + lldb::offset_t *offset_ptr, + bool &signature_mismatch) { + signature_mismatch = false; + CacheSignature signature; + if (!signature.Decode(data, offset_ptr)) + return false; + if (CacheSignature(m_dwarf->GetObjectFile()) != signature) { + signature_mismatch = true; + return false; + } + IndexSet set; + if (!set.Decode(data, offset_ptr)) + return false; + m_set = std::move(set); + return true; +} + +bool ManualDWARFIndex::Encode(DataEncoder &encoder) const { + CacheSignature signature(m_dwarf->GetObjectFile()); + if (!signature.Encode(encoder)) + return false; + m_set.Encode(encoder); + return true; +} + +std::string ManualDWARFIndex::GetCacheKey() { + std::string key; + llvm::raw_string_ostream strm(key); + // DWARF Index can come from different object files for the same module. A + // module can have one object file as the main executable and might have + // another object file in a separate symbol file, or we might have a .dwo file + // that claims its module is the main executable. 
+ ObjectFile *objfile = m_dwarf->GetObjectFile(); + strm << objfile->GetModule()->GetCacheKey() << "-dwarf-index-" + << llvm::format_hex(objfile->GetCacheHash(), 10); + return strm.str(); +} + +bool ManualDWARFIndex::LoadFromCache() { + DataFileCache *cache = Module::GetIndexCache(); + if (!cache) + return false; + ObjectFile *objfile = m_dwarf->GetObjectFile(); + if (!objfile) + return false; + std::unique_ptr mem_buffer_up = + cache->GetCachedData(GetCacheKey()); + if (!mem_buffer_up) + return false; + DataExtractor data(mem_buffer_up->getBufferStart(), + mem_buffer_up->getBufferSize(), + endian::InlHostByteOrder(), + objfile->GetAddressByteSize()); + bool signature_mismatch = false; + lldb::offset_t offset = 0; + const bool result = Decode(data, &offset, signature_mismatch); + if (signature_mismatch) + cache->RemoveCacheFile(GetCacheKey()); + return result; +} + +void ManualDWARFIndex::SaveToCache() { + DataFileCache *cache = Module::GetIndexCache(); + if (!cache) + return; // Caching is not enabled. + ObjectFile *objfile = m_dwarf->GetObjectFile(); + if (!objfile) + return; + DataEncoder file(endian::InlHostByteOrder(), objfile->GetAddressByteSize()); + // Encode will return false if the object file doesn't have anything to make + // a signature from. + if (Encode(file)) { + if (cache->SetCachedData(GetCacheKey(), file.GetData())) + m_dwarf->SetDebugInfoIndexWasSavedToCache(); + } +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index 36f371402b90..5c5e43de9ca6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -55,7 +55,7 @@ class ManualDWARFIndex : public DWARFIndex { void Dump(Stream &s) override; -private: + // Make IndexSet public so we can unit test the encoding and decoding logic. 
struct IndexSet { NameToDIE function_basenames; NameToDIE function_fullnames; @@ -65,21 +65,113 @@ class ManualDWARFIndex : public DWARFIndex { NameToDIE globals; NameToDIE types; NameToDIE namespaces; + bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); + void Encode(DataEncoder &encoder) const; + bool operator==(const IndexSet &rhs) const { + return function_basenames == rhs.function_basenames && + function_fullnames == rhs.function_fullnames && + function_methods == rhs.function_methods && + function_selectors == rhs.function_selectors && + objc_class_selectors == rhs.objc_class_selectors && + globals == rhs.globals && types == rhs.types && + namespaces == rhs.namespaces; + } }; + +private: void Index(); + + /// Decode a serialized version of this object from data. + /// + /// \param data + /// The decoder object that references the serialized data. + /// + /// \param offset_ptr + /// A pointer that contains the offset from which the data will be decoded + /// from that gets updated as data gets decoded. + /// + /// \param strtab + /// All strings in cache files are put into string tables for efficiency + /// and cache file size reduction. Strings are stored as uint32_t string + /// table offsets in the cache data. + bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + bool &signature_mismatch); + + /// Encode this object into a data encoder object. + /// + /// This allows this object to be serialized to disk. + /// + /// \param encoder + /// A data encoder object that serialized bytes will be encoded into. + /// + /// \param strtab + /// All strings in cache files are put into string tables for efficiency + /// and cache file size reduction. Strings are stored as uint32_t string + /// table offsets in the cache data. + /// + /// \return + /// True if the symbol table's object file can generate a valid signature + /// and all data for the symbol table was encoded, false otherwise. 
+ bool Encode(DataEncoder &encoder) const; + + /// Get the cache key string for this symbol table. + /// + /// The cache key must start with the module's cache key and is followed + /// by information that indicates this key is for caching the symbol table + /// contents and should also include the hash of the object file. A module can + /// be represented by an ObjectFile object for the main executable, but can + /// also have a symbol file that is from the same or a different object file. + /// This means we might have two symbol tables cached in the index cache, one + /// for the main executable and one for the symbol file. + /// + /// \return + /// The unique cache key used to save and retrieve data from the index + /// cache. + std::string GetCacheKey(); + + /// Save the symbol table data out into a cache. + /// + /// The symbol table will only be saved to a cache file if caching is enabled. + /// + /// We cache the contents of the symbol table since symbol tables in LLDB take + /// some time to initialize. This is due to the many sources for data that are + /// used to create a symbol table: + /// - standard symbol table + /// - dynamic symbol table (ELF) + /// - compressed debug info sections + /// - unwind information + /// - function pointers found in runtimes for global constructor/destructors + /// - other sources. + /// All of the above sources are combined and one symbol table results after + /// all sources have been considered. + void SaveToCache(); + + /// Load the symbol table from the index cache. + /// + /// Quickly load the finalized symbol table from the index cache. This saves + /// time when the debugger starts up. The index cache file for the symbol + /// table has the modification time set to the same time as the main module. + /// If the cache file exists and the modification times match, we will load + /// the symbol table from the serialized cache file. 
+ /// + /// \return + /// True if the symbol table was successfully loaded from the index cache, + /// false if the symbol table wasn't cached or was out of date. + bool LoadFromCache(); + void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, IndexSet &set); static void IndexUnitImpl(DWARFUnit &unit, const lldb::LanguageType cu_language, IndexSet &set); - /// The DWARF file which we are indexing. Set to nullptr after the index is - /// built. + /// The DWARF file which we are indexing. SymbolFileDWARF *m_dwarf; /// Which dwarf units should we skip while building the index. llvm::DenseSet m_units_to_avoid; IndexSet m_set; + bool m_indexed = false; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 493d1b4a2702..33e2695f403a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -8,8 +8,11 @@ #include "NameToDIE.h" #include "DWARFUnit.h" +#include "lldb/Core/DataFileCache.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" @@ -87,3 +90,50 @@ void NameToDIE::Append(const NameToDIE &other) { other.m_map.GetValueAtIndexUnchecked(i)); } } + +constexpr llvm::StringLiteral kIdentifierNameToDIE("N2DI"); + +bool NameToDIE::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + const StringTableReader &strtab) { + m_map.Clear(); + llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); + if (identifier != kIdentifierNameToDIE) + return false; + const uint32_t count = data.GetU32(offset_ptr); + for (uint32_t i = 0; i < count; ++i) { + llvm::StringRef str(strtab.Get(data.GetU32(offset_ptr))); + // No empty strings allowed in the name to DIE maps. 
+ if (str.empty()) + return false; + if (llvm::Optional die_ref = DIERef::Decode(data, offset_ptr)) + m_map.Append(ConstString(str), die_ref.getValue()); + else + return false; + } + return true; +} + +void NameToDIE::Encode(DataEncoder &encoder, ConstStringTable &strtab) const { + encoder.AppendData(kIdentifierNameToDIE); + encoder.AppendU32(m_map.GetSize()); + for (const auto &entry : m_map) { + // Make sure there are no empty strings. + assert((bool)entry.cstring); + encoder.AppendU32(strtab.Add(entry.cstring)); + entry.value.Encode(encoder); + } +} + +bool NameToDIE::operator==(const NameToDIE &rhs) const { + const size_t size = m_map.GetSize(); + if (size != rhs.m_map.GetSize()) + return false; + for (size_t i = 0; i < size; ++i) { + if (m_map.GetCStringAtIndex(i) != rhs.m_map.GetCStringAtIndex(i)) + return false; + if (m_map.GetValueRefAtIndexUnchecked(i) != + rhs.m_map.GetValueRefAtIndexUnchecked(i)) + return false; + } + return true; +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 994af07189f8..61df1a628ab5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -48,6 +48,44 @@ class NameToDIE { const DIERef &die_ref)> const &callback) const; + /// Decode a serialized version of this object from data. + /// + /// \param data + /// The decoder object that references the serialized data. + /// + /// \param offset_ptr + /// A pointer that contains the offset from which the data will be decoded + /// from that gets updated as data gets decoded. + /// + /// \param strtab + /// All strings in cache files are put into string tables for efficiency + /// and cache file size reduction. Strings are stored as uint32_t string + /// table offsets in the cache data. 
+ bool Decode(const lldb_private::DataExtractor &data, + lldb::offset_t *offset_ptr, + const lldb_private::StringTableReader &strtab); + + /// Encode this object into a data encoder object. + /// + /// This allows this object to be serialized to disk. + /// + /// \param encoder + /// A data encoder object that serialized bytes will be encoded into. + /// + /// \param strtab + /// All strings in cache files are put into string tables for efficiency + /// and cache file size reduction. Strings are stored as uint32_t string + /// table offsets in the cache data. + void Encode(lldb_private::DataEncoder &encoder, + lldb_private::ConstStringTable &strtab) const; + + /// Used for unit testing the encoding and decoding. + bool operator==(const NameToDIE &rhs) const; + + bool IsEmpty() const { return m_map.IsEmpty(); } + + void Clear() { m_map.Clear(); } + protected: lldb_private::UniqueCStringMap m_map; }; diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 75450a156c28..97dc31bc9766 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -34,7 +34,8 @@ using namespace lldb_private; Symtab::Symtab(ObjectFile *objfile) : m_objfile(objfile), m_symbols(), m_file_addr_to_index(*this), m_name_to_symbol_indices(), m_mutex(), - m_file_addr_to_index_computed(false), m_name_indexes_computed(false) { + m_file_addr_to_index_computed(false), m_name_indexes_computed(false), + m_loaded_from_cache(false), m_saved_to_cache(false) { m_name_to_symbol_indices.emplace(std::make_pair( lldb::eFunctionNameTypeNone, UniqueCStringMap())); m_name_to_symbol_indices.emplace(std::make_pair( @@ -1179,7 +1180,8 @@ void Symtab::SaveToCache() { // Encode will return false if the symbol table's object file doesn't have // anything to make a signature from. 
if (Encode(file)) - cache->SetCachedData(GetCacheKey(), file.GetData()); + if (cache->SetCachedData(GetCacheKey(), file.GetData())) + SetWasSavedToCache(); } constexpr llvm::StringLiteral kIdentifierCStrMap("CMAP"); @@ -1343,5 +1345,7 @@ bool Symtab::LoadFromCache() { const bool result = Decode(data, &offset, signature_mismatch); if (signature_mismatch) cache->RemoveCacheFile(GetCacheKey()); + if (result) + SetWasLoadedFromCache(); return result; } diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp index 1b205c533519..d50343fb5a43 100644 --- a/lldb/source/Target/Statistics.cpp +++ b/lldb/source/Target/Statistics.cpp @@ -52,9 +52,15 @@ json::Value ModuleStats::ToJSON() const { module.try_emplace("identifier", identifier); module.try_emplace("symbolTableParseTime", symtab_parse_time); module.try_emplace("symbolTableIndexTime", symtab_index_time); + module.try_emplace("symbolTableLoadedFromCache", symtab_loaded_from_cache); + module.try_emplace("symbolTableSavedToCache", symtab_saved_to_cache); module.try_emplace("debugInfoParseTime", debug_parse_time); module.try_emplace("debugInfoIndexTime", debug_index_time); module.try_emplace("debugInfoByteSize", (int64_t)debug_info_size); + module.try_emplace("debugInfoIndexLoadedFromCache", + debug_info_index_loaded_from_cache); + module.try_emplace("debugInfoIndexSavedToCache", + debug_info_index_saved_to_cache); return module; } @@ -144,6 +150,10 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger, double symtab_index_time = 0.0; double debug_parse_time = 0.0; double debug_index_time = 0.0; + uint32_t symtabs_loaded = 0; + uint32_t symtabs_saved = 0; + uint32_t debug_index_loaded = 0; + uint32_t debug_index_saved = 0; uint64_t debug_info_size = 0; if (target) { json_targets.emplace_back(target->ReportStatistics()); @@ -169,11 +179,28 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger, module_stat.triple = module->GetArchitecture().GetTriple().str(); 
module_stat.symtab_parse_time = module->GetSymtabParseTime().count(); module_stat.symtab_index_time = module->GetSymtabIndexTime().count(); + Symtab *symtab = module->GetSymtab(); + if (symtab) { + module_stat.symtab_loaded_from_cache = symtab->GetWasLoadedFromCache(); + if (module_stat.symtab_loaded_from_cache) + ++symtabs_loaded; + module_stat.symtab_saved_to_cache = symtab->GetWasSavedToCache(); + if (module_stat.symtab_saved_to_cache) + ++symtabs_saved; + } SymbolFile *sym_file = module->GetSymbolFile(); if (sym_file) { module_stat.debug_index_time = sym_file->GetDebugInfoIndexTime().count(); module_stat.debug_parse_time = sym_file->GetDebugInfoParseTime().count(); module_stat.debug_info_size = sym_file->GetDebugInfoSize(); + module_stat.debug_info_index_loaded_from_cache = + sym_file->GetDebugInfoIndexWasLoadedFromCache(); + if (module_stat.debug_info_index_loaded_from_cache) + ++debug_index_loaded; + module_stat.debug_info_index_saved_to_cache = + sym_file->GetDebugInfoIndexWasSavedToCache(); + if (module_stat.debug_info_index_saved_to_cache) + ++debug_index_saved; } symtab_parse_time += module_stat.symtab_parse_time; symtab_index_time += module_stat.symtab_index_time; @@ -188,8 +215,12 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger, {"modules", std::move(json_modules)}, {"totalSymbolTableParseTime", symtab_parse_time}, {"totalSymbolTableIndexTime", symtab_index_time}, + {"totalSymbolTablesLoadedFromCache", symtabs_loaded}, + {"totalSymbolTablesSavedToCache", symtabs_saved}, {"totalDebugInfoParseTime", debug_parse_time}, {"totalDebugInfoIndexTime", debug_index_time}, + {"totalDebugInfoIndexLoadedFromCache", debug_index_loaded}, + {"totalDebugInfoIndexSavedToCache", debug_index_saved}, {"totalDebugInfoByteSize", debug_info_size}, }; return std::move(global_stats); diff --git a/lldb/test/API/commands/statistics/basic/TestStats.py b/lldb/test/API/commands/statistics/basic/TestStats.py index e2d62e181a52..f69fddc27fba 100644 --- 
a/lldb/test/API/commands/statistics/basic/TestStats.py +++ b/lldb/test/API/commands/statistics/basic/TestStats.py @@ -164,8 +164,12 @@ def test_default_no_run(self): 'targets', 'totalSymbolTableParseTime', 'totalSymbolTableIndexTime', + 'totalSymbolTablesLoadedFromCache', + 'totalSymbolTablesSavedToCache', 'totalDebugInfoByteSize', 'totalDebugInfoIndexTime', + 'totalDebugInfoIndexLoadedFromCache', + 'totalDebugInfoIndexSavedToCache', 'totalDebugInfoParseTime', ] self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None) @@ -227,8 +231,12 @@ def test_default_with_run(self): 'targets', 'totalSymbolTableParseTime', 'totalSymbolTableIndexTime', + 'totalSymbolTablesLoadedFromCache', + 'totalSymbolTablesSavedToCache', 'totalDebugInfoByteSize', 'totalDebugInfoIndexTime', + 'totalDebugInfoIndexLoadedFromCache', + 'totalDebugInfoIndexSavedToCache', 'totalDebugInfoParseTime', ] self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None) @@ -265,8 +273,12 @@ def test_modules(self): 'targets', 'totalSymbolTableParseTime', 'totalSymbolTableIndexTime', + 'totalSymbolTablesLoadedFromCache', + 'totalSymbolTablesSavedToCache', 'totalDebugInfoParseTime', 'totalDebugInfoIndexTime', + 'totalDebugInfoIndexLoadedFromCache', + 'totalDebugInfoIndexSavedToCache', 'totalDebugInfoByteSize' ] self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None) @@ -278,12 +290,16 @@ def test_modules(self): exe_module = self.find_module_in_metrics(exe, debug_stats) module_keys = [ 'debugInfoByteSize', + 'debugInfoIndexLoadedFromCache', 'debugInfoIndexTime', + 'debugInfoIndexSavedToCache', 'debugInfoParseTime', 'identifier', 'path', 'symbolTableIndexTime', + 'symbolTableLoadedFromCache', 'symbolTableParseTime', + 'symbolTableSavedToCache', 'triple', 'uuid', ] @@ -343,8 +359,12 @@ def test_breakpoints(self): 'targets', 'totalSymbolTableParseTime', 'totalSymbolTableIndexTime', + 'totalSymbolTablesLoadedFromCache', + 'totalSymbolTablesSavedToCache', 'totalDebugInfoParseTime', 
'totalDebugInfoIndexTime', + 'totalDebugInfoIndexLoadedFromCache', + 'totalDebugInfoIndexSavedToCache', 'totalDebugInfoByteSize', ] self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None) diff --git a/lldb/test/API/functionalities/module_cache/debug_index/TestDebugIndexCache.py b/lldb/test/API/functionalities/module_cache/debug_index/TestDebugIndexCache.py new file mode 100644 index 000000000000..9a77309d6e22 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/debug_index/TestDebugIndexCache.py @@ -0,0 +1,141 @@ +import glob +import json +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os +import time + + +class DebugIndexCacheTestcase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Set the lldb module cache directory to a directory inside the build + # artifacts directory so no other tests are interfered with. + self.cache_dir = os.path.join(self.getBuildDir(), 'lldb-module-cache') + + def get_module_cache_files(self, basename): + module_cache_glob = os.path.join(self.cache_dir, + "llvmcache-*%s*dwarf-index*" % (basename)) + return glob.glob(module_cache_glob) + + def get_stats(self, log_path=None): + """ + Get the output of the "statistics dump" and return the JSON as a + python dictionary. + """ + # If log_path is set, open the path and emit the output of the command + # for debugging purposes. 
+ if log_path is not None: + f = open(log_path, 'w') + else: + f = None + return_obj = lldb.SBCommandReturnObject() + command = "statistics dump " + if f: + f.write('(lldb) %s\n' % (command)) + self.ci.HandleCommand(command, return_obj, False) + metrics_json = return_obj.GetOutput() + if f: + f.write(metrics_json) + return json.loads(metrics_json) + + def enable_lldb_index_cache(self): + self.runCmd('settings set symbols.lldb-index-cache-path "%s"' % (self.cache_dir)) + self.runCmd('settings set symbols.enable-lldb-index-cache true') + + @no_debug_info_test + def test_with_caching_enabled(self): + """ + Test module cache functionality for debug info index caching. + + We test that a debug info index file is created for the debug + information when caching is enabled with a file that contains + at least one of each kind of DIE in ManualDWARFIndex::IndexSet. + + The input file has DWARF that will fill in every member of the + ManualDWARFIndex::IndexSet class to ensure we can encode all of the + required information. + + With caching enabled, we also verify that the appropriate statistics + specify that the cache file was saved to the cache. 
+ """ + self.enable_lldb_index_cache() + src_dir = self.getSourceDir() + yaml_path = os.path.join(src_dir, "exe.yaml") + yaml_base, ext = os.path.splitext(yaml_path) + obj_path = self.getBuildArtifact("main.o") + self.yaml2obj(yaml_path, obj_path) + + # Create a target with the object file we just created from YAML + target = self.dbg.CreateTarget(obj_path) + self.assertTrue(target, VALID_TARGET) + + debug_index_cache_files = self.get_module_cache_files('main.o') + self.assertEqual(len(debug_index_cache_files), 1, + "make sure there is one file in the module cache directory (%s) for main.o that is a debug info cache" % (self.cache_dir)) + + # Verify that the module statistics have the information that specifies + # if we loaded or saved the debug index and symtab to the cache + stats = self.get_stats() + module_stats = stats['modules'][0] + self.assertFalse(module_stats['debugInfoIndexLoadedFromCache']) + self.assertTrue(module_stats['debugInfoIndexSavedToCache']) + self.assertFalse(module_stats['symbolTableLoadedFromCache']) + self.assertTrue(module_stats['symbolTableSavedToCache']) + # Verify the top level stats track how many things were loaded or saved + # to the cache. + self.assertEqual(stats["totalDebugInfoIndexLoadedFromCache"], 0) + self.assertEqual(stats["totalDebugInfoIndexSavedToCache"], 1) + self.assertEqual(stats["totalSymbolTablesLoadedFromCache"], 0) + self.assertEqual(stats["totalSymbolTablesSavedToCache"], 1) + + @no_debug_info_test + def test_with_caching_disabled(self): + """ + Test module cache functionality for debug info index caching. + + We test that a debug info index file is not created for the debug + information when caching is disabled with a file that contains + at least one of each kind of DIE in ManualDWARFIndex::IndexSet. + + The input file has DWARF that will fill in every member of the + ManualDWARFIndex::IndexSet class to ensure we can encode all of the + required information. 
+ + With caching disabled, we also verify that the appropriate + statistics specify that the cache file was not saved to the cache. + """ + src_dir = self.getSourceDir() + yaml_path = os.path.join(src_dir, "exe.yaml") + yaml_base, ext = os.path.splitext(yaml_path) + obj_path = self.getBuildArtifact("main.o") + self.yaml2obj(yaml_path, obj_path) + + # Create a target with the object file we just created from YAML + target = self.dbg.CreateTarget(obj_path) + self.assertTrue(target, VALID_TARGET) + + debug_index_cache_files = self.get_module_cache_files('main.o') + self.assertEqual(len(debug_index_cache_files), 0, + "make sure there is no file in the module cache directory (%s) for main.o that is a debug info cache" % (self.cache_dir)) + + # Verify that the module statistics have the information that specifies + # if we loaded or saved the debug index and symtab to the cache + stats = self.get_stats() + module_stats = stats['modules'][0] + self.assertFalse(module_stats['debugInfoIndexLoadedFromCache']) + self.assertFalse(module_stats['debugInfoIndexSavedToCache']) + self.assertFalse(module_stats['symbolTableLoadedFromCache']) + self.assertFalse(module_stats['symbolTableSavedToCache']) + # Verify the top level stats track how many things were loaded or saved + # to the cache. 
+ self.assertEqual(stats["totalDebugInfoIndexLoadedFromCache"], 0) + self.assertEqual(stats["totalDebugInfoIndexSavedToCache"], 0) + self.assertEqual(stats["totalSymbolTablesLoadedFromCache"], 0) + self.assertEqual(stats["totalSymbolTablesSavedToCache"], 0) diff --git a/lldb/test/API/functionalities/module_cache/debug_index/exe.yaml b/lldb/test/API/functionalities/module_cache/debug_index/exe.yaml new file mode 100644 index 000000000000..122095890e1e --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/debug_index/exe.yaml @@ -0,0 +1,844 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_str: + - 'Apple clang version 13.0.0 (clang-1300.0.29.3)' + - main.mm + - '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk' + - MacOSX.sdk + - '/tmp/test' + - g_global + - int + - SimpleClass + - NSObject + - isa + - Class + - objc_class + - foo + - _Z3fooi + - '-[SimpleClass sayHello]' + - sayHello + - main + - baz + - Bar + - x + - _ZNK3baz3Bar3getEv + - get + - _ZN3baz3BarC1Ei + - _ZN3baz3BarC2Ei + - self + - _cmd + - SEL + - objc_selector + - argc + - argv + - char + - b + - this + - i + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_producer + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_LLVM_sysroot + Form: DW_FORM_strp + - Attribute: DW_AT_APPLE_sdk + Form: DW_FORM_strp + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + - Attribute: DW_AT_comp_dir + Form: DW_FORM_strp + - Attribute: DW_AT_APPLE_major_runtime_vers + Form: DW_FORM_data1 + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x2 + Tag: DW_TAG_variable + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - 
Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + - Code: 0x3 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Code: 0x4 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_APPLE_objc_complete_type + Form: DW_FORM_flag_present + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_APPLE_runtime_class + Form: DW_FORM_data1 + - Code: 0x5 + Tag: DW_TAG_inheritance + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_data_member_location + Form: DW_FORM_data1 + - Code: 0x6 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_APPLE_runtime_class + Form: DW_FORM_data1 + - Code: 0x7 + Tag: DW_TAG_member + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_data_member_location + Form: DW_FORM_data1 + - Attribute: DW_AT_accessibility + Form: DW_FORM_data1 + - Code: 0x8 + Tag: DW_TAG_typedef + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - 
Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Code: 0x9 + Tag: DW_TAG_pointer_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0xA + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_declaration + Form: DW_FORM_flag_present + - Code: 0xB + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_APPLE_omit_frame_ptr + Form: DW_FORM_flag_present + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_linkage_name + Form: DW_FORM_strp + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Code: 0xC + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0xD + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_APPLE_omit_frame_ptr + Form: DW_FORM_flag_present + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_object_pointer + Form: DW_FORM_ref4 + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Code: 0xE + Tag: DW_TAG_formal_parameter + 
Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_artificial + Form: DW_FORM_flag_present + - Code: 0xF + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Code: 0x10 + Tag: DW_TAG_variable + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0x11 + Tag: DW_TAG_namespace + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Code: 0x12 + Tag: DW_TAG_class_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_calling_convention + Form: DW_FORM_data1 + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Code: 0x13 + Tag: DW_TAG_member + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_data_member_location + Form: DW_FORM_data1 + - Code: 0x14 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: 
DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_declaration + Form: DW_FORM_flag_present + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Attribute: DW_AT_accessibility + Form: DW_FORM_data1 + - Code: 0x15 + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_artificial + Form: DW_FORM_flag_present + - Code: 0x16 + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0x17 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_linkage_name + Form: DW_FORM_strp + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_declaration + Form: DW_FORM_flag_present + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Attribute: DW_AT_accessibility + Form: DW_FORM_data1 + - Code: 0x18 + Tag: DW_TAG_const_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 0x19 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_object_pointer + Form: DW_FORM_ref4 + - Attribute: DW_AT_linkage_name + Form: DW_FORM_strp + - Attribute: DW_AT_specification + Form: DW_FORM_ref4 + - Code: 0x1A + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_APPLE_omit_frame_ptr + Form: DW_FORM_flag_present + - Attribute: DW_AT_frame_base + Form: 
DW_FORM_exprloc + - Attribute: DW_AT_object_pointer + Form: DW_FORM_ref4 + - Attribute: DW_AT_linkage_name + Form: DW_FORM_strp + - Attribute: DW_AT_specification + Form: DW_FORM_ref4 + debug_info: + - Length: 0x21F + Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x0 + - Value: 0x11 + - Value: 0x2F + - Value: 0x37 + - Value: 0x96 + - Value: 0x0 + - Value: 0xA1 + - Value: 0x2 + - Value: 0x0 + - Value: 0xC4 + - AbbrCode: 0x2 + Values: + - Value: 0xAB + - Value: 0x48 + - Value: 0x1 + - Value: 0x1 + - Value: 0x3 + - Value: 0x9 + BlockData: [ 0x3, 0xC4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0 ] + - AbbrCode: 0x3 + Values: + - Value: 0xB4 + - Value: 0x5 + - Value: 0x4 + - AbbrCode: 0x4 + Values: + - Value: 0x1 + - Value: 0xB8 + - Value: 0x8 + - Value: 0x1 + - Value: 0x13 + - Value: 0x11 + - AbbrCode: 0x5 + Values: + - Value: 0x5F + - Value: 0x0 + - AbbrCode: 0x0 + - AbbrCode: 0x6 + Values: + - Value: 0xC4 + - Value: 0x8 + - Value: 0x2 + - Value: 0x35 + - Value: 0x11 + - AbbrCode: 0x7 + Values: + - Value: 0xCD + - Value: 0x76 + - Value: 0x2 + - Value: 0x38 + - Value: 0x0 + - Value: 0x2 + - AbbrCode: 0x0 + - AbbrCode: 0x8 + Values: + - Value: 0x81 + - Value: 0xD1 + - Value: 0x1 + - Value: 0xD + - AbbrCode: 0x9 + Values: + - Value: 0x86 + - AbbrCode: 0xA + Values: + - Value: 0xD7 + - Value: 0x1 + - AbbrCode: 0xB + Values: + - Value: 0x0 + - Value: 0x20 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0xE6 + - Value: 0xE2 + - Value: 0x1 + - Value: 0x6 + - Value: 0x48 + - Value: 0x1 + - AbbrCode: 0xC + Values: + - Value: 0x2 + BlockData: [ 0x91, 0xC ] + - Value: 0x11C + - Value: 0x1 + - Value: 0x6 + - Value: 0x48 + - AbbrCode: 0x0 + - AbbrCode: 0xD + Values: + - Value: 0x20 + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0xD0 + - Value: 0xEE + - Value: 0x1 + - Value: 0x18 + - AbbrCode: 0xE + Values: + - Value: 0x2 + BlockData: [ 0x91, 0x8 ] + - Value: 0x155 + - Value: 
0x1ED + - Value: 0x1 + - AbbrCode: 0xE + Values: + - Value: 0x2 + BlockData: [ 0x91, 0x0 ] + - Value: 0x15A + - Value: 0x1F2 + - Value: 0x1 + - AbbrCode: 0x0 + - AbbrCode: 0xF + Values: + - Value: 0x34 + - Value: 0x3C + - Value: 0x1 + BlockData: [ 0x6D ] + - Value: 0x10F + - Value: 0x1 + - Value: 0x1B + - Value: 0x48 + - Value: 0x1 + - AbbrCode: 0xC + Values: + - Value: 0x2 + BlockData: [ 0x91, 0x78 ] + - Value: 0x171 + - Value: 0x1 + - Value: 0x1B + - Value: 0x48 + - AbbrCode: 0xC + Values: + - Value: 0x2 + BlockData: [ 0x8F, 0x10 ] + - Value: 0x176 + - Value: 0x1 + - Value: 0x1B + - Value: 0x207 + - AbbrCode: 0x10 + Values: + - Value: 0x2 + BlockData: [ 0x8F, 0xC ] + - Value: 0x180 + - Value: 0x1 + - Value: 0x1C + - Value: 0x132 + - AbbrCode: 0x0 + - AbbrCode: 0x11 + Values: + - Value: 0x114 + - AbbrCode: 0x12 + Values: + - Value: 0x5 + - Value: 0x118 + - Value: 0x4 + - Value: 0x1 + - Value: 0xA + - AbbrCode: 0x13 + Values: + - Value: 0x11C + - Value: 0x48 + - Value: 0x1 + - Value: 0xB + - Value: 0x0 + - AbbrCode: 0x14 + Values: + - Value: 0x118 + - Value: 0x1 + - Value: 0xD + - Value: 0x1 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x15 + Values: + - Value: 0x172 + - Value: 0x1 + - AbbrCode: 0x16 + Values: + - Value: 0x48 + - AbbrCode: 0x0 + - AbbrCode: 0x17 + Values: + - Value: 0x11E + - Value: 0x131 + - Value: 0x1 + - Value: 0xF + - Value: 0x48 + - Value: 0x1 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x15 + Values: + - Value: 0x177 + - Value: 0x1 + - AbbrCode: 0x0 + - AbbrCode: 0x0 + - AbbrCode: 0x0 + - AbbrCode: 0x9 + Values: + - Value: 0x132 + - AbbrCode: 0x9 + Values: + - Value: 0x17C + - AbbrCode: 0x18 + Values: + - Value: 0x132 + - AbbrCode: 0x19 + Values: + - Value: 0x70 + - Value: 0x34 + - Value: 0x1 + BlockData: [ 0x6D ] + - Value: 0x19C + - Value: 0x135 + - Value: 0x147 + - AbbrCode: 0xE + Values: + - Value: 0x2 + BlockData: [ 0x91, 0x78 ] + - Value: 0x182 + - Value: 0x21D + - Value: 0x1 + - AbbrCode: 0xC + Values: + - Value: 0x2 + BlockData: [ 
0x91, 0x74 ] + - Value: 0x187 + - Value: 0x1 + - Value: 0xD + - Value: 0x48 + - AbbrCode: 0x0 + - AbbrCode: 0x1A + Values: + - Value: 0xA4 + - Value: 0x20 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1D2 + - Value: 0x145 + - Value: 0x147 + - AbbrCode: 0xE + Values: + - Value: 0x2 + BlockData: [ 0x91, 0x8 ] + - Value: 0x182 + - Value: 0x21D + - Value: 0x1 + - AbbrCode: 0xC + Values: + - Value: 0x2 + BlockData: [ 0x91, 0x4 ] + - Value: 0x187 + - Value: 0x1 + - Value: 0xD + - Value: 0x48 + - AbbrCode: 0x0 + - AbbrCode: 0x9 + Values: + - Value: 0x4F + - AbbrCode: 0x8 + Values: + - Value: 0x1FD + - Value: 0x15F + - Value: 0x1 + - Value: 0x8 + - AbbrCode: 0x9 + Values: + - Value: 0x202 + - AbbrCode: 0xA + Values: + - Value: 0x163 + - Value: 0x1 + - AbbrCode: 0x9 + Values: + - Value: 0x20C + - AbbrCode: 0x9 + Values: + - Value: 0x211 + - AbbrCode: 0x18 + Values: + - Value: 0x216 + - AbbrCode: 0x3 + Values: + - Value: 0x17B + - Value: 0x6 + - Value: 0x1 + - AbbrCode: 0x9 + Values: + - Value: 0x132 + - AbbrCode: 0x0 + debug_line: + - Length: 250 + Version: 4 + PrologueLength: 157 + MinInstLength: 1 + MaxOpsPerInst: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/objc' + Files: + - Name: main.mm + DirIdx: 0 + ModTime: 0 + Length: 0 + - Name: NSObject.h + DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 0 + - Opcode: 0x17 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 10 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x83 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_advance_line + SData: -7 + Data: 0 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_set_column + Data: 14 + - Opcode: 0x51 + Data: 0 
+ - Opcode: DW_LNS_set_column + Data: 12 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_set_column + Data: 3 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_advance_line + SData: 17 + Data: 0 + - Opcode: 0x82 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 20 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0xBA + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: 0x85 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 12 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 40 + - Opcode: 0x13 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 3 + - Opcode: 0x83 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: DW_LNS_advance_line + SData: -16 + Data: 0 + - Opcode: 0xBA + Data: 0 + - Opcode: DW_LNS_set_column + Data: 21 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: DW_LNS_const_add_pc + Data: 0 + - Opcode: 0xAC + Data: 0 + - Opcode: DW_LNS_set_column + Data: 22 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xBA + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xBA + Data: 0 + - Opcode: DW_LNS_set_column + Data: 18 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 16 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_set_column + Data: 22 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 8 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 0 +... 
diff --git a/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py b/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py index 4605c3b25864..35e96fb584ed 100644 --- a/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py +++ b/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py @@ -31,7 +31,7 @@ def get_module_cache_files(self, basename): # Doesn't depend on any specific debug information. @no_debug_info_test - @skipIfWindows # Windows runs into trouble deleting the executable + @skipIfWindows def test(self): """ Test module cache functionality for a simple object file. diff --git a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt index 76215c31b2aa..16c38c4ab219 100644 --- a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt @@ -1,6 +1,7 @@ add_lldb_unittest(SymbolFileDWARFTests DWARFASTParserClangTests.cpp DWARFDIETest.cpp + DWARFIndexCachingTest.cpp DWARFUnitTest.cpp SymbolFileDWARFTests.cpp XcodeSDKModuleTests.cpp diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp new file mode 100644 index 000000000000..29514b5d1fcf --- /dev/null +++ b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp @@ -0,0 +1,198 @@ +//===-- DWARFIndexCachingTest.cpp -------------------------------------=---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/SymbolFile/DWARF/DIERef.h" +#include "Plugins/SymbolFile/DWARF/DWARFDIE.h" +#include "Plugins/SymbolFile/DWARF/ManualDWARFIndex.h" +#include "Plugins/SymbolFile/DWARF/NameToDIE.h" +#include "TestingSupport/Symbol/YAMLModuleTester.h" +#include "lldb/Core/DataFileCache.h" +#include "lldb/Core/ModuleList.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" +#include "llvm/ADT/STLExtras.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using namespace lldb; +using namespace lldb_private; + +static void EncodeDecode(const DIERef &object, ByteOrder byte_order) { + const uint8_t addr_size = 8; + DataEncoder encoder(byte_order, addr_size); + object.Encode(encoder); + llvm::ArrayRef bytes = encoder.GetData(); + DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); + offset_t data_offset = 0; + EXPECT_EQ(object, DIERef::Decode(data, &data_offset)); +} + +static void EncodeDecode(const DIERef &object) { + EncodeDecode(object, eByteOrderLittle); + EncodeDecode(object, eByteOrderBig); +} + +TEST(DWARFIndexCachingTest, DIERefEncodeDecode) { + // Tests DIERef::Encode(...) and DIERef::Decode(...) 
+ EncodeDecode(DIERef(llvm::None, DIERef::Section::DebugInfo, 0x11223344)); + EncodeDecode(DIERef(llvm::None, DIERef::Section::DebugTypes, 0x11223344)); + EncodeDecode(DIERef(100, DIERef::Section::DebugInfo, 0x11223344)); + EncodeDecode(DIERef(200, DIERef::Section::DebugTypes, 0x11223344)); +} + +static void EncodeDecode(const NameToDIE &object, ByteOrder byte_order) { + const uint8_t addr_size = 8; + DataEncoder encoder(byte_order, addr_size); + DataEncoder strtab_encoder(byte_order, addr_size); + ConstStringTable const_strtab; + + object.Encode(encoder, const_strtab); + + llvm::ArrayRef bytes = encoder.GetData(); + DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); + + const_strtab.Encode(strtab_encoder); + llvm::ArrayRef strtab_bytes = strtab_encoder.GetData(); + DataExtractor strtab_data(strtab_bytes.data(), strtab_bytes.size(), + byte_order, addr_size); + StringTableReader strtab_reader; + offset_t strtab_data_offset = 0; + ASSERT_EQ(strtab_reader.Decode(strtab_data, &strtab_data_offset), true); + + NameToDIE decoded_object; + offset_t data_offset = 0; + decoded_object.Decode(data, &data_offset, strtab_reader); + EXPECT_TRUE(object == decoded_object); +} + +static void EncodeDecode(const NameToDIE &object) { + EncodeDecode(object, eByteOrderLittle); + EncodeDecode(object, eByteOrderBig); +} + +TEST(DWARFIndexCachingTest, NameToDIEEncodeDecode) { + NameToDIE map; + // Make sure an empty NameToDIE map encodes and decodes correctly. + EncodeDecode(map); + map.Insert(ConstString("hello"), + DIERef(llvm::None, DIERef::Section::DebugInfo, 0x11223344)); + map.Insert(ConstString("workd"), + DIERef(100, DIERef::Section::DebugInfo, 0x11223344)); + // Make sure a valid NameToDIE map encodes and decodes correctly. 
+ EncodeDecode(map); +} + +static void EncodeDecode(const ManualDWARFIndex::IndexSet &object, + ByteOrder byte_order) { + const uint8_t addr_size = 8; + DataEncoder encoder(byte_order, addr_size); + DataEncoder strtab_encoder(byte_order, addr_size); + object.Encode(encoder); + llvm::ArrayRef bytes = encoder.GetData(); + DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); + ManualDWARFIndex::IndexSet decoded_object; + offset_t data_offset = 0; + decoded_object.Decode(data, &data_offset); + EXPECT_TRUE(object == decoded_object); +} + +static void EncodeDecode(const ManualDWARFIndex::IndexSet &object) { + EncodeDecode(object, eByteOrderLittle); + EncodeDecode(object, eByteOrderBig); +} + +TEST(DWARFIndexCachingTest, ManualDWARFIndexIndexSetEncodeDecode) { + ManualDWARFIndex::IndexSet set; + // Make sure empty IndexSet can be encoded and decoded correctly + EncodeDecode(set); + + dw_offset_t die_offset = 0; + // Make sure an IndexSet with only items in IndexSet::function_basenames can + // be encoded and decoded correctly. + set.function_basenames.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.function_basenames.Clear(); + // Make sure an IndexSet with only items in IndexSet::function_fullnames can + // be encoded and decoded correctly. + set.function_fullnames.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.function_fullnames.Clear(); + // Make sure an IndexSet with only items in IndexSet::function_methods can + // be encoded and decoded correctly. + set.function_methods.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.function_methods.Clear(); + // Make sure an IndexSet with only items in IndexSet::function_selectors can + // be encoded and decoded correctly. 
+ set.function_selectors.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.function_selectors.Clear(); + // Make sure an IndexSet with only items in IndexSet::objc_class_selectors can + // be encoded and decoded correctly. + set.objc_class_selectors.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.objc_class_selectors.Clear(); + // Make sure an IndexSet with only items in IndexSet::globals can + // be encoded and decoded correctly. + set.globals.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.globals.Clear(); + // Make sure an IndexSet with only items in IndexSet::types can + // be encoded and decoded correctly. + set.types.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.types.Clear(); + // Make sure an IndexSet with only items in IndexSet::namespaces can + // be encoded and decoded correctly. + set.namespaces.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); + set.namespaces.Clear(); + // Make sure that an IndexSet with item in all NameToDIE maps can be + // be encoded and decoded correctly. 
+ set.function_basenames.Insert( + ConstString("a"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.function_fullnames.Insert( + ConstString("b"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.function_methods.Insert( + ConstString("c"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.function_selectors.Insert( + ConstString("d"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.objc_class_selectors.Insert( + ConstString("e"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.globals.Insert( + ConstString("f"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.types.Insert( + ConstString("g"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + set.namespaces.Insert( + ConstString("h"), + DIERef(llvm::None, DIERef::Section::DebugInfo, ++die_offset)); + EncodeDecode(set); +} From 48207b2559c6b012ce167f2e76acea39e9d405cf Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 28 Dec 2021 11:02:50 -0800 Subject: [PATCH 134/992] Fix "settings set -g" so it works again. When we switched options over to use the Options.td file, a bug was introduced that caused the "-g" option for "settings set" to require a filename arguemnt. This patch fixes this issue and adds a test so this doesn't regress. 
Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D116012 --- lldb/source/Commands/Options.td | 3 +-- lldb/test/Shell/Settings/TestSettingsSet.test | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 3e89eb0f6bda..7fbf0ab03995 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -18,8 +18,7 @@ let Command = "help" in { } let Command = "settings set" in { - def setset_global : Option<"global", "g">, Arg<"Filename">, - Completion<"DiskFile">, + def setset_global : Option<"global", "g">, Desc<"Apply the new value to the global default value.">; def setset_force : Option<"force", "f">, Desc<"Force an empty value to be accepted as the default.">; diff --git a/lldb/test/Shell/Settings/TestSettingsSet.test b/lldb/test/Shell/Settings/TestSettingsSet.test index 3006a694a16b..8e90c00c77c2 100644 --- a/lldb/test/Shell/Settings/TestSettingsSet.test +++ b/lldb/test/Shell/Settings/TestSettingsSet.test @@ -3,6 +3,11 @@ # Check that setting an empty value with -f(orce) clears the value. # RUN: not %lldb -b -s %s 2>&1 | FileCheck %s +# Make sure that "settings set -g" no longer requires a bogus filename. +settings set -g target.skip-prologue false +settings show target.skip-prologue +# CHECK: target.skip-prologue (boolean) = false + settings set tab-size 16 settings show tab-size # CHECK: tab-size (unsigned) = 16 From d5a4d6a4974a34092f161b0d23c3376e0f9e33ae Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 28 Dec 2021 21:58:31 +0300 Subject: [PATCH 135/992] [BitcodeReader] propagateAttributeTypes(): fix opaque pointer handling Can't get the pointee type of an opaque pointer, but in that case said attributes must already be typed, so just don't try to rewrite them if they already are. 
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3 +- llvm/test/Bitcode/attributes.ll | 83 +++++++++++++++++------ 2 files changed, 63 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index f5a878f8788a..1684f04d5ea8 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3857,7 +3857,8 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB, for (unsigned i = 0; i != CB->arg_size(); ++i) { for (Attribute::AttrKind Kind : {Attribute::ByVal, Attribute::StructRet, Attribute::InAlloca}) { - if (!CB->paramHasAttr(i, Kind)) + if (!CB->paramHasAttr(i, Kind) || + CB->getParamAttr(i, Kind).getValueAsType()) continue; CB->removeParamAttr(i, Kind); diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 5a24b097beb4..ec4f903e710e 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -1,4 +1,5 @@ -; RUN: llvm-as < %s | llvm-dis | FileCheck %s +; RUN: llvm-as < %s | llvm-dis | FileCheck %s --check-prefixes=CHECK,CHECK-TYPED +; RUN: llvm-as -opaque-pointers < %s | llvm-dis -opaque-pointers | FileCheck %s --check-prefixes=CHECK,CHECK-OPAQUE ; RUN: verify-uselistorder < %s ; PR12696 @@ -27,7 +28,8 @@ define void @f4(i8 inreg %0) } define void @f5(i8* sret(i8) %0) -; CHECK: define void @f5(i8* sret(i8) %0) +; CHECK-TYPED: define void @f5(i8* sret(i8) %0) +; CHECK-OPAQUE: define void @f5(ptr sret(i8) %0) { ret void; } @@ -39,19 +41,22 @@ define void @f6() nounwind } define void @f7(i8* noalias %0) -; CHECK: define void @f7(i8* noalias %0) +; CHECK-TYPED: define void @f7(i8* noalias %0) +; CHECK-OPAQUE: define void @f7(ptr noalias %0) { ret void; } define void @f8(i8* byval(i8) %0) -; CHECK: define void @f8(i8* byval(i8) %0) +; CHECK-TYPED: define void @f8(i8* byval(i8) %0) +; CHECK-OPAQUE: define void @f8(ptr byval(i8) %0) { ret void; } define void @f9(i8* nest %0) -; CHECK: define void 
@f9(i8* nest %0) +; CHECK-TYPED: define void @f9(i8* nest %0) +; CHECK-OPAQUE: define void @f9(ptr nest %0) { ret void; } @@ -99,13 +104,15 @@ define void @f16() sspreq } define void @f17(i8* align 4 %0) -; CHECK: define void @f17(i8* align 4 %0) +; CHECK-TYPED: define void @f17(i8* align 4 %0) +; CHECK-OPAQUE: define void @f17(ptr align 4 %0) { ret void; } define void @f18(i8* nocapture %0) -; CHECK: define void @f18(i8* nocapture %0) +; CHECK-TYPED: define void @f18(i8* nocapture %0) +; CHECK-OPAQUE: define void @f18(ptr nocapture %0) { ret void; } @@ -215,12 +222,14 @@ define void @f35() optnone noinline } define void @f36(i8* inalloca(i8) %0) { -; CHECK: define void @f36(i8* inalloca(i8) %0) { +; CHECK-TYPED: define void @f36(i8* inalloca(i8) %0) { +; CHECK-OPAQUE: define void @f36(ptr inalloca(i8) %0) { ret void } define nonnull i8* @f37(i8* nonnull %a) { -; CHECK: define nonnull i8* @f37(i8* nonnull %a) { +; CHECK-TYPED: define nonnull i8* @f37(i8* nonnull %a) { +; CHECK-OPAQUE: define nonnull ptr @f37(ptr nonnull %a) { ret i8* %a } @@ -231,21 +240,25 @@ define void @f38() unnamed_addr jumptable { } define dereferenceable(2) i8* @f39(i8* dereferenceable(1) %a) { -; CHECK: define dereferenceable(2) i8* @f39(i8* dereferenceable(1) %a) { +; CHECK-TYPED: define dereferenceable(2) i8* @f39(i8* dereferenceable(1) %a) { +; CHECK-OPAQUE: define dereferenceable(2) ptr @f39(ptr dereferenceable(1) %a) { ret i8* %a } define dereferenceable(18446744073709551606) i8* @f40(i8* dereferenceable(18446744073709551615) %a) { -; CHECK: define dereferenceable(18446744073709551606) i8* @f40(i8* dereferenceable(18446744073709551615) %a) { +; CHECK-TYPED: define dereferenceable(18446744073709551606) i8* @f40(i8* dereferenceable(18446744073709551615) %a) { +; CHECK-OPAQUE: define dereferenceable(18446744073709551606) ptr @f40(ptr dereferenceable(18446744073709551615) %a) { ret i8* %a } define void @f41(i8* align 32 %0, double* align 64 %1) { -; CHECK: define void @f41(i8* align 32 %0, 
double* align 64 %1) { +; CHECK-TYPED: define void @f41(i8* align 32 %0, double* align 64 %1) { +; CHECK-OPAQUE: define void @f41(ptr align 32 %0, ptr align 64 %1) { ret void } -; CHECK: define dereferenceable_or_null(8) i8* @f42(i8* dereferenceable_or_null(8) %foo) +; CHECK-TYPED: define dereferenceable_or_null(8) i8* @f42(i8* dereferenceable_or_null(8) %foo) +; CHECK-OPAQUE: define dereferenceable_or_null(8) ptr @f42(ptr dereferenceable_or_null(8) %foo) define dereferenceable_or_null(8) i8* @f42(i8* dereferenceable_or_null(8) %foo) { entry: ret i8* %foo @@ -287,19 +300,22 @@ define void @f49() inaccessiblemem_or_argmemonly { ret void } -; CHECK: define void @f50(i8* swiftself %0) +; CHECK-TYPED: define void @f50(i8* swiftself %0) +; CHECK-OPAQUE: define void @f50(ptr swiftself %0) define void @f50(i8* swiftself %0) { ret void; } -; CHECK: define i32 @f51(i8** swifterror %0) +; CHECK-TYPED: define i32 @f51(i8** swifterror %0) +; CHECK-OPAQUE: define i32 @f51(ptr swifterror %0) define i32 @f51(i8** swifterror %0) { ret i32 0 } -; CHECK: define i32 @f52(i32 %0, i8** swifterror %1) +; CHECK-TYPED: define i32 @f52(i32 %0, i8** swifterror %1) +; CHECK-OPAQUE: define i32 @f52(i32 %0, ptr swifterror %1) define i32 @f52(i32 %0, i8** swifterror %1) { ret i32 0 @@ -318,12 +334,14 @@ entry: ret float 1.0 } -; CHECK: define i8* @f54(i32 %0) #30 +; CHECK-TYPED: define i8* @f54(i32 %0) #30 +; CHECK-OPAQUE: define ptr @f54(i32 %0) #30 define i8* @f54(i32 %0) allocsize(0) { ret i8* null } -; CHECK: define i8* @f55(i32 %0, i32 %1) #31 +; CHECK-TYPED: define i8* @f55(i32 %0, i32 %1) #31 +; CHECK-OPAQUE: define ptr @f55(i32 %0, i32 %1) #31 define i8* @f55(i32 %0, i32 %1) allocsize(0, 1) { ret i8* null } @@ -374,7 +392,8 @@ define void @f63() sanitize_memtag ret void } -; CHECK: define void @f64(i32* preallocated(i32) %a) +; CHECK-TYPED: define void @f64(i32* preallocated(i32) %a) +; CHECK-OPAQUE: define void @f64(ptr preallocated(i32) %a) define void @f64(i32* preallocated(i32) %a) 
{ ret void @@ -392,7 +411,8 @@ define noundef i32 @f66(i32 noundef %a) ret i32 %a } -; CHECK: define void @f67(i32* byref(i32) %a) +; CHECK-TYPED: define void @f67(i32* byref(i32) %a) +; CHECK-OPAQUE: define void @f67(ptr byref(i32) %a) define void @f67(i32* byref(i32) %a) { ret void @@ -440,7 +460,8 @@ define void @f74() vscale_range(1,0) ret void } -; CHECK: define void @f76(i8* swiftasync %0) +; CHECK-TYPED: define void @f76(i8* swiftasync %0) +; CHECK-OPAQUE: define void @f76(ptr swiftasync %0) define void @f76(i8* swiftasync %0) { ret void; @@ -460,7 +481,8 @@ define void @f78() noprofile declare void @llvm.some.intrinsic(i32*) define void @f79() { -; CHECK: call void @llvm.some.intrinsic(i32* elementtype(i32) null) +; CHECK-TYPED: call void @llvm.some.intrinsic(i32* elementtype(i32) null) +; CHECK-OPAQUE: call void @llvm.some.intrinsic(ptr elementtype(i32) null) call void @llvm.some.intrinsic(i32* elementtype(i32) null) ret void } @@ -471,6 +493,23 @@ define void @f80() disable_sanitizer_instrumentation ret void; } +define void @f81(i8** sret(i8*) %0) +; CHECK-TYPED: define void @f81(i8** sret(i8*) %0) +; CHECK-OPAQUE: define void @f81(ptr sret(ptr) %0) +{ + ret void; +} + +define void @f82(i32* %0) +; CHECK-TYPED: define void @f82(i32* %0) +; CHECK-OPAQUE: define void @f82(ptr %0) +{ +; CHECK-TYPED: call void @llvm.some.intrinsic(i32* sret(i32) %0) +; CHECK-OPAQUE: call void @llvm.some.intrinsic(ptr sret(i32) %0) + call void @llvm.some.intrinsic(i32* sret(i32) %0) + ret void; +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } From 43e500d79198d761ca76d8587cca76da8de967f8 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 28 Dec 2021 19:10:01 +0000 Subject: [PATCH 136/992] [AArch64] Minor AArch64MIPeepholeOpt cleanup. NFC We should always be in SSA form when running the pass, so turn a check into an assert. 
--- llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 42db18332f1c..3e1306eb3297 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -246,7 +246,7 @@ bool AArch64MIPeepholeOpt::visitORR( MI.getOperand(0).setReg(DefReg); ToBeRemoved.insert(&MI); - LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; }); + LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); return true; } @@ -259,8 +259,7 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis(); MRI = &MF.getRegInfo(); - if (!MRI->isSSA()) - return false; + assert (MRI->isSSA() && "Expected to be run on SSA form!"); bool Changed = false; SmallSetVector ToBeRemoved; @@ -278,6 +277,7 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { break; case AArch64::ORRWrs: Changed = visitORR(MI, ToBeRemoved); + break; } } } From 89aa87c4e601985dae4b41206f0c5594e8742c78 Mon Sep 17 00:00:00 2001 From: Michael Benfield Date: Tue, 28 Dec 2021 19:10:27 +0000 Subject: [PATCH 137/992] [clang] Fix AttrDocs.td formatting. This should fix the builder clang-sphinx-docs. 
--- clang/include/clang/Basic/AttrDocs.td | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 8a7424a88c9f..a24218a9c82b 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5987,7 +5987,7 @@ attribute requires a string literal argument to identify the handle being releas def DiagnoseAsBuiltinDocs : Documentation { let Category = DocCatFunction; let Content = [{ -The ``diagnose_as_builtin` attribute indicates that Fortify diagnostics are to +The ``diagnose_as_builtin`` attribute indicates that Fortify diagnostics are to be applied to the declared function as if it were the function specified by the attribute. The builtin function whose diagnostics are to be mimicked should be given. In addition, the order in which arguments should be applied must also @@ -5995,12 +5995,12 @@ be given. For example, the attribute can be used as follows. - .. code-block:: c +.. code-block:: c - __attribute__((diagnose_as_builtin(__builtin_memset, 3, 2, 1))) - void *mymemset(int n, int c, void *s) { - // ... - } + __attribute__((diagnose_as_builtin(__builtin_memset, 3, 2, 1))) + void *mymemset(int n, int c, void *s) { + // ... + } This indicates that calls to ``mymemset`` should be diagnosed as if they were calls to ``__builtin_memset``. The arguments ``3, 2, 1`` indicate by index the @@ -6015,7 +6015,8 @@ they would to the builtin function, after all normal arguments. For instance, to diagnose a new function as if it were `sscanf`, we can use the attribute as follows. - .. code-block:: c +.. code-block:: c + __attribute__((diagnose_as_builtin(sscanf, 1, 2))) int mysscanf(const char *str, const char *format, ...) { // ... 
From 20135c8a5347e6741d00fee0541b3aee2c9ae1e8 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 28 Dec 2021 11:20:03 -0800 Subject: [PATCH 138/992] [Hexagon] Move testcase accidentally committed to wrong directory llvm/test/DebugInfo/Hexagon/zreg-post-inc.s -> llvm/test/MC/Hexagon/zreg-post-inc.s --- llvm/test/{DebugInfo => MC}/Hexagon/zreg-post-inc.s | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/{DebugInfo => MC}/Hexagon/zreg-post-inc.s (100%) diff --git a/llvm/test/DebugInfo/Hexagon/zreg-post-inc.s b/llvm/test/MC/Hexagon/zreg-post-inc.s similarity index 100% rename from llvm/test/DebugInfo/Hexagon/zreg-post-inc.s rename to llvm/test/MC/Hexagon/zreg-post-inc.s From f0cb77d7d5cac88097dbea9a2c9f00c0eee23f28 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 23 Dec 2021 16:25:53 -0800 Subject: [PATCH 139/992] [mlir][tosa] Resubmit split tosa-to-linalg named ops out of pass Includes dependency fix that resulted in canonicalizer pass not linking in. Linalg named ops lowering are moved to a separate pass. This allows TOSA canonicalizers to run between named-ops lowerings and the general TOSA lowerings. This allows the TOSA canonicalizers to run between lowerings. 
Differential Revision: https://reviews.llvm.org/D116057 --- mlir/include/mlir/Conversion/Passes.td | 14 + .../Conversion/TosaToLinalg/TosaToLinalg.h | 4 + .../Conversion/TosaToLinalg/CMakeLists.txt | 3 + .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 814 +--------------- .../TosaToLinalg/TosaToLinalgNamed.cpp | 885 ++++++++++++++++++ .../TosaToLinalg/TosaToLinalgNamedPass.cpp | 68 ++ .../TosaToLinalg/TosaToLinalgPass.cpp | 4 + .../TosaToLinalg/tosa-to-linalg-named.mlir | 448 +++++++++ .../TosaToLinalg/tosa-to-linalg.mlir | 460 --------- 9 files changed, 1427 insertions(+), 1273 deletions(-) create mode 100644 mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp create mode 100644 mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp create mode 100644 mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index f6e49cc889d1..4d1f383c0229 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -645,6 +645,20 @@ def TosaToLinalg : FunctionPass<"tosa-to-linalg"> { let constructor = "tosa::createTosaToLinalg()"; } +//===----------------------------------------------------------------------===// +// TosaToLinalgNamed +//===----------------------------------------------------------------------===// + +def TosaToLinalgNamed : FunctionPass<"tosa-to-linalg-named"> { + let summary = "Lower TOSA to LinAlg named operations"; + let description = [{ + Pass that converts TOSA operations to the equivalent operations using the + Linalg named operations. 
+ }]; + + let constructor = "tosa::createTosaToLinalgNamed()"; +} + //===----------------------------------------------------------------------===// // TosaToSCF //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h index b542833fa0e9..ec44d01065a7 100644 --- a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h +++ b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h @@ -20,6 +20,7 @@ namespace mlir { namespace tosa { std::unique_ptr createTosaToLinalg(); +std::unique_ptr createTosaToLinalgNamed(); /// Populates passes to convert from TOSA to Linalg on buffers. At the end of /// the pass, the function will only contain linalg ops or standard ops if the @@ -29,6 +30,9 @@ void addTosaToLinalgPasses(OpPassManager &pm); /// Populates conversion passes from TOSA dialect to Linalg dialect. void populateTosaToLinalgConversionPatterns(RewritePatternSet *patterns); +/// Populates conversion passes from TOSA dialect to Linalg named operations. 
+void populateTosaToLinalgNamedConversionPatterns(RewritePatternSet *patterns); + } // namespace tosa } // namespace mlir diff --git a/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt b/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt index 5617dd3e0ce0..50a1c20c7249 100644 --- a/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt +++ b/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt @@ -1,5 +1,7 @@ add_mlir_conversion_library(MLIRTosaToLinalg TosaToLinalg.cpp + TosaToLinalgNamed.cpp + TosaToLinalgNamedPass.cpp TosaToLinalgPass.cpp ADDITIONAL_HEADER_DIRS @@ -18,6 +20,7 @@ add_mlir_conversion_library(MLIRTosaToLinalg MLIRMath MLIRPass MLIRTensor + MLIRTransforms MLIRTosa MLIRTosaTransforms MLIRSupport diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index abff8b57ccdc..04262234ceaa 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -61,37 +61,6 @@ static mlir::SelectOp clampHelper(Location loc, Value arg, return rewriter.create(loc, largerThanMax, max, minOrArg); } -static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, - Attribute padAttr, OpBuilder &rewriter) { - // Input should be padded if necessary. 
- if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) - return input; - - ShapedType inputTy = input.getType().cast(); - Type inputETy = inputTy.getElementType(); - auto inputShape = inputTy.getShape(); - - assert((inputShape.size() * 2) == pad.size()); - - SmallVector paddedShape; - SmallVector lowIndices; - SmallVector highIndices; - for (int i = 0, s = inputShape.size(); i < s; i++) { - auto lowPad = pad[i * 2]; - auto highPad = pad[i * 2 + 1]; - paddedShape.push_back(inputShape[i] + highPad + lowPad); - lowIndices.push_back(rewriter.getIndexAttr(lowPad)); - highIndices.push_back(rewriter.getIndexAttr(highPad)); - } - - Value padValue = rewriter.create(loc, padAttr); - - return linalg::PadTensorOp::createPadScalarOp( - RankedTensorType::get(paddedShape, inputETy), input, padValue, - lowIndices, highIndices, /*nofold=*/false, loc, rewriter) - .result(); -} - static SmallVector filterDynamicDims(SmallVector dynDims) { SmallVector filteredDims; for (auto dim : dynDims) @@ -1065,510 +1034,6 @@ class PointwiseConverter : public OpRewritePattern { } }; -class ConvConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op->getLoc(); - Value input = op->getOperand(0); - Value weight = op->getOperand(1); - Value bias = op->getOperand(2); - - ShapedType inputTy = input.getType().cast(); - ShapedType weightTy = weight.getType().cast(); - ShapedType biasTy = bias.getType().cast(); - ShapedType resultTy = op->getResult(0).getType().cast(); - - Type inputETy = inputTy.getElementType(); - Type resultETy = resultTy.getElementType(); - - auto padAttr = op->getAttr("pad").cast(); - auto strideTosaAttr = op->getAttr("stride").cast(); - auto dilationTosaAttr = op->getAttr("dilation").cast(); - bool isQuantized = op->hasAttr("quantization_info"); - - if (!inputTy.hasStaticShape() || 
!weightTy.hasStaticShape() || - !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) - return rewriter.notifyMatchFailure(op, - "tosa.conv ops require static shapes"); - - if (inputETy.isUnsignedInteger()) - return rewriter.notifyMatchFailure( - op, "tosa.conv ops does not support unsigned integer input"); - - auto weightShape = weightTy.getShape(); - - // Apply padding as necessary. - Attribute zeroAttr = rewriter.getZeroAttr(inputETy); - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); - - int64_t intMin = - APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - - if (iZp < intMin || iZp > intMax) - return rewriter.notifyMatchFailure( - op, "tosa.conv op quantization has zp outside of input range"); - - zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); - } - - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(padAttr, pad); - pad.resize(pad.size() + 2, 0); - input = applyPad(loc, input, pad, zeroAttr, rewriter); - - // Transpose the kernel to match dimension ordering of the linalg - // convolution operation. - // TODO(suderman): See if this can be efficiently folded - check whether - // the input is used anywhere else, if not fold the constant. 
- SmallVector weightPerm{1, 2, 3, 0}; - SmallVector newWeightShape{weightShape[1], weightShape[2], - weightShape[3], weightShape[0]}; - auto weightPermAttr = DenseIntElementsAttr::get( - RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm); - Value weightPermValue = - rewriter.create(loc, weightPermAttr); - Type newWeightTy = - RankedTensorType::get(newWeightShape, weightTy.getElementType()); - weight = rewriter.create(loc, newWeightTy, weight, - weightPermValue); - - Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); - Value initTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - // Extract the attributes for convolution. - llvm::SmallVector stride, dilation; - getValuesFromIntArrayAttribute(strideTosaAttr, stride); - getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); - - // Create the convolution op. - auto strideAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), stride); - auto dilationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), dilation); - - // Create maps for the bias broadcasting - SmallVector indexingMaps; - indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, - {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - - Value biasInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue()); - auto kZp = rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue()); - - auto iZpVal = 
rewriter.create(loc, iZp); - auto kZpVal = rewriter.create(loc, kZp); - Value conv = - rewriter - .create( - loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - ->getResult(0); - - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, conv}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } - - Value conv = rewriter - .create( - loc, resultTy, ValueRange{input, weight}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - ->getResult(0); - - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, conv}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - - rewriter.replaceOp(op, result); - return success(); - } -}; - -class DepthwiseConvConverter - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op->getLoc(); - Value input = op->getOperand(0); - Value weight = op->getOperand(1); - Value bias = op->getOperand(2); - - ShapedType inputTy = input.getType().cast(); - ShapedType weightTy = weight.getType().cast(); - ShapedType biasTy = bias.getType().cast(); - ShapedType resultTy = op->getResult(0).getType().cast(); - - Type inputETy = inputTy.getElementType(); - Type resultETy = resultTy.getElementType(); - - auto padAttr = op->getAttr("pad").cast(); - auto strideTosaAttr = 
op->getAttr("stride").cast(); - auto dilationTosaAttr = op->getAttr("dilation").cast(); - - bool isQuantized = op->hasAttr("quantization_info"); - IntegerAttr iZp; - IntegerAttr kZp; - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - iZp = rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue()); - kZp = rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue()); - } - - if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || - !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) - return rewriter.notifyMatchFailure(op, - "tosa.conv ops require static shapes"); - - auto weightShape = weightTy.getShape(); - auto resultShape = resultTy.getShape(); - - // Apply padding as necessary. - Attribute zeroAttr = rewriter.getZeroAttr(inputETy); - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); - - int64_t intMin = - APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - - if (iZp < intMin || iZp > intMax) - return rewriter.notifyMatchFailure( - op, "tosa.depthwise_conv op quantization has zp outside of input " - "range"); - - zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); - } - - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(padAttr, pad); - pad.resize(pad.size() + 2, 0); - - input = applyPad(loc, input, pad, zeroAttr, rewriter); - - // Extract the attributes for convolution. - llvm::SmallVector stride, dilation; - getValuesFromIntArrayAttribute(strideTosaAttr, stride); - getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); - - // Create the convolution op. 
- auto strideAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), stride); - auto dilationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), dilation); - ShapedType linalgConvTy = - RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], - weightShape[2], weightShape[3]}, - resultETy); - - // Broadcast the initial value to the output tensor before convolving. - SmallVector indexingMaps; - indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, - {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - - Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); - Value initTensor = rewriter.create( - loc, linalgConvTy.getShape(), resultETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - Value biasInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - if (!isQuantized) { - Value conv = rewriter - .create( - loc, linalgConvTy, ValueRange{input, weight}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - .getResult(0); - Value convReshape = rewriter.create(loc, resultTy, conv); - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, convReshape}), - biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - } else { - auto iZpVal = rewriter.create(loc, iZp); - auto kZpVal = rewriter.create(loc, kZp); - Value conv = - rewriter - .create( - loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal}, - 
ValueRange{zeroTensor}, strideAttr, dilationAttr) - .getResult(0); - Value convReshape = rewriter.create(loc, resultTy, conv); - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, convReshape}), - biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - } - return success(); - } -}; - -class MatMulConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - - auto outputTy = op.getType().cast(); - auto outputElementTy = outputTy.getElementType(); - - auto firstOperandTy = op->getOperand(0).getType().cast(); - auto secondOperandTy = op->getOperand(1).getType().cast(); - - SmallVector dynDims; - dynDims.resize(op->getResult(0).getType().cast().getRank()); - - if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) { - dynDims[0] = rewriter.create(loc, op->getOperand(0), 0); - } - - if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) { - dynDims[1] = rewriter.create(loc, op->getOperand(0), 1); - } - - if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) { - dynDims[2] = rewriter.create(loc, op->getOperand(1), 2); - } - - SmallVector filteredDims = filterDynamicDims(dynDims); - - auto zeroAttr = rewriter.getZeroAttr(outputElementTy); - Value zero = rewriter.create(loc, zeroAttr); - auto initTensor = rewriter.create( - loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - if (!op.quantization_info()) { - rewriter.replaceOpWithNewOp( - op, TypeRange{op.getType()}, 
ValueRange{adaptor.a(), adaptor.b()}, - ValueRange{zeroTensor}); - return success(); - } - - auto quantizationInfo = op.quantization_info().getValue(); - auto aZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.a_zp().getValue().getSExtValue())); - auto bZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.b_zp().getValue().getSExtValue())); - rewriter.replaceOpWithNewOp( - op, TypeRange{op.getType()}, - ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); - - return success(); - } -}; - -class FullyConnectedConverter - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - auto outputTy = op.getType().cast(); - auto input = op.input(); - auto inputTy = input.getType().cast(); - - auto bias = op.bias(); - - auto weight = op.weight(); - auto weightTy = weight.getType().cast(); - auto weightShape = weightTy.getShape(); - - auto outputETy = outputTy.getElementType(); - - SmallVector dynDims; - dynDims.resize(op->getResult(0).getType().cast().getRank()); - - if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) { - dynDims[0] = rewriter.create(loc, input, 0); - } - - if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) { - dynDims[1] = rewriter.create(loc, weight, 0); - } - - SmallVector filteredDims = filterDynamicDims(dynDims); - - // Creating maps for the output of MatMul and the bias - SmallVector indexingMaps; - - // Broadcast the bias. 
- indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, - {rewriter.getAffineDimExpr(1)}, - rewriter.getContext())); - - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); - - auto initTensor = rewriter.create( - loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); - - // When quantized, the input elemeny type is not the same as the output - Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - SmallVector permutation{1, 0}; - auto permutationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), permutation); - Value permutationValue = - rewriter.create(loc, permutationAttr); - - SmallVector newWeightShape{weightShape[1], weightShape[0]}; - Type newWeightTy = - RankedTensorType::get(newWeightShape, weightTy.getElementType()); - - Value transposedWeight = rewriter.create( - loc, newWeightTy, weight, permutationValue); - - auto biasInitTensor = - rewriter - .create(loc, filteredDims, - outputTy.getShape(), outputETy) - ->getResults(); - - if (!op.quantization_info()) { - Value matmul = rewriter - .create( - loc, TypeRange{op.getType()}, - ValueRange{input, transposedWeight}, zeroTensor) - ->getResult(0); - - Value result = - rewriter - .create( - loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } - - auto quantizationInfo = op.quantization_info().getValue(); - auto inputZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - 
quantizationInfo.input_zp().getValue().getSExtValue())); - auto outputZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue())); - Value matmul = - rewriter - .create( - loc, TypeRange{op.getType()}, - ValueRange{input, transposedWeight, inputZp, outputZp}, - zeroTensor) - ->getResult(0); - Value result = - rewriter - .create( - loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } -}; - class ReshapeConverterCollapse : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -2810,277 +2275,6 @@ class TableConverter : public OpRewritePattern { } }; -class MaxPool2dConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tosa::MaxPool2dOp op, - PatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - Value input = op.input(); - ShapedType inputTy = input.getType().cast(); - - ShapedType resultTy = op.getType().template cast(); - Type resultETy = inputTy.getElementType(); - - if (!inputTy.hasStaticShape()) - return failure(); - - // Determine what the initial value needs to be for the max pool op. - Attribute initialAttr; - if (resultETy.isF32()) - initialAttr = rewriter.getFloatAttr( - resultETy, - APFloat::getLargest(resultETy.cast().getFloatSemantics(), - true)); - - if (resultETy.isa()) - initialAttr = rewriter.getIntegerAttr( - resultETy, - APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth())); - - if (!initialAttr) - return rewriter.notifyMatchFailure( - op, "Unsupported initial value for tosa.maxpool_2d op"); - - // Apply padding as necessary. 
- llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(op.pad(), pad); - pad.resize(pad.size() + 2, 0); - Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter); - - Value initialValue = rewriter.create(loc, initialAttr); - - SmallVector kernel, stride; - getValuesFromIntArrayAttribute(op.kernel(), kernel); - getValuesFromIntArrayAttribute(op.stride(), stride); - - Attribute strideAttr = rewriter.getI64VectorAttr(stride); - Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); - - // Create the linalg op that performs pooling. - Value initTensor = rewriter.create( - loc, resultTy.getShape(), resultTy.getElementType()); - - Value filledInitTensor = - rewriter.create(loc, initialValue, initTensor).result(); - - Value fakeWindowDims = - rewriter.create(loc, kernel, resultETy); - - rewriter.replaceOpWithNewOp( - op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims}, - filledInitTensor, strideAttr, dilationAttr); - return success(); - } -}; - -class AvgPool2dConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tosa::AvgPool2dOp op, - PatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - Value input = op.input(); - ShapedType inputTy = input.getType().cast(); - Type inElementTy = inputTy.getElementType(); - - ShapedType resultTy = op.getType().template cast(); - Type resultETy = op.getType().cast().getElementType(); - - Type accETy = - inElementTy.isa() ? rewriter.getI32Type() : inElementTy; - ShapedType accTy = resultTy.clone(accETy); - - if (!inputTy.hasStaticShape()) - return failure(); - - // Apply padding as necessary. 
- llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(op.pad(), pad); - pad.resize(pad.size() + 2, 0); - Attribute padAttr = rewriter.getZeroAttr(inElementTy); - Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter); - - Attribute initialAttr = rewriter.getZeroAttr(accETy); - Value initialValue = rewriter.create(loc, initialAttr); - - SmallVector kernel, stride; - getValuesFromIntArrayAttribute(op.kernel(), kernel); - getValuesFromIntArrayAttribute(op.stride(), stride); - - Attribute strideAttr = rewriter.getI64VectorAttr(stride); - Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); - - // Create the linalg op that performs pooling. - Value poolInitTensor = - rewriter.create(loc, accTy.getShape(), accETy); - - Value filledInitTensor = - rewriter.create(loc, initialValue, poolInitTensor) - .result(); - - Value fakeWindowDims = - rewriter.create(loc, kernel, accETy); - - // Sum across the pooled region. - Value poolingOp = rewriter - .create( - loc, ArrayRef{accTy}, - ValueRange{paddedInput, fakeWindowDims}, - filledInitTensor, strideAttr, dilationAttr) - .getResult(0); - - // Normalize the summed value by the number of elements grouped in each - // pool. - auto poolingOpTy = poolingOp.getType().cast(); - auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); - - Value genericInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - - auto genericOp = rewriter.create( - loc, ArrayRef({resultTy}), ValueRange{poolingOp}, - ValueRange{genericInitTensor}, - ArrayRef({affineMap, affineMap}), - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &b, Location loc, ValueRange args) { - auto zero = rewriter.create(loc, 0); - auto one = rewriter.create(loc, 1); - auto iH = rewriter.create( - loc, poolingOpTy.getDimSize(1) - 1); - auto iW = rewriter.create( - loc, poolingOpTy.getDimSize(2) - 1); - - // Compute the indices from either end. 
- auto y0 = rewriter.create(loc, 1); - auto x0 = rewriter.create(loc, 2); - auto y1 = rewriter.create(loc, iH, y0); - auto x1 = rewriter.create(loc, iW, x0); - - // Determines what the portion of valid input is covered by the - // kernel. - auto padFn = [&](Value v, Value x, int64_t pad) -> Value { - if (pad == 0) - return v; - - auto padVal = rewriter.create(loc, pad); - Value dx = rewriter.create(loc, x, padVal); - - Value cmp = rewriter.create( - loc, arith::CmpIPredicate::slt, dx, zero); - Value offset = rewriter.create(loc, cmp, dx, zero); - return rewriter.create(loc, v, offset)->getResult(0); - }; - - // Compute the vertical component of coverage. - auto kH0 = rewriter.create(loc, kernel[0]); - auto kH1 = padFn(kH0, y0, pad[2]); - auto kH2 = padFn(kH1, y1, pad[3]); - auto kHCmp = rewriter.create( - loc, arith::CmpIPredicate::slt, kH2, one); - auto kH3 = rewriter.create(loc, kHCmp, one, kH2); - - // compute the horizontal component of coverage. - auto kW0 = rewriter.create(loc, kernel[1]); - auto kW1 = padFn(kW0, x0, pad[4]); - auto kW2 = padFn(kW1, x1, pad[5]); - auto kWCmp = rewriter.create( - loc, arith::CmpIPredicate::slt, kW2, one); - auto kW3 = rewriter.create(loc, kWCmp, one, kW2); - - // Compute the total number of elements and normalize. - Value count = rewriter.create(loc, kH3, kW3); - auto countI = rewriter.create( - loc, rewriter.getI32Type(), count); - - // Divide by the number of summed values. For floats this is just - // a div however for quantized values input normalization had - // to be applied. - Value poolVal = args[0]; - if (accETy.isa()) { - auto countF = rewriter.create(loc, accETy, countI); - poolVal = rewriter.create(loc, poolVal, countF) - ->getResult(0); - } else { - - // If we have quantization information we need to apply an offset - // for the input zp value. 
- if (op.quantization_info()) { - auto quantizationInfo = op.quantization_info().getValue(); - auto inputZp = rewriter.create( - loc, quantizationInfo.input_zp()); - Value offset = - rewriter.create(loc, accETy, countI, inputZp); - poolVal = - rewriter.create(loc, accETy, poolVal, offset); - } - - // Compute the multiplier and shift values for the quantization - // normalization. Preferably we would want to compute more bits - // however 32-bits should be enough for compute. Honestly we - // should probably straight divide. - int64_t numerator = ((1 << 30) + 1); - int64_t shift = 30; - - Value numeratorVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(numerator)); - Value multiplierVal = - rewriter - .create(loc, rewriter.getI32Type(), - numeratorVal, countI) - .getResult(); - Value shiftVal = rewriter.create( - loc, rewriter.getI8IntegerAttr(shift)); - - auto scaled = - rewriter - .create( - loc, rewriter.getI32Type(), poolVal, multiplierVal, - shiftVal, rewriter.getBoolAttr(false)) - .getResult(); - - // If we have quantization information we need to apply output - // zeropoint. - if (op.quantization_info()) { - auto quantizationInfo = op.quantization_info().getValue(); - auto outputZp = rewriter.create( - loc, quantizationInfo.output_zp()); - scaled = rewriter.create(loc, scaled, outputZp) - .getResult(); - } - - // Apply Clip. - int64_t outBitwidth = resultETy.getIntOrFloatBitWidth(); - - auto min = rewriter.create( - loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(), - accETy); - auto max = rewriter.create( - loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(), - accETy); - auto clamp = clampHelper( - loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter); - - poolVal = clamp; - // Convert type. 
- if (resultETy != clamp.getType()) { - poolVal = - rewriter.create(loc, resultETy, poolVal); - } - } - - rewriter.create(loc, poolVal); - }); - - rewriter.replaceOp(op, genericOp.getResult(0)); - return success(); - } -}; - } // namespace void mlir::tosa::populateTosaToLinalgConversionPatterns( @@ -3132,8 +2326,6 @@ void mlir::tosa::populateTosaToLinalgConversionPatterns( ReduceConverter, ArgMaxConverter, ConcatConverter, - ConvConverter, - DepthwiseConvConverter, GatherConverter, PadConverter, ReshapeConverterCollapse, @@ -3144,10 +2336,6 @@ void mlir::tosa::populateTosaToLinalgConversionPatterns( ReverseConverter, TableConverter, TileConverter, - TransposeConverter, - MatMulConverter, - MaxPool2dConverter, - AvgPool2dConverter, - FullyConnectedConverter>(patterns->getContext()); + TransposeConverter>(patterns->getContext()); // clang-format on } diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp new file mode 100644 index 000000000000..90220ef44e97 --- /dev/null +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -0,0 +1,885 @@ +//===- TosaToLinalgNamed.cpp - Lowering Tosa to Linalg Named Ops ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These rewriters lower from the Tosa to the Linalg named ops. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "mlir/Dialect/Utils/ReshapeOpsUtils.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include + +using namespace mlir; + +static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { + return SmallVector(nParallelLoops, getParallelIteratorTypeName()); +} + +template +static void getValuesFromIntArrayAttribute(ArrayAttr attr, + SmallVector &arrayValues) { + for (Attribute val : attr.getValue()) { + arrayValues.push_back(val.cast().getValue().getSExtValue()); + } +} + +template +static mlir::SelectOp clampHelper(Location loc, Value arg, + arith::ConstantOp min, arith::ConstantOp max, + P pred, OpBuilder &rewriter) { + auto smallerThanMin = rewriter.create(loc, pred, arg, min); + auto minOrArg = + rewriter.create(loc, smallerThanMin, min, arg); + auto largerThanMax = rewriter.create(loc, pred, max, arg); + return rewriter.create(loc, largerThanMax, max, minOrArg); +} + +static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, + Attribute padAttr, OpBuilder &rewriter) { + // Input should be padded if necessary. 
+ if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) + return input; + + ShapedType inputTy = input.getType().cast(); + Type inputETy = inputTy.getElementType(); + auto inputShape = inputTy.getShape(); + + assert((inputShape.size() * 2) == pad.size()); + + SmallVector paddedShape; + SmallVector lowIndices; + SmallVector highIndices; + for (int i = 0, s = inputShape.size(); i < s; i++) { + auto lowPad = pad[i * 2]; + auto highPad = pad[i * 2 + 1]; + paddedShape.push_back(inputShape[i] + highPad + lowPad); + lowIndices.push_back(rewriter.getIndexAttr(lowPad)); + highIndices.push_back(rewriter.getIndexAttr(highPad)); + } + + Value padValue = rewriter.create(loc, padAttr); + + return linalg::PadTensorOp::createPadScalarOp( + RankedTensorType::get(paddedShape, inputETy), input, padValue, + lowIndices, highIndices, /*nofold=*/false, loc, rewriter) + .result(); +} + +static SmallVector filterDynamicDims(SmallVector dynDims) { + SmallVector filteredDims; + for (auto dim : dynDims) + if (dim) + filteredDims.push_back(dim); + return filteredDims; +} + +namespace { + +class ConvConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op->getLoc(); + Value input = op->getOperand(0); + Value weight = op->getOperand(1); + Value bias = op->getOperand(2); + + ShapedType inputTy = input.getType().cast(); + ShapedType weightTy = weight.getType().cast(); + ShapedType biasTy = bias.getType().cast(); + ShapedType resultTy = op->getResult(0).getType().cast(); + + Type inputETy = inputTy.getElementType(); + Type resultETy = resultTy.getElementType(); + + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + auto dilationTosaAttr = op->getAttr("dilation").cast(); + bool isQuantized = op->hasAttr("quantization_info"); + + if (!inputTy.hasStaticShape() || 
!weightTy.hasStaticShape() || + !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) + return rewriter.notifyMatchFailure(op, + "tosa.conv ops require static shapes"); + + if (inputETy.isUnsignedInteger()) + return rewriter.notifyMatchFailure( + op, "tosa.conv ops does not support unsigned integer input"); + + auto weightShape = weightTy.getShape(); + + // Apply padding as necessary. + Attribute zeroAttr = rewriter.getZeroAttr(inputETy); + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); + + int64_t intMin = + APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + int64_t intMax = + APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + + if (iZp < intMin || iZp > intMax) + return rewriter.notifyMatchFailure( + op, "tosa.conv op quantization has zp outside of input range"); + + zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); + } + + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(padAttr, pad); + pad.resize(pad.size() + 2, 0); + input = applyPad(loc, input, pad, zeroAttr, rewriter); + + // Transpose the kernel to match dimension ordering of the linalg + // convolution operation. + // TODO(suderman): See if this can be efficiently folded - check whether + // the input is used anywhere else, if not fold the constant. 
+ SmallVector weightPerm{1, 2, 3, 0}; + SmallVector newWeightShape{weightShape[1], weightShape[2], + weightShape[3], weightShape[0]}; + auto weightPermAttr = DenseIntElementsAttr::get( + RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm); + Value weightPermValue = + rewriter.create(loc, weightPermAttr); + Type newWeightTy = + RankedTensorType::get(newWeightShape, weightTy.getElementType()); + weight = rewriter.create(loc, newWeightTy, weight, + weightPermValue); + + Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); + Value initTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + // Extract the attributes for convolution. + llvm::SmallVector stride, dilation; + getValuesFromIntArrayAttribute(strideTosaAttr, stride); + getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); + + // Create the convolution op. + auto strideAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), stride); + auto dilationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), dilation); + + // Create maps for the bias broadcasting + SmallVector indexingMaps; + indexingMaps.push_back(AffineMap::get( + /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + + Value biasInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue()); + auto kZp = rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue()); + + auto iZpVal = 
rewriter.create(loc, iZp); + auto kZpVal = rewriter.create(loc, kZp); + Value conv = + rewriter + .create( + loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + ->getResult(0); + + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, conv}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + return success(); + } + + Value conv = rewriter + .create( + loc, resultTy, ValueRange{input, weight}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + ->getResult(0); + + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, conv}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + + rewriter.replaceOp(op, result); + return success(); + } +}; + +class DepthwiseConvConverter + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op->getLoc(); + Value input = op->getOperand(0); + Value weight = op->getOperand(1); + Value bias = op->getOperand(2); + + ShapedType inputTy = input.getType().cast(); + ShapedType weightTy = weight.getType().cast(); + ShapedType biasTy = bias.getType().cast(); + ShapedType resultTy = op->getResult(0).getType().cast(); + + Type inputETy = inputTy.getElementType(); + Type resultETy = resultTy.getElementType(); + + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = 
op->getAttr("stride").cast(); + auto dilationTosaAttr = op->getAttr("dilation").cast(); + + bool isQuantized = op->hasAttr("quantization_info"); + IntegerAttr iZp; + IntegerAttr kZp; + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + iZp = rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue()); + kZp = rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue()); + } + + if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || + !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) + return rewriter.notifyMatchFailure(op, + "tosa.conv ops require static shapes"); + + auto weightShape = weightTy.getShape(); + auto resultShape = resultTy.getShape(); + + // Apply padding as necessary. + Attribute zeroAttr = rewriter.getZeroAttr(inputETy); + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); + + int64_t intMin = + APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + int64_t intMax = + APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + + if (iZp < intMin || iZp > intMax) + return rewriter.notifyMatchFailure( + op, "tosa.depthwise_conv op quantization has zp outside of input " + "range"); + + zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); + } + + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(padAttr, pad); + pad.resize(pad.size() + 2, 0); + + input = applyPad(loc, input, pad, zeroAttr, rewriter); + + // Extract the attributes for convolution. + llvm::SmallVector stride, dilation; + getValuesFromIntArrayAttribute(strideTosaAttr, stride); + getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); + + // Create the convolution op. 
+ auto strideAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), stride); + auto dilationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), dilation); + ShapedType linalgConvTy = + RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], + weightShape[2], weightShape[3]}, + resultETy); + + // Broadcast the initial value to the output tensor before convolving. + SmallVector indexingMaps; + indexingMaps.push_back(AffineMap::get( + /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + + Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); + Value initTensor = rewriter.create( + loc, linalgConvTy.getShape(), resultETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + Value biasInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + if (!isQuantized) { + Value conv = rewriter + .create( + loc, linalgConvTy, ValueRange{input, weight}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + .getResult(0); + Value convReshape = rewriter.create( + loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, convReshape}), + biasInitTensor, indexingMaps, + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + } else { + auto iZpVal = rewriter.create(loc, iZp); + auto kZpVal = rewriter.create(loc, kZp); + Value conv = + rewriter + .create( + loc, linalgConvTy, 
ValueRange{input, weight, iZpVal, kZpVal}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + .getResult(0); + Value convReshape = rewriter.create( + loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, convReshape}), + biasInitTensor, indexingMaps, + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + } + return success(); + } +}; + +class MatMulConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + + auto outputTy = op.getType().cast(); + auto outputElementTy = outputTy.getElementType(); + + auto firstOperandTy = op->getOperand(0).getType().cast(); + auto secondOperandTy = op->getOperand(1).getType().cast(); + + SmallVector dynDims; + dynDims.resize(op->getResult(0).getType().cast().getRank()); + + if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) { + dynDims[0] = rewriter.create(loc, op->getOperand(0), 0); + } + + if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) { + dynDims[1] = rewriter.create(loc, op->getOperand(0), 1); + } + + if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) { + dynDims[2] = rewriter.create(loc, op->getOperand(1), 2); + } + + SmallVector filteredDims = filterDynamicDims(dynDims); + + auto zeroAttr = rewriter.getZeroAttr(outputElementTy); + Value zero = rewriter.create(loc, zeroAttr); + auto initTensor = rewriter.create( + loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + if 
(!op.quantization_info()) { + rewriter.replaceOpWithNewOp( + op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()}, + ValueRange{zeroTensor}); + return success(); + } + + auto quantizationInfo = op.quantization_info().getValue(); + auto aZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.a_zp().getValue().getSExtValue())); + auto bZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.b_zp().getValue().getSExtValue())); + rewriter.replaceOpWithNewOp( + op, TypeRange{op.getType()}, + ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); + + return success(); + } +}; + +class FullyConnectedConverter + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + auto outputTy = op.getType().cast(); + auto input = op.input(); + auto inputTy = input.getType().cast(); + + auto bias = op.bias(); + + auto weight = op.weight(); + auto weightTy = weight.getType().cast(); + auto weightShape = weightTy.getShape(); + + auto outputETy = outputTy.getElementType(); + + SmallVector dynDims; + dynDims.resize(op->getResult(0).getType().cast().getRank()); + + if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) { + dynDims[0] = rewriter.create(loc, input, 0); + } + + if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) { + dynDims[1] = rewriter.create(loc, weight, 0); + } + + SmallVector filteredDims = filterDynamicDims(dynDims); + + // Creating maps for the output of MatMul and the bias + SmallVector indexingMaps; + + // Broadcast the bias. 
+ indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,
+ {rewriter.getAffineDimExpr(1)},
+ rewriter.getContext()));
+
+ indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
+ indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
+
+ auto initTensor = rewriter.create(
+ loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
+
+ // When quantized, the input element type is not the same as the output
+ Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy);
+ Value zero = rewriter.create(loc, resultZeroAttr);
+ Value zeroTensor =
+ rewriter.create(loc, zero, initTensor).getResult(0);
+
+ SmallVector permutation{1, 0};
+ auto permutationAttr = DenseIntElementsAttr::get(
+ RankedTensorType::get({2}, rewriter.getI64Type()), permutation);
+ Value permutationValue =
+ rewriter.create(loc, permutationAttr);
+
+ SmallVector newWeightShape{weightShape[1], weightShape[0]};
+ Type newWeightTy =
+ RankedTensorType::get(newWeightShape, weightTy.getElementType());
+
+ Value transposedWeight = rewriter.create(
+ loc, newWeightTy, weight, permutationValue);
+
+ auto biasInitTensor =
+ rewriter
+ .create(loc, filteredDims,
+ outputTy.getShape(), outputETy)
+ ->getResults();
+
+ if (!op.quantization_info()) {
+ Value matmul = rewriter
+ .create(
+ loc, TypeRange{op.getType()},
+ ValueRange{input, transposedWeight}, zeroTensor)
+ ->getResult(0);
+
+ Value result =
+ rewriter
+ .create(
+ loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+ indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
+ [&](OpBuilder &nestedBuilder, Location nestedLoc,
+ ValueRange args) {
+ Value added = nestedBuilder.create(
+ loc, args[0], args[1]);
+ nestedBuilder.create(nestedLoc, added);
+ })
+ .getResult(0);
+ rewriter.replaceOp(op, result);
+ return success();
+ }
+
+ auto quantizationInfo = op.quantization_info().getValue();
+ auto inputZp = rewriter.create(
+ loc, rewriter.getI32IntegerAttr(
+ 
quantizationInfo.input_zp().getValue().getSExtValue())); + auto outputZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue())); + Value matmul = + rewriter + .create( + loc, TypeRange{op.getType()}, + ValueRange{input, transposedWeight, inputZp, outputZp}, + zeroTensor) + ->getResult(0); + Value result = + rewriter + .create( + loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + return success(); + } +}; + +class MaxPool2dConverter : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::MaxPool2dOp op, + PatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + Value input = op.input(); + ShapedType inputTy = input.getType().cast(); + + ShapedType resultTy = op.getType().template cast(); + Type resultETy = inputTy.getElementType(); + + if (!inputTy.hasStaticShape()) + return failure(); + + // Determine what the initial value needs to be for the max pool op. + Attribute initialAttr; + if (resultETy.isF32()) + initialAttr = rewriter.getFloatAttr( + resultETy, + APFloat::getLargest(resultETy.cast().getFloatSemantics(), + true)); + + if (resultETy.isa()) + initialAttr = rewriter.getIntegerAttr( + resultETy, + APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth())); + + if (!initialAttr) + return rewriter.notifyMatchFailure( + op, "Unsupported initial value for tosa.maxpool_2d op"); + + // Apply padding as necessary. 
+ llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(op.pad(), pad); + pad.resize(pad.size() + 2, 0); + Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter); + + Value initialValue = rewriter.create(loc, initialAttr); + + SmallVector kernel, stride; + getValuesFromIntArrayAttribute(op.kernel(), kernel); + getValuesFromIntArrayAttribute(op.stride(), stride); + + Attribute strideAttr = rewriter.getI64VectorAttr(stride); + Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); + + // Create the linalg op that performs pooling. + Value initTensor = rewriter.create( + loc, resultTy.getShape(), resultTy.getElementType()); + + Value filledInitTensor = + rewriter.create(loc, initialValue, initTensor).result(); + + Value fakeWindowDims = + rewriter.create(loc, kernel, resultETy); + + rewriter.replaceOpWithNewOp( + op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims}, + filledInitTensor, strideAttr, dilationAttr); + return success(); + } +}; + +class AvgPool2dConverter : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::AvgPool2dOp op, + PatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + Value input = op.input(); + ShapedType inputTy = input.getType().cast(); + Type inElementTy = inputTy.getElementType(); + + ShapedType resultTy = op.getType().template cast(); + Type resultETy = op.getType().cast().getElementType(); + + Type accETy = + inElementTy.isa() ? rewriter.getI32Type() : inElementTy; + ShapedType accTy = resultTy.clone(accETy); + + if (!inputTy.hasStaticShape()) + return failure(); + + // Apply padding as necessary. 
+ llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(op.pad(), pad); + pad.resize(pad.size() + 2, 0); + Attribute padAttr = rewriter.getZeroAttr(inElementTy); + Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter); + + Attribute initialAttr = rewriter.getZeroAttr(accETy); + Value initialValue = rewriter.create(loc, initialAttr); + + SmallVector kernel, stride; + getValuesFromIntArrayAttribute(op.kernel(), kernel); + getValuesFromIntArrayAttribute(op.stride(), stride); + + Attribute strideAttr = rewriter.getI64VectorAttr(stride); + Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); + + // Create the linalg op that performs pooling. + Value poolInitTensor = + rewriter.create(loc, accTy.getShape(), accETy); + + Value filledInitTensor = + rewriter.create(loc, initialValue, poolInitTensor) + .result(); + + Value fakeWindowDims = + rewriter.create(loc, kernel, accETy); + + // Sum across the pooled region. + Value poolingOp = rewriter + .create( + loc, ArrayRef{accTy}, + ValueRange{paddedInput, fakeWindowDims}, + filledInitTensor, strideAttr, dilationAttr) + .getResult(0); + + // Normalize the summed value by the number of elements grouped in each + // pool. + auto poolingOpTy = poolingOp.getType().cast(); + auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); + + Value genericInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + + auto genericOp = rewriter.create( + loc, ArrayRef({resultTy}), ValueRange{poolingOp}, + ValueRange{genericInitTensor}, + ArrayRef({affineMap, affineMap}), + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &b, Location loc, ValueRange args) { + auto zero = rewriter.create(loc, 0); + auto one = rewriter.create(loc, 1); + auto iH = rewriter.create( + loc, poolingOpTy.getDimSize(1) - 1); + auto iW = rewriter.create( + loc, poolingOpTy.getDimSize(2) - 1); + + // Compute the indices from either end. 
+ auto y0 = rewriter.create(loc, 1); + auto x0 = rewriter.create(loc, 2); + auto y1 = rewriter.create(loc, iH, y0); + auto x1 = rewriter.create(loc, iW, x0); + + // Determines what the portion of valid input is covered by the + // kernel. + auto padFn = [&](Value v, Value x, int64_t pad) -> Value { + if (pad == 0) + return v; + + auto padVal = rewriter.create(loc, pad); + Value dx = rewriter.create(loc, x, padVal); + + Value cmp = rewriter.create( + loc, arith::CmpIPredicate::slt, dx, zero); + Value offset = rewriter.create(loc, cmp, dx, zero); + return rewriter.create(loc, v, offset)->getResult(0); + }; + + // Compute the vertical component of coverage. + auto kH0 = rewriter.create(loc, kernel[0]); + auto kH1 = padFn(kH0, y0, pad[2]); + auto kH2 = padFn(kH1, y1, pad[3]); + auto kHCmp = rewriter.create( + loc, arith::CmpIPredicate::slt, kH2, one); + auto kH3 = rewriter.create(loc, kHCmp, one, kH2); + + // compute the horizontal component of coverage. + auto kW0 = rewriter.create(loc, kernel[1]); + auto kW1 = padFn(kW0, x0, pad[4]); + auto kW2 = padFn(kW1, x1, pad[5]); + auto kWCmp = rewriter.create( + loc, arith::CmpIPredicate::slt, kW2, one); + auto kW3 = rewriter.create(loc, kWCmp, one, kW2); + + // Compute the total number of elements and normalize. + Value count = rewriter.create(loc, kH3, kW3); + auto countI = rewriter.create( + loc, rewriter.getI32Type(), count); + + // Divide by the number of summed values. For floats this is just + // a div however for quantized values input normalization had + // to be applied. + Value poolVal = args[0]; + if (accETy.isa()) { + auto countF = rewriter.create(loc, accETy, countI); + poolVal = rewriter.create(loc, poolVal, countF) + ->getResult(0); + } else { + + // If we have quantization information we need to apply an offset + // for the input zp value. 
+ if (op.quantization_info()) { + auto quantizationInfo = op.quantization_info().getValue(); + auto inputZp = rewriter.create( + loc, quantizationInfo.input_zp()); + Value offset = + rewriter.create(loc, accETy, countI, inputZp); + poolVal = + rewriter.create(loc, accETy, poolVal, offset); + } + + // Compute the multiplier and shift values for the quantization + // normalization. Preferably we would want to compute more bits + // however 32-bits should be enough for compute. Honestly we + // should probably straight divide. + int64_t numerator = ((1 << 30) + 1); + int64_t shift = 30; + + Value numeratorVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(numerator)); + Value multiplierVal = + rewriter + .create(loc, rewriter.getI32Type(), + numeratorVal, countI) + .getResult(); + Value shiftVal = rewriter.create( + loc, rewriter.getI8IntegerAttr(shift)); + + auto scaled = + rewriter + .create( + loc, rewriter.getI32Type(), poolVal, multiplierVal, + shiftVal, rewriter.getBoolAttr(false)) + .getResult(); + + // If we have quantization information we need to apply output + // zeropoint. + if (op.quantization_info()) { + auto quantizationInfo = op.quantization_info().getValue(); + auto outputZp = rewriter.create( + loc, quantizationInfo.output_zp()); + scaled = rewriter.create(loc, scaled, outputZp) + .getResult(); + } + + // Apply Clip. + int64_t outBitwidth = resultETy.getIntOrFloatBitWidth(); + + auto min = rewriter.create( + loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(), + accETy); + auto max = rewriter.create( + loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(), + accETy); + auto clamp = clampHelper( + loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter); + + poolVal = clamp; + // Convert type. 
+ if (resultETy != clamp.getType()) {
+ poolVal =
+ rewriter.create(loc, resultETy, poolVal);
+ }
+ }
+
+ rewriter.create(loc, poolVal);
+ });
+
+ rewriter.replaceOp(op, genericOp.getResult(0));
+ return success();
+ }
+};
+
+} // namespace
+
+void mlir::tosa::populateTosaToLinalgNamedConversionPatterns(
+ RewritePatternSet *patterns) {
+ patterns->add<
+ // clang-format off
+ ConvConverter,
+ DepthwiseConvConverter,
+ MatMulConverter,
+ MaxPool2dConverter,
+ AvgPool2dConverter,
+ FullyConnectedConverter>(patterns->getContext());
+ // clang-format on
+}
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp
new file mode 100644
index 000000000000..f5f6ac1a5469
--- /dev/null
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp
@@ -0,0 +1,68 @@
+//===- TosaToLinalgNamedPass.cpp - Lowering Tosa to Linalg Named Ops ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation pass legalizes Tosa operations to the Linalg dialect. 
+// +//===----------------------------------------------------------------------===// + +#include "../PassDetail.h" +#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "mlir/Dialect/Tosa/Transforms/PassDetail.h" +#include "mlir/Dialect/Tosa/Transforms/Passes.h" +#include "mlir/Dialect/Tosa/Utils/QuantUtils.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; + +namespace { +struct TosaToLinalgNamed : public TosaToLinalgNamedBase { +public: + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnFunction() override { + RewritePatternSet patterns(&getContext()); + ConversionTarget target(getContext()); + target.addLegalDialect(); + + // Not every TOSA op can be legalized to linalg. 
+ target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + + target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); + + FuncOp func = getFunction(); + mlir::tosa::populateTosaToLinalgNamedConversionPatterns(&patterns); + if (failed(applyFullConversion(func, target, std::move(patterns)))) + signalPassFailure(); + } +}; +} // namespace + +std::unique_ptr mlir::tosa::createTosaToLinalgNamed() { + return std::make_unique(); +} diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp index 8f4f872c8860..3813ba345137 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp @@ -26,6 +26,7 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" using namespace mlir; @@ -67,6 +68,9 @@ std::unique_ptr mlir::tosa::createTosaToLinalg() { } void mlir::tosa::addTosaToLinalgPasses(OpPassManager &pm) { + pm.addNestedPass(createTosaMakeBroadcastablePass()); + pm.addNestedPass(createTosaToLinalgNamed()); + pm.addNestedPass(mlir::createCanonicalizerPass()); pm.addNestedPass(createTosaMakeBroadcastablePass()); pm.addNestedPass(createTosaToLinalg()); } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir new file mode 100644 index 000000000000..f5814883cc49 --- /dev/null +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -0,0 +1,448 @@ +// RUN: mlir-opt --split-input-file --tosa-to-linalg-named %s -verify-diagnostics -o -| FileCheck %s + +// CHECK-LABEL: @matmul +func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { + // CHECK: [[C0:%.+]] = arith.constant 0 + // CHECK: [[INIT:%.+]] = 
linalg.init_tensor [1, 5, 6] + // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) + return %0 : tensor<1x5x6xf32> +} + +// ----- + + +// CHECK-LABEL: @matmul_quantized +func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { + // CHECK: [[C0:%.+]] = arith.constant 0 + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32> + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[TWO:%.+]] = arith.constant 2 + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) + return %0 : tensor<1x5x6xi32> +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_batch +func @matmul_dyn_batch(%arg0: tensor, %arg1: tensor) -> (tensor) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] + // CHECK: %[[C0_0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor -> tensor + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_independent_dim +func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) { + // 
CHECK: %[[C2:.+]] = arith.constant 2 + // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) + return %0 : tensor<1x5x?xf32> +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_independent_dim +func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) + return %0 : tensor<1x5x6xf32> +} + +// ----- + +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> + +// CHECK-LABEL: @fully_connected +func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { + // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ZERO:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) + // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> + // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) + // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> 
tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>) + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> + +// CHECK-LABEL: @quantized_fully_connected +func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { + // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ZERO:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) + // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> + // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) + // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[TWO:%.+]] = arith.constant 2 + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): + // CHECK: [[ADD:%.+]] = arith.addi + // CHECK: linalg.yield [[ADD]] : i32 + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> 
(tensor<5x6xi32>) + return %0 : tensor<5x6xi32> +} + +// ----- + +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> + +// CHECK-LABEL: @fully_connected_dyn +func @fully_connected_dyn(%arg0: tensor, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] + // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6] + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]]) + // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> + // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) + // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6] + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield %[[ADD]] : f32 + + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor, tensor<6x3xf32>, tensor<6xf32>) -> (tensor) + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @max_pool +func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { + // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 + // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]]) + // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + %0 = "tosa.max_pool2d"(%arg0) {pad = 
[0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) + return +} + +// CHECK-LABEL: @max_pool_padded +func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { + // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] + // CHECK-DAG: linalg.yield [[CONST]] + // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) + // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) + return +} + +// CHECK-LABEL: @max_pool_i8 +func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () { + // CHECK: arith.constant -128 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) + return +} + +// CHECK-LABEL: @max_pool_i16 +func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () { + // CHECK: arith.constant -32768 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>) + return +} + +// CHECK-LABEL: @max_pool_i32 +func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { + // CHECK: arith.constant -2147483648 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>) + return +} +// ----- + +// CHECK-LABEL: 
@avg_pool +func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { + // Initial piece computes the sum of the pooling region, with appropriate padding. + // CHECK: [[CONST:%.+]] = arith.constant 0 + // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: [[CONST:%.+]] = arith.constant 0 + // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] + // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) + // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4] + // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>) + // CHECK: [[ZERO:%.0]] = arith.constant 0 + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[HEIGHT:%.+]] = arith.constant 4 + // CHECK: [[WIDTH:%.+]] = arith.constant 32 + // CHECK: [[IDX1:%.+]] = linalg.index 1 + // CHECK: [[IDX2:%.+]] = linalg.index 2 + + // The large block below computes what portion of the kernel is within non-padded input. 
+ // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]] + // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]] + // CHECK: [[KH:%.+]] = arith.constant 4 + // CHECK: [[PAD0:%.+]] = arith.constant 1 + // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]] + // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]] + // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]] + // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]] + // CHECK: [[PAD1:%.+]] = arith.constant 1 + // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]] + // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]] + // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]] + // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]] + // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]] + // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]] + // CHECK: [[KW:%.+]] = arith.constant 4 : index + // CHECK: [[PAD2:%.+]] = arith.constant 1 : index + // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]] + // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]] + // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]] + // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]] + // CHECK: [[PAD3:%.+]] = arith.constant 1 : index + // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]] + // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]] + // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]] + // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]] + // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]] + // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]] + + // Given the valid coverage of the pooling region, normalize the summation. 
+ // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]] + // CHECK: [[CI:%.+]] = arith.index_cast [[C]] + // CHECK: [[CF:%.+]] = arith.sitofp [[CI]] + // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]] + // CHECK: linalg.yield [[RESULT]] + %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) + return %0 : tensor<1x5x33x62xf32> +} + +// ----- + +// CHECK-LABEL: @avg_pool_i8 +func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () { + + // CHECK: linalg.pooling_nhwc_sum + // CHECK: linalg.generic + + // CHECK: %[[INZP:.+]] = arith.constant -128 + // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] + // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] + // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 + // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} + // CHECK: %[[SHIFT:.+]] = arith.constant 30 + // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} + // CHECK: %[[OUTZP:.+]] = arith.constant -128 + // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] + // CHECK: %[[MIN:.+]] = arith.constant -128 + // CHECK: %[[MAX:.+]] = arith.constant 127 + // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] + // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] + // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] + // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] + // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] + // CHECK: linalg.yield %[[TRUNC]] + %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8> + return +} + +// ----- + +// CHECK-LABEL: @avg_pool_i16 +func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () { + + // CHECK: linalg.pooling_nhwc_sum + // CHECK: linalg.generic + + 
// CHECK: %[[INZP:.+]] = arith.constant -128 + // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] + // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] + // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 + // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} + // CHECK: %[[SHIFT:.+]] = arith.constant 30 + // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} + // CHECK: %[[OUTZP:.+]] = arith.constant -128 + // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] + // CHECK: %[[MIN:.+]] = arith.constant -32768 + // CHECK: %[[MAX:.+]] = arith.constant 32767 + // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] + // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] + // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] + // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] + // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] + // CHECK: linalg.yield %[[TRUNC]] + %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16> + return +} + +// ----- + +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @conv2d_f32 +func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () { + // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]> + // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) + // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] + // CHECK: %[[CST:.+]] = arith.constant 0 + // CHECK: %[[FILL:.+]] = linalg.fill + // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} 
ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: arith.addf + // CHECK: linalg.yield + %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) + return +} + +// ----- + +// CHECK-LABEL: @conv2d_padded_f32 +func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield %[[C0]] + // CHECK: linalg.conv_2d_nhwc_hwcf + %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) + return +} + +// ----- + +// CHECK-LABEL: @conv2d_quant +func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { + // CHECK: %[[C22:.+]] = arith.constant -22 + // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield %[[C22]] + // CHECK: linalg.conv_2d_nhwc_hwcf_q + %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv 
+func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + // CHECK: } -> tensor<1x5x5x33xf32> + %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_strides +func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, 
strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + // CHECK: } -> tensor<1x5x5x33xf32> + %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_quant +func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { + // CHECK: [[PADV:%.+]] = arith.constant -128 + // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield [[PADV]] + + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512] + // CHECK: [[C128:%.+]] = arith.constant -128 + // CHECK: [[C42:%.+]] = arith.constant 42 + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // 
CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 + // CHECK: linalg.yield [[ADD]] : i32 + // CHECK: } -> tensor<1x12x12x512xi32> + %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32> + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_quant_dilations +func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512] + // CHECK: [[C128:%.+]] = arith.constant -128 + // CHECK: [[C42:%.+]] = arith.constant 42 + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", 
"parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 + // CHECK: linalg.yield [[ADD]] : i32 + // CHECK: } -> tensor<1x10x10x512xi32> + %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32> + return +} diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index f8dc2e0bbd08..e68e76c67ef9 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1064,154 +1064,6 @@ func @tile(%arg0 : tensor<2x3xi8>) -> () { // ----- - -// CHECK-LABEL: @matmul -func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { - // CHECK: [[C0:%.+]] = arith.constant 0 - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) - return %0 : tensor<1x5x6xf32> -} - -// ----- - - -// CHECK-LABEL: @matmul_quantized -func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { - // CHECK: [[C0:%.+]] = arith.constant 0 - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32> - // CHECK: [[ONE:%.+]] = arith.constant 1 - // 
CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> - %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) - return %0 : tensor<1x5x6xi32> -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_batch -func @matmul_dyn_batch(%arg0: tensor, %arg1: tensor) -> (tensor) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[C0_0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor -> tensor - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_independent_dim -func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) { - // CHECK: %[[C2:.+]] = arith.constant 2 - // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) - return %0 : tensor<1x5x?xf32> -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_independent_dim -func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: 
%[[INIT:.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) - return %0 : tensor<1x5x6xf32> -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)> - -// CHECK-LABEL: @fully_connected -func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { - // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) - // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> - // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6] - // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs([[INITT]] : tensor<3x6xf32>) { - // CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32): - // CHECK: linalg.yield [[IN]] : f32 - // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, 
tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>) - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)> - -// CHECK-LABEL: @quantized_fully_connected -func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { - // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) - // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> - // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6] - // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xi8>) outs([[INITT]] : tensor<3x6xi8>) { - // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): - // CHECK: linalg.yield [[IN]] : i8 - // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] - // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): - // CHECK: [[ADD:%.+]] = arith.addi - // CHECK: linalg.yield [[ADD]] : i32 - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>) - return %0 : tensor<5x6xi32> -} - -// ----- - -// CHECK-LABEL: @fully_connected_dyn -func @fully_connected_dyn(%arg0: 
tensor, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6] - // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]]) - // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> - // CHECK: %[[INITT:.+]] = linalg.init_tensor [3, 6] - // CHECK: %[[TRANSPOSE:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs(%[[INITT]] : tensor<3x6xf32>) { - // CHECK: ^bb0(%[[IN:.+]]: f32, %[[UNUSED:.+]]: f32): - // CHECK: linalg.yield %[[IN]] : f32 - // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6] - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield %[[ADD]] : f32 - - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor, tensor<6x3xf32>, tensor<6xf32>) -> (tensor) - return %0 : tensor -} - -// ----- - func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // TODO: Output contains multiple "arith.constant 1 : index". 
@@ -1395,318 +1247,6 @@ func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // ----- -// CHECK-LABEL: @max_pool -func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { - // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 - // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62] - // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]]) - // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) - return -} - -// CHECK-LABEL: @max_pool_padded -func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { - // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] - // CHECK-DAG: linalg.yield [[CONST]] - // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] - // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) - // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) - return -} - -// CHECK-LABEL: @max_pool_i8 -func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () { - // CHECK: arith.constant -128 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> 
(tensor<1x4x32x62xi8>) - return -} - -// CHECK-LABEL: @max_pool_i16 -func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () { - // CHECK: arith.constant -32768 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>) - return -} - -// CHECK-LABEL: @max_pool_i32 -func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { - // CHECK: arith.constant -2147483648 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>) - return -} -// ----- - -// CHECK-LABEL: @avg_pool -func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { - // Initial piece computes the sum of the pooling region, with appropriate padding. - // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] - // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) - // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4] - // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>) - // CHECK: [[ZERO:%.0]] = arith.constant 0 - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[HEIGHT:%.+]] = arith.constant 4 - // CHECK: [[WIDTH:%.+]] = arith.constant 32 - // CHECK: [[IDX1:%.+]] = linalg.index 1 - // CHECK: [[IDX2:%.+]] = linalg.index 2 - 
- // The large block below computes what portion of the kernel is within non-padded input. - // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]] - // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]] - // CHECK: [[KH:%.+]] = arith.constant 4 - // CHECK: [[PAD0:%.+]] = arith.constant 1 - // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]] - // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]] - // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]] - // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]] - // CHECK: [[PAD1:%.+]] = arith.constant 1 - // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]] - // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]] - // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]] - // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]] - // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]] - // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]] - // CHECK: [[KW:%.+]] = arith.constant 4 : index - // CHECK: [[PAD2:%.+]] = arith.constant 1 : index - // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]] - // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]] - // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]] - // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]] - // CHECK: [[PAD3:%.+]] = arith.constant 1 : index - // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]] - // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]] - // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]] - // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]] - // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]] - // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]] - - // Given the valid coverage of the pooling region, normalize the summation. 
- // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]] - // CHECK: [[CI:%.+]] = arith.index_cast [[C]] - // CHECK: [[CF:%.+]] = arith.sitofp [[CI]] - // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]] - // CHECK: linalg.yield [[RESULT]] - %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) - return %0 : tensor<1x5x33x62xf32> -} - -// ----- - -// CHECK-LABEL: @avg_pool_i8 -func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () { - - // CHECK: linalg.pooling_nhwc_sum - // CHECK: linalg.generic - - // CHECK: %[[INZP:.+]] = arith.constant -128 - // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] - // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] - // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 - // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} - // CHECK: %[[SHIFT:.+]] = arith.constant 30 - // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} - // CHECK: %[[OUTZP:.+]] = arith.constant -128 - // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] - // CHECK: %[[MIN:.+]] = arith.constant -128 - // CHECK: %[[MAX:.+]] = arith.constant 127 - // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] - // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] - // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] - // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] - // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] - // CHECK: linalg.yield %[[TRUNC]] - %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8> - return -} - -// ----- - -// CHECK-LABEL: @avg_pool_i16 -func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () { - - // CHECK: linalg.pooling_nhwc_sum - // CHECK: linalg.generic - - 
// CHECK: %[[INZP:.+]] = arith.constant -128 - // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] - // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] - // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 - // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} - // CHECK: %[[SHIFT:.+]] = arith.constant 30 - // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} - // CHECK: %[[OUTZP:.+]] = arith.constant -128 - // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] - // CHECK: %[[MIN:.+]] = arith.constant -32768 - // CHECK: %[[MAX:.+]] = arith.constant 32767 - // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] - // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] - // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] - // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] - // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] - // CHECK: linalg.yield %[[TRUNC]] - %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16> - return -} - -// ----- - -// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> -// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> - -// CHECK-LABEL: @conv2d_f32 -func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () { - // CHECK: %[[W_IN:.+]] = linalg.init_tensor [3, 3, 27, 28] - // CHECK: %[[W:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<28x3x3x27xf32>) outs(%[[W_IN]] : tensor<3x3x27x28xf32>) - // CHECK: linalg.yield %arg3 : f32 - // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 
28] - // CHECK: %[[CST:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill - // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) - // CHECK: arith.addf - // CHECK: linalg.yield %7 : f32 - %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) - return -} - -// ----- - -// CHECK-LABEL: @conv2d_padded_f32 -func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C0]] - // CHECK: linalg.conv_2d_nhwc_hwcf - %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) - return -} - -// ----- - -// CHECK-LABEL: @conv2d_quant -func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { - // CHECK: %[[C22:.+]] = arith.constant -22 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C22]] - // CHECK: linalg.conv_2d_nhwc_hwcf_q - %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : 
(tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv -func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - // CHECK: } -> tensor<1x5x5x33xf32> - %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_strides -func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { - // CHECK: [[INIT:%.+]] = 
linalg.init_tensor [1, 5, 5, 3, 11] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - // CHECK: } -> tensor<1x5x5x33xf32> - %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_quant -func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { - // CHECK: [[PADV:%.+]] = arith.constant -128 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield [[PADV]] - - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512] - // CHECK: [[C128:%.+]] = arith.constant -128 - // CHECK: 
[[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { - // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 - // CHECK: linalg.yield [[ADD]] : i32 - // CHECK: } -> tensor<1x12x12x512xi32> - %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32> - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_quant_dilations -func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512] - // CHECK: [[C128:%.+]] = arith.constant -128 - // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] 
: tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { - // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 - // CHECK: linalg.yield [[ADD]] : i32 - // CHECK: } -> tensor<1x10x10x512xi32> - %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32> - return -} - -// ----- - // CHECK-LABEL: @resize_nearest func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () { // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1] From b05ed0d2cb8d19edf0debf8576fec0e57f66d0a6 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Tue, 28 Dec 2021 11:23:50 -0800 Subject: [PATCH 140/992] [Hexagon] Add testcase for arch flags --- llvm/test/MC/Hexagon/arch-support.s | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 llvm/test/MC/Hexagon/arch-support.s diff --git a/llvm/test/MC/Hexagon/arch-support.s b/llvm/test/MC/Hexagon/arch-support.s new file mode 100644 index 000000000000..1782ebb1ecc3 --- /dev/null +++ b/llvm/test/MC/Hexagon/arch-support.s @@ -0,0 +1,30 @@ +# RUN: llvm-mc -arch=hexagon -mv5 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V5 %s +# RUN: llvm-mc -arch=hexagon -mv55 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V55 %s +# RUN: llvm-mc -arch=hexagon -mv60 -filetype=obj %s | llvm-readelf -h - | FileCheck 
--check-prefix=CHECK-V60 %s +# RUN: llvm-mc -arch=hexagon -mv62 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V62 %s +# RUN: llvm-mc -arch=hexagon -mv65 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V65 %s +# RUN: llvm-mc -arch=hexagon -mv67 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V67 %s +# RUN: llvm-mc -arch=hexagon -mv68 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V68 %s +# RUN: llvm-mc -arch=hexagon -mv69 -filetype=obj %s | llvm-readelf -h - | FileCheck --check-prefix=CHECK-V69 %s + +# RUN: llvm-mc -arch=hexagon -mv5 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv55 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv60 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv62 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv65 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv67 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv68 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s +# RUN: llvm-mc -arch=hexagon -mv69 -filetype=obj %s | llvm-objdump --disassemble - | FileCheck --check-prefix=CHECK-OBJDUMP %s + .text +r1 = r1 + +# CHECK-V5: Flags:{{.*}}0x4 +# CHECK-V55: Flags:{{.*}}0x5 +# CHECK-V60: Flags:{{.*}}0x60 +# CHECK-V62: Flags:{{.*}}0x62 +# CHECK-V65: Flags:{{.*}}0x65 +# CHECK-V67: Flags:{{.*}}0x67 +# CHECK-V68: Flags:{{.*}}0x68 +# CHECK-V69: Flags:{{.*}}0x69 + +# CHECK-OBJDUMP: { r1 = r1 } From 7df136bcf258e35467afe1876bbd0930648c4cba Mon Sep 17 00:00:00 2001 From: 
Krzysztof Parzyszek Date: Tue, 28 Dec 2021 11:36:07 -0800 Subject: [PATCH 141/992] [Hexagon] Delete unused declaration of LowerHvxMul, NFC --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index d518c036f125..fd6aa06ed843 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -468,7 +468,6 @@ class HexagonTargetLowering : public TargetLowering { SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; From c5e8eb9783a680ba5f552d84c17d1b9122f5fe36 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Tue, 28 Dec 2021 11:59:54 +0000 Subject: [PATCH 142/992] Documentation for the process of adding new targets Plenty of new targets nowadays and I found myself repeating the same thing over and over, so this is more or less what we said over the last few years, but condensed in an ordered fashion and easy to digest. This does not change any of the recommendations, only documents what we have been saying for years. --- llvm/docs/DeveloperPolicy.rst | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index b88e45b1ac31..e57f1044118c 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -793,10 +793,40 @@ To **continue** as a supported and official target: nuisance to other targets and be considered a candidate for deprecation and ultimately removed. 
-In essences, these rules are necessary for targets to gain and retain their +In essence, these rules are necessary for targets to gain and retain their status, but also markers to define bit-rot, and will be used to clean up the tree from unmaintained targets. +Those wishing to add a new target to LLVM must follow the procedure below: + +1. Read this section and make sure your target follows all requirements. For + minor issues, your community will be responsible for making all necessary + adjustments soon after the initial merge. +2. Send a request for comment (RFC) to the llvm-dev@ mailing list, describing + your target and how it follows all the requirements and what work has been + done and will need to be done to accommodate the official target requirements. + Make sure to expose any and all controversial issues, changes needed in the + base code, table gen, etc. +3. Once the response is positive, the LLVM community can start reviewing the + actual patches (but they can be prepared before, to support the RFC). Create + a sequence of N patches, numbered '1/N' to 'N/N' (make sure N is an actual + number, not the letter 'N'), that completes the basic structure of the target. +4. The initial patch should add documentation, code owners and triple support in + clang and LLVM. The following patches add TableGen infrastructure to describe + the target and lower instructions to assembly. The final patch must show that + the target can lower correctly with extensive LIT tests (IR to MIR, MIR to + ASM, etc). +5. Some patches may be approved before others, but only after *all* patches are + approved that the whole set can be merged in one go. This is to guarantee + that all changes are good as a single block. +6. After the initial merge, the target community can stop numbering patches and + start working asynchronously on the target to complete support. 
They should + still seek review from those who helped them in the initial phase, to make + sure the progress is still consistent. +7. Once all official requirements have been fulfilled (as above), the code owner + should request the target to be enabled by default by sending another RFC to + the llvm-dev@ mailing list. + Adding an Established Project To the LLVM Monorepo -------------------------------------------------- From 357c8031ff29299ad4b823074ff7fdda23654cc8 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 28 Dec 2021 16:10:49 -0500 Subject: [PATCH 143/992] [OpenMP][Plugin] Minor adjustments to ResourcePool This patch makes some minor adjustments to `ResourcePool`: - Don't initialize the resources if `Size` is 0 which can avoid assertion. - Add a new interface function `clear` to release all hold resources. - If initial size is 0, resize to 1 when the first request is encountered. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D116340 --- openmp/libomptarget/plugins/cuda/src/rtl.cpp | 31 +++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index ed26f2f7731f..1afee7ce3a02 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -262,13 +262,11 @@ template class ResourcePoolTy { public: ResourcePoolTy(AllocatorTy &&A, size_t Size = 0) noexcept : Allocator(std::move(A)) { - (void)resize(Size); + if (Size) + (void)resize(Size); } - ~ResourcePoolTy() noexcept { - for (auto &R : Resources) - (void)Allocator.destroy(R); - } + ~ResourcePoolTy() noexcept { clear(); } /// Get a resource from pool. `Next` always points to the next available /// resource. 
That means, `[0, next-1]` have been assigned, and `[id,]` are @@ -283,8 +281,13 @@ template class ResourcePoolTy { /// Next int acquire(T &R) noexcept { std::lock_guard LG(Mutex); - if (Next == Resources.size() && !resize(Resources.size() * 2)) - return OFFLOAD_FAIL; + if (Next == Resources.size()) { + auto NewSize = Resources.size() ? Resources.size() * 2 : 1; + if (!resize(NewSize)) + return OFFLOAD_FAIL; + } + + assert(Next < Resources.size()); R = Resources[Next++]; @@ -307,6 +310,14 @@ template class ResourcePoolTy { std::lock_guard LG(Mutex); Resources[--Next] = R; } + + /// Released all stored resources and clear the pool. + /// Note: This function is not thread safe. Be sure to guard it if necessary. + void clear() noexcept { + for (auto &R : Resources) + (void)Allocator.destroy(R); + Resources.clear(); + } }; class DeviceRTLTy { @@ -328,7 +339,6 @@ class DeviceRTLTy { static constexpr const int DefaultNumThreads = 128; using StreamPoolTy = ResourcePoolTy; - using StreamAllocatorTy = AllocatorTy; std::vector> StreamPool; std::vector DeviceData; @@ -563,7 +573,7 @@ class DeviceRTLTy { checkResult(cuModuleUnload(M), "Error returned from cuModuleUnload\n"); for (auto &S : StreamPool) - S = nullptr; + S.reset(); for (DeviceDataTy &D : DeviceData) { // Destroy context @@ -631,7 +641,8 @@ class DeviceRTLTy { // Initialize stream pool if (!StreamPool[DeviceId]) StreamPool[DeviceId] = std::make_unique( - StreamAllocatorTy(DeviceData[DeviceId].Context), NumInitialStreams); + AllocatorTy(DeviceData[DeviceId].Context), + NumInitialStreams); // Query attributes to determine number of threads/block and blocks/grid. 
int MaxGridDimX; From cd284b7ac0615afc6e0f1a30da2777e361de27a3 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 28 Dec 2021 12:48:30 -0800 Subject: [PATCH 144/992] [clang][ARM] re-use arm::isHardTPSupported for hardware TLS check This conditional check for -mstack-protector-guard=tls got out of sync with the conditional check for -mtp=cp15 by me in D114116, because I forgot about the similar check added in D113026. Re-use the code in arm::isHardTPSupported so that these aren't out of sync. Interestingly, our CI reported this when testing -mstack-protector-guard=tls; it was only reproducible with Debian's LLVM and not upstream LLVM due to this out of tree patch: https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/blob/snapshot/debian/patches/930008-arm.diff Fixes: https://github.com/ClangBuiltLinux/linux/issues/1502 Reviewed By: ardb Differential Revision: https://reviews.llvm.org/D116233 --- clang/lib/Driver/ToolChains/Clang.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 65347a38490e..2c3439215093 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3217,9 +3217,7 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, return; } // Check whether the target subarch supports the hardware TLS register - if (arm::getARMSubArchVersionNumber(EffectiveTriple) < 7 && - llvm::ARM::parseArch(EffectiveTriple.getArchName()) != - llvm::ARM::ArchKind::ARMV6T2) { + if (!arm::isHardTPSupported(EffectiveTriple)) { D.Diag(diag::err_target_unsupported_tp_hard) << EffectiveTriple.getArchName(); return; From 1c6b740d4b1bb8f74c8eb2fa38b21d684d32ca75 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 28 Dec 2021 14:05:40 -0800 Subject: [PATCH 145/992] [TargetLowering] Remove workaround for old behavior of getShiftAmountTy. 
NFC getShiftAmountTy used to directly return the shift amount type from the target which could be too small for large illegal types. For example, X86 always returns i8. The code here detected this and used i32 instead if it won't fit. This behavior was added to getShiftAmountTy in D112469 so we no longer need this workaround. --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e6b06ab93d6b..03163c896799 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6438,12 +6438,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, unsigned ShiftAmount = OuterBitSize - InnerBitSize; EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); - if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) { - // FIXME getShiftAmountTy does not always return a sensible result when VT - // is an illegal type, and so the type may be too small to fit the shift - // amount. Override it with i32. The shift will have to be legalized. - ShiftAmountTy = MVT::i32; - } SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); if (!LH.getNode() && !RH.getNode() && From 18ffb5dc2503c81df53274d6a2e1945ac08c56d2 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Tue, 28 Dec 2021 13:34:36 -0800 Subject: [PATCH 146/992] [InstrProf] Prevent duplicate functions in correlated data When using debug info for profile correlation, avoid adding duplicate functions in the synthetic Data section. Before this patch, n duplicate function entries in the Data section would cause counter values to be a factor of n larger. I built instrumented clang with and without debug info correlation and got these summaries. 
``` # With Debug Info Correlate $ llvm-profdata show default.profdata Instrumentation level: IR entry_first = 0 Total functions: 182530 Maximum function count: 52034 Maximum internal block count: 5763 # Without $ llvm-profdata show default.profdata Instrumentation level: IR entry_first = 0 Total functions: 183212 Maximum function count: 52034 Maximum internal block count: 5766 ``` The slight difference in counts seem to be mostly from FileSystem and Map functions and the difference in the number of instrumented functions seems to come from missing debug info like destructors without source. Reviewed By: kyulee Differential Revision: https://reviews.llvm.org/D116051 --- .../Darwin/instrprof-debug-info-correlate.c | 31 +++---------------- .../instrprof-debug-info-correlate-bar.h | 7 +++++ .../instrprof-debug-info-correlate-foo.cpp | 7 +++++ .../instrprof-debug-info-correlate-main.cpp | 10 ++++++ .../Linux/instrprof-debug-info-correlate.c | 31 +++---------------- .../llvm/ProfileData/InstrProfCorrelator.h | 2 ++ llvm/lib/ProfileData/InstrProfCorrelator.cpp | 4 +++ 7 files changed, 40 insertions(+), 52 deletions(-) create mode 100644 compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-bar.h create mode 100644 compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-foo.cpp create mode 100644 compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-main.cpp diff --git a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c index b35cfe85956a..837e1f428d1e 100644 --- a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c @@ -1,33 +1,12 @@ // REQUIRES: zlib // Value profiling is currently not supported in lightweight mode. 
-// RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %s -// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal -// RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw - -// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %s +// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t.dSYM %t.proflite -// RUN: diff %t.normal.profdata %t.profdata - -int foo(int a) { - if (a % 2) - return 4 * a + 1; - return 0; -} - -int bar(int a) { - while (a > 100) - a /= 2; - return a; -} - -typedef int (*FP)(int); -FP Fps[3] = {foo, bar}; +// RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal +// RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw -int main() { - for (int i = 0; i < 5; i++) - Fps[i % 2](i); - return 0; -} +// RUN: diff %t.normal.profdata %t.profdata diff --git a/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-bar.h b/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-bar.h new file mode 100644 index 000000000000..4ee9cdacc835 --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-bar.h @@ -0,0 +1,7 @@ +int foo(int); + +inline int bar(int a) { + while (a > 100) + a /= 2; + return a; +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-foo.cpp b/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-foo.cpp new file mode 100644 index 000000000000..e9f8e7a5570e --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-foo.cpp @@ -0,0 +1,7 @@ +#include "instrprof-debug-info-correlate-bar.h" + +int 
foo(int a) { + if (a % 2) + return 4 * a + 1; + return bar(a); +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-main.cpp b/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-main.cpp new file mode 100644 index 000000000000..13a8cf4f8d95 --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-debug-info-correlate-main.cpp @@ -0,0 +1,10 @@ +#include "instrprof-debug-info-correlate-bar.h" + +typedef int (*FP)(int); +FP Fps[2] = {foo, bar}; + +int main() { + for (int i = 0; i < 5; i++) + Fps[i % 2](i); + return 0; +} diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c index df3a94837ed3..c78614f28b11 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c @@ -1,33 +1,12 @@ // REQUIRES: zlib // Value profiling is currently not supported in lightweight mode. -// RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %s -// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal -// RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw - -// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %s +// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite -// RUN: diff %t.normal.profdata %t.profdata - -int foo(int a) { - if (a % 2) - return 4 * a + 1; - return 0; -} - -int bar(int a) { - while (a > 100) - a /= 2; - return a; -} - -typedef int (*FP)(int); -FP Fps[3] = {foo, bar}; +// RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: env 
LLVM_PROFILE_FILE=%t.profraw %run %t.normal +// RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw -int main() { - for (int i = 0; i < 5; i++) - Fps[i % 2](i); - return 0; -} +// RUN: diff %t.normal.profdata %t.profdata diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h index eae7b4e0322c..81ecbc2813ab 100644 --- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h +++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h @@ -12,6 +12,7 @@ #ifndef LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H #define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H +#include "llvm/ADT/DenseSet.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" @@ -110,6 +111,7 @@ class InstrProfCorrelatorImpl : public InstrProfCorrelator { std::unique_ptr Ctx) : InstrProfCorrelator(Kind, std::move(Ctx)){}; std::vector Names; + llvm::DenseSet CounterOffsets; // Byte-swap the value if necessary. template T maybeSwap(T Value) const { diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp index f9c113027da2..8be2cbff3a20 100644 --- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp +++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp @@ -129,6 +129,7 @@ Error InstrProfCorrelatorImpl::correlateProfileData() { correlateProfileDataImpl(); auto Result = collectPGOFuncNameStrings(Names, /*doCompression=*/true, CompressedNames); + CounterOffsets.clear(); Names.clear(); return Result; } @@ -139,6 +140,9 @@ void InstrProfCorrelatorImpl::addProbe(StringRef FunctionName, IntPtrT CounterOffset, IntPtrT FunctionPtr, uint32_t NumCounters) { + // Check if a probe was already added for this counter offset. 
+ if (!CounterOffsets.insert(CounterOffset).second) + return; Data.push_back({ maybeSwap(IndexedInstrProf::ComputeHash(FunctionName)), maybeSwap(CFGHash), From 943d1d83dd7799c6371a42c224bfe072ddf2fe88 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 28 Dec 2021 17:42:31 -0500 Subject: [PATCH 147/992] [OpenMP][CUDA] Add resource pool for CUevent Following D111954, this patch adds the resource pool for CUevent. Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D116315 --- openmp/libomptarget/plugins/cuda/src/rtl.cpp | 73 +++++++++++--------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 1afee7ce3a02..970a574b2eb3 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -114,21 +114,6 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size, return OFFLOAD_SUCCESS; } -int createEvent(void **P) { - CUevent Event = nullptr; - - CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT); - if (Err != CUDA_SUCCESS) { - DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event)); - CUDA_ERR_STRING(Err); - return OFFLOAD_FAIL; - } - - *P = Event; - - return OFFLOAD_SUCCESS; -} - int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) { CUstream Stream = reinterpret_cast(AsyncInfo->Queue); CUevent Event = reinterpret_cast(EventPtr); @@ -157,19 +142,6 @@ int syncEvent(void *EventPtr) { return OFFLOAD_SUCCESS; } -int destroyEvent(void *EventPtr) { - CUevent Event = reinterpret_cast(EventPtr); - - CUresult Err = cuEventDestroy(Event); - if (Err != CUDA_SUCCESS) { - DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event)); - CUDA_ERR_STRING(Err); - return OFFLOAD_FAIL; - } - - return OFFLOAD_SUCCESS; -} - // Structure contains per-device data struct DeviceDataTy { /// List that contains all the kernels. 
@@ -231,6 +203,28 @@ template <> class AllocatorTy { } }; +/// Allocator for CUevent. +template <> class AllocatorTy { +public: + /// See AllocatorTy::create. + int create(CUevent &Event) noexcept { + if (!checkResult(cuEventCreate(&Event, CU_EVENT_DEFAULT), + "Error returned from cuEventCreate\n")) + return OFFLOAD_FAIL; + + return OFFLOAD_SUCCESS; + } + + /// See AllocatorTy::destroy. + int destroy(CUevent Event) noexcept { + if (!checkResult(cuEventDestroy(Event), + "Error returned from cuEventDestroy\n")) + return OFFLOAD_FAIL; + + return OFFLOAD_SUCCESS; + } +}; + /// A generic pool of resources where \p T is the resource type. /// \p T should be copyable as the object is stored in \p std::vector . template class ResourcePoolTy { @@ -341,6 +335,8 @@ class DeviceRTLTy { using StreamPoolTy = ResourcePoolTy; std::vector> StreamPool; + ResourcePoolTy EventPool; + std::vector DeviceData; std::vector Modules; @@ -493,7 +489,7 @@ class DeviceRTLTy { DeviceRTLTy() : NumberOfDevices(0), EnvNumTeams(-1), EnvTeamLimit(-1), EnvTeamThreadLimit(-1), RequiresFlags(OMP_REQ_UNDEFINED), - DynamicMemorySize(0) { + DynamicMemorySize(0), EventPool(AllocatorTy()) { DP("Start initializing CUDA\n"); @@ -575,6 +571,8 @@ class DeviceRTLTy { for (auto &S : StreamPool) S.reset(); + EventPool.clear(); + for (DeviceDataTy &D : DeviceData) { // Destroy context if (D.Context) { @@ -1395,6 +1393,19 @@ class DeviceRTLTy { printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2); } + int createEvent(void **P) { + CUevent Event = nullptr; + if (EventPool.acquire(Event) != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + *P = Event; + return OFFLOAD_SUCCESS; + } + + int destroyEvent(void *EventPtr) { + EventPool.release(reinterpret_cast(EventPtr)); + return OFFLOAD_SUCCESS; + } + int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo, void *EventPtr) const { CUstream Stream = getStream(DeviceId, AsyncInfo); @@ -1620,7 +1631,7 @@ void __tgt_rtl_print_device_info(int32_t device_id) { int32_t 
__tgt_rtl_create_event(int32_t device_id, void **event) { assert(event && "event is nullptr"); - return createEvent(event); + return DeviceRTL.createEvent(event); } int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr, @@ -1650,7 +1661,7 @@ int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) { int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) { assert(event_ptr && "event is nullptr"); - return destroyEvent(event_ptr); + return DeviceRTL.destroyEvent(event_ptr); } #ifdef __cplusplus From 6a6ac3b36fcdb44a5096f2ddab952a1281eb144e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 28 Dec 2021 13:47:42 -0800 Subject: [PATCH 148/992] [Hexagon] Support BUILD_VECTOR of floating point HVX vectors Co-authored-by: Anirudh Sundar Subramaniam Co-authored-by: Ankit Aggarwal --- .../Target/Hexagon/HexagonISelLowering.cpp | 1 - .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 81 ++- llvm/lib/Target/Hexagon/HexagonPatterns.td | 12 - llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 40 ++ .../lib/Target/Hexagon/HexagonRegisterInfo.td | 12 +- .../autohvx/build-vector-float-type.ll | 504 ++++++++++++++++++ 6 files changed, 620 insertions(+), 30 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 88effed9f076..90dda37a886a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2720,7 +2720,6 @@ SDValue HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const { if (Ty.isVector()) { - assert(Ty.isInteger() && "Only integer vectors are supported here"); unsigned W = Ty.getSizeInBits(); if (W <= 64) return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W))); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 
f7237f496aee..e189b0b49e34 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -55,6 +55,11 @@ HexagonTargetLowering::initializeHVXLowering() { addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); + if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { + addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass); + } } // Set up operation actions. @@ -83,6 +88,21 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && + Subtarget.useHVXFloatingPoint()) { + // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat + setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom); + + // BUILD_VECTOR with f16 operands cannot be promoted without + // promoting the result, so lower the node to vsplat or constant pool + setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); + + // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- + // independent) handling of it would convert it to a load, which is + // not always the optimal choice. 
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom); + } + for (MVT T : LegalV) { setIndexedLoadAction(ISD::POST_INC, T, Legal); setIndexedStoreAction(ISD::POST_INC, T, Legal); @@ -497,7 +517,9 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, assert(ElemSize*VecLen == HwLen); SmallVector Words; - if (VecTy.getVectorElementType() != MVT::i32) { + if (VecTy.getVectorElementType() != MVT::i32 && + !(Subtarget.useHVXFloatingPoint() && + VecTy.getVectorElementType() == MVT::f32)) { assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); @@ -506,22 +528,31 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, Words.push_back(DAG.getBitcast(MVT::i32, W)); } } else { - Words.assign(Values.begin(), Values.end()); + for (SDValue V : Values) + Words.push_back(DAG.getBitcast(MVT::i32, V)); } + auto isSplat = [] (ArrayRef Values, SDValue &SplatV) { + unsigned NumValues = Values.size(); + assert(NumValues > 0); + bool IsUndef = true; + for (unsigned i = 0; i != NumValues; ++i) { + if (Values[i].isUndef()) + continue; + IsUndef = false; + if (!SplatV.getNode()) + SplatV = Values[i]; + else if (SplatV != Values[i]) + return false; + } + if (IsUndef) + SplatV = Values[0]; + return true; + }; unsigned NumWords = Words.size(); - bool IsSplat = true, IsUndef = true; SDValue SplatV; - for (unsigned i = 0; i != NumWords && IsSplat; ++i) { - if (isUndef(Words[i])) - continue; - IsUndef = false; - if (!SplatV.getNode()) - SplatV = Words[i]; - else if (SplatV != Words[i]) - IsSplat = false; - } - if (IsUndef) + bool IsSplat = isSplat(Words, SplatV); + if (IsSplat && isUndef(SplatV)) return DAG.getUNDEF(VecTy); if (IsSplat) { assert(SplatV.getNode()); @@ -634,8 +665,15 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); - 
SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1}); - return DstV; + + SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0); + SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1); + + SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1}); + + SDValue OutV = + DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV); + return OutV; } SDValue @@ -1237,6 +1275,19 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) if (VecTy.getVectorElementType() == MVT::i1) return buildHvxVectorPred(Ops, dl, VecTy, DAG); + // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is + // not a legal type, just bitcast the node to use i16 + // types and bitcast the result back to f16 + if (VecTy.getVectorElementType() == MVT::f16) { + SmallVector NewOps; + for (unsigned i = 0; i != Size; i++) + NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i])); + + SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl, + tyVector(VecTy, MVT::i16), NewOps); + return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); + } + if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { ArrayRef A(Ops); MVT SingleTy = typeSplit(VecTy).first; diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index cad5ca8ab92e..4ba6d4740e12 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -87,18 +87,6 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; -def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>; -def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>; -def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>; - -def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; -def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; -def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; - -def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; -def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; -def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; - def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>; def SDTVecVecIntOp: 
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index a22a3f8ec0ca..15fa659d26ab 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -6,6 +6,21 @@ // //===----------------------------------------------------------------------===// +def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>; +def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>; +def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>; + +def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; +def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; +def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; +def HVF16: PatLeaf<(VecF16 HvxVR:$R)>; +def HVF32: PatLeaf<(VecF32 HvxVR:$R)>; + +def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; +def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; +def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>; +def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>; def SDTVecUnaryOp: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; @@ -211,6 +226,24 @@ let Predicates = [UseHVX] in { defm: NopCast_pat; } +let Predicates = [UseHVX, UseHVXFloatingPoint] in { + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; +} + let Predicates = [UseHVX] in { let AddedComplexity = 100 in { // These should be preferred over a vsplat of 0. 
@@ -251,6 +284,13 @@ let Predicates = [UseHVX] in { (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; } +let Predicates = [UseHVXFloatingPoint] in { + def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; +} + // Splats for HvxV60 def V60splatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>; def V60splatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>; diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td index 8b7138d3c809..4c387c8ba638 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -479,6 +479,10 @@ def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i16, v64i16, v32i16]>; def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16i32, v32i32, v16i32]>; +def VecF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v32f16, v64f16, v32f16]>; +def VecF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v16f32, v32f32, v16f32]>; def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v128i8, v256i8, v128i8]>; @@ -486,6 +490,10 @@ def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i16, v128i16, v64i16]>; def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i32, v64i32, v32i32]>; +def VecPF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v64f16, v128f16, v64f16]>; +def VecPF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v32f32, v64f32, v32f32]>; def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i1, v128i1, v64i1]>; @@ -496,13 +504,13 @@ def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], // HVX register classes -def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512, +def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecF32], 512, (add (sequence "V%u", 0, 31), VTMP)> { let RegInfos = 
RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>; } -def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024, +def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPF32], 1024, (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> { let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll new file mode 100644 index 000000000000..2eba9e2db446 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-float-type.ll @@ -0,0 +1,504 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this code does compile. + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +; Function Attrs: nounwind +; CHECK-LABEL: f0: +; CHECK: vinsert +define <32 x float> @f0(i32* %a0, float* %a1) #0 { +b0: + %v0 = getelementptr i32, i32* %a0, i32 0 + %v1 = load i32, i32* %v0, align 4 + %v2 = getelementptr float, float* %a1, i32 %v1 + %v3 = load float, float* %v2, align 4 + %v4 = insertelement <32 x float> undef, float %v3, i32 0 + %v5 = getelementptr i32, i32* %a0, i32 1 + %v6 = load i32, i32* %v5, align 4 + %v7 = getelementptr float, float* %a1, i32 %v6 + %v8 = load float, float* %v7, align 4 + %v9 = insertelement <32 x float> %v4, float %v8, i32 1 + %v10 = getelementptr i32, i32* %a0, i32 2 + %v11 = load i32, i32* %v10, align 4 + %v12 = getelementptr float, float* %a1, i32 %v11 + %v13 = load float, float* %v12, align 4 + %v14 = insertelement <32 x float> %v9, float %v13, i32 2 + %v15 = getelementptr i32, i32* %a0, i32 3 + %v16 = load i32, i32* %v15, align 4 + %v17 = getelementptr float, float* %a1, i32 
%v16 + %v18 = load float, float* %v17, align 4 + %v19 = insertelement <32 x float> %v14, float %v18, i32 3 + %v20 = getelementptr i32, i32* %a0, i32 4 + %v21 = load i32, i32* %v20, align 4 + %v22 = getelementptr float, float* %a1, i32 %v21 + %v23 = load float, float* %v22, align 4 + %v24 = insertelement <32 x float> %v19, float %v23, i32 4 + %v25 = getelementptr i32, i32* %a0, i32 5 + %v26 = load i32, i32* %v25, align 4 + %v27 = getelementptr float, float* %a1, i32 %v26 + %v28 = load float, float* %v27, align 4 + %v29 = insertelement <32 x float> %v24, float %v28, i32 5 + %v30 = getelementptr i32, i32* %a0, i32 6 + %v31 = load i32, i32* %v30, align 4 + %v32 = getelementptr float, float* %a1, i32 %v31 + %v33 = load float, float* %v32, align 4 + %v34 = insertelement <32 x float> %v29, float %v33, i32 6 + %v35 = getelementptr i32, i32* %a0, i32 7 + %v36 = load i32, i32* %v35, align 4 + %v37 = getelementptr float, float* %a1, i32 %v36 + %v38 = load float, float* %v37, align 4 + %v39 = insertelement <32 x float> %v34, float %v38, i32 7 + %v40 = getelementptr i32, i32* %a0, i32 8 + %v41 = load i32, i32* %v40, align 4 + %v42 = getelementptr float, float* %a1, i32 %v41 + %v43 = load float, float* %v42, align 4 + %v44 = insertelement <32 x float> %v39, float %v43, i32 8 + %v45 = getelementptr i32, i32* %a0, i32 9 + %v46 = load i32, i32* %v45, align 4 + %v47 = getelementptr float, float* %a1, i32 %v46 + %v48 = load float, float* %v47, align 4 + %v49 = insertelement <32 x float> %v44, float %v48, i32 9 + %v50 = getelementptr i32, i32* %a0, i32 10 + %v51 = load i32, i32* %v50, align 4 + %v52 = getelementptr float, float* %a1, i32 %v51 + %v53 = load float, float* %v52, align 4 + %v54 = insertelement <32 x float> %v49, float %v53, i32 10 + %v55 = getelementptr i32, i32* %a0, i32 11 + %v56 = load i32, i32* %v55, align 4 + %v57 = getelementptr float, float* %a1, i32 %v56 + %v58 = load float, float* %v57, align 4 + %v59 = insertelement <32 x float> %v54, float %v58, i32 11 + %v60 = 
getelementptr i32, i32* %a0, i32 12 + %v61 = load i32, i32* %v60, align 4 + %v62 = getelementptr float, float* %a1, i32 %v61 + %v63 = load float, float* %v62, align 4 + %v64 = insertelement <32 x float> %v59, float %v63, i32 12 + %v65 = getelementptr i32, i32* %a0, i32 13 + %v66 = load i32, i32* %v65, align 4 + %v67 = getelementptr float, float* %a1, i32 %v66 + %v68 = load float, float* %v67, align 4 + %v69 = insertelement <32 x float> %v64, float %v68, i32 13 + %v70 = getelementptr i32, i32* %a0, i32 14 + %v71 = load i32, i32* %v70, align 4 + %v72 = getelementptr float, float* %a1, i32 %v71 + %v73 = load float, float* %v72, align 4 + %v74 = insertelement <32 x float> %v69, float %v73, i32 14 + %v75 = getelementptr i32, i32* %a0, i32 15 + %v76 = load i32, i32* %v75, align 4 + %v77 = getelementptr float, float* %a1, i32 %v76 + %v78 = load float, float* %v77, align 4 + %v79 = insertelement <32 x float> %v74, float %v78, i32 15 + %v80 = getelementptr i32, i32* %a0, i32 16 + %v81 = load i32, i32* %v80, align 4 + %v82 = getelementptr float, float* %a1, i32 %v81 + %v83 = load float, float* %v82, align 4 + %v84 = insertelement <32 x float> %v79, float %v83, i32 16 + %v85 = getelementptr i32, i32* %a0, i32 17 + %v86 = load i32, i32* %v85, align 4 + %v87 = getelementptr float, float* %a1, i32 %v86 + %v88 = load float, float* %v87, align 4 + %v89 = insertelement <32 x float> %v84, float %v88, i32 17 + %v90 = getelementptr i32, i32* %a0, i32 18 + %v91 = load i32, i32* %v90, align 4 + %v92 = getelementptr float, float* %a1, i32 %v91 + %v93 = load float, float* %v92, align 4 + %v94 = insertelement <32 x float> %v89, float %v93, i32 18 + %v95 = getelementptr i32, i32* %a0, i32 19 + %v96 = load i32, i32* %v95, align 4 + %v97 = getelementptr float, float* %a1, i32 %v96 + %v98 = load float, float* %v97, align 4 + %v99 = insertelement <32 x float> %v94, float %v98, i32 19 + %v100 = getelementptr i32, i32* %a0, i32 20 + %v101 = load i32, i32* %v100, align 4 + %v102 = getelementptr 
float, float* %a1, i32 %v101 + %v103 = load float, float* %v102, align 4 + %v104 = insertelement <32 x float> %v99, float %v103, i32 20 + %v105 = getelementptr i32, i32* %a0, i32 21 + %v106 = load i32, i32* %v105, align 4 + %v107 = getelementptr float, float* %a1, i32 %v106 + %v108 = load float, float* %v107, align 4 + %v109 = insertelement <32 x float> %v104, float %v108, i32 21 + %v110 = getelementptr i32, i32* %a0, i32 22 + %v111 = load i32, i32* %v110, align 4 + %v112 = getelementptr float, float* %a1, i32 %v111 + %v113 = load float, float* %v112, align 4 + %v114 = insertelement <32 x float> %v109, float %v113, i32 22 + %v115 = getelementptr i32, i32* %a0, i32 23 + %v116 = load i32, i32* %v115, align 4 + %v117 = getelementptr float, float* %a1, i32 %v116 + %v118 = load float, float* %v117, align 4 + %v119 = insertelement <32 x float> %v114, float %v118, i32 23 + %v120 = getelementptr i32, i32* %a0, i32 24 + %v121 = load i32, i32* %v120, align 4 + %v122 = getelementptr float, float* %a1, i32 %v121 + %v123 = load float, float* %v122, align 4 + %v124 = insertelement <32 x float> %v119, float %v123, i32 24 + %v125 = getelementptr i32, i32* %a0, i32 25 + %v126 = load i32, i32* %v125, align 4 + %v127 = getelementptr float, float* %a1, i32 %v126 + %v128 = load float, float* %v127, align 4 + %v129 = insertelement <32 x float> %v124, float %v128, i32 25 + %v130 = getelementptr i32, i32* %a0, i32 26 + %v131 = load i32, i32* %v130, align 4 + %v132 = getelementptr float, float* %a1, i32 %v131 + %v133 = load float, float* %v132, align 4 + %v134 = insertelement <32 x float> %v129, float %v133, i32 26 + %v135 = getelementptr i32, i32* %a0, i32 27 + %v136 = load i32, i32* %v135, align 4 + %v137 = getelementptr float, float* %a1, i32 %v136 + %v138 = load float, float* %v137, align 4 + %v139 = insertelement <32 x float> %v134, float %v138, i32 27 + %v140 = getelementptr i32, i32* %a0, i32 28 + %v141 = load i32, i32* %v140, align 4 + %v142 = getelementptr float, float* %a1, i32 
%v141 + %v143 = load float, float* %v142, align 4 + %v144 = insertelement <32 x float> %v139, float %v143, i32 28 + %v145 = getelementptr i32, i32* %a0, i32 29 + %v146 = load i32, i32* %v145, align 4 + %v147 = getelementptr float, float* %a1, i32 %v146 + %v148 = load float, float* %v147, align 4 + %v149 = insertelement <32 x float> %v144, float %v148, i32 29 + %v150 = getelementptr i32, i32* %a0, i32 30 + %v151 = load i32, i32* %v150, align 4 + %v152 = getelementptr float, float* %a1, i32 %v151 + %v153 = load float, float* %v152, align 4 + %v154 = insertelement <32 x float> %v149, float %v153, i32 30 + %v155 = getelementptr i32, i32* %a0, i32 31 + %v156 = load i32, i32* %v155, align 4 + %v157 = getelementptr float, float* %a1, i32 %v156 + %v158 = load float, float* %v157, align 4 + %v159 = insertelement <32 x float> %v154, float %v158, i32 31 + ret <32 x float> %v159 +} + +; Function Attrs: nounwind +; CHECK-LABEL: f1: +; CHECK: vinsert +define <64 x half> @f1(i32* %a0, half* %a1) #0 { +b0: + %v0 = getelementptr i32, i32* %a0, i32 0 + %v1 = load i32, i32* %v0, align 4 + %v2 = getelementptr half, half* %a1, i32 %v1 + %v3 = load half, half* %v2, align 4 + %v4 = insertelement <64 x half> undef, half %v3, i32 0 + %v5 = getelementptr i32, i32* %a0, i32 1 + %v6 = load i32, i32* %v5, align 4 + %v7 = getelementptr half, half* %a1, i32 %v6 + %v8 = load half, half* %v7, align 4 + %v9 = insertelement <64 x half> %v4, half %v8, i32 1 + %v10 = getelementptr i32, i32* %a0, i32 2 + %v11 = load i32, i32* %v10, align 4 + %v12 = getelementptr half, half* %a1, i32 %v11 + %v13 = load half, half* %v12, align 4 + %v14 = insertelement <64 x half> %v9, half %v13, i32 2 + %v15 = getelementptr i32, i32* %a0, i32 3 + %v16 = load i32, i32* %v15, align 4 + %v17 = getelementptr half, half* %a1, i32 %v16 + %v18 = load half, half* %v17, align 4 + %v19 = insertelement <64 x half> %v14, half %v18, i32 3 + %v20 = getelementptr i32, i32* %a0, i32 4 + %v21 = load i32, i32* %v20, align 4 + %v22 = 
getelementptr half, half* %a1, i32 %v21 + %v23 = load half, half* %v22, align 4 + %v24 = insertelement <64 x half> %v19, half %v23, i32 4 + %v25 = getelementptr i32, i32* %a0, i32 5 + %v26 = load i32, i32* %v25, align 4 + %v27 = getelementptr half, half* %a1, i32 %v26 + %v28 = load half, half* %v27, align 4 + %v29 = insertelement <64 x half> %v24, half %v28, i32 5 + %v30 = getelementptr i32, i32* %a0, i32 6 + %v31 = load i32, i32* %v30, align 4 + %v32 = getelementptr half, half* %a1, i32 %v31 + %v33 = load half, half* %v32, align 4 + %v34 = insertelement <64 x half> %v29, half %v33, i32 6 + %v35 = getelementptr i32, i32* %a0, i32 7 + %v36 = load i32, i32* %v35, align 4 + %v37 = getelementptr half, half* %a1, i32 %v36 + %v38 = load half, half* %v37, align 4 + %v39 = insertelement <64 x half> %v34, half %v38, i32 7 + %v40 = getelementptr i32, i32* %a0, i32 8 + %v41 = load i32, i32* %v40, align 4 + %v42 = getelementptr half, half* %a1, i32 %v41 + %v43 = load half, half* %v42, align 4 + %v44 = insertelement <64 x half> %v39, half %v43, i32 8 + %v45 = getelementptr i32, i32* %a0, i32 9 + %v46 = load i32, i32* %v45, align 4 + %v47 = getelementptr half, half* %a1, i32 %v46 + %v48 = load half, half* %v47, align 4 + %v49 = insertelement <64 x half> %v44, half %v48, i32 9 + %v50 = getelementptr i32, i32* %a0, i32 10 + %v51 = load i32, i32* %v50, align 4 + %v52 = getelementptr half, half* %a1, i32 %v51 + %v53 = load half, half* %v52, align 4 + %v54 = insertelement <64 x half> %v49, half %v53, i32 10 + %v55 = getelementptr i32, i32* %a0, i32 11 + %v56 = load i32, i32* %v55, align 4 + %v57 = getelementptr half, half* %a1, i32 %v56 + %v58 = load half, half* %v57, align 4 + %v59 = insertelement <64 x half> %v54, half %v58, i32 11 + %v60 = getelementptr i32, i32* %a0, i32 12 + %v61 = load i32, i32* %v60, align 4 + %v62 = getelementptr half, half* %a1, i32 %v61 + %v63 = load half, half* %v62, align 4 + %v64 = insertelement <64 x half> %v59, half %v63, i32 12 + %v65 = getelementptr 
i32, i32* %a0, i32 13 + %v66 = load i32, i32* %v65, align 4 + %v67 = getelementptr half, half* %a1, i32 %v66 + %v68 = load half, half* %v67, align 4 + %v69 = insertelement <64 x half> %v64, half %v68, i32 13 + %v70 = getelementptr i32, i32* %a0, i32 14 + %v71 = load i32, i32* %v70, align 4 + %v72 = getelementptr half, half* %a1, i32 %v71 + %v73 = load half, half* %v72, align 4 + %v74 = insertelement <64 x half> %v69, half %v73, i32 14 + %v75 = getelementptr i32, i32* %a0, i32 15 + %v76 = load i32, i32* %v75, align 4 + %v77 = getelementptr half, half* %a1, i32 %v76 + %v78 = load half, half* %v77, align 4 + %v79 = insertelement <64 x half> %v74, half %v78, i32 15 + %v80 = getelementptr i32, i32* %a0, i32 16 + %v81 = load i32, i32* %v80, align 4 + %v82 = getelementptr half, half* %a1, i32 %v81 + %v83 = load half, half* %v82, align 4 + %v84 = insertelement <64 x half> %v79, half %v83, i32 16 + %v85 = getelementptr i32, i32* %a0, i32 17 + %v86 = load i32, i32* %v85, align 4 + %v87 = getelementptr half, half* %a1, i32 %v86 + %v88 = load half, half* %v87, align 4 + %v89 = insertelement <64 x half> %v84, half %v88, i32 17 + %v90 = getelementptr i32, i32* %a0, i32 18 + %v91 = load i32, i32* %v90, align 4 + %v92 = getelementptr half, half* %a1, i32 %v91 + %v93 = load half, half* %v92, align 4 + %v94 = insertelement <64 x half> %v89, half %v93, i32 18 + %v95 = getelementptr i32, i32* %a0, i32 19 + %v96 = load i32, i32* %v95, align 4 + %v97 = getelementptr half, half* %a1, i32 %v96 + %v98 = load half, half* %v97, align 4 + %v99 = insertelement <64 x half> %v94, half %v98, i32 19 + %v100 = getelementptr i32, i32* %a0, i32 20 + %v101 = load i32, i32* %v100, align 4 + %v102 = getelementptr half, half* %a1, i32 %v101 + %v103 = load half, half* %v102, align 4 + %v104 = insertelement <64 x half> %v99, half %v103, i32 20 + %v105 = getelementptr i32, i32* %a0, i32 21 + %v106 = load i32, i32* %v105, align 4 + %v107 = getelementptr half, half* %a1, i32 %v106 + %v108 = load half, half* 
%v107, align 4 + %v109 = insertelement <64 x half> %v104, half %v108, i32 21 + %v110 = getelementptr i32, i32* %a0, i32 22 + %v111 = load i32, i32* %v110, align 4 + %v112 = getelementptr half, half* %a1, i32 %v111 + %v113 = load half, half* %v112, align 4 + %v114 = insertelement <64 x half> %v109, half %v113, i32 22 + %v115 = getelementptr i32, i32* %a0, i32 23 + %v116 = load i32, i32* %v115, align 4 + %v117 = getelementptr half, half* %a1, i32 %v116 + %v118 = load half, half* %v117, align 4 + %v119 = insertelement <64 x half> %v114, half %v118, i32 23 + %v120 = getelementptr i32, i32* %a0, i32 24 + %v121 = load i32, i32* %v120, align 4 + %v122 = getelementptr half, half* %a1, i32 %v121 + %v123 = load half, half* %v122, align 4 + %v124 = insertelement <64 x half> %v119, half %v123, i32 24 + %v125 = getelementptr i32, i32* %a0, i32 25 + %v126 = load i32, i32* %v125, align 4 + %v127 = getelementptr half, half* %a1, i32 %v126 + %v128 = load half, half* %v127, align 4 + %v129 = insertelement <64 x half> %v124, half %v128, i32 25 + %v130 = getelementptr i32, i32* %a0, i32 26 + %v131 = load i32, i32* %v130, align 4 + %v132 = getelementptr half, half* %a1, i32 %v131 + %v133 = load half, half* %v132, align 4 + %v134 = insertelement <64 x half> %v129, half %v133, i32 26 + %v135 = getelementptr i32, i32* %a0, i32 27 + %v136 = load i32, i32* %v135, align 4 + %v137 = getelementptr half, half* %a1, i32 %v136 + %v138 = load half, half* %v137, align 4 + %v139 = insertelement <64 x half> %v134, half %v138, i32 27 + %v140 = getelementptr i32, i32* %a0, i32 28 + %v141 = load i32, i32* %v140, align 4 + %v142 = getelementptr half, half* %a1, i32 %v141 + %v143 = load half, half* %v142, align 4 + %v144 = insertelement <64 x half> %v139, half %v143, i32 28 + %v145 = getelementptr i32, i32* %a0, i32 29 + %v146 = load i32, i32* %v145, align 4 + %v147 = getelementptr half, half* %a1, i32 %v146 + %v148 = load half, half* %v147, align 4 + %v149 = insertelement <64 x half> %v144, half %v148, 
i32 29 + %v150 = getelementptr i32, i32* %a0, i32 30 + %v151 = load i32, i32* %v150, align 4 + %v152 = getelementptr half, half* %a1, i32 %v151 + %v153 = load half, half* %v152, align 4 + %v154 = insertelement <64 x half> %v149, half %v153, i32 30 + %v155 = getelementptr i32, i32* %a0, i32 31 + %v156 = load i32, i32* %v155, align 4 + %v157 = getelementptr half, half* %a1, i32 %v156 + %v158 = load half, half* %v157, align 4 + %v159 = insertelement <64 x half> %v154, half %v158, i32 31 + %v160 = getelementptr i32, i32* %a0, i32 32 + %v161 = load i32, i32* %v160, align 4 + %v162 = getelementptr half, half* %a1, i32 %v161 + %v163 = load half, half* %v162, align 4 + %v164 = insertelement <64 x half> %v159, half %v163, i32 32 + %v165 = getelementptr i32, i32* %a0, i32 33 + %v166 = load i32, i32* %v165, align 4 + %v167 = getelementptr half, half* %a1, i32 %v166 + %v168 = load half, half* %v167, align 4 + %v169 = insertelement <64 x half> %v164, half %v168, i32 33 + %v170 = getelementptr i32, i32* %a0, i32 34 + %v171 = load i32, i32* %v170, align 4 + %v172 = getelementptr half, half* %a1, i32 %v171 + %v173 = load half, half* %v172, align 4 + %v174 = insertelement <64 x half> %v169, half %v173, i32 34 + %v175 = getelementptr i32, i32* %a0, i32 35 + %v176 = load i32, i32* %v175, align 4 + %v177 = getelementptr half, half* %a1, i32 %v176 + %v178 = load half, half* %v177, align 4 + %v179 = insertelement <64 x half> %v174, half %v178, i32 35 + %v180 = getelementptr i32, i32* %a0, i32 36 + %v181 = load i32, i32* %v180, align 4 + %v182 = getelementptr half, half* %a1, i32 %v181 + %v183 = load half, half* %v182, align 4 + %v184 = insertelement <64 x half> %v179, half %v183, i32 36 + %v185 = getelementptr i32, i32* %a0, i32 37 + %v186 = load i32, i32* %v185, align 4 + %v187 = getelementptr half, half* %a1, i32 %v186 + %v188 = load half, half* %v187, align 4 + %v189 = insertelement <64 x half> %v184, half %v188, i32 37 + %v190 = getelementptr i32, i32* %a0, i32 38 + %v191 = load 
i32, i32* %v190, align 4 + %v192 = getelementptr half, half* %a1, i32 %v191 + %v193 = load half, half* %v192, align 4 + %v194 = insertelement <64 x half> %v189, half %v193, i32 38 + %v195 = getelementptr i32, i32* %a0, i32 39 + %v196 = load i32, i32* %v195, align 4 + %v197 = getelementptr half, half* %a1, i32 %v196 + %v198 = load half, half* %v197, align 4 + %v199 = insertelement <64 x half> %v194, half %v198, i32 39 + %v200 = getelementptr i32, i32* %a0, i32 40 + %v201 = load i32, i32* %v200, align 4 + %v202 = getelementptr half, half* %a1, i32 %v201 + %v203 = load half, half* %v202, align 4 + %v204 = insertelement <64 x half> %v199, half %v203, i32 40 + %v205 = getelementptr i32, i32* %a0, i32 41 + %v206 = load i32, i32* %v205, align 4 + %v207 = getelementptr half, half* %a1, i32 %v206 + %v208 = load half, half* %v207, align 4 + %v209 = insertelement <64 x half> %v204, half %v208, i32 41 + %v210 = getelementptr i32, i32* %a0, i32 42 + %v211 = load i32, i32* %v210, align 4 + %v212 = getelementptr half, half* %a1, i32 %v211 + %v213 = load half, half* %v212, align 4 + %v214 = insertelement <64 x half> %v209, half %v213, i32 42 + %v215 = getelementptr i32, i32* %a0, i32 43 + %v216 = load i32, i32* %v215, align 4 + %v217 = getelementptr half, half* %a1, i32 %v216 + %v218 = load half, half* %v217, align 4 + %v219 = insertelement <64 x half> %v214, half %v218, i32 43 + %v220 = getelementptr i32, i32* %a0, i32 44 + %v221 = load i32, i32* %v220, align 4 + %v222 = getelementptr half, half* %a1, i32 %v221 + %v223 = load half, half* %v222, align 4 + %v224 = insertelement <64 x half> %v219, half %v223, i32 44 + %v225 = getelementptr i32, i32* %a0, i32 45 + %v226 = load i32, i32* %v225, align 4 + %v227 = getelementptr half, half* %a1, i32 %v226 + %v228 = load half, half* %v227, align 4 + %v229 = insertelement <64 x half> %v224, half %v228, i32 45 + %v230 = getelementptr i32, i32* %a0, i32 46 + %v231 = load i32, i32* %v230, align 4 + %v232 = getelementptr half, half* %a1, i32 
%v231 + %v233 = load half, half* %v232, align 4 + %v234 = insertelement <64 x half> %v229, half %v233, i32 46 + %v235 = getelementptr i32, i32* %a0, i32 47 + %v236 = load i32, i32* %v235, align 4 + %v237 = getelementptr half, half* %a1, i32 %v236 + %v238 = load half, half* %v237, align 4 + %v239 = insertelement <64 x half> %v234, half %v238, i32 47 + %v240 = getelementptr i32, i32* %a0, i32 48 + %v241 = load i32, i32* %v240, align 4 + %v242 = getelementptr half, half* %a1, i32 %v241 + %v243 = load half, half* %v242, align 4 + %v244 = insertelement <64 x half> %v239, half %v243, i32 48 + %v245 = getelementptr i32, i32* %a0, i32 49 + %v246 = load i32, i32* %v245, align 4 + %v247 = getelementptr half, half* %a1, i32 %v246 + %v248 = load half, half* %v247, align 4 + %v249 = insertelement <64 x half> %v244, half %v248, i32 49 + %v250 = getelementptr i32, i32* %a0, i32 50 + %v251 = load i32, i32* %v250, align 4 + %v252 = getelementptr half, half* %a1, i32 %v251 + %v253 = load half, half* %v252, align 4 + %v254 = insertelement <64 x half> %v249, half %v253, i32 50 + %v255 = getelementptr i32, i32* %a0, i32 51 + %v256 = load i32, i32* %v255, align 4 + %v257 = getelementptr half, half* %a1, i32 %v256 + %v258 = load half, half* %v257, align 4 + %v259 = insertelement <64 x half> %v254, half %v258, i32 51 + %v260 = getelementptr i32, i32* %a0, i32 52 + %v261 = load i32, i32* %v260, align 4 + %v262 = getelementptr half, half* %a1, i32 %v261 + %v263 = load half, half* %v262, align 4 + %v264 = insertelement <64 x half> %v259, half %v263, i32 52 + %v265 = getelementptr i32, i32* %a0, i32 53 + %v266 = load i32, i32* %v265, align 4 + %v267 = getelementptr half, half* %a1, i32 %v266 + %v268 = load half, half* %v267, align 4 + %v269 = insertelement <64 x half> %v264, half %v268, i32 53 + %v270 = getelementptr i32, i32* %a0, i32 54 + %v271 = load i32, i32* %v270, align 4 + %v272 = getelementptr half, half* %a1, i32 %v271 + %v273 = load half, half* %v272, align 4 + %v274 = insertelement 
<64 x half> %v269, half %v273, i32 54 + %v275 = getelementptr i32, i32* %a0, i32 55 + %v276 = load i32, i32* %v275, align 4 + %v277 = getelementptr half, half* %a1, i32 %v276 + %v278 = load half, half* %v277, align 4 + %v279 = insertelement <64 x half> %v274, half %v278, i32 55 + %v280 = getelementptr i32, i32* %a0, i32 56 + %v281 = load i32, i32* %v280, align 4 + %v282 = getelementptr half, half* %a1, i32 %v281 + %v283 = load half, half* %v282, align 4 + %v284 = insertelement <64 x half> %v279, half %v283, i32 56 + %v285 = getelementptr i32, i32* %a0, i32 57 + %v286 = load i32, i32* %v285, align 4 + %v287 = getelementptr half, half* %a1, i32 %v286 + %v288 = load half, half* %v287, align 4 + %v289 = insertelement <64 x half> %v284, half %v288, i32 57 + %v290 = getelementptr i32, i32* %a0, i32 58 + %v291 = load i32, i32* %v290, align 4 + %v292 = getelementptr half, half* %a1, i32 %v291 + %v293 = load half, half* %v292, align 4 + %v294 = insertelement <64 x half> %v289, half %v293, i32 58 + %v295 = getelementptr i32, i32* %a0, i32 59 + %v296 = load i32, i32* %v295, align 4 + %v297 = getelementptr half, half* %a1, i32 %v296 + %v298 = load half, half* %v297, align 4 + %v299 = insertelement <64 x half> %v294, half %v298, i32 59 + %v300 = getelementptr i32, i32* %a0, i32 60 + %v301 = load i32, i32* %v300, align 4 + %v302 = getelementptr half, half* %a1, i32 %v301 + %v303 = load half, half* %v302, align 4 + %v304 = insertelement <64 x half> %v299, half %v303, i32 60 + %v305 = getelementptr i32, i32* %a0, i32 61 + %v306 = load i32, i32* %v305, align 4 + %v307 = getelementptr half, half* %a1, i32 %v306 + %v308 = load half, half* %v307, align 4 + %v309 = insertelement <64 x half> %v304, half %v308, i32 61 + %v310 = getelementptr i32, i32* %a0, i32 62 + %v311 = load i32, i32* %v310, align 4 + %v312 = getelementptr half, half* %a1, i32 %v311 + %v313 = load half, half* %v312, align 4 + %v314 = insertelement <64 x half> %v309, half %v313, i32 62 + %v315 = getelementptr i32, i32* 
%a0, i32 63 + %v316 = load i32, i32* %v315, align 4 + %v317 = getelementptr half, half* %a1, i32 %v316 + %v318 = load half, half* %v317, align 4 + %v319 = insertelement <64 x half> %v314, half %v318, i32 63 + ret <64 x half> %v319 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } From ba51d26ec4519f5b31de3acf643264504ea7bc7c Mon Sep 17 00:00:00 2001 From: modimo Date: Tue, 28 Dec 2021 14:48:15 -0800 Subject: [PATCH 149/992] [CodeView] Clamp Frontend version D43002 introduced a test debug-info-objname.cpp that outputted the current compiler version into CodeView. Internally we appended a date to the patch version and overflowed the 16-bits allocated to that space. This change clamps the Frontend version outputted values to 16-bits like rGd1185fc081ead71a8bf239ff1814f5ff73084c15 did for the Backend version. Testing: ninja check-all newly added tests correctly clamps and no longer asserts when trying to output the field Reviewed By: aganea Differential Revision: https://reviews.llvm.org/D116243 --- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 4 +- llvm/test/MC/COFF/cv-compiler-info-clamp.ll | 61 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/COFF/cv-compiler-info-clamp.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index d621108408f0..ed74d2b303ad 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -857,8 +857,10 @@ void CodeViewDebug::emitCompilerInformation() { StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); - for (int N : FrontVer.Part) + for (int N : FrontVer.Part) { + N = std::min(N, std::numeric_limits::max()); OS.emitInt16(N); + } // Some Microsoft tools, like Binscope, expect a backend version number of at // least 8.something, so 
we'll coerce the LLVM version into a form that diff --git a/llvm/test/MC/COFF/cv-compiler-info-clamp.ll b/llvm/test/MC/COFF/cv-compiler-info-clamp.ll new file mode 100644 index 000000000000..240d4cce5751 --- /dev/null +++ b/llvm/test/MC/COFF/cv-compiler-info-clamp.ll @@ -0,0 +1,61 @@ +; Check that a large version number (4.0.20211223) is clamped to proper size +; RUN: llc -mtriple i686-pc-windows-msvc < %s | FileCheck %s --check-prefixes=CHECK,STDOUT +; RUN: llc -mtriple i686-pc-windows-msvc < %s -o %t +; RUN: FileCheck %s --input-file=%t --check-prefixes=CHECK,FILE + +; RUN: llvm-mc %t -triple=i686-pc-win32 -filetype=obj -o %t.obj +; RUN: llvm-pdbutil dump -il -symbols %t.obj | FileCheck %s --check-prefixes=CODEVIEW + +; ModuleID = 'D:\src\scopes\foo.cpp' +source_filename = "D:\5Csrc\5Cscopes\5Cfoo.cpp" +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc19.0.23918" + +; Function Attrs: nounwind sspstrong +define i32 @"\01?foo@@YAHXZ"() #0 !dbg !10 { +entry: + ret i32 42, !dbg !14 +} + +attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.20211223 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +; One .debug$S section should contain an S_COMPILE3 record that identifies the +; source language and the version of the compiler based on the DICompileUnit. 
+; CHECK: .section .debug$S,"dr" +; CHECK: .short 4353 # Record kind: S_OBJNAME +; CHECK-NEXT: .long 0 # Signature +; STDOUT-NEXT: .byte 0 # Object name +; FILE-NEXT: .asciz "{{.*}}{{\\\\|/}}cv-compiler-info-clamp.ll.tmp" # Object name +; CHECK: .short 4412 # Record kind: S_COMPILE3 +; CHECK-NEXT: .long 1 # Flags and language +; CHECK-NEXT: .short 7 # CPUType +; CHECK-NEXT: .short 4 # Frontend version +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 65535 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short [[BACKEND_VERSION:[0-9]+]] # Backend version +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .asciz "clang version 4.0.20211223 " # Null-terminated compiler version string +; CHECK-NOT: .short 4412 # Record kind: S_COMPILE3 +!1 = !DIFile(filename: "D:\5Csrc\5Cscopes\5Cfoo.cpp", directory: "D:\5Csrc\5Cscopes\5Cclang") +!2 = !{} +!7 = !{i32 2, !"CodeView", i32 1} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 4.0.20211223 "} +!10 = distinct !DISubprogram(name: "foo", linkageName: "\01?foo@@YAHXZ", scope: !1, file: !1, line: 1, type: !11, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !12) +!12 = !{!13} +!13 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!14 = !DILocation(line: 2, scope: !10) + +; CODEVIEW: S_COMPILE3 [size = 56] +; CODEVIEW-NEXT: machine = intel pentium 3, Ver = clang version 4.0.20211223 , language = c++ +; Backend version is based off of compiler version building this which is variable +; CODEVIEW-NEXT: frontend = 4.0.65535.0, backend = From 319181f7671868be6cd4865e9bcc63f6ba2ddb06 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 28 Dec 2021 19:01:01 -0500 Subject: [PATCH 150/992] [lld-macho] Fix alignment of TLV data sections References from thread-local variable sections are treated as offsets relative to the start of the thread-local data memory area, 
which is initialized via copying all the TLV data sections (which are all contiguous). If later data sections require a greater alignment than earlier ones, the offsets of data within those sections won't be guaranteed to aligned unless we normalize alignments. We therefore use the largest alignment for all TLV data sections. Reviewed By: #lld-macho, int3 Differential Revision: https://reviews.llvm.org/D116263 --- lld/MachO/Writer.cpp | 19 +++++++++++++++++-- lld/test/MachO/tlv.s | 30 ++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 8903f0189ef9..3c7bea335c66 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -908,13 +908,28 @@ static void sortSegmentsAndSections() { uint32_t sectionIndex = 0; for (OutputSegment *seg : outputSegments) { seg->sortOutputSections(); + // References from thread-local variable sections are treated as offsets + // relative to the start of the thread-local data memory area, which + // is initialized via copying all the TLV data sections (which are all + // contiguous). If later data sections require a greater alignment than + // earlier ones, the offsets of data within those sections won't be + // guaranteed to aligned unless we normalize alignments. We therefore use + // the largest alignment for all TLV data sections. + uint32_t tlvAlign = 0; + for (const OutputSection *osec : seg->getSections()) + if (isThreadLocalData(osec->flags) && osec->align > tlvAlign) + tlvAlign = osec->align; + for (OutputSection *osec : seg->getSections()) { // Now that the output sections are sorted, assign the final // output section indices. 
if (!osec->isHidden()) osec->index = ++sectionIndex; - if (!firstTLVDataSection && isThreadLocalData(osec->flags)) - firstTLVDataSection = osec; + if (isThreadLocalData(osec->flags)) { + if (!firstTLVDataSection) + firstTLVDataSection = osec; + osec->align = tlvAlign; + } if (!isecPriorities.empty()) { if (auto *merged = dyn_cast(osec)) { diff --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s index f188bf7279a8..e71fe76980e2 100644 --- a/lld/test/MachO/tlv.s +++ b/lld/test/MachO/tlv.s @@ -24,12 +24,12 @@ # RUN: llvm-objdump -d --bind --rebase %t/regular-and-tbss | FileCheck %s --check-prefixes=REG,TBSS,LINKEDIT # RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular-and-tbss | \ # RUN: FileCheck %s --check-prefix=REG-TBSS-TLVP -# RUN: llvm-objdump --section-headers %t/regular-and-tbss | FileCheck %s --check-prefix=SECTION-ORDER +# RUN: llvm-objdump --section-headers %t/regular-and-tbss | FileCheck %s --check-prefix=SECTIONS ## Check that we always put __thread_bss immediately after __thread_data, ## regardless of the order of the input files. 
# RUN: %lld -lSystem %t/tbss.o %t/regular.o -o %t/regular-and-tbss -# RUN: llvm-objdump --section-headers %t/regular-and-tbss | FileCheck %s --check-prefix=SECTION-ORDER +# RUN: llvm-objdump --section-headers %t/regular-and-tbss | FileCheck %s --check-prefix=SECTIONS # HEADER: MH_HAS_TLV_DESCRIPTORS @@ -41,6 +41,7 @@ # TBSS: <_f>: # TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_baz> # TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_qux> +# TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_hoge> # TBSS-NEXT: retq # REG-TLVP: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 @@ -53,10 +54,12 @@ # REG-TBSS-TLVP: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 # REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 08 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00 00 # REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -# REG-TBSS-TLVP-NEXT: 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 18 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 28 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 30 00 00 00 00 00 00 00 ## Make sure we don't emit rebase opcodes for relocations in __thread_vars. # LINKEDIT: Rebase table: @@ -66,9 +69,14 @@ # LINKEDIT: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap # LINKEDIT: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap -# SECTION-ORDER: __thread_data -# SECTION-ORDER: more_thread_data -# SECTION-ORDER-NEXT: __thread_bss +## Make sure we have an odd number of tlv vars, and that the __thread_vars +## section starts 16-bytes aligned. 
This is the setup required for __thread_data +## not to be automatically 16-bytes aligned, ensuring the linker does its +## expected job of aligning _hoge$tlv$init. +# SECTIONS: __thread_vars {{[0-9]+}}8 {{[0-9]+}}0 +# SECTIONS: __thread_data +# SECTIONS: more_thread_data +# SECTIONS-NEXT: __thread_bss #--- regular.s .globl _main @@ -102,10 +110,12 @@ _bar: _f: mov _baz@TLVP(%rip), %rax mov _qux@TLVP(%rip), %rax + mov _hoge@TLVP(%rip), %rax ret .tbss _baz$tlv$init, 8, 3 .tbss _qux$tlv$init, 8, 3 +.tbss _hoge$tlv$init, 16, 4 .section __DATA,__thread_vars,thread_local_variables _baz: @@ -116,3 +126,7 @@ _qux: .quad __tlv_bootstrap .quad 0 .quad _qux$tlv$init +_hoge: + .quad __tlv_bootstrap + .quad 0 + .quad _hoge$tlv$init From de92a13fec72a0aa80a466e7f228f5af435cebc9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 28 Dec 2021 16:40:51 -0800 Subject: [PATCH 151/992] [ELF] --gc-sections: Work around SHT_PROGBITS .init_array.N for Rust See https://github.com/rust-lang/rust/issues/92181 --- lld/ELF/MarkLive.cpp | 9 +++++---- lld/test/ELF/gc-sections.s | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index b63f2beb9dcb..d2622e95e1e8 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -177,11 +177,12 @@ static bool isReserved(InputSectionBase *sec) { // SHT_NOTE sections in a group are subject to garbage collection. return !sec->nextInSectionGroup; default: - // Support SHT_PROGBITS .init_array for a while - // (https://golang.org/issue/50295). + // Support SHT_PROGBITS .init_array (https://golang.org/issue/50295) and + // .init_array.N (https://github.com/rust-lang/rust/issues/92181) for a + // while. 
StringRef s = sec->name; - return s == ".init" || s == ".fini" || s == ".init_array" || s == ".jcr" || - s.startswith(".ctors") || s.startswith(".dtors"); + return s == ".init" || s == ".fini" || s.startswith(".init_array") || + s == ".jcr" || s.startswith(".ctors") || s.startswith(".dtors"); } } diff --git a/lld/test/ELF/gc-sections.s b/lld/test/ELF/gc-sections.s index 7896c03b0fc5..2094a54dc2bf 100644 --- a/lld/test/ELF/gc-sections.s +++ b/lld/test/ELF/gc-sections.s @@ -157,6 +157,10 @@ h: .section .init_array,"aw",@progbits .quad 0 +# Work around https://github.com/rust-lang/rust/issues/92181 +.section .init_array.00001,"aw",@progbits + .quad 0 + .section .preinit_array,"aw",@preinit_array .quad 0 From 4ecf15b789f6043cdd2cfc50196d9f0c4d758f8a Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Tue, 28 Dec 2021 17:31:36 -0800 Subject: [PATCH 152/992] [llvm-profdata] Make -debug-info visible Add the option comment in .rst. Reviewed By: ellis Differential Revision: https://reviews.llvm.org/D116348 --- llvm/docs/CommandGuide/llvm-profdata.rst | 7 +++++++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index 7c99e14aee69..7c3db6fa5418 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -185,6 +185,13 @@ OPTIONS inlined by PGO early inliner and it will not be adjusted based on sample profile. +.. option:: -debug-info=path + + Specify the executable or `.dSYM` that contains debug info for the raw profile. + When `-debug-info-correlate` was used for instrumentation, use this option + to correlate the raw profile. 
+ + EXAMPLES ^^^^^^^^ Basic Usage diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 285b41f57147..6c12750a9ddf 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -968,7 +968,7 @@ static int merge_main(int argc, const char *argv[]) { "gen-cs-nested-profile", cl::Hidden, cl::init(false), cl::desc("Generate nested function profiles for CSSPGO")); cl::opt DebugInfoFilename( - "debug-info", cl::init(""), cl::Hidden, + "debug-info", cl::init(""), cl::desc("Use the provided debug info to correlate the raw profile.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); From dd2fbbbb2d277a5dd497edbd01b1fcd469e65f53 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 28 Dec 2021 18:03:28 -0800 Subject: [PATCH 153/992] [llvm-profdata][docs] Use `` instead of ` --- llvm/docs/CommandGuide/llvm-profdata.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index 7c3db6fa5418..da5285b6ca4f 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -187,8 +187,8 @@ OPTIONS .. option:: -debug-info=path - Specify the executable or `.dSYM` that contains debug info for the raw profile. - When `-debug-info-correlate` was used for instrumentation, use this option + Specify the executable or ``.dSYM`` that contains debug info for the raw profile. + When ``-debug-info-correlate`` was used for instrumentation, use this option to correlate the raw profile. 
@@ -204,7 +204,7 @@ Merge three profiles: Weighted Input ++++++++++++++ -The input file `foo.profdata` is especially important, multiply its counts by 10: +The input file ``foo.profdata`` is especially important, multiply its counts by 10: :: From f7b096d754d6424f825d7e08af04b7ade985c77b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 28 Dec 2021 18:28:23 -0800 Subject: [PATCH 154/992] [RISCV] Add more div by constant test cases. Some constants require more instructions than others. This adds additional test for each variation. UDIV has 2 variations, SDIV has 4 variations. Some of these sequence may have gotten worse on RV32 when we started doing the div by constant optimization before type legalization. We materialized a smaller constant, but we require more instructions to emulate 8 or 16 bit right shifts. This was hidden by the lack of test coverage. I've also added Zba and Zbb test cases to show the affect of sext.b, sext.h, zext.h, and zext.w on some of the shifts. In some cases we end up generating more code after the multiply because we use a zext.h+srli and sext.h+srai where without Zbb we share a slli between a srli and srai. 
--- llvm/test/CodeGen/RISCV/div-by-constant.ll | 1015 ++++++++++++++++++++ 1 file changed, 1015 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/div-by-constant.ll diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll new file mode 100644 index 000000000000..5abae8fe3298 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -0,0 +1,1015 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32,RV32IM %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zba,+experimental-zbb \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32,RV32IMZB %s +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64,RV64IM %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zba,+experimental-zbb \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64,RV64IMZB %s + +; Test that there is a single shift after the mul and no addition. 
+define i32 @udiv_constant_no_add(i32 %a) nounwind { +; RV32-LABEL: udiv_constant_no_add: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 838861 +; RV32-NEXT: addi a1, a1, -819 +; RV32-NEXT: mulhu a0, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: ret +; +; RV64IM-LABEL: udiv_constant_no_add: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 32 +; RV64IM-NEXT: lui a1, 838861 +; RV64IM-NEXT: addiw a1, a1, -819 +; RV64IM-NEXT: slli a1, a1, 32 +; RV64IM-NEXT: mulhu a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 34 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: udiv_constant_no_add: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: zext.w a0, a0 +; RV64IMZB-NEXT: lui a1, 838861 +; RV64IMZB-NEXT: addiw a1, a1, -819 +; RV64IMZB-NEXT: zext.w a1, a1 +; RV64IMZB-NEXT: mul a0, a0, a1 +; RV64IMZB-NEXT: srli a0, a0, 34 +; RV64IMZB-NEXT: ret + %1 = udiv i32 %a, 5 + ret i32 %1 +} + +; This constant requires a sub, shrli, add sequence after the mul. +define i32 @udiv_constant_add(i32 %a) nounwind { +; RV32-LABEL: udiv_constant_add: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 149797 +; RV32-NEXT: addi a1, a1, -1755 +; RV32-NEXT: mulhu a1, a0, a1 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: ret +; +; RV64IM-LABEL: udiv_constant_add: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 32 +; RV64IM-NEXT: lui a2, 149797 +; RV64IM-NEXT: addiw a2, a2, -1755 +; RV64IM-NEXT: slli a2, a2, 32 +; RV64IM-NEXT: mulhu a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: subw a0, a0, a1 +; RV64IM-NEXT: srliw a0, a0, 1 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 2 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: udiv_constant_add: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: zext.w a1, a0 +; RV64IMZB-NEXT: lui a2, 149797 +; RV64IMZB-NEXT: addiw a2, a2, -1755 +; RV64IMZB-NEXT: mul a1, a1, a2 +; RV64IMZB-NEXT: srli a1, a1, 32 +; RV64IMZB-NEXT: subw a0, a0, a1 +; RV64IMZB-NEXT: srliw a0, a0, 1 +; RV64IMZB-NEXT: add a0, a0, a1 +; 
RV64IMZB-NEXT: srli a0, a0, 2 +; RV64IMZB-NEXT: ret + %1 = udiv i32 %a, 7 + ret i32 %1 +} + +define i64 @udiv64_constant_no_add(i64 %a) nounwind { +; RV32-LABEL: udiv64_constant_no_add: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 5 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: udiv64_constant_no_add: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 1035469 +; RV64-NEXT: addiw a1, a1, -819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -819 +; RV64-NEXT: mulhu a0, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: ret + %1 = udiv i64 %a, 5 + ret i64 %1 +} + +define i64 @udiv64_constant_add(i64 %a) nounwind { +; RV32-LABEL: udiv64_constant_add: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 7 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: udiv64_constant_add: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 4681 +; RV64-NEXT: addiw a1, a1, 585 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 585 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 585 +; RV64-NEXT: slli a1, a1, 13 +; RV64-NEXT: addi a1, a1, 1171 +; RV64-NEXT: mulhu a1, a0, a1 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: ret + %1 = udiv i64 %a, 7 + ret i64 %1 +} + +define i8 @udiv8_constant_no_add(i8 %a) nounwind { +; RV32-LABEL: udiv8_constant_no_add: +; RV32: # %bb.0: +; RV32-NEXT: andi a0, a0, 255 +; RV32-NEXT: li a1, 205 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: srli a0, a0, 10 +; RV32-NEXT: 
ret +; +; RV64-LABEL: udiv8_constant_no_add: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: li a1, 205 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: srli a0, a0, 10 +; RV64-NEXT: ret + %1 = udiv i8 %a, 5 + ret i8 %1 +} + +define i8 @udiv8_constant_add(i8 %a) nounwind { +; RV32IM-LABEL: udiv8_constant_add: +; RV32IM: # %bb.0: +; RV32IM-NEXT: andi a1, a0, 255 +; RV32IM-NEXT: li a2, 37 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: srli a1, a1, 8 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: andi a0, a0, 254 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 2 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: udiv8_constant_add: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: andi a1, a0, 255 +; RV32IMZB-NEXT: sh3add a2, a1, a1 +; RV32IMZB-NEXT: sh2add a1, a2, a1 +; RV32IMZB-NEXT: srli a1, a1, 8 +; RV32IMZB-NEXT: sub a0, a0, a1 +; RV32IMZB-NEXT: andi a0, a0, 254 +; RV32IMZB-NEXT: srli a0, a0, 1 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: srli a0, a0, 2 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: udiv8_constant_add: +; RV64IM: # %bb.0: +; RV64IM-NEXT: andi a1, a0, 255 +; RV64IM-NEXT: li a2, 37 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: andi a0, a0, 254 +; RV64IM-NEXT: srli a0, a0, 1 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 2 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: udiv8_constant_add: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: andi a1, a0, 255 +; RV64IMZB-NEXT: sh3add a2, a1, a1 +; RV64IMZB-NEXT: sh2add a1, a2, a1 +; RV64IMZB-NEXT: srli a1, a1, 8 +; RV64IMZB-NEXT: sub a0, a0, a1 +; RV64IMZB-NEXT: andi a0, a0, 254 +; RV64IMZB-NEXT: srli a0, a0, 1 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: srli a0, a0, 2 +; RV64IMZB-NEXT: ret + %1 = udiv i8 %a, 7 + ret i8 %1 +} + +define i16 @udiv16_constant_no_add(i16 %a) nounwind { +; RV32IM-LABEL: udiv16_constant_no_add: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: lui a1, 838864 
+; RV32IM-NEXT: mulhu a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 18 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: udiv16_constant_no_add: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: zext.h a0, a0 +; RV32IMZB-NEXT: lui a1, 13 +; RV32IMZB-NEXT: addi a1, a1, -819 +; RV32IMZB-NEXT: mul a0, a0, a1 +; RV32IMZB-NEXT: srli a0, a0, 18 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: udiv16_constant_no_add: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lui a1, 52429 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: mulhu a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 18 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: udiv16_constant_no_add: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: zext.h a0, a0 +; RV64IMZB-NEXT: lui a1, 13 +; RV64IMZB-NEXT: addiw a1, a1, -819 +; RV64IMZB-NEXT: mul a0, a0, a1 +; RV64IMZB-NEXT: srli a0, a0, 18 +; RV64IMZB-NEXT: ret + %1 = udiv i16 %a, 5 + ret i16 %1 +} + +define i16 @udiv16_constant_add(i16 %a) nounwind { +; RV32IM-LABEL: udiv16_constant_add: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 16 +; RV32IM-NEXT: lui a2, 149808 +; RV32IM-NEXT: mulhu a1, a1, a2 +; RV32IM-NEXT: srli a1, a1, 16 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: srli a0, a0, 17 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 2 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: udiv16_constant_add: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: zext.h a1, a0 +; RV32IMZB-NEXT: lui a2, 2 +; RV32IMZB-NEXT: addi a2, a2, 1171 +; RV32IMZB-NEXT: mul a1, a1, a2 +; RV32IMZB-NEXT: srli a1, a1, 16 +; RV32IMZB-NEXT: sub a0, a0, a1 +; RV32IMZB-NEXT: zext.h a0, a0 +; RV32IMZB-NEXT: srli a0, a0, 1 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: srli a0, a0, 2 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: udiv16_constant_add: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 48 +; RV64IM-NEXT: lui a2, 149808 +; RV64IM-NEXT: mulhu a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 16 +; RV64IM-NEXT: subw a0, a0, a1 +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: srli a0, a0, 49 +; 
RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 2 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: udiv16_constant_add: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: zext.h a1, a0 +; RV64IMZB-NEXT: lui a2, 2 +; RV64IMZB-NEXT: addiw a2, a2, 1171 +; RV64IMZB-NEXT: mul a1, a1, a2 +; RV64IMZB-NEXT: srli a1, a1, 16 +; RV64IMZB-NEXT: sub a0, a0, a1 +; RV64IMZB-NEXT: zext.h a0, a0 +; RV64IMZB-NEXT: srli a0, a0, 1 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: srli a0, a0, 2 +; RV64IMZB-NEXT: ret + %1 = udiv i16 %a, 7 + ret i16 %1 +} + +; Test the simplest case a srli and an add after the mul. No srai. +define i32 @sdiv_constant_no_srai(i32 %a) nounwind { +; RV32-LABEL: sdiv_constant_no_srai: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1366 +; RV32-NEXT: mulh a0, a0, a1 +; RV32-NEXT: srli a1, a0, 31 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv_constant_no_srai: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1366 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: addw a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i32 %a, 3 + ret i32 %1 +} + +; This constant requires an srai between the mul and the add. +define i32 @sdiv_constant_srai(i32 %a) nounwind { +; RV32-LABEL: sdiv_constant_srai: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 419430 +; RV32-NEXT: addi a1, a1, 1639 +; RV32-NEXT: mulh a0, a0, a1 +; RV32-NEXT: srli a1, a0, 31 +; RV32-NEXT: srai a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv_constant_srai: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: lui a1, 419430 +; RV64-NEXT: addiw a1, a1, 1639 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: srai a0, a0, 33 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i32 %a, 5 + ret i32 %1 +} + +; This constant requires an add and an srai after the mul. 
+define i32 @sdiv_constant_add_srai(i32 %a) nounwind { +; RV32-LABEL: sdiv_constant_add_srai: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 599186 +; RV32-NEXT: addi a1, a1, 1171 +; RV32-NEXT: mulh a1, a0, a1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 31 +; RV32-NEXT: srai a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv_constant_add_srai: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a1, a0 +; RV64-NEXT: lui a2, 599186 +; RV64-NEXT: addiw a2, a2, 1171 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: srli a1, a1, 32 +; RV64-NEXT: addw a0, a1, a0 +; RV64-NEXT: srliw a1, a0, 31 +; RV64-NEXT: sraiw a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i32 %a, 7 + ret i32 %1 +} + +; This constant requires a sub and an srai after the mul. +define i32 @sdiv_constant_sub_srai(i32 %a) nounwind { +; RV32-LABEL: sdiv_constant_sub_srai: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 449390 +; RV32-NEXT: addi a1, a1, -1171 +; RV32-NEXT: mulh a1, a0, a1 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: srli a1, a0, 31 +; RV32-NEXT: srai a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv_constant_sub_srai: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a1, a0 +; RV64-NEXT: lui a2, 449390 +; RV64-NEXT: addiw a2, a2, -1171 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: srli a1, a1, 32 +; RV64-NEXT: subw a0, a1, a0 +; RV64-NEXT: srliw a1, a0, 31 +; RV64-NEXT: sraiw a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i32 %a, -7 + ret i32 %1 +} + +define i64 @sdiv64_constant_no_srai(i64 %a) nounwind { +; RV32-LABEL: sdiv64_constant_no_srai: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 3 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __divdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv64_constant_no_srai: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 21845 +; RV64-NEXT: addiw a1, 
a1, 1365 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: mulh a0, a0, a1 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i64 %a, 3 + ret i64 %1 +} + +define i64 @sdiv64_constant_srai(i64 %a) nounwind { +; RV32-LABEL: sdiv64_constant_srai: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 5 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __divdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv64_constant_srai: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 13 +; RV64-NEXT: addi a1, a1, 1639 +; RV64-NEXT: mulh a0, a0, a1 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: srai a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i64 %a, 5 + ret i64 %1 +} + +define i64 @sdiv64_constant_add_srai(i64 %a) nounwind { +; RV32-LABEL: sdiv64_constant_add_srai: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 15 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __divdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv64_constant_add_srai: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 1017993 +; RV64-NEXT: addiw a1, a1, -1911 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -1911 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -1911 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -1911 +; RV64-NEXT: mulh a1, a0, a1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: srai a0, a0, 3 +; 
RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i64 %a, 15 + ret i64 %1 +} + +define i64 @sdiv64_constant_sub_srai(i64 %a) nounwind { +; RV32-LABEL: sdiv64_constant_sub_srai: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, -3 +; RV32-NEXT: li a3, -1 +; RV32-NEXT: call __divdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sdiv64_constant_sub_srai: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 21845 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: mulh a1, a0, a1 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: srai a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: ret + %1 = sdiv i64 %a, -3 + ret i64 %1 +} + +define i8 @sdiv8_constant_no_srai(i8 %a) nounwind { +; RV32IM-LABEL: sdiv8_constant_no_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: srai a0, a0, 24 +; RV32IM-NEXT: li a1, 86 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 8 +; RV32IM-NEXT: srli a0, a0, 15 +; RV32IM-NEXT: andi a0, a0, 1 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv8_constant_no_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.b a0, a0 +; RV32IMZB-NEXT: li a1, 86 +; RV32IMZB-NEXT: mul a0, a0, a1 +; RV32IMZB-NEXT: srli a1, a0, 8 +; RV32IMZB-NEXT: srli a0, a0, 15 +; RV32IMZB-NEXT: andi a0, a0, 1 +; RV32IMZB-NEXT: add a0, a1, a0 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv8_constant_no_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: srai a0, a0, 56 +; RV64IM-NEXT: li a1, 86 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srli a1, a0, 8 +; RV64IM-NEXT: srli a0, a0, 15 +; RV64IM-NEXT: andi a0, a0, 1 +; RV64IM-NEXT: add a0, a1, a0 +; RV64IM-NEXT: 
ret +; +; RV64IMZB-LABEL: sdiv8_constant_no_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.b a0, a0 +; RV64IMZB-NEXT: li a1, 86 +; RV64IMZB-NEXT: mul a0, a0, a1 +; RV64IMZB-NEXT: srli a1, a0, 8 +; RV64IMZB-NEXT: srli a0, a0, 15 +; RV64IMZB-NEXT: andi a0, a0, 1 +; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: ret + %1 = sdiv i8 %a, 3 + ret i8 %1 +} + +define i8 @sdiv8_constant_srai(i8 %a) nounwind { +; RV32IM-LABEL: sdiv8_constant_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: srai a0, a0, 24 +; RV32IM-NEXT: li a1, 103 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srai a1, a0, 9 +; RV32IM-NEXT: srli a0, a0, 15 +; RV32IM-NEXT: andi a0, a0, 1 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv8_constant_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.b a0, a0 +; RV32IMZB-NEXT: li a1, 103 +; RV32IMZB-NEXT: mul a0, a0, a1 +; RV32IMZB-NEXT: srai a1, a0, 9 +; RV32IMZB-NEXT: srli a0, a0, 15 +; RV32IMZB-NEXT: andi a0, a0, 1 +; RV32IMZB-NEXT: add a0, a1, a0 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv8_constant_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: srai a0, a0, 56 +; RV64IM-NEXT: li a1, 103 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srai a1, a0, 9 +; RV64IM-NEXT: srli a0, a0, 15 +; RV64IM-NEXT: andi a0, a0, 1 +; RV64IM-NEXT: add a0, a1, a0 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv8_constant_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.b a0, a0 +; RV64IMZB-NEXT: li a1, 103 +; RV64IMZB-NEXT: mul a0, a0, a1 +; RV64IMZB-NEXT: srai a1, a0, 9 +; RV64IMZB-NEXT: srli a0, a0, 15 +; RV64IMZB-NEXT: andi a0, a0, 1 +; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: ret + %1 = sdiv i8 %a, 5 + ret i8 %1 +} + +; FIXME: Can shorten the code after the mul by using slli+srai/srli like the +; i16 version without Zbb. 
+define i8 @sdiv8_constant_add_srai(i8 %a) nounwind { +; RV32IM-LABEL: sdiv8_constant_add_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 24 +; RV32IM-NEXT: srai a1, a1, 24 +; RV32IM-NEXT: li a2, -109 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: srli a1, a1, 8 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: andi a1, a0, 128 +; RV32IM-NEXT: srli a1, a1, 7 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: srai a0, a0, 26 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv8_constant_add_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.b a1, a0 +; RV32IMZB-NEXT: li a2, -109 +; RV32IMZB-NEXT: mul a1, a1, a2 +; RV32IMZB-NEXT: srli a1, a1, 8 +; RV32IMZB-NEXT: add a0, a1, a0 +; RV32IMZB-NEXT: andi a1, a0, 128 +; RV32IMZB-NEXT: srli a1, a1, 7 +; RV32IMZB-NEXT: sext.b a0, a0 +; RV32IMZB-NEXT: srai a0, a0, 2 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv8_constant_add_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 56 +; RV64IM-NEXT: srai a1, a1, 56 +; RV64IM-NEXT: li a2, -109 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: add a0, a1, a0 +; RV64IM-NEXT: andi a1, a0, 128 +; RV64IM-NEXT: srli a1, a1, 7 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: srai a0, a0, 58 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv8_constant_add_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.b a1, a0 +; RV64IMZB-NEXT: li a2, -109 +; RV64IMZB-NEXT: mul a1, a1, a2 +; RV64IMZB-NEXT: srli a1, a1, 8 +; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: andi a1, a0, 128 +; RV64IMZB-NEXT: srli a1, a1, 7 +; RV64IMZB-NEXT: sext.b a0, a0 +; RV64IMZB-NEXT: srai a0, a0, 2 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: ret + %1 = sdiv i8 %a, 7 + ret i8 %1 +} + +; FIXME: Can shorten the code after the mul by using slli+srai/srli like the +; i16 version without Zbb. 
+define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind { +; RV32IM-LABEL: sdiv8_constant_sub_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 24 +; RV32IM-NEXT: srai a1, a1, 24 +; RV32IM-NEXT: li a2, 109 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: srli a1, a1, 8 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: andi a1, a0, 128 +; RV32IM-NEXT: srli a1, a1, 7 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: srai a0, a0, 26 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv8_constant_sub_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.b a1, a0 +; RV32IMZB-NEXT: li a2, 109 +; RV32IMZB-NEXT: mul a1, a1, a2 +; RV32IMZB-NEXT: srli a1, a1, 8 +; RV32IMZB-NEXT: sub a0, a1, a0 +; RV32IMZB-NEXT: andi a1, a0, 128 +; RV32IMZB-NEXT: srli a1, a1, 7 +; RV32IMZB-NEXT: sext.b a0, a0 +; RV32IMZB-NEXT: srai a0, a0, 2 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv8_constant_sub_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 56 +; RV64IM-NEXT: srai a1, a1, 56 +; RV64IM-NEXT: li a2, 109 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: sub a0, a1, a0 +; RV64IM-NEXT: andi a1, a0, 128 +; RV64IM-NEXT: srli a1, a1, 7 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: srai a0, a0, 58 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv8_constant_sub_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.b a1, a0 +; RV64IMZB-NEXT: li a2, 109 +; RV64IMZB-NEXT: mul a1, a1, a2 +; RV64IMZB-NEXT: srli a1, a1, 8 +; RV64IMZB-NEXT: sub a0, a1, a0 +; RV64IMZB-NEXT: andi a1, a0, 128 +; RV64IMZB-NEXT: srli a1, a1, 7 +; RV64IMZB-NEXT: sext.b a0, a0 +; RV64IMZB-NEXT: srai a0, a0, 2 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: ret + %1 = sdiv i8 %a, -7 + ret i8 %1 +} + +define i16 @sdiv16_constant_no_srai(i16 %a) nounwind { +; RV32IM-LABEL: sdiv16_constant_no_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: srai a0, a0, 16 +; RV32IM-NEXT: lui a1, 5 +; 
RV32IM-NEXT: addi a1, a1, 1366 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 31 +; RV32IM-NEXT: srli a0, a0, 16 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv16_constant_no_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.h a0, a0 +; RV32IMZB-NEXT: lui a1, 5 +; RV32IMZB-NEXT: addi a1, a1, 1366 +; RV32IMZB-NEXT: mul a0, a0, a1 +; RV32IMZB-NEXT: srli a1, a0, 31 +; RV32IMZB-NEXT: srli a0, a0, 16 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv16_constant_no_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: srai a0, a0, 48 +; RV64IM-NEXT: lui a1, 5 +; RV64IM-NEXT: addiw a1, a1, 1366 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srliw a1, a0, 31 +; RV64IM-NEXT: srli a0, a0, 16 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv16_constant_no_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.h a0, a0 +; RV64IMZB-NEXT: lui a1, 5 +; RV64IMZB-NEXT: addiw a1, a1, 1366 +; RV64IMZB-NEXT: mul a0, a0, a1 +; RV64IMZB-NEXT: srliw a1, a0, 31 +; RV64IMZB-NEXT: srli a0, a0, 16 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: ret + %1 = sdiv i16 %a, 3 + ret i16 %1 +} + +define i16 @sdiv16_constant_srai(i16 %a) nounwind { +; RV32IM-LABEL: sdiv16_constant_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: srai a0, a0, 16 +; RV32IM-NEXT: lui a1, 6 +; RV32IM-NEXT: addi a1, a1, 1639 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 31 +; RV32IM-NEXT: srai a0, a0, 17 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv16_constant_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.h a0, a0 +; RV32IMZB-NEXT: lui a1, 6 +; RV32IMZB-NEXT: addi a1, a1, 1639 +; RV32IMZB-NEXT: mul a0, a0, a1 +; RV32IMZB-NEXT: srli a1, a0, 31 +; RV32IMZB-NEXT: srai a0, a0, 17 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv16_constant_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 48 +; 
RV64IM-NEXT: srai a0, a0, 48 +; RV64IM-NEXT: lui a1, 6 +; RV64IM-NEXT: addiw a1, a1, 1639 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srliw a1, a0, 31 +; RV64IM-NEXT: srai a0, a0, 17 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv16_constant_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.h a0, a0 +; RV64IMZB-NEXT: lui a1, 6 +; RV64IMZB-NEXT: addiw a1, a1, 1639 +; RV64IMZB-NEXT: mul a0, a0, a1 +; RV64IMZB-NEXT: srliw a1, a0, 31 +; RV64IMZB-NEXT: srai a0, a0, 17 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: ret + %1 = sdiv i16 %a, 5 + ret i16 %1 +} + +; FIXME: The Zbb test code has 1 more instruction after the mul because we don't +; share a slli. +define i16 @sdiv16_constant_add_srai(i16 %a) nounwind { +; RV32IM-LABEL: sdiv16_constant_add_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 16 +; RV32IM-NEXT: srai a1, a1, 16 +; RV32IM-NEXT: lui a2, 1048569 +; RV32IM-NEXT: addi a2, a2, -1911 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: srli a1, a1, 16 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: srli a1, a0, 31 +; RV32IM-NEXT: srai a0, a0, 19 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv16_constant_add_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.h a1, a0 +; RV32IMZB-NEXT: lui a2, 1048569 +; RV32IMZB-NEXT: addi a2, a2, -1911 +; RV32IMZB-NEXT: mul a1, a1, a2 +; RV32IMZB-NEXT: srli a1, a1, 16 +; RV32IMZB-NEXT: add a0, a1, a0 +; RV32IMZB-NEXT: zext.h a1, a0 +; RV32IMZB-NEXT: srli a1, a1, 15 +; RV32IMZB-NEXT: sext.h a0, a0 +; RV32IMZB-NEXT: srai a0, a0, 3 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv16_constant_add_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 48 +; RV64IM-NEXT: srai a1, a1, 48 +; RV64IM-NEXT: lui a2, 1048569 +; RV64IM-NEXT: addiw a2, a2, -1911 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 16 +; RV64IM-NEXT: addw a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: srli a1, 
a0, 63 +; RV64IM-NEXT: srai a0, a0, 51 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv16_constant_add_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.h a1, a0 +; RV64IMZB-NEXT: lui a2, 1048569 +; RV64IMZB-NEXT: addiw a2, a2, -1911 +; RV64IMZB-NEXT: mul a1, a1, a2 +; RV64IMZB-NEXT: srli a1, a1, 16 +; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: zext.h a1, a0 +; RV64IMZB-NEXT: srli a1, a1, 15 +; RV64IMZB-NEXT: sext.h a0, a0 +; RV64IMZB-NEXT: srai a0, a0, 3 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: ret + %1 = sdiv i16 %a, 15 + ret i16 %1 +} + +; FIXME: The Zbb test code has 1 more instruction after the mul because we don't +; share a slli. +define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind { +; RV32IM-LABEL: sdiv16_constant_sub_srai: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 16 +; RV32IM-NEXT: srai a1, a1, 16 +; RV32IM-NEXT: lui a2, 7 +; RV32IM-NEXT: addi a2, a2, 1911 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: srli a1, a1, 16 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: srli a1, a0, 31 +; RV32IM-NEXT: srai a0, a0, 19 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: sdiv16_constant_sub_srai: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: sext.h a1, a0 +; RV32IMZB-NEXT: lui a2, 7 +; RV32IMZB-NEXT: addi a2, a2, 1911 +; RV32IMZB-NEXT: mul a1, a1, a2 +; RV32IMZB-NEXT: srli a1, a1, 16 +; RV32IMZB-NEXT: sub a0, a1, a0 +; RV32IMZB-NEXT: zext.h a1, a0 +; RV32IMZB-NEXT: srli a1, a1, 15 +; RV32IMZB-NEXT: sext.h a0, a0 +; RV32IMZB-NEXT: srai a0, a0, 3 +; RV32IMZB-NEXT: add a0, a0, a1 +; RV32IMZB-NEXT: ret +; +; RV64IM-LABEL: sdiv16_constant_sub_srai: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 48 +; RV64IM-NEXT: srai a1, a1, 48 +; RV64IM-NEXT: lui a2, 7 +; RV64IM-NEXT: addiw a2, a2, 1911 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 16 +; RV64IM-NEXT: subw a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: srli a1, a0, 63 +; RV64IM-NEXT: srai 
a0, a0, 51 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret +; +; RV64IMZB-LABEL: sdiv16_constant_sub_srai: +; RV64IMZB: # %bb.0: +; RV64IMZB-NEXT: sext.h a1, a0 +; RV64IMZB-NEXT: lui a2, 7 +; RV64IMZB-NEXT: addiw a2, a2, 1911 +; RV64IMZB-NEXT: mul a1, a1, a2 +; RV64IMZB-NEXT: srli a1, a1, 16 +; RV64IMZB-NEXT: sub a0, a1, a0 +; RV64IMZB-NEXT: zext.h a1, a0 +; RV64IMZB-NEXT: srli a1, a1, 15 +; RV64IMZB-NEXT: sext.h a0, a0 +; RV64IMZB-NEXT: srai a0, a0, 3 +; RV64IMZB-NEXT: add a0, a0, a1 +; RV64IMZB-NEXT: ret + %1 = sdiv i16 %a, -15 + ret i16 %1 +} From 7f410251e8d483e57ad54340ad968e4b498461da Mon Sep 17 00:00:00 2001 From: Joe Loser Date: Wed, 22 Dec 2021 12:30:09 -0500 Subject: [PATCH 155/992] [libcxx][test] Remove redundant semiregular checks for CPOs Some individual test files verify the CPO under test satisfies `semiregular` concept. This is redundant since it is already part of the test in verifying whether the entity is indeed a CPO in `libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp`. 
Differential Revision: https://reviews.llvm.org/D116173 --- .../iterator.cust/iterator.cust.swap.pass.cpp | 2 -- libcxx/test/std/ranges/range.access/size.pass.cpp | 2 -- libcxx/test/std/ranges/range.access/ssize.pass.cpp | 2 -- .../std/ranges/range.adaptors/range.counted/counted.pass.cpp | 3 --- .../range.factories/range.iota.view/views_iota.pass.cpp | 5 ----- 5 files changed, 14 deletions(-) diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap.pass.cpp index 31ecbb3bb34a..1d578a9ea126 100644 --- a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap.pass.cpp @@ -22,8 +22,6 @@ using IterSwapT = decltype(std::ranges::iter_swap); -static_assert(std::semiregular>); - struct HasIterSwap { int &value_; explicit HasIterSwap(int &value) : value_(value) { assert(value == 0); } diff --git a/libcxx/test/std/ranges/range.access/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp index 4d91e3dafebd..0a45a2d7c498 100644 --- a/libcxx/test/std/ranges/range.access/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/size.pass.cpp @@ -36,8 +36,6 @@ static_assert(std::ranges::size(std::move(array_of_incomplete)) == 42); static_assert(std::ranges::size(std::as_const(array_of_incomplete)) == 42); static_assert(std::ranges::size(static_cast(array_of_incomplete)) == 42); -static_assert(std::semiregular>); - struct SizeMember { constexpr size_t size() { return 42; } }; diff --git a/libcxx/test/std/ranges/range.access/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp index beedcfa9908b..39e7b80e2163 100644 --- a/libcxx/test/std/ranges/range.access/ssize.pass.cpp +++ b/libcxx/test/std/ranges/range.access/ssize.pass.cpp @@ -25,8 +25,6 @@ static_assert( std::is_invocable_v); static_assert( 
std::is_invocable_v); static_assert( std::is_invocable_v); -static_assert(std::semiregular>); - struct SizeMember { constexpr size_t size() { return 42; } }; diff --git a/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp index 29a891174b58..d19c991a508a 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp @@ -48,9 +48,6 @@ constexpr bool test() { { static_assert(std::addressof(std::views::counted) == std::addressof(std::ranges::views::counted)); - auto copy = std::views::counted; - static_assert(std::semiregular); - static_assert( CountedInvocable); static_assert(!CountedInvocable); static_assert( CountedInvocable); diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/views_iota.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/views_iota.pass.cpp index 6fcb4abe21de..1a6f7e1fe203 100644 --- a/libcxx/test/std/ranges/range.factories/range.iota.view/views_iota.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/views_iota.pass.cpp @@ -43,11 +43,6 @@ constexpr void testType(U u) { ASSERT_SAME_TYPE(decltype(std::views::iota(T(10))), std::ranges::iota_view); ASSERT_SAME_TYPE(decltype(std::views::iota(T(10), u)), std::ranges::iota_view); } - // Test that this is semiregular. - // Note: we cannot test perfect forwarding because both T and U must be copyable. - { - static_assert(std::semiregular>); - } } struct X {}; From ca8997eb7f6858768c58f538de3a5c85c8fad7ea Mon Sep 17 00:00:00 2001 From: "William S. 
Moses" Date: Mon, 27 Dec 2021 16:07:11 -0500 Subject: [PATCH 156/992] [MLIR] Add constant folder for fptosi and friends This patch adds constant folds for FPToSI/FPToUI/SIToFP/UIToFP Reviewed By: mehdi_amini, bondhugula Differential Revision: https://reviews.llvm.org/D116321 --- .../Dialect/Arithmetic/IR/ArithmeticOps.td | 4 ++ .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 61 ++++++++++++++++++ .../test/Dialect/Arithmetic/canonicalize.mlir | 62 +++++++++++++++++++ 3 files changed, 127 insertions(+) diff --git a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td index 0aff766414b9..a08f811d92b0 100644 --- a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td +++ b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td @@ -892,6 +892,7 @@ def Arith_UIToFPOp : Arith_IToFCastOp<"uitofp"> { rounded using the default rounding mode. When operating on vectors, casts elementwise. }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// @@ -906,6 +907,7 @@ def Arith_SIToFPOp : Arith_IToFCastOp<"sitofp"> { rounded using the default rounding mode. When operating on vectors, casts elementwise. }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// @@ -919,6 +921,7 @@ def Arith_FPToUIOp : Arith_FToICastOp<"fptoui"> { towards zero) unsigned integer value. When operating on vectors, casts elementwise. }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// @@ -932,6 +935,7 @@ def Arith_FPToSIOp : Arith_FToICastOp<"fptosi"> { towards zero) signed integer value. When operating on vectors, casts elementwise. 
}]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index a413fb263775..f0ce1b7a4d70 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -14,6 +14,8 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" +#include "llvm/ADT/APSInt.h" + using namespace mlir; using namespace mlir::arith; @@ -881,6 +883,18 @@ bool arith::UIToFPOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { return checkIntFloatCast(inputs, outputs); } +OpFoldResult arith::UIToFPOp::fold(ArrayRef operands) { + if (auto lhs = operands[0].dyn_cast_or_null()) { + const APInt &api = lhs.getValue(); + FloatType floatTy = getType().cast(); + APFloat apf(floatTy.getFloatSemantics(), + APInt::getZero(floatTy.getWidth())); + apf.convertFromAPInt(api, /*signed=*/false, APFloat::rmNearestTiesToEven); + return FloatAttr::get(floatTy, apf); + } + return {}; +} + //===----------------------------------------------------------------------===// // SIToFPOp //===----------------------------------------------------------------------===// @@ -889,6 +903,17 @@ bool arith::SIToFPOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { return checkIntFloatCast(inputs, outputs); } +OpFoldResult arith::SIToFPOp::fold(ArrayRef operands) { + if (auto lhs = operands[0].dyn_cast_or_null()) { + const APInt &api = lhs.getValue(); + FloatType floatTy = getType().cast(); + APFloat apf(floatTy.getFloatSemantics(), + APInt::getZero(floatTy.getWidth())); + apf.convertFromAPInt(api, /*signed=*/true, APFloat::rmNearestTiesToEven); + return FloatAttr::get(floatTy, apf); + } + return {}; +} //===----------------------------------------------------------------------===// // FPToUIOp //===----------------------------------------------------------------------===// @@ -897,6 
+922,24 @@ bool arith::FPToUIOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { return checkIntFloatCast(inputs, outputs); } +OpFoldResult arith::FPToUIOp::fold(ArrayRef operands) { + if (auto lhs = operands[0].dyn_cast_or_null()) { + const APFloat &apf = lhs.getValue(); + IntegerType intTy = getType().cast(); + bool ignored; + APSInt api(intTy.getWidth(), /*unsigned=*/true); + if (APFloat::opInvalidOp == + apf.convertToInteger(api, APFloat::rmTowardZero, &ignored)) { + // Undefined behavior invoked - the destination type can't represent + // the input constant. + return {}; + } + return IntegerAttr::get(getType(), api); + } + + return {}; +} + //===----------------------------------------------------------------------===// // FPToSIOp //===----------------------------------------------------------------------===// @@ -905,6 +948,24 @@ bool arith::FPToSIOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { return checkIntFloatCast(inputs, outputs); } +OpFoldResult arith::FPToSIOp::fold(ArrayRef operands) { + if (auto lhs = operands[0].dyn_cast_or_null()) { + const APFloat &apf = lhs.getValue(); + IntegerType intTy = getType().cast(); + bool ignored; + APSInt api(intTy.getWidth(), /*unsigned=*/false); + if (APFloat::opInvalidOp == + apf.convertToInteger(api, APFloat::rmTowardZero, &ignored)) { + // Undefined behavior invoked - the destination type can't represent + // the input constant. 
+ return {}; + } + return IntegerAttr::get(getType(), api); + } + + return {}; +} + //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index 96a630a248cf..834842c0f351 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -484,3 +484,65 @@ func @test_minui(%arg0 : i8) -> (i8, i8, i8, i8) { %3 = arith.minui %arg0, %c0 : i8 return %0, %1, %2, %3: i8, i8, i8, i8 } + +// ----- + +// CHECK-LABEL: @constant_FPtoUI( +func @constant_FPtoUI() -> i32 { + // CHECK: %[[C0:.+]] = arith.constant 2 : i32 + // CHECK: return %[[C0]] + %c0 = arith.constant 2.0 : f32 + %res = arith.fptoui %c0 : f32 to i32 + return %res : i32 +} + +// ----- +// CHECK-LABEL: @invalid_constant_FPtoUI( +func @invalid_constant_FPtoUI() -> i32 { + // CHECK: %[[C0:.+]] = arith.constant -2.000000e+00 : f32 + // CHECK: %[[C1:.+]] = arith.fptoui %[[C0]] : f32 to i32 + // CHECK: return %[[C1]] + %c0 = arith.constant -2.0 : f32 + %res = arith.fptoui %c0 : f32 to i32 + return %res : i32 +} + +// ----- +// CHECK-LABEL: @constant_FPtoSI( +func @constant_FPtoSI() -> i32 { + // CHECK: %[[C0:.+]] = arith.constant -2 : i32 + // CHECK: return %[[C0]] + %c0 = arith.constant -2.0 : f32 + %res = arith.fptosi %c0 : f32 to i32 + return %res : i32 +} + +// ----- +// CHECK-LABEL: @invalid_constant_FPtoSI( +func @invalid_constant_FPtoSI() -> i8 { + // CHECK: %[[C0:.+]] = arith.constant 2.000000e+10 : f32 + // CHECK: %[[C1:.+]] = arith.fptosi %[[C0]] : f32 to i8 + // CHECK: return %[[C1]] + %c0 = arith.constant 2.0e10 : f32 + %res = arith.fptosi %c0 : f32 to i8 + return %res : i8 +} + +// CHECK-LABEL: @constant_SItoFP( +func @constant_SItoFP() -> f32 { + // CHECK: %[[C0:.+]] = arith.constant -2.000000e+00 : f32 + // CHECK: return %[[C0]] 
+ %c0 = arith.constant -2 : i32 + %res = arith.sitofp %c0 : i32 to f32 + return %res : f32 +} + +// ----- +// CHECK-LABEL: @constant_UItoFP( +func @constant_UItoFP() -> f32 { + // CHECK: %[[C0:.+]] = arith.constant 2.000000e+00 : f32 + // CHECK: return %[[C0]] + %c0 = arith.constant 2 : i32 + %res = arith.uitofp %c0 : i32 to f32 + return %res : f32 +} From 99fc000c87c0ce3a5f698cfa5a67b177ad0cf5f8 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 28 Dec 2021 18:40:54 -0500 Subject: [PATCH 157/992] [MLIR] Expose atomicrmw and/or LLVM (dialect and IR) have atomics for and/or. This patch enables atomic_rmw ops in the standard dialect for and/or that lower to these (in addition to the existing atomics such as addi, etc). Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116345 --- .../mlir/Dialect/StandardOps/IR/StandardOpsBase.td | 5 ++++- mlir/lib/Analysis/AffineAnalysis.cpp | 2 ++ mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 4 ++++ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 11 +++++++++++ .../Conversion/StandardToLLVM/standard-to-llvm.mlir | 4 ++++ 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td b/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td index 802a32fce370..3016a197df0d 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td @@ -26,13 +26,16 @@ def ATOMIC_RMW_KIND_MINS : I64EnumAttrCase<"mins", 7>; def ATOMIC_RMW_KIND_MINU : I64EnumAttrCase<"minu", 8>; def ATOMIC_RMW_KIND_MULF : I64EnumAttrCase<"mulf", 9>; def ATOMIC_RMW_KIND_MULI : I64EnumAttrCase<"muli", 10>; +def ATOMIC_RMW_KIND_ORI : I64EnumAttrCase<"ori", 11>; +def ATOMIC_RMW_KIND_ANDI : I64EnumAttrCase<"andi", 12>; def AtomicRMWKindAttr : I64EnumAttr< "AtomicRMWKind", "", [ATOMIC_RMW_KIND_ADDF, ATOMIC_RMW_KIND_ADDI, ATOMIC_RMW_KIND_ASSIGN, ATOMIC_RMW_KIND_MAXF, ATOMIC_RMW_KIND_MAXS,
ATOMIC_RMW_KIND_MAXU, ATOMIC_RMW_KIND_MINF, ATOMIC_RMW_KIND_MINS, ATOMIC_RMW_KIND_MINU, - ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI]> { + ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI, ATOMIC_RMW_KIND_ORI, + ATOMIC_RMW_KIND_ANDI]> { let cppNamespace = "::mlir"; } diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp index 873d9b9aa3b4..79a367e33713 100644 --- a/mlir/lib/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Analysis/AffineAnalysis.cpp @@ -57,6 +57,8 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos, .Case([](arith::AddFOp) { return AtomicRMWKind::addf; }) .Case([](arith::MulFOp) { return AtomicRMWKind::mulf; }) .Case([](arith::AddIOp) { return AtomicRMWKind::addi; }) + .Case([](arith::AndIOp) { return AtomicRMWKind::andi; }) + .Case([](arith::OrIOp) { return AtomicRMWKind::ori; }) .Case([](arith::MulIOp) { return AtomicRMWKind::muli; }) .Case([](arith::MinFOp) { return AtomicRMWKind::minf; }) .Case([](arith::MaxFOp) { return AtomicRMWKind::maxf; }) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index f588521ac6ef..da429dd8af11 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -792,6 +792,10 @@ static Optional matchSimpleAtomicOp(AtomicRMWOp atomicOp) { return LLVM::AtomicBinOp::min; case AtomicRMWKind::minu: return LLVM::AtomicBinOp::umin; + case AtomicRMWKind::ori: + return LLVM::AtomicBinOp::_or; + case AtomicRMWKind::andi: + return LLVM::AtomicBinOp::_and; default: return llvm::None; } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 4c0f69e2ce0a..02d54472baf5 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -155,6 +155,8 @@ static LogicalResult verify(AtomicRMWOp op) { case AtomicRMWKind::mins: case AtomicRMWKind::minu: case AtomicRMWKind::muli: + 
case AtomicRMWKind::ori: + case AtomicRMWKind::andi: if (!op.getValue().getType().isa()) return op.emitOpError() << "with kind '" << stringifyAtomicRMWKind(op.getKind()) @@ -178,7 +180,12 @@ Attribute mlir::getIdentityValueAttr(AtomicRMWKind kind, Type resultType, case AtomicRMWKind::addf: case AtomicRMWKind::addi: case AtomicRMWKind::maxu: + case AtomicRMWKind::ori: return builder.getZeroAttr(resultType); + case AtomicRMWKind::andi: + return builder.getIntegerAttr( + resultType, + APInt::getAllOnes(resultType.cast().getWidth())); case AtomicRMWKind::maxs: return builder.getIntegerAttr( resultType, @@ -240,6 +247,10 @@ Value mlir::getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, return builder.create(loc, lhs, rhs); case AtomicRMWKind::minu: return builder.create(loc, lhs, rhs); + case AtomicRMWKind::ori: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::andi: + return builder.create(loc, lhs, rhs); // TODO: Add remaining reduction operations. default: (void)emitOptionalError(loc, "Reduction operation type not supported"); diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir index a5c88455124a..c3282e1903d6 100644 --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -502,6 +502,10 @@ func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : // CHECK: llvm.atomicrmw umin %{{.*}}, %{{.*}} acq_rel atomic_rmw addf %fval, %F[%i] : (f32, memref<10xf32>) -> f32 // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} acq_rel + atomic_rmw ori %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw _or %{{.*}}, %{{.*}} acq_rel + atomic_rmw andi %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw _and %{{.*}}, %{{.*}} acq_rel return } From 73104ad65bc07119eb6ed6c1bef7f42e22d459e7 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 3 Nov 
2021 11:58:05 -0500 Subject: [PATCH 158/992] [OpenMP][NFC] Move headers into include folder --- openmp/libomptarget/{src => include}/device.h | 0 openmp/libomptarget/{src => include}/rtl.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename openmp/libomptarget/{src => include}/device.h (100%) rename openmp/libomptarget/{src => include}/rtl.h (100%) diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/include/device.h similarity index 100% rename from openmp/libomptarget/src/device.h rename to openmp/libomptarget/include/device.h diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/include/rtl.h similarity index 100% rename from openmp/libomptarget/src/rtl.h rename to openmp/libomptarget/include/rtl.h From 7de5da2a67e5ef972535537fbcf10e2f2591e570 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 3 Nov 2021 12:57:43 -0500 Subject: [PATCH 159/992] [OpenMP][NFC] Move address space enum into OMPConstants header --- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 8 ++++++++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index d2f9bac16e5a..c82f8e69b743 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -140,6 +140,14 @@ enum OMPTgtExecModeFlags : int8_t { LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_GENERIC_SPMD) }; +enum class AddressSpace : unsigned { + Generic = 0, + Global = 1, + Shared = 3, + Constant = 4, + Local = 5, +}; + } // end namespace omp } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index c24ca90ce1ba..e0cb736952e8 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -153,14 +153,6 @@ static constexpr auto TAG = "[" DEBUG_TYPE "]"; namespace { -enum 
class AddressSpace : unsigned { - Generic = 0, - Global = 1, - Shared = 3, - Constant = 4, - Local = 5, -}; - struct AAHeapToShared; struct AAICVTracker; From ba70f3a5d9149c7409e0b3436c9b5dfd71d131ea Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 26 Dec 2021 14:33:37 -0600 Subject: [PATCH 160/992] [OpenMP][FIX] Make heap2shared deterministic Issue #52875 reported non-determinism, this is the first step to avoid it. We iterate over MallocCalls so we should keep the order stable. --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index e0cb736952e8..1de9ccf4bc6c 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2808,7 +2808,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared { if (CallBase *CB = dyn_cast(U)) if (!isa(CB->getArgOperand(0)) || !ED.isExecutedByInitialThreadOnly(*CB)) - MallocCalls.erase(CB); + MallocCalls.remove(CB); } findPotentialRemovedFreeCalls(A); @@ -2820,7 +2820,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared { } /// Collection of all malloc calls in a function. - SmallPtrSet MallocCalls; + SmallSetVector MallocCalls; /// Collection of potentially removed free calls in a function. 
SmallPtrSet PotentialRemovedFreeCalls; }; From 9f04a0ea433be40c4b4f61c2aa8522fe36a09fb9 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 28 Dec 2021 16:03:16 -0600 Subject: [PATCH 161/992] [OpenMP][FIX] Make AAExecutionDomain deterministic --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 1de9ccf4bc6c..e5a81430e7c6 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" @@ -2540,7 +2541,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { } /// Set of basic blocks that are executed by a single thread. - DenseSet SingleThreadedBBs; + SmallSetVector SingleThreadedBBs; /// Total number of basic blocks in this function. 
long unsigned NumBBs; @@ -2564,7 +2565,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { if (!A.checkForAllCallSites(PredForCallSite, *this, /* RequiresAllCallSites */ true, AllCallSitesKnown)) - SingleThreadedBBs.erase(&F->getEntryBlock()); + SingleThreadedBBs.remove(&F->getEntryBlock()); auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; @@ -2629,7 +2630,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { for (auto *BB : RPOT) { if (!MergePredecessorStates(BB)) - SingleThreadedBBs.erase(BB); + SingleThreadedBBs.remove(BB); } return (NumSingleThreadedBBs == SingleThreadedBBs.size()) From 6e2fcf8513a3a1deb4d04851ded6719ab6e1ea4c Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 11 Sep 2021 18:15:53 -0500 Subject: [PATCH 162/992] [Attributor][FIX] Ensure store uses are correlated with reloads While we skipped uses in stores if we can find all copies of the value when the memory is loaded, we did not correlate the use in the store with the use in the load. So far this led to less precise results in the offset calculations which prevented deductions. With the new EquivalentUseCB callback argument the user of checkForAllUses can be informed of the correlation and act on it appropriately. 
Differential Revision: https://reviews.llvm.org/D109662 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 9 ++++++++- llvm/lib/Transforms/IPO/Attributor.cpp | 20 +++++++++++++------ .../Transforms/IPO/AttributorAttributes.cpp | 9 ++++++++- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index d4cbc9bd20b7..bd7ab6c5d003 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1623,10 +1623,17 @@ struct Attributor { /// /// This method will evaluate \p Pred on all (transitive) uses of the /// associated value and return true if \p Pred holds every time. + /// If uses are skipped in favor of equivalent ones, e.g., if we look through + /// memory, the \p EquivalentUseCB will be used to give the caller an idea + /// what original use was replaced by a new one (or new ones). The visit is + /// cut short if \p EquivalentUseCB returns false and the function will return + /// false as well. bool checkForAllUses(function_ref Pred, const AbstractAttribute &QueryingAA, const Value &V, bool CheckBBLivenessOnly = false, - DepClassTy LivenessDepClass = DepClassTy::OPTIONAL); + DepClassTy LivenessDepClass = DepClassTy::OPTIONAL, + function_ref + EquivalentUseCB = nullptr); /// Emit a remark generically. 
/// diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 7e729e57153c..35bb91239964 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -999,10 +999,11 @@ bool Attributor::isAssumedDead(const BasicBlock &BB, return false; } -bool Attributor::checkForAllUses(function_ref Pred, - const AbstractAttribute &QueryingAA, - const Value &V, bool CheckBBLivenessOnly, - DepClassTy LivenessDepClass) { +bool Attributor::checkForAllUses( + function_ref Pred, + const AbstractAttribute &QueryingAA, const Value &V, + bool CheckBBLivenessOnly, DepClassTy LivenessDepClass, + function_ref EquivalentUseCB) { // Check the trivial case first as it catches void values. if (V.use_empty()) @@ -1053,8 +1054,15 @@ bool Attributor::checkForAllUses(function_ref Pred, << PotentialCopies.size() << " potential copies instead!\n"); for (Value *PotentialCopy : PotentialCopies) - for (const Use &U : PotentialCopy->uses()) - Worklist.push_back(&U); + for (const Use &CopyUse : PotentialCopy->uses()) { + if (EquivalentUseCB && !EquivalentUseCB(*U, CopyUse)) { + LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " + "rejected by the equivalence call back: " + << *CopyUse << "!\n"); + return false; + } + Worklist.push_back(&CopyUse); + } continue; } } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 468ae1c45c88..d69c5006673e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1293,8 +1293,15 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { LLVM_DEBUG(dbgs() << "[AAPointerInfo] User not handled " << *Usr << "\n"); return false; }; + auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) { + if (OffsetInfoMap.count(NewU)) + return OffsetInfoMap[NewU] == OffsetInfoMap[OldU]; + OffsetInfoMap[NewU] = OffsetInfoMap[OldU]; + return true; + }; if 
(!A.checkForAllUses(UsePred, *this, AssociatedValue, - /* CheckBBLivenessOnly */ true)) + /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, + EquivalentUseCB)) return indicatePessimisticFixpoint(); LLVM_DEBUG({ From 7bfcdbcbf368cea14a5236080af975d5878a46eb Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 3 Nov 2021 12:28:11 -0500 Subject: [PATCH 163/992] [OpenMP][NFCI] Embed the source location string size in the ident_t One of the unused ident_t fields now holds the size of the string (=const char *) field so we have an easier time dealing with those in the future. Differential Revision: https://reviews.llvm.org/D113126 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 22 +- clang/test/OpenMP/barrier_codegen.cpp | 4 +- clang/test/OpenMP/for_codegen.cpp | 4 +- clang/test/OpenMP/nvptx_SPMD_codegen.cpp | 14 +- .../OpenMP/parallel_num_threads_codegen.cpp | 2 +- .../OpenMP/parallel_proc_bind_codegen.cpp | 2 +- .../parallel_proc_bind_primary_codegen.cpp | 2 +- clang/test/OpenMP/sections_codegen.cpp | 4 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 21 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 165 ++++---- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 13 +- llvm/test/Transforms/OpenMP/deduplication.ll | 1 - .../get_hardware_num_threads_in_block_fold.ll | 2 +- .../OpenMP/parallel_region_merging.ll | 356 +++++++++--------- .../Transforms/OpenMP/remove_globalization.ll | 8 +- llvm/test/Transforms/OpenMP/spmdization.ll | 6 +- .../Transforms/OpenMP/spmdization_assumes.ll | 3 +- .../Transforms/OpenMP/spmdization_guarding.ll | 11 +- 18 files changed, 345 insertions(+), 295 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c314044c66dd..40e2094ea4ce 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1429,10 +1429,11 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 
SourceLocation Loc, unsigned Flags) { + uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); } else { std::string FunctionName; if (const auto *FD = dyn_cast_or_null(CGF.CurFuncDecl)) @@ -1441,12 +1442,12 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, const char *FileName = PLoc.getFilename(); unsigned Line = PLoc.getLine(); unsigned Column = PLoc.getColumn(); - SrcLocStr = - OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); + SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, + Column, SrcLocStrSize); } unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); - return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), - Reserved2Flags); + return OMPBuilder.getOrCreateIdent( + SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -1457,10 +1458,11 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (CGM.getLangOpts().OpenMPIRBuilder) { SmallString<128> Buffer; OMPBuilder.updateToLocation(CGF.Builder.saveIP()); + uint32_t SrcLocStrSize; auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( - getIdentStringFromSourceLocation(CGF, Loc, Buffer)); + getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); return OMPBuilder.getOrCreateThreadID( - OMPBuilder.getOrCreateIdent(SrcLocStr)); + OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); } llvm::Value *ThreadID = nullptr; @@ -9527,8 +9529,9 @@ llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { + uint32_t SrcLocStrSize; if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) - return 
OMPBuilder.getOrCreateDefaultSrcLocStr(); + return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); SourceLocation Loc; if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { @@ -9552,7 +9555,8 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, - PLoc.getLine(), PLoc.getColumn()); + PLoc.getLine(), PLoc.getColumn(), + SrcLocStrSize); } /// Emit the arrays used to pass the captures and map information to the diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp index 35b2ed721276..89704b36e13b 100644 --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -19,8 +19,8 @@ #define HEADER // CHECK: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } -// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } void foo() {} diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index 64652e530f6a..813b1313f1ee 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -22,8 +22,8 @@ // PROF-INSTR-PATH: constant [25 x i8] c"for_codegen-test.profraw\00" // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, 
i8* } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 {{[0-9]+}}, i8* // CHECK-DAG: [[I:@.+]] ={{.*}} global i8 1, // CHECK-DAG: [[J:@.+]] ={{.*}} global i8 2, // CHECK-DAG: [[K:@.+]] ={{.*}} global i8 3, diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index eebc41b44f84..c2e98eec0cd9 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -11,13 +11,13 @@ int a; // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 
0, i32 2050, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp index 0fdb6cfbcfef..e22f73057035 100644 --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff 
--git a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp index 8b9e09191b24..e6103335b778 100644 --- a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp @@ -14,7 +14,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp index 34de488c1dfc..98b7f0374345 100644 --- a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp index 07b95f53fa62..f9cdc5cc7ed1 100644 --- a/clang/test/OpenMP/sections_codegen.cpp +++ b/clang/test/OpenMP/sections_codegen.cpp @@ -9,8 +9,8 @@ // 
expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* -// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 {{[0-9]+}}, i8* // CHECK-LABEL: foo void foo() { extern void mayThrow(); mayThrow(); }; // CHECK-LABEL: bar diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9976d1961ed1..fa9244649206 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -663,28 +663,31 @@ class OpenMPIRBuilder { Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); /// Return the (LLVM-IR) string describing the source location \p LocStr. - Constant *getOrCreateSrcLocStr(StringRef LocStr); + Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the default source location. - Constant *getOrCreateDefaultSrcLocStr(); + Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the source location identified by /// the arguments. Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, unsigned Column); + unsigned Line, unsigned Column, + uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as /// fallback if \p DL does not specify the function name. 
- Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr); + Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, + Function *F = nullptr); /// Return the (LLVM-IR) string describing the source location \p Loc. - Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); + Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, + uint32_t &SrcLocStrSize); /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. /// TODO: Create a enum class for the Reserve2Flags - Value *getOrCreateIdent(Constant *SrcLocStr, - omp::IdentFlag Flags = omp::IdentFlag(0), - unsigned Reserve2Flags = 0); + Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, + omp::IdentFlag Flags = omp::IdentFlag(0), + unsigned Reserve2Flags = 0); /// Create a global flag \p Namein the module with initial value \p Value. GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); @@ -754,7 +757,7 @@ class OpenMPIRBuilder { StringMap SrcLocStrMap; /// Map to remember existing ident_t*. - DenseMap, Value *> IdentMap; + DenseMap, Constant *> IdentMap; /// Helper that contains information about regions we need to outline /// during finalization. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 5157d51fd18c..de2507631f00 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -21,7 +21,9 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" @@ -37,6 +39,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include #include #define DEBUG_TYPE "openmp-ir-builder" @@ -255,19 +258,21 @@ GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) { return GV; } -Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, - IdentFlag LocFlags, - unsigned Reserve2Flags) { +Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, + uint32_t SrcLocStrSize, + IdentFlag LocFlags, + unsigned Reserve2Flags) { // Enable "C-mode". 
LocFlags |= OMP_IDENT_FLAG_KMPC; - Value *&Ident = + Constant *&Ident = IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; if (!Ident) { Constant *I32Null = ConstantInt::getNullValue(Int32); - Constant *IdentData[] = { - I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), - ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; + Constant *IdentData[] = {I32Null, + ConstantInt::get(Int32, uint32_t(LocFlags)), + ConstantInt::get(Int32, Reserve2Flags), + ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr}; Constant *Initializer = ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData); @@ -290,10 +295,12 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, } } - return Builder.CreatePointerCast(Ident, IdentPtr); + return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr, + uint32_t &SrcLocStrSize) { + SrcLocStrSize = LocStr.size(); Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { Constant *Initializer = @@ -314,8 +321,8 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, - unsigned Column) { + unsigned Line, unsigned Column, + uint32_t &SrcLocStrSize) { SmallString<128> Buffer; Buffer.push_back(';'); Buffer.append(FileName); @@ -327,17 +334,21 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, Buffer.append(std::to_string(Column)); Buffer.push_back(';'); Buffer.push_back(';'); - return getOrCreateSrcLocStr(Buffer.str()); + return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize); } -Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { - return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); +Constant * +OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) { + StringRef UnknownLoc = 
";unknown;unknown;0;0;;"; + return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, + uint32_t &SrcLocStrSize, + Function *F) { DILocation *DIL = DL.get(); if (!DIL) - return getOrCreateDefaultSrcLocStr(); + return getOrCreateDefaultSrcLocStr(SrcLocStrSize); StringRef FileName = M.getName(); if (DIFile *DIF = DIL->getFile()) if (Optional Source = DIF->getSource()) @@ -346,12 +357,13 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { if (Function.empty() && F) Function = F->getName(); return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), - DIL->getColumn()); + DIL->getColumn(), SrcLocStrSize); } -Constant * -OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { - return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent()); +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc, + uint32_t &SrcLocStrSize) { + return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize, + Loc.IP.getBlock()->getParent()); } Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { @@ -393,9 +405,11 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, break; } - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), - getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Args[] = { + getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags), + getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))}; // If we are in a cancellable parallel region, barriers are cancellation // points. 
@@ -441,8 +455,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, llvm_unreachable("Unknown cancel kind!"); } - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); @@ -513,8 +528,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadID = getOrCreateThreadID(Ident); if (NumThreads) { @@ -871,8 +887,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { // Build call void __kmpc_flush(ident_t *loc) - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Args[] = {getOrCreateIdent(SrcLocStr)}; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)}; Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); } @@ -886,8 +903,9 @@ void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) { void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value 
*Args[] = {Ident, getOrCreateThreadID(Ident)}; // Ignore return result until untied tasks are supported. @@ -903,8 +921,9 @@ void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) { void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { // Build call __kmpc_omp_taskyield(loc, thread_id, 0); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Constant *I32Null = ConstantInt::getNullValue(Int32); Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; @@ -1114,14 +1133,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( Module *Module = Func->getParent(); Value *RedArrayPtr = Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr"); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); bool CanGenerateAtomic = llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) { return RI.AtomicReductionGen; }); - Value *Ident = getOrCreateIdent( - SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE - : IdentFlag(0)); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize, + CanGenerateAtomic + ? 
IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE + : IdentFlag(0)); Value *ThreadId = getOrCreateThreadID(Ident); Constant *NumVariables = Builder.getInt32(NumReductions); const DataLayout &DL = Module->getDataLayout(); @@ -1235,8 +1256,9 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, return Loc.IP; Directive OMPD = Directive::OMPD_master; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -1258,8 +1280,9 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc, return Loc.IP; Directive OMPD = Directive::OMPD_masked; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, Filter}; Value *ArgsEnd[] = {Ident, ThreadId}; @@ -1480,8 +1503,9 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Builder.restoreIP(CLI->getPreheaderIP()); Builder.SetCurrentDebugLocation(DL); - Constant *SrcLocStr = getOrCreateSrcLocStr(DL); - Value *SrcLoc = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -1608,8 +1632,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( // Set up the source location value for OpenMP runtime. 
Builder.SetCurrentDebugLocation(DL); - Constant *SrcLocStr = getOrCreateSrcLocStr(DL); - Value *SrcLoc = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -2379,8 +2404,9 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); @@ -2407,8 +2433,9 @@ OpenMPIRBuilder::createSingle(const LocationDescription &Loc, } Directive OMPD = Directive::OMPD_single; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2436,8 +2463,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( return Loc.IP; Directive OMPD = Directive::OMPD_critical; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *LockVar = getOMPCriticalRegionLock(CriticalName); Value *Args[] = {Ident, ThreadId, LockVar}; @@ -2486,8 +2514,9 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, Value *DependBaseAddrGEP = 
Builder.CreateInBoundsGEP( ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)}); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP}; @@ -2512,8 +2541,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( Instruction *ExitCall = nullptr; if (IsThreads) { - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2718,8 +2748,9 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc, IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Size, Allocator}; @@ -2734,8 +2765,9 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Addr, Allocator}; Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); 
@@ -2748,8 +2780,9 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate( IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Constant *ThreadPrivateCache = getOrCreateOMPInternalVariable(Int8PtrPtr, Name); @@ -2767,8 +2800,9 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); @@ -2820,8 +2854,9 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, if (!updateToLocation(Loc)) return; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index e5a81430e7c6..4c578d56db83 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1590,8 +1590,10 @@ struct OpenMPOpt { &F.getEntryBlock(), F.getEntryBlock().begin())); // Create a fallback location if non was found. 
// TODO: Use the debug locations of the calls instead. - Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); - Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); + uint32_t SrcLocStrSize; + Constant *Loc = + OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); + Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); } return Ident; } @@ -3221,8 +3223,11 @@ struct AAKernelInfoFunction : AAKernelInfo { OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); OMPInfoCache.OMPBuilder.updateToLocation(Loc); - auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + auto *SrcLocStr = + OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = + OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); // Add check for Tid in RegionCheckTidBB diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll index fbf554e3f9f6..9228fa052e37 100644 --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -19,7 +19,6 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 -; CHECK-DAG: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 ; UTC_ARGS: --enable diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll 
b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index 602d4f0be577..b72031a9b68c 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -13,7 +13,7 @@ target triple = "nvptx64" ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll index 72af4cece3b4..b4ff9d9ffee6 100644 --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4694,10 +4694,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4719,8 +4719,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -4753,7 +4753,7 @@ entry: ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], 
align 4 ; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4837,10 +4837,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4864,15 +4864,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4893,7 +4893,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -4921,11 +4921,11 @@ entry: ; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4948,15 +4948,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -4976,7 +4976,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5006,10 +5006,10 @@ entry: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5033,15 +5033,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -5065,7 +5065,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5092,10 +5092,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5118,15 +5118,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5150,7 +5150,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5179,13 +5179,13 @@ entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: ; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5210,15 +5210,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -5239,7 +5239,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5270,10 +5270,10 @@ entry: ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5295,8 +5295,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5345,11 +5345,11 @@ entry: ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5372,15 +5372,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], 
label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -5401,7 +5401,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5440,10 +5440,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5465,11 +5465,11 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5514,11 +5514,11 @@ entry: ; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5543,27 +5543,27 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK1: omp_region.end4: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK1-NEXT: br label 
[[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split.split.split: ; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5584,7 +5584,7 @@ entry: ; CHECK1: omp.par.merged.split.split.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK1: omp_region.body5.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: br label [[OMP_REGION_END4]] ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -5595,7 +5595,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5741,10 +5741,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5768,8 +5768,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5811,10 +5811,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: 
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5836,8 +5836,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -5870,7 +5870,7 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 
dereferenceable(24) @[[GLOB1:[0-9]+]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5954,10 +5954,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5981,15 +5981,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6010,7 +6010,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6038,11 +6038,11 @@ entry: ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6065,15 +6065,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -6093,7 +6093,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6123,10 +6123,10 @@ entry: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6150,15 +6150,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -6182,7 +6182,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6209,10 +6209,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6235,15 +6235,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6267,7 +6267,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6296,13 +6296,13 @@ entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6327,15 +6327,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -6356,7 +6356,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6387,10 +6387,10 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6412,8 +6412,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6462,11 +6462,11 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6489,15 +6489,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], 
label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -6518,7 +6518,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6557,10 +6557,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6582,11 +6582,11 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6631,11 +6631,11 @@ entry: ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6660,27 +6660,27 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK2-NEXT: br label 
[[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: ; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6701,7 +6701,7 @@ entry: ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK2: omp_region.body5.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -6712,7 +6712,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6858,10 +6858,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6885,8 +6885,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 8ecae7f5af9a..4a99013e01a9 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -32,7 +32,7 @@ define void @kernel() { ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-NEXT: call void @foo() 
#[[ATTR4:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR4]] -; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -41,7 +41,7 @@ define void @kernel() { ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-DISABLED-NEXT: ret void ; @@ -183,14 +183,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } ; CHECK: attributes #[[ATTR4]] = { nounwind } ; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK-DISABLED: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } ; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index e686dd6f09f4..22bbe9728626 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -113,6 +113,7 @@ ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -126,6 +127,7 @@ ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* 
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -1469,7 +1471,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; AMDGPU: region.guarded.end: ; AMDGPU-NEXT: br label [[REGION_BARRIER]] ; AMDGPU: region.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] ; AMDGPU: region.exit: ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -1505,7 +1507,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; NVPTX: region.guarded.end: ; NVPTX-NEXT: br label [[REGION_BARRIER]] ; NVPTX: region.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] ; NVPTX: region.exit: ; NVPTX-NEXT: br label [[FOR_COND:%.*]] diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll 
b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index a0d43feecc3c..93065a263e67 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -25,6 +25,7 @@ target triple = "nvptx64" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 @@ -50,7 +51,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index 5d6334e7fa2b..b2c4aca9d5e5 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -47,6 +47,7 @@ target triple = 
"nvptx64" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 @@ -81,7 +82,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] @@ -105,7 +106,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end1: ; CHECK-NEXT: br label [[REGION_BARRIER2]] ; CHECK: region.barrier2: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) ; CHECK-NEXT: br label [[REGION_EXIT3]] ; CHECK: region.exit3: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 @@ -126,7 
+127,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end6: ; CHECK-NEXT: br label [[REGION_BARRIER7]] ; CHECK: region.barrier7: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) ; CHECK-NEXT: br label [[REGION_EXIT8:%.*]] ; CHECK: region.exit8: ; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -143,7 +144,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end11: ; CHECK-NEXT: br label [[REGION_BARRIER12]] ; CHECK: region.barrier12: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) ; CHECK-NEXT: br label [[REGION_EXIT13:%.*]] ; CHECK: region.exit13: ; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -160,7 +161,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end16: ; CHECK-NEXT: br label [[REGION_BARRIER17]] ; CHECK: region.barrier17: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) ; CHECK-NEXT: br label [[REGION_EXIT18:%.*]] ; CHECK: region.exit18: ; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 From 7f1eaeafe7a13fc642e9510f43ec19390b32157d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 4 Nov 2021 08:48:19 -0500 Subject: [PATCH 164/992] [OpenMP][NFC] Extract assumption helpers into own header file --- clang/include/clang/AST/OpenMPClause.h | 1 + clang/lib/Parse/ParseOpenMP.cpp | 1 + 
clang/lib/Sema/SemaOpenMP.cpp | 1 + llvm/include/llvm/Frontend/OpenMP/OMPAssume.h | 55 +++++++++++++++++++ .../llvm/Frontend/OpenMP/OMPConstants.h | 28 ---------- 5 files changed, 58 insertions(+), 28 deletions(-) create mode 100644 llvm/include/llvm/Frontend/OpenMP/OMPAssume.h diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 3fd1b6d30080..3ecc1d40fafc 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -32,6 +32,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPContext.h" #include "llvm/Support/Casting.h" diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 300b022d83b9..7c783ef0b02b 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPContext.h" using namespace clang; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index ba0481874577..61e6d4995b9b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -36,6 +36,7 @@ #include "llvm/ADT/PointerEmbeddedInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPAssume.h b/llvm/include/llvm/Frontend/OpenMP/OMPAssume.h new file mode 100644 index 000000000000..c7462ffe6bc0 --- /dev/null +++ b/llvm/include/llvm/Frontend/OpenMP/OMPAssume.h @@ -0,0 +1,55 @@ +//===- OpenMP/OMPAssume.h --- OpenMP assumption helper functions - C++ -*-===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides helper functions and classes to deal with OpenMP +/// assumptions, e.g., as used by `[begin/end] assumes` and `assume`. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FRONTEND_OPENMP_OMPASSUME_H +#define LLVM_FRONTEND_OPENMP_OMPASSUME_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +namespace omp { + +/// Helper to describe assume clauses. +struct AssumptionClauseMappingInfo { + /// The identifier describing the (beginning of the) clause. + llvm::StringLiteral Identifier; + /// Flag to determine if the identifier is a full name or the start of a name. + bool StartsWith; + /// Flag to determine if a directive lists follows. + bool HasDirectiveList; + /// Flag to determine if an expression follows. + bool HasExpression; +}; + +/// All known assume clauses. +static constexpr AssumptionClauseMappingInfo AssumptionClauseMappings[] = { +#define OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, \ + HasExpression) \ + {Identifier, StartsWith, HasDirectiveList, HasExpression}, +#include "llvm/Frontend/OpenMP/OMPKinds.def" +}; + +inline std::string getAllAssumeClauseOptions() { + std::string S; + for (const AssumptionClauseMappingInfo &ACMI : AssumptionClauseMappings) + S += (S.empty() ? 
"'" : "', '") + ACMI.Identifier.str(); + return S + "'"; +} + +} // namespace omp + +} // namespace llvm + +#endif // LLVM_FRONTEND_OPENMP_OMPASSUME_H diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index c82f8e69b743..0479e305fcb5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -25,7 +25,6 @@ class Module; class ArrayType; class StructType; class PointerType; -class StringRef; class FunctionType; namespace omp { @@ -80,33 +79,6 @@ enum class IdentFlag { #define OMP_IDENT_FLAG(Enum, ...) constexpr auto Enum = omp::IdentFlag::Enum; #include "llvm/Frontend/OpenMP/OMPKinds.def" -/// Helper to describe assume clauses. -struct AssumptionClauseMappingInfo { - /// The identifier describing the (beginning of the) clause. - llvm::StringLiteral Identifier; - /// Flag to determine if the identifier is a full name or the start of a name. - bool StartsWith; - /// Flag to determine if a directive lists follows. - bool HasDirectiveList; - /// Flag to determine if an expression follows. - bool HasExpression; -}; - -/// All known assume clauses. -static constexpr AssumptionClauseMappingInfo AssumptionClauseMappings[] = { -#define OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, \ - HasExpression) \ - {Identifier, StartsWith, HasDirectiveList, HasExpression}, -#include "llvm/Frontend/OpenMP/OMPKinds.def" -}; - -inline std::string getAllAssumeClauseOptions() { - std::string S; - for (const AssumptionClauseMappingInfo &ACMI : AssumptionClauseMappings) - S += (S.empty() ? "'" : "', '") + ACMI.Identifier.str(); - return S + "'"; -} - /// \note This needs to be kept in sync with kmp.h enum sched_type. /// Todo: Update kmp.h to include this file, and remove the enums in kmp.h /// To complete this, more enum values will need to be moved here. 
From 16da2140045808b2aea1d28366ca7d326eb3c809 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 29 Dec 2021 00:16:56 -0600 Subject: [PATCH 165/992] [OpenMP][FIX] Also update unit test after API change --- .../Frontend/OpenMPIRBuilderTest.cpp | 83 ++++++++++--------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fba36d42d185..f200ef57be57 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -303,8 +303,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { EXPECT_EQ(Barrier->getNumUses(), 0U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), - CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -342,7 +341,6 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { BB = BB->getTerminator()->getSuccessor(0); EXPECT_EQ(BB->size(), 4U); - CallInst *GTID = dyn_cast(&BB->front()); EXPECT_NE(GTID, nullptr); EXPECT_EQ(GTID->arg_size(), 1U); @@ -360,7 +358,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), NewIP.getBlock()); + EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), + NewIP.getBlock()); EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); EXPECT_NE(GTID1, nullptr); @@ -377,8 +376,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { EXPECT_EQ(Barrier->getNumUses(), 0U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - 
EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), - CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -502,8 +500,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { Builder.CreateStore(F->arg_begin(), PrivAI); Builder.restoreIP(CodeGenIP); - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), @@ -809,8 +807,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { Builder.CreateStore(F->arg_begin(), PrivAI); Builder.restoreIP(CodeGenIP); - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), @@ -2011,8 +2009,8 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { EntryBB = ThenBB->getUniquePredecessor(); // simple instructions for body - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -2164,8 +2162,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { // body begin Builder.restoreIP(CodeGenIP); Builder.CreateStore(F->arg_begin(), PrivAI); - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -2538,32 +2536,33 @@ 
TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - IntegerType* Int32 = Type::getInt32Ty(M->getContext()); - AllocaInst* MasterAddress = Builder.CreateAlloca(Int32->getPointerTo()); - AllocaInst* PrivAddress = Builder.CreateAlloca(Int32->getPointerTo()); + IntegerType *Int32 = Type::getInt32Ty(M->getContext()); + AllocaInst *MasterAddress = Builder.CreateAlloca(Int32->getPointerTo()); + AllocaInst *PrivAddress = Builder.CreateAlloca(Int32->getPointerTo()); BasicBlock *EntryBB = BB; OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, PrivAddress, Int32, /*BranchtoEnd*/ true); - BranchInst* EntryBr = dyn_cast_or_null(EntryBB->getTerminator()); + BranchInst *EntryBr = dyn_cast_or_null(EntryBB->getTerminator()); EXPECT_NE(EntryBr, nullptr); EXPECT_TRUE(EntryBr->isConditional()); - BasicBlock* NotMasterBB = EntryBr->getSuccessor(0); - BasicBlock* CopyinEnd = EntryBr->getSuccessor(1); - CmpInst* CMP = dyn_cast_or_null(EntryBr->getCondition()); + BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); + BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); + CmpInst *CMP = dyn_cast_or_null(EntryBr->getCondition()); EXPECT_NE(CMP, nullptr); EXPECT_NE(NotMasterBB, nullptr); EXPECT_NE(CopyinEnd, nullptr); - BranchInst* NotMasterBr = dyn_cast_or_null(NotMasterBB->getTerminator()); + BranchInst *NotMasterBr = + dyn_cast_or_null(NotMasterBB->getTerminator()); EXPECT_NE(NotMasterBr, nullptr); EXPECT_FALSE(NotMasterBr->isConditional()); - EXPECT_EQ(CopyinEnd,NotMasterBr->getSuccessor(0)); + EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); } TEST_F(OpenMPIRBuilderTest, SingleDirective) { @@ -2602,8 +2601,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { EntryBB = ThenBB->getUniquePredecessor(); // simple instructions for body - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 
Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -3106,8 +3105,9 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t StrSize; + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *SumLocal = Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); @@ -3339,8 +3339,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t StrSize; + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *SumLocal = Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); @@ -3359,8 +3360,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t StrSize; + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); @@ -3695,8 +3697,11 @@ TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { IRBuilder<> Builder(BB); - Constant *Cst1 = OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5); - Constant *Cst2 = 
OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5); + uint32_t StrSize; + Constant *Cst1 = + OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); + Constant *Cst2 = + OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); SmallVector Names = {Cst1, Cst2}; GlobalVariable *OffloadMaptypesGlobal = @@ -3798,11 +3803,15 @@ TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { SmallVector Flags = {0, 2}; - Constant *SrcLocCst = OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5); - Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst); + uint32_t StrSize; + Constant *SrcLocCst = + OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize); + Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); - Constant *Cst1 = OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5); - Constant *Cst2 = OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5); + Constant *Cst1 = + OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); + Constant *Cst2 = + OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); SmallVector Names = {Cst1, Cst2}; GlobalVariable *Maptypes = From 3e0c512ce6cec6d7b83b87c49697adf5a67e4196 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 28 Dec 2021 23:55:32 -0600 Subject: [PATCH 166/992] [OpenMP] Simplify all stores in the device code Similar to loads, we want to be aggressive when it comes to store simplification. Not everything in LLVM handles dead stores well when address space casts are involved, we can simply ask the Attributor to do it for us though. 
Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D109998 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 + .../OpenMP/custom_state_machines.ll | 144 ------------------ llvm/test/Transforms/OpenMP/spmdization.ll | 50 ------ ...mdization_guarding_two_reaching_kernels.ll | 3 - 4 files changed, 2 insertions(+), 197 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 4c578d56db83..464d74905cd0 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4509,6 +4509,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) { bool UsedAssumedInformation = false; A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, UsedAssumedInformation); + } else if (auto *SI = dyn_cast(&I)) { + A.getOrCreateAAFor(IRPosition::value(*SI)); } } } diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index b9eda9eb7f85..e4cee1c24909 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -1001,9 +1001,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1027,9 +1024,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: 
[[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1159,9 +1153,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1287,9 +1278,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1313,9 +1301,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], 
align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1417,9 +1402,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1443,9 +1425,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1547,9 +1526,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store 
i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1573,9 +1549,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1771,9 +1744,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1797,9 +1767,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void 
@__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1845,9 +1812,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -2012,9 +1976,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2038,9 +1999,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__3(i32* 
[[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2169,9 +2127,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2296,9 +2251,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2322,9 +2274,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2425,9 +2374,6 @@ attributes #9 = { convergent 
nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2451,9 +2397,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2554,9 +2497,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2580,9 +2520,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] 
= alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2776,9 +2713,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2802,9 +2736,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2850,9 +2781,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 0, i32* 
[[DOTZERO_ADDR]], align 4 -; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2977,9 +2905,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3003,9 +2928,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3088,9 +3010,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; 
AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3173,9 +3092,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3199,9 +3115,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3262,9 +3175,6 @@ attributes #9 = { convergent nounwind 
readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3288,9 +3198,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3351,9 +3258,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void 
@__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3377,9 +3281,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3507,9 +3408,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3533,9 +3431,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 
[[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3581,9 +3476,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3708,9 +3600,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -3734,9 +3623,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, 
i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -3819,9 +3705,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -3904,9 +3787,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -3930,9 +3810,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -3993,9 +3870,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -4019,9 +3893,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -4082,9 +3953,6 @@ attributes #9 = { convergent nounwind 
readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -4108,9 +3976,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -4238,9 +4103,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], 
i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -4264,9 +4126,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -4312,9 +4171,6 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 22bbe9728626..5051bce98279 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -250,7 +250,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 
#[[ATTR3:[0-9]+]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -304,7 +303,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -467,8 +465,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -479,8 +475,6 @@ define internal 
void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -491,8 +485,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -503,8 +495,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -605,7 +595,6 @@ define weak void 
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -659,7 +648,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -837,8 +825,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: call 
void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -849,8 +835,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -861,8 +845,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -873,8 +855,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; 
NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void @@ -976,7 +956,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1030,7 +1009,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1221,8 +1199,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca 
i8**, align 8 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1236,8 +1212,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1251,8 +1225,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1266,8 +1238,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 
[[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1374,7 +1344,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1428,7 +1397,6 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1654,8 +1622,6 @@ define 
internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1669,8 +1635,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1684,8 +1648,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1699,8 +1661,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; 
NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** @@ -1860,7 +1820,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] @@ -1907,7 +1866,6 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] @@ -2325,8 +2283,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -2337,8 +2293,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void @@ -2349,8 +2303,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret 
void @@ -2361,8 +2313,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index cb01af9f7a35..6b1983dd9591 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -193,9 +193,6 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) ; CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] ; CHECK-NEXT: ret void From 5602c866c062622905899ee151f0dc868c846e7a Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 28 Dec 2021 23:59:35 -0600 Subject: [PATCH 167/992] [Attributor] Look through allocated heap memory AAPointerInfo, and thereby other places, can look already through internal global and stack memory. 
This patch enables them to look through heap memory returned by functions with a `noalias` return. In the future we can look through `noalias` arguments as well but that will require AAIsDead to learn that such memory can be inspected by the caller later on. We also need teach AAPointerInfo about dominance to actually deal with memory that might not be `null` or `undef` initialized. D106397 is a first step in that direction already. Reviewed By: kuter Differential Revision: https://reviews.llvm.org/D109170 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 3 +- llvm/lib/Transforms/IPO/Attributor.cpp | 16 +- .../Transforms/IPO/AttributorAttributes.cpp | 15 +- .../Transforms/Attributor/heap_to_stack.ll | 4 +- .../Attributor/heap_to_stack_gpu.ll | 1 - .../Transforms/Attributor/memory_locations.ll | 71 +- .../Attributor/value-simplify-pointer-info.ll | 995 ++++++++++++++++-- 7 files changed, 1006 insertions(+), 99 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index bd7ab6c5d003..884f8191368c 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -173,7 +173,8 @@ combineOptionalValuesInAAValueLatice(const Optional &A, const Optional &B, Type *Ty); /// Return the initial value of \p Obj with type \p Ty if that is a constant. -Constant *getInitialValueForObj(Value &Obj, Type &Ty); +Constant *getInitialValueForObj(Value &Obj, Type &Ty, + const TargetLibraryInfo *TLI); /// Collect all potential underlying objects of \p Ptr at position \p CtxI in /// \p Objects. 
Assumed information is used and dependences onto \p QueryingAA diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 35bb91239964..94515b60f1dd 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ValueTracking.h" @@ -202,9 +203,17 @@ bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, return NoRecurseAA.isAssumedNoRecurse(); } -Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty) { +Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty, + const TargetLibraryInfo *TLI) { if (isa(Obj)) return UndefValue::get(&Ty); + if (isNoAliasFn(&Obj, TLI)) { + if (isMallocLikeFn(&Obj, TLI) || isAlignedAllocLikeFn(&Obj, TLI)) + return UndefValue::get(&Ty); + if (isCallocLikeFn(&Obj, TLI)) + return Constant::getNullValue(&Ty); + return nullptr; + } auto *GV = dyn_cast(&Obj); if (!GV || !GV->hasLocalLinkage()) return nullptr; @@ -300,6 +309,8 @@ bool AA::getPotentialCopiesOfStoredValue( SmallVector PIs; SmallVector NewCopies; + const auto *TLI = + A.getInfoCache().getTargetLibraryInfoForFunction(*SI.getFunction()); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa(Obj)) @@ -316,7 +327,8 @@ bool AA::getPotentialCopiesOfStoredValue( dbgs() << "Underlying object is a valid nullptr, giving up.\n";); return false; } - if (!isa(Obj) && !isa(Obj)) { + if (!isa(Obj) && !isa(Obj) && + !isNoAliasFn(Obj, TLI)) { LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj << "\n";); return false; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 
d69c5006673e..8bfa940449fb 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1161,6 +1161,10 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { return true; }; + const auto *TLI = getAnchorScope() + ? A.getInfoCache().getTargetLibraryInfoForFunction( + *getAnchorScope()) + : nullptr; auto UsePred = [&](const Use &U, bool &Follow) -> bool { Value *CurPtr = U.get(); User *Usr = U.getUser(); @@ -1275,6 +1279,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { if (auto *CB = dyn_cast(Usr)) { if (CB->isLifetimeStartOrEnd()) return true; + if (TLI && isFreeCall(CB, TLI)) + return true; if (CB->isArgOperand(&U)) { unsigned ArgNo = CB->getArgOperandNo(&U); const auto &CSArgPI = A.getAAFor( @@ -2332,6 +2338,8 @@ struct AANoRecurseFunction final : AANoRecurseImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { AANoRecurseImpl::initialize(A); + // TODO: We should build a call graph ourselves to enable this in the module + // pass as well. 
if (const Function *F = getAnchorScope()) if (A.getInfoCache().getSccSize(*F) != 1) indicatePessimisticFixpoint(); @@ -5243,6 +5251,8 @@ struct AAValueSimplifyImpl : AAValueSimplify { if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L)) return false; + const auto *TLI = + A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction()); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa(Obj)) @@ -5257,9 +5267,10 @@ struct AAValueSimplifyImpl : AAValueSimplify { continue; return false; } - if (!isa(Obj) && !isa(Obj)) + if (!isa(Obj) && !isa(Obj) && + !isNoAliasFn(Obj, TLI)) return false; - Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType()); + Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI); if (!InitialVal || !Union(*InitialVal)) return false; diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index b9965b0d0f87..2f9e004bad3b 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals ; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify 
-attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -626,7 +626,6 @@ define i32 @malloc_in_loop(i32 %0) { ; IS________OPM: 8: ; IS________OPM-NEXT: [[TMP9:%.*]] = call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -; IS________OPM-NEXT: store i32 1, i32* [[TMP10]], align 8 ; IS________OPM-NEXT: br label [[TMP4]] ; IS________OPM: 11: ; IS________OPM-NEXT: ret i32 5 @@ -646,7 +645,6 @@ define i32 @malloc_in_loop(i32 %0) { ; IS________NPM: 8: ; IS________NPM-NEXT: [[TMP9:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -; IS________NPM-NEXT: store i32 1, i32* [[TMP10]], align 8 ; IS________NPM-NEXT: br label [[TMP4]] ; IS________NPM: 11: ; IS________NPM-NEXT: ret i32 5 diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll index 52e01dd525f0..7e3cf0f6955f 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll @@ -494,7 +494,6 @@ define i32 @malloc_in_loop(i32 %0) { ; CHECK: 8: ; CHECK-NEXT: [[TMP9:%.*]] = call noalias i8* @malloc(i64 noundef 4) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -; CHECK-NEXT: store i32 1, i32* [[TMP10]], align 8 ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 11: ; CHECK-NEXT: ret i32 
5 diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index 7e9e3d2a33ff..13aaa4464350 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -122,11 +122,18 @@ return: ; preds = %if.end, %if.then define dso_local i8* @internal_only_rec_static_helper_malloc_noescape(i32 %arg) { ; FIXME: This is actually inaccessiblememonly because the malloced memory does not escape -; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_helper_malloc_noescape -; CHECK-SAME: (i32 [[ARG:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec_static_malloc_noescape(i32 [[ARG]]) -; CHECK-NEXT: ret i8* [[CALL]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_only_rec_static_helper_malloc_noescape +; IS__TUNIT____-SAME: (i32 [[ARG:%.*]]) { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec_static_malloc_noescape(i32 [[ARG]]) +; IS__TUNIT____-NEXT: ret i8* [[CALL]] +; +; IS__CGSCC____: Function Attrs: inaccessiblememonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_only_rec_static_helper_malloc_noescape +; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec_static_malloc_noescape(i32 [[ARG]]) +; IS__CGSCC____-NEXT: ret i8* [[CALL]] ; entry: %call = call i8* @internal_only_rec_static_malloc_noescape(i32 %arg) @@ -135,24 +142,42 @@ entry: define internal i8* @internal_only_rec_static_malloc_noescape(i32 %arg) { ; FIXME: This is actually inaccessiblememonly because the malloced memory does not escape -; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_malloc_noescape -; CHECK-SAME: (i32 [[ARG:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[REM:%.*]] = srem i32 [[ARG]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 1 -; CHECK-NEXT: br i1 
[[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[ARG]], 2 -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec(i32 [[DIV]]) -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 -; CHECK-NEXT: [[CALL1:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) -; CHECK-NEXT: store i8 0, i8* [[CALL1]], align 1 -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i8* [ [[CALL]], [[IF_THEN]] ], [ null, [[IF_END]] ] -; CHECK-NEXT: ret i8* [[RETVAL_0]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_only_rec_static_malloc_noescape +; IS__TUNIT____-SAME: (i32 [[ARG:%.*]]) { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[REM:%.*]] = srem i32 [[ARG]], 2 +; IS__TUNIT____-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 1 +; IS__TUNIT____-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: [[DIV:%.*]] = sdiv i32 [[ARG]], 2 +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec(i32 [[DIV]]) +; IS__TUNIT____-NEXT: br label [[RETURN:%.*]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) +; IS__TUNIT____-NEXT: br label [[RETURN]] +; IS__TUNIT____: return: +; IS__TUNIT____-NEXT: [[RETVAL_0:%.*]] = phi i8* [ [[CALL]], [[IF_THEN]] ], [ null, [[IF_END]] ] +; IS__TUNIT____-NEXT: ret i8* [[RETVAL_0]] +; +; IS__CGSCC____: Function Attrs: inaccessiblememonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_only_rec_static_malloc_noescape +; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[REM:%.*]] = srem i32 [[ARG]], 2 +; IS__CGSCC____-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 1 +; IS__CGSCC____-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; IS__CGSCC____: 
if.then: +; IS__CGSCC____-NEXT: [[DIV:%.*]] = sdiv i32 [[ARG]], 2 +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec(i32 [[DIV]]) +; IS__CGSCC____-NEXT: br label [[RETURN:%.*]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) +; IS__CGSCC____-NEXT: br label [[RETURN]] +; IS__CGSCC____: return: +; IS__CGSCC____-NEXT: [[RETVAL_0:%.*]] = phi i8* [ [[CALL]], [[IF_THEN]] ], [ null, [[IF_END]] ] +; IS__CGSCC____-NEXT: ret i8* [[RETVAL_0]] ; entry: %rem = srem i32 %arg, 2 diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index 59aebe1ff340..f1300c59512a 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -54,6 +54,7 @@ ; CHECK: @[[BYTES1:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef ; CHECK: @[[BYTES2:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef ; CHECK: @[[REC_STORAGE:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef +; CHECK: @[[GLOBAL:[a-zA-Z0-9_$"\\.-]+]] = internal global [[STRUCT_STY:%.*]] zeroinitializer, align 8 ;. 
define void @write_arg(i32* %p, i32 %v) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly @@ -115,7 +116,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 ; IS__TUNIT_OPM-NEXT: [[I:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR9:[0-9]+]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR10:[0-9]+]] ; IS__TUNIT_OPM-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3 ; IS__TUNIT_OPM-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4 ; IS__TUNIT_OPM-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5 @@ -142,7 +143,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__TUNIT_OPM-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2 ; IS__TUNIT_OPM-NEXT: store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]] ; IS__TUNIT_OPM-NEXT: [[I12:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn @@ -151,7 +152,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[S:%.*]] = 
alloca [[STRUCT_S]], align 4 ; IS__TUNIT_NPM-NEXT: [[I:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR7:[0-9]+]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR9:[0-9]+]] ; IS__TUNIT_NPM-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3 ; IS__TUNIT_NPM-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4 ; IS__TUNIT_NPM-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5 @@ -178,7 +179,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__TUNIT_NPM-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2 ; IS__TUNIT_NPM-NEXT: store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]] ; IS__TUNIT_NPM-NEXT: [[I12:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn @@ -187,16 +188,16 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 ; IS__CGSCC_OPM-NEXT: [[I:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR11:[0-9]+]] +; IS__CGSCC_OPM-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR13:[0-9]+]] ; IS__CGSCC_OPM-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3 ; IS__CGSCC_OPM-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4 ; IS__CGSCC_OPM-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5 ; IS__CGSCC_OPM-NEXT: [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0 -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR12:[0-9]+]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR14:[0-9]+]] ; IS__CGSCC_OPM-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1 -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR12]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR14]] ; IS__CGSCC_OPM-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2 -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR12]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR14]] ; IS__CGSCC_OPM-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3 ; IS__CGSCC_OPM-NEXT: store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]] ; IS__CGSCC_OPM-NEXT: [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00 @@ 
-214,7 +215,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__CGSCC_OPM-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2 ; IS__CGSCC_OPM-NEXT: store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]] ; IS__CGSCC_OPM-NEXT: [[I12:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn @@ -223,16 +224,16 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 ; IS__CGSCC_NPM-NEXT: [[I:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR9:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I]]) #[[ATTR12:[0-9]+]] ; IS__CGSCC_NPM-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3 ; IS__CGSCC_NPM-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4 ; IS__CGSCC_NPM-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5 ; IS__CGSCC_NPM-NEXT: [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0 -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR10:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void 
@write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13:[0-9]+]] ; IS__CGSCC_NPM-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1 -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR10]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]] ; IS__CGSCC_NPM-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2 -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR10]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]] ; IS__CGSCC_NPM-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3 ; IS__CGSCC_NPM-NEXT: store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]] ; IS__CGSCC_NPM-NEXT: [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00 @@ -250,7 +251,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; IS__CGSCC_NPM-NEXT: [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2 ; IS__CGSCC_NPM-NEXT: store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]] ; IS__CGSCC_NPM-NEXT: [[I12:%.*]] = bitcast %struct.S* [[S]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -411,7 +412,7 
@@ define void @local_alloca_simplifiable_2() { ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; IS__TUNIT_NPM-NEXT: [[I:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: br label [[FOR_COND:%.*]] ; IS__TUNIT_NPM: for.cond: ; IS__TUNIT_NPM-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -481,7 +482,7 @@ define void @local_alloca_simplifiable_2() { ; IS__TUNIT_NPM-NEXT: br label [[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]] ; IS__TUNIT_NPM: for.end38: ; IS__TUNIT_NPM-NEXT: [[I24:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I24]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I24]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2() { @@ -545,7 +546,7 @@ define void @local_alloca_simplifiable_2() { ; IS__CGSCC_OPM-NEXT: store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]] ; IS__CGSCC_OPM-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500 ; IS__CGSCC_OPM-NEXT: [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32* -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR13:[0-9]+]] +; IS__CGSCC_OPM-NEXT: call void 
@write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR15:[0-9]+]] ; IS__CGSCC_OPM-NEXT: br label [[FOR_COND28:%.*]] ; IS__CGSCC_OPM: for.cond28: ; IS__CGSCC_OPM-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ] @@ -571,7 +572,7 @@ define void @local_alloca_simplifiable_2() { ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; IS__CGSCC_NPM-NEXT: [[I:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1024, i8* nocapture nofree noundef nonnull align 16 dereferenceable(1024) [[I]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: br label [[FOR_COND:%.*]] ; IS__CGSCC_NPM: for.cond: ; IS__CGSCC_NPM-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] @@ -628,7 +629,7 @@ define void @local_alloca_simplifiable_2() { ; IS__CGSCC_NPM-NEXT: store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]] ; IS__CGSCC_NPM-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500 ; IS__CGSCC_NPM-NEXT: [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32* -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR11:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I22]], i32 noundef 0) #[[ATTR14:[0-9]+]] ; IS__CGSCC_NPM-NEXT: br label [[FOR_COND28:%.*]] ; IS__CGSCC_NPM: for.cond28: ; IS__CGSCC_NPM-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ] @@ -825,7 +826,7 @@ define i32 
@multi_obj_simplifiable_1(i32 %cnd) { ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__TUNIT_OPM: cond.true: @@ -834,7 +835,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; IS__TUNIT_OPM-NEXT: br label [[COND_END]] ; IS__TUNIT_OPM: cond.end: ; IS__TUNIT_OPM-NEXT: [[I2:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: ret i32 5 ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn @@ -843,7 +844,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__TUNIT_NPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__TUNIT_NPM: 
cond.true: @@ -852,7 +853,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; IS__TUNIT_NPM-NEXT: br label [[COND_END]] ; IS__TUNIT_NPM: cond.end: ; IS__TUNIT_NPM-NEXT: [[I2:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: ret i32 5 ; ; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind willreturn @@ -861,7 +862,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__CGSCC_OPM: cond.true: @@ -870,7 +871,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; IS__CGSCC_OPM-NEXT: br label [[COND_END]] ; IS__CGSCC_OPM: cond.end: ; IS__CGSCC_OPM-NEXT: [[I2:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: ret i32 5 ; ; IS__CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn @@ -879,7 +880,7 @@ define i32 
@multi_obj_simplifiable_1(i32 %cnd) { ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__CGSCC_NPM: cond.true: @@ -888,7 +889,7 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) { ; IS__CGSCC_NPM-NEXT: br label [[COND_END]] ; IS__CGSCC_NPM: cond.end: ; IS__CGSCC_NPM-NEXT: [[I2:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: ret i32 5 ; entry: @@ -929,7 +930,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__TUNIT_OPM: cond.true: @@ -938,7 +939,7 @@ define i32 
@multi_obj_simplifiable_2(i32 %cnd) { ; IS__TUNIT_OPM-NEXT: br label [[COND_END]] ; IS__TUNIT_OPM: cond.end: ; IS__TUNIT_OPM-NEXT: [[I1:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: ret i32 5 ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn @@ -947,7 +948,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__TUNIT_NPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__TUNIT_NPM: cond.true: @@ -956,7 +957,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; IS__TUNIT_NPM-NEXT: br label [[COND_END]] ; IS__TUNIT_NPM: cond.end: ; IS__TUNIT_NPM-NEXT: [[I1:%.*]] = bitcast i32* [[L]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: ret i32 5 ; ; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind willreturn @@ -965,7 +966,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; IS__CGSCC_OPM-NEXT: 
entry: ; IS__CGSCC_OPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__CGSCC_OPM: cond.true: @@ -974,7 +975,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; IS__CGSCC_OPM-NEXT: br label [[COND_END]] ; IS__CGSCC_OPM: cond.end: ; IS__CGSCC_OPM-NEXT: [[I1:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: ret i32 5 ; ; IS__CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn @@ -983,7 +984,7 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) { ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[L:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: [[I:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0 ; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]] ; IS__CGSCC_NPM: cond.true: @@ -992,7 +993,7 @@ define i32 
@multi_obj_simplifiable_2(i32 %cnd) { ; IS__CGSCC_NPM-NEXT: br label [[COND_END]] ; IS__CGSCC_NPM: cond.end: ; IS__CGSCC_NPM-NEXT: [[I1:%.*]] = bitcast i32* [[L]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: ret i32 5 ; entry: @@ -1090,9 +1091,9 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1 ; IS__CGSCC_OPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR4:[0-9]+]] { ; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR12]] -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR12]] -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR12]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR14]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR14]] +; IS__CGSCC_OPM-NEXT: 
call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR14]] ; IS__CGSCC_OPM-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3 ; IS__CGSCC_OPM-NEXT: store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]] ; IS__CGSCC_OPM-NEXT: [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00 @@ -1115,9 +1116,9 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1 ; IS__CGSCC_NPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR4:[0-9]+]] { ; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR10]] -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR10]] -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR10]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR13]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR13]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree 
noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR13]] ; IS__CGSCC_NPM-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3 ; IS__CGSCC_NPM-NEXT: store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]] ; IS__CGSCC_NPM-NEXT: [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00 @@ -1377,7 +1378,7 @@ define void @static_global_simplifiable_2() { ; IS__CGSCC_OPM-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] ; IS__CGSCC_OPM: for.end23: ; IS__CGSCC_OPM-NEXT: store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]] -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR13]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR15]] ; IS__CGSCC_OPM-NEXT: br label [[FOR_COND25:%.*]] ; IS__CGSCC_OPM: for.cond25: ; IS__CGSCC_OPM-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ] @@ -1450,7 +1451,7 @@ define void @static_global_simplifiable_2() { ; IS__CGSCC_NPM-NEXT: br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] ; IS__CGSCC_NPM: for.end23: ; IS__CGSCC_NPM-NEXT: store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]] -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR11]] +; IS__CGSCC_NPM-NEXT: call 
void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR14]] ; IS__CGSCC_NPM-NEXT: br label [[FOR_COND25:%.*]] ; IS__CGSCC_NPM: for.cond25: ; IS__CGSCC_NPM-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ] @@ -1724,11 +1725,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align ; IS__CGSCC_OPM-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5 ; IS__CGSCC_OPM-NEXT: store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]] ; IS__CGSCC_OPM-NEXT: [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0 -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR12]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR14]] ; IS__CGSCC_OPM-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1 -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR12]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR14]] ; IS__CGSCC_OPM-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2 -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR12]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR14]] ; IS__CGSCC_OPM-NEXT: [[F11:%.*]] = 
getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3 ; IS__CGSCC_OPM-NEXT: [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]] ; IS__CGSCC_OPM-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3 @@ -1774,11 +1775,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align ; IS__CGSCC_NPM-NEXT: [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5 ; IS__CGSCC_NPM-NEXT: store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]] ; IS__CGSCC_NPM-NEXT: [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0 -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR10]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR13]] ; IS__CGSCC_NPM-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1 -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR10]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR13]] ; IS__CGSCC_NPM-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2 -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR10]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR13]] ; IS__CGSCC_NPM-NEXT: [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3 ; IS__CGSCC_NPM-NEXT: [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa 
[[TBAA7]] ; IS__CGSCC_NPM-NEXT: [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3 @@ -2095,7 +2096,7 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) { ; IS__CGSCC_OPM-NEXT: store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]] ; IS__CGSCC_OPM-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500 ; IS__CGSCC_OPM-NEXT: [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32* -; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR13]] +; IS__CGSCC_OPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR15]] ; IS__CGSCC_OPM-NEXT: br label [[FOR_COND27:%.*]] ; IS__CGSCC_OPM: for.cond27: ; IS__CGSCC_OPM-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ] @@ -2174,7 +2175,7 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) { ; IS__CGSCC_NPM-NEXT: store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]] ; IS__CGSCC_NPM-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500 ; IS__CGSCC_NPM-NEXT: [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32* -; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR11]] +; IS__CGSCC_NPM-NEXT: call void @write_arg(i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[I21]], i32 noundef 0) #[[ATTR14]] ; IS__CGSCC_NPM-NEXT: br label [[FOR_COND27:%.*]] ; IS__CGSCC_NPM: for.cond27: ; IS__CGSCC_NPM-NEXT: [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ] @@ -2307,9 +2308,9 @@ define i32 @local_alloca_not_simplifiable_1() { ; IS__TUNIT_OPM-NEXT: [[X:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: [[Y:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: [[I:%.*]] = bitcast i32* [[X]] to i8* -; 
IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: [[I1:%.*]] = bitcast i32* [[Y]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR9]] +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR10]] ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[Y]], align 4, !tbaa [[TBAA3]] ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[X]], align 4, !tbaa [[TBAA3]] ; IS__TUNIT_OPM-NEXT: [[I2:%.*]] = bitcast i32* [[X]] to i8* @@ -2332,9 +2333,9 @@ define i32 @local_alloca_not_simplifiable_1() { ; IS__TUNIT_NPM-NEXT: [[X:%.*]] = alloca i32, align 4 ; IS__TUNIT_NPM-NEXT: [[Y:%.*]] = alloca i32, align 4 ; IS__TUNIT_NPM-NEXT: [[I:%.*]] = bitcast i32* [[X]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: [[I1:%.*]] = bitcast i32* [[Y]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR9]] ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[Y]], align 4, !tbaa [[TBAA3]] ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[X]], align 4, !tbaa [[TBAA3]] ; IS__TUNIT_NPM-NEXT: [[I2:%.*]] = 
bitcast i32* [[X]] to i8* @@ -2357,9 +2358,9 @@ define i32 @local_alloca_not_simplifiable_1() { ; IS__CGSCC_OPM-NEXT: [[X:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: [[Y:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: [[I:%.*]] = bitcast i32* [[X]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: [[I1:%.*]] = bitcast i32* [[Y]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR13]] ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[Y]], align 4, !tbaa [[TBAA3]] ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[X]], align 4, !tbaa [[TBAA3]] ; IS__CGSCC_OPM-NEXT: [[I2:%.*]] = bitcast i32* [[X]] to i8* @@ -2382,9 +2383,9 @@ define i32 @local_alloca_not_simplifiable_1() { ; IS__CGSCC_NPM-NEXT: [[X:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: [[Y:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: [[I:%.*]] = bitcast i32* [[X]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: [[I1:%.*]] = bitcast i32* [[Y]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 
4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR12]] ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[Y]], align 4, !tbaa [[TBAA3]] ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[X]], align 4, !tbaa [[TBAA3]] ; IS__CGSCC_NPM-NEXT: [[I2:%.*]] = bitcast i32* [[X]] to i8* @@ -3359,6 +3360,852 @@ for.end: ret void } +define dso_local i32 @round_trip_malloc(i32 %x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@round_trip_malloc +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR11:[0-9]+]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_malloc +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP2]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@round_trip_malloc +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11:[0-9]+]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; 
IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_malloc +; IS__CGSCC_NPM-SAME: (i32 returned [[X:%.*]]) #[[ATTR9:[0-9]+]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: ret i32 [[X]] +; +entry: + %call = call noalias i8* @malloc(i64 4) norecurse + %0 = bitcast i8* %call to i32* + store i32 %x, i32* %0, align 4 + %1 = load i32, i32* %0, align 4 + %2 = bitcast i32* %0 to i8* + call void @free(i8* %2) norecurse + ret i32 %1 +} + +define dso_local i32 @round_trip_malloc_constant() { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant() { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 7, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant() { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: ret i32 7 +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant +; IS__CGSCC_OPM-SAME: () #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 7, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load 
i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant +; IS__CGSCC_NPM-SAME: () #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: ret i32 7 +; +entry: + %call = call noalias i8* @malloc(i64 4) norecurse + %0 = bitcast i8* %call to i32* + store i32 7, i32* %0, align 4 + %1 = load i32, i32* %0, align 4 + %2 = bitcast i32* %0 to i8* + call void @free(i8* %2) norecurse + ret i32 %1 +} + +declare noalias i8* @malloc(i64) + +declare void @free(i8*) + +define dso_local i32 @conditional_malloc(i32 %x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@conditional_malloc +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; IS__TUNIT_OPM: if.then: +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: br label [[IF_END]] +; IS__TUNIT_OPM: if.end: +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@conditional_malloc +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__TUNIT_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; IS__TUNIT_NPM: if.then: +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; 
IS__TUNIT_NPM-NEXT: br label [[IF_END]] +; IS__TUNIT_NPM: if.end: +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP2]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@conditional_malloc +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: br label [[IF_END]] +; IS__CGSCC_OPM: if.end: +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@conditional_malloc +; IS__CGSCC_NPM-SAME: (i32 returned [[X:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; IS__CGSCC_NPM: if.then: +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: br label [[IF_END]] +; IS__CGSCC_NPM: if.end: +; IS__CGSCC_NPM-NEXT: ret i32 [[X]] +; +entry: + %call = call noalias i8* @malloc(i64 4) norecurse + %0 = bitcast i8* %call to i32* + %tobool = icmp ne i32 %x, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 %x, i32* %0, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +define dso_local i32 @round_trip_calloc(i32 
%x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@round_trip_calloc +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_calloc +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__TUNIT_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP2]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@round_trip_calloc +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_calloc +; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__CGSCC_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__CGSCC_NPM-NEXT: call void 
@llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]] +; +entry: + %call = call noalias i8* @calloc(i64 4, i64 1) norecurse + %0 = bitcast i8* %call to i32* + store i32 %x, i32* %0, align 4 + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +define dso_local i32 @round_trip_calloc_constant() { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@round_trip_calloc_constant() { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 11, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_calloc_constant() { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__TUNIT_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__TUNIT_NPM-NEXT: store i32 11, i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP2]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@round_trip_calloc_constant +; IS__CGSCC_OPM-SAME: () #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 4, i64 noundef 1) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 11, i32* [[TMP0]], align 4 +; 
IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_calloc_constant +; IS__CGSCC_NPM-SAME: () #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__CGSCC_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__CGSCC_NPM-NEXT: store i32 11, i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]] +; +entry: + %call = call noalias i8* @calloc(i64 4, i64 1) norecurse + %0 = bitcast i8* %call to i32* + store i32 11, i32* %0, align 4 + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +declare noalias i8* @calloc(i64, i64) + +define dso_local i32 @conditional_calloc(i32 %x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@conditional_calloc +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT_OPM: if.then: +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: br label [[IF_END]] +; IS__TUNIT_OPM: if.end: +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* [[TMP2]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@conditional_calloc +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { 
+; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__TUNIT_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__TUNIT_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT_NPM: if.then: +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: br label [[IF_END]] +; IS__TUNIT_NPM: if.end: +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP2]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@conditional_calloc +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: br label [[IF_END]] +; IS__CGSCC_OPM: if.end: +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* [[TMP2]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@conditional_calloc +; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__CGSCC_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; 
IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* +; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC_NPM: if.then: +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: br label [[IF_END]] +; IS__CGSCC_NPM: if.end: +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]] +; +entry: + %call = call noalias i8* @calloc(i64 1, i64 4) norecurse + %0 = bitcast i8* %call to i32* + %tobool = icmp ne i32 %x, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 %x, i32* %0, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = load i32, i32* %0, align 4 + %2 = bitcast i32* %0 to i8* + call void @free(i8* %2) norecurse + ret i32 %1 +} + +define dso_local i32 @conditional_calloc_zero(i1 %c) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@conditional_calloc_zero +; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT_OPM: if.then: +; IS__TUNIT_OPM-NEXT: store i32 0, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: br label [[IF_END]] +; IS__TUNIT_OPM: if.end: +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* [[TMP2]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@conditional_calloc_zero +; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: 
[[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__TUNIT_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__TUNIT_NPM-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT_NPM: if.then: +; IS__TUNIT_NPM-NEXT: br label [[IF_END]] +; IS__TUNIT_NPM: if.end: +; IS__TUNIT_NPM-NEXT: ret i32 0 +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@conditional_calloc_zero +; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @calloc(i64 noundef 1, i64 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: br label [[IF_END]] +; IS__CGSCC_OPM: if.end: +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* [[TMP2]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@conditional_calloc_zero +; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 +; IS__CGSCC_NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP0]] to i8* +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 4, i1 false) +; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC_NPM: if.then: +; IS__CGSCC_NPM-NEXT: br label [[IF_END]] +; IS__CGSCC_NPM: if.end: +; IS__CGSCC_NPM-NEXT: ret i32 0 +; +entry: + %call = call noalias i8* @calloc(i64 1, i64 4) norecurse + %0 = bitcast i8* %call to i32* 
+ br i1 %c, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 0, i32* %0, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = load i32, i32* %0, align 4 + %2 = bitcast i32* %0 to i8* + call void @free(i8* %2) norecurse + ret i32 %1 +} + +define dso_local i32* @malloc_like(i32 %s) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@malloc_like +; IS__TUNIT_OPM-SAME: (i32 [[S:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64 +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_OPM-NEXT: ret i32* [[TMP0]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@malloc_like +; IS__TUNIT_NPM-SAME: (i32 [[S:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64 +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR10:[0-9]+]] +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__TUNIT_NPM-NEXT: ret i32* [[TMP0]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@malloc_like +; IS__CGSCC_OPM-SAME: (i32 [[S:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64 +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_OPM-NEXT: ret i32* [[TMP0]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@malloc_like +; IS__CGSCC_NPM-SAME: (i32 [[S:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[CONV:%.*]] = sext i32 [[S]] to i64 +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; IS__CGSCC_NPM-NEXT: ret i32* [[TMP0]] +; +entry: + %conv = sext 
i32 %s to i64 + %call = call noalias i8* @malloc(i64 %conv) norecurse + %0 = bitcast i8* %call to i32* + ret i32* %0 +} + +define dso_local i32 @round_trip_malloc_like(i32 %x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@round_trip_malloc_like +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP0]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_malloc_like +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR10]] +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__TUNIT_NPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR10]] +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP0]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@round_trip_malloc_like +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP0]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_malloc_like +; IS__CGSCC_NPM-SAME: (i32 
[[X:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @malloc_like(i32 noundef 4) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__CGSCC_NPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP0]] +; +entry: + %call = call i32* @malloc_like(i32 4) norecurse + store i32 %x, i32* %call, align 4 + %0 = load i32, i32* %call, align 4 + %1 = bitcast i32* %call to i8* + call void @free(i8* %1) norecurse + ret i32 %0 +} + +define dso_local i32 @round_trip_unknown_alloc(i32 %x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP0]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR10]] +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__TUNIT_NPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR10]] +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP0]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11]] { +; 
IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP0]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@round_trip_unknown_alloc +; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @unknown_alloc(i32 noundef 4) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__CGSCC_NPM-NEXT: call void @free(i8* noundef [[TMP1]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP0]] +; +entry: + %call = call i32* @unknown_alloc(i32 4) norecurse + store i32 %x, i32* %call, align 4 + %0 = load i32, i32* %call, align 4 + %1 = bitcast i32* %call to i8* + call void @free(i8* %1) norecurse + ret i32 %0 +} + +declare noalias i32* @unknown_alloc(i32) + +define dso_local i32 @conditional_unknown_alloc(i32 %x) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc +; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT_OPM: if.then: +; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__TUNIT_OPM-NEXT: br label [[IF_END]] +; IS__TUNIT_OPM: if.end: +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__TUNIT_OPM-NEXT: 
[[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* [[TMP1]]) #[[ATTR11]] +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP0]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc +; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR10]] +; IS__TUNIT_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT_NPM: if.then: +; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__TUNIT_NPM-NEXT: br label [[IF_END]] +; IS__TUNIT_NPM: if.end: +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__TUNIT_NPM-NEXT: call void @free(i8* [[TMP1]]) #[[ATTR10]] +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP0]] +; +; IS__CGSCC_OPM: Function Attrs: norecurse +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc +; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR11]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__CGSCC_OPM-NEXT: br label [[IF_END]] +; IS__CGSCC_OPM: if.end: +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* [[TMP1]]) #[[ATTR11]] +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP0]] +; +; IS__CGSCC_NPM: Function Attrs: norecurse +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@conditional_unknown_alloc +; IS__CGSCC_NPM-SAME: (i32 [[X:%.*]]) #[[ATTR9]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: 
[[CALL:%.*]] = call noalias i32* @unknown_alloc(i32 noundef 4) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0 +; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC_NPM: if.then: +; IS__CGSCC_NPM-NEXT: store i32 [[X]], i32* [[CALL]], align 4 +; IS__CGSCC_NPM-NEXT: br label [[IF_END]] +; IS__CGSCC_NPM: if.end: +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = bitcast i32* [[CALL]] to i8* +; IS__CGSCC_NPM-NEXT: call void @free(i8* [[TMP1]]) #[[ATTR9]] +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP0]] +; +entry: + %call = call noalias i32* @unknown_alloc(i32 4) norecurse + %tobool = icmp ne i32 %x, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 %x, i32* %call, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = load i32, i32* %call, align 4 + %1 = bitcast i32* %call to i8* + call void @free(i8* %1) norecurse + ret i32 %0 +} + +%struct.STy = type { float*, double*, %struct.STy* } + +@global = internal global %struct.STy zeroinitializer, align 8 + +; We mark %dst as writeonly and %src as readonly, that is (for now) all we can expect. 
+define dso_local void @test_nested_memory(float* %dst, double* %src) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test_nested_memory +; IS__TUNIT_OPM-SAME: (float* nocapture nofree writeonly [[DST:%.*]], double* nocapture nofree readonly [[SRC:%.*]]) { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 +; IS__TUNIT_OPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2 +; IS__TUNIT_OPM-NEXT: store %struct.STy* @global, %struct.STy** [[INNER]], align 8 +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(24) i8* @malloc(i64 noundef 24) +; IS__TUNIT_OPM-NEXT: [[DST1:%.*]] = bitcast i8* [[CALL]] to float** +; IS__TUNIT_OPM-NEXT: store float* [[DST]], float** [[DST1]], align 8 +; IS__TUNIT_OPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 8 +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[SRC2]] to double** +; IS__TUNIT_OPM-NEXT: store double* [[SRC]], double** [[TMP0]], align 8 +; IS__TUNIT_OPM-NEXT: store i8* [[CALL]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8 +; IS__TUNIT_OPM-NEXT: call fastcc void @nested_memory_callee(%struct.STy* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(24) [[LOCAL]]) #[[ATTR12:[0-9]+]] +; IS__TUNIT_OPM-NEXT: ret void +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_nested_memory +; IS__TUNIT_NPM-SAME: (float* nocapture nofree writeonly [[DST:%.*]], double* nocapture nofree readonly [[SRC:%.*]]) { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 +; IS__TUNIT_NPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2 +; IS__TUNIT_NPM-NEXT: store %struct.STy* @global, %struct.STy** [[INNER]], align 8 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 24, align 1 +; IS__TUNIT_NPM-NEXT: [[DST1:%.*]] 
= bitcast i8* [[TMP0]] to float** +; IS__TUNIT_NPM-NEXT: store float* [[DST]], float** [[DST1]], align 8 +; IS__TUNIT_NPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 8 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC2]] to double** +; IS__TUNIT_NPM-NEXT: store double* [[SRC]], double** [[TMP1]], align 8 +; IS__TUNIT_NPM-NEXT: store i8* [[TMP0]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8 +; IS__TUNIT_NPM-NEXT: [[LOCAL_CAST:%.*]] = bitcast %struct.STy* [[LOCAL]] to float** +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load float*, float** [[LOCAL_CAST]], align 8 +; IS__TUNIT_NPM-NEXT: [[LOCAL_0_1:%.*]] = getelementptr [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load double*, double** [[LOCAL_0_1]], align 8 +; IS__TUNIT_NPM-NEXT: [[LOCAL_0_2:%.*]] = getelementptr [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2 +; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load %struct.STy*, %struct.STy** [[LOCAL_0_2]], align 8 +; IS__TUNIT_NPM-NEXT: call fastcc void @nested_memory_callee(float* [[TMP2]], double* [[TMP3]], %struct.STy* [[TMP4]]) #[[ATTR11:[0-9]+]] +; IS__TUNIT_NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_nested_memory +; IS__CGSCC_OPM-SAME: (float* nocapture nofree writeonly [[DST:%.*]], double* nocapture nofree readonly [[SRC:%.*]]) { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast %struct.STy* [[LOCAL]] to i8* +; IS__CGSCC_OPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2 +; IS__CGSCC_OPM-NEXT: store %struct.STy* @global, %struct.STy** [[INNER]], align 8 +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(24) i8* @malloc(i64 noundef 24) +; IS__CGSCC_OPM-NEXT: [[DST1:%.*]] = bitcast i8* [[CALL]] to float** +; IS__CGSCC_OPM-NEXT: 
store float* [[DST]], float** [[DST1]], align 8 +; IS__CGSCC_OPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 8 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC2]] to double** +; IS__CGSCC_OPM-NEXT: store double* [[SRC]], double** [[TMP1]], align 8 +; IS__CGSCC_OPM-NEXT: store i8* [[CALL]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8 +; IS__CGSCC_OPM-NEXT: call fastcc void @nested_memory_callee(%struct.STy* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(24) [[LOCAL]]) #[[ATTR16:[0-9]+]] +; IS__CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test_nested_memory +; IS__CGSCC_NPM-SAME: (float* nocapture nofree writeonly [[DST:%.*]], double* nocapture nofree readonly [[SRC:%.*]]) { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 +; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct.STy* [[LOCAL]] to i8* +; IS__CGSCC_NPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 24, align 1 +; IS__CGSCC_NPM-NEXT: [[DST1:%.*]] = bitcast i8* [[TMP1]] to float** +; IS__CGSCC_NPM-NEXT: store float* [[DST]], float** [[DST1]], align 8 +; IS__CGSCC_NPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC2]] to double** +; IS__CGSCC_NPM-NEXT: store double* [[SRC]], double** [[TMP2]], align 8 +; IS__CGSCC_NPM-NEXT: store i8* [[TMP1]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8 +; IS__CGSCC_NPM-NEXT: call fastcc void @nested_memory_callee(float* noalias nocapture nofree nonnull readnone undef, double* noalias nocapture nofree nonnull readnone undef, %struct.STy* noalias nocapture nofree nonnull readnone align 8 dereferenceable(24) undef) 
#[[ATTR15:[0-9]+]] +; IS__CGSCC_NPM-NEXT: ret void +; +entry: + %local = alloca %struct.STy, align 8 + %0 = bitcast %struct.STy* %local to i8* + %inner = getelementptr inbounds %struct.STy, %struct.STy* %local, i64 0, i32 2 + store %struct.STy* @global, %struct.STy** %inner, align 8 + %call = call noalias dereferenceable_or_null(24) i8* @malloc(i64 24) #4 + %dst1 = bitcast i8* %call to float** + store float* %dst, float** %dst1, align 8 + %src2 = getelementptr inbounds i8, i8* %call, i64 8 + %1 = bitcast i8* %src2 to double** + store double* %src, double** %1, align 8 + store i8* %call, i8** bitcast (%struct.STy** getelementptr inbounds (%struct.STy, %struct.STy* @global, i64 0, i32 2) to i8**), align 8 + call fastcc void @nested_memory_callee(%struct.STy* nonnull %local) + ret void +} + +define internal fastcc void @nested_memory_callee(%struct.STy* nocapture readonly %S) nofree norecurse nounwind uwtable { +; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@nested_memory_callee +; IS__TUNIT_OPM-SAME: (%struct.STy* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR9:[0-9]+]] { +; IS__TUNIT_OPM-NEXT: entry: +; IS__TUNIT_OPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY:%.*]], %struct.STy* [[S]], i64 0, i32 2 +; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load %struct.STy*, %struct.STy** [[INNER]], align 8 +; IS__TUNIT_OPM-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP0]], i64 0, i32 2 +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load %struct.STy*, %struct.STy** [[INNER1]], align 8 +; IS__TUNIT_OPM-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP1]], i64 0, i32 1 +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load double*, double** [[SRC]], align 8 +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 +; IS__TUNIT_OPM-NEXT: [[CONV:%.*]] = fptrunc double [[TMP3]] to float +; 
IS__TUNIT_OPM-NEXT: [[DST:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP1]], i64 0, i32 0 +; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load float*, float** [[DST]], align 8 +; IS__TUNIT_OPM-NEXT: store float [[CONV]], float* [[TMP4]], align 4 +; IS__TUNIT_OPM-NEXT: ret void +; +; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@nested_memory_callee +; IS__TUNIT_NPM-SAME: (float* [[TMP0:%.*]], double* [[TMP1:%.*]], %struct.STy* [[TMP2:%.*]]) #[[ATTR7:[0-9]+]] { +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 +; IS__TUNIT_NPM-NEXT: [[S_PRIV_CAST:%.*]] = bitcast %struct.STy* [[S_PRIV]] to float** +; IS__TUNIT_NPM-NEXT: store float* [[TMP0]], float** [[S_PRIV_CAST]], align 8 +; IS__TUNIT_NPM-NEXT: [[S_PRIV_0_1:%.*]] = getelementptr [[STRUCT_STY]], %struct.STy* [[S_PRIV]], i64 0, i32 1 +; IS__TUNIT_NPM-NEXT: store double* [[TMP1]], double** [[S_PRIV_0_1]], align 8 +; IS__TUNIT_NPM-NEXT: [[S_PRIV_0_2:%.*]] = getelementptr [[STRUCT_STY]], %struct.STy* [[S_PRIV]], i64 0, i32 2 +; IS__TUNIT_NPM-NEXT: store %struct.STy* [[TMP2]], %struct.STy** [[S_PRIV_0_2]], align 8 +; IS__TUNIT_NPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[S_PRIV]], i64 0, i32 2 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load %struct.STy*, %struct.STy** [[INNER]], align 8 +; IS__TUNIT_NPM-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP3]], i64 0, i32 2 +; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load %struct.STy*, %struct.STy** [[INNER1]], align 8 +; IS__TUNIT_NPM-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP4]], i64 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP5:%.*]] = load double*, double** [[SRC]], align 8 +; IS__TUNIT_NPM-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +; IS__TUNIT_NPM-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float +; IS__TUNIT_NPM-NEXT: 
[[DST:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP4]], i64 0, i32 0 +; IS__TUNIT_NPM-NEXT: [[TMP7:%.*]] = load float*, float** [[DST]], align 8 +; IS__TUNIT_NPM-NEXT: store float [[CONV]], float* [[TMP7]], align 4 +; IS__TUNIT_NPM-NEXT: ret void +; +; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@nested_memory_callee +; IS__CGSCC_OPM-SAME: (%struct.STy* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR12:[0-9]+]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY:%.*]], %struct.STy* [[S]], i64 0, i32 2 +; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load %struct.STy*, %struct.STy** [[INNER]], align 8 +; IS__CGSCC_OPM-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP0]], i64 0, i32 2 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load %struct.STy*, %struct.STy** [[INNER1]], align 8 +; IS__CGSCC_OPM-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP1]], i64 0, i32 1 +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load double*, double** [[SRC]], align 8 +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 +; IS__CGSCC_OPM-NEXT: [[CONV:%.*]] = fptrunc double [[TMP3]] to float +; IS__CGSCC_OPM-NEXT: [[DST:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP1]], i64 0, i32 0 +; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load float*, float** [[DST]], align 8 +; IS__CGSCC_OPM-NEXT: store float [[CONV]], float* [[TMP4]], align 4 +; IS__CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@nested_memory_callee +; IS__CGSCC_NPM-SAME: (float* noalias nocapture nofree nonnull readnone [[TMP0:%.*]], double* noalias nocapture nofree nonnull readnone [[TMP1:%.*]], %struct.STy* noalias nocapture nofree nonnull readnone align 8 
dereferenceable(24) [[TMP2:%.*]]) #[[ATTR10:[0-9]+]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8 +; IS__CGSCC_NPM-NEXT: [[S_PRIV_CAST:%.*]] = bitcast %struct.STy* [[S_PRIV]] to float** +; IS__CGSCC_NPM-NEXT: [[S_PRIV_0_1:%.*]] = getelementptr [[STRUCT_STY]], %struct.STy* [[S_PRIV]], i64 0, i32 1 +; IS__CGSCC_NPM-NEXT: [[S_PRIV_0_2:%.*]] = getelementptr [[STRUCT_STY]], %struct.STy* [[S_PRIV]], i64 0, i32 2 +; IS__CGSCC_NPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[S_PRIV]], i64 0, i32 2 +; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load %struct.STy*, %struct.STy** [[INNER]], align 8 +; IS__CGSCC_NPM-NEXT: [[INNER1:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2 +; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load %struct.STy*, %struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2), align 8 +; IS__CGSCC_NPM-NEXT: [[SRC:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP4]], i64 0, i32 1 +; IS__CGSCC_NPM-NEXT: [[TMP5:%.*]] = load double*, double** [[SRC]], align 8 +; IS__CGSCC_NPM-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +; IS__CGSCC_NPM-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float +; IS__CGSCC_NPM-NEXT: [[DST:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[TMP4]], i64 0, i32 0 +; IS__CGSCC_NPM-NEXT: [[TMP7:%.*]] = load float*, float** [[DST]], align 8 +; IS__CGSCC_NPM-NEXT: store float [[CONV]], float* [[TMP7]], align 4 +; IS__CGSCC_NPM-NEXT: ret void +; +entry: + %inner = getelementptr inbounds %struct.STy, %struct.STy* %S, i64 0, i32 2 + %0 = load %struct.STy*, %struct.STy** %inner, align 8 + %inner1 = getelementptr inbounds %struct.STy, %struct.STy* %0, i64 0, i32 2 + %1 = load %struct.STy*, %struct.STy** %inner1, align 8 + %src = getelementptr inbounds %struct.STy, %struct.STy* %1, i64 0, i32 1 + %2 = load double*, double** %src, align 8 + %3 = load double, 
double* %2, align 8 + %conv = fptrunc double %3 to float + %dst = getelementptr inbounds %struct.STy, %struct.STy* %1, i64 0, i32 0 + %4 = load float*, float** %dst, align 8 + store float %conv, float* %4, align 4 + ret void +} + !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} @@ -3404,7 +4251,10 @@ for.end: ; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind writeonly } ; IS__TUNIT_OPM: attributes #[[ATTR7]] = { nofree nosync nounwind readonly willreturn } ; IS__TUNIT_OPM: attributes #[[ATTR8]] = { nofree nosync nounwind readnone } -; IS__TUNIT_OPM: attributes #[[ATTR9]] = { willreturn } +; IS__TUNIT_OPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind uwtable willreturn } +; IS__TUNIT_OPM: attributes #[[ATTR10]] = { willreturn } +; IS__TUNIT_OPM: attributes #[[ATTR11]] = { norecurse } +; IS__TUNIT_OPM: attributes #[[ATTR12]] = { nounwind } ;. ; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind willreturn writeonly } ; IS__TUNIT_NPM: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } @@ -3413,7 +4263,11 @@ for.end: ; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly } ; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn } ; IS__TUNIT_NPM: attributes #[[ATTR6]] = { nofree nosync nounwind writeonly } -; IS__TUNIT_NPM: attributes #[[ATTR7]] = { willreturn } +; IS__TUNIT_NPM: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind uwtable willreturn } +; IS__TUNIT_NPM: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +; IS__TUNIT_NPM: attributes #[[ATTR9]] = { willreturn } +; IS__TUNIT_NPM: attributes #[[ATTR10]] = { norecurse } +; IS__TUNIT_NPM: attributes #[[ATTR11]] = { nounwind } ;. 
; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } @@ -3426,9 +4280,12 @@ for.end: ; IS__CGSCC_OPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone } ; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind } ; IS__CGSCC_OPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR11]] = { willreturn } -; IS__CGSCC_OPM: attributes #[[ATTR12]] = { nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR13]] = { nounwind writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR11]] = { norecurse } +; IS__CGSCC_OPM: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind uwtable willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR13]] = { willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR14]] = { nounwind willreturn writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR15]] = { nounwind writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR16]] = { nounwind } ;. 
; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } @@ -3439,9 +4296,13 @@ for.end: ; IS__CGSCC_NPM: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind willreturn } ; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn } ; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR9]] = { willreturn } -; IS__CGSCC_NPM: attributes #[[ATTR10]] = { nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR11]] = { nounwind writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR9]] = { norecurse } +; IS__CGSCC_NPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind uwtable willreturn } +; IS__CGSCC_NPM: attributes #[[ATTR11:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR12]] = { willreturn } +; IS__CGSCC_NPM: attributes #[[ATTR13]] = { nounwind willreturn writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR14]] = { nounwind writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR15]] = { nounwind } ;. 
; IS__TUNIT____: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; IS__TUNIT____: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1} From 773ea16eba53c6ce34b458de6a3a3a1fa7ddafc5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 28 Dec 2021 22:52:56 -0800 Subject: [PATCH 168/992] [AST] Fix a warning This patch fixes: mlir/include/mlir/Tools/PDLL/AST/Types.h:54:3: error: definition of implicit copy assignment operator for 'Type' is deprecated because it has a user-declared copy constructor [-Werror,-Wdeprecated-copy] --- mlir/include/mlir/Tools/PDLL/AST/Types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/include/mlir/Tools/PDLL/AST/Types.h b/mlir/include/mlir/Tools/PDLL/AST/Types.h index 87b1a6cb72d8..cac3cae962c2 100644 --- a/mlir/include/mlir/Tools/PDLL/AST/Types.h +++ b/mlir/include/mlir/Tools/PDLL/AST/Types.h @@ -51,7 +51,6 @@ class Type { }; Type(Storage *impl = nullptr) : impl(impl) {} - Type(const Type &other) = default; bool operator==(const Type &other) const { return impl == other.impl; } bool operator!=(const Type &other) const { return !(*this == other); } From 73ece231ee0cf048d56841f47915beb1db6afc26 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 29 Dec 2021 06:57:36 +0000 Subject: [PATCH 169/992] Revert "[OpenMP][NFCI] Embed the source location string size in the ident_t" This reverts commit 7bfcdbcbf368cea14a5236080af975d5878a46eb. 
Broke MLIR build --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 22 +- clang/test/OpenMP/barrier_codegen.cpp | 4 +- clang/test/OpenMP/for_codegen.cpp | 4 +- clang/test/OpenMP/nvptx_SPMD_codegen.cpp | 14 +- .../OpenMP/parallel_num_threads_codegen.cpp | 2 +- .../OpenMP/parallel_proc_bind_codegen.cpp | 2 +- .../parallel_proc_bind_primary_codegen.cpp | 2 +- clang/test/OpenMP/sections_codegen.cpp | 4 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 21 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 165 ++++---- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 13 +- llvm/test/Transforms/OpenMP/deduplication.ll | 1 + .../get_hardware_num_threads_in_block_fold.ll | 2 +- .../OpenMP/parallel_region_merging.ll | 356 +++++++++--------- .../Transforms/OpenMP/remove_globalization.ll | 8 +- llvm/test/Transforms/OpenMP/spmdization.ll | 6 +- .../Transforms/OpenMP/spmdization_assumes.ll | 3 +- .../Transforms/OpenMP/spmdization_guarding.ll | 11 +- 18 files changed, 295 insertions(+), 345 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 40e2094ea4ce..c314044c66dd 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1429,11 +1429,10 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { - uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); } else { std::string FunctionName; if (const auto *FD = dyn_cast_or_null(CGF.CurFuncDecl)) @@ -1442,12 +1441,12 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, const char *FileName = PLoc.getFilename(); unsigned Line = PLoc.getLine(); unsigned Column = PLoc.getColumn(); - SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, - Column, SrcLocStrSize); + SrcLocStr = + OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); } unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); - return OMPBuilder.getOrCreateIdent( - SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); + return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), + Reserved2Flags); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -1458,11 +1457,10 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (CGM.getLangOpts().OpenMPIRBuilder) { SmallString<128> Buffer; OMPBuilder.updateToLocation(CGF.Builder.saveIP()); - uint32_t SrcLocStrSize; auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( - getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); + getIdentStringFromSourceLocation(CGF, Loc, Buffer)); return OMPBuilder.getOrCreateThreadID( - OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); + OMPBuilder.getOrCreateIdent(SrcLocStr)); } llvm::Value *ThreadID = nullptr; @@ -9529,9 +9527,8 @@ llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { - uint32_t SrcLocStrSize; if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) - return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); + return OMPBuilder.getOrCreateDefaultSrcLocStr(); SourceLocation Loc; if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { @@ -9555,8 +9552,7 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, - PLoc.getLine(), PLoc.getColumn(), - SrcLocStrSize); + PLoc.getLine(), PLoc.getColumn()); } /// Emit the arrays used to pass the captures and map information to the diff --git a/clang/test/OpenMP/barrier_codegen.cpp 
b/clang/test/OpenMP/barrier_codegen.cpp index 89704b36e13b..35b2ed721276 100644 --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -19,8 +19,8 @@ #define HEADER // CHECK: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } -// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } void foo() {} diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index 813b1313f1ee..64652e530f6a 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -22,8 +22,8 @@ // PROF-INSTR-PATH: constant [25 x i8] c"for_codegen-test.profraw\00" // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* -// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* // CHECK-DAG: [[I:@.+]] ={{.*}} global i8 1, // CHECK-DAG: [[J:@.+]] ={{.*}} global i8 2, // CHECK-DAG: [[K:@.+]] ={{.*}} global i8 3, diff --git 
a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index c2e98eec0cd9..eebc41b44f84 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -11,13 +11,13 @@ int a; // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, 
i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 0, i8* getelementptr inbounds // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp index e22f73057035..0fdb6cfbcfef 100644 --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp index e6103335b778..8b9e09191b24 100644 --- a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp @@ -14,7 +14,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], 
[23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp index 98b7f0374345..34de488c1dfc 100644 --- a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp index f9cdc5cc7ed1..07b95f53fa62 100644 --- a/clang/test/OpenMP/sections_codegen.cpp +++ b/clang/test/OpenMP/sections_codegen.cpp @@ -9,8 +9,8 @@ // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 {{[0-9]+}}, i8* -// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* +// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* // CHECK-LABEL: foo void foo() { extern void mayThrow(); mayThrow(); }; 
// CHECK-LABEL: bar diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index fa9244649206..9976d1961ed1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -663,31 +663,28 @@ class OpenMPIRBuilder { Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); /// Return the (LLVM-IR) string describing the source location \p LocStr. - Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); + Constant *getOrCreateSrcLocStr(StringRef LocStr); /// Return the (LLVM-IR) string describing the default source location. - Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); + Constant *getOrCreateDefaultSrcLocStr(); /// Return the (LLVM-IR) string describing the source location identified by /// the arguments. Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, unsigned Column, - uint32_t &SrcLocStrSize); + unsigned Line, unsigned Column); /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as /// fallback if \p DL does not specify the function name. - Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, - Function *F = nullptr); + Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr); /// Return the (LLVM-IR) string describing the source location \p Loc. - Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, - uint32_t &SrcLocStrSize); + Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. 
/// TODO: Create a enum class for the Reserve2Flags - Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, - omp::IdentFlag Flags = omp::IdentFlag(0), - unsigned Reserve2Flags = 0); + Value *getOrCreateIdent(Constant *SrcLocStr, + omp::IdentFlag Flags = omp::IdentFlag(0), + unsigned Reserve2Flags = 0); /// Create a global flag \p Namein the module with initial value \p Value. GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); @@ -757,7 +754,7 @@ class OpenMPIRBuilder { StringMap SrcLocStrMap; /// Map to remember existing ident_t*. - DenseMap, Constant *> IdentMap; + DenseMap, Value *> IdentMap; /// Helper that contains information about regions we need to outline /// during finalization. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index de2507631f00..5157d51fd18c 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -21,9 +21,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" @@ -39,7 +37,6 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include #include #define DEBUG_TYPE "openmp-ir-builder" @@ -258,21 +255,19 @@ GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) { return GV; } -Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, - uint32_t SrcLocStrSize, - IdentFlag LocFlags, - unsigned Reserve2Flags) { +Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, + IdentFlag LocFlags, + unsigned Reserve2Flags) { // Enable "C-mode". 
LocFlags |= OMP_IDENT_FLAG_KMPC; - Constant *&Ident = + Value *&Ident = IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; if (!Ident) { Constant *I32Null = ConstantInt::getNullValue(Int32); - Constant *IdentData[] = {I32Null, - ConstantInt::get(Int32, uint32_t(LocFlags)), - ConstantInt::get(Int32, Reserve2Flags), - ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr}; + Constant *IdentData[] = { + I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), + ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; Constant *Initializer = ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData); @@ -295,12 +290,10 @@ Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, } } - return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr); + return Builder.CreatePointerCast(Ident, IdentPtr); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr, - uint32_t &SrcLocStrSize) { - SrcLocStrSize = LocStr.size(); +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { Constant *Initializer = @@ -321,8 +314,8 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr, Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, unsigned Column, - uint32_t &SrcLocStrSize) { + unsigned Line, + unsigned Column) { SmallString<128> Buffer; Buffer.push_back(';'); Buffer.append(FileName); @@ -334,21 +327,17 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, Buffer.append(std::to_string(Column)); Buffer.push_back(';'); Buffer.push_back(';'); - return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize); + return getOrCreateSrcLocStr(Buffer.str()); } -Constant * -OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) { - StringRef UnknownLoc = ";unknown;unknown;0;0;;"; - return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize); +Constant 
*OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { + return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, - uint32_t &SrcLocStrSize, - Function *F) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { DILocation *DIL = DL.get(); if (!DIL) - return getOrCreateDefaultSrcLocStr(SrcLocStrSize); + return getOrCreateDefaultSrcLocStr(); StringRef FileName = M.getName(); if (DIFile *DIF = DIL->getFile()) if (Optional Source = DIF->getSource()) @@ -357,13 +346,12 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, if (Function.empty() && F) Function = F->getName(); return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), - DIL->getColumn(), SrcLocStrSize); + DIL->getColumn()); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc, - uint32_t &SrcLocStrSize) { - return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize, - Loc.IP.getBlock()->getParent()); +Constant * +OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { + return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent()); } Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { @@ -405,11 +393,9 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, break; } - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Args[] = { - getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags), - getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))}; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), + getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; // If we are in a cancellable parallel region, barriers are cancellation // points. 
@@ -455,9 +441,8 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, llvm_unreachable("Unknown cancel kind!"); } - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); @@ -528,9 +513,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( if (!updateToLocation(Loc)) return Loc.IP; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadID = getOrCreateThreadID(Ident); if (NumThreads) { @@ -887,9 +871,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { // Build call void __kmpc_flush(ident_t *loc) - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)}; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Args[] = {getOrCreateIdent(SrcLocStr)}; Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); } @@ -903,9 +886,8 @@ void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) { void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value 
*Args[] = {Ident, getOrCreateThreadID(Ident)}; // Ignore return result until untied tasks are supported. @@ -921,9 +903,8 @@ void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) { void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { // Build call __kmpc_omp_taskyield(loc, thread_id, 0); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Constant *I32Null = ConstantInt::getNullValue(Int32); Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; @@ -1133,16 +1114,14 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( Module *Module = Func->getParent(); Value *RedArrayPtr = Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr"); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); bool CanGenerateAtomic = llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) { return RI.AtomicReductionGen; }); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize, - CanGenerateAtomic - ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE - : IdentFlag(0)); + Value *Ident = getOrCreateIdent( + SrcLocStr, CanGenerateAtomic ? 
IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE + : IdentFlag(0)); Value *ThreadId = getOrCreateThreadID(Ident); Constant *NumVariables = Builder.getInt32(NumReductions); const DataLayout &DL = Module->getDataLayout(); @@ -1256,9 +1235,8 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, return Loc.IP; Directive OMPD = Directive::OMPD_master; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -1280,9 +1258,8 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc, return Loc.IP; Directive OMPD = Directive::OMPD_masked; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, Filter}; Value *ArgsEnd[] = {Ident, ThreadId}; @@ -1503,9 +1480,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Builder.restoreIP(CLI->getPreheaderIP()); Builder.SetCurrentDebugLocation(DL); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); - Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(DL); + Value *SrcLoc = getOrCreateIdent(SrcLocStr); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -1632,9 +1608,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( // Set up the source location value for OpenMP runtime. 
Builder.SetCurrentDebugLocation(DL); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); - Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(DL); + Value *SrcLoc = getOrCreateIdent(SrcLocStr); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -2404,9 +2379,8 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); @@ -2433,9 +2407,8 @@ OpenMPIRBuilder::createSingle(const LocationDescription &Loc, } Directive OMPD = Directive::OMPD_single; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2463,9 +2436,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( return Loc.IP; Directive OMPD = Directive::OMPD_critical; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *LockVar = getOMPCriticalRegionLock(CriticalName); Value *Args[] = {Ident, ThreadId, LockVar}; @@ -2514,9 +2486,8 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, Value *DependBaseAddrGEP = 
Builder.CreateInBoundsGEP( ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)}); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP}; @@ -2541,9 +2512,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( Instruction *ExitCall = nullptr; if (IsThreads) { - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2748,9 +2718,8 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc, IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Size, Allocator}; @@ -2765,9 +2734,8 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Addr, Allocator}; Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); 
@@ -2780,9 +2748,8 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate( IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadId = getOrCreateThreadID(Ident); Constant *ThreadPrivateCache = getOrCreateOMPInternalVariable(Int8PtrPtr, Name); @@ -2800,9 +2767,8 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, if (!updateToLocation(Loc)) return Loc.IP; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); @@ -2854,9 +2820,8 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, if (!updateToLocation(Loc)) return; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 464d74905cd0..9c6b0f6920fb 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1590,10 +1590,8 @@ struct OpenMPOpt { &F.getEntryBlock(), F.getEntryBlock().begin())); // Create a fallback location if non was found. 
// TODO: Use the debug locations of the calls instead. - uint32_t SrcLocStrSize; - Constant *Loc = - OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); - Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); + Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); + Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); } return Ident; } @@ -3223,11 +3221,8 @@ struct AAKernelInfoFunction : AAKernelInfo { OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); OMPInfoCache.OMPBuilder.updateToLocation(Loc); - uint32_t SrcLocStrSize; - auto *SrcLocStr = - OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = - OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); + auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc); + Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr); BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); // Add check for Tid in RegionCheckTidBB diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll index 9228fa052e37..fbf554e3f9f6 100644 --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -19,6 +19,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 +; CHECK-DAG: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 ; UTC_ARGS: --enable diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll 
b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index b72031a9b68c..602d4f0be577 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -13,7 +13,7 @@ target triple = "nvptx64" ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll index b4ff9d9ffee6..72af4cece3b4 100644 --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4694,10 +4694,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4719,8 +4719,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -4753,7 +4753,7 @@ entry: ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], 
align 4 ; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4837,10 +4837,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4864,15 +4864,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4893,7 +4893,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -4921,11 +4921,11 @@ entry: ; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4948,15 +4948,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -4976,7 +4976,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5006,10 +5006,10 @@ entry: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5033,15 +5033,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -5065,7 +5065,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5092,10 +5092,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5118,15 +5118,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5150,7 +5150,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5179,13 +5179,13 @@ entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: ; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5210,15 +5210,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -5239,7 +5239,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5270,10 +5270,10 @@ entry: ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5295,8 +5295,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5345,11 +5345,11 @@ entry: ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5372,15 +5372,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], 
label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -5401,7 +5401,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5440,10 +5440,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5465,11 +5465,11 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5514,11 +5514,11 @@ entry: ; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5543,27 +5543,27 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK1: omp_region.end4: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK1-NEXT: br label 
[[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split.split.split: ; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5584,7 +5584,7 @@ entry: ; CHECK1: omp.par.merged.split.split.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK1: omp_region.body5.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: br label [[OMP_REGION_END4]] ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -5595,7 +5595,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5741,10 +5741,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5768,8 +5768,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5811,10 +5811,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: 
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5836,8 +5836,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -5870,7 +5870,7 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 
dereferenceable(24) @[[GLOB1]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5954,10 +5954,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5981,15 +5981,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6010,7 +6010,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6038,11 +6038,11 @@ entry: ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6065,15 +6065,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -6093,7 +6093,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6123,10 +6123,10 @@ entry: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6150,15 +6150,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -6182,7 +6182,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6209,10 +6209,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6235,15 +6235,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6267,7 +6267,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6296,13 +6296,13 @@ entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6327,15 +6327,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -6356,7 +6356,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6387,10 +6387,10 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6412,8 +6412,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6462,11 +6462,11 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6489,15 +6489,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], 
label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -6518,7 +6518,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6557,10 +6557,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6582,11 +6582,11 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6631,11 +6631,11 @@ entry: ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6660,27 +6660,27 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK2-NEXT: br label 
[[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: ; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6701,7 +6701,7 @@ entry: ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK2: omp_region.body5.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -6712,7 +6712,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6858,10 +6858,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6885,8 +6885,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 4a99013e01a9..8ecae7f5af9a 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -32,7 +32,7 @@ define void @kernel() { ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-NEXT: call void @foo() 
#[[ATTR4:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR4]] -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -41,7 +41,7 @@ define void @kernel() { ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-DISABLED-NEXT: ret void ; @@ -183,14 +183,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } ; CHECK: attributes #[[ATTR4]] = { nounwind } ; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK-DISABLED: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } +; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } ; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 5051bce98279..29e4d08e0c5f 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -113,7 +113,6 @@ ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -127,7 +126,6 @@ ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* 
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -1439,7 +1437,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; AMDGPU: region.guarded.end: ; AMDGPU-NEXT: br label [[REGION_BARRIER]] ; AMDGPU: region.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] ; AMDGPU: region.exit: ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -1475,7 +1473,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; NVPTX: region.guarded.end: ; NVPTX-NEXT: br label [[REGION_BARRIER]] ; NVPTX: region.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] ; NVPTX: region.exit: ; NVPTX-NEXT: br label [[FOR_COND:%.*]] diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll 
b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 93065a263e67..a0d43feecc3c 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -25,7 +25,6 @@ target triple = "nvptx64" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" -; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 @@ -51,7 +50,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index b2c4aca9d5e5..5d6334e7fa2b 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -47,7 +47,6 @@ target triple = 
"nvptx64" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" -; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 @@ -82,7 +81,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] @@ -106,7 +105,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end1: ; CHECK-NEXT: br label [[REGION_BARRIER2]] ; CHECK: region.barrier2: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) ; CHECK-NEXT: br label [[REGION_EXIT3]] ; CHECK: region.exit3: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 @@ -127,7 
+126,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end6: ; CHECK-NEXT: br label [[REGION_BARRIER7]] ; CHECK: region.barrier7: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) ; CHECK-NEXT: br label [[REGION_EXIT8:%.*]] ; CHECK: region.exit8: ; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -144,7 +143,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end11: ; CHECK-NEXT: br label [[REGION_BARRIER12]] ; CHECK: region.barrier12: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) ; CHECK-NEXT: br label [[REGION_EXIT13:%.*]] ; CHECK: region.exit13: ; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -161,7 +160,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end16: ; CHECK-NEXT: br label [[REGION_BARRIER17]] ; CHECK: region.barrier17: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) ; CHECK-NEXT: br label [[REGION_EXIT18:%.*]] ; CHECK: region.exit18: ; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 From 423ba12971bac8397c87fcf975ba6a4b7530ed28 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 29 Dec 2021 07:08:58 +0000 Subject: [PATCH 170/992] Revert "[OpenMP][FIX] Also update unit test after API change" This reverts commit 16da2140045808b2aea1d28366ca7d326eb3c809. 
Revert unit-test API update after previous revert of the API change. --- .../Frontend/OpenMPIRBuilderTest.cpp | 83 +++++++++---------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index f200ef57be57..fba36d42d185 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -303,7 +303,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { EXPECT_EQ(Barrier->getNumUses(), 0U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), + CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -341,6 +342,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { BB = BB->getTerminator()->getSuccessor(0); EXPECT_EQ(BB->size(), 4U); + CallInst *GTID = dyn_cast(&BB->front()); EXPECT_NE(GTID, nullptr); EXPECT_EQ(GTID->arg_size(), 1U); @@ -358,8 +360,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), - NewIP.getBlock()); + EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), NewIP.getBlock()); EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); EXPECT_NE(GTID1, nullptr); @@ -376,7 +377,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { EXPECT_EQ(Barrier->getNumUses(), 0U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), + CBB); 
EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -500,8 +502,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { Builder.CreateStore(F->arg_begin(), PrivAI); Builder.restoreIP(CodeGenIP); - Value *PrivLoad = - Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, + "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), @@ -807,8 +809,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { Builder.CreateStore(F->arg_begin(), PrivAI); Builder.restoreIP(CodeGenIP); - Value *PrivLoad = - Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, + "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), @@ -2009,8 +2011,8 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { EntryBB = ThenBB->getUniquePredecessor(); // simple instructions for body - Value *PrivLoad = - Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, + "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -2162,8 +2164,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { // body begin Builder.restoreIP(CodeGenIP); Builder.CreateStore(F->arg_begin(), PrivAI); - Value *PrivLoad = - Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, + "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -2536,33 +2538,32 @@ TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - IntegerType *Int32 = Type::getInt32Ty(M->getContext()); - 
AllocaInst *MasterAddress = Builder.CreateAlloca(Int32->getPointerTo()); - AllocaInst *PrivAddress = Builder.CreateAlloca(Int32->getPointerTo()); + IntegerType* Int32 = Type::getInt32Ty(M->getContext()); + AllocaInst* MasterAddress = Builder.CreateAlloca(Int32->getPointerTo()); + AllocaInst* PrivAddress = Builder.CreateAlloca(Int32->getPointerTo()); BasicBlock *EntryBB = BB; OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, PrivAddress, Int32, /*BranchtoEnd*/ true); - BranchInst *EntryBr = dyn_cast_or_null(EntryBB->getTerminator()); + BranchInst* EntryBr = dyn_cast_or_null(EntryBB->getTerminator()); EXPECT_NE(EntryBr, nullptr); EXPECT_TRUE(EntryBr->isConditional()); - BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); - BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); - CmpInst *CMP = dyn_cast_or_null(EntryBr->getCondition()); + BasicBlock* NotMasterBB = EntryBr->getSuccessor(0); + BasicBlock* CopyinEnd = EntryBr->getSuccessor(1); + CmpInst* CMP = dyn_cast_or_null(EntryBr->getCondition()); EXPECT_NE(CMP, nullptr); EXPECT_NE(NotMasterBB, nullptr); EXPECT_NE(CopyinEnd, nullptr); - BranchInst *NotMasterBr = - dyn_cast_or_null(NotMasterBB->getTerminator()); + BranchInst* NotMasterBr = dyn_cast_or_null(NotMasterBB->getTerminator()); EXPECT_NE(NotMasterBr, nullptr); EXPECT_FALSE(NotMasterBr->isConditional()); - EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); + EXPECT_EQ(CopyinEnd,NotMasterBr->getSuccessor(0)); } TEST_F(OpenMPIRBuilderTest, SingleDirective) { @@ -2601,8 +2602,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { EntryBB = ThenBB->getUniquePredecessor(); // simple instructions for body - Value *PrivLoad = - Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, + "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -3105,9 +3106,8 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); 
Builder.restoreIP(CodeGenIP); - uint32_t StrSize; - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *SumLocal = Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); @@ -3339,9 +3339,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - uint32_t StrSize; - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *SumLocal = Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); @@ -3360,9 +3359,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - uint32_t StrSize; - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); @@ -3697,11 +3695,8 @@ TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { IRBuilder<> Builder(BB); - uint32_t StrSize; - Constant *Cst1 = - OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); - Constant *Cst2 = - OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); + Constant *Cst1 = OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5); + Constant *Cst2 
= OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5); SmallVector Names = {Cst1, Cst2}; GlobalVariable *OffloadMaptypesGlobal = @@ -3803,15 +3798,11 @@ TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { SmallVector Flags = {0, 2}; - uint32_t StrSize; - Constant *SrcLocCst = - OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize); - Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); + Constant *SrcLocCst = OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5); + Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst); - Constant *Cst1 = - OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); - Constant *Cst2 = - OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); + Constant *Cst1 = OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5); + Constant *Cst2 = OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5); SmallVector Names = {Cst1, Cst2}; GlobalVariable *Maptypes = From 944aa0421cb7da3aa764b2a108ea25ef8bceb979 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 29 Dec 2021 01:07:53 -0600 Subject: [PATCH 171/992] Reapply "[OpenMP][NFCI] Embed the source location string size in the ident_t" This reverts commit 73ece231ee0cf048d56841f47915beb1db6afc26 and reapplies 7bfcdbcbf368cea14a5236080af975d5878a46eb with mlir changes. Also reverts commit 423ba12971bac8397c87fcf975ba6a4b7530ed28 and includes the unit test changes of 16da2140045808b2aea1d28366ca7d326eb3c809. 
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 22 +- clang/test/OpenMP/barrier_codegen.cpp | 4 +- clang/test/OpenMP/for_codegen.cpp | 4 +- clang/test/OpenMP/nvptx_SPMD_codegen.cpp | 14 +- .../OpenMP/parallel_num_threads_codegen.cpp | 2 +- .../OpenMP/parallel_proc_bind_codegen.cpp | 2 +- .../parallel_proc_bind_primary_codegen.cpp | 2 +- clang/test/OpenMP/sections_codegen.cpp | 4 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 21 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 165 ++++---- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 13 +- llvm/test/Transforms/OpenMP/deduplication.ll | 1 - .../get_hardware_num_threads_in_block_fold.ll | 2 +- .../OpenMP/parallel_region_merging.ll | 356 +++++++++--------- .../Transforms/OpenMP/remove_globalization.ll | 8 +- llvm/test/Transforms/OpenMP/spmdization.ll | 6 +- .../Transforms/OpenMP/spmdization_assumes.ll | 3 +- .../Transforms/OpenMP/spmdization_guarding.ll | 11 +- .../Frontend/OpenMPIRBuilderTest.cpp | 83 ++-- .../OpenACC/OpenACCToLLVMIRTranslation.cpp | 18 +- 20 files changed, 402 insertions(+), 339 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c314044c66dd..40e2094ea4ce 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1429,10 +1429,11 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { + uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); } else { std::string FunctionName; if (const auto *FD = dyn_cast_or_null(CGF.CurFuncDecl)) @@ -1441,12 +1442,12 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, const char *FileName = PLoc.getFilename(); unsigned Line = 
PLoc.getLine(); unsigned Column = PLoc.getColumn(); - SrcLocStr = - OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); + SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, + Column, SrcLocStrSize); } unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); - return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), - Reserved2Flags); + return OMPBuilder.getOrCreateIdent( + SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -1457,10 +1458,11 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (CGM.getLangOpts().OpenMPIRBuilder) { SmallString<128> Buffer; OMPBuilder.updateToLocation(CGF.Builder.saveIP()); + uint32_t SrcLocStrSize; auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( - getIdentStringFromSourceLocation(CGF, Loc, Buffer)); + getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); return OMPBuilder.getOrCreateThreadID( - OMPBuilder.getOrCreateIdent(SrcLocStr)); + OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); } llvm::Value *ThreadID = nullptr; @@ -9527,8 +9529,9 @@ llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { + uint32_t SrcLocStrSize; if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) - return OMPBuilder.getOrCreateDefaultSrcLocStr(); + return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); SourceLocation Loc; if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { @@ -9552,7 +9555,8 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, - PLoc.getLine(), PLoc.getColumn()); + PLoc.getLine(), PLoc.getColumn(), + SrcLocStrSize); } /// Emit the arrays used to pass the captures and map 
information to the diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp index 35b2ed721276..89704b36e13b 100644 --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -19,8 +19,8 @@ #define HEADER // CHECK: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } -// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } void foo() {} diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp index 64652e530f6a..813b1313f1ee 100644 --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -22,8 +22,8 @@ // PROF-INSTR-PATH: constant [25 x i8] c"for_codegen-test.profraw\00" // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 {{[0-9]+}}, i8* // CHECK-DAG: [[I:@.+]] ={{.*}} global i8 1, // CHECK-DAG: [[J:@.+]] ={{.*}} global i8 2, // 
CHECK-DAG: [[K:@.+]] ={{.*}} global i8 3, diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index eebc41b44f84..c2e98eec0cd9 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -11,13 +11,13 @@ int a; // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant 
%struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp index 0fdb6cfbcfef..e22f73057035 100644 --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp index 8b9e09191b24..e6103335b778 100644 --- a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp @@ -14,7 +14,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 
2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp index 34de488c1dfc..98b7f0374345 100644 --- a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp @@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t; // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp index 07b95f53fa62..f9cdc5cc7ed1 100644 --- a/clang/test/OpenMP/sections_codegen.cpp +++ b/clang/test/OpenMP/sections_codegen.cpp @@ -9,8 +9,8 @@ // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* -// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 {{[0-9]+}}, i8* // CHECK-LABEL: 
foo void foo() { extern void mayThrow(); mayThrow(); }; // CHECK-LABEL: bar diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9976d1961ed1..fa9244649206 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -663,28 +663,31 @@ class OpenMPIRBuilder { Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); /// Return the (LLVM-IR) string describing the source location \p LocStr. - Constant *getOrCreateSrcLocStr(StringRef LocStr); + Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the default source location. - Constant *getOrCreateDefaultSrcLocStr(); + Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the source location identified by /// the arguments. Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, unsigned Column); + unsigned Line, unsigned Column, + uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as /// fallback if \p DL does not specify the function name. - Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr); + Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, + Function *F = nullptr); /// Return the (LLVM-IR) string describing the source location \p Loc. - Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); + Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, + uint32_t &SrcLocStrSize); /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. 
/// TODO: Create a enum class for the Reserve2Flags - Value *getOrCreateIdent(Constant *SrcLocStr, - omp::IdentFlag Flags = omp::IdentFlag(0), - unsigned Reserve2Flags = 0); + Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, + omp::IdentFlag Flags = omp::IdentFlag(0), + unsigned Reserve2Flags = 0); /// Create a global flag \p Namein the module with initial value \p Value. GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); @@ -754,7 +757,7 @@ class OpenMPIRBuilder { StringMap SrcLocStrMap; /// Map to remember existing ident_t*. - DenseMap, Value *> IdentMap; + DenseMap, Constant *> IdentMap; /// Helper that contains information about regions we need to outline /// during finalization. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 5157d51fd18c..de2507631f00 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -21,7 +21,9 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" @@ -37,6 +39,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include #include #define DEBUG_TYPE "openmp-ir-builder" @@ -255,19 +258,21 @@ GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) { return GV; } -Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, - IdentFlag LocFlags, - unsigned Reserve2Flags) { +Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, + uint32_t SrcLocStrSize, + IdentFlag LocFlags, + unsigned Reserve2Flags) { // Enable "C-mode". 
LocFlags |= OMP_IDENT_FLAG_KMPC; - Value *&Ident = + Constant *&Ident = IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; if (!Ident) { Constant *I32Null = ConstantInt::getNullValue(Int32); - Constant *IdentData[] = { - I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), - ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; + Constant *IdentData[] = {I32Null, + ConstantInt::get(Int32, uint32_t(LocFlags)), + ConstantInt::get(Int32, Reserve2Flags), + ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr}; Constant *Initializer = ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData); @@ -290,10 +295,12 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, } } - return Builder.CreatePointerCast(Ident, IdentPtr); + return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr, + uint32_t &SrcLocStrSize) { + SrcLocStrSize = LocStr.size(); Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { Constant *Initializer = @@ -314,8 +321,8 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, - unsigned Column) { + unsigned Line, unsigned Column, + uint32_t &SrcLocStrSize) { SmallString<128> Buffer; Buffer.push_back(';'); Buffer.append(FileName); @@ -327,17 +334,21 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, Buffer.append(std::to_string(Column)); Buffer.push_back(';'); Buffer.push_back(';'); - return getOrCreateSrcLocStr(Buffer.str()); + return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize); } -Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { - return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); +Constant * +OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) { + StringRef UnknownLoc = 
";unknown;unknown;0;0;;"; + return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, + uint32_t &SrcLocStrSize, + Function *F) { DILocation *DIL = DL.get(); if (!DIL) - return getOrCreateDefaultSrcLocStr(); + return getOrCreateDefaultSrcLocStr(SrcLocStrSize); StringRef FileName = M.getName(); if (DIFile *DIF = DIL->getFile()) if (Optional Source = DIF->getSource()) @@ -346,12 +357,13 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { if (Function.empty() && F) Function = F->getName(); return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), - DIL->getColumn()); + DIL->getColumn(), SrcLocStrSize); } -Constant * -OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { - return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent()); +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc, + uint32_t &SrcLocStrSize) { + return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize, + Loc.IP.getBlock()->getParent()); } Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { @@ -393,9 +405,11 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, break; } - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), - getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Args[] = { + getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags), + getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))}; // If we are in a cancellable parallel region, barriers are cancellation // points. 
@@ -441,8 +455,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, llvm_unreachable("Unknown cancel kind!"); } - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); @@ -513,8 +528,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadID = getOrCreateThreadID(Ident); if (NumThreads) { @@ -871,8 +887,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { // Build call void __kmpc_flush(ident_t *loc) - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Args[] = {getOrCreateIdent(SrcLocStr)}; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)}; Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); } @@ -886,8 +903,9 @@ void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) { void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value 
*Args[] = {Ident, getOrCreateThreadID(Ident)}; // Ignore return result until untied tasks are supported. @@ -903,8 +921,9 @@ void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) { void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { // Build call __kmpc_omp_taskyield(loc, thread_id, 0); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Constant *I32Null = ConstantInt::getNullValue(Int32); Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; @@ -1114,14 +1133,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( Module *Module = Func->getParent(); Value *RedArrayPtr = Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr"); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); bool CanGenerateAtomic = llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) { return RI.AtomicReductionGen; }); - Value *Ident = getOrCreateIdent( - SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE - : IdentFlag(0)); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize, + CanGenerateAtomic + ? 
IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE + : IdentFlag(0)); Value *ThreadId = getOrCreateThreadID(Ident); Constant *NumVariables = Builder.getInt32(NumReductions); const DataLayout &DL = Module->getDataLayout(); @@ -1235,8 +1256,9 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, return Loc.IP; Directive OMPD = Directive::OMPD_master; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -1258,8 +1280,9 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc, return Loc.IP; Directive OMPD = Directive::OMPD_masked; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, Filter}; Value *ArgsEnd[] = {Ident, ThreadId}; @@ -1480,8 +1503,9 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Builder.restoreIP(CLI->getPreheaderIP()); Builder.SetCurrentDebugLocation(DL); - Constant *SrcLocStr = getOrCreateSrcLocStr(DL); - Value *SrcLoc = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -1608,8 +1632,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( // Set up the source location value for OpenMP runtime. 
Builder.SetCurrentDebugLocation(DL); - Constant *SrcLocStr = getOrCreateSrcLocStr(DL); - Value *SrcLoc = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -2379,8 +2404,9 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); @@ -2407,8 +2433,9 @@ OpenMPIRBuilder::createSingle(const LocationDescription &Loc, } Directive OMPD = Directive::OMPD_single; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2436,8 +2463,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( return Loc.IP; Directive OMPD = Directive::OMPD_critical; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *LockVar = getOMPCriticalRegionLock(CriticalName); Value *Args[] = {Ident, ThreadId, LockVar}; @@ -2486,8 +2514,9 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, Value *DependBaseAddrGEP = 
Builder.CreateInBoundsGEP( ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)}); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP}; @@ -2512,8 +2541,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( Instruction *ExitCall = nullptr; if (IsThreads) { - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2718,8 +2748,9 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc, IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Size, Allocator}; @@ -2734,8 +2765,9 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Addr, Allocator}; Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); 
@@ -2748,8 +2780,9 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate( IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Constant *ThreadPrivateCache = getOrCreateOMPInternalVariable(Int8PtrPtr, Name); @@ -2767,8 +2800,9 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); @@ -2820,8 +2854,9 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, if (!updateToLocation(Loc)) return; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 9c6b0f6920fb..464d74905cd0 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1590,8 +1590,10 @@ struct OpenMPOpt { &F.getEntryBlock(), F.getEntryBlock().begin())); // Create a fallback location if non was found. 
// TODO: Use the debug locations of the calls instead. - Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); - Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); + uint32_t SrcLocStrSize; + Constant *Loc = + OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); + Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); } return Ident; } @@ -3221,8 +3223,11 @@ struct AAKernelInfoFunction : AAKernelInfo { OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); OMPInfoCache.OMPBuilder.updateToLocation(Loc); - auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + auto *SrcLocStr = + OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = + OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); // Add check for Tid in RegionCheckTidBB diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll index fbf554e3f9f6..9228fa052e37 100644 --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -19,7 +19,6 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 -; CHECK-DAG: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 ; UTC_ARGS: --enable diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll 
b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index 602d4f0be577..b72031a9b68c 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -13,7 +13,7 @@ target triple = "nvptx64" ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll index 72af4cece3b4..b4ff9d9ffee6 100644 --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4694,10 +4694,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4719,8 +4719,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -4753,7 +4753,7 @@ entry: ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], 
align 4 ; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4837,10 +4837,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4864,15 +4864,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4893,7 +4893,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -4921,11 +4921,11 @@ entry: ; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4948,15 +4948,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -4976,7 +4976,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5006,10 +5006,10 @@ entry: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5033,15 +5033,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -5065,7 +5065,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5092,10 +5092,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5118,15 +5118,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5150,7 +5150,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5179,13 +5179,13 @@ entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: ; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5210,15 +5210,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -5239,7 +5239,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5270,10 +5270,10 @@ entry: ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5295,8 +5295,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5345,11 +5345,11 @@ entry: ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5372,15 +5372,15 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], 
label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -5401,7 +5401,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5440,10 +5440,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5465,11 +5465,11 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5514,11 +5514,11 @@ entry: ; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5543,27 +5543,27 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK1: omp_region.end4: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK1-NEXT: br label 
[[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split.split.split: ; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5584,7 +5584,7 @@ entry: ; CHECK1: omp.par.merged.split.split.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK1: omp_region.body5.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: br label [[OMP_REGION_END4]] ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -5595,7 +5595,7 @@ entry: ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5741,10 +5741,10 @@ entry: ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5768,8 +5768,8 @@ entry: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5811,10 +5811,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: 
-; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5836,8 +5836,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -5870,7 +5870,7 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 
dereferenceable(24) @[[GLOB1:[0-9]+]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5954,10 +5954,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5981,15 +5981,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6010,7 +6010,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6038,11 +6038,11 @@ entry: ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6065,15 +6065,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -6093,7 +6093,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6123,10 +6123,10 @@ entry: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6150,15 +6150,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -6182,7 +6182,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6209,10 +6209,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6235,15 +6235,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6267,7 +6267,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6296,13 +6296,13 @@ entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6327,15 +6327,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -6356,7 +6356,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6387,10 +6387,10 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6412,8 +6412,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6462,11 +6462,11 @@ entry: ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6489,15 +6489,15 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], 
label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -6518,7 +6518,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6557,10 +6557,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6582,11 +6582,11 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6631,11 +6631,11 @@ entry: ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6660,27 +6660,27 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: 
[[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK2-NEXT: br label 
[[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: ; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6701,7 +6701,7 @@ entry: ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK2: omp_region.body5.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -6712,7 +6712,7 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6858,10 +6858,10 @@ entry: ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6885,8 +6885,8 @@ entry: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 8ecae7f5af9a..4a99013e01a9 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -32,7 +32,7 @@ define void @kernel() { ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-NEXT: call void @foo() 
#[[ATTR4:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR4]] -; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -41,7 +41,7 @@ define void @kernel() { ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-DISABLED-NEXT: ret void ; @@ -183,14 +183,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } ; CHECK: attributes #[[ATTR4]] = { nounwind } ; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK-DISABLED: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } ; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 29e4d08e0c5f..5051bce98279 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -113,6 +113,7 @@ ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -126,6 +127,7 @@ ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* 
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -1437,7 +1439,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; AMDGPU: region.guarded.end: ; AMDGPU-NEXT: br label [[REGION_BARRIER]] ; AMDGPU: region.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] ; AMDGPU: region.exit: ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -1473,7 +1475,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; NVPTX: region.guarded.end: ; NVPTX-NEXT: br label [[REGION_BARRIER]] ; NVPTX: region.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] ; NVPTX: region.exit: ; NVPTX-NEXT: br label [[FOR_COND:%.*]] diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll 
b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index a0d43feecc3c..93065a263e67 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -25,6 +25,7 @@ target triple = "nvptx64" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 @@ -50,7 +51,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index 5d6334e7fa2b..b2c4aca9d5e5 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -47,6 +47,7 @@ target triple = 
"nvptx64" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 @@ -81,7 +82,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] @@ -105,7 +106,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end1: ; CHECK-NEXT: br label [[REGION_BARRIER2]] ; CHECK: region.barrier2: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) ; CHECK-NEXT: br label [[REGION_EXIT3]] ; CHECK: region.exit3: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 @@ -126,7 
+127,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end6: ; CHECK-NEXT: br label [[REGION_BARRIER7]] ; CHECK: region.barrier7: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) ; CHECK-NEXT: br label [[REGION_EXIT8:%.*]] ; CHECK: region.exit8: ; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -143,7 +144,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end11: ; CHECK-NEXT: br label [[REGION_BARRIER12]] ; CHECK: region.barrier12: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) ; CHECK-NEXT: br label [[REGION_EXIT13:%.*]] ; CHECK: region.exit13: ; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -160,7 +161,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: region.guarded.end16: ; CHECK-NEXT: br label [[REGION_BARRIER17]] ; CHECK: region.barrier17: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) ; CHECK-NEXT: br label [[REGION_EXIT18:%.*]] ; CHECK: region.exit18: ; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fba36d42d185..f200ef57be57 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -303,8 +303,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { 
EXPECT_EQ(Barrier->getNumUses(), 0U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), - CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -342,7 +341,6 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { BB = BB->getTerminator()->getSuccessor(0); EXPECT_EQ(BB->size(), 4U); - CallInst *GTID = dyn_cast(&BB->front()); EXPECT_NE(GTID, nullptr); EXPECT_EQ(GTID->arg_size(), 1U); @@ -360,7 +358,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), NewIP.getBlock()); + EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), + NewIP.getBlock()); EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); EXPECT_NE(GTID1, nullptr); @@ -377,8 +376,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { EXPECT_EQ(Barrier->getNumUses(), 0U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), - CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -502,8 +500,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { Builder.CreateStore(F->arg_begin(), PrivAI); Builder.restoreIP(CodeGenIP); - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, 
CodeGenIP.getBlock()->getTerminator(), @@ -809,8 +807,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { Builder.CreateStore(F->arg_begin(), PrivAI); Builder.restoreIP(CodeGenIP); - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), @@ -2011,8 +2009,8 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { EntryBB = ThenBB->getUniquePredecessor(); // simple instructions for body - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -2164,8 +2162,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { // body begin Builder.restoreIP(CodeGenIP); Builder.CreateStore(F->arg_begin(), PrivAI); - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -2538,32 +2536,33 @@ TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - IntegerType* Int32 = Type::getInt32Ty(M->getContext()); - AllocaInst* MasterAddress = Builder.CreateAlloca(Int32->getPointerTo()); - AllocaInst* PrivAddress = Builder.CreateAlloca(Int32->getPointerTo()); + IntegerType *Int32 = Type::getInt32Ty(M->getContext()); + AllocaInst *MasterAddress = Builder.CreateAlloca(Int32->getPointerTo()); + AllocaInst *PrivAddress = Builder.CreateAlloca(Int32->getPointerTo()); BasicBlock *EntryBB = BB; OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, PrivAddress, Int32, /*BranchtoEnd*/ true); - BranchInst* 
EntryBr = dyn_cast_or_null(EntryBB->getTerminator()); + BranchInst *EntryBr = dyn_cast_or_null(EntryBB->getTerminator()); EXPECT_NE(EntryBr, nullptr); EXPECT_TRUE(EntryBr->isConditional()); - BasicBlock* NotMasterBB = EntryBr->getSuccessor(0); - BasicBlock* CopyinEnd = EntryBr->getSuccessor(1); - CmpInst* CMP = dyn_cast_or_null(EntryBr->getCondition()); + BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); + BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); + CmpInst *CMP = dyn_cast_or_null(EntryBr->getCondition()); EXPECT_NE(CMP, nullptr); EXPECT_NE(NotMasterBB, nullptr); EXPECT_NE(CopyinEnd, nullptr); - BranchInst* NotMasterBr = dyn_cast_or_null(NotMasterBB->getTerminator()); + BranchInst *NotMasterBr = + dyn_cast_or_null(NotMasterBB->getTerminator()); EXPECT_NE(NotMasterBr, nullptr); EXPECT_FALSE(NotMasterBr->isConditional()); - EXPECT_EQ(CopyinEnd,NotMasterBr->getSuccessor(0)); + EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); } TEST_F(OpenMPIRBuilderTest, SingleDirective) { @@ -2602,8 +2601,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { EntryBB = ThenBB->getUniquePredecessor(); // simple instructions for body - Value *PrivLoad = Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, - "local.use"); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; @@ -3106,8 +3105,9 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t StrSize; + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *SumLocal = Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); @@ -3339,8 +3339,9 @@ TEST_F(OpenMPIRBuilderTest, 
CreateTwoReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t StrSize; + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *SumLocal = Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); @@ -3359,8 +3360,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); - Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t StrSize; + Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); + Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); Value *TID = OMPBuilder.getOrCreateThreadID(Ident); Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); @@ -3695,8 +3697,11 @@ TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { IRBuilder<> Builder(BB); - Constant *Cst1 = OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5); - Constant *Cst2 = OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5); + uint32_t StrSize; + Constant *Cst1 = + OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); + Constant *Cst2 = + OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); SmallVector Names = {Cst1, Cst2}; GlobalVariable *OffloadMaptypesGlobal = @@ -3798,11 +3803,15 @@ TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { SmallVector Flags = {0, 2}; - Constant *SrcLocCst = OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5); - Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst); + uint32_t StrSize; + Constant *SrcLocCst = + OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize); + Value 
*SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); - Constant *Cst1 = OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5); - Constant *Cst2 = OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5); + Constant *Cst1 = + OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); + Constant *Cst2 = + OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); SmallVector Names = {Cst1, Cst2}; GlobalVariable *Maptypes = diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp index e4196aeca50a..bda505d93503 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp @@ -49,17 +49,18 @@ static constexpr int64_t kDefaultDevice = -1; /// Create a constant string location from the MLIR Location information. static llvm::Constant *createSourceLocStrFromLocation(Location loc, OpenACCIRBuilder &builder, - StringRef name) { + StringRef name, + uint32_t &strLen) { if (auto fileLoc = loc.dyn_cast()) { StringRef fileName = fileLoc.getFilename(); unsigned lineNo = fileLoc.getLine(); unsigned colNo = fileLoc.getColumn(); - return builder.getOrCreateSrcLocStr(name, fileName, lineNo, colNo); + return builder.getOrCreateSrcLocStr(name, fileName, lineNo, colNo, strLen); } std::string locStr; llvm::raw_string_ostream locOS(locStr); locOS << loc; - return builder.getOrCreateSrcLocStr(locOS.str()); + return builder.getOrCreateSrcLocStr(locOS.str(), strLen); } /// Create the location struct from the operation location information. @@ -68,20 +69,23 @@ static llvm::Value *createSourceLocationInfo(OpenACCIRBuilder &builder, auto loc = op->getLoc(); auto funcOp = op->getParentOfType(); StringRef funcName = funcOp ? 
funcOp.getName() : "unknown"; + uint32_t strLen; llvm::Constant *locStr = - createSourceLocStrFromLocation(loc, builder, funcName); - return builder.getOrCreateIdent(locStr); + createSourceLocStrFromLocation(loc, builder, funcName, strLen); + return builder.getOrCreateIdent(locStr, strLen); } /// Create a constant string representing the mapping information extracted from /// the MLIR location information. static llvm::Constant *createMappingInformation(Location loc, OpenACCIRBuilder &builder) { + uint32_t strLen; if (auto nameLoc = loc.dyn_cast()) { StringRef name = nameLoc.getName(); - return createSourceLocStrFromLocation(nameLoc.getChildLoc(), builder, name); + return createSourceLocStrFromLocation(nameLoc.getChildLoc(), builder, name, + strLen); } - return createSourceLocStrFromLocation(loc, builder, "unknown"); + return createSourceLocStrFromLocation(loc, builder, "unknown", strLen); } /// Return the runtime function used to lower the given operation. From 7e14e881c4111a4044c660978a6fee5cab8b95c4 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 29 Dec 2021 01:29:07 -0600 Subject: [PATCH 172/992] [OpenMP][OpenACC] Update test after encoding change in D113126 --- mlir/test/Target/LLVMIR/openacc-llvm.mlir | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/test/Target/LLVMIR/openacc-llvm.mlir b/mlir/test/Target/LLVMIR/openacc-llvm.mlir index 517897e78611..d0ebfe0e2c9c 100644 --- a/mlir/test/Target/LLVMIR/openacc-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openacc-llvm.mlir @@ -24,7 +24,7 @@ llvm.func @testenterdataop(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: // CHECK: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK: [[LOCSTR:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};testenterdataop;{{[0-9]*}};{{[0-9]*}};;\00", align 1 -// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* 
[[LOCSTR]], i32 0, i32 0) }, align 8 +// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]*}}, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 // CHECK: [[MAPNAME1:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPNAME2:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPTYPES:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i64] [i64 0, i64 1] @@ -84,7 +84,7 @@ llvm.func @testexitdataop(%arg0: !llvm.struct<(ptr, ptr, i64, array<1 // CHECK: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK: [[LOCSTR:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};testexitdataop;{{[0-9]*}};{{[0-9]*}};;\00", align 1 -// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 +// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]*}}, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 // CHECK: [[MAPNAME1:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPNAME2:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPTYPES:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i64] [i64 8, i64 2] @@ -143,7 +143,7 @@ llvm.func @testupdateop(%arg0: !llvm.struct<(ptr, ptr, i64, array<1 x // CHECK: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK: [[LOCSTR:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};testupdateop;{{[0-9]*}};{{[0-9]*}};;\00", align 1 -// CHECK: [[LOCGLOBAL:@.*]] 
= private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 +// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]*}}, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 // CHECK: [[MAPNAME1:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPNAME2:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPTYPES:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i64] [i64 2, i64 1] @@ -205,7 +205,7 @@ llvm.func @testdataop(%arg0: !llvm.struct<(ptr, ptr, i64, array<1 x i6 // CHECK: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK: [[LOCSTR:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};testdataop;{{[0-9]*}};{{[0-9]*}};;\00", align 1 -// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 +// CHECK: [[LOCGLOBAL:@.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]*}}, i8* getelementptr inbounds ([{{[0-9]*}} x i8], [{{[0-9]*}} x i8]* [[LOCSTR]], i32 0, i32 0) }, align 8 // CHECK: [[MAPNAME1:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPNAME2:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i8] c";{{.*}};unknown;{{[0-9]*}};{{[0-9]*}};;\00", align 1 // CHECK: [[MAPTYPES:@.*]] = private unnamed_addr constant [{{[0-9]*}} x i64] [i64 8195, i64 8194] From 8414321becdb35f2a9c5118bd4840bb8c8272f01 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 28 Dec 2021 23:47:25 -0800 Subject: [PATCH 173/992] [Hexagon] Use 
range-based for loops (NFC) --- llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 127 ++++++++----------- 1 file changed, 53 insertions(+), 74 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index a53efeb96961..fc5e05d8c9a0 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -290,13 +290,11 @@ namespace { raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ - using const_iterator = NodeToUsesMap::const_iterator; - - for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) { - const UseSet &Us = I->second; - OS << I->first << " -> #" << Us.size() << '{'; - for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) { - User *R = (*J)->getUser(); + for (const auto &I : M) { + const UseSet &Us = I.second; + OS << I.first << " -> #" << Us.size() << '{'; + for (const Use *U : Us) { + User *R = U->getUser(); if (R->hasName()) OS << ' ' << R->getName(); else @@ -420,15 +418,12 @@ void HexagonCommonGEP::collect() { // instruction that uses another GEP instruction as the base pointer, the // gep node for the base pointer should already exist. 
ValueToNodeMap NM; - for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) { - BasicBlock *B = cast(*I); - for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) { - if (!isa(J)) - continue; - GetElementPtrInst *GepI = cast(J); - if (isHandledGepForm(GepI)) - processGepInst(GepI, NM); - } + for (Value *I : BO) { + BasicBlock *B = cast(I); + for (Instruction &J : *B) + if (auto *GepI = dyn_cast(&J)) + if (isHandledGepForm(GepI)) + processGepInst(GepI, NM); } LLVM_DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); @@ -436,17 +431,14 @@ void HexagonCommonGEP::collect() { static void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, NodeVect &Roots) { - using const_iterator = NodeVect::const_iterator; - - for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { - GepNode *N = *I; - if (N->Flags & GepNode::Root) { - Roots.push_back(N); - continue; - } - GepNode *PN = N->Parent; - NCM[PN].push_back(N); + for (GepNode *N : Nodes) { + if (N->Flags & GepNode::Root) { + Roots.push_back(N); + continue; } + GepNode *PN = N->Parent; + NCM[PN].push_back(N); + } } static void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, @@ -546,8 +538,7 @@ void HexagonCommonGEP::common() { using NodeSetMap = std::map; NodeSetMap MaybeEq; - for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { - GepNode *N = *I; + for (GepNode *N : Nodes) { unsigned H = node_hash(N); MaybeEq[H].insert(N); } @@ -556,9 +547,8 @@ void HexagonCommonGEP::common() { // one for equality and the other for non-equality. NodeSymRel EqRel; // Equality relation (as set of equivalence classes). NodePairSet Eq, Ne; // Caches. 
- for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end(); - I != E; ++I) { - NodeSet &S = I->second; + for (auto &I : MaybeEq) { + NodeSet &S = I.second; for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) { GepNode *N = *NI; // If node already has a class, then the class must have been created @@ -612,8 +602,7 @@ void HexagonCommonGEP::common() { // Update the min element's flags, and user list. uint32_t Flags = 0; UseSet &MinUs = Uses[Min]; - for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) { - GepNode *N = *J; + for (GepNode *N : S) { uint32_t NF = N->Flags; // If N is used, append all original values of N to the list of // original values of Min. @@ -633,8 +622,7 @@ void HexagonCommonGEP::common() { // selected (minimum) node from the corresponding equivalence class. // If a given parent does not have an equivalence class, leave it // unchanged (it means that it's the only element in its class). - for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { - GepNode *N = *I; + for (GepNode *N : Nodes) { if (N->Flags & GepNode::Root) continue; const NodeSet *PC = node_class(N->Parent, EqRel); @@ -652,8 +640,7 @@ void HexagonCommonGEP::common() { // Finally, erase the nodes that are no longer used. NodeSet Erase; - for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { - GepNode *N = *I; + for (GepNode *N : Nodes) { const NodeSet *PC = node_class(N, EqRel); if (!PC) continue; @@ -663,7 +650,7 @@ void HexagonCommonGEP::common() { if (N == F->second) continue; // Node for removal. 
- Erase.insert(*I); + Erase.insert(N); } erase_if(Nodes, in_set(Erase)); @@ -775,8 +762,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, NodeToUsesMap::iterator UF = Uses.find(Node); assert(UF != Uses.end() && "Used node with no use information"); UseSet &Us = UF->second; - for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { - Use *U = *I; + for (Use *U : Us) { User *R = U->getUser(); if (!isa(R)) continue; @@ -790,8 +776,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, NodeChildrenMap::iterator CF = NCM.find(Node); if (CF != NCM.end()) { NodeVect &Cs = CF->second; - for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { - GepNode *CN = *I; + for (GepNode *CN : Cs) { NodeToValueMap::iterator LF = Loc.find(CN); // If the child is only used in GEP instructions (i.e. is not used in // non-GEP instructions), the nearest dominator computed for it may @@ -831,8 +816,8 @@ BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, NodeChildrenMap::iterator CF = NCM.find(Node); if (CF != NCM.end()) { NodeVect &Cs = CF->second; - for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) - recalculatePlacementRec(*I, NCM, Loc); + for (GepNode *C : Cs) + recalculatePlacementRec(C, NCM, Loc); } BasicBlock *LB = recalculatePlacement(Node, NCM, Loc); LLVM_DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); @@ -921,8 +906,8 @@ BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, NodeChildrenMap::iterator CF = NCM.find(Node); if (CF != NCM.end()) { NodeVect &Cs = CF->second; - for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) - adjustForInvariance(*I, NCM, Loc); + for (GepNode *C : Cs) + adjustForInvariance(C, NCM, Loc); } return LocB; } @@ -938,10 +923,9 @@ namespace { raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) { - for 
(NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end(); - I != E; ++I) { - OS << I->first << " -> "; - if (BasicBlock *B = cast_or_null(I->second)) + for (const auto &I : Loc.Map) { + OS << I.first << " -> "; + if (BasicBlock *B = cast_or_null(I.second)) OS << B->getName() << '(' << B << ')'; else OS << ""; @@ -1016,8 +1000,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node, // Collect all used nodes together with the uses from loads and stores, // where the GEP node could be folded into the load/store instruction. NodeToUsesMap FNs; // Foldable nodes. - for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) { - GepNode *N = *I; + for (GepNode *N : Ns) { if (!(N->Flags & GepNode::Used)) continue; NodeToUsesMap::iterator UF = Uses.find(N); @@ -1025,8 +1008,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node, UseSet &Us = UF->second; // Loads/stores that use the node N. UseSet LSs; - for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) { - Use *U = *J; + for (Use *U : Us) { User *R = U->getUser(); // We're interested in uses that provide the address. It can happen // that the value may also be provided via GEP, but we won't handle @@ -1051,11 +1033,11 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node, LLVM_DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); - for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) { - GepNode *N = I->first; - UseSet &Us = I->second; - for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) - separateChainForNode(N, *J, Loc); + for (auto &FN : FNs) { + GepNode *N = FN.first; + UseSet &Us = FN.second; + for (Use *U : Us) + separateChainForNode(N, U, Loc); } } @@ -1068,21 +1050,21 @@ void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { // Compute the initial placement determined by the users' locations, and // the locations of the child nodes. 
- for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) - recalculatePlacementRec(*I, NCM, Loc); + for (GepNode *Root : Roots) + recalculatePlacementRec(Root, NCM, Loc); LLVM_DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); if (OptEnableInv) { - for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) - adjustForInvariance(*I, NCM, Loc); + for (GepNode *Root : Roots) + adjustForInvariance(Root, NCM, Loc); LLVM_DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" << LocationAsBlock(Loc)); } if (OptEnableConst) { - for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) - separateConstantChains(*I, NCM, Loc); + for (GepNode *Root : Roots) + separateConstantChains(Root, NCM, Loc); } LLVM_DEBUG(dbgs() << "Node use information:\n" << Uses); @@ -1153,8 +1135,8 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, NodeToUsesMap::iterator UF = Uses.find(N); assert(UF != Uses.end() && "No use information for used node"); UseSet &Us = UF->second; - for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) - Values.push_back((*I)->getUser()); + for (const auto &U : Us) + Values.push_back(U->getUser()); } NodeChildrenMap::iterator CF = NCM.find(N); if (CF != NCM.end()) { @@ -1223,8 +1205,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { // to the Roots list. 
if (LastCN > 0) { NodeVect &Cs = NCM[Last]; - for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { - GepNode *CN = *I; + for (GepNode *CN : Cs) { CN->Flags &= ~GepNode::Internal; CN->Flags |= GepNode::Root; CN->BaseVal = NewInst; @@ -1238,10 +1219,8 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { NodeToUsesMap::iterator UF = Uses.find(Last); assert(UF != Uses.end() && "No use information found"); UseSet &Us = UF->second; - for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { - Use *U = *I; + for (Use *U : Us) U->set(NewInst); - } } } } @@ -1261,8 +1240,8 @@ void HexagonCommonGEP::removeDeadCode() { ValueVect Ins; for (Instruction &I : llvm::reverse(*B)) Ins.push_back(&I); - for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) { - Instruction *In = cast(*I); + for (Value *I : Ins) { + Instruction *In = cast(I); if (isInstructionTriviallyDead(In)) In->eraseFromParent(); } From b4682816bc6e35fb4e207227f73b88b2603d4363 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 29 Dec 2021 00:16:40 -0800 Subject: [PATCH 174/992] [clang] Fix header guards (NFC) Identified with llvm-header-guard. 
--- clang/include/clang/AST/AbstractBasicReader.h | 4 ++-- clang/include/clang/AST/AbstractBasicWriter.h | 4 ++-- clang/include/clang/AST/AbstractTypeReader.h | 4 ++-- clang/include/clang/AST/AbstractTypeWriter.h | 4 ++-- clang/include/clang/AST/ComputeDependence.h | 4 ++-- clang/include/clang/AST/CurrentSourceLocExprScope.h | 6 +++--- clang/include/clang/AST/DeclObjCCommon.h | 6 +++--- clang/include/clang/AST/FormatString.h | 4 ++-- .../include/clang/AST/LexicallyOrderedRecursiveASTVisitor.h | 6 +++--- clang/include/clang/AST/LocInfoType.h | 6 +++--- clang/include/clang/AST/NonTrivialTypeVisitor.h | 4 ++-- clang/include/clang/AST/OSLog.h | 4 ++-- clang/include/clang/AST/QualTypeNames.h | 2 +- clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h | 2 +- clang/include/clang/ASTMatchers/Dynamic/Parser.h | 2 +- clang/include/clang/ASTMatchers/Dynamic/Registry.h | 2 +- clang/include/clang/ASTMatchers/Dynamic/VariantValue.h | 2 +- clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h | 2 +- clang/include/clang/Analysis/Analyses/ThreadSafetyUtil.h | 2 +- clang/include/clang/Analysis/AnyCall.h | 6 +++--- clang/include/clang/Analysis/BodyFarm.h | 4 ++-- clang/include/clang/Analysis/CloneDetection.h | 6 +++--- .../include/clang/Analysis/FlowSensitive/DataflowWorklist.h | 2 +- clang/include/clang/Analysis/IssueHash.h | 4 ++-- clang/include/clang/Analysis/PathDiagnostic.h | 6 +++--- clang/include/clang/Analysis/RetainSummaryManager.h | 4 ++-- clang/include/clang/Analysis/SelectorExtras.h | 4 ++-- clang/include/clang/Basic/AlignedAllocation.h | 6 +++--- clang/include/clang/Basic/AttrSubjectMatchRules.h | 4 ++-- clang/include/clang/Basic/DarwinSDKInfo.h | 6 +++--- clang/include/clang/Basic/DiagnosticError.h | 6 +++--- clang/include/clang/Basic/OperatorPrecedence.h | 2 +- clang/include/clang/Basic/PragmaKinds.h | 4 ++-- clang/include/clang/Basic/ProfileList.h | 4 ++-- clang/include/clang/Basic/TargetID.h | 4 ++-- .../clang/CodeGen/ObjectFilePCHContainerOperations.h | 4 ++-- 
clang/include/clang/Frontend/PCHContainerOperations.h | 4 ++-- clang/include/clang/Frontend/PrecompiledPreamble.h | 4 ++-- clang/include/clang/Frontend/SerializedDiagnostics.h | 4 ++-- .../clang/IndexSerialization/SerializablePathCollection.h | 6 +++--- .../include/clang/Lex/DependencyDirectivesSourceMinimizer.h | 6 +++--- .../PreprocessorExcludedConditionalDirectiveSkipMapping.h | 6 +++--- clang/include/clang/Parse/RAIIObjectsForParser.h | 4 ++-- clang/include/clang/Sema/CleanupInfo.h | 4 ++-- clang/include/clang/Sema/ParsedAttr.h | 6 +++--- clang/include/clang/Sema/SemaConcept.h | 2 +- clang/include/clang/Sema/TemplateInstCallback.h | 4 ++-- clang/include/clang/Serialization/ModuleFileExtension.h | 2 +- .../StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h | 4 ++-- .../clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h | 4 ++-- .../Core/PathSensitive/RangedConstraintManager.h | 4 ++-- clang/include/clang/StaticAnalyzer/Frontend/ModelConsumer.h | 4 ++-- clang/include/clang/Tooling/CommonOptionsParser.h | 2 +- .../DependencyScanning/DependencyScanningFilesystem.h | 6 +++--- .../Tooling/DependencyScanning/DependencyScanningService.h | 6 +++--- .../Tooling/DependencyScanning/DependencyScanningTool.h | 6 +++--- .../Tooling/DependencyScanning/DependencyScanningWorker.h | 6 +++--- .../clang/Tooling/DependencyScanning/ModuleDepCollector.h | 6 +++--- clang/include/clang/Tooling/FixIt.h | 2 +- clang/include/clang/Tooling/Refactoring/ASTSelection.h | 6 +++--- clang/include/clang/Tooling/Refactoring/AtomicChange.h | 6 +++--- clang/include/clang/Tooling/Refactoring/Extract/Extract.h | 6 +++--- .../clang/Tooling/Refactoring/Extract/SourceExtraction.h | 6 +++--- clang/include/clang/Tooling/Refactoring/Lookup.h | 6 +++--- .../clang/Tooling/Refactoring/RecursiveSymbolVisitor.h | 6 +++--- clang/include/clang/Tooling/Refactoring/RefactoringAction.h | 6 +++--- .../clang/Tooling/Refactoring/RefactoringActionRule.h | 6 +++--- 
.../Tooling/Refactoring/RefactoringActionRuleRequirements.h | 6 +++--- .../clang/Tooling/Refactoring/RefactoringActionRules.h | 6 +++--- .../Tooling/Refactoring/RefactoringActionRulesInternal.h | 6 +++--- clang/include/clang/Tooling/Refactoring/RefactoringOption.h | 6 +++--- .../clang/Tooling/Refactoring/RefactoringOptionVisitor.h | 6 +++--- .../include/clang/Tooling/Refactoring/RefactoringOptions.h | 6 +++--- .../clang/Tooling/Refactoring/RefactoringResultConsumer.h | 6 +++--- .../clang/Tooling/Refactoring/RefactoringRuleContext.h | 6 +++--- .../clang/Tooling/Refactoring/Rename/RenamingAction.h | 6 +++--- clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h | 6 +++--- .../clang/Tooling/Refactoring/Rename/SymbolOccurrences.h | 6 +++--- clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h | 6 +++--- .../clang/Tooling/Refactoring/Rename/USRFindingAction.h | 6 +++--- .../include/clang/Tooling/Refactoring/Rename/USRLocFinder.h | 6 +++--- clang/include/clang/Tooling/Syntax/BuildTree.h | 4 ++-- clang/include/clang/Tooling/Syntax/Tree.h | 4 ++-- clang/include/clang/Tooling/Transformer/MatchConsumer.h | 6 +++--- clang/include/clang/Tooling/Transformer/Parsing.h | 6 +++--- clang/include/clang/Tooling/Transformer/RangeSelector.h | 6 +++--- clang/include/clang/Tooling/Transformer/RewriteRule.h | 6 +++--- clang/include/clang/Tooling/Transformer/SourceCode.h | 6 +++--- .../include/clang/Tooling/Transformer/SourceCodeBuilders.h | 6 +++--- 89 files changed, 212 insertions(+), 212 deletions(-) diff --git a/clang/include/clang/AST/AbstractBasicReader.h b/clang/include/clang/AST/AbstractBasicReader.h index 442039044cfe..b2fc2d2c7e4b 100644 --- a/clang/include/clang/AST/AbstractBasicReader.h +++ b/clang/include/clang/AST/AbstractBasicReader.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CLANG_AST_ABSTRACTBASICREADER_H -#define CLANG_AST_ABSTRACTBASICREADER_H +#ifndef LLVM_CLANG_AST_ABSTRACTBASICREADER_H 
+#define LLVM_CLANG_AST_ABSTRACTBASICREADER_H #include "clang/AST/DeclTemplate.h" diff --git a/clang/include/clang/AST/AbstractBasicWriter.h b/clang/include/clang/AST/AbstractBasicWriter.h index 75aef734ba9b..41772ba0f63c 100644 --- a/clang/include/clang/AST/AbstractBasicWriter.h +++ b/clang/include/clang/AST/AbstractBasicWriter.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CLANG_AST_ABSTRACTBASICWRITER_H -#define CLANG_AST_ABSTRACTBASICWRITER_H +#ifndef LLVM_CLANG_AST_ABSTRACTBASICWRITER_H +#define LLVM_CLANG_AST_ABSTRACTBASICWRITER_H #include "clang/AST/ASTContext.h" #include "clang/AST/DeclTemplate.h" diff --git a/clang/include/clang/AST/AbstractTypeReader.h b/clang/include/clang/AST/AbstractTypeReader.h index 9fea7b26f678..c9162b1779bc 100644 --- a/clang/include/clang/AST/AbstractTypeReader.h +++ b/clang/include/clang/AST/AbstractTypeReader.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CLANG_AST_ABSTRACTTYPEREADER_H -#define CLANG_AST_ABSTRACTTYPEREADER_H +#ifndef LLVM_CLANG_AST_ABSTRACTTYPEREADER_H +#define LLVM_CLANG_AST_ABSTRACTTYPEREADER_H #include "clang/AST/Type.h" #include "clang/AST/AbstractBasicReader.h" diff --git a/clang/include/clang/AST/AbstractTypeWriter.h b/clang/include/clang/AST/AbstractTypeWriter.h index a63cb0be099d..62006ef0f26e 100644 --- a/clang/include/clang/AST/AbstractTypeWriter.h +++ b/clang/include/clang/AST/AbstractTypeWriter.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CLANG_AST_ABSTRACTTYPEWRITER_H -#define CLANG_AST_ABSTRACTTYPEWRITER_H +#ifndef LLVM_CLANG_AST_ABSTRACTTYPEWRITER_H +#define LLVM_CLANG_AST_ABSTRACTTYPEWRITER_H #include "clang/AST/Type.h" #include "clang/AST/AbstractBasicWriter.h" diff --git a/clang/include/clang/AST/ComputeDependence.h b/clang/include/clang/AST/ComputeDependence.h index 8db09e6b57d0..cb545aff51f8 100644 --- 
a/clang/include/clang/AST/ComputeDependence.h +++ b/clang/include/clang/AST/ComputeDependence.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_AST_COMPUTE_DEPENDENCE_H -#define LLVM_CLANG_AST_COMPUTE_DEPENDENCE_H +#ifndef LLVM_CLANG_AST_COMPUTEDEPENDENCE_H +#define LLVM_CLANG_AST_COMPUTEDEPENDENCE_H #include "clang/AST/DependenceFlags.h" #include "clang/Basic/ExceptionSpecificationType.h" diff --git a/clang/include/clang/AST/CurrentSourceLocExprScope.h b/clang/include/clang/AST/CurrentSourceLocExprScope.h index 34df8ce1309e..4f8343efad16 100644 --- a/clang/include/clang/AST/CurrentSourceLocExprScope.h +++ b/clang/include/clang/AST/CurrentSourceLocExprScope.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_AST_CURRENT_SOURCE_LOC_EXPR_SCOPE_H -#define LLVM_CLANG_AST_CURRENT_SOURCE_LOC_EXPR_SCOPE_H +#ifndef LLVM_CLANG_AST_CURRENTSOURCELOCEXPRSCOPE_H +#define LLVM_CLANG_AST_CURRENTSOURCELOCEXPRSCOPE_H #include @@ -71,4 +71,4 @@ class CurrentSourceLocExprScope::SourceLocExprScopeGuard { } // end namespace clang -#endif // LLVM_CLANG_AST_CURRENT_SOURCE_LOC_EXPR_SCOPE_H +#endif // LLVM_CLANG_AST_CURRENTSOURCELOCEXPRSCOPE_H diff --git a/clang/include/clang/AST/DeclObjCCommon.h b/clang/include/clang/AST/DeclObjCCommon.h index 5f03bce6e9a8..42c97204a613 100644 --- a/clang/include/clang/AST/DeclObjCCommon.h +++ b/clang/include/clang/AST/DeclObjCCommon.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_AST_DECLOBJC_COMMON_H -#define LLVM_CLANG_AST_DECLOBJC_COMMON_H +#ifndef LLVM_CLANG_AST_DECLOBJCCOMMON_H +#define LLVM_CLANG_AST_DECLOBJCCOMMON_H namespace clang { @@ -52,4 +52,4 @@ enum { } // namespace clang -#endif // LLVM_CLANG_AST_DECLOBJC_COMMON_H +#endif // LLVM_CLANG_AST_DECLOBJCCOMMON_H diff --git a/clang/include/clang/AST/FormatString.h 
b/clang/include/clang/AST/FormatString.h index 8c944451f796..a1271f920ae6 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H +#ifndef LLVM_CLANG_AST_FORMATSTRING_H +#define LLVM_CLANG_AST_FORMATSTRING_H #include "clang/AST/CanonicalType.h" diff --git a/clang/include/clang/AST/LexicallyOrderedRecursiveASTVisitor.h b/clang/include/clang/AST/LexicallyOrderedRecursiveASTVisitor.h index e42f0449f6db..054220b8a32c 100644 --- a/clang/include/clang/AST/LexicallyOrderedRecursiveASTVisitor.h +++ b/clang/include/clang/AST/LexicallyOrderedRecursiveASTVisitor.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_AST_LEXICALLY_ORDERED_RECURSIVEASTVISITOR_H -#define LLVM_CLANG_AST_LEXICALLY_ORDERED_RECURSIVEASTVISITOR_H +#ifndef LLVM_CLANG_AST_LEXICALLYORDEREDRECURSIVEASTVISITOR_H +#define LLVM_CLANG_AST_LEXICALLYORDEREDRECURSIVEASTVISITOR_H #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/LLVM.h" @@ -160,4 +160,4 @@ class LexicallyOrderedRecursiveASTVisitor } // end namespace clang -#endif // LLVM_CLANG_AST_LEXICALLY_ORDERED_RECURSIVEASTVISITOR_H +#endif // LLVM_CLANG_AST_LEXICALLYORDEREDRECURSIVEASTVISITOR_H diff --git a/clang/include/clang/AST/LocInfoType.h b/clang/include/clang/AST/LocInfoType.h index 7e845ad03587..876c7deeceb9 100644 --- a/clang/include/clang/AST/LocInfoType.h +++ b/clang/include/clang/AST/LocInfoType.h @@ -10,8 +10,8 @@ // source-location information. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SEMA_LOCINFOTYPE_H -#define LLVM_CLANG_SEMA_LOCINFOTYPE_H +#ifndef LLVM_CLANG_AST_LOCINFOTYPE_H +#define LLVM_CLANG_AST_LOCINFOTYPE_H #include "clang/AST/Type.h" @@ -54,4 +54,4 @@ class LocInfoType : public Type { } // end namespace clang -#endif // LLVM_CLANG_SEMA_LOCINFOTYPE_H +#endif // LLVM_CLANG_AST_LOCINFOTYPE_H diff --git a/clang/include/clang/AST/NonTrivialTypeVisitor.h b/clang/include/clang/AST/NonTrivialTypeVisitor.h index c95516538ad1..cf320c8a478a 100644 --- a/clang/include/clang/AST/NonTrivialTypeVisitor.h +++ b/clang/include/clang/AST/NonTrivialTypeVisitor.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_NON_TRIVIAL_TYPE_VISITOR_H -#define LLVM_CLANG_NON_TRIVIAL_TYPE_VISITOR_H +#ifndef LLVM_CLANG_AST_NONTRIVIALTYPEVISITOR_H +#define LLVM_CLANG_AST_NONTRIVIALTYPEVISITOR_H #include "clang/AST/Type.h" diff --git a/clang/include/clang/AST/OSLog.h b/clang/include/clang/AST/OSLog.h index c24e79ce6da0..3772597e2616 100644 --- a/clang/include/clang/AST/OSLog.h +++ b/clang/include/clang/AST/OSLog.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_OSLOG_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_OSLOG_H +#ifndef LLVM_CLANG_AST_OSLOG_H +#define LLVM_CLANG_AST_OSLOG_H #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" diff --git a/clang/include/clang/AST/QualTypeNames.h b/clang/include/clang/AST/QualTypeNames.h index 8313e0441be5..daa86cda2d99 100644 --- a/clang/include/clang/AST/QualTypeNames.h +++ b/clang/include/clang/AST/QualTypeNames.h @@ -89,4 +89,4 @@ QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx, bool WithGlobalNsPrefix = false); } // end namespace TypeName } // end namespace clang -#endif // LLVM_CLANG_TOOLING_CORE_QUALTYPENAMES_H +#endif // 
LLVM_CLANG_AST_QUALTYPENAMES_H diff --git a/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h b/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h index 10625311c1a5..af1affc16dbc 100644 --- a/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h +++ b/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h @@ -186,4 +186,4 @@ class Diagnostics { } // namespace ast_matchers } // namespace clang -#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_DIAGNOSTICS_H +#endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_DIAGNOSTICS_H diff --git a/clang/include/clang/ASTMatchers/Dynamic/Parser.h b/clang/include/clang/ASTMatchers/Dynamic/Parser.h index af370d83782a..26e321c98ff6 100644 --- a/clang/include/clang/ASTMatchers/Dynamic/Parser.h +++ b/clang/include/clang/ASTMatchers/Dynamic/Parser.h @@ -280,4 +280,4 @@ class Parser { } // namespace ast_matchers } // namespace clang -#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H +#endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H diff --git a/clang/include/clang/ASTMatchers/Dynamic/Registry.h b/clang/include/clang/ASTMatchers/Dynamic/Registry.h index f91f5fe01c4e..ee47469c6e18 100644 --- a/clang/include/clang/ASTMatchers/Dynamic/Registry.h +++ b/clang/include/clang/ASTMatchers/Dynamic/Registry.h @@ -157,4 +157,4 @@ class Registry { } // namespace ast_matchers } // namespace clang -#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_REGISTRY_H +#endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_REGISTRY_H diff --git a/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h b/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h index 5b3f8a7ca5eb..e1c19eb835ba 100644 --- a/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h +++ b/clang/include/clang/ASTMatchers/Dynamic/VariantValue.h @@ -356,4 +356,4 @@ class VariantValue { } // end namespace ast_matchers } // end namespace clang -#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_VARIANT_VALUE_H +#endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_VARIANTVALUE_H diff --git 
a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index a0ae44131b45..2f6a78126a1d 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -517,4 +517,4 @@ void printSCFG(CFGWalker &Walker); } // namespace threadSafety } // namespace clang -#endif // LLVM_CLANG_THREAD_SAFETY_COMMON_H +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYCOMMON_H diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyUtil.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyUtil.h index e3b6e61d3026..088474b9b298 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyUtil.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyUtil.h @@ -354,4 +354,4 @@ inline std::ostream& operator<<(std::ostream& ss, const StringRef str) { } // namespace threadSafety } // namespace clang -#endif // LLVM_CLANG_THREAD_SAFETY_UTIL_H +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYUTIL_H diff --git a/clang/include/clang/Analysis/AnyCall.h b/clang/include/clang/Analysis/AnyCall.h index 846ff7719ce1..6e5e019ce263 100644 --- a/clang/include/clang/Analysis/AnyCall.h +++ b/clang/include/clang/Analysis/AnyCall.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// // -#ifndef LLVM_CLANG_ANALYSIS_ANY_CALL_H -#define LLVM_CLANG_ANALYSIS_ANY_CALL_H +#ifndef LLVM_CLANG_ANALYSIS_ANYCALL_H +#define LLVM_CLANG_ANALYSIS_ANYCALL_H #include "clang/AST/Decl.h" #include "clang/AST/ExprCXX.h" @@ -215,4 +215,4 @@ class AnyCall { } -#endif // LLVM_CLANG_ANALYSIS_ANY_CALL_H +#endif // LLVM_CLANG_ANALYSIS_ANYCALL_H diff --git a/clang/include/clang/Analysis/BodyFarm.h b/clang/include/clang/Analysis/BodyFarm.h index 72607f8839f5..3863cc204d6c 100644 --- a/clang/include/clang/Analysis/BodyFarm.h +++ b/clang/include/clang/Analysis/BodyFarm.h @@ -11,8 +11,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H -#define LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H +#ifndef LLVM_CLANG_ANALYSIS_BODYFARM_H +#define LLVM_CLANG_ANALYSIS_BODYFARM_H #include "clang/AST/DeclBase.h" #include "clang/Basic/LLVM.h" diff --git a/clang/include/clang/Analysis/CloneDetection.h b/clang/include/clang/Analysis/CloneDetection.h index 0b86c7fd86dd..b2911a5b44eb 100644 --- a/clang/include/clang/Analysis/CloneDetection.h +++ b/clang/include/clang/Analysis/CloneDetection.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_AST_CLONEDETECTION_H -#define LLVM_CLANG_AST_CLONEDETECTION_H +#ifndef LLVM_CLANG_ANALYSIS_CLONEDETECTION_H +#define LLVM_CLANG_ANALYSIS_CLONEDETECTION_H #include "clang/AST/StmtVisitor.h" #include "llvm/Support/Regex.h" @@ -441,4 +441,4 @@ struct MatchingVariablePatternConstraint { } // end namespace clang -#endif // LLVM_CLANG_AST_CLONEDETECTION_H +#endif // LLVM_CLANG_ANALYSIS_CLONEDETECTION_H diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h index 52d84eb13c56..e926adf6f0b2 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h @@ -92,4 +92,4 @@ struct BackwardDataflowWorklist } // namespace clang -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_CONSUMED_H +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWWORKLIST_H diff --git a/clang/include/clang/Analysis/IssueHash.h b/clang/include/clang/Analysis/IssueHash.h index 9c02b79f58f9..78bebbdb6ec7 100644 --- a/clang/include/clang/Analysis/IssueHash.h +++ b/clang/include/clang/Analysis/IssueHash.h @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#ifndef 
LLVM_CLANG_STATICANALYZER_CORE_ISSUE_HASH_H -#define LLVM_CLANG_STATICANALYZER_CORE_ISSUE_HASH_H +#ifndef LLVM_CLANG_ANALYSIS_ISSUEHASH_H +#define LLVM_CLANG_ANALYSIS_ISSUEHASH_H #include "llvm/ADT/SmallString.h" diff --git a/clang/include/clang/Analysis/PathDiagnostic.h b/clang/include/clang/Analysis/PathDiagnostic.h index 235d26083191..553708d9ec0c 100644 --- a/clang/include/clang/Analysis/PathDiagnostic.h +++ b/clang/include/clang/Analysis/PathDiagnostic.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_PATHDIAGNOSTIC_H -#define LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_PATHDIAGNOSTIC_H +#ifndef LLVM_CLANG_ANALYSIS_PATHDIAGNOSTIC_H +#define LLVM_CLANG_ANALYSIS_PATHDIAGNOSTIC_H #include "clang/AST/Stmt.h" #include "clang/Analysis/AnalysisDeclContext.h" @@ -905,4 +905,4 @@ class PathDiagnostic : public llvm::FoldingSetNode { } // namespace ento } // namespace clang -#endif // LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_PATHDIAGNOSTIC_H +#endif // LLVM_CLANG_ANALYSIS_PATHDIAGNOSTIC_H diff --git a/clang/include/clang/Analysis/RetainSummaryManager.h b/clang/include/clang/Analysis/RetainSummaryManager.h index b7ccb0317830..d9a0416e1ce5 100644 --- a/clang/include/clang/Analysis/RetainSummaryManager.h +++ b/clang/include/clang/Analysis/RetainSummaryManager.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_RETAINSUMMARY_MANAGER_H -#define LLVM_CLANG_ANALYSIS_RETAINSUMMARY_MANAGER_H +#ifndef LLVM_CLANG_ANALYSIS_RETAINSUMMARYMANAGER_H +#define LLVM_CLANG_ANALYSIS_RETAINSUMMARYMANAGER_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" diff --git a/clang/include/clang/Analysis/SelectorExtras.h b/clang/include/clang/Analysis/SelectorExtras.h index d26e9159a937..278f20e87cc6 100644 --- a/clang/include/clang/Analysis/SelectorExtras.h +++ 
b/clang/include/clang/Analysis/SelectorExtras.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_ANALYSIS_SELECTOREXTRAS_H -#define LLVM_CLANG_LIB_ANALYSIS_SELECTOREXTRAS_H +#ifndef LLVM_CLANG_ANALYSIS_SELECTOREXTRAS_H +#define LLVM_CLANG_ANALYSIS_SELECTOREXTRAS_H #include "clang/AST/ASTContext.h" diff --git a/clang/include/clang/Basic/AlignedAllocation.h b/clang/include/clang/Basic/AlignedAllocation.h index ab9f19da5d59..c1187b81420b 100644 --- a/clang/include/clang/Basic/AlignedAllocation.h +++ b/clang/include/clang/Basic/AlignedAllocation.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_ALIGNED_ALLOCATION_H -#define LLVM_CLANG_BASIC_ALIGNED_ALLOCATION_H +#ifndef LLVM_CLANG_BASIC_ALIGNEDALLOCATION_H +#define LLVM_CLANG_BASIC_ALIGNEDALLOCATION_H #include "llvm/ADT/Triple.h" #include "llvm/Support/ErrorHandling.h" @@ -42,4 +42,4 @@ inline llvm::VersionTuple alignedAllocMinVersion(llvm::Triple::OSType OS) { } // end namespace clang -#endif // LLVM_CLANG_BASIC_ALIGNED_ALLOCATION_H +#endif // LLVM_CLANG_BASIC_ALIGNEDALLOCATION_H diff --git a/clang/include/clang/Basic/AttrSubjectMatchRules.h b/clang/include/clang/Basic/AttrSubjectMatchRules.h index 010cefcaf340..4a4c1a883cf4 100644 --- a/clang/include/clang/Basic/AttrSubjectMatchRules.h +++ b/clang/include/clang/Basic/AttrSubjectMatchRules.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_ATTR_SUBJECT_MATCH_RULES_H -#define LLVM_CLANG_BASIC_ATTR_SUBJECT_MATCH_RULES_H +#ifndef LLVM_CLANG_BASIC_ATTRSUBJECTMATCHRULES_H +#define LLVM_CLANG_BASIC_ATTRSUBJECTMATCHRULES_H #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" diff --git a/clang/include/clang/Basic/DarwinSDKInfo.h b/clang/include/clang/Basic/DarwinSDKInfo.h index 918dc7c8becc..b0673dc8b3cd 100644 --- 
a/clang/include/clang/Basic/DarwinSDKInfo.h +++ b/clang/include/clang/Basic/DarwinSDKInfo.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_DARWIN_SDK_INFO_H -#define LLVM_CLANG_BASIC_DARWIN_SDK_INFO_H +#ifndef LLVM_CLANG_BASIC_DARWINSDKINFO_H +#define LLVM_CLANG_BASIC_DARWINSDKINFO_H #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" @@ -154,4 +154,4 @@ Expected> parseDarwinSDKInfo(llvm::vfs::FileSystem &VFS, } // end namespace clang -#endif // LLVM_CLANG_BASIC_DARWIN_SDK_INFO_H +#endif // LLVM_CLANG_BASIC_DARWINSDKINFO_H diff --git a/clang/include/clang/Basic/DiagnosticError.h b/clang/include/clang/Basic/DiagnosticError.h index 430da6f724ed..76d893a5ccf8 100644 --- a/clang/include/clang/Basic/DiagnosticError.h +++ b/clang/include/clang/Basic/DiagnosticError.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_DIAGNOSTIC_ERROR_H -#define LLVM_CLANG_BASIC_DIAGNOSTIC_ERROR_H +#ifndef LLVM_CLANG_BASIC_DIAGNOSTICERROR_H +#define LLVM_CLANG_BASIC_DIAGNOSTICERROR_H #include "clang/Basic/PartialDiagnostic.h" #include "llvm/Support/Error.h" @@ -57,4 +57,4 @@ class DiagnosticError : public llvm::ErrorInfo { } // end namespace clang -#endif // LLVM_CLANG_BASIC_DIAGNOSTIC_ERROR_H +#endif // LLVM_CLANG_BASIC_DIAGNOSTICERROR_H diff --git a/clang/include/clang/Basic/OperatorPrecedence.h b/clang/include/clang/Basic/OperatorPrecedence.h index 61ac7ad62f6b..9bda3eb28fdf 100644 --- a/clang/include/clang/Basic/OperatorPrecedence.h +++ b/clang/include/clang/Basic/OperatorPrecedence.h @@ -49,4 +49,4 @@ prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, } // end namespace clang -#endif // LLVM_CLANG_OPERATOR_PRECEDENCE_H +#endif // LLVM_CLANG_BASIC_OPERATORPRECEDENCE_H diff --git a/clang/include/clang/Basic/PragmaKinds.h b/clang/include/clang/Basic/PragmaKinds.h index 82c0d5f0a551..176bbc9ac7ca 
100644 --- a/clang/include/clang/Basic/PragmaKinds.h +++ b/clang/include/clang/Basic/PragmaKinds.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_PRAGMA_KINDS_H -#define LLVM_CLANG_BASIC_PRAGMA_KINDS_H +#ifndef LLVM_CLANG_BASIC_PRAGMAKINDS_H +#define LLVM_CLANG_BASIC_PRAGMAKINDS_H namespace clang { diff --git a/clang/include/clang/Basic/ProfileList.h b/clang/include/clang/Basic/ProfileList.h index 989c36549a3d..5b71928eb9d0 100644 --- a/clang/include/clang/Basic/ProfileList.h +++ b/clang/include/clang/Basic/ProfileList.h @@ -10,8 +10,8 @@ // functions. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_INSTRPROFLIST_H -#define LLVM_CLANG_BASIC_INSTRPROFLIST_H +#ifndef LLVM_CLANG_BASIC_PROFILELIST_H +#define LLVM_CLANG_BASIC_PROFILELIST_H #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LLVM.h" diff --git a/clang/include/clang/Basic/TargetID.h b/clang/include/clang/Basic/TargetID.h index 1a9785574d06..a55b15e9b92c 100644 --- a/clang/include/clang/Basic/TargetID.h +++ b/clang/include/clang/Basic/TargetID.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_TARGET_ID_H -#define LLVM_CLANG_BASIC_TARGET_ID_H +#ifndef LLVM_CLANG_BASIC_TARGETID_H +#define LLVM_CLANG_BASIC_TARGETID_H #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" diff --git a/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h b/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h index 8821cd70362e..c13e052149d9 100644 --- a/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h +++ b/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_CODEGEN_OBJECT_FILE_PCH_CONTAINER_OPERATIONS_H -#define 
LLVM_CLANG_CODEGEN_OBJECT_FILE_PCH_CONTAINER_OPERATIONS_H +#ifndef LLVM_CLANG_CODEGEN_OBJECTFILEPCHCONTAINEROPERATIONS_H +#define LLVM_CLANG_CODEGEN_OBJECTFILEPCHCONTAINEROPERATIONS_H #include "clang/Frontend/PCHContainerOperations.h" diff --git a/clang/include/clang/Frontend/PCHContainerOperations.h b/clang/include/clang/Frontend/PCHContainerOperations.h index fa977a63f32e..098d32ec3869 100644 --- a/clang/include/clang/Frontend/PCHContainerOperations.h +++ b/clang/include/clang/Frontend/PCHContainerOperations.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_PCH_CONTAINER_OPERATIONS_H -#define LLVM_CLANG_PCH_CONTAINER_OPERATIONS_H +#ifndef LLVM_CLANG_FRONTEND_PCHCONTAINEROPERATIONS_H +#define LLVM_CLANG_FRONTEND_PCHCONTAINEROPERATIONS_H #include "clang/Serialization/PCHContainerOperations.h" diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index dacbffef0b12..628736f34091 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FRONTEND_PRECOMPILED_PREAMBLE_H -#define LLVM_CLANG_FRONTEND_PRECOMPILED_PREAMBLE_H +#ifndef LLVM_CLANG_FRONTEND_PRECOMPILEDPREAMBLE_H +#define LLVM_CLANG_FRONTEND_PRECOMPILEDPREAMBLE_H #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" diff --git a/clang/include/clang/Frontend/SerializedDiagnostics.h b/clang/include/clang/Frontend/SerializedDiagnostics.h index 4e67fd13ac5b..6464693c1482 100644 --- a/clang/include/clang/Frontend/SerializedDiagnostics.h +++ b/clang/include/clang/Frontend/SerializedDiagnostics.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FRONTEND_SERIALIZE_DIAGNOSTICS_H_ -#define 
LLVM_CLANG_FRONTEND_SERIALIZE_DIAGNOSTICS_H_ +#ifndef LLVM_CLANG_FRONTEND_SERIALIZEDDIAGNOSTICS_H +#define LLVM_CLANG_FRONTEND_SERIALIZEDDIAGNOSTICS_H #include "llvm/Bitstream/BitCodes.h" diff --git a/clang/include/clang/IndexSerialization/SerializablePathCollection.h b/clang/include/clang/IndexSerialization/SerializablePathCollection.h index 20cf8fbdad96..eb66e725000c 100644 --- a/clang/include/clang/IndexSerialization/SerializablePathCollection.h +++ b/clang/include/clang/IndexSerialization/SerializablePathCollection.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_INDEX_SerializablePathCollection_H -#define LLVM_CLANG_INDEX_SerializablePathCollection_H +#ifndef LLVM_CLANG_INDEXSERIALIZATION_SERIALIZABLEPATHCOLLECTION_H +#define LLVM_CLANG_INDEXSERIALIZATION_SERIALIZABLEPATHCOLLECTION_H #include "clang/Basic/FileManager.h" #include "llvm/ADT/APInt.h" @@ -126,4 +126,4 @@ class SerializablePathCollection { } // namespace index } // namespace clang -#endif // LLVM_CLANG_INDEX_SerializablePathCollection_H +#endif // LLVM_CLANG_INDEXSERIALIZATION_SERIALIZABLEPATHCOLLECTION_H diff --git a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h index 121ca893e314..56025c8a3ed5 100644 --- a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h +++ b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h @@ -14,8 +14,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H -#define LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H +#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H +#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" @@ -112,4 +112,4 @@ bool minimizeSourceToDependencyDirectives( } // end namespace clang 
-#endif // LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H +#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H diff --git a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h index 1a0d5ed57b28..49687cb5cc85 100644 --- a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h +++ b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LEX_PREPROCESSOR_EXCLUDED_COND_DIRECTIVE_SKIP_MAPPING_H -#define LLVM_CLANG_LEX_PREPROCESSOR_EXCLUDED_COND_DIRECTIVE_SKIP_MAPPING_H +#ifndef LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H +#define LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" @@ -27,4 +27,4 @@ using ExcludedPreprocessorDirectiveSkipMapping = } // end namespace clang -#endif // LLVM_CLANG_LEX_PREPROCESSOR_EXCLUDED_COND_DIRECTIVE_SKIP_MAPPING_H +#endif // LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H diff --git a/clang/include/clang/Parse/RAIIObjectsForParser.h b/clang/include/clang/Parse/RAIIObjectsForParser.h index bc1754614ad9..8e6e03685c50 100644 --- a/clang/include/clang/Parse/RAIIObjectsForParser.h +++ b/clang/include/clang/Parse/RAIIObjectsForParser.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_PARSE_RAIIOBJECTSFORPARSER_H -#define LLVM_CLANG_LIB_PARSE_RAIIOBJECTSFORPARSER_H +#ifndef LLVM_CLANG_PARSE_RAIIOBJECTSFORPARSER_H +#define LLVM_CLANG_PARSE_RAIIOBJECTSFORPARSER_H #include "clang/Parse/ParseDiagnostic.h" #include "clang/Parse/Parser.h" diff --git a/clang/include/clang/Sema/CleanupInfo.h b/clang/include/clang/Sema/CleanupInfo.h index ea9df49f77e1..45d16fea93e0 
100644 --- a/clang/include/clang/Sema/CleanupInfo.h +++ b/clang/include/clang/Sema/CleanupInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SEMA_CLEANUP_INFO_H -#define LLVM_CLANG_SEMA_CLEANUP_INFO_H +#ifndef LLVM_CLANG_SEMA_CLEANUPINFO_H +#define LLVM_CLANG_SEMA_CLEANUPINFO_H namespace clang { diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h index 6403179cb327..657cf9253c77 100644 --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SEMA_ATTRIBUTELIST_H -#define LLVM_CLANG_SEMA_ATTRIBUTELIST_H +#ifndef LLVM_CLANG_SEMA_PARSEDATTR_H +#define LLVM_CLANG_SEMA_PARSEDATTR_H #include "clang/Basic/AttrSubjectMatchRules.h" #include "clang/Basic/AttributeCommonInfo.h" @@ -1159,4 +1159,4 @@ inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, } // namespace clang -#endif // LLVM_CLANG_SEMA_ATTRIBUTELIST_H +#endif // LLVM_CLANG_SEMA_PARSEDATTR_H diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h index dc5f0ec97e85..b73a152533d1 100644 --- a/clang/include/clang/Sema/SemaConcept.h +++ b/clang/include/clang/Sema/SemaConcept.h @@ -152,4 +152,4 @@ struct NormalizedConstraint { } // clang -#endif //LLVM_CLANG_SEMA_SEMACONCEPT_H +#endif // LLVM_CLANG_SEMA_SEMACONCEPT_H diff --git a/clang/include/clang/Sema/TemplateInstCallback.h b/clang/include/clang/Sema/TemplateInstCallback.h index 3ab0e8c6be9f..9258a7f41ac1 100644 --- a/clang/include/clang/Sema/TemplateInstCallback.h +++ b/clang/include/clang/Sema/TemplateInstCallback.h @@ -11,8 +11,8 @@ // //===---------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TEMPLATE_INST_CALLBACK_H -#define LLVM_CLANG_TEMPLATE_INST_CALLBACK_H +#ifndef 
LLVM_CLANG_SEMA_TEMPLATEINSTCALLBACK_H +#define LLVM_CLANG_SEMA_TEMPLATEINSTCALLBACK_H #include "clang/Sema/Sema.h" diff --git a/clang/include/clang/Serialization/ModuleFileExtension.h b/clang/include/clang/Serialization/ModuleFileExtension.h index 3e84a65c4b80..2168ce2ce607 100644 --- a/clang/include/clang/Serialization/ModuleFileExtension.h +++ b/clang/include/clang/Serialization/ModuleFileExtension.h @@ -154,4 +154,4 @@ class ModuleFileExtensionReader { } // end namespace clang -#endif // LLVM_CLANG_FRONTEND_MODULEFILEEXTENSION_H +#endif // LLVM_CLANG_SERIALIZATION_MODULEFILEEXTENSION_H diff --git a/clang/include/clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h b/clang/include/clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h index e2be957821b9..a6069d7dfdc0 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h +++ b/clang/include/clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CLANGSACHECKERS_H -#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CLANGSACHECKERS_H +#ifndef LLVM_CLANG_STATICANALYZER_CHECKERS_BUILTINCHECKERREGISTRATION_H +#define LLVM_CLANG_STATICANALYZER_CHECKERS_BUILTINCHECKERREGISTRATION_H #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" diff --git a/clang/include/clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h b/clang/include/clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h index bbc5111ccacc..6243bbd5d53b 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h +++ b/clang/include/clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIFUNCTIONCLASSIFIER_H -#define 
LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIFUNCTIONCLASSIFIER_H +#ifndef LLVM_CLANG_STATICANALYZER_CHECKERS_MPIFUNCTIONCLASSIFIER_H +#define LLVM_CLANG_STATICANALYZER_CHECKERS_MPIFUNCTIONCLASSIFIER_H #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h index 3a0bec9d04e5..6c487697bc55 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_STATICANALYZER_CORE_RANGEDCONSTRAINTMANAGER_H -#define LLVM_CLANG_LIB_STATICANALYZER_CORE_RANGEDCONSTRAINTMANAGER_H +#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H +#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" diff --git a/clang/include/clang/StaticAnalyzer/Frontend/ModelConsumer.h b/clang/include/clang/StaticAnalyzer/Frontend/ModelConsumer.h index 5f9ae78dac63..7b7087622bc2 100644 --- a/clang/include/clang/StaticAnalyzer/Frontend/ModelConsumer.h +++ b/clang/include/clang/StaticAnalyzer/Frontend/ModelConsumer.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_GR_MODELCONSUMER_H -#define LLVM_CLANG_GR_MODELCONSUMER_H +#ifndef LLVM_CLANG_STATICANALYZER_FRONTEND_MODELCONSUMER_H +#define LLVM_CLANG_STATICANALYZER_FRONTEND_MODELCONSUMER_H #include "clang/AST/ASTConsumer.h" #include "llvm/ADT/StringMap.h" diff --git a/clang/include/clang/Tooling/CommonOptionsParser.h b/clang/include/clang/Tooling/CommonOptionsParser.h index 
0f072c2886ab..3c0480af3779 100644 --- a/clang/include/clang/Tooling/CommonOptionsParser.h +++ b/clang/include/clang/Tooling/CommonOptionsParser.h @@ -141,4 +141,4 @@ class ArgumentsAdjustingCompilations : public CompilationDatabase { } // namespace tooling } // namespace clang -#endif // LLVM_TOOLS_CLANG_INCLUDE_CLANG_TOOLING_COMMONOPTIONSPARSER_H +#endif // LLVM_CLANG_TOOLING_COMMONOPTIONSPARSER_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 7d0b8f2138f9..1358950b437c 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H -#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H +#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H +#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H #include "clang/Basic/LLVM.h" #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" @@ -255,4 +255,4 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H +#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h index d58e736ab6a6..5c6dce611a95 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -6,8 +6,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H -#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H +#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H +#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" @@ -83,4 +83,4 @@ class DependencyScanningService { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H +#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h index 9e2ff82f5614..2eb7a35b27b9 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_TOOL_H -#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_TOOL_H +#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGTOOL_H +#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGTOOL_H #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" @@ -111,4 +111,4 @@ class DependencyScanningTool { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_TOOL_H +#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGTOOL_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h index 0f3a5369a021..b7631c09f275 100644 --- 
a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H -#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H +#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H +#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" @@ -91,4 +91,4 @@ class DependencyScanningWorker { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H +#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h index e61147d6f2b0..d1a7aab8c24b 100644 --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H -#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H +#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H +#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceManager.h" @@ -234,4 +234,4 @@ class ModuleDepCollector final : public DependencyCollector { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_MODULE_DEP_COLLECTOR_H +#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H diff --git a/clang/include/clang/Tooling/FixIt.h b/clang/include/clang/Tooling/FixIt.h index 
5fce71f2d8f7..1624c2d6be36 100644 --- a/clang/include/clang/Tooling/FixIt.h +++ b/clang/include/clang/Tooling/FixIt.h @@ -76,4 +76,4 @@ FixItHint createReplacement(const D &Destination, StringRef Source) { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_FIXINT_H +#endif // LLVM_CLANG_TOOLING_FIXIT_H diff --git a/clang/include/clang/Tooling/Refactoring/ASTSelection.h b/clang/include/clang/Tooling/Refactoring/ASTSelection.h index 239be36012c3..33dd386d2340 100644 --- a/clang/include/clang/Tooling/Refactoring/ASTSelection.h +++ b/clang/include/clang/Tooling/Refactoring/ASTSelection.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_AST_SELECTION_H -#define LLVM_CLANG_TOOLING_REFACTOR_AST_SELECTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_ASTSELECTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_ASTSELECTION_H #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Stmt.h" @@ -152,4 +152,4 @@ class CodeRangeASTSelection { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_AST_SELECTION_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_ASTSELECTION_H diff --git a/clang/include/clang/Tooling/Refactoring/AtomicChange.h b/clang/include/clang/Tooling/Refactoring/AtomicChange.h index f1034a3d0579..3945a7c9fefb 100644 --- a/clang/include/clang/Tooling/Refactoring/AtomicChange.h +++ b/clang/include/clang/Tooling/Refactoring/AtomicChange.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_ATOMICCHANGE_H -#define LLVM_CLANG_TOOLING_REFACTOR_ATOMICCHANGE_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_ATOMICCHANGE_H +#define LLVM_CLANG_TOOLING_REFACTORING_ATOMICCHANGE_H #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" @@ -187,4 +187,4 @@ applyAtomicChanges(llvm::StringRef FilePath, llvm::StringRef Code, } // end namespace 
tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_ATOMICCHANGE_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_ATOMICCHANGE_H diff --git a/clang/include/clang/Tooling/Refactoring/Extract/Extract.h b/clang/include/clang/Tooling/Refactoring/Extract/Extract.h index 930991328ca0..2f7c5bc9acff 100644 --- a/clang/include/clang/Tooling/Refactoring/Extract/Extract.h +++ b/clang/include/clang/Tooling/Refactoring/Extract/Extract.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_EXTRACT_EXTRACT_H -#define LLVM_CLANG_TOOLING_REFACTOR_EXTRACT_EXTRACT_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_EXTRACT_H +#define LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_EXTRACT_H #include "clang/Tooling/Refactoring/ASTSelection.h" #include "clang/Tooling/Refactoring/RefactoringActionRules.h" @@ -49,4 +49,4 @@ class ExtractFunction final : public SourceChangeRefactoringRule { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_EXTRACT_EXTRACT_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_EXTRACT_H diff --git a/clang/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h b/clang/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h index 034a0aaaf6db..be44518d4bce 100644 --- a/clang/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h +++ b/clang/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H -#define LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCEEXTRACTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCEEXTRACTION_H #include "clang/Basic/LLVM.h" @@ -48,4 +48,4 @@ class ExtractionSemicolonPolicy { } // end namespace tooling } // end namespace clang -#endif 
//LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCEEXTRACTION_H diff --git a/clang/include/clang/Tooling/Refactoring/Lookup.h b/clang/include/clang/Tooling/Refactoring/Lookup.h index 448bc422c4e7..dcb40b7eee66 100644 --- a/clang/include/clang/Tooling/Refactoring/Lookup.h +++ b/clang/include/clang/Tooling/Refactoring/Lookup.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_LOOKUP_H -#define LLVM_CLANG_TOOLING_REFACTOR_LOOKUP_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_LOOKUP_H +#define LLVM_CLANG_TOOLING_REFACTORING_LOOKUP_H #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" @@ -47,4 +47,4 @@ std::string replaceNestedName(const NestedNameSpecifier *Use, } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_LOOKUP_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_LOOKUP_H diff --git a/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h b/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h index 63d46abc2034..6fb2decf8614 100644 --- a/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h +++ b/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H -#define LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RECURSIVESYMBOLVISITOR_H +#define LLVM_CLANG_TOOLING_REFACTORING_RECURSIVESYMBOLVISITOR_H #include "clang/AST/AST.h" #include "clang/AST/RecursiveASTVisitor.h" @@ -150,4 +150,4 @@ class RecursiveSymbolVisitor } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RECURSIVESYMBOLVISITOR_H diff --git 
a/clang/include/clang/Tooling/Refactoring/RefactoringAction.h b/clang/include/clang/Tooling/Refactoring/RefactoringAction.h index d4294ddb2f66..b362f655965e 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringAction.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringAction.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTION_H #include "clang/Basic/LLVM.h" #include "clang/Tooling/Refactoring/RefactoringActionRules.h" @@ -60,4 +60,4 @@ std::vector> createRefactoringActions(); } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTION_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringActionRule.h b/clang/include/clang/Tooling/Refactoring/RefactoringActionRule.h index 57dffa945acc..388535a69b8b 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringActionRule.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringActionRule.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULE_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULE_H #include "clang/Basic/LLVM.h" #include "llvm/ADT/Optional.h" @@ -69,4 +69,4 @@ class RefactoringActionRule : public RefactoringActionRuleBase { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULE_H diff --git 
a/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h b/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h index 6a6dd83731e9..49e4a0c149f1 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_REQUIREMENTS_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_REQUIREMENTS_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULEREQUIREMENTS_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULEREQUIREMENTS_H #include "clang/Basic/LLVM.h" #include "clang/Tooling/Refactoring/ASTSelection.h" @@ -119,4 +119,4 @@ class OptionRequirement : public RefactoringOptionsRequirement { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_REQUIREMENTS_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULEREQUIREMENTS_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringActionRules.h b/clang/include/clang/Tooling/Refactoring/RefactoringActionRules.h index e9606fd6018e..86fcc6ad0a79 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringActionRules.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringActionRules.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULES_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULES_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULES_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULES_H #include "clang/Tooling/Refactoring/RefactoringActionRule.h" #include "clang/Tooling/Refactoring/RefactoringActionRulesInternal.h" @@ -90,4 +90,4 @@ class 
FindSymbolOccurrencesRefactoringRule : public RefactoringActionRuleBase { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULES_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULES_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h b/clang/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h index fb373fcf5029..e6ebaea5248a 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULES_INTERNAL_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULES_INTERNAL_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULESINTERNAL_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULESINTERNAL_H #include "clang/Basic/LLVM.h" #include "clang/Tooling/Refactoring/RefactoringActionRule.h" @@ -154,4 +154,4 @@ createRefactoringActionRule(const RequirementTypes &... 
Requirements) { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULES_INTERNAL_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGACTIONRULESINTERNAL_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringOption.h b/clang/include/clang/Tooling/Refactoring/RefactoringOption.h index 659e02b48e5c..b022c5d61b03 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringOption.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringOption.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTION_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTION_H #include "clang/Basic/LLVM.h" #include @@ -60,4 +60,4 @@ std::shared_ptr createRefactoringOption() { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTION_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTION_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringOptionVisitor.h b/clang/include/clang/Tooling/Refactoring/RefactoringOptionVisitor.h index d58b11355a26..f9f85f6eeb82 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringOptionVisitor.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringOptionVisitor.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTION_VISITOR_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTION_VISITOR_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTIONVISITOR_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTIONVISITOR_H #include "clang/Basic/LLVM.h" #include @@ -58,4 +58,4 @@ struct IsValidOptionType : internal::HasHandle::Type {}; } // end namespace tooling } // end namespace 
clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTION_VISITOR_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTIONVISITOR_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringOptions.h b/clang/include/clang/Tooling/Refactoring/RefactoringOptions.h index 84122b111ee1..1575a136b11c 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringOptions.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringOptions.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTIONS_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTIONS_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTIONS_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTIONS_H #include "clang/Basic/LLVM.h" #include "clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h" @@ -54,4 +54,4 @@ class RequiredRefactoringOption : public OptionalRefactoringOption { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_OPTIONS_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGOPTIONS_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringResultConsumer.h b/clang/include/clang/Tooling/Refactoring/RefactoringResultConsumer.h index 2035c02bc17a..016eff80ca7b 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringResultConsumer.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringResultConsumer.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RESULT_CONSUMER_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RESULT_CONSUMER_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGRESULTCONSUMER_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGRESULTCONSUMER_H #include "clang/Basic/LLVM.h" #include "clang/Tooling/Refactoring/AtomicChange.h" @@ -48,4 +48,4 @@ class 
RefactoringResultConsumer { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RESULT_CONSUMER_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGRESULTCONSUMER_H diff --git a/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h b/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h index e0da9469deb5..7d97f811f024 100644 --- a/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h +++ b/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RULE_CONTEXT_H -#define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RULE_CONTEXT_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGRULECONTEXT_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGRULECONTEXT_H #include "clang/Basic/DiagnosticError.h" #include "clang/Basic/SourceManager.h" @@ -86,4 +86,4 @@ class RefactoringRuleContext { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RULE_CONTEXT_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGRULECONTEXT_H diff --git a/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h b/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h index b04bc3e2d202..8e72076b7b5e 100644 --- a/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h +++ b/clang/include/clang/Tooling/Refactoring/Rename/RenamingAction.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_RENAMING_ACTION_H -#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_RENAMING_ACTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RENAME_RENAMINGACTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_RENAME_RENAMINGACTION_H #include "clang/Tooling/Refactoring.h" #include "clang/Tooling/Refactoring/AtomicChange.h" @@ -120,4 
+120,4 @@ class QualifiedRenamingAction { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_RENAMING_ACTION_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RENAME_RENAMINGACTION_H diff --git a/clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h b/clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h index 9131a4565da7..6c28d40f3679 100644 --- a/clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h +++ b/clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_SYMBOL_NAME_H -#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_SYMBOL_NAME_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RENAME_SYMBOLNAME_H +#define LLVM_CLANG_TOOLING_REFACTORING_RENAME_SYMBOLNAME_H #include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" @@ -45,4 +45,4 @@ class SymbolName { } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_SYMBOL_NAME_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RENAME_SYMBOLNAME_H diff --git a/clang/include/clang/Tooling/Refactoring/Rename/SymbolOccurrences.h b/clang/include/clang/Tooling/Refactoring/Rename/SymbolOccurrences.h index c4bfaa9cc377..0ae023b8d4e4 100644 --- a/clang/include/clang/Tooling/Refactoring/Rename/SymbolOccurrences.h +++ b/clang/include/clang/Tooling/Refactoring/Rename/SymbolOccurrences.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_SYMBOL_OCCURRENCES_H -#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_SYMBOL_OCCURRENCES_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RENAME_SYMBOLOCCURRENCES_H +#define LLVM_CLANG_TOOLING_REFACTORING_RENAME_SYMBOLOCCURRENCES_H #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" @@ -88,4 +88,4 @@ using SymbolOccurrences = std::vector; } // end namespace tooling } // 
end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_SYMBOL_OCCURRENCES_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RENAME_SYMBOLOCCURRENCES_H diff --git a/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h b/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h index 30f7f0a0008c..a7ffa8556888 100644 --- a/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h +++ b/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDER_H -#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDER_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRFINDER_H +#define LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRFINDER_H #include "clang/AST/AST.h" #include "clang/AST/ASTContext.h" @@ -46,4 +46,4 @@ std::string getUSRForDecl(const Decl *Decl); } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDER_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRFINDER_H diff --git a/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h b/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h index 726987d9d46a..e43721bdccd1 100644 --- a/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h +++ b/clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDING_ACTION_H -#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDING_ACTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRFINDINGACTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRFINDINGACTION_H #include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" @@ -64,4 +64,4 @@ struct USRFindingAction { } // end namespace tooling } // end namespace clang -#endif // 
LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_FINDING_ACTION_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRFINDINGACTION_H diff --git a/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h b/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h index 7a7dd76c4238..c3ffb4421e00 100644 --- a/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h +++ b/clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_LOC_FINDER_H -#define LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_LOC_FINDER_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRLOCFINDER_H +#define LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRLOCFINDER_H #include "clang/AST/AST.h" #include "clang/Tooling/Core/Replacement.h" @@ -49,4 +49,4 @@ SymbolOccurrences getOccurrencesOfUSRs(ArrayRef USRs, } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RENAME_USR_LOC_FINDER_H +#endif // LLVM_CLANG_TOOLING_REFACTORING_RENAME_USRLOCFINDER_H diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h index 3c8dd8ceed09..d6235797fd7a 100644 --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// // Functions to construct a syntax tree from an AST. 
//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H -#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H +#ifndef LLVM_CLANG_TOOLING_SYNTAX_BUILDTREE_H +#define LLVM_CLANG_TOOLING_SYNTAX_BUILDTREE_H #include "clang/AST/Decl.h" #include "clang/Basic/TokenKinds.h" diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h index b92e92305417..a3e0a2f598f8 100644 --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -18,8 +18,8 @@ // This is still work in progress and highly experimental, we leave room for // ourselves to completely change the design and/or implementation. //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H -#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H +#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" diff --git a/clang/include/clang/Tooling/Transformer/MatchConsumer.h b/clang/include/clang/Tooling/Transformer/MatchConsumer.h index cb0a5f684b7d..fb57dabb0a6f 100644 --- a/clang/include/clang/Tooling/Transformer/MatchConsumer.h +++ b/clang/include/clang/Tooling/Transformer/MatchConsumer.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ -#define LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_MATCHCONSUMER_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_MATCHCONSUMER_H #include "clang/AST/ASTTypeTraits.h" #include "clang/ASTMatchers/ASTMatchFinder.h" @@ -100,4 +100,4 @@ llvm::Expected MatchComputation::eval( } } // namespace transformer } // namespace clang -#endif // LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_MATCHCONSUMER_H diff 
--git a/clang/include/clang/Tooling/Transformer/Parsing.h b/clang/include/clang/Tooling/Transformer/Parsing.h index b143f63d8ca8..177eca6a044d 100644 --- a/clang/include/clang/Tooling/Transformer/Parsing.h +++ b/clang/include/clang/Tooling/Transformer/Parsing.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_PARSING_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_PARSING_H #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceLocation.h" @@ -37,4 +37,4 @@ llvm::Expected parseRangeSelector(llvm::StringRef Input); } // namespace transformer } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_PARSING_H diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h index 38ec24efec65..1e288043f0a8 100644 --- a/clang/include/clang/Tooling/Transformer/RangeSelector.h +++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_RANGESELECTOR_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_RANGESELECTOR_H #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceLocation.h" @@ -105,4 +105,4 @@ RangeSelector expansion(RangeSelector S); } // namespace transformer } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_RANGESELECTOR_H diff --git a/clang/include/clang/Tooling/Transformer/RewriteRule.h b/clang/include/clang/Tooling/Transformer/RewriteRule.h index ac93db8446df..6b14861e92d7 100644 --- 
a/clang/include/clang/Tooling/Transformer/RewriteRule.h +++ b/clang/include/clang/Tooling/Transformer/RewriteRule.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_ -#define LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_REWRITERULE_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_REWRITERULE_H #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -450,4 +450,4 @@ findSelectedCase(const ast_matchers::MatchFinder::MatchResult &Result, } // namespace transformer } // namespace clang -#endif // LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_REWRITERULE_H diff --git a/clang/include/clang/Tooling/Transformer/SourceCode.h b/clang/include/clang/Tooling/Transformer/SourceCode.h index 2c7eb65371cf..16411b9c398d 100644 --- a/clang/include/clang/Tooling/Transformer/SourceCode.h +++ b/clang/include/clang/Tooling/Transformer/SourceCode.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H -#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCECODE_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCECODE_H #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceLocation.h" @@ -100,4 +100,4 @@ getRangeForEdit(const CharSourceRange &EditRange, const ASTContext &Context) { } } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCECODE_H diff --git a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h index 6c79a7588f28..b6d9bd0e2d5d 100644 --- a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h +++ 
b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ -#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCECODEBUILDERS_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCECODEBUILDERS_H #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" @@ -83,4 +83,4 @@ llvm::Optional buildArrow(const Expr &E, } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCECODEBUILDERS_H From 8de2d06251c30751bdc0fd7b89133610797759e6 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 29 Dec 2021 16:22:26 +0800 Subject: [PATCH 175/992] [clang] Fix crash in bug52905 The root cause for the crash is the incorrect use of `cast`. The actual type and cast-to type is different. This patch fixes the crash by converting the `cast` to `dyn_cast`. 
--- clang/lib/Sema/SemaOverload.cpp | 7 +++--- clang/test/SemaTemplate/constraints.cpp | 32 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 42b1340f9a65..a268837b3cc8 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -14322,8 +14322,7 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, FoundDecl = MemExpr->getFoundDecl(); Qualifier = MemExpr->getQualifier(); UnbridgedCasts.restore(); - } else { - UnresolvedMemberExpr *UnresExpr = cast(NakedMemExpr); + } else if (auto *UnresExpr = dyn_cast(NakedMemExpr)) { Qualifier = UnresExpr->getQualifier(); QualType ObjectType = UnresExpr->getBaseType(); @@ -14436,7 +14435,9 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, } MemExpr = cast(MemExprE->IgnoreParens()); - } + } else + // Unimaged NakedMemExpr type. + return ExprError(); QualType ResultType = Method->getReturnType(); ExprValueKind VK = Expr::getValueKindForType(ResultType); diff --git a/clang/test/SemaTemplate/constraints.cpp b/clang/test/SemaTemplate/constraints.cpp index 0bc4727245f6..e2bb6552fdb1 100644 --- a/clang/test/SemaTemplate/constraints.cpp +++ b/clang/test/SemaTemplate/constraints.cpp @@ -24,3 +24,35 @@ namespace PR45589 { // FIXME: These diagnostics are excessive. static_assert(test == 1); // expected-note 2{{while}} expected-note 2{{during}} } + +namespace PR52905 { +// A mock for std::convertible_to. Not complete support. 
+template +concept convertible_to = __is_convertible_to(_From, _To); // expected-note {{evaluated to false}} + +template +class A { +public: + using iterator = void **; + + iterator begin(); + const iterator begin() const; +}; + +template +concept Beginable1 = requires(T t) { + { t.begin } + ->convertible_to; // expected-note {{not satisfied}} +}; + +static_assert(Beginable1>); // expected-error {{static_assert failed}} + // expected-note@-1 {{does not satisfy 'Beginable1'}} + +template +concept Beginable2 = requires(T t) { + { t.begin() } + ->convertible_to; +}; + +static_assert(Beginable2>); +} // namespace PR52905 From caa7e765e5ae250c67eab3edd7cd324d3634f779 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 21 Dec 2021 13:59:14 +0100 Subject: [PATCH 176/992] [lldb] Make ProcessLauncherPosixFork (mostly) async-signal-safe Multithreaded applications using fork(2) need to be extra careful about what they do in the fork child. Without any special precautions (which only really work if you can fully control all threads) they can only safely call async-signal-safe functions. This is because the forked child will contain snapshot of the parents memory at a random moment in the execution of all of the non-forking threads (this is where the similarity with signals comes in). For example, the other threads could have been holding locks that can now never be released in the child process and any attempt to obtain them would block. This is what sometimes happen when using tcmalloc -- our fork child ends up hanging in the memory allocation routine. It is also what happened with our logging code, which is why we added a pthread_atfork hackaround. This patch implements a proper fix to the problem, by which is to make the child code async-signal-safe. The ProcessLaunchInfo structure is transformed into a simpler ForkLaunchInfo representation, one which can be read without allocating memory and invoking complex library functions. 
Strictly speaking this implementation is not async-signal-safe, as it still invokes library functions outside of the posix-blessed set of entry points. Strictly adhering to the spec would mean reimplementing a lot of the functionality in pure C, so instead I rely on the fact that any reasonable implementation of some functions (e.g., basic_string::c_str()) will not start allocating memory or doing other unsafe things. The new child code does not call into our logging infrastructure, which enables us to remove the pthread_atfork call from there. Differential Revision: https://reviews.llvm.org/D116165 --- lldb/include/lldb/Utility/Log.h | 2 - .../Host/posix/ProcessLauncherPosixFork.cpp | 159 ++++++++++++------ lldb/source/Utility/Log.cpp | 12 -- 3 files changed, 107 insertions(+), 66 deletions(-) diff --git a/lldb/include/lldb/Utility/Log.h b/lldb/include/lldb/Utility/Log.h index 01edec044565..2684783939bd 100644 --- a/lldb/include/lldb/Utility/Log.h +++ b/lldb/include/lldb/Utility/Log.h @@ -211,8 +211,6 @@ class Log final { static uint32_t GetFlags(llvm::raw_ostream &stream, const ChannelMap::value_type &entry, llvm::ArrayRef categories); - static void DisableLoggingChild(); - Log(const Log &) = delete; void operator=(const Log &) = delete; }; diff --git a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp index 2f08b9fa8857..635dbb14a027 100644 --- a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp +++ b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp @@ -38,43 +38,40 @@ using namespace lldb; using namespace lldb_private; -static void FixupEnvironment(Environment &env) { -#ifdef __ANDROID__ - // If there is no PATH variable specified inside the environment then set the - // path to /system/bin. It is required because the default path used by - // execve() is wrong on android. 
- env.try_emplace("PATH", "/system/bin"); -#endif +// Begin code running in the child process +// NB: This code needs to be async-signal safe, since we're invoking fork from +// multithreaded contexts. + +static void write_string(int error_fd, const char *str) { + int r = write(error_fd, str, strlen(str)); + (void)r; } [[noreturn]] static void ExitWithError(int error_fd, const char *operation) { int err = errno; - llvm::raw_fd_ostream os(error_fd, true); - os << operation << " failed: " << llvm::sys::StrError(err); - os.flush(); + write_string(error_fd, operation); + write_string(error_fd, " failed: "); + // strerror is not guaranteed to be async-signal safe, but it usually is. + write_string(error_fd, strerror(err)); _exit(1); } -static void DisableASLRIfRequested(int error_fd, const ProcessLaunchInfo &info) { +static void DisableASLR(int error_fd) { #if defined(__linux__) - if (info.GetFlags().Test(lldb::eLaunchFlagDisableASLR)) { - const unsigned long personality_get_current = 0xffffffff; - int value = personality(personality_get_current); - if (value == -1) - ExitWithError(error_fd, "personality get"); - - value = personality(ADDR_NO_RANDOMIZE | value); - if (value == -1) - ExitWithError(error_fd, "personality set"); - } + const unsigned long personality_get_current = 0xffffffff; + int value = personality(personality_get_current); + if (value == -1) + ExitWithError(error_fd, "personality get"); + + value = personality(ADDR_NO_RANDOMIZE | value); + if (value == -1) + ExitWithError(error_fd, "personality set"); #endif } -static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, - int flags) { - int target_fd = llvm::sys::RetryAfterSignal(-1, ::open, - file_spec.GetCString(), flags, 0666); +static void DupDescriptor(int error_fd, const char *file, int fd, int flags) { + int target_fd = llvm::sys::RetryAfterSignal(-1, ::open, file, flags, 0666); if (target_fd == -1) ExitWithError(error_fd, "DupDescriptor-open"); @@ -88,44 +85,67 @@ static void 
DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, ::close(target_fd); } -[[noreturn]] static void ChildFunc(int error_fd, - const ProcessLaunchInfo &info) { - if (info.GetFlags().Test(eLaunchFlagLaunchInSeparateProcessGroup)) { +namespace { +struct ForkFileAction { + ForkFileAction(const FileAction &act); + + FileAction::Action action; + int fd; + std::string path; + int arg; +}; + +struct ForkLaunchInfo { + ForkLaunchInfo(const ProcessLaunchInfo &info); + + bool separate_process_group; + bool debug; + bool disable_aslr; + std::string wd; + const char **argv; + Environment::Envp envp; + std::vector actions; + + bool has_action(int fd) const { + for (const ForkFileAction &action : actions) { + if (action.fd == fd) + return true; + } + return false; + } +}; +} // namespace + +[[noreturn]] static void ChildFunc(int error_fd, const ForkLaunchInfo &info) { + if (info.separate_process_group) { if (setpgid(0, 0) != 0) ExitWithError(error_fd, "setpgid"); } - for (size_t i = 0; i < info.GetNumFileActions(); ++i) { - const FileAction &action = *info.GetFileActionAtIndex(i); - switch (action.GetAction()) { + for (const ForkFileAction &action : info.actions) { + switch (action.action) { case FileAction::eFileActionClose: - if (close(action.GetFD()) != 0) + if (close(action.fd) != 0) ExitWithError(error_fd, "close"); break; case FileAction::eFileActionDuplicate: - if (dup2(action.GetFD(), action.GetActionArgument()) == -1) + if (dup2(action.fd, action.arg) == -1) ExitWithError(error_fd, "dup2"); break; case FileAction::eFileActionOpen: - DupDescriptor(error_fd, action.GetFileSpec(), action.GetFD(), - action.GetActionArgument()); + DupDescriptor(error_fd, action.path.c_str(), action.fd, action.arg); break; case FileAction::eFileActionNone: break; } } - const char **argv = info.GetArguments().GetConstArgumentVector(); - // Change working directory - if (info.GetWorkingDirectory() && - 0 != ::chdir(info.GetWorkingDirectory().GetCString())) + if (!info.wd.empty() && 0 
!= ::chdir(info.wd.c_str())) ExitWithError(error_fd, "chdir"); - DisableASLRIfRequested(error_fd, info); - Environment env = info.GetEnvironment(); - FixupEnvironment(env); - Environment::Envp envp = env.getEnvp(); + if (info.disable_aslr) + DisableASLR(error_fd); // Clear the signal mask to prevent the child from being affected by any // masking done by the parent. @@ -134,7 +154,7 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, pthread_sigmask(SIG_SETMASK, &set, nullptr) != 0) ExitWithError(error_fd, "pthread_sigmask"); - if (info.GetFlags().Test(eLaunchFlagDebug)) { + if (info.debug) { // Do not inherit setgid powers. if (setgid(getgid()) != 0) ExitWithError(error_fd, "setgid"); @@ -143,6 +163,8 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, // Close everything besides stdin, stdout, and stderr that has no file // action to avoid leaking. Only do this when debugging, as elsewhere we // actually rely on passing open descriptors to child processes. + // NB: This code is not async-signal safe, but we currently do not launch + // processes for debugging from within multithreaded contexts. const llvm::StringRef proc_fd_path = "/proc/self/fd"; std::error_code ec; @@ -157,7 +179,7 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, // Don't close first three entries since they are stdin, stdout and // stderr. - if (fd > 2 && !info.GetFileActionForFD(fd) && fd != error_fd) + if (fd > 2 && !info.has_action(fd) && fd != error_fd) files_to_close.push_back(fd); } for (int file_to_close : files_to_close) @@ -166,7 +188,7 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, // Since /proc/self/fd didn't work, trying the slow way instead. 
int max_fd = sysconf(_SC_OPEN_MAX); for (int fd = 3; fd < max_fd; ++fd) - if (!info.GetFileActionForFD(fd) && fd != error_fd) + if (!info.has_action(fd) && fd != error_fd) close(fd); } @@ -176,7 +198,7 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, } // Execute. We should never return... - execve(argv[0], const_cast(argv), envp); + execve(info.argv[0], const_cast(info.argv), info.envp); #if defined(__linux__) if (errno == ETXTBSY) { @@ -189,7 +211,7 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, // Since this state should clear up quickly, wait a while and then give it // one more go. usleep(50000); - execve(argv[0], const_cast(argv), envp); + execve(info.argv[0], const_cast(info.argv), info.envp); } #endif @@ -198,12 +220,43 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, ExitWithError(error_fd, "execve"); } +// End of code running in the child process. + +ForkFileAction::ForkFileAction(const FileAction &act) + : action(act.GetAction()), fd(act.GetFD()), path(act.GetPath().str()), + arg(act.GetActionArgument()) {} + +static std::vector +MakeForkActions(const ProcessLaunchInfo &info) { + std::vector result; + for (size_t i = 0; i < info.GetNumFileActions(); ++i) + result.emplace_back(*info.GetFileActionAtIndex(i)); + return result; +} + +static Environment::Envp FixupEnvironment(Environment env) { +#ifdef __ANDROID__ + // If there is no PATH variable specified inside the environment then set the + // path to /system/bin. It is required because the default path used by + // execve() is wrong on android. 
+ env.try_emplace("PATH", "/system/bin"); +#endif + return env.getEnvp(); +} + +ForkLaunchInfo::ForkLaunchInfo(const ProcessLaunchInfo &info) + : separate_process_group( + info.GetFlags().Test(eLaunchFlagLaunchInSeparateProcessGroup)), + debug(info.GetFlags().Test(eLaunchFlagDebug)), + disable_aslr(info.GetFlags().Test(eLaunchFlagDisableASLR)), + wd(info.GetWorkingDirectory().GetPath()), + argv(info.GetArguments().GetConstArgumentVector()), + envp(FixupEnvironment(info.GetEnvironment())), + actions(MakeForkActions(info)) {} + HostProcess ProcessLauncherPosixFork::LaunchProcess(const ProcessLaunchInfo &launch_info, Status &error) { - char exe_path[PATH_MAX]; - launch_info.GetExecutableFile().GetPath(exe_path, sizeof(exe_path)); - // A pipe used by the child process to report errors. PipePosix pipe; const bool child_processes_inherit = false; @@ -211,6 +264,8 @@ ProcessLauncherPosixFork::LaunchProcess(const ProcessLaunchInfo &launch_info, if (error.Fail()) return HostProcess(); + const ForkLaunchInfo fork_launch_info(launch_info); + ::pid_t pid = ::fork(); if (pid == -1) { // Fork failed @@ -221,7 +276,7 @@ ProcessLauncherPosixFork::LaunchProcess(const ProcessLaunchInfo &launch_info, if (pid == 0) { // child process pipe.CloseReadFileDescriptor(); - ChildFunc(pipe.ReleaseWriteFileDescriptor(), launch_info); + ChildFunc(pipe.ReleaseWriteFileDescriptor(), fork_launch_info); } // parent process diff --git a/lldb/source/Utility/Log.cpp b/lldb/source/Utility/Log.cpp index ff654ec93e78..26070d0740b1 100644 --- a/lldb/source/Utility/Log.cpp +++ b/lldb/source/Utility/Log.cpp @@ -30,7 +30,6 @@ #include #else #include -#include #endif using namespace lldb_private; @@ -180,9 +179,6 @@ void Log::Warning(const char *format, ...) 
{ } void Log::Initialize() { -#ifdef LLVM_ON_UNIX - pthread_atfork(nullptr, nullptr, &Log::DisableLoggingChild); -#endif InitializeLldbChannel(); } @@ -346,11 +342,3 @@ void Log::Format(llvm::StringRef file, llvm::StringRef function, message << payload << "\n"; WriteMessage(message.str()); } - -void Log::DisableLoggingChild() { - // Disable logging by clearing out the atomic variable after forking -- if we - // forked while another thread held the channel mutex, we would deadlock when - // trying to write to the log. - for (auto &c: *g_channel_map) - c.second.m_channel.log_ptr.store(nullptr, std::memory_order_relaxed); -} From daed4797fee4a5f1985388265f5af209b5cb3b10 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 29 Dec 2021 10:00:00 +0100 Subject: [PATCH 177/992] [lldb] Adjust TestModuleCacheSimple for D115951 Now that we are caching the dwarf index as well, we will always have more than one cache file (when not using accelerator tables). I have adjusted the test to check for the presence of one _symtab_ index. --- .../module_cache/simple_exe/TestModuleCacheSimple.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py b/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py index 35e96fb584ed..6180203c4bb3 100644 --- a/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py +++ b/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py @@ -26,7 +26,8 @@ def setUp(self): def get_module_cache_files(self, basename): - module_file_glob = os.path.join(self.cache_dir, "llvmcache-*%s*" % (basename)) + module_file_glob = os.path.join(self.cache_dir, + "llvmcache-*%s*-symtab-*" % (basename)) return glob.glob(module_file_glob) # Doesn't depend on any specific debug information. 
From 633b002944b966ddb64c85f4a8c017a858afb4fc Mon Sep 17 00:00:00 2001 From: PoYao Chang Date: Mon, 27 Dec 2021 12:34:23 +0800 Subject: [PATCH 178/992] [lldb] Fix PR52702 by fixing bool conversion of Mangled Remove the Mangled::operator! and Mangled::operator void* where the comments in header and implementation files disagree and replace them with operator bool. This fix PR52702 as https://reviews.llvm.org/D106837 used the buggy Mangled::operator! in Symbol::SynthesizeNameIfNeeded. For example, consider the symbol "puts" in a hello world C program: // Inside Symbol::SynthesizeNameIfNeeded (lldb) p m_mangled (lldb_private::Mangled) $0 = (m_mangled = None, m_demangled = "puts") (lldb) p !m_mangled (bool) $1 = true # should be false!! This leads to Symbol::SynthesizeNameIfNeeded overwriting m_demangled part of Mangled (in this case "puts"). In conclusion, this patch turns callq 0x401030 ; symbol stub for: ___lldb_unnamed_symbol36 back into callq 0x401030 ; symbol stub for: puts . Differential Revision: https://reviews.llvm.org/D116217 --- lldb/include/lldb/Core/Mangled.h | 28 ++++--------------- lldb/source/Core/Mangled.cpp | 14 ++-------- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 3 +- lldb/unittests/Core/MangledTest.cpp | 19 +++++++++++++ 4 files changed, 29 insertions(+), 35 deletions(-) diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h index 6c92591a0881..35705b0319ab 100644 --- a/lldb/include/lldb/Core/Mangled.h +++ b/lldb/include/lldb/Core/Mangled.h @@ -72,10 +72,10 @@ class Mangled { return !(*this == rhs); } - /// Convert to pointer operator. + /// Convert to bool operator. 
/// - /// This allows code to check a Mangled object to see if it contains a valid - /// mangled name using code such as: + /// This allows code to check any Mangled objects to see if they contain + /// anything valid using code such as: /// /// \code /// Mangled mangled(...); @@ -84,25 +84,9 @@ class Mangled { /// \endcode /// /// \return - /// A pointer to this object if either the mangled or unmangled - /// name is set, NULL otherwise. - operator void *() const; - - /// Logical NOT operator. - /// - /// This allows code to check a Mangled object to see if it contains an - /// empty mangled name using code such as: - /// - /// \code - /// Mangled mangled(...); - /// if (!mangled) - /// { ... - /// \endcode - /// - /// \return - /// Returns \b true if the object has an empty mangled and - /// unmangled name, \b false otherwise. - bool operator!() const; + /// Returns \b true if either the mangled or unmangled name is set, + /// \b false if the object has an empty mangled and unmangled name. + explicit operator bool() const; /// Clear the mangled and demangled values. void Clear(); diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp index c8aacdefefa2..4e10324401dc 100644 --- a/lldb/source/Core/Mangled.cpp +++ b/lldb/source/Core/Mangled.cpp @@ -70,23 +70,13 @@ Mangled::Mangled(llvm::StringRef name) { SetValue(ConstString(name)); } -// Convert to pointer operator. This allows code to check any Mangled objects +// Convert to bool operator. This allows code to check any Mangled objects // to see if they contain anything valid using code such as: // // Mangled mangled(...); // if (mangled) // { ... -Mangled::operator void *() const { - return (m_mangled) ? const_cast(this) : nullptr; -} - -// Logical NOT operator. This allows code to check any Mangled objects to see -// if they are invalid using code such as: -// -// Mangled mangled(...); -// if (!file_spec) -// { ... 
-bool Mangled::operator!() const { return !m_mangled; } +Mangled::operator bool() const { return m_mangled || m_demangled; } // Clear the mangled and demangled values. void Mangled::Clear() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 8c995ef2eb2a..ca701c6f2fcc 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2140,7 +2140,8 @@ void SymbolFileDWARF::FindGlobalVariables( llvm::StringRef basename; llvm::StringRef context; - bool name_is_mangled = (bool)Mangled(name); + bool name_is_mangled = Mangled::GetManglingScheme(name.GetStringRef()) != + Mangled::eManglingSchemeNone; if (!CPlusPlusLanguage::ExtractContextAndIdentifier(name.GetCString(), context, basename)) diff --git a/lldb/unittests/Core/MangledTest.cpp b/lldb/unittests/Core/MangledTest.cpp index 4c1bb0cc45c2..284c2f21aadd 100644 --- a/lldb/unittests/Core/MangledTest.cpp +++ b/lldb/unittests/Core/MangledTest.cpp @@ -89,6 +89,25 @@ TEST(MangledTest, EmptyForInvalidDLangName) { EXPECT_STREQ("", the_demangled.GetCString()); } +TEST(MangledTest, BoolConversionOperator) { + { + ConstString MangledName("_ZN1a1b1cIiiiEEvm"); + Mangled TheMangled(MangledName); + EXPECT_EQ(true, bool(TheMangled)); + EXPECT_EQ(false, !TheMangled); + } + { + ConstString UnmangledName("puts"); + Mangled TheMangled(UnmangledName); + EXPECT_EQ(true, bool(TheMangled)); + EXPECT_EQ(false, !TheMangled); + } + { + Mangled TheMangled{}; + EXPECT_EQ(false, bool(TheMangled)); + EXPECT_EQ(true, !TheMangled); + } +} TEST(MangledTest, NameIndexes_FindFunctionSymbols) { SubsystemRAII From fdd741dd31814d3d4e5c29185f27a529943050d2 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 29 Dec 2021 10:54:46 +0100 Subject: [PATCH 179/992] [lldb/linux] Fix a bug in wait status handling The MonitorCallback function was assuming that the "exited" argument is set whenever a thread 
exits, but the caller was only setting that flag for the main thread. This patch deletes the argument altogether, and lets MonitorCallback compute what it needs itself. This is almost NFC, since previously we would end up in the "GetSignalInfo failed for unknown reasons" branch, which was doing the same thing -- forgetting about the thread. --- .../Process/Linux/NativeProcessLinux.cpp | 22 +++++++------------ .../Process/Linux/NativeProcessLinux.h | 5 ++--- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index d7651ce71da0..8f5496d9f4e5 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -426,8 +426,7 @@ Status NativeProcessLinux::SetDefaultPtraceOpts(lldb::pid_t pid) { } // Handles all waitpid events from the inferior process. -void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, bool exited, - WaitStatus status) { +void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, WaitStatus status) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); // Certain activities differ based on whether the pid is the tid of the main @@ -435,7 +434,7 @@ void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, bool exited, const bool is_main_thread = (pid == GetID()); // Handle when the thread exits. - if (exited) { + if (status.type == WaitStatus::Exit || status.type == WaitStatus::Signal) { LLDB_LOG(log, "got exit status({0}) , tid = {1} ({2} main thread), process " "state = {3}", @@ -485,7 +484,7 @@ void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, bool exited, if (info.si_signo == SIGTRAP) MonitorSIGTRAP(info, *thread_sp); else - MonitorSignal(info, *thread_sp, exited); + MonitorSignal(info, *thread_sp); } else { if (info_err.GetError() == EINVAL) { // This is a group stop reception for this tid. 
We can reach here if we @@ -753,7 +752,7 @@ void NativeProcessLinux::MonitorSIGTRAP(const siginfo_t &info, default: LLDB_LOG(log, "received unknown SIGTRAP stop event ({0}, pid {1} tid {2}", info.si_code, GetID(), thread.GetID()); - MonitorSignal(info, thread, false); + MonitorSignal(info, thread); break; } } @@ -801,7 +800,7 @@ void NativeProcessLinux::MonitorWatchpoint(NativeThreadLinux &thread, } void NativeProcessLinux::MonitorSignal(const siginfo_t &info, - NativeThreadLinux &thread, bool exited) { + NativeThreadLinux &thread) { const int signo = info.si_signo; const bool is_from_llgs = info.si_pid == getpid(); @@ -1962,16 +1961,11 @@ void NativeProcessLinux::SigchldHandler() { } WaitStatus wait_status = WaitStatus::Decode(status); - bool exited = wait_status.type == WaitStatus::Exit || - (wait_status.type == WaitStatus::Signal && - wait_pid == static_cast<::pid_t>(GetID())); - LLDB_LOG( - log, - "waitpid (-1, &status, _) => pid = {0}, status = {1}, exited = {2}", - wait_pid, wait_status, exited); + LLDB_LOG(log, "waitpid (-1, &status, _) => pid = {0}, status = {1}", + wait_pid, wait_status); - MonitorCallback(wait_pid, exited, wait_status); + MonitorCallback(wait_pid, wait_status); } } diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h index 902afb6aa98b..5d33c4753ca8 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h @@ -164,7 +164,7 @@ class NativeProcessLinux : public NativeProcessELF, static Status SetDefaultPtraceOpts(const lldb::pid_t); - void MonitorCallback(lldb::pid_t pid, bool exited, WaitStatus status); + void MonitorCallback(lldb::pid_t pid, WaitStatus status); void WaitForCloneNotification(::pid_t pid); @@ -176,8 +176,7 @@ class NativeProcessLinux : public NativeProcessELF, void MonitorWatchpoint(NativeThreadLinux &thread, uint32_t wp_index); - void MonitorSignal(const siginfo_t &info, 
NativeThreadLinux &thread, - bool exited); + void MonitorSignal(const siginfo_t &info, NativeThreadLinux &thread); bool HasThreadNoLock(lldb::tid_t thread_id); From 4fedd4be385e6502a7806df87a3cdd02f99bbcba Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 23 Dec 2021 15:22:46 +0000 Subject: [PATCH 180/992] [AArch64] Remove outdated FIXME in test arm64-csel.ll. NFC. --- llvm/test/CodeGen/AArch64/arm64-csel.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll index ec3694f863a9..72ad42beb773 100644 --- a/llvm/test/CodeGen/AArch64/arm64-csel.ll +++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll @@ -106,7 +106,6 @@ define i32 @foo7(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: csel w0, w10, w9, ge ; CHECK-NEXT: ret entry: -; FIXME: Misspelled CHECK-NEXT %sub = sub nsw i32 %a, %b %cmp = icmp sgt i32 %sub, -1 %sub3 = sub nsw i32 0, %sub From 9dc4af327b12dfbcf90fde1641cd649c6814bf98 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 29 Dec 2021 12:07:42 +0100 Subject: [PATCH 181/992] Re-land "[clang] Add early exit when checking for const init of arrays." This reverts commit 6d09aaecdfe51e13fc64d539aa7c9a790de341d7. The test uses ulimit and ran into problems on some bots. Run on linux only. There's nothing platform-specific about the code we're testing, so this should be enough to ensure correctness. 
--- clang/lib/AST/ExprConstant.cpp | 53 ++++++++++++++----- ...R51712-large-array-constexpr-check-oom.cpp | 17 ++++++ 2 files changed, 57 insertions(+), 13 deletions(-) create mode 100644 clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 469339e8cd62..105cd7a3506d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10680,28 +10680,55 @@ bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E, bool HadZeroInit = Value->hasValue(); if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(Type)) { - unsigned N = CAT->getSize().getZExtValue(); + unsigned FinalSize = CAT->getSize().getZExtValue(); // Preserve the array filler if we had prior zero-initialization. APValue Filler = HadZeroInit && Value->hasArrayFiller() ? Value->getArrayFiller() : APValue(); - *Value = APValue(APValue::UninitArray(), N, N); - - if (HadZeroInit) - for (unsigned I = 0; I != N; ++I) - Value->getArrayInitializedElt(I) = Filler; + *Value = APValue(APValue::UninitArray(), 0, FinalSize); + if (FinalSize == 0) + return true; - // Initialize the elements. LValue ArrayElt = Subobject; ArrayElt.addArray(Info, E, CAT); - for (unsigned I = 0; I != N; ++I) - if (!VisitCXXConstructExpr(E, ArrayElt, &Value->getArrayInitializedElt(I), - CAT->getElementType()) || - !HandleLValueArrayAdjustment(Info, E, ArrayElt, CAT->getElementType(), - 1)) - return false; + // We do the whole initialization in two passes, first for just one element, + // then for the whole array. It's possible we may find out we can't do const + // init in the first pass, in which case we avoid allocating a potentially + // large array. We don't do more passes because expanding array requires + // copying the data, which is wasteful. 
+ for (const unsigned N : {1u, FinalSize}) { + unsigned OldElts = Value->getArrayInitializedElts(); + if (OldElts == N) + break; + + // Expand the array to appropriate size. + APValue NewValue(APValue::UninitArray(), N, FinalSize); + for (unsigned I = 0; I < OldElts; ++I) + NewValue.getArrayInitializedElt(I).swap( + Value->getArrayInitializedElt(I)); + Value->swap(NewValue); + + if (HadZeroInit) + for (unsigned I = OldElts; I < N; ++I) + Value->getArrayInitializedElt(I) = Filler; + + // Initialize the elements. + for (unsigned I = OldElts; I < N; ++I) { + if (!VisitCXXConstructExpr(E, ArrayElt, + &Value->getArrayInitializedElt(I), + CAT->getElementType()) || + !HandleLValueArrayAdjustment(Info, E, ArrayElt, + CAT->getElementType(), 1)) + return false; + // When checking for const initilization any diagnostic is considered + // an error. + if (Info.EvalStatus.Diag && !Info.EvalStatus.Diag->empty() && + !Info.keepEvaluatingAfterFailure()) + return false; + } + } return true; } diff --git a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp new file mode 100644 index 000000000000..859b1ac3fb2b --- /dev/null +++ b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp @@ -0,0 +1,17 @@ +// Only run this test where ulimit is known to work well. +// (There's nothing really platform-specific being tested, this is just ulimit). +// +// REQUIRES: shell +// REQUIRES: linux +// UNSUPPORTED: msan +// UNSUPPORTED: asan +// +// RUN: ulimit -v 1048576 +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -triple=x86_64 %s +// expected-no-diagnostics + +// This used to require too much memory and crash with OOM. 
+struct { + int a, b, c, d; +} arr[1<<30]; + From 3ad32df72eb2063ba45ec2956a815e1bbb6e6012 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 29 Dec 2021 13:18:11 +0100 Subject: [PATCH 182/992] Fix lit feature name in 9dc4af327b12d --- clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp index 859b1ac3fb2b..f84cab8dc33e 100644 --- a/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp +++ b/clang/test/SemaCXX/PR51712-large-array-constexpr-check-oom.cpp @@ -2,7 +2,7 @@ // (There's nothing really platform-specific being tested, this is just ulimit). // // REQUIRES: shell -// REQUIRES: linux +// REQUIRES: system-linux // UNSUPPORTED: msan // UNSUPPORTED: asan // From 52e8f58d49e63aaf6f4c1682bb787bcbfd240009 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Wed, 29 Dec 2021 13:46:15 +0300 Subject: [PATCH 183/992] [SYCL] Diagnose uses of zero length arrays Adds diagnosing on attempt to use zero length arrays, pointers, refs, arrays of them and structs/classes containing all of it. In case a struct/class with zero length array is used this emits a set of notes pointing out how zero length array got into used struct, like this: ``` struct ContainsArr { int A[0]; // note: field of illegal type declared here }; struct Wrapper { ContainsArr F; // note: within field of type ContainsArr declared here // ... } // Device code Wrapper W; W.use(); // error: zero-length arrays are not permitted ``` Total deep check of each used declaration may result in double diagnosing at the same location. 
Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D114080 --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/include/clang/Sema/Sema.h | 3 + clang/lib/Sema/Sema.cpp | 9 ++ clang/lib/Sema/SemaSYCL.cpp | 98 ++++++++++++++ clang/lib/Sema/SemaType.cpp | 2 +- clang/test/SemaSYCL/zero-length-arrays.cpp | 125 ++++++++++++++++++ 6 files changed, 237 insertions(+), 2 deletions(-) create mode 100644 clang/test/SemaSYCL/zero-length-arrays.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f2089bfda04d..8ef9195944d5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5785,7 +5785,7 @@ def err_typecheck_invalid_restrict_invalid_pointee : Error< def ext_typecheck_zero_array_size : Extension< "zero size arrays are an extension">, InGroup; def err_typecheck_zero_array_size : Error< - "zero-length arrays are not permitted in C++">; + "zero-length arrays are not permitted in %select{C++|SYCL device code}0">; def err_array_size_non_int : Error<"size of array has non-integer type %0">; def err_init_element_not_constant : Error< "initializer element is not a compile-time constant">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 79834554a50d..6758e7ef2c30 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13142,6 +13142,9 @@ class Sema final { /// Adds Callee to DeviceCallGraph if we don't know if its caller will be /// codegen'ed yet. bool checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); + void deepTypeCheckForSYCLDevice(SourceLocation UsedAt, + llvm::DenseSet Visited, + ValueDecl *DeclToCheck); }; /// RAII object that enters a new expression evaluation context. 
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 734ed0f62ec6..ba69400fdbbf 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1858,6 +1858,15 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { if (isUnevaluatedContext() || Ty.isNull()) return; + // The original idea behind checkTypeSupport function is that unused + // declarations can be replaced with an array of bytes of the same size during + // codegen, such replacement doesn't seem to be possible for types without + // constant byte size like zero length arrays. So, do a deep check for SYCL. + if (D && LangOpts.SYCLIsDevice) { + llvm::DenseSet Visited; + deepTypeCheckForSYCLDevice(Loc, Visited, D); + } + Decl *C = cast(getCurLexicalContext()); // Memcpy operations for structs containing a member with unsupported type diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 815463307ecc..f8c713c8545d 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -48,3 +48,101 @@ bool Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) { return DiagKind != SemaDiagnosticBuilder::K_Immediate && DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; } + +static bool isZeroSizedArray(Sema &SemaRef, QualType Ty) { + if (const auto *CAT = SemaRef.getASTContext().getAsConstantArrayType(Ty)) + return CAT->getSize() == 0; + return false; +} + +void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, + llvm::DenseSet Visited, + ValueDecl *DeclToCheck) { + assert(getLangOpts().SYCLIsDevice && + "Should only be called during SYCL compilation"); + // Emit notes only for the first discovered declaration of unsupported type + // to avoid mess of notes. This flag is to track that error already happened. 
+ bool NeedToEmitNotes = true; + + auto Check = [&](QualType TypeToCheck, const ValueDecl *D) { + bool ErrorFound = false; + if (isZeroSizedArray(*this, TypeToCheck)) { + SYCLDiagIfDeviceCode(UsedAt, diag::err_typecheck_zero_array_size) << 1; + ErrorFound = true; + } + // Checks for other types can also be done here. + if (ErrorFound) { + if (NeedToEmitNotes) { + if (auto *FD = dyn_cast(D)) + SYCLDiagIfDeviceCode(FD->getLocation(), + diag::note_illegal_field_declared_here) + << FD->getType()->isPointerType() << FD->getType(); + else + SYCLDiagIfDeviceCode(D->getLocation(), diag::note_declared_at); + } + } + + return ErrorFound; + }; + + // In case we have a Record used do the DFS for a bad field. + SmallVector StackForRecursion; + StackForRecursion.push_back(DeclToCheck); + + // While doing DFS save how we get there to emit a nice set of notes. + SmallVector History; + History.push_back(nullptr); + + do { + const ValueDecl *Next = StackForRecursion.pop_back_val(); + if (!Next) { + assert(!History.empty()); + // Found a marker, we have gone up a level. + History.pop_back(); + continue; + } + QualType NextTy = Next->getType(); + + if (!Visited.insert(NextTy).second) + continue; + + auto EmitHistory = [&]() { + // The first element is always nullptr. + for (uint64_t Index = 1; Index < History.size(); ++Index) { + SYCLDiagIfDeviceCode(History[Index]->getLocation(), + diag::note_within_field_of_type) + << History[Index]->getType(); + } + }; + + if (Check(NextTy, Next)) { + if (NeedToEmitNotes) + EmitHistory(); + NeedToEmitNotes = false; + } + + // In case pointer/array/reference type is met get pointee type, then + // proceed with that type. 
+ while (NextTy->isAnyPointerType() || NextTy->isArrayType() || + NextTy->isReferenceType()) { + if (NextTy->isArrayType()) + NextTy = QualType{NextTy->getArrayElementTypeNoTypeQual(), 0}; + else + NextTy = NextTy->getPointeeType(); + if (Check(NextTy, Next)) { + if (NeedToEmitNotes) + EmitHistory(); + NeedToEmitNotes = false; + } + } + + if (const auto *RecDecl = NextTy->getAsRecordDecl()) { + if (auto *NextFD = dyn_cast(Next)) + History.push_back(NextFD); + // When nullptr is discovered, this means we've gone back up a level, so + // the history should be cleaned. + StackForRecursion.push_back(nullptr); + llvm::copy(RecDecl->fields(), std::back_inserter(StackForRecursion)); + } + } while (!StackForRecursion.empty()); +} diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 7a038301a249..0b3154e6bcb6 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2515,7 +2515,7 @@ QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM, Diag(ArraySize->getBeginLoc(), isSFINAEContext() ? diag::err_typecheck_zero_array_size : diag::ext_typecheck_zero_array_size) - << ArraySize->getSourceRange(); + << 0 << ArraySize->getSourceRange(); } // Is the array too large? diff --git a/clang/test/SemaSYCL/zero-length-arrays.cpp b/clang/test/SemaSYCL/zero-length-arrays.cpp new file mode 100644 index 000000000000..d2b8d767eedc --- /dev/null +++ b/clang/test/SemaSYCL/zero-length-arrays.cpp @@ -0,0 +1,125 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64 -fsyntax-only -verify %s +// +// This test checks if compiler reports compilation error on an attempt to use +// a zero-length array inside device code. 
+ +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + // expected-note@+1 5{{called by 'kernel}} + kernelFunc(); // #KernelObjCall +} + +typedef float ZEROARR[0]; + +struct Wrapper { + int A; + int BadArray[0]; // expected-note 3{{field of illegal type 'int[0]' declared here}} +}; + +struct WrapperOfWrapper { // expected-error 2{{zero-length arrays are not permitted in SYCL device code}} + Wrapper F; // expected-note 2{{within field of type 'Wrapper' declared here}} + ZEROARR *Ptr; //expected-note 5{{field of illegal pointer type 'ZEROARR *' (aka 'float (*)[0]') declared here}} +}; + +template struct InnerTemplated { + double Array[Size]; // expected-note 8{{field of illegal type 'double[0]' declared here}} +}; + +template struct Templated { + unsigned A; + Ty Arr[Size]; + InnerTemplated Array[Size + 1]; // expected-note 8{{within field of type 'InnerTemplated<0U>[1]' declared here}} +}; + +struct KernelSt { + int A; + int BadArray[0]; // expected-note {{field of illegal type 'int[0]' declared here}} + void operator()() const {} +}; + +WrapperOfWrapper offendingFoo() { + // expected-note@+1 {{called by 'offendingFoo'}} + return WrapperOfWrapper{}; +} + +template +void templatedContext() { + Templated Var; + // expected-error@#KernelObjCall 2{{zero-length arrays are not permitted in SYCL device code}} + // expected-note@#KernelObjCall {{called by 'kernel([=] { + // expected-note@+1 {{within field of type 'Templated<0U, float>' declared here}} + (void)Var; // expected-error 2{{zero-length arrays are not permitted in SYCL device code}} + }); + // expected-error@#KernelObjCall {{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+2 {{in instantiation of function template specialization}} + // expected-note@+1 {{within field of type 'Templated<0U, float>' declared here}} + kernel([Var] { + }); +} + +void foo(const unsigned X) { + int Arr[0]; // expected-note 2{{declared here}} + ZEROARR TypeDef; // expected-note 
{{declared here}} + ZEROARR *Ptr; // expected-note {{declared here}} + // expected-error@#KernelObjCall 3{{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+1 {{in instantiation of function template specialization}} + kernel([=]() { + (void)Arr; // expected-error {{zero-length arrays are not permitted in SYCL device code}} + (void)TypeDef; // expected-error {{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+1 {{field of illegal pointer type 'ZEROARR *' (aka 'float (*)[0]') declared here}} + (void)Ptr; // expected-error {{zero-length arrays are not permitted in SYCL device code}} + }); + // expected-error@#KernelObjCall {{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+2 {{in instantiation of function template specialization}} + // expected-note@+1 {{field of illegal type 'int[0]' declared here}} + kernel([Arr] { // expected-error {{zero-length arrays are not permitted in SYCL device code}} + }); + WrapperOfWrapper St; + // expected-error@#KernelObjCall 2{{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+1 {{in instantiation of function template specialization}} + kernel([=] { + // expected-note@+1 {{within field of type 'WrapperOfWrapper' declared here}} + (void)St.F.BadArray; // expected-error 4{{zero-length arrays are not permitted in SYCL device code}} + }); + // expected-error@#KernelObjCall 2{{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+2 {{in instantiation of function template specialization}} + // expected-note@+1 {{within field of type 'WrapperOfWrapper' declared here}} + kernel([St] { // expected-error 2{{zero-length arrays are not permitted in SYCL device code}} + }); + + Templated<1, int> OK; + Templated<1 - 1, double> Weirdo; + Templated<0, float> Zero; + // expected-error@#KernelObjCall 4{{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+1 {{in instantiation of function 
template specialization}} + kernel([=] { + (void)OK; // No errors expected + (void)Zero; // expected-error 2{{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+1 {{within field of type 'Templated<1 - 1, double>' declared here}} + int A = Weirdo.A; // expected-error 2{{zero-length arrays are not permitted in SYCL device code}} + }); + + // expected-note@#KernelObjCall {{called by 'kernel' declared here}} + kernel([Zero] { // expected-error 2{{zero-length arrays are not permitted in SYCL device code}} + }); + + templatedContext<10>(); + // expected-note@+1 2{{in instantiation of function template specialization}} + templatedContext<0>(); + + KernelSt K; + // expected-error@#KernelObjCall {{zero-length arrays are not permitted in SYCL device code}} + // expected-note@+1 {{in instantiation of function template specialization}} + kernel(K); + + // expected-note@#KernelObjCall {{called by 'kernel([=] { + // expected-note@+1 {{called by 'operator()'}} + offendingFoo(); + }); +} From 6d702a1e6a069e22e4b7f679a408e95bcc7db66c Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Wed, 29 Dec 2021 12:38:14 +0000 Subject: [PATCH 184/992] [NewGVN] Prefer poison to undef when ranking operands ping @alinas --- llvm/lib/Transforms/Scalar/NewGVN.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 10a8742940b1..d0d6fc81e8b6 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -4128,21 +4128,25 @@ bool NewGVN::eliminateInstructions(Function &F) { unsigned int NewGVN::getRank(const Value *V) const { // Prefer constants to undef to anything else // Undef is a constant, have to check it first. + // Prefer poison to undef as it's less defined. 
// Prefer smaller constants to constantexprs + // Note that the order here matters because of class inheritance if (isa(V)) - return 2; - if (isa(V)) + return 3; + if (isa(V)) return 1; + if (isa(V)) + return 2; if (isa(V)) return 0; - else if (auto *A = dyn_cast(V)) - return 3 + A->getArgNo(); + if (auto *A = dyn_cast(V)) + return 4 + A->getArgNo(); - // Need to shift the instruction DFS by number of arguments + 3 to account for + // Need to shift the instruction DFS by number of arguments + 5 to account for // the constant and argument ranking above. unsigned Result = InstrToDFSNum(V); if (Result > 0) - return 4 + NumFuncArgs + Result; + return 5 + NumFuncArgs + Result; // Unreachable or something else, just return a really large number. return ~0; } From 33fc675e1670939cb3b037bf669ca6d1efa3eb1f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 29 Dec 2021 05:45:43 -0800 Subject: [PATCH 185/992] [Hexagon] Handle floating point vector loads/stores --- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 18 +- .../autohvx/vector-load-store-basic.ll | 164 ++++++++++++++++++ 2 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 15fa659d26ab..21e703fd5a3c 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -165,12 +165,19 @@ let Predicates = [UseHVX] in { defm: HvxLda_pat; defm: HvxLda_pat; defm: HvxLda_pat; - defm: HvxLd_pat; defm: HvxLd_pat; defm: HvxLd_pat; } +let Predicates = [UseHVXV68] in { + defm: HvxLda_pat; + defm: HvxLda_pat; + defm: HvxLda_pat; + defm: HvxLda_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; +} // HVX stores @@ -214,6 +221,15 @@ let Predicates = [UseHVX] in { defm: HvxSt_pat; } +let Predicates = [UseHVXV68] in { + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: 
HvxSt_pat; + defm: HvxSt_pat; +} + // Bitcasts between same-size vector types are no-ops, except for the // actual type change. let Predicates = [UseHVX] in { diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll new file mode 100644 index 000000000000..3b01f971bad2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define void @f0(<128 x i8>* %a0, <128 x i8>* %a1) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: v0.cur = vmem(r0+#1) +; CHECK-NEXT: vmem(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1 + %v1 = load <128 x i8>, <128 x i8>* %v0, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 2 + store <128 x i8> %v1, <128 x i8>* %v2, align 128 + ret void +} + +define void @f1(<64 x i16>* %a0, <64 x i16>* %a1) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: v0.cur = vmem(r0+#1) +; CHECK-NEXT: vmem(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <64 x i16>, <64 x i16>* %a0, i32 1 + %v1 = load <64 x i16>, <64 x i16>* %v0, align 128 + %v2 = getelementptr <64 x i16>, <64 x i16>* %a1, i32 2 + store <64 x i16> %v1, <64 x i16>* %v2, align 128 + ret void +} + +define void @f2(<32 x i32>* %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: v0.cur = vmem(r0+#1) +; CHECK-NEXT: vmem(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <32 x i32>, <32 x i32>* %a0, i32 1 + %v1 = load <32 x i32>, <32 x i32>* %v0, align 128 + %v2 = getelementptr <32 x i32>, <32 x i32>* %a1, i32 2 + store <32 x i32> %v1, <32 x i32>* %v2, align 128 + ret void +} + +define void @f3(<64 x half>* %a0, <64 x half>* %a1) 
#0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: v0.cur = vmem(r0+#1) +; CHECK-NEXT: vmem(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <64 x half>, <64 x half>* %a0, i32 1 + %v1 = load <64 x half>, <64 x half>* %v0, align 128 + %v2 = getelementptr <64 x half>, <64 x half>* %a1, i32 2 + store <64 x half> %v1, <64 x half>* %v2, align 128 + ret void +} + +define void @f4(<32 x float>* %a0, <32 x float>* %a1) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: v0.cur = vmem(r0+#1) +; CHECK-NEXT: vmem(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <32 x float>, <32 x float>* %a0, i32 1 + %v1 = load <32 x float>, <32 x float>* %v0, align 128 + %v2 = getelementptr <32 x float>, <32 x float>* %a1, i32 2 + store <32 x float> %v1, <32 x float>* %v2, align 128 + ret void +} + +define void @f5(<128 x i8>* %a0, <128 x i8>* %a1) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmemu(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmemu(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1 + %v1 = load <128 x i8>, <128 x i8>* %v0, align 1 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 2 + store <128 x i8> %v1, <128 x i8>* %v2, align 1 + ret void +} + +define void @f6(<64 x i16>* %a0, <64 x i16>* %a1) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmemu(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmemu(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <64 x i16>, <64 x i16>* %a0, i32 1 + %v1 = load <64 x i16>, <64 x i16>* %v0, align 1 + %v2 = getelementptr <64 x i16>, <64 x i16>* %a1, i32 2 + store <64 x i16> %v1, <64 x i16>* %v2, align 1 + ret void +} + +define void @f7(<32 x i32>* %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = 
vmemu(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmemu(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <32 x i32>, <32 x i32>* %a0, i32 1 + %v1 = load <32 x i32>, <32 x i32>* %v0, align 1 + %v2 = getelementptr <32 x i32>, <32 x i32>* %a1, i32 2 + store <32 x i32> %v1, <32 x i32>* %v2, align 1 + ret void +} + +define void @f8(<64 x half>* %a0, <64 x half>* %a1) #0 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmemu(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmemu(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <64 x half>, <64 x half>* %a0, i32 1 + %v1 = load <64 x half>, <64 x half>* %v0, align 1 + %v2 = getelementptr <64 x half>, <64 x half>* %a1, i32 2 + store <64 x half> %v1, <64 x half>* %v2, align 1 + ret void +} + +define void @f9(<32 x float>* %a0, <32 x float>* %a1) #0 { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmemu(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmemu(r1+#2) = v0 +; CHECK-NEXT: } + %v0 = getelementptr <32 x float>, <32 x float>* %a0, i32 1 + %v1 = load <32 x float>, <32 x float>* %v0, align 1 + %v2 = getelementptr <32 x float>, <32 x float>* %a1, i32 2 + store <32 x float> %v1, <32 x float>* %v2, align 1 + ret void +} + +attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } From 2ce586bc497f7c9cc15a7f7fcba3c7169830119a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 29 Dec 2021 06:52:24 -0800 Subject: [PATCH 186/992] [Hexagon] Handle floating point splats Co-authored-by: Anirudh Sundar Subramaniam --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 + .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 22 +++++++ llvm/lib/Target/Hexagon/HexagonPatterns.td | 3 + llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 17 +++++- llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll | 57 +++++++++++++++++++ 
.../CodeGen/Hexagon/autohvx/hfnosplat_cp.ll | 18 ++++++ llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll | 57 +++++++++++++++++++ llvm/test/CodeGen/Hexagon/autohvx/splat.ll | 31 ++++++++++ 8 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index fd6aa06ed843..a31a697b7317 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -458,6 +458,7 @@ class HexagonTargetLowering : public TargetLowering { SelectionDAG &DAG) const; SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index e189b0b49e34..28f7c5414a2a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -96,6 +96,9 @@ HexagonTargetLowering::initializeHVXLowering() { // BUILD_VECTOR with f16 operands cannot be promoted without // promoting the result, so lower the node to vsplat or constant pool setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); + setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); + setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal); + setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal); // Custom-lower BUILD_VECTOR for vector pairs. 
The standard (target- // independent) handling of it would convert it to a load, which is @@ -1299,6 +1302,24 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) return buildHvxVectorReg(Ops, dl, VecTy, DAG); } +SDValue +HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + MVT VecTy = ty(Op); + MVT ArgTy = ty(Op.getOperand(0)); + + if (ArgTy == MVT::f16) { + MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements()); + SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0)); + SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16); + SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32); + return DAG.getBitcast(VecTy, Splat); + } + + return SDValue(); +} + SDValue HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const { @@ -2185,6 +2206,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { default: break; case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); + case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG); case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 4ba6d4740e12..9b21eb98e2c8 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -257,6 +257,9 @@ def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>; def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; +def f32zero: PatLeaf<(f32 fpimm:$F), [{ + return N->isExactlyValue(APFloat::getZero(APFloat::IEEEsingle(), false)); +}]>; // This complex pattern is really only to detect various forms of // sign-extension i32->i64. 
The selected value will be of type i64 diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 21e703fd5a3c..ad8029687770 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -64,7 +64,7 @@ def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>; def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>; def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>; -def vzero: PatFrag<(ops), (splat_vector (i32 0))>; +def vzero: PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>; def qtrue: PatFrag<(ops), (HexagonQTRUE)>; def qfalse: PatFrag<(ops), (HexagonQFALSE)>; def qcat: PatFrag<(ops node:$Qs, node:$Qt), @@ -265,10 +265,13 @@ let Predicates = [UseHVX] in { // These should be preferred over a vsplat of 0. def: Pat<(VecI8 vzero), (V6_vd0)>; def: Pat<(VecI16 vzero), (V6_vd0)>; + def: Pat<(VecF16 vzero), (V6_vd0)>; def: Pat<(VecI32 vzero), (V6_vd0)>; + def: Pat<(VecF32 vzero), (V6_vd0)>; def: Pat<(VecPI8 vzero), (PS_vdd0)>; def: Pat<(VecPI16 vzero), (PS_vdd0)>; def: Pat<(VecPI32 vzero), (PS_vdd0)>; + def: Pat<(VecPF32 vzero), (PS_vdd0)>; def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>; def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>; @@ -363,6 +366,18 @@ let Predicates = [UseHVX,UseHVXV62] in { def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>; } } +let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { + let AddedComplexity = 30 in { + def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>; + def: Pat<(VecF32 (splat_vector anyint:$V)), (V62splatiw imm:$V)>; + def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (V62splatiw (ftoi $V))>; + } + let AddedComplexity = 20 in { + def: Pat<(VecF16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>; + def: Pat<(VecF32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>; + def: Pat<(VecF32 (splat_vector F32:$Rs)), 
(V62splatrw $Rs)>; + } +} class Vneg1 : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll b/llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll new file mode 100644 index 000000000000..f64674bd0e84 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll @@ -0,0 +1,57 @@ +; RUN: llc -mtriple=hexagon < %s | FileCheck %s + +; Check that the vsplat instruction is generated +; CHECK: r[[V:[0-9]+]] = ##1092616192 +; CHECK: vsplat(r[[V]]) + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local i32 @foo(float* nocapture %0, i32 %1) local_unnamed_addr #0 { + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %4, label %22 + +4: ; preds = %2 + %5 = icmp ult i32 %1, 64 + br i1 %5, label %6, label %9 + +6: ; preds = %20, %4 + %7 = phi float* [ %0, %4 ], [ %11, %20 ] + %8 = phi i32 [ 0, %4 ], [ %10, %20 ] + br label %23 + +9: ; preds = %4 + %10 = and i32 %1, -64 + %11 = getelementptr float, float* %0, i32 %10 + br label %12 + +12: ; preds = %12, %9 + %13 = phi i32 [ 0, %9 ], [ %18, %12 ] + %14 = getelementptr float, float* %0, i32 %13 + %15 = bitcast float* %14 to <32 x float>* + store <32 x float> , <32 x float>* %15, align 4 + %16 = getelementptr float, float* %14, i32 32 + %17 = bitcast float* %16 to <32 x float>* + store <32 x float> , <32 x float>* %17, align 4 + %18 = add i32 %13, 64 + %19 = icmp eq i32 %18, %10 + br i1 %19, label %20, label %12 + +20: ; preds = %12 + %21 = icmp eq i32 %10, %1 + br i1 %21, label %22, label %6 + +22: ; preds = %23, %20, %2 + ret i32 0 + +23: ; preds = %23, %6 + %24 = phi float* [ %28, %23 ], [ %7, %6 ] + %25 = phi i32 [ %26, %23 ], [ %8, %6 ] + store float 1.000000e+01, float* %24, align 4 + %26 = add nuw nsw i32 %25, 1 + %27 = icmp eq i32 %26, %1 + %28 = getelementptr float, 
float* %24, i32 1 + br i1 %27, label %22, label %23 +} + +attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll new file mode 100644 index 000000000000..d5d3dcbe0737 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=hexagon < %s | FileCheck %s + +; Check that the vsplat instruction is generated +; CHECK: .word 1097875824 +; CHECK: .word 1048133241 +; CHECK: .word 0 + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local i32 @foo(half* nocapture %a) local_unnamed_addr #0 { +vector.body: + %0 = bitcast half* %a to <40 x half>* + store <40 x half> , <40 x half>* %0, align 2 + ret i32 0 +} + +attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git 
a/llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll new file mode 100644 index 000000000000..3a6847876ea5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll @@ -0,0 +1,57 @@ +; RUN: llc -mtriple=hexagon < %s | FileCheck %s + +; Check that the vsplat instruction is generated +; CHECK: r[[V:[0-9]+]] = #16752 +; CHECK: vsplat(r[[V]]) + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local i32 @foo(half* nocapture %0, i32 %1) local_unnamed_addr #0 { + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %4, label %22 + +4: ; preds = %2 + %5 = icmp ult i32 %1, 128 + br i1 %5, label %6, label %9 + +6: ; preds = %20, %4 + %7 = phi half* [ %0, %4 ], [ %11, %20 ] + %8 = phi i32 [ 0, %4 ], [ %10, %20 ] + br label %23 + +9: ; preds = %4 + %10 = and i32 %1, -128 + %11 = getelementptr half, half* %0, i32 %10 + br label %12 + +12: ; preds = %12, %9 + %13 = phi i32 [ 0, %9 ], [ %18, %12 ] + %14 = getelementptr half, half* %0, i32 %13 + %15 = bitcast half* %14 to <64 x half>* + store <64 x half> , <64 x half>* %15, align 2 + %16 = getelementptr half, half* %14, i32 64 + %17 = bitcast half* %16 to <64 x half>* + store <64 x half> , <64 x half>* %17, align 2 + %18 = add i32 %13, 128 + %19 = icmp eq i32 %18, %10 + br i1 %19, label %20, label %12 + +20: ; preds = %12 + %21 = icmp eq i32 %10, %1 + br i1 %21, label %22, label %6 + +22: ; preds = %23, %20, %2 + ret i32 0 + +23: ; preds = %23, %6 + %24 = phi half* [ %28, %23 ], [ %7, %6 ] + %25 = phi i32 [ %26, %23 ], [ %8, %6 ] + store half 0xH4170, half* %24, align 2 + %26 = add nuw nsw i32 %25, 1 + %27 = icmp eq i32 %26, %1 + %28 = getelementptr half, half* %24, i32 1 + br i1 %27, label %22, label %23 +} + +attributes #0 = { nofree norecurse nounwind writeonly 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll index c686561f1713..bbea3a21270c 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll @@ -397,5 +397,36 @@ define <64 x i32> @f23(i32 %a0) #1 { ret <64 x i32> %v1 } +; Splat register, 16 bit fp, v68+ +define <64 x half> @f24(i16 %a0) #2 { +; CHECK-LABEL: f24: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.h = vsplat(r1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#0) = v0.new +; CHECK-NEXT: } + %v0 = bitcast i16 %a0 to half + %v1 = insertelement <64 x half> undef, half %v0, i32 0 + %v2 = shufflevector <64 x half> %v1, <64 x half> undef, <64 x i32> zeroinitializer + ret <64 x half> %v2 +} + +; Splat register, 32 bit fp, v68+ +define <32 x float> @f25(float %a0) #2 { +; CHECK-LABEL: f25: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vsplat(r1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#0) = v0.new +; CHECK-NEXT: } + %v0 = insertelement <32 x float> undef, float %a0, i32 0 + %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer + ret <32 x float> %v1 +} + + attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } attributes #1 = { nounwind readnone "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" } +attributes #2 = { nounwind readnone "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } From 
680d409561d7523e3a33554f8da7e6bcd3b5232a Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Wed, 29 Dec 2021 15:48:30 +0000 Subject: [PATCH 187/992] [NewGVN] Use poison instead of undef to represent unreachable values This enables more simplifications and gets us closer to removing undef. ping @alinas --- llvm/lib/Transforms/Scalar/NewGVN.cpp | 44 ++++++++++++------- .../NewGVN/2007-07-26-PhiErasure.ll | 4 +- llvm/test/Transforms/NewGVN/assume-equal.ll | 4 +- ...ed-to-existing-value-then-changes-again.ll | 4 +- llvm/test/Transforms/NewGVN/pr31594.ll | 2 +- llvm/test/Transforms/NewGVN/pr31758.ll | 2 +- llvm/test/Transforms/NewGVN/pr32607.ll | 4 +- llvm/test/Transforms/NewGVN/pr32838.ll | 4 +- llvm/test/Transforms/NewGVN/pr32845.ll | 2 +- llvm/test/Transforms/NewGVN/pr33014.ll | 2 +- llvm/test/Transforms/NewGVN/pr33187.ll | 4 +- llvm/test/Transforms/NewGVN/pr33204.ll | 2 +- llvm/test/Transforms/NewGVN/pr33461.ll | 2 +- llvm/test/Transforms/NewGVN/pr33720.ll | 4 +- .../Transforms/NewGVN/verify-memoryphi.ll | 2 +- 15 files changed, 47 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index d0d6fc81e8b6..3558ce3e11a4 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1322,11 +1322,11 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst, Value *NewGVN::lookupOperandLeader(Value *V) const { CongruenceClass *CC = ValueToClass.lookup(V); if (CC) { - // Everything in TOP is represented by undef, as it can be any value. + // Everything in TOP is represented by poison, as it can be any value. // We do have to make sure we get the type right though, so we can't set the - // RepLeader to undef. + // RepLeader to poison. if (CC == TOPClass) - return UndefValue::get(V->getType()); + return PoisonValue::get(V->getType()); return CC->getStoredValue() ? 
CC->getStoredValue() : CC->getLeader(); } @@ -1521,9 +1521,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { return nullptr; Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand()); - // Load of undef is undef. + // Load of undef is UB. if (isa(LoadAddressLeader)) - return createConstantExpression(UndefValue::get(LI->getType())); + return createConstantExpression(PoisonValue::get(LI->getType())); MemoryAccess *OriginalAccess = getMemoryAccess(I); MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(OriginalAccess); @@ -1531,9 +1531,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast(DefiningAccess)) { Instruction *DefiningInst = MD->getMemoryInst(); - // If the defining instruction is not reachable, replace with undef. + // If the defining instruction is not reachable, replace with poison. if (!ReachableBlocks.count(DefiningInst->getParent())) - return createConstantExpression(UndefValue::get(LI->getType())); + return createConstantExpression(PoisonValue::get(LI->getType())); // This will handle stores and memory insts. We only do if it the // defining access has a different type, or it is a pointer produced by // certain memory operations that cause the memory to have a fixed value @@ -1722,8 +1722,12 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, // We match the semantics of SimplifyPhiNode from InstructionSimplify here. // See if all arguments are the same. // We track if any were undef because they need special handling. - bool HasUndef = false; + bool HasUndef = false, HasPoison = false; auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { + if (isa(Arg)) { + HasPoison = true; + return false; + } if (isa(Arg)) { HasUndef = true; return false; @@ -1732,8 +1736,14 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, }); // If we are left with no operands, it's dead. 
if (Filtered.empty()) { - // If it has undef at this point, it means there are no-non-undef arguments, - // and thus, the value of the phi node must be undef. + // If it has undef or poison at this point, it means there are no-non-undef + // arguments, and thus, the value of the phi node must be undef. + if (HasPoison && !HasUndef) { + LLVM_DEBUG( + dbgs() << "PHI Node " << *I + << " has no non-poison arguments, valuing it as poison\n"); + return createConstantExpression(PoisonValue::get(I->getType())); + } if (HasUndef) { LLVM_DEBUG( dbgs() << "PHI Node " << *I @@ -1758,7 +1768,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, // expression to say if one is equivalent to the other. // We also special case undef, so that if we have an undef, we can't use the // common value unless it dominates the phi block. - if (HasUndef) { + if (HasPoison || HasUndef) { // If we have undef and at least one other value, this is really a // multivalued phi, and we need to know if it's cycle free in order to // evaluate whether we can ignore the undef. The other parts of this are @@ -2780,7 +2790,7 @@ NewGVN::makePossiblePHIOfOps(Instruction *I, LLVM_DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " << getBlockName(PredBB) << " because the block is unreachable\n"); - FoundVal = UndefValue::get(I->getType()); + FoundVal = PoisonValue::get(I->getType()); RevisitOnReachabilityChange[PHIBlock].set(InstrToDFSNum(I)); } @@ -3459,7 +3469,7 @@ bool NewGVN::runGVN() { // Delete all instructions marked for deletion. 
for (Instruction *ToErase : InstructionsToErase) { if (!ToErase->use_empty()) - ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); + ToErase->replaceAllUsesWith(PoisonValue::get(ToErase->getType())); assert(ToErase->getParent() && "BB containing ToErase deleted unexpectedly!"); @@ -3677,7 +3687,7 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend();) { Instruction &Inst = *I++; if (!Inst.use_empty()) - Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); + Inst.replaceAllUsesWith(PoisonValue::get(Inst.getType())); if (isa(Inst)) continue; salvageKnowledge(&Inst, AC); @@ -3687,7 +3697,7 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { } // Now insert something that simplifycfg will turn into an unreachable. Type *Int8Ty = Type::getInt8Ty(BB->getContext()); - new StoreInst(UndefValue::get(Int8Ty), + new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(Int8Ty->getPointerTo()), BB->getTerminator()); } @@ -3827,8 +3837,8 @@ bool NewGVN::eliminateInstructions(Function &F) { LLVM_DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block " << getBlockName(PHI->getIncomingBlock(Operand)) - << " with undef due to it being unreachable\n"); - Operand.set(UndefValue::get(PHI->getType())); + << " with poison due to it being unreachable\n"); + Operand.set(PoisonValue::get(PHI->getType())); } }; // Replace unreachable phi arguments. 
diff --git a/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll b/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll index b2484260d0e4..7a0359b763e5 100644 --- a/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll +++ b/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll @@ -20,8 +20,8 @@ define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile ; CHECK: cond_next2943: ; CHECK-NEXT: br i1 false, label [[BB2982_PREHEADER:%.*]], label [[BB2928]] ; CHECK: bb2982.preheader: -; CHECK-NEXT: store i8 undef, i8* null -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: store i8 poison, i8* null +; CHECK-NEXT: ret i32 poison ; cond_next2835.1: ; preds = %cond_next2861 %tmp2922 = load i32, i32* @n_spills, align 4 ; [#uses=0] diff --git a/llvm/test/Transforms/NewGVN/assume-equal.ll b/llvm/test/Transforms/NewGVN/assume-equal.ll index f0e4f25d17b7..dc2e960f4196 100644 --- a/llvm/test/Transforms/NewGVN/assume-equal.ll +++ b/llvm/test/Transforms/NewGVN/assume-equal.ll @@ -31,7 +31,7 @@ define i32 @_Z1ii(i32 %p) { ; CHECK: bb2: ; CHECK-NEXT: br i1 true, label [[BB2]], label [[BB2]] ; CHECK: 0: -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: ret i32 [[P]] ; entry: @@ -82,7 +82,7 @@ define i32 @_Z1ik(i32 %p) { ; CHECK-NEXT: call void @llvm.assume(i1 false) ; CHECK-NEXT: ret i32 15 ; CHECK: bb3: -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: ret i32 17 ; entry: diff --git a/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll b/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll index fb9f71467d39..c6c817c53d4c 100644 --- a/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll +++ b/llvm/test/Transforms/NewGVN/phi-of-ops-simplified-to-existing-value-then-changes-again.ll @@ -83,10 +83,10 @@ define void @pr42422(i1 %c.1, i1 
%c.2) { ; CHECK: bb14: ; CHECK-NEXT: br label [[BB16]] ; CHECK: bb15: -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: br label [[BB16]] ; CHECK: bb16: -; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ undef, [[BB15]] ], [ 1, [[BB14]] ], [ 9, [[BB7]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ poison, [[BB15]] ], [ 1, [[BB14]] ], [ 9, [[BB7]] ] ; CHECK-NEXT: switch i32 [[TMP17]], label [[BB19]] [ ; CHECK-NEXT: i32 0, label [[BB6]] ; CHECK-NEXT: i32 9, label [[BB18:%.*]] diff --git a/llvm/test/Transforms/NewGVN/pr31594.ll b/llvm/test/Transforms/NewGVN/pr31594.ll index 72144f8f44a0..58bfd57cd4c0 100644 --- a/llvm/test/Transforms/NewGVN/pr31594.ll +++ b/llvm/test/Transforms/NewGVN/pr31594.ll @@ -77,7 +77,7 @@ define void @foo(i8* %arg) { ; CHECK-NEXT: i8 6, label [[BB8:%.*]] ; CHECK-NEXT: ] ; CHECK: bb8: -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: br label [[BB4]] ; CHECK: bb9: ; CHECK-NEXT: store i8 0, i8* [[ARG]], align 1, !g !0 diff --git a/llvm/test/Transforms/NewGVN/pr31758.ll b/llvm/test/Transforms/NewGVN/pr31758.ll index 6052ca973aff..078c29398c13 100644 --- a/llvm/test/Transforms/NewGVN/pr31758.ll +++ b/llvm/test/Transforms/NewGVN/pr31758.ll @@ -12,7 +12,7 @@ define void @tinkywinky() { ; CHECK: bb90: ; CHECK-NEXT: br label [[BB90]] ; CHECK: bb138: -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br label [[BB138:%.*]] ; bb: diff --git a/llvm/test/Transforms/NewGVN/pr32607.ll b/llvm/test/Transforms/NewGVN/pr32607.ll index 635757d6425b..b2a544398171 100644 --- a/llvm/test/Transforms/NewGVN/pr32607.ll +++ b/llvm/test/Transforms/NewGVN/pr32607.ll @@ -5,11 +5,9 @@ define hidden void @foo() { ; CHECK-NEXT: top: ; CHECK-NEXT: br label [[IF:%.*]] ; CHECK: if: -; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[TMP1:%.*]], [[IF]] ], [ undef, [[TOP:%.*]] ] -; CHECK-NEXT: [[TMP1]] = fadd double [[TMP0]], 
1.000000e+00 ; CHECK-NEXT: br i1 false, label [[L50:%.*]], label [[IF]] ; CHECK: L50: -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: ret void ; top: diff --git a/llvm/test/Transforms/NewGVN/pr32838.ll b/llvm/test/Transforms/NewGVN/pr32838.ll index b6b7b0d19b86..cb0dd875c3f6 100644 --- a/llvm/test/Transforms/NewGVN/pr32838.ll +++ b/llvm/test/Transforms/NewGVN/pr32838.ll @@ -54,7 +54,7 @@ define void @fn2(i64 %arg) { ; CHECK: if.then: ; CHECK-NEXT: br i1 false, label [[FIRSTPHIBLOCK:%.*]], label [[TEMP:%.*]] ; CHECK: firstphiblock: -; CHECK-NEXT: [[FIRSTPHI:%.*]] = phi i64 [ undef, [[IF_THEN]] ], [ [[SECONDPHI:%.*]], [[SECONDPHIBLOCK:%.*]] ] +; CHECK-NEXT: [[FIRSTPHI:%.*]] = phi i64 [ poison, [[IF_THEN]] ], [ [[SECONDPHI:%.*]], [[SECONDPHIBLOCK:%.*]] ] ; CHECK-NEXT: br i1 undef, label %for.cond17thread-pre-split, label [[SECONDPHIBLOCK]] ; CHECK: secondphiblock: ; CHECK-NEXT: [[SECONDPHI]] = phi i64 [ [[THIRDPHI:%.*]], [[THIRDPHIBLOCK:%.*]] ], [ [[FIRSTPHI]], [[FIRSTPHIBLOCK]] ] @@ -105,7 +105,7 @@ define void @fn3() { ; CHECK-NEXT: [[F_0:%.*]] = phi i32* [ @b, [[ENTRY:%.*]] ], [ @a, [[L1_LOOPEXIT:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond.loopexit: -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.cond: ; CHECK-NEXT: br i1 undef, label [[FOR_END14:%.*]], label [[FOR_COND1_PREHEADER:%.*]] diff --git a/llvm/test/Transforms/NewGVN/pr32845.ll b/llvm/test/Transforms/NewGVN/pr32845.ll index beba3363b303..935d3db357e1 100644 --- a/llvm/test/Transforms/NewGVN/pr32845.ll +++ b/llvm/test/Transforms/NewGVN/pr32845.ll @@ -13,7 +13,7 @@ define void @tinkywinky() { ; CHECK-NEXT: [[F_0:%.*]] = phi i32* [ @b, [[ENTRY:%.*]] ], [ @a, [[L1_LOOPEXIT:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond.loopexit: -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br label 
[[FOR_COND]] ; CHECK: for.cond: ; CHECK-NEXT: br i1 undef, label [[FOR_END14:%.*]], label [[FOR_COND1_PREHEADER:%.*]] diff --git a/llvm/test/Transforms/NewGVN/pr33014.ll b/llvm/test/Transforms/NewGVN/pr33014.ll index 4157178e4f0c..99cf18d34ae1 100644 --- a/llvm/test/Transforms/NewGVN/pr33014.ll +++ b/llvm/test/Transforms/NewGVN/pr33014.ll @@ -20,7 +20,7 @@ ; CHECK-NEXT: %dipsy = load i64, i64* @c ; CHECK-NEXT: br label %ph ; CHECK: back: ; preds = %l2 -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br label %ph ; CHECK: end: ; preds = %l2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/NewGVN/pr33187.ll b/llvm/test/Transforms/NewGVN/pr33187.ll index d7daffd864d9..3603c73d3be1 100644 --- a/llvm/test/Transforms/NewGVN/pr33187.ll +++ b/llvm/test/Transforms/NewGVN/pr33187.ll @@ -30,10 +30,10 @@ define void @fn1() local_unnamed_addr #0 { ; CHECK: while.body12: ; CHECK-NEXT: br i1 undef, label [[IF_END18]], label [[L]] ; CHECK: L.loopexit: -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br label [[L]] ; CHECK: L: -; CHECK-NEXT: [[H_125]] = phi i32 [ [[H_127]], [[WHILE_BODY12]] ], [ undef, [[L_LOOPEXIT]] ] +; CHECK-NEXT: [[H_125]] = phi i32 [ [[H_127]], [[WHILE_BODY12]] ], [ poison, [[L_LOOPEXIT]] ] ; CHECK-NEXT: br i1 undef, label [[WHILE_COND10]], label [[IF_END18]] ; CHECK: if.end18: ; CHECK-NEXT: [[H_126]] = phi i32 [ [[H_125]], [[L]] ], [ [[H_127]], [[WHILE_BODY12]] ] diff --git a/llvm/test/Transforms/NewGVN/pr33204.ll b/llvm/test/Transforms/NewGVN/pr33204.ll index 16c065080c9c..90e76c0969fa 100644 --- a/llvm/test/Transforms/NewGVN/pr33204.ll +++ b/llvm/test/Transforms/NewGVN/pr33204.ll @@ -30,7 +30,7 @@ define void @hoge(i32 %arg) { ; CHECK: bb8: ; CHECK-NEXT: br i1 false, label [[BB9:%.*]], label [[BB3:%.*]] ; CHECK: bb9: -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb10: 
; CHECK-NEXT: store i32 0, i32* @global, align 4, !h !0 diff --git a/llvm/test/Transforms/NewGVN/pr33461.ll b/llvm/test/Transforms/NewGVN/pr33461.ll index 85e8b68693b4..c4f0b8a2b94f 100644 --- a/llvm/test/Transforms/NewGVN/pr33461.ll +++ b/llvm/test/Transforms/NewGVN/pr33461.ll @@ -8,7 +8,7 @@ define void @patatino() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[FOR_COND1:%.*]], label [[FOR_INC:%.*]] ; CHECK: for.cond1: -; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i16 [ undef, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i16 [ poison, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC]] ] ; CHECK-NEXT: store i16 [[PHIOFOPS]], i16* @b, align 2 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: diff --git a/llvm/test/Transforms/NewGVN/pr33720.ll b/llvm/test/Transforms/NewGVN/pr33720.ll index 6360bb9936a9..85405fd0439d 100644 --- a/llvm/test/Transforms/NewGVN/pr33720.ll +++ b/llvm/test/Transforms/NewGVN/pr33720.ll @@ -31,10 +31,10 @@ define void @patatino() { ; CHECK-NEXT: store i64 7, i64* [[J_3:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: for.cond16: -; CHECK-NEXT: [[J_0:%.*]] = phi i64* [ @f, [[ENTRY:%.*]] ], [ undef, [[FOR_COND20:%.*]] ], [ @e, [[FOR_COND16]] ] +; CHECK-NEXT: [[J_0:%.*]] = phi i64* [ @f, [[ENTRY:%.*]] ], [ poison, [[FOR_COND20:%.*]] ], [ @e, [[FOR_COND16]] ] ; CHECK-NEXT: br i1 undef, label [[FOR_COND20]], label [[FOR_COND16]] ; CHECK: for.cond20: -; CHECK-NEXT: [[J_2:%.*]] = phi i64* [ [[J_0]], [[FOR_COND16]] ], [ undef, [[IF_END24]] ] +; CHECK-NEXT: [[J_2:%.*]] = phi i64* [ [[J_0]], [[FOR_COND16]] ], [ poison, [[IF_END24]] ] ; CHECK-NEXT: br i1 true, label [[IF_END24]], label [[FOR_COND16]] ; CHECK: if.end24: ; CHECK-NEXT: [[J_3]] = phi i64* [ [[J_2]], [[FOR_COND20]] ], [ undef, [[ENTRY]] ] diff --git a/llvm/test/Transforms/NewGVN/verify-memoryphi.ll b/llvm/test/Transforms/NewGVN/verify-memoryphi.ll index 57dbd18986d2..0fbca830a03b 100644 --- a/llvm/test/Transforms/NewGVN/verify-memoryphi.ll 
+++ b/llvm/test/Transforms/NewGVN/verify-memoryphi.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label %body, label %end ; CHECK: body: -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br label %end ; CHECK: end: ; CHECK-NEXT: ret void From 1b329fe28206287e8a496b66cc68f4c1838a0230 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 29 Dec 2021 08:27:29 -0800 Subject: [PATCH 188/992] [clang] Remove unused "using" (NFC) --- clang/lib/AST/FormatString.cpp | 1 - clang/lib/Driver/ToolChains/PS4CPU.cpp | 2 -- 2 files changed, 3 deletions(-) diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 83b952116a5e..102bcca96a38 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -21,7 +21,6 @@ using clang::analyze_format_string::FormatStringHandler; using clang::analyze_format_string::FormatSpecifier; using clang::analyze_format_string::LengthModifier; using clang::analyze_format_string::OptionalAmount; -using clang::analyze_format_string::PositionContext; using clang::analyze_format_string::ConversionSpecifier; using namespace clang; diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 5783a733983a..bcf9147833dd 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -23,8 +23,6 @@ using namespace clang::driver; using namespace clang; using namespace llvm::opt; -using clang::driver::tools::AddLinkerInputs; - void tools::PS4cpu::addProfileRTArgs(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if ((Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, From 298367ee6e36eeb1b193ad9fa92082c2ef2345a3 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 29 Dec 2021 08:34:20 -0800 Subject: [PATCH 189/992] [clang] Use nullptr instead of 0 or NULL (NFC) Identified with modernize-use-nullptr. 
--- clang/lib/AST/ASTContext.cpp | 4 ++-- clang/lib/AST/Type.cpp | 2 +- clang/lib/CodeGen/CodeGenFunction.cpp | 6 +++--- clang/lib/CodeGen/TargetInfo.cpp | 2 +- clang/lib/Driver/Driver.cpp | 2 +- clang/lib/Driver/ToolChains/WebAssembly.cpp | 2 +- clang/lib/Interpreter/IncrementalParser.cpp | 2 +- clang/lib/Lex/Lexer.cpp | 4 ++-- clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp | 3 ++- clang/tools/libclang/CIndex.cpp | 10 +++++----- 10 files changed, 19 insertions(+), 18 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 008b703d4c1a..ac6f98e91f75 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -8476,8 +8476,8 @@ static TypedefDecl *CreateHexagonBuiltinVaListDecl(const ASTContext *Context) { FieldDecl *Field = FieldDecl::Create( const_cast(*Context), VaListTagDecl, SourceLocation(), SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i], - /*TInfo=*/0, - /*BitWidth=*/0, + /*TInfo=*/nullptr, + /*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit); Field->setAccess(AS_public); VaListTagDecl->addDecl(Field); diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index c771fe264b0c..774b3e94159d 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -194,7 +194,7 @@ void ConstantArrayType::Profile(llvm::FoldingSetNodeID &ID, ID.AddInteger(ArraySize.getZExtValue()); ID.AddInteger(SizeMod); ID.AddInteger(TypeQuals); - ID.AddBoolean(SizeExpr != 0); + ID.AddBoolean(SizeExpr != nullptr); if (SizeExpr) SizeExpr->Profile(ID, Context, true); } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index e6adec6948af..4814962a472d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1595,9 +1595,9 @@ void CodeGenFunction::EmitBranchToCounterBlock( if (!InstrumentRegions || !isInstrumentedCondition(Cond)) return EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount, LH); - 
llvm::BasicBlock *ThenBlock = NULL; - llvm::BasicBlock *ElseBlock = NULL; - llvm::BasicBlock *NextBlock = NULL; + llvm::BasicBlock *ThenBlock = nullptr; + llvm::BasicBlock *ElseBlock = nullptr; + llvm::BasicBlock *NextBlock = nullptr; // Create the block we'll use to increment the appropriate counter. llvm::BasicBlock *CounterIncrBlock = createBasicBlock("lop.rhscnt"); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 85089cdb2200..d0ba4e44bffa 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -8693,7 +8693,7 @@ Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF, llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), "__new_saved_reg_area_pointer"); - llvm::Value *UsingStack = 0; + llvm::Value *UsingStack = nullptr; UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer, __saved_reg_area_end_pointer); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3b551ea94cc2..a4a53d989851 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -437,7 +437,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const { // Enforce -static if -miamcu is present. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) - DAL->AddFlagArg(0, Opts.getOption(options::OPT_static)); + DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_static)); // Add a default value of -mlinker-version=, if one was given and the user // didn't specify one. 
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index a7298a9a71bf..3614272a5f74 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -76,7 +76,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, ToolChain.AddFilePathLibArgs(Args, CmdArgs); const char *Crt1 = "crt1.o"; - const char *Entry = NULL; + const char *Entry = nullptr; // If crt1-command.o exists, it supports new-style commands, so use it. // Otherwise, use the old crt1.o. This is a temporary transition measure. diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index 84eabc3a210f..4ade8b8bb074 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -256,7 +256,7 @@ IncrementalParser::Parse(llvm::StringRef input) { /*LoadedOffset=*/0, NewLoc); // NewLoc only used for diags. - if (PP.EnterSourceFile(FID, /*DirLookup=*/0, NewLoc)) + if (PP.EnterSourceFile(FID, /*DirLookup=*/nullptr, NewLoc)) return llvm::make_error("Parsing failed. " "Cannot enter source file.", std::error_code()); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 38467a1835d0..89e89c7c1f17 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2548,9 +2548,9 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); // Position of the first trigraph in the ending sequence. - const char *TrigraphPos = 0; + const char *TrigraphPos = nullptr; // Position of the first whitespace after a '\' in the ending sequence. - const char *SpacePos = 0; + const char *SpacePos = nullptr; while (true) { // Back up off the newline. 
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index b957bec7493e..e13387fb1fc8 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -2804,7 +2804,8 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex, Out << '\'' << Lexer::getSourceText( CharSourceRange::getTokenRange(Ex->getSourceRange()), - BRC.getSourceManager(), BRC.getASTContext().getLangOpts(), 0) + BRC.getSourceManager(), BRC.getASTContext().getLangOpts(), + nullptr) << '\''; } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 4722bece7a1d..53494ecc7ae9 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -4949,7 +4949,7 @@ CXStringSet *clang_Cursor_getObjCManglings(CXCursor C) { CXPrintingPolicy clang_getCursorPrintingPolicy(CXCursor C) { if (clang_Cursor_isNull(C)) - return 0; + return nullptr; return new PrintingPolicy(getCursorContext(C).getPrintingPolicy()); } @@ -6975,16 +6975,16 @@ CXToken *clang_getToken(CXTranslationUnit TU, CXSourceLocation Location) { if (isNotUsableTU(TU)) { LOG_BAD_TU(TU); - return NULL; + return nullptr; } ASTUnit *CXXUnit = cxtu::getASTUnit(TU); if (!CXXUnit) - return NULL; + return nullptr; SourceLocation Begin = cxloc::translateSourceLocation(Location); if (Begin.isInvalid()) - return NULL; + return nullptr; SourceManager &SM = CXXUnit->getSourceManager(); std::pair DecomposedEnd = SM.getDecomposedLoc(Begin); DecomposedEnd.second += @@ -6997,7 +6997,7 @@ CXToken *clang_getToken(CXTranslationUnit TU, CXSourceLocation Location) { getTokens(CXXUnit, SourceRange(Begin, End), CXTokens); if (CXTokens.empty()) - return NULL; + return nullptr; CXTokens.resize(1); CXToken *Token = static_cast(llvm::safe_malloc(sizeof(CXToken))); From ee3f557a9cfefdcf07d4fc1394cb05098a2c2508 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 29 Dec 2021 08:55:37 -0800 Subject: 
[PATCH 190/992] [Basic] Drop unnecessary const from return types (NFC) Identified with readability-const-return-type. --- clang/include/clang/Basic/TargetID.h | 2 +- clang/lib/Basic/TargetID.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/TargetID.h b/clang/include/clang/Basic/TargetID.h index a55b15e9b92c..2579276fc034 100644 --- a/clang/include/clang/Basic/TargetID.h +++ b/clang/include/clang/Basic/TargetID.h @@ -21,7 +21,7 @@ namespace clang { /// postfixed by a plus or minus sign delimited by colons, e.g. /// gfx908:xnack+:sramecc-. Each processor have a limited /// number of predefined features when showing up in a target ID. -const llvm::SmallVector +llvm::SmallVector getAllPossibleTargetIDFeatures(const llvm::Triple &T, llvm::StringRef Processor); diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 59d416f0e015..3b8f4c13b9bf 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -15,7 +15,7 @@ namespace clang { -static const llvm::SmallVector +static llvm::SmallVector getAllPossibleAMDGPUTargetIDFeatures(const llvm::Triple &T, llvm::StringRef Proc) { // Entries in returned vector should be in alphabetical order. @@ -33,7 +33,7 @@ getAllPossibleAMDGPUTargetIDFeatures(const llvm::Triple &T, return Ret; } -const llvm::SmallVector +llvm::SmallVector getAllPossibleTargetIDFeatures(const llvm::Triple &T, llvm::StringRef Processor) { llvm::SmallVector Ret; From 4df2aba294db784546ee0bc08b41fc227b592d98 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 29 Dec 2021 08:46:26 -0800 Subject: [PATCH 191/992] [Hexagon] Calling conventions for floating point vectors They are the same as for the other HVX vectors, but types need to be listed explicitly. Also, add a detailed codegen testcase. 
Co-authored-by: Abhikrant Sharma --- llvm/lib/Target/Hexagon/HexagonCallingConv.td | 12 +- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 4 + llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 19 +- .../CodeGen/Hexagon/autohvx/calling-conv.ll | 1528 +++++++++++++++++ llvm/test/CodeGen/Hexagon/autohvx/splat.ll | 6 +- 5 files changed, 1556 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll diff --git a/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/llvm/lib/Target/Hexagon/HexagonCallingConv.td index 93e17e608dd1..cc41b569e490 100644 --- a/llvm/lib/Target/Hexagon/HexagonCallingConv.td +++ b/llvm/lib/Target/Hexagon/HexagonCallingConv.td @@ -126,16 +126,16 @@ def CC_Hexagon_HVX: CallingConv<[ // HVX 128-byte mode CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>, CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], CCAssignToStack<128,128>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], CCAssignToStack<256,128>>>, CCDelegateTo @@ -152,10 +152,10 @@ def RetCC_Hexagon_HVX: CallingConv<[ // HVX 128-byte mode CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], CCAssignToReg<[V0]>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], CCAssignToReg<[W0]>>>, CCDelegateTo diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 28f7c5414a2a..a3a9097378e7 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -59,6 +59,7 @@ 
HexagonTargetLowering::initializeHVXLowering() { addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass); } } @@ -104,6 +105,9 @@ HexagonTargetLowering::initializeHVXLowering() { // independent) handling of it would convert it to a load, which is // not always the optimal choice. setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom); + // Make concat-vectors custom to handle concats of more than 2 vectors. + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom); } for (MVT T : LegalV) { diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index ad8029687770..8d94a9978831 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -265,9 +265,7 @@ let Predicates = [UseHVX] in { // These should be preferred over a vsplat of 0. 
def: Pat<(VecI8 vzero), (V6_vd0)>; def: Pat<(VecI16 vzero), (V6_vd0)>; - def: Pat<(VecF16 vzero), (V6_vd0)>; def: Pat<(VecI32 vzero), (V6_vd0)>; - def: Pat<(VecF32 vzero), (V6_vd0)>; def: Pat<(VecPI8 vzero), (PS_vdd0)>; def: Pat<(VecPI16 vzero), (PS_vdd0)>; def: Pat<(VecPI32 vzero), (PS_vdd0)>; @@ -303,7 +301,22 @@ let Predicates = [UseHVX] in { (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; } -let Predicates = [UseHVXFloatingPoint] in { +let Predicates = [UseHVX, UseHVXFloatingPoint] in { + let AddedComplexity = 100 in { + def: Pat<(VecF16 vzero), (V6_vd0)>; + def: Pat<(VecF32 vzero), (V6_vd0)>; + def: Pat<(VecPF16 vzero), (PS_vdd0)>; + def: Pat<(VecPF32 vzero), (PS_vdd0)>; + + def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>; + def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>; + } + + def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt), diff --git a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll new file mode 100644 index 000000000000..884eb6e7ac75 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll @@ -0,0 +1,1528 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define void @f0(<128 x i8> %a0, <128 x i8>* %a1) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + ret void +} + +define void @f1(<128 x i8> %a0, <128 x i8> %a1, <128 x i8>* %a2) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; 
CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a2, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a2, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + ret void +} + +define void @f2(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8>* %a3) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + ret void +} + +define void @f3(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8>* %a4) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + ret void +} + +define void @f4(<128 x i8> 
%a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8>* %a5) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + ret void +} + +define void @f5(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8>* %a6) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = 
getelementptr <128 x i8>, <128 x i8>* %a6, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + ret void +} + +define void @f6(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8>* %a7) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + ret void +} + +define void @f7(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8>* %a8) #0 { +; CHECK-LABEL: f7: +; CHECK: // 
%bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + ret void +} + +define void @f8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8>* %a9) #0 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; 
CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + ret void +} + +define void @f9(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8>* %a10) #0 { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; 
CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + ret void +} + +define void @f10(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8>* %a11) #0 { +; CHECK-LABEL: f10: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; 
CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + ret void +} + +define void @f11(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> 
%a11, <128 x i8>* %a12) #0 { +; CHECK-LABEL: f11: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 
8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + ret void +} + +define void @f12(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8>* %a13) #0 { +; CHECK-LABEL: f12: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1536) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r5+#0) = v12 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 1 + store <128 x i8> %a1, <128 x 
i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + ret void +} + +define void @f13(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8>* %a14) #0 { +; CHECK-LABEL: f13: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1664) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = add(r0,#1536) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: 
} +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v13 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, 
<128 x i8>* %a14, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + ret void +} + +define void @f14(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8>* %a15) #0 { +; CHECK-LABEL: f14: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1792) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = add(r0,#1664) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1536) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v14 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* 
%a15, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + %v14 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 14 + store <128 x i8> %a14, <128 x i8>* %v14, align 128 + ret void +} + +define void @f15(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8> %a15, <128 x i8>* %a16) #0 { +; CHECK-LABEL: f15: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; 
CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1536) +; CHECK-NEXT: r4 = add(r0,#1664) +; CHECK-NEXT: r2 = add(r0,#1920) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = add(r0,#1792) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v15 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 5 + store <128 x i8> 
%a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + %v14 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 14 + store <128 x i8> %a14, <128 x i8>* %v14, align 128 + %v15 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 15 + store <128 x i8> %a15, <128 x i8>* %v15, align 128 + ret void +} + +define void @f16(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8> %a15, <128 x i8> %a16, <128 x i8>* %a17) #0 { +; CHECK-LABEL: f16: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; 
CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + 
%v7 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + %v14 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 14 + store <128 x i8> %a14, <128 x i8>* %v14, align 128 + %v15 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 15 + store <128 x i8> %a15, <128 x i8>* %v15, align 128 + %v16 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 16 + store <128 x i8> %a16, <128 x i8>* %v16, align 128 + ret void +} + +define void @f17(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2, <64 x i16> %a3, <64 x i16> %a4, <64 x i16> %a5, <64 x i16> %a6, <64 x i16> %a7, <64 x i16> %a8, <64 x i16> %a9, <64 x i16> %a10, <64 x i16> %a11, <64 x i16> %a12, <64 x i16> %a13, <64 x i16> %a14, <64 x i16> %a15, <64 x i16> %a16, <64 x i16>* %a17) #0 { +; CHECK-LABEL: f17: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; 
CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 0 + store <64 x i16> %a0, <64 x i16>* %v0, align 128 + %v1 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 1 + store <64 x i16> %a1, <64 x i16>* %v1, align 128 + %v2 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 2 + store <64 x i16> %a2, <64 x i16>* %v2, align 128 + %v3 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 3 + store <64 x i16> %a3, <64 x i16>* %v3, align 128 + %v4 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 4 + store <64 x i16> %a4, <64 x i16>* %v4, align 128 + %v5 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 5 + store <64 x i16> %a5, <64 x i16>* %v5, align 128 + %v6 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 6 + store <64 x i16> %a6, <64 x i16>* %v6, align 128 + %v7 = getelementptr <64 x i16>, 
<64 x i16>* %a17, i32 7 + store <64 x i16> %a7, <64 x i16>* %v7, align 128 + %v8 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 8 + store <64 x i16> %a8, <64 x i16>* %v8, align 128 + %v9 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 9 + store <64 x i16> %a9, <64 x i16>* %v9, align 128 + %v10 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 10 + store <64 x i16> %a10, <64 x i16>* %v10, align 128 + %v11 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 11 + store <64 x i16> %a11, <64 x i16>* %v11, align 128 + %v12 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 12 + store <64 x i16> %a12, <64 x i16>* %v12, align 128 + %v13 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 13 + store <64 x i16> %a13, <64 x i16>* %v13, align 128 + %v14 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 14 + store <64 x i16> %a14, <64 x i16>* %v14, align 128 + %v15 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 15 + store <64 x i16> %a15, <64 x i16>* %v15, align 128 + %v16 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 16 + store <64 x i16> %a16, <64 x i16>* %v16, align 128 + ret void +} + +define void @f18(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2, <32 x i32> %a3, <32 x i32> %a4, <32 x i32> %a5, <32 x i32> %a6, <32 x i32> %a7, <32 x i32> %a8, <32 x i32> %a9, <32 x i32> %a10, <32 x i32> %a11, <32 x i32> %a12, <32 x i32> %a13, <32 x i32> %a14, <32 x i32> %a15, <32 x i32> %a16, <32 x i32>* %a17) #0 { +; CHECK-LABEL: f18: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; 
CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 0 + store <32 x i32> %a0, <32 x i32>* %v0, align 128 + %v1 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 1 + store <32 x i32> %a1, <32 x i32>* %v1, align 128 + %v2 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 2 + store <32 x i32> %a2, <32 x i32>* %v2, align 128 + %v3 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 3 + store <32 x i32> %a3, <32 x i32>* %v3, align 128 + %v4 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 4 + store <32 x i32> %a4, <32 x i32>* %v4, align 128 + %v5 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 5 + store <32 x i32> %a5, <32 x i32>* %v5, align 128 + %v6 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 6 + store <32 x i32> %a6, <32 x i32>* %v6, align 128 + %v7 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 7 + store <32 
x i32> %a7, <32 x i32>* %v7, align 128 + %v8 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 8 + store <32 x i32> %a8, <32 x i32>* %v8, align 128 + %v9 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 9 + store <32 x i32> %a9, <32 x i32>* %v9, align 128 + %v10 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 10 + store <32 x i32> %a10, <32 x i32>* %v10, align 128 + %v11 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 11 + store <32 x i32> %a11, <32 x i32>* %v11, align 128 + %v12 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 12 + store <32 x i32> %a12, <32 x i32>* %v12, align 128 + %v13 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 13 + store <32 x i32> %a13, <32 x i32>* %v13, align 128 + %v14 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 14 + store <32 x i32> %a14, <32 x i32>* %v14, align 128 + %v15 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 15 + store <32 x i32> %a15, <32 x i32>* %v15, align 128 + %v16 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 16 + store <32 x i32> %a16, <32 x i32>* %v16, align 128 + ret void +} + +define void @f19(<64 x half> %a0, <64 x half> %a1, <64 x half> %a2, <64 x half> %a3, <64 x half> %a4, <64 x half> %a5, <64 x half> %a6, <64 x half> %a7, <64 x half> %a8, <64 x half> %a9, <64 x half> %a10, <64 x half> %a11, <64 x half> %a12, <64 x half> %a13, <64 x half> %a14, <64 x half> %a15, <64 x half> %a16, <64 x half>* %a17) #0 { +; CHECK-LABEL: f19: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = 
add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <64 x half>, <64 x half>* %a17, i32 0 + store <64 x half> %a0, <64 x half>* %v0, align 128 + %v1 = getelementptr <64 x half>, <64 x half>* %a17, i32 1 + store <64 x half> %a1, <64 x half>* %v1, align 128 + %v2 = getelementptr <64 x half>, <64 x half>* %a17, i32 2 + store <64 x half> %a2, <64 x half>* %v2, align 128 + %v3 = getelementptr <64 x half>, <64 x half>* %a17, i32 3 + store <64 x half> %a3, <64 x half>* %v3, align 128 + %v4 = getelementptr <64 x half>, <64 x half>* %a17, i32 4 + store <64 x half> %a4, <64 x half>* %v4, align 128 + %v5 = getelementptr <64 x half>, <64 x half>* %a17, i32 5 + store <64 x half> %a5, <64 x half>* %v5, align 128 + %v6 = getelementptr <64 x half>, <64 x half>* %a17, i32 6 + store <64 x half> %a6, <64 x half>* %v6, align 128 + %v7 = getelementptr <64 x half>, <64 x half>* %a17, i32 7 
+ store <64 x half> %a7, <64 x half>* %v7, align 128 + %v8 = getelementptr <64 x half>, <64 x half>* %a17, i32 8 + store <64 x half> %a8, <64 x half>* %v8, align 128 + %v9 = getelementptr <64 x half>, <64 x half>* %a17, i32 9 + store <64 x half> %a9, <64 x half>* %v9, align 128 + %v10 = getelementptr <64 x half>, <64 x half>* %a17, i32 10 + store <64 x half> %a10, <64 x half>* %v10, align 128 + %v11 = getelementptr <64 x half>, <64 x half>* %a17, i32 11 + store <64 x half> %a11, <64 x half>* %v11, align 128 + %v12 = getelementptr <64 x half>, <64 x half>* %a17, i32 12 + store <64 x half> %a12, <64 x half>* %v12, align 128 + %v13 = getelementptr <64 x half>, <64 x half>* %a17, i32 13 + store <64 x half> %a13, <64 x half>* %v13, align 128 + %v14 = getelementptr <64 x half>, <64 x half>* %a17, i32 14 + store <64 x half> %a14, <64 x half>* %v14, align 128 + %v15 = getelementptr <64 x half>, <64 x half>* %a17, i32 15 + store <64 x half> %a15, <64 x half>* %v15, align 128 + %v16 = getelementptr <64 x half>, <64 x half>* %a17, i32 16 + store <64 x half> %a16, <64 x half>* %v16, align 128 + ret void +} + +define void @f20(<32 x float> %a0, <32 x float> %a1, <32 x float> %a2, <32 x float> %a3, <32 x float> %a4, <32 x float> %a5, <32 x float> %a6, <32 x float> %a7, <32 x float> %a8, <32 x float> %a9, <32 x float> %a10, <32 x float> %a11, <32 x float> %a12, <32 x float> %a13, <32 x float> %a14, <32 x float> %a15, <32 x float> %a16, <32 x float>* %a17) #0 { +; CHECK-LABEL: f20: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: 
vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <32 x float>, <32 x float>* %a17, i32 0 + store <32 x float> %a0, <32 x float>* %v0, align 128 + %v1 = getelementptr <32 x float>, <32 x float>* %a17, i32 1 + store <32 x float> %a1, <32 x float>* %v1, align 128 + %v2 = getelementptr <32 x float>, <32 x float>* %a17, i32 2 + store <32 x float> %a2, <32 x float>* %v2, align 128 + %v3 = getelementptr <32 x float>, <32 x float>* %a17, i32 3 + store <32 x float> %a3, <32 x float>* %v3, align 128 + %v4 = getelementptr <32 x float>, <32 x float>* %a17, i32 4 + store <32 x float> %a4, <32 x float>* %v4, align 128 + %v5 = getelementptr <32 x float>, <32 x float>* %a17, i32 5 + store <32 x float> %a5, <32 x float>* %v5, align 128 + %v6 = getelementptr <32 x float>, <32 x float>* %a17, i32 6 + store <32 x 
float> %a6, <32 x float>* %v6, align 128 + %v7 = getelementptr <32 x float>, <32 x float>* %a17, i32 7 + store <32 x float> %a7, <32 x float>* %v7, align 128 + %v8 = getelementptr <32 x float>, <32 x float>* %a17, i32 8 + store <32 x float> %a8, <32 x float>* %v8, align 128 + %v9 = getelementptr <32 x float>, <32 x float>* %a17, i32 9 + store <32 x float> %a9, <32 x float>* %v9, align 128 + %v10 = getelementptr <32 x float>, <32 x float>* %a17, i32 10 + store <32 x float> %a10, <32 x float>* %v10, align 128 + %v11 = getelementptr <32 x float>, <32 x float>* %a17, i32 11 + store <32 x float> %a11, <32 x float>* %v11, align 128 + %v12 = getelementptr <32 x float>, <32 x float>* %a17, i32 12 + store <32 x float> %a12, <32 x float>* %v12, align 128 + %v13 = getelementptr <32 x float>, <32 x float>* %a17, i32 13 + store <32 x float> %a13, <32 x float>* %v13, align 128 + %v14 = getelementptr <32 x float>, <32 x float>* %a17, i32 14 + store <32 x float> %a14, <32 x float>* %v14, align 128 + %v15 = getelementptr <32 x float>, <32 x float>* %a17, i32 15 + store <32 x float> %a15, <32 x float>* %v15, align 128 + %v16 = getelementptr <32 x float>, <32 x float>* %a17, i32 16 + store <32 x float> %a16, <32 x float>* %v16, align 128 + ret void +} + +define <128 x i8> @f21() #0 { +; CHECK-LABEL: f21: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x i8> undef, i8 0, i32 0 + %v1 = shufflevector <128 x i8> %v0, <128 x i8> undef, <128 x i32> zeroinitializer + ret <128 x i8> %v1 +} + +define <256 x i8> @f22() #0 { +; CHECK-LABEL: f22: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <256 x i8> undef, i8 0, i32 0 + %v1 = shufflevector <256 x i8> %v0, <256 x i8> undef, <256 x i32> zeroinitializer + ret <256 x i8> %v1 +} + +define <64 x i16> @f23() #0 { +; CHECK-LABEL: f23: +; CHECK: // %bb.0: +; 
CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x i16> undef, i16 0, i32 0 + %v1 = shufflevector <64 x i16> %v0, <64 x i16> undef, <64 x i32> zeroinitializer + ret <64 x i16> %v1 +} + +define <128 x i16> @f24() #0 { +; CHECK-LABEL: f24: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x i16> undef, i16 0, i32 0 + %v1 = shufflevector <128 x i16> %v0, <128 x i16> undef, <128 x i32> zeroinitializer + ret <128 x i16> %v1 +} + +define <32 x i32> @f25() #0 { +; CHECK-LABEL: f25: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <32 x i32> undef, i32 0, i32 0 + %v1 = shufflevector <32 x i32> %v0, <32 x i32> undef, <32 x i32> zeroinitializer + ret <32 x i32> %v1 +} + +define <64 x i32> @f26() #0 { +; CHECK-LABEL: f26: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x i32> undef, i32 0, i32 0 + %v1 = shufflevector <64 x i32> %v0, <64 x i32> undef, <64 x i32> zeroinitializer + ret <64 x i32> %v1 +} + +define <64 x half> @f27() #0 { +; CHECK-LABEL: f27: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x half> undef, half 0xH0, i32 0 + %v1 = shufflevector <64 x half> %v0, <64 x half> undef, <64 x i32> zeroinitializer + ret <64 x half> %v1 +} + +define <128 x half> @f28() #0 { +; CHECK-LABEL: f28: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x half> undef, half 0xH0, i32 0 + %v1 = shufflevector <128 x half> %v0, <128 x half> undef, <128 x i32> zeroinitializer + ret <128 x half> %v1 +} + +define <32 x float> @f29() #0 { +; CHECK-LABEL: f29: +; 
CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <32 x float> undef, float 0.0, i32 0 + %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer + ret <32 x float> %v1 +} + +define <64 x float> @f30() #0 { +; CHECK-LABEL: f30: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x float> undef, float 0.0, i32 0 + %v1 = shufflevector <64 x float> %v0, <64 x float> undef, <64 x i32> zeroinitializer + ret <64 x float> %v1 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll index bbea3a21270c..eea089851e9c 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll @@ -402,9 +402,8 @@ define <64 x half> @f24(i16 %a0) #2 { ; CHECK-LABEL: f24: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: v0.h = vsplat(r1) +; CHECK-NEXT: v0.h = vsplat(r0) ; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: vmem(r0+#0) = v0.new ; CHECK-NEXT: } %v0 = bitcast i16 %a0 to half %v1 = insertelement <64 x half> undef, half %v0, i32 0 @@ -417,9 +416,8 @@ define <32 x float> @f25(float %a0) #2 { ; CHECK-LABEL: f25: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vsplat(r1) +; CHECK-NEXT: v0 = vsplat(r0) ; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: vmem(r0+#0) = v0.new ; CHECK-NEXT: } %v0 = insertelement <32 x float> undef, float %a0, i32 0 %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer From 015ff729cb90317e4e75cf48b1e5dd7850f0cbd0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 29 Dec 2021 09:14:32 -0800 Subject: [PATCH 192/992] [RISCV] Add a few more instructions to hasAllNBitUsers. 
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 14 +++++++ llvm/test/CodeGen/RISCV/atomic-rmw.ll | 40 +++++++++--------- llvm/test/CodeGen/RISCV/atomic-signext.ll | 8 ++-- .../CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll | 4 +- llvm/test/CodeGen/RISCV/div-by-constant.ll | 18 ++++---- llvm/test/CodeGen/RISCV/sadd_sat_plus.ll | 4 +- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 12 +++--- llvm/test/CodeGen/RISCV/ssub_sat_plus.ll | 4 +- llvm/test/CodeGen/RISCV/uadd_sat_plus.ll | 10 ++--- .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 42 +++++++++---------- llvm/test/CodeGen/RISCV/usub_sat_plus.ll | 10 ++--- 11 files changed, 90 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b24eb5f7bbf4..ab53c0eadfcf 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1727,6 +1727,20 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) return false; break; + case RISCV::ANDI: + if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) + return false; + break; + case RISCV::SEXTB: + if (Bits < 8) + return false; + break; + case RISCV::SEXTH: + case RISCV::ZEXTH_RV32: + case RISCV::ZEXTH_RV64: + if (Bits < 16) + return false; + break; case RISCV::ADDUW: case RISCV::SH1ADDUW: case RISCV::SH2ADDUW: diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 131b3abf0fdf..27262442d257 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2116,7 +2116,7 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 
@@ -2264,7 +2264,7 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -2412,7 +2412,7 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -2560,7 +2560,7 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -2708,7 +2708,7 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -2856,7 +2856,7 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -3004,7 +3004,7 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -3152,7 +3152,7 @@ define i8 
@atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -3300,7 +3300,7 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -3448,7 +3448,7 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -6997,7 +6997,7 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -7147,7 +7147,7 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i16_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -7297,7 +7297,7 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i16_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -7447,7 +7447,7 @@ define i16 
@atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -7597,7 +7597,7 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -7747,7 +7747,7 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -7897,7 +7897,7 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i16_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -8047,7 +8047,7 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i16_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -8197,7 +8197,7 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -8347,7 +8347,7 @@ define 
i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 0347efeafbe5..609863f127e8 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -696,7 +696,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -850,7 +850,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 ; RV64IA-NEXT: sllw a7, a4, a0 @@ -1753,7 +1753,7 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 @@ -1909,7 +1909,7 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a6, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll 
b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll index a09b4d94558e..4a08a7d3f119 100644 --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -173,7 +173,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; RV64I-NEXT: andi a0, a0, 51 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 15 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB3_2: @@ -596,7 +596,7 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind { ; RV64I-NEXT: andi a0, a0, 51 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 15 ; RV64I-NEXT: ret %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll index 5abae8fe3298..8f354bd38b88 100644 --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -200,7 +200,7 @@ define i8 @udiv8_constant_add(i8 %a) nounwind { ; RV64IM-NEXT: li a2, 37 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: andi a0, a0, 254 ; RV64IM-NEXT: srli a0, a0, 1 ; RV64IM-NEXT: add a0, a0, a1 @@ -213,7 +213,7 @@ define i8 @udiv8_constant_add(i8 %a) nounwind { ; RV64IMZB-NEXT: sh3add a2, a1, a1 ; RV64IMZB-NEXT: sh2add a1, a2, a1 ; RV64IMZB-NEXT: srli a1, a1, 8 -; RV64IMZB-NEXT: sub a0, a0, a1 +; RV64IMZB-NEXT: subw a0, a0, a1 ; RV64IMZB-NEXT: andi a0, a0, 254 ; RV64IMZB-NEXT: srli a0, a0, 1 ; RV64IMZB-NEXT: add a0, a0, a1 @@ -310,7 +310,7 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV64IMZB-NEXT: addiw a2, a2, 1171 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 16 -; RV64IMZB-NEXT: sub a0, a0, a1 +; RV64IMZB-NEXT: subw a0, a0, a1 ; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: srli a0, a0, 1 ; RV64IMZB-NEXT: 
add a0, a0, a1 @@ -693,7 +693,7 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind { ; RV64IM-NEXT: li a2, -109 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: add a0, a1, a0 +; RV64IM-NEXT: addw a0, a1, a0 ; RV64IM-NEXT: andi a1, a0, 128 ; RV64IM-NEXT: srli a1, a1, 7 ; RV64IM-NEXT: slli a0, a0, 56 @@ -707,7 +707,7 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind { ; RV64IMZB-NEXT: li a2, -109 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 8 -; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: addw a0, a1, a0 ; RV64IMZB-NEXT: andi a1, a0, 128 ; RV64IMZB-NEXT: srli a1, a1, 7 ; RV64IMZB-NEXT: sext.b a0, a0 @@ -757,7 +757,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind { ; RV64IM-NEXT: li a2, 109 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: sub a0, a1, a0 +; RV64IM-NEXT: subw a0, a1, a0 ; RV64IM-NEXT: andi a1, a0, 128 ; RV64IM-NEXT: srli a1, a1, 7 ; RV64IM-NEXT: slli a0, a0, 56 @@ -771,7 +771,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind { ; RV64IMZB-NEXT: li a2, 109 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 8 -; RV64IMZB-NEXT: sub a0, a1, a0 +; RV64IMZB-NEXT: subw a0, a1, a0 ; RV64IMZB-NEXT: andi a1, a0, 128 ; RV64IMZB-NEXT: srli a1, a1, 7 ; RV64IMZB-NEXT: sext.b a0, a0 @@ -937,7 +937,7 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind { ; RV64IMZB-NEXT: addiw a2, a2, -1911 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 16 -; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: addw a0, a1, a0 ; RV64IMZB-NEXT: zext.h a1, a0 ; RV64IMZB-NEXT: srli a1, a1, 15 ; RV64IMZB-NEXT: sext.h a0, a0 @@ -1003,7 +1003,7 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind { ; RV64IMZB-NEXT: addiw a2, a2, 1911 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 16 -; RV64IMZB-NEXT: sub a0, a1, a0 +; RV64IMZB-NEXT: subw a0, a1, a0 ; RV64IMZB-NEXT: zext.h a1, a0 ; RV64IMZB-NEXT: srli a1, a1, 15 ; RV64IMZB-NEXT: sext.h a0, a0 diff --git 
a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll index ac59d1087da2..bd7e50d6790c 100644 --- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll @@ -259,7 +259,7 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; RV64IZbb-LABEL: func16: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: sext.h a0, a0 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: sext.h a1, a1 ; RV64IZbb-NEXT: add a0, a0, a1 ; RV64IZbb-NEXT: lui a1, 8 @@ -335,7 +335,7 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; RV64IZbb-LABEL: func8: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: sext.b a0, a0 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: sext.b a1, a1 ; RV64IZbb-NEXT: add a0, a0, a1 ; RV64IZbb-NEXT: li a1, 127 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 58b27218d6c8..361fd88afbc6 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -244,9 +244,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV64-NEXT: srai a1, a1, 58 ; RV64-NEXT: srli a1, a1, 9 ; RV64-NEXT: andi a1, a1, 3 -; RV64-NEXT: add a1, a0, a1 +; RV64-NEXT: addw a1, a0, a1 ; RV64-NEXT: andi a1, a1, 60 -; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: subw a0, a0, a1 ; RV64-NEXT: andi a0, a0, 63 ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: ret @@ -270,9 +270,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV64M-NEXT: srai a1, a1, 58 ; RV64M-NEXT: srli a1, a1, 9 ; RV64M-NEXT: andi a1, a1, 3 -; RV64M-NEXT: add a1, a0, a1 +; RV64M-NEXT: addw a1, a0, a1 ; RV64M-NEXT: andi a1, a1, 60 -; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: subw a0, a0, a1 ; RV64M-NEXT: andi a0, a0, 63 ; RV64M-NEXT: snez a0, a0 ; RV64M-NEXT: ret @@ -296,9 +296,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV64MV-NEXT: srai a1, a1, 58 ; RV64MV-NEXT: srli a1, a1, 9 ; 
RV64MV-NEXT: andi a1, a1, 3 -; RV64MV-NEXT: add a1, a0, a1 +; RV64MV-NEXT: addw a1, a0, a1 ; RV64MV-NEXT: andi a1, a1, 60 -; RV64MV-NEXT: sub a0, a0, a1 +; RV64MV-NEXT: subw a0, a0, a1 ; RV64MV-NEXT: andi a0, a0, 63 ; RV64MV-NEXT: snez a0, a0 ; RV64MV-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll index 3ff18083e8d5..3393f2b01e53 100644 --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -258,7 +258,7 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; RV64IZbb-LABEL: func16: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: sext.h a0, a0 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: sext.h a1, a1 ; RV64IZbb-NEXT: sub a0, a0, a1 ; RV64IZbb-NEXT: lui a1, 8 @@ -334,7 +334,7 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; RV64IZbb-LABEL: func8: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: sext.b a0, a0 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: sext.b a1, a1 ; RV64IZbb-NEXT: sub a0, a0, a1 ; RV64IZbb-NEXT: li a1, 127 diff --git a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll index 3655564f16dc..96d29b4b2882 100644 --- a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll @@ -160,7 +160,7 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; RV64IZbb-LABEL: func16: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: zext.h a0, a0 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: zext.h a1, a1 ; RV64IZbb-NEXT: add a0, a0, a1 ; RV64IZbb-NEXT: lui a1, 16 @@ -189,7 +189,7 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; RV64I-LABEL: func8: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a0, a0, 255 -; RV64I-NEXT: mul a1, a1, a2 +; RV64I-NEXT: mulw a1, a1, a2 ; RV64I-NEXT: andi a1, a1, 255 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: li a1, 255 @@ -212,7 +212,7 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind 
{ ; RV64IZbb-LABEL: func8: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: andi a0, a0, 255 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: andi a1, a1, 255 ; RV64IZbb-NEXT: add a0, a0, a1 ; RV64IZbb-NEXT: li a1, 255 @@ -240,7 +240,7 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; RV64I-LABEL: func4: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a0, a0, 15 -; RV64I-NEXT: mul a1, a1, a2 +; RV64I-NEXT: mulw a1, a1, a2 ; RV64I-NEXT: andi a1, a1, 15 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: li a1, 15 @@ -263,7 +263,7 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; RV64IZbb-LABEL: func4: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: andi a0, a0, 15 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: andi a1, a1, 15 ; RV64IZbb-NEXT: add a0, a0, a1 ; RV64IZbb-NEXT: li a1, 15 diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index 6a9c674d9d2b..8c3870ee4070 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -219,9 +219,9 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; ; RV64-LABEL: test_urem_odd_setne: ; RV64: # %bb.0: -; RV64-NEXT: slli a1, a0, 1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: neg a0, a0 +; RV64-NEXT: slliw a1, a0, 1 +; RV64-NEXT: addw a0, a1, a0 +; RV64-NEXT: negw a0, a0 ; RV64-NEXT: andi a0, a0, 15 ; RV64-NEXT: li a1, 3 ; RV64-NEXT: sltu a0, a1, a0 @@ -239,9 +239,9 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; ; RV64M-LABEL: test_urem_odd_setne: ; RV64M: # %bb.0: -; RV64M-NEXT: slli a1, a0, 1 -; RV64M-NEXT: add a0, a1, a0 -; RV64M-NEXT: neg a0, a0 +; RV64M-NEXT: slliw a1, a0, 1 +; RV64M-NEXT: addw a0, a1, a0 +; RV64M-NEXT: negw a0, a0 ; RV64M-NEXT: andi a0, a0, 15 ; RV64M-NEXT: li a1, 3 ; RV64M-NEXT: sltu a0, a1, a0 @@ -259,9 +259,9 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; ; RV64MV-LABEL: test_urem_odd_setne: ; RV64MV: # %bb.0: -; 
RV64MV-NEXT: slli a1, a0, 1 -; RV64MV-NEXT: add a0, a1, a0 -; RV64MV-NEXT: neg a0, a0 +; RV64MV-NEXT: slliw a1, a0, 1 +; RV64MV-NEXT: addw a0, a1, a0 +; RV64MV-NEXT: negw a0, a0 ; RV64MV-NEXT: andi a0, a0, 15 ; RV64MV-NEXT: li a1, 3 ; RV64MV-NEXT: sltu a0, a1, a0 @@ -310,7 +310,7 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind { ; RV64M-LABEL: test_urem_negative_odd: ; RV64M: # %bb.0: ; RV64M-NEXT: li a1, 307 -; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: mulw a0, a0, a1 ; RV64M-NEXT: andi a0, a0, 511 ; RV64M-NEXT: li a1, 1 ; RV64M-NEXT: sltu a0, a1, a0 @@ -328,7 +328,7 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind { ; RV64MV-LABEL: test_urem_negative_odd: ; RV64MV: # %bb.0: ; RV64MV-NEXT: li a1, 307 -; RV64MV-NEXT: mul a0, a0, a1 +; RV64MV-NEXT: mulw a0, a0, a1 ; RV64MV-NEXT: andi a0, a0, 511 ; RV64MV-NEXT: li a1, 1 ; RV64MV-NEXT: sltu a0, a1, a0 @@ -427,19 +427,19 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind { ; RV64-NEXT: li a1, 819 ; RV64-NEXT: mv a0, s1 ; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: addi a0, a0, -1638 +; RV64-NEXT: addiw a0, a0, -1638 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: li a1, 1 ; RV64-NEXT: sltu s1, a1, a0 ; RV64-NEXT: li a1, 1463 ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: addi a0, a0, -1463 +; RV64-NEXT: addiw a0, a0, -1463 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: li a1, 292 ; RV64-NEXT: sltu a0, a1, a0 -; RV64-NEXT: neg a1, s3 -; RV64-NEXT: neg a0, a0 +; RV64-NEXT: negw a1, s3 +; RV64-NEXT: negw a0, a0 ; RV64-NEXT: andi a1, a1, 2047 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: slli a0, a0, 11 @@ -524,19 +524,19 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind { ; RV64M-NEXT: li a4, 341 ; RV64M-NEXT: sltu a1, a4, a1 ; RV64M-NEXT: li a4, 819 -; RV64M-NEXT: mul a3, a3, a4 -; RV64M-NEXT: addi a3, a3, -1638 +; RV64M-NEXT: mulw a3, a3, a4 +; RV64M-NEXT: addiw a3, a3, -1638 ; RV64M-NEXT: andi a3, a3, 2047 ; RV64M-NEXT: li a4, 1 ; RV64M-NEXT: sltu a3, a4, a3 ; RV64M-NEXT: li a4, 1463 -; 
RV64M-NEXT: mul a2, a2, a4 -; RV64M-NEXT: addi a2, a2, -1463 +; RV64M-NEXT: mulw a2, a2, a4 +; RV64M-NEXT: addiw a2, a2, -1463 ; RV64M-NEXT: andi a2, a2, 2047 ; RV64M-NEXT: li a4, 292 ; RV64M-NEXT: sltu a2, a4, a2 -; RV64M-NEXT: neg a1, a1 -; RV64M-NEXT: neg a2, a2 +; RV64M-NEXT: negw a1, a1 +; RV64M-NEXT: negw a2, a2 ; RV64M-NEXT: andi a1, a1, 2047 ; RV64M-NEXT: andi a2, a2, 2047 ; RV64M-NEXT: slli a2, a2, 11 diff --git a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll index ef466dcc4837..42acda020cba 100644 --- a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll @@ -163,7 +163,7 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; RV64IZbb-LABEL: func16: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: zext.h a0, a0 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: zext.h a1, a1 ; RV64IZbb-NEXT: maxu a0, a0, a1 ; RV64IZbb-NEXT: sub a0, a0, a1 @@ -190,7 +190,7 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; RV64I-LABEL: func8: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a0, 255 -; RV64I-NEXT: mul a0, a1, a2 +; RV64I-NEXT: mulw a0, a1, a2 ; RV64I-NEXT: andi a0, a0, 255 ; RV64I-NEXT: sub a1, a3, a0 ; RV64I-NEXT: li a0, 0 @@ -212,7 +212,7 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; RV64IZbb-LABEL: func8: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: andi a0, a0, 255 -; RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: andi a1, a1, 255 ; RV64IZbb-NEXT: maxu a0, a0, a1 ; RV64IZbb-NEXT: sub a0, a0, a1 @@ -239,7 +239,7 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; RV64I-LABEL: func4: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a0, 15 -; RV64I-NEXT: mul a0, a1, a2 +; RV64I-NEXT: mulw a0, a1, a2 ; RV64I-NEXT: andi a0, a0, 15 ; RV64I-NEXT: sub a1, a3, a0 ; RV64I-NEXT: li a0, 0 @@ -261,7 +261,7 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; RV64IZbb-LABEL: func4: ; RV64IZbb: # %bb.0: ; RV64IZbb-NEXT: andi a0, a0, 15 -; 
RV64IZbb-NEXT: mul a1, a1, a2 +; RV64IZbb-NEXT: mulw a1, a1, a2 ; RV64IZbb-NEXT: andi a1, a1, 15 ; RV64IZbb-NEXT: maxu a0, a0, a1 ; RV64IZbb-NEXT: sub a0, a0, a1 From ba9016a0304630cbf36c7838a3916d03fc8396c6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 29 Dec 2021 19:00:41 +0100 Subject: [PATCH 193/992] [LV] Replace redundant tail-fold check with assert (NFC). The code path can only be reached when folding the tail, so turn the check into an assertion. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 112e697c7f5e..5635b1596bac 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8458,9 +8458,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi(); Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint); - bool TailFolded = !CM.isScalarEpilogueAllowed(); + assert(CM.foldTailByMasking() && "must fold the tail"); - if (TailFolded && CM.TTI.emitGetActiveLaneMask()) { + if (CM.TTI.emitGetActiveLaneMask()) { VPValue *TC = Plan->getOrCreateTripCount(); BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}); } else { From 180455ae5ecd813ad1acacaa555342cf53013d4e Mon Sep 17 00:00:00 2001 From: "William S. 
Moses" Date: Wed, 29 Dec 2021 03:08:01 -0500 Subject: [PATCH 194/992] [MLIR][LLVM] Expose powi intrinsic to MLIR Expose the powi intrinsic to the LLVM dialect within MLIR Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116364 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 7 +++++++ mlir/test/Dialect/LLVMIR/roundtrip.mlir | 3 +++ 2 files changed, 10 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index f671c3ca1dae..a2a1f7a57f43 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1395,6 +1395,12 @@ class LLVM_BinarySameArgsIntrinsicOp traits = []> : let arguments = (ins LLVM_Type:$a, LLVM_Type:$b); } +class LLVM_BinaryIntrinsicOp traits = []> : + LLVM_OneResultIntrOp { + let arguments = (ins LLVM_Type:$a, LLVM_Type:$b); +} + class LLVM_TernarySameArgsIntrinsicOp traits = []> : LLVM_OneResultIntrOp { @@ -1426,6 +1432,7 @@ def LLVM_Prefetch : LLVM_ZeroResultIntrOp<"prefetch", [0]> { def LLVM_SinOp : LLVM_UnaryIntrinsicOp<"sin">; def LLVM_SqrtOp : LLVM_UnaryIntrinsicOp<"sqrt">; def LLVM_PowOp : LLVM_BinarySameArgsIntrinsicOp<"pow">; +def LLVM_PowIOp : LLVM_BinaryIntrinsicOp<"powi">; def LLVM_BitReverseOp : LLVM_UnaryIntrinsicOp<"bitreverse">; def LLVM_CountLeadingZerosOp : LLVM_CountZerosIntrinsicOp<"ctlz">; def LLVM_CountTrailingZerosOp : LLVM_CountZerosIntrinsicOp<"cttz">; diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index b931c9bb69e8..9e504e7fe081 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -146,6 +146,9 @@ func @ops(%arg0: i32, %arg1: f32, // CHECK: "llvm.intr.pow"(%[[FLOAT]], %[[FLOAT]]) : (f32, f32) -> f32 %31 = "llvm.intr.pow"(%arg1, %arg1) : (f32, f32) -> f32 +// CHECK: "llvm.intr.powi"(%[[FLOAT]], %[[I32]]) : (f32, i32) -> f32 + %a31 = "llvm.intr.powi"(%arg1, %arg0) : (f32, i32) -> f32 + // CHECK: 
"llvm.intr.bitreverse"(%{{.*}}) : (i32) -> i32 %32 = "llvm.intr.bitreverse"(%arg0) : (i32) -> i32 From 505d57486e57eb61e29bed6517de5152d208fede Mon Sep 17 00:00:00 2001 From: Joshua Herrera Date: Wed, 29 Dec 2021 10:18:21 -0800 Subject: [PATCH 195/992] [Hexagon] Improve BUILD_VECTOR codegen For vectors with repeating values, old codegen would rotate and insert every duplicate element. This patch replaces that behavior with a splat of the most common element, vinsert/vror only occur when needed. --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 70 +++++++++++++---- .../Hexagon/autohvx/build-vector-i32-128b.ll | 34 +++++++++ .../Hexagon/autohvx/build-vector-i32-64b.ll | 34 +++++++++ .../Hexagon/autohvx/isel-build-vector.ll | 76 +++++++++++++++++++ 4 files changed, 201 insertions(+), 13 deletions(-) mode change 100644 => 100755 llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-128b.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-64b.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp old mode 100644 new mode 100755 index a3a9097378e7..569ad8b337db --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -656,22 +656,66 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, } } - // Construct two halves in parallel, then or them together. + // Find most common element to initialize vector with. This is to avoid + // unnecessary vinsert/valign for cases where the same value is present + // many times. Creates a histogram of the vector's elements to find the + // most common element n. 
assert(4*Words.size() == Subtarget.getVectorLength()); - SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); - SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); - SDValue S = DAG.getConstant(4, dl, MVT::i32); - for (unsigned i = 0; i != NumWords/2; ++i) { - SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, - {HalfV0, Words[i]}); - SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, - {HalfV1, Words[i+NumWords/2]}); - HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S}); - HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S}); + int VecHist[32]; + int n = 0; + for (unsigned i = 0; i != NumWords; ++i) { + VecHist[i] = 0; + if (Words[i].isUndef()) + continue; + for (unsigned j = i; j != NumWords; ++j) + if (Words[i] == Words[j]) + VecHist[i]++; + + if (VecHist[i] > VecHist[n]) + n = i; } - HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, - {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); + SDValue HalfV = getZero(dl, VecTy, DAG); + if (VecHist[n] > 1) { + SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]); + HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy, + {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)}); + } + SDValue HalfV0 = HalfV; + SDValue HalfV1 = HalfV; + + // Construct two halves in parallel, then or them together. Rn and Rm count + // number of rotations needed before the next element. One last rotation is + // performed post-loop to position the last element. + int Rn = 0, Rm = 0; + SDValue Sn, Sm; + SDValue N = HalfV0; + SDValue M = HalfV1; + for (unsigned i = 0; i != NumWords/2; ++i) { + + // Rotate by element count since last insertion. 
+ if (Words[i] != Words[n] || VecHist[n] <= 1) { + Sn = DAG.getConstant(Rn, dl, MVT::i32); + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); + N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV0, Words[i]}); + Rn = 0; + } + if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { + Sm = DAG.getConstant(Rm, dl, MVT::i32); + HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); + M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV1, Words[i+NumWords/2]}); + Rm = 0; + } + Rn += 4; + Rm += 4; + } + // Perform last rotation. + Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32); + Sm = DAG.getConstant(Rm, dl, MVT::i32); + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); + HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0); SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1); diff --git a/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-128b.ll new file mode 100644 index 000000000000..102ebd26c825 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-128b.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that vector is produced with vxor +; CHECK: v{{[0-9]*}} = vxor +define <32 x i32> @f0(i32 %x) #0 { + %vect = insertelement <32 x i32> , i32 %x, i32 0 + ret <32 x i32> %vect +} + +; Check that vector is produced with vsplat +; CHECK: v{{[0-9]*}} = vsplat +define <32 x i32> @f1(i32 %x) #0 { + %vect = insertelement <32 x i32> , i32 %x, i32 0 + ret <32 x i32> %vect +} + +; Check that the correct vror is generated +; CHECK: [[REG0:r([0-9]+)]] = #120 +; CHECK: vror(v{{[0-9]+}},[[REG0]]) +define <32 x i32> @f2(i32 %x) #0 { + %vect = insertelement <32 x i32> , i32 %x, i32 2 + ret <32 x i32> %vect +} + +; Check that the correct vror is generated +; CHECK: [[REG0:r([0-9]+)]] = #12 +; CHECK: vror(v{{[0-9]+}},[[REG0]]) +define <32 x i32> @f3(i32 %x) #0 { + 
%vect = insertelement <32 x i32> , i32 %x, i32 29 + ret <32 x i32> %vect +} + +attributes #0 = { readnone nounwind "target-cpu"="hexagonv62" "target-features"="+hvx,+hvx-length128b" } + diff --git a/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-64b.ll new file mode 100644 index 000000000000..85a7872b8a61 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-64b.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that vector is produced with vxor +; CHECK: v{{[0-9]*}} = vxor +define <16 x i32> @f0(i32 %x) #0 { + %vect = insertelement <16 x i32> , i32 %x, i32 0 + ret <16 x i32> %vect +} + +; Check that vector is produced with vsplat +; CHECK: v{{[0-9]*}} = vsplat +define <16 x i32> @f1(i32 %x) #0 { + %vect = insertelement <16 x i32> , i32 %x, i32 0 + ret <16 x i32> %vect +} + +; Check that the correct vror is generated +; CHECK: [[REG0:r([0-9]+)]] = #56 +; CHECK: vror(v{{[0-9]+}},[[REG0]]) +define <16 x i32> @f2(i32 %x) #0 { + %vect = insertelement <16 x i32> , i32 %x, i32 2 + ret <16 x i32> %vect +} + +; Check that the correct vror is generated +; CHECK: [[REG0:r([0-9]+)]] = #12 +; CHECK: vror(v{{[0-9]+}},[[REG0]]) +define <16 x i32> @f3(i32 %x) #0 { + %vect = insertelement <16 x i32> , i32 %x, i32 13 + ret <16 x i32> %vect +} + +attributes #0 = { readnone nounwind "target-cpu"="hexagonv62" "target-features"="+hvx,+hvx-length64b" } + diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll new file mode 100644 index 000000000000..e6b8445f5121 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define <32 x i32> @fred(i32 %a0) #0 { +; CHECK-LABEL: fred: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; 
CHECK-NEXT: r3:2 = combine(#20,#9) +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: r1 = #24 +; CHECK-NEXT: r4 = #12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1 = vror(v0,r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1.w = vinsert(r2) +; CHECK-NEXT: r4 = #7 +; CHECK-NEXT: r2 = #116 +; CHECK-NEXT: v0 = vror(v0,r4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.w = vinsert(r4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1 = vror(v1,r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1.w = vinsert(r0) +; CHECK-NEXT: v0 = vror(v0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1 = vror(v1,r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vor(v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <32 x i32> undef, i32 undef, i32 0 + %v1 = insertelement <32 x i32> %v0, i32 undef, i32 1 + %v2 = insertelement <32 x i32> %v1, i32 undef, i32 2 + %v3 = insertelement <32 x i32> %v2, i32 7, i32 3 + %v4 = insertelement <32 x i32> %v3, i32 undef, i32 4 + %v5 = insertelement <32 x i32> %v4, i32 undef, i32 5 + %v6 = insertelement <32 x i32> %v5, i32 undef, i32 6 + %v7 = insertelement <32 x i32> %v6, i32 undef, i32 7 + %v8 = insertelement <32 x i32> %v7, i32 undef, i32 8 + %v9 = insertelement <32 x i32> %v8, i32 undef, i32 9 + %v10 = insertelement <32 x i32> %v9, i32 undef, i32 10 + %v11 = insertelement <32 x i32> %v10, i32 undef, i32 11 + %v12 = insertelement <32 x i32> %v11, i32 undef, i32 12 + %v13 = insertelement <32 x i32> %v12, i32 undef, i32 13 + %v14 = insertelement <32 x i32> %v13, i32 undef, i32 14 + %v15 = insertelement <32 x i32> %v14, i32 undef, i32 15 + %v16 = insertelement <32 x i32> %v15, i32 undef, i32 16 + %v17 = insertelement <32 x i32> %v16, i32 undef, i32 17 + %v18 = insertelement <32 x i32> %v17, i32 undef, i32 18 + %v19 = insertelement <32 x i32> %v18, i32 undef, i32 19 + %v20 = insertelement <32 x i32> %v19, i32 undef, i32 20 + %v21 = insertelement <32 x i32> %v20, i32 undef, i32 21 + %v22 = 
insertelement <32 x i32> %v21, i32 9, i32 22 + %v23 = insertelement <32 x i32> %v22, i32 undef, i32 23 + %v24 = insertelement <32 x i32> %v23, i32 undef, i32 24 + %v25 = insertelement <32 x i32> %v24, i32 undef, i32 25 + %v26 = insertelement <32 x i32> %v25, i32 undef, i32 26 + %v27 = insertelement <32 x i32> %v26, i32 %a0, i32 27 + %v28 = insertelement <32 x i32> %v27, i32 undef, i32 28 + %v29 = insertelement <32 x i32> %v28, i32 undef, i32 29 + %v30 = insertelement <32 x i32> %v29, i32 undef, i32 30 + %v31 = insertelement <32 x i32> %v30, i32 undef, i32 31 + ret <32 x i32> %v31 +} + +attributes #0 = { "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" } + From ba07f300c6d67a2c6dde8eef216b7a77ac4600bb Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 29 Dec 2021 11:00:01 -0800 Subject: [PATCH 196/992] [Hexagon] Don't build two halves of HVX vector in parallel There can only be one permute operations per packet, so this actually pessimizes the code (due to the extra "or"). --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 67 +++++-------------- .../Hexagon/autohvx/isel-build-vector.ll | 24 +++---- 2 files changed, 28 insertions(+), 63 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 569ad8b337db..a151f3de170a 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -659,10 +659,10 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, // Find most common element to initialize vector with. This is to avoid // unnecessary vinsert/valign for cases where the same value is present // many times. Creates a histogram of the vector's elements to find the - // most common element n. + // most common element. 
assert(4*Words.size() == Subtarget.getVectorLength()); - int VecHist[32]; - int n = 0; + SmallVector VecHist(32); + int MaxAt = 0; for (unsigned i = 0; i != NumWords; ++i) { VecHist[i] = 0; if (Words[i].isUndef()) @@ -671,60 +671,29 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, if (Words[i] == Words[j]) VecHist[i]++; - if (VecHist[i] > VecHist[n]) - n = i; + if (VecHist[i] > VecHist[MaxAt]) + MaxAt = i; } - SDValue HalfV = getZero(dl, VecTy, DAG); - if (VecHist[n] > 1) { - SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]); - HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy, - {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)}); - } - SDValue HalfV0 = HalfV; - SDValue HalfV1 = HalfV; - - // Construct two halves in parallel, then or them together. Rn and Rm count - // number of rotations needed before the next element. One last rotation is - // performed post-loop to position the last element. - int Rn = 0, Rm = 0; - SDValue Sn, Sm; - SDValue N = HalfV0; - SDValue M = HalfV1; - for (unsigned i = 0; i != NumWords/2; ++i) { - + // If each value is different, don't do splat, just insert them one by one. + bool NoSplat = VecHist[MaxAt] <= 1; + SDValue RotV = NoSplat + ? DAG.getUNDEF(VecTy) + : DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[MaxAt]); + int Rn = 0; + for (unsigned i = 0; i != NumWords; ++i) { // Rotate by element count since last insertion. 
- if (Words[i] != Words[n] || VecHist[n] <= 1) { - Sn = DAG.getConstant(Rn, dl, MVT::i32); - HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); - N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, - {HalfV0, Words[i]}); + if (NoSplat || Words[i] != Words[MaxAt]) { + RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, + {RotV, DAG.getConstant(Rn, dl, MVT::i32)}); + RotV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, Words[i]}); Rn = 0; } - if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { - Sm = DAG.getConstant(Rm, dl, MVT::i32); - HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); - M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, - {HalfV1, Words[i+NumWords/2]}); - Rm = 0; - } Rn += 4; - Rm += 4; } // Perform last rotation. - Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32); - Sm = DAG.getConstant(Rm, dl, MVT::i32); - HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); - HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); - - SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0); - SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1); - - SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1}); - - SDValue OutV = - DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV); - return OutV; + return DAG.getNode(HexagonISD::VROR, dl, VecTy, + {RotV, DAG.getConstant(Rn, dl, MVT::i32)}); } SDValue diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll index e6b8445f5121..159001c11301 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll @@ -6,35 +6,31 @@ define <32 x i32> @fred(i32 %a0) #0 { ; CHECK: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r3:2 = combine(#20,#9) -; CHECK-NEXT: v0 = vxor(v0,v0) -; CHECK-NEXT: r1 = #24 -; CHECK-NEXT: r4 = #12 +; CHECK-NEXT: r3:2 = combine(#76,#7) +; CHECK-NEXT: r1 = #12 +; CHECK-NEXT: r4 = 
#9 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vror(v0,r1) +; CHECK-NEXT: v0 = vror(v0,r1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.w = vinsert(r2) -; CHECK-NEXT: r4 = #7 -; CHECK-NEXT: r2 = #116 -; CHECK-NEXT: v0 = vror(v0,r4) +; CHECK-NEXT: v0.w = vinsert(r2) +; CHECK-NEXT: r2 = #20 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.w = vinsert(r4) +; CHECK-NEXT: v0 = vror(v0,r3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vror(v1,r3) +; CHECK-NEXT: v0.w = vinsert(r4) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.w = vinsert(r0) ; CHECK-NEXT: v0 = vror(v0,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vror(v1,r3) +; CHECK-NEXT: v0.w = vinsert(r0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vor(v0,v1) +; CHECK-NEXT: v0 = vror(v0,r2) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %v0 = insertelement <32 x i32> undef, i32 undef, i32 0 From 7df7586a0b09ecebd2fd07b03efb30d30207e09d Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Wed, 29 Dec 2021 10:48:02 -0800 Subject: [PATCH 197/992] [mlir][MemRef] Deprecate unspecified trailing offset, size, and strides semantics of `OffsetSizeAndStrideOpInterface`. The semantics of the ops that implement the `OffsetSizeAndStrideOpInterface` is that if the number of offsets, sizes or strides are less than the rank of the source, then some default values are filled along the trailing dimensions (0 for offset, source dimension of sizes, and 1 for strides). This is confusing, especially with rank-reducing semantics. Immediate issue here is that the methods of `OffsetSizeAndStridesOpInterface` assumes that the number of values is same as the source rank. This cause out-of-bounds errors. So simplifying the specification of `OffsetSizeAndStridesOpInterface` to make it invalid to specify number of offsets/sizes/strides not equal to the source rank. 
Differential Revision: https://reviews.llvm.org/D115677 --- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 136 ++++++++++++------ mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 62 ++++---- .../Vector/VectorTransferOpTransforms.cpp | 18 +-- mlir/lib/Interfaces/ViewLikeInterface.cpp | 8 +- .../MemRefToLLVM/memref-to-llvm.mlir | 18 ++- .../transform-patterns-matmul-to-vector.mlir | 8 +- mlir/test/Dialect/MemRef/canonicalize.mlir | 53 ++++++- mlir/test/Dialect/MemRef/invalid.mlir | 12 +- mlir/test/Dialect/MemRef/subview.mlir | 8 +- mlir/test/Dialect/Tensor/canonicalize.mlir | 8 +- mlir/test/Dialect/Tensor/invalid.mlir | 17 +++ ...ctor-transfer-drop-unit-dims-patterns.mlir | 2 - .../Dialect/Standard/CPU/test_subview.mlir | 4 +- 13 files changed, 230 insertions(+), 124 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index aa201370c0cf..ab7e8305ab5b 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1495,28 +1495,14 @@ Wrapper operator*(Wrapper a, int64_t b) { /// static representation of offsets, sizes and strides. Special sentinels /// encode the dynamic case. Type SubViewOp::inferResultType(MemRefType sourceMemRefType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - // A subview may specify only a leading subset of offset/sizes/strides in - // which case we complete with offset=0, sizes from memref type and strides=1. 
+ ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides) { unsigned rank = sourceMemRefType.getRank(); - assert(leadingStaticOffsets.size() <= rank && - "unexpected leadingStaticOffsets overflow"); - assert(leadingStaticSizes.size() <= rank && - "unexpected leadingStaticSizes overflow"); - assert(leadingStaticStrides.size() <= rank && - "unexpected leadingStaticStrides overflow"); - auto staticOffsets = llvm::to_vector<4>(leadingStaticOffsets); - auto staticSizes = llvm::to_vector<4>(leadingStaticSizes); - auto staticStrides = llvm::to_vector<4>(leadingStaticStrides); - unsigned numTrailingOffsets = rank - staticOffsets.size(); - unsigned numTrailingSizes = rank - staticSizes.size(); - unsigned numTrailingStrides = rank - staticStrides.size(); - staticOffsets.append(numTrailingOffsets, 0); - llvm::append_range(staticSizes, - sourceMemRefType.getShape().take_back(numTrailingSizes)); - staticStrides.append(numTrailingStrides, 1); + (void)rank; + assert(staticOffsets.size() == rank && "unexpected staticOffsets overflow"); + assert(staticSizes.size() == rank && "unexpected staticSizes overflow"); + assert(staticStrides.size() == rank && "unexpected staticStrides overflow"); // Extract source offset and strides. 
int64_t sourceOffset; @@ -1553,29 +1539,28 @@ Type SubViewOp::inferResultType(MemRefType sourceMemRefType, } Type SubViewOp::inferResultType(MemRefType sourceMemRefType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides) { SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); return SubViewOp::inferResultType(sourceMemRefType, staticOffsets, staticSizes, staticStrides); } -Type SubViewOp::inferRankReducedResultType( - unsigned resultRank, MemRefType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { +Type SubViewOp::inferRankReducedResultType(unsigned resultRank, + MemRefType sourceRankedTensorType, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides) { auto inferredType = - inferResultType(sourceRankedTensorType, leadingStaticOffsets, - leadingStaticSizes, leadingStaticStrides) + inferResultType(sourceRankedTensorType, offsets, sizes, strides) .cast(); assert(inferredType.getRank() >= resultRank && "expected "); int rankDiff = inferredType.getRank() - resultRank; @@ -1598,19 +1583,19 @@ Type SubViewOp::inferRankReducedResultType( return inferredType; } -Type SubViewOp::inferRankReducedResultType( - unsigned 
resultRank, MemRefType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { +Type SubViewOp::inferRankReducedResultType(unsigned resultRank, + MemRefType sourceRankedTensorType, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides) { SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); return SubViewOp::inferRankReducedResultType( resultRank, sourceRankedTensorType, staticOffsets, staticSizes, staticStrides); @@ -1893,6 +1878,43 @@ static MemRefType getCanonicalSubViewResultType( mixedStrides); } +/// Helper method to check if a `subview` operation is trivially a no-op. This +/// is the case if the all offsets are zero, all strides are 1, and the source +/// shape is same as the size of the subview. In such cases, the subview can be +/// folded into its source. +static bool isTrivialSubViewOp(SubViewOp subViewOp) { + if (subViewOp.getSourceType().getRank() != subViewOp.getType().getRank()) + return false; + + auto mixedOffsets = subViewOp.getMixedOffsets(); + auto mixedSizes = subViewOp.getMixedSizes(); + auto mixedStrides = subViewOp.getMixedStrides(); + + // Check offsets are zero. 
+ if (llvm::any_of(mixedOffsets, [](OpFoldResult ofr) { + Optional intValue = getConstantIntValue(ofr); + return !intValue || intValue.getValue() != 0; + })) + return false; + + // Check strides are one. + if (llvm::any_of(mixedStrides, [](OpFoldResult ofr) { + Optional intValue = getConstantIntValue(ofr); + return !intValue || intValue.getValue() != 1; + })) + return false; + + // Check all size values are static and matches the (static) source shape. + ArrayRef sourceShape = subViewOp.getSourceType().getShape(); + for (auto size : llvm::enumerate(mixedSizes)) { + Optional intValue = getConstantIntValue(size.value()); + if (!intValue || intValue.getValue() != sourceShape[size.index()]) + return false; + } + // All conditions met. The `SubViewOp` is foldable as a no-op. + return true; +} + namespace { /// Pattern to rewrite a subview op with MemRefCast arguments. /// This essentially pushes memref.cast past its consuming subview when @@ -1950,6 +1972,26 @@ class SubViewOpMemRefCastFolder final : public OpRewritePattern { return success(); } }; + +/// Canonicalize subview ops that are no-ops. When the source shape is not same +/// as a result shape due to use of `affine_map`. +class TrivialSubViewOpFolder final : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SubViewOp subViewOp, + PatternRewriter &rewriter) const override { + if (!isTrivialSubViewOp(subViewOp)) + return failure(); + if (subViewOp.getSourceType() == subViewOp.getType()) { + rewriter.replaceOp(subViewOp, subViewOp.source()); + return success(); + } + rewriter.replaceOpWithNewOp(subViewOp, subViewOp.source(), + subViewOp.getType()); + return success(); + } +}; } // namespace /// Return the canonical type of the result of a subview. 
@@ -1975,7 +2017,7 @@ void SubViewOp::getCanonicalizationPatterns(RewritePatternSet &results, results .add, - SubViewOpMemRefCastFolder>(context); + SubViewOpMemRefCastFolder, TrivialSubViewOpFolder>(context); } OpFoldResult SubViewOp::fold(ArrayRef operands) { diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index cec8b2c18754..f7665135b5b1 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -827,38 +827,31 @@ OpFoldResult CollapseShapeOp::fold(ArrayRef operands) { /// An extract_slice op result type can be fully inferred from the source type /// and the static representation of offsets, sizes and strides. Special /// sentinels encode the dynamic case. -RankedTensorType -ExtractSliceOp::inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { +RankedTensorType ExtractSliceOp::inferResultType( + RankedTensorType sourceRankedTensorType, ArrayRef staticOffsets, + ArrayRef staticSizes, ArrayRef staticStrides) { // An extract_slice op may specify only a leading subset of offset/sizes/ // strides in which case we complete with offset=0, sizes from memref type and // strides=1. 
unsigned rank = sourceRankedTensorType.getRank(); - assert(leadingStaticSizes.size() <= rank && - "unexpected leadingStaticSizes overflow"); - auto staticSizes = llvm::to_vector<4>(leadingStaticSizes); - unsigned numTrailingSizes = rank - staticSizes.size(); - llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back( - numTrailingSizes)); + (void)rank; + assert(staticSizes.size() == rank && + "unexpected staticSizes not equal to rank of source"); return RankedTensorType::get(staticSizes, sourceRankedTensorType.getElementType()); } -RankedTensorType -ExtractSliceOp::inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { +RankedTensorType ExtractSliceOp::inferResultType( + RankedTensorType sourceRankedTensorType, ArrayRef offsets, + ArrayRef sizes, ArrayRef strides) { SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); return ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets, staticSizes, staticStrides); } @@ -868,12 +861,10 @@ ExtractSliceOp::inferResultType(RankedTensorType sourceRankedTensorType, /// sentinels encode the dynamic case. 
RankedTensorType ExtractSliceOp::inferRankReducedResultType( unsigned resultRank, RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { + ArrayRef offsets, ArrayRef sizes, + ArrayRef strides) { auto inferredType = - inferResultType(sourceRankedTensorType, leadingStaticOffsets, - leadingStaticSizes, leadingStaticStrides) + inferResultType(sourceRankedTensorType, offsets, sizes, strides) .cast(); int rankDiff = inferredType.getRank() - resultRank; if (rankDiff > 0) { @@ -892,17 +883,16 @@ RankedTensorType ExtractSliceOp::inferRankReducedResultType( RankedTensorType ExtractSliceOp::inferRankReducedResultType( unsigned resultRank, RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { + ArrayRef offsets, ArrayRef sizes, + ArrayRef strides) { SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); return ExtractSliceOp::inferRankReducedResultType( resultRank, sourceRankedTensorType, staticOffsets, staticSizes, staticStrides); @@ -919,12 +909,10 @@ void ExtractSliceOp::build(OpBuilder &b, OperationState &result, SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; 
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, - ShapedType::kDynamicStrideOrOffset); dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, ShapedType::kDynamicSize); dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, - ShapedType::kDynamicStrideOrOffset); auto sourceRankedTensorType = source.getType().cast(); // Structuring implementation this way avoids duplication between builders. @@ -1225,12 +1213,10 @@ void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source, SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, - ShapedType::kDynamicStrideOrOffset); dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, ShapedType::kDynamicSize); dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, - ShapedType::kDynamicStrideOrOffset); build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes, dynamicStrides, b.getI64ArrayAttr(staticOffsets), diff --git a/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp index 9b1ae7a40226..8b4cd7e119cd 100644 --- a/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp @@ -212,10 +212,11 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) { } /// Drops unit dimensions from the input MemRefType. 
-static MemRefType dropUnitDims(MemRefType inputType) { - ArrayRef none{}; +static MemRefType dropUnitDims(MemRefType inputType, ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides) { Type rankReducedType = memref::SubViewOp::inferRankReducedResultType( - 0, inputType, none, none, none); + 0, inputType, offsets, sizes, strides); return canonicalizeStridedLayout(rankReducedType.cast()); } @@ -226,15 +227,16 @@ static Value rankReducingSubviewDroppingUnitDims(PatternRewriter &rewriter, Value input) { MemRefType inputType = input.getType().cast(); assert(inputType.hasStaticShape()); - MemRefType resultType = dropUnitDims(inputType); + SmallVector subViewOffsets(inputType.getRank(), 0); + SmallVector subViewStrides(inputType.getRank(), 1); + ArrayRef subViewSizes = inputType.getShape(); + MemRefType resultType = + dropUnitDims(inputType, subViewOffsets, subViewSizes, subViewStrides); if (canonicalizeStridedLayout(resultType) == canonicalizeStridedLayout(inputType)) return input; - SmallVector subviewOffsets(inputType.getRank(), 0); - SmallVector subviewStrides(inputType.getRank(), 1); return rewriter.create( - loc, resultType, input, subviewOffsets, inputType.getShape(), - subviewStrides); + loc, resultType, input, subViewOffsets, subViewSizes, subViewStrides); } /// Returns the number of dims that aren't unit dims. diff --git a/mlir/lib/Interfaces/ViewLikeInterface.cpp b/mlir/lib/Interfaces/ViewLikeInterface.cpp index 4a963a1d54fd..6394895370e2 100644 --- a/mlir/lib/Interfaces/ViewLikeInterface.cpp +++ b/mlir/lib/Interfaces/ViewLikeInterface.cpp @@ -18,12 +18,12 @@ using namespace mlir; #include "mlir/Interfaces/ViewLikeInterface.cpp.inc" LogicalResult mlir::verifyListOfOperandsOrIntegers( - Operation *op, StringRef name, unsigned maxNumElements, ArrayAttr attr, + Operation *op, StringRef name, unsigned numElements, ArrayAttr attr, ValueRange values, llvm::function_ref isDynamic) { /// Check static and dynamic offsets/sizes/strides does not overflow type. 
- if (attr.size() > maxNumElements) - return op->emitError("expected <= ") - << maxNumElements << " " << name << " values"; + if (attr.size() != numElements) + return op->emitError("expected ") + << numElements << " " << name << " values"; unsigned expectedNumDynamicEntries = llvm::count_if(attr.getValue(), [&](Attribute attr) { return isDynamic(attr.cast().getInt()); diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir index 009106f95e8a..5682c853964c 100644 --- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir @@ -448,7 +448,7 @@ func @subview_leading_operands(%0 : memref<5x3xf32>, %1: memref<5x?xf32>) { // CHECK: %[[C3_3:.*]] = llvm.mlir.constant(3 : i64) : i64 // CHECK: llvm.insertvalue %[[C3_2]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[C3_3]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %2 = memref.subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> + %2 = memref.subview %0[2, 0][3, 3][1, 1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> return } @@ -466,13 +466,15 @@ func @subview_leading_operands_dynamic(%0 : memref<5x?xf32>) { // CHECK: %[[ST0:.*]] = llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[ST1:.*]] = llvm.extractvalue %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // Compute and insert offset from 2 + dynamic value. 
- // CHECK: %[[OFF:.*]] = llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[OFF0:.*]] = llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 - // CHECK: %[[MUL:.*]] = llvm.mul %[[C2]], %[[ST0]] : i64 - // CHECK: %[[NEW_OFF:.*]] = llvm.add %[[OFF]], %[[MUL]] : i64 + // CHECK: %[[MUL0:.*]] = llvm.mul %[[C2]], %[[ST0]] : i64 + // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF0]], %[[MUL0]] : i64 + // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[MUL1:.*]] = llvm.mul %[[C0]], %[[ST1]] : i64 + // CHECK: %[[NEW_OFF:.*]] = llvm.add %[[OFF1]], %[[MUL1]] : i64 // CHECK: llvm.insertvalue %[[NEW_OFF]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // Sizes and strides @rank 1: static stride 1, dynamic size unchanged from source memref. - // CHECK: %[[SZ1:.*]] = llvm.extractvalue %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -482,7 +484,9 @@ func @subview_leading_operands_dynamic(%0 : memref<5x?xf32>) { // CHECK: %[[MUL:.*]] = llvm.mul %[[C1_2]], %[[ST0]] : i64 // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[MUL]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %1 = memref.subview %0[2][3][1]: memref<5x?xf32> to memref<3x?xf32, offset: ?, strides: [?, 1]> + %c0 = arith.constant 1 : index + %d0 = memref.dim %0, %c0 : memref<5x?xf32> + %1 = memref.subview %0[2, 0][3, %d0][1, 1]: memref<5x?xf32> to memref<3x?xf32, offset: ?, strides: [?, 1]> 
return } @@ -506,7 +510,7 @@ func @subview_rank_reducing_leading_operands(%0 : memref<5x3xf32>) { // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %1 = memref.subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> + %1 = memref.subview %0[1, 0][1, 3][1, 1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> return } diff --git a/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir b/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir index ab0be6bbeebf..3b3e64d5f59c 100644 --- a/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir @@ -17,17 +17,17 @@ func @matmul(%A: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, // CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32> // // CHECK-1D: vector.transfer_read {{.*}} : memref<8x16xf32, #{{.*}}>, vector<8x16xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32, #{{.*}}> +// CHECK-1D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32> // CHECK-1D: vector.transfer_read {{.*}} : memref<16x12xf32, #{{.*}}>, vector<16x12xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32, #{{.*}}> +// CHECK-1D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32> // CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32, #{{.*}}>, vector<8x12xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32, #{{.*}}> +// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32> // // CHECK-1D: vector.contract // CHECK-1D-SAME: iterator_types = ["parallel", "parallel", "reduction"] // CHECK-1D-SAME: : 
vector<8x16xf32>, vector<16x12xf32> into vector<8x12xf32> // -// CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32, #{{.*}}>, vector<8x12xf32> +// CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32>, vector<8x12xf32> // CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32, #{{.*}}> // CHECK-2D-LABEL:func @matmul diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index 80282c21afab..39f9847f4c9e 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -2,13 +2,13 @@ // CHECK-LABEL: func @subview_of_size_memcast // CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: memref<4x6x16x32xi8> -// CHECK: %[[S:.+]] = memref.subview %[[ARG0]][0, 1, 0] [1, 1, 16] [1, 1, 1] : memref<4x6x16x32xi8> to memref<16x32xi8, #{{.*}}> +// CHECK: %[[S:.+]] = memref.subview %[[ARG0]][0, 1, 0, 0] [1, 1, 16, 32] [1, 1, 1, 1] : memref<4x6x16x32xi8> to memref<16x32xi8, #{{.*}}> // CHECK: %[[M:.+]] = memref.cast %[[S]] : memref<16x32xi8, #{{.*}}> to memref<16x32xi8, #{{.*}}> // CHECK: return %[[M]] : memref<16x32xi8, #{{.*}}> func @subview_of_size_memcast(%arg : memref<4x6x16x32xi8>) -> memref<16x32xi8, affine_map<(d0, d1)[s0] -> (d0 * 32 + d1 + s0)>>{ %0 = memref.cast %arg : memref<4x6x16x32xi8> to memref - %1 = memref.subview %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : + %1 = memref.subview %0[0, 1, 0, 0] [1, 1, 16, 32] [1, 1, 1, 1] : memref to memref<16x32xi8, affine_map<(d0, d1)[s0] -> (d0 * 32 + d1 + s0)>> return %1 : memref<16x32xi8, affine_map<(d0, d1)[s0] -> (d0 * 32 + d1 + s0)>> @@ -450,3 +450,52 @@ func @fold_rank_memref(%arg0 : memref) -> (index) { // CHECK-NEXT: return [[C2]] return %rank_0 : index } + +// ----- + +#map = affine_map<(d0, d1) -> (d0 * 42 + d1)> +func @fold_no_op_subview(%arg0 : memref<20x42xf32>) -> memref<20x42xf32, #map> { + %0 = memref.subview %arg0[0, 0] [20, 42] [1, 1] : memref<20x42xf32> to memref<20x42xf32, #map> + return %0 : memref<20x42xf32, #map> 
+} +// CHECK-LABEL: func @fold_no_op_subview( +// CHECK: %[[ARG0:.+]]: memref<20x42xf32>) +// CHECK: %[[CAST:.+]] = memref.cast %[[ARG0]] +// CHECK: return %[[CAST]] + +// ----- + +#map = affine_map<(d0, d1) -> (d0 * 42 + d1 + 1)> +func @no_fold_subview_with_non_zero_offset(%arg0 : memref<20x42xf32>) -> memref<20x42xf32, #map> { + %0 = memref.subview %arg0[0, 1] [20, 42] [1, 1] : memref<20x42xf32> to memref<20x42xf32, #map> + return %0 : memref<20x42xf32, #map> +} +// CHECK-LABEL: func @no_fold_subview_with_non_zero_offset( +// CHECK: %[[SUBVIEW:.+]] = memref.subview +// CHECK: return %[[SUBVIEW]] + +// ----- + +#map = affine_map<(d0, d1) -> (d0 * 42 + d1 * 2)> +func @no_fold_subview_with_non_unit_stride(%arg0 : memref<20x42xf32>) -> memref<20x42xf32, #map> { + %0 = memref.subview %arg0[0, 0] [20, 42] [1, 2] : memref<20x42xf32> to memref<20x42xf32, #map> + return %0 : memref<20x42xf32, #map> +} +// CHECK-LABEL: func @no_fold_subview_with_non_unit_stride( +// CHECK: %[[SUBVIEW:.+]] = memref.subview +// CHECK: return %[[SUBVIEW]] + +// ----- + +#map = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + d1 + s0)> +func @no_fold_dynamic_no_op_subview(%arg0 : memref) -> memref { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %0 = memref.dim %arg0, %c0 : memref + %1 = memref.dim %arg0, %c1 : memref + %2 = memref.subview %arg0[0, 0] [%0, %1] [1, 1] : memref to memref + return %2 : memref +} +// CHECK-LABEL: func @no_fold_dynamic_no_op_subview( +// CHECK: %[[SUBVIEW:.+]] = memref.subview +// CHECK: return %[[SUBVIEW]] diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir index 97d9db8cf1cc..5cf32703c9eb 100644 --- a/mlir/test/Dialect/MemRef/invalid.mlir +++ b/mlir/test/Dialect/MemRef/invalid.mlir @@ -149,7 +149,7 @@ func @transpose_wrong_type(%v : memref(off // ----- func @memref_reinterpret_cast_too_many_offsets(%in: memref) { - // expected-error @+1 {{expected <= 1 offset values}} + // expected-error @+1 {{expected 1 offset 
values}} %out = memref.reinterpret_cast %in to offset: [0, 0], sizes: [10, 10], strides: [10, 1] : memref to memref<10x10xf32, offset: 0, strides: [10, 1]> @@ -580,7 +580,7 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = memref.alloc() : memref<8x16x4xf32> - // expected-error@+1 {{expected <= 3 offset values}} + // expected-error@+1 {{expected 3 offset values}} %1 = memref.subview %0[%arg0, %arg1, 0, 0][%arg2, 0, 0, 0][1, 1, 1, 1] : memref<8x16x4xf32> to memref<8x?x4xf32, offset: 0, strides:[?, ?, 4]> @@ -840,3 +840,11 @@ func @rank(%0: f32) { "memref.rank"(%0): (f32)->index return } + +// ----- + +#map = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (s0 + d0 * s1 + d1 * s2 + d2 * s3)> +func @illegal_num_offsets(%arg0 : memref, %arg1 : index, %arg2 : index) { + // expected-error@+1 {{expected 3 offset values}} + %0 = memref.subview %arg0[0, 0] [%arg1, %arg2] [1, 1] : memref to memref +} diff --git a/mlir/test/Dialect/MemRef/subview.mlir b/mlir/test/Dialect/MemRef/subview.mlir index dbfd1329ce9e..3bfc62d346b6 100644 --- a/mlir/test/Dialect/MemRef/subview.mlir +++ b/mlir/test/Dialect/MemRef/subview.mlir @@ -109,12 +109,12 @@ func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) { /// Subview with only leading operands. %24 = memref.alloc() : memref<5x3xf32> - // CHECK: memref.subview %{{.*}}[2] [3] [1] : memref<5x3xf32> to memref<3x3xf32, #[[$SUBVIEW_MAP9]]> - %25 = memref.subview %24[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> + // CHECK: memref.subview %{{.*}}[2, 0] [3, 3] [1, 1] : memref<5x3xf32> to memref<3x3xf32, #[[$SUBVIEW_MAP9]]> + %25 = memref.subview %24[2, 0][3, 3][1, 1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> /// Rank-reducing subview with only leading operands. 
- // CHECK: memref.subview %{{.*}}[1] [1] [1] : memref<5x3xf32> to memref<3xf32, #[[$SUBVIEW_MAP10]]> - %26 = memref.subview %24[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> + // CHECK: memref.subview %{{.*}}[1, 0] [1, 3] [1, 1] : memref<5x3xf32> to memref<3xf32, #[[$SUBVIEW_MAP10]]> + %26 = memref.subview %24[1, 0][1, 3][1, 1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> // Corner-case of 0-D rank-reducing subview with an offset. // CHECK: memref.subview %{{.*}}[1, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 2c18fe4a6d5e..82f880d098fd 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -395,13 +395,13 @@ func @trivial_insert_slice(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x3 // CHECK-LABEL: func @rank_reducing_tensor_of_cast // CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> -// CHECK: %[[S:.+]] = tensor.extract_slice %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8> +// CHECK: %[[S:.+]] = tensor.extract_slice %arg0[0, 1, 0, 0] [1, 1, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8> // Tensor cast is moved after slice and then gets canonicalized away. 
// CHECK-NOT: tensor.cast // CHECK: return %[[S]] : tensor<16x32xi8> func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> { %0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor - %1 = tensor.extract_slice %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor to tensor<16x32xi8> + %1 = tensor.extract_slice %0[0, 1, 0, 0] [1, 1, 16, 32] [1, 1, 1, 1] : tensor to tensor<16x32xi8> return %1 : tensor<16x32xi8> } @@ -410,7 +410,7 @@ func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32x // CHECK-LABEL: func @rank_reducing_insert_slice_of_cast // CHECK-SAME: %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8> // CHECK-SAME: %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> -// CHECK: %[[S:.+]] = tensor.insert_slice %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8> +// CHECK: %[[S:.+]] = tensor.insert_slice %[[A]] into %[[B]][0, 1, 0, 0] [1, 1, 16, 32] [1, 1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8> // Tensor cast is folded away. 
// CHECK-NOT: tensor.cast // CHECK: return %[[S]] : tensor<4x6x16x32xi8> @@ -418,7 +418,7 @@ func @rank_reducing_insert_slice_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x %c0 = arith.constant 0: index %cast = tensor.cast %a : tensor<16x32xi8> to tensor %sz = tensor.dim %cast, %c0: tensor - %res = tensor.insert_slice %cast into %b[0, 1, 0] [1, 1, %sz] [1, 1, 1] : tensor into tensor<4x6x16x32xi8> + %res = tensor.insert_slice %cast into %b[0, 1, 0, 0] [1, 1, %sz, 32] [1, 1, 1, 1] : tensor into tensor<4x6x16x32xi8> return %res : tensor<4x6x16x32xi8> } diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir index ece2f54d8401..8cdab35fb5e2 100644 --- a/mlir/test/Dialect/Tensor/invalid.mlir +++ b/mlir/test/Dialect/Tensor/invalid.mlir @@ -300,3 +300,20 @@ func @rank(%0: f32) { "tensor.rank"(%0): (f32)->index return } + +// ----- + +func @illegal_num_offsets(%arg0 : tensor, %arg1 : index, %arg2 : index) { + // expected-error@+1 {{expected 3 offset values}} + %0 = tensor.extract_slice %arg0[0, 0] [%arg1, %arg2] [1, 1] : tensor to tensor + return +} + +// ----- + +func @illegal_num_offsets(%arg0 : tensor, %arg1 : tensor, + %arg2 : index, %arg3 : index) { + // expected-error@+1 {{expected 3 offset values}} + %0 = tensor.insert_slice %arg0 into %arg1[0, 0] [%arg2, %arg3] [1, 1] : tensor into tensor + return +} diff --git a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir index a3d34a646c2f..6079b1aa8393 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir @@ -1,7 +1,5 @@ // RUN: mlir-opt %s -test-vector-transfer-drop-unit-dims-patterns -split-input-file | FileCheck %s -// ----- - func @transfer_read_rank_reducing( %arg : memref<1x1x3x2xi8, offset:?, strides:[6, 6, 2, 1]>) -> vector<3x2xi8> { %c0 = arith.constant 0 : index diff --git 
a/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir index 477576c3e090..5fb24c633ef9 100644 --- a/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir +++ b/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir @@ -13,7 +13,7 @@ func @main() { %0 = memref.get_global @__constant_5x3xf32 : memref<5x3xf32> /// Subview with only leading operands. - %1 = memref.subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> + %1 = memref.subview %0[2, 0][3, 3][1, 1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> %unranked = memref.cast %1 : memref<3x3xf32, offset: 6, strides: [3, 1]> to memref<*xf32> call @print_memref_f32(%unranked) : (memref<*xf32>) -> () @@ -50,7 +50,7 @@ func @main() { // CHECK-NEXT: [2, 5, 8, 11, 14] /// Rank-reducing subview with only leading operands. - %4 = memref.subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> + %4 = memref.subview %0[1, 0][1, 3][1, 1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> %unranked4 = memref.cast %4 : memref<3xf32, offset: 3, strides: [1]> to memref<*xf32> call @print_memref_f32(%unranked4) : (memref<*xf32>) -> () // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}} From 77df60965f0c8297ac224cf02979b5b01efc7bce Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 28 Dec 2021 16:59:53 -0500 Subject: [PATCH 198/992] [InstCombine] add tests for lshr(add(shl())); NFC --- llvm/test/Transforms/InstCombine/lshr.ll | 58 ++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index d524bdb93ea2..bfad4bcd27ab 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -162,6 +162,64 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ret <2 x i8> %lshr } +define i8 @shl_add(i8 %x, i8 %y) { +; CHECK-LABEL: @shl_add( +; 
CHECK-NEXT: [[L:%.*]] = shl i8 [[X:%.*]], 2 +; CHECK-NEXT: [[A:%.*]] = add i8 [[L]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[A]], 2 +; CHECK-NEXT: ret i8 [[R]] +; + %l = shl i8 %x, 2 + %a = add i8 %l, %y + %r = lshr i8 %a, 2 + ret i8 %r +} + +define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) { +; CHECK-LABEL: @shl_add_commute_vec( +; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]] +; CHECK-NEXT: [[L:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[Y]], [[L]] +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[A]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization + %l = shl <2 x i8> %x, + %a = add <2 x i8> %y, %l + %r = lshr <2 x i8> %a, + ret <2 x i8> %r +} + +define i32 @shl_add_use1(i32 %x, i32 %y) { +; CHECK-LABEL: @shl_add_use1( +; CHECK-NEXT: [[L:%.*]] = shl i32 [[X:%.*]], 2 +; CHECK-NEXT: call void @use(i32 [[L]]) +; CHECK-NEXT: [[A:%.*]] = add i32 [[L]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[A]], 2 +; CHECK-NEXT: ret i32 [[R]] +; + %l = shl i32 %x, 2 + call void @use(i32 %l) + %a = add i32 %l, %y + %r = lshr i32 %a, 2 + ret i32 %r +} + +define i32 @shl_add_use2(i32 %x, i32 %y) { +; CHECK-LABEL: @shl_add_use2( +; CHECK-NEXT: [[L:%.*]] = shl i32 [[X:%.*]], 2 +; CHECK-NEXT: [[A:%.*]] = add i32 [[L]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i32 [[A]]) +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[A]], 2 +; CHECK-NEXT: ret i32 [[R]] +; + %l = shl i32 %x, 2 + %a = add i32 %l, %y + call void @use(i32 %a) + %r = lshr i32 %a, 2 + ret i32 %r +} + define i16 @bool_zext(i1 %x) { ; CHECK-LABEL: @bool_zext( ; CHECK-NEXT: [[HIBIT:%.*]] = zext i1 [[X:%.*]] to i16 From baa22e9327a85ce16f2d112c610d828f2ce1cb1c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 29 Dec 2021 15:16:11 -0500 Subject: [PATCH 199/992] [InstCombine] add tests for unsigned overflow of bitmask offset; NFC --- .../unsigned-add-lack-of-overflow-check.ll | 155 ++++++++++++++++++ 1 file changed, 
155 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll index 0b340059d92a..11ec67bd6752 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll @@ -197,3 +197,158 @@ define i1 @n15_wrong_pred7(i8 %x, i8 %y) { %r = icmp sge i8 %t0, %y ret i1 %r } + +define i1 @low_bitmask_ult(i8 %x) { +; CHECK-LABEL: @low_bitmask_ult( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 31 +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = add i8 %x, 31 + %m = and i8 %a, 31 + %r = icmp ult i8 %m, %x + ret i1 %r +} + +define <2 x i1> @low_bitmask_uge(<2 x i8> %x) { +; CHECK-LABEL: @low_bitmask_uge( +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[M:%.*]] = and <2 x i8> [[A]], +; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[M]], [[X]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %a = add <2 x i8> %x, + %m = and <2 x i8> %a, + %r = icmp uge <2 x i8> %m, %x + ret <2 x i1> %r +} + +define i1 @low_bitmask_ugt(i8 %px) { +; CHECK-LABEL: @low_bitmask_ugt( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]] +; CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 127 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 127 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %px, %px + %a = add i8 %x, 127 + %m = and i8 %a, 127 + %r = icmp ugt i8 %x, %m + ret i1 %r +} + +define <2 x i1> @low_bitmask_ule(<2 x i8> %px) { +; CHECK-LABEL: @low_bitmask_ule( +; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[PX:%.*]], [[PX]] +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[M:%.*]] = and <2 x i8> [[A]], +; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X]], [[M]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %x = mul <2 x i8> %px, %px + %a = add <2 x i8> %x, + %m = and <2 x i8> 
%a, + %r = icmp ule <2 x i8> %x, %m + ret <2 x i1> %r +} + +define i1 @low_bitmask_ult_use(i8 %x) { +; CHECK-LABEL: @low_bitmask_ult_use( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 7 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 7 +; CHECK-NEXT: call void @use8(i8 [[M]]) +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = add i8 %x, 7 + %m = and i8 %a, 7 + call void @use8(i8 %m) + %r = icmp ult i8 %m, %x + ret i1 %r +} + +define i1 @low_bitmask_ugt_use(i8 %px) { +; CHECK-LABEL: @low_bitmask_ugt_use( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]] +; CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 3 +; CHECK-NEXT: call void @use8(i8 [[A]]) +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 3 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %px, %px + %a = add i8 %x, 3 + call void @use8(i8 %a) + %m = and i8 %a, 3 + %r = icmp ugt i8 %x, %m + ret i1 %r +} + +define i1 @low_bitmask_ult_wrong_mask1(i8 %x) { +; CHECK-LABEL: @low_bitmask_ult_wrong_mask1( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 30 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 31 +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = add i8 %x, 30 + %m = and i8 %a, 31 + %r = icmp ult i8 %m, %x + ret i1 %r +} + +define i1 @low_bitmask_uge_wrong_mask2(i8 %x) { +; CHECK-LABEL: @low_bitmask_uge_wrong_mask2( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 63 +; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = add i8 %x, 31 + %m = and i8 %a, 63 + %r = icmp uge i8 %m, %x + ret i1 %r +} + +define i1 @low_bitmask_ugt_swapped(i8 %x) { +; CHECK-LABEL: @low_bitmask_ugt_swapped( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 127 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 127 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = add i8 %x, 127 + %m = and i8 %a, 127 + %r = icmp ugt i8 %m, %x + ret i1 %r +} + 
+define i1 @low_bitmask_sgt(i8 %px) { +; CHECK-LABEL: @low_bitmask_sgt( +; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]] +; CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 127 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 127 +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %x = mul i8 %px, %px + %a = add i8 %x, 127 + %m = and i8 %a, 127 + %r = icmp sgt i8 %x, %m + ret i1 %r +} + +define i1 @low_bitmask_ult_specific_op(i8 %x, i8 %y) { +; CHECK-LABEL: @low_bitmask_ult_specific_op( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31 +; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 31 +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[R]] +; + %a = add i8 %x, 31 + %m = and i8 %a, 31 + %r = icmp ult i8 %m, %y + ret i1 %r +} From 6c716c8589506cec407f01b0cd60005a3e346cf0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 29 Dec 2021 15:53:56 -0500 Subject: [PATCH 200/992] [InstCombine] add more folds for unsigned overflow checks ((Op1 + C) & C) u< Op1 --> Op1 != 0 ((Op1 + C) & C) u>= Op1 --> Op1 == 0 Op0 u> ((Op0 + C) & C) --> Op0 != 0 Op0 u<= ((Op0 + C) & C) --> Op0 == 0 https://alive2.llvm.org/ce/z/iUfXJN https://alive2.llvm.org/ce/z/caAtjj define i1 @src(i8 %x, i8 %y) { ; the add/mask must be with a low-bit mask (0x01ff...) %y1 = add i8 %y, 1 %pop = call i8 @llvm.ctpop.i8(i8 %y1) %ismask = icmp eq i8 %pop, 1 call void @llvm.assume(i1 %ismask) %a = add i8 %x, %y %m = and i8 %a, %y %r = icmp ult i8 %m, %x ret i1 %r } define i1 @tgt(i8 %x, i8 %y) { %r = icmp ne i8 %x, 0 ret i1 %r } I suspect this can be generalized in some way, but this is the pattern I'm seeing in a motivating test based on issue #52851. 
--- llvm/include/llvm/IR/PatternMatch.h | 3 ++ .../InstCombine/InstCombineCompares.cpp | 27 ++++++++++++++++ .../unsigned-add-lack-of-overflow-check.ll | 31 ++++++++++--------- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 320deb80bb1f..f9f4f1603861 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -589,6 +589,9 @@ struct is_lowbit_mask { inline cst_pred_ty m_LowBitMask() { return cst_pred_ty(); } +inline api_pred_ty m_LowBitMask(const APInt *&V) { + return V; +} struct icmp_pred_with_threshold { ICmpInst::Predicate Pred; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 59e131bd3b6a..5b6728e466fc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3949,6 +3949,33 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) return new ICmpInst(Pred, X, Builder.CreateNot(Op0)); + { + // Similar to above: an unsigned overflow comparison may use offset + mask: + // ((Op1 + C) & C) u< Op1 --> Op1 != 0 + // ((Op1 + C) & C) u>= Op1 --> Op1 == 0 + // Op0 u> ((Op0 + C) & C) --> Op0 != 0 + // Op0 u<= ((Op0 + C) & C) --> Op0 == 0 + BinaryOperator *BO; + const APInt *C; + if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) && + match(Op0, m_And(m_BinOp(BO), m_LowBitMask(C))) && + match(BO, m_Add(m_Specific(Op1), m_SpecificIntAllowUndef(*C)))) { + CmpInst::Predicate NewPred = + Pred == ICmpInst::ICMP_ULT ? 
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + Constant *Zero = ConstantInt::getNullValue(Op1->getType()); + return new ICmpInst(NewPred, Op1, Zero); + } + + if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) && + match(Op1, m_And(m_BinOp(BO), m_LowBitMask(C))) && + match(BO, m_Add(m_Specific(Op0), m_SpecificIntAllowUndef(*C)))) { + CmpInst::Predicate NewPred = + Pred == ICmpInst::ICMP_UGT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + Constant *Zero = ConstantInt::getNullValue(Op1->getType()); + return new ICmpInst(NewPred, Op0, Zero); + } + } + bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; if (BO0 && isa(BO0)) NoOp0WrapProblem = diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll index 11ec67bd6752..e76fcbad61c8 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll @@ -200,9 +200,7 @@ define i1 @n15_wrong_pred7(i8 %x, i8 %y) { define i1 @low_bitmask_ult(i8 %x) { ; CHECK-LABEL: @low_bitmask_ult( -; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31 -; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 31 -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %a = add i8 %x, 31 @@ -213,9 +211,7 @@ define i1 @low_bitmask_ult(i8 %x) { define <2 x i1> @low_bitmask_uge(<2 x i8> %x) { ; CHECK-LABEL: @low_bitmask_uge( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[M:%.*]] = and <2 x i8> [[A]], -; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[M]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; %a = add <2 x i8> %x, @@ -227,9 +223,7 @@ define <2 x i1> @low_bitmask_uge(<2 x i8> %x) { define i1 @low_bitmask_ugt(i8 %px) { ; CHECK-LABEL: @low_bitmask_ugt( ; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]] -; 
CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 127 -; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 127 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = mul i8 %px, %px @@ -242,9 +236,7 @@ define i1 @low_bitmask_ugt(i8 %px) { define <2 x i1> @low_bitmask_ule(<2 x i8> %px) { ; CHECK-LABEL: @low_bitmask_ule( ; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[PX:%.*]], [[PX]] -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[M:%.*]] = and <2 x i8> [[A]], -; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X]], [[M]] +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; %x = mul <2 x i8> %px, %px @@ -259,7 +251,7 @@ define i1 @low_bitmask_ult_use(i8 %x) { ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 7 ; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 7 ; CHECK-NEXT: call void @use8(i8 [[M]]) -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %a = add i8 %x, 7 @@ -274,8 +266,7 @@ define i1 @low_bitmask_ugt_use(i8 %px) { ; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]] ; CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 3 ; CHECK-NEXT: call void @use8(i8 [[A]]) -; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 3 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = mul i8 %px, %px @@ -286,6 +277,8 @@ define i1 @low_bitmask_ugt_use(i8 %px) { ret i1 %r } +; negative test - need same low bitmask + define i1 @low_bitmask_ult_wrong_mask1(i8 %x) { ; CHECK-LABEL: @low_bitmask_ult_wrong_mask1( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 30 @@ -299,6 +292,8 @@ define i1 @low_bitmask_ult_wrong_mask1(i8 %x) { ret i1 %r } +; negative test - need same low bitmask + define i1 @low_bitmask_uge_wrong_mask2(i8 %x) { ; CHECK-LABEL: @low_bitmask_uge_wrong_mask2( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31 @@ -312,6 +307,8 @@ define i1 
@low_bitmask_uge_wrong_mask2(i8 %x) { ret i1 %r } +; negative test - predicate mandates operand order + define i1 @low_bitmask_ugt_swapped(i8 %x) { ; CHECK-LABEL: @low_bitmask_ugt_swapped( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 127 @@ -325,6 +322,8 @@ define i1 @low_bitmask_ugt_swapped(i8 %x) { ret i1 %r } +; negative test - unsigned preds only + define i1 @low_bitmask_sgt(i8 %px) { ; CHECK-LABEL: @low_bitmask_sgt( ; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]] @@ -340,6 +339,8 @@ define i1 @low_bitmask_sgt(i8 %px) { ret i1 %r } +; negative test - specific operand must match + define i1 @low_bitmask_ult_specific_op(i8 %x, i8 %y) { ; CHECK-LABEL: @low_bitmask_ult_specific_op( ; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31 From f24dff357b61cfed50361a09560990bb5f31dee2 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 29 Dec 2021 13:11:16 -0800 Subject: [PATCH 201/992] DWARFVerifier: Delay loading nested types in type dumping to improve performance Avoid trying to resolve nested types that may not be needed because the name is already provided by the outer DIE. 
--- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 5421b2d59a1b..b3b2bec82457 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -215,15 +215,16 @@ struct DWARFTypePrinter { OS << "void"; return DWARFDie(); } - DWARFDie Inner = resolveReferencedType(D); + DWARFDie InnerDIE; + auto Inner = [&] { return InnerDIE = resolveReferencedType(D); }; const dwarf::Tag T = D.getTag(); switch (T) { case DW_TAG_pointer_type: { - appendPointerLikeTypeBefore(D, Inner, "*"); + appendPointerLikeTypeBefore(D, Inner(), "*"); break; } case DW_TAG_subroutine_type: { - appendQualifiedNameBefore(Inner); + appendQualifiedNameBefore(Inner()); if (Word) { OS << ' '; } @@ -231,18 +232,18 @@ struct DWARFTypePrinter { break; } case DW_TAG_array_type: { - appendQualifiedNameBefore(Inner); + appendQualifiedNameBefore(Inner()); break; } case DW_TAG_reference_type: - appendPointerLikeTypeBefore(D, Inner, "&"); + appendPointerLikeTypeBefore(D, Inner(), "&"); break; case DW_TAG_rvalue_reference_type: - appendPointerLikeTypeBefore(D, Inner, "&&"); + appendPointerLikeTypeBefore(D, Inner(), "&&"); break; case DW_TAG_ptr_to_member_type: { - appendQualifiedNameBefore(Inner); - if (needsParens(Inner)) + appendQualifiedNameBefore(Inner()); + if (needsParens(InnerDIE)) OS << '('; else if (Word) OS << ' '; @@ -284,7 +285,7 @@ struct DWARFTypePrinter { const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr); if (!NamePtr) { appendTypeTagName(D.getTag()); - return Inner; + return DWARFDie(); } Word = true; StringRef Name = NamePtr; @@ -317,7 +318,7 @@ struct DWARFTypePrinter { break; } } - return Inner; + return InnerDIE; } void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner, From 928852f1560ae8708d5c48e2e33911ef05457d58 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 29 Dec 
2021 14:17:26 -0500 Subject: [PATCH 202/992] [libc++] [NFC] Remove an unused parameter from `__sift_down`. Differential Revision: https://reviews.llvm.org/D116382 --- libcxx/include/__algorithm/make_heap.h | 2 +- libcxx/include/__algorithm/partial_sort.h | 2 +- libcxx/include/__algorithm/partial_sort_copy.h | 2 +- libcxx/include/__algorithm/pop_heap.h | 2 +- libcxx/include/__algorithm/sift_down.h | 5 ++--- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h index b3defd4de072..a67c798ee7dd 100644 --- a/libcxx/include/__algorithm/make_heap.h +++ b/libcxx/include/__algorithm/make_heap.h @@ -32,7 +32,7 @@ __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compar // start from the first parent, there is no need to consider children for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) { - _VSTD::__sift_down<_Compare>(__first, __last, __comp, __n, __first + __start); + _VSTD::__sift_down<_Compare>(__first, __comp, __n, __first + __start); } } } diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index 622624ec4f42..017ac90b6714 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -40,7 +40,7 @@ __partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _R if (__comp(*__i, *__first)) { swap(*__i, *__first); - _VSTD::__sift_down<_Compare>(__first, __middle, __comp, __len, __first); + _VSTD::__sift_down<_Compare>(__first, __comp, __len, __first); } } _VSTD::__sort_heap<_Compare>(__first, __middle, __comp); diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h index 4c0c9f5ad04a..a81c621c75c6 100644 --- a/libcxx/include/__algorithm/partial_sort_copy.h +++ b/libcxx/include/__algorithm/partial_sort_copy.h @@ -40,7 +40,7 @@ __partial_sort_copy(_InputIterator __first, 
_InputIterator __last, if (__comp(*__first, *__result_first)) { *__result_first = *__first; - _VSTD::__sift_down<_Compare>(__result_first, __r, __comp, __len, __result_first); + _VSTD::__sift_down<_Compare>(__result_first, __comp, __len, __result_first); } _VSTD::__sort_heap<_Compare>(__result_first, __r, __comp); } diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h index e8c801a5c81f..1d57de24ff04 100644 --- a/libcxx/include/__algorithm/pop_heap.h +++ b/libcxx/include/__algorithm/pop_heap.h @@ -31,7 +31,7 @@ __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare if (__len > 1) { swap(*__first, *--__last); - _VSTD::__sift_down<_Compare>(__first, __last, __comp, __len - 1, __first); + _VSTD::__sift_down<_Compare>(__first, __comp, __len - 1, __first); } } diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h index 4d99ff237c96..bf5447698cd6 100644 --- a/libcxx/include/__algorithm/sift_down.h +++ b/libcxx/include/__algorithm/sift_down.h @@ -21,8 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_CONSTEXPR_AFTER_CXX11 void -__sift_down(_RandomAccessIterator __first, _RandomAccessIterator /*__last*/, - _Compare __comp, +__sift_down(_RandomAccessIterator __first, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len, _RandomAccessIterator __start) { @@ -46,7 +45,7 @@ __sift_down(_RandomAccessIterator __first, _RandomAccessIterator /*__last*/, // check if we are in heap-order if (__comp(*__child_i, *__start)) - // we are, __start is larger than it's largest child + // we are, __start is larger than its largest child return; value_type __top(_VSTD::move(*__start)); From 4d58d1d5af31eb386b73279f288286db225fabae Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 29 Dec 2021 14:00:40 -0800 Subject: [PATCH 203/992] DWARFVerifier: Print the CU name and CU count to help visualize progress --- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp 
| 15 ++++++++++----- llvm/test/DebugInfo/X86/skeleton-unit-verify.s | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 6424c2f59844..5f56ebd5a291 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -322,12 +322,17 @@ unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) { unsigned NumDebugInfoErrors = 0; ReferenceMap CrossUnitReferences; + unsigned int Index = 0; for (const auto &Unit : Units) { - ReferenceMap UnitLocalReferences; - NumDebugInfoErrors += - verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences); - NumDebugInfoErrors += verifyDebugInfoReferences( - UnitLocalReferences, [&](uint64_t Offset) { return Unit.get(); }); + OS << "Verifying unit: " << Index << " / " << Units.getNumUnits() << '\n'; + OS << "Unit Name: " << Unit->getUnitDIE(true).getShortName() << '\n'; + OS.flush(); + ReferenceMap UnitLocalReferences; + NumDebugInfoErrors += + verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences); + NumDebugInfoErrors += verifyDebugInfoReferences( + UnitLocalReferences, [&](uint64_t Offset) { return Unit.get(); }); + ++Index; } NumDebugInfoErrors += verifyDebugInfoReferences( diff --git a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s index 8c3a223fdbf4..062336c35d9f 100644 --- a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s +++ b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s @@ -5,6 +5,10 @@ # CHECK-NEXT: Verifying .debug_info Unit Header Chain... # CHECK-NEXT: Verifying .debug_types Unit Header Chain... # CHECK-NEXT: Verifying non-dwo Units... 
+# CHECK-NEXT: Verifying unit: 0 / 2 +# CHECK-NEXT: Unit Name: test.cpp +# CHECK-NEXT: Verifying unit: 1 / 2 +# CHECK-NEXT: Unit Name: # CHECK-NEXT: warning: DW_TAG_skeleton_unit has DW_CHILDREN_yes but DIE has no children # CHECK-NEXT: DW_TAG_skeleton_unit # CHECK-NEXT: error: Skeleton compilation unit has children. @@ -15,6 +19,8 @@ .byte 1 # Abbreviation Code .byte 74 # DW_TAG_skeleton_unit .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string .byte 0 # EOM(1) .byte 0 # EOM(2) .byte 2 # Abbreviation Code @@ -34,6 +40,7 @@ .long .debug_abbrev # Offset Into Abbrev. Section .quad -6573227469967412476 .byte 1 # Abbrev [1] + .asciz "test.cpp" .byte 0 .Lcu_end0: .long .Lcu_end1-.Lcu_start1 # Length of Unit From 6edc38935aaf98c2ee2e87874330b9dc08d899ab Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 29 Dec 2021 14:04:38 -0800 Subject: [PATCH 204/992] DWARFDie: don't try to compute a full template name for a template parameter packs Otherwise these look a lot like actual templates (they have a name and they have template parameters) but they don't participate in naming (this doesn't come up in practice because a template parameter pack DIE is never referenced from another DIE (so we don't do full name rebuilding for it) or the subject of simplified template name rebuilding (never has the _STN prefix)) - it could be tested with some hand crafted DWARF but doesn't seem important/useful to do so. This change is just for performance - to avoid trying to parse more DIEs, etc, when it's not needed when computing the name in the DWARF verifier. 
--- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index b3b2bec82457..49aa27998ace 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -802,6 +802,8 @@ void DWARFDie::getFullName(raw_string_ostream &OS, const char *NamePtr = getShortName(); if (!NamePtr) return; + if (getTag() == DW_TAG_GNU_template_parameter_pack) + return; DWARFTypePrinter(OS).appendUnqualifiedName(*this, OriginalFullName); } From 09f43c107fc7688639346d3beead72472cdadbdb Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 29 Dec 2021 14:47:43 -0800 Subject: [PATCH 205/992] DWARFVerifier: fix remaining tests and compact/rephrase the output --- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 8 +++++--- llvm/test/DebugInfo/X86/skeleton-unit-verify.s | 6 ++---- .../llvm-dwarfdump/X86/verify_curanges_incomplete.yaml | 1 + llvm/test/tools/llvm-dwarfdump/X86/verify_debug_info.s | 1 + llvm/test/tools/llvm-dwarfdump/X86/verify_die_ranges.yaml | 1 + .../tools/llvm-dwarfdump/X86/verify_invalid_cu_ref.yaml | 1 + .../llvm-dwarfdump/X86/verify_invalid_die_range.yaml | 1 + .../tools/llvm-dwarfdump/X86/verify_invalid_ranges.yaml | 1 + .../tools/llvm-dwarfdump/X86/verify_invalid_ref_addr.yaml | 1 + .../X86/verify_invalid_ref_addr_between.yaml | 1 + .../tools/llvm-dwarfdump/X86/verify_invalid_rnglists.yaml | 1 + .../llvm-dwarfdump/X86/verify_invalid_stmt_list.yaml | 1 + .../tools/llvm-dwarfdump/X86/verify_invalid_strp.yaml | 1 + .../llvm-dwarfdump/X86/verify_lexical_block_ranges.yaml | 1 + .../X86/verify_overlapping_function_ranges.yaml | 1 + .../X86/verify_overlapping_lexical_block_ranges.yaml | 1 + 16 files changed, 21 insertions(+), 7 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 5f56ebd5a291..df68e257af01 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ 
b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -322,10 +322,12 @@ unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) { unsigned NumDebugInfoErrors = 0; ReferenceMap CrossUnitReferences; - unsigned int Index = 0; + unsigned int Index = 1; for (const auto &Unit : Units) { - OS << "Verifying unit: " << Index << " / " << Units.getNumUnits() << '\n'; - OS << "Unit Name: " << Unit->getUnitDIE(true).getShortName() << '\n'; + OS << "Verifying unit: " << Index << " / " << Units.getNumUnits(); + if (const char* Name = Unit->getUnitDIE(true).getShortName()) + OS << ", \"" << Name << '\"'; + OS << '\n'; OS.flush(); ReferenceMap UnitLocalReferences; NumDebugInfoErrors += diff --git a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s index 062336c35d9f..0433e897f6a2 100644 --- a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s +++ b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s @@ -5,10 +5,8 @@ # CHECK-NEXT: Verifying .debug_info Unit Header Chain... # CHECK-NEXT: Verifying .debug_types Unit Header Chain... # CHECK-NEXT: Verifying non-dwo Units... -# CHECK-NEXT: Verifying unit: 0 / 2 -# CHECK-NEXT: Unit Name: test.cpp -# CHECK-NEXT: Verifying unit: 1 / 2 -# CHECK-NEXT: Unit Name: +# CHECK-NEXT: Verifying unit: 1 / 2, "test.cpp" +# CHECK-NEXT: Verifying unit: 2 / 2 # CHECK-NEXT: warning: DW_TAG_skeleton_unit has DW_CHILDREN_yes but DIE has no children # CHECK-NEXT: DW_TAG_skeleton_unit # CHECK-NEXT: error: Skeleton compilation unit has children. diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_curanges_incomplete.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_curanges_incomplete.yaml index 5200311ad453..de6f325dc88a 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_curanges_incomplete.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_curanges_incomplete.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... 
+# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DIE address ranges are not contained in its parent's ranges: --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_debug_info.s b/llvm/test/tools/llvm-dwarfdump/X86/verify_debug_info.s index c2502adf1a8d..f677de20cd51 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_debug_info.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_debug_info.s @@ -46,6 +46,7 @@ # CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] (0x01) # CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) # CHECK-NEXT: DW_AT_use_location [DW_FORM_ref4] (cu + 0x0053 => {0x00000053}){{[[:space:]]}} +# CHECK-NEXT: Verifying unit: 2 / 2 # CHECK-NEXT: error: Compilation unit root DIE is not a unit DIE: DW_TAG_null. # CHECK-NEXT: error: Compilation unit type (DW_UT_compile) and root DIE (DW_TAG_null) do not match. diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_die_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_die_ranges.yaml index 8580ca4b23dc..98f92e8b1170 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_die_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_die_ranges.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1 # CHECK-NEXT: error: Invalid address range [0x0000000000000007, 0x0000000000000006) --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_cu_ref.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_cu_ref.yaml index 2951d8c708c2..52474abe9267 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_cu_ref.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_cu_ref.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... 
+# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DW_FORM_ref4 CU offset 0x00001234 is invalid (must be less than CU size of 0x0000001a): --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_die_range.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_die_range.yaml index e7870474fe3f..e221f3557628 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_die_range.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_die_range.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: Invalid address range --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ranges.yaml index b3e4a89da960..65d9c48ff77e 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ranges.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DW_AT_ranges offset is beyond .debug_ranges bounds: 0x00001000 --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr.yaml index f9b4cf4f8e92..4599a46341e7 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... 
+# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DW_FORM_ref_addr offset beyond .debug_info bounds: --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr_between.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr_between.yaml index 76a11f987028..f000bf1bb776 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr_between.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr_between.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: invalid DIE reference 0x00000011. Offset is in between DIEs: --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_rnglists.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_rnglists.yaml index 009a037f5f84..9b363fd7e4f8 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_rnglists.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_rnglists.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DW_AT_ranges offset is beyond .debug_rnglists bounds: 0x00001000 --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_stmt_list.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_stmt_list.yaml index 31e92d2cac91..d9c23375d5ba 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_stmt_list.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_stmt_list.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... 
+# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DW_AT_stmt_list offset is beyond .debug_line bounds: 0x00001000 --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_strp.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_strp.yaml index 81b420f2346d..d6b8bbcec83a 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_strp.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_strp.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1 # CHECK-NEXT: error: DW_FORM_strp offset 4660 is beyond .debug_str bounds: --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_lexical_block_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_lexical_block_ranges.yaml index f8e8775a4208..1185e263c8e0 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_lexical_block_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_lexical_block_ranges.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DIE address ranges are not contained in its parent's ranges: --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_function_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_function_ranges.yaml index 8db28adf4540..eb3b791fc577 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_function_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_function_ranges.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... 
+# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DIEs have overlapping address ranges --- !ELF diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_lexical_block_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_lexical_block_ranges.yaml index c8f98504df06..137f9a234c3a 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_lexical_block_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_lexical_block_ranges.yaml @@ -2,6 +2,7 @@ # RUN: not llvm-dwarfdump -verify %t.o | FileCheck %s # CHECK: Verifying non-dwo Units... +# CHECK-NEXT: Verifying unit: 1 / 1, "/tmp/main.c" # CHECK-NEXT: error: DIEs have overlapping address ranges --- !ELF From eec312ee7f97638508679169cbf4b5183d0b1112 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 29 Dec 2021 23:08:37 +0000 Subject: [PATCH 206/992] Fix build of llvm-prettyprinters/gdb/mlir-support.cpp test This is just fixing the build itself, the test won't pass right now. 
--- .../llvm-prettyprinters/gdb/mlir-support.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp index 9a65ad377fd5..28a5cc9cdf1f 100644 --- a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp +++ b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp @@ -9,8 +9,6 @@ mlir::MLIRContext Context; auto Identifier = mlir::Identifier::get("foo", &Context); mlir::OperationName OperationName("FooOp", &Context); -mlir::Value Value({reinterpret_cast(0x8), - mlir::Value::Kind::TrailingOpResult}); mlir::Type Type(nullptr); mlir::Type IndexType = mlir::IndexType::get(&Context); @@ -23,6 +21,10 @@ mlir::Type VectorType = mlir::VectorType::get({1, 2}, FloatType); mlir::Type TupleType = mlir::TupleType::get(&Context, mlir::TypeRange({IndexType, FloatType})); + +mlir::detail::OutOfLineOpResult Result(FloatType, 42); +mlir::Value Value(&Result); + auto UnknownLoc = mlir::UnknownLoc::get(&Context); auto FileLineColLoc = mlir::FileLineColLoc::get(&Context, "file", 7, 8); auto OpaqueLoc = mlir::OpaqueLoc::get(9, &Context); From 9e45f2c308db33f62f2311b957ad8a6868ce172b Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 29 Dec 2021 16:45:00 -0800 Subject: [PATCH 207/992] Couple of post-commit tweaks on 4d58d1d5af31 based on maskray's feedback --- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 2 +- llvm/test/DebugInfo/X86/skeleton-unit-verify.s | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index df68e257af01..a6f3976fe201 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -322,7 +322,7 @@ unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) { unsigned NumDebugInfoErrors = 0; 
ReferenceMap CrossUnitReferences; - unsigned int Index = 1; + unsigned Index = 1; for (const auto &Unit : Units) { OS << "Verifying unit: " << Index << " / " << Units.getNumUnits(); if (const char* Name = Unit->getUnitDIE(true).getShortName()) diff --git a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s index 0433e897f6a2..d9c7436d1c75 100644 --- a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s +++ b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s @@ -6,7 +6,7 @@ # CHECK-NEXT: Verifying .debug_types Unit Header Chain... # CHECK-NEXT: Verifying non-dwo Units... # CHECK-NEXT: Verifying unit: 1 / 2, "test.cpp" -# CHECK-NEXT: Verifying unit: 2 / 2 +# CHECK-NEXT: Verifying unit: 2 / 2{{$}} # CHECK-NEXT: warning: DW_TAG_skeleton_unit has DW_CHILDREN_yes but DIE has no children # CHECK-NEXT: DW_TAG_skeleton_unit # CHECK-NEXT: error: Skeleton compilation unit has children. From 43c8296cda97e0aee8668790ebe7b1b1d6c2b3f9 Mon Sep 17 00:00:00 2001 From: "Chenbing.Zheng" Date: Thu, 30 Dec 2021 09:31:01 +0800 Subject: [PATCH 208/992] [RISCV] Refactor immediate comparison instructions patterns The patterns of the immediate comparison instruction is rewrite here, and put similar code to a class. Do not change any function of the original code, making the code more concise. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116215 --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 175 ++++++------------ 1 file changed, 52 insertions(+), 123 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 073fa605e0fb..5b85d5adb188 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3702,6 +3702,47 @@ multiclass VPatConversionVF_WF { } } +multiclass VPatCompare_VI { + foreach vti = AllIntegerVectors in { + defvar Intr = !cast(intrinsic); + defvar Pseudo = !cast(inst#"_VI_"#vti.LMul.MX); + def : Pat<(vti.Mask (Intr (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + VLOpFrag)), + (Pseudo vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), + GPR:$vl, vti.Log2SEW)>; + defvar IntrMask = !cast(intrinsic # "_mask"); + defvar PseudoMask = !cast(inst#"_VI_"#vti.LMul.MX#"_MASK"); + def : Pat<(vti.Mask (IntrMask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + (vti.Mask V0), + VLOpFrag)), + (PseudoMask VR:$merge, vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + } +} + +multiclass VPatCompareUnsignedZero { + foreach vti = AllIntegerVectors in { + defvar Intr = !cast(intrinsic); + defvar Pseudo = !cast(inst#"_VV_"#vti.LMul.MX); + def : Pat<(vti.Mask (Intr (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), VLOpFrag)), + (Pseudo vti.RegClass:$rs1, vti.RegClass:$rs1, + GPR:$vl, vti.Log2SEW)>; + defvar IntrMask = !cast(intrinsic # "_mask"); + defvar PseudoMask = !cast(inst#"_VV_"#vti.LMul.MX#"_MASK"); + def : Pat<(vti.Mask (IntrMask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), + (vti.Mask V0), + VLOpFrag)), + (PseudoMask VR:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + } +} + 
//===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -4481,129 +4522,17 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmsge", "PseudoVMSLE", AllIntegerVectors // Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This // avoids the user needing to know that there is no vmslt(u).vi instruction. // Similar for vmsge(u).vx intrinsics using vmslt(u).vi. -foreach vti = AllIntegerVectors in { - def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - VLOpFrag)), - (!cast("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - GPR:$vl, - vti.Log2SEW)>; - def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK") - VR:$merge, - vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - (vti.Mask V0), - GPR:$vl, - vti.Log2SEW)>; - - def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - VLOpFrag)), - (!cast("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - GPR:$vl, - vti.Log2SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK") - VR:$merge, - vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - (vti.Mask V0), - GPR:$vl, - vti.Log2SEW)>; - - // Special cases to avoid matching vmsltu.vi 0 (always false) to - // vmsleu.vi -1 (always true). Instead match to vmsne.vv. 
- def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), VLOpFrag)), - (!cast("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1, - vti.RegClass:$rs1, - GPR:$vl, - vti.Log2SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK") - VR:$merge, - vti.RegClass:$rs1, - vti.RegClass:$rs1, - (vti.Mask V0), - GPR:$vl, - vti.Log2SEW)>; - - def : Pat<(vti.Mask (int_riscv_vmsge (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - VLOpFrag)), - (!cast("PseudoVMSGT_VI_"#vti.LMul.MX) vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - GPR:$vl, - vti.Log2SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsge_mask (vti.Mask VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVMSGT_VI_"#vti.LMul.MX#"_MASK") - VR:$merge, - vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - (vti.Mask V0), - GPR:$vl, - vti.Log2SEW)>; - - def : Pat<(vti.Mask (int_riscv_vmsgeu (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - VLOpFrag)), - (!cast("PseudoVMSGTU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - GPR:$vl, - vti.Log2SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsgeu_mask (vti.Mask VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVMSGTU_VI_"#vti.LMul.MX#"_MASK") - VR:$merge, - vti.RegClass:$rs1, - (DecImm simm5_plus1:$rs2), - (vti.Mask V0), - GPR:$vl, - vti.Log2SEW)>; - - // Special cases to avoid matching vmsgeu.vi 0 (always true) to - // vmsgtu.vi -1 (always false). Instead match to vmsne.vv. 
- def : Pat<(vti.Mask (int_riscv_vmsgeu (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), VLOpFrag)), - (!cast("PseudoVMSEQ_VV_"#vti.LMul.MX) vti.RegClass:$rs1, - vti.RegClass:$rs1, - GPR:$vl, - vti.Log2SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsgeu_mask (vti.Mask VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVMSEQ_VV_"#vti.LMul.MX#"_MASK") - VR:$merge, - vti.RegClass:$rs1, - vti.RegClass:$rs1, - (vti.Mask V0), - GPR:$vl, - vti.Log2SEW)>; -} +defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE">; +defm : VPatCompare_VI<"int_riscv_vmsltu", "PseudoVMSLEU">; +// Special cases to avoid matching vmsltu.vi 0 (always false) to +// vmsleu.vi -1 (always true). Instead match to vmsne.vv. +defm : VPatCompareUnsignedZero<"int_riscv_vmsltu", "PseudoVMSNE">; + +defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT">; +defm : VPatCompare_VI<"int_riscv_vmsgeu", "PseudoVMSGTU">; +// Special cases to avoid matching vmsgeu.vi 0 (always true) to +// vmsgtu.vi -1 (always false). Instead match to vmsne.vv. +defm : VPatCompareUnsignedZero<"int_riscv_vmsgeu", "PseudoVMSEQ">; //===----------------------------------------------------------------------===// // 12.9. Vector Integer Min/Max Instructions From 4039d17355b7dc54d847d05d8685912cf081a113 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 30 Dec 2021 10:13:41 +0800 Subject: [PATCH 209/992] [NFC] Specify targets for clang stack-protector-guard.c The run line of stack-protector-guard.c doesn't specify the triple, which means it depends on the platform running the test. This makes some failure hidden. 
Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D116003 --- clang/test/CodeGen/stack-protector-guard.c | 27 ++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGen/stack-protector-guard.c b/clang/test/CodeGen/stack-protector-guard.c index a5483ba0f194..5839ab06033a 100644 --- a/clang/test/CodeGen/stack-protector-guard.c +++ b/clang/test/CodeGen/stack-protector-guard.c @@ -1,16 +1,25 @@ -// RUN: %clang_cc1 -mstack-protector-guard=sysreg \ -// RUN: -mstack-protector-guard-reg=sp_el0 \ -// RUN: -mstack-protector-guard-offset=1024 \ -// RUN: -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-NONE %s +// RUN: %clang_cc1 -mstack-protector-guard=sysreg -triple x86_64-linux-gnu \ +// RUN: -mstack-protector-guard-offset=1024 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -mstack-protector-guard=sysreg -triple powerpc64le-linux-gnu \ +// RUN: -mstack-protector-guard-offset=1024 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -mstack-protector-guard=sysreg -triple arm-linux-gnueabi \ +// RUN: -mstack-protector-guard-offset=1024 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -mstack-protector-guard=sysreg -triple thumbv7-linux-gnueabi \ +// RUN: -mstack-protector-guard-offset=1024 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -mstack-protector-guard=sysreg -triple aarch64-linux-gnu \ +// RUN: -mstack-protector-guard-offset=1024 -mstack-protector-guard-reg=sp_el0 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AARCH64 void foo(int*); void bar(int x) { int baz[x]; foo(baz); } -// CHECK: !llvm.module.flags = !{{{.*}}[[ATTR1:![0-9]+]], [[ATTR2:![0-9]+]], [[ATTR3:![0-9]+]]} +// CHECK: !llvm.module.flags = !{{{.*}}[[ATTR1:![0-9]+]], [[ATTR2:![0-9]+]]} // CHECK: [[ATTR1]] = !{i32 1, !"stack-protector-guard", !"sysreg"} -// CHECK: [[ATTR2]] = !{i32 1, !"stack-protector-guard-reg", !"sp_el0"} -// CHECK: [[ATTR3]] = 
!{i32 1, !"stack-protector-guard-offset", i32 1024} -// CHECK-NONE-NOT: !"stack-protector-guard +// CHECK: [[ATTR2]] = !{i32 1, !"stack-protector-guard-offset", i32 1024} + +// AARCH64: !llvm.module.flags = !{{{.*}}[[ATTR1:![0-9]+]], [[ATTR2:![0-9]+]], [[ATTR3:![0-9]+]]} +// AARCH64: [[ATTR1]] = !{i32 1, !"stack-protector-guard", !"sysreg"} +// AARCH64: [[ATTR2]] = !{i32 1, !"stack-protector-guard-reg", !"sp_el0"} +// AARCH64: [[ATTR3]] = !{i32 1, !"stack-protector-guard-offset", i32 1024} From 1dd5e6fed5dbfe105451277d749e3c4240b925c4 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Wed, 29 Dec 2021 15:29:40 +0800 Subject: [PATCH 210/992] [RISCV] Use vmv.s.x instead of vfmv.s.f when the floating point scalar is 0. Use the integer vector scalar move instruction when moving 0, to avoid adding an integer-to-float move instruction. Differential Revision: https://reviews.llvm.org/D116365 --- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 5 +++++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll | 9 +++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 73b97e1c3675..0f7db335a090 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1367,6 +1367,11 @@ let Predicates = [HasVInstructionsAnyF] in { // 17.2. 
Floating-Point Scalar Move Instructions foreach vti = AllFloatVectors in { + def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge), + (vti.Scalar (fpimm0)), + VLOpFrag)), + (!cast("PseudoVMV_S_X_"#vti.LMul.MX) + vti.RegClass:$merge, X0, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge), vti.ScalarRegClass:$rs1, VLOpFrag)), diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 717824031e17..90f3a138b221 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -86,9 +86,8 @@ define void @buildvec_dominant0_v2f32(<2 x float>* %x) { ; CHECK-NEXT: addi a1, a1, %lo(.LCPI2_0) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vlse32.v v8, (a1), zero -; CHECK-NEXT: fmv.w.x ft0, zero ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret store <2 x float> , <2 x float>* %x @@ -118,8 +117,7 @@ define void @buildvec_dominant0_v4f32(<4 x float>* %x) { ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_0) ; CHECK-NEXT: vlse32.v v8, (a1), zero -; CHECK-NEXT: fmv.w.x ft0, zero -; CHECK-NEXT: vfmv.s.f v9, ft0 +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu ; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu @@ -132,9 +130,8 @@ define void @buildvec_dominant0_v4f32(<4 x float>* %x) { define void @buildvec_dominant1_v4f32(<4 x float>* %x, float %f) { ; CHECK-LABEL: buildvec_dominant1_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: fmv.w.x ft0, zero ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu ; CHECK-NEXT: vslideup.vi 
v9, v8, 1 From 458db51c101bc3372e96b71bda7ca0f5ba2ae431 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 29 Dec 2021 23:22:37 -0500 Subject: [PATCH 211/992] [OpenMP] Add missing `tt_hidden_helper_task_encountered` along with `tt_found_proxy_tasks` In most cases, hidden helper task behave similar as detached tasks. That means, for example, if we have to wait for detached tasks, we have to do the same thing for hidden helper tasks as well. This patch adds the missing condition for hidden helper task accordingly along with detached task. Reviewed By: AndreyChurbanov Differential Revision: https://reviews.llvm.org/D107316 --- .../test/offloading/target_nowait_target.cpp | 31 +++++++++++++++++++ openmp/runtime/src/kmp_barrier.cpp | 6 ++-- openmp/runtime/src/kmp_csupport.cpp | 3 +- openmp/runtime/src/kmp_runtime.cpp | 3 +- openmp/runtime/src/kmp_taskdeps.cpp | 6 ++-- openmp/runtime/src/kmp_tasking.cpp | 18 ++++++++++- 6 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/target_nowait_target.cpp diff --git a/openmp/libomptarget/test/offloading/target_nowait_target.cpp b/openmp/libomptarget/test/offloading/target_nowait_target.cpp new file mode 100644 index 000000000000..24a83c300524 --- /dev/null +++ b/openmp/libomptarget/test/offloading/target_nowait_target.cpp @@ -0,0 +1,31 @@ +// RUN: %libomptarget-compilexx-and-run-generic + +// UNSUPPORTED: amdgcn-amd-amdhsa + +#include + +int main(int argc, char *argv[]) { + int data[1024]; + int sum = 0; + + for (int i = 0; i < 1024; ++i) + data[i] = i; + +#pragma omp target map(tofrom: sum) map(to: data) depend(inout : data[0]) nowait + { + for (int i = 0; i < 1024; ++i) { + sum += data[i]; + } + } + +#pragma omp target map(tofrom: sum) map(to: data) depend(inout : data[0]) + { + for (int i = 0; i < 1024; ++i) { + sum += data[i]; + } + } + + assert(sum == 1023 * 1024); + + return 0; +} diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index 
97bf9811bcd0..ee05bb3587ca 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -2037,8 +2037,10 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif - KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == - TRUE); + KMP_DEBUG_ASSERT( + this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE || + this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == + TRUE); __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); __kmp_task_team_setup(this_thr, team, 0); diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index e95c2f072509..e263558517d0 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -531,7 +531,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { kmp_task_team_t *task_team = this_thr->th.th_task_team; // we need to wait for the proxy tasks before finishing the thread - if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) + if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || + task_team->tt.tt_hidden_helper_task_encountered)) __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); KMP_MB(); diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 6efc26df8de3..7af970803a30 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -4106,7 +4106,8 @@ void __kmp_unregister_root_current_thread(int gtid) { kmp_task_team_t *task_team = thread->th.th_task_team; // we need to wait for the proxy tasks before finishing the thread - if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { + if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || + task_team->tt.tt_hidden_helper_task_encountered)) { #if OMPT_SUPPORT // the runtime is shutting down so we won't report any events thread->th.ompt_thread_info.state = ompt_state_undefined; 
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index 7d2774a738fb..501830eaa758 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -829,8 +829,10 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; - ignore = ignore && thread->th.th_task_team != NULL && - thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; + ignore = + ignore && thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE && + thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE; ignore = ignore || current_task->td_dephash == NULL; if (ignore) { diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index d956df1b2a37..d6665a7ccfb4 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -3074,6 +3074,18 @@ static inline int __kmp_execute_tasks_template( return FALSE; } + // Check the flag again to see if it has already done in case to be trapped + // into infinite loop when a if0 task depends on a hidden helper task + // outside any parallel region. Detached tasks are not impacted in this case + // because the only thread executing this function has to execute the proxy + // task so it is in another code path that has the same check. 
+ if (flag == NULL || (!final_spin && flag->done_check())) { + KA_TRACE(15, + ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", + gtid)); + return TRUE; + } + // We could be getting tasks from target constructs; if this is the only // thread, keep trying to execute tasks from own queue if (nthreads == 1 && @@ -3478,6 +3490,7 @@ static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread, TCW_4(task_team->tt.tt_found_tasks, FALSE); TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); task_team->tt.tt_nproc = nthreads = team->t.t_nproc; KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads); @@ -3640,6 +3653,7 @@ void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) { TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); TCW_4(task_team->tt.tt_found_tasks, FALSE); TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team->t.t_nproc); TCW_4(task_team->tt.tt_active, TRUE); @@ -3732,8 +3746,10 @@ void __kmp_task_team_wait( "setting active to false, setting local and team's pointer to NULL\n", __kmp_gtid_from_thread(this_thr), task_team)); KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 || - task_team->tt.tt_found_proxy_tasks == TRUE); + task_team->tt.tt_found_proxy_tasks == TRUE || + task_team->tt.tt_hidden_helper_task_encountered == TRUE); TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0); TCW_SYNC_4(task_team->tt.tt_active, FALSE); KMP_MB(); From bde561c4813952847112600e5efe72d9015556f7 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 14 Dec 2021 14:52:02 -0500 Subject: [PATCH 212/992] [compiler-rt][cmake] Factor out extend_install_path function It is likely to become used again, if other projects want 
their own per-project install directory variables. `install` is removed from the name since it is not inherently about installing. Reviewed By: stephenneuendorffer Differential Revision: https://reviews.llvm.org/D115746 --- cmake/Modules/ExtendPath.cmake | 19 +++++++++++++++++++ compiler-rt/cmake/base-config-ix.cmake | 25 ++++++------------------- 2 files changed, 25 insertions(+), 19 deletions(-) create mode 100644 cmake/Modules/ExtendPath.cmake diff --git a/cmake/Modules/ExtendPath.cmake b/cmake/Modules/ExtendPath.cmake new file mode 100644 index 000000000000..5db393a21e1c --- /dev/null +++ b/cmake/Modules/ExtendPath.cmake @@ -0,0 +1,19 @@ +# Extend the path in `base_path` with the path in `current_segment`, returning +# the result in `joined_path`. If `current_segment` is an absolute path then +# just return it, in effect overriding `base_path`, and issue a warning. +# +# Note that the code returns a relative path (avoiding introducing leading +# slashes) if `base_path` is empty. +function(extend_path joined_path base_path current_segment) + if("${current_segment}" STREQUAL "") + set(temp_path "${base_path}") + elseif("${base_path}" STREQUAL "") + set(temp_path "${current_segment}") + elseif(IS_ABSOLUTE "${current_segment}") + message(WARNING "Since \"${current_segment}\" is absolute, it overrides install path: \"${base_path}\".") + set(temp_path "${current_segment}") + else() + set(temp_path "${base_path}/${current_segment}") + endif() + set(${joined_path} "${temp_path}" PARENT_SCOPE) +endfunction() diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index 447232e02438..1ada0ab30ba0 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -5,6 +5,7 @@ include(CheckIncludeFile) include(CheckCXXSourceCompiles) +include(ExtendPath) check_include_file(unwind.h HAVE_UNWIND_H) @@ -85,20 +86,6 @@ else() set(COMPILER_RT_TEST_COMPILER_ID GNU) endif() -function(extend_install_path 
joined_path current_segment) - if("${current_segment}" STREQUAL "") - set(temp_path "${COMPILER_RT_INSTALL_PATH}") - elseif("${COMPILER_RT_INSTALL_PATH}" STREQUAL "") - set(temp_path "${current_segment}") - elseif(IS_ABSOLUTE "${current_segment}") - message(WARNING "Since \"${current_segment}\" is absolute, it overrides COMPILER_RT_INSTALL_PATH: \"${COMPILER_RT_INSTALL_PATH}\".") - set(temp_path "${current_segment}") - else() - set(temp_path "${COMPILER_RT_INSTALL_PATH}/${current_segment}") - endif() - set(${joined_path} "${temp_path}" PARENT_SCOPE) -endfunction() - if(NOT DEFINED COMPILER_RT_OS_DIR) if(ANDROID) # The CMAKE_SYSTEM_NAME for Android is Android, but the OS is Linux and the @@ -111,23 +98,23 @@ endif() if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) set(COMPILER_RT_OUTPUT_LIBRARY_DIR ${COMPILER_RT_OUTPUT_DIR}/lib) - extend_install_path(default_install_path lib) + extend_path(default_install_path "${COMPILER_RT_INSTALL_PATH}" lib) set(COMPILER_RT_INSTALL_LIBRARY_DIR "${default_install_path}" CACHE PATH "Path where built compiler-rt libraries should be installed.") else(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) set(COMPILER_RT_OUTPUT_LIBRARY_DIR ${COMPILER_RT_OUTPUT_DIR}/lib/${COMPILER_RT_OS_DIR}) - extend_install_path(default_install_path "lib/${COMPILER_RT_OS_DIR}") + extend_path(default_install_path "${COMPILER_RT_INSTALL_PATH}" "lib/${COMPILER_RT_OS_DIR}") set(COMPILER_RT_INSTALL_LIBRARY_DIR "${default_install_path}" CACHE PATH "Path where built compiler-rt libraries should be installed.") endif() -extend_install_path(default_install_path bin) +extend_path(default_install_path "${COMPILER_RT_INSTALL_PATH}" bin) set(COMPILER_RT_INSTALL_BINARY_DIR "${default_install_path}" CACHE PATH "Path where built compiler-rt executables should be installed.") -extend_install_path(default_install_path include) +extend_path(default_install_path "${COMPILER_RT_INSTALL_PATH}" include) set(COMPILER_RT_INSTALL_INCLUDE_DIR "${default_install_path}" CACHE 
PATH "Path where compiler-rt headers should be installed.") -extend_install_path(default_install_path share) +extend_path(default_install_path "${COMPILER_RT_INSTALL_PATH}" share) set(COMPILER_RT_INSTALL_DATA_DIR "${default_install_path}" CACHE PATH "Path where compiler-rt data files should be installed.") From 5da6d26896d196cfbc992af32f82f8d2faf100c5 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 30 Dec 2021 07:00:50 +0000 Subject: [PATCH 213/992] [cmake] Tweak warning in `extend_path` helper function There was one more reference to the word "install" that I forgot to remove. Follow-up to bde561c4813952847112600e5efe72d9015556f7 / https://reviews.llvm.org/D115746 --- cmake/Modules/ExtendPath.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/ExtendPath.cmake b/cmake/Modules/ExtendPath.cmake index 5db393a21e1c..0dd6a4d3dab5 100644 --- a/cmake/Modules/ExtendPath.cmake +++ b/cmake/Modules/ExtendPath.cmake @@ -10,7 +10,7 @@ function(extend_path joined_path base_path current_segment) elseif("${base_path}" STREQUAL "") set(temp_path "${current_segment}") elseif(IS_ABSOLUTE "${current_segment}") - message(WARNING "Since \"${current_segment}\" is absolute, it overrides install path: \"${base_path}\".") + message(WARNING "Since \"${current_segment}\" is absolute, it overrides base path: \"${base_path}\".") set(temp_path "${current_segment}") else() set(temp_path "${base_path}/${current_segment}") From ecfd9196d5dde5699d7fe3bd411949a56e01bd8c Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Wed, 29 Dec 2021 11:40:03 +0700 Subject: [PATCH 214/992] [ConstantFolding] Use ICmpInst::Predicate instead of plain integer The function `ConstantFoldCompareInstruction` uses `unsigned short` to represent the compare predicate, although all users of the respective include file also use the definition of CmpInst. This change changes the predicate argument type in this function to `ICmpInst::Predicate`, which makes the code a bit clearer and simpler. 
No functional changes. Differential Revision: https://reviews.llvm.org/D116379 --- llvm/lib/IR/ConstantFold.cpp | 132 ++++++++++++++++++++--------------- llvm/lib/IR/ConstantFold.h | 3 +- llvm/lib/IR/Constants.cpp | 16 ++--- 3 files changed, 86 insertions(+), 65 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index ae926f95cefe..16b0880ce2f9 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1704,7 +1704,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, return ICmpInst::BAD_ICMP_PREDICATE; } -Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, +Constant *llvm::ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, Constant *C1, Constant *C2) { Type *ResultTy; if (VectorType *VT = dyn_cast(C1->getType())) @@ -1714,10 +1714,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, ResultTy = Type::getInt1Ty(C1->getContext()); // Fold FCMP_FALSE/FCMP_TRUE unconditionally. - if (pred == FCmpInst::FCMP_FALSE) + if (Predicate == FCmpInst::FCMP_FALSE) return Constant::getNullValue(ResultTy); - if (pred == FCmpInst::FCMP_TRUE) + if (Predicate == FCmpInst::FCMP_TRUE) return Constant::getAllOnesValue(ResultTy); // Handle some degenerate cases first @@ -1725,7 +1725,6 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return PoisonValue::get(ResultTy); if (isa(C1) || isa(C2)) { - CmpInst::Predicate Predicate = CmpInst::Predicate(pred); bool isIntegerPredicate = ICmpInst::isIntPredicate(Predicate); // For EQ and NE, we can always pick a value for the undef to make the // predicate pass or fail, so we can return undef. 
@@ -1750,9 +1749,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (!isa(GV) && !GV->hasExternalWeakLinkage() && !NullPointerIsDefined(nullptr /* F */, GV->getType()->getAddressSpace())) { - if (pred == ICmpInst::ICMP_EQ) + if (Predicate == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(C1->getContext()); - else if (pred == ICmpInst::ICMP_NE) + else if (Predicate == ICmpInst::ICMP_NE) return ConstantInt::getTrue(C1->getContext()); } // icmp eq/ne(GV,null) -> false/true @@ -1762,9 +1761,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (!isa(GV) && !GV->hasExternalWeakLinkage() && !NullPointerIsDefined(nullptr /* F */, GV->getType()->getAddressSpace())) { - if (pred == ICmpInst::ICMP_EQ) + if (Predicate == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(C1->getContext()); - else if (pred == ICmpInst::ICMP_NE) + else if (Predicate == ICmpInst::ICMP_NE) return ConstantInt::getTrue(C1->getContext()); } } @@ -1772,16 +1771,16 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // The caller is expected to commute the operands if the constant expression // is C2. // C1 >= 0 --> true - if (pred == ICmpInst::ICMP_UGE) + if (Predicate == ICmpInst::ICMP_UGE) return Constant::getAllOnesValue(ResultTy); // C1 < 0 --> false - if (pred == ICmpInst::ICMP_ULT) + if (Predicate == ICmpInst::ICMP_ULT) return Constant::getNullValue(ResultTy); } // If the comparison is a comparison between two i1's, simplify it. 
if (C1->getType()->isIntegerTy(1)) { - switch(pred) { + switch (Predicate) { case ICmpInst::ICMP_EQ: if (isa(C2)) return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2)); @@ -1796,12 +1795,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (isa(C1) && isa(C2)) { const APInt &V1 = cast(C1)->getValue(); const APInt &V2 = cast(C2)->getValue(); - return ConstantInt::get( - ResultTy, ICmpInst::compare(V1, V2, (ICmpInst::Predicate)pred)); + return ConstantInt::get(ResultTy, ICmpInst::compare(V1, V2, Predicate)); } else if (isa(C1) && isa(C2)) { const APFloat &C1V = cast(C1)->getValueAPF(); const APFloat &C2V = cast(C2)->getValueAPF(); - CmpInst::Predicate Predicate = CmpInst::Predicate(pred); return ConstantInt::get(ResultTy, FCmpInst::compare(C1V, C2V, Predicate)); } else if (auto *C1VTy = dyn_cast(C1->getType())) { @@ -1810,7 +1807,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (Constant *C2Splat = C2->getSplatValue()) return ConstantVector::getSplat( C1VTy->getElementCount(), - ConstantExpr::getCompare(pred, C1Splat, C2Splat)); + ConstantExpr::getCompare(Predicate, C1Splat, C2Splat)); // Do not iterate on scalable vector. The number of elements is unknown at // compile-time. @@ -1829,7 +1826,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, Constant *C2E = ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, I)); - ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E)); + ResElts.push_back(ConstantExpr::getCompare(Predicate, C1E, C2E)); } return ConstantVector::get(ResElts); @@ -1854,46 +1851,52 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, case FCmpInst::BAD_FCMP_PREDICATE: break; // Couldn't determine anything about these constants. 
case FCmpInst::FCMP_OEQ: // We know that C1 == C2 - Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ || - pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE || - pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE); + Result = + (Predicate == FCmpInst::FCMP_UEQ || Predicate == FCmpInst::FCMP_OEQ || + Predicate == FCmpInst::FCMP_ULE || Predicate == FCmpInst::FCMP_OLE || + Predicate == FCmpInst::FCMP_UGE || Predicate == FCmpInst::FCMP_OGE); break; case FCmpInst::FCMP_OLT: // We know that C1 < C2 - Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE || - pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT || - pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE); + Result = + (Predicate == FCmpInst::FCMP_UNE || Predicate == FCmpInst::FCMP_ONE || + Predicate == FCmpInst::FCMP_ULT || Predicate == FCmpInst::FCMP_OLT || + Predicate == FCmpInst::FCMP_ULE || Predicate == FCmpInst::FCMP_OLE); break; case FCmpInst::FCMP_OGT: // We know that C1 > C2 - Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE || - pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT || - pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE); + Result = + (Predicate == FCmpInst::FCMP_UNE || Predicate == FCmpInst::FCMP_ONE || + Predicate == FCmpInst::FCMP_UGT || Predicate == FCmpInst::FCMP_OGT || + Predicate == FCmpInst::FCMP_UGE || Predicate == FCmpInst::FCMP_OGE); break; case FCmpInst::FCMP_OLE: // We know that C1 <= C2 // We can only partially decide this relation. - if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT) + if (Predicate == FCmpInst::FCMP_UGT || Predicate == FCmpInst::FCMP_OGT) Result = 0; - else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT) + else if (Predicate == FCmpInst::FCMP_ULT || + Predicate == FCmpInst::FCMP_OLT) Result = 1; break; case FCmpInst::FCMP_OGE: // We known that C1 >= C2 // We can only partially decide this relation. 
- if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT) + if (Predicate == FCmpInst::FCMP_ULT || Predicate == FCmpInst::FCMP_OLT) Result = 0; - else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT) + else if (Predicate == FCmpInst::FCMP_UGT || + Predicate == FCmpInst::FCMP_OGT) Result = 1; break; case FCmpInst::FCMP_ONE: // We know that C1 != C2 // We can only partially decide this relation. - if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ) + if (Predicate == FCmpInst::FCMP_OEQ || Predicate == FCmpInst::FCMP_UEQ) Result = 0; - else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE) + else if (Predicate == FCmpInst::FCMP_ONE || + Predicate == FCmpInst::FCMP_UNE) Result = 1; break; case FCmpInst::FCMP_UEQ: // We know that C1 == C2 || isUnordered(C1, C2). // We can only partially decide this relation. - if (pred == FCmpInst::FCMP_ONE) + if (Predicate == FCmpInst::FCMP_ONE) Result = 0; - else if (pred == FCmpInst::FCMP_UEQ) + else if (Predicate == FCmpInst::FCMP_UEQ) Result = 1; break; } @@ -1905,67 +1908,84 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } else { // Evaluate the relation between the two constants, per the predicate. int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateICmpRelation(C1, C2, - CmpInst::isSigned((CmpInst::Predicate)pred))) { + switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(Predicate))) { default: llvm_unreachable("Unknown relational!"); case ICmpInst::BAD_ICMP_PREDICATE: break; // Couldn't determine anything about these constants. case ICmpInst::ICMP_EQ: // We know the constants are equal! // If we know the constants are equal, we can decide the result of this // computation precisely. 
- Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred); + Result = ICmpInst::isTrueWhenEqual(Predicate); break; case ICmpInst::ICMP_ULT: - switch (pred) { + switch (Predicate) { case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE: Result = 1; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE: Result = 0; break; + default: + break; } break; case ICmpInst::ICMP_SLT: - switch (pred) { + switch (Predicate) { case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE: Result = 1; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE: Result = 0; break; + default: + break; } break; case ICmpInst::ICMP_UGT: - switch (pred) { + switch (Predicate) { case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE: Result = 1; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: Result = 0; break; + default: + break; } break; case ICmpInst::ICMP_SGT: - switch (pred) { + switch (Predicate) { case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE: Result = 1; break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE: Result = 0; break; + default: + break; } break; case ICmpInst::ICMP_ULE: - if (pred == ICmpInst::ICMP_UGT) Result = 0; - if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1; + if (Predicate == ICmpInst::ICMP_UGT) + Result = 0; + if (Predicate == ICmpInst::ICMP_ULT || Predicate == ICmpInst::ICMP_ULE) + Result = 1; break; case ICmpInst::ICMP_SLE: - if (pred == ICmpInst::ICMP_SGT) Result = 0; - if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1; + if (Predicate == ICmpInst::ICMP_SGT) + Result = 0; + if (Predicate == ICmpInst::ICMP_SLT || Predicate == ICmpInst::ICMP_SLE) + Result = 1; break; case ICmpInst::ICMP_UGE: - if (pred == ICmpInst::ICMP_ULT) Result = 0; - if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1; + if (Predicate 
== ICmpInst::ICMP_ULT) + Result = 0; + if (Predicate == ICmpInst::ICMP_UGT || Predicate == ICmpInst::ICMP_UGE) + Result = 1; break; case ICmpInst::ICMP_SGE: - if (pred == ICmpInst::ICMP_SLT) Result = 0; - if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1; + if (Predicate == ICmpInst::ICMP_SLT) + Result = 0; + if (Predicate == ICmpInst::ICMP_SGT || Predicate == ICmpInst::ICMP_SGE) + Result = 1; break; case ICmpInst::ICMP_NE: - if (pred == ICmpInst::ICMP_EQ) Result = 0; - if (pred == ICmpInst::ICMP_NE) Result = 1; + if (Predicate == ICmpInst::ICMP_EQ) + Result = 0; + if (Predicate == ICmpInst::ICMP_NE) + Result = 1; break; } @@ -1983,16 +2003,16 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy() && !CE2Op0->getType()->isFPOrFPVectorTy()) { Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType()); - return ConstantExpr::getICmp(pred, Inverse, CE2Op0); + return ConstantExpr::getICmp(Predicate, Inverse, CE2Op0); } } // If the left hand side is an extension, try eliminating it. 
if (ConstantExpr *CE1 = dyn_cast(C1)) { if ((CE1->getOpcode() == Instruction::SExt && - ICmpInst::isSigned((ICmpInst::Predicate)pred)) || + ICmpInst::isSigned(Predicate)) || (CE1->getOpcode() == Instruction::ZExt && - !ICmpInst::isSigned((ICmpInst::Predicate)pred))){ + !ICmpInst::isSigned(Predicate))) { Constant *CE1Op0 = CE1->getOperand(0); Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType()); if (CE1Inverse == CE1Op0) { @@ -2000,7 +2020,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType()); if (ConstantExpr::getCast(CE1->getOpcode(), C2Inverse, C2->getType()) == C2) - return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse); + return ConstantExpr::getICmp(Predicate, CE1Inverse, C2Inverse); } } } @@ -2010,8 +2030,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If C2 is a constant expr and C1 isn't, flip them around and fold the // other way if possible. // Also, if C1 is null and C2 isn't, flip them around. 
- pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); - return ConstantExpr::getICmp(pred, C2, C1); + Predicate = ICmpInst::getSwappedPredicate(Predicate); + return ConstantExpr::getICmp(Predicate, C2, C1); } } return nullptr; diff --git a/llvm/lib/IR/ConstantFold.h b/llvm/lib/IR/ConstantFold.h index 0cdd5cf3cbce..1aa44f4d21e5 100644 --- a/llvm/lib/IR/ConstantFold.h +++ b/llvm/lib/IR/ConstantFold.h @@ -19,6 +19,7 @@ #define LLVM_LIB_IR_CONSTANTFOLD_H #include "llvm/ADT/Optional.h" +#include "llvm/IR/InstrTypes.h" namespace llvm { template class ArrayRef; @@ -46,7 +47,7 @@ template class ArrayRef; Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V); Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2); - Constant *ConstantFoldCompareInstruction(unsigned short predicate, + Constant *ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, Constant *C1, Constant *C2); Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds, Optional InRangeIndex, diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 837be910f6d8..e753fc7a3871 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2546,11 +2546,11 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { + auto Predicate = static_cast(pred); assert(LHS->getType() == RHS->getType()); - assert(CmpInst::isIntPredicate((CmpInst::Predicate)pred) && - "Invalid ICmp Predicate"); + assert(CmpInst::isIntPredicate(Predicate) && "Invalid ICmp Predicate"); - if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) + if (Constant *FC = ConstantFoldCompareInstruction(Predicate, LHS, RHS)) return FC; // Fold a few common cases... 
if (OnlyIfReduced) @@ -2559,7 +2559,7 @@ Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, // Look up the constant in the table first to ensure uniqueness Constant *ArgVec[] = { LHS, RHS }; // Get the key type with both the opcode and predicate - const ConstantExprKeyType Key(Instruction::ICmp, ArgVec, pred); + const ConstantExprKeyType Key(Instruction::ICmp, ArgVec, Predicate); Type *ResultTy = Type::getInt1Ty(LHS->getContext()); if (VectorType *VT = dyn_cast(LHS->getType())) @@ -2571,11 +2571,11 @@ Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { + auto Predicate = static_cast(pred); assert(LHS->getType() == RHS->getType()); - assert(CmpInst::isFPPredicate((CmpInst::Predicate)pred) && - "Invalid FCmp Predicate"); + assert(CmpInst::isFPPredicate(Predicate) && "Invalid FCmp Predicate"); - if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) + if (Constant *FC = ConstantFoldCompareInstruction(Predicate, LHS, RHS)) return FC; // Fold a few common cases... 
if (OnlyIfReduced) @@ -2584,7 +2584,7 @@ Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, // Look up the constant in the table first to ensure uniqueness Constant *ArgVec[] = { LHS, RHS }; // Get the key type with both the opcode and predicate - const ConstantExprKeyType Key(Instruction::FCmp, ArgVec, pred); + const ConstantExprKeyType Key(Instruction::FCmp, ArgVec, Predicate); Type *ResultTy = Type::getInt1Ty(LHS->getContext()); if (VectorType *VT = dyn_cast(LHS->getType())) From c6bf71363a2ec3155c257b8f132c47a3173fbe38 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 00:16:03 -0800 Subject: [PATCH 215/992] [ELFAsmParser] Optimize hasPrefix with StringRef::consume_front --- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 32 +++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index e95019c12db7..e814cf003656 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -499,7 +499,8 @@ bool ELFAsmParser::maybeParseUniqueID(int64_t &UniqueID) { } static bool hasPrefix(StringRef SectionName, StringRef Prefix) { - return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back(); + return SectionName.consume_front(Prefix) && + (SectionName.empty() || SectionName[0] == '.'); } static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName, @@ -514,7 +515,7 @@ static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName, // MIPS .debug_* sections should have SHT_MIPS_DWARF section type to // distinguish among sections contain DWARF and ECOFF debug formats, // but in assembly files these sections have SHT_PROGBITS type. 
- return hasPrefix(SectionName, ".debug_") && Type == ELF::SHT_PROGBITS; + return SectionName.startswith(".debug_") && Type == ELF::SHT_PROGBITS; } return false; } @@ -537,19 +538,18 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { int64_t UniqueID = ~0; // Set the defaults first. - if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1") + if (hasPrefix(SectionName, ".rodata") || SectionName == ".rodata1") Flags |= ELF::SHF_ALLOC; else if (SectionName == ".fini" || SectionName == ".init" || - hasPrefix(SectionName, ".text.")) + hasPrefix(SectionName, ".text")) Flags |= ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; - else if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" || - hasPrefix(SectionName, ".bss.") || - hasPrefix(SectionName, ".init_array.") || - hasPrefix(SectionName, ".fini_array.") || - hasPrefix(SectionName, ".preinit_array.")) + else if (hasPrefix(SectionName, ".data") || SectionName == ".data1" || + hasPrefix(SectionName, ".bss") || + hasPrefix(SectionName, ".init_array") || + hasPrefix(SectionName, ".fini_array") || + hasPrefix(SectionName, ".preinit_array")) Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE; - else if (hasPrefix(SectionName, ".tdata.") || - hasPrefix(SectionName, ".tbss.")) + else if (hasPrefix(SectionName, ".tdata") || hasPrefix(SectionName, ".tbss")) Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS; if (getLexer().is(AsmToken::Comma)) { @@ -620,15 +620,15 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { if (TypeName.empty()) { if (SectionName.startswith(".note")) Type = ELF::SHT_NOTE; - else if (hasPrefix(SectionName, ".init_array.")) + else if (hasPrefix(SectionName, ".init_array")) Type = ELF::SHT_INIT_ARRAY; - else if (hasPrefix(SectionName, ".bss.")) + else if (hasPrefix(SectionName, ".bss")) Type = ELF::SHT_NOBITS; - else if (hasPrefix(SectionName, ".tbss.")) + else if (hasPrefix(SectionName, ".tbss")) Type = ELF::SHT_NOBITS; - else if (hasPrefix(SectionName, 
".fini_array.")) + else if (hasPrefix(SectionName, ".fini_array")) Type = ELF::SHT_FINI_ARRAY; - else if (hasPrefix(SectionName, ".preinit_array.")) + else if (hasPrefix(SectionName, ".preinit_array")) Type = ELF::SHT_PREINIT_ARRAY; } else { if (TypeName == "init_array") From 9ebeac8613c6ddd521cfd9a5031b4aa40470c177 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 00:30:47 -0800 Subject: [PATCH 216/992] [MC][test] Improve section_names.s Add missing coverage like .tdata/.data1/.rodata1 --- llvm/test/MC/AsmParser/section_names.s | 124 +++++++++++-------------- 1 file changed, 54 insertions(+), 70 deletions(-) diff --git a/llvm/test/MC/AsmParser/section_names.s b/llvm/test/MC/AsmParser/section_names.s index 97cc8fc00726..2e8d1f957b9a 100644 --- a/llvm/test/MC/AsmParser/section_names.s +++ b/llvm/test/MC/AsmParser/section_names.s @@ -1,82 +1,66 @@ # RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s -# RUN: llvm-readobj -S - < %t | FileCheck %s -.section .nobits -.byte 1 -.section .nobits2 -.byte 1 -.section .nobitsfoo -.byte 1 +# RUN: llvm-readelf -S %t | FileCheck %s + +# CHECK: Name Type {{.*}} Flg Lk Inf Al +# CHECK: .note NOTE {{.*}} 0 0 1 +# CHECK-NEXT: .note2 NOTE {{.*}} 0 0 1 +# CHECK-NEXT: .notefoo NOTE {{.*}} 0 0 1 +# CHECK-NEXT: .rodata.foo PROGBITS {{.*}} A 0 0 1 +# CHECK-NEXT: .rodatafoo PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .rodata1 PROGBITS {{.*}} A 0 0 1 +# CHECK-NEXT: .tdata.foo PROGBITS {{.*}} WAT 0 0 1 +# CHECK-NEXT: .tbss NOBITS {{.*}} WAT 0 0 1 +# CHECK-NEXT: .tbss.foo NOBITS {{.*}} WAT 0 0 1 +# CHECK-NEXT: .init_array INIT_ARRAY {{.*}} WA 0 0 1 +# CHECK-NEXT: .init_array.42 INIT_ARRAY {{.*}} WA 0 0 1 +# CHECK-NEXT: .init_array2 PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .init_arrayfoo PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .fini_array FINI_ARRAY {{.*}} WA 0 0 1 +# CHECK-NEXT: .fini_array2 PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .fini_arrayfoo PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .preinit_array PREINIT_ARRAY {{.*}} WA 0 0 1 +# 
CHECK-NEXT: .preinit_array2 PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .preinit_array.x PREINIT_ARRAY {{.*}} WA 0 0 1 +# CHECK-NEXT: .data.foo PROGBITS {{.*}} WA 0 0 1 +# CHECK-NEXT: .data1 PROGBITS {{.*}} WA 0 0 1 +# CHECK-NEXT: .data2 PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .bss NOBITS {{.*}} WA 0 0 1 +# CHECK-NEXT: .bss.foo NOBITS {{.*}} WA 0 0 1 +# CHECK-NEXT: .nobits PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .nobits2 PROGBITS {{.*}} 0 0 1 +# CHECK-NEXT: .nobitsfoo PROGBITS {{.*}} 0 0 1 + + +.section .note +.section .note2 +.section .notefoo + +.section .rodata.foo +.section .rodatafoo +.section .rodata1 + +.section .tdata.foo +.section .tbss +.section .tbss.foo + .section .init_array -.byte 1 .section .init_array.42 -.byte 1 .section .init_array2 -.byte 1 .section .init_arrayfoo -.byte 1 .section .fini_array -.byte 1 .section .fini_array2 -.byte 1 .section .fini_arrayfoo -.byte 1 .section .preinit_array -.byte 1 .section .preinit_array2 -.byte 1 -.section .preinit_arrayfoo -.byte 1 -.section .note -.byte 1 -.section .note2 -.byte 1 -.section .notefoo -.byte 1 +.section .preinit_array.x + +.section .data.foo +.section .data1 +.section .data2 .section .bss -.space 1 .section .bss.foo -.space 1 -.section .tbss -.space 1 -.section .tbss.foo -.space 1 -# CHECK: Name: .nobits -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .nobits2 -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .nobitsfoo -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .init_array -# CHECK-NEXT: Type: SHT_INIT_ARRAY -# CHECK: Name: .init_array.42 -# CHECK-NEXT: Type: SHT_INIT_ARRAY -# CHECK: Name: .init_array2 -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .init_arrayfoo -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .fini_array -# CHECK-NEXT: Type: SHT_FINI_ARRAY -# CHECK: Name: .fini_array2 -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .fini_arrayfoo -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .preinit_array -# CHECK-NEXT: Type: SHT_PREINIT_ARRAY -# CHECK: Name: .preinit_array2 -# 
CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .preinit_arrayfoo -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK: Name: .note -# CHECK-NEXT: Type: SHT_NOTE -# CHECK: Name: .note2 -# CHECK-NEXT: Type: SHT_NOTE -# CHECK: Name: .notefoo -# CHECK-NEXT: Type: SHT_NOTE -# CHECK: Name: .bss -# CHECK-NEXT: Type: SHT_NOBITS -# CHECK: Name: .bss.foo -# CHECK-NEXT: Type: SHT_NOBITS -# CHECK: Name: .tbss -# CHECK-NEXT: Type: SHT_NOBITS -# CHECK: Name: .tbss.foo -# CHECK-NEXT: Type: SHT_NOBITS + +.section .nobits +.section .nobits2 +.section .nobitsfoo +.byte 1 From 43ff781c783d32b5abf62509593d388c540b7ad8 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Mon, 27 Dec 2021 21:03:45 +0800 Subject: [PATCH 217/992] [RISCV] Pre-commit test for Teach VSETVLInsert to eliminate redundant vsetvli for vmv.s.x and vfmv.s.f. Differential Revision: https://reviews.llvm.org/D116306 --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 44c50a7ff607..beab37712fd5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -3,6 +3,7 @@ ; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s declare i64 @llvm.riscv.vsetvli(i64, i64, i64) +declare i64 @llvm.riscv.vsetvlimax(i64, i64) declare @llvm.riscv.vfadd.nxv1f64.nxv1f64( , , @@ -143,6 +144,67 @@ for.body: ; preds = %entry, %for.body br i1 %cmp.not, label %for.cond.cleanup, label %for.body } +define @test7( %a, i64 %b, %mask) nounwind { +; CHECK-LABEL: test7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret +entry: + %x = tail call i64 @llvm.riscv.vsetvlimax(i64 3, i64 0) + %y = call @llvm.riscv.vmv.s.x.nxv1i64( + %a, + i64 %b, i64 1) + + ret %y +} + +define @test8( %a, i64 %b, %mask) nounwind { +; CHECK-LABEL: 
test8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a1, 6, e64, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret +entry: + %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0) + %y = call @llvm.riscv.vmv.s.x.nxv1i64( %a, i64 %b, i64 2) + ret %y +} + +define @test9( %a, i64 %b, %mask) nounwind { +; CHECK-LABEL: test9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret +entry: + %x = call @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( + %a, + %a, + %a, + %mask, + i64 9, + i64 0) + %y = call @llvm.riscv.vmv.s.x.nxv1i64( %x, i64 %b, i64 2) + ret %y +} + +declare @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( + , + , + , + , + i64, + i64); + +declare @llvm.riscv.vmv.s.x.nxv1i64( + , + i64, + i64); declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) declare @llvm.riscv.vle.nxv2i32.i64(* nocapture, i64) declare @llvm.riscv.vmslt.nxv2i32.i32.i64(, i32, i64) From b69fe48ccf9ec19f6237ee2e9d16fc6a7071c17c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 01:12:55 -0800 Subject: [PATCH 218/992] [IROutliner] Move global namespace cl::opt inside llvm:: --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 2 ++ llvm/lib/Transforms/IPO/IROutliner.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 2ec6cbeabda2..ca1a2907e51c 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -23,11 +23,13 @@ using namespace llvm; using namespace IRSimilarity; +namespace llvm { cl::opt DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes.")); +} // namespace llvm 
IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index b8a314c54f18..928b12013a54 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -36,7 +36,9 @@ using namespace IRSimilarity; // A command flag to be used for debugging to exclude branches from similarity // matching and outlining. +namespace llvm { extern cl::opt DisableBranches; +} // namespace llvm // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr From 550d90e692af6344bd32db7f228f3264a92620b8 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 30 Dec 2021 09:13:04 +0000 Subject: [PATCH 219/992] Revert "[AArch64] Add a tablegen pattern for UZP2." This reverts commit ada028c32f47ca84a0b7be5d1ab4e3c943f859a3. A performance regression was reported that we need to investigate: https://github.com/llvm/llvm-project/issues/52919 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 13 ----- .../CodeGen/AArch64/arm64-uzp2-combine.ll | 47 ------------------- 2 files changed, 60 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ebccc07edc7a..efdc8e6f1be8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5342,19 +5342,6 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), (v2i32 (trunc (v2i64 V128:$Vm))))), (UZP1v4i32 V128:$Vn, V128:$Vm)>; -def : Pat<(v16i8 (concat_vectors - (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), - (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), - (UZP2v16i8 V128:$Vn, V128:$Vm)>; -def : Pat<(v8i16 (concat_vectors - (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 
16)))), - (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), - (UZP2v8i16 V128:$Vn, V128:$Vm)>; -def : Pat<(v4i32 (concat_vectors - (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), - (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), - (UZP2v4i32 V128:$Vn, V128:$Vm)>; - //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions //---------------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll b/llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll deleted file mode 100644 index e3c52e690d50..000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-uzp2-combine.ll +++ /dev/null @@ -1,47 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s - -; Test the (concat_vectors (trunc (lshr)), (trunc (lshr))) pattern. - -define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i16> %x, <8 x i16> %y) { -; CHECK-LABEL: test_combine_v8i16_to_v16i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uzp2 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ret -entry: - %lshr1 = lshr <8 x i16> %x, - %trunc1 = trunc <8 x i16> %lshr1 to <8 x i8> - %lshr2 = lshr <8 x i16> %y, - %trunc2 = trunc <8 x i16> %lshr2 to <8 x i8> - %shuffle = shufflevector <8 x i8> %trunc1, <8 x i8> %trunc2, <16 x i32> - ret <16 x i8> %shuffle -} - -define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: test_combine_v4i32_to_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret -entry: - %lshr1 = lshr <4 x i32> %x, - %trunc1 = trunc <4 x i32> %lshr1 to <4 x i16> - %lshr2 = lshr <4 x i32> %y, - %trunc2 = trunc <4 x i32> %lshr2 to <4 x i16> - %shuffle = shufflevector <4 x i16> %trunc1, <4 x i16> %trunc2, <8 x i32> - ret <8 x i16> %shuffle -} - -define <4 x i32> @test_combine_v2i64_to_v4i32(<2 x i64> %x, <2 x i64> %y) { 
-; CHECK-LABEL: test_combine_v2i64_to_v4i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ret -entry: - %lshr1 = lshr <2 x i64> %x, - %trunc1 = trunc <2 x i64> %lshr1 to <2 x i32> - %lshr2 = lshr <2 x i64> %y, - %trunc2 = trunc <2 x i64> %lshr2 to <2 x i32> - %shuffle = shufflevector <2 x i32> %trunc1, <2 x i32> %trunc2, <4 x i32> - ret <4 x i32> %shuffle -} - From 128c6ed73b8f906a13ae908008c6f415415964bb Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Mon, 27 Dec 2021 21:13:24 +0800 Subject: [PATCH 220/992] [RISCV] Teach VSETVLInsert to eliminate redundant vsetvli for vmv.s.x and vfmv.s.f. Differential Revision: https://reviews.llvm.org/D116307 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 92 ++++++++++++++++++- .../RISCV/rvv/common-shuffle-patterns.ll | 4 +- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 11 +-- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 22 +---- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 24 +---- .../RISCV/rvv/fixed-vectors-unaligned.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 7 +- 7 files changed, 107 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index dbfc90f36f80..eab9ee916fd2 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -59,12 +59,13 @@ class VSETVLIInfo { uint8_t MaskAgnostic : 1; uint8_t MaskRegOp : 1; uint8_t StoreOp : 1; + uint8_t ScalarMovOp : 1; uint8_t SEWLMULRatioOnly : 1; public: VSETVLIInfo() : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false), - StoreOp(false), SEWLMULRatioOnly(false) {} + StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {} static VSETVLIInfo getUnknown() { VSETVLIInfo Info; @@ -96,6 +97,18 @@ class VSETVLIInfo { assert(hasAVLImm()); return AVLImm; } + bool hasZeroAVL() const { + if (hasAVLImm()) + return getAVLImm() == 0; + return false; + } + bool hasNonZeroAVL() 
const { + if (hasAVLImm()) + return getAVLImm() > 0; + if (hasAVLReg()) + return getAVLReg() == RISCV::X0; + return false; + } bool hasSameAVL(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && @@ -120,7 +133,7 @@ class VSETVLIInfo { MaskAgnostic = RISCVVType::isMaskAgnostic(VType); } void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO, - bool IsStore) { + bool IsStore, bool IsScalarMovOp) { assert(isValid() && !isUnknown() && "Can't set VTYPE for uninitialized or unknown"); VLMul = L; @@ -129,6 +142,7 @@ class VSETVLIInfo { MaskAgnostic = MA; MaskRegOp = MRO; StoreOp = IsStore; + ScalarMovOp = IsScalarMovOp; } unsigned encodeVTYPE() const { @@ -139,6 +153,16 @@ class VSETVLIInfo { bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } + bool hasSameSEW(const VSETVLIInfo &Other) const { + assert(isValid() && Other.isValid() && + "Can't compare invalid VSETVLIInfos"); + assert(!isUnknown() && !Other.isUnknown() && + "Can't compare VTYPE in unknown state"); + assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && + "Can't compare when only LMUL/SEW ratio is valid."); + return SEW == Other.SEW; + } + bool hasSameVTYPE(const VSETVLIInfo &Other) const { assert(isValid() && Other.isValid() && "Can't compare invalid VSETVLIInfos"); @@ -178,6 +202,15 @@ class VSETVLIInfo { return getSEWLMULRatio() == Other.getSEWLMULRatio(); } + bool hasSamePolicy(const VSETVLIInfo &Other) const { + assert(isValid() && Other.isValid() && + "Can't compare invalid VSETVLIInfos"); + assert(!isUnknown() && !Other.isUnknown() && + "Can't compare VTYPE in unknown state"); + return TailAgnostic == Other.TailAgnostic && + MaskAgnostic == Other.MaskAgnostic; + } + bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const { // Simple case, see if full VTYPE matches. if (hasSameVTYPE(InstrInfo)) @@ -222,6 +255,15 @@ class VSETVLIInfo { return true; } + // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0. 
+ // So it's compatible when we could make sure that both VL be the same + // situation. + if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() && + ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) || + (hasZeroAVL() && InstrInfo.hasZeroAVL())) && + hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo)) + return true; + // The AVL must match. if (!hasSameAVL(InstrInfo)) return false; @@ -414,6 +456,42 @@ static MachineInstr *elideCopies(MachineInstr *MI, } } +static bool isScalarMoveInstr(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case RISCV::PseudoVMV_S_X_M1: + case RISCV::PseudoVMV_S_X_M2: + case RISCV::PseudoVMV_S_X_M4: + case RISCV::PseudoVMV_S_X_M8: + case RISCV::PseudoVMV_S_X_MF2: + case RISCV::PseudoVMV_S_X_MF4: + case RISCV::PseudoVMV_S_X_MF8: + case RISCV::PseudoVFMV_F16_S_M1: + case RISCV::PseudoVFMV_F16_S_M2: + case RISCV::PseudoVFMV_F16_S_M4: + case RISCV::PseudoVFMV_F16_S_M8: + case RISCV::PseudoVFMV_F16_S_MF2: + case RISCV::PseudoVFMV_F16_S_MF4: + case RISCV::PseudoVFMV_F16_S_MF8: + case RISCV::PseudoVFMV_F32_S_M1: + case RISCV::PseudoVFMV_F32_S_M2: + case RISCV::PseudoVFMV_F32_S_M4: + case RISCV::PseudoVFMV_F32_S_M8: + case RISCV::PseudoVFMV_F32_S_MF2: + case RISCV::PseudoVFMV_F32_S_MF4: + case RISCV::PseudoVFMV_F32_S_MF8: + case RISCV::PseudoVFMV_F64_S_M1: + case RISCV::PseudoVFMV_F64_S_M2: + case RISCV::PseudoVFMV_F64_S_M4: + case RISCV::PseudoVFMV_F64_S_M8: + case RISCV::PseudoVFMV_F64_S_MF2: + case RISCV::PseudoVFMV_F64_S_MF4: + case RISCV::PseudoVFMV_F64_S_MF8: + return true; + } +} + static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, const MachineRegisterInfo *MRI) { VSETVLIInfo InstrInfo; @@ -461,6 +539,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, // If there are no explicit defs, this is a store instruction which can // ignore the tail and mask policies. 
bool StoreOp = MI.getNumExplicitDefs() == 0; + bool ScalarMovOp = isScalarMoveInstr(MI); if (RISCVII::hasVLOp(TSFlags)) { const MachineOperand &VLOp = MI.getOperand(NumOperands - 2); @@ -477,7 +556,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, } else InstrInfo.setAVLReg(RISCV::NoRegister); InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic, - /*MaskAgnostic*/ false, MaskRegOp, StoreOp); + /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp); return InstrInfo; } @@ -1000,6 +1079,13 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); NeedInsertVSETVLI = false; } + if (isScalarMoveInstr(MI) && + ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) || + (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) && + NewInfo.hasSameVLMAX(CurInfo)) { + PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); + NeedInsertVSETVLI = false; + } } if (NeedInsertVSETVLI) insertVSETVLI(MBB, MI, NewInfo, CurInfo); diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll index a18398c1e241..8aa548ec090d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll +++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll @@ -27,12 +27,10 @@ define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu ; CHECK-NEXT: vslideup.vi v12, v8, 8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vrgather.vv v8, v20, v16 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addiw a0, a0, -1366 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vrgather.vv v8, v20, v16 ; CHECK-NEXT: vrgather.vv v8, v12, v18, v0.t ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 10ccb1d85d11..568a393f4bb4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -513,13 +513,12 @@ define void @buildvec_seq_v9i8(<9 x i8>* %x) { ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vmv.v.i v8, 2 -; RV32-NEXT: vmerge.vim v8, v8, 1, v0 +; RV32-NEXT: vmv.v.i v9, 2 ; RV32-NEXT: li a1, 36 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 +; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmerge.vim v8, v9, 3, v0 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 378239a2d745..67e70969c42a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -319,9 +319,7 @@ define <4 x i8> @interleave_shuffles(<4 x i8> %x) { ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vmv.v.x v8, a0 @@ -401,11 +399,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, mu ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; 
CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -439,11 +435,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vrgather.vv v10, v8, v11 ; RV32-NEXT: li a0, 66 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vrgather.vv v10, v8, v11 ; RV32-NEXT: vrgather.vi v10, v9, 0, v0.t ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret @@ -455,11 +449,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vmv.v.x v11, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: vrgather.vv v10, v8, v11 ; RV64-NEXT: li a0, 66 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vrgather.vv v10, v8, v11 ; RV64-NEXT: vrgather.vi v10, v9, 0, v0.t ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret @@ -502,11 +494,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV32-NEXT: vmv.v.x v12, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vrgather.vv v10, v8, v12 ; RV32-NEXT: li a0, 98 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vrgather.vv v10, v8, v12 ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret @@ -524,11 +514,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: li a0, 98 
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index cf068032e9e4..e26a232d8a94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -4114,22 +4114,16 @@ define void @mulhu_v16i16(<16 x i16>* %x) { ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) ; LMULMAX2-RV32-NEXT: lui a1, 2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 289 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 3 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v8, 2, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 ; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI130_0) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI130_0) @@ -4153,22 +4147,16 @@ define void @mulhu_v16i16(<16 x i16>* %x) { ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) ; LMULMAX2-RV64-NEXT: lui a1, 2 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; 
LMULMAX2-RV64-NEXT: vmv.v.i v8, 3 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v8, 2, v0 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v8, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 ; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI130_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI130_0) @@ -4531,11 +4519,9 @@ define void @mulhs_v16i16(<16 x i16>* %x) { ; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX2-RV32-NEXT: lui a1, 7 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1687 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 5 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1755 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: lui a1, 1048571 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1755 @@ -4553,11 +4539,9 @@ define void @mulhs_v16i16(<16 x i16>* %x) { ; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX2-RV64-NEXT: lui a1, 7 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV64-NEXT: lui a1, 5 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64-NEXT: lui a1, 1048571 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index 
3ccc14103327..32c073d59241 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -256,9 +256,8 @@ define <2 x i64> @mgather_v2i64_align4(<2 x i64*> %ptrs, <2 x i1> %m, <2 x i64> ; RV64-NEXT: lwu a0, 0(a0) ; RV64-NEXT: slli a1, a1, 32 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; RV64-NEXT: vslideup.vi v9, v8, 1 ; RV64-NEXT: .LBB5_4: # %else2 ; RV64-NEXT: vmv1r.v v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index beab37712fd5..7b97b72c9587 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -147,8 +147,7 @@ for.body: ; preds = %entry, %for.body define @test7( %a, i64 %b, %mask) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu -; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, mu +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, mu ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret entry: @@ -163,8 +162,7 @@ entry: define @test8( %a, i64 %b, %mask) nounwind { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli a1, 6, e64, m1, ta, mu -; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu +; CHECK-NEXT: vsetivli a1, 6, e64, m1, tu, mu ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret entry: @@ -178,7 +176,6 @@ define @test9( %a, i64 %b, Date: Thu, 30 Dec 2021 13:39:39 +0300 Subject: [PATCH 221/992] [BitcodeReader] `bitc::CST_CODE_INLINEASM`: un-hardcode offsets --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 1684f04d5ea8..3360ad1c2350 100644 --- 
a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2860,22 +2860,26 @@ Error BitcodeReader::parseConstants() { case bitc::CST_CODE_INLINEASM: { if (Record.size() < 2) return error("Invalid record"); + unsigned OpNum = 0; std::string AsmStr, ConstrStr; - bool HasSideEffects = Record[0] & 1; - bool IsAlignStack = (Record[0] >> 1) & 1; - unsigned AsmDialect = (Record[0] >> 2) & 1; - bool CanThrow = (Record[0] >> 3) & 1; - unsigned AsmStrSize = Record[1]; - if (2 + AsmStrSize >= Record.size()) + bool HasSideEffects = Record[OpNum] & 1; + bool IsAlignStack = (Record[OpNum] >> 1) & 1; + unsigned AsmDialect = (Record[OpNum] >> 2) & 1; + bool CanThrow = (Record[OpNum] >> 3) & 1; + ++OpNum; + unsigned AsmStrSize = Record[OpNum]; + ++OpNum; + if (OpNum + AsmStrSize >= Record.size()) return error("Invalid record"); - unsigned ConstStrSize = Record[2 + AsmStrSize]; - if (3 + AsmStrSize + ConstStrSize > Record.size()) + unsigned ConstStrSize = Record[OpNum + AsmStrSize]; + if (OpNum + 1 + AsmStrSize + ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) - AsmStr += (char)Record[2 + i]; + AsmStr += (char)Record[OpNum + i]; + ++OpNum; for (unsigned i = 0; i != ConstStrSize; ++i) - ConstrStr += (char)Record[3 + AsmStrSize + i]; + ConstrStr += (char)Record[OpNum + AsmStrSize + i]; UpgradeInlineAsmString(&AsmStr); V = InlineAsm::get( cast(cast(CurTy)->getElementType()), From 62b1682570b1059e8c6542192159dcde32c13c30 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 30 Dec 2021 13:42:11 +0300 Subject: [PATCH 222/992] [Opaqueptrs][IR Serialization] Improve inlineasm [de]serialization The bitcode reader expected that the pointers are typed, so that it can extract the function type for the assembly so `bitc::CST_CODE_INLINEASM` did not explicitly store said function type. 
I'm not really sure how the upgrade path will look for existing bitcode, but i think we can easily support opaque pointers going forward, by simply storing the function type. Reviewed By: #opaque-pointers, nikic Differential Revision: https://reviews.llvm.org/D116341 --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 8 +++-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 40 +++++++++++++++++++++-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1 + llvm/test/Bitcode/callbr.ll | 9 +++-- 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 7301618d337a..6d0f51ce9c6d 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -381,10 +381,14 @@ enum ConstantsCodes { CST_CODE_CE_UNOP = 25, // CE_UNOP: [opcode, opval] CST_CODE_POISON = 26, // POISON CST_CODE_DSO_LOCAL_EQUIVALENT = 27, // DSO_LOCAL_EQUIVALENT [gvty, gv] - CST_CODE_INLINEASM = 28, // INLINEASM: [sideeffect|alignstack| + CST_CODE_INLINEASM_OLD3 = 28, // INLINEASM: [sideeffect|alignstack| + // asmdialect|unwind, + // asmstr,conststr] + CST_CODE_NO_CFI_VALUE = 29, // NO_CFI [ fty, f ] + CST_CODE_INLINEASM = 30, // INLINEASM: [fnty, + // sideeffect|alignstack| // asmdialect|unwind, // asmstr,conststr] - CST_CODE_NO_CFI_VALUE = 29, // NO_CFI [ fty, f ] }; /// CastOpcodes - These are values used in the bitcode files to encode which diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 3360ad1c2350..93b3dbf525f1 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2824,6 +2824,7 @@ Error BitcodeReader::parseConstants() { for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; UpgradeInlineAsmString(&AsmStr); + // FIXME: support upgrading in opaque pointers mode. 
V = InlineAsm::get( cast(cast(CurTy)->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack); @@ -2850,6 +2851,7 @@ Error BitcodeReader::parseConstants() { for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; UpgradeInlineAsmString(&AsmStr); + // FIXME: support upgrading in opaque pointers mode. V = InlineAsm::get( cast(cast(CurTy)->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack, @@ -2857,7 +2859,7 @@ Error BitcodeReader::parseConstants() { break; } // This version adds support for the unwind keyword. - case bitc::CST_CODE_INLINEASM: { + case bitc::CST_CODE_INLINEASM_OLD3: { if (Record.size() < 2) return error("Invalid record"); unsigned OpNum = 0; @@ -2881,12 +2883,46 @@ Error BitcodeReader::parseConstants() { for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[OpNum + AsmStrSize + i]; UpgradeInlineAsmString(&AsmStr); + // FIXME: support upgrading in opaque pointers mode. V = InlineAsm::get( cast(cast(CurTy)->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack, InlineAsm::AsmDialect(AsmDialect), CanThrow); break; } + // This version adds explicit function type. 
+ case bitc::CST_CODE_INLINEASM: { + if (Record.size() < 3) + return error("Invalid record"); + unsigned OpNum = 0; + auto *FnTy = dyn_cast_or_null(getTypeByID(Record[OpNum])); + ++OpNum; + if (!FnTy) + return error("Invalid record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[OpNum] & 1; + bool IsAlignStack = (Record[OpNum] >> 1) & 1; + unsigned AsmDialect = (Record[OpNum] >> 2) & 1; + bool CanThrow = (Record[OpNum] >> 3) & 1; + ++OpNum; + unsigned AsmStrSize = Record[OpNum]; + ++OpNum; + if (OpNum + AsmStrSize >= Record.size()) + return error("Invalid record"); + unsigned ConstStrSize = Record[OpNum + AsmStrSize]; + if (OpNum + 1 + AsmStrSize + ConstStrSize > Record.size()) + return error("Invalid record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[OpNum + i]; + ++OpNum; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[OpNum + AsmStrSize + i]; + UpgradeInlineAsmString(&AsmStr); + V = InlineAsm::get(FnTy, AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + InlineAsm::AsmDialect(AsmDialect), CanThrow); + break; + } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return error("Invalid record"); @@ -4783,7 +4819,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { cast(Callee->getType())->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); - } else if (cast(Callee->getType())->getElementType() != FTy) + } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy)) return error("Explicit call type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index dc06bc10cf95..e0efdf286caf 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2458,6 +2458,7 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, } if (const InlineAsm *IA 
= dyn_cast(V)) { + Record.push_back(VE.getTypeID(IA->getFunctionType())); Record.push_back( unsigned(IA->hasSideEffects()) | unsigned(IA->isAlignStack()) << 1 | unsigned(IA->getDialect() & 1) << 2 | unsigned(IA->canThrow()) << 3); diff --git a/llvm/test/Bitcode/callbr.ll b/llvm/test/Bitcode/callbr.ll index ecc397ac7546..648d2f11b0d4 100644 --- a/llvm/test/Bitcode/callbr.ll +++ b/llvm/test/Bitcode/callbr.ll @@ -1,10 +1,13 @@ -; RUN: llvm-dis < %s.bc | FileCheck %s - +; RUN: llvm-dis < %s.bc | FileCheck %s --check-prefixes=CHECK,CHECK-TYPED ; callbr.ll.bc was generated by passing this file to llvm-as. +; RUN: llvm-as < %s | llvm-dis | FileCheck %s --check-prefixes=CHECK,CHECK-TYPED +; RUN: llvm-as -opaque-pointers < %s | llvm-dis -opaque-pointers | FileCheck %s --check-prefixes=CHECK,CHECK-OPAQUE + define i32 @test_asm_goto(i32 %x){ entry: -; CHECK: callbr void asm "", "r,X"(i32 %x, i8* blockaddress(@test_asm_goto, %fail)) +; CHECK-TYPED: callbr void asm "", "r,X"(i32 %x, i8* blockaddress(@test_asm_goto, %fail)) +; CHECK-OPAQUE: callbr void asm "", "r,X"(i32 %x, ptr blockaddress(@test_asm_goto, %fail)) ; CHECK-NEXT: to label %normal [label %fail] callbr void asm "", "r,X"(i32 %x, i8* blockaddress(@test_asm_goto, %fail)) to label %normal [label %fail] normal: From d7dbe2c4a00ba2abd998328ad6b8023637bc71d9 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 30 Dec 2021 11:22:26 +0100 Subject: [PATCH 223/992] [lldb] Remove lldbtest.getBuildFlags It was being used only in some very old tests (which pass even without it) and its implementation is highly questionable. These days we have different mechanisms for requesting a build with a particular kind of c++ library (USE_LIB(STD)CPP in the makefile). 
--- .../Python/lldbsuite/test/lldbtest.py | 45 ------------------- .../platform/TestDefaultCacheLineSize.py | 3 +- .../TestDynamicValueChildCount.py | 2 +- .../thread/backtrace_all/TestBacktraceAll.py | 2 +- .../break_after_join/TestBreakAfterJoin.py | 2 +- ...rrentBreakpointDelayBreakpointOneSignal.py | 2 +- ...rentBreakpointOneDelayBreakpointThreads.py | 2 +- ...eakpointsDelayedBreakpointOneWatchpoint.py | 2 +- .../TestConcurrentCrashWithBreak.py | 2 +- .../TestConcurrentCrashWithSignal.py | 2 +- .../TestConcurrentCrashWithWatchpoint.py | 2 +- ...rentCrashWithWatchpointBreakpointSignal.py | 2 +- .../TestConcurrentDelaySignalBreak.py | 2 +- .../TestConcurrentDelaySignalWatch.py | 2 +- .../TestConcurrentDelayWatchBreak.py | 2 +- ...currentDelayedCrashWithBreakpointSignal.py | 2 +- ...entDelayedCrashWithBreakpointWatchpoint.py | 2 +- .../TestConcurrentManyBreakpoints.py | 2 +- .../TestConcurrentManyCrash.py | 2 +- .../TestConcurrentManySignals.py | 2 +- .../TestConcurrentManyWatchpoints.py | 2 +- .../TestConcurrentNWatchNBreak.py | 2 +- .../TestConcurrentSignalBreak.py | 2 +- .../TestConcurrentSignalDelayBreak.py | 2 +- .../TestConcurrentSignalDelayWatch.py | 2 +- .../TestConcurrentSignalNWatchNBreak.py | 2 +- .../TestConcurrentSignalWatch.py | 2 +- .../TestConcurrentSignalWatchBreak.py | 2 +- .../TestConcurrentTwoBreakpointThreads.py | 2 +- ...tConcurrentTwoBreakpointsOneDelaySignal.py | 2 +- .../TestConcurrentTwoBreakpointsOneSignal.py | 2 +- ...stConcurrentTwoBreakpointsOneWatchpoint.py | 2 +- .../TestConcurrentTwoWatchpointThreads.py | 2 +- ...stConcurrentTwoWatchpointsOneBreakpoint.py | 2 +- ...currentTwoWatchpointsOneDelayBreakpoint.py | 2 +- .../TestConcurrentTwoWatchpointsOneSignal.py | 2 +- .../TestConcurrentWatchBreak.py | 2 +- .../TestConcurrentWatchBreakDelay.py | 2 +- ...tWatchpointDelayWatchpointOneBreakpoint.py | 2 +- ...entWatchpointWithDelayWatchpointThreads.py | 2 +- .../crash_during_step/TestCrashDuringStep.py | 2 +- .../TestCreateAfterAttach.py | 
2 +- .../TestCreateDuringStep.py | 6 +-- .../exit_during_break/TestExitDuringBreak.py | 2 +- .../exit_during_step/TestExitDuringStep.py | 6 +-- .../thread/jump/TestThreadJump.py | 2 +- .../multi_break/TestMultipleBreakpoints.py | 2 +- .../thread/state/TestThreadStates.py | 10 ++--- .../thread/step_out/TestThreadStepOut.py | 6 +-- .../thread/thread_exit/TestThreadExit.py | 2 +- .../cpp/dynamic-value/TestDynamicValue.py | 2 +- .../TestCreateDuringInstructionStep.py | 2 +- 52 files changed, 62 insertions(+), 106 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index d8cf4aa6beb8..950dd41666fd 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1584,51 +1584,6 @@ def yaml2obj(self, yaml_path, obj_path, max_size=None): command += ["--max-size=%d" % max_size] self.runBuildCommand(command) - def getBuildFlags( - self, - use_cpp11=True, - use_libcxx=False, - use_libstdcxx=False): - """ Returns a dictionary (which can be provided to build* functions above) which - contains OS-specific build flags. 
- """ - cflags = "" - ldflags = "" - - # On Mac OS X, unless specifically requested to use libstdc++, use - # libc++ - if not use_libstdcxx and self.platformIsDarwin(): - use_libcxx = True - - if use_libcxx and self.libcxxPath: - cflags += "-stdlib=libc++ " - if self.libcxxPath: - libcxxInclude = os.path.join(self.libcxxPath, "include") - libcxxLib = os.path.join(self.libcxxPath, "lib") - if os.path.isdir(libcxxInclude) and os.path.isdir(libcxxLib): - cflags += "-nostdinc++ -I%s -L%s -Wl,-rpath,%s " % ( - libcxxInclude, libcxxLib, libcxxLib) - - if use_cpp11: - cflags += "-std=" - if "gcc" in self.getCompiler() and "4.6" in self.getCompilerVersion(): - cflags += "c++0x" - else: - cflags += "c++11" - if self.platformIsDarwin() or self.getPlatform() == "freebsd": - cflags += " -stdlib=libc++" - elif self.getPlatform() == "openbsd": - cflags += " -stdlib=libc++" - elif self.getPlatform() == "netbsd": - # NetBSD defaults to libc++ - pass - elif "clang" in self.getCompiler(): - cflags += " -stdlib=libstdc++" - - return {'CFLAGS_EXTRAS': cflags, - 'LD_EXTRAS': ldflags, - } - def cleanup(self, dictionary=None): """Platform specific way to do cleanup after build.""" module = builder_module() diff --git a/lldb/test/API/android/platform/TestDefaultCacheLineSize.py b/lldb/test/API/android/platform/TestDefaultCacheLineSize.py index eccc002f1d55..875d1e54bd7b 100644 --- a/lldb/test/API/android/platform/TestDefaultCacheLineSize.py +++ b/lldb/test/API/android/platform/TestDefaultCacheLineSize.py @@ -13,10 +13,11 @@ class DefaultCacheLineSizeTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True @skipUnlessTargetAndroid def test_cache_line_size(self): - self.build(dictionary=self.getBuildFlags()) + self.build() target = self.createTestTarget() self.assertTrue(target and target.IsValid(), "Target is valid") diff --git a/lldb/test/API/functionalities/dynamic_value_child_count/TestDynamicValueChildCount.py 
b/lldb/test/API/functionalities/dynamic_value_child_count/TestDynamicValueChildCount.py index 0d401031874a..d444b3c50bde 100644 --- a/lldb/test/API/functionalities/dynamic_value_child_count/TestDynamicValueChildCount.py +++ b/lldb/test/API/functionalities/dynamic_value_child_count/TestDynamicValueChildCount.py @@ -34,7 +34,7 @@ def setUp(self): def test_get_dynamic_vals(self): """Test fetching C++ dynamic values from pointers & references.""" """Get argument vals for the call stack when stopped on a breakpoint.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") # Create a target from the debugger. diff --git a/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py b/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py index 372244a4c8ca..f34d4e0b81b4 100644 --- a/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py +++ b/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py @@ -26,7 +26,7 @@ def setUp(self): # TODO: Change the test to don't depend on std::future def test(self): """Test breakpoint handling after a thread join.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/functionalities/thread/break_after_join/TestBreakAfterJoin.py b/lldb/test/API/functionalities/thread/break_after_join/TestBreakAfterJoin.py index bdcf91c123ec..4dc90cd16be0 100644 --- a/lldb/test/API/functionalities/thread/break_after_join/TestBreakAfterJoin.py +++ b/lldb/test/API/functionalities/thread/break_after_join/TestBreakAfterJoin.py @@ -32,7 +32,7 @@ def setUp(self): @expectedFailureNetBSD def test(self): """Test breakpoint handling after a thread join.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git 
a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointDelayBreakpointOneSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointDelayBreakpointOneSignal.py index 4a7a5b91f9f6..e265f095a93b 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointDelayBreakpointOneSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointDelayBreakpointOneSignal.py @@ -15,7 +15,7 @@ class ConcurrentBreakpointDelayBreakpointOneSignal(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """Test two threads that trigger a breakpoint (one with a 1 second delay) and one signal thread. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=1, num_delay_breakpoint_threads=1, num_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointOneDelayBreakpointThreads.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointOneDelayBreakpointThreads.py index 843a657cd1ff..422ac24b4581 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointOneDelayBreakpointThreads.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointOneDelayBreakpointThreads.py @@ -15,6 +15,6 @@ class ConcurrentBreakpointOneDelayBreakpointThreads(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """Test threads that trigger a breakpoint where one thread has a 1 second delay. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=1, num_delay_breakpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointsDelayedBreakpointOneWatchpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointsDelayedBreakpointOneWatchpoint.py index c48833fc3d77..68fc4a98f446 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointsDelayedBreakpointOneWatchpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentBreakpointsDelayedBreakpointOneWatchpoint.py @@ -17,7 +17,7 @@ class ConcurrentBreakpointsDelayedBreakpointOneWatchpoint( @add_test_categories(["watchpoint"]) def test(self): """Test a breakpoint, a delayed breakpoint, and one watchpoint thread. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=1, num_delay_breakpoint_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithBreak.py index eef3f8e7f8d4..694562998f8a 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithBreak.py @@ -15,5 +15,5 @@ class ConcurrentCrashWithBreak(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """ Test a thread that crashes while another thread hits a breakpoint.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_crash_threads=1, num_breakpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithSignal.py index f70e8e9eb280..49aeb191f983 
100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithSignal.py @@ -15,5 +15,5 @@ class ConcurrentCrashWithSignal(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """ Test a thread that crashes while another thread generates a signal.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_crash_threads=1, num_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpoint.py index 02ad3acc5e9d..424e62cbc9ad 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpoint.py @@ -16,5 +16,5 @@ class ConcurrentCrashWithWatchpoint(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """ Test a thread that crashes while another thread hits a watchpoint.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_crash_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpointBreakpointSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpointBreakpointSignal.py index 847972414571..c5e73bda7d88 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpointBreakpointSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentCrashWithWatchpointBreakpointSignal.py @@ -16,7 +16,7 @@ class ConcurrentCrashWithWatchpointBreakpointSignal(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """ Test a thread that crashes while other threads generate a signal and 
hit a watchpoint and breakpoint. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_crash_threads=1, num_breakpoint_threads=1, num_signal_threads=1, diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalBreak.py index 8f0d3ff36733..58d06dfea86f 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalBreak.py @@ -15,7 +15,7 @@ class ConcurrentDelaySignalBreak(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """Test (1-second delay) signal and a breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_breakpoint_threads=1, num_delay_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalWatch.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalWatch.py index 6058a1a551e3..ffb8e825d0d2 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalWatch.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelaySignalWatch.py @@ -16,7 +16,7 @@ class ConcurrentDelaySignalWatch(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test a watchpoint and a (1 second delay) signal in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_delay_signal_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayWatchBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayWatchBreak.py index 18f6f12907fe..c4feb3b8f488 100644 --- 
a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayWatchBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayWatchBreak.py @@ -20,7 +20,7 @@ class ConcurrentDelayWatchBreak(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test (1-second delay) watchpoint and a breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_breakpoint_threads=1, num_delay_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointSignal.py index 9204c799d542..3de34331bbc7 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointSignal.py @@ -15,7 +15,7 @@ class ConcurrentDelayedCrashWithBreakpointSignal(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """ Test a thread with a delayed crash while other threads generate a signal and hit a breakpoint. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_delay_crash_threads=1, num_breakpoint_threads=1, num_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointWatchpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointWatchpoint.py index 1ba69e31e53f..dce6e93793dc 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointWatchpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentDelayedCrashWithBreakpointWatchpoint.py @@ -16,7 +16,7 @@ class ConcurrentDelayedCrashWithBreakpointWatchpoint(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """ Test a thread with a delayed crash while other threads hit a watchpoint and a breakpoint. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_delay_crash_threads=1, num_breakpoint_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py index c1b22ced5284..904c06cc0126 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py @@ -18,5 +18,5 @@ class ConcurrentManyBreakpoints(ConcurrentEventsBase): bugnumber="llvm.org/pr49433") def test(self): """Test 100 breakpoints from 100 threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=100) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyCrash.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyCrash.py index 0530728b6acd..a5990929e33f 100644 --- 
a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyCrash.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyCrash.py @@ -16,5 +16,5 @@ class ConcurrentManyCrash(ConcurrentEventsBase): @skipIfOutOfTreeDebugserver def test(self): """Test 100 threads that cause a segfault.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_crash_threads=100) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManySignals.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManySignals.py index ec06227ec547..85e07fcf5e61 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManySignals.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManySignals.py @@ -19,5 +19,5 @@ class ConcurrentManySignals(ConcurrentEventsBase): @skipIfOutOfTreeDebugserver def test(self): """Test 100 signals from 100 threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_signal_threads=100) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py index 79a5c3e90f2c..18a683864c33 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyWatchpoints.py @@ -20,5 +20,5 @@ class ConcurrentManyWatchpoints(ConcurrentEventsBase): @skipIfOutOfTreeDebugserver def test(self): """Test 100 watchpoints from 100 threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_watchpoint_threads=100) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py index 
ab11cae8819b..2d53cfa4802a 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentNWatchNBreak.py @@ -22,6 +22,6 @@ class ConcurrentNWatchNBreak(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test with 5 watchpoint and breakpoint threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_watchpoint_threads=5, num_breakpoint_threads=5) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalBreak.py index 3d63fbd37d41..2ded95428a18 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalBreak.py @@ -15,5 +15,5 @@ class ConcurrentSignalBreak(ConcurrentEventsBase): @skipIf(triple='^mips') def test(self): """Test signal and a breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=1, num_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayBreak.py index eddb9d52b743..ed633e628797 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayBreak.py @@ -16,7 +16,7 @@ class ConcurrentSignalDelayBreak(ConcurrentEventsBase): @expectedFlakeyNetBSD def test(self): """Test signal and a (1 second delay) breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_delay_breakpoint_threads=1, num_signal_threads=1) diff --git 
a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayWatch.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayWatch.py index 8cdbd0c9fd68..3207cb4b0ea6 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayWatch.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalDelayWatch.py @@ -17,7 +17,7 @@ class ConcurrentSignalDelayWatch(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test a (1 second delay) watchpoint and a signal in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_signal_threads=1, num_delay_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py index fa2e022304fe..0645274ae7e5 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalNWatchNBreak.py @@ -23,7 +23,7 @@ class ConcurrentSignalNWatchNBreak(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test one signal thread with 5 watchpoint and breakpoint threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_signal_threads=1, num_watchpoint_threads=5, num_breakpoint_threads=5) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py index 8e5c34b21e8a..be59daace974 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatch.py @@ -20,5 +20,5 @@ class 
ConcurrentSignalWatch(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test a watchpoint and a signal in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_signal_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py index 806ff952e3bc..120b48514539 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentSignalWatchBreak.py @@ -21,7 +21,7 @@ class ConcurrentSignalWatchBreak(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test a signal/watchpoint/breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_signal_threads=1, num_watchpoint_threads=1, num_breakpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointThreads.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointThreads.py index 1f6832d9ecdb..70c474566506 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointThreads.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointThreads.py @@ -17,5 +17,5 @@ class ConcurrentTwoBreakpointThreads(ConcurrentEventsBase): bugnumber="llvm.org/pr49433") def test(self): """Test two threads that trigger a breakpoint. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=2) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneDelaySignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneDelaySignal.py index 10925e4ecd24..6375d795ab49 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneDelaySignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneDelaySignal.py @@ -18,7 +18,7 @@ class ConcurrentTwoBreakpointsOneDelaySignal(ConcurrentEventsBase): bugnumber="llvm.org/pr49433") def test(self): """Test two threads that trigger a breakpoint and one (1 second delay) signal thread. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_breakpoint_threads=2, num_delay_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneSignal.py index 616b47874a95..688d4ded04dd 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneSignal.py @@ -18,5 +18,5 @@ class ConcurrentTwoBreakpointsOneSignal(ConcurrentEventsBase): bugnumber="llvm.org/pr49433") def test(self): """Test two threads that trigger a breakpoint and one signal thread. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_breakpoint_threads=2, num_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneWatchpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneWatchpoint.py index 72e537f067d8..54a8e09f5037 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneWatchpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoBreakpointsOneWatchpoint.py @@ -18,7 +18,7 @@ class ConcurrentTwoBreakpointsOneWatchpoint(ConcurrentEventsBase): bugnumber="llvm.org/pr49433") def test(self): """Test two threads that trigger a breakpoint and one watchpoint thread. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_breakpoint_threads=2, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py index b2098fa958cf..7deeacfc67d5 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointThreads.py @@ -20,5 +20,5 @@ class ConcurrentTwoWatchpointThreads(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_watchpoint_threads=2) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py index 1d67ef925c31..0e8cbf37a197 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneBreakpoint.py @@ -20,7 +20,7 @@ class ConcurrentTwoWatchpointsOneBreakpoint(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint and one breakpoint thread. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_watchpoint_threads=2, num_breakpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py index 37191362316b..a4baa5e9f4dd 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneDelayBreakpoint.py @@ -20,7 +20,7 @@ class ConcurrentTwoWatchpointsOneDelayBreakpoint(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint and one (1 second delay) breakpoint thread. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_watchpoint_threads=2, num_delay_breakpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py index b8e1572b920f..62e7cfb6f3b4 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentTwoWatchpointsOneSignal.py @@ -21,5 +21,5 @@ class ConcurrentTwoWatchpointsOneSignal(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint and one signal thread. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_watchpoint_threads=2, num_signal_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreak.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreak.py index f3ddee050f9a..05754c81616e 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreak.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreak.py @@ -16,7 +16,7 @@ class ConcurrentWatchBreak(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test watchpoint and a breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_breakpoint_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreakDelay.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreakDelay.py index 84a92d7f06c0..32c3f00c0397 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreakDelay.py +++ 
b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchBreakDelay.py @@ -16,7 +16,7 @@ class ConcurrentWatchBreakDelay(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test watchpoint and a (1 second delay) breakpoint in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions( num_delay_breakpoint_threads=1, num_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointDelayWatchpointOneBreakpoint.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointDelayWatchpointOneBreakpoint.py index 234365166b33..b3db954de4ea 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointDelayWatchpointOneBreakpoint.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointDelayWatchpointOneBreakpoint.py @@ -16,7 +16,7 @@ class ConcurrentWatchpointDelayWatchpointOneBreakpoint(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint (one with a 1 second delay) and one breakpoint thread. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_watchpoint_threads=1, num_delay_watchpoint_threads=1, num_breakpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointWithDelayWatchpointThreads.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointWithDelayWatchpointThreads.py index 765182a9a7ff..c3205d6c4993 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointWithDelayWatchpointThreads.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentWatchpointWithDelayWatchpointThreads.py @@ -16,6 +16,6 @@ class ConcurrentWatchpointWithDelayWatchpointThreads(ConcurrentEventsBase): @add_test_categories(["watchpoint"]) def test(self): """Test two threads that trigger a watchpoint where one thread has a 1 second delay. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.do_thread_actions(num_watchpoint_threads=1, num_delay_watchpoint_threads=1) diff --git a/lldb/test/API/functionalities/thread/crash_during_step/TestCrashDuringStep.py b/lldb/test/API/functionalities/thread/crash_during_step/TestCrashDuringStep.py index 3394bb2d6aa7..6a3ef9527746 100644 --- a/lldb/test/API/functionalities/thread/crash_during_step/TestCrashDuringStep.py +++ b/lldb/test/API/functionalities/thread/crash_during_step/TestCrashDuringStep.py @@ -22,7 +22,7 @@ def setUp(self): @expectedFailureAll(triple=re.compile('^mips')) def test_step_inst_with(self): """Test thread creation during step-inst handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") target = self.dbg.CreateTarget(exe) diff --git a/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py b/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py index e9c85d5025a8..efa3d9348b27 100644 --- 
a/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py +++ b/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py @@ -28,7 +28,7 @@ def setUp(self): @expectedFailureNetBSD def test_create_after_attach(self): """Test thread creation after process attach.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) + self.build() exe = self.getBuildArtifact("a.out") # Spawn a new process diff --git a/lldb/test/API/functionalities/thread/create_during_step/TestCreateDuringStep.py b/lldb/test/API/functionalities/thread/create_during_step/TestCreateDuringStep.py index 2ec2f182cea9..dc67afc55f9e 100644 --- a/lldb/test/API/functionalities/thread/create_during_step/TestCreateDuringStep.py +++ b/lldb/test/API/functionalities/thread/create_during_step/TestCreateDuringStep.py @@ -29,7 +29,7 @@ class CreateDuringStepTestCase(TestBase): @expectedFailureNetBSD def test_step_inst(self): """Test thread creation during step-inst handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.create_during_step_base( "thread step-inst -m all-threads", 'stop reason = instruction step') @@ -49,7 +49,7 @@ def test_step_inst(self): @expectedFailureNetBSD def test_step_over(self): """Test thread creation during step-over handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.create_during_step_base( "thread step-over -m all-threads", 'stop reason = step over') @@ -69,7 +69,7 @@ def test_step_over(self): @expectedFailureNetBSD def test_step_in(self): """Test thread creation during step-in handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.create_during_step_base( "thread step-in -m all-threads", 'stop reason = step in') diff --git a/lldb/test/API/functionalities/thread/exit_during_break/TestExitDuringBreak.py b/lldb/test/API/functionalities/thread/exit_during_break/TestExitDuringBreak.py index e839dc49dab0..16baf15a4d2a 100644 --- 
a/lldb/test/API/functionalities/thread/exit_during_break/TestExitDuringBreak.py +++ b/lldb/test/API/functionalities/thread/exit_during_break/TestExitDuringBreak.py @@ -22,7 +22,7 @@ def setUp(self): def test(self): """Test thread exit during breakpoint handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/functionalities/thread/exit_during_step/TestExitDuringStep.py b/lldb/test/API/functionalities/thread/exit_during_step/TestExitDuringStep.py index 49d6a7d1bd99..a28977737985 100644 --- a/lldb/test/API/functionalities/thread/exit_during_step/TestExitDuringStep.py +++ b/lldb/test/API/functionalities/thread/exit_during_step/TestExitDuringStep.py @@ -17,7 +17,7 @@ class ExitDuringStepTestCase(TestBase): @skipIfWindows # This is flakey on Windows: llvm.org/pr38373 def test(self): """Test thread exit during step handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.exit_during_step_base( "thread step-inst -m all-threads", 'stop reason = instruction step', @@ -26,7 +26,7 @@ def test(self): @skipIfWindows # This is flakey on Windows: llvm.org/pr38373 def test_step_over(self): """Test thread exit during step-over handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.exit_during_step_base( "thread step-over -m all-threads", 'stop reason = step over', @@ -35,7 +35,7 @@ def test_step_over(self): @skipIfWindows # This is flakey on Windows: llvm.org/pr38373 def test_step_in(self): """Test thread exit during step-in handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.exit_during_step_base( "thread step-in -m all-threads", 'stop reason = step in', diff --git a/lldb/test/API/functionalities/thread/jump/TestThreadJump.py b/lldb/test/API/functionalities/thread/jump/TestThreadJump.py index 2035435442f5..44bef6bf9de3 100644 --- 
a/lldb/test/API/functionalities/thread/jump/TestThreadJump.py +++ b/lldb/test/API/functionalities/thread/jump/TestThreadJump.py @@ -16,7 +16,7 @@ class ThreadJumpTestCase(TestBase): def test(self): """Test thread jump handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/functionalities/thread/multi_break/TestMultipleBreakpoints.py b/lldb/test/API/functionalities/thread/multi_break/TestMultipleBreakpoints.py index 442411e6b524..61cdc882501b 100644 --- a/lldb/test/API/functionalities/thread/multi_break/TestMultipleBreakpoints.py +++ b/lldb/test/API/functionalities/thread/multi_break/TestMultipleBreakpoints.py @@ -33,7 +33,7 @@ def setUp(self): @expectedFailureNetBSD def test(self): """Test simultaneous breakpoints in multiple threads.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/functionalities/thread/state/TestThreadStates.py b/lldb/test/API/functionalities/thread/state/TestThreadStates.py index ad67f7637199..18586c47e780 100644 --- a/lldb/test/API/functionalities/thread/state/TestThreadStates.py +++ b/lldb/test/API/functionalities/thread/state/TestThreadStates.py @@ -25,7 +25,7 @@ class ThreadStateTestCase(TestBase): @expectedFailureNetBSD def test_state_after_breakpoint(self): """Test thread state after breakpoint.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) + self.build() self.thread_state_after_breakpoint_test() @skipIfDarwin # 'llvm.org/pr23669', cause Python crash randomly @@ -35,7 +35,7 @@ def test_state_after_breakpoint(self): @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24660") def test_state_after_continue(self): """Test thread state after continue.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) + self.build() 
self.thread_state_after_continue_test() @skipIfDarwin # 'llvm.org/pr23669', cause Python crash randomly @@ -46,7 +46,7 @@ def test_state_after_continue(self): @expectedFailure("llvm.org/pr16712") def test_state_after_expression(self): """Test thread state after expression.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) + self.build() self.thread_state_after_expression_test() # thread states not properly maintained @@ -58,7 +58,7 @@ def test_state_after_expression(self): @expectedFailureNetBSD def test_process_state(self): """Test thread states (comprehensive).""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) + self.build() self.thread_states_test() def setUp(self): @@ -193,7 +193,7 @@ def thread_state_after_expression_test(self): @no_debug_info_test def test_process_interrupt(self): """Test process interrupt and continue.""" - self.build(dictionary=self.getBuildFlags(use_cpp11=False)) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py b/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py index 5b34e74b410d..adce28dc3f7f 100644 --- a/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py +++ b/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py @@ -27,7 +27,7 @@ class ThreadStepOutTestCase(TestBase): @expectedFailureNetBSD def test_step_single_thread(self): """Test thread step out on one thread via command interpreter. """ - self.build(dictionary=self.getBuildFlags()) + self.build() self.step_out_test(self.step_out_single_thread_with_cmd) # Test occasionally times out on the Linux build bot @@ -44,7 +44,7 @@ def test_step_single_thread(self): @expectedFailureNetBSD def test_step_all_threads(self): """Test thread step out on all threads via command interpreter. 
""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.step_out_test(self.step_out_all_threads_with_cmd) # Test occasionally times out on the Linux build bot @@ -60,7 +60,7 @@ def test_step_all_threads(self): @expectedFailureNetBSD def test_python(self): """Test thread step out on one thread via Python API (dwarf).""" - self.build(dictionary=self.getBuildFlags()) + self.build() self.step_out_test(self.step_out_with_python) def setUp(self): diff --git a/lldb/test/API/functionalities/thread/thread_exit/TestThreadExit.py b/lldb/test/API/functionalities/thread/thread_exit/TestThreadExit.py index 6bd55e1753fa..dcc0928d7b2e 100644 --- a/lldb/test/API/functionalities/thread/thread_exit/TestThreadExit.py +++ b/lldb/test/API/functionalities/thread/thread_exit/TestThreadExit.py @@ -26,7 +26,7 @@ def setUp(self): @skipIfWindows # This is flakey on Windows: llvm.org/pr38373 def test(self): """Test thread exit handling.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) diff --git a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py index 313aae896e13..30b6c8f89532 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py +++ b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py @@ -32,7 +32,7 @@ def setUp(self): @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24663") def test_get_dynamic_vals(self): """Test fetching C++ dynamic values from pointers & references.""" - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") # Create a target from the debugger. 
diff --git a/lldb/test/API/linux/thread/create_during_instruction_step/TestCreateDuringInstructionStep.py b/lldb/test/API/linux/thread/create_during_instruction_step/TestCreateDuringInstructionStep.py index 92384e1c5078..5b3387751cc8 100644 --- a/lldb/test/API/linux/thread/create_during_instruction_step/TestCreateDuringInstructionStep.py +++ b/lldb/test/API/linux/thread/create_during_instruction_step/TestCreateDuringInstructionStep.py @@ -19,7 +19,7 @@ class CreateDuringInstructionStepTestCase(TestBase): @expectedFailureAndroid('llvm.org/pr24737', archs=['arm']) @skipIf(oslist=["linux"], archs=["arm", "aarch64"], bugnumber="llvm.org/pr24737") def test_step_inst(self): - self.build(dictionary=self.getBuildFlags()) + self.build() exe = self.getBuildArtifact("a.out") target = self.dbg.CreateTarget(exe) self.assertTrue(target and target.IsValid(), "Target is valid") From 64af9f61c30191482979c6883e4cc63703f12010 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Thu, 30 Dec 2021 11:44:51 +0000 Subject: [PATCH 224/992] [InstSimplify] add 'x + poison -> poison' (needed for NewGVN) --- llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 1c26ab361908..4a8dc754349b 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -620,6 +620,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; + // X + poison -> poison + if (isa(Op1)) + return Op1; + // X + undef -> undef if (Q.isUndefValue(Op1)) return Op1; From 72ea6fbc150a1546044ee4f246bd630d6aa67a43 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Thu, 30 Dec 2021 12:08:07 +0000 Subject: [PATCH 225/992] [NewGVN][NFC] Add test for x + poison -> poison --- llvm/test/Transforms/NewGVN/basic.ll | 8 ++++++++ 1 file changed, 8 insertions(+) diff 
--git a/llvm/test/Transforms/NewGVN/basic.ll b/llvm/test/Transforms/NewGVN/basic.ll index 90193bdbcda1..789ca3cfe9bb 100644 --- a/llvm/test/Transforms/NewGVN/basic.ll +++ b/llvm/test/Transforms/NewGVN/basic.ll @@ -33,3 +33,11 @@ define i64 @simplifyselect(i64 %x, i64 %y, i1 %c1, i1 %c2, i1 %zzz) { %r = add i64 %r1, %r2_eq2 ret i64 %r } + +define i8 @simplify_add_poison(i8 %x) { +; CHECK-LABEL: @simplify_add_poison( +; CHECK-NEXT: ret i8 poison +; + %r = add i8 poison, %x + ret i8 %r +} From 86825fc2fb363b807569327880c05e4b0b5393ec Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 30 Dec 2021 11:17:22 +0000 Subject: [PATCH 226/992] [LoopFlatten] Move it to a LoopPassManager In D109958 it was noticed that we could optimise the pipeline and avoid rerunning LoopSimplify/LCSSA for LoopFlatten by moving it to a LoopPassManager. Differential Revision: https://reviews.llvm.org/D110057 --- llvm/lib/Passes/PassBuilderPipelines.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a6a36ff25402..79c42b886fbb 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -311,6 +311,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, if (EnableLoopInterchange) LPM2.addPass(LoopInterchangePass()); + if (EnableLoopFlatten) + LPM2.addPass(LoopFlattenPass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile @@ -335,8 +337,6 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); - if (EnableLoopFlatten) - FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass())); // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. 
// *All* loop passes must preserve it, in order to be able to use it. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), @@ -485,6 +485,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (EnableLoopInterchange) LPM2.addPass(LoopInterchangePass()); + if (EnableLoopFlatten) + LPM2.addPass(LoopFlattenPass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile @@ -509,8 +511,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); - if (EnableLoopFlatten) - FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass())); // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. @@ -1623,9 +1623,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.addPass(DSEPass()); MainFPM.addPass(MergedLoadStoreMotionPass()); - // More loops are countable; try to optimize them. - if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) - MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass())); if (EnableConstraintElimination) MainFPM.addPass(ConstraintEliminationPass()); @@ -1633,6 +1630,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, LoopPassManager LPM; LPM.addPass(IndVarSimplifyPass()); LPM.addPass(LoopDeletionPass()); + if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) + LPM.addPass(LoopFlattenPass()); // FIXME: Add loop interchange. // Unroll small loops and perform peeling. 
From e5e844b37e75efb5528b8b410f1590bf2c913bc7 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Thu, 30 Dec 2021 12:33:27 +0000 Subject: [PATCH 227/992] [NFC] Pre-commit test for InstSimplify phi(poison) --- llvm/test/Transforms/InstSimplify/phi.ll | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/phi.ll b/llvm/test/Transforms/InstSimplify/phi.ll index 2861cca64ffe..e75a1f95273f 100644 --- a/llvm/test/Transforms/InstSimplify/phi.ll +++ b/llvm/test/Transforms/InstSimplify/phi.ll @@ -72,3 +72,83 @@ EXIT: %w = phi i32 [%v, %A], [poison, %B] ret i32 %w } + +define i32 @undef(i1 %cond, i32 %v) { +; CHECK-LABEL: @undef( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i32 [[V:%.*]] +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %w = phi i32 [%v, %A], [undef, %B] + ret i32 %w +} + +define i8 @undef_poison(i1 %cond) { +; CHECK-LABEL: @undef_poison( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 undef +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [undef, %A], [poison, %B] + ret i8 %r +} + +define i8 @only_undef(i1 %cond) { +; CHECK-LABEL: @only_undef( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 undef +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [undef, %A], [undef, %B] + ret i8 %r +} + +define i8 @only_poison(i1 %cond) { +; CHECK-LABEL: @only_poison( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label 
[[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 undef +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [poison, %A], [poison, %B] + ret i8 %r +} From 9b8f9d33dbbcd6525ab4d582cb9abb6f98e3601c Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 30 Dec 2021 15:02:23 +0100 Subject: [PATCH 228/992] [lldb/qemu] More flexible emulator specification This small patch adds two useful improvements: - allows one to specify the emulator path as a bare filename, and have it be looked up in the PATH - allows one to leave the path empty and have the filename be derived from the architecture. --- .../Platform/QemuUser/PlatformQemuUser.cpp | 7 +++++-- .../QemuUser/PlatformQemuUserProperties.td | 2 +- lldb/test/API/qemu/TestQemuLaunch.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp index 67c9484680a4..572a5b39985e 100644 --- a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp @@ -162,7 +162,10 @@ lldb::ProcessSP PlatformQemuUser::DebugProcess(ProcessLaunchInfo &launch_info, Target &target, Status &error) { Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PLATFORM); - std::string qemu = GetGlobalProperties().GetEmulatorPath().GetPath(); + FileSpec qemu = GetGlobalProperties().GetEmulatorPath(); + if (!qemu) + qemu.SetPath(("qemu-" + GetGlobalProperties().GetArchitecture()).str()); + FileSystem::Instance().ResolveExecutableLocation(qemu); llvm::SmallString<0> socket_model, socket_path; HostInfo::GetProcessTempDir().GetPath(socket_model); @@ -171,7 +174,7 @@ lldb::ProcessSP PlatformQemuUser::DebugProcess(ProcessLaunchInfo &launch_info, llvm::sys::fs::createUniquePath(socket_model, socket_path, false); } while 
(FileSystem::Instance().Exists(socket_path)); - Args args({qemu, "-g", socket_path}); + Args args({qemu.GetPath(), "-g", socket_path}); args.AppendArguments(GetGlobalProperties().GetEmulatorArgs()); args.AppendArgument("--"); args.AppendArgument(launch_info.GetExecutableFile().GetPath()); diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td index 4e8fbcfd6760..c7ec4bbc6e78 100644 --- a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td @@ -8,7 +8,7 @@ let Definition = "platformqemuuser" in { def EmulatorPath: Property<"emulator-path", "FileSpec">, Global, DefaultStringValue<"">, - Desc<"Path to the emulator binary.">; + Desc<"Path to the emulator binary. If the path does not contain a directory separator, the filename is looked up in the PATH environment variable. If empty, the filename is derived from the architecture setting.">; def EmulatorArgs: Property<"emulator-args", "Args">, Global, DefaultStringValue<"">, diff --git a/lldb/test/API/qemu/TestQemuLaunch.py b/lldb/test/API/qemu/TestQemuLaunch.py index 2e817ede4154..01c4143c9e77 100644 --- a/lldb/test/API/qemu/TestQemuLaunch.py +++ b/lldb/test/API/qemu/TestQemuLaunch.py @@ -154,6 +154,24 @@ def test_stdio_redirect(self): state = json.load(s) self.assertEqual(state["stdin"], "STDIN CONTENT") + def test_find_in_PATH(self): + emulator = self.getBuildArtifact("qemu-" + self.getArchitecture()) + os.rename(self.getBuildArtifact("qemu.py"), emulator) + self.set_emulator_setting("emulator-path", "''") + + original_path = os.environ["PATH"] + os.environ["PATH"] = (self.getBuildDir() + + self.platformContext.shlib_path_separator + original_path) + def cleanup(): + os.environ["PATH"] = original_path + + self.addTearDownHook(cleanup) + state = self._run_and_get_state() + + self.assertEqual(state["program"], self.getBuildArtifact()) + 
self.assertEqual(state["args"], + ["dump:" + self.getBuildArtifact("state.log")]) + def test_bad_emulator_path(self): self.set_emulator_setting("emulator-path", self.getBuildArtifact("nonexistent.file")) From 84b285d6eb9d52f467fa710f2c9f490a0584c0b2 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Thu, 30 Dec 2021 13:25:57 +0000 Subject: [PATCH 229/992] [GVN] Set phi entries of unreachable predecessors to poison instead of undef This matches NewGVN's behavior. --- llvm/lib/Transforms/Scalar/GVN.cpp | 6 +++--- llvm/test/Transforms/GVN/assume-equal.ll | 2 +- llvm/test/Transforms/GVN/calls-nonlocal.ll | 2 +- llvm/test/Transforms/GVN/condprop.ll | 2 +- llvm/test/Transforms/GVN/equality-assume.ll | 2 +- llvm/test/Transforms/GVN/preserve-memoryssa.ll | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 00506fb86006..ee7a2e4aed25 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1769,7 +1769,7 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. FIXME: We could insert unreachable // instruction directly because we can modify the CFG. - auto *NewS = new StoreInst(UndefValue::get(Int8Ty), + auto *NewS = new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(Int8Ty->getPointerTo()), IntrinsicI); if (MSSAU) { @@ -2991,12 +2991,12 @@ void GVNPass::addDeadBlock(BasicBlock *BB) { } } - // Now undef the incoming values from the dead predecessors. + // Now poison the incoming values from the dead predecessors. 
for (BasicBlock *P : predecessors(B)) { if (!DeadBlocks.count(P)) continue; for (PHINode &Phi : B->phis()) { - Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType())); + Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType())); if (MD) MD->invalidateCachedPointerInfo(&Phi); } diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll index 941f14ce402c..b51fded5bd20 100644 --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -217,7 +217,7 @@ entry: bb2: ; CHECK-NOT: %cmp3 = %cmp3 = icmp eq i32 %p, 43 - ; CHECK: store i8 undef, i8* null + ; CHECK: store i8 poison, i8* null call void @llvm.assume(i1 %cmp3) ret i32 15 bb3: diff --git a/llvm/test/Transforms/GVN/calls-nonlocal.ll b/llvm/test/Transforms/GVN/calls-nonlocal.ll index 81057dd6cc99..952f6748ef6f 100644 --- a/llvm/test/Transforms/GVN/calls-nonlocal.ll +++ b/llvm/test/Transforms/GVN/calls-nonlocal.ll @@ -68,7 +68,7 @@ return: ; preds = %bb27 ; CHECK: bb26: ; CHECK: br label %bb27 ; CHECK: bb27: -; CHECK: %tmp.0 = phi i32 [ 11, %bb26 ], [ undef, %bb24 ], [ undef, %bb14 ], [ %g, %bb ] +; CHECK: %tmp.0 = phi i32 [ 11, %bb26 ], [ poison, %bb24 ], [ poison, %bb14 ], [ %g, %bb ] ; CHECK: ret i32 %tmp.0 ; CHECK: } diff --git a/llvm/test/Transforms/GVN/condprop.ll b/llvm/test/Transforms/GVN/condprop.ll index 04ecb3c5592c..8a730aef847d 100644 --- a/llvm/test/Transforms/GVN/condprop.ll +++ b/llvm/test/Transforms/GVN/condprop.ll @@ -31,7 +31,7 @@ define i32 @test1() nounwind { ; CHECK: bb7: ; CHECK-NEXT: br label [[BB8]] ; CHECK: bb8: -; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP0]], [[BB7]] ], [ undef, [[BB6]] ], [ undef, [[BB4]] ], [ 4, [[BB2]] ], [ 5, [[BB]] ] +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP0]], [[BB7]] ], [ poison, [[BB6]] ], [ poison, [[BB4]] ], [ 4, [[BB2]] ], [ 5, [[BB]] ] ; CHECK-NEXT: ret i32 [[DOT0]] ; entry: diff --git a/llvm/test/Transforms/GVN/equality-assume.ll 
b/llvm/test/Transforms/GVN/equality-assume.ll index ee2cb06c158d..ee55d5d463f4 100644 --- a/llvm/test/Transforms/GVN/equality-assume.ll +++ b/llvm/test/Transforms/GVN/equality-assume.ll @@ -149,7 +149,7 @@ merge: define i32 @conflicting_constants(i32* %p) { ; CHECK-LABEL: @conflicting_constants( -; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: store i8 poison, i8* null ; CHECK-NEXT: br i1 undef, label [[TAKEN:%.*]], label [[MERGE:%.*]] ; CHECK: taken: ; CHECK-NEXT: br label [[MERGE]] diff --git a/llvm/test/Transforms/GVN/preserve-memoryssa.ll b/llvm/test/Transforms/GVN/preserve-memoryssa.ll index b78aba2238e4..282dbc0a8b28 100644 --- a/llvm/test/Transforms/GVN/preserve-memoryssa.ll +++ b/llvm/test/Transforms/GVN/preserve-memoryssa.ll @@ -97,7 +97,7 @@ for.body.i22: define void @test_assume_false_to_store_undef_1(i32* %ptr) { ; CHECK-LABEL: @test_assume_false_to_store_undef_1( ; CHECK-NEXT: store i32 10, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: call void @f() ; CHECK-NEXT: ret void ; @@ -113,7 +113,7 @@ define i32 @test_assume_false_to_store_undef_2(i32* %ptr, i32* %ptr.2) { ; CHECK-LABEL: @test_assume_false_to_store_undef_2( ; CHECK-NEXT: store i32 10, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_2:%.*]], align 4 -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: call void @f() ; CHECK-NEXT: ret i32 [[LV]] ; @@ -130,7 +130,7 @@ define i32 @test_assume_false_to_store_undef_3(i32* %ptr, i32* %ptr.2) { ; CHECK-LABEL: @test_assume_false_to_store_undef_3( ; CHECK-NEXT: store i32 10, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_2:%.*]], align 4 -; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: store i8 poison, i8* null, align 1 ; CHECK-NEXT: ret i32 [[LV]] ; store i32 10, i32* %ptr From 2e69f4f0126d49a1199703b23050ad71024fb52c Mon Sep 17 
00:00:00 2001 From: Nicolas Vasilache Date: Thu, 30 Dec 2021 12:41:18 +0000 Subject: [PATCH 230/992] [mlir][vector] Fix illegal vector.transfer + tensor.insert/extract_slice folding vector.transfer operations do not have rank-reducing semantics. Bail on illegal rank-reduction: we need to check that the rank-reduced dims are exactly the leading dims. I.e. the following is illegal: ``` %0 = vector.transfer_write %v, %t[0,0], %cst : vector<2x4xf32>, tensor<2x4xf32> %1 = tensor.insert_slice %0 into %tt[0,0,0][2,1,4][1,1,1] : tensor<2x4xf32> into tensor<2x1x4xf32> ``` Cannot fold into: ``` %0 = vector.transfer_write %v, %t[0,0,0], %cst : vector<2x4xf32>, tensor<2x1x4xf32> ``` For this, check the trailing `vectorRank` dims of the insert_slice result tensor match the trailing dims of the inferred result tensor. Differential Revision: https://reviews.llvm.org/D116409 --- mlir/lib/Dialect/Vector/VectorOps.cpp | 59 +++++++++++++++++++++- mlir/test/Dialect/Vector/canonicalize.mlir | 26 ++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 8c724f8ef0be..3a65d0e93dfd 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -24,6 +24,7 @@ #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" @@ -2783,8 +2784,35 @@ struct FoldExtractSliceIntoTransferRead if (!extractOp.hasUnitStride()) return failure(); + // Bail on illegal rank-reduction: we need to check that the rank-reduced + // dims are exactly the leading dims. I.e. 
the following is illegal: + // ``` + // %0 = tensor.extract_slice %t[0,0,0][2,1,4][1,1,1] : + // tensor<2x1x4xf32> to tensor<2x4xf32> + // %1 = vector.transfer_read %0[0,0], %cst : + // tensor<2x4xf32>, vector<2x4xf32> + // ``` + // + // Cannot fold into: + // ``` + // %0 = vector.transfer_read %t[0,0,0], %cst : + // tensor<2x1x4xf32>, vector<2x4xf32> + // ``` + // For this, check the trailing `vectorRank` dims of the extract_slice + // result tensor match the trailing dims of the inferred result tensor. int64_t rankReduced = extractOp.getSourceType().getRank() - extractOp.getType().getRank(); + int64_t vectorRank = xferOp.getVectorType().getRank(); + RankedTensorType inferredDestTensorType = + tensor::ExtractSliceOp::inferResultType( + extractOp.getSourceType(), extractOp.getMixedOffsets(), + extractOp.getMixedSizes(), extractOp.getMixedStrides()); + auto actualDestTensorShape = extractOp.getType().getShape(); + if (rankReduced > 0 && + actualDestTensorShape.take_back(vectorRank) != + inferredDestTensorType.getShape().take_back(vectorRank)) + return failure(); + SmallVector newIndices; // In case this is a rank-reducing ExtractSliceOp, copy rank-reduced // indices first. @@ -3168,7 +3196,7 @@ struct FoldInsertSliceIntoTransferWrite if (xferOp.mask()) return failure(); // Fold only if the TransferWriteOp completely overwrites the `source` with - // a vector. I.e., the result of the TransferWriteOp is a new tensor who's + // a vector. I.e., the result of the TransferWriteOp is a new tensor whose // content is the data of the vector. if (!llvm::equal(xferOp.getVectorType().getShape(), xferOp.getShapedType().getShape())) @@ -3176,6 +3204,35 @@ struct FoldInsertSliceIntoTransferWrite if (!xferOp.permutation_map().isIdentity()) return failure(); + // Bail on illegal rank-reduction: we need to check that the rank-reduced + // dims are exactly the leading dims. I.e. 
the following is illegal: + // ``` + // %0 = vector.transfer_write %v, %t[0,0], %cst : + // vector<2x4xf32>, tensor<2x4xf32> + // %1 = tensor.insert_slice %0 into %tt[0,0,0][2,1,4][1,1,1] : + // tensor<2x4xf32> into tensor<2x1x4xf32> + // ``` + // + // Cannot fold into: + // ``` + // %0 = vector.transfer_write %v, %t[0,0,0], %cst : + // vector<2x4xf32>, tensor<2x1x4xf32> + // ``` + // For this, check the trailing `vectorRank` dims of the insert_slice result + // tensor match the trailing dims of the inferred result tensor. + int64_t rankReduced = + insertOp.getType().getRank() - insertOp.getSourceType().getRank(); + int64_t vectorRank = xferOp.getVectorType().getRank(); + RankedTensorType inferredSourceTensorType = + tensor::ExtractSliceOp::inferResultType( + insertOp.getType(), insertOp.getMixedOffsets(), + insertOp.getMixedSizes(), insertOp.getMixedStrides()); + auto actualSourceTensorShape = insertOp.getSourceType().getShape(); + if (rankReduced > 0 && + actualSourceTensorShape.take_back(vectorRank) != + inferredSourceTensorType.getShape().take_back(vectorRank)) + return failure(); + SmallVector indices = getValueOrCreateConstantIndexOp( rewriter, insertOp.getLoc(), insertOp.getMixedOffsets()); SmallVector inBounds(xferOp.getTransferRank(), true); diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 9b496f857b1a..faf801fe534a 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -995,6 +995,20 @@ func @transfer_read_of_extract_slice_rank_reducing(%t : tensor, %s1 : // ----- +// CHECK-LABEL: func @transfer_read_of_extract_slice_illegal_rank_reducing( +// CHECK: extract_slice +// CHECK: vector.transfer_read +func @transfer_read_of_extract_slice_illegal_rank_reducing(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %t[5, 
%s1, 6] [%s2, 1, 12] [1, 1, 1] : tensor to tensor + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor, vector<5x6xf32> + return %1 : vector<5x6xf32> +} + +// ----- + // CHECK-LABEL: func @insert_slice_of_transfer_write( // CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index // CHECK: %[[c3:.*]] = arith.constant 3 : index @@ -1009,6 +1023,18 @@ func @insert_slice_of_transfer_write(%t1 : tensor, %v : vector<5x6xf32 // ----- +// CHECK-LABEL: func @insert_slice_of_transfer_write_illegal_rank_extending( +// CHECK: vector.transfer_write +// CHECK: insert_slice +func @insert_slice_of_transfer_write_illegal_rank_extending(%t1 : tensor, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [5, 1, 6] [1, 1, 1] : tensor<5x6xf32> into tensor + return %1 : tensor +} + +// ----- + // CHECK-LABEL: func @insert_slice_of_transfer_write_rank_extending( // CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index From 7128bb61fb59bd1d170865b5a5f0fe8fe0c00491 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Thu, 30 Dec 2021 15:41:44 +0000 Subject: [PATCH 231/992] [NFC] Pre-commit NewGVN tests for wrong phi(undef, X) optimization --- .../Transforms/NewGVN/phi-edge-handling.ll | 108 ++++++++++++++++-- 1 file changed, 100 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/NewGVN/phi-edge-handling.ll b/llvm/test/Transforms/NewGVN/phi-edge-handling.ll index 4ac78a557061..78e126aadc7d 100644 --- a/llvm/test/Transforms/NewGVN/phi-edge-handling.ll +++ b/llvm/test/Transforms/NewGVN/phi-edge-handling.ll @@ -5,7 +5,7 @@ ;; Block 6 is reachable, but edge 6->4 is not ;; This means the phi value is undef, not 0 ; Function Attrs: ssp uwtable -define i16 @hoge() 
local_unnamed_addr #0 align 2 { +define i16 @hoge() { ; CHECK-LABEL: @hoge( ; CHECK-NEXT: bb: ; CHECK-NEXT: switch i8 undef, label [[BB7:%.*]] [ @@ -21,7 +21,7 @@ define i16 @hoge() local_unnamed_addr #0 align 2 { ; CHECK: bb4: ; CHECK-NEXT: ret i16 undef ; CHECK: bb6: -; CHECK-NEXT: br i1 true, label [[BB3:%.*]], label [[BB4]], !llvm.loop !1 +; CHECK-NEXT: br i1 true, label [[BB3:%.*]], label [[BB4]] ; CHECK: bb7: ; CHECK-NEXT: unreachable ; @@ -45,16 +45,108 @@ bb4: ; preds = %bb6, %bb2 ret i16 %tmp bb6: ; preds = %bb4 - br i1 true, label %bb3, label %bb4, !llvm.loop !1 + br i1 true, label %bb3, label %bb4 bb7: ; preds = %bb unreachable } -attributes #0 = { ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +define i8 @only_undef(i1 %cond) { +; CHECK-LABEL: @only_undef( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 undef +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [undef, %A], [undef, %B] + ret i8 %r +} + +define i8 @only_poison(i1 %cond) { +; CHECK-LABEL: @only_poison( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 poison +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [poison, %A], [poison, %B] + ret i8 %r +} + +define i8 @undef_poison(i1 %cond) { 
+; CHECK-LABEL: @undef_poison( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 undef +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [undef, %A], [poison, %B] + ret i8 %r +} -!llvm.ident = !{!0} +define i8 @value_undef(i1 %cond, i8 %v) { +; CHECK-LABEL: @value_undef( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 [[V:%.*]] +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [undef, %A], [%v, %B] + ret i8 %r +} -!0 = !{!"clang version 5.0.0"} -!1 = distinct !{!1, !2} -!2 = !{!"llvm.loop.unroll.disable"} +define i8 @value_poison(i1 %cond, i8 %v) { +; CHECK-LABEL: @value_poison( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: B: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: EXIT: +; CHECK-NEXT: ret i8 [[V:%.*]] +; + br i1 %cond, label %A, label %B +A: + br label %EXIT +B: + br label %EXIT +EXIT: + %r = phi i8 [poison, %A], [%v, %B] + ret i8 %r +} From 95c7dd8810b0bc93c0f76a285f1bcc3bd73f6a50 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Dec 2021 07:57:11 -0800 Subject: [PATCH 232/992] Revert "[Hexagon] Don't build two halves of HVX vector in parallel" This reverts commit ba07f300c6d67a2c6dde8eef216b7a77ac4600bb. A build-vector sequence is made of pairs: rotate+insert. When constructing a single vector, this results in a chain of 2*N instructions. The rotate operation is a permute operation, but the insert uses a multiplication resource: insert and rotate can execute in the same cycle, but obviously they cannot operate on the same vector. 
The original halving idea is still beneficial since it does allow for insert/rotate overlap, and for hiding insert's latency. --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 67 ++++++++++++++----- .../Hexagon/autohvx/isel-build-vector.ll | 24 ++++--- 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index a151f3de170a..569ad8b337db 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -659,10 +659,10 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, // Find most common element to initialize vector with. This is to avoid // unnecessary vinsert/valign for cases where the same value is present // many times. Creates a histogram of the vector's elements to find the - // most common element. + // most common element n. assert(4*Words.size() == Subtarget.getVectorLength()); - SmallVector VecHist(32); - int MaxAt = 0; + int VecHist[32]; + int n = 0; for (unsigned i = 0; i != NumWords; ++i) { VecHist[i] = 0; if (Words[i].isUndef()) @@ -671,29 +671,60 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, if (Words[i] == Words[j]) VecHist[i]++; - if (VecHist[i] > VecHist[MaxAt]) - MaxAt = i; + if (VecHist[i] > VecHist[n]) + n = i; } - // If each value is different, don't do splat, just insert them one by one. - bool NoSplat = VecHist[MaxAt] <= 1; - SDValue RotV = NoSplat - ? 
DAG.getUNDEF(VecTy) - : DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[MaxAt]); - int Rn = 0; - for (unsigned i = 0; i != NumWords; ++i) { + SDValue HalfV = getZero(dl, VecTy, DAG); + if (VecHist[n] > 1) { + SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]); + HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy, + {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)}); + } + SDValue HalfV0 = HalfV; + SDValue HalfV1 = HalfV; + + // Construct two halves in parallel, then or them together. Rn and Rm count + // number of rotations needed before the next element. One last rotation is + // performed post-loop to position the last element. + int Rn = 0, Rm = 0; + SDValue Sn, Sm; + SDValue N = HalfV0; + SDValue M = HalfV1; + for (unsigned i = 0; i != NumWords/2; ++i) { + // Rotate by element count since last insertion. - if (NoSplat || Words[i] != Words[MaxAt]) { - RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, - {RotV, DAG.getConstant(Rn, dl, MVT::i32)}); - RotV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, Words[i]}); + if (Words[i] != Words[n] || VecHist[n] <= 1) { + Sn = DAG.getConstant(Rn, dl, MVT::i32); + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); + N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV0, Words[i]}); Rn = 0; } + if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { + Sm = DAG.getConstant(Rm, dl, MVT::i32); + HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); + M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV1, Words[i+NumWords/2]}); + Rm = 0; + } Rn += 4; + Rm += 4; } // Perform last rotation. 
- return DAG.getNode(HexagonISD::VROR, dl, VecTy, - {RotV, DAG.getConstant(Rn, dl, MVT::i32)}); + Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32); + Sm = DAG.getConstant(Rm, dl, MVT::i32); + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); + HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); + + SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0); + SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1); + + SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1}); + + SDValue OutV = + DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV); + return OutV; } SDValue diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll index 159001c11301..e6b8445f5121 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll @@ -6,31 +6,35 @@ define <32 x i32> @fred(i32 %a0) #0 { ; CHECK: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r3:2 = combine(#76,#7) -; CHECK-NEXT: r1 = #12 -; CHECK-NEXT: r4 = #9 +; CHECK-NEXT: r3:2 = combine(#20,#9) +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: r1 = #24 +; CHECK-NEXT: r4 = #12 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vror(v0,r1) +; CHECK-NEXT: v1 = vror(v0,r1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.w = vinsert(r2) -; CHECK-NEXT: r2 = #20 +; CHECK-NEXT: v1.w = vinsert(r2) +; CHECK-NEXT: r4 = #7 +; CHECK-NEXT: r2 = #116 +; CHECK-NEXT: v0 = vror(v0,r4) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vror(v0,r3) +; CHECK-NEXT: v0.w = vinsert(r4) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.w = vinsert(r4) +; CHECK-NEXT: v1 = vror(v1,r3) ; CHECK-NEXT: } ; CHECK-NEXT: { +; CHECK-NEXT: v1.w = vinsert(r0) ; CHECK-NEXT: v0 = vror(v0,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.w = vinsert(r0) +; CHECK-NEXT: v1 = vror(v1,r3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vror(v0,r2) +; 
CHECK-NEXT: v0 = vor(v0,v1) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %v0 = insertelement <32 x i32> undef, i32 undef, i32 0 From 23423638cc6901a292d52f6aee8e5042fffb89b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Dec 2021 08:40:49 -0800 Subject: [PATCH 233/992] [Hexagon] Handle HVX/FP shuffles, insertion and extraction Co-authored-by: Anirudh Sundar Subramaniam --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 20 +++++++++++++++++++ llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll | 18 +++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 569ad8b337db..6c7e18a3a207 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -91,15 +91,26 @@ HexagonTargetLowering::initializeHVXLowering() { if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); + // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom); // BUILD_VECTOR with f16 operands cannot be promoted without // promoting the result, so lower the node to vsplat or constant pool setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom); setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal); setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal); + // Vector shuffle is always promoted to ByteV and a bitcast to f16 is + // generated. 
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- // independent) handling of it would convert it to a load, which is @@ -1483,6 +1494,7 @@ SDValue HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); + MVT VecTy = ty(Op); SDValue VecV = Op.getOperand(0); SDValue ValV = Op.getOperand(1); SDValue IdxV = Op.getOperand(2); @@ -1490,6 +1502,14 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) if (ElemTy == MVT::i1) return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); + if (ElemTy == MVT::f16) { + SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + tyVector(VecTy, MVT::i16), + DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV), + DAG.getBitcast(MVT::i16, ValV), IdxV); + return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); + } + return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll new file mode 100644 index 000000000000..ffca572e4be8 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that we generate a proper vinsert instruction for f16 types. 
+; CHECK: vinsert +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define half* @fred(half* %v0) local_unnamed_addr #0 { +b0: + %t1 = bitcast half* %v0 to <64 x half>* + %v1 = load <64 x half>, <64 x half>* %t1, align 2 + %v2 = insertelement <64 x half> %v1, half 0xH4170, i32 17 + store <64 x half> %v2, <64 x half>* %t1, align 2 + %t2 = bitcast <64 x half>* %t1 to half* + ret half* %t2 +} + +attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,+hvx-qfloat,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } From 2e3e0a5c288041745f88a06e66a831c236a3bb1f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 30 Dec 2021 11:29:27 -0500 Subject: [PATCH 234/992] [InstCombine] fold opposite shifts around an add ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C) https://alive2.llvm.org/ce/z/DY9DPg This replaces a shift with an 'and', and in the case where the add has a constant operand, it eliminates both shifts. As noted in the TODO comment, we already have this fold when the shifts are in the opposite order (and that code handles bitwise logic ops too). 
Fixes #52851 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 6 +- .../InstCombine/InstCombineShifts.cpp | 16 +- llvm/test/Transforms/InstCombine/lshr.ll | 22 +- .../LoopVectorize/X86/gather_scatter.ll | 346 +++++++++--------- 4 files changed, 199 insertions(+), 191 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 324b7dcfb3ac..61be99d2b9f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -84,13 +84,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// Estimate a cost of Broadcast as an extract and sequence of insert /// operations. - InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) { + InstructionCost getBroadcastShuffleOverhead(VectorType *VTy) { InstructionCost Cost = 0; // Broadcast cost is equal to the cost of extracting the zero'th element // plus the cost of inserting it into every element of the result vector. Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); - for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { + for (int i = 0, e = VTy->getElementCount().getKnownMinValue(); i < e; ++i) { Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); } return Cost; @@ -875,7 +875,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: - return getBroadcastShuffleOverhead(cast(Tp)); + return getBroadcastShuffleOverhead(Tp); case TTI::SK_Select: case TTI::SK_Splice: case TTI::SK_Reverse: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 06421d553915..0ade25f76825 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1057,6 +1057,21 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { return 
BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } + // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C) + // TODO: Consolidate with the more general transform that starts from shl + // (the shifts are in the opposite order). + Value *Y; + if (match(Op0, + m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))), + m_Value(Y))))) { + Value *NewLshr = Builder.CreateLShr(Y, Op1); + Value *NewAdd = Builder.CreateAdd(NewLshr, X); + unsigned Op1Val = C->getLimitedValue(BitWidth); + APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val); + Constant *Mask = ConstantInt::get(Ty, Bits); + return BinaryOperator::CreateAnd(NewAdd, Mask); + } + if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { assert(ShAmtC < X->getType()->getScalarSizeInBits() && @@ -1094,7 +1109,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } } - Value *Y; if (ShAmtC == BitWidth - 1) { // lshr i32 or(X,-X), 31 --> zext (X != 0) if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X))))) diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index bfad4bcd27ab..a158f5ecba92 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -138,9 +138,8 @@ define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) { define i8 @lshr_exact(i8 %x) { ; CHECK-LABEL: @lshr_exact( -; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 2 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SHL]], 4 -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[LSHR:%.*]] = and i8 [[TMP1]], 63 ; CHECK-NEXT: ret i8 [[LSHR]] ; %shl = shl i8 %x, 2 @@ -151,9 +150,8 @@ define i8 @lshr_exact(i8 %x) { define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[SHL]], -; 
CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl <2 x i8> %x, @@ -164,9 +162,9 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { define i8 @shl_add(i8 %x, i8 %y) { ; CHECK-LABEL: @shl_add( -; CHECK-NEXT: [[L:%.*]] = shl i8 [[X:%.*]], 2 -; CHECK-NEXT: [[A:%.*]] = add i8 [[L]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = lshr i8 [[A]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], 63 ; CHECK-NEXT: ret i8 [[R]] ; %l = shl i8 %x, 2 @@ -178,9 +176,9 @@ define i8 @shl_add(i8 %x, i8 %y) { define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @shl_add_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]] -; CHECK-NEXT: [[L:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[Y]], [[L]] -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index fbdd6b539d21..c1363547ad98 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -1406,25 +1406,23 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512: for.body.lr.ph: ; AVX512-NEXT: [[MUL:%.*]] = sub nsw i32 0, [[D]] ; AVX512-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 -; AVX512-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; AVX512-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -4 -; 
AVX512-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 2 -; AVX512-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 60 +; AVX512-NEXT: [[TMP0:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; AVX512-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4611686018427387903 +; AVX512-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; AVX512: vector.memcheck: -; AVX512-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; AVX512-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 -; AVX512-NEXT: [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2 -; AVX512-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP5]], 2 -; AVX512-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 2 -; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP8]] -; AVX512-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] +; AVX512-NEXT: [[TMP3:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; AVX512-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4611686018427387903 +; AVX512-NEXT: [[TMP5:%.*]] = shl i64 [[TMP3]], 4 +; AVX512-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 2 +; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP6]] +; AVX512-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP7]] ; AVX512-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; AVX512-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]] -; AVX512-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]] +; AVX512-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; AVX512-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[IDX_EXT]] +; 
AVX512-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[DEST]] ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[PTR]] ; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -1434,117 +1432,117 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; AVX512: vector.ph: -; AVX512-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775792 +; AVX512-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775792 ; AVX512-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]] -; AVX512-NEXT: [[TMP12:%.*]] = shl i64 [[N_VEC]], 4 -; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]] -; AVX512-NEXT: [[TMP13:%.*]] = add nsw i64 [[N_VEC]], -16 -; AVX512-NEXT: [[TMP14:%.*]] = lshr exact i64 [[TMP13]], 4 -; AVX512-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP14]], 1 -; AVX512-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP15]], 7 -; AVX512-NEXT: [[TMP16:%.*]] = icmp ult i64 [[TMP13]], 112 -; AVX512-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; AVX512-NEXT: [[TMP10:%.*]] = shl i64 [[N_VEC]], 4 +; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP10]] +; AVX512-NEXT: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -16 +; AVX512-NEXT: [[TMP12:%.*]] = lshr exact i64 [[TMP11]], 4 +; AVX512-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1 +; AVX512-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP13]], 7 +; AVX512-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP11]], 112 +; AVX512-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AVX512: vector.ph.new: -; AVX512-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP15]], 2305843009213693944 +; 
AVX512-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP13]], 2305843009213693944 ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH_NEW]] ], [ [[PTR_IND_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP19]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP17]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP20:%.*]] = bitcast float* [[NEXT_GEP]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP20]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP17]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP21]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP17]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x 
float*> [[TMP15]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP18:%.*]] = bitcast float* [[NEXT_GEP]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP18]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP15]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP19]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 256 ; AVX512-NEXT: [[NEXT_GEP_1:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT]] -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr float, float* [[PTR_IND]], <16 x i64> -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_1]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_1:%.*]] = load <16 x float>, <16 x float>* [[TMP24]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_1]], <16 x float*> [[TMP22]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP25:%.*]] = bitcast float* [[NEXT_GEP_1]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_1:%.*]] = load <16 x float>, <16 x float>* [[TMP25]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP22]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_1]], <16 x float*> [[TMP26]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, float* [[PTR_IND]], <16 x i64> +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_1]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP21]] to <16 x float>* +; 
AVX512-NEXT: [[WIDE_LOAD_1:%.*]] = load <16 x float>, <16 x float>* [[TMP22]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_1]], <16 x float*> [[TMP20]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP23:%.*]] = bitcast float* [[NEXT_GEP_1]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_1:%.*]] = load <16 x float>, <16 x float>* [[TMP23]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP20]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_1]], <16 x float*> [[TMP24]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32 ; AVX512-NEXT: [[PTR_IND_1:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 512 ; AVX512-NEXT: [[NEXT_GEP_2:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_1]] -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, float* [[PTR_IND_1]], <16 x i64> -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_2]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x float>, <16 x float>* [[TMP29]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_2]], <16 x float*> [[TMP27]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP30:%.*]] = bitcast float* [[NEXT_GEP_2]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_2:%.*]] = load <16 x float>, <16 x float>* [[TMP30]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP27]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_2]], <16 x float*> [[TMP31]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, 
float* [[PTR_IND_1]], <16 x i64> +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_2]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x float>, <16 x float>* [[TMP27]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_2]], <16 x float*> [[TMP25]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP28:%.*]] = bitcast float* [[NEXT_GEP_2]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_2:%.*]] = load <16 x float>, <16 x float>* [[TMP28]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP25]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_2]], <16 x float*> [[TMP29]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48 ; AVX512-NEXT: [[PTR_IND_2:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 768 ; AVX512-NEXT: [[NEXT_GEP_3:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_2]] -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr float, float* [[PTR_IND_2]], <16 x i64> -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_3]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP34:%.*]] = bitcast float* [[TMP33]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_3:%.*]] = load <16 x float>, <16 x float>* [[TMP34]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_3]], <16 x float*> [[TMP32]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP35:%.*]] = bitcast float* [[NEXT_GEP_3]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_3:%.*]] = load <16 x float>, <16 x float>* [[TMP35]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP32]], i64 1 -; 
AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_3]], <16 x float*> [[TMP36]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr float, float* [[PTR_IND_2]], <16 x i64> +; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_3]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP32:%.*]] = bitcast float* [[TMP31]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_3:%.*]] = load <16 x float>, <16 x float>* [[TMP32]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_3]], <16 x float*> [[TMP30]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP33:%.*]] = bitcast float* [[NEXT_GEP_3]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_3:%.*]] = load <16 x float>, <16 x float>* [[TMP33]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP30]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_3]], <16 x float*> [[TMP34]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_3:%.*]] = or i64 [[INDEX]], 64 ; AVX512-NEXT: [[PTR_IND_3:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[NEXT_GEP_4:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_3]] -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr float, float* [[PTR_IND_3]], <16 x i64> -; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_4]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_4:%.*]] = load <16 x float>, <16 x float>* [[TMP39]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_4]], <16 x float*> [[TMP37]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP40:%.*]] = bitcast float* [[NEXT_GEP_4]] to <16 x 
float>* -; AVX512-NEXT: [[WIDE_LOAD15_4:%.*]] = load <16 x float>, <16 x float>* [[TMP40]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP37]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_4]], <16 x float*> [[TMP41]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP35:%.*]] = getelementptr float, float* [[PTR_IND_3]], <16 x i64> +; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_4]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_4:%.*]] = load <16 x float>, <16 x float>* [[TMP37]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_4]], <16 x float*> [[TMP35]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP38:%.*]] = bitcast float* [[NEXT_GEP_4]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_4:%.*]] = load <16 x float>, <16 x float>* [[TMP38]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP35]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_4]], <16 x float*> [[TMP39]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_4:%.*]] = or i64 [[INDEX]], 80 ; AVX512-NEXT: [[PTR_IND_4:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1280 ; AVX512-NEXT: [[NEXT_GEP_5:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_4]] -; AVX512-NEXT: [[TMP42:%.*]] = getelementptr float, float* [[PTR_IND_4]], <16 x i64> -; AVX512-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_5]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP43]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_5:%.*]] = load <16 x float>, <16 x float>* [[TMP44]], align 4, !alias.scope !2 -; AVX512-NEXT: call void 
@llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_5]], <16 x float*> [[TMP42]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP45:%.*]] = bitcast float* [[NEXT_GEP_5]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_5:%.*]] = load <16 x float>, <16 x float>* [[TMP45]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP42]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_5]], <16 x float*> [[TMP46]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP40:%.*]] = getelementptr float, float* [[PTR_IND_4]], <16 x i64> +; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_5]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP42:%.*]] = bitcast float* [[TMP41]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_5:%.*]] = load <16 x float>, <16 x float>* [[TMP42]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_5]], <16 x float*> [[TMP40]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP43:%.*]] = bitcast float* [[NEXT_GEP_5]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_5:%.*]] = load <16 x float>, <16 x float>* [[TMP43]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP40]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_5]], <16 x float*> [[TMP44]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_5:%.*]] = or i64 [[INDEX]], 96 ; AVX512-NEXT: [[PTR_IND_5:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1536 ; AVX512-NEXT: [[NEXT_GEP_6:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_5]] -; AVX512-NEXT: [[TMP47:%.*]] = getelementptr float, float* [[PTR_IND_5]], <16 x i64> -; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_6]], i64 
[[IDXPROM]] -; AVX512-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP48]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_6:%.*]] = load <16 x float>, <16 x float>* [[TMP49]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_6]], <16 x float*> [[TMP47]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP50:%.*]] = bitcast float* [[NEXT_GEP_6]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_6:%.*]] = load <16 x float>, <16 x float>* [[TMP50]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP47]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_6]], <16 x float*> [[TMP51]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP45:%.*]] = getelementptr float, float* [[PTR_IND_5]], <16 x i64> +; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_6]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_6:%.*]] = load <16 x float>, <16 x float>* [[TMP47]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_6]], <16 x float*> [[TMP45]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP48:%.*]] = bitcast float* [[NEXT_GEP_6]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_6:%.*]] = load <16 x float>, <16 x float>* [[TMP48]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP45]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_6]], <16 x float*> [[TMP49]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_6:%.*]] = or i64 [[INDEX]], 112 ; AVX512-NEXT: [[PTR_IND_6:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1792 ; AVX512-NEXT: [[NEXT_GEP_7:%.*]] = getelementptr 
float, float* [[PTR]], i64 [[INDEX_NEXT_6]] -; AVX512-NEXT: [[TMP52:%.*]] = getelementptr float, float* [[PTR_IND_6]], <16 x i64> -; AVX512-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_7]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP54:%.*]] = bitcast float* [[TMP53]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_7:%.*]] = load <16 x float>, <16 x float>* [[TMP54]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_7]], <16 x float*> [[TMP52]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP55:%.*]] = bitcast float* [[NEXT_GEP_7]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP55]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP52]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP56]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP50:%.*]] = getelementptr float, float* [[PTR_IND_6]], <16 x i64> +; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_7]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP51]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_7:%.*]] = load <16 x float>, <16 x float>* [[TMP52]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_7]], <16 x float*> [[TMP50]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP53:%.*]] = bitcast float* [[NEXT_GEP_7]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP53]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP50]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP54]], i32 4, <16 x i1> ), !alias.scope 
!5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 128 ; AVX512-NEXT: [[PTR_IND_7]] = getelementptr float, float* [[POINTER_PHI]], i64 2048 ; AVX512-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 @@ -1560,22 +1558,22 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] ; AVX512-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ] ; AVX512-NEXT: [[NEXT_GEP_EPIL:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_EPIL]] -; AVX512-NEXT: [[TMP57:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> -; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP58]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP59]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_EPIL]], <16 x float*> [[TMP57]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP60:%.*]] = bitcast float* [[NEXT_GEP_EPIL]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP60]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP57]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP61]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP55:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> +; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP57:%.*]] = bitcast float* [[TMP56]] to <16 x float>* +; AVX512-NEXT: 
[[WIDE_LOAD_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP57]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_EPIL]], <16 x float*> [[TMP55]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP58:%.*]] = bitcast float* [[NEXT_GEP_EPIL]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP58]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP55]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP59]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16 ; AVX512-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 256 ; AVX512-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; AVX512-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] ; AVX512-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP11:![0-9]+]] ; AVX512: middle.block: -; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; AVX512: for.body.preheader: ; AVX512-NEXT: [[PTR_ADDR_012_PH:%.*]] = phi float* [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] @@ -1585,11 +1583,11 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR_ADDR_012_PH]], [[FOR_BODY_PREHEADER]] ] ; AVX512-NEXT: [[DEST_ADDR_011:%.*]] = phi float* [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ], [ [[DEST_ADDR_011_PH]], [[FOR_BODY_PREHEADER]] ] ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP62:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; AVX512-NEXT: store float [[TMP62]], float* [[DEST_ADDR_011]], align 4 -; AVX512-NEXT: [[TMP63:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 +; AVX512-NEXT: [[TMP60:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; AVX512-NEXT: store float [[TMP60]], float* [[DEST_ADDR_011]], align 4 +; AVX512-NEXT: [[TMP61:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 ; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1 -; AVX512-NEXT: store float [[TMP63]], float* [[ARRAYIDX5]], align 4 +; AVX512-NEXT: store float [[TMP61]], float* [[ARRAYIDX5]], align 4 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16 ; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]] @@ -1606,25 +1604,23 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2: for.body.lr.ph: ; FVW2-NEXT: [[MUL:%.*]] = sub nsw i32 0, [[D]] ; FVW2-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 -; FVW2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; FVW2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -4 -; FVW2-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 2 -; FVW2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 12 +; FVW2-NEXT: [[TMP0:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; FVW2-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4611686018427387903 +; FVW2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 3 ; FVW2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; FVW2: vector.memcheck: -; FVW2-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; FVW2-NEXT: [[TMP5:%.*]] = 
add nsw i64 [[TMP4]], -4 -; FVW2-NEXT: [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2 -; FVW2-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP5]], 2 -; FVW2-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 2 -; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP8]] -; FVW2-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; FVW2-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] +; FVW2-NEXT: [[TMP3:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; FVW2-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4611686018427387903 +; FVW2-NEXT: [[TMP5:%.*]] = shl i64 [[TMP3]], 4 +; FVW2-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 2 +; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP6]] +; FVW2-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; FVW2-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP7]] ; FVW2-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; FVW2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]] -; FVW2-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]] +; FVW2-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; FVW2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[IDX_EXT]] +; FVW2-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] ; FVW2-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[DEST]] ; FVW2-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[PTR]] ; FVW2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -1634,61 +1630,61 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]] ; FVW2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; FVW2: vector.ph: -; FVW2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775804 +; FVW2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 
; FVW2-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]] -; FVW2-NEXT: [[TMP12:%.*]] = shl i64 [[N_VEC]], 4 -; FVW2-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]] +; FVW2-NEXT: [[TMP10:%.*]] = shl i64 [[N_VEC]], 4 +; FVW2-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP10]] ; FVW2-NEXT: br label [[VECTOR_BODY:%.*]] ; FVW2: vector.body: ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FVW2-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; FVW2-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP13]] +; FVW2-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4 +; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP11]] +; FVW2-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 4 +; FVW2-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], 16 +; FVW2-NEXT: [[NEXT_GEP17:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP13]] ; FVW2-NEXT: [[TMP14:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 16 -; FVW2-NEXT: [[NEXT_GEP17:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]] +; FVW2-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 32 +; FVW2-NEXT: [[NEXT_GEP18:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]] ; FVW2-NEXT: [[TMP16:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], 32 -; FVW2-NEXT: [[NEXT_GEP18:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]] -; FVW2-NEXT: [[TMP18:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], 48 -; FVW2-NEXT: [[NEXT_GEP19:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP19]] -; FVW2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; FVW2-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], 48 +; FVW2-NEXT: [[NEXT_GEP19:%.*]] = getelementptr float, float* [[DEST]], i64 
[[TMP17]] +; FVW2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; FVW2-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP19]], align 4, !alias.scope !7 +; FVW2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 2 ; FVW2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP21]], align 4, !alias.scope !7 -; FVW2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 2 -; FVW2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x float>, <2 x float>* [[TMP23]], align 4, !alias.scope !7 -; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 -; FVW2-NEXT: store float [[TMP24]], float* [[NEXT_GEP16]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 -; FVW2-NEXT: store float [[TMP25]], float* [[NEXT_GEP17]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 0 -; FVW2-NEXT: store float [[TMP26]], float* [[NEXT_GEP18]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 1 -; FVW2-NEXT: store float [[TMP27]], float* [[NEXT_GEP19]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP28:%.*]] = bitcast float* [[NEXT_GEP]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, <2 x float>* [[TMP28]], align 4, !alias.scope !14 -; FVW2-NEXT: [[TMP29:%.*]] = getelementptr float, float* [[NEXT_GEP]], i64 2 -; FVW2-NEXT: [[TMP30:%.*]] = bitcast float* [[TMP29]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, <2 x float>* [[TMP30]], align 4, !alias.scope !14 -; FVW2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, 
float* [[NEXT_GEP16]], i64 1 -; FVW2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP17]], i64 1 -; FVW2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP18]], i64 1 -; FVW2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP19]], i64 1 -; FVW2-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 0 +; FVW2-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x float>, <2 x float>* [[TMP21]], align 4, !alias.scope !7 +; FVW2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 +; FVW2-NEXT: store float [[TMP22]], float* [[NEXT_GEP16]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 +; FVW2-NEXT: store float [[TMP23]], float* [[NEXT_GEP17]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 0 +; FVW2-NEXT: store float [[TMP24]], float* [[NEXT_GEP18]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 1 +; FVW2-NEXT: store float [[TMP25]], float* [[NEXT_GEP19]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP26:%.*]] = bitcast float* [[NEXT_GEP]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4, !alias.scope !14 +; FVW2-NEXT: [[TMP27:%.*]] = getelementptr float, float* [[NEXT_GEP]], i64 2 +; FVW2-NEXT: [[TMP28:%.*]] = bitcast float* [[TMP27]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, <2 x float>* [[TMP28]], align 4, !alias.scope !14 +; FVW2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1 +; FVW2-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP17]], i64 1 +; FVW2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP18]], i64 1 +; FVW2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP19]], i64 1 +; FVW2-NEXT: 
[[TMP33:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 0 +; FVW2-NEXT: store float [[TMP33]], float* [[TMP29]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 1 +; FVW2-NEXT: store float [[TMP34]], float* [[TMP30]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 0 ; FVW2-NEXT: store float [[TMP35]], float* [[TMP31]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 1 +; FVW2-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 1 ; FVW2-NEXT: store float [[TMP36]], float* [[TMP32]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 0 -; FVW2-NEXT: store float [[TMP37]], float* [[TMP33]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 1 -; FVW2-NEXT: store float [[TMP38]], float* [[TMP34]], align 4, !alias.scope !10, !noalias !12 ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; FVW2-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; FVW2-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; FVW2-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; FVW2: middle.block: -; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; FVW2-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; FVW2: for.body.preheader: ; FVW2-NEXT: [[PTR_ADDR_012_PH:%.*]] = phi float* [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] @@ -1698,11 +1694,11 @@ define void 
@test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2-NEXT: [[PTR_ADDR_012:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR_ADDR_012_PH]], [[FOR_BODY_PREHEADER]] ] ; FVW2-NEXT: [[DEST_ADDR_011:%.*]] = phi float* [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ], [ [[DEST_ADDR_011_PH]], [[FOR_BODY_PREHEADER]] ] ; FVW2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; FVW2-NEXT: store float [[TMP40]], float* [[DEST_ADDR_011]], align 4 -; FVW2-NEXT: [[TMP41:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 +; FVW2-NEXT: [[TMP38:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; FVW2-NEXT: store float [[TMP38]], float* [[DEST_ADDR_011]], align 4 +; FVW2-NEXT: [[TMP39:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 ; FVW2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1 -; FVW2-NEXT: store float [[TMP41]], float* [[ARRAYIDX5]], align 4 +; FVW2-NEXT: store float [[TMP39]], float* [[ARRAYIDX5]], align 4 ; FVW2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1 ; FVW2-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16 ; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]] From fd9cd3408baff99e4982be5357057909b7b2b005 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 30 Dec 2021 11:54:55 -0500 Subject: [PATCH 235/992] Revert "[InstCombine] fold opposite shifts around an add" This reverts commit 2e3e0a5c288041745f88a06e66a831c236a3bb1f. Some unintended diffs snuck into this patch. 
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 6 +- .../InstCombine/InstCombineShifts.cpp | 16 +- llvm/test/Transforms/InstCombine/lshr.ll | 22 +- .../LoopVectorize/X86/gather_scatter.ll | 346 +++++++++--------- 4 files changed, 191 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 61be99d2b9f0..324b7dcfb3ac 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -84,13 +84,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// Estimate a cost of Broadcast as an extract and sequence of insert /// operations. - InstructionCost getBroadcastShuffleOverhead(VectorType *VTy) { + InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) { InstructionCost Cost = 0; // Broadcast cost is equal to the cost of extracting the zero'th element // plus the cost of inserting it into every element of the result vector. Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); - for (int i = 0, e = VTy->getElementCount().getKnownMinValue(); i < e; ++i) { + for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); } return Cost; @@ -875,7 +875,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: - return getBroadcastShuffleOverhead(Tp); + return getBroadcastShuffleOverhead(cast(Tp)); case TTI::SK_Select: case TTI::SK_Splice: case TTI::SK_Reverse: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 0ade25f76825..06421d553915 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1057,21 +1057,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, 
ConstantInt::get(Ty, Mask)); } - // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C) - // TODO: Consolidate with the more general transform that starts from shl - // (the shifts are in the opposite order). - Value *Y; - if (match(Op0, - m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))), - m_Value(Y))))) { - Value *NewLshr = Builder.CreateLShr(Y, Op1); - Value *NewAdd = Builder.CreateAdd(NewLshr, X); - unsigned Op1Val = C->getLimitedValue(BitWidth); - APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val); - Constant *Mask = ConstantInt::get(Ty, Bits); - return BinaryOperator::CreateAnd(NewAdd, Mask); - } - if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { assert(ShAmtC < X->getType()->getScalarSizeInBits() && @@ -1109,6 +1094,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } } + Value *Y; if (ShAmtC == BitWidth - 1) { // lshr i32 or(X,-X), 31 --> zext (X != 0) if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X))))) diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index a158f5ecba92..bfad4bcd27ab 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -138,8 +138,9 @@ define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) { define i8 @lshr_exact(i8 %x) { ; CHECK-LABEL: @lshr_exact( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 1 -; CHECK-NEXT: [[LSHR:%.*]] = and i8 [[TMP1]], 63 +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 2 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SHL]], 4 +; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2 ; CHECK-NEXT: ret i8 [[LSHR]] ; %shl = shl i8 %x, 2 @@ -150,8 +151,9 @@ define i8 @lshr_exact(i8 %x) { define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x 
i8> [[X:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[SHL]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl <2 x i8> %x, @@ -162,9 +164,9 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { define i8 @shl_add(i8 %x, i8 %y) { ; CHECK-LABEL: @shl_add( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], 63 +; CHECK-NEXT: [[L:%.*]] = shl i8 [[X:%.*]], 2 +; CHECK-NEXT: [[A:%.*]] = add i8 [[L]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[A]], 2 ; CHECK-NEXT: ret i8 [[R]] ; %l = shl i8 %x, 2 @@ -176,9 +178,9 @@ define i8 @shl_add(i8 %x, i8 %y) { define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @shl_add_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]] -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], +; CHECK-NEXT: [[L:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[Y]], [[L]] +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[A]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index c1363547ad98..fbdd6b539d21 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -1406,23 +1406,25 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512: for.body.lr.ph: ; AVX512-NEXT: [[MUL:%.*]] = sub nsw i32 0, [[D]] ; AVX512-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 -; AVX512-NEXT: [[TMP0:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 -; AVX512-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4611686018427387903 -; AVX512-NEXT: 
[[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 +; AVX512-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2 +; AVX512-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -4 +; AVX512-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 2 +; AVX512-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 60 ; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; AVX512: vector.memcheck: -; AVX512-NEXT: [[TMP3:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 -; AVX512-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4611686018427387903 -; AVX512-NEXT: [[TMP5:%.*]] = shl i64 [[TMP3]], 4 -; AVX512-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 2 -; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP6]] -; AVX512-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 -; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP7]] +; AVX512-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 +; AVX512-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 +; AVX512-NEXT: [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2 +; AVX512-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP5]], 2 +; AVX512-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 2 +; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP8]] +; AVX512-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1 +; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] ; AVX512-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 1 -; AVX512-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[IDX_EXT]] -; AVX512-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] +; AVX512-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 +; AVX512-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]] +; AVX512-NEXT: [[SCEVGEP8:%.*]] = getelementptr 
float, float* [[PTR]], i64 [[TMP11]] ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[DEST]] ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[PTR]] ; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -1432,117 +1434,117 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; AVX512: vector.ph: -; AVX512-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775792 +; AVX512-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775792 ; AVX512-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]] -; AVX512-NEXT: [[TMP10:%.*]] = shl i64 [[N_VEC]], 4 -; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP10]] -; AVX512-NEXT: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -16 -; AVX512-NEXT: [[TMP12:%.*]] = lshr exact i64 [[TMP11]], 4 -; AVX512-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1 -; AVX512-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP13]], 7 -; AVX512-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP11]], 112 -; AVX512-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; AVX512-NEXT: [[TMP12:%.*]] = shl i64 [[N_VEC]], 4 +; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]] +; AVX512-NEXT: [[TMP13:%.*]] = add nsw i64 [[N_VEC]], -16 +; AVX512-NEXT: [[TMP14:%.*]] = lshr exact i64 [[TMP13]], 4 +; AVX512-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP14]], 1 +; AVX512-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP15]], 7 +; AVX512-NEXT: [[TMP16:%.*]] = icmp ult i64 [[TMP13]], 112 +; AVX512-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AVX512: vector.ph.new: -; AVX512-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP13]], 2305843009213693944 +; AVX512-NEXT: [[UNROLL_ITER:%.*]] = and i64 
[[TMP15]], 2305843009213693944 ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH_NEW]] ], [ [[PTR_IND_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP17]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP15]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP18:%.*]] = bitcast float* [[NEXT_GEP]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP18]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP15]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP19]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP17:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP19]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP17]], i32 4, <16 x i1> ), 
!alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP20:%.*]] = bitcast float* [[NEXT_GEP]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP20]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP17]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP21]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 256 ; AVX512-NEXT: [[NEXT_GEP_1:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT]] -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, float* [[PTR_IND]], <16 x i64> -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_1]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP21]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_1:%.*]] = load <16 x float>, <16 x float>* [[TMP22]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_1]], <16 x float*> [[TMP20]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP23:%.*]] = bitcast float* [[NEXT_GEP_1]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_1:%.*]] = load <16 x float>, <16 x float>* [[TMP23]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP20]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_1]], <16 x float*> [[TMP24]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr float, float* [[PTR_IND]], <16 x i64> +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_1]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_1:%.*]] = load <16 x 
float>, <16 x float>* [[TMP24]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_1]], <16 x float*> [[TMP22]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP25:%.*]] = bitcast float* [[NEXT_GEP_1]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_1:%.*]] = load <16 x float>, <16 x float>* [[TMP25]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP22]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_1]], <16 x float*> [[TMP26]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32 ; AVX512-NEXT: [[PTR_IND_1:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 512 ; AVX512-NEXT: [[NEXT_GEP_2:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_1]] -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[PTR_IND_1]], <16 x i64> -; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_2]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x float>, <16 x float>* [[TMP27]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_2]], <16 x float*> [[TMP25]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP28:%.*]] = bitcast float* [[NEXT_GEP_2]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_2:%.*]] = load <16 x float>, <16 x float>* [[TMP28]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP25]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_2]], <16 x float*> [[TMP29]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, float* [[PTR_IND_1]], <16 x i64> +; 
AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_2]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x float>, <16 x float>* [[TMP29]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_2]], <16 x float*> [[TMP27]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP30:%.*]] = bitcast float* [[NEXT_GEP_2]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_2:%.*]] = load <16 x float>, <16 x float>* [[TMP30]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP27]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_2]], <16 x float*> [[TMP31]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48 ; AVX512-NEXT: [[PTR_IND_2:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 768 ; AVX512-NEXT: [[NEXT_GEP_3:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_2]] -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr float, float* [[PTR_IND_2]], <16 x i64> -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_3]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP32:%.*]] = bitcast float* [[TMP31]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_3:%.*]] = load <16 x float>, <16 x float>* [[TMP32]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_3]], <16 x float*> [[TMP30]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP33:%.*]] = bitcast float* [[NEXT_GEP_3]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_3:%.*]] = load <16 x float>, <16 x float>* [[TMP33]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP30]], i64 1 -; AVX512-NEXT: call void 
@llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_3]], <16 x float*> [[TMP34]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP32:%.*]] = getelementptr float, float* [[PTR_IND_2]], <16 x i64> +; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_3]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP34:%.*]] = bitcast float* [[TMP33]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_3:%.*]] = load <16 x float>, <16 x float>* [[TMP34]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_3]], <16 x float*> [[TMP32]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP35:%.*]] = bitcast float* [[NEXT_GEP_3]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_3:%.*]] = load <16 x float>, <16 x float>* [[TMP35]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP32]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_3]], <16 x float*> [[TMP36]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_3:%.*]] = or i64 [[INDEX]], 64 ; AVX512-NEXT: [[PTR_IND_3:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[NEXT_GEP_4:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_3]] -; AVX512-NEXT: [[TMP35:%.*]] = getelementptr float, float* [[PTR_IND_3]], <16 x i64> -; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_4]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_4:%.*]] = load <16 x float>, <16 x float>* [[TMP37]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_4]], <16 x float*> [[TMP35]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP38:%.*]] = bitcast float* [[NEXT_GEP_4]] to <16 x float>* -; AVX512-NEXT: 
[[WIDE_LOAD15_4:%.*]] = load <16 x float>, <16 x float>* [[TMP38]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP35]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_4]], <16 x float*> [[TMP39]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr float, float* [[PTR_IND_3]], <16 x i64> +; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_4]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_4:%.*]] = load <16 x float>, <16 x float>* [[TMP39]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_4]], <16 x float*> [[TMP37]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP40:%.*]] = bitcast float* [[NEXT_GEP_4]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_4:%.*]] = load <16 x float>, <16 x float>* [[TMP40]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP37]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_4]], <16 x float*> [[TMP41]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_4:%.*]] = or i64 [[INDEX]], 80 ; AVX512-NEXT: [[PTR_IND_4:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1280 ; AVX512-NEXT: [[NEXT_GEP_5:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_4]] -; AVX512-NEXT: [[TMP40:%.*]] = getelementptr float, float* [[PTR_IND_4]], <16 x i64> -; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_5]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP42:%.*]] = bitcast float* [[TMP41]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_5:%.*]] = load <16 x float>, <16 x float>* [[TMP42]], align 4, !alias.scope !2 -; AVX512-NEXT: call void 
@llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_5]], <16 x float*> [[TMP40]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP43:%.*]] = bitcast float* [[NEXT_GEP_5]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_5:%.*]] = load <16 x float>, <16 x float>* [[TMP43]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP40]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_5]], <16 x float*> [[TMP44]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP42:%.*]] = getelementptr float, float* [[PTR_IND_4]], <16 x i64> +; AVX512-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_5]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP43]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_5:%.*]] = load <16 x float>, <16 x float>* [[TMP44]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_5]], <16 x float*> [[TMP42]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP45:%.*]] = bitcast float* [[NEXT_GEP_5]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_5:%.*]] = load <16 x float>, <16 x float>* [[TMP45]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP42]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_5]], <16 x float*> [[TMP46]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_5:%.*]] = or i64 [[INDEX]], 96 ; AVX512-NEXT: [[PTR_IND_5:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1536 ; AVX512-NEXT: [[NEXT_GEP_6:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_5]] -; AVX512-NEXT: [[TMP45:%.*]] = getelementptr float, float* [[PTR_IND_5]], <16 x i64> -; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_6]], i64 
[[IDXPROM]] -; AVX512-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_6:%.*]] = load <16 x float>, <16 x float>* [[TMP47]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_6]], <16 x float*> [[TMP45]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP48:%.*]] = bitcast float* [[NEXT_GEP_6]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_6:%.*]] = load <16 x float>, <16 x float>* [[TMP48]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP45]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_6]], <16 x float*> [[TMP49]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP47:%.*]] = getelementptr float, float* [[PTR_IND_5]], <16 x i64> +; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_6]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP48]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_6:%.*]] = load <16 x float>, <16 x float>* [[TMP49]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_6]], <16 x float*> [[TMP47]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP50:%.*]] = bitcast float* [[NEXT_GEP_6]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_6:%.*]] = load <16 x float>, <16 x float>* [[TMP50]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP47]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_6]], <16 x float*> [[TMP51]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_6:%.*]] = or i64 [[INDEX]], 112 ; AVX512-NEXT: [[PTR_IND_6:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1792 ; AVX512-NEXT: [[NEXT_GEP_7:%.*]] = getelementptr 
float, float* [[PTR]], i64 [[INDEX_NEXT_6]] -; AVX512-NEXT: [[TMP50:%.*]] = getelementptr float, float* [[PTR_IND_6]], <16 x i64> -; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_7]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP51]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_7:%.*]] = load <16 x float>, <16 x float>* [[TMP52]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_7]], <16 x float*> [[TMP50]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP53:%.*]] = bitcast float* [[NEXT_GEP_7]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP53]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP50]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP54]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP52:%.*]] = getelementptr float, float* [[PTR_IND_6]], <16 x i64> +; AVX512-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_7]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP54:%.*]] = bitcast float* [[TMP53]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_7:%.*]] = load <16 x float>, <16 x float>* [[TMP54]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_7]], <16 x float*> [[TMP52]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP55:%.*]] = bitcast float* [[NEXT_GEP_7]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP55]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP52]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP56]], i32 4, <16 x i1> ), !alias.scope 
!5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 128 ; AVX512-NEXT: [[PTR_IND_7]] = getelementptr float, float* [[POINTER_PHI]], i64 2048 ; AVX512-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 @@ -1558,22 +1560,22 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] ; AVX512-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ] ; AVX512-NEXT: [[NEXT_GEP_EPIL:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_EPIL]] -; AVX512-NEXT: [[TMP55:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> -; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP57:%.*]] = bitcast float* [[TMP56]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP57]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_EPIL]], <16 x float*> [[TMP55]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP58:%.*]] = bitcast float* [[NEXT_GEP_EPIL]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP58]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP55]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP59]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP57:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> +; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP58]] to <16 x float>* +; AVX512-NEXT: 
[[WIDE_LOAD_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP59]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_EPIL]], <16 x float*> [[TMP57]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP60:%.*]] = bitcast float* [[NEXT_GEP_EPIL]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP60]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP57]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP61]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16 ; AVX512-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 256 ; AVX512-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; AVX512-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] ; AVX512-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP11:![0-9]+]] ; AVX512: middle.block: -; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; AVX512: for.body.preheader: ; AVX512-NEXT: [[PTR_ADDR_012_PH:%.*]] = phi float* [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] @@ -1583,11 +1585,11 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR_ADDR_012_PH]], [[FOR_BODY_PREHEADER]] ] ; AVX512-NEXT: [[DEST_ADDR_011:%.*]] = phi float* [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ], [ [[DEST_ADDR_011_PH]], [[FOR_BODY_PREHEADER]] ] ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP60:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; AVX512-NEXT: store float [[TMP60]], float* [[DEST_ADDR_011]], align 4 -; AVX512-NEXT: [[TMP61:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 +; AVX512-NEXT: [[TMP62:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; AVX512-NEXT: store float [[TMP62]], float* [[DEST_ADDR_011]], align 4 +; AVX512-NEXT: [[TMP63:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 ; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1 -; AVX512-NEXT: store float [[TMP61]], float* [[ARRAYIDX5]], align 4 +; AVX512-NEXT: store float [[TMP63]], float* [[ARRAYIDX5]], align 4 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16 ; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]] @@ -1604,23 +1606,25 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2: for.body.lr.ph: ; FVW2-NEXT: [[MUL:%.*]] = sub nsw i32 0, [[D]] ; FVW2-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 -; FVW2-NEXT: [[TMP0:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 -; FVW2-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4611686018427387903 -; FVW2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 3 +; FVW2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2 +; FVW2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -4 +; FVW2-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 2 +; FVW2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 12 ; FVW2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; FVW2: vector.memcheck: -; FVW2-NEXT: [[TMP3:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 -; 
FVW2-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4611686018427387903 -; FVW2-NEXT: [[TMP5:%.*]] = shl i64 [[TMP3]], 4 -; FVW2-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 2 -; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP6]] -; FVW2-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 -; FVW2-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP7]] +; FVW2-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 +; FVW2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 +; FVW2-NEXT: [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2 +; FVW2-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP5]], 2 +; FVW2-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 2 +; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP8]] +; FVW2-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1 +; FVW2-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] ; FVW2-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 1 -; FVW2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[IDX_EXT]] -; FVW2-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] +; FVW2-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 +; FVW2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]] +; FVW2-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]] ; FVW2-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[DEST]] ; FVW2-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[PTR]] ; FVW2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -1630,61 +1634,61 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]] ; FVW2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; FVW2: vector.ph: -; FVW2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 +; FVW2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 
9223372036854775804 ; FVW2-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]] -; FVW2-NEXT: [[TMP10:%.*]] = shl i64 [[N_VEC]], 4 -; FVW2-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP10]] +; FVW2-NEXT: [[TMP12:%.*]] = shl i64 [[N_VEC]], 4 +; FVW2-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]] ; FVW2-NEXT: br label [[VECTOR_BODY:%.*]] ; FVW2: vector.body: ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FVW2-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; FVW2-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP11]] -; FVW2-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], 16 -; FVW2-NEXT: [[NEXT_GEP17:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP13]] +; FVW2-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 4 +; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP13]] ; FVW2-NEXT: [[TMP14:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 32 -; FVW2-NEXT: [[NEXT_GEP18:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]] +; FVW2-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 16 +; FVW2-NEXT: [[NEXT_GEP17:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]] ; FVW2-NEXT: [[TMP16:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], 48 -; FVW2-NEXT: [[NEXT_GEP19:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]] -; FVW2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP19]], align 4, !alias.scope !7 -; FVW2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 2 +; FVW2-NEXT: [[TMP17:%.*]] = or i64 
[[TMP16]], 32 +; FVW2-NEXT: [[NEXT_GEP18:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]] +; FVW2-NEXT: [[TMP18:%.*]] = shl i64 [[INDEX]], 4 +; FVW2-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], 48 +; FVW2-NEXT: [[NEXT_GEP19:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP19]] +; FVW2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] ; FVW2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x float>, <2 x float>* [[TMP21]], align 4, !alias.scope !7 -; FVW2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 -; FVW2-NEXT: store float [[TMP22]], float* [[NEXT_GEP16]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 -; FVW2-NEXT: store float [[TMP23]], float* [[NEXT_GEP17]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 0 -; FVW2-NEXT: store float [[TMP24]], float* [[NEXT_GEP18]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 1 -; FVW2-NEXT: store float [[TMP25]], float* [[NEXT_GEP19]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP26:%.*]] = bitcast float* [[NEXT_GEP]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4, !alias.scope !14 -; FVW2-NEXT: [[TMP27:%.*]] = getelementptr float, float* [[NEXT_GEP]], i64 2 -; FVW2-NEXT: [[TMP28:%.*]] = bitcast float* [[TMP27]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, <2 x float>* [[TMP28]], align 4, !alias.scope !14 -; FVW2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1 -; FVW2-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP17]], i64 1 -; FVW2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP18]], i64 1 -; FVW2-NEXT: 
[[TMP32:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP19]], i64 1 -; FVW2-NEXT: [[TMP33:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 0 -; FVW2-NEXT: store float [[TMP33]], float* [[TMP29]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 1 -; FVW2-NEXT: store float [[TMP34]], float* [[TMP30]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 0 +; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP21]], align 4, !alias.scope !7 +; FVW2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 2 +; FVW2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x float>, <2 x float>* [[TMP23]], align 4, !alias.scope !7 +; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 +; FVW2-NEXT: store float [[TMP24]], float* [[NEXT_GEP16]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 +; FVW2-NEXT: store float [[TMP25]], float* [[NEXT_GEP17]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 0 +; FVW2-NEXT: store float [[TMP26]], float* [[NEXT_GEP18]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 1 +; FVW2-NEXT: store float [[TMP27]], float* [[NEXT_GEP19]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP28:%.*]] = bitcast float* [[NEXT_GEP]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, <2 x float>* [[TMP28]], align 4, !alias.scope !14 +; FVW2-NEXT: [[TMP29:%.*]] = getelementptr float, float* [[NEXT_GEP]], i64 2 +; FVW2-NEXT: [[TMP30:%.*]] = bitcast float* [[TMP29]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, <2 x float>* [[TMP30]], align 4, 
!alias.scope !14 +; FVW2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1 +; FVW2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP17]], i64 1 +; FVW2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP18]], i64 1 +; FVW2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP19]], i64 1 +; FVW2-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 0 ; FVW2-NEXT: store float [[TMP35]], float* [[TMP31]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 1 +; FVW2-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 1 ; FVW2-NEXT: store float [[TMP36]], float* [[TMP32]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 0 +; FVW2-NEXT: store float [[TMP37]], float* [[TMP33]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 1 +; FVW2-NEXT: store float [[TMP38]], float* [[TMP34]], align 4, !alias.scope !10, !noalias !12 ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; FVW2-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; FVW2-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; FVW2-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; FVW2: middle.block: -; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; FVW2-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; FVW2: for.body.preheader: ; FVW2-NEXT: [[PTR_ADDR_012_PH:%.*]] = phi float* [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] @@ -1694,11 +1698,11 @@ define 
void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2-NEXT: [[PTR_ADDR_012:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR_ADDR_012_PH]], [[FOR_BODY_PREHEADER]] ] ; FVW2-NEXT: [[DEST_ADDR_011:%.*]] = phi float* [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ], [ [[DEST_ADDR_011_PH]], [[FOR_BODY_PREHEADER]] ] ; FVW2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP38:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; FVW2-NEXT: store float [[TMP38]], float* [[DEST_ADDR_011]], align 4 -; FVW2-NEXT: [[TMP39:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 +; FVW2-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; FVW2-NEXT: store float [[TMP40]], float* [[DEST_ADDR_011]], align 4 +; FVW2-NEXT: [[TMP41:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 ; FVW2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1 -; FVW2-NEXT: store float [[TMP39]], float* [[ARRAYIDX5]], align 4 +; FVW2-NEXT: store float [[TMP41]], float* [[ARRAYIDX5]], align 4 ; FVW2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1 ; FVW2-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16 ; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]] From 0c6979b2d64d97298671ba7ed44d1446c9f302cf Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 30 Dec 2021 12:01:06 -0500 Subject: [PATCH 236/992] [InstCombine] fold opposite shifts around an add ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C) https://alive2.llvm.org/ce/z/DY9DPg This replaces a shift with an 'and', and in the case where the add has a constant operand, it eliminates both shifts. As noted in the TODO comment, we already have this fold when the shifts are in the opposite order (and that code handles bitwise logic ops too). 
Fixes #52851 --- .../InstCombine/InstCombineShifts.cpp | 16 +- llvm/test/Transforms/InstCombine/lshr.ll | 22 +- .../LoopVectorize/X86/gather_scatter.ll | 346 +++++++++--------- 3 files changed, 196 insertions(+), 188 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 06421d553915..0ade25f76825 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1057,6 +1057,21 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } + // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C) + // TODO: Consolidate with the more general transform that starts from shl + // (the shifts are in the opposite order). + Value *Y; + if (match(Op0, + m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))), + m_Value(Y))))) { + Value *NewLshr = Builder.CreateLShr(Y, Op1); + Value *NewAdd = Builder.CreateAdd(NewLshr, X); + unsigned Op1Val = C->getLimitedValue(BitWidth); + APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val); + Constant *Mask = ConstantInt::get(Ty, Bits); + return BinaryOperator::CreateAnd(NewAdd, Mask); + } + if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { assert(ShAmtC < X->getType()->getScalarSizeInBits() && @@ -1094,7 +1109,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } } - Value *Y; if (ShAmtC == BitWidth - 1) { // lshr i32 or(X,-X), 31 --> zext (X != 0) if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X))))) diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index bfad4bcd27ab..a158f5ecba92 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -138,9 +138,8 @@ define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) { define i8 
@lshr_exact(i8 %x) { ; CHECK-LABEL: @lshr_exact( -; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 2 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SHL]], 4 -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[LSHR:%.*]] = and i8 [[TMP1]], 63 ; CHECK-NEXT: ret i8 [[LSHR]] ; %shl = shl i8 %x, 2 @@ -151,9 +150,8 @@ define i8 @lshr_exact(i8 %x) { define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[SHL]], -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl <2 x i8> %x, @@ -164,9 +162,9 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { define i8 @shl_add(i8 %x, i8 %y) { ; CHECK-LABEL: @shl_add( -; CHECK-NEXT: [[L:%.*]] = shl i8 [[X:%.*]], 2 -; CHECK-NEXT: [[A:%.*]] = add i8 [[L]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = lshr i8 [[A]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP2]], 63 ; CHECK-NEXT: ret i8 [[R]] ; %l = shl i8 %x, 2 @@ -178,9 +176,9 @@ define i8 @shl_add(i8 %x, i8 %y) { define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @shl_add_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]] -; CHECK-NEXT: [[L:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[Y]], [[L]] -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index fbdd6b539d21..c1363547ad98 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -1406,25 +1406,23 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512: for.body.lr.ph: ; AVX512-NEXT: [[MUL:%.*]] = sub nsw i32 0, [[D]] ; AVX512-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 -; AVX512-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; AVX512-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -4 -; AVX512-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 2 -; AVX512-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 60 +; AVX512-NEXT: [[TMP0:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; AVX512-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4611686018427387903 +; AVX512-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; AVX512: vector.memcheck: -; AVX512-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; AVX512-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 -; AVX512-NEXT: [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2 -; AVX512-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP5]], 2 -; AVX512-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 2 -; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP8]] -; AVX512-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] +; AVX512-NEXT: [[TMP3:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; AVX512-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4611686018427387903 +; AVX512-NEXT: [[TMP5:%.*]] = shl i64 [[TMP3]], 4 +; AVX512-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 2 +; AVX512-NEXT: 
[[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP6]] +; AVX512-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP7]] ; AVX512-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; AVX512-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]] -; AVX512-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]] +; AVX512-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; AVX512-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[IDX_EXT]] +; AVX512-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[DEST]] ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[PTR]] ; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -1434,117 +1432,117 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; AVX512: vector.ph: -; AVX512-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775792 +; AVX512-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775792 ; AVX512-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]] -; AVX512-NEXT: [[TMP12:%.*]] = shl i64 [[N_VEC]], 4 -; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]] -; AVX512-NEXT: [[TMP13:%.*]] = add nsw i64 [[N_VEC]], -16 -; AVX512-NEXT: [[TMP14:%.*]] = lshr exact i64 [[TMP13]], 4 -; AVX512-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP14]], 1 -; AVX512-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP15]], 7 -; AVX512-NEXT: [[TMP16:%.*]] = icmp ult i64 [[TMP13]], 112 -; AVX512-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; 
AVX512-NEXT: [[TMP10:%.*]] = shl i64 [[N_VEC]], 4 +; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP10]] +; AVX512-NEXT: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -16 +; AVX512-NEXT: [[TMP12:%.*]] = lshr exact i64 [[TMP11]], 4 +; AVX512-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 1 +; AVX512-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP13]], 7 +; AVX512-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP11]], 112 +; AVX512-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; AVX512: vector.ph.new: -; AVX512-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP15]], 2305843009213693944 +; AVX512-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP13]], 2305843009213693944 ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH_NEW]] ], [ [[PTR_IND_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP17:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP19]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP17]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP20:%.*]] = bitcast float* [[NEXT_GEP]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP20]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP17]], 
i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP21]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP17]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP15]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP18:%.*]] = bitcast float* [[NEXT_GEP]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP18]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP15]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP19]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 256 ; AVX512-NEXT: [[NEXT_GEP_1:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT]] -; AVX512-NEXT: [[TMP22:%.*]] = getelementptr float, float* [[PTR_IND]], <16 x i64> -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_1]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_1:%.*]] = load <16 x float>, <16 x float>* [[TMP24]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_1]], <16 x float*> [[TMP22]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP25:%.*]] = bitcast float* [[NEXT_GEP_1]] to <16 x float>* -; 
AVX512-NEXT: [[WIDE_LOAD15_1:%.*]] = load <16 x float>, <16 x float>* [[TMP25]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP22]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_1]], <16 x float*> [[TMP26]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, float* [[PTR_IND]], <16 x i64> +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_1]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP21]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_1:%.*]] = load <16 x float>, <16 x float>* [[TMP22]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_1]], <16 x float*> [[TMP20]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP23:%.*]] = bitcast float* [[NEXT_GEP_1]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_1:%.*]] = load <16 x float>, <16 x float>* [[TMP23]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP20]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_1]], <16 x float*> [[TMP24]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 32 ; AVX512-NEXT: [[PTR_IND_1:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 512 ; AVX512-NEXT: [[NEXT_GEP_2:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_1]] -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, float* [[PTR_IND_1]], <16 x i64> -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_2]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x float>, <16 x float>* [[TMP29]], align 4, !alias.scope !2 -; AVX512-NEXT: call void 
@llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_2]], <16 x float*> [[TMP27]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP30:%.*]] = bitcast float* [[NEXT_GEP_2]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_2:%.*]] = load <16 x float>, <16 x float>* [[TMP30]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP27]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_2]], <16 x float*> [[TMP31]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[PTR_IND_1]], <16 x i64> +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_2]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x float>, <16 x float>* [[TMP27]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_2]], <16 x float*> [[TMP25]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP28:%.*]] = bitcast float* [[NEXT_GEP_2]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_2:%.*]] = load <16 x float>, <16 x float>* [[TMP28]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP25]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_2]], <16 x float*> [[TMP29]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 48 ; AVX512-NEXT: [[PTR_IND_2:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 768 ; AVX512-NEXT: [[NEXT_GEP_3:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_2]] -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr float, float* [[PTR_IND_2]], <16 x i64> -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_3]], i64 
[[IDXPROM]] -; AVX512-NEXT: [[TMP34:%.*]] = bitcast float* [[TMP33]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_3:%.*]] = load <16 x float>, <16 x float>* [[TMP34]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_3]], <16 x float*> [[TMP32]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP35:%.*]] = bitcast float* [[NEXT_GEP_3]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_3:%.*]] = load <16 x float>, <16 x float>* [[TMP35]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP32]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_3]], <16 x float*> [[TMP36]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr float, float* [[PTR_IND_2]], <16 x i64> +; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_3]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP32:%.*]] = bitcast float* [[TMP31]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_3:%.*]] = load <16 x float>, <16 x float>* [[TMP32]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_3]], <16 x float*> [[TMP30]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP33:%.*]] = bitcast float* [[NEXT_GEP_3]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_3:%.*]] = load <16 x float>, <16 x float>* [[TMP33]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP30]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_3]], <16 x float*> [[TMP34]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_3:%.*]] = or i64 [[INDEX]], 64 ; AVX512-NEXT: [[PTR_IND_3:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[NEXT_GEP_4:%.*]] = getelementptr float, 
float* [[PTR]], i64 [[INDEX_NEXT_3]] -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr float, float* [[PTR_IND_3]], <16 x i64> -; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_4]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_4:%.*]] = load <16 x float>, <16 x float>* [[TMP39]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_4]], <16 x float*> [[TMP37]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP40:%.*]] = bitcast float* [[NEXT_GEP_4]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_4:%.*]] = load <16 x float>, <16 x float>* [[TMP40]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP37]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_4]], <16 x float*> [[TMP41]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP35:%.*]] = getelementptr float, float* [[PTR_IND_3]], <16 x i64> +; AVX512-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_4]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_4:%.*]] = load <16 x float>, <16 x float>* [[TMP37]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_4]], <16 x float*> [[TMP35]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP38:%.*]] = bitcast float* [[NEXT_GEP_4]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_4:%.*]] = load <16 x float>, <16 x float>* [[TMP38]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP35]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_4]], <16 x float*> [[TMP39]], i32 4, <16 x i1> ), !alias.scope !5, 
!noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_4:%.*]] = or i64 [[INDEX]], 80 ; AVX512-NEXT: [[PTR_IND_4:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1280 ; AVX512-NEXT: [[NEXT_GEP_5:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_4]] -; AVX512-NEXT: [[TMP42:%.*]] = getelementptr float, float* [[PTR_IND_4]], <16 x i64> -; AVX512-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_5]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP43]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_5:%.*]] = load <16 x float>, <16 x float>* [[TMP44]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_5]], <16 x float*> [[TMP42]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP45:%.*]] = bitcast float* [[NEXT_GEP_5]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_5:%.*]] = load <16 x float>, <16 x float>* [[TMP45]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP42]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_5]], <16 x float*> [[TMP46]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP40:%.*]] = getelementptr float, float* [[PTR_IND_4]], <16 x i64> +; AVX512-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_5]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP42:%.*]] = bitcast float* [[TMP41]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_5:%.*]] = load <16 x float>, <16 x float>* [[TMP42]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_5]], <16 x float*> [[TMP40]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP43:%.*]] = bitcast float* [[NEXT_GEP_5]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_5:%.*]] = load <16 x float>, <16 x float>* [[TMP43]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP44:%.*]] = 
getelementptr inbounds float, <16 x float*> [[TMP40]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_5]], <16 x float*> [[TMP44]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_5:%.*]] = or i64 [[INDEX]], 96 ; AVX512-NEXT: [[PTR_IND_5:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1536 ; AVX512-NEXT: [[NEXT_GEP_6:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_5]] -; AVX512-NEXT: [[TMP47:%.*]] = getelementptr float, float* [[PTR_IND_5]], <16 x i64> -; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_6]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP48]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_6:%.*]] = load <16 x float>, <16 x float>* [[TMP49]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_6]], <16 x float*> [[TMP47]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP50:%.*]] = bitcast float* [[NEXT_GEP_6]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_6:%.*]] = load <16 x float>, <16 x float>* [[TMP50]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP47]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_6]], <16 x float*> [[TMP51]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP45:%.*]] = getelementptr float, float* [[PTR_IND_5]], <16 x i64> +; AVX512-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_6]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_6:%.*]] = load <16 x float>, <16 x float>* [[TMP47]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_6]], <16 x float*> [[TMP45]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; 
AVX512-NEXT: [[TMP48:%.*]] = bitcast float* [[NEXT_GEP_6]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_6:%.*]] = load <16 x float>, <16 x float>* [[TMP48]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP45]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_6]], <16 x float*> [[TMP49]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_6:%.*]] = or i64 [[INDEX]], 112 ; AVX512-NEXT: [[PTR_IND_6:%.*]] = getelementptr float, float* [[POINTER_PHI]], i64 1792 ; AVX512-NEXT: [[NEXT_GEP_7:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_NEXT_6]] -; AVX512-NEXT: [[TMP52:%.*]] = getelementptr float, float* [[PTR_IND_6]], <16 x i64> -; AVX512-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_7]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP54:%.*]] = bitcast float* [[TMP53]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_7:%.*]] = load <16 x float>, <16 x float>* [[TMP54]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_7]], <16 x float*> [[TMP52]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP55:%.*]] = bitcast float* [[NEXT_GEP_7]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP55]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP52]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP56]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP50:%.*]] = getelementptr float, float* [[PTR_IND_6]], <16 x i64> +; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_7]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP51]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_7:%.*]] = load <16 x float>, <16 x 
float>* [[TMP52]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_7]], <16 x float*> [[TMP50]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP53:%.*]] = bitcast float* [[NEXT_GEP_7]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP53]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP50]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP54]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 128 ; AVX512-NEXT: [[PTR_IND_7]] = getelementptr float, float* [[POINTER_PHI]], i64 2048 ; AVX512-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 @@ -1560,22 +1558,22 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] ; AVX512-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ] ; AVX512-NEXT: [[NEXT_GEP_EPIL:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_EPIL]] -; AVX512-NEXT: [[TMP57:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> -; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP58]] to <16 x float>* -; AVX512-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP59]], align 4, !alias.scope !2 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_EPIL]], <16 x float*> [[TMP57]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 -; AVX512-NEXT: [[TMP60:%.*]] = bitcast float* [[NEXT_GEP_EPIL]] to <16 x float>* -; AVX512-NEXT: 
[[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP60]], align 4, !alias.scope !9 -; AVX512-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP57]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP61]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP55:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> +; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP57:%.*]] = bitcast float* [[TMP56]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP57]], align 4, !alias.scope !2 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD_EPIL]], <16 x float*> [[TMP55]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 +; AVX512-NEXT: [[TMP58:%.*]] = bitcast float* [[NEXT_GEP_EPIL]] to <16 x float>* +; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP58]], align 4, !alias.scope !9 +; AVX512-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP55]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP59]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7 ; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16 ; AVX512-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 256 ; AVX512-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; AVX512-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] ; AVX512-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP11:![0-9]+]] ; AVX512: middle.block: -; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label 
[[FOR_END]], label [[FOR_BODY_PREHEADER]] ; AVX512: for.body.preheader: ; AVX512-NEXT: [[PTR_ADDR_012_PH:%.*]] = phi float* [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] @@ -1585,11 +1583,11 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR_ADDR_012_PH]], [[FOR_BODY_PREHEADER]] ] ; AVX512-NEXT: [[DEST_ADDR_011:%.*]] = phi float* [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ], [ [[DEST_ADDR_011_PH]], [[FOR_BODY_PREHEADER]] ] ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP62:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; AVX512-NEXT: store float [[TMP62]], float* [[DEST_ADDR_011]], align 4 -; AVX512-NEXT: [[TMP63:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 +; AVX512-NEXT: [[TMP60:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; AVX512-NEXT: store float [[TMP60]], float* [[DEST_ADDR_011]], align 4 +; AVX512-NEXT: [[TMP61:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 ; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1 -; AVX512-NEXT: store float [[TMP63]], float* [[ARRAYIDX5]], align 4 +; AVX512-NEXT: store float [[TMP61]], float* [[ARRAYIDX5]], align 4 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16 ; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]] @@ -1606,25 +1604,23 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2: for.body.lr.ph: ; FVW2-NEXT: [[MUL:%.*]] = sub nsw i32 0, [[D]] ; FVW2-NEXT: [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64 -; FVW2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; FVW2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -4 -; 
FVW2-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 2 -; FVW2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 12 +; FVW2-NEXT: [[TMP0:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; FVW2-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4611686018427387903 +; FVW2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 3 ; FVW2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; FVW2: vector.memcheck: -; FVW2-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; FVW2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 -; FVW2-NEXT: [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2 -; FVW2-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP5]], 2 -; FVW2-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], 2 -; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP8]] -; FVW2-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; FVW2-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]] +; FVW2-NEXT: [[TMP3:%.*]] = add nsw i64 [[IDX_EXT]], 4611686018427387903 +; FVW2-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4611686018427387903 +; FVW2-NEXT: [[TMP5:%.*]] = shl i64 [[TMP3]], 4 +; FVW2-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], 2 +; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 [[TMP6]] +; FVW2-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; FVW2-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP7]] ; FVW2-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; FVW2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]] -; FVW2-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]] +; FVW2-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; FVW2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[IDX_EXT]] +; FVW2-NEXT: [[SCEVGEP8:%.*]] = getelementptr float, float* 
[[PTR]], i64 [[TMP9]] ; FVW2-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[DEST]] ; FVW2-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[PTR]] ; FVW2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -1634,61 +1630,61 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]] ; FVW2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] ; FVW2: vector.ph: -; FVW2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775804 +; FVW2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 ; FVW2-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]] -; FVW2-NEXT: [[TMP12:%.*]] = shl i64 [[N_VEC]], 4 -; FVW2-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]] +; FVW2-NEXT: [[TMP10:%.*]] = shl i64 [[N_VEC]], 4 +; FVW2-NEXT: [[IND_END14:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP10]] ; FVW2-NEXT: br label [[VECTOR_BODY:%.*]] ; FVW2: vector.body: ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FVW2-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; FVW2-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP13]] +; FVW2-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4 +; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP11]] +; FVW2-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 4 +; FVW2-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], 16 +; FVW2-NEXT: [[NEXT_GEP17:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP13]] ; FVW2-NEXT: [[TMP14:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 16 -; FVW2-NEXT: [[NEXT_GEP17:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]] +; FVW2-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], 32 +; FVW2-NEXT: [[NEXT_GEP18:%.*]] = 
getelementptr float, float* [[DEST]], i64 [[TMP15]] ; FVW2-NEXT: [[TMP16:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], 32 -; FVW2-NEXT: [[NEXT_GEP18:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]] -; FVW2-NEXT: [[TMP18:%.*]] = shl i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], 48 -; FVW2-NEXT: [[NEXT_GEP19:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP19]] -; FVW2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; FVW2-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], 48 +; FVW2-NEXT: [[NEXT_GEP19:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]] +; FVW2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; FVW2-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP19]], align 4, !alias.scope !7 +; FVW2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 2 ; FVW2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP21]], align 4, !alias.scope !7 -; FVW2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 2 -; FVW2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x float>, <2 x float>* [[TMP23]], align 4, !alias.scope !7 -; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 -; FVW2-NEXT: store float [[TMP24]], float* [[NEXT_GEP16]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 -; FVW2-NEXT: store float [[TMP25]], float* [[NEXT_GEP17]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 0 -; FVW2-NEXT: store float [[TMP26]], float* [[NEXT_GEP18]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: 
[[TMP27:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 1 -; FVW2-NEXT: store float [[TMP27]], float* [[NEXT_GEP19]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP28:%.*]] = bitcast float* [[NEXT_GEP]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, <2 x float>* [[TMP28]], align 4, !alias.scope !14 -; FVW2-NEXT: [[TMP29:%.*]] = getelementptr float, float* [[NEXT_GEP]], i64 2 -; FVW2-NEXT: [[TMP30:%.*]] = bitcast float* [[TMP29]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, <2 x float>* [[TMP30]], align 4, !alias.scope !14 -; FVW2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1 -; FVW2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP17]], i64 1 -; FVW2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP18]], i64 1 -; FVW2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP19]], i64 1 -; FVW2-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 0 +; FVW2-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x float>, <2 x float>* [[TMP21]], align 4, !alias.scope !7 +; FVW2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 +; FVW2-NEXT: store float [[TMP22]], float* [[NEXT_GEP16]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 +; FVW2-NEXT: store float [[TMP23]], float* [[NEXT_GEP17]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 0 +; FVW2-NEXT: store float [[TMP24]], float* [[NEXT_GEP18]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[WIDE_LOAD20]], i64 1 +; FVW2-NEXT: store float [[TMP25]], float* [[NEXT_GEP19]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP26:%.*]] = bitcast float* [[NEXT_GEP]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, <2 x float>* 
[[TMP26]], align 4, !alias.scope !14 +; FVW2-NEXT: [[TMP27:%.*]] = getelementptr float, float* [[NEXT_GEP]], i64 2 +; FVW2-NEXT: [[TMP28:%.*]] = bitcast float* [[TMP27]] to <2 x float>* +; FVW2-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, <2 x float>* [[TMP28]], align 4, !alias.scope !14 +; FVW2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1 +; FVW2-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP17]], i64 1 +; FVW2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP18]], i64 1 +; FVW2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP19]], i64 1 +; FVW2-NEXT: [[TMP33:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 0 +; FVW2-NEXT: store float [[TMP33]], float* [[TMP29]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 1 +; FVW2-NEXT: store float [[TMP34]], float* [[TMP30]], align 4, !alias.scope !10, !noalias !12 +; FVW2-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 0 ; FVW2-NEXT: store float [[TMP35]], float* [[TMP31]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[WIDE_LOAD21]], i64 1 +; FVW2-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 1 ; FVW2-NEXT: store float [[TMP36]], float* [[TMP32]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 0 -; FVW2-NEXT: store float [[TMP37]], float* [[TMP33]], align 4, !alias.scope !10, !noalias !12 -; FVW2-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[WIDE_LOAD22]], i64 1 -; FVW2-NEXT: store float [[TMP38]], float* [[TMP34]], align 4, !alias.scope !10, !noalias !12 ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; FVW2-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP15:![0-9]+]] +; FVW2-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; FVW2-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; FVW2: middle.block: -; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; FVW2-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; FVW2: for.body.preheader: ; FVW2-NEXT: [[PTR_ADDR_012_PH:%.*]] = phi float* [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] @@ -1698,11 +1694,11 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl ; FVW2-NEXT: [[PTR_ADDR_012:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR_ADDR_012_PH]], [[FOR_BODY_PREHEADER]] ] ; FVW2-NEXT: [[DEST_ADDR_011:%.*]] = phi float* [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ], [ [[DEST_ADDR_011_PH]], [[FOR_BODY_PREHEADER]] ] ; FVW2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]] -; FVW2-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; FVW2-NEXT: store float [[TMP40]], float* [[DEST_ADDR_011]], align 4 -; FVW2-NEXT: [[TMP41:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 +; FVW2-NEXT: [[TMP38:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; FVW2-NEXT: store float [[TMP38]], float* [[DEST_ADDR_011]], align 4 +; FVW2-NEXT: [[TMP39:%.*]] = load float, float* [[PTR_ADDR_012]], align 4 ; FVW2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1 -; FVW2-NEXT: store float [[TMP41]], float* [[ARRAYIDX5]], align 4 +; FVW2-NEXT: store float [[TMP39]], float* [[ARRAYIDX5]], align 4 ; FVW2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1 ; FVW2-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16 ; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]] From 
e60d6dfd5acdc821d391ad5af2c706397bdfd36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 30 Dec 2021 17:43:23 +0000 Subject: [PATCH 237/992] [lld] Add support for other demanglers other than Itanium LLVM core library supports demangling other mangled symbols other than itanium, such as D and Rust. LLD should use those demanglers in order to output pretty demangled symbols on error messages. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D116279 --- lld/COFF/Symbols.cpp | 4 ++-- lld/Common/Strings.cpp | 13 ------------- lld/ELF/SymbolTable.cpp | 9 +++++---- lld/ELF/Symbols.cpp | 11 ++--------- lld/MachO/Symbols.cpp | 12 ++++-------- lld/include/lld/Common/Strings.h | 11 ++++++++--- lld/test/ELF/undef.s | 15 +++++++++++++++ lld/wasm/Symbols.cpp | 6 +++--- 8 files changed, 39 insertions(+), 42 deletions(-) diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 8a6a9b27d45f..a03cb03f8d17 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -36,9 +36,9 @@ static std::string maybeDemangleSymbol(StringRef symName) { StringRef demangleInput = prefixless; if (config->machine == I386) demangleInput.consume_front("_"); - std::string demangled = demangle(std::string(demangleInput)); + std::string demangled = demangle(demangleInput, true); if (demangled != demangleInput) - return prefix + demangle(std::string(demangleInput)); + return prefix + demangle(demangleInput, true); return (prefix + prefixless).str(); } return std::string(symName); diff --git a/lld/Common/Strings.cpp b/lld/Common/Strings.cpp index 7bf336490dae..6e5478e335ca 100644 --- a/lld/Common/Strings.cpp +++ b/lld/Common/Strings.cpp @@ -9,7 +9,6 @@ #include "lld/Common/Strings.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/GlobPattern.h" #include @@ -19,18 +18,6 @@ using namespace llvm; using namespace lld; -// Returns the 
demangled C++ symbol name for name. -std::string lld::demangleItanium(StringRef name) { - // demangleItanium() can be called for all symbols. Only demangle C++ symbols, - // to avoid getting unexpected result for a C symbol that happens to match a - // mangled type name such as "Pi" (which would demangle to "int*"). - if (!name.startswith("_Z") && !name.startswith("__Z") && - !name.startswith("___Z") && !name.startswith("____Z")) - return std::string(name); - - return demangle(std::string(name)); -} - SingleStringMatcher::SingleStringMatcher(StringRef Pattern) { if (Pattern.size() > 2 && Pattern.startswith("\"") && Pattern.endswith("\"")) { diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index c93a166daa6e..ec425cd7e1d1 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -143,12 +143,13 @@ StringMap> &SymbolTable::getDemangledSyms() { StringRef name = sym->getName(); size_t pos = name.find('@'); if (pos == std::string::npos) - demangled = demangleItanium(name); + demangled = demangle(name, config->demangle); else if (pos + 1 == name.size() || name[pos + 1] == '@') - demangled = demangleItanium(name.substr(0, pos)); + demangled = demangle(name.substr(0, pos), config->demangle); else - demangled = - (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str(); + demangled = (demangle(name.substr(0, pos), config->demangle) + + name.substr(pos)) + .str(); (*demangledSyms)[demangled].push_back(sym); } } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index acb0dd27d0ab..ed83b75953dd 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -26,16 +26,9 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -// Returns a symbol for an error message. 
-static std::string demangle(StringRef symName) { - if (elf::config->demangle) - return demangleItanium(symName); - return std::string(symName); -} - std::string lld::toString(const elf::Symbol &sym) { StringRef name = sym.getName(); - std::string ret = demangle(name); + std::string ret = demangle(name, config->demangle); const char *suffix = sym.getVersionSuffix(); if (*suffix == '@') @@ -44,7 +37,7 @@ std::string lld::toString(const elf::Symbol &sym) { } std::string lld::toELFString(const Archive::Symbol &b) { - return demangle(b.getName()); + return demangle(b.getName(), config->demangle); } Defined *ElfSym::bss; diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp index bb6d073dcf30..b0ebd224affb 100644 --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -9,6 +9,7 @@ #include "Symbols.h" #include "InputFiles.h" #include "SyntheticSections.h" +#include "lld/Common/Strings.h" using namespace llvm; using namespace lld; @@ -27,17 +28,12 @@ static_assert(sizeof(void *) != 8 || sizeof(Defined) == 80, static_assert(sizeof(SymbolUnion) == sizeof(Defined), "Defined should be the largest Symbol kind"); -// Returns a symbol for an error message. 
-static std::string demangle(StringRef symName) { - if (config->demangle) - return demangleItanium(symName); - return std::string(symName); +std::string lld::toString(const Symbol &sym) { + return demangle(sym.getName(), config->demangle); } -std::string lld::toString(const Symbol &sym) { return demangle(sym.getName()); } - std::string lld::toMachOString(const object::Archive::Symbol &b) { - return demangle(b.getName()); + return demangle(b.getName(), config->demangle); } uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); } diff --git a/lld/include/lld/Common/Strings.h b/lld/include/lld/Common/Strings.h index 71126f615017..ece801892767 100644 --- a/lld/include/lld/Common/Strings.h +++ b/lld/include/lld/Common/Strings.h @@ -12,14 +12,19 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/Support/GlobPattern.h" #include #include namespace lld { -// Returns a demangled C++ symbol name. If Name is not a mangled -// name, it returns name. -std::string demangleItanium(llvm::StringRef name); +// Returns a demangled symbol name. If Name is not a mangled name, it returns +// name. 
+inline std::string demangle(llvm::StringRef symName, bool shouldDemangle) { + if (shouldDemangle) + return llvm::demangle(symName.str().c_str()); + return std::string(symName); +} std::vector parseHex(llvm::StringRef s); bool isValidCIdentifier(llvm::StringRef s); diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s index 931a482e1810..6398b73d51e6 100644 --- a/lld/test/ELF/undef.s +++ b/lld/test/ELF/undef.s @@ -31,6 +31,18 @@ # CHECK-NEXT: >>> referenced by undef.s # CHECK-NEXT: >>> {{.*}}:(.text+0x1A) +# CHECK: error: undefined symbol: Pi +# CHECK-NEXT: >>> referenced by undef.s +# CHECK-NEXT: >>> {{.*}}:(.text+0x1F) + +# CHECK: error: undefined symbol: D main +# CHECK-NEXT: >>> referenced by undef.s +# CHECK-NEXT: >>> {{.*}}:(.text+0x24) + +# CHECK: error: undefined symbol: a::main +# CHECK-NEXT: >>> referenced by undef.s +# CHECK-NEXT: >>> {{.*}}:(.text+0x29) + # CHECK: error: undefined symbol: zed2 # CHECK-NEXT: >>> referenced by {{.*}}.o:(.text+0x0) in archive {{.*}}2.a @@ -84,3 +96,6 @@ _start: call _Z3fooi call _ZTV3Foo call __Z3fooi + call Pi + call _Dmain + call _RNvC1a4main diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 684f4832b267..6c134fd380bc 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -22,6 +22,7 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::wasm; +using namespace lld::wasm; namespace lld { std::string toString(const wasm::Symbol &sym) { @@ -33,9 +34,8 @@ std::string maybeDemangleSymbol(StringRef name) { // `main` in the case where we need to pass it arguments. 
if (name == "__main_argc_argv") return "main"; - if (wasm::config->demangle) - return demangleItanium(name); - return std::string(name); + + return demangle(name, config->demangle); } std::string toString(wasm::Symbol::Kind kind) { From 4a8cef157b758e3f3d85f9fa63ebc7b5ee02f488 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 30 Dec 2021 09:57:51 -0800 Subject: [PATCH 238/992] [mlir] Change SCF/Complex to prefixed (NFC) See https://llvm.discourse.group/t/psa-ods-generated-accessors-will-change-to-have-a-get-prefix-update-you-apis/4476 --- mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td | 2 +- mlir/include/mlir/Dialect/SCF/SCFOps.td | 2 +- mlir/lib/Dialect/SCF/SCF.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td index 9981f69be4df..4382183254ac 100644 --- a/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td +++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td @@ -21,7 +21,7 @@ def Complex_Dialect : Dialect { let dependentDialects = ["arith::ArithmeticDialect", "StandardOpsDialect"]; let hasConstantMaterializer = 1; - let emitAccessorPrefix = kEmitAccessorPrefix_Both; + let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed; } #endif // COMPLEX_BASE diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index 01386ce4a5c2..e3ae535f59d3 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -21,7 +21,7 @@ def SCF_Dialect : Dialect { let name = "scf"; let cppNamespace = "::mlir::scf"; let dependentDialects = ["arith::ArithmeticDialect"]; - let emitAccessorPrefix = kEmitAccessorPrefix_Both; + let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed; } // Base class for SCF dialect ops. 
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 534c0f03a0b7..1ca7e49c5a3a 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -1631,8 +1631,8 @@ struct CombineNestedIfs : public OpRewritePattern { return failure(); Location loc = op.getLoc(); - Value newCondition = rewriter.create(loc, op.condition(), - nestedIf.condition()); + Value newCondition = rewriter.create( + loc, op.getCondition(), nestedIf.getCondition()); auto newIf = rewriter.create(loc, newCondition); Block *newIfBlock = newIf.thenBlock(); rewriter.eraseOp(newIfBlock->getTerminator()); From cd997689f26d1f6558ab7a04499bd5594d09861e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Dec 2021 09:55:44 -0800 Subject: [PATCH 239/992] [Hexagon] Fix isTypeForHVX to recognize floating point types Co-authored-by: Sumanth Gundapaneni --- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 4 +++- llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 08bb4580b585..21bb1633fa79 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -228,7 +228,9 @@ bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const { if (!VecTy->isVectorTy() || isa(VecTy)) return false; // Avoid types like <2 x i32*>. - if (!cast(VecTy)->getElementType()->isIntegerTy()) + Type *ScalTy = VecTy->getScalarType(); + if (!ScalTy->isIntegerTy() && + !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint())) return false; // The given type may be something like <17 x i32>, which is not MVT, // but can be represented as (non-simple) EVT. 
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll index 884eb6e7ac75..6ebe077ae120 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-vector-combine=false < %s | FileCheck %s define void @f0(<128 x i8> %a0, <128 x i8>* %a1) #0 { ; CHECK-LABEL: f0: From 8792cd75d0ccc0a9043b5456f147a198a9d90cd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 30 Dec 2021 18:04:21 +0000 Subject: [PATCH 240/992] Revert "[lld] Add support for other demanglers other than Itanium" This reverts commit e60d6dfd5acdc821d391ad5af2c706397bdfd36a. clang-ppc64le-rhel buildbot failed (https://lab.llvm.org/buildbot#builders/57/builds/13424): tools/lld/MachO/CMakeFiles/lldMachO.dir/Symbols.cpp.o: In function `lld::demangle(llvm::StringRef, bool)': Symbols.cpp:(.text._ZN3lld8demangleEN4llvm9StringRefEb[_ZN3lld8demangleEN4llvm9StringRefEb]+0x90): undefined reference to `llvm::demangle(std::string const&)' --- lld/COFF/Symbols.cpp | 4 ++-- lld/Common/Strings.cpp | 13 +++++++++++++ lld/ELF/SymbolTable.cpp | 9 ++++----- lld/ELF/Symbols.cpp | 11 +++++++++-- lld/MachO/Symbols.cpp | 12 ++++++++---- lld/include/lld/Common/Strings.h | 11 +++-------- lld/test/ELF/undef.s | 15 --------------- lld/wasm/Symbols.cpp | 6 +++--- 8 files changed, 42 insertions(+), 39 deletions(-) diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index a03cb03f8d17..8a6a9b27d45f 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -36,9 +36,9 @@ static std::string maybeDemangleSymbol(StringRef symName) { StringRef demangleInput = prefixless; if (config->machine == I386) demangleInput.consume_front("_"); - std::string demangled = demangle(demangleInput, true); + std::string demangled = 
demangle(std::string(demangleInput)); if (demangled != demangleInput) - return prefix + demangle(demangleInput, true); + return prefix + demangle(std::string(demangleInput)); return (prefix + prefixless).str(); } return std::string(symName); diff --git a/lld/Common/Strings.cpp b/lld/Common/Strings.cpp index 6e5478e335ca..7bf336490dae 100644 --- a/lld/Common/Strings.cpp +++ b/lld/Common/Strings.cpp @@ -9,6 +9,7 @@ #include "lld/Common/Strings.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/GlobPattern.h" #include @@ -18,6 +19,18 @@ using namespace llvm; using namespace lld; +// Returns the demangled C++ symbol name for name. +std::string lld::demangleItanium(StringRef name) { + // demangleItanium() can be called for all symbols. Only demangle C++ symbols, + // to avoid getting unexpected result for a C symbol that happens to match a + // mangled type name such as "Pi" (which would demangle to "int*"). 
+ if (!name.startswith("_Z") && !name.startswith("__Z") && + !name.startswith("___Z") && !name.startswith("____Z")) + return std::string(name); + + return demangle(std::string(name)); +} + SingleStringMatcher::SingleStringMatcher(StringRef Pattern) { if (Pattern.size() > 2 && Pattern.startswith("\"") && Pattern.endswith("\"")) { diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index ec425cd7e1d1..c93a166daa6e 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -143,13 +143,12 @@ StringMap> &SymbolTable::getDemangledSyms() { StringRef name = sym->getName(); size_t pos = name.find('@'); if (pos == std::string::npos) - demangled = demangle(name, config->demangle); + demangled = demangleItanium(name); else if (pos + 1 == name.size() || name[pos + 1] == '@') - demangled = demangle(name.substr(0, pos), config->demangle); + demangled = demangleItanium(name.substr(0, pos)); else - demangled = (demangle(name.substr(0, pos), config->demangle) + - name.substr(pos)) - .str(); + demangled = + (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str(); (*demangledSyms)[demangled].push_back(sym); } } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index ed83b75953dd..acb0dd27d0ab 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -26,9 +26,16 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; +// Returns a symbol for an error message. 
+static std::string demangle(StringRef symName) { + if (elf::config->demangle) + return demangleItanium(symName); + return std::string(symName); +} + std::string lld::toString(const elf::Symbol &sym) { StringRef name = sym.getName(); - std::string ret = demangle(name, config->demangle); + std::string ret = demangle(name); const char *suffix = sym.getVersionSuffix(); if (*suffix == '@') @@ -37,7 +44,7 @@ std::string lld::toString(const elf::Symbol &sym) { } std::string lld::toELFString(const Archive::Symbol &b) { - return demangle(b.getName(), config->demangle); + return demangle(b.getName()); } Defined *ElfSym::bss; diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp index b0ebd224affb..bb6d073dcf30 100644 --- a/lld/MachO/Symbols.cpp +++ b/lld/MachO/Symbols.cpp @@ -9,7 +9,6 @@ #include "Symbols.h" #include "InputFiles.h" #include "SyntheticSections.h" -#include "lld/Common/Strings.h" using namespace llvm; using namespace lld; @@ -28,12 +27,17 @@ static_assert(sizeof(void *) != 8 || sizeof(Defined) == 80, static_assert(sizeof(SymbolUnion) == sizeof(Defined), "Defined should be the largest Symbol kind"); -std::string lld::toString(const Symbol &sym) { - return demangle(sym.getName(), config->demangle); +// Returns a symbol for an error message. 
+static std::string demangle(StringRef symName) { + if (config->demangle) + return demangleItanium(symName); + return std::string(symName); } +std::string lld::toString(const Symbol &sym) { return demangle(sym.getName()); } + std::string lld::toMachOString(const object::Archive::Symbol &b) { - return demangle(b.getName(), config->demangle); + return demangle(b.getName()); } uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); } diff --git a/lld/include/lld/Common/Strings.h b/lld/include/lld/Common/Strings.h index ece801892767..71126f615017 100644 --- a/lld/include/lld/Common/Strings.h +++ b/lld/include/lld/Common/Strings.h @@ -12,19 +12,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/Support/GlobPattern.h" #include #include namespace lld { -// Returns a demangled symbol name. If Name is not a mangled name, it returns -// name. -inline std::string demangle(llvm::StringRef symName, bool shouldDemangle) { - if (shouldDemangle) - return llvm::demangle(symName.str().c_str()); - return std::string(symName); -} +// Returns a demangled C++ symbol name. If Name is not a mangled +// name, it returns name. 
+std::string demangleItanium(llvm::StringRef name); std::vector parseHex(llvm::StringRef s); bool isValidCIdentifier(llvm::StringRef s); diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s index 6398b73d51e6..931a482e1810 100644 --- a/lld/test/ELF/undef.s +++ b/lld/test/ELF/undef.s @@ -31,18 +31,6 @@ # CHECK-NEXT: >>> referenced by undef.s # CHECK-NEXT: >>> {{.*}}:(.text+0x1A) -# CHECK: error: undefined symbol: Pi -# CHECK-NEXT: >>> referenced by undef.s -# CHECK-NEXT: >>> {{.*}}:(.text+0x1F) - -# CHECK: error: undefined symbol: D main -# CHECK-NEXT: >>> referenced by undef.s -# CHECK-NEXT: >>> {{.*}}:(.text+0x24) - -# CHECK: error: undefined symbol: a::main -# CHECK-NEXT: >>> referenced by undef.s -# CHECK-NEXT: >>> {{.*}}:(.text+0x29) - # CHECK: error: undefined symbol: zed2 # CHECK-NEXT: >>> referenced by {{.*}}.o:(.text+0x0) in archive {{.*}}2.a @@ -96,6 +84,3 @@ _start: call _Z3fooi call _ZTV3Foo call __Z3fooi - call Pi - call _Dmain - call _RNvC1a4main diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 6c134fd380bc..684f4832b267 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -22,7 +22,6 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::wasm; -using namespace lld::wasm; namespace lld { std::string toString(const wasm::Symbol &sym) { @@ -34,8 +33,9 @@ std::string maybeDemangleSymbol(StringRef name) { // `main` in the case where we need to pass it arguments. 
if (name == "__main_argc_argv") return "main"; - - return demangle(name, config->demangle); + if (wasm::config->demangle) + return demangleItanium(name); + return std::string(name); } std::string toString(wasm::Symbol::Kind kind) { From eb574259b69641ae230d25299d5dadcffb394218 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Dec 2021 09:03:31 -0800 Subject: [PATCH 241/992] [Hexagon] Handle HVX/FP {masked,wide} loads/stores Co-authored-by: Rahul Utkoor Co-authored-by: Anirudh Sundar Subramaniam --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 10 ++++++++++ .../CodeGen/Hexagon/autohvx/isel-mstore-fp16.ll | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-mstore-fp16.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 6c7e18a3a207..ef7f87066381 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -119,6 +119,16 @@ HexagonTargetLowering::initializeHVXLowering() { // Make concat-vectors custom to handle concats of more than 2 vectors. 
setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom); + + setOperationAction(ISD::LOAD, MVT::v64f32, Custom); + setOperationAction(ISD::STORE, MVT::v64f32, Custom); + + setOperationAction(ISD::MLOAD, MVT::v32f32, Custom); + setOperationAction(ISD::MSTORE, MVT::v32f32, Custom); + setOperationAction(ISD::MLOAD, MVT::v64f16, Custom); + setOperationAction(ISD::MSTORE, MVT::v64f16, Custom); + setOperationAction(ISD::MLOAD, MVT::v64f32, Custom); + setOperationAction(ISD::MSTORE, MVT::v64f32, Custom); } for (MVT T : LegalV) { diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-mstore-fp16.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-mstore-fp16.ll new file mode 100644 index 000000000000..923660cfecc0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-mstore-fp16.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + + +; Check for a non-crashing output. +; CHECK: vmem +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @fred() #0 { + tail call void @llvm.masked.store.v64f16.p0v64f16(<64 x half> , <64 x half>* undef, i32 64, <64 x i1> ) + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.masked.store.v64f16.p0v64f16(<64 x half>, <64 x half>*, i32 immarg, <64 x i1>) #0 + +attributes #0 = { argmemonly nounwind willreturn writeonly "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } From e107374e40a20de84a0fd9b2a6b828d53056b3d5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 12 Dec 2020 11:14:08 -0600 Subject: [PATCH 242/992] [Hexagon] Explicitly use integer types when rescaling a mask --- .../Target/Hexagon/HexagonVectorCombine.cpp | 7 +++-- .../CodeGen/Hexagon/autohvx/calling-conv.ll | 2 +- 
.../autohvx/vector-align-rescale-nonint.ll | 28 +++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/vector-align-rescale-nonint.ll diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 21386a91c7b3..755ad96e58c4 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -1181,12 +1181,15 @@ auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask, int ToCount = (FromCount * FromSize) / ToSize; assert((FromCount * FromSize) % ToSize == 0); + auto *FromITy = IntegerType::get(F.getContext(), FromSize * 8); + auto *ToITy = IntegerType::get(F.getContext(), ToSize * 8); + // Mask -> sext to -> bitcast to -> // -> trunc to . Value *Ext = Builder.CreateSExt( - Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false)); + Mask, VectorType::get(FromITy, FromCount, /*Scalable*/ false)); Value *Cast = Builder.CreateBitCast( - Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false)); + Ext, VectorType::get(ToITy, ToCount, /*Scalable*/ false)); return Builder.CreateTrunc( Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false)); } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll index 6ebe077ae120..884eb6e7ac75 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=hexagon -hexagon-vector-combine=false < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s define void @f0(<128 x i8> %a0, <128 x i8>* %a1) #0 { ; CHECK-LABEL: f0: diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-rescale-nonint.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-rescale-nonint.ll new file mode 100644 index 
000000000000..157207d5ad9d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-rescale-nonint.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this doesn't crash. +; CHECK: vmem + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @f0(float* %a0, <32 x float> %a1, <32 x float> %a2) local_unnamed_addr #0 { +b0: + %v0 = add nuw nsw i32 0, 64 + %v1 = getelementptr inbounds float, float* %a0, i32 %v0 + %v2 = bitcast float* %v1 to <32 x float>* + %v3 = add nuw nsw i32 0, 96 + %v4 = getelementptr inbounds float, float* %a0, i32 %v3 + %v5 = bitcast float* %v4 to <32 x float>* + br label %b1 + +b1: ; preds = %b1, %b0 + br i1 undef, label %b2, label %b1 + +b2: ; preds = %b1 + store <32 x float> %a1, <32 x float>* %v2, align 4 + store <32 x float> %a2, <32 x float>* %v5, align 4 + ret void +} + +attributes #0 = { "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } From 25ff448aac63339dc71f92d2e89af672ffdd775b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 10:37:17 -0800 Subject: [PATCH 243/992] [docs][llvm-profdata] Prefer double-dash long options To match the `--help` message and most other utilities. While here, change `option:: -output=output` to `option:: --output=` and omit the value name for the short options (convention of other utilities). 
Reviewed By: snehasish Differential Revision: https://reviews.llvm.org/D116353 --- llvm/docs/CommandGuide/llvm-profdata.rst | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index da5285b6ca4f..acea59daa057 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -54,16 +54,16 @@ arguments are processed once for each time they are seen. OPTIONS ^^^^^^^ -.. option:: -help +.. option:: --help Print a summary of command line options. -.. option:: -output=output, -o=output +.. option:: --output=, -o Specify the output file name. *Output* cannot be ``-`` as the resulting indexed profile data can't be written to standard output. -.. option:: -weighted-input=weight,filename +.. option:: --weighted-input= Specify an input file name along with a weight. The profile counts of the supplied ``filename`` will be scaled (multiplied) by the supplied @@ -71,13 +71,13 @@ OPTIONS Input files specified without using this option are assigned a default weight of 1. Examples are shown below. -.. option:: -input-files=path, -f=path +.. option:: --input-files=, -f Specify a file which contains a list of files to merge. The entries in this file are newline-separated. Lines starting with '#' are skipped. Entries may be of the form or ,. -.. option:: -remapping-file=path, -r=path +.. option:: --remapping-file=, -r Specify a file which contains a remapping from symbol names in the input profile to the symbol names that should be used in the output profile. The @@ -87,51 +87,51 @@ OPTIONS The :doc:`llvm-cxxmap ` tool can be used to generate the symbol remapping file. -.. option:: -instr (default) +.. option:: --instr (default) Specify that the input profile is an instrumentation-based profile. -.. option:: -sample +.. option:: --sample Specify that the input profile is a sample-based profile. 
The format of the generated file can be generated in one of three ways: - .. option:: -binary (default) + .. option:: --binary (default) Emit the profile using a binary encoding. For instrumentation-based profile the output format is the indexed binary format. - .. option:: -extbinary + .. option:: --extbinary Emit the profile using an extensible binary encoding. This option can only be used with sample-based profile. The extensible binary encoding can be more compact with compression enabled and can be loaded faster than the default binary encoding. - .. option:: -text + .. option:: --text Emit the profile in text mode. This option can also be used with both sample-based and instrumentation-based profile. When this option is used the profile will be dumped in the text format that is parsable by the profile reader. - .. option:: -gcc + .. option:: --gcc Emit the profile using GCC's gcov format (Not yet supported). -.. option:: -sparse[=true|false] +.. option:: --sparse[=true|false] Do not emit function records with 0 execution count. Can only be used in conjunction with -instr. Defaults to false, since it can inhibit compiler optimization during PGO. -.. option:: -num-threads=N, -j=N +.. option:: --num-threads=, -j Use N threads to perform profile merging. When N=0, llvm-profdata auto-detects an appropriate number of threads to use. This is the default. -.. option:: -failure-mode=[any|all] +.. option:: --failure-mode=[any|all] Set the failure mode. There are two options: 'any' causes the merge command to fail if any profiles are invalid, and 'all' causes the merge command to fail @@ -139,53 +139,53 @@ OPTIONS invalid profiles is excluded from the final merged product. The default failure mode is 'any'. -.. option:: -prof-sym-list=path +.. option:: --prof-sym-list= Specify a file which contains a list of symbols to generate profile symbol list in the profile. This option can only be used with sample-based profile in extbinary format. 
The entries in this file are newline-separated. -.. option:: -compress-all-sections=[true|false] +.. option:: --compress-all-sections=[true|false] Compress all sections when writing the profile. This option can only be used with sample-based profile in extbinary format. -.. option:: -use-md5=[true|false] +.. option:: --use-md5=[true|false] Use MD5 to represent string in name table when writing the profile. This option can only be used with sample-based profile in extbinary format. -.. option:: -gen-partial-profile=[true|false] +.. option:: --gen-partial-profile=[true|false] Mark the profile to be a partial profile which only provides partial profile coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. -.. option:: -supplement-instr-with-sample=path_to_sample_profile +.. option:: --supplement-instr-with-sample= Supplement an instrumentation profile with sample profile. The sample profile is the input of the flag. Output will be in instrumentation format (only works with -instr). -.. option:: -zero-counter-threshold=threshold_float_number +.. option:: --zero-counter-threshold= For the function which is cold in instr profile but hot in sample profile, if the ratio of the number of zero counters divided by the the total number of counters is above the threshold, the profile of the function will be regarded as being harmful for performance and will be dropped. -.. option:: -instr-prof-cold-threshold=threshold_int_number +.. option:: --instr-prof-cold-threshold= User specified cold threshold for instr profile which will override the cold threshold got from profile summary. -.. option:: -suppl-min-size-threshold=threshold_int_number +.. option:: --suppl-min-size-threshold= If the size of a function is smaller than the threshold, assume it can be inlined by PGO early inliner and it will not be adjusted based on sample profile. -.. option:: -debug-info=path +.. 
option:: --debug-info= Specify the executable or ``.dSYM`` that contains debug info for the raw profile. When ``-debug-info-correlate`` was used for instrumentation, use this option @@ -208,13 +208,13 @@ The input file ``foo.profdata`` is especially important, multiply its counts by :: - llvm-profdata merge -weighted-input=10,foo.profdata bar.profdata baz.profdata -output merged.profdata + llvm-profdata merge --weighted-input=10,foo.profdata bar.profdata baz.profdata --output merged.profdata Exactly equivalent to the previous invocation (explicit form; useful for programmatic invocation): :: - llvm-profdata merge -weighted-input=10,foo.profdata -weighted-input=1,bar.profdata -weighted-input=1,baz.profdata -output merged.profdata + llvm-profdata merge --weighted-input=10,foo.profdata --weighted-input=1,bar.profdata --weighted-input=1,baz.profdata --output merged.profdata .. program:: llvm-profdata show @@ -241,73 +241,73 @@ input from standard input. OPTIONS ^^^^^^^ -.. option:: -all-functions +.. option:: --all-functions Print details for every function. -.. option:: -counts +.. option:: --counts Print the counter values for the displayed functions. -.. option:: -function=string +.. option:: --function= Print details for a function if the function's name contains the given string. -.. option:: -help +.. option:: --help Print a summary of command line options. -.. option:: -output=output, -o=output +.. option:: --output=, -o Specify the output file name. If *output* is ``-`` or it isn't specified, then the output is sent to standard output. -.. option:: -instr (default) +.. option:: --instr (default) Specify that the input profile is an instrumentation-based profile. -.. option:: -text +.. option:: --text Instruct the profile dumper to show profile counts in the text format of the instrumentation-based profile data representation. By default, the profile information is dumped in a more human readable form (also in text) with annotations. -.. option:: -topn=n +.. 
option:: --topn= Instruct the profile dumper to show the top ``n`` functions with the hottest basic blocks in the summary section. By default, the topn functions are not dumped. -.. option:: -sample +.. option:: --sample Specify that the input profile is a sample-based profile. -.. option:: -memop-sizes +.. option:: --memop-sizes Show the profiled sizes of the memory intrinsic calls for shown functions. -.. option:: -value-cutoff=n +.. option:: --value-cutoff= Show only those functions whose max count values are greater or equal to ``n``. By default, the value-cutoff is set to 0. -.. option:: -list-below-cutoff +.. option:: --list-below-cutoff Only output names of functions whose max count value are below the cutoff value. -.. option:: -showcs +.. option:: --showcs Only show context sensitive profile counts. The default is to filter all context sensitive profile counts. -.. option:: -show-prof-sym-list=[true|false] +.. option:: --show-prof-sym-list=[true|false] Show profile symbol list if it exists in the profile. This option is only meaningful for sample-based profile in extbinary format. -.. option:: -show-sec-info-only=[true|false] +.. option:: --show-sec-info-only=[true|false] Show basic information about each section in the profile. This option is only meaningful for sample-based profile in extbinary format. @@ -354,25 +354,25 @@ Here is an example, if *base profile file* has counts of {400, 600}, and OPTIONS ^^^^^^^ -.. option:: -function=string +.. option:: --function= Print details for a function if the function's name contains the given string. -.. option:: -help +.. option:: --help Print a summary of command line options. -.. option:: -o=output or -o output +.. option:: --output=, -o Specify the output file name. If *output* is ``-`` or it isn't specified, then the output is sent to standard output. -.. option:: -value-cutoff=n +.. option:: --value-cutoff= Show only those functions whose max count values are greater or equal to ``n``. 
By default, the value-cutoff is set to max of unsigned long long. -.. option:: -cs +.. option:: --cs Only show overlap for the context sensitive profile counts. The default is to show non-context sensitive profile counts. From 890e8c8f7e9e448a870224068d6b4181d96a293d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 10:42:28 -0800 Subject: [PATCH 244/992] [Support] Add MemoryBuffer::dontNeedIfMmap On *NIX systems, this API calls madvise(MADV_DONTNEED) on read-only file mappings. It should not be used on a writable buffer. The API is used to implement ld.lld LTO memory saving trick (D116367). Note: on read-only file mappings, Linux's MADV_DONTNEED semantics match POSIX POSIX_MADV_DONTNEED and BSD systems' MADV_DONTNEED. On Windows, VirtualAllocEx MEM_COMMIT/MEM_RESET have similar semantics but are unfortunately not drop-in replacements. dontNeedIfMmap is currently a no-op. Reviewed By: aganea Differential Revision: https://reviews.llvm.org/D116366 --- llvm/include/llvm/Support/FileSystem.h | 2 ++ llvm/include/llvm/Support/MemoryBuffer.h | 7 +++++++ llvm/lib/Support/MemoryBuffer.cpp | 2 ++ llvm/lib/Support/Unix/Path.inc | 6 ++++++ llvm/lib/Support/Windows/Path.inc | 2 ++ 5 files changed, 19 insertions(+) diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h index 1a049533b82b..dabd384b400b 100644 --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -1279,6 +1279,7 @@ class mapped_file_region { } void unmapImpl(); + void dontNeedImpl(); std::error_code init(sys::fs::file_t FD, uint64_t Offset, mapmode Mode); @@ -1308,6 +1309,7 @@ class mapped_file_region { unmapImpl(); copyFrom(mapped_file_region()); } + void dontNeed() { dontNeedImpl(); } size_t size() const; char *data() const; diff --git a/llvm/include/llvm/Support/MemoryBuffer.h b/llvm/include/llvm/Support/MemoryBuffer.h index c9ceeedbf3dc..6385805eba1d 100644 --- a/llvm/include/llvm/Support/MemoryBuffer.h +++ 
b/llvm/include/llvm/Support/MemoryBuffer.h @@ -74,6 +74,13 @@ class MemoryBuffer { /// from. virtual StringRef getBufferIdentifier() const { return "Unknown buffer"; } + /// For read-only MemoryBuffer_MMap, mark the buffer as unused in the near + /// future and the kernel can free resources associated with it. Further + /// access is supported but may be expensive. This calls + /// madvise(MADV_DONTNEED) on read-only file mappings on *NIX systems. This + /// function should not be called on a writable buffer. + virtual void dontNeedIfMmap() {} + /// Open the specified file as a MemoryBuffer, returning a new MemoryBuffer /// if successful, otherwise returning null. /// diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index d3fa3c6f065d..345b0d4aede5 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -220,6 +220,8 @@ class MemoryBufferMMapFile : public MB { MemoryBuffer::BufferKind getBufferKind() const override { return MemoryBuffer::MemoryBuffer_MMap; } + + void dontNeedIfMmap() override { MFR.dontNeed(); } }; } // namespace diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index c0712e0a0681..a18650aadb6e 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -870,6 +870,12 @@ void mapped_file_region::unmapImpl() { ::munmap(Mapping, Size); } +void mapped_file_region::dontNeedImpl() { + assert(Mode == mapped_file_region::readonly); + if (Mapping) + ::madvise(Mapping, Size, MADV_DONTNEED); +} + int mapped_file_region::alignment() { return Process::getPageSizeEstimate(); } diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index b15e71a9ce2a..175a96a8ba6c 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -959,6 +959,8 @@ void mapped_file_region::unmapImpl() { } } +void mapped_file_region::dontNeedImpl() {} + int mapped_file_region::alignment() { SYSTEM_INFO SysInfo; 
::GetSystemInfo(&SysInfo); From 9d37d0ea34858288faf6351b9bdc0a0b91107c82 Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Thu, 30 Dec 2021 13:42:13 -0500 Subject: [PATCH 245/992] [Support] Expand `` as the base directory in configuration files. Extends response file expansion to recognize `` and expand to the current file's directory. This makes it much easier to author clang config files rooted in portable, potentially not-installed SDK directories. A typical use case may be something like the following: ``` # sample_sdk.cfg --target=sample -isystem /include -L /lib -T /ldscripts/link.ld ``` Reviewed By: sepavloff Differential Revision: https://reviews.llvm.org/D115604 --- clang/docs/ReleaseNotes.rst | 3 + clang/docs/UsersManual.rst | 18 +++++ ...ExpandResponseFilesCompilationDatabase.cpp | 2 +- llvm/include/llvm/Support/CommandLine.h | 9 ++- llvm/lib/Support/CommandLine.cpp | 74 +++++++++++++++---- llvm/unittests/Support/CommandLineTest.cpp | 67 ++++++++++++----- 6 files changed, 134 insertions(+), 39 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 44485fcd7e26..ce9b3547155a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -63,6 +63,9 @@ Non-comprehensive list of changes in this release - Maximum _ExtInt size was decreased from 16,777,215 bits to 8,388,608 bits. Motivation for this was discussed in PR51829. +- Configuration file syntax extended with ```` token. This expands to + the base path of the current config file. See :ref:`configuration-files` for + details. New Compiler Flags ------------------ diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 26da5a0ff255..1173fd337841 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -843,6 +843,8 @@ a special character, which is the convention used by GNU Make. The -MV option tells Clang to put double-quotes around the entire filename, which is the convention used by NMake and Jom. +.. 
_configuration-files: + Configuration files ------------------- @@ -917,6 +919,22 @@ relative to the including file. For example, if a configuration file `~/.llvm/target.cfg` contains the directive `@os/linux.opts`, the file `linux.opts` is searched for in the directory `~/.llvm/os`. +To generate paths relative to the configuration file, the `` token may +be used. This will expand to the absolute path of the directory containing the +configuration file. + +In cases where a configuration file is deployed alongside SDK contents, the +SDK directory can remain fully portable by using `` prefixed paths. +In this way, the user may only need to specify a root configuration file with +`--config` to establish every aspect of the SDK with the compiler: + +:: + + --target=foo + -isystem /include + -L /lib + -T /ldscripts/link.ld + Language and Target-Independent Features ======================================== diff --git a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp index 29787b8a8894..75d0d50d851f 100644 --- a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp +++ b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp @@ -61,7 +61,7 @@ class ExpandResponseFilesDatabase : public CompilationDatabase { continue; llvm::BumpPtrAllocator Alloc; llvm::StringSaver Saver(Alloc); - llvm::cl::ExpandResponseFiles(Saver, Tokenizer, Argv, false, false, + llvm::cl::ExpandResponseFiles(Saver, Tokenizer, Argv, false, false, false, llvm::StringRef(Cmd.Directory), *FS); // Don't assign directly, Argv aliases CommandLine. 
std::vector ExpandedArgv(Argv.begin(), Argv.end()); diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 2c3edd858a3f..120ab1840915 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -2082,7 +2082,8 @@ void tokenizeConfigFile(StringRef Source, StringSaver &Saver, /// /// It reads content of the specified file, tokenizes it and expands "@file" /// commands resolving file names in them relative to the directory where -/// CfgFilename resides. +/// CfgFilename resides. It also expands "" to the base path of the +/// current config file. /// bool readConfigFile(StringRef CfgFileName, StringSaver &Saver, SmallVectorImpl &Argv); @@ -2102,13 +2103,15 @@ bool readConfigFile(StringRef CfgFileName, StringSaver &Saver, /// with nullptrs in the Argv vector. /// \param [in] RelativeNames true if names of nested response files must be /// resolved relative to including file. +/// \param [in] ExpandBasePath If true, "" expands to the base path of +/// the current response file. /// \param [in] FS File system used for all file access when running the tool. /// \param [in] CurrentDir Path used to resolve relative rsp files. If set to /// None, process' cwd is used instead. /// \return true if all @files were expanded successfully or there were none. 
bool ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, SmallVectorImpl &Argv, bool MarkEOLs, - bool RelativeNames, + bool RelativeNames, bool ExpandBasePath, llvm::Optional CurrentDir, llvm::vfs::FileSystem &FS); @@ -2117,7 +2120,7 @@ bool ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, bool ExpandResponseFiles( StringSaver &Saver, TokenizerCallback Tokenizer, SmallVectorImpl &Argv, bool MarkEOLs = false, - bool RelativeNames = false, + bool RelativeNames = false, bool ExpandBasePath = false, llvm::Optional CurrentDir = llvm::None); /// A convenience helper which concatenates the options specified by the diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 4153a69abf5d..481ba56c4077 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1078,11 +1078,45 @@ static bool hasUTF8ByteOrderMark(ArrayRef S) { return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf'); } +// Substitute with the file's base path. +static void ExpandBasePaths(StringRef BasePath, StringSaver &Saver, + const char *&Arg) { + assert(sys::path::is_absolute(BasePath)); + constexpr StringLiteral Token(""); + const StringRef ArgString(Arg); + + SmallString<128> ResponseFile; + StringRef::size_type StartPos = 0; + for (StringRef::size_type TokenPos = ArgString.find(Token); + TokenPos != StringRef::npos; + TokenPos = ArgString.find(Token, StartPos)) { + // Token may appear more than once per arg (e.g. comma-separated linker + // args). Support by using path-append on any subsequent appearances. + const StringRef LHS = ArgString.substr(StartPos, TokenPos - StartPos); + if (ResponseFile.empty()) + ResponseFile = LHS; + else + llvm::sys::path::append(ResponseFile, LHS); + ResponseFile.append(BasePath); + StartPos = TokenPos + Token.size(); + } + + if (!ResponseFile.empty()) { + // Path-append the remaining arg substring if at least one token appeared. 
+ const StringRef Remaining = ArgString.substr(StartPos); + if (!Remaining.empty()) + llvm::sys::path::append(ResponseFile, Remaining); + Arg = Saver.save(ResponseFile.str()).data(); + } +} + // FName must be an absolute path. -static llvm::Error ExpandResponseFile( - StringRef FName, StringSaver &Saver, TokenizerCallback Tokenizer, - SmallVectorImpl &NewArgv, bool MarkEOLs, bool RelativeNames, - llvm::vfs::FileSystem &FS) { +static llvm::Error ExpandResponseFile(StringRef FName, StringSaver &Saver, + TokenizerCallback Tokenizer, + SmallVectorImpl &NewArgv, + bool MarkEOLs, bool RelativeNames, + bool ExpandBasePath, + llvm::vfs::FileSystem &FS) { assert(sys::path::is_absolute(FName)); llvm::ErrorOr> MemBufOrErr = FS.getBufferForFile(FName); @@ -1116,8 +1150,15 @@ static llvm::Error ExpandResponseFile( // file, replace the included response file names with their full paths // obtained by required resolution. for (auto &Arg : NewArgv) { + if (!Arg) + continue; + + // Substitute with the file's base path. + if (ExpandBasePath) + ExpandBasePaths(BasePath, Saver, Arg); + // Skip non-rsp file arguments. - if (!Arg || Arg[0] != '@') + if (Arg[0] != '@') continue; StringRef FileName(Arg + 1); @@ -1129,7 +1170,7 @@ static llvm::Error ExpandResponseFile( ResponseFile.push_back('@'); ResponseFile.append(BasePath); llvm::sys::path::append(ResponseFile, FileName); - Arg = Saver.save(ResponseFile.c_str()).data(); + Arg = Saver.save(ResponseFile.str()).data(); } return Error::success(); } @@ -1138,7 +1179,7 @@ static llvm::Error ExpandResponseFile( /// StringSaver and tokenization strategy. 
bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, SmallVectorImpl &Argv, bool MarkEOLs, - bool RelativeNames, + bool RelativeNames, bool ExpandBasePath, llvm::Optional CurrentDir, llvm::vfs::FileSystem &FS) { bool AllExpanded = true; @@ -1218,7 +1259,7 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, SmallVector ExpandedArgv; if (llvm::Error Err = ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs, - RelativeNames, FS)) { + RelativeNames, ExpandBasePath, FS)) { // We couldn't read this file, so we leave it in the argument stream and // move on. // TODO: The error should be propagated up the stack. @@ -1250,11 +1291,11 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, SmallVectorImpl &Argv, bool MarkEOLs, - bool RelativeNames, + bool RelativeNames, bool ExpandBasePath, llvm::Optional CurrentDir) { return ExpandResponseFiles(Saver, std::move(Tokenizer), Argv, MarkEOLs, - RelativeNames, std::move(CurrentDir), - *vfs::getRealFileSystem()); + RelativeNames, ExpandBasePath, + std::move(CurrentDir), *vfs::getRealFileSystem()); } bool cl::expandResponseFiles(int Argc, const char *const *Argv, @@ -1281,16 +1322,17 @@ bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver, llvm::sys::path::append(AbsPath, CfgFile); CfgFile = AbsPath.str(); } - if (llvm::Error Err = - ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv, - /*MarkEOLs=*/false, /*RelativeNames=*/true, - *llvm::vfs::getRealFileSystem())) { + if (llvm::Error Err = ExpandResponseFile( + CfgFile, Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs=*/false, /*RelativeNames=*/true, /*ExpandBasePath=*/true, + *llvm::vfs::getRealFileSystem())) { // TODO: The error should be propagated up the stack. 
llvm::consumeError(std::move(Err)); return false; } return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv, - /*MarkEOLs=*/false, /*RelativeNames=*/true); + /*MarkEOLs=*/false, /*RelativeNames=*/true, + /*ExpandBasePath=*/true, llvm::None); } static void initCommonOptions(); diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index db7255e5569a..4e1160fe2dbc 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -827,7 +827,7 @@ TEST(CommandLineTest, ResponseFiles) { llvm::BumpPtrAllocator A; llvm::StringSaver Saver(A); ASSERT_TRUE(llvm::cl::ExpandResponseFiles( - Saver, llvm::cl::TokenizeGNUCommandLine, Argv, false, true, + Saver, llvm::cl::TokenizeGNUCommandLine, Argv, false, true, false, /*CurrentDir=*/StringRef(TestRoot), FS)); EXPECT_THAT(Argv, testing::Pointwise( StringEquality(), @@ -889,9 +889,9 @@ TEST(CommandLineTest, RecursiveResponseFiles) { #else cl::TokenizerCallback Tokenizer = cl::TokenizeGNUCommandLine; #endif - ASSERT_FALSE(cl::ExpandResponseFiles(Saver, Tokenizer, Argv, false, false, - /*CurrentDir=*/llvm::StringRef(TestRoot), - FS)); + ASSERT_FALSE( + cl::ExpandResponseFiles(Saver, Tokenizer, Argv, false, false, false, + /*CurrentDir=*/llvm::StringRef(TestRoot), FS)); EXPECT_THAT(Argv, testing::Pointwise(StringEquality(), @@ -929,7 +929,7 @@ TEST(CommandLineTest, ResponseFilesAtArguments) { BumpPtrAllocator A; StringSaver Saver(A); ASSERT_FALSE(cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv, - false, false, + false, false, false, /*CurrentDir=*/StringRef(TestRoot), FS)); // ASSERT instead of EXPECT to prevent potential out-of-bounds access. 
@@ -964,7 +964,7 @@ TEST(CommandLineTest, ResponseFileRelativePath) { BumpPtrAllocator A; StringSaver Saver(A); ASSERT_TRUE(cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv, - false, true, + false, true, false, /*CurrentDir=*/StringRef(TestRoot), FS)); EXPECT_THAT(Argv, testing::Pointwise(StringEquality(), {"test/test", "-flag"})); @@ -984,7 +984,7 @@ TEST(CommandLineTest, ResponseFileEOLs) { BumpPtrAllocator A; StringSaver Saver(A); ASSERT_TRUE(cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, - Argv, true, true, + Argv, true, true, false, /*CurrentDir=*/StringRef(TestRoot), FS)); const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr, "input.cpp"}; @@ -1038,25 +1038,39 @@ TEST(CommandLineTest, ReadConfigFile) { llvm::SmallVector Argv; TempDir TestDir("unittest", /*Unique*/ true); + TempDir TestSubDir(TestDir.path("subdir"), /*Unique*/ false); - llvm::SmallString<128> TestCfg; - llvm::sys::path::append(TestCfg, TestDir.path(), "foo"); - + llvm::SmallString<128> TestCfg = TestDir.path("foo"); TempFile ConfigFile(TestCfg, "", "# Comment\n" "-option_1\n" + "-option_2=/dir1\n" + "-option_3=\n" + "-option_4 \n" + "-option_5=\n" + "-option_6=/dir1,/dir2\n" "@subconfig\n" - "-option_3=abcd\n" - "-option_4=\\\n" + "-option_11=abcd\n" + "-option_12=\\\n" "cdef\n"); - llvm::SmallString<128> TestCfg2; - llvm::sys::path::append(TestCfg2, TestDir.path(), "subconfig"); + llvm::SmallString<128> TestCfg2 = TestDir.path("subconfig"); TempFile ConfigFile2(TestCfg2, "", - "-option_2\n" + "-option_7\n" + "-option_8=/dir2\n" + "@subdir/subfoo\n" "\n" " # comment\n"); + llvm::SmallString<128> TestCfg3 = TestSubDir.path("subfoo"); + TempFile ConfigFile3(TestCfg3, "", + "-option_9=/dir3\n" + "@/subfoo2\n"); + + llvm::SmallString<128> TestCfg4 = TestSubDir.path("subfoo2"); + TempFile ConfigFile4(TestCfg4, "", "-option_10\n"); + // Make sure the current directory is not the directory where config files // resides. 
In this case the code that expands response files will not find // 'subconfig' unless it resolves nested inclusions relative to the including @@ -1071,11 +1085,26 @@ TEST(CommandLineTest, ReadConfigFile) { bool Result = llvm::cl::readConfigFile(ConfigFile.path(), Saver, Argv); EXPECT_TRUE(Result); - EXPECT_EQ(Argv.size(), 4U); + EXPECT_EQ(Argv.size(), 13U); EXPECT_STREQ(Argv[0], "-option_1"); - EXPECT_STREQ(Argv[1], "-option_2"); - EXPECT_STREQ(Argv[2], "-option_3=abcd"); - EXPECT_STREQ(Argv[3], "-option_4=cdef"); + EXPECT_STREQ(Argv[1], + ("-option_2=" + TestDir.path() + "/dir1").str().c_str()); + EXPECT_STREQ(Argv[2], ("-option_3=" + TestDir.path()).str().c_str()); + EXPECT_STREQ(Argv[3], "-option_4"); + EXPECT_STREQ(Argv[4], TestDir.path().str().c_str()); + EXPECT_STREQ(Argv[5], ("-option_5=" + TestDir.path()).str().c_str()); + EXPECT_STREQ(Argv[6], ("-option_6=" + TestDir.path() + "/dir1," + + TestDir.path() + "/dir2") + .str() + .c_str()); + EXPECT_STREQ(Argv[7], "-option_7"); + EXPECT_STREQ(Argv[8], + ("-option_8=" + TestDir.path() + "/dir2").str().c_str()); + EXPECT_STREQ(Argv[9], + ("-option_9=" + TestSubDir.path() + "/dir3").str().c_str()); + EXPECT_STREQ(Argv[10], "-option_10"); + EXPECT_STREQ(Argv[11], "-option_11=abcd"); + EXPECT_STREQ(Argv[12], "-option_12=cdef"); } TEST(CommandLineTest, PositionalEatArgsError) { From a6a583dae40485cacfac56811e6d9131bac6ca74 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Thu, 30 Dec 2021 00:59:58 -0500 Subject: [PATCH 246/992] [MLIR] Move AtomicRMW into MemRef dialect and enum into Arith Per the discussion in https://reviews.llvm.org/D116345 it makes sense to move AtomicRMWOp out of the standard dialect. This was accentuated by the need to add a fold op with a memref::cast. The only dialect that would permit this is the memref dialect (keeping it in the standard dialect or moving it to the arithmetic dialect would require those dialects to have a dependency on the memref dialect, which breaks linking). 
As the AtomicRMWKind enum is used throughout, this has been moved to Arith. Reviewed By: Mogball Differential Revision: https://reviews.llvm.org/D116392 --- mlir/include/mlir/Analysis/AffineAnalysis.h | 3 +- .../mlir/Dialect/Affine/IR/AffineOps.td | 6 +- .../mlir/Dialect/Arithmetic/IR/Arithmetic.h | 12 ++ .../Dialect/Arithmetic/IR/ArithmeticBase.td | 24 ++++ .../mlir/Dialect/MemRef/IR/MemRefOps.td | 48 +++++++ .../include/mlir/Dialect/StandardOps/IR/Ops.h | 27 ---- .../mlir/Dialect/StandardOps/IR/Ops.td | 47 ------- .../Dialect/StandardOps/IR/StandardOpsBase.td | 42 ------ mlir/include/mlir/Dialect/Vector/VectorOps.h | 5 +- mlir/lib/Analysis/AffineAnalysis.cpp | 34 ++--- .../AffineToStandard/AffineToStandard.cpp | 18 +-- .../Conversion/MemRefToLLVM/MemRefToLLVM.cpp | 57 ++++++++ .../StandardToLLVM/StandardToLLVM.cpp | 56 -------- mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 18 +-- .../Affine/Transforms/SuperVectorize.cpp | 9 +- .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 96 +++++++++++++ mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 44 ++++++ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 128 ------------------ .../StandardOps/Transforms/ExpandOps.cpp | 25 ++-- mlir/lib/Dialect/Vector/VectorOps.cpp | 25 ++-- mlir/lib/Transforms/Utils/LoopUtils.cpp | 2 +- .../MemRefToLLVM/memref-to-llvm.mlir | 25 ++++ .../StandardToLLVM/standard-to-llvm.mlir | 25 ---- mlir/test/Dialect/MemRef/canonicalize.mlir | 11 ++ mlir/test/Dialect/MemRef/invalid.mlir | 24 ++++ mlir/test/Dialect/MemRef/ops.mlir | 10 ++ mlir/test/Dialect/Standard/expand-ops.mlir | 4 +- mlir/test/IR/core-ops.mlir | 8 -- mlir/test/IR/invalid-ops.mlir | 24 ---- 29 files changed, 429 insertions(+), 428 deletions(-) delete mode 100644 mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td diff --git a/mlir/include/mlir/Analysis/AffineAnalysis.h b/mlir/include/mlir/Analysis/AffineAnalysis.h index 120a5be4596a..fa793a9e17f8 100644 --- a/mlir/include/mlir/Analysis/AffineAnalysis.h +++ 
b/mlir/include/mlir/Analysis/AffineAnalysis.h @@ -15,6 +15,7 @@ #ifndef MLIR_ANALYSIS_AFFINE_ANALYSIS_H #define MLIR_ANALYSIS_AFFINE_ANALYSIS_H +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Value.h" #include "llvm/ADT/Optional.h" @@ -32,7 +33,7 @@ class Operation; /// A description of a (parallelizable) reduction in an affine loop. struct LoopReduction { /// Reduction kind. - AtomicRMWKind kind; + arith::AtomicRMWKind kind; /// Position of the iteration argument that acts as accumulator. unsigned iterArgPosition; diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index c04115476d35..53b58fa23d34 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -13,7 +13,7 @@ #ifndef AFFINE_OPS #define AFFINE_OPS -include "mlir/Dialect/StandardOps/IR/StandardOpsBase.td" +include "mlir/Dialect/Arithmetic/IR/ArithmeticBase.td" include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/LoopLikeInterface.td" @@ -691,9 +691,9 @@ def AffineParallelOp : Affine_Op<"parallel", let builders = [ OpBuilder<(ins "TypeRange":$resultTypes, - "ArrayRef":$reductions, "ArrayRef":$ranges)>, + "ArrayRef":$reductions, "ArrayRef":$ranges)>, OpBuilder<(ins "TypeRange":$resultTypes, - "ArrayRef":$reductions, "ArrayRef":$lbMaps, + "ArrayRef":$reductions, "ArrayRef":$lbMaps, "ValueRange":$lbArgs, "ArrayRef":$ubMaps, "ValueRange":$ubArgs, "ArrayRef":$steps)> ]; diff --git a/mlir/include/mlir/Dialect/Arithmetic/IR/Arithmetic.h b/mlir/include/mlir/Dialect/Arithmetic/IR/Arithmetic.h index 4fa592d003a0..31d623938845 100644 --- a/mlir/include/mlir/Dialect/Arithmetic/IR/Arithmetic.h +++ b/mlir/include/mlir/Dialect/Arithmetic/IR/Arithmetic.h @@ -109,6 +109,18 @@ bool applyCmpPredicate(arith::CmpIPredicate predicate, const APInt &lhs, bool 
applyCmpPredicate(arith::CmpFPredicate predicate, const APFloat &lhs, const APFloat &rhs); +/// Returns the identity value attribute associated with an AtomicRMWKind op. +Attribute getIdentityValueAttr(AtomicRMWKind kind, Type resultType, + OpBuilder &builder, Location loc); + +/// Returns the identity value associated with an AtomicRMWKind op. +Value getIdentityValue(AtomicRMWKind op, Type resultType, OpBuilder &builder, + Location loc); + +/// Returns the value obtained by applying the reduction operation kind +/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`. +Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, + Value lhs, Value rhs); } // namespace arith } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticBase.td b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticBase.td index 87439da95640..704edbb587bd 100644 --- a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticBase.td +++ b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticBase.td @@ -68,4 +68,28 @@ def Arith_CmpIPredicateAttr : I64EnumAttr< let cppNamespace = "::mlir::arith"; } +def ATOMIC_RMW_KIND_ADDF : I64EnumAttrCase<"addf", 0>; +def ATOMIC_RMW_KIND_ADDI : I64EnumAttrCase<"addi", 1>; +def ATOMIC_RMW_KIND_ASSIGN : I64EnumAttrCase<"assign", 2>; +def ATOMIC_RMW_KIND_MAXF : I64EnumAttrCase<"maxf", 3>; +def ATOMIC_RMW_KIND_MAXS : I64EnumAttrCase<"maxs", 4>; +def ATOMIC_RMW_KIND_MAXU : I64EnumAttrCase<"maxu", 5>; +def ATOMIC_RMW_KIND_MINF : I64EnumAttrCase<"minf", 6>; +def ATOMIC_RMW_KIND_MINS : I64EnumAttrCase<"mins", 7>; +def ATOMIC_RMW_KIND_MINU : I64EnumAttrCase<"minu", 8>; +def ATOMIC_RMW_KIND_MULF : I64EnumAttrCase<"mulf", 9>; +def ATOMIC_RMW_KIND_MULI : I64EnumAttrCase<"muli", 10>; +def ATOMIC_RMW_KIND_ORI : I64EnumAttrCase<"ori", 11>; +def ATOMIC_RMW_KIND_ANDI : I64EnumAttrCase<"andi", 12>; + +def AtomicRMWKindAttr : I64EnumAttr< + "AtomicRMWKind", "", + [ATOMIC_RMW_KIND_ADDF, ATOMIC_RMW_KIND_ADDI, ATOMIC_RMW_KIND_ASSIGN, + 
ATOMIC_RMW_KIND_MAXF, ATOMIC_RMW_KIND_MAXS, ATOMIC_RMW_KIND_MAXU, + ATOMIC_RMW_KIND_MINF, ATOMIC_RMW_KIND_MINS, ATOMIC_RMW_KIND_MINU, + ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI, ATOMIC_RMW_KIND_ORI, + ATOMIC_RMW_KIND_ANDI]> { + let cppNamespace = "::mlir::arith"; +} + #endif // ARITHMETIC_BASE diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index e529a50dae93..884dc0f5b051 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -11,6 +11,7 @@ include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Dialect/MemRef/IR/MemRefBase.td" +include "mlir/Dialect/Arithmetic/IR/ArithmeticBase.td" include "mlir/IR/OpBase.td" include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/CopyOpInterface.td" @@ -1673,4 +1674,51 @@ def MemRef_ViewOp : MemRef_Op<"view", [ let hasCanonicalizer = 1; } +//===----------------------------------------------------------------------===// +// AtomicRMWOp +//===----------------------------------------------------------------------===// + +def AtomicRMWOp : MemRef_Op<"atomic_rmw", [ + AllTypesMatch<["value", "result"]>, + TypesMatchWith<"value type matches element type of memref", + "memref", "value", + "$_self.cast().getElementType()"> + ]> { + let summary = "atomic read-modify-write operation"; + let description = [{ + The `atomic_rmw` operation provides a way to perform a read-modify-write + sequence that is free from data races. The kind enumeration specifies the + modification to perform. The value operand represents the new value to be + applied during the modification. The memref operand represents the buffer + that the read and write will be performed against, as accessed by the + specified indices. The arity of the indices is the rank of the memref. The + result represents the latest value that was stored. 
+ + Example: + + ```mlir + %x = arith.atomic_rmw "addf" %value, %I[%i] : (f32, memref<10xf32>) -> f32 + ``` + }]; + + let arguments = (ins + AtomicRMWKindAttr:$kind, + AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$value, + MemRefOf<[AnySignlessInteger, AnyFloat]>:$memref, + Variadic:$indices); + let results = (outs AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$result); + + let assemblyFormat = [{ + $kind $value `,` $memref `[` $indices `]` attr-dict `:` `(` type($value) `,` + type($memref) `)` `->` type($result) + }]; + + let extraClassDeclaration = [{ + MemRefType getMemRefType() { + return memref().getType().cast(); + } + }]; + let hasFolder = 1; +} + #endif // MEMREF_OPS diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index fe9c3c6e26f1..b309488aa4f5 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -42,31 +42,4 @@ class PatternRewriter; #include "mlir/Dialect/StandardOps/IR/OpsDialect.h.inc" -namespace mlir { - -/// Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer -/// comparison predicates. -bool applyCmpPredicate(arith::CmpIPredicate predicate, const APInt &lhs, - const APInt &rhs); - -/// Compute `lhs` `pred` `rhs`, where `pred` is one of the known floating point -/// comparison predicates. -bool applyCmpPredicate(arith::CmpFPredicate predicate, const APFloat &lhs, - const APFloat &rhs); - -/// Returns the identity value attribute associated with an AtomicRMWKind op. -Attribute getIdentityValueAttr(AtomicRMWKind kind, Type resultType, - OpBuilder &builder, Location loc); - -/// Returns the identity value associated with an AtomicRMWKind op. -Value getIdentityValue(AtomicRMWKind op, Type resultType, OpBuilder &builder, - Location loc); - -/// Returns the value obtained by applying the reduction operation kind -/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`. 
-Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, - Value lhs, Value rhs); - -} // namespace mlir - #endif // MLIR_DIALECT_IR_STANDARDOPS_IR_OPS_H diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 2e50971db9e7..794f0157ef14 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -13,7 +13,6 @@ #ifndef STANDARD_OPS #define STANDARD_OPS -include "mlir/Dialect/StandardOps/IR/StandardOpsBase.td" include "mlir/IR/OpAsmInterface.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/CallInterfaces.td" @@ -179,52 +178,6 @@ def AssertOp : Std_Op<"assert"> { let hasCanonicalizeMethod = 1; } -//===----------------------------------------------------------------------===// -// AtomicRMWOp -//===----------------------------------------------------------------------===// - -def AtomicRMWOp : Std_Op<"atomic_rmw", [ - AllTypesMatch<["value", "result"]>, - TypesMatchWith<"value type matches element type of memref", - "memref", "value", - "$_self.cast().getElementType()"> - ]> { - let summary = "atomic read-modify-write operation"; - let description = [{ - The `atomic_rmw` operation provides a way to perform a read-modify-write - sequence that is free from data races. The kind enumeration specifies the - modification to perform. The value operand represents the new value to be - applied during the modification. The memref operand represents the buffer - that the read and write will be performed against, as accessed by the - specified indices. The arity of the indices is the rank of the memref. The - result represents the latest value that was stored. 
- - Example: - - ```mlir - %x = atomic_rmw "addf" %value, %I[%i] : (f32, memref<10xf32>) -> f32 - ``` - }]; - - let arguments = (ins - AtomicRMWKindAttr:$kind, - AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$value, - MemRefOf<[AnySignlessInteger, AnyFloat]>:$memref, - Variadic:$indices); - let results = (outs AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$result); - - let assemblyFormat = [{ - $kind $value `,` $memref `[` $indices `]` attr-dict `:` `(` type($value) `,` - type($memref) `)` `->` type($result) - }]; - - let extraClassDeclaration = [{ - MemRefType getMemRefType() { - return getMemref().getType().cast(); - } - }]; -} - def GenericAtomicRMWOp : Std_Op<"generic_atomic_rmw", [ SingleBlockImplicitTerminator<"AtomicYieldOp">, TypesMatchWith<"result type matches element type of memref", diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td b/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td deleted file mode 100644 index 3016a197df0d..000000000000 --- a/mlir/include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td +++ /dev/null @@ -1,42 +0,0 @@ -//===- StandardOpsBase.td - Standard ops definitions -------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines base support for standard operations. 
-// -//===----------------------------------------------------------------------===// - -#ifndef STANDARD_OPS_BASE -#define STANDARD_OPS_BASE - -include "mlir/IR/OpBase.td" - -def ATOMIC_RMW_KIND_ADDF : I64EnumAttrCase<"addf", 0>; -def ATOMIC_RMW_KIND_ADDI : I64EnumAttrCase<"addi", 1>; -def ATOMIC_RMW_KIND_ASSIGN : I64EnumAttrCase<"assign", 2>; -def ATOMIC_RMW_KIND_MAXF : I64EnumAttrCase<"maxf", 3>; -def ATOMIC_RMW_KIND_MAXS : I64EnumAttrCase<"maxs", 4>; -def ATOMIC_RMW_KIND_MAXU : I64EnumAttrCase<"maxu", 5>; -def ATOMIC_RMW_KIND_MINF : I64EnumAttrCase<"minf", 6>; -def ATOMIC_RMW_KIND_MINS : I64EnumAttrCase<"mins", 7>; -def ATOMIC_RMW_KIND_MINU : I64EnumAttrCase<"minu", 8>; -def ATOMIC_RMW_KIND_MULF : I64EnumAttrCase<"mulf", 9>; -def ATOMIC_RMW_KIND_MULI : I64EnumAttrCase<"muli", 10>; -def ATOMIC_RMW_KIND_ORI : I64EnumAttrCase<"ori", 11>; -def ATOMIC_RMW_KIND_ANDI : I64EnumAttrCase<"andi", 12>; - -def AtomicRMWKindAttr : I64EnumAttr< - "AtomicRMWKind", "", - [ATOMIC_RMW_KIND_ADDF, ATOMIC_RMW_KIND_ADDI, ATOMIC_RMW_KIND_ASSIGN, - ATOMIC_RMW_KIND_MAXF, ATOMIC_RMW_KIND_MAXS, ATOMIC_RMW_KIND_MAXU, - ATOMIC_RMW_KIND_MINF, ATOMIC_RMW_KIND_MINS, ATOMIC_RMW_KIND_MINU, - ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI, ATOMIC_RMW_KIND_ORI, - ATOMIC_RMW_KIND_ANDI]> { - let cppNamespace = "::mlir"; -} - -#endif // STANDARD_OPS_BASE diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index 14bd03968fcf..816ec204acfe 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -13,6 +13,7 @@ #ifndef MLIR_DIALECT_VECTOR_VECTOROPS_H #define MLIR_DIALECT_VECTOR_VECTOROPS_H +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/Attributes.h" @@ -145,8 +146,8 @@ ArrayAttr getVectorSubscriptAttr(Builder &b, ArrayRef values); /// Returns the value obtained by reducing the vector into a scalar using 
the /// operation kind associated with a binary AtomicRMWKind op. -Value getVectorReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, - Value vector); +Value getVectorReductionOp(arith::AtomicRMWKind op, OpBuilder &builder, + Location loc, Value vector); /// Return true if the last dimension of the MemRefType has unit stride. Also /// return true for memrefs with no strides. diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp index 79a367e33713..c8022e046548 100644 --- a/mlir/lib/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Analysis/AffineAnalysis.cpp @@ -40,7 +40,7 @@ using llvm::dbgs; /// reduction kind suitable for use in affine parallel loop builder. If the /// reduction is not supported, returns null. static Value getSupportedReduction(AffineForOp forOp, unsigned pos, - AtomicRMWKind &kind) { + arith::AtomicRMWKind &kind) { SmallVector combinerOps; Value reducedVal = matchReduction(forOp.getRegionIterArgs(), pos, combinerOps); @@ -52,21 +52,21 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos, return nullptr; Operation *combinerOp = combinerOps.back(); - Optional maybeKind = - TypeSwitch>(combinerOp) - .Case([](arith::AddFOp) { return AtomicRMWKind::addf; }) - .Case([](arith::MulFOp) { return AtomicRMWKind::mulf; }) - .Case([](arith::AddIOp) { return AtomicRMWKind::addi; }) - .Case([](arith::AndIOp) { return AtomicRMWKind::andi; }) - .Case([](arith::OrIOp) { return AtomicRMWKind::ori; }) - .Case([](arith::MulIOp) { return AtomicRMWKind::muli; }) - .Case([](arith::MinFOp) { return AtomicRMWKind::minf; }) - .Case([](arith::MaxFOp) { return AtomicRMWKind::maxf; }) - .Case([](arith::MinSIOp) { return AtomicRMWKind::mins; }) - .Case([](arith::MaxSIOp) { return AtomicRMWKind::maxs; }) - .Case([](arith::MinUIOp) { return AtomicRMWKind::minu; }) - .Case([](arith::MaxUIOp) { return AtomicRMWKind::maxu; }) - .Default([](Operation *) -> Optional { + Optional maybeKind = + TypeSwitch>(combinerOp) + 
.Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; }) + .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; }) + .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; }) + .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; }) + .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; }) + .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; }) + .Case([](arith::MinFOp) { return arith::AtomicRMWKind::minf; }) + .Case([](arith::MaxFOp) { return arith::AtomicRMWKind::maxf; }) + .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; }) + .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; }) + .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; }) + .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; }) + .Default([](Operation *) -> Optional { // TODO: AtomicRMW supports other kinds of reductions this is // currently not detecting, add those when the need arises. return llvm::None; @@ -86,7 +86,7 @@ void mlir::getSupportedReductions( return; supportedReductions.reserve(numIterArgs); for (unsigned i = 0; i < numIterArgs; ++i) { - AtomicRMWKind kind; + arith::AtomicRMWKind kind; if (Value value = getSupportedReduction(forOp, i, kind)) supportedReductions.emplace_back(LoopReduction{kind, i, value}); } diff --git a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp index 8b48549b3805..bc2f5917160e 100644 --- a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp +++ b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp @@ -430,13 +430,14 @@ class AffineParallelLowering : public OpRewritePattern { // initialization of the result values. 
Attribute reduction = std::get<0>(pair); Type resultType = std::get<1>(pair); - Optional reductionOp = symbolizeAtomicRMWKind( - static_cast(reduction.cast().getInt())); + Optional reductionOp = + arith::symbolizeAtomicRMWKind( + static_cast(reduction.cast().getInt())); assert(reductionOp.hasValue() && "Reduction operation cannot be of None Type"); - AtomicRMWKind reductionOpValue = reductionOp.getValue(); + arith::AtomicRMWKind reductionOpValue = reductionOp.getValue(); identityVals.push_back( - getIdentityValue(reductionOpValue, resultType, rewriter, loc)); + arith::getIdentityValue(reductionOpValue, resultType, rewriter, loc)); } parOp = rewriter.create( loc, lowerBoundTuple, upperBoundTuple, steps, identityVals, @@ -450,16 +451,17 @@ class AffineParallelLowering : public OpRewritePattern { "Unequal number of reductions and operands."); for (unsigned i = 0, end = reductions.size(); i < end; i++) { // For each of the reduction operations get the respective mlir::Value. - Optional reductionOp = - symbolizeAtomicRMWKind(reductions[i].cast().getInt()); + Optional reductionOp = + arith::symbolizeAtomicRMWKind( + reductions[i].cast().getInt()); assert(reductionOp.hasValue() && "Reduction Operation cannot be of None Type"); - AtomicRMWKind reductionOpValue = reductionOp.getValue(); + arith::AtomicRMWKind reductionOpValue = reductionOp.getValue(); rewriter.setInsertionPoint(&parOp.getBody()->back()); auto reduceOp = rewriter.create( loc, affineParOpTerminator->getOperand(i)); rewriter.setInsertionPointToEnd(&reduceOp.getReductionOperator().front()); - Value reductionResult = getReductionOp( + Value reductionResult = arith::getReductionOp( reductionOpValue, rewriter, loc, reduceOp.getReductionOperator().front().getArgument(0), reduceOp.getReductionOperator().front().getArgument(1)); diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index 28981dd87ecc..b1f7d0452ee1 100644 --- 
a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -1553,6 +1553,62 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { } }; +//===----------------------------------------------------------------------===// +// AtomicRMWOpLowering +//===----------------------------------------------------------------------===// + +/// Try to match the kind of a std.atomic_rmw to determine whether to use a +/// lowering to llvm.atomicrmw or fallback to llvm.cmpxchg. +static Optional +matchSimpleAtomicOp(memref::AtomicRMWOp atomicOp) { + switch (atomicOp.kind()) { + case arith::AtomicRMWKind::addf: + return LLVM::AtomicBinOp::fadd; + case arith::AtomicRMWKind::addi: + return LLVM::AtomicBinOp::add; + case arith::AtomicRMWKind::assign: + return LLVM::AtomicBinOp::xchg; + case arith::AtomicRMWKind::maxs: + return LLVM::AtomicBinOp::max; + case arith::AtomicRMWKind::maxu: + return LLVM::AtomicBinOp::umax; + case arith::AtomicRMWKind::mins: + return LLVM::AtomicBinOp::min; + case arith::AtomicRMWKind::minu: + return LLVM::AtomicBinOp::umin; + case arith::AtomicRMWKind::ori: + return LLVM::AtomicBinOp::_or; + case arith::AtomicRMWKind::andi: + return LLVM::AtomicBinOp::_and; + default: + return llvm::None; + } + llvm_unreachable("Invalid AtomicRMWKind"); +} + +struct AtomicRMWOpLowering : public LoadStoreOpLowering { + using Base::Base; + + LogicalResult + matchAndRewrite(memref::AtomicRMWOp atomicOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (failed(match(atomicOp))) + return failure(); + auto maybeKind = matchSimpleAtomicOp(atomicOp); + if (!maybeKind) + return failure(); + auto resultType = adaptor.value().getType(); + auto memRefType = atomicOp.getMemRefType(); + auto dataPtr = + getStridedElementPtr(atomicOp.getLoc(), memRefType, adaptor.memref(), + adaptor.indices(), rewriter); + rewriter.replaceOpWithNewOp( + atomicOp, resultType, *maybeKind, dataPtr, adaptor.value(), + 
LLVM::AtomicOrdering::acq_rel); + return success(); + } +}; + } // namespace void mlir::populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter, @@ -1561,6 +1617,7 @@ void mlir::populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter, patterns.add< AllocaOpLowering, AllocaScopeOpLowering, + AtomicRMWOpLowering, AssumeAlignmentOpLowering, DimOpLowering, GlobalMemrefOpLowering, diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index da429dd8af11..feaa140cc710 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -772,61 +772,6 @@ struct SplatNdOpLowering : public ConvertOpToLLVMPattern { } }; -} // namespace - -/// Try to match the kind of a std.atomic_rmw to determine whether to use a -/// lowering to llvm.atomicrmw or fallback to llvm.cmpxchg. -static Optional matchSimpleAtomicOp(AtomicRMWOp atomicOp) { - switch (atomicOp.getKind()) { - case AtomicRMWKind::addf: - return LLVM::AtomicBinOp::fadd; - case AtomicRMWKind::addi: - return LLVM::AtomicBinOp::add; - case AtomicRMWKind::assign: - return LLVM::AtomicBinOp::xchg; - case AtomicRMWKind::maxs: - return LLVM::AtomicBinOp::max; - case AtomicRMWKind::maxu: - return LLVM::AtomicBinOp::umax; - case AtomicRMWKind::mins: - return LLVM::AtomicBinOp::min; - case AtomicRMWKind::minu: - return LLVM::AtomicBinOp::umin; - case AtomicRMWKind::ori: - return LLVM::AtomicBinOp::_or; - case AtomicRMWKind::andi: - return LLVM::AtomicBinOp::_and; - default: - return llvm::None; - } - llvm_unreachable("Invalid AtomicRMWKind"); -} - -namespace { - -struct AtomicRMWOpLowering : public LoadStoreOpLowering { - using Base::Base; - - LogicalResult - matchAndRewrite(AtomicRMWOp atomicOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - if (failed(match(atomicOp))) - return failure(); - auto maybeKind = matchSimpleAtomicOp(atomicOp); - if 
(!maybeKind) - return failure(); - auto resultType = adaptor.getValue().getType(); - auto memRefType = atomicOp.getMemRefType(); - auto dataPtr = - getStridedElementPtr(atomicOp.getLoc(), memRefType, adaptor.getMemref(), - adaptor.getIndices(), rewriter); - rewriter.replaceOpWithNewOp( - atomicOp, resultType, *maybeKind, dataPtr, adaptor.getValue(), - LLVM::AtomicOrdering::acq_rel); - return success(); - } -}; - /// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be /// retried until it succeeds in atomically storing a new value into memory. /// @@ -962,7 +907,6 @@ void mlir::populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter, // clang-format off patterns.add< AssertOpLowering, - AtomicRMWOpLowering, BranchOpLowering, CallIndirectOpLowering, CallOpLowering, diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 071838dcf2be..c3c1b5129480 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -2801,7 +2801,7 @@ LogicalResult AffinePrefetchOp::fold(ArrayRef cstOperands, void AffineParallelOp::build(OpBuilder &builder, OperationState &result, TypeRange resultTypes, - ArrayRef reductions, + ArrayRef reductions, ArrayRef ranges) { SmallVector lbs(ranges.size(), builder.getConstantAffineMap(0)); auto ubs = llvm::to_vector<4>(llvm::map_range(ranges, [&](int64_t value) { @@ -2814,7 +2814,7 @@ void AffineParallelOp::build(OpBuilder &builder, OperationState &result, void AffineParallelOp::build(OpBuilder &builder, OperationState &result, TypeRange resultTypes, - ArrayRef reductions, + ArrayRef reductions, ArrayRef lbMaps, ValueRange lbArgs, ArrayRef ubMaps, ValueRange ubArgs, ArrayRef steps) { @@ -2843,7 +2843,7 @@ void AffineParallelOp::build(OpBuilder &builder, OperationState &result, // Convert the reductions to integer attributes. 
SmallVector reductionAttrs; - for (AtomicRMWKind reduction : reductions) + for (arith::AtomicRMWKind reduction : reductions) reductionAttrs.push_back( builder.getI64IntegerAttr(static_cast(reduction))); result.addAttribute(getReductionsAttrName(), @@ -3050,7 +3050,7 @@ static LogicalResult verify(AffineParallelOp op) { // Verify reduction ops are all valid for (Attribute attr : op.reductions()) { auto intAttr = attr.dyn_cast(); - if (!intAttr || !symbolizeAtomicRMWKind(intAttr.getInt())) + if (!intAttr || !arith::symbolizeAtomicRMWKind(intAttr.getInt())) return op.emitOpError("invalid reduction attribute"); } @@ -3150,9 +3150,9 @@ static void print(OpAsmPrinter &p, AffineParallelOp op) { if (op.getNumResults()) { p << " reduce ("; llvm::interleaveComma(op.reductions(), p, [&](auto &attr) { - AtomicRMWKind sym = - *symbolizeAtomicRMWKind(attr.template cast().getInt()); - p << "\"" << stringifyAtomicRMWKind(sym) << "\""; + arith::AtomicRMWKind sym = *arith::symbolizeAtomicRMWKind( + attr.template cast().getInt()); + p << "\"" << arith::stringifyAtomicRMWKind(sym) << "\""; }); p << ") -> (" << op.getResultTypes() << ")"; } @@ -3374,8 +3374,8 @@ static ParseResult parseAffineParallelOp(OpAsmParser &parser, if (parser.parseAttribute(attrVal, builder.getNoneType(), "reduce", attrStorage)) return failure(); - llvm::Optional reduction = - symbolizeAtomicRMWKind(attrVal.getValue()); + llvm::Optional reduction = + arith::symbolizeAtomicRMWKind(attrVal.getValue()); if (!reduction) return parser.emitError(loc, "invalid reduction value: ") << attrVal; reductions.push_back(builder.getI64IntegerAttr( diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 9d59b89ea1a2..7ecc6750bcca 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -971,7 +971,7 @@ static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp, /// Creates a 
constant vector filled with the neutral elements of the given /// reduction. The scalar type of vector elements will be taken from /// `oldOperand`. -static arith::ConstantOp createInitialVector(AtomicRMWKind reductionKind, +static arith::ConstantOp createInitialVector(arith::AtomicRMWKind reductionKind, Value oldOperand, VectorizationState &state) { Type scalarTy = oldOperand.getType(); @@ -1245,8 +1245,8 @@ static Operation *vectorizeAffineStore(AffineStoreOp storeOp, /// Returns true if `value` is a constant equal to the neutral element of the /// given vectorizable reduction. -static bool isNeutralElementConst(AtomicRMWKind reductionKind, Value value, - VectorizationState &state) { +static bool isNeutralElementConst(arith::AtomicRMWKind reductionKind, + Value value, VectorizationState &state) { Type scalarTy = value.getType(); if (!VectorType::isValidElementType(scalarTy)) return false; @@ -1361,7 +1361,8 @@ static Operation *vectorizeAffineForOp(AffineForOp forOp, Value origInit = forOp.getOperand(forOp.getNumControlOperands() + i); Value finalRes = reducedRes; if (!isNeutralElementConst(reductions[i].kind, origInit, state)) - finalRes = getReductionOp(reductions[i].kind, state.builder, + finalRes = + arith::getReductionOp(reductions[i].kind, state.builder, reducedRes.getLoc(), reducedRes, origInit); state.registerLoopResultScalarReplacement(forOp.getResult(i), finalRes); } diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index f0ce1b7a4d70..048e4d89186c 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/CommonFolders.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" @@ -1208,6 +1209,101 @@ OpFoldResult arith::CmpFOp::fold(ArrayRef operands) { return 
BoolAttr::get(getContext(), val); } +//===----------------------------------------------------------------------===// +// Atomic Enum +//===----------------------------------------------------------------------===// + +/// Returns the identity value attribute associated with an AtomicRMWKind op. +Attribute mlir::arith::getIdentityValueAttr(AtomicRMWKind kind, Type resultType, + OpBuilder &builder, Location loc) { + switch (kind) { + case AtomicRMWKind::maxf: + return builder.getFloatAttr( + resultType, + APFloat::getInf(resultType.cast().getFloatSemantics(), + /*Negative=*/true)); + case AtomicRMWKind::addf: + case AtomicRMWKind::addi: + case AtomicRMWKind::maxu: + case AtomicRMWKind::ori: + return builder.getZeroAttr(resultType); + case AtomicRMWKind::andi: + return builder.getIntegerAttr( + resultType, + APInt::getAllOnes(resultType.cast().getWidth())); + case AtomicRMWKind::maxs: + return builder.getIntegerAttr( + resultType, + APInt::getSignedMinValue(resultType.cast().getWidth())); + case AtomicRMWKind::minf: + return builder.getFloatAttr( + resultType, + APFloat::getInf(resultType.cast().getFloatSemantics(), + /*Negative=*/false)); + case AtomicRMWKind::mins: + return builder.getIntegerAttr( + resultType, + APInt::getSignedMaxValue(resultType.cast().getWidth())); + case AtomicRMWKind::minu: + return builder.getIntegerAttr( + resultType, + APInt::getMaxValue(resultType.cast().getWidth())); + case AtomicRMWKind::muli: + return builder.getIntegerAttr(resultType, 1); + case AtomicRMWKind::mulf: + return builder.getFloatAttr(resultType, 1); + // TODO: Add remaining reduction operations. + default: + (void)emitOptionalError(loc, "Reduction operation type not supported"); + break; + } + return nullptr; +} + +/// Returns the identity value associated with an AtomicRMWKind op. 
+Value mlir::arith::getIdentityValue(AtomicRMWKind op, Type resultType, + OpBuilder &builder, Location loc) { + Attribute attr = getIdentityValueAttr(op, resultType, builder, loc); + return builder.create(loc, attr); +} + +/// Return the value obtained by applying the reduction operation kind +/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`. +Value mlir::arith::getReductionOp(AtomicRMWKind op, OpBuilder &builder, + Location loc, Value lhs, Value rhs) { + switch (op) { + case AtomicRMWKind::addf: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::addi: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::mulf: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::muli: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::maxf: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::minf: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::maxs: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::mins: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::maxu: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::minu: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::ori: + return builder.create(loc, lhs, rhs); + case AtomicRMWKind::andi: + return builder.create(loc, lhs, rhs); + // TODO: Add remaining reduction operations. 
+ default: + (void)emitOptionalError(loc, "Reduction operation type not supported"); + break; + } + return nullptr; +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index ab7e8305ab5b..45ba726d5bf9 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -2286,6 +2286,50 @@ void ViewOp::getCanonicalizationPatterns(RewritePatternSet &results, results.add(context); } +//===----------------------------------------------------------------------===// +// AtomicRMWOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(AtomicRMWOp op) { + if (op.getMemRefType().getRank() != op.getNumOperands() - 2) + return op.emitOpError( + "expects the number of subscripts to be equal to memref rank"); + switch (op.kind()) { + case arith::AtomicRMWKind::addf: + case arith::AtomicRMWKind::maxf: + case arith::AtomicRMWKind::minf: + case arith::AtomicRMWKind::mulf: + if (!op.value().getType().isa()) + return op.emitOpError() + << "with kind '" << arith::stringifyAtomicRMWKind(op.kind()) + << "' expects a floating-point type"; + break; + case arith::AtomicRMWKind::addi: + case arith::AtomicRMWKind::maxs: + case arith::AtomicRMWKind::maxu: + case arith::AtomicRMWKind::mins: + case arith::AtomicRMWKind::minu: + case arith::AtomicRMWKind::muli: + case arith::AtomicRMWKind::ori: + case arith::AtomicRMWKind::andi: + if (!op.value().getType().isa()) + return op.emitOpError() + << "with kind '" << arith::stringifyAtomicRMWKind(op.kind()) + << "' expects an integer type"; + break; + default: + break; + } + return success(); +} + +OpFoldResult AtomicRMWOp::fold(ArrayRef operands) { + /// atomicrmw(memrefcast) -> atomicrmw + if 
(succeeded(foldMemRefCast(*this, value()))) + return getResult(); + return OpFoldResult(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 02d54472baf5..a74b46c034c4 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -131,134 +131,6 @@ LogicalResult AssertOp::canonicalize(AssertOp op, PatternRewriter &rewriter) { return failure(); } -//===----------------------------------------------------------------------===// -// AtomicRMWOp -//===----------------------------------------------------------------------===// - -static LogicalResult verify(AtomicRMWOp op) { - if (op.getMemRefType().getRank() != op.getNumOperands() - 2) - return op.emitOpError( - "expects the number of subscripts to be equal to memref rank"); - switch (op.getKind()) { - case AtomicRMWKind::addf: - case AtomicRMWKind::maxf: - case AtomicRMWKind::minf: - case AtomicRMWKind::mulf: - if (!op.getValue().getType().isa()) - return op.emitOpError() - << "with kind '" << stringifyAtomicRMWKind(op.getKind()) - << "' expects a floating-point type"; - break; - case AtomicRMWKind::addi: - case AtomicRMWKind::maxs: - case AtomicRMWKind::maxu: - case AtomicRMWKind::mins: - case AtomicRMWKind::minu: - case AtomicRMWKind::muli: - case AtomicRMWKind::ori: - case AtomicRMWKind::andi: - if (!op.getValue().getType().isa()) - return op.emitOpError() - << "with kind '" << stringifyAtomicRMWKind(op.getKind()) - << "' expects an integer type"; - break; - default: - break; - } - return success(); -} - -/// Returns the identity value attribute associated with an AtomicRMWKind op. 
-Attribute mlir::getIdentityValueAttr(AtomicRMWKind kind, Type resultType, - OpBuilder &builder, Location loc) { - switch (kind) { - case AtomicRMWKind::maxf: - return builder.getFloatAttr( - resultType, - APFloat::getInf(resultType.cast().getFloatSemantics(), - /*Negative=*/true)); - case AtomicRMWKind::addf: - case AtomicRMWKind::addi: - case AtomicRMWKind::maxu: - case AtomicRMWKind::ori: - return builder.getZeroAttr(resultType); - case AtomicRMWKind::andi: - return builder.getIntegerAttr( - resultType, - APInt::getAllOnes(resultType.cast().getWidth())); - case AtomicRMWKind::maxs: - return builder.getIntegerAttr( - resultType, - APInt::getSignedMinValue(resultType.cast().getWidth())); - case AtomicRMWKind::minf: - return builder.getFloatAttr( - resultType, - APFloat::getInf(resultType.cast().getFloatSemantics(), - /*Negative=*/false)); - case AtomicRMWKind::mins: - return builder.getIntegerAttr( - resultType, - APInt::getSignedMaxValue(resultType.cast().getWidth())); - case AtomicRMWKind::minu: - return builder.getIntegerAttr( - resultType, - APInt::getMaxValue(resultType.cast().getWidth())); - case AtomicRMWKind::muli: - return builder.getIntegerAttr(resultType, 1); - case AtomicRMWKind::mulf: - return builder.getFloatAttr(resultType, 1); - // TODO: Add remaining reduction operations. - default: - (void)emitOptionalError(loc, "Reduction operation type not supported"); - break; - } - return nullptr; -} - -/// Returns the identity value associated with an AtomicRMWKind op. -Value mlir::getIdentityValue(AtomicRMWKind op, Type resultType, - OpBuilder &builder, Location loc) { - Attribute attr = getIdentityValueAttr(op, resultType, builder, loc); - return builder.create(loc, attr); -} - -/// Return the value obtained by applying the reduction operation kind -/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`. 
-Value mlir::getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, - Value lhs, Value rhs) { - switch (op) { - case AtomicRMWKind::addf: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::addi: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::mulf: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::muli: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::maxf: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::minf: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::maxs: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::mins: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::maxu: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::minu: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::ori: - return builder.create(loc, lhs, rhs); - case AtomicRMWKind::andi: - return builder.create(loc, lhs, rhs); - // TODO: Add remaining reduction operations. - default: - (void)emitOptionalError(loc, "Reduction operation type not supported"); - break; - } - return nullptr; -} - //===----------------------------------------------------------------------===// // GenericAtomicRMWOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp index 71a1a55903c6..a62f6a076f93 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp @@ -40,18 +40,18 @@ namespace { /// %new_value = select %cmp, %current, %fval : f32 /// atomic_yield %new_value : f32 /// } -struct AtomicRMWOpConverter : public OpRewritePattern { +struct AtomicRMWOpConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(AtomicRMWOp op, + LogicalResult matchAndRewrite(memref::AtomicRMWOp op, PatternRewriter &rewriter) const 
final { arith::CmpFPredicate predicate; - switch (op.getKind()) { - case AtomicRMWKind::maxf: + switch (op.kind()) { + case arith::AtomicRMWKind::maxf: predicate = arith::CmpFPredicate::OGT; break; - case AtomicRMWKind::minf: + case arith::AtomicRMWKind::minf: predicate = arith::CmpFPredicate::OLT; break; default: @@ -59,13 +59,13 @@ struct AtomicRMWOpConverter : public OpRewritePattern { } auto loc = op.getLoc(); - auto genericOp = rewriter.create(loc, op.getMemref(), - op.getIndices()); + auto genericOp = + rewriter.create(loc, op.memref(), op.indices()); OpBuilder bodyBuilder = OpBuilder::atBlockEnd(genericOp.getBody(), rewriter.getListener()); Value lhs = genericOp.getCurrentValue(); - Value rhs = op.getValue(); + Value rhs = op.value(); Value cmp = bodyBuilder.create(loc, predicate, lhs, rhs); Value select = bodyBuilder.create(loc, cmp, lhs, rhs); bodyBuilder.create(loc, select); @@ -130,10 +130,11 @@ struct StdExpandOpsPass : public StdExpandOpsBase { target.addLegalDialect(); - target.addDynamicallyLegalOp([](AtomicRMWOp op) { - return op.getKind() != AtomicRMWKind::maxf && - op.getKind() != AtomicRMWKind::minf; - }); + target.addDynamicallyLegalOp( + [](memref::AtomicRMWOp op) { + return op.kind() != arith::AtomicRMWKind::maxf && + op.kind() != arith::AtomicRMWKind::minf; + }); target.addDynamicallyLegalOp([](memref::ReshapeOp op) { return !op.shape().getType().cast().hasStaticShape(); }); diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 3a65d0e93dfd..60c0aac1a4be 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -359,41 +359,42 @@ static void print(OpAsmPrinter &p, ReductionOp op) { p << " : " << op.vector().getType() << " into " << op.dest().getType(); } -Value mlir::vector::getVectorReductionOp(AtomicRMWKind op, OpBuilder &builder, - Location loc, Value vector) { +Value mlir::vector::getVectorReductionOp(arith::AtomicRMWKind op, + OpBuilder &builder, Location 
loc, + Value vector) { Type scalarType = vector.getType().cast().getElementType(); switch (op) { - case AtomicRMWKind::addf: - case AtomicRMWKind::addi: + case arith::AtomicRMWKind::addf: + case arith::AtomicRMWKind::addi: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("add"), vector, ValueRange{}); - case AtomicRMWKind::mulf: - case AtomicRMWKind::muli: + case arith::AtomicRMWKind::mulf: + case arith::AtomicRMWKind::muli: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("mul"), vector, ValueRange{}); - case AtomicRMWKind::minf: + case arith::AtomicRMWKind::minf: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("minf"), vector, ValueRange{}); - case AtomicRMWKind::mins: + case arith::AtomicRMWKind::mins: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("minsi"), vector, ValueRange{}); - case AtomicRMWKind::minu: + case arith::AtomicRMWKind::minu: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("minui"), vector, ValueRange{}); - case AtomicRMWKind::maxf: + case arith::AtomicRMWKind::maxf: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("maxf"), vector, ValueRange{}); - case AtomicRMWKind::maxs: + case arith::AtomicRMWKind::maxs: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("maxsi"), vector, ValueRange{}); - case AtomicRMWKind::maxu: + case arith::AtomicRMWKind::maxu: return builder.create(vector.getLoc(), scalarType, builder.getStringAttr("maxui"), vector, ValueRange{}); diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index fbb79b5af3f8..91d4a7cd5d19 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -1551,7 +1551,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp, for (unsigned i = unrollJamFactor - 1; i >= 1; --i) { rhs = forOp.getResult(i * oldNumResults + pos); // Create ops 
based on reduction type. - lhs = getReductionOp(reduction.kind, builder, loc, lhs, rhs); + lhs = arith::getReductionOp(reduction.kind, builder, loc, lhs, rhs); if (!lhs) return failure(); Operation *op = lhs.getDefiningOp(); diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir index 5682c853964c..70ba47d2d176 100644 --- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir @@ -859,3 +859,28 @@ func @rank_of_ranked(%ranked: memref) { } // CHECK: llvm.mlir.constant(1 : index) : i64 // CHECK32: llvm.mlir.constant(1 : index) : i32 + +// ----- + +// CHECK-LABEL: func @atomic_rmw +func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : f32, %i : index) { + memref.atomic_rmw assign %fval, %F[%i] : (f32, memref<10xf32>) -> f32 + // CHECK: llvm.atomicrmw xchg %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw addi %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw add %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw maxs %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw max %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw mins %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw min %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw maxu %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw umax %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw minu %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw umin %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw addf %fval, %F[%i] : (f32, memref<10xf32>) -> f32 + // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw ori %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw _or %{{.*}}, %{{.*}} acq_rel + memref.atomic_rmw andi %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw _and %{{.*}}, %{{.*}} acq_rel + return +} diff --git 
a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir index c3282e1903d6..0dc6bf10dc5e 100644 --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -486,31 +486,6 @@ func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> { // ----- -// CHECK-LABEL: func @atomic_rmw -func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : f32, %i : index) { - atomic_rmw assign %fval, %F[%i] : (f32, memref<10xf32>) -> f32 - // CHECK: llvm.atomicrmw xchg %{{.*}}, %{{.*}} acq_rel - atomic_rmw addi %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw add %{{.*}}, %{{.*}} acq_rel - atomic_rmw maxs %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw max %{{.*}}, %{{.*}} acq_rel - atomic_rmw mins %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw min %{{.*}}, %{{.*}} acq_rel - atomic_rmw maxu %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw umax %{{.*}}, %{{.*}} acq_rel - atomic_rmw minu %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw umin %{{.*}}, %{{.*}} acq_rel - atomic_rmw addf %fval, %F[%i] : (f32, memref<10xf32>) -> f32 - // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} acq_rel - atomic_rmw ori %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw _or %{{.*}}, %{{.*}} acq_rel - atomic_rmw andi %ival, %I[%i] : (i32, memref<10xi32>) -> i32 - // CHECK: llvm.atomicrmw _and %{{.*}}, %{{.*}} acq_rel - return -} - -// ----- - // CHECK-LABEL: func @generic_atomic_rmw func @generic_atomic_rmw(%I : memref<10xi32>, %i : index) -> i32 { %x = generic_atomic_rmw %I[%i] : memref<10xi32> { diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index 39f9847f4c9e..2e81705049f5 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ 
b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -499,3 +499,14 @@ func @no_fold_dynamic_no_op_subview(%arg0 : memref) -> memref, %c : index) { + %v = memref.cast %arg1 : memref<4xf32> to memref + %a = memref.atomic_rmw addf %arg0, %v[%c] : (f32, memref) -> f32 + return +} + +// CHECK-LABEL: func @atomicrmw_cast_fold +// CHECK-NEXT: memref.atomic_rmw addf %arg0, %arg1[%arg2] : (f32, memref<4xf32>) -> f32 diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir index 5cf32703c9eb..90f851959748 100644 --- a/mlir/test/Dialect/MemRef/invalid.mlir +++ b/mlir/test/Dialect/MemRef/invalid.mlir @@ -848,3 +848,27 @@ func @illegal_num_offsets(%arg0 : memref, %arg1 : index, %arg2 : inde // expected-error@+1 {{expected 3 offset values}} %0 = memref.subview %arg0[0, 0] [%arg1, %arg2] [1, 1] : memref to memref } + +// ----- + +func @atomic_rmw_idxs_rank_mismatch(%I: memref<16x10xf32>, %i : index, %val : f32) { + // expected-error@+1 {{expects the number of subscripts to be equal to memref rank}} + %x = memref.atomic_rmw addf %val, %I[%i] : (f32, memref<16x10xf32>) -> f32 + return +} + +// ----- + +func @atomic_rmw_expects_float(%I: memref<16x10xi32>, %i : index, %val : i32) { + // expected-error@+1 {{expects a floating-point type}} + %x = memref.atomic_rmw addf %val, %I[%i, %i] : (i32, memref<16x10xi32>) -> i32 + return +} + +// ----- + +func @atomic_rmw_expects_int(%I: memref<16x10xf32>, %i : index, %val : f32) { + // expected-error@+1 {{expects an integer type}} + %x = memref.atomic_rmw addi %val, %I[%i, %i] : (f32, memref<16x10xf32>) -> f32 + return +} diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir index 963c817af398..71b6038a2f9d 100644 --- a/mlir/test/Dialect/MemRef/ops.mlir +++ b/mlir/test/Dialect/MemRef/ops.mlir @@ -227,3 +227,13 @@ func @rank(%t : memref<4x4x?xf32>) { %1 = memref.rank %t : memref<4x4x?xf32> return } + +// ------ + +// CHECK-LABEL: func @atomic_rmw +// CHECK-SAME: ([[BUF:%.*]]: 
memref<10xf32>, [[VAL:%.*]]: f32, [[I:%.*]]: index) +func @atomic_rmw(%I: memref<10xf32>, %val: f32, %i : index) { + %x = memref.atomic_rmw addf %val, %I[%i] : (f32, memref<10xf32>) -> f32 + // CHECK: memref.atomic_rmw addf [[VAL]], [[BUF]]{{\[}}[[I]]] + return +} diff --git a/mlir/test/Dialect/Standard/expand-ops.mlir b/mlir/test/Dialect/Standard/expand-ops.mlir index 45659aee0763..cb650ffd11bd 100644 --- a/mlir/test/Dialect/Standard/expand-ops.mlir +++ b/mlir/test/Dialect/Standard/expand-ops.mlir @@ -3,7 +3,7 @@ // CHECK-LABEL: func @atomic_rmw_to_generic // CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index) func @atomic_rmw_to_generic(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { - %x = atomic_rmw maxf %f, %F[%i] : (f32, memref<10xf32>) -> f32 + %x = memref.atomic_rmw maxf %f, %F[%i] : (f32, memref<10xf32>) -> f32 return %x : f32 } // CHECK: %0 = generic_atomic_rmw %arg0[%arg2] : memref<10xf32> { @@ -18,7 +18,7 @@ func @atomic_rmw_to_generic(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { // CHECK-LABEL: func @atomic_rmw_no_conversion func @atomic_rmw_no_conversion(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { - %x = atomic_rmw addf %f, %F[%i] : (f32, memref<10xf32>) -> f32 + %x = memref.atomic_rmw addf %f, %F[%i] : (f32, memref<10xf32>) -> f32 return %x : f32 } // CHECK-NOT: generic_atomic_rmw diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index b83f530eeacc..351e8a6b39c1 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -325,14 +325,6 @@ func @unranked_tensor_load_store(%0 : memref<*xi32>, %1 : tensor<*xi32>) { return } -// CHECK-LABEL: func @atomic_rmw -// CHECK-SAME: ([[BUF:%.*]]: memref<10xf32>, [[VAL:%.*]]: f32, [[I:%.*]]: index) -func @atomic_rmw(%I: memref<10xf32>, %val: f32, %i : index) { - %x = atomic_rmw addf %val, %I[%i] : (f32, memref<10xf32>) -> f32 - // CHECK: atomic_rmw addf [[VAL]], [[BUF]]{{\[}}[[I]]] - return -} - // CHECK-LABEL: func @generic_atomic_rmw // 
CHECK-SAME: ([[BUF:%.*]]: memref<1x2xf32>, [[I:%.*]]: index, [[J:%.*]]: index) func @generic_atomic_rmw(%I: memref<1x2xf32>, %i : index, %j : index) { diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 49f29f09bf49..2aae390af4d0 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -130,30 +130,6 @@ func @invalid_splat(%v : f32) { // expected-note {{prior use here}} // ----- -func @atomic_rmw_idxs_rank_mismatch(%I: memref<16x10xf32>, %i : index, %val : f32) { - // expected-error@+1 {{expects the number of subscripts to be equal to memref rank}} - %x = atomic_rmw addf %val, %I[%i] : (f32, memref<16x10xf32>) -> f32 - return -} - -// ----- - -func @atomic_rmw_expects_float(%I: memref<16x10xi32>, %i : index, %val : i32) { - // expected-error@+1 {{expects a floating-point type}} - %x = atomic_rmw addf %val, %I[%i, %i] : (i32, memref<16x10xi32>) -> i32 - return -} - -// ----- - -func @atomic_rmw_expects_int(%I: memref<16x10xf32>, %i : index, %val : f32) { - // expected-error@+1 {{expects an integer type}} - %x = atomic_rmw addi %val, %I[%i, %i] : (f32, memref<16x10xf32>) -> f32 - return -} - -// ----- - func @generic_atomic_rmw_wrong_arg_num(%I: memref<10xf32>, %i : index) { // expected-error@+1 {{expected single number of entry block arguments}} %x = generic_atomic_rmw %I[%i] : memref<10xf32> { From a96fe1bf3b320d62f1564ded5f7259eef1869cf9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 11:36:57 -0800 Subject: [PATCH 247/992] [ELF][LTO] Call madvise(MADV_DONTNEED) on MemoryBuffer instances @tejohnson noticed that freeing MemoryBuffer instances right before `lto->compile` can save RSS, likely because the memory can be reused by LTO indexing (e.g. ThinLTO import/export lists).). For ELFFileBase instances, symbol and section names are backed by MemoryBuffer, so destroying MemoryBuffer would make some infrequent passes (parseSymbolVersion, reportBackrefs) crash and make debugging difficult. 
For a BitcodeFile, its content is completely unused, but destroying its MemoryBuffer makes the buffer identifier inaccessible and may introduce constraints for future changes. This patch leverages madvise(MADV_DONTNEED) which achieves the major gain without the latent issues. `Maximum resident set size (kbytes): ` for a large --thinlto-index-only link: * current behavior: 10146104KiB * destroy MemoryBuffer instances: 8555240KiB * madvise(MADV_DONTNEED) just bitcodeFiles and lazyBitcodeFiles: 8737372KiB * madvise(MADV_DONTNEED) all MemoryBuffers: 8739796KiB (16% decrease) Depends on D116366 Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D116367 --- lld/ELF/Driver.cpp | 54 ++++++++++++++++++++++++++++++++---------- lld/ELF/Driver.h | 2 +- lld/ELF/InputFiles.cpp | 6 ++--- lld/ELF/InputFiles.h | 1 + 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index dab6a537a2ee..505602f7da62 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -87,6 +87,7 @@ bool elf::link(ArrayRef args, bool canExitEarly, inputSections.clear(); outputSections.clear(); + memoryBuffers.clear(); archiveFiles.clear(); binaryFiles.clear(); bitcodeFiles.clear(); @@ -1987,6 +1988,28 @@ static Symbol *addUnusedUndefined(StringRef name, return symtab->addSymbol(sym); } +static void markBuffersAsDontNeed(bool skipLinkedOutput) { + // With --thinlto-index-only, all buffers are nearly unused from now on + // (except symbol/section names used by infrequent passes). Mark input file + // buffers as MADV_DONTNEED so that these pages can be reused by the expensive + // thin link, saving memory. + if (skipLinkedOutput) { + for (MemoryBuffer &mb : llvm::make_pointee_range(memoryBuffers)) + mb.dontNeedIfMmap(); + return; + } + + // Otherwise, just mark MemoryBuffers backing BitcodeFiles. 
+ DenseSet bufs; + for (BitcodeFile *file : bitcodeFiles) + bufs.insert(file->mb.getBufferStart()); + for (BitcodeFile *file : lazyBitcodeFiles) + bufs.insert(file->mb.getBufferStart()); + for (MemoryBuffer &mb : llvm::make_pointee_range(memoryBuffers)) + if (bufs.count(mb.getBufferStart())) + mb.dontNeedIfMmap(); +} + // This function is where all the optimizations of link-time // optimization takes place. When LTO is in use, some input files are // not in native object file format but in the LLVM bitcode format. @@ -1994,13 +2017,17 @@ static Symbol *addUnusedUndefined(StringRef name, // using LLVM functions and replaces bitcode symbols with the results. // Because all bitcode files that the program consists of are passed to // the compiler at once, it can do a whole-program optimization. -template void LinkerDriver::compileBitcodeFiles() { +template +void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) { llvm::TimeTraceScope timeScope("LTO"); // Compile bitcode files and replace bitcode symbols. lto.reset(new BitcodeCompiler); for (BitcodeFile *file : bitcodeFiles) lto->add(*file); + if (!bitcodeFiles.empty()) + markBuffersAsDontNeed(skipLinkedOutput); + for (InputFile *file : lto->compile()) { auto *obj = cast>(file); obj->parse(/*ignoreComdats=*/true); @@ -2364,28 +2391,31 @@ template void LinkerDriver::link(opt::InputArgList &args) { symtab->scanVersionScript(); } + // Skip the normal linked output if some LTO options are specified. + // + // For --thinlto-index-only, index file creation is performed in + // compileBitcodeFiles, so we are done afterwards. --plugin-opt=emit-llvm and + // --plugin-opt=emit-asm create output files in bitcode or assembly code, + // respectively. When only certain thinLTO modules are specified for + // compilation, the intermediate object file are the expected output. 
+ const bool skipLinkedOutput = config->thinLTOIndexOnly || config->emitLLVM || + config->ltoEmitAsm || + !config->thinLTOModulesToCompile.empty(); + // Do link-time optimization if given files are LLVM bitcode files. // This compiles bitcode files into real object files. // // With this the symbol table should be complete. After this, no new names // except a few linker-synthesized ones will be added to the symbol table. - compileBitcodeFiles(); + compileBitcodeFiles(skipLinkedOutput); // Symbol resolution finished. Report backward reference problems. reportBackrefs(); if (errorCount()) return; - // If --thinlto-index-only is given, we should create only "index - // files" and not object files. Index file creation is already done - // in compileBitcodeFiles, so we are done if that's the case. - // Likewise, --plugin-opt=emit-llvm and --plugin-opt=emit-asm are the - // options to create output files in bitcode or assembly code - // respectively. No object files are generated. - // Also bail out here when only certain thinLTO modules are specified for - // compilation. The intermediate object file are the expected output. - if (config->thinLTOIndexOnly || config->emitLLVM || config->ltoEmitAsm || - !config->thinLTOModulesToCompile.empty()) + // Bail out if normal linked output is skipped due to LTO. + if (skipLinkedOutput) return; // Handle --exclude-libs again because lto.tmp may reference additional diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h index 5961e1f69472..b8cbb3b19268 100644 --- a/lld/ELF/Driver.h +++ b/lld/ELF/Driver.h @@ -34,7 +34,7 @@ class LinkerDriver { void createFiles(llvm::opt::InputArgList &args); void inferMachineType(); template void link(llvm::opt::InputArgList &args); - template void compileBitcodeFiles(); + template void compileBitcodeFiles(bool skipLinkedOutput); // True if we are in --whole-archive and --no-whole-archive. 
bool inWholeArchive = false; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f1e29547de12..dd7fd954bc51 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -43,6 +43,7 @@ using namespace lld::elf; bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; +SmallVector> elf::memoryBuffers; SmallVector elf::archiveFiles; SmallVector elf::binaryFiles; SmallVector elf::bitcodeFiles; @@ -122,9 +123,8 @@ Optional elf::readFile(StringRef path) { return None; } - std::unique_ptr &mb = *mbOrErr; - MemoryBufferRef mbref = mb->getMemBufferRef(); - make>(std::move(mb)); // take MB ownership + MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); + memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership if (tar) tar->append(relativeToRoot(path), mbref.getBuffer()); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 7bf5423bed44..1084cf94313b 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -407,6 +407,7 @@ inline bool isBitcode(MemoryBufferRef mb) { std::string replaceThinLTOSuffix(StringRef path); +extern SmallVector> memoryBuffers; extern SmallVector archiveFiles; extern SmallVector binaryFiles; extern SmallVector bitcodeFiles; From 4683ce2cd899b2d682453e173c3466fa270c09f7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 30 Dec 2021 20:35:45 +0100 Subject: [PATCH 248/992] [InferAttrs] Give strnlen the same attributes as strlen This moves the only string function out of the big list of math funcs. And lets us CSE strnlen calls. 
--- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 2 +- llvm/test/Transforms/InferFunctionAttrs/annotate.ll | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 580cfd80141e..bec23a20ac49 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -233,6 +233,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { switch (TheLibFunc) { case LibFunc_strlen: + case LibFunc_strnlen: case LibFunc_wcslen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); @@ -1158,7 +1159,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: - case LibFunc_strnlen: case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanh: diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 7bdb2a1d8e0c..4fdf32b60923 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -921,10 +921,10 @@ declare i8* @strncpy(i8*, i8*, i64) ; CHECK: declare noalias i8* @strndup(i8* nocapture readonly, i64 noundef) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN]] declare i8* @strndup(i8*, i64) -; CHECK: declare i64 @strnlen(i8*, i64) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i64 @strnlen(i8* nocapture, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] declare i64 @strnlen(i8*, i64) -; CHECK: declare i8* @strpbrk(i8*, i8* nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] +; CHECK: declare i8* @strpbrk(i8*, i8* nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] declare i8* @strpbrk(i8*, i8*) ; CHECK: declare i8* @strrchr(i8*, i32) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY]] From d295dd10f2d8fb91c2bc9af8f72f00cf2d24f0f8 Mon Sep 17 00:00:00 
2001 From: "long.chen" Date: Thu, 30 Dec 2021 07:56:09 +0000 Subject: [PATCH 249/992] [MLIR] Add explicit `using` to disambiguate between multiple implementations from base classes (NFC) Both of DenseElementsAttr and ElementsAttrTrait define the method of getElementType, this commit makes it available on DenseIntOrFPElementsAttr and DenseStringElementsAttr. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116389 --- mlir/include/mlir/IR/BuiltinAttributes.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/include/mlir/IR/BuiltinAttributes.td b/mlir/include/mlir/IR/BuiltinAttributes.td index 724120a88eb2..aadec07fbbc1 100644 --- a/mlir/include/mlir/IR/BuiltinAttributes.td +++ b/mlir/include/mlir/IR/BuiltinAttributes.td @@ -175,6 +175,7 @@ def Builtin_DenseIntOrFPElementsAttr : Builtin_Attr< let extraClassDeclaration = [{ using DenseElementsAttr::empty; using DenseElementsAttr::getNumElements; + using DenseElementsAttr::getElementType; using DenseElementsAttr::getValues; using DenseElementsAttr::isSplat; using DenseElementsAttr::size; @@ -312,6 +313,7 @@ def Builtin_DenseStringElementsAttr : Builtin_Attr< let extraClassDeclaration = [{ using DenseElementsAttr::empty; using DenseElementsAttr::getNumElements; + using DenseElementsAttr::getElementType; using DenseElementsAttr::getValues; using DenseElementsAttr::isSplat; using DenseElementsAttr::size; From 95c25fd52a27a8f4094cc68be122ae929d629ea1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 11:47:54 -0800 Subject: [PATCH 250/992] [Bazel] Make mlir:MemRefOpsTdFiles depend on :ArithmeticOpsTdFiles --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index fa972559a9d7..ea11a63d6154 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -7509,6 +7509,7 
@@ td_library( ], includes = ["include"], deps = [ + ":ArithmeticOpsTdFiles", ":CastInterfacesTdFiles", ":ControlFlowInterfacesTdFiles", ":CopyOpInterfaceTdFiles", From 15787ccd4574400a511de4a66392a3653f70c7e5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 30 Dec 2021 11:41:59 -0800 Subject: [PATCH 251/992] [RISCV] Add support for STRICT_LRINT/LLRINT/LROUND/LLROUND. Tests for other strict intrinsics. This patch adds isel support for STRICT_LRINT/LLRINT/LROUND/LLROUND. It also adds test cases for f32 and f64 constrained intrinsics that correspond to the intrinsics in float-intrinsics.ll and double-intrinsics.ll. Support for promoting the integer argument of STRICT_FPOWI was added. I've skipped adding tests for f16 intrinsics, since we don't have libcalls for them and we have inconsistent support for promoting them in LegalizeDAG. This will need to be examined more closely. Reviewed By: asb Differential Revision: https://reviews.llvm.org/D116323 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 19 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 15 + llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 12 +- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 12 +- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 12 +- .../CodeGen/RISCV/double-intrinsics-strict.ll | 1214 +++++++++++++++++ .../CodeGen/RISCV/float-intrinsics-strict.ll | 1157 ++++++++++++++++ .../RISCV/rv64zfh-half-intrinsics-strict.ll | 41 + .../RISCV/zfh-half-intrinsics-strict.ll | 65 + 9 files changed, 2521 insertions(+), 26 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll create mode 100644 llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll create mode 100644 llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics-strict.ll create mode 100644 llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 518e525e13d0..2109ac01a83c 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1653,7 +1653,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::UDIVFIX: case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break; - case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; + case ISD::FPOWI: + case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2099,8 +2100,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { } SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { - // FIXME: Support for promotion of STRICT_FPOWI is not implemented yet. - assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet."); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // The integer operand is the last operand in FPOWI (so the result and // floating point operand is already type legalized). @@ -2118,17 +2119,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { DAG.getContext()->emitError("Don't know how to promote fpowi to fpow"); return DAG.getUNDEF(N->getValueType(0)); } + unsigned OpOffset = IsStrict ? 1 : 0; // The exponent should fit in a sizeof(int) type for the libcall to be valid. 
assert(DAG.getLibInfo().getIntSize() == - N->getOperand(1).getValueType().getSizeInBits() && + N->getOperand(1 + OpOffset).getValueType().getSizeInBits() && "POWI exponent should match with sizeof(int) when doing the libcall."); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - std::pair Tmp = - TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, - CallOptions, SDLoc(N), SDValue()); + SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)}; + std::pair Tmp = TLI.makeLibCall( + DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); ReplaceValueWith(SDValue(N, 0), Tmp.first); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); return SDValue(); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 9d4f3b28f5b3..c782a6be4d64 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -330,6 +330,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f16, Legal); setOperationAction(ISD::LROUND, MVT::f16, Legal); setOperationAction(ISD::LLROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal); + setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal); + setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal); @@ -363,6 +367,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::FLOG10, MVT::f16, Promote); + // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have + // complete support for all operations in LegalizeDAG. 
+ // We need to custom promote this. if (Subtarget.is64Bit()) setOperationAction(ISD::FPOWI, MVT::i32, Custom); @@ -375,6 +382,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f32, Legal); setOperationAction(ISD::LROUND, MVT::f32, Legal); setOperationAction(ISD::LLROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); @@ -402,6 +413,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f64, Legal); setOperationAction(ISD::LROUND, MVT::f64, Legal); setOperationAction(ISD::LLROUND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal); + setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal); + setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 6bfc9bbdc0a3..72e818da797c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -270,10 +270,10 @@ def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>; def : Pat<(i32 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_WU_D $rs1, 0b001)>; // float->int32 with current rounding mode. 
-def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>; +def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>; // float->int32 rounded to nearest with ties rounded away from zero. -def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>; +def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>; // [u]int->double. def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; @@ -308,12 +308,12 @@ def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; // double->int64 with current rounding mode. -def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; -def : Pat<(i64 (llrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; +def : Pat<(i64 (any_lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; +def : Pat<(i64 (any_llrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; // double->int64 rounded to nearest with ties rounded away from zero. -def : Pat<(i64 (lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; -def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; +def : Pat<(i64 (any_lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; +def : Pat<(i64 (any_llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; // [u]int64->fp. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 5dbdc428d372..1d981fe64aaa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -405,10 +405,10 @@ def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; def : Pat<(i32 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; // float->int32 with current rounding mode. 
-def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>; +def : Pat<(i32 (any_lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>; // float->int32 rounded to nearest with ties rounded away from zero. -def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>; +def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; @@ -437,12 +437,12 @@ def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; def : Pat<(i64 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; // float->int64 with current rounding mode. -def : Pat<(i64 (lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>; -def : Pat<(i64 (llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>; +def : Pat<(i64 (any_lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>; +def : Pat<(i64 (any_llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>; // float->int64 rounded to neartest with ties rounded away from zero. -def : Pat<(i64 (lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; -def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; +def : Pat<(i64 (any_lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; +def : Pat<(i64 (any_llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index fa2eaa13ec57..dcec8fda3165 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -287,10 +287,10 @@ def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; def : Pat<(i32 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; // half->int32 with current rounding mode. 
-def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>; +def : Pat<(i32 (any_lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>; // half->int32 rounded to nearest with ties rounded away from zero. -def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>; +def : Pat<(i32 (any_lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; @@ -313,12 +313,12 @@ def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; def : Pat<(i64 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; // half->int64 with current rounding mode. -def : Pat<(i64 (lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>; -def : Pat<(i64 (llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>; +def : Pat<(i64 (any_lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>; +def : Pat<(i64 (any_llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>; // half->int64 rounded to nearest with ties rounded away from zero. -def : Pat<(i64 (lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; -def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; +def : Pat<(i64 (any_lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; +def : Pat<(i64 (any_llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll new file mode 100644 index 000000000000..1e76500d2001 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -0,0 +1,1214 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV32IFD %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64IFD %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64I %s + +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) + +define double @sqrt_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: sqrt_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fsqrt.d ft0, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: sqrt_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fsqrt.d ft0, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: sqrt_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrt@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi 
sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sqrt_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrt@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sqrt.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.powi.f64.i32(double, i32, metadata, metadata) + +define double @powi_f64(double %a, i32 %b) nounwind strictfp { +; RV32IFD-LABEL: powi_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __powidf2@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: powi_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: sext.w a1, a1 +; RV64IFD-NEXT: call __powidf2@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: powi_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __powidf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: powi_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: call __powidf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.powi.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.sin.f64(double, metadata, 
metadata) + +define double @sin_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: sin_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call sin@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: sin_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call sin@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: sin_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sin@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sin_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sin@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sin.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) + +define double @cos_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: cos_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call cos@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: cos_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call cos@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: 
cos_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call cos@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cos_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call cos@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.cos.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +; The sin+cos combination results in an FSINCOS SelectionDAG node. +define double @sincos_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: sincos_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: mv s0, a1 +; RV32IFD-NEXT: mv s1, a0 +; RV32IFD-NEXT: call sin@plt +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fsd ft0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: mv a0, s1 +; RV32IFD-NEXT: mv a1, s0 +; RV32IFD-NEXT: call cos@plt +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fld ft1, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: sincos_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -32 +; RV64IFD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: sd s0, 
16(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: mv s0, a0 +; RV64IFD-NEXT: call sin@plt +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fsd ft0, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: mv a0, s0 +; RV64IFD-NEXT: call cos@plt +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fld ft1, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: fadd.d ft0, ft1, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 32 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: sincos_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: call sin@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call cos@plt +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: call __adddf3@plt +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sincos_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: call sin@plt +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call cos@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __adddf3@plt +; RV64I-NEXT: 
ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sin.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %2 = call double @llvm.experimental.constrained.cos.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %3 = fadd double %1, %2 + ret double %3 +} + +declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) + +define double @pow_f64(double %a, double %b) nounwind strictfp { +; RV32IFD-LABEL: pow_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call pow@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: pow_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call pow@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: pow_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call pow@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pow_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call pow@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.pow.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) + +define double 
@exp_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: exp_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call exp@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: exp_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call exp@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: exp_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call exp@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: exp_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call exp@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.exp.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) + +define double @exp2_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: exp2_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call exp2@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: exp2_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call exp2@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: exp2_f64: +; RV32I: # 
%bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call exp2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: exp2_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call exp2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.exp2.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) + +define double @log_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: log_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call log@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: log_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call log@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: log_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call log@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: log_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call log@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.log.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + 
+declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) + +define double @log10_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: log10_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call log10@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: log10_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call log10@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: log10_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call log10@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: log10_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call log10@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.log10.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata) + +define double @log2_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: log2_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call log2@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: log2_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call log2@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 
8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: log2_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call log2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: log2_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call log2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.log2.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) + +define double @fma_f64(double %a, double %b, double %c) nounwind strictfp { +; RV32IFD-LABEL: fma_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: fmadd.d ft0, ft2, ft1, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fma_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a2 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: fmv.d.x ft2, a0 +; RV64IFD-NEXT: fmadd.d ft0, ft2, ft1, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fma_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; 
RV64I-LABEL: fma_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata) + +define double @fmuladd_f64(double %a, double %b, double %c) nounwind strictfp { +; RV32IFD-LABEL: fmuladd_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: fmadd.d ft0, ft2, ft1, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fmuladd_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a2 +; RV64IFD-NEXT: fmv.d.x ft1, a1 +; RV64IFD-NEXT: fmv.d.x ft2, a0 +; RV64IFD-NEXT: fmadd.d ft0, ft2, ft1, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmuladd_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: call __muldf3@plt +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: call __adddf3@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi 
sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmuladd_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __muldf3@plt +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __adddf3@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata) + +define double @minnum_f64(double %a, double %b) nounwind strictfp { +; RV32IFD-LABEL: minnum_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call fmin@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: minnum_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call fmin@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: minnum_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmin@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: minnum_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmin@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.minnum.f64(double %a, double %b, metadata 
!"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata) + +define double @maxnum_f64(double %a, double %b) nounwind strictfp { +; RV32IFD-LABEL: maxnum_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call fmax@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: maxnum_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call fmax@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: maxnum_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmax@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: maxnum_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmax@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.maxnum.f64(double %a, double %b, metadata !"fpexcept.strict") strictfp + ret double %1 +} + +; TODO: FMINNAN and FMAXNAN aren't handled in +; SelectionDAGLegalize::ExpandNode. 
+ +; declare double @llvm.experimental.constrained.minimum.f64(double, double, metadata) + +; define double @fminimum_f64(double %a, double %b) nounwind strictfp { +; %1 = call double @llvm.experimental.constrained.minimum.f64(double %a, double %b, metadata !"fpexcept.strict") strictfp +; ret double %1 +; } + +; declare double @llvm.experimental.constrained.maximum.f64(double, double, metadata) + +; define double @fmaximum_f64(double %a, double %b) nounwind strictfp { +; %1 = call double @llvm.experimental.constrained.maximum.f64(double %a, double %b, metadata !"fpexcept.strict") strictfp +; ret double %1 +; } + +declare double @llvm.experimental.constrained.floor.f64(double, metadata) + +define double @floor_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: floor_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call floor@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: floor_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call floor@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: floor_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call floor@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: floor_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call floor@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.floor.f64(double %a, metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double 
@llvm.experimental.constrained.ceil.f64(double, metadata) + +define double @ceil_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: ceil_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call ceil@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: ceil_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call ceil@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: ceil_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ceil@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ceil_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ceil@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.ceil.f64(double %a, metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.trunc.f64(double, metadata) + +define double @trunc_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: trunc_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call trunc@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: trunc_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call trunc@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; 
RV64IFD-NEXT: ret +; +; RV32I-LABEL: trunc_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call trunc@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: trunc_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call trunc@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.trunc.f64(double %a, metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) + +define double @rint_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: rint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call rint@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: rint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call rint@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: rint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call rint@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call rint@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.rint.f64(double %a, metadata !"round.dynamic", metadata 
!"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) + +define double @nearbyint_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: nearbyint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call nearbyint@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: nearbyint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call nearbyint@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: nearbyint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call nearbyint@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: nearbyint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call nearbyint@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.nearbyint.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.round.f64(double, metadata) + +define double @round_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: round_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call round@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: round_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 
8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call round@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: round_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call round@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: round_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call round@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.round.f64(double %a, metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) + +define double @roundeven_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: roundeven_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call roundeven@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: roundeven_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call roundeven@plt +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: roundeven_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundeven@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: roundeven_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundeven@plt +; RV64I-NEXT: ld ra, 
8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.roundeven.f64(double %a, metadata !"fpexcept.strict") strictfp + ret double %1 +} + +declare iXLen @llvm.experimental.constrained.lrint.iXLen.f64(double, metadata, metadata) + +define iXLen @lrint_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: lrint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: lrint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: lrint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call lrint@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lrint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call lrint@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lrint.iXLen.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare iXLen @llvm.experimental.constrained.lround.iXLen.f64(double, metadata) + +define iXLen @lround_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: lround_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rmm +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: lround_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rmm +; 
RV64IFD-NEXT: ret +; +; RV32I-LABEL: lround_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call lround@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lround_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call lround@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lround.iXLen.f64(double %a, metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare i64 @llvm.experimental.constrained.llrint.i64.f64(double, metadata, metadata) + +define i64 @llrint_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: llrint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call llrint@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: llrint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: llrint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call llrint@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: llrint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call llrint@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret i64 %1 +} + +declare i64 
@llvm.experimental.constrained.llround.i64.f64(double, metadata) + +define i64 @llround_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: llround_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call llround@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: llround_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rmm +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: llround_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call llround@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: llround_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call llround@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llround.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll new file mode 100644 index 000000000000..7dee4588fe46 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll @@ -0,0 +1,1157 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+f \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+f \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64IF %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 \ +; RUN: -verify-machineinstrs 
-disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 \ +; RUN: -verify-machineinstrs -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64I %s + +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) + +define float @sqrt_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: sqrt_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fsqrt.s ft0, ft0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: sqrt_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fsqrt.s ft0, ft0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: sqrt_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrtf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sqrt_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrtf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sqrt.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.powi.f32.i32(float, i32, metadata, metadata) + +define float @powi_f32(float %a, i32 %b) nounwind strictfp { +; RV32IF-LABEL: powi_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __powisf2@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: powi_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: sext.w a1, a1 +; RV64IF-NEXT: call 
__powisf2@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: powi_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __powisf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: powi_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: call __powisf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.powi.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) + +define float @sin_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: sin_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call sinf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: sin_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call sinf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: sin_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sinf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sin_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sinf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: 
addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sin.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) + +define float @cos_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: cos_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call cosf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: cos_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call cosf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: cos_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call cosf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cos_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call cosf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.cos.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +; The sin+cos combination results in an FSINCOS SelectionDAG node. 
+define float @sincos_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: sincos_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: mv s0, a0 +; RV32IF-NEXT: call sinf@plt +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: mv a0, s0 +; RV32IF-NEXT: call cosf@plt +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: fadd.s ft0, ft1, ft0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: sincos_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -32 +; RV64IF-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IF-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IF-NEXT: mv s0, a0 +; RV64IF-NEXT: call sinf@plt +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; RV64IF-NEXT: mv a0, s0 +; RV64IF-NEXT: call cosf@plt +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload +; RV64IF-NEXT: fadd.s ft0, ft1, ft0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IF-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 32 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: sincos_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: call sinf@plt +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call cosf@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __addsf3@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded 
Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sincos_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: call sinf@plt +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call cosf@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __addsf3@plt +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sin.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %2 = call float @llvm.experimental.constrained.cos.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %3 = fadd float %1, %2 + ret float %3 +} + +declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) + +define float @pow_f32(float %a, float %b) nounwind strictfp { +; RV32IF-LABEL: pow_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call powf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: pow_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call powf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: pow_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call powf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 
+; RV32I-NEXT: ret +; +; RV64I-LABEL: pow_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call powf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.pow.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata) + +define float @exp_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: exp_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call expf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: exp_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call expf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: exp_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call expf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: exp_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call expf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.exp.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata) + +define float @exp2_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: exp2_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call exp2f@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: exp2_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call exp2f@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: exp2_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call exp2f@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: exp2_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call exp2f@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.exp2.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) + +define float @log_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: log_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call logf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: log_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call logf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: log_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call logf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: log_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call logf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.log.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) + +define float @log10_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: log10_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call log10f@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: log10_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call log10f@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: log10_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call log10f@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: log10_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call log10f@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.log10.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.log2.f32(float, metadata, metadata) + +define float @log2_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: 
log2_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call log2f@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: log2_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call log2f@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: log2_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call log2f@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: log2_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call log2f@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.log2.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) + +define float @fma_f32(float %a, float %b, float %c) nounwind strictfp { +; RV32IF-LABEL: fma_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a2 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fma_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a2 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: fmv.w.x ft2, a0 +; RV64IF-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fma_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call 
fmaf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fma_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata) + +define float @fmuladd_f32(float %a, float %b, float %c) nounwind strictfp { +; RV32IF-LABEL: fmuladd_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a2 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fmuladd_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a2 +; RV64IF-NEXT: fmv.w.x ft1, a1 +; RV64IF-NEXT: fmv.w.x ft2, a0 +; RV64IF-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fmuladd_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: call __mulsf3@plt +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __addsf3@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmuladd_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __mulsf3@plt +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __addsf3@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte 
Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.fmuladd.f32(float %a, float %b, float %c, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata) + +define float @minnum_f32(float %a, float %b) nounwind strictfp { +; RV32IF-LABEL: minnum_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call fminf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: minnum_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call fminf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: minnum_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fminf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: minnum_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fminf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.minnum.f32(float %a, float %b, metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata) + +define float @maxnum_f32(float %a, float %b) nounwind strictfp { +; RV32IF-LABEL: maxnum_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call fmaxf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: maxnum_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call fmaxf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: maxnum_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaxf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: maxnum_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaxf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.maxnum.f32(float %a, float %b, metadata !"fpexcept.strict") strictfp + ret float %1 +} + +; TODO: FMINNAN and FMAXNAN aren't handled in +; SelectionDAGLegalize::ExpandNode. 
+ +; declare float @llvm.experimental.constrained.minimum.f32(float, float, metadata) + +; define float @fminimum_f32(float %a, float %b) nounwind strictfp { +; %1 = call float @llvm.experimental.constrained.minimum.f32(float %a, float %b, metadata !"fpexcept.strict") strictfp +; ret float %1 +; } + +; declare float @llvm.experimental.constrained.maximum.f32(float, float, metadata) + +; define float @fmaximum_f32(float %a, float %b) nounwind strictfp { +; %1 = call float @llvm.experimental.constrained.maximum.f32(float %a, float %b, metadata !"fpexcept.strict") strictfp +; ret float %1 +; } + +declare float @llvm.experimental.constrained.floor.f32(float, metadata) + +define float @floor_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: floor_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: floor_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call floorf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: floor_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call floorf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: floor_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call floorf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.floor.f32(float %a, metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.ceil.f32(float, metadata) + 
+define float @ceil_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: ceil_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: ceil_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call ceilf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: ceil_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ceilf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ceil_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ceilf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.trunc.f32(float, metadata) + +define float @trunc_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: trunc_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: trunc_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call truncf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: trunc_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; 
RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call truncf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: trunc_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call truncf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.trunc.f32(float %a, metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) + +define float @rint_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: rint_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call rintf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: rint_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call rintf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: rint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call rintf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call rintf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.rint.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, 
metadata) + +define float @nearbyint_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: nearbyint_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call nearbyintf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: nearbyint_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call nearbyintf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: nearbyint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call nearbyintf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: nearbyint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call nearbyintf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.nearbyint.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.round.f32(float, metadata) + +define float @round_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: round_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: round_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call roundf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: 
ret +; +; RV32I-LABEL: round_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: round_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.round.f32(float %a, metadata !"fpexcept.strict") strictfp + ret float %1 +} + +declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) + +define float @roundeven_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: roundeven_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundevenf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: roundeven_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call roundevenf@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: roundeven_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundevenf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: roundeven_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundevenf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.roundeven.f32(float %a, metadata !"fpexcept.strict") 
strictfp + ret float %1 +} + +declare iXLen @llvm.experimental.constrained.lrint.iXLen.f32(float, metadata, metadata) + +define iXLen @lrint_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: lrint_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: lrint_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: lrint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call lrintf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lrint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call lrintf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lrint.iXLen.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare iXLen @llvm.experimental.constrained.lround.iXLen.f32(float, metadata) + +define iXLen @lround_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: lround_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rmm +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: lround_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rmm +; RV64IF-NEXT: ret +; +; RV32I-LABEL: lround_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call lroundf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lround_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call lroundf@plt +; RV64I-NEXT: ld 
ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lround.iXLen.f32(float %a, metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare i64 @llvm.experimental.constrained.llrint.i64.f32(float, metadata, metadata) + +define i64 @llrint_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: llrint_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call llrintf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: llrint_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: llrint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call llrintf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: llrint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call llrintf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret i64 %1 +} + +declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata) + +define i64 @llround_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: llround_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call llroundf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: llround_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rmm +; 
RV64IF-NEXT: ret +; +; RV32I-LABEL: llround_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call llroundf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: llround_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call llroundf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llround.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics-strict.ll new file mode 100644 index 000000000000..38cf8ca3c5ae --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zfh-half-intrinsics-strict.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-zfh \ +; RUN: -verify-machineinstrs -target-abi lp64f -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+experimental-zfh -verify-machineinstrs -target-abi lp64d \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IDZFH %s + +; These intrinsics require half and i64 to be legal types. 
+ +declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata) + +define i64 @llrint_f16(half %a) nounwind strictfp { +; RV64IZFH-LABEL: llrint_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: llrint_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret i64 %1 +} + +declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata) + +define i64 @llround_f16(half %a) nounwind strictfp { +; RV64IZFH-LABEL: llround_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: llround_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.llround.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll new file mode 100644 index 000000000000..7db3a9120137 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zfh-half-intrinsics-strict.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+experimental-zfh \ +; RUN: -verify-machineinstrs -target-abi ilp32f -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV32IZFH %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+experimental-zfh \ +; RUN: -verify-machineinstrs -target-abi lp64f -disable-strictnode-mutation \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -mattr=+experimental-zfh -verify-machineinstrs -target-abi ilp32d \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IDZFH %s 
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -mattr=+experimental-zfh -verify-machineinstrs -target-abi lp64d \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IDZFH %s + +declare iXLen @llvm.experimental.constrained.lrint.iXLen.f16(half, metadata, metadata) + +define iXLen @lrint_f16(half %a) nounwind strictfp { +; RV32IZFH-LABEL: lrint_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: lrint_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: lrint_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: lrint_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lrint.iXLen.f16(half %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} + +declare iXLen @llvm.experimental.constrained.lround.iXLen.f16(half, metadata) + +define iXLen @lround_f16(half %a) nounwind strictfp { +; RV32IZFH-LABEL: lround_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: lround_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: lround_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: lround_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IDZFH-NEXT: ret + %1 = call iXLen @llvm.experimental.constrained.lround.iXLen.f16(half %a, metadata !"fpexcept.strict") strictfp + ret iXLen %1 +} From dabac5feecdde0441b22a19088ac7384e4763dd1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 12:03:29 -0800 Subject: [PATCH 252/992] [ELF][LTO] Cache symbol table of lazy BitcodeFile Similar to D62188: a BitcodeFile's symbol table may be iterated twice, once 
in --start-lib (lazy) state, and once in the non-lazy state. This patch makes `parseLazy` save `symbols[i]` so that the non-lazy state does not need to re-insert to the global symbol table. Avoiding a redundant `saver.save` may save memory. `Maximum resident set size (kbytes)` for a large --thinlto-index-only link: * without the patch: 10164000 * with the patch: 10095716 (0.6% decrease) Note: we can remove `saver.save` if `BitcodeCompiler::add` does not transfer the ownership of `f.obj` in `checkError(ltoObj->add(std::move(f.obj), resols));`. Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D116390 --- lld/ELF/InputFiles.cpp | 57 +++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index dd7fd954bc51..04fa48f63c03 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1680,34 +1680,42 @@ static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { } template -static Symbol *createBitcodeSymbol(const std::vector &keptComdats, - const lto::InputFile::Symbol &objSym, - BitcodeFile &f) { - StringRef name = saver.save(objSym.getName()); +static void +createBitcodeSymbol(Symbol *&sym, const std::vector &keptComdats, + const lto::InputFile::Symbol &objSym, BitcodeFile &f) { uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; uint8_t type = objSym.isTLS() ? 
STT_TLS : STT_NOTYPE; uint8_t visibility = mapVisibility(objSym.getVisibility()); bool canOmitFromDynSym = objSym.canBeOmittedFromSymbolTable(); + StringRef name; + if (sym) { + name = sym->getName(); + } else { + name = saver.save(objSym.getName()); + sym = symtab->insert(name); + } + int c = objSym.getComdatIndex(); if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { Undefined newSym(&f, name, binding, visibility, type); if (canOmitFromDynSym) newSym.exportDynamic = false; - Symbol *ret = symtab->addSymbol(newSym); - ret->referenced = true; - return ret; + sym->resolve(newSym); + sym->referenced = true; + return; } - if (objSym.isCommon()) - return symtab->addSymbol( - CommonSymbol{&f, name, binding, visibility, STT_OBJECT, - objSym.getCommonAlignment(), objSym.getCommonSize()}); - - Defined newSym(&f, name, binding, visibility, type, 0, 0, nullptr); - if (canOmitFromDynSym) - newSym.exportDynamic = false; - return symtab->addSymbol(newSym); + if (objSym.isCommon()) { + sym->resolve(CommonSymbol{&f, name, binding, visibility, STT_OBJECT, + objSym.getCommonAlignment(), + objSym.getCommonSize()}); + } else { + Defined newSym(&f, name, binding, visibility, type, 0, 0, nullptr); + if (canOmitFromDynSym) + newSym.exportDynamic = false; + sym->resolve(newSym); + } } template void BitcodeFile::parse() { @@ -1719,10 +1727,11 @@ template void BitcodeFile::parse() { .second); } - symbols.assign(obj->symbols().size(), nullptr); - for (auto it : llvm::enumerate(obj->symbols())) - symbols[it.index()] = - createBitcodeSymbol(keptComdats, it.value(), *this); + symbols.resize(obj->symbols().size()); + for (auto it : llvm::enumerate(obj->symbols())) { + Symbol *&sym = symbols[it.index()]; + createBitcodeSymbol(sym, keptComdats, it.value(), *this); + } for (auto l : obj->getDependentLibraries()) addDependentLibrary(l, this); @@ -1730,9 +1739,11 @@ template void BitcodeFile::parse() { void BitcodeFile::parseLazy() { SymbolTable &symtab = *elf::symtab; - for (const 
lto::InputFile::Symbol &sym : obj->symbols()) - if (!sym.isUndefined()) - symtab.addSymbol(LazyObject{*this, saver.save(sym.getName())}); + symbols.resize(obj->symbols().size()); + for (auto it : llvm::enumerate(obj->symbols())) + if (!it.value().isUndefined()) + symbols[it.index()] = + symtab.addSymbol(LazyObject{*this, saver.save(it.value().getName())}); } void BinaryFile::parse() { From 9e6afbedb0166af0244ee77870ba782f4cb38a9a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Dec 2021 11:44:48 -0800 Subject: [PATCH 253/992] [Hexagon] Generate HVX/FP compare instructions Co-authored-by: Anirudh Sundar Subramaniam --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 35 ++ llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 98 ++++ .../CodeGen/Hexagon/autohvx/minmax-float.ll | 226 +++++++++ .../Hexagon/autohvx/vector-compare-float.ll | 466 ++++++++++++++++++ 4 files changed, 825 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index ef7f87066381..80287518fa56 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -91,6 +91,10 @@ HexagonTargetLowering::initializeHVXLowering() { if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { + setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal); + setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom); @@ -122,6 +126,9 @@ HexagonTargetLowering::initializeHVXLowering() { 
setOperationAction(ISD::LOAD, MVT::v64f32, Custom); setOperationAction(ISD::STORE, MVT::v64f32, Custom); + setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom); + setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v64f32, Custom); setOperationAction(ISD::MLOAD, MVT::v32f32, Custom); setOperationAction(ISD::MSTORE, MVT::v32f32, Custom); @@ -248,6 +255,32 @@ HexagonTargetLowering::initializeHVXLowering() { } } + setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand); + + setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand); + // Boolean vectors. 
for (MVT T : LegalW) { @@ -2258,6 +2291,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ: case ISD::CTTZ: case ISD::MUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::MULHS: case ISD::MULHU: case ISD::AND: diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 8d94a9978831..f72f02eb9cba 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -453,6 +453,44 @@ let Predicates = [UseHVX] in { (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } +let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { + def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; + + def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; +} + +let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in { + let AddedComplexity = 220 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + } + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF32, HVF32>; + def: OpR_RR_pat, VecF32, HVF32>; +} + +let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in { + let AddedComplexity = 220 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + } + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF32, HVF32>; + def: OpR_RR_pat, VecF32, HVF32>; +} + let Predicates = [UseHVX] in { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, @@ -707,3 +745,63 @@ let Predicates = [UseHVX] in { def: AccRRR_pat; def: AccRRR_pat; } + +let Predicates = 
[UseHVXV68, UseHVXFloatingPoint] in { + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + + def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)), + (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>; + + def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)), + (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>; +} diff --git a/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll b/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll new file mode 100644 index 000000000000..cb58004e6350 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s + +; min + +define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_00: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmin(v1.hf,v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x 
half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_01: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_02: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmin(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_03: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_10: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmin(v1.sf,v0.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_11: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_12(<32 x 
float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_12: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmin(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_13: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +; max + +define <64 x half> @test_20(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_20: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmax(v1.hf,v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <64 x half> @test_21(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_21: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <64 x half> @test_22(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_22: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmax(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_23(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_23: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = 
vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <32 x float> @test_30(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_30: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmax(v1.sf,v0.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +define <32 x float> @test_31(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_31: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +define <32 x float> @test_32(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_32: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmax(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_33(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_33: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +attributes #0 = { readnone nounwind "target-cpu"="hexagonv69" } + diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll new file 
mode 100644 index 000000000000..3937b2ed97f2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll @@ -0,0 +1,466 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s + +; --- Half + +define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_00: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oeq <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_01: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp one <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_02: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_03: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 
= fcmp ole <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_04(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_04: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_05(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_05: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_0a(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0a: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = and <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0b(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0b: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; 
CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = or <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0c(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0c: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = xor <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0d(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0d: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = and <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0e(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0e: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to 
<64 x i1> + %q2 = or <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0f(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0f: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = xor <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + + +; --- Single + +define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_10: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oeq <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_11: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp one <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_12: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = 
select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_13: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_14(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_14: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_15(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_15: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_1a(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1a: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = and <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + 
ret <32 x float> %t1 +} + +define <32 x float> @test_1b(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1b: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = or <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1c(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1c: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = xor <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1d(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1d: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = and <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1e(<32 x float> %v0, <32 x float> 
%v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1e: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = or <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1f(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1f: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = xor <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv69" } From 4943cda3988af2d4996a740b9ab14eb52579cda7 Mon Sep 17 00:00:00 2001 From: Mogball Date: Thu, 30 Dec 2021 20:39:22 +0000 Subject: [PATCH 254/992] [mlir][arith] fixing dependencies on memref/arith --- mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp | 1 - utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 048e4d89186c..641cd6f9324d 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -8,7 +8,6 @@ #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include 
"mlir/Dialect/CommonFolders.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ea11a63d6154..00b339e1fa5c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -965,6 +965,7 @@ td_library( ], includes = ["include"], deps = [ + ":ArithmeticOpsTdFiles", ":LoopLikeInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", @@ -985,11 +986,17 @@ gentbl_cc_library( "include/mlir/Dialect/Affine/IR/AffineOps.cpp.inc", ), ( - ["-gen-dialect-decls"], + [ + "-gen-dialect-decls", + "-dialect=affine", + ], "include/mlir/Dialect/Affine/IR/AffineOpsDialect.h.inc", ), ( - ["-gen-dialect-defs"], + [ + "-gen-dialect-defs", + "-dialect=affine", + ], "include/mlir/Dialect/Affine/IR/AffineOpsDialect.cpp.inc", ), ], @@ -1864,7 +1871,6 @@ td_library( name = "StdOpsTdFiles", srcs = [ "include/mlir/Dialect/StandardOps/IR/Ops.td", - "include/mlir/Dialect/StandardOps/IR/StandardOpsBase.td", ], includes = ["include"], deps = [ From ee8e81b40e03d8f46224fab053b10b722d982357 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 30 Dec 2021 15:43:07 -0500 Subject: [PATCH 255/992] [libc++][NFC] Fix incorrect synopsis in transform_view test --- .../range.adaptors/range.transform/iterator/iter_move.pass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/iter_move.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/iter_move.pass.cpp index 4ec70792d07e..d492df53e80e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/iter_move.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/iter_move.pass.cpp @@ -10,7 +10,8 @@ // UNSUPPORTED: libcpp-no-concepts // 
UNSUPPORTED: libcpp-has-no-incomplete-ranges -// transform_view::::operator[] +// friend constexpr decltype(auto) iter_move(const iterator& i) +// noexcept(noexcept(invoke(i.parent_->fun_, *i.current_))) #include From db83e3e5071afeb161ce3cdc246668ace9e1bb8e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Dec 2021 12:38:36 -0800 Subject: [PATCH 256/992] [Hexagon] Generate HVX/FP arithmetic instructions Co-authored-by: Anirudh Sundar Subramaniam Co-authored-by: Sumanth Gundapaneni Co-authored-by: Joshua Herrera --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 12 ++ llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 46 +++++ .../CodeGen/Hexagon/autohvx/arith-float.ll | 167 ++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 80287518fa56..a65ceccb60a1 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -93,6 +93,12 @@ HexagonTargetLowering::initializeHVXLowering() { Subtarget.useHVXFloatingPoint()) { setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal); setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal); + setOperationAction(ISD::FADD, MVT::v64f16, Legal); + setOperationAction(ISD::FSUB, MVT::v64f16, Legal); + setOperationAction(ISD::FMUL, MVT::v64f16, Legal); + setOperationAction(ISD::FADD, MVT::v32f32, Legal); + setOperationAction(ISD::FSUB, MVT::v32f32, Legal); + setOperationAction(ISD::FMUL, MVT::v32f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom); @@ -126,6 +132,9 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::LOAD, MVT::v64f32, Custom); setOperationAction(ISD::STORE, MVT::v64f32, Custom); + setOperationAction(ISD::FADD, 
MVT::v64f32, Custom); + setOperationAction(ISD::FSUB, MVT::v64f32, Custom); + setOperationAction(ISD::FMUL, MVT::v64f32, Custom); setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom); setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom); setOperationAction(ISD::VSELECT, MVT::v64f32, Custom); @@ -2291,6 +2300,9 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ: case ISD::CTTZ: case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::MULHS: diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index f72f02eb9cba..33bf8ed71a9c 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -453,6 +453,52 @@ let Predicates = [UseHVX] in { (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } +// For now, we always deal with vector floating point in SF mode. +class OpR_RR_pat_conv + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>; + +class OpR_RR_pat_conv_hf + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>; + +let Predicates = [UseHVXV68, UseHVXQFloat] in { + def: OpR_RR_pat_conv_hf, VecF16, HVF16>; + def: OpR_RR_pat_conv_hf, VecF16, HVF16>; + def: OpR_RR_pat_conv_hf, VecF16, HVF16>; + def: OpR_RR_pat_conv, VecF32, HVF32>; + def: OpR_RR_pat_conv, VecF32, HVF32>; + def: OpR_RR_pat_conv, VecF32, HVF32>; + + // For now we assume that the fp32 register is always coming in as IEEE float + // since the qfloat arithmetic instructions above always generate the + // accompanying conversions as part of their pattern + def: Pat<(VecF16 (pf1 HWF32:$Vuu)), + (V6_vdealh (V6_vconv_hf_qf32 + (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)), + (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0)) + ))))>; + +} + +// HVX IEEE arithmetic Instructions +let Predicates = 
[UseHVXV68, UseHVXIEEEFP] in { + def: Pat<(fadd HVF16:$Rs, HVF16:$Rt), + (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>; + def: Pat<(fadd HVF32:$Rs, HVF32:$Rt), + (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>; + def: Pat<(fsub HVF16:$Rs, HVF16:$Rt), + (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>; + def: Pat<(fsub HVF32:$Rs, HVF32:$Rt), + (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>; + def: Pat<(fmul HVF16:$Rs, HVF16:$Rt), + (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>; + def: Pat<(fmul HVF32:$Rs, HVF32:$Rt), + (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>; +} + let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt), (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll new file mode 100644 index 000000000000..0ba7f2c04601 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define <64 x half> @f0(<64 x half> %a0, <64 x half> %a1) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf16 = vadd(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = v0.qf16 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fadd <64 x half> %a0, %a1 + ret <64 x half> %v0 +} + +define <32 x float> @f1(<32 x float> %a0, <32 x float> %a1) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = v0.qf32 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fadd <32 x float> %a0, %a1 + ret <32 x float> %v0 +} + +define <64 x half> @f2(<64 x half> %a0, <64 x half> %a1) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf16 = vsub(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = v0.qf16 +; CHECK-NEXT: 
jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fsub <64 x half> %a0, %a1 + ret <64 x half> %v0 +} + +define <32 x float> @f3(<32 x float> %a0, <32 x float> %a1) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf32 = vsub(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = v0.qf32 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fsub <32 x float> %a0, %a1 + ret <32 x float> %v0 +} + +define <64 x half> @f4(<64 x half> %a0, <64 x half> %a1) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf16 = vmpy(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = v0.qf16 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fmul <64 x half> %a0, %a1 + ret <64 x half> %v0 +} + +define <32 x float> @f5(<32 x float> %a0, <32 x float> %a1) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf32 = vmpy(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = v0.qf32 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fmul <32 x float> %a0, %a1 + ret <32 x float> %v0 +} + +define <64 x half> @f6(<64 x half> %a0, <64 x half> %a1) #1 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vadd(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fadd <64 x half> %a0, %a1 + ret <64 x half> %v0 +} + +define <32 x float> @f7(<32 x float> %a0, <32 x float> %a1) #1 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vadd(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fadd <32 x float> %a0, %a1 + ret <32 x float> %v0 +} + +define <64 x half> @f8(<64 x half> %a0, <64 x half> %a1) #1 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vsub(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fsub <64 x half> %a0, %a1 + ret <64 x half> %v0 +} + +define <32 x float> @f9(<32 x 
float> %a0, <32 x float> %a1) #1 { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vsub(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fsub <32 x float> %a0, %a1 + ret <32 x float> %v0 +} + +define <64 x half> @f10(<64 x half> %a0, <64 x half> %a1) #1 { +; CHECK-LABEL: f10: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmpy(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fmul <64 x half> %a0, %a1 + ret <64 x half> %v0 +} + +define <32 x float> @f11(<32 x float> %a0, <32 x float> %a1) #1 { +; CHECK-LABEL: f11: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmpy(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fmul <32 x float> %a0, %a1 + ret <32 x float> %v0 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } +attributes #1 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-ieee-fp" } From e0efedd2c33bf2bef232f255f1449a18d2b61d77 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 30 Dec 2021 13:08:36 -0800 Subject: [PATCH 257/992] [SLP][NFC]Fix non-determinism in reordering, NFC. Need to clear CurrentOrder order mask if it is determined that extractelements form identity order and need to use a vector-like construct when iterating over ordered entries in the reorderTopToBottom function. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 37ae13666f7a..1dd31dcae253 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3040,7 +3040,7 @@ Optional BoUpSLP::getReorderingData(const TreeEntry &TE, void BoUpSLP::reorderTopToBottom() { // Maps VF to the graph nodes. 
- DenseMap> VFToOrderedEntries; + DenseMap> VFToOrderedEntries; // ExtractElement gather nodes which can be vectorized and need to handle // their ordering. DenseMap GathersToOrders; @@ -3066,7 +3066,7 @@ void BoUpSLP::reorderTopToBottom() { // Try to find the most profitable order. We just are looking for the most // used order and reorder scalar elements in the nodes according to this // mostly used order. - const SmallPtrSetImpl &OrderedEntries = It->getSecond(); + ArrayRef OrderedEntries = It->second.getArrayRef(); // All operands are reordered and used only in this node - propagate the // most used order to the user node. MapVector VL, Value *OpValue, CurrentOrder.clear(); return false; } + if (ShouldKeepOrder) + CurrentOrder.clear(); return ShouldKeepOrder; } From 441de75f69e975b0c7690044560520f8538b4efb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 13:34:45 -0800 Subject: [PATCH 258/992] [lld][docs] Update _templates/indexsidebar.html after Bugzilla->GitHub issue migration --- lld/docs/_templates/indexsidebar.html | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lld/docs/_templates/indexsidebar.html b/lld/docs/_templates/indexsidebar.html index 588be9309bde..f9ecb724153d 100644 --- a/lld/docs/_templates/indexsidebar.html +++ b/lld/docs/_templates/indexsidebar.html @@ -1,4 +1,9 @@

Bugs

-

lld bugs should be reported at the - LLVM Bugzilla.

+

+To report bugs, please visit +PE/COFF, +ELF, +Mach-O, or +WebAssembly. +

From 59442a54608f84636c8e167fb38c3e27667e4671 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Thu, 30 Dec 2021 14:01:38 -0800 Subject: [PATCH 259/992] [mlir][Linalg] Change signature of `get(Parallel/Reduce/Window)Dims` method. These method currently takes a SmallVector & as an argument to return the dims as AffineExpr. This creation of AffineExpr objects is unnecessary. Differential Revision: https://reviews.llvm.org/D116422 --- mlir/include/mlir/Dialect/Linalg/IR/Linalg.h | 2 +- mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td | 6 +++--- mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp | 7 +++---- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 5 ++--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h index 61ed0fe2d778..4e1a02177f1c 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h @@ -90,7 +90,7 @@ SmallVector concat(ArrayRef a, /// Return the dims that are `iteratorTypeName` loops in the LinalgOp `op`. /// Assumes `op` is a LinalgOp. 
void getDimsOfType(Operation *op, StringRef iteratorTypeName, - SmallVectorImpl &res); + SmallVectorImpl &res); namespace detail { LogicalResult verifyStructuredOpInterface(Operation *op); diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index 66745096ded3..413c2cc18ace 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -158,7 +158,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }], /*retTy=*/"void", /*methodName=*/"getParallelDims", - /*args=*/(ins "SmallVectorImpl &":$res), + /*args=*/(ins "SmallVectorImpl &":$res), /*methodBody=*/"", /*defaultImplementation=*/[{ return getDimsOfType($_op, getParallelIteratorTypeName(), res); @@ -183,7 +183,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }], /*retTy=*/"void", /*methodName=*/"getReductionDims", - /*args=*/(ins "SmallVectorImpl &":$res), + /*args=*/(ins "SmallVectorImpl &":$res), /*methodBody=*/"", /*defaultImplementation=*/[{ return getDimsOfType($_op, getReductionIteratorTypeName(), res); @@ -208,7 +208,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }], /*retTy=*/"void", /*methodName=*/"getWindowDims", - /*args=*/(ins "SmallVectorImpl &":$res), + /*args=*/(ins "SmallVectorImpl &":$res), /*methodBody=*/"", /*defaultImplementation=*/[{ return getDimsOfType($_op.getOperation(), getWindowIteratorTypeName(), res); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index a0e38e46e020..7604d14eb7d1 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -612,7 +612,7 @@ LogicalResult mlir::linalg::detail::verifyStructuredOpInterface(Operation *op) { << indexingMap.getNumResults() << ")"; } - SmallVector redDims; + SmallVector redDims; linalgOp.getReductionDims(redDims); // Simplifying assumption: 
either full tensor or full buffer mode. @@ -638,9 +638,8 @@ LogicalResult mlir::linalg::detail::verifyStructuredOpInterface(Operation *op) { // Output tensor indexing map may not depend on reduction indices. for (OpOperand *opOperand : linalgOp.getOutputOperands()) { AffineMap indexingMap = linalgOp.getTiedIndexingMap(opOperand); - for (auto expr : indexingMap.getResults()) { - for (auto dim : redDims) { - unsigned pos = dim.cast().getPosition(); + for (AffineExpr expr : indexingMap.getResults()) { + for (unsigned pos : redDims) { if (expr.isFunctionOfDim(pos)) { std::string exprStr; { diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 7e864ab4722e..e7ddee95f387 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -2318,16 +2318,15 @@ static LogicalResult verify(IndexOp op) { /// Return the dims that are `iteratorTypeName` loops in the LinalgOp `op`. /// Assumes `op` is a LinalgOp. void mlir::linalg::getDimsOfType(Operation *op, StringRef iteratorTypeName, - SmallVectorImpl &res) { + SmallVectorImpl &res) { if (!cast(op).iterator_types()) return; unsigned dim = 0; - MLIRContext *ctx = op->getContext(); for (auto tn : cast(op).iterator_types().getAsValueRange()) { if (tn == iteratorTypeName) - res.push_back(getAffineDimExpr(dim, ctx)); + res.push_back(dim); ++dim; } } From a699b2f1c029c2f19fa5673370fdb7d68871ae03 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Thu, 30 Dec 2021 14:49:50 -0800 Subject: [PATCH 260/992] [InstrProf] Mark counters as used in debug correlation mode In debug info correlation mode we do not emit the data globals so we need to explicitly mark the counter globals as used so they don't get stripped. 
Reviewed By: kyulee Differential Revision: https://reviews.llvm.org/D115981 --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index de34348606ef..73f208abcb07 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -997,8 +997,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } - if (DebugInfoCorrelate) + if (DebugInfoCorrelate) { + // Mark the counter variable as used so that it isn't optimized out. + CompilerUsedVars.push_back(PD.RegionCounters); return PD.RegionCounters; + } // Create data variable. auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext()); From 7d659c6ac741f66927938d1e5c6b85deeb294882 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 30 Dec 2021 15:27:45 -0800 Subject: [PATCH 261/992] [LegalizeIntegerTypes] Rename NewLHS/NewRHS arguments to DAGTypeLegalizer::PromoteSetCCOperands. NFC The 'New' only makes sense in the context of these being output arguments, but they are also used as inputs first. Drop the 'New' and just call them LHS/RHS. Factored out of D116421. --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 2109ac01a83c..d977f4ea3dbd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1704,7 +1704,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { /// PromoteSetCCOperands - Promote the operands of a comparison. 
This code is /// shared among BR_CC, SELECT_CC, and SETCC handlers. -void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, ISD::CondCode CCCode) { // We have to insert explicit sign or zero extends. Note that we could // insert sign extends for ALL conditions. For those operations where either @@ -1714,22 +1714,22 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: case ISD::SETNE: { - SDValue OpL = GetPromotedInteger(NewLHS); - SDValue OpR = GetPromotedInteger(NewRHS); + SDValue OpL = GetPromotedInteger(LHS); + SDValue OpR = GetPromotedInteger(RHS); // We would prefer to promote the comparison operand with sign extension. // If the width of OpL/OpR excluding the duplicated sign bits is no greater - // than the width of NewLHS/NewRH, we can avoid inserting real truncate + // than the width of LHS/RHS, we can avoid inserting real truncate // instruction, which is redundant eventually. 
unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); - if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && - OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { - NewLHS = OpL; - NewRHS = OpR; + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; } else { - NewLHS = SExtOrZExtPromotedInteger(NewLHS); - NewRHS = SExtOrZExtPromotedInteger(NewRHS); + LHS = SExtOrZExtPromotedInteger(LHS); + RHS = SExtOrZExtPromotedInteger(RHS); } break; } @@ -1737,15 +1737,15 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, case ISD::SETUGT: case ISD::SETULE: case ISD::SETULT: - NewLHS = SExtOrZExtPromotedInteger(NewLHS); - NewRHS = SExtOrZExtPromotedInteger(NewRHS); + LHS = SExtOrZExtPromotedInteger(LHS); + RHS = SExtOrZExtPromotedInteger(RHS); break; case ISD::SETGE: case ISD::SETGT: case ISD::SETLT: case ISD::SETLE: - NewLHS = SExtPromotedInteger(NewLHS); - NewRHS = SExtPromotedInteger(NewRHS); + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); break; } } From ed67d5a03aafcb867e1cee50438d88989fcf7f57 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 30 Dec 2021 16:08:26 -0800 Subject: [PATCH 262/992] [ELF] Switch cNamedSections to SmallVector. NFC Make it smaller --- lld/ELF/MarkLive.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index d2622e95e1e8..4d3d79d4ee80 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -68,8 +68,8 @@ template class MarkLive { SmallVector queue; // There are normally few input sections whose names are valid C - // identifiers, so we just store a std::vector instead of a multimap. - DenseMap> cNamedSections; + // identifiers, so we just store a SmallVector instead of a multimap. 
+ DenseMap> cNamedSections; }; } // namespace From 09f8315bba391eac1dbdfbdc3fd654c0c0cbe3e7 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 2 Aug 2021 15:56:00 +0200 Subject: [PATCH 263/992] [Sema] a[x] has type T when a has type T* or T[], even when T is dependent This more precise type is useful for tools, e.g. fixes https://github.com/clangd/clangd/issues/831 Differential Revision: https://reviews.llvm.org/D107275 --- clang/lib/Sema/SemaExpr.cpp | 40 ++++++++++++++++++++-- clang/test/AST/ast-dump-array.cpp | 55 +++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 736e76152fe4..d454e4877bce 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4645,6 +4645,38 @@ static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) { return isa(BaseNoParens); } +// Returns the type used for LHS[RHS], given one of LHS, RHS is type-dependent. +// Typically this is DependentTy, but can sometimes be more precise. +// +// There are cases when we could determine a non-dependent type: +// - LHS and RHS may have non-dependent types despite being type-dependent +// (e.g. unbounded array static members of the current instantiation) +// - one may be a dependent-sized array with known element type +// - one may be a dependent-typed valid index (enum in current instantiation) +// +// We *always* return a dependent type, in such cases it is DependentTy. +// This avoids creating type-dependent expressions with non-dependent types. +// FIXME: is this important to avoid? 
See https://reviews.llvm.org/D107275 +static QualType getDependentArraySubscriptType(Expr *LHS, Expr *RHS, + const ASTContext &Ctx) { + assert(LHS->isTypeDependent() || RHS->isTypeDependent()); + QualType LTy = LHS->getType(), RTy = RHS->getType(); + QualType Result = Ctx.DependentTy; + if (RTy->isIntegralOrUnscopedEnumerationType()) { + if (const PointerType *PT = LTy->getAs()) + Result = PT->getPointeeType(); + else if (const ArrayType *AT = LTy->getAsArrayTypeUnsafe()) + Result = AT->getElementType(); + } else if (LTy->isIntegralOrUnscopedEnumerationType()) { + if (const PointerType *PT = RTy->getAs()) + Result = PT->getPointeeType(); + else if (const ArrayType *AT = RTy->getAsArrayTypeUnsafe()) + Result = AT->getElementType(); + } + // Ensure we return a dependent type. + return Result->isDependentType() ? Result : Ctx.DependentTy; +} + ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, Expr *idx, SourceLocation rbLoc) { @@ -4737,8 +4769,9 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, // Build an unanalyzed expression if either operand is type-dependent. 
if (getLangOpts().CPlusPlus && (base->isTypeDependent() || idx->isTypeDependent())) { - return new (Context) ArraySubscriptExpr(base, idx, Context.DependentTy, - VK_LValue, OK_Ordinary, rbLoc); + return new (Context) ArraySubscriptExpr( + base, idx, getDependentArraySubscriptType(base, idx, getASTContext()), + VK_LValue, OK_Ordinary, rbLoc); } // MSDN, property (C++) @@ -5492,7 +5525,8 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, if (LHSTy->isDependentType() || RHSTy->isDependentType()) { BaseExpr = LHSExp; IndexExpr = RHSExp; - ResultType = Context.DependentTy; + ResultType = + getDependentArraySubscriptType(LHSExp, RHSExp, getASTContext()); } else if (const PointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; diff --git a/clang/test/AST/ast-dump-array.cpp b/clang/test/AST/ast-dump-array.cpp index 609ad31a0e42..418e4292680e 100644 --- a/clang/test/AST/ast-dump-array.cpp +++ b/clang/test/AST/ast-dump-array.cpp @@ -26,3 +26,58 @@ class array { using const_array_T_size = const T[Size]; // CHECK: `-DependentSizedArrayType 0x{{[^ ]*}} 'const T[Size]' dependent }; + +struct V {}; +template +void testDependentSubscript() { + U* a; + U b[5]; + Idx i{}; + enum E { One = 1 }; + + // Can types of subscript expressions can be determined? + // LHS is a type-dependent array, RHS is a known integer type. + a[1]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} 'U' + b[1]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} 'U' + + // Reverse case: RHS is a type-dependent array, LHS is an integer. + 1[a]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} 'U' + 1[b]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} 'U' + + // LHS is a type-dependent array, RHS is type-dependent. + a[i]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + b[i]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + + V *a2; + V b2[5]; + + // LHS is a known array, RHS is type-dependent. 
+ a2[i]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + b2[i]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + + // LHS is a known array, RHS is a type-dependent index. + // We know the element type is V, but insist on some dependent type. + a2[One]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + b2[One]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + + V b3[N]; + // LHS is an array with dependent bounds but known elements. + // We insist on a dependent type. + b3[0]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} '' + + U b4[N]; + // LHS is an array with dependent bounds and dependent elements. + b4[0]; + // CHECK: ArraySubscriptExpr {{.*}}line:[[@LINE-1]]{{.*}} 'U' +} From 7cd109b92c72855937273a6c8ab19016fbe27d33 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 30 Dec 2021 10:33:25 -0500 Subject: [PATCH 264/992] [asan] Additional prologue decoding for WinSDK 10.0.22000 Fixes interception of atoi() entry point.
--- compiler-rt/lib/interception/interception_win.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index 38b8c058246a..9289e06b88fc 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -602,6 +602,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0x246c8948: // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp case 0x245c8948: // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx case 0x24748948: // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi + case 0x247c8948: // 48 89 7c 24 XX : mov QWORD PTR [rsp + XX], rdi case 0x244C8948: // 48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx case 0x24548948: // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx case 0x244c894c: // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9 From 5cd0b817e2398d9643f74728970fe4c65776c012 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Thu, 30 Dec 2021 20:32:49 -0800 Subject: [PATCH 265/992] [mlir] Allow IntegerAttr to parse zero width integers. https://reviews.llvm.org/D109555 added support to APInt for this, so the special case to disable it is no longer valid. It is in fact legal to construct these programmatically today, and they print properly but do not parse. Justification: zero bit integers arise naturally in various bit reduction optimization problems, and having them defined for MLIR reduces special casing. I think there is a solid case for i0 and ui0 being supported. I'm less convinced about si0 and opted to just allow the parser to round-trip values that already verify. The counter argument is that the proper singular value for an si0 is -1. But the counter to this counter is that the sign bit is N-1, which does not exist for si0 and it is not unreasonable to consider this non-existent bit to be 0. 
Various sources consider it having the singular value "0" to be the least surprising. Reviewed By: lattner Differential Revision: https://reviews.llvm.org/D116413 --- mlir/lib/Parser/AttributeParser.cpp | 11 +++---- mlir/test/IR/attribute.mlir | 46 +++++++++++++++++++++++++++++ mlir/test/IR/invalid-ops.mlir | 8 ----- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Parser/AttributeParser.cpp b/mlir/lib/Parser/AttributeParser.cpp index 881e5b6d0e6d..d7087de8aad9 100644 --- a/mlir/lib/Parser/AttributeParser.cpp +++ b/mlir/lib/Parser/AttributeParser.cpp @@ -335,10 +335,6 @@ static Optional buildAttributeAPInt(Type type, bool isNegative, unsigned width = type.isIndex() ? IndexType::kInternalStorageBitWidth : type.getIntOrFloatBitWidth(); - // APInt cannot hold a zero bit value. - if (width == 0) - return llvm::None; - if (width > result.getBitWidth()) { result = result.zext(width); } else if (width < result.getBitWidth()) { @@ -350,7 +346,12 @@ static Optional buildAttributeAPInt(Type type, bool isNegative, result = result.trunc(width); } - if (isNegative) { + if (width == 0) { + // 0 bit integers cannot be negative and manipulation of their sign bit will + // assert, so short-cut validation here. + if (isNegative) + return llvm::None; + } else if (isNegative) { // The value is negative, we have an overflow if the sign bit is not set // in the negated apInt. result.negate(); diff --git a/mlir/test/IR/attribute.mlir b/mlir/test/IR/attribute.mlir index f8c07e37b63d..63c05c0cd057 100644 --- a/mlir/test/IR/attribute.mlir +++ b/mlir/test/IR/attribute.mlir @@ -150,8 +150,54 @@ func @int_attrs_pass() { } : () -> () return } + // ----- +//===----------------------------------------------------------------------===// +// Check that i0 is parsed and verified correctly. It can only have value 0. +// We check it explicitly because there are various special cases for it that +// are good to verify. 
+//===----------------------------------------------------------------------===// + +func @int0_attrs_pass() { + "test.i0_attr"() { + // CHECK: attr_00 = 0 : i0 + attr_00 = 0 : i0, + // CHECK: attr_01 = 0 : si0 + attr_01 = 0 : si0, + // CHECK: attr_02 = 0 : ui0 + attr_02 = 0 : ui0, + // CHECK: attr_03 = 0 : i0 + attr_03 = 0x0000 : i0, + // CHECK: attr_04 = 0 : si0 + attr_04 = 0x0000 : si0, + // CHECK: attr_05 = 0 : ui0 + attr_05 = 0x0000 : ui0 + } : () -> () + return +} + +// ----- + +func @int0_attrs_negative_fail() { + "test.i0_attr"() { + // expected-error @+1 {{integer constant out of range for attribute}} + attr_00 = -1 : i0 + } : () -> () + return +} + +// ----- + +func @int0_attrs_positive_fail() { + "test.i0_attr"() { + // expected-error @+1 {{integer constant out of range for attribute}} + attr_00 = 1 : i0 + } : () -> () + return +} + +// ----- func @wrong_int_attrs_signedness_fail() { // expected-error @+1 {{'si32_attr' failed to satisfy constraint: 32-bit signed integer attribute}} diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 2aae390af4d0..09237ccf036d 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -187,11 +187,3 @@ func @atomic_yield_type_mismatch(%I: memref<10xf32>, %i : index) { } return } - -// ----- - -func @no_zero_bit_integer_attrs() { - // expected-error @+1 {{integer constant out of range for attribute}} - %x = "some.op"(){value = 0 : i0} : () -> f32 - return -} From 05f82dc877a81b99c91a3f2e81dae895361ce1c7 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Fri, 31 Dec 2021 10:59:24 +0800 Subject: [PATCH 266/992] [RISCV] Fix incorrect cases of vmv.s.f in the VSETVLI insert pass. Fix incorrect cases of vmv.s.f and add test cases for it. 
Differential Revision: https://reviews.llvm.org/D116432 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 42 ++++++------- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 63 +++++++++++++++++++ 2 files changed, 84 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index eab9ee916fd2..15a75ba411c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -467,27 +467,27 @@ static bool isScalarMoveInstr(const MachineInstr &MI) { case RISCV::PseudoVMV_S_X_MF2: case RISCV::PseudoVMV_S_X_MF4: case RISCV::PseudoVMV_S_X_MF8: - case RISCV::PseudoVFMV_F16_S_M1: - case RISCV::PseudoVFMV_F16_S_M2: - case RISCV::PseudoVFMV_F16_S_M4: - case RISCV::PseudoVFMV_F16_S_M8: - case RISCV::PseudoVFMV_F16_S_MF2: - case RISCV::PseudoVFMV_F16_S_MF4: - case RISCV::PseudoVFMV_F16_S_MF8: - case RISCV::PseudoVFMV_F32_S_M1: - case RISCV::PseudoVFMV_F32_S_M2: - case RISCV::PseudoVFMV_F32_S_M4: - case RISCV::PseudoVFMV_F32_S_M8: - case RISCV::PseudoVFMV_F32_S_MF2: - case RISCV::PseudoVFMV_F32_S_MF4: - case RISCV::PseudoVFMV_F32_S_MF8: - case RISCV::PseudoVFMV_F64_S_M1: - case RISCV::PseudoVFMV_F64_S_M2: - case RISCV::PseudoVFMV_F64_S_M4: - case RISCV::PseudoVFMV_F64_S_M8: - case RISCV::PseudoVFMV_F64_S_MF2: - case RISCV::PseudoVFMV_F64_S_MF4: - case RISCV::PseudoVFMV_F64_S_MF8: + case RISCV::PseudoVFMV_S_F16_M1: + case RISCV::PseudoVFMV_S_F16_M2: + case RISCV::PseudoVFMV_S_F16_M4: + case RISCV::PseudoVFMV_S_F16_M8: + case RISCV::PseudoVFMV_S_F16_MF2: + case RISCV::PseudoVFMV_S_F16_MF4: + case RISCV::PseudoVFMV_S_F16_MF8: + case RISCV::PseudoVFMV_S_F32_M1: + case RISCV::PseudoVFMV_S_F32_M2: + case RISCV::PseudoVFMV_S_F32_M4: + case RISCV::PseudoVFMV_S_F32_M8: + case RISCV::PseudoVFMV_S_F32_MF2: + case RISCV::PseudoVFMV_S_F32_MF4: + case RISCV::PseudoVFMV_S_F32_MF8: + case RISCV::PseudoVFMV_S_F64_M1: + case RISCV::PseudoVFMV_S_F64_M2: + case 
RISCV::PseudoVFMV_S_F64_M4: + case RISCV::PseudoVFMV_S_F64_M8: + case RISCV::PseudoVFMV_S_F64_MF2: + case RISCV::PseudoVFMV_S_F64_MF4: + case RISCV::PseudoVFMV_S_F64_MF8: return true; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 7b97b72c9587..c8c50ac8dca9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -190,6 +190,55 @@ entry: ret %y } +define @test10( %a, double %b) nounwind { +; CHECK-LABEL: test10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret +entry: + %x = tail call i64 @llvm.riscv.vsetvlimax(i64 3, i64 0) + %y = call @llvm.riscv.vfmv.s.f.nxv1f64( + %a, double %b, i64 1) + ret %y +} + +define @test11( %a, double %b) nounwind { +; CHECK-LABEL: test11: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetivli a0, 6, e64, m1, tu, mu +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret +entry: + %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0) + %y = call @llvm.riscv.vfmv.s.f.nxv1f64( + %a, double %b, i64 2) + ret %y +} + +define @test12( %a, double %b, %mask) nounwind { +; CHECK-LABEL: test12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.d.x ft0, a0 +; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu +; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret +entry: + %x = call @llvm.riscv.vfadd.mask.nxv1f64.f64( + %a, + %a, + %a, + %mask, + i64 9, + i64 0) + %y = call @llvm.riscv.vfmv.s.f.nxv1f64( + %x, double %b, i64 2) + ret %y +} + declare @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( , , @@ -198,10 +247,24 @@ declare @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( i64, i64); +declare @llvm.riscv.vfadd.mask.nxv1f64.f64( + , + , + , + , + i64, + i64); + declare @llvm.riscv.vmv.s.x.nxv1i64( , i64, i64); + +declare @llvm.riscv.vfmv.s.f.nxv1f64 + (, + double, + i64) + 
declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) declare @llvm.riscv.vle.nxv2i32.i64(* nocapture, i64) declare @llvm.riscv.vmslt.nxv2i32.i32.i64(, i32, i64) From 41454ab25645f28d53fe6db08ae41b9898142655 Mon Sep 17 00:00:00 2001 From: wangpc Date: Fri, 31 Dec 2021 14:01:53 +0800 Subject: [PATCH 267/992] [RISCV] Use constant pool for large integers For large integers (for example, magic numbers generated by TargetLowering::BuildSDIV when dividing by constant), we may need about 4~8 instructions to build them. In the same time, it just takes two instructions to load constants (with extra cycles to access memory), so it may be profitable to put these integers into constant pool. Reviewed By: asb, craig.topper Differential Revision: https://reviews.llvm.org/D114950 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 33 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 5 +- llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 25 + llvm/lib/Target/RISCV/RISCVSubtarget.h | 6 + .../CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll | 150 +- llvm/test/CodeGen/RISCV/div-by-constant.ll | 60 +- llvm/test/CodeGen/RISCV/div.ll | 20 +- llvm/test/CodeGen/RISCV/double-imm.ll | 13 +- llvm/test/CodeGen/RISCV/imm.ll | 8 +- llvm/test/CodeGen/RISCV/rv64zbb.ll | 150 +- llvm/test/CodeGen/RISCV/rv64zbp.ll | 1219 +++++------------ .../CodeGen/RISCV/rvv/bitreverse-sdnode.ll | 120 +- llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 352 ++--- llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll | 176 +-- llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 352 ++--- .../RISCV/rvv/fixed-vectors-bitreverse.ll | 158 +-- .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 270 +--- .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll | 208 +-- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 270 +--- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 30 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 213 +-- .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 80 +- .../CodeGen/RISCV/rvv/interleave-crash.ll | 138 +- llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll 
| 40 +- llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 40 +- llvm/test/CodeGen/RISCV/srem-lkk.ll | 10 +- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 188 +-- llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 537 +++----- llvm/test/CodeGen/RISCV/urem-lkk.ll | 14 +- llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 430 +++--- llvm/test/CodeGen/RISCV/vararg.ll | 60 +- 31 files changed, 1662 insertions(+), 3713 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index ab53c0eadfcf..c489569d2dcb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -125,12 +125,37 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { CurDAG->RemoveDeadNodes(); } -static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm, - const RISCVSubtarget &Subtarget) { +static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, + const MVT VT, int64_t Imm, + const RISCVSubtarget &Subtarget) { + assert(VT == MVT::i64 && "Expecting MVT::i64"); + const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); + ConstantPoolSDNode *CP = cast(CurDAG->getConstantPool( + ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); + SDValue Addr = TLI->getAddr(CP, *CurDAG); + SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); + // Since there is no data race, the chain can be the entry node. 
+ SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, + CurDAG->getEntryNode()); + MachineFunction &MF = CurDAG->getMachineFunction(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, + LLT(VT), CP->getAlign()); + CurDAG->setNodeMemRefs(cast(Load), {MemOp}); + return Load; +} + +static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, + int64_t Imm, const RISCVSubtarget &Subtarget) { MVT XLenVT = Subtarget.getXLenVT(); RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); + // If Imm is expensive to build, then we put it into constant pool. + if (Subtarget.useConstantPoolForLargeInts() && + Seq.size() > Subtarget.getMaxBuildIntsCost()) + return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); + SDNode *Result = nullptr; SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); for (RISCVMatInt::Inst &Inst : Seq) { @@ -498,7 +523,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) Imm = SignExtend64(Imm, 32); - ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget)); + ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); return; } case ISD::FrameIndex: { @@ -774,7 +799,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { ShiftedC1 = SignExtend64(ShiftedC1, 32); // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 
- SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget); + SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), CurDAG->getTargetConstant(LeadingZeros, DL, VT)); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 48c5ce730933..3f941937bd83 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -462,6 +462,8 @@ class RISCVTargetLowering : public TargetLowering { SelectionDAG &DAG) const override; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; + template + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override { @@ -544,9 +546,6 @@ class RISCVTargetLowering : public TargetLowering { bool IsRet, CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const; - template - SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; - SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, bool UseGOT) const; SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 1063134b8a6c..4d078bc7e0f3 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -50,6 +50,16 @@ static cl::opt RVVVectorELENMax( cl::desc("The maximum ELEN value to use for fixed length vectors."), cl::init(64), cl::Hidden); +static cl::opt RISCVDisableUsingConstantPoolForLargeInts( + "riscv-disable-using-constant-pool-for-large-ints", + cl::desc("Disable using constant pool for large integers."), + cl::init(false), cl::Hidden); + +static cl::opt RISCVMaxBuildIntsCost( + "riscv-max-build-ints-cost", + cl::desc("The maximum cost used for building integers."), cl::init(0), + cl::Hidden); 
+ void RISCVSubtarget::anchor() {} RISCVSubtarget & @@ -110,6 +120,21 @@ const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const { return RegBankInfo.get(); } +bool RISCVSubtarget::useConstantPoolForLargeInts() const { + return !RISCVDisableUsingConstantPoolForLargeInts; +} + +unsigned RISCVSubtarget::getMaxBuildIntsCost() const { + // Loading integer from constant pool needs two instructions (the reason why + // the minimum cost is 2): an address calculation instruction and a load + // instruction. Usually, address calculation and instructions used for + // building integers (addi, slli, etc.) can be done in one cycle, so here we + // set the default cost to (LoadLatency + 1) if no threshold is provided. + return RISCVMaxBuildIntsCost == 0 + ? getSchedModel().LoadLatency + 1 + : std::max(2, RISCVMaxBuildIntsCost); +} + unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const { assert(hasVInstructions() && "Tried to get vector length without Zve or V extension support!"); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index d0330e6984a5..6b568eca4e55 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -157,6 +157,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; + bool useConstantPoolForLargeInts() const; + + // Maximum cost used for building integers, integers will be put into constant + // pool if exceeded. + unsigned getMaxBuildIntsCost() const; + // Return the known range for the bit length of RVV data registers. A value // of 0 means nothing is known about that particular limit beyond what's // implied by the architecture. 
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll index 4a08a7d3f119..e7be4070fe02 100644 --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -513,46 +513,24 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI7_0) +; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI7_1) +; RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI7_2) +; RV64I-NEXT: ld a2, %lo(.LCPI7_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, 
a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI7_3) +; RV64I-NEXT: ld a1, %lo(.LCPI7_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -811,46 +789,24 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_1) +; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI11_2) +; RV64I-NEXT: ld a2, %lo(.LCPI11_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: 
slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI11_3) +; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -982,46 +938,24 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_1) +; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI13_2) +; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 
-; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_3) +; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll index 8f354bd38b88..85c3603167a8 100644 --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -100,14 +100,8 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind { ; ; RV64-LABEL: udiv64_constant_no_add: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 1035469 -; RV64-NEXT: addiw a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 +; RV64-NEXT: lui a1, %hi(.LCPI2_0) +; RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; RV64-NEXT: mulhu a0, a0, a1 ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: ret @@ -129,14 +123,8 @@ define i64 @udiv64_constant_add(i64 %a) nounwind { ; ; RV64-LABEL: udiv64_constant_add: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4681 -; RV64-NEXT: addiw a1, a1, 585 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 585 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 585 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, 1171 +; RV64-NEXT: lui a1, %hi(.LCPI3_0) +; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) ; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: sub a0, a0, a1 ; RV64-NEXT: srli a0, a0, 1 @@ -443,14 +431,8 @@ define i64 @sdiv64_constant_no_srai(i64 %a) nounwind { ; ; RV64-LABEL: sdiv64_constant_no_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: lui a1, 
%hi(.LCPI12_0) +; RV64-NEXT: ld a1, %lo(.LCPI12_0)(a1) ; RV64-NEXT: mulh a0, a0, a1 ; RV64-NEXT: srli a1, a0, 63 ; RV64-NEXT: add a0, a0, a1 @@ -473,14 +455,8 @@ define i64 @sdiv64_constant_srai(i64 %a) nounwind { ; ; RV64-LABEL: sdiv64_constant_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, 1639 +; RV64-NEXT: lui a1, %hi(.LCPI13_0) +; RV64-NEXT: ld a1, %lo(.LCPI13_0)(a1) ; RV64-NEXT: mulh a0, a0, a1 ; RV64-NEXT: srli a1, a0, 63 ; RV64-NEXT: srai a0, a0, 1 @@ -504,14 +480,8 @@ define i64 @sdiv64_constant_add_srai(i64 %a) nounwind { ; ; RV64-LABEL: sdiv64_constant_add_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 1017993 -; RV64-NEXT: addiw a1, a1, -1911 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1911 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1911 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1911 +; RV64-NEXT: lui a1, %hi(.LCPI14_0) +; RV64-NEXT: ld a1, %lo(.LCPI14_0)(a1) ; RV64-NEXT: mulh a1, a0, a1 ; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: srli a1, a0, 63 @@ -536,14 +506,8 @@ define i64 @sdiv64_constant_sub_srai(i64 %a) nounwind { ; ; RV64-LABEL: sdiv64_constant_sub_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: lui a1, %hi(.LCPI15_0) +; RV64-NEXT: ld a1, %lo(.LCPI15_0)(a1) ; RV64-NEXT: mulh a1, a0, a1 ; RV64-NEXT: sub a0, a1, a0 ; RV64-NEXT: srli a1, a0, 63 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll index 5d6b793b38a7..af6855e94ff6 100644 --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -222,14 +222,8 @@ define i64 
@udiv64_constant(i64 %a) nounwind { ; ; RV64IM-LABEL: udiv64_constant: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1035469 -; RV64IM-NEXT: addiw a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 +; RV64IM-NEXT: lui a1, %hi(.LCPI5_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI5_0)(a1) ; RV64IM-NEXT: mulhu a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 2 ; RV64IM-NEXT: ret @@ -866,14 +860,8 @@ define i64 @sdiv64_constant(i64 %a) nounwind { ; ; RV64IM-LABEL: sdiv64_constant: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 13107 -; RV64IM-NEXT: addiw a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1639 +; RV64IM-NEXT: lui a1, %hi(.LCPI21_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI21_0)(a1) ; RV64IM-NEXT: mulh a0, a0, a1 ; RV64IM-NEXT: srli a1, a0, 63 ; RV64IM-NEXT: srai a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll index ad4a1e5f4dce..7df02a84b9bd 100644 --- a/llvm/test/CodeGen/RISCV/double-imm.ll +++ b/llvm/test/CodeGen/RISCV/double-imm.ll @@ -5,9 +5,6 @@ ; RUN: | FileCheck -check-prefix=RV64IFD %s define double @double_imm() nounwind { -; TODO: Should probably prefer fld or ld on RV64 rather than materialising an -; expensive constant. 
-; ; RV32IFD-LABEL: double_imm: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: lui a0, 345155 @@ -18,14 +15,8 @@ define double @double_imm() nounwind { ; ; RV64IFD-LABEL: double_imm: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, 512 -; RV64IFD-NEXT: addiw a0, a0, 1169 -; RV64IFD-NEXT: slli a0, a0, 15 -; RV64IFD-NEXT: addi a0, a0, -299 -; RV64IFD-NEXT: slli a0, a0, 14 -; RV64IFD-NEXT: addi a0, a0, 1091 -; RV64IFD-NEXT: slli a0, a0, 12 -; RV64IFD-NEXT: addi a0, a0, -744 +; RV64IFD-NEXT: lui a0, %hi(.LCPI0_0) +; RV64IFD-NEXT: ld a0, %lo(.LCPI0_0)(a0) ; RV64IFD-NEXT: ret ret double 3.1415926535897931159979634685441851615905761718750 } diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll index b7d7df6ced1f..ed8a839b03b6 100644 --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zba \ +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+experimental-zba \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBA -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbs \ +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+experimental-zbs \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS ; Materializing constants diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 6893afc33535..0d892e650814 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ 
b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -328,46 +328,24 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI5_0) +; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_1) +; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI5_2) +; RV64I-NEXT: ld a2, %lo(.LCPI5_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI5_3) +; RV64I-NEXT: ld a1, %lo(.LCPI5_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) 
# 8-byte Folded Reload @@ -597,46 +575,24 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI10_0) +; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI10_1) +; RV64I-NEXT: ld a2, %lo(.LCPI10_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI10_2) +; RV64I-NEXT: ld a2, %lo(.LCPI10_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI10_3) +; RV64I-NEXT: ld a1, %lo(.LCPI10_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 
8-byte Folded Reload @@ -740,46 +696,24 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_1) +; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI13_2) +; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_3) +; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld 
ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll index a26823b32959..041c53622cb0 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -35,26 +35,14 @@ define signext i32 @gorc1_i32(i32 signext %a) nounwind { define i64 @gorc1_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc1_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI1_0) +; RV64I-NEXT: ld a1, %lo(.LCPI1_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI1_1) +; RV64I-NEXT: ld a2, %lo(.LCPI1_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -103,26 +91,14 @@ define signext i32 @gorc2_i32(i32 signext %a) nounwind { define i64 @gorc2_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc2_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; 
RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI3_0) +; RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI3_1) +; RV64I-NEXT: ld a2, %lo(.LCPI3_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -187,48 +163,24 @@ define signext i32 @gorc3_i32(i32 signext %a) nounwind { define i64 @gorc3_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc3_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI5_0) +; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_1) +; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli 
a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI5_2) +; RV64I-NEXT: ld a1, %lo(.LCPI5_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_3) +; RV64I-NEXT: ld a2, %lo(.LCPI5_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -283,26 +235,14 @@ define signext i32 @gorc4_i32(i32 signext %a) nounwind { define i64 @gorc4_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc4_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI7_0) +; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI7_1) +; RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -367,48 +307,24 @@ define signext i32 @gorc5_i32(i32 signext %a) nounwind { define i64 @gorc5_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc5_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 
1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI9_0) +; RV64I-NEXT: ld a1, %lo(.LCPI9_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI9_1) +; RV64I-NEXT: ld a2, %lo(.LCPI9_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI9_2) +; RV64I-NEXT: ld a1, %lo(.LCPI9_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI9_3) +; RV64I-NEXT: ld a2, %lo(.LCPI9_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -479,48 +395,24 @@ define signext i32 @gorc6_i32(i32 signext %a) nounwind { define i64 @gorc6_i64(i64 %a) 
nounwind { ; RV64I-LABEL: gorc6_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_1) +; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI11_2) +; RV64I-NEXT: ld a1, %lo(.LCPI11_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_3) +; RV64I-NEXT: ld a2, %lo(.LCPI11_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ 
-608,70 +500,34 @@ define signext i32 @gorc7_i32(i32 signext %a) nounwind { define i64 @gorc7_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc7_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_1) +; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI13_2) +; RV64I-NEXT: ld a1, %lo(.LCPI13_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_3) +; RV64I-NEXT: ld a2, %lo(.LCPI13_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli 
a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI13_4) +; RV64I-NEXT: ld a1, %lo(.LCPI13_4)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_5) +; RV64I-NEXT: ld a2, %lo(.LCPI13_5)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -732,22 +588,14 @@ define signext i32 @gorc8_i32(i32 signext %a) nounwind { define i64 @gorc8_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc8_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: lui a2, 1044496 -; RV64I-NEXT: addiw a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 8 -; RV64I-NEXT: lui a3, 4080 -; RV64I-NEXT: addiw a3, a3, 255 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 255 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 255 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI15_0) +; RV64I-NEXT: ld a1, %lo(.LCPI15_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI15_1) +; RV64I-NEXT: ld a2, %lo(.LCPI15_1)(a2) +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 8 +; 
RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -917,32 +765,20 @@ define signext i32 @gorc2b_i32(i32 signext %a) nounwind { define i64 @gorc2b_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc2b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI22_0) +; RV64I-NEXT: ld a1, %lo(.LCPI22_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI22_1) +; RV64I-NEXT: ld a2, %lo(.LCPI22_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: srli a4, a0, 2 +; RV64I-NEXT: and a4, a4, a2 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a3, a0, 2 -; RV64I-NEXT: lui a4, 13107 -; RV64I-NEXT: addiw a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -1028,54 +864,30 @@ define signext i32 @gorc3b_i32(i32 signext %a) nounwind { define i64 @gorc3b_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc3b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 
-; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI24_0) +; RV64I-NEXT: ld a1, %lo(.LCPI24_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI24_1) +; RV64I-NEXT: ld a2, %lo(.LCPI24_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: srli a4, a0, 1 +; RV64I-NEXT: and a4, a4, a2 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI24_2) +; RV64I-NEXT: ld a3, %lo(.LCPI24_2)(a3) +; RV64I-NEXT: lui a4, %hi(.LCPI24_3) +; RV64I-NEXT: ld a4, %lo(.LCPI24_3)(a4) +; RV64I-NEXT: slli a5, a0, 2 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: srli a5, a0, 2 +; RV64I-NEXT: and a4, a5, a4 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: lui a4, 21845 -; RV64I-NEXT: addiw a4, a4, 1365 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 1365 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 1365 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 1365 -; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a3, 1035469 -; RV64I-NEXT: addiw a3, a3, -819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -820 -; RV64I-NEXT: and a1, a1, a3 -; RV64I-NEXT: srli a3, a0, 2 -; RV64I-NEXT: lui a5, 13107 -; RV64I-NEXT: addiw a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: and a3, a3, a5 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: and a2, 
a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -1170,25 +982,13 @@ define signext i32 @grev1_i32(i32 signext %a) nounwind { define i64 @grev1_i64(i64 %a) nounwind { ; RV64I-LABEL: grev1_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI28_0) +; RV64I-NEXT: ld a1, %lo(.LCPI28_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI28_1) +; RV64I-NEXT: ld a2, %lo(.LCPI28_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1234,25 +1034,13 @@ define signext i32 @grev2_i32(i32 signext %a) nounwind { define i64 @grev2_i64(i64 %a) nounwind { ; RV64I-LABEL: grev2_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI30_0) +; RV64I-NEXT: ld a1, %lo(.LCPI30_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI30_1) +; RV64I-NEXT: ld a2, %lo(.LCPI30_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 
-; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1312,46 +1100,22 @@ define signext i32 @grev3_i32(i32 signext %a) nounwind { define i64 @grev3_i64(i64 %a) nounwind { ; RV64I-LABEL: grev3_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI32_0) +; RV64I-NEXT: ld a1, %lo(.LCPI32_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI32_1) +; RV64I-NEXT: ld a2, %lo(.LCPI32_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI32_2) +; RV64I-NEXT: ld a1, %lo(.LCPI32_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI32_3) +; RV64I-NEXT: ld a2, %lo(.LCPI32_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 
-; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1402,25 +1166,13 @@ define signext i32 @grev4_i32(i32 signext %a) nounwind { define i64 @grev4_i64(i64 %a) nounwind { ; RV64I-LABEL: grev4_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI34_0) +; RV64I-NEXT: ld a1, %lo(.LCPI34_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI34_1) +; RV64I-NEXT: ld a2, %lo(.LCPI34_1)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1480,46 +1232,22 @@ define signext i32 @grev5_i32(i32 signext %a) nounwind { define i64 @grev5_i64(i64 %a) nounwind { ; RV64I-LABEL: grev5_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI36_0) +; RV64I-NEXT: ld a1, %lo(.LCPI36_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI36_1) +; RV64I-NEXT: ld a2, %lo(.LCPI36_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; 
RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI36_2) +; RV64I-NEXT: ld a1, %lo(.LCPI36_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI36_3) +; RV64I-NEXT: ld a2, %lo(.LCPI36_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1585,46 +1313,22 @@ define signext i32 @grev6_i32(i32 signext %a) nounwind { define i64 @grev6_i64(i64 %a) nounwind { ; RV64I-LABEL: grev6_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI38_0) +; RV64I-NEXT: ld a1, %lo(.LCPI38_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI38_1) +; RV64I-NEXT: ld a2, %lo(.LCPI38_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; 
RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI38_2) +; RV64I-NEXT: ld a1, %lo(.LCPI38_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI38_3) +; RV64I-NEXT: ld a2, %lo(.LCPI38_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1703,67 +1407,31 @@ define signext i32 @grev7_i32(i32 signext %a) nounwind { define i64 @grev7_i64(i64 %a) nounwind { ; RV64I-LABEL: grev7_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI40_0) +; RV64I-NEXT: ld a1, %lo(.LCPI40_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI40_1) +; RV64I-NEXT: ld a2, %lo(.LCPI40_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; 
RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI40_2) +; RV64I-NEXT: ld a1, %lo(.LCPI40_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI40_3) +; RV64I-NEXT: ld a2, %lo(.LCPI40_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI40_4) +; RV64I-NEXT: ld a1, %lo(.LCPI40_4)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI40_5) +; RV64I-NEXT: ld a2, %lo(.LCPI40_5)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1819,21 +1487,13 @@ define signext i32 @grev8_i32(i32 signext %a) nounwind { define i64 @grev8_i64(i64 %a) nounwind 
{ ; RV64I-LABEL: grev8_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: lui a2, 1044496 -; RV64I-NEXT: addiw a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI42_0) +; RV64I-NEXT: ld a1, %lo(.LCPI42_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI42_1) +; RV64I-NEXT: ld a2, %lo(.LCPI42_1)(a2) +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: lui a2, 4080 -; RV64I-NEXT: addiw a2, a2, 255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, 255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, 255 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1993,46 +1653,22 @@ define signext i32 @grev3b_i32(i32 signext %a) nounwind { define i64 @grev3b_i64(i64 %a) nounwind { ; RV64I-LABEL: grev3b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI49_0) +; RV64I-NEXT: ld a1, %lo(.LCPI49_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI49_1) +; RV64I-NEXT: ld a2, %lo(.LCPI49_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, 
a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI49_2) +; RV64I-NEXT: ld a1, %lo(.LCPI49_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI49_3) +; RV64I-NEXT: ld a2, %lo(.LCPI49_3)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -2109,52 +1745,28 @@ define signext i32 @grev2b_i32(i32 signext %a) nounwind { define i64 @grev2b_i64(i64 %a) nounwind { ; RV64I-LABEL: grev2b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI51_0) +; RV64I-NEXT: ld a1, %lo(.LCPI51_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI51_1) +; RV64I-NEXT: ld a2, %lo(.LCPI51_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a3, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a4, 1035469 -; RV64I-NEXT: addiw a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; 
RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -820 -; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: lui a3, %hi(.LCPI51_2) +; RV64I-NEXT: ld a3, %lo(.LCPI51_2)(a3) +; RV64I-NEXT: lui a4, %hi(.LCPI51_3) +; RV64I-NEXT: ld a4, %lo(.LCPI51_3)(a4) +; RV64I-NEXT: slli a5, a0, 2 +; RV64I-NEXT: and a3, a5, a3 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a4, 13107 -; RV64I-NEXT: addiw a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 ; RV64I-NEXT: and a0, a0, a4 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -2244,57 +1856,33 @@ define signext i32 @grev0_i32(i32 signext %a) nounwind { define i64 @grev0_i64(i64 %a) nounwind { ; RV64I-LABEL: grev0_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI53_0) +; RV64I-NEXT: ld a1, %lo(.LCPI53_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI53_1) +; RV64I-NEXT: ld a2, %lo(.LCPI53_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a3, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: 
addi a3, a3, 1365 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a4, 1035469 -; RV64I-NEXT: addiw a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -820 -; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: lui a3, %hi(.LCPI53_2) +; RV64I-NEXT: ld a3, %lo(.LCPI53_2)(a3) +; RV64I-NEXT: lui a4, %hi(.LCPI53_3) +; RV64I-NEXT: ld a4, %lo(.LCPI53_3)(a4) +; RV64I-NEXT: slli a5, a0, 2 +; RV64I-NEXT: and a5, a5, a3 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a5, 13107 -; RV64I-NEXT: addiw a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: and a0, a0, a5 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: or a0, a5, a0 +; RV64I-NEXT: slli a5, a0, 1 +; RV64I-NEXT: and a1, a5, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a5 +; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -2676,43 +2264,25 @@ define i64 @bitreverse_i64(i64 %a) nounwind { ; RV64I-NEXT: and a3, a4, a3 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI64_0) +; RV64I-NEXT: ld a3, %lo(.LCPI64_0)(a3) ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli 
a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a2, %hi(.LCPI64_1) +; RV64I-NEXT: ld a2, %lo(.LCPI64_1)(a2) ; RV64I-NEXT: slli a0, a0, 4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI64_2) +; RV64I-NEXT: ld a2, %lo(.LCPI64_2)(a2) ; RV64I-NEXT: slli a0, a0, 2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 1 @@ -2838,94 +2408,76 @@ define i32 @bitreverse_bswap_i32(i32 %a) { define i64 @bitreverse_bswap_i64(i64 %a) { ; RV64I-LABEL: bitreverse_bswap_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a2, a0, 24 +; RV64I-NEXT: srli a1, a0, 24 ; RV64I-NEXT: lui a6, 4080 -; RV64I-NEXT: and a3, a2, a6 -; RV64I-NEXT: srli a4, a0, 8 -; RV64I-NEXT: li a1, 255 -; RV64I-NEXT: slli a7, a1, 24 -; RV64I-NEXT: and a4, a4, a7 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: srli a4, a0, 40 -; RV64I-NEXT: lui a5, 16 -; RV64I-NEXT: addiw a5, a5, -256 -; RV64I-NEXT: and a4, a4, a5 -; RV64I-NEXT: srli a2, a0, 56 -; RV64I-NEXT: or a2, a4, a2 -; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: slli a4, a0, 24 -; RV64I-NEXT: slli t0, a1, 40 -; RV64I-NEXT: and a4, a4, t0 -; RV64I-NEXT: srliw a3, a0, 24 -; RV64I-NEXT: slli a3, a3, 
32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli a4, a0, 40 -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: and a4, a4, a1 +; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: li a4, 255 +; RV64I-NEXT: slli a7, a4, 24 +; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 40 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a3, a3, a2 +; RV64I-NEXT: srli a5, a0, 56 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 24 +; RV64I-NEXT: slli t0, a4, 40 +; RV64I-NEXT: and a3, a3, t0 +; RV64I-NEXT: srliw a5, a0, 24 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a5, a0, 40 +; RV64I-NEXT: slli a4, a4, 48 +; RV64I-NEXT: and a5, a5, a4 ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: lui a5, %hi(.LCPI68_0) +; RV64I-NEXT: ld a5, %lo(.LCPI68_0)(a5) ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: and a1, a1, a5 +; RV64I-NEXT: and a0, a0, a5 +; RV64I-NEXT: lui a3, %hi(.LCPI68_1) +; RV64I-NEXT: ld a3, %lo(.LCPI68_1)(a3) ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: and 
a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI68_2) +; RV64I-NEXT: ld a3, %lo(.LCPI68_2)(a3) ; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: slli a0, a0, 1 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 40 -; RV64I-NEXT: and a2, a2, a5 -; RV64I-NEXT: srli a3, a0, 56 -; RV64I-NEXT: or a2, a2, a3 -; RV64I-NEXT: srli a3, a0, 24 -; RV64I-NEXT: and a3, a3, a6 -; RV64I-NEXT: srli a4, a0, 8 -; RV64I-NEXT: and a4, a4, a7 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 56 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 24 +; RV64I-NEXT: and a2, a2, a6 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: and a3, a3, a7 ; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: slli a3, a0, 24 -; RV64I-NEXT: and a3, a3, t0 -; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a4, a0, 40 -; RV64I-NEXT: and a1, a4, a1 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: and a2, a2, t0 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: and a3, a3, a4 ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBP-LABEL: bitreverse_bswap_i64: @@ -2972,30 +2524,18 @@ define signext i32 @shfl1_i32(i32 signext %a, i32 signext %b) nounwind { 
define i64 @shfl1_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: shfl1_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 1035469 -; RV64I-NEXT: addiw a1, a1, -819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -819 -; RV64I-NEXT: slli a1, a1, 13 -; RV64I-NEXT: addi a1, a1, -1639 -; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: slli a2, a0, 1 -; RV64I-NEXT: lui a3, 4369 -; RV64I-NEXT: addiw a3, a3, 273 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 273 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 273 -; RV64I-NEXT: slli a4, a3, 14 -; RV64I-NEXT: addi a4, a4, 1092 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI70_1) +; RV64I-NEXT: ld a1, %lo(.LCPI70_1)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI70_0) +; RV64I-NEXT: ld a2, %lo(.LCPI70_0)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: lui a3, %hi(.LCPI70_2) +; RV64I-NEXT: ld a3, %lo(.LCPI70_2)(a3) +; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: slli a2, a3, 13 -; RV64I-NEXT: addi a2, a2, 546 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -3048,31 +2588,18 @@ define signext i32 @shfl2_i32(i32 signext %a, i32 signext %b) nounwind { define i64 @shfl2_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: shfl2_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 1044721 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 14 -; RV64I-NEXT: addi a1, a1, 963 -; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: slli a2, a0, 2 -; RV64I-NEXT: lui a3, 197379 -; RV64I-NEXT: slli a3, a3, 4 -; RV64I-NEXT: addi a3, a3, 771 -; RV64I-NEXT: slli a4, a3, 16 -; RV64I-NEXT: addi a4, a4, 771 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 48 
-; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI72_1) +; RV64I-NEXT: ld a1, %lo(.LCPI72_1)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI72_0) +; RV64I-NEXT: ld a2, %lo(.LCPI72_0)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: lui a3, %hi(.LCPI72_2) +; RV64I-NEXT: ld a3, %lo(.LCPI72_2)(a3) +; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: slli a2, a3, 14 -; RV64I-NEXT: addi a2, a2, 193 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1012 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -3125,31 +2652,17 @@ define signext i32 @shfl4_i32(i32 signext %a, i32 signext %b) nounwind { define i64 @shfl4_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: shfl4_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 983295 -; RV64I-NEXT: slli a1, a1, 4 -; RV64I-NEXT: addi a1, a1, 255 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 255 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 15 +; RV64I-NEXT: lui a1, %hi(.LCPI74_0) +; RV64I-NEXT: ld a1, %lo(.LCPI74_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI74_1) +; RV64I-NEXT: ld a2, %lo(.LCPI74_1)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: lui a4, %hi(.LCPI74_2) +; RV64I-NEXT: ld a4, %lo(.LCPI74_2)(a4) +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: slli a2, a0, 4 -; RV64I-NEXT: lui a3, 983055 -; RV64I-NEXT: slli a3, a3, 4 -; RV64I-NEXT: addi a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: srli a3, a3, 4 -; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a3, 240 -; RV64I-NEXT: addiw a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 20 -; RV64I-NEXT: addi a3, a3, 240 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: or a0, a2, a0 ; 
RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll index 164dfc68e538..d9ef5cc385f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1090,43 +1090,25 @@ define @bitreverse_nxv1i64( %va) { ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v11, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI18_1) +; RV64-NEXT: ld a0, %lo(.LCPI18_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1253,43 +1235,25 @@ define @bitreverse_nxv2i64( %va) { ; RV64-NEXT: 
slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI19_1) +; RV64-NEXT: ld a0, %lo(.LCPI19_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1416,43 +1380,25 @@ define @bitreverse_nxv4i64( %va) { ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v20, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, 
a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI20_1) +; RV64-NEXT: ld a0, %lo(.LCPI20_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1609,43 +1555,25 @@ define @bitreverse_nxv8i64( %va) { ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v0, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI21_1) +; RV64-NEXT: ld a0, %lo(.LCPI21_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 
2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index 20bf367ff3e3..12b11d6452fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1653,47 +1653,25 @@ define @ctlz_nxv1i64( %va) { ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_1) +; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; 
RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_3) +; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1779,47 +1757,25 @@ define @ctlz_nxv2i64( %va) { ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_1) +; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; 
RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_3) +; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1905,47 +1861,25 @@ define @ctlz_nxv4i64( %va) { ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, 
%hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_3) +; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2031,47 +1965,25 @@ define @ctlz_nxv8i64( %va) { ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_3) +; 
RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3637,47 +3549,25 @@ define @ctlz_zero_undef_nxv1i64( %va) { ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI40_0) +; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_1) +; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI40_2) +; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_3) +; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, 
v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3762,47 +3652,25 @@ define @ctlz_zero_undef_nxv2i64( %va) { ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI41_0) +; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_1) +; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI41_2) +; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_3) +; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: 
slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3887,47 +3755,25 @@ define @ctlz_zero_undef_nxv4i64( %va) { ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI42_0) +; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_1) +; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI42_2) +; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_3) +; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; 
RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4012,47 +3858,25 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI43_0) +; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_1) +; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI43_2) +; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_3) +; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, 
a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll index f60b0380c265..0a5b2bf3caa0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -798,47 +798,25 @@ define @ctpop_nxv1i64( %va) { ; RV64-LABEL: ctpop_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_1) +; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_3) +; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; 
RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -896,47 +874,25 @@ define @ctpop_nxv2i64( %va) { ; RV64-LABEL: ctpop_nxv2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_1) +; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_3) +; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; 
RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -994,47 +950,25 @@ define @ctpop_nxv4i64( %va) { ; RV64-LABEL: ctpop_nxv4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_3) +; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; 
RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1092,47 +1026,25 @@ define @ctpop_nxv8i64( %va) { ; RV64-LABEL: ctpop_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_3) +; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; 
RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index bbe4d82d9d12..93c58c70a3b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1557,47 +1557,25 @@ define @cttz_nxv1i64( %va) { ; RV64-NEXT: vsub.vx v9, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v9 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_1) +; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_3) +; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi 
a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1663,47 +1641,25 @@ define @cttz_nxv2i64( %va) { ; RV64-NEXT: vsub.vx v10, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v10 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_1) +; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_3) +; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw 
a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1769,47 +1725,25 @@ define @cttz_nxv4i64( %va) { ; RV64-NEXT: vsub.vx v12, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v12 +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_3) +; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli 
a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1875,47 +1809,25 @@ define @cttz_nxv8i64( %va) { ; RV64-NEXT: vsub.vx v16, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_3) +; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: 
vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3361,47 +3273,25 @@ define @cttz_zero_undef_nxv1i64( %va) { ; RV64-NEXT: vsub.vx v9, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v9 +; RV64-NEXT: lui a0, %hi(.LCPI40_0) +; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_1) +; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI40_2) +; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_3) +; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ 
-3466,47 +3356,25 @@ define @cttz_zero_undef_nxv2i64( %va) { ; RV64-NEXT: vsub.vx v10, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v10 +; RV64-NEXT: lui a0, %hi(.LCPI41_0) +; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_1) +; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI41_2) +; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_3) +; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3571,47 +3439,25 @@ define @cttz_zero_undef_nxv4i64( %va) { ; RV64-NEXT: vsub.vx 
v12, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v12 +; RV64-NEXT: lui a0, %hi(.LCPI42_0) +; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_1) +; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI42_2) +; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_3) +; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3676,47 +3522,25 @@ define @cttz_zero_undef_nxv8i64( %va) { ; RV64-NEXT: vsub.vx v16, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v16 +; 
RV64-NEXT: lui a0, %hi(.LCPI43_0) +; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_1) +; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI43_2) +; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_3) +; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 26dfff7fe57b..65b8911749b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -422,43 +422,25 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX2-RV64-NEXT: slli a1, a3, 48 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_1) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: 
addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -583,43 +565,25 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX1-RV64-NEXT: slli a1, a3, 48 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; 
LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 @@ -1148,43 +1112,25 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV64-NEXT: slli a1, a3, 48 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_0)(a1) ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_1) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_2)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli 
a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -1325,70 +1271,52 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, t0 ; LMULMAX1-RV64-NEXT: li t1, 40 ; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, t1 -; LMULMAX1-RV64-NEXT: lui a1, 16 -; LMULMAX1-RV64-NEXT: addiw t2, a1, -256 +; LMULMAX1-RV64-NEXT: lui a4, 16 +; LMULMAX1-RV64-NEXT: addiw t2, a4, -256 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t2 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: lui a6, 4080 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: li a3, 255 -; LMULMAX1-RV64-NEXT: slli t3, a3, 24 +; LMULMAX1-RV64-NEXT: li a5, 255 +; LMULMAX1-RV64-NEXT: slli t3, a5, 24 ; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t3 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: slli t4, a3, 32 +; LMULMAX1-RV64-NEXT: slli t4, a5, 32 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: slli a2, a3, 40 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: slli a3, a5, 40 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, t0 ; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, t1 -; LMULMAX1-RV64-NEXT: slli a3, a3, 48 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: slli a5, a5, 48 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI5_0) +; LMULMAX1-RV64-NEXT: ld a4, 
%lo(.LCPI5_0)(a4) ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: lui a4, 3855 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI5_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: lui a5, 13107 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 819 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 819 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 819 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI5_2) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI5_2)(a2) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; 
LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, t0 @@ -1404,11 +1332,11 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, t0 ; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, t1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 ; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 @@ -1418,13 +1346,13 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index 6b1573c0dc1e..fc211b3a5474 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -658,47 +658,25 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind { ; LMULMAX2-RV64-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 +; 
LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, 
a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -779,47 +757,25 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; 
LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: li a1, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -900,47 +856,25 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind { ; LMULMAX8-RV64-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; 
LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) @@ -1687,47 +1621,25 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; 
LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -1821,8 +1733,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-LABEL: ctlz_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a7, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a7) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; 
LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 @@ -1838,49 +1750,27 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a6 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_0)(a3) +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_1) +; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_1)(a4) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 21845 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 13107 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 ; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) +; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a5, 3855 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 241 -; 
LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: lui a2, 4112 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 ; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: li a7, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 @@ -1905,9 +1795,9 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a7) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; ; LMULMAX8-RV32-LABEL: ctlz_v4i64: @@ -1985,47 +1875,25 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX8-RV64-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v10 -; 
LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index d3af39b15585..24afb1d6ee5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -304,47 +304,25 @@ define void @ctpop_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX2-RV64: # %bb.0: ; 
LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; 
LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -394,47 +372,25 @@ define void @ctpop_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv 
v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: li a1, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -840,47 +796,25 @@ define void @ctpop_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 +; 
LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -944,52 +878,13 @@ define void @ctpop_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-LABEL: ctpop_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a6, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a6) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 21845 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; 
LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 13107 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 3855 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: lui a5, 4112 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 257 -; LMULMAX1-RV64-NEXT: slli a5, a5, 16 -; LMULMAX1-RV64-NEXT: addi a5, a5, 257 -; LMULMAX1-RV64-NEXT: slli a5, a5, 16 -; LMULMAX1-RV64-NEXT: addi a5, a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_0) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2) +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_1) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_1)(a3) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 @@ -997,13 +892,30 @@ define void @ctpop_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_2) +; 
LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_2)(a4) +; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_3)(a5) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a6) +; LMULMAX1-RV64-NEXT: li a6, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a6 +; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a6 +; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index a59223515fb6..7b7e9d6fc3c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -598,47 +598,25 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind { ; LMULMAX2-RV64-NEXT: vsub.vx v9, v8, a1 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, 
a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -699,47 +677,25 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind { ; 
LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: 
addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: li a1, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -800,47 +756,25 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) nounwind { ; LMULMAX8-RV64-NEXT: vsub.vx v9, v8, a1 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v9 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4 
; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) @@ -1483,47 +1417,25 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; 
LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -1597,56 +1509,34 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-LABEL: cttz_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a7, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a7) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) ; LMULMAX1-RV64-NEXT: li a6, 1 ; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_0)(a3) +; LMULMAX1-RV64-NEXT: lui a4, 
%hi(.LCPI7_1) +; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_1)(a4) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 21845 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 13107 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 ; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) +; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a5, 3855 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: lui a2, 4112 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 ; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: li a7, 56 +; 
LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 @@ -1661,9 +1551,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a7) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; ; LMULMAX8-RV32-LABEL: cttz_v4i64: @@ -1721,47 +1611,25 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX8-RV64-NEXT: vsub.vx v10, v8, a1 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v10 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx 
v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 568a393f4bb4..5a6b1f2126ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -376,16 +376,10 @@ define void @buildvec_dominant0_v2i32(<2 x i64>* %x) { ; ; RV64-LABEL: buildvec_dominant0_v2i32: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI20_0) +; RV64-NEXT: ld a1, %lo(.LCPI20_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.v.i v8, -1 -; RV64-NEXT: lui a1, 3641 -; RV64-NEXT: addiw a1, a1, -455 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -455 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -455 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, -910 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; 
RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vse64.v v8, (a0) @@ -465,12 +459,8 @@ define void @buildvec_seq_v16i8_v2i64(<16 x i8>* %x) { ; ; RV64-LABEL: buildvec_seq_v16i8_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 32880 -; RV64-NEXT: addiw a1, a1, 1541 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 1027 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 513 +; RV64-NEXT: lui a1, %hi(.LCPI24_0) +; RV64-NEXT: ld a1, %lo(.LCPI24_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu @@ -524,14 +514,10 @@ define void @buildvec_seq_v9i8(<9 x i8>* %x) { ; ; RV64-LABEL: buildvec_seq_v9i8: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 3 -; RV64-NEXT: sb a1, 8(a0) -; RV64-NEXT: lui a1, 4104 -; RV64-NEXT: addiw a1, a1, 385 -; RV64-NEXT: slli a1, a1, 17 -; RV64-NEXT: addi a1, a1, 259 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 513 +; RV64-NEXT: lui a1, %hi(.LCPI26_0) +; RV64-NEXT: ld a1, %lo(.LCPI26_0)(a1) +; RV64-NEXT: li a2, 3 +; RV64-NEXT: sb a2, 8(a0) ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret store <9 x i8> , <9 x i8>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index e26a232d8a94..faa8ff5cd2e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1032,29 +1032,17 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; ; RV64-LABEL: mulhu_v2i64: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI55_0) +; RV64-NEXT: ld a1, %lo(.LCPI55_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 1035469 -; RV64-NEXT: addiw a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: vmv.v.x v9, a1 -; RV64-NEXT: lui a1, 1026731 -; RV64-NEXT: addiw a1, a1, -1365 
-; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 +; RV64-NEXT: lui a2, %hi(.LCPI55_1) +; RV64-NEXT: ld a2, %lo(.LCPI55_1)(a2) +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vle64.v v9, (a0) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v9, a1 +; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulhu.vv v8, v8, v9 +; RV64-NEXT: vmulhu.vv v8, v9, v8 ; RV64-NEXT: vid.v v9 ; RV64-NEXT: vadd.vi v9, v9, 1 ; RV64-NEXT: vsrl.vv v8, v8, v9 @@ -1184,16 +1172,10 @@ define void @mulhs_v4i32(<4 x i32>* %x) { ; ; RV64-LABEL: mulhs_v4i32: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI58_0) +; RV64-NEXT: ld a1, %lo(.LCPI58_0)(a1) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 973 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, -1639 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu @@ -1245,25 +1227,20 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI59_0) +; RV64-NEXT: ld a1, %lo(.LCPI59_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a2, a1, 1365 -; RV64-NEXT: vmv.v.x v9, a2 -; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: lui a2, %hi(.LCPI59_1) +; RV64-NEXT: ld a2, %lo(.LCPI59_1)(a2) +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vle64.v v9, (a0) ; RV64-NEXT: 
vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v9, a1 +; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vv v9, v8, v9 +; RV64-NEXT: vmulh.vv v8, v9, v8 ; RV64-NEXT: vid.v v10 ; RV64-NEXT: vrsub.vi v11, v10, 0 -; RV64-NEXT: vmadd.vv v11, v8, v9 +; RV64-NEXT: vmadd.vv v11, v9, v8 ; RV64-NEXT: li a1, 63 ; RV64-NEXT: vsrl.vx v8, v11, a1 ; RV64-NEXT: vsra.vv v9, v11, v10 @@ -4366,51 +4343,27 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: slli a2, a2, 63 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV64-NEXT: lui a2, 1044935 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 455 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 455 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 455 -; LMULMAX1-RV64-NEXT: slli a2, a2, 13 -; LMULMAX1-RV64-NEXT: addi a2, a2, 911 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_0) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI132_0)(a2) +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI132_1) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI132_1)(a3) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV64-NEXT: lui a2, 4681 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 585 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 585 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 585 -; LMULMAX1-RV64-NEXT: slli a2, a2, 13 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1171 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 ; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_2) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI132_2)(a2) ; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: 
vadd.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vid.v v10 -; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: lui a2, 1035469 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 ; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV64-NEXT: lui a2, 1026731 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI132_3)(a2) +; LMULMAX1-RV64-NEXT: vadd.vi v12, v10, 2 +; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v12 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -4600,16 +4553,10 @@ define void @mulhs_v8i32(<8 x i32>* %x) { ; ; LMULMAX2-RV64-LABEL: mulhs_v8i32: ; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI135_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI135_0)(a1) ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 973 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 13 -; LMULMAX2-RV64-NEXT: addi a1, a1, -1639 ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu @@ -4716,27 +4663,22 @@ define void 
@mulhs_v4i64(<4 x i64>* %x) { ; LMULMAX2-RV64-NEXT: li a1, 5 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI136_0)(a1) ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a2, a1, 1365 -; LMULMAX2-RV64-NEXT: vmv.v.x v12, a2 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1366 -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v12, v8, v12 -; LMULMAX2-RV64-NEXT: vmacc.vv v12, v8, v10 +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI136_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI136_1)(a2) +; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vmacc.vv v10, v8, v12 ; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v12, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v10, v12, v10 +; LMULMAX2-RV64-NEXT: vsrl.vx v8, v10, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 +; LMULMAX2-RV64-NEXT: vsra.vv v10, v10, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -4760,38 +4702,33 @@ define void @mulhs_v4i64(<4 x i64>* %x) { ; ; LMULMAX1-RV64-LABEL: mulhs_v4i64: ; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI136_0) +; LMULMAX1-RV64-NEXT: ld a1, 
%lo(.LCPI136_0)(a1) ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: lui a2, 21845 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a3, a2, 1365 -; LMULMAX1-RV64-NEXT: vmv.v.x v10, a3 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1366 +; LMULMAX1-RV64-NEXT: vmv.v.x v9, a1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI136_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI136_1)(a1) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmulh.vv v11, v9, v10 +; LMULMAX1-RV64-NEXT: vmulh.vv v11, v10, v9 ; LMULMAX1-RV64-NEXT: vid.v v12 ; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0 -; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v9 -; LMULMAX1-RV64-NEXT: li a2, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v11, a2 +; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v10 +; LMULMAX1-RV64-NEXT: li a1, 63 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v11, a1 ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 -; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: vadd.vv v10, v11, v10 +; LMULMAX1-RV64-NEXT: vmulh.vv v9, v8, v9 +; LMULMAX1-RV64-NEXT: vmacc.vv v9, v8, v13 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v9, a1 +; LMULMAX1-RV64-NEXT: vsra.vv v9, v9, v12 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: 
vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v10, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = sdiv <4 x i64> %a, @@ -7405,14 +7342,8 @@ define void @mulhu_vx_v2i64(<2 x i64>* %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 1026731 -; RV64-NEXT: addiw a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 +; RV64-NEXT: lui a1, %hi(.LCPI265_0) +; RV64-NEXT: ld a1, %lo(.LCPI265_0)(a1) ; RV64-NEXT: vmulhu.vx v8, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vse64.v v8, (a0) @@ -7529,14 +7460,8 @@ define void @mulhs_vx_v2i64(<2 x i64>* %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: lui a1, %hi(.LCPI269_0) +; RV64-NEXT: ld a1, %lo(.LCPI269_0)(a1) ; RV64-NEXT: vmulh.vx v8, v8, a1 ; RV64-NEXT: li a1, 63 ; RV64-NEXT: vsrl.vx v9, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index e450a5e81548..f4294c78713f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -469,14 +469,8 @@ define <64 x i1> @buildvec_mask_v64i1() { ; ; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1: ; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 1048429 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; 
RV64-LMULMAX4-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-LMULMAX4-NEXT: ld a0, %lo(.LCPI19_0)(a0) ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX4-NEXT: ret @@ -496,14 +490,8 @@ define <64 x i1> @buildvec_mask_v64i1() { ; ; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1: ; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, 1048429 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX8-NEXT: slli a0, a0, 17 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI19_0)(a0) ; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX8-NEXT: ret @@ -613,23 +601,13 @@ define <128 x i1> @buildvec_mask_v128i1() { ; ; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1: ; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 841543 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 511 -; RV64-LMULMAX4-NEXT: slli a0, a0, 14 -; RV64-LMULMAX4-NEXT: addi a0, a0, 859 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-LMULMAX4-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-LMULMAX4-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-LMULMAX4-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-LMULMAX4-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX4-NEXT: lui a0, 1048429 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 ; 
RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX4-NEXT: vmv.s.x v8, a1 ; RV64-LMULMAX4-NEXT: ret ; ; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1: @@ -659,23 +637,13 @@ define <128 x i1> @buildvec_mask_v128i1() { ; ; RV64-LMULMAX8-LABEL: buildvec_mask_v128i1: ; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, 841543 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 511 -; RV64-LMULMAX8-NEXT: slli a0, a0, 14 -; RV64-LMULMAX8-NEXT: addi a0, a0, 859 -; RV64-LMULMAX8-NEXT: slli a0, a0, 17 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-LMULMAX8-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-LMULMAX8-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-LMULMAX8-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX8-NEXT: lui a0, 1048429 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX8-NEXT: slli a0, a0, 17 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX8-NEXT: vmv.s.x v0, a1 ; RV64-LMULMAX8-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-LMULMAX8-NEXT: vslideup.vi v0, v8, 1 ; RV64-LMULMAX8-NEXT: ret @@ -775,23 +743,13 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; ; RV64-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1: ; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 841543 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 511 -; RV64-LMULMAX4-NEXT: slli a0, a0, 14 -; RV64-LMULMAX4-NEXT: addi a0, a0, 859 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-LMULMAX4-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-LMULMAX4-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-LMULMAX4-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; 
RV64-LMULMAX4-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX4-NEXT: lui a0, 1048429 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX4-NEXT: vmv.s.x v8, a1 ; RV64-LMULMAX4-NEXT: ret ; ; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll index 4bd06da73d6f..ba957ff654f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll @@ -27,16 +27,10 @@ define void @interleave256(<256 x i16>* %agg.result, <128 x i16>* %0, <128 x i16 ; RV64-1024-NEXT: vslideup.vi v8, v16, 0 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, tu, mu ; RV64-1024-NEXT: vslideup.vx v8, v24, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-1024-NEXT: ld a2, %lo(.LCPI0_0)(a2) ; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; RV64-1024-NEXT: vrgather.vv v16, v0, v28 -; RV64-1024-NEXT: lui a2, 1026731 -; RV64-1024-NEXT: addiw a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1366 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-1024-NEXT: vmv.s.x v20, a2 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu @@ -64,39 +58,33 @@ define void @interleave256(<256 x i16>* %agg.result, <128 x i16>* %0, <128 x i16 ; RV64-2048-NEXT: vmv2r.v v14, v8 ; RV64-2048-NEXT: vslideup.vi v14, v10, 0 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m1, ta, mu -; RV64-2048-NEXT: vmv.v.i v16, 0 +; RV64-2048-NEXT: vmv.v.i v10, 0 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; RV64-2048-NEXT: 
vslideup.vx v14, v16, a3 +; RV64-2048-NEXT: vslideup.vx v14, v10, a3 ; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-2048-NEXT: vid.v v18 -; RV64-2048-NEXT: vsrl.vi v10, v18, 1 -; RV64-2048-NEXT: vrgather.vv v20, v14, v10 +; RV64-2048-NEXT: vid.v v16 +; RV64-2048-NEXT: vsrl.vi v18, v16, 1 +; RV64-2048-NEXT: vrgather.vv v20, v14, v18 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, tu, mu ; RV64-2048-NEXT: vslideup.vi v8, v12, 0 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; RV64-2048-NEXT: vslideup.vx v8, v16, a3 +; RV64-2048-NEXT: vslideup.vx v8, v10, a3 +; RV64-2048-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-2048-NEXT: ld a2, %lo(.LCPI0_0)(a2) ; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-2048-NEXT: vrgather.vv v12, v20, v18 -; RV64-2048-NEXT: lui a2, 1026731 -; RV64-2048-NEXT: addiw a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1366 +; RV64-2048-NEXT: vrgather.vv v10, v20, v16 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, ta, mu -; RV64-2048-NEXT: vmv.s.x v14, a2 +; RV64-2048-NEXT: vmv.s.x v12, a2 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu -; RV64-2048-NEXT: vmv1r.v v0, v14 -; RV64-2048-NEXT: vslideup.vi v0, v14, 1 +; RV64-2048-NEXT: vmv1r.v v0, v12 +; RV64-2048-NEXT: vslideup.vi v0, v12, 1 ; RV64-2048-NEXT: vsetivli zero, 3, e64, m1, tu, mu -; RV64-2048-NEXT: vslideup.vi v0, v14, 2 +; RV64-2048-NEXT: vslideup.vi v0, v12, 2 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, tu, mu -; RV64-2048-NEXT: vslideup.vi v0, v14, 3 +; RV64-2048-NEXT: vslideup.vi v0, v12, 3 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; RV64-2048-NEXT: vrgather.vv v12, v8, v10, v0.t -; RV64-2048-NEXT: vse16.v v12, (a0) +; RV64-2048-NEXT: vrgather.vv v10, v8, v18, v0.t +; RV64-2048-NEXT: vse16.v v10, (a0) ; RV64-2048-NEXT: ret entry: %ve = load <128 x i16>, <128 x i16>* %0, 
align 256 @@ -119,10 +107,11 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16 ; RV64-1024-NEXT: sub sp, sp, a3 ; RV64-1024-NEXT: li a3, 256 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu -; RV64-1024-NEXT: vle16.v v16, (a1) +; RV64-1024-NEXT: vle16.v v24, (a1) ; RV64-1024-NEXT: vle16.v v8, (a2) ; RV64-1024-NEXT: csrr a1, vlenb -; RV64-1024-NEXT: slli a1, a1, 4 +; RV64-1024-NEXT: li a2, 24 +; RV64-1024-NEXT: mul a1, a1, a2 ; RV64-1024-NEXT: add a1, sp, a1 ; RV64-1024-NEXT: addi a1, a1, 16 ; RV64-1024-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -130,12 +119,12 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; RV64-1024-NEXT: vmv.v.i v8, 0 ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 5 +; RV64-1024-NEXT: slli a2, a2, 4 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 ; RV64-1024-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu -; RV64-1024-NEXT: vslideup.vi v8, v16, 0 +; RV64-1024-NEXT: vslideup.vi v8, v24, 0 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu ; RV64-1024-NEXT: vmv.v.i v16, 0 ; RV64-1024-NEXT: addi a2, sp, 16 @@ -146,8 +135,7 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16 ; RV64-1024-NEXT: vid.v v24 ; RV64-1024-NEXT: vsrl.vi v16, v24, 1 ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: li a4, 24 -; RV64-1024-NEXT: mul a2, a2, a4 +; RV64-1024-NEXT: slli a2, a2, 5 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 ; RV64-1024-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill @@ -159,70 +147,54 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16 ; RV64-1024-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 5 -; RV64-1024-NEXT: 
add a2, sp, a2 -; RV64-1024-NEXT: addi a2, a2, 16 -; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: csrr a2, vlenb ; RV64-1024-NEXT: slli a2, a2, 4 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 ; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: vslideup.vi v8, v16, 0 -; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu -; RV64-1024-NEXT: addi a2, sp, 16 -; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: vslideup.vx v8, v16, a3 ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 5 +; RV64-1024-NEXT: li a4, 24 +; RV64-1024-NEXT: mul a2, a2, a4 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 -; RV64-1024-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vslideup.vi v16, v8, 0 +; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; RV64-1024-NEXT: addi a2, sp, 16 +; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vslideup.vx v16, v8, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI1_0) +; RV64-1024-NEXT: ld a2, %lo(.LCPI1_0)(a2) ; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 3 -; RV64-1024-NEXT: add a2, sp, a2 -; RV64-1024-NEXT: addi a2, a2, 16 -; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: vrgather.vv v8, v16, v24 -; RV64-1024-NEXT: lui a2, 1026731 -; RV64-1024-NEXT: addiw a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1366 +; RV64-1024-NEXT: csrr a3, vlenb +; RV64-1024-NEXT: slli a3, a3, 3 +; RV64-1024-NEXT: add a3, sp, a3 +; RV64-1024-NEXT: addi a3, a3, 16 +; RV64-1024-NEXT: vl8re8.v v0, (a3) # Unknown-size Folded 
Reload +; RV64-1024-NEXT: vrgather.vv v8, v0, v24 ; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, ta, mu -; RV64-1024-NEXT: vmv.s.x v16, a2 +; RV64-1024-NEXT: vmv.s.x v24, a2 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu -; RV64-1024-NEXT: vmv1r.v v0, v16 -; RV64-1024-NEXT: vslideup.vi v0, v16, 1 +; RV64-1024-NEXT: vmv1r.v v0, v24 +; RV64-1024-NEXT: vslideup.vi v0, v24, 1 ; RV64-1024-NEXT: vsetivli zero, 3, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 2 +; RV64-1024-NEXT: vslideup.vi v0, v24, 2 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 3 +; RV64-1024-NEXT: vslideup.vi v0, v24, 3 ; RV64-1024-NEXT: vsetivli zero, 5, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 4 +; RV64-1024-NEXT: vslideup.vi v0, v24, 4 ; RV64-1024-NEXT: vsetivli zero, 6, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 5 +; RV64-1024-NEXT: vslideup.vi v0, v24, 5 ; RV64-1024-NEXT: vsetivli zero, 7, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 6 +; RV64-1024-NEXT: vslideup.vi v0, v24, 6 ; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 7 +; RV64-1024-NEXT: vslideup.vi v0, v24, 7 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; RV64-1024-NEXT: csrr a1, vlenb -; RV64-1024-NEXT: li a2, 24 -; RV64-1024-NEXT: mul a1, a1, a2 -; RV64-1024-NEXT: add a1, sp, a1 -; RV64-1024-NEXT: addi a1, a1, 16 -; RV64-1024-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-1024-NEXT: csrr a1, vlenb ; RV64-1024-NEXT: slli a1, a1, 5 ; RV64-1024-NEXT: add a1, sp, a1 ; RV64-1024-NEXT: addi a1, a1, 16 ; RV64-1024-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload -; RV64-1024-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-1024-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-1024-NEXT: vse16.v v8, (a0) ; RV64-1024-NEXT: csrr a0, vlenb ; RV64-1024-NEXT: li a1, 40 @@ -255,16 +227,10 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16 ; RV64-2048-NEXT: 
vslideup.vi v8, v16, 0 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu ; RV64-2048-NEXT: vslideup.vx v8, v24, a3 +; RV64-2048-NEXT: lui a2, %hi(.LCPI1_0) +; RV64-2048-NEXT: ld a2, %lo(.LCPI1_0)(a2) ; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; RV64-2048-NEXT: vrgather.vv v16, v0, v28 -; RV64-2048-NEXT: lui a2, 1026731 -; RV64-2048-NEXT: addiw a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1366 ; RV64-2048-NEXT: vsetivli zero, 8, e64, m1, ta, mu ; RV64-2048-NEXT: vmv.s.x v20, a2 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll index 6273a55eb97b..6adaa2476659 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll @@ -918,14 +918,8 @@ define @vdiv_vi_nxv1i64_0( %va) { ; ; RV64-LABEL: vdiv_vi_nxv1i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI58_0) +; RV64-NEXT: ld a0, %lo(.LCPI58_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 @@ -998,14 +992,8 @@ define @vdiv_vi_nxv2i64_0( %va) { ; ; RV64-LABEL: vdiv_vi_nxv2i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI61_0) +; RV64-NEXT: ld a0, %lo(.LCPI61_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu ; RV64-NEXT: 
vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1078,14 +1066,8 @@ define @vdiv_vi_nxv4i64_0( %va) { ; ; RV64-LABEL: vdiv_vi_nxv4i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI64_0) +; RV64-NEXT: ld a0, %lo(.LCPI64_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1158,14 +1140,8 @@ define @vdiv_vi_nxv8i64_0( %va) { ; ; RV64-LABEL: vdiv_vi_nxv8i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI67_0) +; RV64-NEXT: ld a0, %lo(.LCPI67_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll index e77aeed9b77d..c58c3025050e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -954,14 +954,8 @@ define @vrem_vi_nxv1i64_0( %va) { ; ; RV64-LABEL: vrem_vi_nxv1i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI56_0) +; RV64-NEXT: ld a0, %lo(.LCPI56_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu ; RV64-NEXT: vmulh.vx v9, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1038,14 +1032,8 @@ define @vrem_vi_nxv2i64_0( %va) { ; ; RV64-LABEL: vrem_vi_nxv2i64_0: ; RV64: # 
%bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI59_0) +; RV64-NEXT: ld a0, %lo(.LCPI59_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu ; RV64-NEXT: vmulh.vx v10, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1122,14 +1110,8 @@ define @vrem_vi_nxv4i64_0( %va) { ; ; RV64-LABEL: vrem_vi_nxv4i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI62_0) +; RV64-NEXT: ld a0, %lo(.LCPI62_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu ; RV64-NEXT: vmulh.vx v12, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1206,14 +1188,8 @@ define @vrem_vi_nxv8i64_0( %va) { ; ; RV64-LABEL: vrem_vi_nxv8i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI65_0) +; RV64-NEXT: ld a0, %lo(.LCPI65_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vmulh.vx v16, v8, a0 ; RV64-NEXT: li a0, 63 diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll index da5900208a24..a1a9a06d42b1 100644 --- a/llvm/test/CodeGen/RISCV/srem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -439,14 +439,8 @@ define i64 @dont_fold_srem_i64(i64 %x) nounwind { ; ; RV64IM-LABEL: dont_fold_srem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 2675 -; RV64IM-NEXT: addiw a1, a1, -251 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1839 -; RV64IM-NEXT: 
slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 167 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1505 +; RV64IM-NEXT: lui a1, %hi(.LCPI8_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI8_0)(a1) ; RV64IM-NEXT: mulh a1, a0, a1 ; RV64IM-NEXT: srli a2, a1, 63 ; RV64IM-NEXT: srai a1, a1, 5 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 361fd88afbc6..24a7c78d2666 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -162,16 +162,10 @@ define i1 @test_srem_even(i4 %X) nounwind { ; ; RV64M-LABEL: test_srem_even: ; RV64M: # %bb.0: +; RV64M-NEXT: lui a1, %hi(.LCPI1_0) +; RV64M-NEXT: ld a1, %lo(.LCPI1_0)(a1) ; RV64M-NEXT: slli a0, a0, 60 ; RV64M-NEXT: srai a0, a0, 60 -; RV64M-NEXT: lui a1, 10923 -; RV64M-NEXT: addiw a1, a1, -1365 -; RV64M-NEXT: slli a1, a1, 12 -; RV64M-NEXT: addi a1, a1, -1365 -; RV64M-NEXT: slli a1, a1, 12 -; RV64M-NEXT: addi a1, a1, -1365 -; RV64M-NEXT: slli a1, a1, 12 -; RV64M-NEXT: addi a1, a1, -1365 ; RV64M-NEXT: mulh a1, a0, a1 ; RV64M-NEXT: srli a2, a1, 63 ; RV64M-NEXT: add a1, a1, a2 @@ -200,16 +194,10 @@ define i1 @test_srem_even(i4 %X) nounwind { ; ; RV64MV-LABEL: test_srem_even: ; RV64MV: # %bb.0: +; RV64MV-NEXT: lui a1, %hi(.LCPI1_0) +; RV64MV-NEXT: ld a1, %lo(.LCPI1_0)(a1) ; RV64MV-NEXT: slli a0, a0, 60 ; RV64MV-NEXT: srai a0, a0, 60 -; RV64MV-NEXT: lui a1, 10923 -; RV64MV-NEXT: addiw a1, a1, -1365 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1365 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1365 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1365 ; RV64MV-NEXT: mulh a1, a0, a1 ; RV64MV-NEXT: srli a2, a1, 63 ; RV64MV-NEXT: add a1, a1, a2 @@ -426,24 +414,12 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV64-NEXT: mv a0, s1 ; RV64-NEXT: call __moddi3@plt ; RV64-NEXT: mv s1, a0 -; RV64-NEXT: lui a0, 1026731 -; RV64-NEXT: addiw a0, a0, -1365 
-; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a1, a0, -1365 +; RV64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a0) ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lui a1, 10923 -; RV64-NEXT: addiw a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1366 +; RV64-NEXT: lui a1, %hi(.LCPI3_1) +; RV64-NEXT: ld a1, %lo(.LCPI3_1)(a1) ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: slli a2, a0, 63 ; RV64-NEXT: srli a0, a0, 1 @@ -567,44 +543,32 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV64M-NEXT: lb a1, 12(a0) ; RV64M-NEXT: lwu a2, 8(a0) ; RV64M-NEXT: slli a1, a1, 32 -; RV64M-NEXT: or a2, a2, a1 +; RV64M-NEXT: or a1, a2, a1 ; RV64M-NEXT: li a6, -1 ; RV64M-NEXT: srli a3, a6, 24 -; RV64M-NEXT: and a2, a2, a3 +; RV64M-NEXT: and a1, a1, a3 ; RV64M-NEXT: ld a3, 0(a0) -; RV64M-NEXT: slli a4, a2, 29 +; RV64M-NEXT: slli a4, a1, 29 ; RV64M-NEXT: srai a4, a4, 31 -; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: slli a1, a1, 31 ; RV64M-NEXT: srli a5, a3, 33 -; RV64M-NEXT: or a2, a5, a2 -; RV64M-NEXT: slli a2, a2, 31 -; RV64M-NEXT: srai a2, a2, 31 +; RV64M-NEXT: lui a2, %hi(.LCPI3_0) +; RV64M-NEXT: ld a2, %lo(.LCPI3_0)(a2) +; RV64M-NEXT: or a1, a5, a1 +; RV64M-NEXT: slli a1, a1, 31 +; RV64M-NEXT: srai a1, a1, 31 +; RV64M-NEXT: mulh a2, a1, a2 +; RV64M-NEXT: srli a5, a2, 63 +; RV64M-NEXT: srai a2, a2, 1 +; RV64M-NEXT: add a2, a2, a5 +; RV64M-NEXT: slli a5, a2, 3 +; RV64M-NEXT: sub a2, a2, a5 +; RV64M-NEXT: lui a5, %hi(.LCPI3_1) +; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) ; RV64M-NEXT: slli a3, a3, 31 ; RV64M-NEXT: srai a3, a3, 31 -; RV64M-NEXT: lui a5, 18725 -; RV64M-NEXT: addiw a5, a5, -1755 -; RV64M-NEXT: slli a5, a5, 12 -; RV64M-NEXT: addi a5, a5, -1755 -; RV64M-NEXT: slli 
a5, a5, 12 -; RV64M-NEXT: addi a5, a5, -1755 -; RV64M-NEXT: slli a5, a5, 12 -; RV64M-NEXT: addi a5, a5, -1755 -; RV64M-NEXT: mulh a5, a2, a5 -; RV64M-NEXT: srli a1, a5, 63 -; RV64M-NEXT: srai a5, a5, 1 -; RV64M-NEXT: add a1, a5, a1 -; RV64M-NEXT: slli a5, a1, 3 -; RV64M-NEXT: sub a1, a1, a5 -; RV64M-NEXT: add a1, a2, a1 -; RV64M-NEXT: lui a2, 1035469 -; RV64M-NEXT: addiw a2, a2, -819 -; RV64M-NEXT: slli a2, a2, 12 -; RV64M-NEXT: addi a2, a2, -819 -; RV64M-NEXT: slli a2, a2, 12 -; RV64M-NEXT: addi a2, a2, -819 -; RV64M-NEXT: slli a2, a2, 13 -; RV64M-NEXT: addi a2, a2, -1639 -; RV64M-NEXT: mulh a2, a4, a2 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: mulh a2, a4, a5 ; RV64M-NEXT: srli a5, a2, 63 ; RV64M-NEXT: srai a2, a2, 1 ; RV64M-NEXT: add a2, a2, a5 @@ -613,30 +577,18 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV64M-NEXT: add a2, a4, a2 ; RV64M-NEXT: addi a2, a2, -2 ; RV64M-NEXT: snez a2, a2 +; RV64M-NEXT: lui a4, %hi(.LCPI3_2) +; RV64M-NEXT: ld a4, %lo(.LCPI3_2)(a4) +; RV64M-NEXT: lui a5, %hi(.LCPI3_3) +; RV64M-NEXT: ld a5, %lo(.LCPI3_3)(a5) ; RV64M-NEXT: addi a1, a1, -1 ; RV64M-NEXT: snez a1, a1 -; RV64M-NEXT: lui a4, 1026731 -; RV64M-NEXT: addiw a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 ; RV64M-NEXT: mul a3, a3, a4 -; RV64M-NEXT: lui a4, 10923 -; RV64M-NEXT: addiw a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1366 -; RV64M-NEXT: add a3, a3, a4 -; RV64M-NEXT: slli a5, a3, 63 +; RV64M-NEXT: add a3, a3, a5 +; RV64M-NEXT: slli a4, a3, 63 ; RV64M-NEXT: srli a3, a3, 1 -; RV64M-NEXT: or a3, a3, a5 -; RV64M-NEXT: sltu a3, a4, a3 +; RV64M-NEXT: or a3, a3, a4 +; RV64M-NEXT: sltu a3, a5, a3 ; RV64M-NEXT: neg a1, a1 ; RV64M-NEXT: neg a4, a2 
; RV64M-NEXT: neg a3, a3 @@ -771,60 +723,42 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV64MV-NEXT: lb a1, 12(a0) ; RV64MV-NEXT: lwu a2, 8(a0) ; RV64MV-NEXT: slli a1, a1, 32 -; RV64MV-NEXT: or a2, a2, a1 +; RV64MV-NEXT: or a1, a2, a1 ; RV64MV-NEXT: li a6, -1 ; RV64MV-NEXT: ld a3, 0(a0) ; RV64MV-NEXT: srli a4, a6, 24 -; RV64MV-NEXT: and a2, a2, a4 -; RV64MV-NEXT: slli a4, a2, 31 +; RV64MV-NEXT: and a1, a1, a4 +; RV64MV-NEXT: slli a4, a1, 31 ; RV64MV-NEXT: srli a5, a3, 33 ; RV64MV-NEXT: or a4, a5, a4 ; RV64MV-NEXT: slli a4, a4, 31 ; RV64MV-NEXT: srai a4, a4, 31 -; RV64MV-NEXT: slli a2, a2, 29 -; RV64MV-NEXT: srai a2, a2, 31 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_0) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_0)(a5) +; RV64MV-NEXT: slli a1, a1, 29 ; RV64MV-NEXT: slli a3, a3, 31 ; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: lui a5, 10923 -; RV64MV-NEXT: addiw a5, a5, -1365 -; RV64MV-NEXT: slli a5, a5, 12 -; RV64MV-NEXT: addi a5, a5, -1365 -; RV64MV-NEXT: slli a5, a5, 12 -; RV64MV-NEXT: addi a5, a5, -1365 -; RV64MV-NEXT: slli a5, a5, 12 -; RV64MV-NEXT: addi a5, a5, -1365 ; RV64MV-NEXT: mulh a5, a3, a5 -; RV64MV-NEXT: srli a1, a5, 63 -; RV64MV-NEXT: add a1, a5, a1 +; RV64MV-NEXT: srli a2, a5, 63 +; RV64MV-NEXT: add a2, a5, a2 ; RV64MV-NEXT: li a5, 6 -; RV64MV-NEXT: mul a1, a1, a5 -; RV64MV-NEXT: sub a1, a3, a1 -; RV64MV-NEXT: sd a1, 32(sp) -; RV64MV-NEXT: lui a1, 1035469 -; RV64MV-NEXT: addiw a1, a1, -819 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -819 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -819 -; RV64MV-NEXT: slli a1, a1, 13 -; RV64MV-NEXT: addi a1, a1, -1639 -; RV64MV-NEXT: mulh a1, a2, a1 -; RV64MV-NEXT: srli a3, a1, 63 -; RV64MV-NEXT: srai a1, a1, 1 -; RV64MV-NEXT: add a1, a1, a3 -; RV64MV-NEXT: slli a3, a1, 2 -; RV64MV-NEXT: add a1, a3, a1 -; RV64MV-NEXT: add a1, a2, a1 +; RV64MV-NEXT: mul a2, a2, a5 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) +; RV64MV-NEXT: srai a1, a1, 31 +; 
RV64MV-NEXT: sub a2, a3, a2 +; RV64MV-NEXT: sd a2, 32(sp) +; RV64MV-NEXT: mulh a2, a1, a5 +; RV64MV-NEXT: srli a3, a2, 63 +; RV64MV-NEXT: srai a2, a2, 1 +; RV64MV-NEXT: add a2, a2, a3 +; RV64MV-NEXT: slli a3, a2, 2 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_2) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) +; RV64MV-NEXT: add a2, a3, a2 +; RV64MV-NEXT: add a1, a1, a2 ; RV64MV-NEXT: sd a1, 48(sp) -; RV64MV-NEXT: lui a1, 18725 -; RV64MV-NEXT: addiw a1, a1, -1755 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1755 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1755 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1755 -; RV64MV-NEXT: mulh a1, a4, a1 +; RV64MV-NEXT: mulh a1, a4, a5 ; RV64MV-NEXT: srli a2, a1, 63 ; RV64MV-NEXT: srai a1, a1, 1 ; RV64MV-NEXT: add a1, a1, a2 @@ -835,8 +769,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64MV-NEXT: addi a1, sp, 32 ; RV64MV-NEXT: vle64.v v8, (a1) -; RV64MV-NEXT: lui a1, %hi(.LCPI3_0) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_0) +; RV64MV-NEXT: lui a1, %hi(.LCPI3_3) +; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3) ; RV64MV-NEXT: vle64.v v10, (a1) ; RV64MV-NEXT: srli a1, a6, 31 ; RV64MV-NEXT: vand.vx v8, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index 84af77b944f6..0295f955292b 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -149,64 +149,41 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_srem_vec_1: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) ; RV64IM-NEXT: lh a6, 24(a1) -; RV64IM-NEXT: lh a3, 16(a1) -; RV64IM-NEXT: lh a4, 8(a1) -; RV64IM-NEXT: lh a1, 0(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: 
slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a5, a1, a5 -; RV64IM-NEXT: add a5, a5, a1 -; RV64IM-NEXT: srli a2, a5, 63 -; RV64IM-NEXT: srli a5, a5, 6 -; RV64IM-NEXT: addw a2, a5, a2 +; RV64IM-NEXT: lh a7, 16(a1) +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) ; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: lui a2, 777976 -; RV64IM-NEXT: addiw a2, a2, -1057 -; RV64IM-NEXT: slli a2, a2, 15 -; RV64IM-NEXT: addi a2, a2, -1057 -; RV64IM-NEXT: slli a2, a2, 14 -; RV64IM-NEXT: addi a2, a2, -529 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: mulh a2, a4, a2 -; RV64IM-NEXT: sub a2, a2, a4 -; RV64IM-NEXT: srli a5, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a2, a2, a5 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: mulh a3, a1, a4 +; RV64IM-NEXT: sub a3, a3, a1 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) ; RV64IM-NEXT: li a5, -124 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a2, a4, a2 -; RV64IM-NEXT: lui a4, 2675 -; RV64IM-NEXT: addiw a4, a4, -251 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1839 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 167 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1505 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 5 -; RV64IM-NEXT: addw a4, a4, a5 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: mulh a3, a7, a4 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 5 +; 
RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) ; RV64IM-NEXT: li a5, 98 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: lui a4, 1040212 -; RV64IM-NEXT: addiw a4, a4, 1977 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1907 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -453 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1213 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a7, a3 ; RV64IM-NEXT: mulh a4, a6, a4 ; RV64IM-NEXT: srli a5, a4, 63 ; RV64IM-NEXT: srli a4, a4, 7 @@ -216,8 +193,8 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV64IM-NEXT: subw a4, a6, a4 ; RV64IM-NEXT: sh a4, 6(a0) ; RV64IM-NEXT: sh a3, 4(a0) -; RV64IM-NEXT: sh a2, 2(a0) -; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -357,46 +334,40 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_srem_vec_2: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) ; RV64IM-NEXT: lh a6, 24(a1) -; RV64IM-NEXT: lh a7, 16(a1) -; RV64IM-NEXT: lh a4, 8(a1) -; RV64IM-NEXT: lh a1, 0(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a2, a1, a5 -; RV64IM-NEXT: add a2, a2, a1 -; RV64IM-NEXT: srli a3, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a2, a2, a3 -; RV64IM-NEXT: li a3, 95 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw t0, a1, a2 -; RV64IM-NEXT: mulh a2, a4, a5 -; RV64IM-NEXT: add a2, a2, a4 -; RV64IM-NEXT: srli a1, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a1, a2, a1 -; 
RV64IM-NEXT: mulw a1, a1, a3 -; RV64IM-NEXT: subw a1, a4, a1 -; RV64IM-NEXT: mulh a2, a7, a5 -; RV64IM-NEXT: add a2, a2, a7 +; RV64IM-NEXT: lh a5, 16(a1) +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: mulh a4, a2, a3 +; RV64IM-NEXT: add a4, a4, a2 +; RV64IM-NEXT: srli a7, a4, 63 +; RV64IM-NEXT: srli a4, a4, 6 +; RV64IM-NEXT: addw a4, a4, a7 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a4, a4, a7 +; RV64IM-NEXT: subw t0, a2, a4 +; RV64IM-NEXT: mulh a4, a1, a3 +; RV64IM-NEXT: add a4, a4, a1 +; RV64IM-NEXT: srli a2, a4, 63 +; RV64IM-NEXT: srli a4, a4, 6 +; RV64IM-NEXT: addw a2, a4, a2 +; RV64IM-NEXT: mulw a2, a2, a7 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: mulh a2, a5, a3 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: srli a4, a2, 63 ; RV64IM-NEXT: srli a2, a2, 6 ; RV64IM-NEXT: addw a2, a2, a4 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw a2, a7, a2 -; RV64IM-NEXT: mulh a4, a6, a5 -; RV64IM-NEXT: add a4, a4, a6 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: mulw a3, a4, a3 +; RV64IM-NEXT: mulw a2, a2, a7 +; RV64IM-NEXT: subw a2, a5, a2 +; RV64IM-NEXT: mulh a3, a6, a3 +; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: mulw a3, a3, a7 ; RV64IM-NEXT: subw a3, a6, a3 ; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a2, 4(a0) @@ -603,55 +574,49 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: combine_srem_sdiv: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 24(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) ; RV64IM-NEXT: lh a6, 0(a1) -; RV64IM-NEXT: lh a7, 8(a1) -; RV64IM-NEXT: lh a4, 16(a1) -; RV64IM-NEXT: lh a1, 24(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 
-; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a2, a1, a5 -; RV64IM-NEXT: add a2, a2, a1 -; RV64IM-NEXT: srli a3, a2, 63 -; RV64IM-NEXT: srai a2, a2, 6 -; RV64IM-NEXT: addw t3, a2, a3 -; RV64IM-NEXT: li t0, 95 -; RV64IM-NEXT: mulw a3, t3, t0 -; RV64IM-NEXT: subw t1, a1, a3 -; RV64IM-NEXT: mulh a3, a4, a5 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: srli a1, a3, 63 -; RV64IM-NEXT: srai a3, a3, 6 -; RV64IM-NEXT: addw a1, a3, a1 -; RV64IM-NEXT: mulw a3, a1, t0 -; RV64IM-NEXT: subw t2, a4, a3 -; RV64IM-NEXT: mulh a4, a7, a5 -; RV64IM-NEXT: add a4, a4, a7 -; RV64IM-NEXT: srli a3, a4, 63 +; RV64IM-NEXT: lh a5, 8(a1) +; RV64IM-NEXT: lh a1, 16(a1) +; RV64IM-NEXT: mulh a4, a2, a3 +; RV64IM-NEXT: add a4, a4, a2 +; RV64IM-NEXT: srli a7, a4, 63 +; RV64IM-NEXT: srai a4, a4, 6 +; RV64IM-NEXT: addw t0, a4, a7 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a4, t0, a7 +; RV64IM-NEXT: subw t1, a2, a4 +; RV64IM-NEXT: mulh a4, a1, a3 +; RV64IM-NEXT: add a4, a4, a1 +; RV64IM-NEXT: srli a2, a4, 63 ; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: mulw a4, a3, t0 -; RV64IM-NEXT: subw a4, a7, a4 -; RV64IM-NEXT: mulh a5, a6, a5 -; RV64IM-NEXT: add a5, a5, a6 -; RV64IM-NEXT: srli a2, a5, 63 -; RV64IM-NEXT: srai a5, a5, 6 -; RV64IM-NEXT: addw a2, a5, a2 -; RV64IM-NEXT: mulw a5, a2, t0 +; RV64IM-NEXT: addw a2, a4, a2 +; RV64IM-NEXT: mulw a4, a2, a7 +; RV64IM-NEXT: subw t2, a1, a4 +; RV64IM-NEXT: mulh a4, a5, a3 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: srli a1, a4, 63 +; RV64IM-NEXT: srai a4, a4, 6 +; RV64IM-NEXT: addw a1, a4, a1 +; RV64IM-NEXT: mulw a4, a1, a7 +; RV64IM-NEXT: subw a4, a5, a4 +; RV64IM-NEXT: mulh a3, a6, a3 +; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srai a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: mulw a5, a3, a7 ; RV64IM-NEXT: subw a5, a6, a5 -; RV64IM-NEXT: addw a2, a5, a2 -; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: addw a1, t2, a1 -; RV64IM-NEXT: addw a4, t1, t3 +; 
RV64IM-NEXT: addw a3, a5, a3 +; RV64IM-NEXT: addw a1, a4, a1 +; RV64IM-NEXT: addw a2, t2, a2 +; RV64IM-NEXT: addw a4, t1, t0 ; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -775,42 +740,36 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_srem_power_of_two: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a6, 16(a1) -; RV64IM-NEXT: lh a3, 8(a1) -; RV64IM-NEXT: lh a4, 0(a1) -; RV64IM-NEXT: lh a1, 24(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a5, a1, a5 -; RV64IM-NEXT: add a5, a5, a1 -; RV64IM-NEXT: srli a2, a5, 63 -; RV64IM-NEXT: srli a5, a5, 6 -; RV64IM-NEXT: addw a2, a5, a2 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: srli a2, a4, 58 -; RV64IM-NEXT: add a2, a4, a2 -; RV64IM-NEXT: andi a2, a2, -64 -; RV64IM-NEXT: subw a2, a4, a2 -; RV64IM-NEXT: srli a4, a3, 59 -; RV64IM-NEXT: add a4, a3, a4 -; RV64IM-NEXT: andi a4, a4, -32 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: srli a4, a6, 61 -; RV64IM-NEXT: add a4, a6, a4 -; RV64IM-NEXT: andi a4, a4, -8 -; RV64IM-NEXT: subw a4, a6, a4 +; RV64IM-NEXT: lh a2, 24(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI3_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3) +; RV64IM-NEXT: lh a4, 16(a1) +; RV64IM-NEXT: lh a5, 8(a1) +; RV64IM-NEXT: lh a1, 0(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a6, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a6, a3, a6 +; RV64IM-NEXT: li a3, 95 +; RV64IM-NEXT: mulw a3, a6, a3 +; RV64IM-NEXT: 
subw a2, a2, a3 +; RV64IM-NEXT: srli a3, a1, 58 +; RV64IM-NEXT: add a3, a1, a3 +; RV64IM-NEXT: andi a3, a3, -64 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: srli a3, a5, 59 +; RV64IM-NEXT: add a3, a5, a3 +; RV64IM-NEXT: andi a3, a3, -32 +; RV64IM-NEXT: subw a3, a5, a3 +; RV64IM-NEXT: srli a5, a4, 61 +; RV64IM-NEXT: add a5, a4, a5 +; RV64IM-NEXT: andi a5, a5, -8 +; RV64IM-NEXT: subw a4, a4, a5 ; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) -; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -933,60 +892,42 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_srem_one: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 24(a1) -; RV64IM-NEXT: lh a3, 8(a1) -; RV64IM-NEXT: lh a1, 16(a1) -; RV64IM-NEXT: lui a4, 1043590 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 357 -; RV64IM-NEXT: mulh a4, a1, a4 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a1, a1, a4 -; RV64IM-NEXT: lui a4, 6413 -; RV64IM-NEXT: addiw a4, a4, 1265 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1027 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1077 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 965 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 8 -; RV64IM-NEXT: addw a4, a4, a5 +; RV64IM-NEXT: lh a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3) +; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; 
RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) +; RV64IM-NEXT: li a4, 23 +; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: mulh a3, a1, a5 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI4_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI4_2)(a4) ; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: lui a4, 12375 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, -431 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1959 -; RV64IM-NEXT: mulh a4, a2, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 11 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: lui a5, 1 -; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a2, a2, a4 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: mulh a3, a6, a4 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 11 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, 1 +; RV64IM-NEXT: addiw a4, a4, 1327 +; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: subw a3, a6, a3 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a2, 6(a0) -; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a2, 4(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1104,50 +1045,38 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_i16_smax: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 8(a1) -; RV64IM-NEXT: lh a3, 24(a1) -; RV64IM-NEXT: lh a1, 16(a1) -; RV64IM-NEXT: lui a4, 1043590 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; 
RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 357 -; RV64IM-NEXT: mulh a4, a1, a4 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a1, a1, a4 -; RV64IM-NEXT: lui a4, 12375 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, -431 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1959 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 11 -; RV64IM-NEXT: addw a4, a4, a5 +; RV64IM-NEXT: lh a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI5_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI5_0)(a3) +; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: lui a5, %hi(.LCPI5_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI5_1)(a5) +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srli a3, a3, 11 +; RV64IM-NEXT: addw a3, a3, a5 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: srli a4, a2, 49 -; RV64IM-NEXT: add a4, a2, a4 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a4, a3 +; RV64IM-NEXT: srli a4, a1, 49 +; RV64IM-NEXT: add a4, a1, a4 ; RV64IM-NEXT: lui a5, 8 ; RV64IM-NEXT: and a4, a4, a5 -; RV64IM-NEXT: subw a2, a2, a4 +; RV64IM-NEXT: subw a1, a1, a4 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a1, 2(a0) ; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a1, 
4(a0) +; RV64IM-NEXT: sh a2, 4(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1335,60 +1264,42 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_srem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: ld a2, 24(a1) -; RV64IM-NEXT: ld a3, 8(a1) -; RV64IM-NEXT: ld a1, 16(a1) -; RV64IM-NEXT: lui a4, 1043590 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 357 -; RV64IM-NEXT: mulh a4, a1, a4 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srai a4, a4, 4 -; RV64IM-NEXT: add a4, a4, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a1, a1, a4 -; RV64IM-NEXT: lui a4, 6413 -; RV64IM-NEXT: addiw a4, a4, 1265 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1027 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1077 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 965 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srai a4, a4, 8 -; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: ld a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) +; RV64IM-NEXT: ld a6, 24(a1) +; RV64IM-NEXT: ld a1, 8(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srai a3, a3, 4 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) +; RV64IM-NEXT: li a4, 23 +; RV64IM-NEXT: mul a3, a3, a4 +; RV64IM-NEXT: sub a2, a2, a3 +; RV64IM-NEXT: mulh a3, a1, a5 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srai a3, a3, 8 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI6_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI6_2)(a4) ; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub 
a3, a3, a4 -; RV64IM-NEXT: lui a4, 12375 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, -431 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1959 -; RV64IM-NEXT: mulh a4, a2, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srai a4, a4, 11 -; RV64IM-NEXT: add a4, a4, a5 -; RV64IM-NEXT: lui a5, 1 -; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a1, a1, a3 +; RV64IM-NEXT: mulh a3, a6, a4 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srai a3, a3, 11 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: lui a4, 1 +; RV64IM-NEXT: addiw a4, a4, 1327 +; RV64IM-NEXT: mul a3, a3, a4 +; RV64IM-NEXT: sub a3, a6, a3 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a2, 24(a0) -; RV64IM-NEXT: sd a3, 8(a0) -; RV64IM-NEXT: sd a1, 16(a0) +; RV64IM-NEXT: sd a3, 24(a0) +; RV64IM-NEXT: sd a1, 8(a0) +; RV64IM-NEXT: sd a2, 16(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll index 5ea52028868e..35f4d0bbb30e 100644 --- a/llvm/test/CodeGen/RISCV/urem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll @@ -264,16 +264,10 @@ define i64 @dont_fold_urem_i64(i64 %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: lui a2, 2675 -; RV64IM-NEXT: addiw a2, a2, -251 -; RV64IM-NEXT: slli a2, a2, 13 -; RV64IM-NEXT: addi a2, a2, 1839 -; RV64IM-NEXT: slli a2, a2, 13 -; RV64IM-NEXT: addi a2, a2, 167 -; RV64IM-NEXT: slli a2, a2, 13 -; RV64IM-NEXT: addi a2, a2, 1505 -; RV64IM-NEXT: mulhu a1, a1, a2 +; RV64IM-NEXT: lui a1, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI6_0)(a1) +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: mulhu a1, a2, a1 ; RV64IM-NEXT: srli a1, a1, 4 ; RV64IM-NEXT: li a2, 98 ; RV64IM-NEXT: mul a1, a1, a2 
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index 1bcb90696813..c028c7d387dc 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -144,71 +144,47 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_urem_vec_1: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) ; RV64IM-NEXT: lhu a6, 24(a1) -; RV64IM-NEXT: lhu a3, 16(a1) -; RV64IM-NEXT: lhu a4, 8(a1) -; RV64IM-NEXT: lhu a1, 0(a1) -; RV64IM-NEXT: lui a5, 1423 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, -1811 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 561 -; RV64IM-NEXT: mulhu a5, a1, a5 -; RV64IM-NEXT: sub a2, a1, a5 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: srli a2, a4, 2 -; RV64IM-NEXT: lui a5, 264 -; RV64IM-NEXT: addiw a5, a5, 1057 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1057 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1057 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 133 -; RV64IM-NEXT: mulhu a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 3 -; RV64IM-NEXT: li a5, 124 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a2, a4, a2 -; RV64IM-NEXT: srli a4, a3, 1 -; RV64IM-NEXT: lui a5, 2675 -; RV64IM-NEXT: addiw a5, a5, -251 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1839 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 167 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1505 -; RV64IM-NEXT: mulhu a4, a4, a5 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: li a5, 98 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, 
a3, a4 -; RV64IM-NEXT: lui a4, 8364 -; RV64IM-NEXT: addiw a4, a4, -1977 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1907 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 453 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1213 -; RV64IM-NEXT: mulhu a4, a6, a4 -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: li a5, 1003 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a4, a6, a4 -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a3, 4(a0) -; RV64IM-NEXT: sh a2, 2(a0) -; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: lhu a5, 16(a1) +; RV64IM-NEXT: lhu a1, 8(a1) +; RV64IM-NEXT: mulhu a3, a2, a3 +; RV64IM-NEXT: sub a4, a2, a3 +; RV64IM-NEXT: srli a4, a4, 1 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw t0, a2, a3 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: mulhu a3, a3, a4 +; RV64IM-NEXT: srli a3, a3, 3 +; RV64IM-NEXT: li a7, 124 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: srli a3, a5, 1 +; RV64IM-NEXT: mulhu a3, a3, a4 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) +; RV64IM-NEXT: li a2, 98 +; RV64IM-NEXT: mulw a2, a3, a2 +; RV64IM-NEXT: subw a2, a5, a2 +; RV64IM-NEXT: mulhu a3, a6, a4 +; RV64IM-NEXT: srli a3, a3, 7 +; RV64IM-NEXT: li a4, 1003 +; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh t0, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -348,46 +324,40 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_urem_vec_2: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) +; 
RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) ; RV64IM-NEXT: lhu a6, 24(a1) ; RV64IM-NEXT: lhu a7, 16(a1) -; RV64IM-NEXT: lhu a4, 8(a1) -; RV64IM-NEXT: lhu a1, 0(a1) -; RV64IM-NEXT: lui a5, 1423 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, -1811 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 561 -; RV64IM-NEXT: mulhu a2, a1, a5 -; RV64IM-NEXT: sub a3, a1, a2 -; RV64IM-NEXT: srli a3, a3, 1 -; RV64IM-NEXT: add a2, a3, a2 +; RV64IM-NEXT: lhu a1, 8(a1) +; RV64IM-NEXT: mulhu a4, a2, a3 +; RV64IM-NEXT: sub a5, a2, a4 +; RV64IM-NEXT: srli a5, a5, 1 +; RV64IM-NEXT: add a4, a5, a4 +; RV64IM-NEXT: srli a4, a4, 6 +; RV64IM-NEXT: li a5, 95 +; RV64IM-NEXT: mulw a4, a4, a5 +; RV64IM-NEXT: subw t0, a2, a4 +; RV64IM-NEXT: mulhu a4, a1, a3 +; RV64IM-NEXT: sub a2, a1, a4 +; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: add a2, a2, a4 ; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: li a3, 95 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw t0, a1, a2 -; RV64IM-NEXT: mulhu a2, a4, a5 -; RV64IM-NEXT: sub a1, a4, a2 -; RV64IM-NEXT: srli a1, a1, 1 -; RV64IM-NEXT: add a1, a1, a2 -; RV64IM-NEXT: srli a1, a1, 6 -; RV64IM-NEXT: mulw a1, a1, a3 -; RV64IM-NEXT: subw a1, a4, a1 -; RV64IM-NEXT: mulhu a2, a7, a5 +; RV64IM-NEXT: mulw a2, a2, a5 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: mulhu a2, a7, a3 ; RV64IM-NEXT: sub a4, a7, a2 ; RV64IM-NEXT: srli a4, a4, 1 ; RV64IM-NEXT: add a2, a4, a2 ; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a2, a2, a3 +; RV64IM-NEXT: mulw a2, a2, a5 ; RV64IM-NEXT: subw a2, a7, a2 -; RV64IM-NEXT: mulhu a4, a6, a5 -; RV64IM-NEXT: sub a5, a6, a4 -; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: mulw a3, a4, a3 +; RV64IM-NEXT: mulhu a3, a6, a3 +; RV64IM-NEXT: sub a4, a6, a3 +; RV64IM-NEXT: srli a4, a4, 1 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: 
mulw a3, a3, a5 ; RV64IM-NEXT: subw a3, a6, a3 ; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a2, 4(a0) @@ -594,55 +564,49 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: combine_urem_udiv: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a2, 24(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) ; RV64IM-NEXT: lhu a6, 0(a1) ; RV64IM-NEXT: lhu a7, 8(a1) -; RV64IM-NEXT: lhu a4, 16(a1) -; RV64IM-NEXT: lhu a1, 24(a1) -; RV64IM-NEXT: lui a5, 1423 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, -1811 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 561 -; RV64IM-NEXT: mulhu a2, a1, a5 -; RV64IM-NEXT: sub a3, a1, a2 -; RV64IM-NEXT: srli a3, a3, 1 -; RV64IM-NEXT: add a2, a3, a2 -; RV64IM-NEXT: srli t3, a2, 6 +; RV64IM-NEXT: lhu a1, 16(a1) +; RV64IM-NEXT: mulhu a4, a2, a3 +; RV64IM-NEXT: sub a5, a2, a4 +; RV64IM-NEXT: srli a5, a5, 1 +; RV64IM-NEXT: add a4, a5, a4 +; RV64IM-NEXT: srli t3, a4, 6 ; RV64IM-NEXT: li t0, 95 -; RV64IM-NEXT: mulw a3, t3, t0 -; RV64IM-NEXT: subw t1, a1, a3 -; RV64IM-NEXT: mulhu a3, a4, a5 -; RV64IM-NEXT: sub a1, a4, a3 -; RV64IM-NEXT: srli a1, a1, 1 -; RV64IM-NEXT: add a1, a1, a3 -; RV64IM-NEXT: srli a1, a1, 6 -; RV64IM-NEXT: mulw a3, a1, t0 -; RV64IM-NEXT: subw t2, a4, a3 -; RV64IM-NEXT: mulhu a4, a7, a5 -; RV64IM-NEXT: sub a3, a7, a4 -; RV64IM-NEXT: srli a3, a3, 1 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: mulw a4, a3, t0 -; RV64IM-NEXT: subw a4, a7, a4 -; RV64IM-NEXT: mulhu a5, a6, a5 -; RV64IM-NEXT: sub a2, a6, a5 +; RV64IM-NEXT: mulw a5, t3, t0 +; RV64IM-NEXT: subw t1, a2, a5 +; RV64IM-NEXT: mulhu a5, a1, a3 +; RV64IM-NEXT: sub a2, a1, a5 ; RV64IM-NEXT: srli a2, a2, 1 ; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: srli a2, a2, 6 ; RV64IM-NEXT: mulw a5, a2, t0 -; RV64IM-NEXT: subw a5, a6, a5 -; RV64IM-NEXT: addw a2, a5, a2 +; 
RV64IM-NEXT: subw t2, a1, a5 +; RV64IM-NEXT: mulhu a5, a7, a3 +; RV64IM-NEXT: sub a1, a7, a5 +; RV64IM-NEXT: srli a1, a1, 1 +; RV64IM-NEXT: add a1, a1, a5 +; RV64IM-NEXT: srli a1, a1, 6 +; RV64IM-NEXT: mulw a5, a1, t0 +; RV64IM-NEXT: subw a5, a7, a5 +; RV64IM-NEXT: mulhu a3, a6, a3 +; RV64IM-NEXT: sub a4, a6, a3 +; RV64IM-NEXT: srli a4, a4, 1 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: mulw a4, a3, t0 +; RV64IM-NEXT: subw a4, a6, a4 ; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: addw a1, t2, a1 +; RV64IM-NEXT: addw a1, a5, a1 +; RV64IM-NEXT: addw a2, t2, a2 ; RV64IM-NEXT: addw a4, t1, t3 ; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -741,33 +705,27 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_power_of_two: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a2, 24(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI3_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3) ; RV64IM-NEXT: lhu a6, 16(a1) -; RV64IM-NEXT: lhu a3, 8(a1) -; RV64IM-NEXT: lhu a4, 0(a1) -; RV64IM-NEXT: lhu a1, 24(a1) -; RV64IM-NEXT: lui a5, 1423 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, -1811 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 561 -; RV64IM-NEXT: mulhu a5, a1, a5 -; RV64IM-NEXT: sub a2, a1, a5 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: andi a2, a4, 63 -; RV64IM-NEXT: andi a3, a3, 31 +; RV64IM-NEXT: lhu a5, 8(a1) +; RV64IM-NEXT: lhu a1, 0(a1) +; RV64IM-NEXT: mulhu a3, a2, a3 +; RV64IM-NEXT: sub a4, a2, a3 +; 
RV64IM-NEXT: srli a4, a4, 1 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: li a4, 95 +; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: andi a1, a1, 63 +; RV64IM-NEXT: andi a3, a5, 31 ; RV64IM-NEXT: andi a4, a6, 7 ; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) -; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -883,57 +841,39 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_one: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lhu a2, 24(a1) -; RV64IM-NEXT: lhu a3, 8(a1) -; RV64IM-NEXT: lhu a1, 16(a1) -; RV64IM-NEXT: lui a4, 3206 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 14 -; RV64IM-NEXT: addi a4, a4, 713 -; RV64IM-NEXT: mulhu a4, a1, a4 -; RV64IM-NEXT: sub a5, a1, a4 +; RV64IM-NEXT: lhu a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3) +; RV64IM-NEXT: lhu a4, 24(a1) +; RV64IM-NEXT: lhu a1, 8(a1) +; RV64IM-NEXT: mulhu a3, a2, a3 +; RV64IM-NEXT: sub a5, a2, a3 ; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a1, a1, a4 -; RV64IM-NEXT: srli a4, a3, 1 -; RV64IM-NEXT: lui a5, 6413 -; RV64IM-NEXT: addiw a5, a5, 1265 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1027 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1077 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 965 -; RV64IM-NEXT: mulhu a4, a4, a5 -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: lui a4, 1044567 -; RV64IM-NEXT: addiw a4, a4, -575 -; 
RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 14 -; RV64IM-NEXT: addi a4, a4, -861 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -179 -; RV64IM-NEXT: mulhu a4, a2, a4 -; RV64IM-NEXT: srli a4, a4, 12 -; RV64IM-NEXT: lui a5, 1 -; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a2, a2, a4 +; RV64IM-NEXT: add a3, a5, a3 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: subw a6, a2, a3 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: mulhu a3, a3, a5 +; RV64IM-NEXT: srli a3, a3, 7 +; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) +; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) +; RV64IM-NEXT: li a2, 654 +; RV64IM-NEXT: mulw a2, a3, a2 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: mulhu a2, a4, a5 +; RV64IM-NEXT: srli a2, a2, 12 +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: addiw a3, a3, 1327 +; RV64IM-NEXT: mulw a2, a2, a3 +; RV64IM-NEXT: subw a2, a4, a2 ; RV64IM-NEXT: sh zero, 0(a0) ; RV64IM-NEXT: sh a2, 6(a0) -; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a6, 4(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -1130,57 +1070,39 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: ld a2, 24(a1) -; RV64IM-NEXT: ld a3, 8(a1) -; RV64IM-NEXT: ld a1, 16(a1) -; RV64IM-NEXT: lui a4, 3206 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 14 -; RV64IM-NEXT: addi a4, a4, 713 -; RV64IM-NEXT: mulhu a4, a1, a4 -; RV64IM-NEXT: sub a5, a1, a4 +; RV64IM-NEXT: ld a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) +; RV64IM-NEXT: ld a4, 
24(a1) +; RV64IM-NEXT: ld a1, 8(a1) +; RV64IM-NEXT: mulhu a3, a2, a3 +; RV64IM-NEXT: sub a5, a2, a3 ; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a1, a1, a4 -; RV64IM-NEXT: srli a4, a3, 1 -; RV64IM-NEXT: lui a5, 6413 -; RV64IM-NEXT: addiw a5, a5, 1265 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1027 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1077 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 965 -; RV64IM-NEXT: mulhu a4, a4, a5 -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a3, a3, a4 -; RV64IM-NEXT: lui a4, 1044567 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 14 -; RV64IM-NEXT: addi a4, a4, -861 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -179 -; RV64IM-NEXT: mulhu a4, a2, a4 -; RV64IM-NEXT: srli a4, a4, 12 -; RV64IM-NEXT: lui a5, 1 -; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: add a3, a5, a3 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) +; RV64IM-NEXT: mul a3, a3, a6 +; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: mulhu a3, a3, a5 +; RV64IM-NEXT: srli a3, a3, 7 +; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) +; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) +; RV64IM-NEXT: li a2, 654 +; RV64IM-NEXT: mul a2, a3, a2 +; RV64IM-NEXT: sub a1, a1, a2 +; RV64IM-NEXT: mulhu a2, a4, a5 +; RV64IM-NEXT: srli a2, a2, 12 +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: addiw a3, a3, 1327 +; RV64IM-NEXT: mul a2, a2, a3 +; RV64IM-NEXT: sub a2, a4, a2 ; RV64IM-NEXT: sd zero, 0(a0) ; RV64IM-NEXT: sd a2, 24(a0) -; RV64IM-NEXT: sd a3, 8(a0) -; RV64IM-NEXT: sd a1, 16(a0) +; RV64IM-NEXT: sd a1, 
8(a0) +; RV64IM-NEXT: sd a6, 16(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index 5282bd532caf..7ef595344faf 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -1534,34 +1534,16 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 15 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 2049 -; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, -1147 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 13 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 983 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 14 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 655 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi t0, a0, 1475 +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_0) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld t0, %lo(.LCPI11_0)(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 1192 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 381 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a6, a0, -2048 -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 1048248 -; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 1311 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -1147 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 13 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 983 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 15 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a0, 1311 -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 512 -; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 73 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 15 -; 
LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -1311 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 1147 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 14 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a0, -1967 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 1 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 11 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a4, 12 @@ -1585,34 +1567,16 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 16(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 15 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(sp) -; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 2049 -; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, -1147 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 13 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 983 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 14 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 655 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi t0, a0, 1475 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld t0, %lo(.LCPI11_0)(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 1192 ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 381 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a6, a0, -2048 -; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 1048248 -; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 1311 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -1147 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 13 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 983 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a0, 1311 -; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 512 -; LP64-LP64F-LP64D-WITHFP-NEXT: 
addiw a0, a0, 73 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -1311 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 1147 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 14 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a0, -1967 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 1 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 11 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a4, 12 From 8811a87e8c762e5b1e3faefc15e419e7b697bb26 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 30 Dec 2021 23:47:55 -0800 Subject: [PATCH 268/992] [RISCV] Use defvar to simplify some code. NFC Rather than wrapping a def around a list, we can just make a defvar of the list. --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 175 +++++++++--------- 1 file changed, 83 insertions(+), 92 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 5b85d5adb188..7c161a7e37d1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -71,25 +71,19 @@ def V_MF4 : LMULInfo<0b110, 2, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "M def V_MF2 : LMULInfo<0b111, 4, VR, VR, VR, VR,/*NoVReg*/VR, "MF2">; // Used to iterate over all possible LMULs. -def MxList { - list m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; -} +defvar MxList = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; + // Used for widening and narrowing instructions as it doesn't contain M8. 
-def MxListW { - list m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4]; -} +defvar MxListW = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4]; + // Use for zext/sext.vf2 -def MxListVF2 { - list m = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; -} +defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; + // Use for zext/sext.vf4 -def MxListVF4 { - list m = [V_MF2, V_M1, V_M2, V_M4, V_M8]; -} +defvar MxListVF4 = [V_MF2, V_M1, V_M2, V_M4, V_M8]; + // Use for zext/sext.vf8 -def MxListVF8 { - list m = [V_M1, V_M2, V_M4, V_M8]; -} +defvar MxListVF8 = [V_M1, V_M2, V_M4, V_M8]; class FPR_Info { RegisterClass fprclass = regclass; @@ -100,13 +94,10 @@ def SCALAR_F16 : FPR_Info; def SCALAR_F32 : FPR_Info; def SCALAR_F64 : FPR_Info; -def FPList { - list fpinfo = [SCALAR_F16, SCALAR_F32, SCALAR_F64]; -} +defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64]; + // Used for widening instructions. It excludes F64. -def FPListW { - list fpinfo = [SCALAR_F16, SCALAR_F32]; -} +defvar FPListW = [SCALAR_F16, SCALAR_F32]; class MxSet { list m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], @@ -1504,7 +1495,7 @@ multiclass VPseudoVSFS_M { } multiclass VPseudoVID_V { - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoNullaryNoMask, Sched<[WriteVMIdxV, ReadVMask]>; @@ -1524,7 +1515,7 @@ multiclass VPseudoNullaryPseudoM { multiclass VPseudoVIOT_M { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; @@ -1535,7 +1526,7 @@ multiclass VPseudoVIOT_M { } multiclass VPseudoVCPR_V { - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in def _VM # "_" # m.MX : VPseudoUnaryAnyMask, Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; @@ -1596,12 +1587,12 @@ multiclass VPseudoTiedBinary { - foreach m = MxList.m in + foreach m = MxList in defm _VV : VPseudoBinary; } 
multiclass VPseudoVGTR_VV_EEW { - foreach m = MxList.m in { + foreach m = MxList in { foreach sew = EEWList in { defvar octuple_lmul = m.octuple; // emul = lmul * eew / sew @@ -1617,38 +1608,38 @@ multiclass VPseudoVGTR_VV_EEW { } multiclass VPseudoBinaryV_VX { - foreach m = MxList.m in + foreach m = MxList in defm "_VX" : VPseudoBinary; } multiclass VPseudoVSLD1_VX { - foreach m = MxList.m in + foreach m = MxList in defm "_VX" : VPseudoBinary, Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>; } multiclass VPseudoBinaryV_VF { - foreach m = MxList.m in - foreach f = FPList.fpinfo in + foreach m = MxList in + foreach f = FPList in defm "_V" # f.FX : VPseudoBinary; } multiclass VPseudoVSLD1_VF { - foreach m = MxList.m in - foreach f = FPList.fpinfo in + foreach m = MxList in + foreach f = FPList in defm "_V" # f.FX : VPseudoBinary, Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>; } multiclass VPseudoBinaryV_VI { - foreach m = MxList.m in + foreach m = MxList in defm _VI : VPseudoBinary; } multiclass VPseudoVALU_MM { - foreach m = MxList.m in + foreach m = MxList in let VLMul = m.value in { def "_MM_" # m.MX : VPseudoBinaryNoMask, Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>; @@ -1663,27 +1654,27 @@ multiclass VPseudoVALU_MM { // at least 1, and the overlap is in the highest-numbered part of the // destination register group is legal. Otherwise, it is illegal. 
multiclass VPseudoBinaryW_VV { - foreach m = MxListW.m in + foreach m = MxListW in defm _VV : VPseudoBinary; } multiclass VPseudoBinaryW_VX { - foreach m = MxListW.m in + foreach m = MxListW in defm "_VX" : VPseudoBinary; } multiclass VPseudoBinaryW_VF { - foreach m = MxListW.m in - foreach f = FPListW.fpinfo in + foreach m = MxListW in + foreach f = FPListW in defm "_V" # f.FX : VPseudoBinary; } multiclass VPseudoBinaryW_WV { - foreach m = MxListW.m in { + foreach m = MxListW in { defm _WV : VPseudoBinary; defm _WV : VPseudoTiedBinary; } multiclass VPseudoBinaryW_WF { - foreach m = MxListW.m in - foreach f = FPListW.fpinfo in + foreach m = MxListW in + foreach f = FPListW in defm "_W" # f.FX : VPseudoBinary; } @@ -1709,19 +1700,19 @@ multiclass VPseudoBinaryW_WF { // "The destination EEW is smaller than the source EEW and the overlap is in the // lowest-numbered part of the source register group." multiclass VPseudoBinaryV_WV { - foreach m = MxListW.m in + foreach m = MxListW in defm _WV : VPseudoBinary; } multiclass VPseudoBinaryV_WX { - foreach m = MxListW.m in + foreach m = MxListW in defm _WX : VPseudoBinary; } multiclass VPseudoBinaryV_WI { - foreach m = MxListW.m in + foreach m = MxListW in defm _WI : VPseudoBinary; } @@ -1731,7 +1722,7 @@ multiclass VPseudoBinaryV_WI { // For vadc and vsbc, CarryIn == 1 and CarryOut == 0 multiclass VPseudoBinaryV_VM { - foreach m = MxList.m in + foreach m = MxList in def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn { - foreach m = MxList.m in + foreach m = MxList in def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn.R, m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, @@ -1760,7 +1751,7 @@ multiclass VPseudoVMRG_FM { multiclass VPseudoBinaryV_IM { - foreach m = MxList.m in + foreach m = MxList in def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn, Sched<[WriteVIMovV, ReadVIMovV]>; @@ -1782,8 +1773,8 @@ multiclass VPseudoUnaryVMV_V_X_I { } multiclass VPseudoVMV_F { - 
foreach m = MxList.m in { - foreach f = FPList.fpinfo in { + foreach m = MxList in { + foreach f = FPList in { let VLMul = m.value in { def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask, @@ -1794,7 +1785,7 @@ multiclass VPseudoVMV_F { } multiclass VPseudoVCLS_V { - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; @@ -1805,7 +1796,7 @@ multiclass VPseudoVCLS_V { } multiclass VPseudoVSQR_V { - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; @@ -1816,7 +1807,7 @@ multiclass VPseudoVSQR_V { } multiclass VPseudoVRCP_V { - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; @@ -1828,7 +1819,7 @@ multiclass VPseudoVRCP_V { multiclass PseudoVEXT_VF2 { defvar constraints = "@earlyclobber $rd"; - foreach m = MxListVF2.m in + foreach m = MxListVF2 in { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, @@ -1842,7 +1833,7 @@ multiclass PseudoVEXT_VF2 { multiclass PseudoVEXT_VF4 { defvar constraints = "@earlyclobber $rd"; - foreach m = MxListVF4.m in + foreach m = MxListVF4 in { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, @@ -1856,7 +1847,7 @@ multiclass PseudoVEXT_VF4 { multiclass PseudoVEXT_VF8 { defvar constraints = "@earlyclobber $rd"; - foreach m = MxListVF8.m in + foreach m = MxListVF8 in { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, @@ -1880,28 +1871,28 @@ multiclass PseudoVEXT_VF8 { // With LMUL<=1 the source and dest occupy a single register so any overlap // is in the lowest-numbered part. 
multiclass VPseudoBinaryM_VV { - foreach m = MxList.m in + foreach m = MxList in defm _VV : VPseudoBinaryM; } multiclass VPseudoBinaryM_VX { - foreach m = MxList.m in + foreach m = MxList in defm "_VX" : VPseudoBinaryM; } multiclass VPseudoBinaryM_VF { - foreach m = MxList.m in - foreach f = FPList.fpinfo in + foreach m = MxList in + foreach f = FPList in defm "_V" # f.FX : VPseudoBinaryM; } multiclass VPseudoBinaryM_VI { - foreach m = MxList.m in + foreach m = MxList in defm _VI : VPseudoBinaryM; } @@ -2200,26 +2191,26 @@ multiclass VPseudoTernaryWithPolicy { - foreach m = MxList.m in { + foreach m = MxList in { defm _VV : VPseudoTernaryWithPolicy; } } multiclass VPseudoTernaryV_VX { - foreach m = MxList.m in + foreach m = MxList in defm _VX : VPseudoTernary; } multiclass VPseudoTernaryV_VX_AAXA { - foreach m = MxList.m in + foreach m = MxList in defm "_VX" : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryV_VF_AAXA { - foreach m = MxList.m in - foreach f = FPList.fpinfo in + foreach m = MxList in + foreach f = FPList in defm "_V" # f.FX : VPseudoTernaryWithPolicy; @@ -2227,28 +2218,28 @@ multiclass VPseudoTernaryV_VF_AAXA { multiclass VPseudoTernaryW_VV { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW.m in + foreach m = MxListW in defm _VV : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryW_VX { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW.m in + foreach m = MxListW in defm "_VX" : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryW_VF { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW.m in - foreach f = FPListW.fpinfo in + foreach m = MxListW in + foreach f = FPListW in defm "_V" # f.FX : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryV_VI { - foreach m = MxList.m in + foreach m = MxList in defm _VI : VPseudoTernary; } @@ -2328,35 +2319,35 @@ multiclass VPseudoVCMPM_VX_VI { } multiclass VPseudoVRED_VS { - foreach m = MxList.m in { + foreach m = MxList in { defm _VS : VPseudoTernary, 
Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV, ReadVIRedV, ReadVMask]>; } } multiclass VPseudoVWRED_VS { - foreach m = MxList.m in { + foreach m = MxList in { defm _VS : VPseudoTernary, Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVMask]>; } } multiclass VPseudoVFRED_VS { - foreach m = MxList.m in { + foreach m = MxList in { defm _VS : VPseudoTernary, Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV, ReadVFRedV, ReadVMask]>; } } multiclass VPseudoVFREDO_VS { - foreach m = MxList.m in { + foreach m = MxList in { defm _VS : VPseudoTernary, Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVMask]>; } } multiclass VPseudoVFWRED_VS { - foreach m = MxList.m in { + foreach m = MxList in { defm _VS : VPseudoTernary, Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVMask]>; } @@ -2374,61 +2365,61 @@ multiclass VPseudoConversion, Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>; } multiclass VPseudoVCVTF_V { - foreach m = MxList.m in + foreach m = MxList in defm _V : VPseudoConversion, Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>; } multiclass VPseudoConversionW_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW.m in + foreach m = MxListW in defm _V : VPseudoConversion; } multiclass VPseudoVWCVTI_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList.m[0-5] in + foreach m = MxList[0-5] in defm _V : VPseudoConversion, Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>; } multiclass VPseudoVWCVTF_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList.m[0-5] in + foreach m = MxList[0-5] in defm _V : VPseudoConversion, Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>; } multiclass VPseudoVWCVTD_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList.m[0-5] in + foreach m = MxList[0-5] in defm _V : VPseudoConversion, Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>; } multiclass VPseudoVNCVTI_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList.m[0-5] in + 
foreach m = MxList[0-5] in defm _W : VPseudoConversion, Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>; } multiclass VPseudoVNCVTF_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList.m[0-5] in + foreach m = MxList[0-5] in defm _W : VPseudoConversion, Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>; } multiclass VPseudoVNCVTD_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW.m in + foreach m = MxListW in defm _W : VPseudoConversion, Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>; } @@ -3782,7 +3773,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in { def PseudoVRELOAD_M8 : VPseudo; } -foreach lmul = MxList.m in { +foreach lmul = MxList in { foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in { @@ -4345,7 +4336,7 @@ defm PseudoVID : VPseudoVID_V; let Predicates = [HasVInstructions] in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - foreach m = MxList.m in { + foreach m = MxList in { let VLMul = m.value in { let HasSEWOp = 1, BaseInstr = VMV_X_S in def PseudoVMV_X_S # "_" # m.MX: @@ -4371,8 +4362,8 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let Predicates = [HasVInstructionsAnyF] in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - foreach m = MxList.m in { - foreach f = FPList.fpinfo in { + foreach m = MxList in { + foreach f = FPList in { let VLMul = m.value in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in def "PseudoVFMV_" # f.FX # "_S_" # m.MX : From 6f45fe9851c673883b3a258351ee4997aa2c028c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Dec 2021 00:22:52 -0800 Subject: [PATCH 269/992] [RISCV] Use MxListW instead of MxList[0-5]. NFC Better to use the named list instead of assuming the size of MxList. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 7c161a7e37d1..dca3f5318812 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2384,35 +2384,35 @@ multiclass VPseudoConversionW_V { multiclass VPseudoVWCVTI_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList[0-5] in + foreach m = MxListW in defm _V : VPseudoConversion, Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>; } multiclass VPseudoVWCVTF_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList[0-5] in + foreach m = MxListW in defm _V : VPseudoConversion, Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>; } multiclass VPseudoVWCVTD_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList[0-5] in + foreach m = MxListW in defm _V : VPseudoConversion, Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>; } multiclass VPseudoVNCVTI_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList[0-5] in + foreach m = MxListW in defm _W : VPseudoConversion, Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>; } multiclass VPseudoVNCVTF_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxList[0-5] in + foreach m = MxListW in defm _W : VPseudoConversion, Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>; } From 249a5fb005ea27b57d12fc4425d6f1039d85c1cb Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 30 Dec 2021 15:29:00 +0100 Subject: [PATCH 270/992] [lldb/qemu] Support setting arg0 of the debugged program Just what it says on the box. 
--- .../source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp | 4 ++++ lldb/test/API/qemu/TestQemuLaunch.py | 8 ++++++++ lldb/test/API/qemu/qemu.py | 1 + 3 files changed, 13 insertions(+) diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp index 572a5b39985e..84e10042a97c 100644 --- a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp @@ -175,6 +175,10 @@ lldb::ProcessSP PlatformQemuUser::DebugProcess(ProcessLaunchInfo &launch_info, } while (FileSystem::Instance().Exists(socket_path)); Args args({qemu.GetPath(), "-g", socket_path}); + if (!launch_info.GetArg0().empty()) { + args.AppendArgument("-0"); + args.AppendArgument(launch_info.GetArg0()); + } args.AppendArguments(GetGlobalProperties().GetEmulatorArgs()); args.AppendArgument("--"); args.AppendArgument(launch_info.GetExecutableFile().GetPath()); diff --git a/lldb/test/API/qemu/TestQemuLaunch.py b/lldb/test/API/qemu/TestQemuLaunch.py index 01c4143c9e77..e27d7a70fa0b 100644 --- a/lldb/test/API/qemu/TestQemuLaunch.py +++ b/lldb/test/API/qemu/TestQemuLaunch.py @@ -241,3 +241,11 @@ def cleanup(): "%s=from platform,%s=from target" % (var(1), var(2))) self.assertEqual(state["environ"]["QEMU_UNSET_ENV"], "%s,%s,QEMU_SET_ENV,QEMU_UNSET_ENV" % (var(3), var(4))) + + def test_arg0(self): + target = self._create_target() + self.runCmd("settings set target.arg0 ARG0") + state = self._run_and_get_state(target) + + self.assertEqual(state["program"], self.getBuildArtifact()) + self.assertEqual(state["0"], "ARG0") diff --git a/lldb/test/API/qemu/qemu.py b/lldb/test/API/qemu/qemu.py index a74976881cbc..05fbfddc017b 100755 --- a/lldb/test/API/qemu/qemu.py +++ b/lldb/test/API/qemu/qemu.py @@ -56,6 +56,7 @@ def main(): parser = argparse.ArgumentParser(description=_description, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-g', metavar="unix-socket", 
required=True) + parser.add_argument('-0', metavar="arg0") parser.add_argument('-fake-arg', dest="fake-arg") parser.add_argument('program', help="The program to 'emulate'.") parser.add_argument("args", nargs=argparse.REMAINDER) From 866b195cb9d7205de25d5df1eeb1cbf7229619f4 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 31 Dec 2021 11:27:15 +0000 Subject: [PATCH 271/992] [AMDGPU] Regenerate checks for waitcnt-overflow.mir --- llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir | 440 +++++++++--------- 1 file changed, 220 insertions(+), 220 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir index eb8dc2775989..3b48d23cd38b 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir @@ -21,60 +21,60 @@ body: | ; GFX9-LABEL: name: max-counter-lgkmcnt ; GFX9: S_WAITCNT 0 - ; GFX9: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec - ; GFX9: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec - ; GFX9: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec - ; GFX9: $vgpr6_vgpr7 = DS_READ2_B32_gfx9 renamable $vgpr99, 6, 7, 0, implicit $exec - ; GFX9: $vgpr8_vgpr9 = DS_READ2_B32_gfx9 renamable $vgpr99, 8, 9, 0, implicit $exec - ; GFX9: $vgpr10_vgpr11 = DS_READ2_B32_gfx9 renamable $vgpr99, 10, 11, 0, implicit $exec - ; GFX9: $vgpr12_vgpr13 = DS_READ2_B32_gfx9 renamable $vgpr99, 12, 13, 0, implicit $exec - ; GFX9: $vgpr14_vgpr15 = DS_READ2_B32_gfx9 renamable $vgpr99, 14, 15, 0, implicit $exec - ; GFX9: $vgpr16_vgpr17 = DS_READ2_B32_gfx9 renamable $vgpr99, 16, 17, 0, implicit $exec - ; GFX9: $vgpr18_vgpr19 = DS_READ2_B32_gfx9 renamable $vgpr99, 18, 19, 0, implicit $exec - ; GFX9: $vgpr20_vgpr21 = DS_READ2_B32_gfx9 renamable $vgpr99, 20, 21, 0, implicit $exec - ; GFX9: $vgpr22_vgpr23 = DS_READ2_B32_gfx9 renamable $vgpr99, 22, 23, 0, implicit $exec - ; GFX9: $vgpr24_vgpr25 = DS_READ2_B32_gfx9 
renamable $vgpr99, 24, 25, 0, implicit $exec - ; GFX9: $vgpr26_vgpr27 = DS_READ2_B32_gfx9 renamable $vgpr99, 26, 27, 0, implicit $exec - ; GFX9: $vgpr28_vgpr29 = DS_READ2_B32_gfx9 renamable $vgpr99, 28, 29, 0, implicit $exec - ; GFX9: $vgpr30_vgpr31 = DS_READ2_B32_gfx9 renamable $vgpr99, 30, 31, 0, implicit $exec - ; GFX9: $vgpr32_vgpr33 = DS_READ2_B32_gfx9 renamable $vgpr99, 32, 33, 0, implicit $exec - ; GFX9: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec - ; GFX9: S_WAITCNT 52863 - ; GFX9: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec - ; GFX9: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec - ; GFX9: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec - ; GFX9: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec + ; GFX9-NEXT: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec + ; GFX9-NEXT: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec + ; GFX9-NEXT: $vgpr6_vgpr7 = DS_READ2_B32_gfx9 renamable $vgpr99, 6, 7, 0, implicit $exec + ; GFX9-NEXT: $vgpr8_vgpr9 = DS_READ2_B32_gfx9 renamable $vgpr99, 8, 9, 0, implicit $exec + ; GFX9-NEXT: $vgpr10_vgpr11 = DS_READ2_B32_gfx9 renamable $vgpr99, 10, 11, 0, implicit $exec + ; GFX9-NEXT: $vgpr12_vgpr13 = DS_READ2_B32_gfx9 renamable $vgpr99, 12, 13, 0, implicit $exec + ; GFX9-NEXT: $vgpr14_vgpr15 = DS_READ2_B32_gfx9 renamable $vgpr99, 14, 15, 0, implicit $exec + ; GFX9-NEXT: $vgpr16_vgpr17 = DS_READ2_B32_gfx9 renamable $vgpr99, 16, 17, 0, implicit $exec + ; GFX9-NEXT: $vgpr18_vgpr19 = DS_READ2_B32_gfx9 renamable $vgpr99, 18, 19, 0, implicit $exec + ; GFX9-NEXT: $vgpr20_vgpr21 = DS_READ2_B32_gfx9 renamable $vgpr99, 20, 21, 0, implicit $exec + ; GFX9-NEXT: $vgpr22_vgpr23 = DS_READ2_B32_gfx9 renamable $vgpr99, 22, 23, 0, implicit $exec + ; 
GFX9-NEXT: $vgpr24_vgpr25 = DS_READ2_B32_gfx9 renamable $vgpr99, 24, 25, 0, implicit $exec + ; GFX9-NEXT: $vgpr26_vgpr27 = DS_READ2_B32_gfx9 renamable $vgpr99, 26, 27, 0, implicit $exec + ; GFX9-NEXT: $vgpr28_vgpr29 = DS_READ2_B32_gfx9 renamable $vgpr99, 28, 29, 0, implicit $exec + ; GFX9-NEXT: $vgpr30_vgpr31 = DS_READ2_B32_gfx9 renamable $vgpr99, 30, 31, 0, implicit $exec + ; GFX9-NEXT: $vgpr32_vgpr33 = DS_READ2_B32_gfx9 renamable $vgpr99, 32, 33, 0, implicit $exec + ; GFX9-NEXT: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 52863 + ; GFX9-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: max-counter-lgkmcnt ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GFX10: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec - ; GFX10: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec - ; GFX10: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec - ; GFX10: $vgpr6_vgpr7 = DS_READ2_B32_gfx9 renamable $vgpr99, 6, 7, 0, implicit $exec - ; GFX10: $vgpr8_vgpr9 = DS_READ2_B32_gfx9 renamable $vgpr99, 8, 9, 0, implicit $exec - ; GFX10: $vgpr10_vgpr11 = DS_READ2_B32_gfx9 renamable $vgpr99, 10, 11, 0, implicit $exec - ; GFX10: $vgpr12_vgpr13 = DS_READ2_B32_gfx9 renamable $vgpr99, 12, 13, 0, implicit $exec - ; GFX10: $vgpr14_vgpr15 = DS_READ2_B32_gfx9 renamable $vgpr99, 14, 15, 0, implicit $exec - ; GFX10: $vgpr16_vgpr17 = DS_READ2_B32_gfx9 renamable $vgpr99, 16, 17, 0, implicit $exec - ; GFX10: $vgpr18_vgpr19 = DS_READ2_B32_gfx9 renamable $vgpr99, 18, 19, 0, implicit $exec - ; GFX10: $vgpr20_vgpr21 
= DS_READ2_B32_gfx9 renamable $vgpr99, 20, 21, 0, implicit $exec - ; GFX10: $vgpr22_vgpr23 = DS_READ2_B32_gfx9 renamable $vgpr99, 22, 23, 0, implicit $exec - ; GFX10: $vgpr24_vgpr25 = DS_READ2_B32_gfx9 renamable $vgpr99, 24, 25, 0, implicit $exec - ; GFX10: $vgpr26_vgpr27 = DS_READ2_B32_gfx9 renamable $vgpr99, 26, 27, 0, implicit $exec - ; GFX10: $vgpr28_vgpr29 = DS_READ2_B32_gfx9 renamable $vgpr99, 28, 29, 0, implicit $exec - ; GFX10: $vgpr30_vgpr31 = DS_READ2_B32_gfx9 renamable $vgpr99, 30, 31, 0, implicit $exec - ; GFX10: $vgpr32_vgpr33 = DS_READ2_B32_gfx9 renamable $vgpr99, 32, 33, 0, implicit $exec - ; GFX10: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec - ; GFX10: S_WAITCNT 53631 - ; GFX10: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec - ; GFX10: S_WAITCNT 53375 - ; GFX10: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec - ; GFX10: S_WAITCNT 53119 - ; GFX10: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec - ; GFX10: S_WAITCNT 52863 - ; GFX10: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec + ; GFX10-NEXT: $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec + ; GFX10-NEXT: $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec + ; GFX10-NEXT: $vgpr6_vgpr7 = DS_READ2_B32_gfx9 renamable $vgpr99, 6, 7, 0, implicit $exec + ; GFX10-NEXT: $vgpr8_vgpr9 = DS_READ2_B32_gfx9 renamable $vgpr99, 8, 9, 0, implicit $exec + ; GFX10-NEXT: $vgpr10_vgpr11 = DS_READ2_B32_gfx9 renamable $vgpr99, 10, 11, 0, implicit $exec + ; GFX10-NEXT: $vgpr12_vgpr13 = DS_READ2_B32_gfx9 renamable $vgpr99, 12, 13, 0, implicit $exec + ; GFX10-NEXT: $vgpr14_vgpr15 = DS_READ2_B32_gfx9 renamable $vgpr99, 14, 15, 0, implicit $exec + ; GFX10-NEXT: $vgpr16_vgpr17 
= DS_READ2_B32_gfx9 renamable $vgpr99, 16, 17, 0, implicit $exec + ; GFX10-NEXT: $vgpr18_vgpr19 = DS_READ2_B32_gfx9 renamable $vgpr99, 18, 19, 0, implicit $exec + ; GFX10-NEXT: $vgpr20_vgpr21 = DS_READ2_B32_gfx9 renamable $vgpr99, 20, 21, 0, implicit $exec + ; GFX10-NEXT: $vgpr22_vgpr23 = DS_READ2_B32_gfx9 renamable $vgpr99, 22, 23, 0, implicit $exec + ; GFX10-NEXT: $vgpr24_vgpr25 = DS_READ2_B32_gfx9 renamable $vgpr99, 24, 25, 0, implicit $exec + ; GFX10-NEXT: $vgpr26_vgpr27 = DS_READ2_B32_gfx9 renamable $vgpr99, 26, 27, 0, implicit $exec + ; GFX10-NEXT: $vgpr28_vgpr29 = DS_READ2_B32_gfx9 renamable $vgpr99, 28, 29, 0, implicit $exec + ; GFX10-NEXT: $vgpr30_vgpr31 = DS_READ2_B32_gfx9 renamable $vgpr99, 30, 31, 0, implicit $exec + ; GFX10-NEXT: $vgpr32_vgpr33 = DS_READ2_B32_gfx9 renamable $vgpr99, 32, 33, 0, implicit $exec + ; GFX10-NEXT: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec + ; GFX10-NEXT: S_WAITCNT 53631 + ; GFX10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_WAITCNT 53375 + ; GFX10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + ; GFX10-NEXT: S_WAITCNT 53119 + ; GFX10-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec + ; GFX10-NEXT: S_WAITCNT 52863 + ; GFX10-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec @@ -109,155 +109,155 @@ body: | ; GFX9-LABEL: name: max-counter-vmcnt ; GFX9: S_WAITCNT 0 - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec - ; GFX9: $vgpr2 = 
BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec - ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec - ; GFX9: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec - ; GFX9: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, implicit $exec - ; GFX9: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec - ; GFX9: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec - ; GFX9: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec - ; GFX9: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec - ; GFX9: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec - ; GFX9: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec - ; GFX9: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec - ; GFX9: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec - ; GFX9: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec - ; GFX9: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec - ; GFX9: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec - ; GFX9: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec - ; GFX9: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec - ; GFX9: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec - ; GFX9: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit 
$exec - ; GFX9: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec - ; GFX9: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec - ; GFX9: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, implicit $exec - ; GFX9: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec - ; GFX9: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec - ; GFX9: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec - ; GFX9: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec - ; GFX9: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec - ; GFX9: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec - ; GFX9: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec - ; GFX9: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec - ; GFX9: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec - ; GFX9: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec - ; GFX9: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec - ; GFX9: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec - ; GFX9: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec - ; GFX9: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec - ; GFX9: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec - ; GFX9: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec - ; GFX9: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec - ; GFX9: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec - ; GFX9: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, implicit $exec - ; GFX9: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec - ; GFX9: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec - ; GFX9: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec - ; GFX9: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec - ; GFX9: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec - ; GFX9: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec - ; GFX9: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec - ; GFX9: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec - ; GFX9: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec - ; GFX9: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec - ; GFX9: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec - ; GFX9: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec - ; GFX9: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec - ; GFX9: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec - ; GFX9: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, 
implicit $exec - ; GFX9: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, implicit $exec - ; GFX9: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec - ; GFX9: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec - ; GFX9: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec - ; GFX9: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec - ; GFX9: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec - ; GFX9: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec - ; GFX9: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec - ; GFX9: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec - ; GFX9: S_WAITCNT 53118 - ; GFX9: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec - ; GFX9: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec - ; GFX9: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec - ; GFX9: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr4, 20, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec + ; GFX9-NEXT: 
$vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 
0, 0, implicit $exec + ; GFX9-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 53118 + ; GFX9-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: max-counter-vmcnt ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GFX10: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec - ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec - ; GFX10: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec - ; GFX10: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec - ; GFX10: $vgpr5 = 
BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, implicit $exec - ; GFX10: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec - ; GFX10: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec - ; GFX10: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec - ; GFX10: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec - ; GFX10: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec - ; GFX10: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec - ; GFX10: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec - ; GFX10: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec - ; GFX10: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec - ; GFX10: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec - ; GFX10: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec - ; GFX10: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec - ; GFX10: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec - ; GFX10: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec - ; GFX10: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit $exec - ; GFX10: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec - ; GFX10: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec - ; GFX10: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 
92, 0, 0, 0, implicit $exec - ; GFX10: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec - ; GFX10: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec - ; GFX10: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec - ; GFX10: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec - ; GFX10: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec - ; GFX10: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec - ; GFX10: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec - ; GFX10: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec - ; GFX10: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec - ; GFX10: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec - ; GFX10: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec - ; GFX10: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec - ; GFX10: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec - ; GFX10: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec - ; GFX10: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec - ; GFX10: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec - ; GFX10: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec - ; GFX10: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec - ; GFX10: 
$vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, implicit $exec - ; GFX10: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec - ; GFX10: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec - ; GFX10: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec - ; GFX10: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec - ; GFX10: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec - ; GFX10: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec - ; GFX10: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec - ; GFX10: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec - ; GFX10: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec - ; GFX10: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec - ; GFX10: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec - ; GFX10: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec - ; GFX10: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec - ; GFX10: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec - ; GFX10: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, implicit $exec - ; GFX10: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, implicit $exec - ; GFX10: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec - ; GFX10: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec - ; GFX10: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec - ; GFX10: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec - ; GFX10: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec - ; GFX10: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec - ; GFX10: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec - ; GFX10: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec - ; GFX10: S_WAITCNT 65406 - ; GFX10: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec - ; GFX10: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec - ; GFX10: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec - ; GFX10: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec + ; GFX10-NEXT: 
$vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr4, 168, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr60 
= BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec + ; GFX10-NEXT: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec + ; GFX10-NEXT: S_WAITCNT 65406 + ; GFX10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX10-NEXT: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + ; GFX10-NEXT: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec @@ -341,29 +341,29 @@ body: | ; GFX9-LABEL: name: max-counter-expcnt ; GFX9: S_WAITCNT 0 - ; GFX9: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 
15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX9: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX9: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX9-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: max-counter-expcnt ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GFX10: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit 
$exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GFX10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0 EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec From 2edcde00cb396cc17d8d8b171a6ebaa97fd30e59 Mon Sep 17 00:00:00 2001 From: Random Date: Mon, 27 Dec 2021 16:12:26 +0300 Subject: [PATCH 272/992] [MIPS] Add -mfix4300 flag to enable vr4300 mulmul bugfix pass Early revisions of the VR4300 have a hardware bug where two consecutive multiplications can produce an incorrect result in the second multiply. This revision adds the `-mfix4300` flag to llvm (and clang) which, when passed, provides a software fix for this issue. More precise description of the "mulmul" bug: ``` mul.[s,d] fd,fs,ft mul.[s,d] fd,fs,ft or [D]MULT[U] rs,rt ``` When the above sequence is executed by the CPU, if at least one of the source operands of the first mul instruction happens to be `sNaN`, `0` or `Infinity`, then the second mul instruction may produce an incorrect result. This can happen both if the two mul instructions are next to each other and if the first one is in a delay slot and the second is the first instruction of the branch target. 
Description of the fix: This fix adds a backend pass to llvm which scans for mul instructions in each basic block and inserts a nop whenever the following conditions are met: - The current instruction is a single or double-precision floating-point mul instruction. - The next instruction is either a mul instruction (any kind) or a branch instruction. Differential Revision: https://reviews.llvm.org/D116238 --- clang/include/clang/Driver/Options.td | 1 + clang/lib/Driver/ToolChains/Clang.cpp | 5 + llvm/lib/Target/Mips/CMakeLists.txt | 1 + llvm/lib/Target/Mips/Mips.h | 2 + llvm/lib/Target/Mips/MipsMulMulBugPass.cpp | 134 +++++++++++++++++++++ llvm/lib/Target/Mips/MipsTargetMachine.cpp | 10 ++ llvm/test/CodeGen/Mips/vr4300-mulbranch.ll | 27 +++++ llvm/test/CodeGen/Mips/vr4300-mulmul.ll | 24 ++++ 8 files changed, 204 insertions(+) create mode 100644 llvm/lib/Target/Mips/MipsMulMulBugPass.cpp create mode 100644 llvm/test/CodeGen/Mips/vr4300-mulbranch.ll create mode 100644 llvm/test/CodeGen/Mips/vr4300-mulmul.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index dc8bd831f2a2..6c56d9739de2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3617,6 +3617,7 @@ def mcheck_zero_division : Flag<["-"], "mcheck-zero-division">, Group; def mno_check_zero_division : Flag<["-"], "mno-check-zero-division">, Group; +def mfix4300 : Flag<["-"], "mfix4300">, Group; def mcompact_branches_EQ : Joined<["-"], "mcompact-branches=">, Group; def mbranch_likely : Flag<["-"], "mbranch-likely">, Group, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2c3439215093..3a4e9153689e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1929,6 +1929,11 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, } } + if (Arg *A = Args.getLastArg(options::OPT_mfix4300)) { + CmdArgs.push_back("-mllvm"); + 
CmdArgs.push_back("-mfix4300"); + } + if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); diff --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt index cbfd187fdfa2..5759fd9736e7 100644 --- a/llvm/lib/Target/Mips/CMakeLists.txt +++ b/llvm/lib/Target/Mips/CMakeLists.txt @@ -59,6 +59,7 @@ add_llvm_target(MipsCodeGen MipsTargetMachine.cpp MipsTargetObjectFile.cpp MicroMipsSizeReduction.cpp + MipsMulMulBugPass.cpp LINK_COMPONENTS Analysis diff --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h index b3faaab436f0..faf58545db62 100644 --- a/llvm/lib/Target/Mips/Mips.h +++ b/llvm/lib/Target/Mips/Mips.h @@ -38,6 +38,7 @@ namespace llvm { FunctionPass *createMicroMipsSizeReducePass(); FunctionPass *createMipsExpandPseudoPass(); FunctionPass *createMipsPreLegalizeCombiner(); + FunctionPass *createMipsMulMulBugPass(); InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &, MipsSubtarget &, @@ -47,6 +48,7 @@ namespace llvm { void initializeMipsBranchExpansionPass(PassRegistry &); void initializeMicroMipsSizeReducePass(PassRegistry &); void initializeMipsPreLegalizerCombinerPass(PassRegistry&); + void initializeMipsMulMulBugFixPass(PassRegistry&); } // end namespace llvm; #endif diff --git a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp new file mode 100644 index 000000000000..cb112ca1dfff --- /dev/null +++ b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp @@ -0,0 +1,134 @@ +//===- MipsMulMulBugPass.cpp - Mips VR4300 mulmul bugfix pass -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Early revisions of the VR4300 have a hardware bug where two consecutive +// multiplications can produce an incorrect result in the second multiply. +// +// This pass scans for mul instructions in each basic block and inserts +// a nop whenever the following conditions are met: +// +// - The current instruction is a single or double-precision floating-point +// mul instruction. +// - The next instruction is either a mul instruction (any kind) +// or a branch instruction. +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "mips-vr4300-mulmul-fix" + +using namespace llvm; + +namespace { + +class MipsMulMulBugFix : public MachineFunctionPass { +public: + MipsMulMulBugFix() : MachineFunctionPass(ID) { + initializeMipsMulMulBugFixPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Mips VR4300 mulmul bugfix"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + bool fixMulMulBB(MachineBasicBlock &MBB, const MipsInstrInfo &MipsII); +}; + +} // namespace + +INITIALIZE_PASS(MipsMulMulBugFix, "mips-vr4300-mulmul-fix", + "Mips VR4300 mulmul bugfix", false, false) + +char MipsMulMulBugFix::ID = 0; + +bool MipsMulMulBugFix::runOnMachineFunction(MachineFunction &MF) { + const MipsInstrInfo &MipsII = + 
*static_cast(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + + for (auto &MBB : MF) + Modified |= fixMulMulBB(MBB, MipsII); + + return Modified; +} + +static bool isFirstMul(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case Mips::FMUL_S: + case Mips::FMUL_D: + case Mips::FMUL_D32: + case Mips::FMUL_D64: + return true; + default: + return false; + } +} + +static bool isSecondMulOrBranch(const MachineInstr &MI) { + if (MI.isBranch() || MI.isIndirectBranch() || MI.isCall()) + return true; + + switch (MI.getOpcode()) { + case Mips::MUL: + case Mips::FMUL_S: + case Mips::FMUL_D: + case Mips::FMUL_D32: + case Mips::FMUL_D64: + case Mips::MULT: + case Mips::MULTu: + case Mips::DMULT: + case Mips::DMULTu: + return true; + default: + return false; + } +} + +bool MipsMulMulBugFix::fixMulMulBB(MachineBasicBlock &MBB, + const MipsInstrInfo &MipsII) { + bool Modified = false; + + // Iterate through the instructions in the basic block + for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), + E = MBB.instr_end(); + MII != E; ++MII) { + + MachineBasicBlock::instr_iterator NextMII = std::next(MII); + + // Trigger when the current instruction is a mul and the next instruction + // is either a mul or a branch in case the branch target start with a mul + if (NextMII != E && isFirstMul(*MII) && isSecondMulOrBranch(*NextMII)) { + LLVM_DEBUG(dbgs() << "Found mulmul!"); + + const MCInstrDesc &NewMCID = MipsII.get(Mips::NOP); + BuildMI(MBB, NextMII, DebugLoc(), NewMCID); + Modified = true; + } + } + + return Modified; +} + +FunctionPass *llvm::createMipsMulMulBugPass() { return new MipsMulMulBugFix(); } diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 8de3c9fd25bd..f9f662a00117 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -45,6 +45,10 @@ using namespace llvm; #define DEBUG_TYPE "mips" +static cl::opt + EnableMulMulFix("mfix4300", 
cl::init(false), + cl::desc("Enable the VR4300 mulmul bug fix."), cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() { // Register the target. RegisterTargetMachine X(getTheMipsTarget()); @@ -58,6 +62,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() { initializeMipsBranchExpansionPass(*PR); initializeMicroMipsSizeReducePass(*PR); initializeMipsPreLegalizerCombinerPass(*PR); + initializeMipsMulMulBugFixPass(*PR); } static std::string computeDataLayout(const Triple &TT, StringRef CPU, @@ -292,6 +297,11 @@ void MipsPassConfig::addPreEmitPass() { // instructions which can be remapped to a 16 bit instruction. addPass(createMicroMipsSizeReducePass()); + // This pass inserts a nop instruction between two back-to-back multiplication + // instructions when the "mfix4300" flag is passed. + if (EnableMulMulFix) + addPass(createMipsMulMulBugPass()); + // The delay slot filler pass can potientially create forbidden slot hazards // for MIPSR6 and therefore it should go before MipsBranchExpansion pass. 
addPass(createMipsDelaySlotFillerPass()); diff --git a/llvm/test/CodeGen/Mips/vr4300-mulbranch.ll b/llvm/test/CodeGen/Mips/vr4300-mulbranch.ll new file mode 100644 index 000000000000..c3f15fb6afa6 --- /dev/null +++ b/llvm/test/CodeGen/Mips/vr4300-mulbranch.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s + +; Function Attrs: nounwind +define dso_local void @fun_s(float %a) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: fun_s +; CHECK: mul.s +; CHECK-NEXT: nop + %mul = fmul float %a, %a + tail call void @foo_s(float %mul) #2 + ret void +} + +declare dso_local void @foo_s(float) local_unnamed_addr #1 + +; Function Attrs: nounwind +define dso_local void @fun_d(double %a) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: fun_d +; CHECK: mul.d +; CHECK-NEXT: nop + %mul = fmul double %a, %a + tail call void @foo_d(double %mul) #2 + ret void +} + +declare dso_local void @foo_d(double) local_unnamed_addr #1 diff --git a/llvm/test/CodeGen/Mips/vr4300-mulmul.ll b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll new file mode 100644 index 000000000000..f20cc169825e --- /dev/null +++ b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define dso_local float @fun_s(float %x) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: fun_s +; CHECK: mul.s +; CHECK-NEXT: nop +; CHECK: mul.s + %mul = fmul float %x, %x + %mul1 = fmul float %mul, %x + ret float %mul1 +} + +define dso_local double @fun_d(double %x) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: fun_d +; CHECK: mul.d +; CHECK-NEXT: nop +; CHECK: mul.d + %mul = fmul double %x, %x + %mul1 = fmul double %mul, %x + ret double %mul1 +} From e2f1c4c7066b34ad171d91879263cfa32bc6c55c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 31 Dec 2021 13:19:03 +0000 Subject: [PATCH 273/992] [LV] Turn check for unexpected VF into 
assertion (NFC). VF should always be non-zero in widenIntOrFpInduction. Turn check into assertion. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5635b1596bac..828f1f3e107b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2499,6 +2499,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); + assert(!State.VF.isZero() && "VF must be non-zero"); // The value from the original loop to which we are mapping the new induction // variable. @@ -2573,7 +2574,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, // Now do the actual transformations, and start with creating the step value. Value *Step = CreateStepValue(ID.getStep()); - if (State.VF.isZero() || State.VF.isScalar()) { + if (State.VF.isScalar()) { Value *ScalarIV = CreateScalarIV(Step); CreateSplatIV(ScalarIV, Step); return; From b8db44251371afd71d17f2a34a85766188c4b0a8 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 24 Dec 2021 18:10:58 +0000 Subject: [PATCH 274/992] [NFC][SVE] Minor reorder of some AArch64ISD nodes and ISel patterns. 
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 32 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 367ba3039a0c..9b541de8c80b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -81,10 +81,10 @@ enum NodeType : unsigned { FADD_PRED, FDIV_PRED, FMA_PRED, - FMAXNM_PRED, - FMINNM_PRED, FMAX_PRED, + FMAXNM_PRED, FMIN_PRED, + FMINNM_PRED, FMUL_PRED, FSUB_PRED, MUL_PRED, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index eb55a472a69a..ae5330efde3a 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -180,10 +180,10 @@ def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>; -def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>; -def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>; def AArch64fmax_p : SDNode<"AArch64ISD::FMAX_PRED", SDT_AArch64Arith>; +def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>; def AArch64fmin_p : SDNode<"AArch64ISD::FMIN_PRED", SDT_AArch64Arith>; +def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>; def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; @@ -642,11 +642,11 @@ let Predicates = [HasSVEorStreamingSVE] in { (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. 
- def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))), + def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))), + def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))), + def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))), (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; // Duplicate FP immediate into all vector elements @@ -1419,6 +1419,16 @@ let Predicates = [HasSVEorStreamingSVE] in { (INSR_ZV_D ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), (LASTB_VPZ_D (PTRUE_D 31), ZPR:$Z1), dsub))>; + // Splice with lane bigger or equal to 0 + def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))), + (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; + def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))), + (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; + def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))), + (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; + def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))), + (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; @@ -2496,6 +2506,7 @@ let Predicates = [HasSVEorStreamingSVE] in { // 16-element contiguous store defm : st1; + // Insert scalar into undef[0] def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)), (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>; def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)), @@ -2691,17 +2702,6 @@ let Predicates = 
[HasSVEorStreamingSVE] in { def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; } - - // Splice with lane bigger or equal to 0 - def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))), - (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; - } // End HasSVEorStreamingSVE let Predicates = [HasSVE, HasMatMulInt8] in { From d50072f74e3ee50b750a618fcdf05739dec9542d Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 11 Feb 2021 11:11:55 +0000 Subject: [PATCH 275/992] [ARM] Introduce an empty "armv8.8-a" architecture. This is the first commit in a series that implements support for "armv8.8-a" architecture. This should contain all the necessary boilerplate to make the 8.8-A architecture exist from LLVM and Clang's point of view: it adds the new arch as a subtarget feature, a definition in TargetParser, a name on the command line, an appropriate set of predefined macros, and adds appropriate tests. The new architecture name is supported in both AArch32 and AArch64. However, in this commit, no actual _functionality_ is added as part of the new architecture. If you specify -march=armv8.8a, the compiler will accept it and set the right predefines, but generate no code any differently. 
Differential Revision: https://reviews.llvm.org/D115694 --- clang/lib/Basic/Targets/AArch64.cpp | 11 ++++ clang/lib/Basic/Targets/AArch64.h | 2 + clang/lib/Basic/Targets/ARM.cpp | 3 + clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 8 ++- clang/test/Driver/aarch64-cpus.c | 16 ++++++ clang/test/Driver/arm-cortex-cpus.c | 17 ++++++ clang/test/Preprocessor/arm-target-features.c | 5 ++ llvm/include/llvm/ADT/Triple.h | 1 + .../llvm/Support/AArch64TargetParser.def | 7 +++ llvm/include/llvm/Support/ARMTargetParser.def | 6 ++ llvm/lib/Support/AArch64TargetParser.cpp | 2 + llvm/lib/Support/ARMTargetParser.cpp | 3 + llvm/lib/Support/Triple.cpp | 2 + llvm/lib/Target/AArch64/AArch64.td | 4 ++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 1 + .../AArch64/AsmParser/AArch64AsmParser.cpp | 4 ++ llvm/lib/Target/ARM/ARM.td | 17 ++++++ llvm/lib/Target/ARM/ARMSubtarget.h | 3 + llvm/unittests/Support/TargetParserTest.cpp | 55 +++++++++++-------- 19 files changed, 140 insertions(+), 27 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 4089a393b762..d7eb770995cb 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -223,6 +223,12 @@ void AArch64TargetInfo::getTargetDefinesARMV87A(const LangOptions &Opts, getTargetDefinesARMV86A(Opts, Builder); } +void AArch64TargetInfo::getTargetDefinesARMV88A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the Armv8.7 defines + getTargetDefinesARMV87A(Opts, Builder); +} + void AArch64TargetInfo::getTargetDefinesARMV9A(const LangOptions &Opts, MacroBuilder &Builder) const { // Armv9-A maps to Armv8.5-A @@ -446,6 +452,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::AArch64::ArchKind::ARMV8_7A: getTargetDefinesARMV87A(Opts, Builder); break; + case llvm::AArch64::ArchKind::ARMV8_8A: + getTargetDefinesARMV88A(Opts, Builder); + break; case llvm::AArch64::ArchKind::ARMV9A: getTargetDefinesARMV9A(Opts, 
Builder); break; @@ -603,6 +612,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, ArchKind = llvm::AArch64::ArchKind::ARMV8_6A; if (Feature == "+v8.7a") ArchKind = llvm::AArch64::ArchKind::ARMV8_7A; + if (Feature == "+v8.8a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_8A; if (Feature == "+v9a") ArchKind = llvm::AArch64::ArchKind::ARMV9A; if (Feature == "+v9.1a") diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 74745df3be8d..6bc0ea4eb5e1 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -92,6 +92,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { MacroBuilder &Builder) const; void getTargetDefinesARMV87A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV88A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefinesARMV9A(const LangOptions &Opts, MacroBuilder &Builder) const; void getTargetDefinesARMV91A(const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index c619d6cde41d..bb0044bdc596 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -212,6 +212,8 @@ StringRef ARMTargetInfo::getCPUAttr() const { return "8_6A"; case llvm::ARM::ArchKind::ARMV8_7A: return "8_7A"; + case llvm::ARM::ArchKind::ARMV8_8A: + return "8_8A"; case llvm::ARM::ArchKind::ARMV9A: return "9A"; case llvm::ARM::ArchKind::ARMV9_1A: @@ -930,6 +932,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::ARM::ArchKind::ARMV8_4A: case llvm::ARM::ArchKind::ARMV8_5A: case llvm::ARM::ArchKind::ARMV8_6A: + case llvm::ARM::ArchKind::ARMV8_8A: case llvm::ARM::ArchKind::ARMV9A: case llvm::ARM::ArchKind::ARMV9_1A: case llvm::ARM::ArchKind::ARMV9_2A: diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index be13d6d583ce..8cb9318df4cd 100644 --- 
a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -98,10 +98,11 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text, Features.push_back("-sve2-sm4"); } - // +sve implies +f32mm if the base architecture is v8.6A, v8.7A, v9.1A or - // v9.2A. It isn't the case in general that sve implies both f64mm and f32mm + // +sve implies +f32mm if the base architecture is >= v8.6A (except v9A) + // It isn't the case in general that sve implies both f64mm and f32mm if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A || ArchKind == llvm::AArch64::ArchKind::ARMV8_7A || + ArchKind == llvm::AArch64::ArchKind::ARMV8_8A || ArchKind == llvm::AArch64::ArchKind::ARMV9_1A || ArchKind == llvm::AArch64::ArchKind::ARMV9_2A) && Feature == "sve") @@ -390,6 +391,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, } if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd || + std::find(ItBegin, ItEnd, "+v8.8a") != ItEnd || std::find(ItBegin, ItEnd, "+v9a") != ItEnd || std::find(ItBegin, ItEnd, "+v9.1a") != ItEnd || std::find(ItBegin, ItEnd, "+v9.2a") != ItEnd) { @@ -451,7 +453,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, } } - const char *Archs[] = {"+v8.6a", "+v8.7a", "+v9.1a", "+v9.2a"}; + const char *Archs[] = {"+v8.6a", "+v8.7a", "+v8.8a", "+v9.1a", "+v9.2a"}; auto Pos = std::find_first_of(Features.begin(), Features.end(), std::begin(Archs), std::end(Archs)); if (Pos != std::end(Features)) diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index 4a377df99f92..71b7139ca266 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -803,6 +803,22 @@ // NO-LS64-NOT: "-target-feature" "+ls64" // LS64: "-target-feature" "+ls64" +// RUN: %clang -target aarch64 -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A %s +// RUN: %clang -target aarch64 -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A %s +// RUN: %clang 
-target aarch64 -mlittle-endian -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A %s +// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A %s +// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A %s +// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A %s +// GENERICV88A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.8a" + +// RUN: %clang -target aarch64_be -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s +// RUN: %clang -target aarch64_be -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s +// RUN: %clang -target aarch64 -mbig-endian -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s +// RUN: %clang -target aarch64 -mbig-endian -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s +// GENERICV88A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.8a" +// // RUN: %clang -target aarch64 -march=armv9a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV9A %s // RUN: %clang -target aarch64 -march=armv9-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV9A %s // RUN: %clang -target aarch64 -mlittle-endian -march=armv9a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV9A %s diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index 8ca7bcd518c9..0c4a79bb773f 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c 
@@ -369,6 +369,23 @@ // RUN: %clang -target arm -march=armebv8.7-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V87A %s // CHECK-BE-V87A: "-cc1"{{.*}} "-triple" "armebv8.7{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target arm -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target arm -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target arm -march=armv8.8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target armv8.8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target arm -march=armv8.8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target arm -mlittle-endian -march=armv8.8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// CHECK-V88A: "-cc1"{{.*}} "-triple" "armv8.8{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target armebv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s +// RUN: %clang -target armv8.8a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s +// RUN: %clang -target armeb -march=armebv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s +// RUN: %clang -target armeb -march=armebv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s +// RUN: %clang -target arm -march=armebv8.8a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s +// RUN: %clang -target arm -march=armebv8.8-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s +// CHECK-BE-V88A: "-cc1"{{.*}} "-triple" "armebv8.8{{.*}}" "-target-cpu" "generic" + // RUN: %clang -target armv9a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // RUN: %clang -target arm -march=armv9a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // RUN: %clang 
-target arm -march=armv9-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index bf4e7c41e3e2..88203535c71e 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -859,6 +859,11 @@ // CHECK-V87A: #define __ARM_ARCH_8_7A__ 1 // CHECK-V87A: #define __ARM_ARCH_PROFILE 'A' +// RUN: %clang -target armv8.8a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V88A %s +// CHECK-V88A: #define __ARM_ARCH 8 +// CHECK-V88A: #define __ARM_ARCH_8_8A__ 1 +// CHECK-V88A: #define __ARM_ARCH_PROFILE 'A' +// // RUN: %clang -target armv9a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V9A %s // CHECK-V9A: #define __ARM_ARCH 9 // CHECK-V9A: #define __ARM_ARCH_9A__ 1 diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 5dbd4f16bfd5..6f1f1618fbc2 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -110,6 +110,7 @@ class Triple { ARMSubArch_v9_2a, ARMSubArch_v9_1a, ARMSubArch_v9, + ARMSubArch_v8_8a, ARMSubArch_v8_7a, ARMSubArch_v8_6a, ARMSubArch_v8_5a, diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 48e82fa55a0f..4a5a41eefed2 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -58,6 +58,13 @@ AARCH64_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a", AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM)) +AARCH64_ARCH("armv8.8-a", ARMV8_8A, "8.8-A", "v8.8a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + 
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | + AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM)) AARCH64_ARCH("armv9-a", ARMV9A, "9-A", "v9a", ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_FP | diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 7d29808f0501..13841036d9bd 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -122,6 +122,12 @@ ARM_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a", (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM)) +ARM_ARCH("armv8.8-a", ARMV8_8A, "8.8-A", "v8.8a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES | + ARM::AEK_I8MM)) ARM_ARCH("armv9-a", ARMV9A, "9-A", "v9a", ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp index 4bc9c8487131..b436b9b2ca24 100644 --- a/llvm/lib/Support/AArch64TargetParser.cpp +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -136,6 +136,8 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK, Features.push_back("+v8.6a"); if (AK == AArch64::ArchKind::ARMV8_7A) Features.push_back("+v8.7a"); + if (AK == AArch64::ArchKind::ARMV8_8A) + Features.push_back("+v8.8a"); if (AK == AArch64::ArchKind::ARMV9A) Features.push_back("+v9a"); if (AK == AArch64::ArchKind::ARMV9_1A) diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp index 
4405ed176fe2..7521d3e4c147 100644 --- a/llvm/lib/Support/ARMTargetParser.cpp +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -77,6 +77,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) { case ArchKind::ARMV8_5A: case ArchKind::ARMV8_6A: case ArchKind::ARMV8_7A: + case ArchKind::ARMV8_8A: case ArchKind::ARMV8R: case ArchKind::ARMV8MBaseline: case ArchKind::ARMV8MMainline: @@ -117,6 +118,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) { case ArchKind::ARMV8_5A: case ArchKind::ARMV8_6A: case ArchKind::ARMV8_7A: + case ArchKind::ARMV8_8A: case ArchKind::ARMV9A: case ArchKind::ARMV9_1A: case ArchKind::ARMV9_2A: @@ -164,6 +166,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) { .Case("v8.5a", "v8.5-a") .Case("v8.6a", "v8.6-a") .Case("v8.7a", "v8.7-a") + .Case("v8.8a", "v8.8-a") .Case("v8r", "v8-r") .Cases("v9", "v9a", "v9-a") .Case("v9.1a", "v9.1-a") diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 2819dc0c139a..dca39716a5f5 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -663,6 +663,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { return Triple::ARMSubArch_v8_6a; case ARM::ArchKind::ARMV8_7A: return Triple::ARMSubArch_v8_7a; + case ARM::ArchKind::ARMV8_8A: + return Triple::ARMSubArch_v8_8a; case ARM::ArchKind::ARMV9A: return Triple::ARMSubArch_v9; case ARM::ArchKind::ARMV9_1A: diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index cb17fd94c335..bc5f0c090d43 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -497,6 +497,10 @@ def HasV8_7aOps : SubtargetFeature< "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; +def HasV8_8aOps : SubtargetFeature< + "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", + [HasV8_7aOps]>; + def HasV9_0aOps : SubtargetFeature< "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", 
[HasV8_5aOps, FeatureSVE2]>; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index b3cd5ebd5f65..336c92d73e3e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -94,6 +94,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasV8_5aOps = false; bool HasV8_6aOps = false; bool HasV8_7aOps = false; + bool HasV8_8aOps = false; bool HasV9_0aOps = false; bool HasV9_1aOps = false; bool HasV9_2aOps = false; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 62038b10fccd..03ef327e93c8 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3307,6 +3307,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { Str += "ARMv8.6a"; else if (FBS[AArch64::HasV8_7aOps]) Str += "ARMv8.7a"; + else if (FBS[AArch64::HasV8_8aOps]) + Str += "ARMv8.8a"; else if (FBS[AArch64::HasV9_0aOps]) Str += "ARMv9-a"; else if (FBS[AArch64::HasV9_1aOps]) @@ -5931,6 +5933,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, case AArch64::ArchKind::ARMV8_5A: case AArch64::ArchKind::ARMV8_6A: case AArch64::ArchKind::ARMV8_7A: + case AArch64::ArchKind::ARMV8_8A: case AArch64::ArchKind::ARMV9A: case AArch64::ArchKind::ARMV9_1A: case AArch64::ArchKind::ARMV9_2A: @@ -5956,6 +5959,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, case AArch64::ArchKind::ARMV8_5A: case AArch64::ArchKind::ARMV8_6A: case AArch64::ArchKind::ARMV8_7A: + case AArch64::ArchKind::ARMV8_8A: case AArch64::ArchKind::ARMV9A: case AArch64::ArchKind::ARMV9_1A: case AArch64::ArchKind::ARMV9_2A: diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 8173fe4036a8..fa7b415447c5 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -553,6 +553,10 @@ def HasV8_7aOps : 
SubtargetFeature<"v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", [HasV8_6aOps]>; +def HasV8_8aOps : SubtargetFeature<"v8.8a", "HasV8_8aOps", "true", + "Support ARM v8.8a instructions", + [HasV8_7aOps]>; + def HasV9_0aOps : SubtargetFeature<"v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", [HasV8_5aOps]>; @@ -894,6 +898,19 @@ def ARMv87a : Architecture<"armv8.7-a", "ARMv87a", [HasV8_7aOps, FeatureCRC, FeatureRAS, FeatureDotProd]>; +def ARMv88a : Architecture<"armv8.8-a", "ARMv88a", [HasV8_8aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; def ARMv9a : Architecture<"armv9-a", "ARMv9a", [HasV9_0aOps, FeatureAClass, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index e61b90af31b0..0a4dc099bd84 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -121,6 +121,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { ARMv85a, ARMv86a, ARMv87a, + ARMv88a, ARMv8a, ARMv8mBaseline, ARMv8mMainline, @@ -174,6 +175,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool HasV8_4aOps = false; bool HasV8_5aOps = false; bool HasV8_6aOps = false; + bool HasV8_8aOps = false; bool HasV8_7aOps = false; bool HasV9_0aOps = false; bool HasV9_1aOps = false; @@ -635,6 +637,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool hasV8_5aOps() const { return HasV8_5aOps; } bool hasV8_6aOps() const { return HasV8_6aOps; } bool hasV8_7aOps() const { return HasV8_7aOps; } + bool hasV8_8aOps() const { return HasV8_8aOps; } bool hasV9_0aOps() const { return HasV9_0aOps; } bool hasV9_1aOps() const { return HasV9_1aOps; } bool hasV9_2aOps() const { return HasV9_2aOps; } diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 900a944324e4..b21ead171a64 100644 --- 
a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -18,21 +18,21 @@ using namespace llvm; namespace { const char *ARMArch[] = { - "armv2", "armv2a", "armv3", "armv3m", "armv4", - "armv4t", "armv5", "armv5t", "armv5e", "armv5te", - "armv5tej", "armv6", "armv6j", "armv6k", "armv6hl", - "armv6t2", "armv6kz", "armv6z", "armv6zk", "armv6-m", - "armv6m", "armv6sm", "armv6s-m", "armv7-a", "armv7", - "armv7a", "armv7ve", "armv7hl", "armv7l", "armv7-r", - "armv7r", "armv7-m", "armv7m", "armv7k", "armv7s", - "armv7e-m", "armv7em", "armv8-a", "armv8", "armv8a", - "armv8l", "armv8.1-a", "armv8.1a", "armv8.2-a", "armv8.2a", - "armv8.3-a", "armv8.3a", "armv8.4-a", "armv8.4a", "armv8.5-a", - "armv8.5a", "armv8.6-a", "armv8.6a", "armv8.7-a", "armv8.7a", - "armv8-r", "armv8r", "armv8-m.base","armv8m.base", "armv8-m.main", - "armv8m.main", "iwmmxt", "iwmmxt2", "xscale", "armv8.1-m.main", - "armv9-a", "armv9", "armv9a", "armv9.1-a", "armv9.1a", - "armv9.2-a", "armv9.2a", + "armv2", "armv2a", "armv3", "armv3m", "armv4", + "armv4t", "armv5", "armv5t", "armv5e", "armv5te", + "armv5tej", "armv6", "armv6j", "armv6k", "armv6hl", + "armv6t2", "armv6kz", "armv6z", "armv6zk", "armv6-m", + "armv6m", "armv6sm", "armv6s-m", "armv7-a", "armv7", + "armv7a", "armv7ve", "armv7hl", "armv7l", "armv7-r", + "armv7r", "armv7-m", "armv7m", "armv7k", "armv7s", + "armv7e-m", "armv7em", "armv8-a", "armv8", "armv8a", + "armv8l", "armv8.1-a", "armv8.1a", "armv8.2-a", "armv8.2a", + "armv8.3-a", "armv8.3a", "armv8.4-a", "armv8.4a", "armv8.5-a", + "armv8.5a", "armv8.6-a", "armv8.6a", "armv8.7-a", "armv8.7a", + "armv8.8-a", "armv8.8a", "armv8-r", "armv8r", "armv8-m.base", + "armv8m.base", "armv8-m.main", "armv8m.main", "iwmmxt", "iwmmxt2", + "xscale", "armv8.1-m.main", "armv9-a", "armv9", "armv9a", + "armv9.1-a", "armv9.1a", "armv9.2-a", "armv9.2a", }; template @@ -501,6 +501,8 @@ TEST(TargetParserTest, testARMArch) { EXPECT_TRUE( testARMArch("armv8.7-a", "generic", 
"v8.7a", ARMBuildAttrs::CPUArch::v8_A)); + EXPECT_TRUE(testARMArch("armv8.8-a", "generic", "v8.8a", + ARMBuildAttrs::CPUArch::v8_A)); EXPECT_TRUE( testARMArch("armv9-a", "generic", "v9a", ARMBuildAttrs::CPUArch::v8_A)); @@ -765,15 +767,17 @@ TEST(TargetParserTest, ARMparseHWDiv) { TEST(TargetParserTest, ARMparseArchEndianAndISA) { const char *Arch[] = { - "v2", "v2a", "v3", "v3m", "v4", "v4t", "v5", "v5t", - "v5e", "v5te", "v5tej", "v6", "v6j", "v6k", "v6hl", "v6t2", - "v6kz", "v6z", "v6zk", "v6-m", "v6m", "v6sm", "v6s-m", "v7-a", - "v7", "v7a", "v7ve", "v7hl", "v7l", "v7-r", "v7r", "v7-m", - "v7m", "v7k", "v7s", "v7e-m", "v7em", "v8-a", "v8", "v8a", - "v8l", "v8.1-a", "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a", - "v8.4a", "v8.5-a","v8.5a", "v8.6-a", "v8.6a", "v8.7-a", "v8.7a", "v8-r", - "v8m.base", "v8m.main", "v8.1m.main" - }; + "v2", "v2a", "v3", "v3m", "v4", "v4t", + "v5", "v5t", "v5e", "v5te", "v5tej", "v6", + "v6j", "v6k", "v6hl", "v6t2", "v6kz", "v6z", + "v6zk", "v6-m", "v6m", "v6sm", "v6s-m", "v7-a", + "v7", "v7a", "v7ve", "v7hl", "v7l", "v7-r", + "v7r", "v7-m", "v7m", "v7k", "v7s", "v7e-m", + "v7em", "v8-a", "v8", "v8a", "v8l", "v8.1-a", + "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a", + "v8.4a", "v8.5-a", "v8.5a", "v8.6-a", "v8.6a", "v8.7-a", + "v8.7a", "v8.8-a", "v8.8a", "v8-r", "v8m.base", "v8m.main", + "v8.1m.main"}; for (unsigned i = 0; i < array_lengthof(Arch); i++) { std::string arm_1 = "armeb" + (std::string)(Arch[i]); @@ -839,6 +843,7 @@ TEST(TargetParserTest, ARMparseArchProfile) { case ARM::ArchKind::ARMV8_5A: case ARM::ArchKind::ARMV8_6A: case ARM::ArchKind::ARMV8_7A: + case ARM::ArchKind::ARMV8_8A: case ARM::ArchKind::ARMV9A: case ARM::ArchKind::ARMV9_1A: case ARM::ArchKind::ARMV9_2A: @@ -1266,6 +1271,8 @@ TEST(TargetParserTest, testAArch64Arch) { ARMBuildAttrs::CPUArch::v8_A)); EXPECT_TRUE(testAArch64Arch("armv8.7-a", "generic", "v8.7a", ARMBuildAttrs::CPUArch::v8_A)); + EXPECT_TRUE(testAArch64Arch("armv8.8-a", 
"generic", "v8.8a", + ARMBuildAttrs::CPUArch::v8_A)); EXPECT_TRUE(testAArch64Arch("armv9-a", "generic", "v9a", ARMBuildAttrs::CPUArch::v8_A)); EXPECT_TRUE(testAArch64Arch("armv9.1-a", "generic", "v9.1a", From 5ee769296ead9138d7905917f20fdcf736ea63d0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 10:25:16 -0800 Subject: [PATCH 276/992] [Analysis] Remove unused forward declarations (NFC) --- llvm/include/llvm/Analysis/AliasAnalysis.h | 1 - llvm/include/llvm/Analysis/DivergenceAnalysis.h | 1 - llvm/include/llvm/Analysis/IVUsers.h | 1 - llvm/include/llvm/Analysis/InlineCost.h | 1 - llvm/include/llvm/Analysis/InlineOrder.h | 1 - llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h | 1 - llvm/include/llvm/Analysis/Loads.h | 1 - llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 1 - llvm/include/llvm/Analysis/MemoryBuiltins.h | 1 - llvm/include/llvm/Analysis/MemorySSA.h | 1 - llvm/include/llvm/Analysis/MemorySSAUpdater.h | 1 - llvm/include/llvm/Analysis/ReplayInlineAdvisor.h | 2 -- llvm/include/llvm/Analysis/SyncDependenceAnalysis.h | 1 - llvm/include/llvm/Analysis/TargetTransformInfo.h | 1 - llvm/include/llvm/Analysis/ValueTracking.h | 1 - 15 files changed, 16 deletions(-) diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 2770a1a9b277..1ecbd4b83004 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -60,7 +60,6 @@ class CatchReturnInst; class DominatorTree; class FenceInst; class Function; -class InvokeInst; class LoopInfo; class PreservedAnalyses; class TargetLibraryInfo; diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index 6f759a81fdef..7e526b2fad84 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -22,7 +22,6 @@ #include namespace llvm { -class Module; class Value; class Instruction; class Loop; diff --git 
a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h index e2026a4d5875..390d09848dde 100644 --- a/llvm/include/llvm/Analysis/IVUsers.h +++ b/llvm/include/llvm/Analysis/IVUsers.h @@ -28,7 +28,6 @@ class Value; class ScalarEvolution; class SCEV; class IVUsers; -class DataLayout; /// IVStrideUse - Keep track of one use of a strided induction variable. /// The Expr member keeps track of the expression, User is the actual user diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 776749b9a07f..f86ee5a14874 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -21,7 +21,6 @@ #include namespace llvm { -class AssumptionCacheTracker; class BlockFrequencyInfo; class CallBase; class DataLayout; diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h index def3192356f4..feefa9b9ddd1 100644 --- a/llvm/include/llvm/Analysis/InlineOrder.h +++ b/llvm/include/llvm/Analysis/InlineOrder.h @@ -20,7 +20,6 @@ namespace llvm { class CallBase; class Function; -class Module; template class InlineOrder { public: diff --git a/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h index 0e7dc943bacf..ab6d6ce9ec5a 100644 --- a/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h +++ b/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h @@ -22,7 +22,6 @@ namespace llvm { class AnalysisUsage; -class BranchProbabilityInfo; class Function; class LoopInfo; diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index ced1943b81d9..3db501c51a17 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -24,7 +24,6 @@ class DominatorTree; class Instruction; class LoadInst; class Loop; -class MDNode; class MemoryLocation; class ScalarEvolution; class TargetLibraryInfo; diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h 
b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 2b4edfac61fc..e55a90b0ea41 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -26,7 +26,6 @@ class AAResults; class DataLayout; class Loop; class LoopAccessInfo; -class OptimizationRemarkEmitter; class raw_ostream; class SCEV; class SCEVUnionPredicate; diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index 94495a518042..6cbd700e67ac 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -30,7 +30,6 @@ namespace llvm { class AllocaInst; class Argument; class CallInst; -class ConstantInt; class ConstantPointerNull; class DataLayout; class ExtractElementInst; diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 48aeef371e3d..9198bd8412e6 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -106,7 +106,6 @@ namespace llvm { -class AllocaInst; class Function; class Instruction; class MemoryAccess; diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index 659e6aff6e28..3e5ebe9cb427 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -44,7 +44,6 @@ namespace llvm { class BasicBlock; -class BranchInst; class DominatorTree; class Instruction; class LoopBlocksRPO; diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h index a0eb9af62205..dc2efeafb568 100644 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -14,11 +14,9 @@ #include "llvm/IR/LLVMContext.h" namespace llvm { -class BasicBlock; class CallBase; class Function; class Module; -class OptimizationRemarkEmitter; struct CallSiteFormat { enum class Format : int { diff 
--git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h index 92459ea79ab4..d42bfe5ba25f 100644 --- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h @@ -27,7 +27,6 @@ namespace llvm { class BasicBlock; class DominatorTree; -class Loop; class PostDominatorTree; using ConstBlockSet = SmallPtrSet; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index d9f5c9689d5c..a405b532c892 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -43,7 +43,6 @@ class BlockFrequencyInfo; class DominatorTree; class BranchInst; class CallBase; -class ExtractElementInst; class Function; class GlobalValue; class InstCombiner; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index f0f78c0eaed4..b943ec1cebd5 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -33,7 +33,6 @@ class APInt; class AssumptionCache; class DominatorTree; class GEPOperator; -class IntrinsicInst; class LoadInst; class WithOverflowInst; struct KnownBits; From 255ee643a8c559375a9c9787ef7d121790040818 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 10:51:10 -0800 Subject: [PATCH 277/992] [clang-tools-extra] Remove unused using (NFC) Identified by misc-unused-using-decls. 
--- clang-tools-extra/clang-doc/Mapper.cpp | 2 -- clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp | 1 - .../clang-include-fixer/find-all-symbols/SymbolInfo.cpp | 2 -- 3 files changed, 5 deletions(-) diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp index de7e4c341086..16a52e843fcc 100644 --- a/clang-tools-extra/clang-doc/Mapper.cpp +++ b/clang-tools-extra/clang-doc/Mapper.cpp @@ -14,8 +14,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Error.h" -using clang::comments::FullComment; - namespace clang { namespace doc { diff --git a/clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp b/clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp index de72e9a9b932..4271d9aa4e67 100644 --- a/clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp +++ b/clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp @@ -15,7 +15,6 @@ #include #include -using clang::find_all_symbols::SymbolInfo; using clang::find_all_symbols::SymbolAndSignals; namespace clang { diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/SymbolInfo.cpp b/clang-tools-extra/clang-include-fixer/find-all-symbols/SymbolInfo.cpp index e5b4dba4b7ad..4a5f8353b410 100644 --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/SymbolInfo.cpp +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/SymbolInfo.cpp @@ -13,8 +13,6 @@ #include "llvm/Support/raw_ostream.h" using llvm::yaml::MappingTraits; -using llvm::yaml::IO; -using llvm::yaml::Input; using ContextType = clang::find_all_symbols::SymbolInfo::ContextType; using clang::find_all_symbols::SymbolInfo; using clang::find_all_symbols::SymbolAndSignals; From 896537048df70762b5b0051a0e1b065a74e8d0ce Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 30 Dec 2021 06:22:48 +0000 Subject: [PATCH 278/992] [lld][CMake] Use `GNUInstallDirs` to support custom installation dirs Extracted from D99484. My new plan is to start from the outside and work inward. 
Reviewed By: stephenneuendorffer Differential Revision: https://reviews.llvm.org/D115568 --- lld/CMakeLists.txt | 14 ++++++++++---- lld/cmake/modules/AddLLD.cmake | 5 +++-- lld/cmake/modules/CMakeLists.txt | 4 +++- lld/tools/lld/CMakeLists.txt | 2 +- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index fb5cfd5f28a1..00f8e1bb2a77 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -1,3 +1,5 @@ +include(GNUInstallDirs) + # Check if lld is built as a standalone project. if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(lld) @@ -34,8 +36,8 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "path to llvm/include") set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") - file(TO_CMAKE_PATH ${LLVM_OBJ_ROOT} LLVM_BINARY_DIR) - file(TO_CMAKE_PATH ${LLVM_CMAKE_DIR} LLVM_CMAKE_DIR) + file(TO_CMAKE_PATH "${LLVM_OBJ_ROOT}" LLVM_BINARY_DIR) + file(TO_CMAKE_PATH "${LLVM_CMAKE_DIR}" LLVM_CMAKE_DIR) if(NOT EXISTS "${LLVM_CMAKE_DIR}/LLVMConfig.cmake") message(FATAL_ERROR "LLVMConfig.cmake not found") @@ -151,7 +153,11 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) "`CMakeFiles'. Please delete them.") endif() -list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") +# Add path for custom modules. 
+list(INSERT CMAKE_MODULE_PATH 0 + "${LLD_SOURCE_DIR}/cmake/modules" + "${LLD_SOURCE_DIR}/../cmake/Modules" + ) include(AddLLD) @@ -188,7 +194,7 @@ include_directories(BEFORE if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/ - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" FILES_MATCHING PATTERN "*.h" ) diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake index 9883475d1aa5..dd2898ce6236 100644 --- a/lld/cmake/modules/AddLLD.cmake +++ b/lld/cmake/modules/AddLLD.cmake @@ -1,3 +1,4 @@ +include(GNUInstallDirs) include(LLVMDistributionSupport) macro(add_lld_library name) @@ -19,7 +20,7 @@ macro(add_lld_library name) ${export_to_lldtargets} LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} - RUNTIME DESTINATION bin) + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") if (${ARG_SHARED} AND NOT CMAKE_CONFIGURATION_TYPES) add_llvm_install_targets(install-${name} @@ -46,7 +47,7 @@ macro(add_lld_tool name) get_target_export_arg(${name} LLD export_to_lldtargets) install(TARGETS ${name} ${export_to_lldtargets} - RUNTIME DESTINATION bin + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name}) if(NOT CMAKE_CONFIGURATION_TYPES) diff --git a/lld/cmake/modules/CMakeLists.txt b/lld/cmake/modules/CMakeLists.txt index 62d03fa901dd..022824ede631 100644 --- a/lld/cmake/modules/CMakeLists.txt +++ b/lld/cmake/modules/CMakeLists.txt @@ -1,3 +1,5 @@ +include(ExtendPath) + # Generate a list of CMake library targets so that other CMake projects can # link against them. LLVM calls its version of this file LLVMExports.cmake, but # the usual CMake convention seems to be ${Project}Targets.cmake. 
@@ -40,7 +42,7 @@ endforeach(p) set(LLD_CONFIG_CMAKE_DIR "\${LLD_INSTALL_PREFIX}/${LLD_INSTALL_PACKAGE_DIR}") set(LLD_CONFIG_LLVM_CMAKE_DIR "\${LLD_INSTALL_PREFIX}/${LLVM_INSTALL_PACKAGE_DIR}") get_config_exports_includes(LLD LLD_CONFIG_INCLUDE_EXPORTS) -set(LLD_CONFIG_INCLUDE_DIRS "\${LLD_INSTALL_PREFIX}/include") +extend_path(LLD_CONFIG_INCLUDE_DIRS "\${LLD_INSTALL_PREFIX}" "${CMAKE_INSTALL_INCLUDEDIR}") configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/LLDConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/LLDConfig.cmake diff --git a/lld/tools/lld/CMakeLists.txt b/lld/tools/lld/CMakeLists.txt index c5c86c604c67..df48cc0d6c51 100644 --- a/lld/tools/lld/CMakeLists.txt +++ b/lld/tools/lld/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries(lld ) install(TARGETS lld - RUNTIME DESTINATION bin) + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") if(NOT LLD_SYMLINKS_TO_CREATE) set(LLD_SYMLINKS_TO_CREATE From 5c3347aa8babd1678e73dea182bf78bf6e33822c Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 4 Apr 2021 13:02:18 -0400 Subject: [PATCH 279/992] [flang] Use `GNUInstallDirs` to support custom installation dirs. Extracted from D99484. My new plan is to start from the outside and work inward. 
Reviewed By: stephenneuendorffer Differential Revision: https://reviews.llvm.org/D115569 --- flang/CMakeLists.txt | 11 ++++++++--- flang/cmake/modules/AddFlang.cmake | 5 +++-- flang/cmake/modules/CMakeLists.txt | 4 +++- flang/tools/f18/CMakeLists.txt | 4 ++-- flang/tools/flang-driver/CMakeLists.txt | 2 +- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index f317340f9d35..64852fcd1a98 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -7,6 +7,8 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED TRUE) set(CMAKE_CXX_EXTENSIONS OFF) +include(GNUInstallDirs) + set(FLANG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE) @@ -226,7 +228,10 @@ include_directories(BEFORE ${FLANG_SOURCE_DIR}/include) # Add Flang-centric modules to cmake path. -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") +list(INSERT CMAKE_MODULE_PATH 0 + "${FLANG_SOURCE_DIR}/cmake/modules" + "${FLANG_SOURCE_DIR}/../cmake/Modules" + ) include(AddFlang) if (NOT DEFAULT_SYSROOT) @@ -444,7 +449,7 @@ endif() if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/flang - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT flang-headers FILES_MATCHING PATTERN "*.def" @@ -456,7 +461,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) PATTERN "CMakeFiles" EXCLUDE) install(DIRECTORY ${FLANG_INCLUDE_DIR}/flang - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT flang-headers FILES_MATCHING PATTERN "*.inc" diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake index 5da58a59ed12..369e303e148a 100644 --- a/flang/cmake/modules/AddFlang.cmake +++ b/flang/cmake/modules/AddFlang.cmake @@ -1,3 +1,4 @@ +include(GNUInstallDirs) include(LLVMDistributionSupport) macro(set_flang_windows_version_resource_properties name) @@ -71,7 +72,7 @@ macro(add_flang_library name) ${export_to_flangtargets} 
LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} - RUNTIME DESTINATION bin) + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} @@ -108,7 +109,7 @@ macro(add_flang_tool name) get_target_export_arg(${name} Flang export_to_flangtargets) install(TARGETS ${name} ${export_to_flangtargets} - RUNTIME DESTINATION bin + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name}) if(NOT LLVM_ENABLE_IDE) diff --git a/flang/cmake/modules/CMakeLists.txt b/flang/cmake/modules/CMakeLists.txt index e46498a6bc35..06466c066674 100644 --- a/flang/cmake/modules/CMakeLists.txt +++ b/flang/cmake/modules/CMakeLists.txt @@ -1,3 +1,5 @@ +include(ExtendPath) + # Generate a list of CMake library targets so that other CMake projects can # link against them. LLVM calls its version of this file LLVMExports.cmake, but # the usual CMake convention seems to be ${Project}Targets.cmake. @@ -41,7 +43,7 @@ endforeach(p) set(FLANG_CONFIG_CMAKE_DIR "\${FLANG_INSTALL_PREFIX}/${FLANG_INSTALL_PACKAGE_DIR}") set(FLANG_CONFIG_LLVM_CMAKE_DIR "\${FLANG_INSTALL_PREFIX}/${LLVM_INSTALL_PACKAGE_DIR}") get_config_exports_includes(Flang FLANG_CONFIG_INCLUDE_EXPORTS) -set(FLANG_CONFIG_INCLUDE_DIRS "\${FLANG_INSTALL_PREFIX}/include") +extend_path(FLANG_CONFIG_INCLUDE_DIRS "\${FLANG_INSTALL_PREFIX}" "${CMAKE_INSTALL_INCLUDEDIR}") configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/FlangConfig.cmake.in diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 16f2dbcd12b3..8c09842359e7 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -38,7 +38,7 @@ foreach(filename ${MODULES}) DEPENDS ${base}.mod COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod DESTINATION include/flang) + install(FILES ${base}.mod ${base}.f18.mod DESTINATION 
"${CMAKE_INSTALL_INCLUDEDIR}/flang") endforeach() add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) @@ -48,5 +48,5 @@ if (NOT WIN32) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/flang DESTINATION ${CMAKE_BINARY_DIR}/bin FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) - install(PROGRAMS ${CMAKE_BINARY_DIR}/bin/flang DESTINATION bin) + install(PROGRAMS ${CMAKE_BINARY_DIR}/bin/flang DESTINATION "${CMAKE_INSTALL_BINDIR}") endif() diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt index d747fb19dfc6..b3e90746e786 100644 --- a/flang/tools/flang-driver/CMakeLists.txt +++ b/flang/tools/flang-driver/CMakeLists.txt @@ -34,4 +34,4 @@ if(FLANG_PLUGIN_SUPPORT) export_executable_symbols_for_plugins(flang-new) endif() -install(TARGETS flang-new DESTINATION bin) +install(TARGETS flang-new DESTINATION "${CMAKE_INSTALL_BINDIR}") From 3bf2373d1980d89ef0001c21e4c71539a523d19e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 31 Dec 2021 15:06:23 -0500 Subject: [PATCH 280/992] [InstSimplify] add tests for or-nand-xor; NFC --- llvm/test/Transforms/InstSimplify/or.ll | 90 +++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index 821715100c5e..bb5144bad1b4 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -951,3 +951,93 @@ define i16 @or_xor_not_op_or_wrong_val(i16 %a, i16 %b, i16 %c) { %r = or i16 %xor, %or ret i16 %r } + +define i4 @or_nand_xor(i4 %x, i4 %y) { +; CHECK-LABEL: @or_nand_xor( +; CHECK-NEXT: [[AND:%.*]] = and i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[X]], [[Y]] +; CHECK-NEXT: [[NAND:%.*]] = xor i4 [[AND]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i4 [[XOR]], [[NAND]] +; CHECK-NEXT: ret i4 [[OR]] +; + %and = and i4 %x, %y + %xor = xor i4 %x, %y + %nand = xor i4 %and, -1 + %or = or i4 %xor, %nand + ret i4 %or +} + +define <2 x i4> 
@or_nand_xor_commute1(<2 x i4> %x, <2 x i4> %y) { +; CHECK-LABEL: @or_nand_xor_commute1( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[X]], [[Y]] +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i4> [[XOR]], [[NAND]] +; CHECK-NEXT: ret <2 x i4> [[OR]] +; + %and = and <2 x i4> %y, %x + %xor = xor <2 x i4> %x, %y + %nand = xor <2 x i4> %and, + %or = or <2 x i4> %xor, %nand + ret <2 x i4> %or +} + +define i71 @or_nand_xor_commute2(i71 %x, i71 %y) { +; CHECK-LABEL: @or_nand_xor_commute2( +; CHECK-NEXT: [[AND:%.*]] = and i71 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i71 [[X]], [[Y]] +; CHECK-NEXT: [[NAND:%.*]] = xor i71 [[AND]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i71 [[NAND]], [[XOR]] +; CHECK-NEXT: ret i71 [[OR]] +; + %and = and i71 %x, %y + %xor = xor i71 %x, %y + %nand = xor i71 %and, -1 + %or = or i71 %nand, %xor + ret i71 %or +} + +define i4 @or_nand_xor_commute3(i4 %x, i4 %y) { +; CHECK-LABEL: @or_nand_xor_commute3( +; CHECK-NEXT: [[AND:%.*]] = and i4 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[X]], [[Y]] +; CHECK-NEXT: [[NAND:%.*]] = xor i4 [[AND]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i4 [[NAND]], [[XOR]] +; CHECK-NEXT: ret i4 [[OR]] +; + %and = and i4 %y, %x + %xor = xor i4 %x, %y + %nand = xor i4 %and, -1 + %or = or i4 %nand, %xor + ret i4 %or +} + +define i4 @or_nand_xor_wrong_val(i4 %x, i4 %y, i4 %z) { +; CHECK-LABEL: @or_nand_xor_wrong_val( +; CHECK-NEXT: [[AND:%.*]] = and i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[X]], [[Z:%.*]] +; CHECK-NEXT: [[NAND:%.*]] = xor i4 [[AND]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i4 [[XOR]], [[NAND]] +; CHECK-NEXT: ret i4 [[OR]] +; + %and = and i4 %x, %y + %xor = xor i4 %x, %z + %nand = xor i4 %and, -1 + %or = or i4 %xor, %nand + ret i4 %or +} + +define <2 x i4> @or_nand_xor_undef_elt(<2 x i4> %x, <2 x i4> %y) { +; CHECK-LABEL: @or_nand_xor_undef_elt( +; CHECK-NEXT: [[AND:%.*]] = and <2 x 
i4> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[X]], [[Y]] +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i4> [[XOR]], [[NAND]] +; CHECK-NEXT: ret <2 x i4> [[OR]] +; + %and = and <2 x i4> %y, %x + %xor = xor <2 x i4> %x, %y + %nand = xor <2 x i4> %and, + %or = or <2 x i4> %xor, %nand + ret <2 x i4> %or +} From c054402170cd8466683a20385befc0523aba3359 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 31 Dec 2021 15:10:19 -0500 Subject: [PATCH 281/992] [InstSimplify] fold or-nand-xor ~(A & B) | (A ^ B) --> ~(A & B) https://alive2.llvm.org/ce/z/hXQucg --- llvm/lib/Analysis/InstructionSimplify.cpp | 11 +++++++++-- llvm/test/Transforms/InstSimplify/or.ll | 22 ++++++++++------------ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4a8dc754349b..15800898a15a 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2258,14 +2258,21 @@ static Value *simplifyOrLogic(Value *X, Value *Y) { match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return NotA; - // ~(A ^ B) | (A & B) --> ~(A & B) - // ~(A ^ B) | (B & A) --> ~(A & B) + // ~(A ^ B) | (A & B) --> ~(A ^ B) + // ~(A ^ B) | (B & A) --> ~(A ^ B) Value *NotAB; if (match(X, m_CombineAnd(m_NotForbidUndef(m_Xor(m_Value(A), m_Value(B))), m_Value(NotAB))) && match(Y, m_c_And(m_Specific(A), m_Specific(B)))) return NotAB; + // ~(A & B) | (A ^ B) --> ~(A & B) + // ~(A & B) | (B ^ A) --> ~(A & B) + if (match(X, m_CombineAnd(m_NotForbidUndef(m_And(m_Value(A), m_Value(B))), + m_Value(NotAB))) && + match(Y, m_c_Xor(m_Specific(A), m_Specific(B)))) + return NotAB; + return nullptr; } diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index bb5144bad1b4..3e8e0fa9de7f 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ 
-952,13 +952,13 @@ define i16 @or_xor_not_op_or_wrong_val(i16 %a, i16 %b, i16 %c) { ret i16 %r } +; ~(x & y) | (x ^ y) --> ~(x & y) + define i4 @or_nand_xor(i4 %x, i4 %y) { ; CHECK-LABEL: @or_nand_xor( ; CHECK-NEXT: [[AND:%.*]] = and i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[X]], [[Y]] ; CHECK-NEXT: [[NAND:%.*]] = xor i4 [[AND]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i4 [[XOR]], [[NAND]] -; CHECK-NEXT: ret i4 [[OR]] +; CHECK-NEXT: ret i4 [[NAND]] ; %and = and i4 %x, %y %xor = xor i4 %x, %y @@ -970,10 +970,8 @@ define i4 @or_nand_xor(i4 %x, i4 %y) { define <2 x i4> @or_nand_xor_commute1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @or_nand_xor_commute1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[X]], [[Y]] ; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], -; CHECK-NEXT: [[OR:%.*]] = or <2 x i4> [[XOR]], [[NAND]] -; CHECK-NEXT: ret <2 x i4> [[OR]] +; CHECK-NEXT: ret <2 x i4> [[NAND]] ; %and = and <2 x i4> %y, %x %xor = xor <2 x i4> %x, %y @@ -985,10 +983,8 @@ define <2 x i4> @or_nand_xor_commute1(<2 x i4> %x, <2 x i4> %y) { define i71 @or_nand_xor_commute2(i71 %x, i71 %y) { ; CHECK-LABEL: @or_nand_xor_commute2( ; CHECK-NEXT: [[AND:%.*]] = and i71 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor i71 [[X]], [[Y]] ; CHECK-NEXT: [[NAND:%.*]] = xor i71 [[AND]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i71 [[NAND]], [[XOR]] -; CHECK-NEXT: ret i71 [[OR]] +; CHECK-NEXT: ret i71 [[NAND]] ; %and = and i71 %x, %y %xor = xor i71 %x, %y @@ -1000,10 +996,8 @@ define i71 @or_nand_xor_commute2(i71 %x, i71 %y) { define i4 @or_nand_xor_commute3(i4 %x, i4 %y) { ; CHECK-LABEL: @or_nand_xor_commute3( ; CHECK-NEXT: [[AND:%.*]] = and i4 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[X]], [[Y]] ; CHECK-NEXT: [[NAND:%.*]] = xor i4 [[AND]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i4 [[NAND]], [[XOR]] -; CHECK-NEXT: ret i4 [[OR]] +; CHECK-NEXT: ret i4 [[NAND]] ; %and = and i4 %y, %x %xor = xor i4 %x, %y @@ 
-1012,6 +1006,8 @@ define i4 @or_nand_xor_commute3(i4 %x, i4 %y) { ret i4 %or } +; negative test wrong operand + define i4 @or_nand_xor_wrong_val(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_nand_xor_wrong_val( ; CHECK-NEXT: [[AND:%.*]] = and i4 [[X:%.*]], [[Y:%.*]] @@ -1027,6 +1023,8 @@ define i4 @or_nand_xor_wrong_val(i4 %x, i4 %y, i4 %z) { ret i4 %or } +; negative test - undef element in 'not' is not allowed + define <2 x i4> @or_nand_xor_undef_elt(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @or_nand_xor_undef_elt( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] From e47a224ccfbbfd61f31136db53768dabbb85e563 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 13:54:34 -0800 Subject: [PATCH 282/992] [clang-tidy] Use nullptr instead of 0 or NULL (NFC) Identified with modernize-use-nullptr. --- .../clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp | 4 ++-- .../clang-tidy/readability/IdentifierNamingCheck.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp index 36d83b2a3ea3..200528b1c061 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp @@ -144,7 +144,7 @@ static StringRef exprToStr(const Expr *E, return Lexer::getSourceText( CharSourceRange::getTokenRange(E->getSourceRange()), - *Result.SourceManager, Result.Context->getLangOpts(), 0); + *Result.SourceManager, Result.Context->getLangOpts(), nullptr); } // Returns the proper token based end location of \p E. 
@@ -477,7 +477,7 @@ static void insertNullTerminatorExpr(StringRef Name, FunctionExpr->getBeginLoc()); StringRef SpaceBeforeStmtStr = Lexer::getSourceText( CharSourceRange::getCharRange(SpaceRange), *Result.SourceManager, - Result.Context->getLangOpts(), 0); + Result.Context->getLangOpts(), nullptr); SmallString<128> NewAddNullTermExprStr; NewAddNullTermExprStr = diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index cfbe79c52594..8cede1b2c17b 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1404,8 +1404,8 @@ IdentifierNamingCheck::getMacroFailureInfo(const Token &MacroNameTok, if (!Style.isActive()) return llvm::None; - return getFailureInfo("", MacroNameTok.getIdentifierInfo()->getName(), NULL, - Loc, Style.getStyles(), Style.getHNOption(), + return getFailureInfo("", MacroNameTok.getIdentifierInfo()->getName(), + nullptr, Loc, Style.getStyles(), Style.getHNOption(), SK_MacroDefinition, SM, IgnoreFailedSplit); } From 732e8968a82d7128b5b264023c00c1478a3fe677 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 14:02:29 -0800 Subject: [PATCH 283/992] [Scalar] Remove a redundant declaration (NFC) InitializePasses.h contains the proper declaration. Identified with readability-redundant-declaration. 
--- llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 883d4afff3bd..8f5933b7bd71 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -250,12 +250,6 @@ class InferAddressSpacesImpl { char InferAddressSpaces::ID = 0; -namespace llvm { - -void initializeInferAddressSpacesPass(PassRegistry &); - -} // end namespace llvm - INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) From 813f00835d6af2bedcab148d881d2b5a876edfb0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 14:10:30 -0800 Subject: [PATCH 284/992] [CodeGen] Remove unused forward declarations (NFC) --- llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h | 1 - llvm/include/llvm/CodeGen/FaultMaps.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 1 - llvm/include/llvm/CodeGen/LiveRangeEdit.h | 2 -- llvm/include/llvm/CodeGen/MachineLoopUtils.h | 1 - llvm/include/llvm/CodeGen/MachineModuleInfo.h | 1 - llvm/include/llvm/CodeGen/Passes.h | 1 - llvm/include/llvm/CodeGen/TailDuplicator.h | 1 - llvm/include/llvm/CodeGen/TargetLowering.h | 1 - 15 files changed, 16 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h index e7425dd3dc04..2ac9d938d281 100644 --- a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h +++ 
b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h @@ -17,7 +17,6 @@ namespace llvm { -class DILocalVariable; class DILocation; class DINode; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/FaultMaps.h b/llvm/include/llvm/CodeGen/FaultMaps.h index 12d2872c8c5b..8a8b1d2e6008 100644 --- a/llvm/include/llvm/CodeGen/FaultMaps.h +++ b/llvm/include/llvm/CodeGen/FaultMaps.h @@ -18,7 +18,6 @@ namespace llvm { class AsmPrinter; class MCExpr; -class raw_ostream; class FaultMaps { public: diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 4a1a4ff2528a..e73f8489497e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -20,7 +20,6 @@ class GISelChangeObserver; class LegalizerInfo; class MachineInstr; class MachineIRBuilder; -class MachineRegisterInfo; // Contains information relevant to enabling/disabling various combines for a // pass. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h index c5af64d2bcbe..7d198fada411 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -15,7 +15,6 @@ namespace llvm { class MachineInstr; -class MachineFunction; // Worklist which mostly works similar to InstCombineWorkList, but on // MachineInstrs. 
The main difference with something like a SetVector is that diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h index 4871d8d32ebd..c19f1d5330ba 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -25,7 +25,6 @@ namespace llvm { -class MachineRegisterInfo; class LostDebugLocObserver; class Legalizer : public MachineFunctionPass { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 044f2e22cfdd..3b2f937375eb 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -30,7 +30,6 @@ namespace llvm { // Forward declarations. class LegalizerInfo; -class Legalizer; class MachineRegisterInfo; class GISelChangeObserver; class LostDebugLocObserver; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 0b37539030b1..a02b15639946 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -38,7 +38,6 @@ class LegalizerHelper; class MachineInstr; class MachineRegisterInfo; class MCInstrInfo; -class GISelChangeObserver; namespace LegalizeActions { enum LegalizeAction : std::uint8_t { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h index 29575f386d7a..0845c001abdb 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h @@ -30,7 +30,6 @@ namespace llvm { // Forward declarations. 
class MachineRegisterInfo; -class TargetTransformInfo; namespace GISelAddressing { /// Helper struct to store a base, index and offset that forms an address struct BaseIndexOffset { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 8fed79585fe9..a77db3145915 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -44,7 +44,6 @@ class TargetLowering; class TargetPassConfig; class TargetRegisterInfo; class TargetRegisterClass; -class ConstantInt; class ConstantFP; class APFloat; class MachineIRBuilder; diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h index fa4e80179eec..d80522f5bdac 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -34,9 +34,7 @@ namespace llvm { class AAResults; class LiveIntervals; -class MachineBlockFrequencyInfo; class MachineInstr; -class MachineLoopInfo; class MachineOperand; class TargetInstrInfo; class TargetRegisterInfo; diff --git a/llvm/include/llvm/CodeGen/MachineLoopUtils.h b/llvm/include/llvm/CodeGen/MachineLoopUtils.h index 2352fbca548d..b9bf93b71e25 100644 --- a/llvm/include/llvm/CodeGen/MachineLoopUtils.h +++ b/llvm/include/llvm/CodeGen/MachineLoopUtils.h @@ -10,7 +10,6 @@ #define LLVM_CODEGEN_MACHINELOOPUTILS_H namespace llvm { -class MachineLoop; class MachineBasicBlock; class MachineRegisterInfo; class TargetInstrInfo; diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index 860a86ee991b..c07606e89374 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -44,7 +44,6 @@ namespace llvm { class BasicBlock; -class CallInst; class Function; class LLVMTargetMachine; class MMIAddrLabelMap; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index d5ad12fadfa0..f4c6edba61f2 
100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -30,7 +30,6 @@ class MemoryBuffer; class ModulePass; class Pass; class TargetMachine; -class TargetRegisterClass; class raw_ostream; } // End llvm namespace diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h index 6862bb2c3f44..daaa27f72d52 100644 --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -26,7 +26,6 @@ namespace llvm { class MachineBasicBlock; -class MachineBlockFrequencyInfo; class MachineBranchProbabilityInfo; class MachineFunction; class MachineInstr; diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b2d82e0cc6e8..aef7973ff20f 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -63,7 +63,6 @@ namespace llvm { -class BranchProbability; class CCState; class CCValAssign; class Constant; From 120b93e1a88c74fe6a1c8376f88f1f07e9fda7b9 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Fri, 31 Dec 2021 23:04:46 +0000 Subject: [PATCH 285/992] fix test so it doesn't use nonnull assumes on non-pointers The IR verifier should probably catch this. Alive2 did, though. 
--- llvm/test/Transforms/InstCombine/assume.ll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index dfd566c433b7..c3ae342efca7 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -486,7 +486,7 @@ define i1 @nonnull3B(i32** %a, i1 %control) { ; CHECK: taken: ; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null -; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) [ "nonnull"(i32* [[LOAD]]), "nonnull"(i1 [[CMP]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) [ "nonnull"(i32* [[LOAD]]) ] ; CHECK-NEXT: ret i1 true ; CHECK: not_taken: ; CHECK-NEXT: ret i1 [[CONTROL]] @@ -496,10 +496,10 @@ entry: %cmp = icmp ne i32* %load, null br i1 %control, label %taken, label %not_taken taken: - call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)] + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load)] ret i1 %cmp not_taken: - call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)] + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load)] ret i1 %control } @@ -528,7 +528,7 @@ taken: br label %exit exit: ; FIXME: this shouldn't be dropped because it is still dominated by the new position of %load - call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)] + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load)] ret i1 %cmp2 not_taken: call void @llvm.assume(i1 %cmp) @@ -547,7 +547,6 @@ define i1 @nonnull3D(i32** %a, i1 %control) { ; CHECK: exit: ; CHECK-NEXT: ret i1 [[CMP2]] ; CHECK: not_taken: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "ignore"(i32* undef), "ignore"(i1 undef), "nonnull"(i1 [[CONTROL]]) ] ; CHECK-NEXT: ret i1 [[CONTROL]] ; entry: @@ -560,7 +559,7 @@ taken: exit: ret i1 %cmp2 not_taken: - call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 
%cmp), "nonnull"(i1 %control)] + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load)] ret i1 %control } From f8f5f1b3a48e7497d8ff09ff35d3fd20a4c8c9b1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 15:17:25 -0800 Subject: [PATCH 286/992] [Hexagon] Use range-based for loops (NFC) --- llvm/lib/Target/Hexagon/HexagonGenInsert.cpp | 93 +++++++++---------- .../Target/Hexagon/HexagonGenPredicate.cpp | 4 +- .../Target/Hexagon/HexagonHardwareLoops.cpp | 4 +- .../Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 12 +-- 4 files changed, 52 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index 85230cac9d7c..0bb1658e7698 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -583,14 +583,12 @@ namespace { char HexagonGenInsert::ID = 0; void HexagonGenInsert::dump_map() const { - using iterator = IFMapType::const_iterator; - - for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - dbgs() << " " << printReg(I->first, HRI) << ":\n"; - const IFListType &LL = I->second; - for (unsigned i = 0, n = LL.size(); i < n; ++i) - dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " - << PrintRegSet(LL[i].second, HRI) << '\n'; + for (const auto &I : IFMap) { + dbgs() << " " << printReg(I.first, HRI) << ":\n"; + const IFListType &LL = I.second; + for (const auto &J : LL) + dbgs() << " " << PrintIFR(J.first, HRI) << ", " + << PrintRegSet(J.second, HRI) << '\n'; } } @@ -627,8 +625,8 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, using SortableVectorType = std::vector; SortableVectorType VRs; - for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) - VRs.push_back(I->first); + for (auto &I : RB) + VRs.push_back(I.first); llvm::sort(VRs, LexCmp); // Transfer the results to the outgoing register ordering. 
for (unsigned i = 0, n = VRs.size(); i < n; ++i) @@ -853,20 +851,18 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR, if (isDebug()) { dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n"; - for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { - dbgs() << " L=" << I->first << ':'; - const RSListType &LL = I->second; - for (unsigned i = 0, n = LL.size(); i < n; ++i) - dbgs() << " (" << printReg(LL[i].first, HRI) << ",@" - << LL[i].second << ')'; + for (const auto &I : LM) { + dbgs() << " L=" << I.first << ':'; + const RSListType &LL = I.second; + for (const auto &J : LL) + dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')'; dbgs() << '\n'; } } bool Recorded = false; - for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { - unsigned SrcR = *I; + for (unsigned SrcR : AVs) { int FDi = -1, LDi = -1; // First/last different bit. const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); uint16_t AW = AC.width(); @@ -888,8 +884,8 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR, if (F == LM.end()) continue; RSListType &LL = F->second; - for (unsigned i = 0, n = LL.size(); i < n; ++i) { - uint16_t S = LL[i].second; + for (const auto &I : LL) { + uint16_t S = I.second; // MinL is the minimum length of the prefix. Any length above MinL // allows some flexibility as to where the prefix can start: // given the extra length EL=L-MinL, the prefix must start between @@ -900,7 +896,7 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR, uint16_t LowS = (EL < FD) ? FD-EL : 0; if (S < LowS) // Starts too early. 
continue; - unsigned InsR = LL[i].first; + unsigned InsR = I.first; if (!isValidInsertForm(VR, SrcR, InsR, L, S)) continue; if (isDebug()) { @@ -1029,10 +1025,10 @@ void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, } void HexagonGenInsert::computeRemovableRegisters() { - for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - IFListType &LL = I->second; - for (unsigned i = 0, n = LL.size(); i < n; ++i) - findRemovableRegisters(I->first, LL[i].first, LL[i].second); + for (auto &I : IFMap) { + IFListType &LL = I.second; + for (auto &J : LL) + findRemovableRegisters(I.first, J.first, J.second); } } @@ -1064,8 +1060,8 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) { MachineInstr *DefVR = MRI->getVRegDef(VR); bool DefEx = HII->isConstExtended(*DefVR); bool HasNE = false; - for (unsigned i = 0, n = LL.size(); i < n; ++i) { - if (LL[i].second.empty()) + for (const auto &I : LL) { + if (I.second.empty()) continue; HasNE = true; break; @@ -1172,8 +1168,8 @@ void HexagonGenInsert::pruneCandidates() { // selection method. // First, remove candidates whose potentially removable set is a subset // of another candidate's set. - for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) - pruneCoveredSets(I->first); + for (const auto &I : IFMap) + pruneCoveredSets(I.first); UnsignedMap RPO; @@ -1181,18 +1177,18 @@ void HexagonGenInsert::pruneCandidates() { RPOTType RPOT(MFN); unsigned RPON = 0; - for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) - RPO[(*I)->getNumber()] = RPON++; + for (const auto &I : RPOT) + RPO[I->getNumber()] = RPON++; PairMapType Memo; // Memoization map for distance calculation. // Remove candidates that would use registers defined too far away. 
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) - pruneUsesTooFar(I->first, RPO, Memo); + for (const auto &I : IFMap) + pruneUsesTooFar(I.first, RPO, Memo); pruneEmptyLists(); - for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) - pruneRegCopies(I->first); + for (const auto &I : IFMap) + pruneRegCopies(I.first); } namespace { @@ -1277,8 +1273,8 @@ void HexagonGenInsert::selectCandidates() { for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { const IFListType &LL = I->second; RegisterSet TT; - for (unsigned i = 0, n = LL.size(); i < n; ++i) - TT.insert(LL[i].second); + for (const auto &J : LL) + TT.insert(J.second); for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) RemC[R]++; AllRMs.insert(TT); @@ -1384,8 +1380,8 @@ bool HexagonGenInsert::generateInserts() { // Create a new register for each one from IFMap, and store them in the // map. UnsignedMap RegMap; - for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - unsigned VR = I->first; + for (auto &I : IFMap) { + unsigned VR = I.first; const TargetRegisterClass *RC = MRI->getRegClass(VR); Register NewVR = MRI->createVirtualRegister(RC); RegMap[VR] = NewVR; @@ -1394,15 +1390,15 @@ bool HexagonGenInsert::generateInserts() { // We can generate the "insert" instructions using potentially stale re- // gisters: SrcR and InsR for a given VR may be among other registers that // are also replaced. This is fine, we will do the mass "rauw" a bit later. - for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - MachineInstr *MI = MRI->getVRegDef(I->first); + for (auto &I : IFMap) { + MachineInstr *MI = MRI->getVRegDef(I.first); MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned NewR = RegMap[I->first]; + unsigned NewR = RegMap[I.first]; bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; const MCInstrDesc &D = R32 ? 
HII->get(Hexagon::S2_insert) : HII->get(Hexagon::S2_insertp); - IFRecord IF = I->second[0].first; + IFRecord IF = I.second[0].first; unsigned Wdh = IF.Wdh, Off = IF.Off; unsigned InsS = 0; if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { @@ -1428,9 +1424,9 @@ bool HexagonGenInsert::generateInserts() { MRI->clearKillFlags(IF.InsR); } - for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - MachineInstr *DefI = MRI->getVRegDef(I->first); - MRI->replaceRegWith(I->first, RegMap[I->first]); + for (const auto &I : IFMap) { + MachineInstr *DefI = MRI->getVRegDef(I.first); + MRI->replaceRegWith(I.first, RegMap[I.first]); DefI->eraseFromParent(); } @@ -1523,9 +1519,8 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { if (isDebug()) { dbgs() << "Cell ordering:\n"; - for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); - I != E; ++I) { - unsigned VR = I->first, Pos = I->second; + for (const auto &I : CellOrd) { + unsigned VR = I.first, Pos = I.second; dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n"; } } diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index 1a66394e9757..00615f355146 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -505,8 +505,8 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; collectPredicateGPR(MF); - for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I) - processPredicateGPR(*I); + for (const RegisterSubReg &R : PredGPRs) + processPredicateGPR(R); bool Again; do { diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 5d2e1b259449..338fda57c53a 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -1127,8 +1127,8 @@ bool 
HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, bool L1Used = false; // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); + for (MachineLoop *I : *L) { + Changed |= convertToHardwareLoop(I, RecL0used, RecL1used); L0Used |= RecL0used; L1Used |= RecL1used; } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index ed4874baf7c8..95e202647246 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -230,8 +230,7 @@ bool Coloring::color() { WorkQ.push_back(N); } - for (unsigned I = 0; I < WorkQ.size(); ++I) { - Node N = WorkQ[I]; + for (Node N : WorkQ) { NodeSet &Ns = Edges[N]; auto P = getUniqueColor(Ns); if (P.first) { @@ -270,8 +269,7 @@ bool Coloring::color() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void Coloring::dump() const { dbgs() << "{ Order: {"; - for (unsigned I = 0; I != Order.size(); ++I) { - Node P = Order[I]; + for (Node P : Order) { if (P != Ignore) dbgs() << ' ' << P; else @@ -761,8 +759,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const { namespace { struct ShuffleMask { ShuffleMask(ArrayRef M) : Mask(M) { - for (unsigned I = 0, E = Mask.size(); I != E; ++I) { - int M = Mask[I]; + for (int M : Mask) { if (M == -1) continue; MinSrc = (MinSrc == -1) ? 
M : std::min(MinSrc, M); @@ -935,8 +932,7 @@ static SmallVector getInputSegmentList(ShuffleMask SM, unsigned Shift = Log2_32(SegLen); BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift); - for (int I = 0, E = SM.Mask.size(); I != E; ++I) { - int M = SM.Mask[I]; + for (int M : SM.Mask) { if (M >= 0) Segs.set(M >> Shift); } From bfc8f76e60a8efd920dbd6efc4467ffb6de15919 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 16:06:19 -0800 Subject: [PATCH 287/992] [X86] Remove unused declaration getTileStoreShape (NFC) --- llvm/lib/Target/X86/X86FastTileConfig.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp index 47874e82ff3b..061fff50bcea 100644 --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -56,8 +56,6 @@ class X86FastTileConfig : public MachineFunctionPass { bool isTileLoad(MachineInstr &MI); bool isTileStore(MachineInstr &MI); bool isAMXInstr(MachineInstr &MI); - void getTileStoreShape(MachineInstr &MI, - SmallVector &ShapedTiles); MachineInstr *getKeyAMXInstr(MachineInstr *MI); void getTileShapesCfg(MachineInstr *MI, From 5c4b9ea4a7b36c625555262a07f744c0fc694461 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 16:43:06 -0800 Subject: [PATCH 288/992] [AMDGPU] Remove replaceWithNative (NFC) The function was introduced without any use on Aug 11, 2017 in commit 7f37794ebd2c6c36224597800e4d1e5a99ad80e9. --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 24 ----------------------- 1 file changed, 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index c221b55d9c70..0e1bb3305b2d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -58,9 +58,6 @@ class AMDGPULibCalls { // "FuncName" exists. It may create a new function prototype in pre-link mode. 
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); - // Replace a normal function with its native version. - bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo); - bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo); bool TDOFold(CallInst *CI, const FuncInfo &FInfo); @@ -779,27 +776,6 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { return false; } -bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) { - Module *M = CI->getModule(); - if (getArgType(FInfo) != AMDGPULibFunc::F32 || - FInfo.getPrefix() != AMDGPULibFunc::NOPFX || - !HasNative(FInfo.getId())) - return false; - - AMDGPULibFunc nf = FInfo; - nf.setPrefix(AMDGPULibFunc::NATIVE); - if (FunctionCallee FPExpr = getFunction(M, nf)) { - LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> "); - - CI->setCalledFunction(FPExpr); - - LLVM_DEBUG(dbgs() << *CI << '\n'); - - return true; - } - return false; -} - // [native_]half_recip(c) ==> 1.0/c bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo) { From bc360fd83a87a461cb7f90b54d00b5e99510424f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 31 Dec 2021 16:50:18 -0800 Subject: [PATCH 289/992] [AMDGPU] Remove unused declarations fold_exp* and fold_log* (NFC) --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 0e1bb3305b2d..c28427758ac7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -87,24 +87,6 @@ class AMDGPULibCalls { double& Res1, Constant *copr0, Constant *copr1, Constant *copr2); bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); - // exp - bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - - // exp2 - bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - - // exp10 - bool fold_exp10(CallInst 
*CI, IRBuilder<> &B, const FuncInfo &FInfo); - - // log - bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - - // log2 - bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - - // log10 - bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - // sqrt bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); From 3536d24a1aad1bf7e8f383cfd8f4673742df22a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Sat, 1 Jan 2022 02:03:00 +0100 Subject: [PATCH 290/992] [mlir][LLVMIR] Add `llvm.eh.typeid.for` intrinsic MLIR already exposes landingpads, the invokeop and the personality function on LLVM functions. With this intrinsic it should be possible to implement exception handling via the exception handling mechanisms provided by the Itanium ABI. Differential Revision: https://reviews.llvm.org/D116436 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 9 +++++++++ mlir/test/Dialect/LLVMIR/roundtrip.mlir | 2 ++ mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir | 7 +++++++ 3 files changed, 18 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index a2a1f7a57f43..345d03e93940 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1542,6 +1542,15 @@ def LLVM_CoroResumeOp : LLVM_IntrOp<"coro.resume", [], [], [], 0> { let assemblyFormat = "$handle attr-dict"; } +// +// Exception handling intrinsics. +// + +def LLVM_EhTypeidForOp : LLVM_OneResultIntrOp<"eh.typeid.for"> { + let arguments = (ins LLVM_i8Ptr:$type_info); + let assemblyFormat = "$type_info attr-dict `:` type($res)"; +} + // // Stack save/restore intrinsics. 
// diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 9e504e7fe081..b6a09d6ff09e 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -332,9 +332,11 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali // CHECK: ^[[BB1]]: // CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[a3]] : !llvm.ptr>) (catch %[[a6]] : !llvm.ptr) (filter %[[a2]] : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> +// CHECK: %{{.*}} = llvm.intr.eh.typeid.for %6 : i32 // CHECK: llvm.resume %[[lp]] : !llvm.struct<(ptr, i32)> ^bb1: %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr>) (catch %6 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> + %11 = llvm.intr.eh.typeid.for %6 : i32 llvm.resume %10 : !llvm.struct<(ptr, i32)> // CHECK: ^[[BB2]]: diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index e1eff69a4251..752e9e961bcd 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -485,6 +485,13 @@ llvm.func @coro_resume(%arg0: !llvm.ptr) { llvm.return } +// CHECK-LABEL: @eh_typeid_for +llvm.func @eh_typeid_for(%arg0 : !llvm.ptr) { + // CHECK: call i32 @llvm.eh.typeid.for + %0 = llvm.intr.eh.typeid.for %arg0 : i32 + llvm.return +} + // CHECK-LABEL: @stack_save llvm.func @stack_save() { // CHECK: call i8* @llvm.stacksave From 8637be74a038ece5f97040895963e0ef6797f324 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 01:14:27 +0000 Subject: [PATCH 291/992] Remove redundant return after return in CodegenStrategy (NFC) Reported by Coverity --- .../include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h index 9a32279f8421..a532478d72af 
100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h @@ -242,7 +242,6 @@ struct CodegenStrategy { promoteIf(bool b, StringRef opName, linalg::LinalgPromotionOptions options, LinalgTransformationFilter::FilterFunction f = nullptr) { return b ? promote(opName, options, f) : *this; - return *this; } /// Append a pattern to generalize named operations. CodegenStrategy & @@ -257,7 +256,6 @@ struct CodegenStrategy { generalizeIf(bool b, StringRef opName, LinalgTransformationFilter::FilterFunction f = nullptr) { return b ? generalize(opName, f) : *this; - return *this; } /// Append a pattern to interchange iterators. CodegenStrategy & @@ -272,7 +270,6 @@ struct CodegenStrategy { interchangeIf(bool b, ArrayRef iteratorInterchange, LinalgTransformationFilter::FilterFunction f = nullptr) { return b ? interchange(iteratorInterchange, f) : *this; - return *this; } /// Append patterns to decompose convolutions. CodegenStrategy & @@ -284,7 +281,6 @@ struct CodegenStrategy { CodegenStrategy & decomposeIf(bool b, LinalgTransformationFilter::FilterFunction f = nullptr) { return b ? decompose(f) : *this; - return *this; } /// Append a pattern to rewrite `LinalgOpType` as a vector operation. CodegenStrategy & @@ -302,7 +298,6 @@ struct CodegenStrategy { LinalgTransformationFilter::FilterFunction f = nullptr, bool vectorizePadding = false) { return b ? vectorize(opName, f, vectorizePadding) : *this; - return *this; } /// Append a pattern to lower all vector operations. CodegenStrategy &vectorLowering(LinalgVectorLoweringOptions options) { From d00e438cfe7ef6af6654810a34ef461988e93172 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Dec 2021 17:13:36 -0800 Subject: [PATCH 292/992] [RISCV][LegalizeIntegerTypes] Teach PromoteSetCCOperands not to sext i32 comparisons for RV64 if the promoted values are already zero extended. 
This is similar to what is done for targets that prefer zero extend where we avoid using a zero extend if the promoted values are sign extended. We'll also check for zero extended operands for ugt, ult, uge, and ule when the target prefers sign extend. This is different than preferring zero extend, where we only check for sign bits on equality comparisons. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D116421 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 80 +++++++++------ llvm/test/CodeGen/RISCV/fpclamptosat.ll | 3 +- llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll | 97 ++++++++----------- llvm/test/CodeGen/RISCV/half-convert.ll | 9 +- 4 files changed, 96 insertions(+), 93 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d977f4ea3dbd..8ce6ad1b66a0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1708,46 +1708,62 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, ISD::CondCode CCCode) { // We have to insert explicit sign or zero extends. Note that we could // insert sign extends for ALL conditions. For those operations where either - // zero or sign extension would be valid, use SExtOrZExtPromotedInteger - // which will choose the cheapest for the target. - switch (CCCode) { - default: llvm_unreachable("Unknown integer comparison!"); - case ISD::SETEQ: - case ISD::SETNE: { - SDValue OpL = GetPromotedInteger(LHS); - SDValue OpR = GetPromotedInteger(RHS); - - // We would prefer to promote the comparison operand with sign extension. - // If the width of OpL/OpR excluding the duplicated sign bits is no greater - // than the width of LHS/RHS, we can avoid inserting real truncate - // instruction, which is redundant eventually. 
- unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); - unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); + // zero or sign extension would be valid, we ask the target which extension + // it would prefer. + + // Signed comparisons always require sign extension. + if (ISD::isSignedIntSetCC(CCCode)) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + return; + } + + assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) && + "Unknown integer comparison!"); + + SDValue OpL = GetPromotedInteger(LHS); + SDValue OpR = GetPromotedInteger(RHS); + + if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) { + // The target would prefer to promote the comparison operand with sign + // extension. Honor that unless the promoted values are already zero + // extended. + unsigned OpLEffectiveBits = + DAG.computeKnownBits(OpL).countMaxActiveBits(); + unsigned OpREffectiveBits = + DAG.computeKnownBits(OpR).countMaxActiveBits(); if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { LHS = OpL; RHS = OpR; - } else { - LHS = SExtOrZExtPromotedInteger(LHS); - RHS = SExtOrZExtPromotedInteger(RHS); + return; } - break; - } - case ISD::SETUGE: - case ISD::SETUGT: - case ISD::SETULE: - case ISD::SETULT: - LHS = SExtOrZExtPromotedInteger(LHS); - RHS = SExtOrZExtPromotedInteger(RHS); - break; - case ISD::SETGE: - case ISD::SETGT: - case ISD::SETLT: - case ISD::SETLE: + + // The promoted values aren't zero extended, use a sext_inreg. LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); - break; + return; } + + // Prefer to promote the comparison operand with zero extension. + + // If this is an equality comparison and the width of OpL/OpR excluding the + // duplicated sign bits is no greater than the width of LHS/RHS, we can avoid + // inserting a zext_inreg operation that we might not be able to remove. 
+ if (ISD::isIntEqualitySetCC(CCCode)) { + unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; + return; + } + } + + // Otherwise, use zext_inreg. + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); } SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index e1cc853aa5ac..46e78e375edf 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1072,10 +1072,9 @@ define i16 @utesth_f16i16(half %x) { ; RV64-NEXT: call __gnu_h2f_ieee@plt ; RV64-NEXT: fmv.w.x ft0, a0 ; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: sext.w a2, a0 ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: bltu a2, a1, .LBB16_2 +; RV64-NEXT: bltu a0, a1, .LBB16_2 ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB16_2: # %entry diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll index 53285d759864..4cba269baaa5 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -1091,7 +1091,6 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -1102,12 +1101,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 -; CHECK-NEXT: lhu s5, 0(a1) +; CHECK-NEXT: lhu s6, 
0(a1) ; CHECK-NEXT: lhu s2, 56(a1) ; CHECK-NEXT: lhu s3, 48(a1) ; CHECK-NEXT: lhu s4, 40(a1) -; CHECK-NEXT: lhu s6, 32(a1) +; CHECK-NEXT: lhu s5, 32(a1) ; CHECK-NEXT: lhu s7, 24(a1) ; CHECK-NEXT: lhu s1, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) @@ -1117,13 +1115,13 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: mv s8, a0 ; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 +; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s6 +; CHECK-NEXT: mv s7, a0 +; CHECK-NEXT: mv a0, s5 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s6, a0 +; CHECK-NEXT: mv s5, a0 ; CHECK-NEXT: mv a0, s4 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s4, a0 @@ -1134,69 +1132,61 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s8 ; CHECK-NEXT: fcvt.lu.s s8, ft0, rtz -; CHECK-NEXT: sext.w s1, s8 -; CHECK-NEXT: mv a0, s5 +; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz -; CHECK-NEXT: sext.w a0, a6 -; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addiw a1, a1, -1 -; CHECK-NEXT: bltu a0, a1, .LBB16_2 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: bltu a6, a1, .LBB16_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: .LBB16_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s6 -; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz -; CHECK-NEXT: sext.w a4, s7 -; CHECK-NEXT: bltu s1, a1, .LBB16_4 +; CHECK-NEXT: fmv.w.x ft1, s7 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: fcvt.lu.s a2, ft0, rtz +; 
CHECK-NEXT: bltu s8, a1, .LBB16_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: mv s8, a1 ; CHECK-NEXT: .LBB16_4: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 -; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz -; CHECK-NEXT: sext.w a5, a7 -; CHECK-NEXT: bltu a4, a1, .LBB16_6 +; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz +; CHECK-NEXT: bltu a2, a1, .LBB16_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv s7, a1 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB16_6: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fcvt.lu.s a4, ft1, rtz -; CHECK-NEXT: sext.w s1, a3 -; CHECK-NEXT: bltu a5, a1, .LBB16_8 +; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz +; CHECK-NEXT: bltu a3, a1, .LBB16_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a7, a1 +; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB16_8: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 -; CHECK-NEXT: fcvt.lu.s a5, ft0, rtz -; CHECK-NEXT: sext.w a0, a4 -; CHECK-NEXT: bltu s1, a1, .LBB16_10 +; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz +; CHECK-NEXT: bltu a4, a1, .LBB16_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB16_10: # %entry -; CHECK-NEXT: fcvt.lu.s s1, ft1, rtz -; CHECK-NEXT: sext.w a2, a5 -; CHECK-NEXT: bgeu a0, a1, .LBB16_15 +; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: bgeu a5, a1, .LBB16_15 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: sext.w a0, s1 -; CHECK-NEXT: bgeu a2, a1, .LBB16_16 +; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz +; CHECK-NEXT: bgeu s1, a1, .LBB16_16 ; CHECK-NEXT: .LBB16_12: # %entry ; CHECK-NEXT: bltu a0, a1, .LBB16_14 ; CHECK-NEXT: .LBB16_13: # %entry -; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB16_14: # %entry -; CHECK-NEXT: sh s1, 14(s0) -; CHECK-NEXT: sh a5, 12(s0) -; CHECK-NEXT: sh a4, 10(s0) -; CHECK-NEXT: sh a3, 8(s0) -; CHECK-NEXT: sh a7, 6(s0) -; CHECK-NEXT: sh s7, 4(s0) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s1, 12(s0) 
+; CHECK-NEXT: sh a5, 10(s0) +; CHECK-NEXT: sh a4, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a2, 4(s0) ; CHECK-NEXT: sh s8, 2(s0) ; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload @@ -1209,15 +1199,14 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB16_15: # %entry -; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: sext.w a0, s1 -; CHECK-NEXT: bltu a2, a1, .LBB16_12 -; CHECK-NEXT: .LBB16_16: # %entry ; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz +; CHECK-NEXT: bltu s1, a1, .LBB16_12 +; CHECK-NEXT: .LBB16_16: # %entry +; CHECK-NEXT: mv s1, a1 ; CHECK-NEXT: bgeu a0, a1, .LBB16_13 ; CHECK-NEXT: j .LBB16_14 entry: diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index a833b472211b..9a535a803a02 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -745,13 +745,12 @@ define i32 @fcvt_wu_h_multiple_use(half %x, i32* %y) { ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: sext.w a2, a0 -; RV64I-NEXT: li a1, 1 -; RV64I-NEXT: beqz a2, .LBB7_2 -; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB7_2 +; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB7_2: ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret From 243b7aaf51e8ad04910ea1f8779db07d6fee2481 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Dec 2021 17:29:57 -0800 Subject: [PATCH 293/992] [SelectionDAG] Use KnownBits::countMinSignBits() to simplify the end of ComputeNumSignBits. 
This matches what is done in ValueTracking.cpp Reviewed By: RKSimon, foad Differential Revision: https://reviews.llvm.org/D116423 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2ae0d4df7b77..d14647d0eb0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4294,21 +4294,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); - - APInt Mask; - if (Known.isNonNegative()) { // sign bit is 0 - Mask = Known.Zero; - } else if (Known.isNegative()) { // sign bit is 1; - Mask = Known.One; - } else { - // Nothing known. - return FirstAnswer; - } - - // Okay, we know that the sign bit in Mask is set. Use CLO to determine - // the number of identical bits in the top of the input value. - Mask <<= Mask.getBitWidth()-VTBits; - return std::max(FirstAnswer, Mask.countLeadingOnes()); + return std::max(FirstAnswer, Known.countMinSignBits()); } unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { From a9f13f80658c20bfc1b41187cefc2e90fdc0fd6f Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 01:39:04 +0000 Subject: [PATCH 294/992] Fix a few unitialized class members in MLIR (NFC) Flagged by Coverity. 
--- mlir/include/mlir/Analysis/AffineAnalysis.h | 2 +- mlir/include/mlir/Analysis/Liveness.h | 2 +- mlir/include/mlir/Analysis/NestedMatcher.h | 2 +- mlir/include/mlir/Analysis/Utils.h | 2 +- mlir/include/mlir/IR/AffineMap.h | 6 +++--- mlir/include/mlir/IR/AttributeSupport.h | 2 +- mlir/include/mlir/Reducer/ReductionNode.h | 8 ++++---- mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp | 2 +- mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp | 2 +- mlir/lib/Pass/PassCrashRecovery.cpp | 4 ++-- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 2 +- mlir/lib/Target/SPIRV/Deserialization/Deserializer.h | 2 +- mlir/lib/Transforms/BufferOptimizations.cpp | 2 +- mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp | 4 ++-- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/mlir/include/mlir/Analysis/AffineAnalysis.h b/mlir/include/mlir/Analysis/AffineAnalysis.h index fa793a9e17f8..4c10bed1690c 100644 --- a/mlir/include/mlir/Analysis/AffineAnalysis.h +++ b/mlir/include/mlir/Analysis/AffineAnalysis.h @@ -137,7 +137,7 @@ struct MemRefAccess { // lb < ub. Note that ub/lb == None means unbounded. struct DependenceComponent { // The AffineForOp Operation associated with this dependence component. - Operation *op; + Operation *op = nullptr; // The lower bound of the dependence distance. Optional lb; // The upper bound of the dependence distance (inclusive). diff --git a/mlir/include/mlir/Analysis/Liveness.h b/mlir/include/mlir/Analysis/Liveness.h index 74891520139d..3b2050691430 100644 --- a/mlir/include/mlir/Analysis/Liveness.h +++ b/mlir/include/mlir/Analysis/Liveness.h @@ -131,7 +131,7 @@ class LivenessBlockInfo { private: /// The underlying block. - Block *block; + Block *block = nullptr; /// The set of all live in values. 
ValueSetT inValues; diff --git a/mlir/include/mlir/Analysis/NestedMatcher.h b/mlir/include/mlir/Analysis/NestedMatcher.h index f0a7088bd8fc..fb725e3ec864 100644 --- a/mlir/include/mlir/Analysis/NestedMatcher.h +++ b/mlir/include/mlir/Analysis/NestedMatcher.h @@ -65,7 +65,7 @@ class NestedMatch { NestedMatch() = default; /// Payload, holds a NestedMatch and all its children along this branch. - Operation *matchedOperation; + Operation *matchedOperation = nullptr; ArrayRef matchedChildren; }; diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h index 7c5b5b1d2aa0..ee231e1713e0 100644 --- a/mlir/include/mlir/Analysis/Utils.h +++ b/mlir/include/mlir/Analysis/Utils.h @@ -333,7 +333,7 @@ struct MemRefRegion { Value memref; /// Read or write. - bool write; + bool write = false; /// If there is more than one load/store op associated with the region, the /// location information would correspond to one of those op's. diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 14bddf5794b5..28a4261b9b80 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -353,11 +353,11 @@ struct MutableAffineMap { private: // Same meaning as AffineMap's fields. SmallVector results; - unsigned numDims; - unsigned numSymbols; + unsigned numDims = 0; + unsigned numSymbols = 0; /// A pointer to the IR's context to store all newly created /// AffineExprStorage's. - MLIRContext *context; + MLIRContext *context = nullptr; }; /// Simplifies an affine map by simplifying its underlying AffineExpr results. diff --git a/mlir/include/mlir/IR/AttributeSupport.h b/mlir/include/mlir/IR/AttributeSupport.h index 97e120208d39..9745207fd2ef 100644 --- a/mlir/include/mlir/IR/AttributeSupport.h +++ b/mlir/include/mlir/IR/AttributeSupport.h @@ -152,7 +152,7 @@ class alignas(8) AttributeStorage : public StorageUniquer::BaseStorage { Type type; /// The abstract descriptor for this attribute. 
- const AbstractAttribute *abstractAttribute; + const AbstractAttribute *abstractAttribute = nullptr; }; /// Default storage type for attributes that require no additional diff --git a/mlir/include/mlir/Reducer/ReductionNode.h b/mlir/include/mlir/Reducer/ReductionNode.h index c938a1b5453d..442bc59589f3 100644 --- a/mlir/include/mlir/Reducer/ReductionNode.h +++ b/mlir/include/mlir/Reducer/ReductionNode.h @@ -145,19 +145,19 @@ class ReductionNode { OwningOpRef module; /// The region of certain operation we're reducing in the module - Region *region; + Region *region = nullptr; /// The node we are reduced from. It means we will be in variants of parent /// node. - ReductionNode *parent; + ReductionNode *parent = nullptr; /// The size of module after applying the reducer patterns with range /// constraints. This is only valid while the interestingness has been tested. - size_t size; + size_t size = 0; /// This is true if the module has been evaluated and it exhibits the /// interesting behavior. - Tester::Interestingness interesting; + Tester::Interestingness interesting = Tester::Interestingness::Untested; /// `ranges` represents the selected subset of operations in the region. We /// implicitly number each operation in the region and ReductionTreePass will diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 7ecc6750bcca..036cc6b03f76 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -755,7 +755,7 @@ struct VectorizationState { DenseMap vecLoopToMask; // The strategy drives which loop to vectorize by which amount. 
- const VectorizationStrategy *strategy; + const VectorizationStrategy *strategy = nullptr; private: /// Internal implementation to map input scalar values to new vector or scalar diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp index 05b5dafb8a68..b66f569c352d 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp @@ -773,7 +773,7 @@ struct spirv::detail::StructTypeStorage : public TypeStorage { /// in order to mutate the storage object providing the actual content. StructTypeStorage(StringRef identifier) : memberTypesAndIsBodySet(nullptr, false), offsetInfo(nullptr), - numMemberDecorations(0), memberDecorationsInfo(nullptr), + numMembers(0), numMemberDecorations(0), memberDecorationsInfo(nullptr), identifier(identifier) {} /// Construct a storage object for a literal struct type. A struct type diff --git a/mlir/lib/Pass/PassCrashRecovery.cpp b/mlir/lib/Pass/PassCrashRecovery.cpp index ea642ce44f5d..d2d646dc7a32 100644 --- a/mlir/lib/Pass/PassCrashRecovery.cpp +++ b/mlir/lib/Pass/PassCrashRecovery.cpp @@ -180,7 +180,7 @@ struct PassCrashReproducerGenerator::Impl { /// Flag indicating if reproducer generation should be localized to the /// failing pass. - bool localReproducer; + bool localReproducer = false; /// A record of all of the currently active reproducer contexts. SmallVector> activeContexts; @@ -190,7 +190,7 @@ struct PassCrashReproducerGenerator::Impl { SetVector> runningPasses; /// Various pass manager flags that get emitted when generating a reproducer. 
- bool pmFlagVerifyPasses; + bool pmFlagVerifyPasses = false; }; PassCrashReproducerGenerator::PassCrashReproducerGenerator( diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 65f0fc9b3849..366a3d7ce24a 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -105,7 +105,7 @@ class Importer { /// The current module being created. ModuleOp module; /// The entry block of the current function being processed. - Block *currentEntryBlock; + Block *currentEntryBlock = nullptr; /// Globals are inserted before the first function, if any. Block::iterator getGlobalInsertPt() { diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h index 402bfcbae1d9..d5a9e9913851 100644 --- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h @@ -500,7 +500,7 @@ class Deserializer { OpBuilder opBuilder; - spirv::Version version; + spirv::Version version = spirv::Version::V_1_0; /// The list of capabilities used by the module. llvm::SmallSetVector capabilities; diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Transforms/BufferOptimizations.cpp index 27e00a14c0d4..9421e85193b1 100644 --- a/mlir/lib/Transforms/BufferOptimizations.cpp +++ b/mlir/lib/Transforms/BufferOptimizations.cpp @@ -284,7 +284,7 @@ struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; /// Remembers the dominator block of all aliases. - Block *aliasDominatorBlock; + Block *aliasDominatorBlock = nullptr; /// Computes the upper bound for the placement block search. 
Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp index 22b9b36c908d..8042ecc6fe96 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -265,8 +265,8 @@ class AttrOrTypeFormat { std::vector> elements; /// Flags for printing spaces. - bool shouldEmitSpace; - bool lastWasPunctuation; + bool shouldEmitSpace = false; + bool lastWasPunctuation = false; }; } // namespace From 36a6e56bff7759f14160ad7413b69cb1eadc2fc2 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 01:42:26 +0000 Subject: [PATCH 295/992] Fix possible memory leak in a MLIR unit-test Flagged by Coverity --- mlir/test/CAPI/ir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index ac74c5748286..51e0c2b4bcd2 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -276,8 +276,10 @@ int collectStats(MlirOperation operation) { do { int retval = collectStatsSingle(head, &stats); - if (retval) + if (retval) { + free(head); return retval; + } OpListNode *next = head->next; free(head); head = next; From bb6109aae6b47ce56388c9c426c959c87b6a44d5 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 01:56:50 +0000 Subject: [PATCH 296/992] Pass the LLVMTypeConverter by reference in MemRefBuilder (NFC) This is a fairly large structure (952B according to Coverity), it was already passed by reference in most places but not consistently. 
--- mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h | 6 +++--- mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h index f9eede519ca0..8f755d5a1cb3 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h @@ -223,7 +223,7 @@ class UnrankedMemRefDescriptor : public StructBuilder { Value index); /// Builds IR inserting the size[index] into the descriptor. static void setSize(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, Value sizeBasePtr, + LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value index, Value size); /// Builds IR extracting the pointer to the first element of the stride array. @@ -232,11 +232,11 @@ class UnrankedMemRefDescriptor : public StructBuilder { Value sizeBasePtr, Value rank); /// Builds IR extracting the stride[index] from the descriptor. static Value stride(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, Value strideBasePtr, + LLVMTypeConverter &typeConverter, Value strideBasePtr, Value index, Value stride); /// Builds IR inserting the stride[index] into the descriptor. 
static void setStride(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, Value strideBasePtr, + LLVMTypeConverter &typeConverter, Value strideBasePtr, Value index, Value stride); }; diff --git a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp index 10ce877e24fa..edb43b3cce37 100644 --- a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp +++ b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -487,7 +487,7 @@ Value UnrankedMemRefDescriptor::size(OpBuilder &builder, Location loc, } void UnrankedMemRefDescriptor::setSize(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, + LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value index, Value size) { Type indexPtrTy = LLVM::LLVMPointerType::get(typeConverter.getIndexType()); @@ -505,7 +505,7 @@ Value UnrankedMemRefDescriptor::strideBasePtr(OpBuilder &builder, Location loc, } Value UnrankedMemRefDescriptor::stride(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, + LLVMTypeConverter &typeConverter, Value strideBasePtr, Value index, Value stride) { Type indexPtrTy = LLVM::LLVMPointerType::get(typeConverter.getIndexType()); @@ -515,7 +515,7 @@ Value UnrankedMemRefDescriptor::stride(OpBuilder &builder, Location loc, } void UnrankedMemRefDescriptor::setStride(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, + LLVMTypeConverter &typeConverter, Value strideBasePtr, Value index, Value stride) { Type indexPtrTy = LLVM::LLVMPointerType::get(typeConverter.getIndexType()); From 07b264d1f02f4ee3a8db30f2a146c455cdccb751 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 02:01:41 +0000 Subject: [PATCH 297/992] Pass the LLVMTypeConverter by reference in UnrankedMemRefBuilder (NFC) This is a fairly large structure (952B according to Coverity), it was already passed by reference in most places but not consistently. 
--- mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h | 2 +- mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h index 8f755d5a1cb3..26c9df2d6b2d 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h @@ -219,7 +219,7 @@ class UnrankedMemRefDescriptor : public StructBuilder { LLVM::LLVMPointerType elemPtrPtrType); /// Builds IR extracting the size[index] from the descriptor. static Value size(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, Value sizeBasePtr, + LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value index); /// Builds IR inserting the size[index] into the descriptor. static void setSize(OpBuilder &builder, Location loc, diff --git a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp index edb43b3cce37..4f78461572f3 100644 --- a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp +++ b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -478,7 +478,7 @@ Value UnrankedMemRefDescriptor::sizeBasePtr( } Value UnrankedMemRefDescriptor::size(OpBuilder &builder, Location loc, - LLVMTypeConverter typeConverter, + LLVMTypeConverter &typeConverter, Value sizeBasePtr, Value index) { Type indexPtrTy = LLVM::LLVMPointerType::get(typeConverter.getIndexType()); Value sizeStoreGep = builder.create(loc, indexPtrTy, sizeBasePtr, From ed56007ac53beb14924afa8429718a7ee9930781 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 1 Jan 2022 02:17:49 +0000 Subject: [PATCH 298/992] [gn build] Port 2edcde00cb39 --- llvm/utils/gn/secondary/llvm/lib/Target/Mips/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/Mips/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/Mips/BUILD.gn index 
6030476c7d2b..055f2ae5aaae 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/Mips/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/Mips/BUILD.gn @@ -85,6 +85,7 @@ static_library("LLVMMipsCodeGen") { "MipsMCInstLower.cpp", "MipsMachineFunction.cpp", "MipsModuleISelDAGToDAG.cpp", + "MipsMulMulBugPass.cpp", "MipsOptimizePICCall.cpp", "MipsOs16.cpp", "MipsPreLegalizerCombiner.cpp", From eb6b2efe4e953afc4b3befedebbf37d4175e8f48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Sat, 1 Jan 2022 14:52:32 +0100 Subject: [PATCH 299/992] [mlir][NFC] Fully qualify use of SmallVector in generated C++ code of mlir-tblgen --- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 2f9f079da8c9..484bb230e6b0 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1756,7 +1756,7 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder( // Add the segment attribute. body << " {\n" - << " SmallVector rangeSegments;\n" + << " ::llvm::SmallVector rangeSegments;\n" << " for (::mlir::ValueRange range : " << argName << ")\n" << " rangeSegments.push_back(range.size());\n" << " " << builderOpState << ".addAttribute(" From 7305798049112496323773335a503b694ff36e5b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 Dec 2021 18:31:41 +0100 Subject: [PATCH 300/992] [VPlan] Remove VPWidenPHIRecipe constructor without start value (NFC). This was suggested as a separate cleanup in recent reviews. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 11 +++++++---- llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 828f1f3e107b..4b588109bcda 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4612,7 +4612,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, Type *PhiType = II.getStep()->getType(); // Build a pointer phi - Value *ScalarStartValue = II.getStartValue(); + Value *ScalarStartValue = PhiR->getStartValue()->getLiveInIRValue(); Type *ScStValueType = ScalarStartValue->getType(); PHINode *NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", Induction); @@ -8874,11 +8874,14 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()))); PhisToFix.push_back(PhiRecipe); } else { - // TODO: record start and backedge value for remaining pointer induction - // phis. + // TODO: record backedge value for remaining pointer induction phis. 
assert(Phi->getType()->isPointerTy() && "only pointer phis should be handled here"); - PhiRecipe = new VPWidenPHIRecipe(Phi); + assert(Legal->getInductionVars().count(Phi) && + "Not an induction variable"); + InductionDescriptor II = Legal->getInductionVars().lookup(Phi); + VPValue *Start = Plan->getOrAddVPValue(II.getStartValue()); + PhiRecipe = new VPWidenPHIRecipe(Phi, Start); } return toVPRecipeResult(PhiRecipe); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 96de9114b618..a8102c0b07b8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1118,13 +1118,11 @@ class VPWidenPHIRecipe : public VPHeaderPHIRecipe { SmallVector IncomingBlocks; public: - /// Create a VPWidenPHIRecipe for \p Phi - VPWidenPHIRecipe(PHINode *Phi) - : VPHeaderPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) {} - /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. - VPWidenPHIRecipe(PHINode *Phi, VPValue &Start) : VPWidenPHIRecipe(Phi) { - addOperand(&Start); + VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr) + : VPHeaderPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) { + if (Start) + addOperand(Start); } ~VPWidenPHIRecipe() override = default; From f85c91f1e512bd79959b6fc74294148d16ef34e0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 08:45:35 -0800 Subject: [PATCH 301/992] [Transforms] Remove unused forward declarations (NFC) --- llvm/include/llvm/Transforms/IPO/Attributor.h | 1 - llvm/include/llvm/Transforms/IPO/ModuleInliner.h | 3 --- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h | 1 - llvm/include/llvm/Transforms/Scalar/GVN.h | 2 -- llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h | 2 -- llvm/include/llvm/Transforms/Scalar/LoopReroll.h | 2 -- llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h | 1 - llvm/include/llvm/Transforms/Scalar/SCCP.h | 2 -- llvm/include/llvm/Transforms/Scalar/SROA.h | 1 - 
llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h | 2 -- llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h | 1 - llvm/include/llvm/Transforms/Utils/Cloning.h | 2 -- llvm/include/llvm/Transforms/Utils/CodeLayout.h | 2 -- llvm/include/llvm/Transforms/Utils/CtorUtils.h | 1 - llvm/include/llvm/Transforms/Utils/Local.h | 2 -- llvm/include/llvm/Transforms/Utils/LoopUtils.h | 1 - llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h | 2 -- llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h | 2 -- 18 files changed, 30 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 884f8191368c..1a9dde03aabc 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -133,7 +133,6 @@ struct InformationCache; struct AAIsDead; struct AttributorCallGraph; -class AAManager; class AAResults; class Function; diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h index 963d74d71003..7474e48aafaf 100644 --- a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h +++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h @@ -18,9 +18,6 @@ namespace llvm { -class AssumptionCacheTracker; -class ProfileSummaryInfo; - /// The module inliner pass for the new pass manager. /// /// This pass wires together the inlining utilities and the inline cost diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index 7f321a688aff..3b944878a810 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -24,7 +24,6 @@ namespace llvm { class ModuleSummaryIndex; class Pass; class TargetLibraryInfoImpl; -class TargetMachine; // The old pass manager infrastructure is hidden in a legacy namespace now. 
namespace legacy { diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index cbe5057b9cde..9e660c92124e 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -39,11 +39,9 @@ class AssumptionCache; class BasicBlock; class BranchInst; class CallInst; -class Constant; class ExtractValueInst; class Function; class FunctionPass; -class IntrinsicInst; class LoadInst; class LoopInfo; class MemDepResult; diff --git a/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h b/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h index f5781e085f7b..09a4a95401d8 100644 --- a/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h +++ b/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h @@ -18,8 +18,6 @@ namespace llvm { -class FunctionPass; - /// Run instruction simplification across each instruction in the function. /// /// Instruction simplification has useful constraints in some contexts: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopReroll.h b/llvm/include/llvm/Transforms/Scalar/LoopReroll.h index 6ae309e48a28..496e8df85ea0 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopReroll.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopReroll.h @@ -14,8 +14,6 @@ namespace llvm { -class Function; - class LoopRerollPass : public PassInfoMixin { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h index 6125fc7636a0..72663d3d62a8 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h @@ -13,7 +13,6 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { -class Function; /// A simple loop rotation transformation. 
class LoopUnrollAndJamPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h index 2d7c94918699..cd4100447880 100644 --- a/llvm/include/llvm/Transforms/Scalar/SCCP.h +++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h @@ -32,8 +32,6 @@ namespace llvm { -class PostDominatorTree; - /// This pass performs function-level constant propagation and merging. class SCCPPass : public PassInfoMixin { public: diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h index f1a43435d89a..b74c45e71d95 100644 --- a/llvm/include/llvm/Transforms/Scalar/SROA.h +++ b/llvm/include/llvm/Transforms/Scalar/SROA.h @@ -27,7 +27,6 @@ class AllocaInst; class AssumptionCache; class DominatorTree; class Function; -class Instruction; class LLVMContext; class PHINode; class SelectInst; diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h index 2d5942a3f569..04a5f7e6ff38 100644 --- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h +++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h @@ -17,8 +17,6 @@ namespace llvm { class Function; -class Loop; -class LPMUpdater; // New pass manager boilerplate. 
class WarnMissedTransformationsPass diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h index 5c06af9bc84c..a497722eece6 100644 --- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h +++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h @@ -22,7 +22,6 @@ #include "llvm/IR/PassManager.h" namespace llvm { -class IntrinsicInst; class AssumptionCache; class DominatorTree; diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 5a1f322b2054..fdc55bea99e7 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -33,13 +33,11 @@ class AAResults; class AllocaInst; class BasicBlock; class BlockFrequencyInfo; -class CallInst; class CallGraph; class DebugInfoFinder; class DominatorTree; class Function; class Instruction; -class InvokeInst; class Loop; class LoopInfo; class Module; diff --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h index 987a5651a8b6..a0e5f8c7d014 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h +++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h @@ -20,8 +20,6 @@ namespace llvm { -class MachineBasicBlock; - /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump /// locality and thus processor I-cache utilization. 
This is achieved via /// increasing the number of fall-through jumps and co-locating frequently diff --git a/llvm/include/llvm/Transforms/Utils/CtorUtils.h b/llvm/include/llvm/Transforms/Utils/CtorUtils.h index 3625ee662b1c..3ef3ba244b43 100644 --- a/llvm/include/llvm/Transforms/Utils/CtorUtils.h +++ b/llvm/include/llvm/Transforms/Utils/CtorUtils.h @@ -17,7 +17,6 @@ namespace llvm { -class GlobalVariable; class Function; class Module; diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index a914c6e0925f..8a6f172b78d4 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -42,9 +42,7 @@ class BasicBlock; class BranchInst; class CallBase; class CallInst; -class DbgDeclareInst; class DbgVariableIntrinsic; -class DbgValueInst; class DIBuilder; class DomTreeUpdater; class Function; diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index e0a9115f61b0..3a712d78df67 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -37,7 +37,6 @@ class MemorySSAUpdater; class OptimizationRemarkEmitter; class PredIteratorCache; class ScalarEvolution; -class ScalarEvolutionExpander; class SCEV; class SCEVExpander; class TargetLibraryInfo; diff --git a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h index e5f8a46eaf23..8dc0f1e26a92 100644 --- a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h +++ b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h @@ -27,8 +27,6 @@ class Instruction; class IntrinsicInst; class Value; class OptimizationRemarkEmitter; -class OptimizationRemarkMissed; -class OptimizationRemarkAnalysis; class StoreInst; // FIXME: Once we get to more remarks like this one, we need to re-evaluate how diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h 
b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h index 20b360212506..461669d6a217 100644 --- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h +++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h @@ -19,8 +19,6 @@ namespace llvm { -class BasicBlock; - class UnifyFunctionExitNodesLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid From 24d240558811604354a8d6080405f6bad8d15b5c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 08:54:05 -0800 Subject: [PATCH 302/992] [lldb] Use nullptr instead of 0 or NULL (NFC) Identified with modernize-use-nullptr. --- lldb/include/lldb/Host/HostNativeThreadBase.h | 2 +- lldb/source/API/SystemInitializerFull.cpp | 2 +- lldb/source/Host/common/HostNativeThreadBase.cpp | 6 +++--- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 10 +++++----- .../ObjectFile/Minidump/MinidumpFileBuilder.cpp | 5 +++-- .../Process/Utility/RegisterInfoPOSIX_arm64.cpp | 4 ++-- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 11 ++++++----- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 ++-- lldb/source/Symbol/Symbol.cpp | 2 +- 9 files changed, 24 insertions(+), 22 deletions(-) diff --git a/lldb/include/lldb/Host/HostNativeThreadBase.h b/lldb/include/lldb/Host/HostNativeThreadBase.h index bfd70d745593..57707f07cea8 100644 --- a/lldb/include/lldb/Host/HostNativeThreadBase.h +++ b/lldb/include/lldb/Host/HostNativeThreadBase.h @@ -46,7 +46,7 @@ class HostNativeThreadBase { ThreadCreateTrampoline(lldb::thread_arg_t arg); lldb::thread_t m_thread = LLDB_INVALID_HOST_THREAD; - lldb::thread_result_t m_result = 0; + lldb::thread_result_t m_result = nullptr; }; } diff --git a/lldb/source/API/SystemInitializerFull.cpp b/lldb/source/API/SystemInitializerFull.cpp index b01cb2ff545b..d662d370f813 100644 --- a/lldb/source/API/SystemInitializerFull.cpp +++ b/lldb/source/API/SystemInitializerFull.cpp @@ -39,7 +39,7 @@ constexpr 
lldb_private::HostInfo::SharedLibraryDirectoryHelper #else constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper - *g_shlib_dir_helper = 0; + *g_shlib_dir_helper = nullptr; #endif using namespace lldb_private; diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp index b15160b143ca..e7966d9ebaa5 100644 --- a/lldb/source/Host/common/HostNativeThreadBase.cpp +++ b/lldb/source/Host/common/HostNativeThreadBase.cpp @@ -18,7 +18,7 @@ using namespace lldb; using namespace lldb_private; HostNativeThreadBase::HostNativeThreadBase(thread_t thread) - : m_thread(thread), m_result(0) {} + : m_thread(thread), m_result(nullptr) {} lldb::thread_t HostNativeThreadBase::GetSystemHandle() const { return m_thread; @@ -34,7 +34,7 @@ bool HostNativeThreadBase::IsJoinable() const { void HostNativeThreadBase::Reset() { m_thread = LLDB_INVALID_HOST_THREAD; - m_result = 0; + m_result = nullptr; } bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { @@ -44,7 +44,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { lldb::thread_t HostNativeThreadBase::Release() { lldb::thread_t result = m_thread; m_thread = LLDB_INVALID_HOST_THREAD; - m_result = 0; + m_result = nullptr; return result; } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e72d55dd2aba..30d7d239834b 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -768,9 +768,9 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { // Write out the EXC registers data.PutHex32(EXCRegSet); data.PutHex32(EXCWordCount); - PrintRegisterValue(reg_ctx, "far", NULL, 8, data); - PrintRegisterValue(reg_ctx, "esr", NULL, 4, data); - PrintRegisterValue(reg_ctx, "exception", NULL, 4, data); + PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); + 
PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); + PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); return true; } return false; @@ -5073,7 +5073,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, lldb::offset_t offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == NULL) + if (data.GetU32(&offset, &load_cmd, 2) == nullptr) break; llvm::MachO::version_min_command version_min; @@ -5123,7 +5123,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == NULL) + if (data.GetU32(&offset, &load_cmd, 2) == nullptr) break; do { diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index ef419d9a89e8..516bcb21b019 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -272,7 +272,8 @@ Status MinidumpFileBuilder::AddModuleList(Target &target) { mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target)); m.SizeOfImage = static_cast(mod_size); m.Checksum = static_cast(0); - m.TimeDateStamp = static_cast(std::time(0)); + m.TimeDateStamp = + static_cast(std::time(nullptr)); m.ModuleNameRVA = static_cast( size_before + module_stream_size + helper_data.GetByteSize()); m.VersionInfo = info; @@ -719,7 +720,7 @@ Status MinidumpFileBuilder::Dump(lldb::FileUP &core_file) const { header.Checksum = static_cast( 0u), // not used in most of the writers header.TimeDateStamp = - static_cast(std::time(0)); + static_cast(std::time(nullptr)); header.Flags = static_cast(0u); // minidump normal flag diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp 
b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 6c130be7b741..d6c4a8687ec5 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -178,10 +178,10 @@ static const lldb_private::RegisterSet g_reg_sets_arm64[k_num_register_sets] = { g_sve_regnums_arm64}}; static const lldb_private::RegisterSet g_reg_set_pauth_arm64 = { - "Pointer Authentication Registers", "pauth", k_num_pauth_register, NULL}; + "Pointer Authentication Registers", "pauth", k_num_pauth_register, nullptr}; static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { - "MTE Control Register", "mte", k_num_mte_register, NULL}; + "MTE Control Register", "mte", k_num_mte_register, nullptr}; RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index b90f104c4d21..be555c130bfe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -933,7 +933,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, DW_TAG_value_to_name(tag), type_name_cstr); CompilerType return_clang_type; - Type *func_type = NULL; + Type *func_type = nullptr; if (attrs.type.IsValid()) func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); @@ -1027,7 +1027,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, class_opaque_type, attrs.name.GetCString(), clang_type, attrs.accessibility, attrs.is_artificial, is_variadic, attrs.is_objc_direct_call); - type_handled = objc_method_decl != NULL; + type_handled = objc_method_decl != nullptr; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); @@ -1178,7 +1178,7 @@ TypeSP 
DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, is_static, attrs.is_inline, attrs.is_explicit, is_attr_used, attrs.is_artificial); - type_handled = cxx_method_decl != NULL; + type_handled = cxx_method_decl != nullptr; // Artificial methods are always handled even when we // don't create a new declaration for them. type_handled |= attrs.is_artificial; @@ -2036,7 +2036,7 @@ bool DWARFASTParserClang::ParseTemplateDIE( if (name && name[0]) template_param_infos.names.push_back(name); else - template_param_infos.names.push_back(NULL); + template_param_infos.names.push_back(nullptr); // Get the signed value for any integer or enumeration if available clang_type.IsIntegerOrEnumerationType(is_signed); @@ -3336,7 +3336,8 @@ DWARFASTParserClang::GetOwningClangModule(const DWARFDIE &die) { auto it = m_die_to_module.find(module_die.GetDIE()); if (it != m_die_to_module.end()) return it->second; - const char *name = module_die.GetAttributeValueAsString(DW_AT_name, 0); + const char *name = + module_die.GetAttributeValueAsString(DW_AT_name, nullptr); if (!name) return {}; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 71d4c1e6c52f..2457e8276e20 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -269,7 +269,7 @@ class DWARFUnit : public lldb_private::UserID { ExtractUnitDIENoDwoIfNeeded(); // m_first_die_mutex is not required as m_first_die is never cleared. 
if (!m_first_die) - return NULL; + return nullptr; return &m_first_die; } @@ -277,7 +277,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFDebugInfoEntry *DIEPtr() { ExtractDIEsIfNeeded(); if (m_die_array.empty()) - return NULL; + return nullptr; return &m_die_array[0]; } diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index fa7226dfd046..5ee5b0fe2223 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -680,7 +680,7 @@ void Symbol::Encode(DataEncoder &file, ConstStringTable &strtab) const { // symbol's base address doesn't have a section, then it is a constant value. // If it does have a section, we will encode the file address and re-resolve // the address when we decode it. - bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != NULL; + bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != nullptr; file.AppendU8(is_addr); file.AppendU64(m_addr_range.GetBaseAddress().GetFileAddress()); file.AppendU64(m_addr_range.GetByteSize()); From 63846a634d4a92ba256385d9bc96905f8c12f10e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 09:14:19 -0800 Subject: [PATCH 303/992] [mlir] Remove unused "using" (NFC) Identified by misc-unused-using-decls. 
--- mlir/lib/Analysis/AffineAnalysis.cpp | 2 -- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 2 -- mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 2 -- mlir/lib/IR/MLIRContext.cpp | 2 -- mlir/lib/Parser/AffineParser.cpp | 2 -- mlir/lib/Parser/Lexer.cpp | 1 - mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp | 1 - mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp | 1 - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp | 1 - 9 files changed, 14 deletions(-) diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp index c8022e046548..9f976a63c145 100644 --- a/mlir/lib/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Analysis/AffineAnalysis.cpp @@ -32,8 +32,6 @@ using namespace mlir; -using llvm::dbgs; - /// Get the value that is being reduced by `pos`-th reduction in the loop if /// such a reduction can be performed by affine parallel loops. This assumes /// floating-point operations are commutative. On success, `kind` will be the diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 90c9b0808f9b..0e0bc1ad48d1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -38,8 +38,6 @@ using namespace mlir; using namespace mlir::linalg; -using llvm::dbgs; - /// Implements a simple high-level fusion pass on linalg structured operations. /// /// In each block, linalg ops are processed in reverse textual order. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 17e78d085d06..939400ed5027 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -38,8 +38,6 @@ using namespace mlir; using namespace mlir::linalg; -using llvm::dbgs; - #define DEBUG_TYPE "linalg-vectorization" #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 7e811316c4e6..f4ffc297de99 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -43,8 +43,6 @@ using namespace mlir; using namespace mlir::detail; -using llvm::hash_combine; - //===----------------------------------------------------------------------===// // MLIRContext CommandLine Options //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Parser/AffineParser.cpp b/mlir/lib/Parser/AffineParser.cpp index 1ba2ad5c7a2d..9bff88a7f167 100644 --- a/mlir/lib/Parser/AffineParser.cpp +++ b/mlir/lib/Parser/AffineParser.cpp @@ -17,9 +17,7 @@ using namespace mlir; using namespace mlir::detail; -using llvm::MemoryBuffer; using llvm::SMLoc; -using llvm::SourceMgr; namespace { diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp index 56ffc90cf6e8..5a882d08594f 100644 --- a/mlir/lib/Parser/Lexer.cpp +++ b/mlir/lib/Parser/Lexer.cpp @@ -20,7 +20,6 @@ using namespace mlir; using llvm::SMLoc; -using llvm::SourceMgr; // Returns true if 'c' is an allowable punctuation character: [$._-] // Returns false otherwise. 
diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp index 7651e19b43c2..a07ada3c8e66 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp @@ -30,7 +30,6 @@ using namespace mlir; using llvm::yaml::Input; -using llvm::yaml::IO; using llvm::yaml::MappingTraits; using llvm::yaml::ScalarEnumerationTraits; using llvm::yaml::ScalarTraits; diff --git a/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp index aaad51794821..7cca598cc6ec 100644 --- a/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp +++ b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp @@ -22,7 +22,6 @@ using llvm::Clause; using llvm::ClauseVal; using llvm::raw_ostream; using llvm::RecordKeeper; -using llvm::Twine; // LLVM has multiple places (Clang, Flang, MLIR) where information about // the directives (OpenMP/OpenACC), and clauses are needed. It is good software diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index ad0b05e4c47b..683307229e13 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -40,7 +40,6 @@ using llvm::SmallVector; using llvm::SMLoc; using llvm::StringMap; using llvm::StringRef; -using llvm::Twine; using mlir::tblgen::Attribute; using mlir::tblgen::EnumAttr; using mlir::tblgen::EnumAttrCase; From 491b4e1faaaffbf966f94ed61aba338988f4f3cc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 09:14:21 -0800 Subject: [PATCH 304/992] [IR] Remove redundant return statements (NFC) Identified by readability-redundant-control-flow. 
--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index f5bb64b0f911..90909582b2e5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1359,7 +1359,6 @@ static void printAtomicReadOp(OpAsmPrinter &p, AtomicReadOp op) { if (op.hintAttr()) printSynchronizationHint(p << " ", op, op.hintAttr()); p << ": " << op.address().getType() << " -> " << op.getType(); - return; } /// Verifier for AtomicReadOp @@ -1409,7 +1408,6 @@ static void printAtomicWriteOp(OpAsmPrinter &p, AtomicWriteOp op) { if (op.hintAttr()) printSynchronizationHint(p, op, op.hintAttr()); p << ": " << op.address().getType() << ", " << op.value().getType(); - return; } /// Verifier for AtomicWriteOp From 683e6ee7d04f01e068b5d17b54ac2395fb6bf554 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 09:14:23 -0800 Subject: [PATCH 305/992] [CodeGen] Remove redundant string initialization (NFC) Identified with readability-redundant-string-init. --- clang/lib/CodeGen/CodeGenModule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index f1565511f98a..7cb6935afb20 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -311,7 +311,7 @@ class CodeGenModule : public CodeGenTypeCache { const TargetInfo &Target; std::unique_ptr ABI; llvm::LLVMContext &VMContext; - std::string ModuleNameHash = ""; + std::string ModuleNameHash; std::unique_ptr TBAA; From f1d562952ed4f0bf3d34691a948d0faa93083cde Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 09:14:25 -0800 Subject: [PATCH 306/992] [Aarch64] Remove redundant declaration initializeSVEIntrinsicOptsPass (NFC) The function is declared in AArch64.h. Identified with readability-redundant-declaration. 
--- llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp index 642080a0d40d..4a24162540a5 100644 --- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp +++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -40,10 +40,6 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "aarch64-sve-intrinsic-opts" -namespace llvm { -void initializeSVEIntrinsicOptsPass(PassRegistry &); -} - namespace { struct SVEIntrinsicOpts : public ModulePass { static char ID; // Pass identification, replacement for typeid From b3af9fbcc908c3a4b0fae721fa5102459070db83 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 1 Jan 2022 17:51:16 +0000 Subject: [PATCH 307/992] Set the path to the shared cmake modules based on the llvm directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s still possible to build parts of the main llvm build (lld, clang etc) by symlinking them into llvm/tools. Reviewed By: Ericson2314 Differential Revision: https://reviews.llvm.org/D116472 --- flang/CMakeLists.txt | 6 +++++- lld/CMakeLists.txt | 6 +++++- llvm/CMakeLists.txt | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 64852fcd1a98..abb9a47d3abb 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -227,10 +227,14 @@ include_directories(BEFORE ${FLANG_BINARY_DIR}/include ${FLANG_SOURCE_DIR}/include) +if(NOT DEFINED LLVM_COMMON_CMAKE_UTILS) + set(LLVM_COMMON_CMAKE_UTILS ${FLANG_SOURCE_DIR}/../cmake) +endif() + # Add Flang-centric modules to cmake path. 
list(INSERT CMAKE_MODULE_PATH 0 "${FLANG_SOURCE_DIR}/cmake/modules" - "${FLANG_SOURCE_DIR}/../cmake/Modules" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) include(AddFlang) diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 00f8e1bb2a77..881f16cc0aba 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -153,10 +153,14 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) "`CMakeFiles'. Please delete them.") endif() +if(NOT DEFINED LLVM_COMMON_CMAKE_UTILS) + set(LLVM_COMMON_CMAKE_UTILS ${LLD_SOURCE_DIR}/../cmake) +endif() + # Add path for custom modules. list(INSERT CMAKE_MODULE_PATH 0 "${LLD_SOURCE_DIR}/cmake/modules" - "${LLD_SOURCE_DIR}/../cmake/Modules" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) include(AddLLD) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 06aca534bd74..5d6079c4c83d 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -308,6 +308,7 @@ set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include ) # --includedir set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) # --prefix set(LLVM_THIRD_PARTY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../third-party) +set(LLVM_COMMON_CMAKE_UTILS ${CMAKE_CURRENT_SOURCE_DIR}/../cmake) # Note: LLVM_CMAKE_DIR does not include generated files set(LLVM_CMAKE_DIR ${LLVM_MAIN_SRC_DIR}/cmake/modules) From b23669123afc3f68bbce7e746f0d92621dcb9be9 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Sat, 1 Jan 2022 18:00:52 +0000 Subject: [PATCH 308/992] [docs] Mark @llvm.sideeffect() as willreturn Changed by https://reviews.llvm.org/D65455 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 46265e847eda..ce3af423f9f0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -23103,7 +23103,7 @@ Syntax: :: - declare void @llvm.sideeffect() inaccessiblememonly nounwind + declare void @llvm.sideeffect() inaccessiblememonly nounwind willreturn Overview: """"""""" From dd2ad7fa47f8c0a186521bae42f4dcabf3710670 Mon Sep 17 
00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 10:14:05 -0800 Subject: [PATCH 309/992] [Target] Use range-based for loops (NFC) --- .../Target/Hexagon/HexagonISelLowering.cpp | 19 +++--- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 6 +- .../lib/Target/Hexagon/HexagonSplitDouble.cpp | 3 +- .../Hexagon/HexagonVectorLoopCarriedReuse.cpp | 3 +- .../MCTargetDesc/HexagonMCShuffler.cpp | 7 +-- .../Hexagon/MCTargetDesc/HexagonShuffler.cpp | 6 +- llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp | 5 +- llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp | 4 +- .../Target/Mips/MipsConstantIslandPass.cpp | 61 +++++++++---------- 9 files changed, 52 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 90dda37a886a..01fcbba7f8fb 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -543,9 +543,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The Glue is necessary since all emitted instructions must be // stuck together. if (!CLI.IsTailCall) { - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, Glue); + for (const auto &R : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue); Glue = Chain.getValue(1); } } else { @@ -560,9 +559,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // // Do not flag preceding copytoreg stuff together with the following stuff. 
Glue = SDValue(); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, Glue); + for (const auto &R : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue); Glue = Chain.getValue(1); } Glue = SDValue(); @@ -589,10 +587,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add argument registers to the end of the list so that they are // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - } + for (const auto &R : RegsToPass) + Ops.push_back(DAG.getRegister(R.first, R.second.getValueType())); const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); @@ -2204,8 +2200,7 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) // Express the shuffle mask in terms of bytes. SmallVector ByteMask; unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - int M = Mask[i]; + for (int M : Mask) { if (M < 0) { for (unsigned j = 0; j != ElemBytes; ++j) ByteMask.push_back(-1); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 931b0c0e0090..fdf480722a2f 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -3486,9 +3486,9 @@ int HexagonInstrInfo::getDuplexOpcode(const MachineInstr &MI, if (Iter != DupMap.end()) return Iter->second; } else { // Conversion to Tiny core. 
- for (auto Iter = DupMap.begin(), End = DupMap.end(); Iter != End; ++Iter) - if (Iter->second == OpNum) - return Iter->first; + for (const auto &Iter : DupMap) + if (Iter.second == OpNum) + return Iter.first; } return -1; } diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp index 9a0f57fce97d..9dedca4b1190 100644 --- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -578,8 +578,7 @@ void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { append_range(WorkQ, *WorkQ[i]); USet Rs; - for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { - MachineLoop *L = WorkQ[i]; + for (MachineLoop *L : WorkQ) { Rs.clear(); collectIndRegsForLoop(L, Rs); if (!Rs.empty()) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index f973862a0c9b..94b878e21f4d 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -659,8 +659,7 @@ void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { delete D; } LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); - LLVM_DEBUG(for (size_t i = 0; i < Dependences.size(); - ++i) { dbgs() << *Dependences[i] << "\n"; }); + LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";); } Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index d38b77b42fbc..0c15db08aaf6 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -81,10 +81,9 @@ void HexagonMCShuffler::copyTo(MCInst &MCB) { MCB.addOperand(MCOperand::createImm(BundleFlags)); MCB.setLoc(Loc); // Copy the results into the bundle. 
- for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { - - MCInst const &MI = I->getDesc(); - MCInst const *Extender = I->getExtender(); + for (auto &I : *this) { + MCInst const &MI = I.getDesc(); + MCInst const *Extender = I.getExtender(); if (Extender) MCB.addOperand(MCOperand::createInst(Extender)); MCB.addOperand(MCOperand::createInst(&MI)); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 1fce90b82864..ce396d0d8312 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -330,10 +330,10 @@ bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) { // create vector of hvx instructions to check HVXInstsT hvxInsts; hvxInsts.clear(); - for (const_iterator I = cbegin(); I != cend(); ++I) { + for (const auto &I : *this) { struct CVIUnits inst; - inst.Units = I->CVI.getUnits(); - inst.Lanes = I->CVI.getLanes(); + inst.Units = I.CVI.getUnits(); + inst.Lanes = I.CVI.getLanes(); if (inst.Units == 0) continue; // not an hvx inst or an hvx inst that doesn't uses any pipes hvxInsts.push_back(inst); diff --git a/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp index 67443b771d3d..ce79bdafc425 100644 --- a/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp +++ b/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp @@ -412,9 +412,8 @@ bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); bool Modified = false; - for (MfIterator MFI = MF.begin(); MFI != MF.end(); ++MFI) { - Modified |= combineMemAluInBasicBlock(&*MFI); - } + for (MachineBasicBlock &MBB : MF) + Modified |= combineMemAluInBasicBlock(&MBB); return Modified; } } // namespace diff --git a/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp index ae2b83c414db..33da0ff31be8 100644 --- 
a/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp +++ b/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -25,8 +25,8 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) { return; } - for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter) - Iter->push_back(I); + for (auto &S : SeqLs) + S.push_back(I); } void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index 491d379bfe0b..1efbf5570287 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -604,9 +604,9 @@ MipsConstantIslands::CPEntry std::vector &CPEs = CPEntries[CPI]; // Number of entries per constpool index should be small, just do a // linear search. - for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { - if (CPEs[i].CPEMI == CPEMI) - return &CPEs[i]; + for (CPEntry &CPE : CPEs) { + if (CPE.CPEMI == CPEMI) + return &CPE; } return nullptr; } @@ -1052,27 +1052,27 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) // No. Look for previously created clones of the CPE that are in range. 
unsigned CPI = CPEMI->getOperand(1).getIndex(); std::vector &CPEs = CPEntries[CPI]; - for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + for (CPEntry &CPE : CPEs) { // We already tried this one - if (CPEs[i].CPEMI == CPEMI) + if (CPE.CPEMI == CPEMI) continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == nullptr) + if (CPE.CPEMI == nullptr) continue; - if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), - U.NegOk)) { - LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" - << CPEs[i].CPI << "\n"); + if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getMaxDisp(), + U.NegOk)) { + LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI + << "\n"); // Point the CPUser node to the replacement - U.CPEMI = CPEs[i].CPEMI; + U.CPEMI = CPE.CPEMI; // Change the CPI in the instruction operand to refer to the clone. for (MachineOperand &MO : UserMI->operands()) if (MO.isCPI()) { - MO.setIndex(CPEs[i].CPI); + MO.setIndex(CPE.CPI); break; } // Adjust the refcount of the clone... - CPEs[i].RefCount++; + CPE.RefCount++; // ...and the original. If we didn't remove the old entry, none of the // addresses changed, so we don't need another pass. return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; @@ -1108,27 +1108,27 @@ int MipsConstantIslands::findLongFormInRangeCPEntry // No. Look for previously created clones of the CPE that are in range. 
unsigned CPI = CPEMI->getOperand(1).getIndex(); std::vector &CPEs = CPEntries[CPI]; - for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + for (CPEntry &CPE : CPEs) { // We already tried this one - if (CPEs[i].CPEMI == CPEMI) + if (CPE.CPEMI == CPEMI) continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == nullptr) + if (CPE.CPEMI == nullptr) continue; - if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, - U.getLongFormMaxDisp(), U.NegOk)) { - LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" - << CPEs[i].CPI << "\n"); + if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getLongFormMaxDisp(), + U.NegOk)) { + LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI + << "\n"); // Point the CPUser node to the replacement - U.CPEMI = CPEs[i].CPEMI; + U.CPEMI = CPE.CPEMI; // Change the CPI in the instruction operand to refer to the clone. for (MachineOperand &MO : UserMI->operands()) if (MO.isCPI()) { - MO.setIndex(CPEs[i].CPI); + MO.setIndex(CPE.CPI); break; } // Adjust the refcount of the clone... - CPEs[i].RefCount++; + CPE.RefCount++; // ...and the original. If we didn't remove the old entry, none of the // addresses changed, so we don't need another pass. return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; @@ -1435,15 +1435,14 @@ void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { /// are zero. 
bool MipsConstantIslands::removeUnusedCPEntries() { unsigned MadeChange = false; - for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { - std::vector &CPEs = CPEntries[i]; - for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { - if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { - removeDeadCPEMI(CPEs[j].CPEMI); - CPEs[j].CPEMI = nullptr; - MadeChange = true; - } + for (std::vector &CPEs : CPEntries) { + for (CPEntry &CPE : CPEs) { + if (CPE.RefCount == 0 && CPE.CPEMI) { + removeDeadCPEMI(CPE.CPEMI); + CPE.CPEMI = nullptr; + MadeChange = true; } + } } return MadeChange; } From 4f2eeb6a657abf82d13e95c7ab0d7d02fab98ed1 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sat, 1 Jan 2022 13:35:54 -0500 Subject: [PATCH 310/992] Revert "[lldb] Use nullptr instead of 0 or NULL (NFC)" This reverts commit 24d240558811604354a8d6080405f6bad8d15b5c. Breaks building on Windows: ../../lldb/include\lldb/Host/HostNativeThreadBase.h(49,36): error: cannot initialize a member subobject of type 'lldb::thread_result_t' (aka 'unsigned int') with an rvalue of type 'std::nullptr_t' lldb::thread_result_t m_result = nullptr; ^~~~~~~ 1 error generated. 
--- lldb/include/lldb/Host/HostNativeThreadBase.h | 2 +- lldb/source/API/SystemInitializerFull.cpp | 2 +- lldb/source/Host/common/HostNativeThreadBase.cpp | 6 +++--- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 10 +++++----- .../ObjectFile/Minidump/MinidumpFileBuilder.cpp | 5 ++--- .../Process/Utility/RegisterInfoPOSIX_arm64.cpp | 4 ++-- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 11 +++++------ lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 ++-- lldb/source/Symbol/Symbol.cpp | 2 +- 9 files changed, 22 insertions(+), 24 deletions(-) diff --git a/lldb/include/lldb/Host/HostNativeThreadBase.h b/lldb/include/lldb/Host/HostNativeThreadBase.h index 57707f07cea8..bfd70d745593 100644 --- a/lldb/include/lldb/Host/HostNativeThreadBase.h +++ b/lldb/include/lldb/Host/HostNativeThreadBase.h @@ -46,7 +46,7 @@ class HostNativeThreadBase { ThreadCreateTrampoline(lldb::thread_arg_t arg); lldb::thread_t m_thread = LLDB_INVALID_HOST_THREAD; - lldb::thread_result_t m_result = nullptr; + lldb::thread_result_t m_result = 0; }; } diff --git a/lldb/source/API/SystemInitializerFull.cpp b/lldb/source/API/SystemInitializerFull.cpp index d662d370f813..b01cb2ff545b 100644 --- a/lldb/source/API/SystemInitializerFull.cpp +++ b/lldb/source/API/SystemInitializerFull.cpp @@ -39,7 +39,7 @@ constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper #else constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper - *g_shlib_dir_helper = nullptr; + *g_shlib_dir_helper = 0; #endif using namespace lldb_private; diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp index e7966d9ebaa5..b15160b143ca 100644 --- a/lldb/source/Host/common/HostNativeThreadBase.cpp +++ b/lldb/source/Host/common/HostNativeThreadBase.cpp @@ -18,7 +18,7 @@ using namespace lldb; using namespace lldb_private; HostNativeThreadBase::HostNativeThreadBase(thread_t thread) - : m_thread(thread), m_result(nullptr) {} + : m_thread(thread), m_result(0) 
{} lldb::thread_t HostNativeThreadBase::GetSystemHandle() const { return m_thread; @@ -34,7 +34,7 @@ bool HostNativeThreadBase::IsJoinable() const { void HostNativeThreadBase::Reset() { m_thread = LLDB_INVALID_HOST_THREAD; - m_result = nullptr; + m_result = 0; } bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { @@ -44,7 +44,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { lldb::thread_t HostNativeThreadBase::Release() { lldb::thread_t result = m_thread; m_thread = LLDB_INVALID_HOST_THREAD; - m_result = nullptr; + m_result = 0; return result; } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 30d7d239834b..e72d55dd2aba 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -768,9 +768,9 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { // Write out the EXC registers data.PutHex32(EXCRegSet); data.PutHex32(EXCWordCount); - PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); - PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); - PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); + PrintRegisterValue(reg_ctx, "far", NULL, 8, data); + PrintRegisterValue(reg_ctx, "esr", NULL, 4, data); + PrintRegisterValue(reg_ctx, "exception", NULL, 4, data); return true; } return false; @@ -5073,7 +5073,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, lldb::offset_t offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == nullptr) + if (data.GetU32(&offset, &load_cmd, 2) == NULL) break; llvm::MachO::version_min_command version_min; @@ -5123,7 +5123,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const 
lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == nullptr) + if (data.GetU32(&offset, &load_cmd, 2) == NULL) break; do { diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index 516bcb21b019..ef419d9a89e8 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -272,8 +272,7 @@ Status MinidumpFileBuilder::AddModuleList(Target &target) { mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target)); m.SizeOfImage = static_cast(mod_size); m.Checksum = static_cast(0); - m.TimeDateStamp = - static_cast(std::time(nullptr)); + m.TimeDateStamp = static_cast(std::time(0)); m.ModuleNameRVA = static_cast( size_before + module_stream_size + helper_data.GetByteSize()); m.VersionInfo = info; @@ -720,7 +719,7 @@ Status MinidumpFileBuilder::Dump(lldb::FileUP &core_file) const { header.Checksum = static_cast( 0u), // not used in most of the writers header.TimeDateStamp = - static_cast(std::time(nullptr)); + static_cast(std::time(0)); header.Flags = static_cast(0u); // minidump normal flag diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index d6c4a8687ec5..6c130be7b741 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -178,10 +178,10 @@ static const lldb_private::RegisterSet g_reg_sets_arm64[k_num_register_sets] = { g_sve_regnums_arm64}}; static const lldb_private::RegisterSet g_reg_set_pauth_arm64 = { - "Pointer Authentication Registers", "pauth", k_num_pauth_register, nullptr}; + "Pointer Authentication Registers", "pauth", k_num_pauth_register, NULL}; static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { - "MTE Control Register", "mte", k_num_mte_register, nullptr}; 
+ "MTE Control Register", "mte", k_num_mte_register, NULL}; RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index be555c130bfe..b90f104c4d21 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -933,7 +933,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, DW_TAG_value_to_name(tag), type_name_cstr); CompilerType return_clang_type; - Type *func_type = nullptr; + Type *func_type = NULL; if (attrs.type.IsValid()) func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); @@ -1027,7 +1027,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, class_opaque_type, attrs.name.GetCString(), clang_type, attrs.accessibility, attrs.is_artificial, is_variadic, attrs.is_objc_direct_call); - type_handled = objc_method_decl != nullptr; + type_handled = objc_method_decl != NULL; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); @@ -1178,7 +1178,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, is_static, attrs.is_inline, attrs.is_explicit, is_attr_used, attrs.is_artificial); - type_handled = cxx_method_decl != nullptr; + type_handled = cxx_method_decl != NULL; // Artificial methods are always handled even when we // don't create a new declaration for them. 
type_handled |= attrs.is_artificial; @@ -2036,7 +2036,7 @@ bool DWARFASTParserClang::ParseTemplateDIE( if (name && name[0]) template_param_infos.names.push_back(name); else - template_param_infos.names.push_back(nullptr); + template_param_infos.names.push_back(NULL); // Get the signed value for any integer or enumeration if available clang_type.IsIntegerOrEnumerationType(is_signed); @@ -3336,8 +3336,7 @@ DWARFASTParserClang::GetOwningClangModule(const DWARFDIE &die) { auto it = m_die_to_module.find(module_die.GetDIE()); if (it != m_die_to_module.end()) return it->second; - const char *name = - module_die.GetAttributeValueAsString(DW_AT_name, nullptr); + const char *name = module_die.GetAttributeValueAsString(DW_AT_name, 0); if (!name) return {}; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 2457e8276e20..71d4c1e6c52f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -269,7 +269,7 @@ class DWARFUnit : public lldb_private::UserID { ExtractUnitDIENoDwoIfNeeded(); // m_first_die_mutex is not required as m_first_die is never cleared. if (!m_first_die) - return nullptr; + return NULL; return &m_first_die; } @@ -277,7 +277,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFDebugInfoEntry *DIEPtr() { ExtractDIEsIfNeeded(); if (m_die_array.empty()) - return nullptr; + return NULL; return &m_die_array[0]; } diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index 5ee5b0fe2223..fa7226dfd046 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -680,7 +680,7 @@ void Symbol::Encode(DataEncoder &file, ConstStringTable &strtab) const { // symbol's base address doesn't have a section, then it is a constant value. // If it does have a section, we will encode the file address and re-resolve // the address when we decode it. 
- bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != nullptr; + bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != NULL; file.AppendU8(is_addr); file.AppendU64(m_addr_range.GetBaseAddress().GetFileAddress()); file.AppendU64(m_addr_range.GetByteSize()); From 69ccc96162aa3471389d98184e0d683573edb47d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 10:36:59 -0800 Subject: [PATCH 311/992] [llvm] Use the default constructor for SDValue (NFC) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 3 +-- llvm/lib/Target/AVR/AVRISelLowering.cpp | 2 +- llvm/lib/Target/Lanai/LanaiISelLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 5 ++--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp | 3 +-- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 2 +- 10 files changed, 11 insertions(+), 14 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 067ad819e0d2..8153b7061094 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3908,7 +3908,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(nullptr, 0), Y(nullptr, 0); + SDValue Sh, Y; // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
if (N0.getOpcode() == ISD::SHL && diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index aec2cf38b400..bec240d6c4d4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -286,7 +286,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - SDValue InGlue = SDValue(nullptr, 0); + SDValue InGlue; if (AddGlue(Lead, InGlue, true, DAG)) InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d14647d0eb0a..56253909cb53 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6344,7 +6344,7 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) return DAG.getConstant(Val, dl, VT); - return SDValue(nullptr, 0); + return SDValue(); } SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index bb2859c766c2..038637a4b9d9 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -5737,8 +5737,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ // them into a GPRPair. SDLoc dl(N); - SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) - : SDValue(nullptr,0); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(); SmallVector OpChanged; // Glue node will be appended late. 
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 1fb71ab205e7..39fba74a1ec7 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -2013,7 +2013,7 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { - SDValue Result(nullptr, 0); + SDValue Result; SDLoc DL(Op); EVT Ty = Op.getValueType(); diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp index 0d9e63c112fb..3e483e75ac6f 100644 --- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -284,7 +284,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight( void LanaiTargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { - SDValue Result(nullptr, 0); + SDValue Result; // Only support length 1 constraints for now. if (Constraint.length() > 1) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index ba74af5ef5f7..fdcf6e7e80f2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1365,8 +1365,7 @@ class BitPermutationSelector { ValueBit(SDValue V, unsigned I, Kind K = Variable) : V(V), Idx(I), K(K) {} - ValueBit(Kind K = Variable) - : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} + ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {} bool isZero() const { return K == ConstZero || K == VariableKnownToBeZero; @@ -4438,7 +4437,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); - SDValue InFlag(nullptr, 0); // Null incoming flag value. + SDValue InFlag; // Null incoming flag value. 
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8d6edf07bc53..d6c57d94e412 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2433,7 +2433,7 @@ unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { - SDValue OpVal(nullptr, 0); + SDValue OpVal; // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where diff --git a/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp index afb69899e724..ed380d309bd7 100644 --- a/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -168,8 +168,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){ // placement. SDLoc dl(N); - SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) - : SDValue(nullptr,0); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(); SmallVector OpChanged; // Glue node will be appended late. diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index ed1faf6b1fe8..20deabd233a9 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -3245,7 +3245,7 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { - SDValue Result(nullptr, 0); + SDValue Result; // Only support length 1 constraints for now. 
if (Constraint.length() > 1) From 913457acf07be7f22d71ac41ad1076517d7f45c6 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 10:48:56 -0800 Subject: [PATCH 312/992] [lldb] Use nullptr instead of 0 or NULL (NFC) This is a re-submission of 24d240558811604354a8d6080405f6bad8d15b5c without the hunk in HostNativeThreadBase.h, which breaks builds on Windows. Identified with modernize-use-nullptr. --- lldb/source/API/SystemInitializerFull.cpp | 2 +- lldb/source/Host/common/HostNativeThreadBase.cpp | 6 +++--- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 10 +++++----- .../ObjectFile/Minidump/MinidumpFileBuilder.cpp | 5 +++-- .../Process/Utility/RegisterInfoPOSIX_arm64.cpp | 4 ++-- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 11 ++++++----- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 ++-- lldb/source/Symbol/Symbol.cpp | 2 +- 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/lldb/source/API/SystemInitializerFull.cpp b/lldb/source/API/SystemInitializerFull.cpp index b01cb2ff545b..d662d370f813 100644 --- a/lldb/source/API/SystemInitializerFull.cpp +++ b/lldb/source/API/SystemInitializerFull.cpp @@ -39,7 +39,7 @@ constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper #else constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper - *g_shlib_dir_helper = 0; + *g_shlib_dir_helper = nullptr; #endif using namespace lldb_private; diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp index b15160b143ca..e7966d9ebaa5 100644 --- a/lldb/source/Host/common/HostNativeThreadBase.cpp +++ b/lldb/source/Host/common/HostNativeThreadBase.cpp @@ -18,7 +18,7 @@ using namespace lldb; using namespace lldb_private; HostNativeThreadBase::HostNativeThreadBase(thread_t thread) - : m_thread(thread), m_result(0) {} + : m_thread(thread), m_result(nullptr) {} lldb::thread_t HostNativeThreadBase::GetSystemHandle() const { return m_thread; @@ -34,7 +34,7 @@ bool 
HostNativeThreadBase::IsJoinable() const { void HostNativeThreadBase::Reset() { m_thread = LLDB_INVALID_HOST_THREAD; - m_result = 0; + m_result = nullptr; } bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { @@ -44,7 +44,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { lldb::thread_t HostNativeThreadBase::Release() { lldb::thread_t result = m_thread; m_thread = LLDB_INVALID_HOST_THREAD; - m_result = 0; + m_result = nullptr; return result; } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e72d55dd2aba..30d7d239834b 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -768,9 +768,9 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { // Write out the EXC registers data.PutHex32(EXCRegSet); data.PutHex32(EXCWordCount); - PrintRegisterValue(reg_ctx, "far", NULL, 8, data); - PrintRegisterValue(reg_ctx, "esr", NULL, 4, data); - PrintRegisterValue(reg_ctx, "exception", NULL, 4, data); + PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); + PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); + PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); return true; } return false; @@ -5073,7 +5073,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, lldb::offset_t offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == NULL) + if (data.GetU32(&offset, &load_cmd, 2) == nullptr) break; llvm::MachO::version_min_command version_min; @@ -5123,7 +5123,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == NULL) + if 
(data.GetU32(&offset, &load_cmd, 2) == nullptr) break; do { diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index ef419d9a89e8..516bcb21b019 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -272,7 +272,8 @@ Status MinidumpFileBuilder::AddModuleList(Target &target) { mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target)); m.SizeOfImage = static_cast(mod_size); m.Checksum = static_cast(0); - m.TimeDateStamp = static_cast(std::time(0)); + m.TimeDateStamp = + static_cast(std::time(nullptr)); m.ModuleNameRVA = static_cast( size_before + module_stream_size + helper_data.GetByteSize()); m.VersionInfo = info; @@ -719,7 +720,7 @@ Status MinidumpFileBuilder::Dump(lldb::FileUP &core_file) const { header.Checksum = static_cast( 0u), // not used in most of the writers header.TimeDateStamp = - static_cast(std::time(0)); + static_cast(std::time(nullptr)); header.Flags = static_cast(0u); // minidump normal flag diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 6c130be7b741..d6c4a8687ec5 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -178,10 +178,10 @@ static const lldb_private::RegisterSet g_reg_sets_arm64[k_num_register_sets] = { g_sve_regnums_arm64}}; static const lldb_private::RegisterSet g_reg_set_pauth_arm64 = { - "Pointer Authentication Registers", "pauth", k_num_pauth_register, NULL}; + "Pointer Authentication Registers", "pauth", k_num_pauth_register, nullptr}; static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { - "MTE Control Register", "mte", k_num_mte_register, NULL}; + "MTE Control Register", "mte", k_num_mte_register, nullptr}; 
RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index b90f104c4d21..be555c130bfe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -933,7 +933,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, DW_TAG_value_to_name(tag), type_name_cstr); CompilerType return_clang_type; - Type *func_type = NULL; + Type *func_type = nullptr; if (attrs.type.IsValid()) func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); @@ -1027,7 +1027,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, class_opaque_type, attrs.name.GetCString(), clang_type, attrs.accessibility, attrs.is_artificial, is_variadic, attrs.is_objc_direct_call); - type_handled = objc_method_decl != NULL; + type_handled = objc_method_decl != nullptr; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); @@ -1178,7 +1178,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, is_static, attrs.is_inline, attrs.is_explicit, is_attr_used, attrs.is_artificial); - type_handled = cxx_method_decl != NULL; + type_handled = cxx_method_decl != nullptr; // Artificial methods are always handled even when we // don't create a new declaration for them. 
type_handled |= attrs.is_artificial; @@ -2036,7 +2036,7 @@ bool DWARFASTParserClang::ParseTemplateDIE( if (name && name[0]) template_param_infos.names.push_back(name); else - template_param_infos.names.push_back(NULL); + template_param_infos.names.push_back(nullptr); // Get the signed value for any integer or enumeration if available clang_type.IsIntegerOrEnumerationType(is_signed); @@ -3336,7 +3336,8 @@ DWARFASTParserClang::GetOwningClangModule(const DWARFDIE &die) { auto it = m_die_to_module.find(module_die.GetDIE()); if (it != m_die_to_module.end()) return it->second; - const char *name = module_die.GetAttributeValueAsString(DW_AT_name, 0); + const char *name = + module_die.GetAttributeValueAsString(DW_AT_name, nullptr); if (!name) return {}; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 71d4c1e6c52f..2457e8276e20 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -269,7 +269,7 @@ class DWARFUnit : public lldb_private::UserID { ExtractUnitDIENoDwoIfNeeded(); // m_first_die_mutex is not required as m_first_die is never cleared. if (!m_first_die) - return NULL; + return nullptr; return &m_first_die; } @@ -277,7 +277,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFDebugInfoEntry *DIEPtr() { ExtractDIEsIfNeeded(); if (m_die_array.empty()) - return NULL; + return nullptr; return &m_die_array[0]; } diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index fa7226dfd046..5ee5b0fe2223 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -680,7 +680,7 @@ void Symbol::Encode(DataEncoder &file, ConstStringTable &strtab) const { // symbol's base address doesn't have a section, then it is a constant value. // If it does have a section, we will encode the file address and re-resolve // the address when we decode it. 
- bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != NULL; + bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != nullptr; file.AppendU8(is_addr); file.AppendU64(m_addr_range.GetBaseAddress().GetFileAddress()); file.AppendU64(m_addr_range.GetByteSize()); From 95f7112be8daa521e607fb3c231012a6d5eafa96 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 11:15:14 -0800 Subject: [PATCH 313/992] Revert "[lldb] Use nullptr instead of 0 or NULL (NFC)" This reverts commit 913457acf07be7f22d71ac41ad1076517d7f45c6. It again broke builds on Windows: lldb/source/Host/common/HostNativeThreadBase.cpp(37,14): error: assigning to 'lldb::thread_result_t' (aka 'unsigned int') from incompatible type 'std::nullptr_t' --- lldb/source/API/SystemInitializerFull.cpp | 2 +- lldb/source/Host/common/HostNativeThreadBase.cpp | 6 +++--- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 10 +++++----- .../ObjectFile/Minidump/MinidumpFileBuilder.cpp | 5 ++--- .../Process/Utility/RegisterInfoPOSIX_arm64.cpp | 4 ++-- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 11 +++++------ lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 ++-- lldb/source/Symbol/Symbol.cpp | 2 +- 8 files changed, 21 insertions(+), 23 deletions(-) diff --git a/lldb/source/API/SystemInitializerFull.cpp b/lldb/source/API/SystemInitializerFull.cpp index d662d370f813..b01cb2ff545b 100644 --- a/lldb/source/API/SystemInitializerFull.cpp +++ b/lldb/source/API/SystemInitializerFull.cpp @@ -39,7 +39,7 @@ constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper #else constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper - *g_shlib_dir_helper = nullptr; + *g_shlib_dir_helper = 0; #endif using namespace lldb_private; diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp index e7966d9ebaa5..b15160b143ca 100644 --- a/lldb/source/Host/common/HostNativeThreadBase.cpp +++ 
b/lldb/source/Host/common/HostNativeThreadBase.cpp @@ -18,7 +18,7 @@ using namespace lldb; using namespace lldb_private; HostNativeThreadBase::HostNativeThreadBase(thread_t thread) - : m_thread(thread), m_result(nullptr) {} + : m_thread(thread), m_result(0) {} lldb::thread_t HostNativeThreadBase::GetSystemHandle() const { return m_thread; @@ -34,7 +34,7 @@ bool HostNativeThreadBase::IsJoinable() const { void HostNativeThreadBase::Reset() { m_thread = LLDB_INVALID_HOST_THREAD; - m_result = nullptr; + m_result = 0; } bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { @@ -44,7 +44,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { lldb::thread_t HostNativeThreadBase::Release() { lldb::thread_t result = m_thread; m_thread = LLDB_INVALID_HOST_THREAD; - m_result = nullptr; + m_result = 0; return result; } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 30d7d239834b..e72d55dd2aba 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -768,9 +768,9 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { // Write out the EXC registers data.PutHex32(EXCRegSet); data.PutHex32(EXCWordCount); - PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); - PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); - PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); + PrintRegisterValue(reg_ctx, "far", NULL, 8, data); + PrintRegisterValue(reg_ctx, "esr", NULL, 4, data); + PrintRegisterValue(reg_ctx, "exception", NULL, 4, data); return true; } return false; @@ -5073,7 +5073,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, lldb::offset_t offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == nullptr) + if 
(data.GetU32(&offset, &load_cmd, 2) == NULL) break; llvm::MachO::version_min_command version_min; @@ -5123,7 +5123,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == nullptr) + if (data.GetU32(&offset, &load_cmd, 2) == NULL) break; do { diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index 516bcb21b019..ef419d9a89e8 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -272,8 +272,7 @@ Status MinidumpFileBuilder::AddModuleList(Target &target) { mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target)); m.SizeOfImage = static_cast(mod_size); m.Checksum = static_cast(0); - m.TimeDateStamp = - static_cast(std::time(nullptr)); + m.TimeDateStamp = static_cast(std::time(0)); m.ModuleNameRVA = static_cast( size_before + module_stream_size + helper_data.GetByteSize()); m.VersionInfo = info; @@ -720,7 +719,7 @@ Status MinidumpFileBuilder::Dump(lldb::FileUP &core_file) const { header.Checksum = static_cast( 0u), // not used in most of the writers header.TimeDateStamp = - static_cast(std::time(nullptr)); + static_cast(std::time(0)); header.Flags = static_cast(0u); // minidump normal flag diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index d6c4a8687ec5..6c130be7b741 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -178,10 +178,10 @@ static const lldb_private::RegisterSet g_reg_sets_arm64[k_num_register_sets] = { g_sve_regnums_arm64}}; static const lldb_private::RegisterSet g_reg_set_pauth_arm64 = { - "Pointer 
Authentication Registers", "pauth", k_num_pauth_register, nullptr}; + "Pointer Authentication Registers", "pauth", k_num_pauth_register, NULL}; static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { - "MTE Control Register", "mte", k_num_mte_register, nullptr}; + "MTE Control Register", "mte", k_num_mte_register, NULL}; RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index be555c130bfe..b90f104c4d21 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -933,7 +933,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, DW_TAG_value_to_name(tag), type_name_cstr); CompilerType return_clang_type; - Type *func_type = nullptr; + Type *func_type = NULL; if (attrs.type.IsValid()) func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); @@ -1027,7 +1027,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, class_opaque_type, attrs.name.GetCString(), clang_type, attrs.accessibility, attrs.is_artificial, is_variadic, attrs.is_objc_direct_call); - type_handled = objc_method_decl != nullptr; + type_handled = objc_method_decl != NULL; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); @@ -1178,7 +1178,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, is_static, attrs.is_inline, attrs.is_explicit, is_attr_used, attrs.is_artificial); - type_handled = cxx_method_decl != nullptr; + type_handled = cxx_method_decl != NULL; // Artificial methods are always handled even when we // don't create a new declaration for them. 
type_handled |= attrs.is_artificial; @@ -2036,7 +2036,7 @@ bool DWARFASTParserClang::ParseTemplateDIE( if (name && name[0]) template_param_infos.names.push_back(name); else - template_param_infos.names.push_back(nullptr); + template_param_infos.names.push_back(NULL); // Get the signed value for any integer or enumeration if available clang_type.IsIntegerOrEnumerationType(is_signed); @@ -3336,8 +3336,7 @@ DWARFASTParserClang::GetOwningClangModule(const DWARFDIE &die) { auto it = m_die_to_module.find(module_die.GetDIE()); if (it != m_die_to_module.end()) return it->second; - const char *name = - module_die.GetAttributeValueAsString(DW_AT_name, nullptr); + const char *name = module_die.GetAttributeValueAsString(DW_AT_name, 0); if (!name) return {}; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 2457e8276e20..71d4c1e6c52f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -269,7 +269,7 @@ class DWARFUnit : public lldb_private::UserID { ExtractUnitDIENoDwoIfNeeded(); // m_first_die_mutex is not required as m_first_die is never cleared. if (!m_first_die) - return nullptr; + return NULL; return &m_first_die; } @@ -277,7 +277,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFDebugInfoEntry *DIEPtr() { ExtractDIEsIfNeeded(); if (m_die_array.empty()) - return nullptr; + return NULL; return &m_die_array[0]; } diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index 5ee5b0fe2223..fa7226dfd046 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -680,7 +680,7 @@ void Symbol::Encode(DataEncoder &file, ConstStringTable &strtab) const { // symbol's base address doesn't have a section, then it is a constant value. // If it does have a section, we will encode the file address and re-resolve // the address when we decode it. 
- bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != nullptr; + bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != NULL; file.AppendU8(is_addr); file.AppendU64(m_addr_range.GetBaseAddress().GetFileAddress()); file.AppendU64(m_addr_range.GetByteSize()); From b8336280d8244039a648ffde06b1e857cb664b15 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 11:54:25 -0800 Subject: [PATCH 314/992] [lldb] Use nullptr instead of 0 or NULL (NFC) This is a re-submission of 24d240558811604354a8d6080405f6bad8d15b5c without the hunks in HostNativeThreadBase.{h,cpp}, which break builds on Windows. Identified with modernize-use-nullptr. --- lldb/source/API/SystemInitializerFull.cpp | 2 +- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 10 +++++----- .../ObjectFile/Minidump/MinidumpFileBuilder.cpp | 5 +++-- .../Process/Utility/RegisterInfoPOSIX_arm64.cpp | 4 ++-- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 11 ++++++----- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 4 ++-- lldb/source/Symbol/Symbol.cpp | 2 +- 7 files changed, 20 insertions(+), 18 deletions(-) diff --git a/lldb/source/API/SystemInitializerFull.cpp b/lldb/source/API/SystemInitializerFull.cpp index b01cb2ff545b..d662d370f813 100644 --- a/lldb/source/API/SystemInitializerFull.cpp +++ b/lldb/source/API/SystemInitializerFull.cpp @@ -39,7 +39,7 @@ constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper #else constexpr lldb_private::HostInfo::SharedLibraryDirectoryHelper - *g_shlib_dir_helper = 0; + *g_shlib_dir_helper = nullptr; #endif using namespace lldb_private; diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e72d55dd2aba..30d7d239834b 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -768,9 +768,9 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 { 
// Write out the EXC registers data.PutHex32(EXCRegSet); data.PutHex32(EXCWordCount); - PrintRegisterValue(reg_ctx, "far", NULL, 8, data); - PrintRegisterValue(reg_ctx, "esr", NULL, 4, data); - PrintRegisterValue(reg_ctx, "exception", NULL, 4, data); + PrintRegisterValue(reg_ctx, "far", nullptr, 8, data); + PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data); + PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data); return true; } return false; @@ -5073,7 +5073,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, lldb::offset_t offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == NULL) + if (data.GetU32(&offset, &load_cmd, 2) == nullptr) break; llvm::MachO::version_min_command version_min; @@ -5123,7 +5123,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header, offset = lc_offset; for (uint32_t i = 0; i < header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; - if (data.GetU32(&offset, &load_cmd, 2) == NULL) + if (data.GetU32(&offset, &load_cmd, 2) == nullptr) break; do { diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index ef419d9a89e8..516bcb21b019 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -272,7 +272,8 @@ Status MinidumpFileBuilder::AddModuleList(Target &target) { mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target)); m.SizeOfImage = static_cast(mod_size); m.Checksum = static_cast(0); - m.TimeDateStamp = static_cast(std::time(0)); + m.TimeDateStamp = + static_cast(std::time(nullptr)); m.ModuleNameRVA = static_cast( size_before + module_stream_size + helper_data.GetByteSize()); m.VersionInfo = info; @@ -719,7 +720,7 @@ Status MinidumpFileBuilder::Dump(lldb::FileUP &core_file) const { 
header.Checksum = static_cast( 0u), // not used in most of the writers header.TimeDateStamp = - static_cast(std::time(0)); + static_cast(std::time(nullptr)); header.Flags = static_cast(0u); // minidump normal flag diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 6c130be7b741..d6c4a8687ec5 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -178,10 +178,10 @@ static const lldb_private::RegisterSet g_reg_sets_arm64[k_num_register_sets] = { g_sve_regnums_arm64}}; static const lldb_private::RegisterSet g_reg_set_pauth_arm64 = { - "Pointer Authentication Registers", "pauth", k_num_pauth_register, NULL}; + "Pointer Authentication Registers", "pauth", k_num_pauth_register, nullptr}; static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { - "MTE Control Register", "mte", k_num_mte_register, NULL}; + "MTE Control Register", "mte", k_num_mte_register, nullptr}; RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index b90f104c4d21..be555c130bfe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -933,7 +933,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, DW_TAG_value_to_name(tag), type_name_cstr); CompilerType return_clang_type; - Type *func_type = NULL; + Type *func_type = nullptr; if (attrs.type.IsValid()) func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); @@ -1027,7 +1027,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, class_opaque_type, attrs.name.GetCString(), clang_type, attrs.accessibility, attrs.is_artificial, 
is_variadic, attrs.is_objc_direct_call); - type_handled = objc_method_decl != NULL; + type_handled = objc_method_decl != nullptr; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); @@ -1178,7 +1178,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, is_static, attrs.is_inline, attrs.is_explicit, is_attr_used, attrs.is_artificial); - type_handled = cxx_method_decl != NULL; + type_handled = cxx_method_decl != nullptr; // Artificial methods are always handled even when we // don't create a new declaration for them. type_handled |= attrs.is_artificial; @@ -2036,7 +2036,7 @@ bool DWARFASTParserClang::ParseTemplateDIE( if (name && name[0]) template_param_infos.names.push_back(name); else - template_param_infos.names.push_back(NULL); + template_param_infos.names.push_back(nullptr); // Get the signed value for any integer or enumeration if available clang_type.IsIntegerOrEnumerationType(is_signed); @@ -3336,7 +3336,8 @@ DWARFASTParserClang::GetOwningClangModule(const DWARFDIE &die) { auto it = m_die_to_module.find(module_die.GetDIE()); if (it != m_die_to_module.end()) return it->second; - const char *name = module_die.GetAttributeValueAsString(DW_AT_name, 0); + const char *name = + module_die.GetAttributeValueAsString(DW_AT_name, nullptr); if (!name) return {}; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 71d4c1e6c52f..2457e8276e20 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -269,7 +269,7 @@ class DWARFUnit : public lldb_private::UserID { ExtractUnitDIENoDwoIfNeeded(); // m_first_die_mutex is not required as m_first_die is never cleared. 
if (!m_first_die) - return NULL; + return nullptr; return &m_first_die; } @@ -277,7 +277,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFDebugInfoEntry *DIEPtr() { ExtractDIEsIfNeeded(); if (m_die_array.empty()) - return NULL; + return nullptr; return &m_die_array[0]; } diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index fa7226dfd046..5ee5b0fe2223 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -680,7 +680,7 @@ void Symbol::Encode(DataEncoder &file, ConstStringTable &strtab) const { // symbol's base address doesn't have a section, then it is a constant value. // If it does have a section, we will encode the file address and re-resolve // the address when we decode it. - bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != NULL; + bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != nullptr; file.AppendU8(is_addr); file.AppendU64(m_addr_range.GetBaseAddress().GetFileAddress()); file.AppendU64(m_addr_range.GetByteSize()); From 319e77592f02ae479d92d47c691343e8aabfb714 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 1 Jan 2022 20:08:45 +0000 Subject: [PATCH 315/992] [ARM] Verify addressing immediates This adds at extra check into ARMBaseInstrInfo::verifyInstruction to verify the offsets used in addressing mode immediates using isLegalAddressImm. Some tests needed fixing up as a result, adjusting the opcode created from CMSE stack adjustments. 
Differential Revision: https://reviews.llvm.org/D114939 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 30 ++++++++++++++++++++ llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 +-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 884f38ff6c58..cde715880376 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -4868,6 +4868,36 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, return false; } } + + // Check the address model by taking the first Imm operand and checking it is + // legal for that addressing mode. + ARMII::AddrMode AddrMode = + (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask); + switch (AddrMode) { + default: + break; + case ARMII::AddrModeT2_i7: + case ARMII::AddrModeT2_i7s2: + case ARMII::AddrModeT2_i7s4: + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i8pos: + case ARMII::AddrModeT2_i8neg: + case ARMII::AddrModeT2_i8s4: + case ARMII::AddrModeT2_i12: { + uint32_t Imm = 0; + for (auto Op : MI.operands()) { + if (Op.isImm()) { + Imm = Op.getImm(); + break; + } + } + if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) { + ErrInfo = "Incorrect AddrMode Imm for instruction"; + return false; + } + break; + } + } return true; } diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index fa244786a80d..663eceae6ee7 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1451,9 +1451,9 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8( // restore FPSCR from stack and clear bits 0-4, 7, 28-31 // The other bits are program global according to the AAPCS if (passesFPReg) { - BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg) + BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg) .addReg(ARM::SP) - .addImm(0x40) + .addImm(0x10) .add(predOps(ARMCC::AL)); BuildMI(MBB, 
MBBI, DL, TII->get(ARM::t2BICri), SpareReg) .addReg(SpareReg) From f4ffcab1787ad19ad9c2596688306de8a45f904a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 12:34:11 -0800 Subject: [PATCH 316/992] Remove redundant string initialization (NFC) Identified by readability-redundant-string-init. --- clang/include/clang/Basic/CodeGenOptions.h | 2 +- clang/include/clang/Tooling/ReplacementsYaml.h | 3 +-- clang/lib/Driver/Driver.cpp | 6 +++--- clang/utils/TableGen/NeonEmitter.cpp | 2 +- .../ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp | 2 +- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp | 4 ++-- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 33ec03a17136..5a5c2689c689 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -307,7 +307,7 @@ class CodeGenOptions : public CodeGenOptionsBase { std::shared_ptr Regex; /// By default, optimization remark is missing. - OptRemark() : Kind(RK_Missing), Pattern(""), Regex(nullptr) {} + OptRemark() : Kind(RK_Missing), Regex(nullptr) {} /// Returns true iff the optimization remark holds a valid regular /// expression. diff --git a/clang/include/clang/Tooling/ReplacementsYaml.h b/clang/include/clang/Tooling/ReplacementsYaml.h index 83e35d623255..838f87fd1978 100644 --- a/clang/include/clang/Tooling/ReplacementsYaml.h +++ b/clang/include/clang/Tooling/ReplacementsYaml.h @@ -30,8 +30,7 @@ template <> struct MappingTraits { /// Helper to (de)serialize a Replacement since we don't have direct /// access to its data members. 
struct NormalizedReplacement { - NormalizedReplacement(const IO &) - : FilePath(""), Offset(0), Length(0), ReplacementText("") {} + NormalizedReplacement(const IO &) : Offset(0), Length(0) {} NormalizedReplacement(const IO &, const clang::tooling::Replacement &R) : FilePath(R.getFilePath()), Offset(R.getOffset()), diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index a4a53d989851..a73421c0535d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -174,9 +174,9 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, CCPrintHeadersFilename(), CCLogDiagnosticsFilename(), CCCPrintBindings(false), CCPrintOptions(false), CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), - CCPrintProcessStats(false), TargetTriple(TargetTriple), - CCCGenericGCCName(""), Saver(Alloc), CheckInputsExist(true), - GenReproducer(false), SuppressMissingInputWarning(false) { + CCPrintProcessStats(false), TargetTriple(TargetTriple), Saver(Alloc), + CheckInputsExist(true), GenReproducer(false), + SuppressMissingInputWarning(false) { // Provide a sane fallback if no VFS is specified. 
if (!this->VFS) this->VFS = llvm::vfs::getRealFileSystem(); diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index af0544b54b17..c2e19723a091 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -292,7 +292,7 @@ class Variable { std::string N; public: - Variable() : T(Type::getVoid()), N("") {} + Variable() : T(Type::getVoid()) {} Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {} Type getType() const { return T; } diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp index c6cb2be981a7..40a0ea3e97a4 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp @@ -59,7 +59,7 @@ uint32_t AppleObjCTypeEncodingParser::ReadNumber(StringLexer &type) { // "{CGRect=\"origin\"{CGPoint=\"x\"d\"y\"d}\"size\"{CGSize=\"width\"d\"height\"d}}" AppleObjCTypeEncodingParser::StructElement::StructElement() - : name(""), type(clang::QualType()) {} + : type(clang::QualType()) {} AppleObjCTypeEncodingParser::StructElement AppleObjCTypeEncodingParser::ReadStructElement(TypeSystemClang &ast_ctx, diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 2b88c481dab0..33db23408cf2 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -97,8 +97,8 @@ class RuntimeDyldCheckerExprEval { class EvalResult { public: - EvalResult() : Value(0), ErrorMsg("") {} - EvalResult(uint64_t Value) : Value(Value), ErrorMsg("") {} + EvalResult() : Value(0) {} + EvalResult(uint64_t Value) : Value(Value) {} EvalResult(std::string ErrorMsg) : Value(0), 
ErrorMsg(std::move(ErrorMsg)) {} uint64_t getValue() const { return Value; } From 8b649f98f68c5fe6e39f0d3666bb2dfef4543beb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 13:14:59 -0800 Subject: [PATCH 317/992] [lldb] Add NOLINT(modernize-use-nullptr) thread_result_t is defined as unsigned on Windows. This patch prevents clang-tidy from replacing 0 with nullptr. --- lldb/include/lldb/Host/HostNativeThreadBase.h | 2 +- lldb/source/Host/common/HostNativeThreadBase.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lldb/include/lldb/Host/HostNativeThreadBase.h b/lldb/include/lldb/Host/HostNativeThreadBase.h index bfd70d745593..1c581c279e1e 100644 --- a/lldb/include/lldb/Host/HostNativeThreadBase.h +++ b/lldb/include/lldb/Host/HostNativeThreadBase.h @@ -46,7 +46,7 @@ class HostNativeThreadBase { ThreadCreateTrampoline(lldb::thread_arg_t arg); lldb::thread_t m_thread = LLDB_INVALID_HOST_THREAD; - lldb::thread_result_t m_result = 0; + lldb::thread_result_t m_result = 0; // NOLINT(modernize-use-nullptr) }; } diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp index b15160b143ca..b8223e3ec42a 100644 --- a/lldb/source/Host/common/HostNativeThreadBase.cpp +++ b/lldb/source/Host/common/HostNativeThreadBase.cpp @@ -18,7 +18,7 @@ using namespace lldb; using namespace lldb_private; HostNativeThreadBase::HostNativeThreadBase(thread_t thread) - : m_thread(thread), m_result(0) {} + : m_thread(thread), m_result(0) {} // NOLINT(modernize-use-nullptr) lldb::thread_t HostNativeThreadBase::GetSystemHandle() const { return m_thread; @@ -34,7 +34,7 @@ bool HostNativeThreadBase::IsJoinable() const { void HostNativeThreadBase::Reset() { m_thread = LLDB_INVALID_HOST_THREAD; - m_result = 0; + m_result = 0; // NOLINT(modernize-use-nullptr) } bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const { @@ -44,7 +44,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) 
const { lldb::thread_t HostNativeThreadBase::Release() { lldb::thread_t result = m_thread; m_thread = LLDB_INVALID_HOST_THREAD; - m_result = 0; + m_result = 0; // NOLINT(modernize-use-nullptr) return result; } From a978847e3aa4bdd64b242a31bc9037019581d1e3 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 02:16:11 +0000 Subject: [PATCH 318/992] Use const reference for diagnostic in callback (NFC) This isn't a "small" struct, flagged by Coverity. --- mlir/lib/IR/Diagnostics.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index 327264a046c8..2e2a1465ac0f 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -907,7 +907,7 @@ struct ParallelDiagnosticHandlerImpl : public llvm::PrettyStackTraceEntry { return; os << "In-Flight Diagnostics:\n"; - emitDiagnostics([&](Diagnostic diag) { + emitDiagnostics([&](const Diagnostic &diag) { os.indent(4); // Print each diagnostic with the format: From 104a827ea6de0cbe0f5faef4407552ede31d165c Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 21:19:31 +0000 Subject: [PATCH 319/992] Move LinalgDetensorize pass option from .cpp file to the .td declaration (NFC) --- mlir/include/mlir/Dialect/Linalg/Passes.td | 6 ++++++ mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp | 7 ------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 5bcc8cc6e33f..893487df4a35 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -237,6 +237,12 @@ def LinalgDetensorize : Pass<"linalg-detensorize", ""> { run on others. This is because it performs specific legalization of the blocks that make up the body, which it assumes has a FunctionLike trait. 
}]; + let options = [ + Option<"aggressiveMode", "aggressive-mode", "bool", /*default=*/"false", + "Detensorize all ops that qualify for detensoring along with branch" + " operands and basic-block arguments."> + + ]; } def LinalgStrategyTileAndFusePass diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp index 9be95a181533..a20da7ffa207 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -196,8 +196,6 @@ struct ExtractFromReshapeFromElements /// @see LinalgDetensorize in Linalg/Passes.td for more details. struct LinalgDetensorize : public LinalgDetensorizeBase { LinalgDetensorize() = default; - LinalgDetensorize(const LinalgDetensorize &pass) - : LinalgDetensorizeBase() {} class CostModel { public: @@ -600,11 +598,6 @@ struct LinalgDetensorize : public LinalgDetensorizeBase { std::move(canonPatterns)))) signalPassFailure(); } - - Option aggressiveMode{ - *this, "aggressive-mode", - llvm::cl::desc("Detensorize all ops that qualify for detensoring along " - "with branch operands and basic-block arguments.")}; }; } // namespace From fd4808887ee47f3ec8a030e9211169ef4fb094c3 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 16:18:18 -0800 Subject: [PATCH 320/992] [llvm] Remove redundant member initialization (NFC) Identified with readability-redundant-member-init. 
--- llvm/include/llvm/ADT/Triple.h | 4 +--- llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 2 +- llvm/include/llvm/Analysis/DDG.h | 2 +- llvm/include/llvm/Analysis/LazyCallGraph.h | 2 +- llvm/include/llvm/Analysis/MemoryLocation.h | 3 +-- llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h | 2 +- .../llvm/Analysis/ScalarEvolutionAliasAnalysis.h | 2 +- llvm/include/llvm/CodeGen/CodeGenPassBuilder.h | 2 +- .../include/llvm/CodeGen/GlobalISel/CallLowering.h | 2 +- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 2 +- llvm/include/llvm/CodeGen/MachinePassManager.h | 6 +++--- .../llvm/CodeGen/SelectionDAGAddressAnalysis.h | 2 +- llvm/include/llvm/DWARFLinker/DWARFLinker.h | 4 ++-- llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 2 +- llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h | 4 ++-- llvm/include/llvm/FileCheck/FileCheck.h | 3 +-- llvm/include/llvm/IR/LegacyPassManagers.h | 3 +-- llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 4 +--- llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 2 +- .../llvm/MCA/HardwareUnits/ResourceManager.h | 4 ++-- llvm/include/llvm/MCA/Stages/EntryStage.h | 2 +- llvm/include/llvm/MCA/Stages/ExecuteStage.h | 2 +- llvm/include/llvm/MCA/Stages/InOrderIssueStage.h | 2 +- llvm/include/llvm/MCA/Stages/InstructionTables.h | 2 +- llvm/include/llvm/MCA/Stages/RetireStage.h | 2 +- .../llvm/ProfileData/Coverage/CoverageMapping.h | 2 +- llvm/include/llvm/Remarks/RemarkSerializer.h | 2 +- llvm/include/llvm/Support/ScopedPrinter.h | 4 ++-- llvm/include/llvm/Transforms/IPO/Attributor.h | 2 +- .../llvm/Transforms/Scalar/LoopPassManager.h | 3 +-- llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 2 +- llvm/lib/Analysis/CallGraphSCCPass.cpp | 2 +- llvm/lib/Analysis/DDG.cpp | 2 +- llvm/lib/Analysis/GlobalsModRef.cpp | 4 ++-- llvm/lib/Analysis/IVUsers.cpp | 2 +- llvm/lib/Analysis/LoopCacheAnalysis.cpp | 5 ++--- llvm/lib/Analysis/LoopPass.cpp | 3 +-- llvm/lib/Analysis/RegionPass.cpp | 3 +-- .../lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2 +- 
llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 10 +++++----- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 3 +-- .../CodeGen/MachineOptimizationRemarkEmitter.cpp | 3 +-- llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 3 +-- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 9 +++------ llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp | 2 +- .../ExecutionEngine/GDBRegistrationListener.cpp | 2 +- llvm/lib/IR/LegacyPassManager.cpp | 14 ++++++-------- llvm/lib/IR/Module.cpp | 3 +-- llvm/lib/InterfaceStub/IFSStub.cpp | 4 ++-- llvm/lib/MC/MCParser/AsmParser.cpp | 2 +- llvm/lib/MC/MCParser/MasmParser.cpp | 2 +- llvm/lib/MCA/Stages/DispatchStage.cpp | 2 +- llvm/lib/MCA/Stages/InOrderIssueStage.cpp | 2 +- llvm/lib/Remarks/BitstreamRemarkSerializer.cpp | 2 +- llvm/lib/Remarks/RemarkStreamer.cpp | 2 +- llvm/lib/Remarks/RemarkStringTable.cpp | 2 +- llvm/lib/Remarks/YAMLRemarkParser.cpp | 2 +- llvm/lib/Support/YAMLParser.cpp | 4 ++-- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 - .../lib/Target/AArch64/AArch64TargetObjectFile.cpp | 3 +-- .../Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- .../AArch64/GISel/AArch64InstructionSelector.cpp | 4 ++-- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h | 2 +- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 3 +-- llvm/lib/Target/AMDGPU/AMDGPULibFunc.h | 2 +- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 +--- .../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2 +- .../Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 3 +-- llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 2 +- llvm/lib/Target/ARM/ARMHazardRecognizer.h | 2 +- llvm/lib/Target/ARM/ARMInstrInfo.cpp | 3 +-- llvm/lib/Target/ARM/ARMInstructionSelector.cpp | 4 ++-- llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 3 +-- llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 2 +- llvm/lib/Target/ARM/ARMTargetObjectFile.h | 3 +-- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +- 
llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 2 +- llvm/lib/Target/AVR/AVRSubtarget.cpp | 2 -- llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 8 ++++---- llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 4 ++-- llvm/lib/Target/BPF/BPFSubtarget.cpp | 2 +- .../Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 6 ++---- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2 +- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 5 ++--- llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 2 +- llvm/lib/Target/Lanai/LanaiSubtarget.cpp | 2 +- .../Target/MSP430/AsmParser/MSP430AsmParser.cpp | 11 ++++++----- llvm/lib/Target/MSP430/MSP430Subtarget.cpp | 2 +- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 3 +-- llvm/lib/Target/Mips/Mips16RegisterInfo.cpp | 2 +- llvm/lib/Target/Mips/MipsInstructionSelector.cpp | 4 ++-- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 3 +-- llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 4 ++-- llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h | 2 +- llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 5 +++-- .../PowerPC/GISel/PPCInstructionSelector.cpp | 3 +-- .../Target/PowerPC/GISel/PPCRegisterBankInfo.cpp | 3 +-- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 4 ++-- llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp | 3 +-- llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp | 3 +-- llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 2 +- llvm/lib/Target/Sparc/SparcTargetObjectFile.h | 4 +--- llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 3 +-- llvm/lib/Target/SystemZ/SystemZSubtarget.cpp | 2 +- llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp | 2 +- llvm/lib/Target/VE/VEMachineFunctionInfo.h | 5 ++--- .../Target/WebAssembly/WebAssemblySubtarget.cpp | 5 ++--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 6 +++--- llvm/lib/Target/X86/X86InstructionSelector.cpp | 4 ++-- llvm/lib/Target/X86/X86RegisterBankInfo.cpp | 3 +-- llvm/lib/Target/XCore/XCoreSubtarget.cpp | 
4 ++-- llvm/lib/Transforms/IPO/Inliner.cpp | 3 +-- llvm/lib/Transforms/IPO/PartialInlining.cpp | 3 +-- .../Transforms/Instrumentation/InstrProfiling.cpp | 3 +-- .../Transforms/Scalar/LowerMatrixIntrinsics.cpp | 6 ++---- llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++-- llvm/tools/dsymutil/BinaryHolder.h | 2 +- llvm/tools/dsymutil/Reproducer.cpp | 2 +- llvm/tools/llvm-cov/CoverageSummaryInfo.h | 7 ++----- llvm/tools/llvm-mca/CodeRegion.h | 2 +- llvm/tools/llvm-mca/PipelinePrinter.h | 2 +- llvm/tools/llvm-objcopy/ELF/Object.h | 6 ++---- llvm/tools/llvm-objdump/SourcePrinter.h | 2 +- llvm/tools/llvm-profdata/llvm-profdata.cpp | 7 +++---- llvm/tools/llvm-readobj/llvm-readobj.cpp | 4 ++-- llvm/utils/TableGen/GlobalISel/GIMatchDag.h | 4 +--- llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp | 1 - llvm/utils/TableGen/GlobalISelEmitter.cpp | 4 +--- llvm/utils/TableGen/PredicateExpander.h | 2 +- llvm/utils/TableGen/RegisterBankEmitter.cpp | 2 +- 134 files changed, 184 insertions(+), 244 deletions(-) diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 6f1f1618fbc2..45a8b8d92714 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -271,9 +271,7 @@ class Triple { /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. 
- Triple() - : Data(), Arch(), SubArch(), Vendor(), OS(), Environment(), - ObjectFormat() {} + Triple() : Arch(), SubArch(), Vendor(), OS(), Environment(), ObjectFormat() {} explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index ed9d1ba4c5a7..361765d85257 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -58,7 +58,7 @@ class BasicAAResult : public AAResultBase { BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree *DT = nullptr, PhiValues *PV = nullptr) - : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {} + : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index 51dd4a738f00..4ea589ec7efc 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -52,7 +52,7 @@ class DDGNode : public DDGNodeBase { }; DDGNode() = delete; - DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {} + DDGNode(const NodeKind K) : Kind(K) {} DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} virtual ~DDGNode() = 0; diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index 0580f4d7b226..5828274cc02b 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -1190,7 +1190,7 @@ class LazyCallGraph { } }; -inline LazyCallGraph::Edge::Edge() : Value() {} +inline LazyCallGraph::Edge::Edge() {} inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} inline 
LazyCallGraph::Edge::operator bool() const { diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index 833fce1b1726..23e50f601e04 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -284,8 +284,7 @@ class MemoryLocation { return T.isScalable() ? UnknownSize : T.getFixedSize(); } - MemoryLocation() - : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()), AATags() {} + MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {} explicit MemoryLocation(const Value *Ptr, LocationSize Size, const AAMDNodes &AATags = AAMDNodes()) diff --git a/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h index b4f4e5f29768..d19a6394bd48 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h @@ -40,7 +40,7 @@ class ObjCARCAAResult : public AAResultBase { const DataLayout &DL; public: - explicit ObjCARCAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} + explicit ObjCARCAAResult(const DataLayout &DL) : DL(DL) {} ObjCARCAAResult(ObjCARCAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL) {} diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h index 20acb407ead0..ebd427354cee 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -27,7 +27,7 @@ class SCEVAAResult : public AAResultBase { ScalarEvolution &SE; public: - explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {} + explicit SCEVAAResult(ScalarEvolution &SE) : SE(SE) {} SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {} AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h 
b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index 1fd07ca2c8d4..f6563971f981 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -159,7 +159,7 @@ template class CodeGenPassBuilder { class AddIRPass { public: AddIRPass(ModulePassManager &MPM, bool DebugPM, bool Check = true) - : MPM(MPM), FPM() { + : MPM(MPM) { if (Check) AddingFunctionPasses = false; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 9c878d4b087b..82c125993ec3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -95,7 +95,7 @@ class CallLowering { bool IsFixed = true) : ArgInfo(Regs, OrigValue.getType(), OrigIndex, Flags, IsFixed, &OrigValue) {} - ArgInfo() : BaseArgInfo() {} + ArgInfo() {} }; struct CallLoweringInfo { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index a02b15639946..9507c3411b5c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -556,7 +556,7 @@ class LegalizeRuleSet { } public: - LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false), Rules() {} + LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false) {} bool isAliasedByAnother() { return IsAliasedByAnother; } void setIsAliasedByAnother() { IsAliasedByAnother = true; } diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h index f967167c65e1..75b8a89c812e 100644 --- a/llvm/include/llvm/CodeGen/MachinePassManager.h +++ b/llvm/include/llvm/CodeGen/MachinePassManager.h @@ -40,10 +40,10 @@ class MachineFunctionAnalysisManager : public AnalysisManager { public: using Base = AnalysisManager; - MachineFunctionAnalysisManager() : Base(), FAM(nullptr), MAM(nullptr) {} + MachineFunctionAnalysisManager() : 
FAM(nullptr), MAM(nullptr) {} MachineFunctionAnalysisManager(FunctionAnalysisManager &FAM, ModuleAnalysisManager &MAM) - : Base(), FAM(&FAM), MAM(&MAM) {} + : FAM(&FAM), MAM(&MAM) {} MachineFunctionAnalysisManager(MachineFunctionAnalysisManager &&) = default; MachineFunctionAnalysisManager & operator=(MachineFunctionAnalysisManager &&) = default; @@ -135,7 +135,7 @@ class MachineFunctionPassManager MachineFunctionPassManager(bool DebugLogging = false, bool RequireCodeGenSCCOrder = false, bool VerifyMachineFunction = false) - : Base(), RequireCodeGenSCCOrder(RequireCodeGenSCCOrder), + : RequireCodeGenSCCOrder(RequireCodeGenSCCOrder), VerifyMachineFunction(VerifyMachineFunction) {} MachineFunctionPassManager(MachineFunctionPassManager &&) = default; MachineFunctionPassManager & diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 6a3d76be0ed6..0f3af915da64 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -39,7 +39,7 @@ class BaseIndexOffset { public: BaseIndexOffset() = default; BaseIndexOffset(SDValue Base, SDValue Index, bool IsIndexSignExt) - : Base(Base), Index(Index), Offset(), IsIndexSignExt(IsIndexSignExt) {} + : Base(Base), Index(Index), IsIndexSignExt(IsIndexSignExt) {} BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, bool IsIndexSignExt) : Base(Base), Index(Index), Offset(Offset), diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index 1c6d0b1ead86..4f1c666df35f 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -385,8 +385,8 @@ class DWARFLinker { : Die(Die), Type(T), CU(CU), Flags(0), OtherInfo(OtherInfo) {} WorklistItem(unsigned AncestorIdx, CompileUnit &CU, unsigned Flags) - : Die(), Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), - Flags(Flags), 
AncestorIdx(AncestorIdx) {} + : Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), Flags(Flags), + AncestorIdx(AncestorIdx) {} }; /// returns true if we need to translate strings. diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h index 045c9e3f3ebd..6dd90499c203 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h @@ -20,7 +20,7 @@ namespace gsym { /// string at offset zero. Strings must be UTF8 NULL terminated strings. struct StringTable { StringRef Data; - StringTable() : Data() {} + StringTable() {} StringTable(StringRef D) : Data(D) {} StringRef operator[](size_t Offset) const { return getString(Offset); } StringRef getString(uint32_t Offset) const { diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h index 4bb11bf62593..779dc885372d 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -87,7 +87,7 @@ class PlainPrinterBase : public DIPrinter { public: PlainPrinterBase(raw_ostream &OS, raw_ostream &ES, PrinterConfig &Config) - : DIPrinter(), OS(OS), ES(ES), Config(Config) {} + : OS(OS), ES(ES), Config(Config) {} void print(const Request &Request, const DILineInfo &Info) override; void print(const Request &Request, const DIInliningInfo &Info) override; @@ -138,7 +138,7 @@ class JSONPrinter : public DIPrinter { public: JSONPrinter(raw_ostream &OS, PrinterConfig &Config) - : DIPrinter(), OS(OS), Config(Config) {} + : OS(OS), Config(Config) {} void print(const Request &Request, const DILineInfo &Info) override; void print(const Request &Request, const DIInliningInfo &Info) override; diff --git a/llvm/include/llvm/FileCheck/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h index 6ed75e14ccb6..7a6c98db3029 100644 --- a/llvm/include/llvm/FileCheck/FileCheck.h +++ b/llvm/include/llvm/FileCheck/FileCheck.h @@ -80,8 
+80,7 @@ class FileCheckType { std::bitset Modifiers; public: - FileCheckType(FileCheckKind Kind = CheckNone) - : Kind(Kind), Count(1), Modifiers() {} + FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {} FileCheckType(const FileCheckType &) = default; FileCheckType &operator=(const FileCheckType &) = default; diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index 0bcb408d4929..e161bdee961a 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -460,8 +460,7 @@ class PMDataManager { class FPPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit FPPassManager() - : ModulePass(ID), PMDataManager() { } + explicit FPPassManager() : ModulePass(ID) {} /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 908ee30e4060..2f57b85a9232 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -68,9 +68,7 @@ struct IntelExpr { StringRef OffsetName; unsigned Scale; - IntelExpr() - : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), - OffsetName(StringRef()), Scale(1) {} + IntelExpr() : NeedBracs(false), Imm(0), Scale(1) {} // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, StringRef offsetName, int64_t imm, bool needBracs) diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index 7eddd067aa0c..c05f770df8eb 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -55,7 +55,7 @@ class MemoryGroup { MemoryGroup() : NumPredecessors(0), 
NumExecutingPredecessors(0), NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0), - NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {} + NumExecuted(0), CriticalPredecessor() {} MemoryGroup(MemoryGroup &&) = default; size_t getNumSuccessors() const { diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h index b679b0d7d537..7467fd6754f0 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -118,8 +118,8 @@ class DefaultResourceStrategy final : public ResourceStrategy { public: DefaultResourceStrategy(uint64_t UnitMask) - : ResourceStrategy(), ResourceUnitMask(UnitMask), - NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} + : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask), + RemovedFromNextInSequence(0) {} virtual ~DefaultResourceStrategy() = default; uint64_t select(uint64_t ReadyMask) override; diff --git a/llvm/include/llvm/MCA/Stages/EntryStage.h b/llvm/include/llvm/MCA/Stages/EntryStage.h index 1c133898d603..4c50838bef4b 100644 --- a/llvm/include/llvm/MCA/Stages/EntryStage.h +++ b/llvm/include/llvm/MCA/Stages/EntryStage.h @@ -36,7 +36,7 @@ class EntryStage final : public Stage { EntryStage &operator=(const EntryStage &Other) = delete; public: - EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { } + EntryStage(SourceMgr &SM) : SM(SM), NumRetired(0) {} bool isAvailable(const InstRef &IR) const override; bool hasWorkToComplete() const override; diff --git a/llvm/include/llvm/MCA/Stages/ExecuteStage.h b/llvm/include/llvm/MCA/Stages/ExecuteStage.h index 4c09ca8255ff..03a78a8b6b85 100644 --- a/llvm/include/llvm/MCA/Stages/ExecuteStage.h +++ b/llvm/include/llvm/MCA/Stages/ExecuteStage.h @@ -49,7 +49,7 @@ class ExecuteStage final : public Stage { public: ExecuteStage(Scheduler &S) : ExecuteStage(S, false) {} ExecuteStage(Scheduler &S, bool 
ShouldPerformBottleneckAnalysis) - : Stage(), HWS(S), NumDispatchedOpcodes(0), NumIssuedOpcodes(0), + : HWS(S), NumDispatchedOpcodes(0), NumIssuedOpcodes(0), EnablePressureEvents(ShouldPerformBottleneckAnalysis) {} // This stage works under the assumption that the Pipeline will eventually diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h index 42f386a13d85..40bc3b5aed94 100644 --- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h +++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h @@ -38,7 +38,7 @@ struct StallInfo { unsigned CyclesLeft; StallKind Kind; - StallInfo() : IR(), CyclesLeft(), Kind(StallKind::DEFAULT) {} + StallInfo() : CyclesLeft(), Kind(StallKind::DEFAULT) {} StallKind getStallKind() const { return Kind; } unsigned getCyclesLeft() const { return CyclesLeft; } diff --git a/llvm/include/llvm/MCA/Stages/InstructionTables.h b/llvm/include/llvm/MCA/Stages/InstructionTables.h index 35b21b0ba94d..9617fd49db6e 100644 --- a/llvm/include/llvm/MCA/Stages/InstructionTables.h +++ b/llvm/include/llvm/MCA/Stages/InstructionTables.h @@ -32,7 +32,7 @@ class InstructionTables final : public Stage { public: InstructionTables(const MCSchedModel &Model) - : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) { + : SM(Model), Masks(Model.getNumProcResourceKinds()) { computeProcResourceMasks(Model, Masks); } diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h index b635a01db85e..aafe2815df15 100644 --- a/llvm/include/llvm/MCA/Stages/RetireStage.h +++ b/llvm/include/llvm/MCA/Stages/RetireStage.h @@ -36,7 +36,7 @@ class RetireStage final : public Stage { public: RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) - : Stage(), RCU(R), PRF(F), LSU(LS) {} + : RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { return !RCU.isEmpty(); } Error cycleStart() override; diff --git 
a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index d3a5d44ce8dd..e1f45019b1a9 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -702,7 +702,7 @@ class LineCoverageIterator LineCoverageIterator(const CoverageData &CD, unsigned Line) : CD(CD), WrappedSegment(nullptr), Next(CD.begin()), Ended(false), - Line(Line), Segments(), Stats() { + Line(Line) { this->operator++(); } diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h index 97fd224ea082..90e556df87e7 100644 --- a/llvm/include/llvm/Remarks/RemarkSerializer.h +++ b/llvm/include/llvm/Remarks/RemarkSerializer.h @@ -48,7 +48,7 @@ struct RemarkSerializer { RemarkSerializer(Format SerializerFormat, raw_ostream &OS, SerializerMode Mode) - : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode), StrTab() {} + : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode) {} /// This is just an interface. 
virtual ~RemarkSerializer() = default; diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h index 865337e3cc7f..803ae47793df 100644 --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -799,7 +799,7 @@ struct DelimitedScope { }; struct DictScope : DelimitedScope { - explicit DictScope() : DelimitedScope() {} + explicit DictScope() {} explicit DictScope(ScopedPrinter &W) : DelimitedScope(W) { W.objectBegin(); } DictScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) { @@ -818,7 +818,7 @@ struct DictScope : DelimitedScope { }; struct ListScope : DelimitedScope { - explicit ListScope() : DelimitedScope() {} + explicit ListScope() {} explicit ListScope(ScopedPrinter &W) : DelimitedScope(W) { W.arrayBegin(); } ListScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) { diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 1a9dde03aabc..233f1be04f56 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -2365,7 +2365,7 @@ struct BooleanState : public IntegerStateBase { using super = IntegerStateBase; using base_t = IntegerStateBase::base_t; - BooleanState() : super() {} + BooleanState() {} BooleanState(base_t Assumed) : super(Assumed) {} /// Set the assumed value to \p Value but never below the known one. 
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 419729271a23..7ba9d65cae55 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -435,8 +435,7 @@ class FunctionToLoopPassAdaptor bool UseBlockFrequencyInfo = false, bool UseBranchProbabilityInfo = false, bool LoopNestMode = false) - : Pass(std::move(Pass)), LoopCanonicalizationFPM(), - UseMemorySSA(UseMemorySSA), + : Pass(std::move(Pass)), UseMemorySSA(UseMemorySSA), UseBlockFrequencyInfo(UseBlockFrequencyInfo), UseBranchProbabilityInfo(UseBranchProbabilityInfo), LoopNestMode(LoopNestMode) { diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index 9467bb3c9b2d..090dccc53b6e 100644 --- a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -63,7 +63,7 @@ using namespace llvm::cflaa; CFLSteensAAResult::CFLSteensAAResult( std::function GetTLI) - : AAResultBase(), GetTLI(std::move(GetTLI)) {} + : GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp index f2e5eab72bf2..930cb13c0cb3 100644 --- a/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -61,7 +61,7 @@ class CGPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit CGPassManager() : ModulePass(ID), PMDataManager() {} + explicit CGPassManager() : ModulePass(ID) {} /// Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. 
diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index da5de75a038c..7e1357959a3f 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -106,7 +106,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { //===--------------------------------------------------------------------===// SimpleDDGNode::SimpleDDGNode(Instruction &I) - : DDGNode(NodeKind::SingleInstruction), InstList() { + : DDGNode(NodeKind::SingleInstruction) { assert(InstList.empty() && "Expected empty list."); InstList.push_back(&I); } diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index d00a7c944f10..53262d88ba51 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -102,7 +102,7 @@ class GlobalsAAResult::FunctionInfo { "Insufficient low bits to store our flag and ModRef info."); public: - FunctionInfo() : Info() {} + FunctionInfo() {} ~FunctionInfo() { delete Info.getPointer(); } @@ -963,7 +963,7 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, GlobalsAAResult::GlobalsAAResult( const DataLayout &DL, std::function GetTLI) - : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} + : DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp index d7b202f83189..0f3929f45506 100644 --- a/llvm/lib/Analysis/IVUsers.cpp +++ b/llvm/lib/Analysis/IVUsers.cpp @@ -254,7 +254,7 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { IVUsers::IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE) - : L(L), AC(AC), LI(LI), DT(DT), SE(SE), IVUses() { + : L(L), AC(AC), LI(LI), DT(DT), SE(SE) { // Collect ephemeral values so that AddUsersIfInteresting skips them. 
EphValues.clear(); CodeMetrics::collectEphemeralValues(L, AC, EphValues); diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp index 7b895d8a5dc2..ba014bd08c98 100644 --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -477,9 +477,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, - AAResults &AA, DependenceInfo &DI, - Optional TRT) - : Loops(Loops), TripCounts(), LoopCosts(), + AAResults &AA, DependenceInfo &DI, Optional TRT) + : Loops(Loops), TRT((TRT == None) ? Optional(TemporalReuseThreshold) : TRT), LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { assert(!Loops.empty() && "Expecting a non-empty loop vector."); diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp index 9e470e998e67..b720bab454e9 100644 --- a/llvm/lib/Analysis/LoopPass.cpp +++ b/llvm/lib/Analysis/LoopPass.cpp @@ -69,8 +69,7 @@ char PrintLoopPassWrapper::ID = 0; char LPPassManager::ID = 0; -LPPassManager::LPPassManager() - : FunctionPass(ID), PMDataManager() { +LPPassManager::LPPassManager() : FunctionPass(ID) { LI = nullptr; CurrentLoop = nullptr; } diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp index c20ecff5f912..10c8569096c6 100644 --- a/llvm/lib/Analysis/RegionPass.cpp +++ b/llvm/lib/Analysis/RegionPass.cpp @@ -30,8 +30,7 @@ using namespace llvm; char RGPassManager::ID = 0; -RGPassManager::RGPassManager() - : FunctionPass(ID), PMDataManager() { +RGPassManager::RGPassManager() : FunctionPass(ID) { RI = nullptr; CurrentRegion = nullptr; } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index dc5a4d8f85aa..1d0c106fd5db 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -29,7 +29,7 
@@ using namespace llvm; InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers), MIs() {} + : Renderers(MaxRenderers) {} InstructionSelector::InstructionSelector() = default; diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 7e43a0cbbe73..2ee9379cb286 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -185,7 +185,7 @@ class Polynomial { APInt A; public: - Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() { + Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) { IntegerType *Ty = dyn_cast(V->getType()); if (Ty) { ErrorMSBs = 0; @@ -195,12 +195,12 @@ class Polynomial { } Polynomial(const APInt &A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {} Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(BitWidth, A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {} - Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr), B(), A() {} + Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {} /// Increment and clamp the number of undefined bits. 
void incErrorMSBs(unsigned amt) { @@ -677,7 +677,7 @@ struct VectorInfo { FixedVectorType *const VTy; VectorInfo(FixedVectorType *VTy) - : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { + : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index d0323eaf3d78..f144639770bc 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -182,8 +182,7 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { MIRParserImpl::MIRParserImpl(std::unique_ptr Contents, StringRef Filename, LLVMContext &Context, std::function Callback) - : SM(), - Context(Context), + : Context(Context), In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc())) ->getBuffer(), nullptr, handleYAMLDiag, this), diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 59fc23983d3d..5347a7b0d890 100644 --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -22,8 +22,7 @@ using namespace llvm; DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( - StringRef MKey, const MachineInstr &MI) - : Argument() { + StringRef MKey, const MachineInstr &MI) { Key = std::string(MKey); raw_string_ostream OS(Val); diff --git a/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 6e05de888cc0..a61a2b2728fa 100644 --- a/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -30,8 +30,7 @@ using namespace llvm; ScoreboardHazardRecognizer::ScoreboardHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *SchedDAG, const char *ParentDebugType) - : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II), - DAG(SchedDAG) { 
+ : DebugType(ParentDebugType), ItinData(II), DAG(SchedDAG) { (void)DebugType; // Determine the maximum depth of any itinerary. This determines the depth of // the scoreboard. We always make the scoreboard at least 1 cycle deep to diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d1c2cdeb133b..697d9df54779 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -108,8 +108,7 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, // ELF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() - : TargetLoweringObjectFile() { +TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() { SupportDSOLocalEquivalentLowering = true; } @@ -1139,8 +1138,7 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// -TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() - : TargetLoweringObjectFile() { +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() { SupportIndirectSymViaGOTPCRel = true; } @@ -2543,8 +2541,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( //===----------------------------------------------------------------------===// // GOFF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() - : TargetLoweringObjectFile() {} +TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {} MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp index ac217df1ee48..2524e10cb6c5 100644 --- 
a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp @@ -23,7 +23,7 @@ using namespace llvm::pdb; NativeEnumTypes::NativeEnumTypes(NativeSession &PDBSession, LazyRandomTypeCollection &Types, std::vector Kinds) - : Matches(), Index(0), Session(PDBSession) { + : Index(0), Session(PDBSession) { Optional TI = Types.getFirst(); while (TI) { CVType CVT = Types.getType(*TI); diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp index e15bce0d6c4b..1fb37ce7c57c 100644 --- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -96,7 +96,7 @@ class GDBJITRegistrationListener : public JITEventListener { public: /// Instantiates the JIT service. - GDBJITRegistrationListener() : ObjectBufferMap() {} + GDBJITRegistrationListener() {} /// Unregisters each object that was previously registered and releases all /// internal resources. diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index bb72bec93066..4357c95aa9f6 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -256,9 +256,9 @@ class FunctionPassManagerImpl : public Pass, bool wasRun; public: static char ID; - explicit FunctionPassManagerImpl() : - Pass(PT_PassManager, ID), PMDataManager(), - PMTopLevelManager(new FPPassManager()), wasRun(false) {} + explicit FunctionPassManagerImpl() + : Pass(PT_PassManager, ID), PMTopLevelManager(new FPPassManager()), + wasRun(false) {} /// \copydoc FunctionPassManager::add() void add(Pass *P) { @@ -387,8 +387,7 @@ namespace { class MPPassManager : public Pass, public PMDataManager { public: static char ID; - explicit MPPassManager() : - Pass(PT_PassManager, ID), PMDataManager() { } + explicit MPPassManager() : Pass(PT_PassManager, ID) {} // Delete on the fly managers. 
~MPPassManager() override { @@ -478,9 +477,8 @@ class PassManagerImpl : public Pass, public: static char ID; - explicit PassManagerImpl() : - Pass(PT_PassManager, ID), PMDataManager(), - PMTopLevelManager(new MPPassManager()) {} + explicit PassManagerImpl() + : Pass(PT_PassManager, ID), PMTopLevelManager(new MPPassManager()) {} /// \copydoc PassManager::add() void add(Pass *P) { diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index a0485a59d0e0..b3b4b8a80a1c 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -73,8 +73,7 @@ template class llvm::SymbolTableListTraits; Module::Module(StringRef MID, LLVMContext &C) : Context(C), ValSymTab(std::make_unique(-1)), - Materializer(), ModuleID(std::string(MID)), - SourceFileName(std::string(MID)), DL("") { + ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL("") { Context.addModule(this); } diff --git a/llvm/lib/InterfaceStub/IFSStub.cpp b/llvm/lib/InterfaceStub/IFSStub.cpp index 008263f8db9f..bbc91ada1ded 100644 --- a/llvm/lib/InterfaceStub/IFSStub.cpp +++ b/llvm/lib/InterfaceStub/IFSStub.cpp @@ -29,7 +29,7 @@ IFSStub::IFSStub(IFSStub &&Stub) { Symbols = std::move(Stub.Symbols); } -IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) : IFSStub() { +IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) { IfsVersion = Stub.IfsVersion; Target = Stub.Target; SoName = Stub.SoName; @@ -37,7 +37,7 @@ IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) : IFSStub() { Symbols = Stub.Symbols; } -IFSStubTriple::IFSStubTriple(IFSStub const &Stub) : IFSStub() { +IFSStubTriple::IFSStubTriple(IFSStub const &Stub) { IfsVersion = Stub.IfsVersion; Target = Stub.Target; SoName = Stub.SoName; diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 705f7159d55b..5c94174aa161 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -159,7 +159,7 @@ class AsmParser : public MCAsmParser { int64_t LineNumber; SMLoc Loc; unsigned 
Buf; - CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {} + CppHashInfoTy() : LineNumber(0), Buf(0) {} }; CppHashInfoTy CppHashInfo; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index f1704cef46ac..e2dfd339e93e 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -424,7 +424,7 @@ class MasmParser : public MCAsmParser { int64_t LineNumber; SMLoc Loc; unsigned Buf; - CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {} + CppHashInfoTy() : LineNumber(0), Buf(0) {} }; CppHashInfoTy CppHashInfo; diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp index 5385142698e6..66228bd5a862 100644 --- a/llvm/lib/MCA/Stages/DispatchStage.cpp +++ b/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -30,7 +30,7 @@ DispatchStage::DispatchStage(const MCSubtargetInfo &Subtarget, unsigned MaxDispatchWidth, RetireControlUnit &R, RegisterFile &F) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) { + CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) { if (!DispatchWidth) DispatchWidth = Subtarget.getSchedModel().IssueWidth; } diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp index fa5c0fc66b9e..abfbc80f17c9 100644 --- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp +++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp @@ -47,7 +47,7 @@ InOrderIssueStage::InOrderIssueStage(const MCSubtargetInfo &STI, RegisterFile &PRF, CustomBehaviour &CB, LSUnit &LSU) : STI(STI), PRF(PRF), RM(STI.getSchedModel()), CB(CB), LSU(LSU), - NumIssued(), SI(), CarryOver(), Bandwidth(), LastWriteBackCycle() {} + NumIssued(), CarryOver(), Bandwidth(), LastWriteBackCycle() {} unsigned InOrderIssueStage::getIssueWidth() const { return STI.getSchedModel().IssueWidth; diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp 
index 36ba93564771..0810bf531db8 100644 --- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -18,7 +18,7 @@ using namespace llvm::remarks; BitstreamRemarkSerializerHelper::BitstreamRemarkSerializerHelper( BitstreamRemarkContainerType ContainerType) - : Encoded(), R(), Bitstream(Encoded), ContainerType(ContainerType) {} + : Bitstream(Encoded), ContainerType(ContainerType) {} static void push(SmallVectorImpl &R, StringRef Str) { append_range(R, Str); diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp index 2f00b8e73670..543b00723659 100644 --- a/llvm/lib/Remarks/RemarkStreamer.cpp +++ b/llvm/lib/Remarks/RemarkStreamer.cpp @@ -26,7 +26,7 @@ static cl::opt EnableRemarksSection( RemarkStreamer::RemarkStreamer( std::unique_ptr RemarkSerializer, Optional FilenameIn) - : PassFilter(), RemarkSerializer(std::move(RemarkSerializer)), + : RemarkSerializer(std::move(RemarkSerializer)), Filename(FilenameIn ? Optional(FilenameIn->str()) : None) {} Error RemarkStreamer::setFilter(StringRef Filter) { diff --git a/llvm/lib/Remarks/RemarkStringTable.cpp b/llvm/lib/Remarks/RemarkStringTable.cpp index 5f462f01bb9a..03d93baba038 100644 --- a/llvm/lib/Remarks/RemarkStringTable.cpp +++ b/llvm/lib/Remarks/RemarkStringTable.cpp @@ -20,7 +20,7 @@ using namespace llvm; using namespace llvm::remarks; -StringTable::StringTable(const ParsedStringTable &Other) : StrTab() { +StringTable::StringTable(const ParsedStringTable &Other) { for (unsigned i = 0, e = Other.size(); i < e; ++i) if (Expected MaybeStr = Other[i]) add(*MaybeStr); diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index 3d9996c931ae..a32629c9f557 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -171,7 +171,7 @@ YAMLRemarkParser::YAMLRemarkParser(StringRef Buf) YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, Optional StrTab) - : 
RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(), + : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {} Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) { diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index 2adf37a511d1..0ba019b3c46a 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -1841,11 +1841,11 @@ bool Scanner::fetchMoreTokens() { Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, std::error_code *EC) - : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} + : scanner(new Scanner(Input, SM, ShowColors, EC)) {} Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, std::error_code *EC) - : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} + : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {} Stream::~Stream() = default; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index f7d3dd0bc222..672739f25599 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -228,7 +228,6 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, IsLittle(LittleEndian), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), - FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(), TLInfo(TM, *this) { if (AArch64::isX18ReservedByDefault(TT)) diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp index dfc66f0cb4c1..7ed934cfabc0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -25,8 +25,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext 
&Ctx, SupportDebugThreadLocalLocation = false; } -AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() - : TargetLoweringObjectFileMachO() { +AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() { SupportGOTPCRelWithOffset = false; } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 03ef327e93c8..4f8f8078b69d 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -491,7 +491,7 @@ class AArch64Operand : public MCParsedAsmOperand { public: AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {} - AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { + AArch64Operand(const AArch64Operand &o) : Ctx(o.Ctx) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 3d9a626d3ac3..ea8a7c7b83da 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -472,8 +472,8 @@ class AArch64InstructionSelector : public InstructionSelector { AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) - : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), + : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), + RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 515a5c63a559..92d22881f328 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ 
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -42,8 +42,8 @@ using namespace llvm; -AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) - : AArch64GenRegisterBankInfo() { +AArch64RegisterBankInfo::AArch64RegisterBankInfo( + const TargetRegisterInfo &TRI) { static llvm::once_flag InitializeRegisterBankFlag; static auto InitializeRegisterBankOnce = [&]() { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h index 22be014813b0..5ba9b2cd187e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -26,7 +26,7 @@ class AMDGPUAAResult : public AAResultBase { const DataLayout &DL; public: - explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} + explicit AMDGPUAAResult(const DataLayout &DL) : DL(DL) {} AMDGPUAAResult(AMDGPUAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL) {} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e16bead81b65..e5c5d36d1d4f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -46,8 +46,7 @@ static cl::opt AllowRiskySelect( AMDGPUInstructionSelector::AMDGPUInstructionSelector( const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM) - : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), + : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), STI(STI), EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), #define GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h index c97223b047e8..fb6a64b75c20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h @@ -356,7 +356,7 @@ class 
AMDGPULibFuncImpl : public AMDGPULibFuncBase { /// Wrapper class for AMDGPULIbFuncImpl class AMDGPULibFunc : public AMDGPULibFuncBase { public: - explicit AMDGPULibFunc() : Impl(std::unique_ptr()) {} + explicit AMDGPULibFunc() {} AMDGPULibFunc(const AMDGPULibFunc &F); /// Clone a mangled library func with the Id \p Id and argument info from \p /// CopyFrom. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c60012bcfe2e..ab463ce8940d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -193,9 +193,7 @@ class ApplyRegBankMapping final : public GISelChangeObserver { } AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) - : AMDGPUGenRegisterBankInfo(), - Subtarget(ST), - TRI(Subtarget.getRegisterInfo()), + : Subtarget(ST), TRI(Subtarget.getRegisterInfo()), TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2bb59086f391..c71205b17a1a 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -62,7 +62,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { public: AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) - : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} + : Kind(Kind_), AsmParser(AsmParser_) {} using Ptr = std::unique_ptr; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 7708579a4491..ded3fb7ab8d9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -15,8 +15,7 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, - const MCTargetOptions &Options) - : MCAsmInfoELF() { + const 
MCTargetOptions &Options) { CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4; StackGrowsUp = true; HasSingleParameterDotFile = false; diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index f083fa6662e9..0d201a67af46 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -164,7 +164,7 @@ static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer( const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict) - : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()), + : MF(DAG->MF), DL(DAG->MF.getDataLayout()), DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask) : CPUBankMask), AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences() diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/llvm/lib/Target/ARM/ARMHazardRecognizer.h index c1f1bcd0a629..66a1477e5e08 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -34,7 +34,7 @@ class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer { unsigned FpMLxStalls = 0; public: - ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; } + ARMHazardRecognizerFPMLx() { MaxLookAhead = 1; } HazardType getHazardType(SUnit *SU, int Stalls) override; void Reset() override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 5dee5e04af81..00db13f2eb52 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -28,8 +28,7 @@ #include "llvm/MC/MCInst.h" using namespace llvm; -ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {} /// Return the noop instruction to use for a noop. 
MCInst ARMInstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 8be4e3f160e3..188b5562cac9 100644 --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -171,8 +171,8 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM, ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI) - : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), Opcodes(STI), + : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), + STI(STI), Opcodes(STI), #define GET_GLOBALISEL_PREDICATES_INIT #include "ARMGenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 1a7f10a13ed3..2523752a717e 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -129,8 +129,7 @@ static void checkValueMappings() { } // end namespace arm } // end namespace llvm -ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) - : ARMGenRegisterBankInfo() { +ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) { // We have only one set of register banks, whatever the subtarget // is. Therefore, the initialization of the RegBanks table should be // done only once. 
Indeed the table of all register banks diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index 6649750bb388..ff4647dd46fd 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -15,4 +15,4 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {} +ARMRegisterInfo::ARMRegisterInfo() {} diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/llvm/lib/Target/ARM/ARMTargetObjectFile.h index f86774beb397..47334b9a8a45 100644 --- a/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -17,8 +17,7 @@ namespace llvm { class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { public: - ARMElfTargetObjectFile() - : TargetLoweringObjectFileELF() { + ARMElfTargetObjectFile() { PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31; } diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c8cec88d6e11..c7734cc2cf11 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -921,7 +921,7 @@ class ARMOperand : public MCParsedAsmOperand { }; public: - ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + ARMOperand(KindTy K) : Kind(K) {} /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const override { return StartLoc; } diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 4b18f5e20d40..1a36c2ca9152 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -21,7 +21,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} + : ARMBaseInstrInfo(STI) {} /// Return the noop instruction to use for a noop. 
MCInst Thumb1InstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 4da6f6ab6994..5d2bc4ebe191 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -37,7 +37,7 @@ extern cl::opt ReuseFrameIndexVals; using namespace llvm; -ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {} +ThumbRegisterInfo::ThumbRegisterInfo() {} const TargetRegisterClass * ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp index 990e1c57e63f..820efe79bf8a 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.cpp +++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp @@ -39,8 +39,6 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false), m_hasTinyEncoding(false), m_hasMemMappedGPR(false), m_FeatureSetDummy(false), - - InstrInfo(), FrameLowering(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() { // Parse features string. 
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 95c737aa272e..f19e7840eb31 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -107,13 +107,13 @@ class AVROperand : public MCParsedAsmOperand { public: AVROperand(StringRef Tok, SMLoc const &S) - : Base(), Kind(k_Token), Tok(Tok), Start(S), End(S) {} + : Kind(k_Token), Tok(Tok), Start(S), End(S) {} AVROperand(unsigned Reg, SMLoc const &S, SMLoc const &E) - : Base(), Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {} + : Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {} AVROperand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E) - : Base(), Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {} + : Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {} AVROperand(unsigned Reg, MCExpr const *Imm, SMLoc const &S, SMLoc const &E) - : Base(), Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {} + : Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {} struct RegisterImmediate { unsigned Reg; diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index 50298bf5e943..d55510a2455c 100644 --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -101,10 +101,10 @@ struct BPFOperand : public MCParsedAsmOperand { ImmOp Imm; }; - BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + BPFOperand(KindTy K) : Kind(K) {} public: - BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() { + BPFOperand(const BPFOperand &o) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp index 77e3cd393f87..e4d98b85e58b 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -59,6 +59,6 @@ void 
BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), + : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index d131cf896834..58f5ea78c541 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -211,12 +211,10 @@ struct HexagonOperand : public MCParsedAsmOperand { struct ImmTy Imm; }; - HexagonOperand(KindTy K, MCContext &Context) - : MCParsedAsmOperand(), Kind(K), Context(Context) {} + HexagonOperand(KindTy K, MCContext &Context) : Kind(K), Context(Context) {} public: - HexagonOperand(const HexagonOperand &o) - : MCParsedAsmOperand(), Context(o.Context) { + HexagonOperand(const HexagonOperand &o) : Context(o.Context) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 2679e399852f..091542f2e76a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1652,7 +1652,7 @@ struct WeightedLeaf { int Weight; int InsertionOrder; - WeightedLeaf() : Value(SDValue()) { } + WeightedLeaf() {} WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 5f094dfeb95c..a47d414af831 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -204,7 
+204,7 @@ HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCRegisterInfo const &ri, bool ReportErrors) : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI), - ReportErrors(ReportErrors), ReversePairs() { + ReportErrors(ReportErrors) { init(); } @@ -212,8 +212,7 @@ HexagonMCChecker::HexagonMCChecker(HexagonMCChecker const &Other, MCSubtargetInfo const &STI, bool CopyReportErrors) : Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII), - STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false), - ReversePairs() { + STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) { init(); } diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index a994bd7e57a4..660215ca7435 100644 --- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -141,7 +141,7 @@ struct LanaiOperand : public MCParsedAsmOperand { struct MemOp Mem; }; - explicit LanaiOperand(KindTy Kind) : MCParsedAsmOperand(), Kind(Kind) {} + explicit LanaiOperand(KindTy Kind) : Kind(Kind) {} public: // The functions below are used by the autogenerated ASM matcher and hence to diff --git a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp index d9d7847a0c5a..37a4843e1bc4 100644 --- a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp +++ b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp @@ -43,4 +43,4 @@ LanaiSubtarget::LanaiSubtarget(const Triple &TargetTriple, StringRef Cpu, CodeGenOpt::Level /*OptLevel*/) : LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString), FrameLowering(initializeSubtargetDependencies(Cpu, FeatureString)), - InstrInfo(), TLInfo(TM, *this), TSInfo() {} + TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index 
c1677baf52a7..13cba8b079a9 100644 --- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -114,13 +114,14 @@ class MSP430Operand : public MCParsedAsmOperand { public: MSP430Operand(StringRef Tok, SMLoc const &S) - : Base(), Kind(k_Tok), Tok(Tok), Start(S), End(S) {} + : Kind(k_Tok), Tok(Tok), Start(S), End(S) {} MSP430Operand(KindTy Kind, unsigned Reg, SMLoc const &S, SMLoc const &E) - : Base(), Kind(Kind), Reg(Reg), Start(S), End(E) {} + : Kind(Kind), Reg(Reg), Start(S), End(E) {} MSP430Operand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E) - : Base(), Kind(k_Imm), Imm(Imm), Start(S), End(E) {} - MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, SMLoc const &E) - : Base(), Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {} + : Kind(k_Imm), Imm(Imm), Start(S), End(E) {} + MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, + SMLoc const &E) + : Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {} void addRegOperands(MCInst &Inst, unsigned N) const { assert((Kind == k_Reg || Kind == k_IndReg || Kind == k_PostIndReg) && diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp index 2fd58717c4db..0604d47597e2 100644 --- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp @@ -57,5 +57,5 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(), + : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 01b5dff2e448..736c41f8ac03 100644 --- 
a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -827,8 +827,7 @@ class MipsOperand : public MCParsedAsmOperand { } Kind; public: - MipsOperand(KindTy K, MipsAsmParser &Parser) - : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} + MipsOperand(KindTy K, MipsAsmParser &Parser) : Kind(K), AsmParser(Parser) {} ~MipsOperand() override { switch (Kind) { diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index f6f43da9abf8..563118dfe627 100644 --- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-registerinfo" -Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {} +Mips16RegisterInfo::Mips16RegisterInfo() {} bool Mips16RegisterInfo::requiresRegisterScavenging (const MachineFunction &MF) const { diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp index 6d44ce2ab563..59f158688b16 100644 --- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp @@ -80,8 +80,8 @@ class MipsInstructionSelector : public InstructionSelector { MipsInstructionSelector::MipsInstructionSelector( const MipsTargetMachine &TM, const MipsSubtarget &STI, const MipsRegisterBankInfo &RBI) - : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), + : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), + RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "MipsGenGlobalISel.inc" diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 04b69c66bc0d..2cb59e696031 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -73,8 +73,7 @@ RegisterBankInfo::ValueMapping 
ValueMappings[] = { using namespace llvm; -MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) - : MipsGenRegisterBankInfo() {} +MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) {} const RegisterBank & MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index b05e9ad827c4..d6481793ef49 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -38,7 +38,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-reg-info" -MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {} +MipsSERegisterInfo::MipsSERegisterInfo() {} bool MipsSERegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 953d95e55f65..8df6f13aa68e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; // Pin the vtable to this file. 
void NVPTXInstrInfo::anchor() {} -NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {} +NVPTXInstrInfo::NVPTXInstrInfo() : RegInfo() {} void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 05c20369abf4..5a6440c91fca 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -49,8 +49,8 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - SmVersion(20), TM(TM), InstrInfo(), - TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {} + SmVersion(20), TM(TM), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {} bool NVPTXSubtarget::hasImageHandles() const { // Enable handles for Kepler+, where CUDA supports indirect surfaces and diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 366d92a5a805..4645671a0cd8 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -17,7 +17,7 @@ namespace llvm { class NVPTXTargetObjectFile : public TargetLoweringObjectFile { public: - NVPTXTargetObjectFile() : TargetLoweringObjectFile() {} + NVPTXTargetObjectFile() {} ~NVPTXTargetObjectFile() override; diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index ded922329ebf..1f509afb723b 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -201,9 +201,10 @@ struct PPCOperand : public MCParsedAsmOperand { struct TLSRegOp TLSReg; }; - PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + PPCOperand(KindTy K) : Kind(K) {} + public: - PPCOperand(const PPCOperand &o) : 
MCParsedAsmOperand() { + PPCOperand(const PPCOperand &o) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp index 7d64816ed6c7..0cd8350e3fdd 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp @@ -65,8 +65,7 @@ class PPCInstructionSelector : public InstructionSelector { PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI, const PPCRegisterBankInfo &RBI) - : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), + : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "PPCGenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 6af79324919c..58165fcaac03 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -23,5 +23,4 @@ using namespace llvm; -PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) - : PPCGenRegisterBankInfo() {} +PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) {} diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 75592dd4c6f5..858e78076b56 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -302,10 +302,10 @@ struct RISCVOperand : public MCParsedAsmOperand { struct VTypeOp VType; }; - RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + RISCVOperand(KindTy K) : Kind(K) {} public: - RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() { + RISCVOperand(const RISCVOperand &o) { Kind = o.Kind; IsRV64 = 
o.IsRV64; StartLoc = o.StartLoc; diff --git a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp index 4d1f47da209d..8dfd71ac0b6b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp @@ -69,8 +69,7 @@ class RISCVInstructionSelector : public InstructionSelector { RISCVInstructionSelector::RISCVInstructionSelector( const RISCVTargetMachine &TM, const RISCVSubtarget &STI, const RISCVRegisterBankInfo &RBI) - : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), + : STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "RISCVGenGlobalISel.inc" diff --git a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp index bd3b95a98b9f..4ff3a44f3511 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp @@ -22,5 +22,4 @@ using namespace llvm; -RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) - : RISCVGenRegisterBankInfo() {} +RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) {} diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 48e6903bd1b1..af3304f0907d 100644 --- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -257,7 +257,7 @@ class SparcOperand : public MCParsedAsmOperand { }; public: - SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + SparcOperand(KindTy K) : Kind(K) {} bool isToken() const override { return Kind == k_Token; } bool isReg() const override { return Kind == k_Register; } diff --git a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h index 9bbe602b32b3..f30ddc7b4955 100644 --- 
a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h +++ b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h @@ -18,9 +18,7 @@ class TargetMachine; class SparcELFTargetObjectFile : public TargetLoweringObjectFileELF { public: - SparcELFTargetObjectFile() : - TargetLoweringObjectFileELF() - {} + SparcELFTargetObjectFile() {} void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 39a82e2c07e0..cf55318d328d 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -62,8 +62,7 @@ struct SystemZAddressingMode { bool IncludesDynAlloc; SystemZAddressingMode(AddrForm form, DispRange dr) - : Form(form), DR(dr), Base(), Disp(0), Index(), - IncludesDynAlloc(false) {} + : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {} // True if the address can have an index register. bool hasIndexField() { return Form != FormBD; } diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 0f03d96655bf..75c0d454d904 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -89,7 +89,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasSoftFloat(false), TargetTriple(TT), SpecialRegisters(initializeSpecialRegisters()), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(), FrameLowering(SystemZFrameLowering::create(*this)) {} + FrameLowering(SystemZFrameLowering::create(*this)) {} bool SystemZSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp index fd9dc32b04f5..4a318e493c52 100644 --- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp +++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -210,7 +210,7 @@ class VEOperand : public 
MCParsedAsmOperand { }; public: - VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + VEOperand(KindTy K) : Kind(K) {} bool isToken() const override { return Kind == k_Token; } bool isReg() const override { return Kind == k_Register; } diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h index 16b25fed3f11..3160f6a552d7 100644 --- a/llvm/lib/Target/VE/VEMachineFunctionInfo.h +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -29,10 +29,9 @@ class VEMachineFunctionInfo : public MachineFunctionInfo { bool IsLeafProc; public: - VEMachineFunctionInfo() - : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {} + VEMachineFunctionInfo() : VarArgsFrameOffset(0), IsLeafProc(false) {} explicit VEMachineFunctionInfo(MachineFunction &MF) - : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {} + : VarArgsFrameOffset(0), IsLeafProc(false) {} Register getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index add3c799f4aa..103b634ecf5b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -42,9 +42,8 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const std::string &FS, const TargetMachine &TM) : WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), - TargetTriple(TT), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TSInfo(), - TLInfo(TM, *this) {} + TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TSInfo(), TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableAtomicExpand() const { // If atomics are disabled, atomic ops are lowered instead of expanded diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 7ed05fd0331d..5b90c67deae6 100644 --- 
a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -80,9 +80,9 @@ namespace { bool NegateIndex = false; X86ISelAddressMode() - : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), - Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), - MCSym(nullptr), JT(-1), SymbolFlags(X86II::MO_NO_FLAG) {} + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), Disp(0), GV(nullptr), + CP(nullptr), BlockAddr(nullptr), ES(nullptr), MCSym(nullptr), JT(-1), + SymbolFlags(X86II::MO_NO_FLAG) {} bool hasSymbolicDisplacement() const { return GV != nullptr || CP != nullptr || ES != nullptr || diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp index 8abbaa92c8cf..28d57ca9ae3c 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -153,8 +153,8 @@ class X86InstructionSelector : public InstructionSelector { X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), + : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), + RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp index 9c076d2d6769..497a8f6e065f 100644 --- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp @@ -25,8 +25,7 @@ using namespace llvm; #define GET_TARGET_REGBANK_INFO_IMPL #include "X86GenRegisterBankInfo.def" -X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) - : X86GenRegisterBankInfo() { +X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) { // validate RegBank 
initialization. const RegisterBank &RBGPR = getRegBank(X86::GPRRegBankID); diff --git a/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/llvm/lib/Target/XCore/XCoreSubtarget.cpp index 1be707cb488c..051d51178baa 100644 --- a/llvm/lib/Target/XCore/XCoreSubtarget.cpp +++ b/llvm/lib/Target/XCore/XCoreSubtarget.cpp @@ -26,5 +26,5 @@ void XCoreSubtarget::anchor() { } XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), - FrameLowering(*this), TLInfo(TM, *this), TSInfo() {} + : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(*this), + TLInfo(TM, *this), TSInfo() {} diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 4e3689f09536..0fef01a47b04 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -1073,8 +1073,7 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, bool MandatoryFirst, InliningAdvisorMode Mode, unsigned MaxDevirtIterations) - : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations), - PM(), MPM() { + : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) { // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. 
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 2d717475ce7f..fe9586ce75a6 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -169,8 +169,7 @@ struct FunctionOutliningInfo { }; struct FunctionOutliningMultiRegionInfo { - FunctionOutliningMultiRegionInfo() - : ORI() {} + FunctionOutliningMultiRegionInfo() {} // Container for outline regions struct OutlineRegionInfo { diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 73f208abcb07..e9c4a56a90c2 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -248,8 +248,7 @@ class PGOCounterPromoter { PGOCounterPromoter( DenseMap> &LoopToCands, Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) - : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), - LI(LI), BFI(BFI) { + : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) { // Skip collection of ExitBlocks and InsertPts for loops that will not be // able to have counters promoted. 
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 4e4097e13271..accaa1088d6f 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -220,9 +220,7 @@ class LowerMatrixIntrinsics { bool IsColumnMajor = true; public: - MatrixTy() - : Vectors(), - IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {} + MatrixTy() : IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {} MatrixTy(ArrayRef Vectors) : Vectors(Vectors.begin(), Vectors.end()), IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {} @@ -1832,7 +1830,7 @@ class LowerMatrixIntrinsics { const DenseMap> &Shared, const SmallSetVector &ExprsInSubprogram, Value *Leaf) - : Str(), Stream(Str), DL(DL), Inst2Matrix(Inst2Matrix), Shared(Shared), + : Stream(Str), DL(DL), Inst2Matrix(Inst2Matrix), Shared(Shared), ExprsInSubprogram(ExprsInSubprogram), Leaf(Leaf) {} void indent(unsigned N) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a8102c0b07b8..503cb1123e4e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -198,8 +198,8 @@ struct VPTransformState { VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilder<> &Builder, InnerLoopVectorizer *ILV, VPlan *Plan) - : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV), - Plan(Plan) {} + : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan) { + } /// The chosen Vectorization and Unroll Factors of the loop being vectorized. 
ElementCount VF; diff --git a/llvm/tools/dsymutil/BinaryHolder.h b/llvm/tools/dsymutil/BinaryHolder.h index 5e81fe4b93b1..6245e4924733 100644 --- a/llvm/tools/dsymutil/BinaryHolder.h +++ b/llvm/tools/dsymutil/BinaryHolder.h @@ -103,7 +103,7 @@ class BinaryHolder { std::string Filename; TimestampTy Timestamp; - KeyTy() : Filename(), Timestamp() {} + KeyTy() {} KeyTy(StringRef Filename, TimestampTy Timestamp) : Filename(Filename.str()), Timestamp(Timestamp) {} }; diff --git a/llvm/tools/dsymutil/Reproducer.cpp b/llvm/tools/dsymutil/Reproducer.cpp index 5c60758c6f80..4f2e0db297e5 100644 --- a/llvm/tools/dsymutil/Reproducer.cpp +++ b/llvm/tools/dsymutil/Reproducer.cpp @@ -27,7 +27,7 @@ Reproducer::Reproducer() : VFS(vfs::getRealFileSystem()) {} Reproducer::~Reproducer() = default; ReproducerGenerate::ReproducerGenerate(std::error_code &EC) - : Root(createReproducerDir(EC)), FC() { + : Root(createReproducerDir(EC)) { if (!Root.empty()) FC = std::make_shared(Root, Root); VFS = FileCollector::createCollectorVFS(vfs::getRealFileSystem(), FC); diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/llvm/tools/llvm-cov/CoverageSummaryInfo.h index 62e7cad1012b..84a3228f22b9 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.h +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.h @@ -191,8 +191,7 @@ struct FunctionCoverageSummary { BranchCoverageInfo BranchCoverage; FunctionCoverageSummary(const std::string &Name) - : Name(Name), ExecutionCount(0), RegionCoverage(), LineCoverage(), - BranchCoverage() {} + : Name(Name), ExecutionCount(0) {} FunctionCoverageSummary(const std::string &Name, uint64_t ExecutionCount, const RegionCoverageInfo &RegionCoverage, @@ -223,9 +222,7 @@ struct FileCoverageSummary { FunctionCoverageInfo FunctionCoverage; FunctionCoverageInfo InstantiationCoverage; - FileCoverageSummary(StringRef Name) - : Name(Name), RegionCoverage(), LineCoverage(), FunctionCoverage(), - InstantiationCoverage() {} + FileCoverageSummary(StringRef Name) : Name(Name) {} 
FileCoverageSummary &operator+=(const FileCoverageSummary &RHS) { RegionCoverage += RHS.RegionCoverage; diff --git a/llvm/tools/llvm-mca/CodeRegion.h b/llvm/tools/llvm-mca/CodeRegion.h index 0b2590767dfa..0e1e02a533d8 100644 --- a/llvm/tools/llvm-mca/CodeRegion.h +++ b/llvm/tools/llvm-mca/CodeRegion.h @@ -63,7 +63,7 @@ class CodeRegion { public: CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start) - : Description(Desc), RangeStart(Start), RangeEnd() {} + : Description(Desc), RangeStart(Start) {} void addInstruction(const llvm::MCInst &Instruction) { Instructions.emplace_back(Instruction); diff --git a/llvm/tools/llvm-mca/PipelinePrinter.h b/llvm/tools/llvm-mca/PipelinePrinter.h index fd262f0a8a5d..d89e913f979f 100644 --- a/llvm/tools/llvm-mca/PipelinePrinter.h +++ b/llvm/tools/llvm-mca/PipelinePrinter.h @@ -53,7 +53,7 @@ class PipelinePrinter { public: PipelinePrinter(Pipeline &Pipe, const CodeRegion &R, unsigned Idx, const MCSubtargetInfo &STI, const PipelineOptions &PO) - : P(Pipe), Region(R), RegionIdx(Idx), STI(STI), PO(PO), Views() {} + : P(Pipe), Region(R), RegionIdx(Idx), STI(STI), PO(PO) {} void addView(std::unique_ptr V) { P.addEventListener(V.get()); diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h index 439380fc725b..681ab8f56381 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.h +++ b/llvm/tools/llvm-objcopy/ELF/Object.h @@ -934,8 +934,7 @@ class BinaryELFBuilder : public BasicELFBuilder { public: BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) - : BasicELFBuilder(), MemBuf(MB), - NewSymbolVisibility(NewSymbolVisibility) {} + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} Expected> build(); }; @@ -946,8 +945,7 @@ class IHexELFBuilder : public BasicELFBuilder { void addDataSections(); public: - IHexELFBuilder(const std::vector &Records) - : BasicELFBuilder(), Records(Records) {} + IHexELFBuilder(const std::vector &Records) : Records(Records) {} Expected> build(); }; diff --git 
a/llvm/tools/llvm-objdump/SourcePrinter.h b/llvm/tools/llvm-objdump/SourcePrinter.h index 21d5bdcf8a49..31d46e3108f6 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.h +++ b/llvm/tools/llvm-objdump/SourcePrinter.h @@ -80,7 +80,7 @@ class LiveVariablePrinter { public: LiveVariablePrinter(const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) - : LiveVariables(), ActiveCols(Column()), MRI(MRI), STI(STI) {} + : ActiveCols(Column()), MRI(MRI), STI(STI) {} void dump() const; diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 6c12750a9ddf..0d7eabd6d158 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -204,8 +204,8 @@ struct WriterContext { WriterContext(bool IsSparse, std::mutex &ErrLock, SmallSet &WriterErrorCodes) - : Lock(), Writer(IsSparse), Errors(), ErrLock(ErrLock), - WriterErrorCodes(WriterErrorCodes) {} + : Writer(IsSparse), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) { + } }; /// Computer the overlap b/w profile BaseFilename and TestFileName, @@ -2303,8 +2303,7 @@ struct HotFuncInfo { uint64_t EntryCount; HotFuncInfo() - : FuncName(), TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), - EntryCount(0) {} + : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {} HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES) : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP), diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 46862bbad7cb..eea486abe0a1 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -286,8 +286,8 @@ static void parseOptions(const opt::InputArgList &Args) { namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() - : Allocator(), IDTable(Allocator), TypeTable(Allocator), - GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {} + : IDTable(Allocator), 
TypeTable(Allocator), GlobalIDTable(Allocator), + GlobalTypeTable(Allocator) {} llvm::BumpPtrAllocator Allocator; llvm::codeview::MergingTypeTableBuilder IDTable; diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h index 567580540877..37570648cad1 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h +++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h @@ -84,9 +84,7 @@ class GIMatchDag { bool HasPostMatchPredicate = false; public: - GIMatchDag(GIMatchDagContext &Ctx) - : Ctx(Ctx), InstrNodes(), PredicateNodes(), Edges(), - PredicateDependencies() {} + GIMatchDag(GIMatchDagContext &Ctx) : Ctx(Ctx) {} GIMatchDag(const GIMatchDag &) = delete; GIMatchDagContext &getContext() const { return Ctx; } diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp index d08a83333c30..00d57404b069 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp +++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp @@ -82,7 +82,6 @@ GIMatchTreeBuilderLeafInfo::GIMatchTreeBuilderLeafInfo( GIMatchTreeBuilder &Builder, StringRef Name, unsigned RootIdx, const GIMatchDag &MatchDag, void *Data) : Builder(Builder), Info(Name, RootIdx, Data), MatchDag(MatchDag), - InstrNodeToInfo(), RemainingInstrNodes(BitVector(MatchDag.getNumInstrNodes(), true)), RemainingEdges(BitVector(MatchDag.getNumEdges(), true)), RemainingPredicates(BitVector(MatchDag.getNumPredicates(), true)), diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 7b1bd41a951b..25bc0adc2a81 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -883,9 +883,7 @@ class RuleMatcher : public Matcher { public: RuleMatcher(ArrayRef SrcLoc) - : Matchers(), Actions(), InsnVariableIDs(), MutatableInsns(), - DefinedOperands(), NextInsnVarID(0), NextOutputInsnID(0), - NextTempRegID(0), SrcLoc(SrcLoc), ComplexSubOperands(), + : NextInsnVarID(0), 
NextOutputInsnID(0), NextTempRegID(0), SrcLoc(SrcLoc), RuleID(NextRuleID++) {} RuleMatcher(RuleMatcher &&Other) = default; RuleMatcher &operator=(RuleMatcher &&Other) = default; diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h index 29cca92d902c..9e7a4a3925ac 100644 --- a/llvm/utils/TableGen/PredicateExpander.h +++ b/llvm/utils/TableGen/PredicateExpander.h @@ -111,7 +111,7 @@ class STIPredicateExpander : public PredicateExpander { public: STIPredicateExpander(StringRef Target) - : PredicateExpander(Target), ClassPrefix(), ExpandDefinition(false) {} + : PredicateExpander(Target), ExpandDefinition(false) {} bool shouldExpandDefinition() const { return ExpandDefinition; } StringRef getClassPrefix() const { return ClassPrefix; } diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp index 0725657150f8..61f71309b6fb 100644 --- a/llvm/utils/TableGen/RegisterBankEmitter.cpp +++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp @@ -42,7 +42,7 @@ class RegisterBank { public: RegisterBank(const Record &TheDef) - : TheDef(TheDef), RCs(), RCWithLargestRegsSize(nullptr) {} + : TheDef(TheDef), RCWithLargestRegsSize(nullptr) {} /// Get the human-readable name for the bank. StringRef getName() const { return TheDef.getValueAsString("Name"); } From ab6502ea67378d972db8179a48936b102c442105 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 22 Dec 2021 00:18:47 +0000 Subject: [PATCH 321/992] Enable a few clang-tidy checks in MLIR The dry-run of clang-tidy on the codebase with these enable were well receive, and the codebase is "clean" (or almost) with respect to these right now. 
--- mlir/.clang-tidy | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/mlir/.clang-tidy b/mlir/.clang-tidy index acd9361b526d..88da7aba0a11 100644 --- a/mlir/.clang-tidy +++ b/mlir/.clang-tidy @@ -1,4 +1,54 @@ InheritParentConfig: true +Checks: > + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-inconsistent-declaration-parameter-name, + readability-misleading-indentation, + readability-redundant-control-flow, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof + + CheckOptions: - key: readability-identifier-naming.MemberCase value: camelBack From ced8690d84791083f08ba3ba64862ac43ccb6590 Mon 
Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 29 Dec 2021 05:12:02 +0000 Subject: [PATCH 322/992] Apply clang-tidy fixes for bugprone-argument-comment to MLIR (NFC) Differential Revision: https://reviews.llvm.org/D116244 --- .../Dialect/Linalg/Transforms/Vectorization.cpp | 4 ++-- .../Math/Transforms/PolynomialApproximation.cpp | 2 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 4 ++-- mlir/lib/Transforms/LoopFusion.cpp | 2 +- mlir/lib/Transforms/NormalizeMemRefs.cpp | 16 ++++++++-------- mlir/lib/Transforms/PipelineDataTransfer.cpp | 2 +- mlir/lib/Transforms/Utils/LoopUtils.cpp | 6 +++--- mlir/test/lib/Transforms/TestLoopFusion.cpp | 2 +- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 939400ed5027..ab22ee0e0de5 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -80,7 +80,7 @@ static OpType getSingleOpOfType(Block &block) { /// map is reindexed to `affine_map<(d0, d1, d2) -> (d2, d0, d1)>`, the second /// affine map is reindexed to `affine_map<(d0, d1) -> (d0, d1)>`. static AffineMap reindexIndexingMap(AffineMap map) { - assert(map.isProjectedPermutation(/*allowZerosInResults=*/true) && + assert(map.isProjectedPermutation(/*allowZeroInResults=*/true) && "expected projected permutation"); auto res = compressUnusedDims(map); assert(res.getNumDims() == res.getNumResults() && @@ -576,7 +576,7 @@ vectorizeAsLinalgGeneric(OpBuilder &b, LinalgOp linalgOp, // TODO: drop reliance on a specific pattern. 
static bool allIndexingsAreProjectedPermutation(LinalgOp op) { return llvm::all_of(op.getIndexingMaps(), [](AffineMap m) { - return m.isProjectedPermutation(/*allowZerosInResults=*/true); + return m.isProjectedPermutation(/*allowZeroInResults=*/true); }); } diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp index a42dfe79b39c..dc92436b97be 100644 --- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp +++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp @@ -421,7 +421,7 @@ LogApproximationBase::logMatchAndRewrite(Op op, PatternRewriter &rewriter, x = max(builder, x, cstMinNormPos); // Extract significant in the range [0.5,1) and exponent. - std::pair pair = frexp(builder, x, /*is_positive=*/true); + std::pair pair = frexp(builder, x, /*isPositive=*/true); x = pair.first; Value e = pair.second; diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 90909582b2e5..e1d2dcadff8a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1185,8 +1185,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state, ValueRange lowerBound, ValueRange upperBound, ValueRange step, ArrayRef attributes) { build(builder, state, TypeRange(), lowerBound, upperBound, step, - /*private_vars=*/ValueRange(), - /*firstprivate_vars=*/ValueRange(), /*lastprivate_vars=*/ValueRange(), + /*privateVars=*/ValueRange(), + /*firstprivateVars=*/ValueRange(), /*lastprivate_vars=*/ValueRange(), /*linear_vars=*/ValueRange(), /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(), /*schedule_val=*/nullptr, /*schedule_chunk_var=*/nullptr, /*collapse_val=*/nullptr, diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index e6db0cf6927b..56bc8e3dca6c 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp 
@@ -982,7 +982,7 @@ static Value createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst, replaceAllMemRefUsesWith(oldMemRef, newMemRef, {}, indexRemap, /*extraOperands=*/outerIVs, /*symbolOperands=*/{}, - /*domInstFilter=*/&*forOp.getBody()->begin()); + /*domOpFilter=*/&*forOp.getBody()->begin()); assert(succeeded(res) && "replaceAllMemrefUsesWith should always succeed here"); (void)res; diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp index c33d1b6175e9..d830b607904e 100644 --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -275,9 +275,9 @@ void NormalizeMemRefs::updateFunctionSignature(FuncOp funcOp, /*indexRemap=*/layoutMap, /*extraOperands=*/{}, /*symbolOperands=*/{}, - /*domInstFilter=*/nullptr, - /*postDomInstFilter=*/nullptr, - /*allowDereferencingOps=*/true, + /*domOpFilter=*/nullptr, + /*postDomOpFilter=*/nullptr, + /*allowNonDereferencingOps=*/true, /*replaceInDeallocOp=*/true))) { // If it failed (due to escapes for example), bail out. // It should never hit this part of the code because it is called by @@ -370,8 +370,8 @@ void NormalizeMemRefs::normalizeFuncOpMemRefs(FuncOp funcOp, /*indexRemap=*/layoutMap, /*extraOperands=*/{}, /*symbolOperands=*/{}, - /*domInstFilter=*/nullptr, - /*postDomInstFilter=*/nullptr, + /*domOpFilter=*/nullptr, + /*postDomOpFilter=*/nullptr, /*allowNonDereferencingOps=*/true, /*replaceInDeallocOp=*/true))) { // If it failed (due to escapes for example), bail out. 
Removing the @@ -419,9 +419,9 @@ void NormalizeMemRefs::normalizeFuncOpMemRefs(FuncOp funcOp, /*indexRemap=*/layoutMap, /*extraOperands=*/{}, /*symbolOperands=*/{}, - /*domInstFilter=*/nullptr, - /*postDomInstFilter=*/nullptr, - /*allowDereferencingOps=*/true, + /*domOpFilter=*/nullptr, + /*postDomOpFilter=*/nullptr, + /*allowNonDereferencingOps=*/true, /*replaceInDeallocOp=*/true))) { newOp->erase(); replacingMemRefUsesFailed = true; diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index 6ac860592dda..e32c54264c65 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -110,7 +110,7 @@ static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) { /*indexRemap=*/AffineMap(), /*extraOperands=*/{}, /*symbolOperands=*/{}, - /*domInstFilter=*/&*forOp.getBody()->begin()))) { + /*domOpFilter=*/&*forOp.getBody()->begin()))) { LLVM_DEBUG( forOp.emitError("memref replacement for double buffering failed")); ivModTwoOp.erase(); diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 91d4a7cd5d19..9a81f1d5d076 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -2882,8 +2882,8 @@ static LogicalResult generateCopy( /*extraIndices=*/{}, indexRemap, /*extraOperands=*/regionSymbols, /*symbolOperands=*/{}, - /*domInstFilter=*/&*begin, - /*postDomInstFilter=*/&*postDomFilter); + /*domOpFilter=*/&*begin, + /*postDomOpFilter=*/&*postDomFilter); *nBegin = isBeginAtStartOfBlock ? 
block->begin() : std::next(prevOfBegin); @@ -3258,7 +3258,7 @@ static AffineIfOp createSeparationCondition(MutableArrayRef loops, 1); unsigned fullTileLbPos, fullTileUbPos; if (!cst.getConstantBoundOnDimSize(0, /*lb=*/nullptr, - /*lbFloorDivisor=*/nullptr, + /*boundFloorDivisor=*/nullptr, /*ub=*/nullptr, &fullTileLbPos, &fullTileUbPos)) { LLVM_DEBUG(llvm::dbgs() << "Can't get constant diff pair for a loop\n"); diff --git a/mlir/test/lib/Transforms/TestLoopFusion.cpp b/mlir/test/lib/Transforms/TestLoopFusion.cpp index 30de8ebe05e1..7dc40e0d503e 100644 --- a/mlir/test/lib/Transforms/TestLoopFusion.cpp +++ b/mlir/test/lib/Transforms/TestLoopFusion.cpp @@ -186,7 +186,7 @@ void TestLoopFusion::runOnFunction() { // Try to fuse all combinations of src/dst loop nests in 'depthToLoops'. } while (iterateLoops(depthToLoops, testLoopFusionTransformation, - /*return_on_change=*/true)); + /*returnOnChange=*/true)); return; } From 3bab9d4eb0913e07f453361b2104f85dc0c2fc3d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 1 Jan 2022 21:31:24 +0000 Subject: [PATCH 323/992] Apply clang-tidy fixes for bugprone-copy-constructor-init to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116245 --- .../Transforms/ComprehensiveBufferizePass.cpp | 3 ++- .../Transforms/SparseTensorPasses.cpp | 2 +- .../lib/Dialect/Affine/TestAffineDataCopy.cpp | 2 +- .../Affine/TestAffineLoopUnswitching.cpp | 3 ++- .../Dialect/Affine/TestLoopPermutation.cpp | 2 +- .../Linalg/TestComprehensiveBufferize.cpp | 3 ++- .../Linalg/TestLinalgCodegenStrategy.cpp | 3 ++- .../Dialect/Linalg/TestLinalgDistribution.cpp | 3 ++- .../Linalg/TestLinalgFusionTransforms.cpp | 3 ++- .../lib/Dialect/Linalg/TestLinalgHoisting.cpp | 2 +- .../Dialect/Linalg/TestLinalgTransforms.cpp | 2 +- .../Math/TestPolynomialApproximation.cpp | 3 ++- .../Dialect/Vector/TestVectorTransforms.cpp | 27 ++++++++++++------- 13 files changed, 37 insertions(+), 21 deletions(-) diff --git 
a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index 61a9f1b0cdb3..6425c058f0b8 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -36,7 +36,8 @@ struct LinalgComprehensiveModuleBufferize LinalgComprehensiveModuleBufferize() = default; LinalgComprehensiveModuleBufferize( - const LinalgComprehensiveModuleBufferize &p) {} + const LinalgComprehensiveModuleBufferize &p) + : LinalgComprehensiveModuleBufferizeBase(p) {} void runOnOperation() override; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index 8404aa621338..61d90c031f3d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -34,7 +34,7 @@ struct SparsificationPass : public SparsificationBase { SparsificationPass() = default; SparsificationPass(const SparsificationPass &pass) - : SparsificationBase() {} + : SparsificationBase(pass) {} /// Returns parallelization strategy given on command line. 
SparseParallelizationStrategy parallelOption() { diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp index df74cf71ed53..e28b0c38f0ff 100644 --- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp @@ -34,7 +34,7 @@ struct TestAffineDataCopy return "Tests affine data copy utility functions."; } TestAffineDataCopy() = default; - TestAffineDataCopy(const TestAffineDataCopy &pass){}; + TestAffineDataCopy(const TestAffineDataCopy &pass) : PassWrapper(pass){}; void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); diff --git a/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp b/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp index 542e109ebbb8..533b51eade97 100644 --- a/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp @@ -30,7 +30,8 @@ struct TestAffineLoopUnswitching return "Tests affine loop unswitching / if/else hoisting"; } TestAffineLoopUnswitching() = default; - TestAffineLoopUnswitching(const TestAffineLoopUnswitching &pass) {} + TestAffineLoopUnswitching(const TestAffineLoopUnswitching &pass) + : PassWrapper(pass) {} void runOnFunction() override; diff --git a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp index 8a88d15a4255..1a997dc60e9a 100644 --- a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp +++ b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp @@ -32,7 +32,7 @@ struct TestLoopPermutation return "Tests affine loop permutation utility"; } TestLoopPermutation() = default; - TestLoopPermutation(const TestLoopPermutation &pass){}; + TestLoopPermutation(const TestLoopPermutation &pass) : PassWrapper(pass){}; void runOnFunction() override; diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp 
b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp index 53d241286a09..d54948e23844 100644 --- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp @@ -49,7 +49,8 @@ struct TestComprehensiveFunctionBufferize TestComprehensiveFunctionBufferize() = default; TestComprehensiveFunctionBufferize( - const TestComprehensiveFunctionBufferize &pass) {} + const TestComprehensiveFunctionBufferize &pass) + : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp index 39f8cca3c112..d5391a1b3b3e 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp @@ -264,7 +264,8 @@ struct TestLinalgTileAndFuseSequencePass } TestLinalgTileAndFuseSequencePass() = default; TestLinalgTileAndFuseSequencePass( - const TestLinalgTileAndFuseSequencePass &pass){}; + const TestLinalgTileAndFuseSequencePass &pass) + : PassWrapper(pass){}; ListOption tileSizes{ *this, "tile-sizes", llvm::cl::desc("Tile sizes to use for ops"), diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp index 59b13b93e2a4..4fa90ede1a98 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgHoisting.cpp @@ -22,7 +22,7 @@ namespace { struct TestLinalgHoisting : public PassWrapper { TestLinalgHoisting() = default; - TestLinalgHoisting(const TestLinalgHoisting &pass) {} + TestLinalgHoisting(const TestLinalgHoisting &pass) : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 
d24de7a13fe1..51cdd8732140 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -34,7 +34,7 @@ namespace { struct TestLinalgTransforms : public PassWrapper { TestLinalgTransforms() = default; - TestLinalgTransforms(const TestLinalgTransforms &pass) {} + TestLinalgTransforms(const TestLinalgTransforms &pass) : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { // clang-format off diff --git a/mlir/test/lib/Dialect/Math/TestPolynomialApproximation.cpp b/mlir/test/lib/Dialect/Math/TestPolynomialApproximation.cpp index 7cce0ef907e7..486ea582cb76 100644 --- a/mlir/test/lib/Dialect/Math/TestPolynomialApproximation.cpp +++ b/mlir/test/lib/Dialect/Math/TestPolynomialApproximation.cpp @@ -26,7 +26,8 @@ struct TestMathPolynomialApproximationPass : public PassWrapper { TestMathPolynomialApproximationPass() = default; TestMathPolynomialApproximationPass( - const TestMathPolynomialApproximationPass &pass) {} + const TestMathPolynomialApproximationPass &pass) + : PassWrapper(pass) {} void runOnFunction() override; void getDependentDialects(DialectRegistry ®istry) const override { diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index e0902db67370..db52be8dcff5 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -31,7 +31,8 @@ namespace { struct TestVectorToVectorLowering : public PassWrapper { TestVectorToVectorLowering() = default; - TestVectorToVectorLowering(const TestVectorToVectorLowering &pass) {} + TestVectorToVectorLowering(const TestVectorToVectorLowering &pass) + : PassWrapper(pass) {} StringRef getArgument() const final { return "test-vector-to-vector-lowering"; } @@ -110,7 +111,8 @@ struct TestVectorContractionLowering "dialect"; } TestVectorContractionLowering() = default; - 
TestVectorContractionLowering(const TestVectorContractionLowering &pass) {} + TestVectorContractionLowering(const TestVectorContractionLowering &pass) + : PassWrapper(pass) {} Option lowerToFlatMatrix{ *this, "vector-lower-matrix-intrinsics", @@ -182,7 +184,8 @@ struct TestVectorTransposeLowering "dialect"; } TestVectorTransposeLowering() = default; - TestVectorTransposeLowering(const TestVectorTransposeLowering &pass) {} + TestVectorTransposeLowering(const TestVectorTransposeLowering &pass) + : PassWrapper(pass) {} Option lowerToEltwise{ *this, "eltwise", @@ -253,7 +256,8 @@ struct TestVectorUnrollingPatterns "dialect"; } TestVectorUnrollingPatterns() = default; - TestVectorUnrollingPatterns(const TestVectorUnrollingPatterns &pass) {} + TestVectorUnrollingPatterns(const TestVectorUnrollingPatterns &pass) + : PassWrapper(pass) {} void runOnFunction() override { MLIRContext *ctx = &getContext(); RewritePatternSet patterns(ctx); @@ -312,7 +316,8 @@ struct TestVectorDistributePatterns "dialect"; } TestVectorDistributePatterns() = default; - TestVectorDistributePatterns(const TestVectorDistributePatterns &pass) {} + TestVectorDistributePatterns(const TestVectorDistributePatterns &pass) + : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); registry.insert(); @@ -365,7 +370,8 @@ struct TestVectorToLoopPatterns return "Test lowering patterns to break up a vector op into a for loop"; } TestVectorToLoopPatterns() = default; - TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) {} + TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) + : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); registry.insert(); @@ -456,7 +462,8 @@ struct TestVectorTransferFullPartialSplitPatterns } TestVectorTransferFullPartialSplitPatterns() = default; TestVectorTransferFullPartialSplitPatterns( - const TestVectorTransferFullPartialSplitPatterns &pass) {} + const 
TestVectorTransferFullPartialSplitPatterns &pass) + : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert { TestVectorMultiReductionLoweringPatterns() = default; TestVectorMultiReductionLoweringPatterns( - const TestVectorMultiReductionLoweringPatterns &pass) {} + const TestVectorMultiReductionLoweringPatterns &pass) + : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } @@ -544,7 +552,8 @@ struct TestVectorTransferCollapseInnerMostContiguousDims FunctionPass> { TestVectorTransferCollapseInnerMostContiguousDims() = default; TestVectorTransferCollapseInnerMostContiguousDims( - const TestVectorTransferCollapseInnerMostContiguousDims &pass) {} + const TestVectorTransferCollapseInnerMostContiguousDims &pass) + : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); From 322c891483440c370e2ab66fdb0f4c4cf61743f1 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 23 Dec 2021 22:13:00 +0000 Subject: [PATCH 324/992] Apply clang-tidy fixes for modernize-use-equals-default to MLIR (NFC) Differential Revision: https://reviews.llvm.org/D116247 --- mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp | 3 +-- mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp | 3 +-- mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp b/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp index 533b51eade97..ca65d9550a46 100644 --- a/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAffineLoopUnswitching.cpp @@ -30,8 +30,7 @@ struct TestAffineLoopUnswitching return "Tests affine loop unswitching / if/else hoisting"; } TestAffineLoopUnswitching() = default; - TestAffineLoopUnswitching(const TestAffineLoopUnswitching &pass) - : 
PassWrapper(pass) {} + TestAffineLoopUnswitching(const TestAffineLoopUnswitching &pass) = default; void runOnFunction() override; diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp index fa982e7fb3c2..2543b13eae29 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp @@ -43,8 +43,7 @@ struct TestLinalgDistribution StringRef getArgument() const final { return "test-linalg-distribution"; } StringRef getDescription() const final { return "Test Linalg distribution."; } TestLinalgDistribution() = default; - TestLinalgDistribution(const TestLinalgDistribution &pass) - : PassWrapper(pass) {} + TestLinalgDistribution(const TestLinalgDistribution &pass) = default; void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index db52be8dcff5..0d054720a61b 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -552,8 +552,7 @@ struct TestVectorTransferCollapseInnerMostContiguousDims FunctionPass> { TestVectorTransferCollapseInnerMostContiguousDims() = default; TestVectorTransferCollapseInnerMostContiguousDims( - const TestVectorTransferCollapseInnerMostContiguousDims &pass) - : PassWrapper(pass) {} + const TestVectorTransferCollapseInnerMostContiguousDims &pass) = default; void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); From 89de9cc8a772a4e541547d6cc542583b34bed5b8 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 23 Dec 2021 22:13:06 +0000 Subject: [PATCH 325/992] Apply clang-tidy fixes for performance-for-range-copy to MLIR (NFC) Differential Revision: https://reviews.llvm.org/D116248 --- mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp | 2 +- 
mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp | 2 +- .../StandardOps/Transforms/DecomposeCallGraphTypes.cpp | 2 +- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 4 ++-- mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp | 2 +- mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 2 +- mlir/test/lib/Dialect/Test/TestPatterns.cpp | 2 +- mlir/test/lib/IR/TestPrintDefUse.cpp | 2 +- mlir/test/lib/IR/TestSlicing.cpp | 2 +- mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp | 4 ++-- mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp | 2 +- mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp | 2 +- mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp | 2 +- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 4 ++-- mlir/tools/mlir-tblgen/OpDocGen.cpp | 2 +- mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp | 2 +- 16 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp index 4b4217a94616..81fa42438c32 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp @@ -418,7 +418,7 @@ AsyncRuntimeRefCountingPass::addDropRefInDivergentLivenessSuccessor( continue; // Update terminator `successor` block to `refCountingBlock`. 
- for (auto pair : llvm::enumerate(terminator->getSuccessors())) + for (const auto &pair : llvm::enumerate(terminator->getSuccessors())) if (pair.value() == successor) terminator->setSuccessor(refCountingBlock, pair.index()); } diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp index dc92436b97be..2dbee7eaac6f 100644 --- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp +++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp @@ -134,7 +134,7 @@ handleMultidimensionalVectors(ImplicitLocOpBuilder &builder, auto offsets = delinearize(strides, i); SmallVector extracted(expandedOperands.size()); - for (auto tuple : llvm::enumerate(expandedOperands)) + for (const auto &tuple : llvm::enumerate(expandedOperands)) extracted[tuple.index()] = builder.create(tuple.value(), offsets); diff --git a/mlir/lib/Dialect/StandardOps/Transforms/DecomposeCallGraphTypes.cpp b/mlir/lib/Dialect/StandardOps/Transforms/DecomposeCallGraphTypes.cpp index 3686568244e2..f9d9c5c31e66 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/DecomposeCallGraphTypes.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/DecomposeCallGraphTypes.cpp @@ -67,7 +67,7 @@ struct DecomposeCallGraphTypesForFuncArgs // Convert function arguments using the provided TypeConverter. 
TypeConverter::SignatureConversion conversion(functionType.getNumInputs()); - for (auto argType : llvm::enumerate(functionType.getInputs())) { + for (const auto &argType : llvm::enumerate(functionType.getInputs())) { SmallVector decomposedTypes; if (failed(typeConverter->convertType(argType.value(), decomposedTypes))) return failure(); diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index e56f83b8044e..69da41c34370 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -1791,7 +1791,7 @@ LogicalResult IfOp::inferReturnTypeComponents( if (resultKnowledge.size() != yieldOp.getNumOperands()) return failure(); - for (auto it : llvm::enumerate(yieldOp.getOperands())) { + for (const auto &it : llvm::enumerate(yieldOp.getOperands())) { int32_t index = it.index(); auto meet = ValueKnowledge::meet( resultKnowledge[index], @@ -1835,7 +1835,7 @@ LogicalResult WhileOp::inferReturnTypeComponents( if (resultKnowledge.size() != yieldOp.getNumOperands()) return failure(); - for (auto it : llvm::enumerate(yieldOp.getOperands())) { + for (const auto &it : llvm::enumerate(yieldOp.getOperands())) { int32_t index = it.index(); if (auto meet = ValueKnowledge::meet( resultKnowledge[index], diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp index 34d480c3917e..3a50336c3d4a 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp @@ -132,7 +132,7 @@ void propagateShapesToTosaWhile( } for (auto yieldOp : yieldOps) { - for (auto it : llvm::enumerate(yieldOp.getOperands())) { + for (const auto &it : llvm::enumerate(yieldOp.getOperands())) { auto newKnowledge = ValueKnowledge::getKnowledgeFromType(it.value().getType()); yieldTypeInfo[it.index()] = diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp index 
f1acfb3ab504..ddba1d3c4432 100644 --- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp @@ -253,7 +253,7 @@ bool mlir::isReassociationValid(ArrayRef reassociation, return true; unsigned nDims = reassociation[0].getNumDims(); unsigned nextExpectedDim = 0; - for (auto it : llvm::enumerate(reassociation)) { + for (const auto &it : llvm::enumerate(reassociation)) { auto m = it.value(); if (m.getNumDims() != nDims || m.getNumSymbols() != 0) { if (invalidIndex) diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 2f06d82bd521..17a58713763d 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -182,7 +182,7 @@ static void reifyReturnShape(Operation *op) { if (failed(shapedOp.reifyReturnTypeShapes(b, op->getOperands(), shapes)) || !llvm::hasSingleElement(shapes)) return; - for (auto it : llvm::enumerate(shapes)) { + for (const auto &it : llvm::enumerate(shapes)) { op->emitRemark() << "value " << it.index() << ": " << it.value().getDefiningOp(); } diff --git a/mlir/test/lib/IR/TestPrintDefUse.cpp b/mlir/test/lib/IR/TestPrintDefUse.cpp index 6fa37b9cec73..8a854e6a059d 100644 --- a/mlir/test/lib/IR/TestPrintDefUse.cpp +++ b/mlir/test/lib/IR/TestPrintDefUse.cpp @@ -41,7 +41,7 @@ struct TestPrintDefUsePass // Print information about the user of each of the result. 
llvm::outs() << "Has " << op->getNumResults() << " results:\n"; - for (auto indexedResult : llvm::enumerate(op->getResults())) { + for (const auto &indexedResult : llvm::enumerate(op->getResults())) { Value result = indexedResult.value(); llvm::outs() << " - Result " << indexedResult.index(); if (result.use_empty()) { diff --git a/mlir/test/lib/IR/TestSlicing.cpp b/mlir/test/lib/IR/TestSlicing.cpp index 2489b14c6875..a253c5970601 100644 --- a/mlir/test/lib/IR/TestSlicing.cpp +++ b/mlir/test/lib/IR/TestSlicing.cpp @@ -33,7 +33,7 @@ static LogicalResult createBackwardSliceFunction(Operation *op, builder.create(loc, clonedFuncOpName, parentFuncOp.getType()); BlockAndValueMapping mapper; builder.setInsertionPointToEnd(clonedFuncOp.addEntryBlock()); - for (auto arg : enumerate(parentFuncOp.getArguments())) + for (const auto &arg : enumerate(parentFuncOp.getArguments())) mapper.map(arg.value(), clonedFuncOp.getArgument(arg.index())); SetVector slice; getBackwardSlice(op, &slice); diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp index a07ada3c8e66..ed0b5f403f96 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp @@ -360,7 +360,7 @@ static std::string interleaveToString(Container &container, static Optional findTensorDefArgIndex(StringRef name, SmallVectorImpl &args) { - for (auto it : llvm::enumerate(args)) { + for (const auto &it : llvm::enumerate(args)) { if (it.value().name == name) return it.index(); } @@ -381,7 +381,7 @@ findTypeValue(StringRef typeVar, SmallVectorImpl &args) { return std::string("helper.getFloat64Type()"); // Search all argument types. 
- for (auto it : llvm::enumerate(args)) { + for (const auto &it : llvm::enumerate(args)) { if (it.value().typeVar == typeVar) return llvm::formatv("block.getArgument({0}).getType()", it.index()) .str(); diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index eac8e5638bad..d90adbc47ef4 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -561,7 +561,7 @@ void DefGen::emitConstruct() { MethodParameter("const KeyTy &", "tblgenKey")); if (!def.hasStorageCustomConstructor()) { auto &body = construct->body().indent(); - for (auto it : llvm::enumerate(params)) { + for (const auto &it : llvm::enumerate(params)) { body << formatv("auto {0} = std::get<{1}>(tblgenKey);\n", it.value().getName(), it.index()); } diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp index 8042ecc6fe96..4a07157b884d 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -661,7 +661,7 @@ FailureOr> FormatParser::parseParamsDirective() { /// Collect all of the attribute's or type's parameters. SmallVector> vars; /// Ensure that none of the parameters have already been captured. 
- for (auto it : llvm::enumerate(def.getParameters())) { + for (const auto &it : llvm::enumerate(def.getParameters())) { if (seenParams.test(it.index())) { return emitError("`params` captures duplicate parameter: " + it.value().getName()); diff --git a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp index 18315ec366a0..646527d1bdcc 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp @@ -65,7 +65,7 @@ static IndicesTy getOverloadableTypeIdxs(const llvm::Record &record, const char *listName) { auto results = record.getValueAsListOfDefs(listName); IndicesTy overloadedOps(results.size()); - for (auto r : llvm::enumerate(results)) { + for (const auto &r : llvm::enumerate(results)) { llvm::MVT::SimpleValueType vt = getValueType(r.value()); switch (vt) { case llvm::MVT::iAny: diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 484bb230e6b0..f27b537d6362 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -2267,7 +2267,7 @@ void OpEmitter::genOperandResultVerifier(MethodBody &body, body << " {\n unsigned index = 0; (void)index;\n"; - for (auto staticValue : llvm::enumerate(values)) { + for (const auto &staticValue : llvm::enumerate(values)) { const NamedTypeConstraint &value = staticValue.value(); bool hasPredicate = value.hasPredicate(); @@ -2332,7 +2332,7 @@ void OpEmitter::genRegionVerifier(MethodBody &body) { return; body << " {\n unsigned index = 0; (void)index;\n"; - for (auto it : llvm::enumerate(regions)) { + for (const auto &it : llvm::enumerate(regions)) { const auto ®ion = it.value(); if (canSkip(region)) continue; diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index fc2f81417630..3025df140ea9 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -235,7 +235,7 @@ 
static void emitAttrOrTypeDefAssemblyFormat(const AttrOrTypeDef &def, os << "\nSyntax:\n\n```\n!" << def.getDialect().getName() << "." << def.getMnemonic() << "<\n"; - for (auto it : llvm::enumerate(parameters)) { + for (const auto &it : llvm::enumerate(parameters)) { const AttrOrTypeParameter ¶m = it.value(); os << " " << param.getSyntax(); if (it.index() < (parameters.size() - 1)) diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index fb634a1be395..16fccff973ca 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -916,7 +916,7 @@ static void emitRegionAttributes(const Operator &op, raw_ostream &os) { /// Emits named accessors to regions. static void emitRegionAccessors(const Operator &op, raw_ostream &os) { - for (auto en : llvm::enumerate(op.getRegions())) { + for (const auto &en : llvm::enumerate(op.getRegions())) { const NamedRegion ®ion = en.value(); if (region.name.empty()) continue; From ee1fcb2fb684a54a3969adeb22108b62b18ea751 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:11:13 +0000 Subject: [PATCH 326/992] Apply clang-tidy fixes for performance-move-const-arg to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116249 --- mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 51cdd8732140..64713b02c7b6 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -528,8 +528,7 @@ applyMatmulToVectorPatterns(FuncOp funcOp, llvm::move(stage1Patterns, std::back_inserter(frozenStage1Patterns)); FrozenRewritePatternSet stage2Patterns = getLinalgTilingCanonicalizationPatterns(ctx); - (void)applyStagedPatterns(funcOp, 
frozenStage1Patterns, - std::move(stage2Patterns)); + (void)applyStagedPatterns(funcOp, frozenStage1Patterns, stage2Patterns); } static void applyVectorTransferForwardingPatterns(FuncOp funcOp) { From a86b957fd766e458eca3d61f9010610ac76d4cd7 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:18:41 +0000 Subject: [PATCH 327/992] Apply clang-tidy fixes for bugprone-macro-parentheses to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116354 --- .../Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp index e2edc9d15267..31ebbac73a27 100644 --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp @@ -30,7 +30,7 @@ namespace comprehensive_bufferize { #define DEBUG_TYPE "bufferizable-op-interface" #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") -#define LDBG(X) LLVM_DEBUG(DBGS() << X) +#define LDBG(X) LLVM_DEBUG(DBGS() << (X)) using namespace mlir; using namespace linalg::comprehensive_bufferize; From 513463b589c9472d8a8454e95cd0c5d9acbb47fb Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:21:01 +0000 Subject: [PATCH 328/992] Apply clang-tidy fixes for llvm-qualified-auto to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116355 --- mlir/lib/Analysis/Utils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index a9697ca83a5a..e6fe34996359 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -817,9 +817,9 @@ mlir::computeSliceUnion(ArrayRef opsA, ArrayRef opsB, FlatAffineValueConstraints sliceUnionCst; 
assert(sliceUnionCst.getNumDimAndSymbolIds() == 0); std::vector> dependentOpPairs; - for (auto i : opsA) { + for (auto *i : opsA) { MemRefAccess srcAccess(i); - for (auto j : opsB) { + for (auto *j : opsB) { MemRefAccess dstAccess(j); if (srcAccess.memref != dstAccess.memref) continue; From 0ae2e9580c5764eeb15f96cfb661f44307d4e6fa Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:22:19 +0000 Subject: [PATCH 329/992] Apply clang-tidy fixes for modernize-use-override to MLIR (NFC) Reviewed By: rriddle, jpienaar Differential Revision: https://reviews.llvm.org/D116356 --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 2 +- mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 5cc40665a8af..32faad2d0d41 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -277,7 +277,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { } } - virtual ~SparseTensorStorage() = default; + ~SparseTensorStorage() override = default; /// Get the rank of the tensor. 
uint64_t getRank() const { return sizes.size(); } diff --git a/mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp b/mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp index d30815410af9..0b4085911675 100644 --- a/mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp +++ b/mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp @@ -25,7 +25,7 @@ class TestUniformQuantizedValueConverter public: TestUniformQuantizedValueConverter(UniformQuantizedType type) : UniformQuantizedValueConverter(type), qtype(type) {} - APInt quantizeFloatToInt(APFloat expressedValue) const { + APInt quantizeFloatToInt(APFloat expressedValue) const override { return APInt(qtype.getStorageType().cast().getWidth(), 5L); } From b11510d5dfaf82b0979b3d1d0b075c937d6371a7 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:24:54 +0000 Subject: [PATCH 330/992] Apply clang-tidy fixes for modernize-use-using to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116357 --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 32faad2d0d41..927284ec13f4 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -691,7 +691,7 @@ extern "C" { /// type is 64-bit, but targets with different "index" bit widths should link /// with an alternatively built runtime support library. // TODO: support such targets? 
-typedef uint64_t index_t; +using index_t = uint64_t; //===----------------------------------------------------------------------===// // From 1fc096af1e495d121679340b527701a5c0a9ef8b Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:26:44 +0000 Subject: [PATCH 331/992] Apply clang-tidy fixes for performance-unnecessary-value-param to MLIR (NFC) Reviewed By: Mogball Differential Revision: https://reviews.llvm.org/D116250 --- mlir/include/mlir/Analysis/NestedMatcher.h | 12 +- mlir/include/mlir/Dialect/Async/Transforms.h | 2 +- .../BufferizableOpInterface.h | 2 +- mlir/include/mlir/Dialect/Linalg/Passes.h | 64 ++--- .../Dialect/Linalg/Transforms/Transforms.h | 13 +- mlir/include/mlir/IR/AffineMap.h | 2 +- mlir/include/mlir/IR/Dialect.h | 7 +- mlir/include/mlir/Reducer/ReductionNode.h | 2 +- mlir/include/mlir/TableGen/GenInfo.h | 3 +- mlir/include/mlir/TableGen/Pattern.h | 3 +- mlir/include/mlir/Translation.h | 2 +- mlir/lib/Analysis/LoopAnalysis.cpp | 2 +- mlir/lib/Analysis/NestedMatcher.cpp | 21 +- .../Bindings/Python/DialectSparseTensor.cpp | 2 +- mlir/lib/Bindings/Python/Dialects.h | 2 +- .../Bindings/Python/ExecutionEngineModule.cpp | 2 +- mlir/lib/Bindings/Python/IRAffine.cpp | 25 +- mlir/lib/Bindings/Python/IRAttributes.cpp | 6 +- mlir/lib/Bindings/Python/IRCore.cpp | 41 ++-- mlir/lib/Bindings/Python/IRInterfaces.cpp | 4 +- mlir/lib/Bindings/Python/IRModule.h | 25 +- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 35 ++- .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 228 ++++++++++-------- .../Async/Transforms/AsyncParallelFor.cpp | 8 +- .../Transforms/BufferDeallocation.cpp | 2 +- .../GPU/Transforms/KernelOutlining.cpp | 3 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 5 +- .../BufferizableOpInterface.cpp | 2 +- .../SCFInterfaceImpl.cpp | 2 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 12 +- .../Dialect/Linalg/Transforms/Detensorize.cpp | 3 +- .../Linalg/Transforms/Distribution.cpp | 6 +- .../Linalg/Transforms/ElementwiseOpFusion.cpp | 8 +- 
.../Linalg/Transforms/Generalization.cpp | 2 +- .../Transforms/LinalgStrategyPasses.cpp | 67 ++--- .../Dialect/Linalg/Transforms/Promotion.cpp | 4 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 4 +- .../Dialect/Linalg/Transforms/Transforms.cpp | 61 ++--- .../Transforms/PolynomialApproximation.cpp | 2 +- .../SPIRV/IR/SPIRVCanonicalization.cpp | 14 +- mlir/lib/Dialect/Shape/IR/Shape.cpp | 6 +- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 4 +- mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 2 +- .../VectorMultiDimReductionTransforms.cpp | 2 +- mlir/lib/IR/AffineExpr.cpp | 6 +- mlir/lib/IR/AffineMap.cpp | 2 +- mlir/lib/IR/Diagnostics.cpp | 2 +- mlir/lib/IR/Dialect.cpp | 6 +- mlir/lib/IR/Operation.cpp | 2 +- mlir/lib/Pass/IRPrinting.cpp | 6 +- mlir/lib/Pass/PassRegistry.cpp | 5 +- mlir/lib/Reducer/ReductionNode.cpp | 2 +- mlir/lib/TableGen/Pattern.cpp | 7 +- mlir/lib/TableGen/Predicate.cpp | 3 +- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 14 +- .../Tools/mlir-lsp-server/lsp/Transport.cpp | 5 +- mlir/lib/Transforms/Inliner.cpp | 2 +- mlir/lib/Transforms/ViewOpGraph.cpp | 13 +- mlir/lib/Translation/Translation.cpp | 2 +- .../Linalg/TestLinalgCodegenStrategy.cpp | 11 +- .../Dialect/Linalg/TestLinalgTransforms.cpp | 2 +- mlir/tools/mlir-tblgen/DialectGen.cpp | 9 +- mlir/tools/mlir-tblgen/OpDocGen.cpp | 2 +- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 4 +- mlir/tools/mlir-tblgen/OpInterfacesGen.cpp | 2 +- mlir/tools/mlir-tblgen/mlir-tblgen.cpp | 2 +- .../Analysis/AffineStructuresParserTest.cpp | 2 +- .../Analysis/Presburger/SimplexTest.cpp | 4 +- mlir/unittests/Analysis/PresburgerSetTest.cpp | 13 +- .../PDLToPDLInterp/RootOrderingTest.cpp | 2 +- .../Dialect/SparseTensor/MergerTest.cpp | 23 +- 71 files changed, 500 insertions(+), 377 deletions(-) diff --git a/mlir/include/mlir/Analysis/NestedMatcher.h b/mlir/include/mlir/Analysis/NestedMatcher.h index fb725e3ec864..844fc3a53ad1 100644 --- a/mlir/include/mlir/Analysis/NestedMatcher.h +++ b/mlir/include/mlir/Analysis/NestedMatcher.h 
@@ -180,15 +180,15 @@ class NestedPatternContext { namespace matcher { // Syntactic sugar NestedPattern builder functions. NestedPattern Op(FilterFunctionType filter = defaultFilterFunction); -NestedPattern If(NestedPattern child); -NestedPattern If(FilterFunctionType filter, NestedPattern child); +NestedPattern If(const NestedPattern &child); +NestedPattern If(const FilterFunctionType &filter, const NestedPattern &child); NestedPattern If(ArrayRef nested = {}); -NestedPattern If(FilterFunctionType filter, +NestedPattern If(const FilterFunctionType &filter, ArrayRef nested = {}); -NestedPattern For(NestedPattern child); -NestedPattern For(FilterFunctionType filter, NestedPattern child); +NestedPattern For(const NestedPattern &child); +NestedPattern For(const FilterFunctionType &filter, const NestedPattern &child); NestedPattern For(ArrayRef nested = {}); -NestedPattern For(FilterFunctionType filter, +NestedPattern For(const FilterFunctionType &filter, ArrayRef nested = {}); bool isParallelLoop(Operation &op); diff --git a/mlir/include/mlir/Dialect/Async/Transforms.h b/mlir/include/mlir/Dialect/Async/Transforms.h index 97a070b0757e..09a57ca89708 100644 --- a/mlir/include/mlir/Dialect/Async/Transforms.h +++ b/mlir/include/mlir/Dialect/Async/Transforms.h @@ -32,7 +32,7 @@ using AsyncMinTaskSizeComputationFunction = /// operations. 
void populateAsyncParallelForPatterns( RewritePatternSet &patterns, bool asyncDispatch, int32_t numWorkerThreads, - AsyncMinTaskSizeComputationFunction computeMinTaskSize); + const AsyncMinTaskSizeComputationFunction &computeMinTaskSize); } // namespace async } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h index 891d59b61616..6d0a32cfd344 100644 --- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h @@ -352,7 +352,7 @@ class BufferizationState { /// { 2, 7, 8, 5 } llvm::SetVector findValueInReverseUseDefChain(Value value, - std::function condition); + llvm::function_ref condition); /// Find the Value of the last preceding write of a given Value. /// diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index 8ebaaa8f8e4d..5b0abdec15c6 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -82,70 +82,70 @@ std::unique_ptr createLinalgDetensorizePass(); //===----------------------------------------------------------------------===// /// Create a LinalgStrategyTileAndFusePass. std::unique_ptr> createLinalgStrategyTileAndFusePass( - StringRef opName = "", linalg::LinalgTilingAndFusionOptions opt = {}, - linalg::LinalgTransformationFilter filter = + StringRef opName = "", const linalg::LinalgTilingAndFusionOptions &opt = {}, + const linalg::LinalgTransformationFilter &filter = linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyTilePass. 
std::unique_ptr> createLinalgStrategyTilePass( StringRef opName = "", - linalg::LinalgTilingOptions opt = linalg::LinalgTilingOptions(), - linalg::LinalgTransformationFilter filter = + const linalg::LinalgTilingOptions &opt = linalg::LinalgTilingOptions(), + const linalg::LinalgTransformationFilter &filter = linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyPadPass. std::unique_ptr> createLinalgStrategyPadPass( StringRef opName = "", - linalg::LinalgPaddingOptions opt = linalg::LinalgPaddingOptions(), - linalg::LinalgTransformationFilter filter = + const linalg::LinalgPaddingOptions &opt = linalg::LinalgPaddingOptions(), + const linalg::LinalgTransformationFilter &filter = linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyPromotePass. std::unique_ptr> createLinalgStrategyPromotePass( StringRef opName = "", - linalg::LinalgPromotionOptions opt = linalg::LinalgPromotionOptions(), - linalg::LinalgTransformationFilter filter = + const linalg::LinalgPromotionOptions &opt = + linalg::LinalgPromotionOptions(), + const linalg::LinalgTransformationFilter &filter = linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyGeneralizePass. -std::unique_ptr> -createLinalgStrategyGeneralizePass(StringRef opName = "", - linalg::LinalgTransformationFilter filter = - linalg::LinalgTransformationFilter()); +std::unique_ptr> createLinalgStrategyGeneralizePass( + StringRef opName = "", const linalg::LinalgTransformationFilter &filter = + linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyDecomposePass. // TODO: if/when we need finer control add an `opName` parameter. -std::unique_ptr> -createLinalgStrategyDecomposePass(linalg::LinalgTransformationFilter filter = - linalg::LinalgTransformationFilter()); +std::unique_ptr> createLinalgStrategyDecomposePass( + const linalg::LinalgTransformationFilter &filter = + linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyInterchangePass. 
-std::unique_ptr> -createLinalgStrategyInterchangePass(ArrayRef iteratorInterchange = {}, - linalg::LinalgTransformationFilter filter = - linalg::LinalgTransformationFilter()); +std::unique_ptr> createLinalgStrategyInterchangePass( + ArrayRef iteratorInterchange = {}, + const linalg::LinalgTransformationFilter &filter = + linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyVectorizePass. -std::unique_ptr> -createLinalgStrategyVectorizePass(StringRef opName = "", - linalg::LinalgVectorizationOptions opt = - linalg::LinalgVectorizationOptions(), - linalg::LinalgTransformationFilter filter = - linalg::LinalgTransformationFilter(), - bool padVectorize = false); +std::unique_ptr> createLinalgStrategyVectorizePass( + StringRef opName = "", + linalg::LinalgVectorizationOptions opt = + linalg::LinalgVectorizationOptions(), + const linalg::LinalgTransformationFilter &filter = + linalg::LinalgTransformationFilter(), + bool padVectorize = false); /// Create a LinalgStrategyEnablePass. std::unique_ptr> createLinalgStrategyEnablePass( linalg::LinalgEnablingOptions opt = linalg::LinalgEnablingOptions(), - linalg::LinalgTransformationFilter filter = + const linalg::LinalgTransformationFilter &filter = linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyLowerVectorsPass. -std::unique_ptr> -createLinalgStrategyLowerVectorsPass(linalg::LinalgVectorLoweringOptions opt = - linalg::LinalgVectorLoweringOptions(), - linalg::LinalgTransformationFilter filter = - linalg::LinalgTransformationFilter()); +std::unique_ptr> createLinalgStrategyLowerVectorsPass( + linalg::LinalgVectorLoweringOptions opt = + linalg::LinalgVectorLoweringOptions(), + const linalg::LinalgTransformationFilter &filter = + linalg::LinalgTransformationFilter()); /// Create a LinalgStrategyRemoveMarkersPass. 
std::unique_ptr> createLinalgStrategyRemoveMarkersPass(); diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 34eef99dc729..bb396ce5a554 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -70,7 +70,8 @@ using ControlElementwiseOpsFusionFn = /// loop in the generic op. void populateFoldReshapeOpsByExpansionPatterns( RewritePatternSet &patterns, - ControlElementwiseOpsFusionFn controlFoldingReshapes = skipUnitDimReshape); + const ControlElementwiseOpsFusionFn &controlFoldingReshapes = + skipUnitDimReshape); /// Patterns to fold a collapsing (expanding) tensor_reshape operation with its /// producer (consumer) generic operation by linearizing the indexing map used @@ -356,7 +357,7 @@ struct PromotionInfo { }; FailureOr promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, - AllocBufferCallbackFn allocationFn, + const AllocBufferCallbackFn &allocationFn, DataLayout &layout); /// Promotes the `subViews` into a new buffer allocated at the insertion point @@ -370,7 +371,7 @@ promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, /// Returns the modified linalg op (the modification happens in place) as well /// as all the copy ops created. FailureOr promoteSubViews(OpBuilder &b, LinalgOp op, - LinalgPromotionOptions options); + const LinalgPromotionOptions &options); /// Emit a suitable vector form for a Linalg op with fully static shape. 
LogicalResult vectorizeLinalgOp(OpBuilder &builder, Operation *op, @@ -437,7 +438,7 @@ struct LinalgTransformationFilter { Optional replacement = None); explicit LinalgTransformationFilter( - FilterFunction f, ArrayRef matchDisjunction = {}, + const FilterFunction &f, ArrayRef matchDisjunction = {}, Optional replacement = None); LinalgTransformationFilter(LinalgTransformationFilter &&) = default; @@ -1180,7 +1181,7 @@ struct LinalgLoweringPattern : public RewritePattern { /// linalg.generic ops. void populateLinalgNamedOpsGeneralizationPatterns( RewritePatternSet &patterns, - LinalgTransformationFilter filter = LinalgTransformationFilter()); + const LinalgTransformationFilter &filter = LinalgTransformationFilter()); /// Linalg decompose convolutions patterns @@ -1189,7 +1190,7 @@ void populateLinalgNamedOpsGeneralizationPatterns( /// vectorize the low-D convolution ops. void populateDecomposeConvolutionPatterns( RewritePatternSet &patterns, - LinalgTransformationFilter filter = LinalgTransformationFilter(), + const LinalgTransformationFilter &filter = LinalgTransformationFilter(), PatternBenefit benefit = 1); /// Linalg distribution patterns diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 28a4261b9b80..3fa166c056ae 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -182,7 +182,7 @@ class AffineMap { /// Walk all of the AffineExpr's in this mapping. Each node in an expression /// tree is visited in postorder. - void walkExprs(std::function callback) const; + void walkExprs(llvm::function_ref callback) const; /// This method substitutes any uses of dimensions and symbols (e.g. 
/// dim#0 with dimReplacements[0]) in subexpressions and returns the modified diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index d7dc51189d6a..0ee8057d1e66 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -317,7 +317,8 @@ class DialectRegistry { /// Add a new dialect constructor to the registry. The constructor must be /// calling MLIRContext::getOrLoadDialect in order for the context to take /// ownership of the dialect and for delayed interface registration to happen. - void insert(TypeID typeID, StringRef name, DialectAllocatorFunction ctor); + void insert(TypeID typeID, StringRef name, + const DialectAllocatorFunction &ctor); /// Return an allocation function for constructing the dialect identified by /// its namespace, or nullptr if the namespace is not in this registry. @@ -397,13 +398,13 @@ class DialectRegistry { /// Add an interface constructed with the given allocation function to the /// dialect identified by its namespace. void addDialectInterface(StringRef dialectName, TypeID interfaceTypeID, - DialectInterfaceAllocatorFunction allocator); + const DialectInterfaceAllocatorFunction &allocator); /// Add an attribute/operation/type interface constructible with the given /// allocation function to the dialect identified by its namespace. void addObjectInterface(StringRef dialectName, TypeID objectID, TypeID interfaceTypeID, - ObjectInterfaceAllocatorFunction allocator); + const ObjectInterfaceAllocatorFunction &allocator); /// Add an external model for an attribute/type interface to the dialect /// identified by its namespace. 
diff --git a/mlir/include/mlir/Reducer/ReductionNode.h b/mlir/include/mlir/Reducer/ReductionNode.h index 442bc59589f3..814c88533b44 100644 --- a/mlir/include/mlir/Reducer/ReductionNode.h +++ b/mlir/include/mlir/Reducer/ReductionNode.h @@ -48,7 +48,7 @@ class ReductionNode { using Range = std::pair; - ReductionNode(ReductionNode *parent, std::vector range, + ReductionNode(ReductionNode *parent, const std::vector &range, llvm::SpecificBumpPtrAllocator &allocator); ReductionNode *getParent() const { return parent; } diff --git a/mlir/include/mlir/TableGen/GenInfo.h b/mlir/include/mlir/TableGen/GenInfo.h index 6a9364d3f7f5..bbba88f1c065 100644 --- a/mlir/include/mlir/TableGen/GenInfo.h +++ b/mlir/include/mlir/TableGen/GenInfo.h @@ -64,7 +64,8 @@ class GenInfo { /// // At namespace scope. /// static GenRegistration Print("print", "Print records", [](...){...}); struct GenRegistration { - GenRegistration(StringRef arg, StringRef description, GenFunction function); + GenRegistration(StringRef arg, StringRef description, + const GenFunction &function); }; } // namespace mlir diff --git a/mlir/include/mlir/TableGen/Pattern.h b/mlir/include/mlir/TableGen/Pattern.h index 3c7fe6dad707..f55b1afe0947 100644 --- a/mlir/include/mlir/TableGen/Pattern.h +++ b/mlir/include/mlir/TableGen/Pattern.h @@ -398,7 +398,8 @@ class SymbolInfoMap { // with index `argIndex` for operator `op`. const_iterator findBoundSymbol(StringRef key, DagNode node, const Operator &op, int argIndex) const; - const_iterator findBoundSymbol(StringRef key, SymbolInfo symbolInfo) const; + const_iterator findBoundSymbol(StringRef key, + const SymbolInfo &symbolInfo) const; // Returns the bounds of a range that includes all the elements which // bind to the `key`. 
diff --git a/mlir/include/mlir/Translation.h b/mlir/include/mlir/Translation.h index 79220cc0d447..61b6fc803734 100644 --- a/mlir/include/mlir/Translation.h +++ b/mlir/include/mlir/Translation.h @@ -78,7 +78,7 @@ struct TranslateToMLIRRegistration { struct TranslateFromMLIRRegistration { TranslateFromMLIRRegistration( llvm::StringRef name, const TranslateFromMLIRFunction &function, - std::function dialectRegistration = + const std::function &dialectRegistration = [](DialectRegistry &) {}); }; struct TranslateRegistration { diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp index e6c9863887b3..0672f25671ec 100644 --- a/mlir/lib/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Analysis/LoopAnalysis.cpp @@ -264,7 +264,7 @@ using VectorizableOpFun = std::function; static bool isVectorizableLoopBodyWithOpCond(AffineForOp loop, - VectorizableOpFun isVectorizableOp, + const VectorizableOpFun &isVectorizableOp, NestedPattern &vectorTransferMatcher) { auto *forOp = loop.getOperation(); diff --git a/mlir/lib/Analysis/NestedMatcher.cpp b/mlir/lib/Analysis/NestedMatcher.cpp index a72b3cbb356a..8e6b2457b238 100644 --- a/mlir/lib/Analysis/NestedMatcher.cpp +++ b/mlir/lib/Analysis/NestedMatcher.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Analysis/NestedMatcher.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -55,7 +57,7 @@ void NestedPattern::freeNested() { NestedPattern::NestedPattern(ArrayRef nested, FilterFunctionType filter) - : nestedPatterns(), filter(filter), skip(nullptr) { + : nestedPatterns(), filter(std::move(filter)), skip(nullptr) { copyNestedToThis(nested); } @@ -132,13 +134,13 @@ namespace mlir { namespace matcher { NestedPattern Op(FilterFunctionType filter) { - return NestedPattern({}, filter); + return NestedPattern({}, std::move(filter)); } -NestedPattern If(NestedPattern child) { +NestedPattern 
If(const NestedPattern &child) { return NestedPattern(child, isAffineIfOp); } -NestedPattern If(FilterFunctionType filter, NestedPattern child) { +NestedPattern If(const FilterFunctionType &filter, const NestedPattern &child) { return NestedPattern(child, [filter](Operation &op) { return isAffineIfOp(op) && filter(op); }); @@ -146,23 +148,26 @@ NestedPattern If(FilterFunctionType filter, NestedPattern child) { NestedPattern If(ArrayRef nested) { return NestedPattern(nested, isAffineIfOp); } -NestedPattern If(FilterFunctionType filter, ArrayRef nested) { +NestedPattern If(const FilterFunctionType &filter, + ArrayRef nested) { return NestedPattern(nested, [filter](Operation &op) { return isAffineIfOp(op) && filter(op); }); } -NestedPattern For(NestedPattern child) { +NestedPattern For(const NestedPattern &child) { return NestedPattern(child, isAffineForOp); } -NestedPattern For(FilterFunctionType filter, NestedPattern child) { +NestedPattern For(const FilterFunctionType &filter, + const NestedPattern &child) { return NestedPattern( child, [=](Operation &op) { return isAffineForOp(op) && filter(op); }); } NestedPattern For(ArrayRef nested) { return NestedPattern(nested, isAffineForOp); } -NestedPattern For(FilterFunctionType filter, ArrayRef nested) { +NestedPattern For(const FilterFunctionType &filter, + ArrayRef nested) { return NestedPattern( nested, [=](Operation &op) { return isAffineForOp(op) && filter(op); }); } diff --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp index 6afd0815de2a..7de0b8156f44 100644 --- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp +++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp @@ -17,7 +17,7 @@ using namespace mlir; using namespace mlir::python::adaptors; void mlir::python::populateDialectSparseTensorSubmodule( - py::module m, const py::module &irModule) { + const py::module &m, const py::module &irModule) { auto attributeClass = irModule.attr("Attribute"); 
py::enum_(m, "DimLevelType", py::module_local()) diff --git a/mlir/lib/Bindings/Python/Dialects.h b/mlir/lib/Bindings/Python/Dialects.h index 301d539275d0..c1725074c7a2 100644 --- a/mlir/lib/Bindings/Python/Dialects.h +++ b/mlir/lib/Bindings/Python/Dialects.h @@ -15,7 +15,7 @@ namespace mlir { namespace python { void populateDialectLinalgSubmodule(pybind11::module m); -void populateDialectSparseTensorSubmodule(pybind11::module m, +void populateDialectSparseTensorSubmodule(const pybind11::module &m, const pybind11::module &irModule); } // namespace python diff --git a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp index 814209197505..9016900185c6 100644 --- a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp +++ b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp @@ -42,7 +42,7 @@ class PyExecutionEngine { // Add an object to the list of referenced objects whose lifetime must exceed // those of the ExecutionEngine. - void addReferencedObject(pybind11::object obj) { + void addReferencedObject(const pybind11::object &obj) { referencedObjects.push_back(obj); } diff --git a/mlir/lib/Bindings/Python/IRAffine.cpp b/mlir/lib/Bindings/Python/IRAffine.cpp index c7cdc8243479..16c7ca335ac3 100644 --- a/mlir/lib/Bindings/Python/IRAffine.cpp +++ b/mlir/lib/Bindings/Python/IRAffine.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "IRModule.h" #include "PybindUtils.h" @@ -30,7 +32,8 @@ static const char kDumpDocstring[] = /// Throws errors in case of failure, using "action" to describe what the caller /// was attempting to do. 
template -static void pyListToVector(py::list list, llvm::SmallVectorImpl &result, +static void pyListToVector(const py::list &list, + llvm::SmallVectorImpl &result, StringRef action) { result.reserve(py::len(list)); for (py::handle item : list) { @@ -203,7 +206,7 @@ class PyAffineAddExpr static constexpr const char *pyClassName = "AffineAddExpr"; using PyConcreteAffineExpr::PyConcreteAffineExpr; - static PyAffineAddExpr get(PyAffineExpr lhs, PyAffineExpr rhs) { + static PyAffineAddExpr get(PyAffineExpr lhs, const PyAffineExpr &rhs) { MlirAffineExpr expr = mlirAffineAddExprGet(lhs, rhs); return PyAffineAddExpr(lhs.getContext(), expr); } @@ -232,7 +235,7 @@ class PyAffineMulExpr static constexpr const char *pyClassName = "AffineMulExpr"; using PyConcreteAffineExpr::PyConcreteAffineExpr; - static PyAffineMulExpr get(PyAffineExpr lhs, PyAffineExpr rhs) { + static PyAffineMulExpr get(PyAffineExpr lhs, const PyAffineExpr &rhs) { MlirAffineExpr expr = mlirAffineMulExprGet(lhs, rhs); return PyAffineMulExpr(lhs.getContext(), expr); } @@ -261,7 +264,7 @@ class PyAffineModExpr static constexpr const char *pyClassName = "AffineModExpr"; using PyConcreteAffineExpr::PyConcreteAffineExpr; - static PyAffineModExpr get(PyAffineExpr lhs, PyAffineExpr rhs) { + static PyAffineModExpr get(PyAffineExpr lhs, const PyAffineExpr &rhs) { MlirAffineExpr expr = mlirAffineModExprGet(lhs, rhs); return PyAffineModExpr(lhs.getContext(), expr); } @@ -290,7 +293,7 @@ class PyAffineFloorDivExpr static constexpr const char *pyClassName = "AffineFloorDivExpr"; using PyConcreteAffineExpr::PyConcreteAffineExpr; - static PyAffineFloorDivExpr get(PyAffineExpr lhs, PyAffineExpr rhs) { + static PyAffineFloorDivExpr get(PyAffineExpr lhs, const PyAffineExpr &rhs) { MlirAffineExpr expr = mlirAffineFloorDivExprGet(lhs, rhs); return PyAffineFloorDivExpr(lhs.getContext(), expr); } @@ -319,7 +322,7 @@ class PyAffineCeilDivExpr static constexpr const char *pyClassName = "AffineCeilDivExpr"; using 
PyConcreteAffineExpr::PyConcreteAffineExpr; - static PyAffineCeilDivExpr get(PyAffineExpr lhs, PyAffineExpr rhs) { + static PyAffineCeilDivExpr get(PyAffineExpr lhs, const PyAffineExpr &rhs) { MlirAffineExpr expr = mlirAffineCeilDivExprGet(lhs, rhs); return PyAffineCeilDivExpr(lhs.getContext(), expr); } @@ -375,7 +378,7 @@ class PyAffineMapExprList public: static constexpr const char *pyClassName = "AffineExprList"; - PyAffineMapExprList(PyAffineMap map, intptr_t startIndex = 0, + PyAffineMapExprList(const PyAffineMap &map, intptr_t startIndex = 0, intptr_t length = -1, intptr_t step = 1) : Sliceable(startIndex, length == -1 ? mlirAffineMapGetNumResults(map) : length, @@ -423,7 +426,8 @@ namespace { class PyIntegerSetConstraint { public: - PyIntegerSetConstraint(PyIntegerSet set, intptr_t pos) : set(set), pos(pos) {} + PyIntegerSetConstraint(PyIntegerSet set, intptr_t pos) + : set(std::move(set)), pos(pos) {} PyAffineExpr getExpr() { return PyAffineExpr(set.getContext(), @@ -449,7 +453,7 @@ class PyIntegerSetConstraintList public: static constexpr const char *pyClassName = "IntegerSetConstraintList"; - PyIntegerSetConstraintList(PyIntegerSet set, intptr_t startIndex = 0, + PyIntegerSetConstraintList(const PyIntegerSet &set, intptr_t startIndex = 0, intptr_t length = -1, intptr_t step = 1) : Sliceable(startIndex, length == -1 ? 
mlirIntegerSetGetNumConstraints(set) : length, @@ -692,7 +696,8 @@ void mlir::python::populateIRAffine(py::module &m) { DefaultingPyMlirContext context) { SmallVector affineExprs; pyListToVector( - exprs, affineExprs, "attempting to create an AffineMap"); + std::move(exprs), affineExprs, + "attempting to create an AffineMap"); MlirAffineMap map = mlirAffineMapGet(context->get(), dimCount, symbolCount, affineExprs.size(), affineExprs.data()); diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index 56d16b337c07..fd44ffe6ba5f 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "IRModule.h" #include "PybindUtils.h" @@ -116,7 +118,7 @@ class PyArrayAttribute : public PyConcreteAttribute { class PyArrayAttributeIterator { public: - PyArrayAttributeIterator(PyAttribute attr) : attr(attr) {} + PyArrayAttributeIterator(PyAttribute attr) : attr(std::move(attr)) {} PyArrayAttributeIterator &dunderIter() { return *this; } @@ -459,7 +461,7 @@ class PyDenseElementsAttribute arrayInfo.format); } - static PyDenseElementsAttribute getSplat(PyType shapedType, + static PyDenseElementsAttribute getSplat(const PyType &shapedType, PyAttribute &elementAttr) { auto contextWrapper = PyMlirContext::forContext(mlirTypeGetContext(shapedType)); diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 864144226d45..ccdd159fd438 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -21,6 +21,8 @@ #include "llvm/ADT/SmallVector.h" #include +#include + namespace py = pybind11; using namespace mlir; using namespace mlir::python; @@ -176,7 +178,7 @@ static MlirStringRef toMlirStringRef(const std::string &s) { struct PyGlobalDebugFlag { static void set(py::object &o, bool enable) { 
mlirEnableGlobalDebug(enable); } - static bool get(py::object) { return mlirIsGlobalDebugEnabled(); } + static bool get(const py::object &) { return mlirIsGlobalDebugEnabled(); } static void bind(py::module &m) { // Debug flags. @@ -320,7 +322,7 @@ class PyBlockList { throw SetPyError(PyExc_IndexError, "attempt to access out of bounds block"); } - PyBlock appendBlock(py::args pyArgTypes) { + PyBlock appendBlock(const py::args &pyArgTypes) { operation->checkValid(); llvm::SmallVector argTypes; argTypes.reserve(pyArgTypes.size()); @@ -503,9 +505,9 @@ pybind11::object PyMlirContext::contextEnter() { return PyThreadContextEntry::pushContext(*this); } -void PyMlirContext::contextExit(pybind11::object excType, - pybind11::object excVal, - pybind11::object excTb) { +void PyMlirContext::contextExit(const pybind11::object &excType, + const pybind11::object &excVal, + const pybind11::object &excTb) { PyThreadContextEntry::popContext(*this); } @@ -689,8 +691,9 @@ py::object PyLocation::contextEnter() { return PyThreadContextEntry::pushLocation(*this); } -void PyLocation::contextExit(py::object excType, py::object excVal, - py::object excTb) { +void PyLocation::contextExit(const pybind11::object &excType, + const pybind11::object &excVal, + const pybind11::object &excTb) { PyThreadContextEntry::popLocation(*this); } @@ -945,11 +948,11 @@ py::object PyOperation::createFromCapsule(py::object capsule) { } py::object PyOperation::create( - std::string name, llvm::Optional> results, + const std::string &name, llvm::Optional> results, llvm::Optional> operands, llvm::Optional attributes, llvm::Optional> successors, int regions, - DefaultingPyLocation location, py::object maybeIp) { + DefaultingPyLocation location, const py::object &maybeIp) { llvm::SmallVector mlirOperands; llvm::SmallVector mlirResults; llvm::SmallVector mlirSuccessors; @@ -1105,7 +1108,7 @@ void PyOperation::erase() { //------------------------------------------------------------------------------ py::object 
-PyOpView::buildGeneric(py::object cls, py::list resultTypeList, +PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, py::list operandList, llvm::Optional attributes, llvm::Optional> successors, @@ -1359,16 +1362,17 @@ PyOpView::buildGeneric(py::object cls, py::list resultTypeList, /*operands=*/std::move(operands), /*attributes=*/std::move(attributes), /*successors=*/std::move(successors), - /*regions=*/*regions, location, maybeIp); + /*regions=*/*regions, location, + std::move(maybeIp)); } -PyOpView::PyOpView(py::object operationObject) +PyOpView::PyOpView(const py::object &operationObject) // Casting through the PyOperationBase base-class and then back to the // Operation lets us accept any PyOperationBase subclass. : operation(py::cast(operationObject).getOperation()), operationObject(operation.getRef().getObject()) {} -py::object PyOpView::createRawSubclass(py::object userClass) { +py::object PyOpView::createRawSubclass(const py::object &userClass) { // This is... a little gross. The typical pattern is to have a pure python // class that extends OpView like: // class AddFOp(_cext.ir.OpView): @@ -1465,9 +1469,9 @@ py::object PyInsertionPoint::contextEnter() { return PyThreadContextEntry::pushInsertionPoint(*this); } -void PyInsertionPoint::contextExit(pybind11::object excType, - pybind11::object excVal, - pybind11::object excTb) { +void PyInsertionPoint::contextExit(const pybind11::object &excType, + const pybind11::object &excVal, + const pybind11::object &excTb) { PyThreadContextEntry::popInsertionPoint(*this); } @@ -1954,7 +1958,8 @@ class PyOpResultList : public Sliceable { /// attributes, or by index, producing named attributes. 
class PyOpAttributeMap { public: - PyOpAttributeMap(PyOperationRef operation) : operation(operation) {} + PyOpAttributeMap(PyOperationRef operation) + : operation(std::move(operation)) {} PyAttribute dunderGetItemNamed(const std::string &name) { MlirAttribute attr = mlirOperationGetAttributeByName(operation->get(), @@ -1979,7 +1984,7 @@ class PyOpAttributeMap { mlirIdentifierStr(namedAttr.name).length)); } - void dunderSetItem(const std::string &name, PyAttribute attr) { + void dunderSetItem(const std::string &name, const PyAttribute &attr) { mlirOperationSetAttributeByName(operation->get(), toMlirStringRef(name), attr); } diff --git a/mlir/lib/Bindings/Python/IRInterfaces.cpp b/mlir/lib/Bindings/Python/IRInterfaces.cpp index 564f36b9dac3..1fc66fef468c 100644 --- a/mlir/lib/Bindings/Python/IRInterfaces.cpp +++ b/mlir/lib/Bindings/Python/IRInterfaces.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "IRModule.h" #include "mlir-c/BuiltinAttributes.h" #include "mlir-c/Interfaces.h" @@ -58,7 +60,7 @@ class PyConcreteOpInterface { /// operation or a subclass of OpView. In the latter case, only the static /// methods of the interface are accessible to the caller. PyConcreteOpInterface(py::object object, DefaultingPyMlirContext context) - : obj(object) { + : obj(std::move(object)) { try { operation = &py::cast(obj); } catch (py::cast_error &err) { diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index d5e8eb4aece5..df4aaebf3036 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -203,8 +203,9 @@ class PyMlirContext { /// Enter and exit the context manager. 
pybind11::object contextEnter(); - void contextExit(pybind11::object excType, pybind11::object excVal, - pybind11::object excTb); + void contextExit(const pybind11::object &excType, + const pybind11::object &excVal, + const pybind11::object &excTb); private: PyMlirContext(MlirContext context); @@ -316,8 +317,9 @@ class PyLocation : public BaseContextObject { /// Enter and exit the context manager. pybind11::object contextEnter(); - void contextExit(pybind11::object excType, pybind11::object excVal, - pybind11::object excTb); + void contextExit(const pybind11::object &excType, + const pybind11::object &excVal, + const pybind11::object &excTb); /// Gets a capsule wrapping the void* within the MlirLocation. pybind11::object getCapsule(); @@ -482,11 +484,11 @@ class PyOperation : public PyOperationBase, public BaseContextObject { /// Creates an operation. See corresponding python docstring. static pybind11::object - create(std::string name, llvm::Optional> results, + create(const std::string &name, llvm::Optional> results, llvm::Optional> operands, llvm::Optional attributes, llvm::Optional> successors, int regions, - DefaultingPyLocation location, pybind11::object ip); + DefaultingPyLocation location, const pybind11::object &ip); /// Creates an OpView suitable for this operation. pybind11::object createOpView(); @@ -524,15 +526,15 @@ class PyOperation : public PyOperationBase, public BaseContextObject { /// python types. 
class PyOpView : public PyOperationBase { public: - PyOpView(pybind11::object operationObject); + PyOpView(const pybind11::object &operationObject); PyOperation &getOperation() override { return operation; } - static pybind11::object createRawSubclass(pybind11::object userClass); + static pybind11::object createRawSubclass(const pybind11::object &userClass); pybind11::object getOperationObject() { return operationObject; } static pybind11::object - buildGeneric(pybind11::object cls, pybind11::list resultTypeList, + buildGeneric(const pybind11::object &cls, pybind11::list resultTypeList, pybind11::list operandList, llvm::Optional attributes, llvm::Optional> successors, @@ -607,8 +609,9 @@ class PyInsertionPoint { /// Enter and exit the context manager. pybind11::object contextEnter(); - void contextExit(pybind11::object excType, pybind11::object excVal, - pybind11::object excTb); + void contextExit(const pybind11::object &excType, + const pybind11::object &excVal, + const pybind11::object &excTb); PyBlock &getBlock() { return block; } diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 04262234ceaa..280f22b8525b 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -34,7 +34,7 @@ static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { template static arith::ConstantOp -createConstFromIntAttribute(Operation *op, std::string attrName, +createConstFromIntAttribute(Operation *op, const std::string &attrName, Type requiredAttrType, OpBuilder &rewriter) { auto castedN = static_cast( op->getAttr(attrName).cast().getValue().getSExtValue()); @@ -61,7 +61,38 @@ static mlir::SelectOp clampHelper(Location loc, Value arg, return rewriter.create(loc, largerThanMax, max, minOrArg); } -static SmallVector filterDynamicDims(SmallVector dynDims) { +static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, + Attribute padAttr, 
OpBuilder &rewriter) { + // Input should be padded if necessary. + if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) + return input; + + ShapedType inputTy = input.getType().cast(); + Type inputETy = inputTy.getElementType(); + auto inputShape = inputTy.getShape(); + + assert((inputShape.size() * 2) == pad.size()); + + SmallVector paddedShape; + SmallVector lowIndices; + SmallVector highIndices; + for (int i = 0, s = inputShape.size(); i < s; i++) { + auto lowPad = pad[i * 2]; + auto highPad = pad[i * 2 + 1]; + paddedShape.push_back(inputShape[i] + highPad + lowPad); + lowIndices.push_back(rewriter.getIndexAttr(lowPad)); + highIndices.push_back(rewriter.getIndexAttr(highPad)); + } + + Value padValue = rewriter.create(loc, padAttr); + + return linalg::PadTensorOp::createPadScalarOp( + RankedTensorType::get(paddedShape, inputETy), input, padValue, + lowIndices, highIndices, /*nofold=*/false, loc, rewriter) + .result(); +} + +static SmallVector filterDynamicDims(const SmallVector &dynDims) { SmallVector filteredDims; for (auto dim : dynDims) if (dim) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 641cd6f9324d..98f617082f21 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/CommonFolders.h" #include "mlir/IR/Builders.h" @@ -192,8 +194,8 @@ OpFoldResult arith::AddIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_Zero())) return getLhs(); - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a + b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) + b; }); } void arith::AddIOp::getCanonicalizationPatterns( @@ -214,8 +216,8 @@ OpFoldResult arith::SubIOp::fold(ArrayRef operands) { 
if (matchPattern(getRhs(), m_Zero())) return getLhs(); - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a - b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) - b; }); } void arith::SubIOp::getCanonicalizationPatterns( @@ -239,8 +241,8 @@ OpFoldResult arith::MulIOp::fold(ArrayRef operands) { // TODO: Handle the overflow case. // default folder - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a * b; }); + return constFoldBinaryOp( + operands, [](const APInt &a, const APInt &b) { return a * b; }); } //===----------------------------------------------------------------------===// @@ -250,13 +252,14 @@ OpFoldResult arith::MulIOp::fold(ArrayRef operands) { OpFoldResult arith::DivUIOp::fold(ArrayRef operands) { // Don't fold if it would require a division by zero. bool div0 = false; - auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { - if (div0 || !b) { - div0 = true; - return a; - } - return a.udiv(b); - }); + auto result = + constFoldBinaryOp(operands, [&](APInt a, const APInt &b) { + if (div0 || !b) { + div0 = true; + return a; + } + return a.udiv(b); + }); // Fold out division by one. Assumes all tensors of all ones are splats. if (auto rhs = operands[1].dyn_cast_or_null()) { @@ -277,13 +280,14 @@ OpFoldResult arith::DivUIOp::fold(ArrayRef operands) { OpFoldResult arith::DivSIOp::fold(ArrayRef operands) { // Don't fold if it would overflow or if it requires a division by zero. bool overflowOrDiv0 = false; - auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { - if (overflowOrDiv0 || !b) { - overflowOrDiv0 = true; - return a; - } - return a.sdiv_ov(b, overflowOrDiv0); - }); + auto result = + constFoldBinaryOp(operands, [&](APInt a, const APInt &b) { + if (overflowOrDiv0 || !b) { + overflowOrDiv0 = true; + return a; + } + return a.sdiv_ov(b, overflowOrDiv0); + }); // Fold out division by one. Assumes all tensors of all ones are splats. 
if (auto rhs = operands[1].dyn_cast_or_null()) { @@ -301,7 +305,8 @@ OpFoldResult arith::DivSIOp::fold(ArrayRef operands) { // Ceil and floor division folding helpers //===----------------------------------------------------------------------===// -static APInt signedCeilNonnegInputs(APInt a, APInt b, bool &overflow) { +static APInt signedCeilNonnegInputs(const APInt &a, const APInt &b, + bool &overflow) { // Returns (a-1)/b + 1 APInt one(a.getBitWidth(), 1, true); // Signed value 1. APInt val = a.ssub_ov(one, overflow).sdiv_ov(b, overflow); @@ -314,17 +319,18 @@ static APInt signedCeilNonnegInputs(APInt a, APInt b, bool &overflow) { OpFoldResult arith::CeilDivUIOp::fold(ArrayRef operands) { bool overflowOrDiv0 = false; - auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { - if (overflowOrDiv0 || !b) { - overflowOrDiv0 = true; - return a; - } - APInt quotient = a.udiv(b); - if (!a.urem(b)) - return quotient; - APInt one(a.getBitWidth(), 1, true); - return quotient.uadd_ov(one, overflowOrDiv0); - }); + auto result = + constFoldBinaryOp(operands, [&](APInt a, const APInt &b) { + if (overflowOrDiv0 || !b) { + overflowOrDiv0 = true; + return a; + } + APInt quotient = a.udiv(b); + if (!a.urem(b)) + return quotient; + APInt one(a.getBitWidth(), 1, true); + return quotient.uadd_ov(one, overflowOrDiv0); + }); // Fold out ceil division by one. Assumes all tensors of all ones are // splats. if (auto rhs = operands[1].dyn_cast_or_null()) { @@ -345,34 +351,35 @@ OpFoldResult arith::CeilDivUIOp::fold(ArrayRef operands) { OpFoldResult arith::CeilDivSIOp::fold(ArrayRef operands) { // Don't fold if it would overflow or if it requires a division by zero. bool overflowOrDiv0 = false; - auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { - if (overflowOrDiv0 || !b) { - overflowOrDiv0 = true; - return a; - } - unsigned bits = a.getBitWidth(); - APInt zero = APInt::getZero(bits); - if (a.sgt(zero) && b.sgt(zero)) { - // Both positive, return ceil(a, b). 
- return signedCeilNonnegInputs(a, b, overflowOrDiv0); - } - if (a.slt(zero) && b.slt(zero)) { - // Both negative, return ceil(-a, -b). - APInt posA = zero.ssub_ov(a, overflowOrDiv0); - APInt posB = zero.ssub_ov(b, overflowOrDiv0); - return signedCeilNonnegInputs(posA, posB, overflowOrDiv0); - } - if (a.slt(zero) && b.sgt(zero)) { - // A is negative, b is positive, return - ( -a / b). - APInt posA = zero.ssub_ov(a, overflowOrDiv0); - APInt div = posA.sdiv_ov(b, overflowOrDiv0); - return zero.ssub_ov(div, overflowOrDiv0); - } - // A is positive (or zero), b is negative, return - (a / -b). - APInt posB = zero.ssub_ov(b, overflowOrDiv0); - APInt div = a.sdiv_ov(posB, overflowOrDiv0); - return zero.ssub_ov(div, overflowOrDiv0); - }); + auto result = + constFoldBinaryOp(operands, [&](APInt a, const APInt &b) { + if (overflowOrDiv0 || !b) { + overflowOrDiv0 = true; + return a; + } + unsigned bits = a.getBitWidth(); + APInt zero = APInt::getZero(bits); + if (a.sgt(zero) && b.sgt(zero)) { + // Both positive, return ceil(a, b). + return signedCeilNonnegInputs(a, b, overflowOrDiv0); + } + if (a.slt(zero) && b.slt(zero)) { + // Both negative, return ceil(-a, -b). + APInt posA = zero.ssub_ov(a, overflowOrDiv0); + APInt posB = zero.ssub_ov(b, overflowOrDiv0); + return signedCeilNonnegInputs(posA, posB, overflowOrDiv0); + } + if (a.slt(zero) && b.sgt(zero)) { + // A is negative, b is positive, return - ( -a / b). + APInt posA = zero.ssub_ov(a, overflowOrDiv0); + APInt div = posA.sdiv_ov(b, overflowOrDiv0); + return zero.ssub_ov(div, overflowOrDiv0); + } + // A is positive (or zero), b is negative, return - (a / -b). + APInt posB = zero.ssub_ov(b, overflowOrDiv0); + APInt div = a.sdiv_ov(posB, overflowOrDiv0); + return zero.ssub_ov(div, overflowOrDiv0); + }); // Fold out ceil division by one. Assumes all tensors of all ones are // splats. 
@@ -394,34 +401,35 @@ OpFoldResult arith::CeilDivSIOp::fold(ArrayRef operands) { OpFoldResult arith::FloorDivSIOp::fold(ArrayRef operands) { // Don't fold if it would overflow or if it requires a division by zero. bool overflowOrDiv0 = false; - auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { - if (overflowOrDiv0 || !b) { - overflowOrDiv0 = true; - return a; - } - unsigned bits = a.getBitWidth(); - APInt zero = APInt::getZero(bits); - if (a.sge(zero) && b.sgt(zero)) { - // Both positive (or a is zero), return a / b. - return a.sdiv_ov(b, overflowOrDiv0); - } - if (a.sle(zero) && b.slt(zero)) { - // Both negative (or a is zero), return -a / -b. - APInt posA = zero.ssub_ov(a, overflowOrDiv0); - APInt posB = zero.ssub_ov(b, overflowOrDiv0); - return posA.sdiv_ov(posB, overflowOrDiv0); - } - if (a.slt(zero) && b.sgt(zero)) { - // A is negative, b is positive, return - ceil(-a, b). - APInt posA = zero.ssub_ov(a, overflowOrDiv0); - APInt ceil = signedCeilNonnegInputs(posA, b, overflowOrDiv0); - return zero.ssub_ov(ceil, overflowOrDiv0); - } - // A is positive, b is negative, return - ceil(a, -b). - APInt posB = zero.ssub_ov(b, overflowOrDiv0); - APInt ceil = signedCeilNonnegInputs(a, posB, overflowOrDiv0); - return zero.ssub_ov(ceil, overflowOrDiv0); - }); + auto result = + constFoldBinaryOp(operands, [&](APInt a, const APInt &b) { + if (overflowOrDiv0 || !b) { + overflowOrDiv0 = true; + return a; + } + unsigned bits = a.getBitWidth(); + APInt zero = APInt::getZero(bits); + if (a.sge(zero) && b.sgt(zero)) { + // Both positive (or a is zero), return a / b. + return a.sdiv_ov(b, overflowOrDiv0); + } + if (a.sle(zero) && b.slt(zero)) { + // Both negative (or a is zero), return -a / -b. + APInt posA = zero.ssub_ov(a, overflowOrDiv0); + APInt posB = zero.ssub_ov(b, overflowOrDiv0); + return posA.sdiv_ov(posB, overflowOrDiv0); + } + if (a.slt(zero) && b.sgt(zero)) { + // A is negative, b is positive, return - ceil(-a, b). 
+ APInt posA = zero.ssub_ov(a, overflowOrDiv0); + APInt ceil = signedCeilNonnegInputs(posA, b, overflowOrDiv0); + return zero.ssub_ov(ceil, overflowOrDiv0); + } + // A is positive, b is negative, return - ceil(a, -b). + APInt posB = zero.ssub_ov(b, overflowOrDiv0); + APInt ceil = signedCeilNonnegInputs(a, posB, overflowOrDiv0); + return zero.ssub_ov(ceil, overflowOrDiv0); + }); // Fold out floor division by one. Assumes all tensors of all ones are // splats. @@ -497,8 +505,8 @@ OpFoldResult arith::AndIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_ConstantInt(&intValue)) && intValue.isAllOnes()) return getLhs(); - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a & b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) & b; }); } //===----------------------------------------------------------------------===// @@ -514,8 +522,8 @@ OpFoldResult arith::OrIOp::fold(ArrayRef operands) { if (rhsAttr.getValue().isAllOnes()) return rhsAttr; - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a | b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) | b; }); } //===----------------------------------------------------------------------===// @@ -530,8 +538,8 @@ OpFoldResult arith::XOrIOp::fold(ArrayRef operands) { if (getLhs() == getRhs()) return Builder(getContext()).getZeroAttr(getType()); - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a ^ b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) ^ b; }); } void arith::XOrIOp::getCanonicalizationPatterns( @@ -545,7 +553,7 @@ void arith::XOrIOp::getCanonicalizationPatterns( OpFoldResult arith::AddFOp::fold(ArrayRef operands) { return constFoldBinaryOp( - operands, [](APFloat a, APFloat b) { return a + b; }); + operands, [](const APFloat &a, const APFloat &b) { return a + b; }); } 
//===----------------------------------------------------------------------===// @@ -554,7 +562,7 @@ OpFoldResult arith::AddFOp::fold(ArrayRef operands) { OpFoldResult arith::SubFOp::fold(ArrayRef operands) { return constFoldBinaryOp( - operands, [](APFloat a, APFloat b) { return a - b; }); + operands, [](const APFloat &a, const APFloat &b) { return a - b; }); } //===----------------------------------------------------------------------===// @@ -579,8 +587,10 @@ OpFoldResult MaxSIOp::fold(ArrayRef operands) { intValue.isMinSignedValue()) return getLhs(); - return constFoldBinaryOp( - operands, [](APInt a, APInt b) { return llvm::APIntOps::smax(a, b); }); + return constFoldBinaryOp(operands, + [](const APInt &a, const APInt &b) { + return llvm::APIntOps::smax(a, b); + }); } //===----------------------------------------------------------------------===// @@ -603,8 +613,10 @@ OpFoldResult MaxUIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_ConstantInt(&intValue)) && intValue.isMinValue()) return getLhs(); - return constFoldBinaryOp( - operands, [](APInt a, APInt b) { return llvm::APIntOps::umax(a, b); }); + return constFoldBinaryOp(operands, + [](const APInt &a, const APInt &b) { + return llvm::APIntOps::umax(a, b); + }); } //===----------------------------------------------------------------------===// @@ -629,8 +641,10 @@ OpFoldResult MinSIOp::fold(ArrayRef operands) { intValue.isMaxSignedValue()) return getLhs(); - return constFoldBinaryOp( - operands, [](APInt a, APInt b) { return llvm::APIntOps::smin(a, b); }); + return constFoldBinaryOp(operands, + [](const APInt &a, const APInt &b) { + return llvm::APIntOps::smin(a, b); + }); } //===----------------------------------------------------------------------===// @@ -653,8 +667,10 @@ OpFoldResult MinUIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_ConstantInt(&intValue)) && intValue.isMaxValue()) return getLhs(); - return constFoldBinaryOp( - operands, [](APInt a, APInt b) { return 
llvm::APIntOps::umin(a, b); }); + return constFoldBinaryOp(operands, + [](const APInt &a, const APInt &b) { + return llvm::APIntOps::umin(a, b); + }); } //===----------------------------------------------------------------------===// @@ -663,7 +679,7 @@ OpFoldResult MinUIOp::fold(ArrayRef operands) { OpFoldResult arith::MulFOp::fold(ArrayRef operands) { return constFoldBinaryOp( - operands, [](APFloat a, APFloat b) { return a * b; }); + operands, [](const APFloat &a, const APFloat &b) { return a * b; }); } //===----------------------------------------------------------------------===// @@ -672,7 +688,7 @@ OpFoldResult arith::MulFOp::fold(ArrayRef operands) { OpFoldResult arith::DivFOp::fold(ArrayRef operands) { return constFoldBinaryOp( - operands, [](APFloat a, APFloat b) { return a / b; }); + operands, [](const APFloat &a, const APFloat &b) { return a / b; }); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp index f0b4244c9f4f..db09c11574ac 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "PassDetail.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/Async/IR/Async.h" @@ -111,7 +113,7 @@ struct AsyncParallelForRewrite : public OpRewritePattern { AsyncMinTaskSizeComputationFunction computeMinTaskSize) : OpRewritePattern(ctx), asyncDispatch(asyncDispatch), numWorkerThreads(numWorkerThreads), - computeMinTaskSize(computeMinTaskSize) {} + computeMinTaskSize(std::move(computeMinTaskSize)) {} LogicalResult matchAndRewrite(scf::ParallelOp op, PatternRewriter &rewriter) const override; @@ -244,7 +246,7 @@ getParallelComputeFunctionType(scf::ParallelOp op, PatternRewriter &rewriter) { // 
Create a parallel compute fuction from the parallel operation. static ParallelComputeFunction createParallelComputeFunction( - scf::ParallelOp op, ParallelComputeFunctionBounds bounds, + scf::ParallelOp op, const ParallelComputeFunctionBounds &bounds, unsigned numBlockAlignedInnerLoops, PatternRewriter &rewriter) { OpBuilder::InsertionGuard guard(rewriter); ImplicitLocOpBuilder b(op.getLoc(), rewriter); @@ -902,7 +904,7 @@ std::unique_ptr mlir::createAsyncParallelForPass(bool asyncDispatch, void mlir::async::populateAsyncParallelForPatterns( RewritePatternSet &patterns, bool asyncDispatch, int32_t numWorkerThreads, - AsyncMinTaskSizeComputationFunction computeMinTaskSize) { + const AsyncMinTaskSizeComputationFunction &computeMinTaskSize) { MLIRContext *ctx = patterns.getContext(); patterns.add(ctx, asyncDispatch, numWorkerThreads, computeMinTaskSize); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp index 49a39b621f37..33c8793cbc25 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp @@ -64,7 +64,7 @@ using namespace mlir; /// Walks over all immediate return-like terminators in the given region. static LogicalResult walkReturnOperations(Region *region, - std::function func) { + llvm::function_ref func) { for (Block &block : *region) { Operation *terminator = block.getTerminator(); // Skip non region-return-like terminators. diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index 1a03867a18cb..ac5272348438 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -78,7 +78,8 @@ static bool isSinkingBeneficiary(Operation *op) { /// is updated with results that will be available after sinking the identified /// ops. 
static bool -extractBeneficiaryOps(Operation *op, SetVector existingDependencies, +extractBeneficiaryOps(Operation *op, + const SetVector &existingDependencies, SetVector &beneficiaryOps, llvm::SmallPtrSetImpl &availableValues) { if (beneficiaryOps.count(op)) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 5b64a29974ce..66157371f30c 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -313,7 +313,7 @@ static void printSwitchOpCases(OpAsmPrinter &p, SwitchOp op, Type flagType, ElementsAttr caseValues, SuccessorRange caseDestinations, OperandRangeRange caseOperands, - TypeRangeRange caseOperandTypes) { + const TypeRangeRange &caseOperandTypes) { if (!caseValues) return; @@ -361,7 +361,8 @@ SwitchOp::getMutableSuccessorOperands(unsigned index) { LogicalResult verifySymbolAttribute( Operation *op, StringRef attributeName, - std::function verifySymbolType) { + llvm::function_ref + verifySymbolType) { if (Attribute attribute = op->getAttr(attributeName)) { // The attribute is already verified to be a symbol ref array attribute via // a constraint in the operation definition. 
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp index 31ebbac73a27..f7d22251eadb 100644 --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp @@ -285,7 +285,7 @@ bool mlir::linalg::comprehensive_bufferize::BufferizationState::isValueRead( llvm::SetVector mlir::linalg::comprehensive_bufferize::BufferizationState:: findValueInReverseUseDefChain(Value value, - std::function condition) { + llvm::function_ref condition) { llvm::SetVector result, workingSet; workingSet.insert(value); diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp index 8a2630c29b7f..5db5deb6aee6 100644 --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp @@ -296,7 +296,7 @@ struct ForOpInterface // Given a range of values, apply `func` to those marked in `indices`. // Otherwise, store the unmodified value in the result vector. auto convert = [&](ValueRange values, - std::function func) { + llvm::function_ref func) { SmallVector result; for (const auto &it : llvm::enumerate(values)) { size_t idx = it.index(); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index e7ddee95f387..fe3363d7d0de 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -50,7 +50,7 @@ template static void fillStructuredOpRegion( OpBuilder &opBuilder, Region ®ion, TypeRange inputTypes, TypeRange outputTypes, - std::function errorHandler = nullptr); + llvm::function_ref errorHandler = nullptr); /// Generic entry point to create both the region and the block of a LinalgOp. 
template @@ -323,7 +323,7 @@ class RegionBuilderHelper { builder.create(first.getLoc(), values); } - Value constant(std::string value) { + Value constant(const std::string &value) { OpBuilder builder = getBuilder(); Location loc = builder.getUnknownLoc(); Attribute valueAttr = parseAttribute(value, builder.getContext()); @@ -2406,10 +2406,10 @@ std::string mlir::linalg::generateLibraryCallName(Operation *op) { /// to the elemental types of `inputTypes` and `outputTypes`, which are asserted /// to be ShapedType. template -static void -fillStructuredOpRegion(OpBuilder &opBuilder, Region ®ion, - TypeRange inputTypes, TypeRange outputTypes, - std::function errorHandler) { +static void fillStructuredOpRegion( + OpBuilder &opBuilder, Region ®ion, TypeRange inputTypes, + TypeRange outputTypes, + llvm::function_ref errorHandler) { assert(llvm::all_of(outputTypes, [](Type t) { return t.isa(); })); // TODO: atm all operands go through getElementTypeOrSelf, diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp index a20da7ffa207..8812dd4dff76 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -16,6 +16,7 @@ #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include #include +#include using namespace mlir; using namespace mlir::linalg; @@ -97,7 +98,7 @@ struct FunctionNonEntryBlockConversion : public ConversionPattern { : ConversionPattern(converter, MatchTraitOpTypeTag(), TypeID::get(), /*benefit=*/1, ctx), - blockArgsToDetensor(blockArgsToDetensor) {} + blockArgsToDetensor(std::move(blockArgsToDetensor)) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, diff --git a/mlir/lib/Dialect/Linalg/Transforms/Distribution.cpp b/mlir/lib/Dialect/Linalg/Transforms/Distribution.cpp index e951d6882022..692df291b2f6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Distribution.cpp +++ 
b/mlir/lib/Dialect/Linalg/Transforms/Distribution.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// // +#include + #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/MLIRContext.h" @@ -31,8 +33,8 @@ struct DistributeTiledLoopPattern DistributeTiledLoopPattern(MLIRContext *context, LinalgLoopDistributionOptions options, LinalgTransformationFilter marker) - : OpRewritePattern(context), options(options), - marker(marker) {} + : OpRewritePattern(context), + options(std::move(options)), marker(std::move(marker)) {} LogicalResult matchAndRewrite(linalg::TiledLoopOp op, PatternRewriter &rewriter) const override { if (failed(marker.checkAndNotify(rewriter, op))) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index 34ee7a9714f8..795a23d7b1d8 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -9,6 +9,8 @@ // This file implements the linalg dialect Fusion on tensors operations pass. 
// //===----------------------------------------------------------------------===// +#include + #include "PassDetail.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" @@ -1078,7 +1080,7 @@ class FoldWithProducerReshapeOpByExpansion MLIRContext *context, ControlElementwiseOpsFusionFn foldReshapes, PatternBenefit benefit = 1) : OpRewritePattern(context, benefit), - controlFoldingReshapes(foldReshapes) {} + controlFoldingReshapes(std::move(foldReshapes)) {} LogicalResult matchAndRewrite(GenericOp genericOp, PatternRewriter &rewriter) const override { @@ -1181,7 +1183,7 @@ struct FoldReshapeWithGenericOpByExpansion MLIRContext *context, ControlElementwiseOpsFusionFn foldReshapes, PatternBenefit benefit = 1) : OpRewritePattern(context, benefit), - controlFoldingReshapes(foldReshapes) {} + controlFoldingReshapes(std::move(foldReshapes)) {} LogicalResult matchAndRewrite(tensor::ExpandShapeOp reshapeOp, PatternRewriter &rewriter) const override { @@ -1755,7 +1757,7 @@ void mlir::linalg::populateFoldUnitDimsReshapeOpsByLinearizationPatterns( void mlir::linalg::populateFoldReshapeOpsByExpansionPatterns( RewritePatternSet &patterns, - ControlElementwiseOpsFusionFn controlFoldingReshapes) { + const ControlElementwiseOpsFusionFn &controlFoldingReshapes) { patterns.add(patterns.getContext(), controlFoldingReshapes); patterns.add(patterns.getContext(), diff --git a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp index 85add4f8dc9d..a42ac8d81c4b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp @@ -78,7 +78,7 @@ void LinalgGeneralizationPass::runOnFunction() { } void mlir::linalg::populateLinalgNamedOpsGeneralizationPatterns( - RewritePatternSet &patterns, LinalgTransformationFilter marker) { + RewritePatternSet &patterns, const LinalgTransformationFilter &marker) { patterns.add(patterns.getContext(), 
marker); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp index d9e4c578b6d6..136f38feedf3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "PassDetail.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -46,7 +48,7 @@ struct LinalgStrategyTileAndFusePass LinalgStrategyTileAndFusePass(StringRef opName, LinalgTilingAndFusionOptions opt, LinalgTransformationFilter filt) - : options(opt), filter(filt) { + : options(std::move(opt)), filter(std::move(filt)) { this->anchorOpName.setValue(opName.str()); } @@ -82,7 +84,7 @@ struct LinalgStrategyTilePass LinalgStrategyTilePass(StringRef opName, LinalgTilingOptions opt, LinalgTransformationFilter filt) - : options(opt), filter(filt) { + : options(std::move(opt)), filter(std::move(filt)) { this->anchorOpName.setValue(opName.str()); } @@ -114,7 +116,7 @@ struct LinalgStrategyPadPass LinalgStrategyPadPass(StringRef opName, LinalgPaddingOptions opt, LinalgTransformationFilter filt) - : options(opt), filter(filt) { + : options(std::move(opt)), filter(std::move(filt)) { this->anchorOpName.setValue(opName.str()); } @@ -146,7 +148,7 @@ struct LinalgStrategyGeneralizePass LinalgStrategyGeneralizePass(StringRef opName, LinalgTransformationFilter filter) - : filter(filter) { + : filter(std::move(filter)) { this->anchorOpName.setValue(opName.str()); } @@ -179,7 +181,7 @@ struct LinalgStrategyDecomposePass LinalgStrategyDecomposePass() = default; LinalgStrategyDecomposePass(LinalgTransformationFilter filter) - : filter(filter) {} + : filter(std::move(filter)) {} void runOnFunction() override { auto funcOp = getFunction(); @@ -205,7 +207,7 @@ struct LinalgStrategyInterchangePass 
LinalgTransformationFilter filter) : iteratorInterchange(iteratorInterchange.begin(), iteratorInterchange.end()), - filter(filter) {} + filter(std::move(filter)) {} void runOnFunction() override { auto funcOp = getFunction(); @@ -234,7 +236,7 @@ struct LinalgStrategyPromotePass LinalgStrategyPromotePass(StringRef opName, LinalgPromotionOptions opt, LinalgTransformationFilter filt) - : options(opt), filter(filt) { + : options(std::move(opt)), filter(std::move(filt)) { this->anchorOpName.setValue(opName.str()); } @@ -267,7 +269,7 @@ struct LinalgStrategyVectorizePass LinalgStrategyVectorizePass(StringRef opName, LinalgVectorizationOptions opt, LinalgTransformationFilter filt, bool padVectorize = false) - : options(opt), filter(filt) { + : options(opt), filter(std::move(filt)) { this->anchorOpName.setValue(opName.str()); this->vectorizePadding.setValue(padVectorize); } @@ -320,7 +322,7 @@ struct LinalgStrategyEnablePass LinalgStrategyEnablePass(LinalgEnablingOptions opt, LinalgTransformationFilter filt) - : options(opt), filter(filt) {} + : options(opt), filter(std::move(filt)) {} void runOnFunction() override { auto funcOp = getFunction(); @@ -370,7 +372,7 @@ struct LinalgStrategyLowerVectorsPass LinalgStrategyLowerVectorsPass(LinalgVectorLoweringOptions opt, LinalgTransformationFilter filt) - : options(opt), filter(filt) {} + : options(opt), filter(std::move(filt)) {} void runOnFunction() override { auto funcOp = getFunction(); @@ -446,53 +448,54 @@ struct LinalgStrategyRemoveMarkersPass /// Create a LinalgStrategyTileAndFusePass. std::unique_ptr> -mlir::createLinalgStrategyTileAndFusePass(StringRef opName, - LinalgTilingAndFusionOptions options, - LinalgTransformationFilter filter) { +mlir::createLinalgStrategyTileAndFusePass( + StringRef opName, const LinalgTilingAndFusionOptions &options, + const LinalgTransformationFilter &filter) { return std::make_unique(opName, options, filter); } /// Create a LinalgStrategyTilePass. 
std::unique_ptr> -mlir::createLinalgStrategyTilePass(StringRef opName, LinalgTilingOptions opt, - LinalgTransformationFilter filter) { +mlir::createLinalgStrategyTilePass(StringRef opName, + const LinalgTilingOptions &opt, + const LinalgTransformationFilter &filter) { return std::make_unique(opName, opt, filter); } /// Create a LinalgStrategyPadPass. std::unique_ptr> -mlir::createLinalgStrategyPadPass(StringRef opName, LinalgPaddingOptions opt, - LinalgTransformationFilter filter) { +mlir::createLinalgStrategyPadPass(StringRef opName, + const LinalgPaddingOptions &opt, + const LinalgTransformationFilter &filter) { return std::make_unique(opName, opt, filter); } /// Create a LinalgStrategyPromotePass. -std::unique_ptr> -mlir::createLinalgStrategyPromotePass(StringRef opName, - LinalgPromotionOptions opt, - LinalgTransformationFilter filter) { +std::unique_ptr> mlir::createLinalgStrategyPromotePass( + StringRef opName, const LinalgPromotionOptions &opt, + const LinalgTransformationFilter &filter) { return std::make_unique(opName, opt, filter); } /// Create a LinalgStrategyGeneralizePass. -std::unique_ptr> -mlir::createLinalgStrategyGeneralizePass(StringRef opName, - LinalgTransformationFilter filter) { +std::unique_ptr> mlir::createLinalgStrategyGeneralizePass( + StringRef opName, const LinalgTransformationFilter &filter) { return std::make_unique(opName, filter); } /// Create a LinalgStrategyDecomposePass. // TODO: if/when we need finer control add an `opName` parameter. -std::unique_ptr> -mlir::createLinalgStrategyDecomposePass(LinalgTransformationFilter filter) { +std::unique_ptr> mlir::createLinalgStrategyDecomposePass( + const LinalgTransformationFilter &filter) { return std::make_unique(filter); } /// Create a LinalgStrategyInterchangePass. 
std::unique_ptr> -mlir::createLinalgStrategyInterchangePass(ArrayRef iteratorInterchange, - LinalgTransformationFilter filter) { +mlir::createLinalgStrategyInterchangePass( + ArrayRef iteratorInterchange, + const LinalgTransformationFilter &filter) { return std::make_unique(iteratorInterchange, filter); } @@ -500,7 +503,7 @@ mlir::createLinalgStrategyInterchangePass(ArrayRef iteratorInterchange, /// Create a LinalgStrategyVectorizePass. std::unique_ptr> mlir::createLinalgStrategyVectorizePass( StringRef opName, LinalgVectorizationOptions opt, - LinalgTransformationFilter filter, bool padVectorize) { + const LinalgTransformationFilter &filter, bool padVectorize) { return std::make_unique(opName, opt, filter, padVectorize); } @@ -508,14 +511,14 @@ std::unique_ptr> mlir::createLinalgStrategyVectorizePass( /// Create a LinalgStrategyEnablePass. std::unique_ptr> mlir::createLinalgStrategyEnablePass(LinalgEnablingOptions opt, - LinalgTransformationFilter filter) { + const LinalgTransformationFilter &filter) { return std::make_unique(opt, filter); } /// Create a LinalgStrategyLowerVectorsPass. std::unique_ptr> -mlir::createLinalgStrategyLowerVectorsPass(LinalgVectorLoweringOptions opt, - LinalgTransformationFilter filter) { +mlir::createLinalgStrategyLowerVectorsPass( + LinalgVectorLoweringOptions opt, const LinalgTransformationFilter &filter) { return std::make_unique(opt, filter); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 73f766f578ac..7fa2aed8dfd8 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -212,7 +212,7 @@ LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( // by a partial `copy` op. 
FailureOr mlir::linalg::promoteSubviewAsNewBuffer( OpBuilder &b, Location loc, memref::SubViewOp subView, - AllocBufferCallbackFn allocationFn, DataLayout &layout) { + const AllocBufferCallbackFn &allocationFn, DataLayout &layout) { auto viewType = subView.getType(); auto rank = viewType.getRank(); SmallVector fullSizes; @@ -379,7 +379,7 @@ mlir::linalg::promoteSubviewsPrecondition(Operation *op, FailureOr mlir::linalg::promoteSubViews(OpBuilder &builder, LinalgOp linalgOp, - LinalgPromotionOptions options) { + const LinalgPromotionOptions &options) { LinalgOpInstancePromotionOptions linalgOptions(linalgOp, options); auto layout = DataLayout::closest(linalgOp); ImplicitLocOpBuilder b(linalgOp.getLoc(), builder); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 21a76e1d84f1..b4d2860101fd 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "PassDetail.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Passes.h" @@ -392,7 +394,7 @@ static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op, namespace { struct PadTensorOpTilingPattern : public OpRewritePattern { PadTensorOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt) - : OpRewritePattern(ctx), options(opt) {} + : OpRewritePattern(ctx), options(std::move(opt)) {} LogicalResult matchAndRewrite(PadTensorOp op, PatternRewriter &rewriter) const override { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 6ec237c56100..6d2af0c1cece 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include +#include #define 
DEBUG_TYPE "linalg-transforms" @@ -54,7 +55,7 @@ mlir::linalg::LinalgTransformationFilter::LinalgTransformationFilter( replacement(replacement), matchByDefault(false) {} mlir::linalg::LinalgTransformationFilter::LinalgTransformationFilter( - FilterFunction f, ArrayRef matchDisjunction, + const FilterFunction &f, ArrayRef matchDisjunction, Optional replacement) : filters(), matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), @@ -287,14 +288,14 @@ linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( StringRef opName, MLIRContext *context, LinalgTilingOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(opName, benefit, context), filter(filter), - options(options) {} + : RewritePattern(opName, benefit, context), filter(std::move(filter)), + options(std::move(options)) {} mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( MLIRContext *context, LinalgTilingOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter), - options(options) {} + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), + filter(std::move(filter)), options(std::move(options)) {} /// Try to peel a loop `op` and return the new result. // TODO: Add support for scf.parallel and affine.for loops. 
@@ -393,9 +394,10 @@ mlir::linalg::LinalgBaseTileAndFusePattern::LinalgBaseTileAndFusePattern( LinalgTransformationFilter filter, LinalgTransformationFilter fusedOpMarker, LinalgTransformationFilter originalOpMarker, PatternBenefit benefit) : RewritePattern(opName, benefit, context, {}), - dependenceGraph(dependenceGraph), tilingOptions(tilingOptions), - fusionOptions(fusionOptions), filter(filter), - fusedOpMarker(fusedOpMarker), originalOpMarker(originalOpMarker) {} + dependenceGraph(dependenceGraph), tilingOptions(std::move(tilingOptions)), + fusionOptions(std::move(fusionOptions)), filter(std::move(filter)), + fusedOpMarker(std::move(fusedOpMarker)), + originalOpMarker(std::move(originalOpMarker)) {} LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -487,14 +489,14 @@ LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( mlir::linalg::LinalgPaddingPattern::LinalgPaddingPattern( MLIRContext *context, LinalgPaddingOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter), - options(options) {} + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), + filter(std::move(filter)), options(std::move(options)) {} mlir::linalg::LinalgPaddingPattern::LinalgPaddingPattern( StringRef opName, MLIRContext *context, LinalgPaddingOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(opName, benefit, context, {}), filter(filter), - options(options) {} + : RewritePattern(opName, benefit, context, {}), filter(std::move(filter)), + options(std::move(options)) {} LogicalResult mlir::linalg::LinalgPaddingPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -547,16 +549,16 @@ mlir::linalg::LinalgTileAndFuseTensorOpsPattern:: LinalgTilingAndFusionOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : 
RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter), - options(options) {} + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), + filter(std::move(filter)), options(std::move(options)) {} mlir::linalg::LinalgTileAndFuseTensorOpsPattern:: LinalgTileAndFuseTensorOpsPattern(StringRef opName, MLIRContext *context, LinalgTilingAndFusionOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(opName, benefit, context), filter(filter), - options(options) {} + : RewritePattern(opName, benefit, context), filter(std::move(filter)), + options(std::move(options)) {} LogicalResult mlir::linalg::LinalgTileAndFuseTensorOpsPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -614,7 +616,7 @@ LogicalResult mlir::linalg::LinalgTileAndFuseTensorOpsPattern::matchAndRewrite( mlir::linalg::GenericOpInterchangePattern::GenericOpInterchangePattern( MLIRContext *context, ArrayRef interchangeVector, LinalgTransformationFilter filter, PatternBenefit benefit) - : OpRewritePattern(context, benefit), filter(filter), + : OpRewritePattern(context, benefit), filter(std::move(filter)), interchangeVector(interchangeVector.begin(), interchangeVector.end()) {} LogicalResult mlir::linalg::GenericOpInterchangePattern::matchAndRewrite( @@ -638,12 +640,13 @@ LogicalResult mlir::linalg::GenericOpInterchangePattern::matchAndRewrite( mlir::linalg::LinalgGeneralizationPattern::LinalgGeneralizationPattern( MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter) {} + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), + filter(std::move(filter)) {} mlir::linalg::LinalgGeneralizationPattern::LinalgGeneralizationPattern( StringRef opName, MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(opName, benefit, context, {}), filter(filter) {} + : RewritePattern(opName, benefit, 
context, {}), filter(std::move(filter)) {} LogicalResult mlir::linalg::LinalgGeneralizationPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -661,14 +664,14 @@ LogicalResult mlir::linalg::LinalgGeneralizationPattern::matchAndRewrite( mlir::linalg::LinalgBasePromotionPattern::LinalgBasePromotionPattern( MLIRContext *context, LinalgTransformationFilter filter, LinalgPromotionOptions options, PatternBenefit benefit) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter), - options(options) {} + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), + filter(std::move(filter)), options(std::move(options)) {} mlir::linalg::LinalgBasePromotionPattern::LinalgBasePromotionPattern( StringRef opName, MLIRContext *context, LinalgPromotionOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(opName, benefit, context, {}), filter(filter), - options(options) {} + : RewritePattern(opName, benefit, context, {}), filter(std::move(filter)), + options(std::move(options)) {} LogicalResult mlir::linalg::LinalgBasePromotionPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -695,12 +698,13 @@ LogicalResult mlir::linalg::LinalgBasePromotionPattern::matchAndRewrite( mlir::linalg::LinalgBaseVectorizationPattern::LinalgBaseVectorizationPattern( MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter) {} + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), + filter(std::move(filter)) {} mlir::linalg::LinalgBaseVectorizationPattern::LinalgBaseVectorizationPattern( StringRef opName, MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) - : RewritePattern(opName, benefit, context, {}), filter(filter) {} + : RewritePattern(opName, benefit, context, {}), filter(std::move(filter)) {} LogicalResult 
mlir::linalg::LinalgBaseVectorizationPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -942,7 +946,8 @@ struct DownscaleSizeOneWindowed2DConvolution final MLIRContext *context, LinalgTransformationFilter filter = LinalgTransformationFilter(), PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), filter(filter) {} + : OpRewritePattern(context, benefit), + filter(std::move(filter)) {} LogicalResult matchAndRewrite(linalg::Conv2DNhwcHwcfOp convOp, PatternRewriter &rewriter) const override { @@ -1028,7 +1033,7 @@ struct DownscaleDepthwiseConv2DNhwcHwcOp final LinalgTransformationFilter filter = LinalgTransformationFilter(), PatternBenefit benefit = 1) : OpRewritePattern(context, benefit), - filter(filter) {} + filter(std::move(filter)) {} LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override { @@ -1108,7 +1113,7 @@ struct DownscaleDepthwiseConv2DNhwcHwcOp final } // namespace void linalg::populateDecomposeConvolutionPatterns( - RewritePatternSet &patterns, LinalgTransformationFilter filter, + RewritePatternSet &patterns, const LinalgTransformationFilter &filter, PatternBenefit benefit) { patterns.add(patterns.getContext(), filter, diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp index 2dbee7eaac6f..9931e89647bc 100644 --- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp +++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp @@ -83,7 +83,7 @@ static Value broadcast(ImplicitLocOpBuilder &builder, Value value, static Value handleMultidimensionalVectors(ImplicitLocOpBuilder &builder, ValueRange operands, int64_t vectorWidth, - std::function compute) { + llvm::function_ref compute) { assert(!operands.empty() && "operands must be not empty"); assert(vectorWidth > 0 && "vector width must be larger than 0"); diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp 
b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp index d1bd271d389b..aaa0fb33ae91 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" #include "mlir/Dialect/CommonFolders.h" @@ -161,8 +163,8 @@ OpFoldResult spirv::IAddOp::fold(ArrayRef operands) { // The resulting value will equal the low-order N bits of the correct result // R, where N is the component width and R is computed with enough precision // to avoid overflow and underflow. - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a + b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) + b; }); } //===----------------------------------------------------------------------===// @@ -183,8 +185,8 @@ OpFoldResult spirv::IMulOp::fold(ArrayRef operands) { // The resulting value will equal the low-order N bits of the correct result // R, where N is the component width and R is computed with enough precision // to avoid overflow and underflow. - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a * b; }); + return constFoldBinaryOp( + operands, [](const APInt &a, const APInt &b) { return a * b; }); } //===----------------------------------------------------------------------===// @@ -201,8 +203,8 @@ OpFoldResult spirv::ISubOp::fold(ArrayRef operands) { // The resulting value will equal the low-order N bits of the correct result // R, where N is the component width and R is computed with enough precision // to avoid overflow and underflow. 
- return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a - b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) - b; }); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index db86e2288673..4a415b456826 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -444,8 +446,8 @@ OpFoldResult mlir::shape::AddOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_Zero())) return getLhs(); - return constFoldBinaryOp(operands, - [](APInt a, APInt b) { return a + b; }); + return constFoldBinaryOp( + operands, [](APInt a, const APInt &b) { return std::move(a) + b; }); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index a74b46c034c4..de45339f8955 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1046,7 +1046,7 @@ static void printSwitchOpCases( OpAsmPrinter &p, SwitchOp op, Type flagType, Block *defaultDestination, OperandRange defaultOperands, TypeRange defaultOperandTypes, DenseIntElementsAttr caseValues, SuccessorRange caseDestinations, - OperandRangeRange caseOperands, TypeRangeRange caseOperandTypes) { + OperandRangeRange caseOperands, const TypeRangeRange &caseOperandTypes) { p << " default: "; p.printSuccessorAndUseList(defaultDestination, defaultOperands); @@ -1172,7 +1172,7 @@ dropSwitchCasesThatMatchDefault(SwitchOp op, PatternRewriter &rewriter) { /// ] /// -> br ^bb2 static void foldSwitch(SwitchOp op, PatternRewriter &rewriter, - APInt caseValue) { + const APInt 
&caseValue) { auto caseValues = op.getCaseValues(); for (const auto &it : llvm::enumerate(caseValues->getValues())) { if (it.value() == caseValue) { diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp index ddba1d3c4432..b5136273d635 100644 --- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp @@ -215,7 +215,7 @@ ArrayAttr mlir::getReassociationIndicesAttribute( OpBuilder &b, ArrayRef reassociation) { SmallVector reassociationAttr = llvm::to_vector<4>(llvm::map_range( - reassociation, [&](ReassociationIndices indices) -> Attribute { + reassociation, [&](const ReassociationIndices &indices) -> Attribute { return b.getI64ArrayAttr(indices).cast(); })); return b.getArrayAttr(reassociationAttr); diff --git a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp index 965b29257ee6..f012bf486bdf 100644 --- a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp @@ -46,7 +46,7 @@ class InnerOuterDimReductionConversion auto reductionDimsRange = multiReductionOp.reduction_dims().getAsValueRange(); auto reductionDims = llvm::to_vector<4>(llvm::map_range( - reductionDimsRange, [](APInt a) { return a.getZExtValue(); })); + reductionDimsRange, [](const APInt &a) { return a.getZExtValue(); })); llvm::SmallDenseSet reductionDimsSet(reductionDims.begin(), reductionDims.end()); int64_t reductionSize = reductionDims.size(); diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index f0f54ce7f8a0..47dcff627a33 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/IR/AffineExpr.h" #include "AffineExprDetail.h" #include "mlir/IR/AffineExprVisitor.h" @@ -28,7 +30,7 @@ void 
AffineExpr::walk(std::function callback) const { std::function callback; AffineExprWalker(std::function callback) - : callback(callback) {} + : callback(std::move(callback)) {} void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) { callback(expr); } void visitConstantExpr(AffineConstantExpr expr) { callback(expr); } @@ -36,7 +38,7 @@ void AffineExpr::walk(std::function callback) const { void visitSymbolExpr(AffineSymbolExpr expr) { callback(expr); } }; - AffineExprWalker(callback).walkPostOrder(*this); + AffineExprWalker(std::move(callback)).walkPostOrder(*this); } // Dispatch affine expression construction based on kind. diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index b15d49fd4105..a60120637011 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -384,7 +384,7 @@ AffineMap::partialConstantFold(ArrayRef operandConstants, /// Walk all of the AffineExpr's in this mapping. Each node in an expression /// tree is visited in postorder. -void AffineMap::walkExprs(std::function callback) const { +void AffineMap::walkExprs(llvm::function_ref callback) const { for (auto expr : getResults()) expr.walk(callback); } diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index 2e2a1465ac0f..d29feacb9786 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -875,7 +875,7 @@ struct ParallelDiagnosticHandlerImpl : public llvm::PrettyStackTraceEntry { } /// Utility method to emit any held diagnostics. - void emitDiagnostics(std::function emitFn) const { + void emitDiagnostics(llvm::function_ref emitFn) const { // Stable sort all of the diagnostics that were emitted. This creates a // deterministic ordering for the diagnostics based upon which order id they // were emitted for. 
diff --git a/mlir/lib/IR/Dialect.cpp b/mlir/lib/IR/Dialect.cpp index 6d1d48edbefd..b14876a16576 100644 --- a/mlir/lib/IR/Dialect.cpp +++ b/mlir/lib/IR/Dialect.cpp @@ -32,7 +32,7 @@ DialectRegistry::DialectRegistry() { insert(); } void DialectRegistry::addDialectInterface( StringRef dialectName, TypeID interfaceTypeID, - DialectInterfaceAllocatorFunction allocator) { + const DialectInterfaceAllocatorFunction &allocator) { assert(allocator && "unexpected null interface allocation function"); auto it = registry.find(dialectName.str()); assert(it != registry.end() && @@ -57,7 +57,7 @@ void DialectRegistry::addDialectInterface( void DialectRegistry::addObjectInterface( StringRef dialectName, TypeID objectID, TypeID interfaceTypeID, - ObjectInterfaceAllocatorFunction allocator) { + const ObjectInterfaceAllocatorFunction &allocator) { assert(allocator && "unexpected null interface allocation function"); auto it = registry.find(dialectName.str()); @@ -88,7 +88,7 @@ DialectRegistry::getDialectAllocator(StringRef name) const { } void DialectRegistry::insert(TypeID typeID, StringRef name, - DialectAllocatorFunction ctor) { + const DialectAllocatorFunction &ctor) { auto inserted = registry.insert( std::make_pair(std::string(name), std::make_pair(typeID, ctor))); if (!inserted.second && inserted.first->second.first != typeID) { diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index 888cab9eb568..e06492292c1d 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -995,7 +995,7 @@ LogicalResult OpTrait::impl::verifyValueSizeAttr(Operation *op, size_t totalCount = std::accumulate( sizeAttr.begin(), sizeAttr.end(), 0, - [](unsigned all, APInt one) { return all + one.getZExtValue(); }); + [](unsigned all, const APInt &one) { return all + one.getZExtValue(); }); if (totalCount != expectedCount) return op->emitOpError() diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp index e3d22f2b1cb0..30d5a8e1a779 100644 --- 
a/mlir/lib/Pass/IRPrinting.cpp +++ b/mlir/lib/Pass/IRPrinting.cpp @@ -223,9 +223,9 @@ struct BasicIRPrinterConfig : public PassManager::IRPrinterConfig { raw_ostream &out) : IRPrinterConfig(printModuleScope, printAfterOnlyOnChange, printAfterOnlyOnFailure, opPrintingFlags), - shouldPrintBeforePass(shouldPrintBeforePass), - shouldPrintAfterPass(shouldPrintAfterPass), out(out) { - assert((shouldPrintBeforePass || shouldPrintAfterPass) && + shouldPrintBeforePass(std::move(shouldPrintBeforePass)), + shouldPrintAfterPass(std::move(shouldPrintAfterPass)), out(out) { + assert((this->shouldPrintBeforePass || this->shouldPrintAfterPass) && "expected at least one valid filter function"); } diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index e86a315dc355..933d77bbef12 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Pass/PassRegistry.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" @@ -88,7 +90,8 @@ size_t PassRegistryEntry::getOptionWidth() const { void mlir::registerPassPipeline( StringRef arg, StringRef description, const PassRegistryFunction &function, std::function)> optHandler) { - PassPipelineInfo pipelineInfo(arg, description, function, optHandler); + PassPipelineInfo pipelineInfo(arg, description, function, + std::move(optHandler)); bool inserted = passPipelineRegistry->try_emplace(arg, pipelineInfo).second; assert(inserted && "Pass pipeline registered multiple times"); (void)inserted; diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index 05d4c594100b..2aa551ed8338 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -24,7 +24,7 @@ using namespace mlir; ReductionNode::ReductionNode( - ReductionNode *parentNode, std::vector ranges, + ReductionNode *parentNode, const std::vector &ranges, 
llvm::SpecificBumpPtrAllocator &allocator) /// Root node will have the parent pointer point to themselves. : parent(parentNode == nullptr ? this : parentNode), diff --git a/mlir/lib/TableGen/Pattern.cpp b/mlir/lib/TableGen/Pattern.cpp index 92cd68162093..d8c5e08bfcef 100644 --- a/mlir/lib/TableGen/Pattern.cpp +++ b/mlir/lib/TableGen/Pattern.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/TableGen/Pattern.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -217,7 +219,7 @@ StringRef SymbolInfoMap::getValuePackName(StringRef symbol, int *index) { SymbolInfoMap::SymbolInfo::SymbolInfo(const Operator *op, SymbolInfo::Kind kind, Optional dagAndConstant) - : op(op), kind(kind), dagAndConstant(dagAndConstant) {} + : op(op), kind(kind), dagAndConstant(std::move(dagAndConstant)) {} int SymbolInfoMap::SymbolInfo::getStaticValueCount() const { switch (kind) { @@ -502,7 +504,8 @@ SymbolInfoMap::findBoundSymbol(StringRef key, DagNode node, const Operator &op, } SymbolInfoMap::const_iterator -SymbolInfoMap::findBoundSymbol(StringRef key, SymbolInfo symbolInfo) const { +SymbolInfoMap::findBoundSymbol(StringRef key, + const SymbolInfo &symbolInfo) const { std::string name = getValuePackName(key).str(); auto range = symbolInfoMap.equal_range(name); diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp index 7238a866a446..46de82079989 100644 --- a/mlir/lib/TableGen/Predicate.cpp +++ b/mlir/lib/TableGen/Predicate.cpp @@ -286,7 +286,8 @@ propagateGroundTruth(PredNode *node, // Combine a list of predicate expressions using a binary combiner. If a list // is empty, return "init". 
static std::string combineBinary(ArrayRef children, - std::string combiner, std::string init) { + const std::string &combiner, + std::string init) { if (children.empty()) return init; diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index bf2a2e846146..473c9ad7f171 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Dialect/EmitC/IR/EmitC.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -689,7 +691,7 @@ bool CppEmitter::hasBlockLabel(Block &block) { } LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { - auto printInt = [&](APInt val, bool isUnsigned) { + auto printInt = [&](const APInt &val, bool isUnsigned) { if (val.getBitWidth() == 1) { if (val.getBoolValue()) os << "true"; @@ -702,7 +704,7 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { } }; - auto printFloat = [&](APFloat val) { + auto printFloat = [&](const APFloat &val) { if (val.isFinite()) { SmallString<128> strValue; // Use default values of toString except don't truncate zeros. 
@@ -734,7 +736,8 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { } if (auto dense = attr.dyn_cast()) { os << '{'; - interleaveComma(dense, os, [&](APFloat val) { printFloat(val); }); + interleaveComma(dense, os, + [&](APFloat val) { printFloat(std::move(val)); }); os << '}'; return success(); } @@ -757,7 +760,7 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { .dyn_cast()) { os << '{'; interleaveComma(dense, os, [&](APInt val) { - printInt(val, shouldMapToUnsigned(iType.getSignedness())); + printInt(std::move(val), shouldMapToUnsigned(iType.getSignedness())); }); os << '}'; return success(); @@ -767,7 +770,8 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { .getElementType() .dyn_cast()) { os << '{'; - interleaveComma(dense, os, [&](APInt val) { printInt(val, false); }); + interleaveComma(dense, os, + [&](APInt val) { printInt(std::move(val), false); }); os << '}'; return success(); } diff --git a/mlir/lib/Tools/mlir-lsp-server/lsp/Transport.cpp b/mlir/lib/Tools/mlir-lsp-server/lsp/Transport.cpp index 35d6734b1935..9a64938281dd 100644 --- a/mlir/lib/Tools/mlir-lsp-server/lsp/Transport.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/lsp/Transport.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Errno.h" #include "llvm/Support/Error.h" #include +#include using namespace mlir; using namespace mlir::lsp; @@ -87,7 +88,7 @@ bool MessageHandler::onNotify(llvm::StringRef method, llvm::json::Value value) { } else { auto it = notificationHandlers.find(method); if (it != notificationHandlers.end()) - it->second(value); + it->second(std::move(value)); } return true; } @@ -100,7 +101,7 @@ bool MessageHandler::onCall(llvm::StringRef method, llvm::json::Value params, auto it = methodHandlers.find(method); if (it != methodHandlers.end()) { - it->second(params, std::move(reply)); + it->second(std::move(params), std::move(reply)); } else { reply(llvm::make_error("method not found: " + method.str(), 
ErrorCode::MethodNotFound)); diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index f080b4d112ba..c0befb76eed0 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -563,7 +563,7 @@ class InlinerPass : public InlinerBase { InlinerPass::InlinerPass() : InlinerPass(defaultInlinerOptPipeline) {} InlinerPass::InlinerPass(std::function defaultPipeline) - : defaultPipeline(defaultPipeline) { + : defaultPipeline(std::move(defaultPipeline)) { opPipelines.push_back({}); // Initialize the pass options with the provided arguments. diff --git a/mlir/lib/Transforms/ViewOpGraph.cpp b/mlir/lib/Transforms/ViewOpGraph.cpp index 365779494e48..04a1e0fd4ef0 100644 --- a/mlir/lib/Transforms/ViewOpGraph.cpp +++ b/mlir/lib/Transforms/ViewOpGraph.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Transforms/ViewOpGraph.h" #include "PassDetail.h" #include "mlir/IR/Block.h" @@ -43,7 +45,9 @@ static std::string escapeString(std::string str) { } /// Put quotation marks around a given string. -static std::string quoteString(std::string str) { return "\"" + str + "\""; } +static std::string quoteString(const std::string &str) { + return "\"" + str + "\""; +} using AttributeMap = llvm::StringMap; @@ -104,7 +108,8 @@ class PrintOpPass : public ViewOpGraphBase { os.indent(); // Emit invisible anchor node from/to which arrows can be drawn. Node anchorNode = emitNodeStmt(" ", kShapeNone); - os << attrStmt("label", quoteString(escapeString(label))) << ";\n"; + os << attrStmt("label", quoteString(escapeString(std::move(label)))) + << ";\n"; builder(); os.unindent(); os << "}\n"; @@ -167,7 +172,7 @@ class PrintOpPass : public ViewOpGraphBase { // clipped at the boundary, but labels are not. This can lead to labels // floating around without any edge next to them. 
if (!n1.clusterId && !n2.clusterId) - attrs["label"] = quoteString(escapeString(label)); + attrs["label"] = quoteString(escapeString(std::move(label))); // Use `ltail` and `lhead` to draw edges between clusters. if (n1.clusterId) attrs["ltail"] = "cluster_" + std::to_string(*n1.clusterId); @@ -195,7 +200,7 @@ class PrintOpPass : public ViewOpGraphBase { Node emitNodeStmt(std::string label, StringRef shape = kShapeNode) { int nodeId = ++counter; AttributeMap attrs; - attrs["label"] = quoteString(escapeString(label)); + attrs["label"] = quoteString(escapeString(std::move(label))); attrs["shape"] = shape.str(); os << llvm::format("v%i ", nodeId); emitAttrList(os, attrs); diff --git a/mlir/lib/Translation/Translation.cpp b/mlir/lib/Translation/Translation.cpp index 0dc0e6c5ae66..43349a82c263 100644 --- a/mlir/lib/Translation/Translation.cpp +++ b/mlir/lib/Translation/Translation.cpp @@ -94,7 +94,7 @@ TranslateToMLIRRegistration::TranslateToMLIRRegistration( TranslateFromMLIRRegistration::TranslateFromMLIRRegistration( StringRef name, const TranslateFromMLIRFunction &function, - std::function dialectRegistration) { + const std::function &dialectRegistration) { registerTranslation(name, [function, dialectRegistration]( llvm::SourceMgr &sourceMgr, raw_ostream &output, MLIRContext *context) { diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp index d798042bfac8..0e18725b5eb2 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" @@ -175,19 +177,20 @@ void TestLinalgCodegenStrategy::runStrategy( CodegenStrategy strategy; strategy .tileAndFuseIf(fuse && !tileSizes.empty(), 
anchorOpName, - tilingAndFusionOptions) - .tileIf(!fuse && !tileSizes.empty(), anchorOpName, tilingOptions) + std::move(tilingAndFusionOptions)) + .tileIf(!fuse && !tileSizes.empty(), anchorOpName, + std::move(tilingOptions)) .promoteIf(!fuse && promote, anchorOpName, LinalgPromotionOptions() .setAlignment(16) .setUseFullTileBuffersByDefault(promoteFullTile)) .tileIf(!fuse && !registerTileSizes.empty(), anchorOpName, - registerTilingOptions) + std::move(registerTilingOptions)) .promoteIf(!fuse && registerPromote, anchorOpName, LinalgPromotionOptions() .setAlignment(16) .setUseFullTileBuffersByDefault(registerPromoteFullTile)) - .padIf(pad, "", paddingOptions) + .padIf(pad, "", std::move(paddingOptions)) .decomposeIf(decompose) .generalizeIf(generalize, "") .interchangeIf(!iteratorInterchange.empty(), iteratorInterchange) diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 64713b02c7b6..d4119f26c819 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -567,7 +567,7 @@ static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) { (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } -static void applyTilePattern(FuncOp funcOp, std::string loopType, +static void applyTilePattern(FuncOp funcOp, const std::string &loopType, ArrayRef tileSizes, ArrayRef peeledLoops, bool scalarizeDynamicDims) { diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp index 73b41ebde7ec..7da5a3c84681 100644 --- a/mlir/tools/mlir-tblgen/DialectGen.cpp +++ b/mlir/tools/mlir-tblgen/DialectGen.cpp @@ -185,10 +185,11 @@ static const char *const operationInterfaceFallbackDecl = R"( )"; /// Generate the declaration for the given dialect class. 
-static void emitDialectDecl(Dialect &dialect, - iterator_range dialectAttrs, - iterator_range dialectTypes, - raw_ostream &os) { +static void +emitDialectDecl(Dialect &dialect, + const iterator_range &dialectAttrs, + const iterator_range &dialectTypes, + raw_ostream &os) { /// Build the list of dependent dialects std::string dependentDialectRegistrations; { diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index 3025df140ea9..525d96d06ac5 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -88,7 +88,7 @@ static void emitAssemblyFormat(StringRef opName, StringRef format, os << "```\n\n"; } -static void emitOpTraitsDoc(Operator op, raw_ostream &os) { +static void emitOpTraitsDoc(const Operator &op, raw_ostream &os) { // TODO: We should link to the trait/documentation of it. That also means we // should add descriptions to traits that can be queried. // Collect using set to sort effects, interfaces & traits. diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index adbd8407af99..02d0e81b6860 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -2217,7 +2217,7 @@ class FormatParser { /// attribute. void handleTypesMatchConstraint( llvm::StringMap &variableTyResolver, - llvm::Record def); + const llvm::Record &def); /// Returns an argument or attribute with the given name that has been seen /// within the format. 
@@ -2621,7 +2621,7 @@ void FormatParser::handleSameTypesConstraint( void FormatParser::handleTypesMatchConstraint( llvm::StringMap &variableTyResolver, - llvm::Record def) { + const llvm::Record &def) { StringRef lhsName = def.getValueAsString("lhs"); StringRef rhsName = def.getValueAsString("rhs"); StringRef transformer = def.getValueAsString("transformer"); diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp index 366a236c72c7..4ec33a901b5a 100644 --- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp +++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp @@ -154,7 +154,7 @@ struct TypeInterfaceGenerator : public InterfaceGenerator { // GEN: Interface definitions //===----------------------------------------------------------------------===// -static void emitInterfaceDef(Interface interface, StringRef valueType, +static void emitInterfaceDef(const Interface &interface, StringRef valueType, raw_ostream &os) { StringRef interfaceName = interface.getName(); StringRef cppNamespace = interface.getCppNamespace(); diff --git a/mlir/tools/mlir-tblgen/mlir-tblgen.cpp b/mlir/tools/mlir-tblgen/mlir-tblgen.cpp index 0f14b190d891..6a2bbcd0baa4 100644 --- a/mlir/tools/mlir-tblgen/mlir-tblgen.cpp +++ b/mlir/tools/mlir-tblgen/mlir-tblgen.cpp @@ -29,7 +29,7 @@ using namespace mlir; static llvm::ManagedStatic> generatorRegistry; mlir::GenRegistration::GenRegistration(StringRef arg, StringRef description, - GenFunction function) { + const GenFunction &function) { generatorRegistry->emplace_back(arg, description, function); } diff --git a/mlir/unittests/Analysis/AffineStructuresParserTest.cpp b/mlir/unittests/Analysis/AffineStructuresParserTest.cpp index 14be5a3faf3e..d219326bce47 100644 --- a/mlir/unittests/Analysis/AffineStructuresParserTest.cpp +++ b/mlir/unittests/Analysis/AffineStructuresParserTest.cpp @@ -73,7 +73,7 @@ TEST(ParseFACTest, InvalidInputTest) { /// Parses and compares the `str` to the `ex`. 
The equality check is performed /// by using PresburgerSet::isEqual -static bool parseAndCompare(StringRef str, FlatAffineConstraints ex, +static bool parseAndCompare(StringRef str, const FlatAffineConstraints &ex, MLIRContext *context) { FailureOr fac = parseIntegerSetToFAC(str, context); diff --git a/mlir/unittests/Analysis/Presburger/SimplexTest.cpp b/mlir/unittests/Analysis/Presburger/SimplexTest.cpp index fd5750dd3756..40fa9e3d60be 100644 --- a/mlir/unittests/Analysis/Presburger/SimplexTest.cpp +++ b/mlir/unittests/Analysis/Presburger/SimplexTest.cpp @@ -93,8 +93,8 @@ TEST(SimplexTest, addInequality_rollback) { } Simplex simplexFromConstraints(unsigned nDim, - SmallVector, 8> ineqs, - SmallVector, 8> eqs) { + ArrayRef> ineqs, + ArrayRef> eqs) { Simplex simplex(nDim); for (const auto &ineq : ineqs) simplex.addInequality(ineq); diff --git a/mlir/unittests/Analysis/PresburgerSetTest.cpp b/mlir/unittests/Analysis/PresburgerSetTest.cpp index c7c3c0db4f5f..0606db4d8957 100644 --- a/mlir/unittests/Analysis/PresburgerSetTest.cpp +++ b/mlir/unittests/Analysis/PresburgerSetTest.cpp @@ -48,7 +48,7 @@ static PresburgerSet parsePresburgerSetFromFACStrings(unsigned numDims, /// Compute the union of s and t, and check that each of the given points /// belongs to the union iff it belongs to at least one of s and t. -static void testUnionAtPoints(PresburgerSet s, PresburgerSet t, +static void testUnionAtPoints(const PresburgerSet &s, const PresburgerSet &t, ArrayRef> points) { PresburgerSet unionSet = s.unionSet(t); for (const SmallVector &point : points) { @@ -61,7 +61,8 @@ static void testUnionAtPoints(PresburgerSet s, PresburgerSet t, /// Compute the intersection of s and t, and check that each of the given points /// belongs to the intersection iff it belongs to both s and t. 
-static void testIntersectAtPoints(PresburgerSet s, PresburgerSet t, +static void testIntersectAtPoints(const PresburgerSet &s, + const PresburgerSet &t, ArrayRef> points) { PresburgerSet intersection = s.intersect(t); for (const SmallVector &point : points) { @@ -74,7 +75,7 @@ static void testIntersectAtPoints(PresburgerSet s, PresburgerSet t, /// Compute the set difference s \ t, and check that each of the given points /// belongs to the difference iff it belongs to s and does not belong to t. -static void testSubtractAtPoints(PresburgerSet s, PresburgerSet t, +static void testSubtractAtPoints(const PresburgerSet &s, const PresburgerSet &t, ArrayRef> points) { PresburgerSet diff = s.subtract(t); for (const SmallVector &point : points) { @@ -90,7 +91,7 @@ static void testSubtractAtPoints(PresburgerSet s, PresburgerSet t, /// Compute the complement of s, and check that each of the given points /// belongs to the complement iff it does not belong to s. -static void testComplementAtPoints(PresburgerSet s, +static void testComplementAtPoints(const PresburgerSet &s, ArrayRef> points) { PresburgerSet complement = s.complement(); complement.complement(); @@ -473,7 +474,7 @@ void expectEqual(const PresburgerSet &s, const PresburgerSet &t) { EXPECT_TRUE(s.isEqual(t)); } -void expectEmpty(PresburgerSet s) { EXPECT_TRUE(s.isIntegerEmpty()); } +void expectEmpty(const PresburgerSet &s) { EXPECT_TRUE(s.isIntegerEmpty()); } TEST(SetTest, divisions) { MLIRContext context; @@ -511,7 +512,7 @@ TEST(SetTest, divisions) { /// Coalesce `set` and check that the `newSet` is equal to `set and that /// `expectedNumFACs` matches the number of FACs in the coalesced set. 
/// If one of the two -void expectCoalesce(size_t expectedNumFACs, const PresburgerSet set) { +void expectCoalesce(size_t expectedNumFACs, const PresburgerSet &set) { PresburgerSet newSet = set.coalesce(); EXPECT_TRUE(set.isEqual(newSet)); EXPECT_TRUE(expectedNumFACs == newSet.getNumFACs()); diff --git a/mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp b/mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp index 5d9b1af7847a..5f209fb6fdfe 100644 --- a/mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp +++ b/mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp @@ -46,7 +46,7 @@ class RootOrderingTest : public ::testing::Test { /// Checks that optimal branching on graph has the given cost and /// its preorder traversal results in the specified edges. - void check(unsigned cost, OptimalBranching::EdgeList edges) { + void check(unsigned cost, const OptimalBranching::EdgeList &edges) { OptimalBranching opt(graph, v[0]); EXPECT_EQ(opt.solve(), cost); EXPECT_EQ(opt.preOrderTraversal({v, v + edges.size()}), edges); diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp index 9ec6c1b15b1a..fe69878c8283 100644 --- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp +++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp @@ -22,7 +22,8 @@ struct Pattern { /// Rather than using these, please use the readable helper constructor /// functions below to make tests more readable. 
Pattern(unsigned tensorNum) : kind(Kind::kTensor), tensorNum(tensorNum) {} - Pattern(Kind kind, std::shared_ptr e0, std::shared_ptr e1) + Pattern(Kind kind, const std::shared_ptr &e0, + const std::shared_ptr &e1) : kind(kind), e0(e0), e1(e1) { assert(kind >= Kind::kMulF); assert(e0 && e1); @@ -38,13 +39,15 @@ static std::shared_ptr tensorPattern(unsigned tensorNum) { return std::make_shared(tensorNum); } -static std::shared_ptr addfPattern(std::shared_ptr e0, - std::shared_ptr e1) { +static std::shared_ptr +addfPattern(const std::shared_ptr &e0, + const std::shared_ptr &e1) { return std::make_shared(Kind::kAddF, e0, e1); } -static std::shared_ptr mulfPattern(std::shared_ptr e0, - std::shared_ptr e1) { +static std::shared_ptr +mulfPattern(const std::shared_ptr &e0, + const std::shared_ptr &e1) { return std::make_shared(Kind::kMulF, e0, e1); } @@ -84,8 +87,8 @@ class MergerTestBase : public ::testing::Test { /// groups of lattice points should be ordered with respect to other groups, /// but there is no required ordering within groups. bool latPointWithinRange(unsigned s, unsigned p, unsigned n, - std::shared_ptr pattern, - llvm::BitVector bits) { + const std::shared_ptr &pattern, + const llvm::BitVector &bits) { for (unsigned i = p; i < p + n; ++i) { if (compareExpression(merger.lat(merger.set(s)[i]).exp, pattern) && compareBits(s, i, bits)) @@ -110,7 +113,7 @@ class MergerTestBase : public ::testing::Test { /// Converts a vector of (loop, tensor) pairs to a bitvector with the /// corresponding bits set. llvm::BitVector - loopsToBits(std::vector> loops) { + loopsToBits(const std::vector> &loops) { llvm::BitVector testBits = llvm::BitVector(numTensors + 1, false); for (auto l : loops) { auto loop = std::get<0>(l); @@ -121,7 +124,7 @@ class MergerTestBase : public ::testing::Test { } /// Returns true if the bits of lattice point p in set s match the given bits. 
- bool compareBits(unsigned s, unsigned p, llvm::BitVector bits) { + bool compareBits(unsigned s, unsigned p, const llvm::BitVector &bits) { return merger.lat(merger.set(s)[p]).bits == bits; } @@ -136,7 +139,7 @@ class MergerTestBase : public ::testing::Test { /// children are equal. /// - Expressions with Kind invariant or tensor are equal if they have the /// same expression id. - bool compareExpression(unsigned e, std::shared_ptr pattern) { + bool compareExpression(unsigned e, const std::shared_ptr &pattern) { auto tensorExp = merger.exp(e); if (tensorExp.kind != pattern->kind) return false; From ad5d7ace34e8b1c4bfdeb855b92b860b95127c7b Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:50:43 +0000 Subject: [PATCH 332/992] Apply clang-tidy fixes for readability-const-return-type to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116251 --- mlir/include/mlir/Dialect/Quant/UniformSupport.h | 2 +- mlir/include/mlir/TableGen/Predicate.h | 2 +- mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp | 2 +- mlir/lib/TableGen/Predicate.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/Quant/UniformSupport.h b/mlir/include/mlir/Dialect/Quant/UniformSupport.h index 91f2a798430c..5bcf4d5921a5 100644 --- a/mlir/include/mlir/Dialect/Quant/UniformSupport.h +++ b/mlir/include/mlir/Dialect/Quant/UniformSupport.h @@ -31,7 +31,7 @@ namespace quant { /// process. struct ExpressedToQuantizedConverter { /// Creates a converter for the given input type. - static const ExpressedToQuantizedConverter forInputType(Type inputType); + static ExpressedToQuantizedConverter forInputType(Type inputType); /// Converts the inputType to be based on the given elemental type, /// returning the new type (or nullptr and emit an error on failure). 
diff --git a/mlir/include/mlir/TableGen/Predicate.h b/mlir/include/mlir/TableGen/Predicate.h index 6c1b762025a0..2c91588a542f 100644 --- a/mlir/include/mlir/TableGen/Predicate.h +++ b/mlir/include/mlir/TableGen/Predicate.h @@ -101,7 +101,7 @@ class CombinedPred : public Pred { const llvm::Record *getCombinerDef() const; // Get the predicates that are combined by this predicate. - const std::vector getChildren() const; + std::vector getChildren() const; }; // A combined predicate that requires all child predicates of 'CPred' type to diff --git a/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp b/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp index 6d11bc05c941..ddd8ae0c0fd3 100644 --- a/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp +++ b/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp @@ -17,7 +17,7 @@ static bool isQuantizablePrimitiveType(Type inputType) { return inputType.isa(); } -const ExpressedToQuantizedConverter +ExpressedToQuantizedConverter ExpressedToQuantizedConverter::forInputType(Type inputType) { if (inputType.isa()) { Type elementType = inputType.cast().getElementType(); diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp index 46de82079989..e034e7084b8c 100644 --- a/mlir/lib/TableGen/Predicate.cpp +++ b/mlir/lib/TableGen/Predicate.cpp @@ -79,7 +79,7 @@ const llvm::Record *CombinedPred::getCombinerDef() const { return def->getValueAsDef("kind"); } -const std::vector CombinedPred::getChildren() const { +std::vector CombinedPred::getChildren() const { assert(def->getValue("children") && "CombinedPred must have a value 'children'"); return def->getValueAsListOfDefs("children"); From f0fff1dfde9dae9d5ce4e9c647da0fdc368053c0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:53:18 +0000 Subject: [PATCH 333/992] Remove unused applyPad function from TosaToLinalg.cpp (NFC) --- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 31 ------------------- 1 file changed, 31 deletions(-) diff --git 
a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 280f22b8525b..9ddad2af4dc6 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -61,37 +61,6 @@ static mlir::SelectOp clampHelper(Location loc, Value arg, return rewriter.create(loc, largerThanMax, max, minOrArg); } -static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, - Attribute padAttr, OpBuilder &rewriter) { - // Input should be padded if necessary. - if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) - return input; - - ShapedType inputTy = input.getType().cast(); - Type inputETy = inputTy.getElementType(); - auto inputShape = inputTy.getShape(); - - assert((inputShape.size() * 2) == pad.size()); - - SmallVector paddedShape; - SmallVector lowIndices; - SmallVector highIndices; - for (int i = 0, s = inputShape.size(); i < s; i++) { - auto lowPad = pad[i * 2]; - auto highPad = pad[i * 2 + 1]; - paddedShape.push_back(inputShape[i] + highPad + lowPad); - lowIndices.push_back(rewriter.getIndexAttr(lowPad)); - highIndices.push_back(rewriter.getIndexAttr(highPad)); - } - - Value padValue = rewriter.create(loc, padAttr); - - return linalg::PadTensorOp::createPadScalarOp( - RankedTensorType::get(paddedShape, inputETy), input, padValue, - lowIndices, highIndices, /*nofold=*/false, loc, rewriter) - .result(); -} - static SmallVector filterDynamicDims(const SmallVector &dynDims) { SmallVector filteredDims; for (auto dim : dynDims) From 5a1f6077ec2b0eb9c1a314967f789026d015bdb1 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:55:30 +0000 Subject: [PATCH 334/992] Apply clang-tidy fixes for readability-container-size-empty for MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116252 --- mlir/lib/Analysis/Utils.cpp | 2 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 8 ++++---- mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | 4 
++-- mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp | 2 +- .../Dialect/Vector/VectorMultiDimReductionTransforms.cpp | 4 ++-- mlir/lib/Dialect/Vector/VectorOps.cpp | 6 +++--- mlir/lib/Reducer/ReductionNode.cpp | 2 +- .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +- mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp | 2 +- mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp | 6 +++--- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index e6fe34996359..432aeecde2db 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -151,7 +151,7 @@ void ComputationSliceState::dump() const { /// if both the src and the dst loops don't have the same bounds. Returns /// llvm::None if none of the above can be proven. Optional ComputationSliceState::isSliceMaximalFastCheck() const { - assert(lbs.size() == ubs.size() && lbs.size() && ivs.size() && + assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() && "Unexpected number of lbs, ubs and ivs in slice"); for (unsigned i = 0, end = lbs.size(); i < end; ++i) { diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index e1d2dcadff8a..0b4346ddd08d 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -105,7 +105,7 @@ static void printOperandAndTypeList(OpAsmPrinter &p, OperandRange operands) { /// Print data variables corresponding to a data-sharing clause `name` static void printDataVars(OpAsmPrinter &p, OperandRange operands, StringRef name) { - if (operands.size()) { + if (!operands.empty()) { p << name; printOperandAndTypeList(p, operands); } @@ -390,7 +390,7 @@ static void printReductionVarList(OpAsmPrinter &p, static LogicalResult verifyReductionVarList(Operation *op, Optional reductions, OperandRange reductionVars) { - if (reductionVars.size() != 0) { + if (!reductionVars.empty()) { if (!reductions || 
reductions->size() != reductionVars.size()) return op->emitOpError() << "expected as many reduction symbol references " @@ -863,7 +863,7 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result, schedule[0] = llvm::toUpper(schedule[0]); auto attr = parser.getBuilder().getStringAttr(schedule); result.addAttribute("schedule_val", attr); - if (modifiers.size() > 0) { + if (!modifiers.empty()) { auto mod = parser.getBuilder().getStringAttr(modifiers[0]); result.addAttribute("schedule_modifier", mod); // Only SIMD attribute is allowed here! @@ -1072,7 +1072,7 @@ static void printWsLoopOp(OpAsmPrinter &p, WsLoopOp op) { printDataVars(p, op.firstprivate_vars(), "firstprivate"); printDataVars(p, op.lastprivate_vars(), "lastprivate"); - if (op.linear_vars().size()) + if (!op.linear_vars().empty()) printLinearClause(p, op.linear_vars(), op.linear_step_vars()); if (auto sched = op.schedule_val()) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index 8090b235cc46..f85cc03b104e 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -669,7 +669,7 @@ getElementType(Type type, Attribute indices, emitErrorFn("expected a 32-bit integer array attribute for 'indices'"); return nullptr; } - if (!indicesArrayAttr.size()) { + if (indicesArrayAttr.empty()) { emitErrorFn("expected at least one index for spv.CompositeExtract"); return nullptr; } @@ -1929,7 +1929,7 @@ static void print(spirv::ExecutionModeOp execModeOp, OpAsmPrinter &printer) { printer << " \"" << stringifyExecutionMode(execModeOp.execution_mode()) << "\""; auto values = execModeOp.values(); - if (!values.size()) + if (values.empty()) return; printer << ", "; llvm::interleaveComma(values, printer, [&](Attribute a) { diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index a863a9cd8e01..9163aabd3c81 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp 
+++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -137,7 +137,7 @@ unsigned Merger::mapSet(Kind kind, unsigned s0, Value v) { unsigned Merger::optimizeSet(unsigned s0) { unsigned s = addSet(); - assert(latSets[s0].size() != 0); + assert(!latSets[s0].empty()); unsigned p0 = latSets[s0][0]; for (unsigned p1 : latSets[s0]) { bool add = true; diff --git a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp index f012bf486bdf..9a1ca53074d0 100644 --- a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp @@ -141,12 +141,12 @@ class ReduceMultiDimReductionRank // 2. Compute flattened parallel and reduction sizes. int flattenedParallelDim = 0; int flattenedReductionDim = 0; - if (parallelShapes.size() > 0) { + if (!parallelShapes.empty()) { flattenedParallelDim = 1; for (auto d : parallelShapes) flattenedParallelDim *= d; } - if (reductionShapes.size() > 0) { + if (!reductionShapes.empty()) { flattenedReductionDim = 1; for (auto d : reductionShapes) flattenedReductionDim *= d; diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 60c0aac1a4be..fa608113b079 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -340,13 +340,13 @@ static ParseResult parseReductionOp(OpAsmParser &parser, parser.parseComma() || parser.parseOperandList(operandsInfo) || parser.parseColonType(redType) || parser.parseKeywordType("into", resType) || - (operandsInfo.size() > 0 && + (!operandsInfo.empty() && parser.resolveOperand(operandsInfo[0], redType, result.operands)) || (operandsInfo.size() > 1 && parser.resolveOperand(operandsInfo[1], resType, result.operands)) || parser.addTypeToList(resType, result.types)) return failure(); - if (operandsInfo.size() < 1 || operandsInfo.size() > 2) + if (operandsInfo.empty() || operandsInfo.size() > 2) return 
parser.emitError(parser.getNameLoc(), "unsupported number of operands"); return success(); @@ -546,7 +546,7 @@ static LogicalResult verifyOutputShape( } // Verify 'expectedResultDims'. - if (expectedResultDims.size() == 0) { + if (expectedResultDims.empty()) { // No batch or free dimension implies a scalar result. if (resType.isa() || accType.isa()) return op.emitOpError("invalid accumulator/result vector shape"); diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index 2aa551ed8338..9f0f461d676e 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -61,7 +61,7 @@ ArrayRef ReductionNode::generateNewVariants() { // If we haven't created new variant, then we can create varients by removing // each of them respectively. For example, given {{1, 3}, {4, 9}}, we can // produce variants with range {{1, 3}} and {{4, 9}}. - if (variants.size() == 0 && getRanges().size() > 1) { + if (variants.empty() && getRanges().size() > 1) { for (const Range &range : getRanges()) { std::vector subRanges = getRanges(); llvm::erase_value(subRanges, range); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 581065f7cd0a..7a3eca3e32d1 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -591,7 +591,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // No sections within omp.sections operation - skip generation. 
This situation // is only possible if there is only a terminator operation inside the // sections operation - if (sectionCBs.size() == 0) + if (sectionCBs.empty()) return success(); assert(isa(*sectionsOp.region().op_begin())); diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp index e28b0c38f0ff..edb2fb39b179 100644 --- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp @@ -59,7 +59,7 @@ void TestAffineDataCopy::runOnFunction() { // Gather all AffineForOps by loop depth. std::vector> depthToLoops; gatherLoops(getFunction(), depthToLoops); - assert(depthToLoops.size() && "Loop nest not found"); + assert(!depthToLoops.empty() && "Loop nest not found"); // Only support tests with a single loop nest and a single innermost loop // for now. diff --git a/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp index 7cca598cc6ec..718f7070f3bc 100644 --- a/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp +++ b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp @@ -42,18 +42,18 @@ using llvm::RecordKeeper; static bool emitDecls(const RecordKeeper &recordKeeper, raw_ostream &os) { const auto &directiveLanguages = recordKeeper.getAllDerivedDefinitions("DirectiveLanguage"); - assert(directiveLanguages.size() != 0 && "DirectiveLanguage missing."); + assert(!directiveLanguages.empty() && "DirectiveLanguage missing."); const auto &clauses = recordKeeper.getAllDerivedDefinitions("Clause"); for (const auto &r : clauses) { Clause c{r}; const auto &clauseVals = c.getClauseVals(); - if (clauseVals.size() <= 0) + if (clauseVals.empty()) continue; const auto enumName = c.getEnumName(); - assert(enumName.size() != 0 && "enumClauseValue field not set."); + assert(!enumName.empty() && "enumClauseValue field not set."); std::vector cvDefs; for (const auto &cv : clauseVals) { From 6786d7e4f5b14c4913d17410042fa226fad7187b Mon Sep 17 
00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 01:58:56 +0000 Subject: [PATCH 335/992] Apply clang-tidy fixes for readability-simplify-boolean-expr to MLIR (NFC) Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116253 --- mlir/lib/Analysis/Utils.cpp | 2 +- mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp | 6 +----- mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp | 4 +--- .../ComprehensiveBufferize.cpp | 5 +---- .../ComprehensiveBufferize/LinalgInterfaceImpl.cpp | 10 ++++------ mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp | 13 +++++-------- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 5 +---- .../StandardOps/Transforms/FuncConversions.cpp | 5 +---- mlir/lib/Dialect/Vector/VectorUtils.cpp | 6 +----- mlir/lib/Transforms/NormalizeMemRefs.cpp | 12 +++++------- mlir/lib/Transforms/Utils/LoopFusionUtils.cpp | 3 +-- mlir/lib/Transforms/Utils/LoopUtils.cpp | 4 +--- mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp | 2 +- .../lib/Dialect/Vector/TestVectorTransforms.cpp | 4 +--- 14 files changed, 25 insertions(+), 56 deletions(-) diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 432aeecde2db..de78f1dfdf48 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -564,7 +564,7 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, for (auto id : ids) { AffineForOp iv; if ((iv = getForInductionVarOwner(id)) && - llvm::is_contained(enclosingIVs, iv) == false) { + !llvm::is_contained(enclosingIVs, iv)) { cst.projectOut(id); } } diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 9ddad2af4dc6..38c8276f2843 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1014,11 +1014,7 @@ static bool createReassociationMapsForCollapse( // If both iterators didn't reach the end, we have leftover dimentions which // implies that we have a mismatch in 
shape. - if (currSrcDim != srcShape.size() || currDstDim != dstShape.size()) { - return false; - } - - return true; + return !(currSrcDim != srcShape.size() || currDstDim != dstShape.size()); } namespace { diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 985cfc9b91e2..30bad881a319 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -84,9 +84,7 @@ static bool transferReadSupportsMMAMatrixType(vector::TransferReadOp readOp) { readOp.getContext()); // TODO: Support transpose once it is added to GPU dialect ops. // For now we only support (d0, d1) -> (d0, d1) and (d0, d1) -> (0, d1). - if (!map.isMinorIdentity() && map != broadcastInnerDim) - return false; - return true; + return !(!map.isMinorIdentity() && map != broadcastInnerDim); } // Return true if the transfer op can be converted to a MMA matrix store. diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp index babbec5493ae..485fb735b3ee 100644 --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp @@ -472,10 +472,7 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult, bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) || aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) || state.bufferizesToMemoryWrite(opOperand); - if (!hasWrite) - return false; - - return true; + return hasWrite; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp index 158ad6a76343..190f0fea5108 100644 --- 
a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp @@ -425,12 +425,10 @@ LogicalResult mlir::linalg::comprehensive_bufferize::linalg_ext:: // TODO: Support cases such as extract_slice(init_tensor). SmallVector opOperands = state.getAliasingOpOperand(opResult); - if (!llvm::all_of(opOperands, [&](OpOperand *operand) { - return aliasInfo.areEquivalentBufferizedValues(operand->get(), - opResult); - })) - return true; - return false; + return !llvm::all_of(opOperands, [&](OpOperand *operand) { + return aliasInfo.areEquivalentBufferizedValues(operand->get(), + opResult); + }); }); // Replace only if the reverse use-def chain ends at exactly one diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp index 8812dd4dff76..aa8a3b9f4771 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -543,14 +543,11 @@ struct LinalgDetensorize : public LinalgDetensorizeBase { if (op->hasTrait()) { auto &body = function_like_impl::getFunctionBody(op); return llvm::all_of(llvm::drop_begin(body, 1), [&](Block &block) { - if (llvm::any_of( - blockArgsToDetensor, [&](BlockArgument blockArgument) { - return blockArgument.getOwner() == &block && - !typeConverter.isLegal(blockArgument.getType()); - })) { - return false; - } - return true; + return !llvm::any_of( + blockArgsToDetensor, [&](BlockArgument blockArgument) { + return blockArgument.getOwner() == &block && + !typeConverter.isLegal(blockArgument.getType()); + }); }); } diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 45ba726d5bf9..d2f989b561e4 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -428,10 +428,7 @@ bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { auto aMemSpace = (aT) ? 
aT.getMemorySpace() : uaT.getMemorySpace(); auto bMemSpace = (bT) ? bT.getMemorySpace() : ubT.getMemorySpace(); - if (aMemSpace != bMemSpace) - return false; - - return true; + return aMemSpace == bMemSpace; } return false; diff --git a/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp b/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp index 2cd22ef9aa2f..0f07e547a7a3 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp @@ -146,10 +146,7 @@ bool mlir::isLegalForReturnOpTypeConversionPattern(Operation *op, // ReturnLike operations have to be legalized with their parent. For // return this is handled, for other ops they remain as is. - if (op->hasTrait()) - return true; - - return false; + return op->hasTrait(); } bool mlir::isNotBranchOpInterfaceOrReturnLikeOp(Operation *op) { diff --git a/mlir/lib/Dialect/Vector/VectorUtils.cpp b/mlir/lib/Dialect/Vector/VectorUtils.cpp index e799c96fd6b0..a6a0a25dcd89 100644 --- a/mlir/lib/Dialect/Vector/VectorUtils.cpp +++ b/mlir/lib/Dialect/Vector/VectorUtils.cpp @@ -302,11 +302,7 @@ bool matcher::operatesOnSuperVectorsOf(Operation &op, // This could be useful information if we wanted to reshape at the level of // the vector type (but we would have to look at the compute and distinguish // between parallel, reduction and possibly other cases. - if (!ratio.hasValue()) { - return false; - } - - return true; + return ratio.hasValue(); } bool mlir::isDisjointTransferIndices(VectorTransferOpInterface transferA, diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp index d830b607904e..477d0b6ec304 100644 --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -92,13 +92,11 @@ void NormalizeMemRefs::runOnOperation() { /// are satisfied will the value become a candidate for replacement. /// TODO: Extend this for DimOps. 
static bool isMemRefNormalizable(Value::user_range opUsers) { - if (llvm::any_of(opUsers, [](Operation *op) { - if (op->hasTrait()) - return false; - return true; - })) - return false; - return true; + return !llvm::any_of(opUsers, [](Operation *op) { + if (op->hasTrait()) + return false; + return true; + }); } /// Set all the calling functions and the callees of the function as not diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp index dd97de1eebf1..a4043b20ec50 100644 --- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp @@ -54,8 +54,7 @@ static void getLoadAndStoreMemRefAccesses(Operation *opA, static bool isDependentLoadOrStoreOp(Operation *op, DenseMap &values) { if (auto loadOp = dyn_cast(op)) { - return values.count(loadOp.getMemRef()) > 0 && - values[loadOp.getMemRef()] == true; + return values.count(loadOp.getMemRef()) > 0 && values[loadOp.getMemRef()]; } if (auto storeOp = dyn_cast(op)) { return values.count(storeOp.getMemRef()) > 0; diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 9a81f1d5d076..1700d60a9173 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -1345,9 +1345,7 @@ static bool areInnerBoundsInvariant(AffineForOp forOp) { } return WalkResult::advance(); }); - if (walkResult.wasInterrupted()) - return false; - return true; + return !walkResult.wasInterrupted(); } // Gathers all maximal sub-blocks of operations that do not themselves diff --git a/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp b/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp index afff0faf0c2f..d753378559a3 100644 --- a/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp +++ b/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp @@ -71,7 +71,7 @@ ConvertTosaNegateOp::matchAndRewrite(Operation *op, double typeRangeMax = double(outputElementType.getStorageTypeMax() - 
outputElementType.getZeroPoint()) * outputElementType.getScale(); - bool narrowRange = outputElementType.getStorageTypeMin() == 1 ? true : false; + bool narrowRange = outputElementType.getStorageTypeMin() == 1; auto dstQConstType = RankedTensorType::get( outputType.getShape(), diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index 0d054720a61b..6fc3d4a22fcb 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -391,9 +391,7 @@ struct TestVectorToLoopPatterns type.getNumElements() % multiplicity != 0) return mlir::WalkResult::advance(); auto filterAlloc = [](Operation *op) { - if (isa(op)) - return false; - return true; + return !isa(op); }; auto dependentOps = getSlice(op, filterAlloc); // Create a loop and move instructions from the Op slice into the loop. From 4602f4169a21e75b82261ba1599046b157d1d021 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 1 Jan 2022 19:53:52 -0800 Subject: [PATCH 336/992] [RISCV] Prune unnecessary vector pseudo instructions. NFC For .vf instructions, we don't need MF8 pseudos for f16. We don't need MF8 or MF4 pseudos for f32. Or MF8, MF4, MF2 for f64. 
Reviewed By: khchen Differential Revision: https://reviews.llvm.org/D116437 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 6 -- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 48 +++++++++----- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 63 ++++++++++--------- 3 files changed, 65 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 15a75ba411c0..d39e0805a79c 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -473,21 +473,15 @@ static bool isScalarMoveInstr(const MachineInstr &MI) { case RISCV::PseudoVFMV_S_F16_M8: case RISCV::PseudoVFMV_S_F16_MF2: case RISCV::PseudoVFMV_S_F16_MF4: - case RISCV::PseudoVFMV_S_F16_MF8: case RISCV::PseudoVFMV_S_F32_M1: case RISCV::PseudoVFMV_S_F32_M2: case RISCV::PseudoVFMV_S_F32_M4: case RISCV::PseudoVFMV_S_F32_M8: case RISCV::PseudoVFMV_S_F32_MF2: - case RISCV::PseudoVFMV_S_F32_MF4: - case RISCV::PseudoVFMV_S_F32_MF8: case RISCV::PseudoVFMV_S_F64_M1: case RISCV::PseudoVFMV_S_F64_M2: case RISCV::PseudoVFMV_S_F64_M4: case RISCV::PseudoVFMV_S_F64_M8: - case RISCV::PseudoVFMV_S_F64_MF2: - case RISCV::PseudoVFMV_S_F64_MF4: - case RISCV::PseudoVFMV_S_F64_MF8: return true; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index dee1ce635c73..9dc2bcb36322 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1400,19 +1400,28 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ RISCV::PseudoV##OP##_##TYPE##_##LMUL -#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ - CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ - case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ - case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ - case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ +#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \ + CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ 
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \ case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \ case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8) +#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \ + CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ + case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) + +#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \ + CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ + case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) + +#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ + CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ + case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) + #define CASE_VFMA_SPLATS(OP) \ - CASE_VFMA_OPCODE_LMULS(OP, VF16): \ - case CASE_VFMA_OPCODE_LMULS(OP, VF32): \ - case CASE_VFMA_OPCODE_LMULS(OP, VF64) + CASE_VFMA_OPCODE_LMULS_MF4(OP, VF16): \ + case CASE_VFMA_OPCODE_LMULS_MF2(OP, VF32): \ + case CASE_VFMA_OPCODE_LMULS_M1(OP, VF64) // clang-format on bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, @@ -1534,19 +1543,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ break; -#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ - CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ - CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ - CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ +#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) +#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) + +#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) + +#define 
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) + #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ - CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF16) \ - CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF32) \ - CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF64) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VF16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VF32) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VF64) MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index dca3f5318812..40ab0bb20402 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -85,27 +85,28 @@ defvar MxListVF4 = [V_MF2, V_M1, V_M2, V_M4, V_M8]; // Use for zext/sext.vf8 defvar MxListVF8 = [V_M1, V_M2, V_M4, V_M8]; -class FPR_Info { +class MxSet { + list m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], + !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], + !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8], + !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]); +} + +class FPR_Info mxlist> { RegisterClass fprclass = regclass; string FX = fx; + list MxList = mxlist; } -def SCALAR_F16 : FPR_Info; -def SCALAR_F32 : FPR_Info; -def SCALAR_F64 : FPR_Info; +def SCALAR_F16 : FPR_Info.m>; +def SCALAR_F32 : FPR_Info.m>; +def SCALAR_F64 : FPR_Info.m>; defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64]; // Used for widening instructions. It excludes F64. 
defvar FPListW = [SCALAR_F16, SCALAR_F32]; -class MxSet { - list m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], - !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], - !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8], - !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]); -} - class NFSet { list L = !cond(!eq(m.value, V_M8.value): [], !eq(m.value, V_M4.value): [2], @@ -1619,15 +1620,15 @@ multiclass VPseudoVSLD1_VX { } multiclass VPseudoBinaryV_VF { - foreach m = MxList in - foreach f = FPList in + foreach f = FPList in + foreach m = f.MxList in defm "_V" # f.FX : VPseudoBinary; } multiclass VPseudoVSLD1_VF { - foreach m = MxList in - foreach f = FPList in + foreach f = FPList in + foreach m = f.MxList in defm "_V" # f.FX : VPseudoBinary, Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>; @@ -1666,8 +1667,8 @@ multiclass VPseudoBinaryW_VX { } multiclass VPseudoBinaryW_VF { - foreach m = MxListW in - foreach f = FPListW in + foreach f = FPListW in + foreach m = f.MxList in defm "_V" # f.FX : VPseudoBinary; @@ -1688,8 +1689,8 @@ multiclass VPseudoBinaryW_WX { } multiclass VPseudoBinaryW_WF { - foreach m = MxListW in - foreach f = FPListW in + foreach f = FPListW in + foreach m = f.MxList in defm "_W" # f.FX : VPseudoBinary; } @@ -1741,8 +1742,8 @@ multiclass VPseudoBinaryV_XM.R, m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, @@ -1773,8 +1774,8 @@ multiclass VPseudoUnaryVMV_V_X_I { } multiclass VPseudoVMV_F { - foreach m = MxList in { - foreach f = FPList in { + foreach f = FPList in { + foreach m = f.MxList in { let VLMul = m.value in { def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask, @@ -1884,8 +1885,8 @@ multiclass VPseudoBinaryM_VX { } multiclass VPseudoBinaryM_VF { - foreach m = MxList in - foreach f = FPList in + foreach f = FPList in + foreach m = f.MxList in defm "_V" # f.FX : VPseudoBinaryM; @@ -2209,8 +2210,8 @@ multiclass VPseudoTernaryV_VX_AAXA { } multiclass VPseudoTernaryV_VF_AAXA { - foreach m = MxList in - foreach f = 
FPList in + foreach f = FPList in + foreach m = f.MxList in defm "_V" # f.FX : VPseudoTernaryWithPolicy; @@ -2232,8 +2233,8 @@ multiclass VPseudoTernaryW_VX { multiclass VPseudoTernaryW_VF { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in - foreach f = FPListW in + foreach f = FPListW in + foreach m = f.MxList in defm "_V" # f.FX : VPseudoTernaryWithPolicy; } @@ -4362,8 +4363,8 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let Predicates = [HasVInstructionsAnyF] in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - foreach m = MxList in { - foreach f = FPList in { + foreach f = FPList in { + foreach m = f.MxList in { let VLMul = m.value in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in def "PseudoVFMV_" # f.FX # "_S_" # m.MX : From fc2b09a744dca3c995a6cc82482f135e1e4e41ef Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 22:05:14 -0800 Subject: [PATCH 337/992] [ADT] Remove ImmutableSet::foreach and ImmutableMap::foreach (NFC) These functions seem to be unused for at least 1 year. --- llvm/include/llvm/ADT/ImmutableMap.h | 37 ------------------------- llvm/include/llvm/ADT/ImmutableSet.h | 20 ------------- llvm/unittests/ADT/ImmutableSetTest.cpp | 35 ----------------------- 3 files changed, 92 deletions(-) diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h index cf6fb870897a..f0e898cafaf9 100644 --- a/llvm/include/llvm/ADT/ImmutableMap.h +++ b/llvm/include/llvm/ADT/ImmutableMap.h @@ -140,44 +140,7 @@ class ImmutableMap { bool isEmpty() const { return !Root; } - //===--------------------------------------------------===// - // Foreach - A limited form of map iteration. 
- //===--------------------------------------------------===// - -private: - template - struct CBWrapper { - Callback C; - - void operator()(value_type_ref V) { C(V.first,V.second); } - }; - - template - struct CBWrapperRef { - Callback &C; - - CBWrapperRef(Callback& c) : C(c) {} - - void operator()(value_type_ref V) { C(V.first,V.second); } - }; - public: - template - void foreach(Callback& C) { - if (Root) { - CBWrapperRef CB(C); - Root->foreach(CB); - } - } - - template - void foreach() { - if (Root) { - CBWrapper CB; - Root->foreach(CB); - } - } - //===--------------------------------------------------===// // For testing. //===--------------------------------------------------===// diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index 48b253d3b75e..8cef5acbafaa 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -169,20 +169,6 @@ class ImutAVLTree { /// is logarithmic in the size of the tree. bool contains(key_type_ref K) { return (bool) find(K); } - /// foreach - A member template the accepts invokes operator() on a functor - /// object (specified by Callback) for every node/subtree in the tree. - /// Nodes are visited using an inorder traversal. - template - void foreach(Callback& C) { - if (ImutAVLTree* L = getLeft()) - L->foreach(C); - - C(value); - - if (ImutAVLTree* R = getRight()) - R->foreach(C); - } - /// validateTree - A utility method that checks that the balancing and /// ordering invariants of the tree are satisfied. It is a recursive /// method that returns the height of the tree, which is then consumed @@ -1063,12 +1049,6 @@ class ImmutableSet { /// This method runs in constant time. bool isSingleton() const { return getHeight() == 1; } - template - void foreach(Callback& C) { if (Root) Root->foreach(C); } - - template - void foreach() { if (Root) { Callback C; Root->foreach(C); } } - //===--------------------------------------------------===// // Iterators. 
//===--------------------------------------------------===// diff --git a/llvm/unittests/ADT/ImmutableSetTest.cpp b/llvm/unittests/ADT/ImmutableSetTest.cpp index e23cd2b3d1a8..c0bde4c4d680 100644 --- a/llvm/unittests/ADT/ImmutableSetTest.cpp +++ b/llvm/unittests/ADT/ImmutableSetTest.cpp @@ -136,41 +136,6 @@ TEST_F(ImmutableSetTest, RemoveIntSetTest) { EXPECT_TRUE(S4.contains(5)); } -TEST_F(ImmutableSetTest, CallbackCharSetTest) { - ImmutableSet::Factory f; - ImmutableSet S = f.getEmptySet(); - - ImmutableSet S2 = f.add(f.add(f.add(S, 'a'), 'e'), 'i'); - ImmutableSet S3 = f.add(f.add(S2, 'o'), 'u'); - - S3.foreach(); - - ASSERT_STREQ("aeiou", buffer); -} - -TEST_F(ImmutableSetTest, Callback2CharSetTest) { - ImmutableSet::Factory f; - ImmutableSet S = f.getEmptySet(); - - ImmutableSet S2 = f.add(f.add(f.add(S, 'b'), 'c'), 'd'); - ImmutableSet S3 = f.add(f.add(f.add(S2, 'f'), 'g'), 'h'); - - MyIter obj; - S3.foreach(obj); - ASSERT_STREQ("bcdfgh", buffer); - ASSERT_EQ(6, obj.counter); - - MyIter obj2; - S2.foreach(obj2); - ASSERT_STREQ("bcd", buffer); - ASSERT_EQ(3, obj2.counter); - - MyIter obj3; - S.foreach(obj); - ASSERT_STREQ("", buffer); - ASSERT_EQ(0, obj3.counter); -} - TEST_F(ImmutableSetTest, IterLongSetTest) { ImmutableSet::Factory f; ImmutableSet S = f.getEmptySet(); From 3f7fd50c27c0fe744069ae5b0005a72970b860a6 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 22:05:16 -0800 Subject: [PATCH 338/992] [AArch64] Remove unused constant NeonBitsPerVector (NFC) --- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index caee2acd2606..9409c329943b 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -757,7 +757,6 @@ namespace AArch64 { // vector (such as index 1) are undefined. 
static constexpr unsigned SVEBitsPerBlock = 128; static constexpr unsigned SVEMaxBitsPerVector = 2048; -const unsigned NeonBitsPerVector = 128; } // end namespace AArch64 } // end namespace llvm From 7b53fd1cff9a5e04c41fb5426ed74f9247994374 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 22:50:24 -0800 Subject: [PATCH 339/992] [CodeGen] Remove DisconnectedComponentsRenamed (NFC) The last use was removed on May 31, 2016 in commit f9acacaa928d7ba9db900c42893c244fb19714c4. --- llvm/include/llvm/CodeGen/MachineScheduler.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index e368fd7d056a..267c4b595eec 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -425,10 +425,6 @@ class ScheduleDAGMILive : public ScheduleDAGMI { IntervalPressure BotPressure; RegPressureTracker BotRPTracker; - /// True if disconnected subregister components are already renamed. - /// The renaming is only done on demand if lane masks are tracked. - bool DisconnectedComponentsRenamed = false; - public: ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S) From 3782624c126bc0cbf81f5251ef206ccf63cce7f2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Jan 2022 22:50:26 -0800 Subject: [PATCH 340/992] [DebugInfo] Remove hasInterestingContent (NFC) hasInterestingContent was introduced without a use on Sep 15, 2015 in commit e5162dba49890d9d436ea99d003c792897c9b880. 
--- llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h | 6 ------ llvm/lib/DWARFLinker/DWARFLinker.cpp | 1 - llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp | 2 -- 3 files changed, 9 deletions(-) diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h index a6310bcb5df1..afba19ac7d42 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h @@ -148,9 +148,6 @@ class CompileUnit { return LocationAttributes; } - void setHasInterestingContent() { HasInterestingContent = true; } - bool hasInterestingContent() { return HasInterestingContent; } - /// Mark every DIE in this unit as kept. This function also /// marks variables as InDebugMap so that they appear in the /// reconstructed accelerator tables. @@ -298,9 +295,6 @@ class CompileUnit { /// Is this unit subject to the ODR rule? bool HasODR; - /// Did a DIE actually contain a valid reloc? - bool HasInterestingContent; - /// The DW_AT_language of this unit. uint16_t Language = 0; diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index ae0859e1ecfd..4cc146e086f9 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -2107,7 +2107,6 @@ Error DWARFLinker::loadClangModule( // Add this module. 
Unit = std::make_unique(*CU, UnitID++, !Options.NoODR, ModuleName); - Unit->setHasInterestingContent(); analyzeContextInfo(CUDie, 0, *Unit, &ODRContexts.getRoot(), ODRContexts, ModulesEndOffset, Options.ParseableSwiftInterfaces, [&](const Twine &Warning, const DWARFDie &DIE) { diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp index 925ab3d295c2..acecb1788d10 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp @@ -40,8 +40,6 @@ StringRef CompileUnit::getSysRoot() { void CompileUnit::markEverythingAsKept() { unsigned Idx = 0; - setHasInterestingContent(); - for (auto &I : Info) { // Mark everything that wasn't explicit marked for pruning. I.Keep = !I.Prune; From 1b708b67f625507cf2c24319b30479cc2af41c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Sun, 2 Jan 2022 00:59:23 +0100 Subject: [PATCH 341/992] [lld][MinGW] Ignore `--[no-]as-neeed` flags in MinGW driver These flags are specific to ELF, but are still accepted by GNU ld, even if it does not do anything. This patch adds them as ignored option for the sake of compatibility. 
Part of https://github.com/llvm/llvm-project/issues/52947 Differential Revision: https://reviews.llvm.org/D116484 --- lld/MinGW/Options.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 50ac71bced85..c38b0710d39b 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -146,6 +146,7 @@ def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias; // Ignored options def: Joined<["-"], "O">; +def: F<"as-needed">; def: F<"build-id">; def: F<"disable-auto-image-base">; def: F<"enable-auto-image-base">; @@ -153,6 +154,7 @@ def: F<"end-group">; def: Flag<["--"], "full-shutdown">; defm: EqNoHelp<"major-image-version">; defm: EqNoHelp<"minor-image-version">; +def: F<"no-as-needed">; def: F<"no-undefined">; def: F<"pic-executable">; defm: EqNoHelp<"plugin">; From dbeeb136abcb03eaa85e2ee47a5169f5298e8944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Sun, 2 Jan 2022 12:06:54 +0100 Subject: [PATCH 342/992] [clang][MinGW] Explicitly ignore `-fPIC` & friends GCC on Windows ignores this flag completely [0] which some build systems sadly rely on when compiling for Windows using MinGW. The current behaviour of clang however is to error out as -fPIC & friends has no effect on Windows. 
This patch instead changes the behaviour for MinGW to ignore the option for the sake of compatibility Fixes https://github.com/llvm/llvm-project/issues/52947 [0] https://gcc.gnu.org/legacy-ml/gcc-patches/2015-08/msg00836.html Differential Revision: https://reviews.llvm.org/D116485 --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 7 +++---- clang/lib/Driver/ToolChains/MinGW.cpp | 5 +---- clang/test/Driver/pic.c | 10 ++++++++++ clang/test/Driver/windows-pic.cpp | 18 ------------------ 4 files changed, 14 insertions(+), 26 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ad50c66cb6c1..f25fe9ba34c4 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1186,10 +1186,9 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { options::OPT_fpic, options::OPT_fno_pic, options::OPT_fPIE, options::OPT_fno_PIE, options::OPT_fpie, options::OPT_fno_pie); - if (Triple.isOSWindows() && LastPICArg && - LastPICArg == - Args.getLastArg(options::OPT_fPIC, options::OPT_fpic, - options::OPT_fPIE, options::OPT_fpie)) { + if (Triple.isOSWindows() && !Triple.isOSCygMing() && LastPICArg && + LastPICArg == Args.getLastArg(options::OPT_fPIC, options::OPT_fpic, + options::OPT_fPIE, options::OPT_fpie)) { ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << LastPICArg->getSpelling() << Triple.str(); if (Triple.getArch() == llvm::Triple::x86_64) diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index ecce2f062bd7..6d8bfc358dd3 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -486,10 +486,7 @@ bool toolchains::MinGW::isPIEDefault(const llvm::opt::ArgList &Args) const { return false; } -bool toolchains::MinGW::isPICDefaultForced() const { - return getArch() == llvm::Triple::x86_64 || - getArch() == llvm::Triple::aarch64; -} +bool 
toolchains::MinGW::isPICDefaultForced() const { return true; } llvm::ExceptionHandling toolchains::MinGW::GetExceptionModel(const ArgList &Args) const { diff --git a/clang/test/Driver/pic.c b/clang/test/Driver/pic.c index acb0bad022f4..2124bf1b277b 100644 --- a/clang/test/Driver/pic.c +++ b/clang/test/Driver/pic.c @@ -301,3 +301,13 @@ // RUN: | FileCheck %s --check-prefix=CHECK-PIC2 // RUN: %clang -c %s -target aarch64-windows-gnu -### 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// +// On MinGW, allow specifying -fPIC & friends but ignore them +// RUN: %clang -fno-PIC -c %s -target x86_64-pc-windows-gnu -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: %clang -fPIC -c %s -target i686-pc-windows-gnu -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC +// RUN: %clang -fno-PIC -c %s -target aarch64-pc-windows-gnu -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-PIC2 +// RUN: %clang -fPIC -c %s -target armv7-pc-windows-gnu -### 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-PIC diff --git a/clang/test/Driver/windows-pic.cpp b/clang/test/Driver/windows-pic.cpp index 3b77a7cc5a33..00807d663dff 100644 --- a/clang/test/Driver/windows-pic.cpp +++ b/clang/test/Driver/windows-pic.cpp @@ -16,15 +16,6 @@ // RUN: %clang -### -target i686-windows-itanium -fPIE -fno-pie %s // RUN: %clang -### -target i686-windows-itanium -fpie -fno-pie %s -// RUN: %clang -### -target i686-windows-gnu -fPIC %s 2>&1 | FileCheck -check-prefix CHECK-PIC-ERROR %s -// RUN: %clang -### -target i686-windows-gnu -fpic %s 2>&1 | FileCheck -check-prefix CHECK-pic-ERROR %s -// RUN: %clang -### -target i686-windows-gnu -fPIE %s 2>&1 | FileCheck -check-prefix CHECK-PIE-ERROR %s -// RUN: %clang -### -target i686-windows-gnu -fpie %s 2>&1 | FileCheck -check-prefix CHECK-pie-ERROR %s -// RUN: %clang -### -target i686-windows-gnu -fPIC -fno-pic %s -// RUN: %clang -### -target i686-windows-gnu -Fpic -fno-pic %s -// RUN: %clang -### -target 
i686-windows-gnu -fPIE -fno-pie %s -// RUN: %clang -### -target i686-windows-gnu -fpie -fno-pie %s - // RUN: %clang -### -target x86_64-windows -fPIC %s 2>&1 | FileCheck -check-prefix CHECK-PIC-ERROR %s // RUN: %clang -### -target x86_64-windows -fpic %s 2>&1 | FileCheck -check-prefix CHECK-pic-ERROR %s // RUN: %clang -### -target x86_64-windows -fPIE %s 2>&1 | FileCheck -check-prefix CHECK-PIE-ERROR %s @@ -43,15 +34,6 @@ // RUN: %clang -### -target x86_64-windows-itanium -fPIE -fno-pie %s // RUN: %clang -### -target x86_64-windows-itanium -fpie -fno-pie %s -// RUN: %clang -### -target x86_64-windows-gnu -fPIC %s 2>&1 | FileCheck -check-prefix CHECK-PIC-ERROR %s -// RUN: %clang -### -target x86_64-windows-gnu -fpic %s 2>&1 | FileCheck -check-prefix CHECK-pic-ERROR %s -// RUN: %clang -### -target x86_64-windows-gnu -fPIE %s 2>&1 | FileCheck -check-prefix CHECK-PIE-ERROR %s -// RUN: %clang -### -target x86_64-windows-gnu -fpie %s 2>&1 | FileCheck -check-prefix CHECK-pie-ERROR %s -// RUN: %clang -### -target x86_64-windows-gnu -fPIC -fno-pic %s -// RUN: %clang -### -target x86_64-windows-gnu -Fpic -fno-pic %s -// RUN: %clang -### -target x86_64-windows-gnu -fPIE -fno-pie %s -// RUN: %clang -### -target x86_64-windows-gnu -fpie -fno-pie %s - // CHECK-PIC-ERROR: unsupported option '-fPIC' for target '{{.*}} // CHECK-pic-ERROR: unsupported option '-fpic' for target '{{.*}} // CHECK-PIE-ERROR: unsupported option '-fPIE' for target '{{.*}} From b1a333f0feb84d781a6820cef9aad2a722f26bd1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Jan 2022 12:24:13 +0000 Subject: [PATCH 343/992] [VPlan] Don't consider VPWidenCanonicalIVRecipe phi-like. VPWidenCanonicalIVRecipe does not create PHI instructions, so it does not need to be placed in the phi section of a VPBasicBlock. Also tidies the code so the WidenCanonicalIV recipe and the compare/lane-masks are created in the header. Discussed D113223. 
Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D116473 --- .../Transforms/Vectorize/LoopVectorize.cpp | 19 +++++----- llvm/lib/Transforms/Vectorize/VPlanValue.h | 4 +-- .../x86-interleaved-accesses-masked-group.ll | 36 +++++++++---------- .../pr46525-expander-insertpoint.ll | 22 ++++++------ .../LoopVectorize/reduction-order.ll | 4 +-- .../LoopVectorize/select-reduction.ll | 2 +- .../vplan-sink-scalars-and-merge.ll | 6 ++-- 7 files changed, 46 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4b588109bcda..e9db5042753f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8442,25 +8442,24 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one. // Introduce the early-exit compare IV <= BTC to form header block mask. - // This is used instead of IV < TC because TC may wrap, unlike BTC. - // Start by constructing the desired canonical IV in the header block. + // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by + // constructing the desired canonical IV in the header block as its first + // non-phi instructions. + assert(CM.foldTailByMasking() && "must fold the tail"); + VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock(); + auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi(); + VPValue *IV = nullptr; if (Legal->getPrimaryInduction()) IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); else { - VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock(); auto *IVRecipe = new VPWidenCanonicalIVRecipe(); - HeaderVPBB->insert(IVRecipe, HeaderVPBB->getFirstNonPhi()); + HeaderVPBB->insert(IVRecipe, NewInsertionPoint); IV = IVRecipe; } - // Create the block in mask as the first non-phi instruction in the block. 
VPBuilder::InsertPointGuard Guard(Builder); - auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi(); - Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint); - - assert(CM.foldTailByMasking() && "must fold the tail"); - + Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint); if (CM.TTI.emitGetActiveLaneMask()) { VPValue *TC = Plan->getOrCreateTripCount(); BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index fd92201614df..3e36bc455324 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -96,6 +96,7 @@ class VPValue { VPVReplicateSC, VPVWidenSC, VPVWidenCallSC, + VPVWidenCanonicalIVSC, VPVWidenGEPSC, VPVWidenSelectSC, @@ -103,7 +104,6 @@ class VPValue { VPVBlendSC, VPVFirstOrderRecurrencePHISC, VPVWidenPHISC, - VPVWidenCanonicalIVSC, VPVWidenIntOrFpInductionSC, VPVPredInstPHI, VPVReductionPHISC, @@ -325,6 +325,7 @@ class VPDef { VPReductionSC, VPReplicateSC, VPWidenCallSC, + VPWidenCanonicalIVSC, VPWidenGEPSC, VPWidenMemoryInstructionSC, VPWidenSC, @@ -334,7 +335,6 @@ class VPDef { VPBlendSC, VPFirstOrderRecurrencePHISC, VPWidenPHISC, - VPWidenCanonicalIVSC, VPWidenIntOrFpInductionSC, VPPredInstPHISC, VPReductionPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index 8887acd0cd59..d91cd28c0bb4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -405,10 +405,10 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[PRED_LOAD_CONTINUE16:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i1> [[TMP0]], <8 x i1> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -516,11 +516,11 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1 ; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i1> [[TMP0]], <8 x i1> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], @@ -612,10 +612,10 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i1> [[TMP0]], <8 x i1> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -723,11 +723,11 @@ define 
dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul i32 [[INDEX]], 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i1> [[TMP0]], <8 x i1> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], @@ -2300,10 +2300,10 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE62]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule 
<8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i1> [[TMP0]], <8 x i1> zeroinitializer +; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -2635,11 +2635,11 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; ENABLED_MASKED_STRIDED: vector.body: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i1> [[TMP0]], <8 x i1> zeroinitializer +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer ; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index 2ce88e6a409c..3fb5a9b39d3e 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -39,20 +39,20 @@ define void @test(i16 %x, i64 %y, i32* %ptr) { ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], -; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX5]] to i8 +; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = mul i64 [[INDEX]], [[INC]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX3]] to i8 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i8> poison, i8 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT6]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0 -; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> 
poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT9]] -; CHECK-NEXT: [[INDUCTION10:%.*]] = add <2 x i8> [[BROADCAST_SPLAT7]], [[TMP8]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i8> poison, i8 [[TMP6]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT4]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0 +; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT6]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT7]] +; CHECK-NEXT: [[INDUCTION8:%.*]] = add <2 x i8> [[BROADCAST_SPLAT5]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT10]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll index 47ac8e424fd6..dac56d4b9fe1 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll @@ -7,9 +7,9 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; in deterministic order. 
; CHECK-LABEL: @foo( ; CHECK: vector.body: -; CHECK: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 +; CHECK: icmp ule <4 x i64> +; CHECK-NEXT: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 ; CHECK-NEXT: %[[VAR2:.*]] = add <4 x i32> %vec.phi, -; CHECK-NEXT: icmp ule <4 x i64> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR2]], <4 x i32> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR1]], <4 x i32> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index e487b3a34f78..70920bd2a986 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -33,9 +33,9 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 059ea6fd6bdb..ead5fc1738bd 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -312,12 +312,12 @@ define 
void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: Successor(s): then.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0: -; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1]]> ir<%c.1> ir ; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: @@ -408,13 +408,13 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: Successor(s): then.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0: -; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1]]> ir<%c.0> ir ; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: @@ -519,12 +519,12 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: Successor(s): then.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0: -; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1:%.+]]> ir<%c.0> ir ; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: From d4f09786e079361eba1ade1e351be8771d016f29 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Sat, 25 Dec 2021 13:29:58 -0500 Subject: [PATCH 344/992] [clang] 
More informative mixed namespace diagnostics First, let's check we get a TemplateDecl, before complaining about where it might have been found. Second, if it came from an unexpected place, show where that location is. Reviewed By: ChuanqiXu Differential Revision: https://reviews.llvm.org/D116164 --- clang/lib/Sema/SemaCoroutine.cpp | 36 +++++++++++-------- .../co_await-range-for-exp-namespace.cpp | 1 + .../SemaCXX/coreturn-eh-exp-namespace.cpp | 1 + clang/test/SemaCXX/coreturn-exp-namespace.cpp | 1 + ...e-final-suspend-noexcept-exp-namespace.cpp | 1 + .../SemaCXX/coroutine-mixed-exp-namespace.cpp | 4 ++- .../coroutine-mixed2-exp-namespace.cpp | 6 ++-- .../SemaCXX/coroutine-rvo-exp-namespace.cpp | 1 + .../SemaCXX/coroutine-seh-exp-namespace.cpp | 1 + ...raits-undefined-template-exp-namespace.cpp | 2 +- ...andled_exception-warning-exp-namespace.cpp | 1 + ...ndle-address-return-type-exp-namespace.cpp | 1 + .../test/SemaCXX/coroutines-exp-namespace.cpp | 1 + 13 files changed, 39 insertions(+), 18 deletions(-) diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index e89cecd08cca..3a6d9f0b9f26 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1740,30 +1740,38 @@ ClassTemplateDecl *Sema::lookupCoroutineTraits(SourceLocation KwLoc, return nullptr; } - if (!InStd) { - // Found only in std::experimental. - Diag(KwLoc, diag::warn_deprecated_coroutine_namespace) - << "coroutine_traits"; - } else if (InExp) { - // Found in std and std::experimental. - Diag(KwLoc, - diag::err_mixed_use_std_and_experimental_namespace_for_coroutine); - Diag(KwLoc, diag::warn_deprecated_coroutine_namespace) - << "coroutine_traits"; - return nullptr; - } - // Prefer ::std to std::experimental. auto &Result = InStd ? ResStd : ResExp; CoroTraitsNamespaceCache = InStd ? StdSpace : ExpSpace; // coroutine_traits is required to be a class template. 
- if (!(StdCoroutineTraitsCache = Result.getAsSingle())) { + StdCoroutineTraitsCache = Result.getAsSingle(); + if (!StdCoroutineTraitsCache) { Result.suppressDiagnostics(); NamedDecl *Found = *Result.begin(); Diag(Found->getLocation(), diag::err_malformed_std_coroutine_traits); return nullptr; } + + if (InExp) { + // Found in std::experimental + Diag(KwLoc, diag::warn_deprecated_coroutine_namespace) + << "coroutine_traits"; + ResExp.suppressDiagnostics(); + auto *Found = *ResExp.begin(); + Diag(Found->getLocation(), diag::note_entity_declared_at) << Found; + + if (InStd) { + // Also found in std + Diag(KwLoc, + diag::err_mixed_use_std_and_experimental_namespace_for_coroutine); + Diag(StdCoroutineTraitsCache->getLocation(), + diag::note_entity_declared_at) + << StdCoroutineTraitsCache; + + return nullptr; + } + } } Namespace = CoroTraitsNamespaceCache; return StdCoroutineTraitsCache; diff --git a/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp b/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp index 75568505ab55..df6b8a4e86b3 100644 --- a/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp +++ b/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp @@ -53,6 +53,7 @@ MyForLoopArrayAwaiter g() { for co_await (auto i : arr) {} // expected-warning {{support for std::experimental::coroutine_traits will be removed}} // expected-error@-1 {{call to deleted member function 'await_transform'}} // expected-note@-2 {{'await_transform' implicitly required by 'co_await' here}} + // expected-note@Inputs/std-coroutine-exp-namespace.h:8 {{'coroutine_traits' declared here}} } struct ForLoopAwaiterBadBeginTransform { diff --git a/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp b/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp index 7d85c924f669..facdedf14d01 100644 --- a/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp +++ b/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp @@ -40,6 +40,7 @@ VoidTagReturnValue test() { object x = {}; try { co_return {}; // 
expected-warning {{support for std::experimental::coroutine_traits will be removed}} + // expected-note@Inputs/std-coroutine-exp-namespace.h:8 {{'coroutine_traits' declared here}} } catch (...) { throw; } diff --git a/clang/test/SemaCXX/coreturn-exp-namespace.cpp b/clang/test/SemaCXX/coreturn-exp-namespace.cpp index c4023c2a94fa..f45b030f322b 100644 --- a/clang/test/SemaCXX/coreturn-exp-namespace.cpp +++ b/clang/test/SemaCXX/coreturn-exp-namespace.cpp @@ -84,6 +84,7 @@ template struct std::experimental::coroutine_traits { using promise_type = promise_int; }; void test0() { co_await a; } // expected-warning {{support for std::experimental::coroutine_traits will be removed}} +// expected-note@Inputs/std-coroutine-exp-namespace.h:8 {{'coroutine_traits' declared here}} float test1() { co_await a; } int test2() { diff --git a/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp index 5e4e4802eb48..131bae0d294f 100644 --- a/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp @@ -7,6 +7,7 @@ namespace std { namespace experimental { template struct coroutine_traits { using promise_type = typename Ret::promise_type; }; +// expected-note@-1{{declared here}} template struct coroutine_handle { diff --git a/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp index 8f6457355190..5c214ca732be 100644 --- a/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp @@ -3,7 +3,7 @@ // RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s #include "Inputs/std-coroutine-exp-namespace.h" -#include "Inputs/std-coroutine.h" +#include "Inputs/std-coroutine.h" // Second struct my_awaitable { bool await_ready() noexcept; @@ -25,4 +25,6 @@ struct std::coroutine_traits { using promise_type = promise_void; }; 
void test() { co_return; // expected-error {{mixed use of std and std::experimental namespaces for coroutine components}} // expected-warning@-1{{support for std::experimental::coroutine_traits will be removed}} + // expected-note@Inputs/std-coroutine-exp-namespace.h:8 {{'coroutine_traits' declared here}} + // expected-note@Inputs/std-coroutine.h:8 {{'coroutine_traits' declared here}} } diff --git a/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp index 67cb42afa90d..3d37b34c642e 100644 --- a/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp @@ -1,10 +1,10 @@ // This file is to test the mixed use of `std::experimental::coroutine_traits` and `std::coroutine_traits` -// which is similar to coroutine-mixed-exp-namesapce. This file tests the relative order of +// which is similar to coroutine-mixed-exp-namespace. This file tests the relative order of // included header wouldn't affect the diagnostic messages. 
// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s +#include "Inputs/std-coroutine.h" // First #include "Inputs/std-coroutine-exp-namespace.h" -#include "Inputs/std-coroutine.h" struct my_awaitable { bool await_ready() noexcept; @@ -26,4 +26,6 @@ struct std::coroutine_traits { using promise_type = promise_void; }; void test() { co_return; // expected-error {{mixed use of std and std::experimental namespaces for coroutine components}} // expected-warning@-1{{support for std::experimental::coroutine_traits will be removed}} + // expected-note@Inputs/std-coroutine-exp-namespace.h:8 {{'coroutine_traits' declared here}} + // expected-note@Inputs/std-coroutine.h:8 {{'coroutine_traits' declared here}} } diff --git a/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp index f73ff3880c04..f8941f8ed2dc 100644 --- a/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp @@ -30,6 +30,7 @@ struct traits_sfinae_base> { template struct coroutine_traits : public traits_sfinae_base {}; +// expected-note@-1{{declared here}} } // namespace std::experimental struct suspend_never { diff --git a/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp index 9384397687db..bfe421595c00 100644 --- a/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -triple x86_64-windows-msvc -fms-extensions namespace std::experimental { template struct coroutine_traits; +// expected-note@-1{{declared here}} template struct coroutine_handle { coroutine_handle() = default; diff --git a/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp index 649249f814d4..c71023ad5af5 100644 --- 
a/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp @@ -6,7 +6,7 @@ namespace std { namespace experimental { template -struct coroutine_traits { +struct coroutine_traits { // expected-note{{declared here}} struct promise_type {}; }; diff --git a/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp index 76d5ae87e365..1987eeaa90ae 100644 --- a/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp @@ -33,6 +33,7 @@ struct std::experimental::coroutine_traits { using promise_type = pr #ifndef DISABLE_WARNING void test0() { // expected-warning {{'promise_void' is required to declare the member 'unhandled_exception()' when exceptions are enabled}} co_return; // expected-warning {{support for std::experimental::coroutine_traits will be removed}} + // expected-note@Inputs/std-coroutine-exp-namespace.h:8 {{'coroutine_traits' declared here}} } #else void test0() { // expected-no-diagnostics diff --git a/clang/test/SemaCXX/coroutine_handle-address-return-type-exp-namespace.cpp b/clang/test/SemaCXX/coroutine_handle-address-return-type-exp-namespace.cpp index c722495db390..f167d167746a 100644 --- a/clang/test/SemaCXX/coroutine_handle-address-return-type-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutine_handle-address-return-type-exp-namespace.cpp @@ -32,6 +32,7 @@ struct traits_sfinae_base> { template struct coroutine_traits : public traits_sfinae_base {}; +// expected-note@-1{{declared here}} } // namespace std::experimental struct suspend_never { diff --git a/clang/test/SemaCXX/coroutines-exp-namespace.cpp b/clang/test/SemaCXX/coroutines-exp-namespace.cpp index a5ad37e338d2..caa141367d87 100644 --- a/clang/test/SemaCXX/coroutines-exp-namespace.cpp +++ 
b/clang/test/SemaCXX/coroutines-exp-namespace.cpp @@ -45,6 +45,7 @@ struct traits_sfinae_base> { template struct coroutine_traits : public traits_sfinae_base {}; +// expected-note@-1{{declared here}} } // namespace experimental } // namespace std From 840fa887418800877404abf7d158198cffb4187a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sun, 2 Jan 2022 09:25:19 -0800 Subject: [PATCH 345/992] autogen unroll test for ease of futre update --- llvm/test/Transforms/LoopUnroll/pr31718.ll | 47 ++++++++++++++++++---- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/llvm/test/Transforms/LoopUnroll/pr31718.ll b/llvm/test/Transforms/LoopUnroll/pr31718.ll index a06e67ace740..15dca4845248 100644 --- a/llvm/test/Transforms/LoopUnroll/pr31718.ll +++ b/llvm/test/Transforms/LoopUnroll/pr31718.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-unroll -verify-loop-lcssa -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -5,15 +6,47 @@ target triple = "x86_64-unknown-linux-gnu" @b = external local_unnamed_addr global i32, align 4 -; CHECK-LABEL: @main -; CHECK: exit.loopexit: -; CHECK: {{.*}} = phi i32 [ %d.0, %h3 ] -; CHECK: br label %exit -; CHECK: exit.loopexit1: -; CHECK: {{.*}} = phi i32 [ %d.0, %h3.1 ] -; CHECK: br label %exit define void @main(i1 %c) local_unnamed_addr #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: ph1: +; CHECK-NEXT: br label [[H1:%.*]] +; CHECK: h1: +; CHECK-NEXT: [[D_0:%.*]] = phi i32 [ [[TMP0:%.*]], [[LATCH1:%.*]] ], [ undef, [[PH1:%.*]] ] +; CHECK-NEXT: br label [[PH2:%.*]] +; CHECK: ph2: +; CHECK-NEXT: br label [[H2:%.*]] +; CHECK: h2: +; CHECK-NEXT: br label [[H3:%.*]] +; CHECK: h3: +; CHECK-NEXT: br i1 [[C:%.*]], label [[LATCH3:%.*]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: latch3: +; CHECK-NEXT: br i1 false, label [[EXIT3:%.*]], label [[H3]] +; CHECK: exit3: +; CHECK-NEXT: br label [[LATCH2:%.*]] +; CHECK: latch2: +; CHECK-NEXT: br label 
[[H3_1:%.*]] +; CHECK: h3.1: +; CHECK-NEXT: br i1 [[C]], label [[LATCH3_1:%.*]], label [[EXIT_LOOPEXIT1:%.*]] +; CHECK: latch3.1: +; CHECK-NEXT: br i1 false, label [[EXIT3_1:%.*]], label [[H3_1]] +; CHECK: exit3.1: +; CHECK-NEXT: br label [[LATCH2_1:%.*]] +; CHECK: latch2.1: +; CHECK-NEXT: br i1 [[C]], label [[LATCH1]], label [[PH2]] +; CHECK: latch1: +; CHECK-NEXT: [[TMP0]] = load i32, i32* @b, align 4 +; CHECK-NEXT: br label [[H1]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[D_0_LCSSA_PH:%.*]] = phi i32 [ [[D_0]], [[H3]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit.loopexit1: +; CHECK-NEXT: [[D_0_LCSSA_PH2:%.*]] = phi i32 [ [[D_0]], [[H3_1]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[D_0_LCSSA:%.*]] = phi i32 [ [[D_0_LCSSA_PH]], [[EXIT_LOOPEXIT]] ], [ [[D_0_LCSSA_PH2]], [[EXIT_LOOPEXIT1]] ] +; CHECK-NEXT: ret void +; ph1: br label %h1 From eda5bbfb9db4d0dc3861333f7b4adf135fe20af8 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sun, 2 Jan 2022 12:49:55 -0500 Subject: [PATCH 346/992] [libc++] [test] Remove an erroneously copy-paste in the hypot() tests. NFC. Line 1140 is a duplicate of line 1119; it tests the two-argument version of std::hypot, whereas all the lines in this section are supposed to be testing the C++17 three-argument version. Remove the erroneous duplicated line. Split out of D116295. 
--- libcxx/test/std/numerics/c.math/cmath.pass.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp index 160959b1075c..3d739f52ec75 100644 --- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp +++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp @@ -1137,7 +1137,6 @@ void test_hypot() static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); From f19a95bbed1605f3b7575063054eb9fa1d13b125 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sun, 2 Jan 2022 09:49:45 -0800 Subject: [PATCH 347/992] [SCEV] Split computeExitLimitFromICmp into two versions [NFC] This is in advance of a following change which needs to the non-icmp API. --- llvm/include/llvm/Analysis/ScalarEvolution.h | 11 +++++++ llvm/lib/Analysis/ScalarEvolution.cpp | 31 +++++++++++++++----- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index df50611832ce..ac7e3a46a6ea 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1713,6 +1713,17 @@ class ScalarEvolution { bool IsSubExpr, bool AllowPredicates = false); + /// Variant of previous which takes the components representing an ICmp + /// as opposed to the ICmpInst itself. Note that the prior version can + /// return more precise results in some cases and is preferred when caller + /// has a materialized ICmp. 
+ ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + bool ExitIfTrue, + bool IsSubExpr, + bool AllowPredicates = false); + + /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case /// to ExitingBB. diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 0c3f32295ae1..d80505aef092 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8203,6 +8203,28 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); + ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ExitIfTrue, + ControlsExit, AllowPredicates); + if (EL.hasAnyInfo()) return EL; + + auto *ExhaustiveCount = + computeExitCountExhaustively(L, ExitCond, ExitIfTrue); + + if (!isa(ExhaustiveCount)) + return ExhaustiveCount; + + return computeShiftCompareExitLimit(ExitCond->getOperand(0), + ExitCond->getOperand(1), L, OriginalPred); +} +ScalarEvolution::ExitLimit +ScalarEvolution::computeExitLimitFromICmp(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + bool ExitIfTrue, + bool ControlsExit, + bool AllowPredicates) { + + // Try to evaluate any dependencies out of the loop. 
LHS = getSCEVAtScope(LHS, L); RHS = getSCEVAtScope(RHS, L); @@ -8312,14 +8334,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, break; } - auto *ExhaustiveCount = - computeExitCountExhaustively(L, ExitCond, ExitIfTrue); - - if (!isa(ExhaustiveCount)) - return ExhaustiveCount; - - return computeShiftCompareExitLimit(ExitCond->getOperand(0), - ExitCond->getOperand(1), L, OriginalPred); + return getCouldNotCompute(); } ScalarEvolution::ExitLimit From 890e6854924be98c7765599d48326f734be05c5c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sun, 2 Jan 2022 10:15:17 -0800 Subject: [PATCH 348/992] [SCEV] Drop unused param from new version of computeExitLimitFromICmp [NFC] --- llvm/include/llvm/Analysis/ScalarEvolution.h | 2 -- llvm/lib/Analysis/ScalarEvolution.cpp | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index ac7e3a46a6ea..1484d2cdce83 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1719,11 +1719,9 @@ class ScalarEvolution { /// has a materialized ICmp. ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - bool ExitIfTrue, bool IsSubExpr, bool AllowPredicates = false); - /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case /// to ExitingBB. 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index d80505aef092..d48e81f28af9 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8203,8 +8203,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); - ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ExitIfTrue, - ControlsExit, AllowPredicates); + ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsExit, + AllowPredicates); if (EL.hasAnyInfo()) return EL; auto *ExhaustiveCount = @@ -8220,11 +8220,9 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - bool ExitIfTrue, bool ControlsExit, bool AllowPredicates) { - // Try to evaluate any dependencies out of the loop. LHS = getSCEVAtScope(LHS, L); RHS = getSCEVAtScope(RHS, L); From 41bfac6aed1817831e7b83994f4963f604987bd5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 10:20:15 -0800 Subject: [PATCH 349/992] [Target] Remove unused forward declarations (NFC) --- llvm/lib/Target/AArch64/AArch64.h | 1 - llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 - llvm/lib/Target/AArch64/AArch64InstrInfo.h | 1 - llvm/lib/Target/AArch64/AArch64MCInstLower.h | 3 --- llvm/lib/Target/AArch64/AArch64TargetMachine.h | 2 -- llvm/lib/Target/AArch64/AArch64TargetObjectFile.h | 1 - llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h | 2 -- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 1 - llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 1 - llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 4 ---- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 3 --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 2 -- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 1 - llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 2 -- 
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2 -- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 - llvm/lib/Target/AMDGPU/R600ISelLowering.h | 1 - llvm/lib/Target/AMDGPU/R600InstrInfo.h | 1 - llvm/lib/Target/AMDGPU/R600Subtarget.h | 6 ------ llvm/lib/Target/AMDGPU/SIFrameLowering.h | 5 ----- llvm/lib/Target/ARM/ARM.h | 2 -- llvm/lib/Target/ARM/ARMCallLowering.h | 1 - llvm/lib/Target/ARM/ARMRegisterInfo.h | 2 -- llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 2 -- llvm/lib/Target/ARM/Thumb2InstrInfo.h | 1 - llvm/lib/Target/Mips/MipsCallLowering.h | 1 - llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h | 1 - llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 4 ---- llvm/lib/Target/SystemZ/SystemZAsmPrinter.h | 1 - llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 1 - llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1 - llvm/lib/Target/SystemZ/SystemZMCInstLower.h | 1 - llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 2 -- llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h | 4 ---- llvm/lib/Target/VE/VE.h | 1 - .../WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h | 1 - llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h | 1 - llvm/lib/Target/X86/X86CallLowering.h | 2 -- 38 files changed, 70 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index b0dd30c13137..4d1464901777 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -26,7 +26,6 @@ class AArch64Subtarget; class AArch64TargetMachine; class FunctionPass; class InstructionSelector; -class MachineFunctionPass; FunctionPass *createAArch64DeadRegisterDefinitions(); FunctionPass *createAArch64RedundantCopyEliminationPass(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 9b541de8c80b..d5876b7bbd27 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -487,7 +487,6 @@ const unsigned 
RoundingBitsPos = 22; } // namespace AArch64 class AArch64Subtarget; -class AArch64TargetMachine; class AArch64TargetLowering : public TargetLowering { public: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index b2f9e82a7e8b..1054bea40e68 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -26,7 +26,6 @@ namespace llvm { class AArch64Subtarget; -class AArch64TargetMachine; static const MachineMemOperand::Flags MOSuppressPair = MachineMemOperand::MOTargetFlag1; diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.h b/llvm/lib/Target/AArch64/AArch64MCInstLower.h index 8f3148a98410..b008e49d52dd 100644 --- a/llvm/lib/Target/AArch64/AArch64MCInstLower.h +++ b/llvm/lib/Target/AArch64/AArch64MCInstLower.h @@ -14,15 +14,12 @@ namespace llvm { class AsmPrinter; -class MCAsmInfo; class MCContext; class MCInst; class MCOperand; class MCSymbol; class MachineInstr; -class MachineModuleInfoMachO; class MachineOperand; -class Mangler; /// AArch64MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index 25e626134317..7d314bce99b1 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -20,8 +20,6 @@ namespace llvm { -class AArch64RegisterBankInfo; - class AArch64TargetMachine : public LLVMTargetMachine { protected: std::unique_ptr TLOF; diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h index 28324c2ae608..9f098230bbd7 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -13,7 +13,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -class AArch64TargetMachine; /// This implementation is used for AArch64 ELF targets (Linux in particular). class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h index add0342c90fd..aafb1d19640a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -24,9 +24,7 @@ namespace llvm { class AArch64TargetLowering; class CCValAssign; -class DataLayout; class MachineIRBuilder; -class MachineRegisterInfo; class Type; class AArch64CallLowering: public CallLowering { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 35456d95dc2b..e2c46f4b4c1f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -21,7 +21,6 @@ namespace llvm { -class LLVMContext; class AArch64Subtarget; /// This class provides the information for the target register banks. 
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 7274ae79f74a..225e0c8e55fc 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -19,7 +19,6 @@ namespace llvm { class MCStreamer; -class Target; class Triple; struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 941226b83e44..66cb7a37a958 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -30,11 +30,7 @@ class MCStreamer; class MCSubtargetInfo; class MCTargetOptions; class MCTargetStreamer; -class StringRef; class Target; -class Triple; -class raw_ostream; -class raw_pwrite_stream; MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index b1263618c5db..673011f48289 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -20,9 +20,6 @@ namespace llvm { class GCNSubtarget; -class MachineFunction; -class MachineInstr; -class MachineInstrBuilder; class MachineMemOperand; class AMDGPUInstrInfo { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 26996e42af53..275c4cd229ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -30,7 +30,6 @@ namespace AMDGPU { struct ImageDimIntrinsicInfo; } -class AMDGPUInstrInfo; class AMDGPURegisterBankInfo; class AMDGPUTargetMachine; class BlockFrequencyInfo; @@ -42,7 +41,6 @@ class MachineOperand; class MachineRegisterInfo; class RegisterBank; class SIInstrInfo; -class 
SIMachineFunctionInfo; class SIRegisterInfo; class TargetRegisterClass; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 7faf0436f995..964a41d3d740 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -21,7 +21,6 @@ namespace llvm { class GCNTargetMachine; -class LLVMContext; class GCNSubtarget; class MachineIRBuilder; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 10ff50040c6a..48cf46b5f871 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -15,8 +15,6 @@ namespace llvm { -class GCNSubtarget; - class AMDGPUMachineFunction : public MachineFunctionInfo { /// A map to keep track of local memory objects and their offsets within the /// local memory space. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 226646a96953..dd3676f3b707 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -21,8 +21,6 @@ namespace llvm { -class ScheduleDAGMILive; - //===----------------------------------------------------------------------===// // AMDGPU Target Machine (R600+) //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 162121c2c525..716bc027a894 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -25,7 +25,6 @@ class MachineFunction; class MachineInstr; class MachineOperand; class MachineRegisterInfo; -class ScheduleDAG; class SIInstrInfo; class SIRegisterInfo; class GCNSubtarget; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h index f9a9a6127322..1e75a0432ec3 100644 
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -19,7 +19,6 @@ namespace llvm { -class R600InstrInfo; class R600Subtarget; class R600TargetLowering final : public AMDGPUTargetLowering { diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index fc567f1a1fca..bc8a4786df77 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -29,7 +29,6 @@ enum : uint64_t { }; } -class AMDGPUTargetMachine; class DFAPacketizer; class MachineFunction; class MachineInstr; diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index 94403b88f21a..92d559b1f8e6 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -21,12 +21,6 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -namespace llvm { - -class MCInstrInfo; - -} // namespace llvm - #define GET_SUBTARGETINFO_HEADER #include "R600GenSubtargetInfo.inc" diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 56fbb875ffd9..7949dcfa6632 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -13,11 +13,6 @@ namespace llvm { -class SIInstrInfo; -class SIMachineFunctionInfo; -class SIRegisterInfo; -class GCNSubtarget; - class SIFrameLowering final : public AMDGPUFrameLowering { public: SIFrameLowering(StackDirection D, Align StackAl, int LAO, diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h index 1d5e45aec06c..63a7c69ac7e8 100644 --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -29,8 +29,6 @@ struct BasicBlockInfo; class Function; class FunctionPass; class InstructionSelector; -class MachineBasicBlock; -class MachineFunction; class MachineInstr; class MCInst; class PassRegistry; diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h 
b/llvm/lib/Target/ARM/ARMCallLowering.h index 87b18f811747..38095617fb4f 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/llvm/lib/Target/ARM/ARMCallLowering.h @@ -23,7 +23,6 @@ namespace llvm { class ARMTargetLowering; -class MachineFunction; class MachineInstrBuilder; class MachineIRBuilder; class Value; diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.h b/llvm/lib/Target/ARM/ARMRegisterInfo.h index 87c0f322d3b3..2971b765a6fc 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.h @@ -17,8 +17,6 @@ namespace llvm { -class ARMSubtarget; - struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); public: diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 7ccdc6f85500..5c8f9bfdca08 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -36,8 +36,6 @@ class MCTargetStreamer; class StringRef; class Target; class Triple; -class raw_ostream; -class raw_pwrite_stream; namespace ARM_MC { std::string ParseARMTriple(const Triple &TT, StringRef CPU); diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index e6d51796ba4d..a83ff5e51004 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -18,7 +18,6 @@ namespace llvm { class ARMSubtarget; -class ScheduleHazardRecognizer; class Thumb2InstrInfo : public ARMBaseInstrInfo { ThumbRegisterInfo RI; diff --git a/llvm/lib/Target/Mips/MipsCallLowering.h b/llvm/lib/Target/Mips/MipsCallLowering.h index 1d1406da3201..9f114d55db4c 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.h +++ b/llvm/lib/Target/Mips/MipsCallLowering.h @@ -18,7 +18,6 @@ namespace llvm { -class MachineMemOperand; class MipsTargetLowering; class MipsCallLowering : public CallLowering { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index 02b4b18f54bd..de982f2108a1 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -15,7 +15,6 @@ namespace llvm { class APInt; -class MCSubtargetInfo; namespace RISCVMatInt { struct Inst { diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 899fec6c3328..e76fa03af3bf 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -23,11 +23,7 @@ class MCObjectTargetWriter; class MCRegisterInfo; class MCSubtargetInfo; class MCTargetOptions; -class StringRef; class Target; -class Triple; -class raw_pwrite_stream; -class raw_ostream; namespace SystemZMC { // How many bytes are in the ABI-defined, caller-allocated part of diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h index 6cfd7bd4c486..47a3336b8e63 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -19,7 +19,6 @@ namespace llvm { class MCStreamer; -class MachineBasicBlock; class MachineInstr; class Module; class raw_ostream; diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 106b9e8ebe06..3a1af888d8f9 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -17,7 +17,6 @@ #include "llvm/Support/TypeSize.h" namespace llvm { -class SystemZTargetMachine; class SystemZSubtarget; class SystemZFrameLowering : public TargetFrameLowering { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 940c0a857ea4..a8ddb8c62d18 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -381,7 +381,6 @@ enum { 
} // end namespace SystemZICMP class SystemZSubtarget; -class SystemZTargetMachine; class SystemZTargetLowering : public TargetLowering { public: diff --git a/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/llvm/lib/Target/SystemZ/SystemZMCInstLower.h index 14ad06488312..eb09033d1850 100644 --- a/llvm/lib/Target/SystemZ/SystemZMCInstLower.h +++ b/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -18,7 +18,6 @@ class MCInst; class MCOperand; class MachineInstr; class MachineOperand; -class Mangler; class SystemZAsmPrinter; class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower { diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index a4a5b1fbdf90..da6725777e43 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -17,8 +17,6 @@ namespace llvm { -class SystemZTargetMachine; - class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo { public: explicit SystemZSelectionDAGInfo() = default; diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h index 7fb8a556aa74..f0bb6e3acdee 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -27,10 +27,6 @@ class MCRegisterInfo; class MCSubtargetInfo; class MCTargetOptions; class Target; -class Triple; -class StringRef; -class raw_pwrite_stream; -class raw_ostream; MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index 8c1fa840f19c..2a729a1a311c 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -22,7 +22,6 @@ namespace llvm { class FunctionPass; class VETargetMachine; -class formatted_raw_ostream; class AsmPrinter; class MCInst; class MachineInstr; diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h 
b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index b2f10ca93a4f..75d5d0675990 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -26,7 +26,6 @@ class MCAsmBackend; class MCCodeEmitter; class MCInstrInfo; class MCObjectTargetWriter; -class MVT; class Triple; MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h index d024185defb4..57e40f6cd8d7 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h @@ -25,7 +25,6 @@ class MachineInstr; class MachineOperand; class MCContext; class MCSymbolWasm; -class StringRef; class WebAssemblyFunctionInfo; class WebAssemblySubtarget; diff --git a/llvm/lib/Target/X86/X86CallLowering.h b/llvm/lib/Target/X86/X86CallLowering.h index ac5b92bf4aae..0ad67cfd3532 100644 --- a/llvm/lib/Target/X86/X86CallLowering.h +++ b/llvm/lib/Target/X86/X86CallLowering.h @@ -20,8 +20,6 @@ namespace llvm { template class ArrayRef; -class DataLayout; -class MachineRegisterInfo; class X86TargetLowering; class X86CallLowering : public CallLowering { From 677bbec9fda098ef0750c69f5bda8f74454ce6b9 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 10:20:17 -0800 Subject: [PATCH 350/992] Remove unused "using" (NFC) Identified by misc-unused-using-decls. 
--- lldb/source/Core/IOHandlerCursesGUI.cpp | 2 -- llvm/lib/InterfaceStub/ELFObjHandler.cpp | 1 - 2 files changed, 3 deletions(-) diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 60207f75b7df..b37e84a13c5e 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -78,8 +78,6 @@ using namespace lldb; using namespace lldb_private; -using llvm::None; -using llvm::Optional; using llvm::StringRef; // we may want curses to be disabled for some builds for instance, windows diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp index 0d1a864f31ac..cb72f57f7bde 100644 --- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp +++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" -using llvm::MemoryBufferRef; using llvm::object::ELFObjectFile; using namespace llvm; From 7e163afd9ec7cb4a23bda681c22a2c6e7387049d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 10:20:19 -0800 Subject: [PATCH 351/992] Remove redundant void arguments (NFC) Identified by modernize-redundant-void-arg. 
--- lldb/include/lldb/Symbol/UnwindPlan.h | 2 +- lldb/source/Target/Thread.cpp | 4 +--- llvm/include/llvm/MCA/Instruction.h | 2 +- .../CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 6 +++--- llvm/lib/CodeGen/TypePromotion.cpp | 10 +++++----- llvm/lib/Target/BPF/BPFMIChecking.cpp | 4 ++-- llvm/lib/Target/BPF/BPFMIPeephole.cpp | 16 ++++++++-------- llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp | 2 +- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 8 ++++---- llvm/lib/Target/SystemZ/SystemZSubtarget.h | 2 +- llvm/lib/Target/VE/VESubtarget.h | 2 +- llvm/lib/Target/X86/X86ISelLowering.h | 2 +- .../lib/Transforms/Vectorize/VPlanPredicator.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanPredicator.h | 2 +- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 2 +- 15 files changed, 32 insertions(+), 34 deletions(-) diff --git a/lldb/include/lldb/Symbol/UnwindPlan.h b/lldb/include/lldb/Symbol/UnwindPlan.h index cc2302d25831..7b0fbe87315c 100644 --- a/lldb/include/lldb/Symbol/UnwindPlan.h +++ b/lldb/include/lldb/Symbol/UnwindPlan.h @@ -442,7 +442,7 @@ class UnwindPlan { m_return_addr_register = regnum; } - uint32_t GetReturnAddressRegister(void) { return m_return_addr_register; } + uint32_t GetReturnAddressRegister() { return m_return_addr_register; } uint32_t GetInitialCFARegister() const { if (m_row_list.empty()) diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 481a39a576e9..c5f16b4e6c1d 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -471,9 +471,7 @@ void Thread::SetStopInfoToNothing() { StopInfo::CreateStopReasonWithSignal(*this, LLDB_INVALID_SIGNAL_NUMBER)); } -bool Thread::ThreadStoppedForAReason(void) { - return (bool)GetPrivateStopInfo(); -} +bool Thread::ThreadStoppedForAReason() { return (bool)GetPrivateStopInfo(); } bool Thread::CheckpointThreadState(ThreadStateCheckpoint &saved_state) { saved_state.register_backup_sp.reset(); diff --git a/llvm/include/llvm/MCA/Instruction.h 
b/llvm/include/llvm/MCA/Instruction.h index 3eb32186d551..089c607749f1 100644 --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -406,7 +406,7 @@ class CycleSegment { bool operator<(const CycleSegment &Other) const { return Begin < Other.Begin; } - CycleSegment &operator--(void) { + CycleSegment &operator--() { if (Begin) Begin--; if (End) diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index 789205e61cdb..6423ff74b563 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -494,7 +494,7 @@ class MLocTracker { return StackIdxesToPos.find(Idx)->second; } - unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); } + unsigned getNumLocs() const { return LocIdxToIDNum.size(); } /// Reset all locations to contain a PHI value at the designated block. Used /// sometimes for actual PHI values, othertimes to indicate the block entry @@ -516,7 +516,7 @@ class MLocTracker { } /// Wipe any un-necessary location records after traversing a block. - void reset(void) { + void reset() { // We could reset all the location values too; however either loadFromArray // or setMPhis should be called before this object is re-used. Just // clear Masks, they're definitely not needed. @@ -525,7 +525,7 @@ class MLocTracker { /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of /// the information in this pass uninterpretable. 
- void clear(void) { + void clear() { reset(); LocIDToLocIdx.clear(); LocIdxToLocID.clear(); diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index d042deefd746..01ea171e5ea2 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -116,11 +116,11 @@ class IRPromoter { SmallPtrSet Promoted; void ReplaceAllUsersOfWith(Value *From, Value *To); - void ExtendSources(void); - void ConvertTruncs(void); - void PromoteTree(void); - void TruncateSinks(void); - void Cleanup(void); + void ExtendSources(); + void ConvertTruncs(); + void PromoteTree(); + void TruncateSinks(); + void Cleanup(); public: IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp index eb8c48ac49de..2bc2302cf55c 100644 --- a/llvm/lib/Target/BPF/BPFMIChecking.cpp +++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp @@ -41,7 +41,7 @@ struct BPFMIPreEmitChecking : public MachineFunctionPass { // Initialize class variables. 
void initialize(MachineFunction &MFParm); - bool processAtomicInsts(void); + bool processAtomicInsts(); public: @@ -151,7 +151,7 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) { return false; } -bool BPFMIPreEmitChecking::processAtomicInsts(void) { +bool BPFMIPreEmitChecking::processAtomicInsts() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { if (MI.getOpcode() != BPF::XADDW && diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index 354980e4bf3c..7f69c8a63443 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -56,8 +56,8 @@ struct BPFMIPeephole : public MachineFunctionPass { bool isInsnFrom32Def(MachineInstr *DefInsn); bool isPhiFrom32Def(MachineInstr *MovMI); bool isMovFrom32Def(MachineInstr *MovMI); - bool eliminateZExtSeq(void); - bool eliminateZExt(void); + bool eliminateZExtSeq(); + bool eliminateZExt(); std::set PhiInsns; @@ -172,7 +172,7 @@ bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) return true; } -bool BPFMIPeephole::eliminateZExtSeq(void) { +bool BPFMIPeephole::eliminateZExtSeq() { MachineInstr* ToErase = nullptr; bool Eliminated = false; @@ -240,7 +240,7 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { return Eliminated; } -bool BPFMIPeephole::eliminateZExt(void) { +bool BPFMIPeephole::eliminateZExt() { MachineInstr* ToErase = nullptr; bool Eliminated = false; @@ -312,7 +312,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass { // Initialize class variables. 
void initialize(MachineFunction &MFParm); - bool eliminateRedundantMov(void); + bool eliminateRedundantMov(); public: @@ -334,7 +334,7 @@ void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) { LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n"); } -bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { +bool BPFMIPreEmitPeephole::eliminateRedundantMov() { MachineInstr* ToErase = nullptr; bool Eliminated = false; @@ -405,7 +405,7 @@ struct BPFMIPeepholeTruncElim : public MachineFunctionPass { // Initialize class variables. void initialize(MachineFunction &MFParm); - bool eliminateTruncSeq(void); + bool eliminateTruncSeq(); public: @@ -452,7 +452,7 @@ void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { // are 32-bit registers, but later on, kernel verifier will rewrite // it with 64-bit value. Therefore, truncating the value after the // load will result in incorrect code. -bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { +bool BPFMIPeepholeTruncElim::eliminateTruncSeq() { MachineInstr* ToErase = nullptr; bool Eliminated = false; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 7e829ea43e89..b4232875383c 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -55,7 +55,7 @@ struct BPFMISimplifyPatchable : public MachineFunctionPass { // Initialize class variables. 
void initialize(MachineFunction &MFParm); - bool removeLD(void); + bool removeLD(); void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB, MachineInstr &MI, Register &SrcReg, Register &DstReg, const GlobalValue *GVal, bool IsAma); diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index d12a9b806fd0..3b485be39736 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -107,10 +107,10 @@ struct PPCMIPeephole : public MachineFunctionPass { void initialize(MachineFunction &MFParm); // Perform peepholes. - bool simplifyCode(void); + bool simplifyCode(); // Perform peepholes. - bool eliminateRedundantCompare(void); + bool eliminateRedundantCompare(); bool eliminateRedundantTOCSaves(std::map &TOCSaves); bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase); bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI); @@ -381,7 +381,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII, } // Perform peephole optimizations. 
-bool PPCMIPeephole::simplifyCode(void) { +bool PPCMIPeephole::simplifyCode() { bool Simplified = false; bool TrapOpt = false; MachineInstr* ToErase = nullptr; @@ -1334,7 +1334,7 @@ bool PPCMIPeephole::eliminateRedundantTOCSaves( // cmpwi r3, 0 ; greather than -1 means greater or equal to 0 // bge 0, .LBB0_4 -bool PPCMIPeephole::eliminateRedundantCompare(void) { +bool PPCMIPeephole::eliminateRedundantCompare() { bool Simplified = false; for (MachineBasicBlock &MBB2 : *MF) { diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index 67c5b8eb09b6..98f7094fcb48 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -85,7 +85,7 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - SystemZCallingConventionRegisters *initializeSpecialRegisters(void); + SystemZCallingConventionRegisters *initializeSpecialRegisters(); public: SystemZSubtarget(const Triple &TT, const std::string &CPU, diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h index 213aca2ea3f9..0c3dc0a08072 100644 --- a/llvm/lib/Target/VE/VESubtarget.h +++ b/llvm/lib/Target/VE/VESubtarget.h @@ -76,7 +76,7 @@ class VESubtarget : public VEGenSubtargetInfo { /// Get the size of RSA, return address, and frame pointer as described /// in VEFrameLowering.cpp. 
- unsigned getRsaSize(void) const { return 176; }; + unsigned getRsaSize() const { return 176; }; bool isTargetLinux() const { return TargetTriple.isOSLinux(); } }; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index d1d6e319f16b..3f6d567d3f4d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1540,7 +1540,7 @@ namespace llvm { unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; - unsigned getAddressSpace(void) const; + unsigned getAddressSpace() const; SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, SDValue &Chain) const; diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 86ecd6817873..e879a33db6ee 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -231,7 +231,7 @@ void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) { } // Entry point. The driver function for the predicator. -void VPlanPredicator::predicate(void) { +void VPlanPredicator::predicate() { // Predicate the blocks within Region. predicateRegionRec(cast(Plan.getEntry())); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.h b/llvm/lib/Transforms/Vectorize/VPlanPredicator.h index 692afd2978d5..a5db9a54da3c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.h @@ -68,7 +68,7 @@ class VPlanPredicator { VPlanPredicator(VPlan &Plan); /// Predicate Plan's HCFG. 
- void predicate(void); + void predicate(); }; } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index 688945afe944..52f20794cc57 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -614,7 +614,7 @@ void MachOWriter::writeExportsTrieData() { } void MachOWriter::writeTail() { - typedef void (MachOWriter::*WriteHandlerType)(void); + typedef void (MachOWriter::*WriteHandlerType)(); typedef std::pair WriteOperation; SmallVector Queue; From 5e1177302bd33d2ef2a5361734afc001ced47926 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 10:20:21 -0800 Subject: [PATCH 352/992] [wasm] Use nullptr instead of NULL (NFC) Identified with modernize-use-nullptr. --- lld/wasm/Symbols.cpp | 2 +- lld/wasm/Writer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 684f4832b267..08f532deea0b 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -154,7 +154,7 @@ bool Symbol::isLive() const { void Symbol::markLive() { assert(!isDiscarded()); referenced = true; - if (file != NULL && isDefined()) + if (file != nullptr && isDefined()) file->markLive(); if (auto *g = dyn_cast(this)) g->global->live = true; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 91e90ae5103a..f1ebe54ca60d 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -728,7 +728,7 @@ void Writer::createCommandExportWrappers() { // If there are no ctors and there's no libc `__wasm_call_dtors` to // call, don't wrap the exports. 
- if (initFunctions.empty() && WasmSym::callDtors == NULL) + if (initFunctions.empty() && WasmSym::callDtors == nullptr) return; std::vector toWrap; From d677a7cb056b17145a50ec8ca2ab6d5f4c494749 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 10:20:23 -0800 Subject: [PATCH 353/992] [clang] Remove redundant member initialization (NFC) Identified with readability-redundant-member-init. --- clang/include/clang/APINotes/Types.h | 32 ++++++++----------- clang/include/clang/AST/ASTConcept.h | 16 +++++----- clang/include/clang/AST/Comment.h | 16 +++------- clang/include/clang/AST/DeclObjC.h | 14 +++----- clang/include/clang/AST/Expr.h | 2 +- .../clang/ASTMatchers/Dynamic/Diagnostics.h | 2 +- .../clang/Analysis/Analyses/Consumed.h | 3 +- clang/include/clang/Basic/Diagnostic.h | 2 +- clang/include/clang/Basic/PartialDiagnostic.h | 3 +- clang/include/clang/Sema/DeclSpec.h | 3 +- clang/include/clang/Sema/Overload.h | 3 +- clang/include/clang/Sema/Sema.h | 8 ++--- .../Core/PathSensitive/DynamicTypeInfo.h | 2 +- clang/lib/ARCMigrate/Internals.h | 10 +++--- clang/lib/ARCMigrate/TransAutoreleasePool.cpp | 5 +-- clang/lib/AST/DeclCXX.cpp | 2 +- clang/lib/AST/ExprConcepts.cpp | 6 ++-- clang/lib/AST/ExprConstant.cpp | 4 +-- clang/lib/Analysis/CFG.cpp | 4 +-- clang/lib/Basic/TargetInfo.cpp | 2 +- clang/lib/CodeGen/BackendUtil.cpp | 3 +- clang/lib/CodeGen/CGCall.h | 4 +-- clang/lib/CodeGen/CGRecordLayout.h | 4 +-- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +- clang/lib/CodeGen/CodeGenFunction.h | 7 ++-- clang/lib/CodeGen/CodeGenPGO.cpp | 2 +- clang/lib/Driver/Driver.cpp | 6 ++-- clang/lib/Frontend/MultiplexConsumer.cpp | 4 +-- .../Frontend/SerializedDiagnosticPrinter.cpp | 3 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 2 +- clang/lib/Sema/SemaChecking.cpp | 4 +-- clang/lib/Sema/SemaCodeComplete.cpp | 2 +- clang/lib/Sema/SemaTemplateDeduction.cpp | 2 +- .../lib/StaticAnalyzer/Core/ProgramState.cpp | 6 +--- .../ClangOffloadBundler.cpp | 8 ++--- 
clang/tools/libclang/CXIndexDataConsumer.h | 7 ++-- clang/tools/libclang/CXLoadedDiagnostic.cpp | 2 +- 37 files changed, 87 insertions(+), 120 deletions(-) diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index 0d97e9ad8623..f741d9b91d76 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -133,7 +133,7 @@ class CommonTypeInfo : public CommonEntityInfo { llvm::Optional NSErrorDomain; public: - CommonTypeInfo() : CommonEntityInfo() {} + CommonTypeInfo() {} const llvm::Optional &getSwiftBridge() const { return SwiftBridge; @@ -208,10 +208,9 @@ class ObjCContextInfo : public CommonTypeInfo { public: ObjCContextInfo() - : CommonTypeInfo(), HasDefaultNullability(0), DefaultNullability(0), - HasDesignatedInits(0), SwiftImportAsNonGenericSpecified(false), - SwiftImportAsNonGeneric(false), SwiftObjCMembersSpecified(false), - SwiftObjCMembers(false) {} + : HasDefaultNullability(0), DefaultNullability(0), HasDesignatedInits(0), + SwiftImportAsNonGenericSpecified(false), SwiftImportAsNonGeneric(false), + SwiftObjCMembersSpecified(false), SwiftObjCMembers(false) {} /// Determine the default nullability for properties and methods of this /// class. @@ -309,7 +308,7 @@ class VariableInfo : public CommonEntityInfo { std::string Type; public: - VariableInfo() : CommonEntityInfo(), NullabilityAudited(false), Nullable(0) {} + VariableInfo() : NullabilityAudited(false), Nullable(0) {} llvm::Optional getNullability() const { return NullabilityAudited ? 
llvm::Optional( @@ -358,8 +357,7 @@ class ObjCPropertyInfo : public VariableInfo { public: ObjCPropertyInfo() - : VariableInfo(), SwiftImportAsAccessorsSpecified(false), - SwiftImportAsAccessors(false) {} + : SwiftImportAsAccessorsSpecified(false), SwiftImportAsAccessors(false) {} llvm::Optional getSwiftImportAsAccessors() const { return SwiftImportAsAccessorsSpecified @@ -423,8 +421,7 @@ class ParamInfo : public VariableInfo { public: ParamInfo() - : VariableInfo(), NoEscapeSpecified(false), NoEscape(false), - RawRetainCountConvention() {} + : NoEscapeSpecified(false), NoEscape(false), RawRetainCountConvention() {} llvm::Optional isNoEscape() const { if (!NoEscapeSpecified) @@ -514,7 +511,7 @@ class FunctionInfo : public CommonEntityInfo { std::vector Params; FunctionInfo() - : CommonEntityInfo(), NullabilityAudited(false), NumAdjustedNullable(0), + : NullabilityAudited(false), NumAdjustedNullable(0), RawRetainCountConvention() {} static unsigned getMaxNullabilityIndex() { @@ -607,8 +604,7 @@ class ObjCMethodInfo : public FunctionInfo { /// Whether this is a required initializer. unsigned RequiredInit : 1; - ObjCMethodInfo() - : FunctionInfo(), DesignatedInit(false), RequiredInit(false) {} + ObjCMethodInfo() : DesignatedInit(false), RequiredInit(false) {} friend bool operator==(const ObjCMethodInfo &, const ObjCMethodInfo &); @@ -639,19 +635,19 @@ inline bool operator!=(const ObjCMethodInfo &LHS, const ObjCMethodInfo &RHS) { /// Describes API notes data for a global variable. class GlobalVariableInfo : public VariableInfo { public: - GlobalVariableInfo() : VariableInfo() {} + GlobalVariableInfo() {} }; /// Describes API notes data for a global function. class GlobalFunctionInfo : public FunctionInfo { public: - GlobalFunctionInfo() : FunctionInfo() {} + GlobalFunctionInfo() {} }; /// Describes API notes data for an enumerator. 
class EnumConstantInfo : public CommonEntityInfo { public: - EnumConstantInfo() : CommonEntityInfo() {} + EnumConstantInfo() {} }; /// Describes API notes data for a tag. @@ -662,7 +658,7 @@ class TagInfo : public CommonTypeInfo { public: llvm::Optional EnumExtensibility; - TagInfo() : CommonTypeInfo(), HasFlagEnum(0), IsFlagEnum(0) {} + TagInfo() : HasFlagEnum(0), IsFlagEnum(0) {} llvm::Optional isFlagEnum() const { if (HasFlagEnum) @@ -706,7 +702,7 @@ class TypedefInfo : public CommonTypeInfo { public: llvm::Optional SwiftWrapper; - TypedefInfo() : CommonTypeInfo() {} + TypedefInfo() {} TypedefInfo &operator|=(const TypedefInfo &RHS) { static_cast(*this) |= RHS; diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h index aba18b060b02..c9930844add8 100644 --- a/clang/include/clang/AST/ASTConcept.h +++ b/clang/include/clang/AST/ASTConcept.h @@ -123,17 +123,17 @@ class ConceptReference { const ASTTemplateArgumentListInfo *ArgsAsWritten; public: - ConceptReference(NestedNameSpecifierLoc NNS, SourceLocation TemplateKWLoc, DeclarationNameInfo ConceptNameInfo, NamedDecl *FoundDecl, ConceptDecl *NamedConcept, - const ASTTemplateArgumentListInfo *ArgsAsWritten) : - NestedNameSpec(NNS), TemplateKWLoc(TemplateKWLoc), - ConceptName(ConceptNameInfo), FoundDecl(FoundDecl), - NamedConcept(NamedConcept), ArgsAsWritten(ArgsAsWritten) {} - - ConceptReference() : NestedNameSpec(), TemplateKWLoc(), ConceptName(), - FoundDecl(nullptr), NamedConcept(nullptr), ArgsAsWritten(nullptr) {} + const ASTTemplateArgumentListInfo *ArgsAsWritten) + : NestedNameSpec(NNS), TemplateKWLoc(TemplateKWLoc), + ConceptName(ConceptNameInfo), FoundDecl(FoundDecl), + NamedConcept(NamedConcept), ArgsAsWritten(ArgsAsWritten) {} + + ConceptReference() + : TemplateKWLoc(), FoundDecl(nullptr), NamedConcept(nullptr), + ArgsAsWritten(nullptr) {} const NestedNameSpecifierLoc &getNestedNameSpecifierLoc() const { return NestedNameSpec; diff --git a/clang/include/clang/AST/Comment.h 
b/clang/include/clang/AST/Comment.h index 4184e103206d..5ecc35791b7b 100644 --- a/clang/include/clang/AST/Comment.h +++ b/clang/include/clang/AST/Comment.h @@ -424,19 +424,13 @@ class HTMLStartTagComment : public HTMLTagComment { Attribute() { } - Attribute(SourceLocation NameLocBegin, StringRef Name) : - NameLocBegin(NameLocBegin), Name(Name), - EqualsLoc(SourceLocation()), - ValueRange(SourceRange()), Value(StringRef()) - { } + Attribute(SourceLocation NameLocBegin, StringRef Name) + : NameLocBegin(NameLocBegin), Name(Name), EqualsLoc(SourceLocation()) {} Attribute(SourceLocation NameLocBegin, StringRef Name, - SourceLocation EqualsLoc, - SourceRange ValueRange, StringRef Value) : - NameLocBegin(NameLocBegin), Name(Name), - EqualsLoc(EqualsLoc), - ValueRange(ValueRange), Value(Value) - { } + SourceLocation EqualsLoc, SourceRange ValueRange, StringRef Value) + : NameLocBegin(NameLocBegin), Name(Name), EqualsLoc(EqualsLoc), + ValueRange(ValueRange), Value(Value) {} SourceLocation getNameLocEnd() const { return NameLocBegin.getLocWithOffset(Name.size()); diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h index 79ec1d6e5c3c..f227561b8fcb 100644 --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -779,17 +779,13 @@ class ObjCPropertyDecl : public NamedDecl { LParenLoc(LParenLocation), DeclType(T), DeclTypeSourceInfo(TSI), PropertyAttributes(ObjCPropertyAttribute::kind_noattr), PropertyAttributesAsWritten(ObjCPropertyAttribute::kind_noattr), - PropertyImplementation(propControl), GetterName(Selector()), - SetterName(Selector()) {} + PropertyImplementation(propControl) {} public: - static ObjCPropertyDecl *Create(ASTContext &C, DeclContext *DC, - SourceLocation L, - IdentifierInfo *Id, SourceLocation AtLocation, - SourceLocation LParenLocation, - QualType T, - TypeSourceInfo *TSI, - PropertyControl propControl = None); + static ObjCPropertyDecl * + Create(ASTContext &C, DeclContext *DC, SourceLocation L, 
IdentifierInfo *Id, + SourceLocation AtLocation, SourceLocation LParenLocation, QualType T, + TypeSourceInfo *TSI, PropertyControl propControl = None); static ObjCPropertyDecl *CreateDeserialized(ASTContext &C, unsigned ID); diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index e2c36e12393f..c32e74ac76ee 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -2388,7 +2388,7 @@ class OffsetOfNode { /// Create an offsetof node that refers into a C++ base class. explicit OffsetOfNode(const CXXBaseSpecifier *Base) - : Range(), Data(reinterpret_cast(Base) | OffsetOfNode::Base) {} + : Data(reinterpret_cast(Base) | OffsetOfNode::Base) {} /// Determine what kind of offsetof node this is. Kind getKind() const { return static_cast(Data & Mask); } diff --git a/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h b/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h index af1affc16dbc..25eb38e6435e 100644 --- a/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h +++ b/clang/include/clang/ASTMatchers/Dynamic/Diagnostics.h @@ -40,7 +40,7 @@ struct SourceRange { /// A VariantValue instance annotated with its parser context. 
struct ParserValue { - ParserValue() : Text(), Range(), Value() {} + ParserValue() : Range() {} StringRef Text; SourceRange Range; VariantValue Value; diff --git a/clang/include/clang/Analysis/Analyses/Consumed.h b/clang/include/clang/Analysis/Analyses/Consumed.h index dec1ae3b2b4b..24702567ab6c 100644 --- a/clang/include/clang/Analysis/Analyses/Consumed.h +++ b/clang/include/clang/Analysis/Analyses/Consumed.h @@ -153,8 +153,7 @@ namespace consumed { public: ConsumedStateMap() = default; ConsumedStateMap(const ConsumedStateMap &Other) - : Reachable(Other.Reachable), From(Other.From), VarMap(Other.VarMap), - TmpMap() {} + : Reachable(Other.Reachable), From(Other.From), VarMap(Other.VarMap) {} /// Warn if any of the parameters being tracked are not in the state /// they were declared to be in upon return from a function. diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index e5577e74fa63..6a80823d1242 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -1326,7 +1326,7 @@ class DiagnosticBuilder : public StreamingDiagnostic { public: /// Copy constructor. When copied, this "takes" the diagnostic info from the /// input and neuters it. 
- DiagnosticBuilder(const DiagnosticBuilder &D) : StreamingDiagnostic() { + DiagnosticBuilder(const DiagnosticBuilder &D) { DiagObj = D.DiagObj; DiagStorage = D.DiagStorage; IsActive = D.IsActive; diff --git a/clang/include/clang/Basic/PartialDiagnostic.h b/clang/include/clang/Basic/PartialDiagnostic.h index 9fb70bff7fee..217441979869 100644 --- a/clang/include/clang/Basic/PartialDiagnostic.h +++ b/clang/include/clang/Basic/PartialDiagnostic.h @@ -49,8 +49,7 @@ class PartialDiagnostic : public StreamingDiagnostic { PartialDiagnostic(unsigned DiagID, DiagStorageAllocator &Allocator_) : StreamingDiagnostic(Allocator_), DiagID(DiagID) {} - PartialDiagnostic(const PartialDiagnostic &Other) - : StreamingDiagnostic(), DiagID(Other.DiagID) { + PartialDiagnostic(const PartialDiagnostic &Other) : DiagID(Other.DiagID) { Allocator = Other.Allocator; if (Other.DiagStorage) { DiagStorage = getStorage(); diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 2704a9c1fc78..2437be497de4 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -434,8 +434,7 @@ class DeclSpec { FS_noreturn_specified(false), Friend_specified(false), ConstexprSpecifier( static_cast(ConstexprSpecKind::Unspecified)), - FS_explicit_specifier(), Attrs(attrFactory), writtenBS(), - ObjCQualifiers(nullptr) {} + Attrs(attrFactory), writtenBS(), ObjCQualifiers(nullptr) {} // storage-class-specifier SCS getStorageClassSpec() const { return (SCS)StorageClassSpec; } diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index 88405a63b735..48997e186ef6 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -577,8 +577,7 @@ class Sema; ImplicitConversionSequence() : ConversionKind(Uninitialized), - InitializerListOfIncompleteArray(false), - InitializerListContainerType() { + InitializerListOfIncompleteArray(false) { Standard.setAsIdentityConversion(); } diff --git 
a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 6758e7ef2c30..f97a785c7426 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1365,10 +1365,10 @@ class Sema final { }; private: - llvm::PointerIntPair Pair; + llvm::PointerIntPair Pair; public: - SpecialMemberOverloadResult() : Pair() {} + SpecialMemberOverloadResult() {} SpecialMemberOverloadResult(CXXMethodDecl *MD) : Pair(MD, MD->isDeleted() ? NoMemberOrDeleted : Success) {} @@ -7520,7 +7520,7 @@ class Sema final { RequiredTemplateKind(SourceLocation TemplateKWLoc = SourceLocation()) : TemplateKW(TemplateKWLoc) {} /// Template name is unconditionally required. - RequiredTemplateKind(TemplateNameIsRequiredTag) : TemplateKW() {} + RequiredTemplateKind(TemplateNameIsRequiredTag) {} SourceLocation getTemplateKeywordLoc() const { return TemplateKW.getValueOr(SourceLocation()); @@ -13061,7 +13061,7 @@ class Sema final { ValueDecl *MD; CharUnits Alignment; - MisalignedMember() : E(), RD(), MD(), Alignment() {} + MisalignedMember() : E(), RD(), MD() {} MisalignedMember(Expr *E, RecordDecl *RD, ValueDecl *MD, CharUnits Alignment) : E(E), RD(RD), MD(MD), Alignment(Alignment) {} diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h index 6d2b495dc0f5..3ff453a8de4f 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h @@ -18,7 +18,7 @@ namespace ento { /// of a region in a given state along the analysis path. 
class DynamicTypeInfo { public: - DynamicTypeInfo() : DynTy(QualType()) {} + DynamicTypeInfo() {} DynamicTypeInfo(QualType Ty, bool CanBeSub = true) : DynTy(Ty), CanBeASubClass(CanBeSub) {} diff --git a/clang/lib/ARCMigrate/Internals.h b/clang/lib/ARCMigrate/Internals.h index ed0136e4867a..8b482738cc89 100644 --- a/clang/lib/ARCMigrate/Internals.h +++ b/clang/lib/ARCMigrate/Internals.h @@ -154,13 +154,11 @@ class MigrationPass { std::vector &ARCMTMacroLocs; Optional EnableCFBridgeFns; - MigrationPass(ASTContext &Ctx, LangOptions::GCMode OrigGCMode, - Sema &sema, TransformActions &TA, - const CapturedDiagList &capturedDiags, + MigrationPass(ASTContext &Ctx, LangOptions::GCMode OrigGCMode, Sema &sema, + TransformActions &TA, const CapturedDiagList &capturedDiags, std::vector &ARCMTMacroLocs) - : Ctx(Ctx), OrigGCMode(OrigGCMode), MigOptions(), - SemaRef(sema), TA(TA), CapturedDiags(capturedDiags), - ARCMTMacroLocs(ARCMTMacroLocs) { } + : Ctx(Ctx), OrigGCMode(OrigGCMode), SemaRef(sema), TA(TA), + CapturedDiags(capturedDiags), ARCMTMacroLocs(ARCMTMacroLocs) {} const CapturedDiagList &getDiags() const { return CapturedDiags; } diff --git a/clang/lib/ARCMigrate/TransAutoreleasePool.cpp b/clang/lib/ARCMigrate/TransAutoreleasePool.cpp index 393adcd85a3f..47587d81850a 100644 --- a/clang/lib/ARCMigrate/TransAutoreleasePool.cpp +++ b/clang/lib/ARCMigrate/TransAutoreleasePool.cpp @@ -229,8 +229,9 @@ class AutoreleasePoolRewriter bool IsFollowedBySimpleReturnStmt; SmallVector Releases; - PoolScope() : PoolVar(nullptr), CompoundParent(nullptr), Begin(), End(), - IsFollowedBySimpleReturnStmt(false) { } + PoolScope() + : PoolVar(nullptr), CompoundParent(nullptr), + IsFollowedBySimpleReturnStmt(false) {} SourceRange getIndentedRange() const { Stmt::child_iterator rangeS = Begin; diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 1780358cc348..108113274b8e 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -3272,7 +3272,7 @@ void 
MSGuidDecl::anchor() {} MSGuidDecl::MSGuidDecl(DeclContext *DC, QualType T, Parts P) : ValueDecl(Decl::MSGuid, DC, SourceLocation(), DeclarationName(), T), - PartVal(P), APVal() {} + PartVal(P) {} MSGuidDecl *MSGuidDecl::Create(const ASTContext &C, QualType T, Parts P) { DeclContext *DC = C.getTranslationUnitDecl(); diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp index 8cb8625e2a1a..c17453fb45fb 100644 --- a/clang/lib/AST/ExprConcepts.cpp +++ b/clang/lib/AST/ExprConcepts.cpp @@ -57,9 +57,9 @@ ConceptSpecializationExpr::ConceptSpecializationExpr( } ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty, - unsigned NumTemplateArgs) - : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(), - NumTemplateArgs(NumTemplateArgs) { } + unsigned NumTemplateArgs) + : Expr(ConceptSpecializationExprClass, Empty), + NumTemplateArgs(NumTemplateArgs) {} void ConceptSpecializationExpr::setTemplateArguments( ArrayRef Converted) { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 105cd7a3506d..3bf205d8cb06 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1706,8 +1706,8 @@ namespace { struct MemberPtr { MemberPtr() {} - explicit MemberPtr(const ValueDecl *Decl) : - DeclAndIsDerivedMember(Decl, false), Path() {} + explicit MemberPtr(const ValueDecl *Decl) + : DeclAndIsDerivedMember(Decl, false) {} /// The member or (direct or indirect) field referred to by this member /// pointer, or 0 if this is a null member pointer. 
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 9ef3b5b6277a..9bde034d5e09 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -531,9 +531,7 @@ class CFGBuilder { public: explicit CFGBuilder(ASTContext *astContext, const CFG::BuildOptions &buildOpts) - : Context(astContext), cfg(new CFG()), // crew a new CFG - ConstructionContextMap(), BuildOpts(buildOpts) {} - + : Context(astContext), cfg(new CFG()), BuildOpts(buildOpts) {} // buildCFG - Used by external clients to construct the CFG. std::unique_ptr buildCFG(const Decl *D, Stmt *Statement); diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 646bbe8b7387..ddb20008bc9d 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -25,7 +25,7 @@ using namespace clang; static const LangASMap DefaultAddrSpaceMap = {0}; // TargetInfo Constructor. -TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) { +TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { // Set defaults. Defaults are set for a 32-bit RISC platform, like PPC or // SPARC. These should be overridden by concrete targets as needed. 
BigEndian = !T.isLittleEndian(); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index bacac0a20d4d..6ce125c02736 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -197,8 +197,7 @@ class PassManagerBuilderWrapper : public PassManagerBuilder { PassManagerBuilderWrapper(const Triple &TargetTriple, const CodeGenOptions &CGOpts, const LangOptions &LangOpts) - : PassManagerBuilder(), TargetTriple(TargetTriple), CGOpts(CGOpts), - LangOpts(LangOpts) {} + : TargetTriple(TargetTriple), CGOpts(CGOpts), LangOpts(LangOpts) {} const Triple &getTargetTriple() const { return TargetTriple; } const CodeGenOptions &getCGOpts() const { return CGOpts; } const LangOptions &getLangOpts() const { return LangOpts; } diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index c8594068c3fc..8d63739fbbad 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -49,11 +49,11 @@ class CGCalleeInfo { GlobalDecl CalleeDecl; public: - explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {} + explicit CGCalleeInfo() : CalleeProtoTy(nullptr) {} CGCalleeInfo(const FunctionProtoType *calleeProtoTy, GlobalDecl calleeDecl) : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} CGCalleeInfo(const FunctionProtoType *calleeProtoTy) - : CalleeProtoTy(calleeProtoTy), CalleeDecl() {} + : CalleeProtoTy(calleeProtoTy) {} CGCalleeInfo(GlobalDecl calleeDecl) : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} diff --git a/clang/lib/CodeGen/CGRecordLayout.h b/clang/lib/CodeGen/CGRecordLayout.h index e6665b72bcba..5a3bcdf72f7b 100644 --- a/clang/lib/CodeGen/CGRecordLayout.h +++ b/clang/lib/CodeGen/CGRecordLayout.h @@ -93,8 +93,8 @@ struct CGBitFieldInfo { CharUnits VolatileStorageOffset; CGBitFieldInfo() - : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset(), - VolatileOffset(), VolatileStorageSize(), VolatileStorageOffset() {} + : Offset(), Size(), IsSigned(), StorageSize(), 
VolatileOffset(), + VolatileStorageSize() {} CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, unsigned StorageSize, CharUnits StorageOffset) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 4c11f7d67534..94472668c8e7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6789,7 +6789,7 @@ void CodeGenFunction::EmitOMPTargetDataDirective( public: explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) - : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} + : PrivatizeDevicePointers(PrivatizeDevicePointers) {} void Enter(CodeGenFunction &CGF) override { PrivatizeDevicePointers = true; } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ece863ad1077..17bdbc0bd334 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -242,11 +242,10 @@ class CodeGenFunction : public CodeGenTypeCache { /// A jump destination is an abstract label, branching to which may /// require a jump out through normal cleanups. 
struct JumpDest { - JumpDest() : Block(nullptr), ScopeDepth(), Index(0) {} - JumpDest(llvm::BasicBlock *Block, - EHScopeStack::stable_iterator Depth, + JumpDest() : Block(nullptr), Index(0) {} + JumpDest(llvm::BasicBlock *Block, EHScopeStack::stable_iterator Depth, unsigned Index) - : Block(Block), ScopeDepth(Depth), Index(Index) {} + : Block(Block), ScopeDepth(Depth), Index(Index) {} bool isValid() const { return Block != nullptr; } llvm::BasicBlock *getBlock() const { return Block; } diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index ab953c2c7d52..6657f2a91e3d 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -131,7 +131,7 @@ class PGOHash { static_assert(LastHashType <= TooBig, "Too many types in HashType"); PGOHash(PGOHashVersion HashVersion) - : Working(0), Count(0), HashVersion(HashVersion), MD5() {} + : Working(0), Count(0), HashVersion(HashVersion) {} void combine(HashType Type); uint64_t finalize(); PGOHashVersion getHashVersion() const { return HashVersion; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index a73421c0535d..4ac48cc28016 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -170,10 +170,8 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, : Diags(Diags), VFS(std::move(VFS)), Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), LTOMode(LTOK_None), ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT), - DriverTitle(Title), CCPrintStatReportFilename(), CCPrintOptionsFilename(), - CCPrintHeadersFilename(), CCLogDiagnosticsFilename(), - CCCPrintBindings(false), CCPrintOptions(false), CCPrintHeaders(false), - CCLogDiagnostics(false), CCGenDiagnostics(false), + DriverTitle(Title), CCCPrintBindings(false), CCPrintOptions(false), + CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), CCPrintProcessStats(false), TargetTriple(TargetTriple), Saver(Alloc), 
CheckInputsExist(true), GenReproducer(false), SuppressMissingInputWarning(false) { diff --git a/clang/lib/Frontend/MultiplexConsumer.cpp b/clang/lib/Frontend/MultiplexConsumer.cpp index 5abbb3a235b4..34bbc365e647 100644 --- a/clang/lib/Frontend/MultiplexConsumer.cpp +++ b/clang/lib/Frontend/MultiplexConsumer.cpp @@ -236,10 +236,10 @@ void MultiplexASTMutationListener::AddedAttributeToRecord( MultiplexConsumer::MultiplexConsumer( std::vector> C) - : Consumers(std::move(C)), MutationListener(), DeserializationListener() { + : Consumers(std::move(C)) { // Collect the mutation listeners and deserialization listeners of all // children, and create a multiplex listener each if so. - std::vector mutationListeners; + std::vector mutationListeners; std::vector serializationListeners; for (auto &Consumer : Consumers) { if (auto *mutationListener = Consumer->GetASTMutationListener()) diff --git a/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp b/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp index 462aeda6e027..fc8fce4b42b8 100644 --- a/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp @@ -95,8 +95,7 @@ class SDiagsMerger : SerializedDiagnosticReader { AbbrevLookup DiagFlagLookup; public: - SDiagsMerger(SDiagsWriter &Writer) - : SerializedDiagnosticReader(), Writer(Writer) {} + SDiagsMerger(SDiagsWriter &Writer) : Writer(Writer) {} std::error_code mergeRecordsFromFile(const char *File) { return readDiagnostics(File); diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index b4dcc9759b99..ac5ad52c0b1d 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -128,7 +128,7 @@ class LogicalErrorHandler : public CFGCallback { Sema &S; public: - LogicalErrorHandler(Sema &S) : CFGCallback(), S(S) {} + LogicalErrorHandler(Sema &S) : S(S) {} static bool HasMacroID(const Expr *E) { if (E->getExprLoc().isMacroID()) diff --git 
a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4e83fa1fffca..03608a339e55 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14021,7 +14021,7 @@ class SequenceChecker : public ConstEvaluatedExprVisitor { const Expr *UsageExpr; SequenceTree::Seq Seq; - Usage() : UsageExpr(nullptr), Seq() {} + Usage() : UsageExpr(nullptr) {} }; struct UsageInfo { @@ -14030,7 +14030,7 @@ class SequenceChecker : public ConstEvaluatedExprVisitor { /// Have we issued a diagnostic for this object already? bool Diagnosed; - UsageInfo() : Uses(), Diagnosed(false) {} + UsageInfo() : Diagnosed(false) {} }; using UsageInfoMap = llvm::SmallDenseMap; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 93c07ccc891f..be492b5f3607 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -98,7 +98,7 @@ class ResultBuilder { unsigned SingleDeclIndex; public: - ShadowMapEntry() : DeclOrVector(), SingleDeclIndex(0) {} + ShadowMapEntry() : SingleDeclIndex(0) {} ShadowMapEntry(const ShadowMapEntry &) = delete; ShadowMapEntry(ShadowMapEntry &&Move) { *this = std::move(Move); } ShadowMapEntry &operator=(const ShadowMapEntry &) = delete; diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index e9636d2b942e..22dd395d9943 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4452,7 +4452,7 @@ namespace { public: SubstituteDeducedTypeTransform(Sema &SemaRef, DependentAuto DA) - : TreeTransform(SemaRef), Replacement(), + : TreeTransform(SemaRef), ReplacementIsPack(DA.IsPack), UseTypeSugar(true) {} SubstituteDeducedTypeTransform(Sema &SemaRef, QualType Replacement, diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 1ccb0de92fba..8d4e0bbb7dec 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ 
b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -54,11 +54,7 @@ ProgramState::ProgramState(ProgramStateManager *mgr, const Environment& env, } ProgramState::ProgramState(const ProgramState &RHS) - : llvm::FoldingSetNode(), - stateMgr(RHS.stateMgr), - Env(RHS.Env), - store(RHS.store), - GDM(RHS.GDM), + : stateMgr(RHS.stateMgr), Env(RHS.Env), store(RHS.store), GDM(RHS.GDM), refCount(0) { stateMgr->getStoreManager().incrementReferenceCount(store); } diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp index f9ccbd36dc40..ce673628866a 100644 --- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -356,7 +356,7 @@ class BinaryFileHandler final : public FileHandler { std::string CurWriteBundleTarget; public: - BinaryFileHandler() : FileHandler() {} + BinaryFileHandler() {} ~BinaryFileHandler() final {} @@ -576,8 +576,7 @@ class ObjectFileHandler final : public FileHandler { public: ObjectFileHandler(std::unique_ptr ObjIn) - : FileHandler(), Obj(std::move(ObjIn)), - CurrentSection(Obj->section_begin()), + : Obj(std::move(ObjIn)), CurrentSection(Obj->section_begin()), NextSection(Obj->section_begin()) {} ~ObjectFileHandler() final {} @@ -813,8 +812,7 @@ class TextFileHandler final : public FileHandler { } public: - TextFileHandler(StringRef Comment) - : FileHandler(), Comment(Comment), ReadChars(0) { + TextFileHandler(StringRef Comment) : Comment(Comment), ReadChars(0) { BundleStartString = "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__START__ "; BundleEndString = diff --git a/clang/tools/libclang/CXIndexDataConsumer.h b/clang/tools/libclang/CXIndexDataConsumer.h index ace9d59bf045..8e6045dbf6bc 100644 --- a/clang/tools/libclang/CXIndexDataConsumer.h +++ b/clang/tools/libclang/CXIndexDataConsumer.h @@ -332,10 +332,9 @@ class CXIndexDataConsumer : public index::IndexDataConsumer { public: 
CXIndexDataConsumer(CXClientData clientData, IndexerCallbacks &indexCallbacks, - unsigned indexOptions, CXTranslationUnit cxTU) - : Ctx(nullptr), ClientData(clientData), CB(indexCallbacks), - IndexOptions(indexOptions), CXTU(cxTU), - StrScratch(), StrAdapterCount(0) { } + unsigned indexOptions, CXTranslationUnit cxTU) + : Ctx(nullptr), ClientData(clientData), CB(indexCallbacks), + IndexOptions(indexOptions), CXTU(cxTU), StrAdapterCount(0) {} ASTContext &getASTContext() const { return *Ctx; } CXTranslationUnit getCXTU() const { return CXTU; } diff --git a/clang/tools/libclang/CXLoadedDiagnostic.cpp b/clang/tools/libclang/CXLoadedDiagnostic.cpp index b3dcf977b921..bb6942a45f46 100644 --- a/clang/tools/libclang/CXLoadedDiagnostic.cpp +++ b/clang/tools/libclang/CXLoadedDiagnostic.cpp @@ -235,7 +235,7 @@ class DiagLoader : serialized_diags::SerializedDiagnosticReader { public: DiagLoader(enum CXLoadDiag_Error *e, CXString *es) - : SerializedDiagnosticReader(), error(e), errorString(es) { + : error(e), errorString(es) { if (error) *error = CXLoadDiag_None; if (errorString) From 6e0a333f7120cb2a327c62c3983d7fe9b6ffca92 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Jan 2022 19:09:30 +0000 Subject: [PATCH 354/992] [LV] Use Builder.CreateVectorReverse directly. (NFC) IRBuilder::CreateVectorReverse already handles all cases required by LoopVectorize. It can be used directly instead of reverseVector. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e9db5042753f..bb5691842111 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -633,9 +633,6 @@ class InnerLoopVectorizer { /// Returns true if we should generate a scalar version of \p IV. 
bool needsScalarInduction(Instruction *IV) const; - /// Generate a shuffle sequence that will reverse the vector Vec. - virtual Value *reverseVector(Value *Vec); - /// Returns (and creates if needed) the original loop trip count. Value *getOrCreateTripCount(Loop *NewLoop); @@ -849,7 +846,6 @@ class InnerLoopUnroller : public InnerLoopVectorizer { private: Value *getBroadcastInstrs(Value *V) override; - Value *reverseVector(Value *Vec) override; }; /// Encapsulate information regarding vectorization of a loop and its epilogue. @@ -2701,11 +2697,6 @@ void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def, State.set(Def, VectorValue, Instance.Part); } -Value *InnerLoopVectorizer::reverseVector(Value *Vec) { - assert(Vec->getType()->isVectorTy() && "Invalid type"); - return Builder.CreateVectorReverse(Vec, "reverse"); -} - // Return whether we allow using masked interleave-groups (for dealing with // strided loads/stores that reside in predicated blocks, or for dealing // with gaps). @@ -2868,7 +2859,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( } if (Group->isReverse()) - StridedVec = reverseVector(StridedVec); + StridedVec = Builder.CreateVectorReverse(StridedVec, "reverse"); State.set(VPDefs[J], StridedVec, Part); } @@ -2904,7 +2895,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( Value *StoredVec = State.get(StoredValues[i], Part); if (Group->isReverse()) - StoredVec = reverseVector(StoredVec); + StoredVec = Builder.CreateVectorReverse(StoredVec, "reverse"); // If this member has different type, cast it to a unified type. 
@@ -8042,8 +8033,6 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions( } } -Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; } - Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } static void AddRuntimeUnrollDisableMetaData(Loop *L) { From f28c8e46c98e816b3834b26b1c9b027016e2f16d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sun, 2 Jan 2022 11:27:18 -0800 Subject: [PATCH 355/992] Autogen a SCEV test for ease of update --- .../overflow-intrinsics-trip-count.ll | 63 +++++++++++++++---- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll index 641f911129fb..d84e0ca21178 100644 --- a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone @@ -7,9 +8,15 @@ declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) nounwind readnone declare { i16, i1 } @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone -; CHECK-LABEL: Classifying expressions for: @uadd_exhaustive -; CHECK: Loop %for.body: backedge-taken count is 35 define void @uadd_exhaustive() { +; CHECK-LABEL: 'uadd_exhaustive' +; CHECK-NEXT: Determining loop execution counts for: @uadd_exhaustive +; CHECK-NEXT: Loop %for.body: backedge-taken count is 35 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 35 +; 
CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 35 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 36 +; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -27,9 +34,15 @@ for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: Classifying expressions for: @sadd_exhaustive -; CHECK: Loop %for.body: backedge-taken count is 67 define void @sadd_exhaustive() { +; CHECK-LABEL: 'sadd_exhaustive' +; CHECK-NEXT: Determining loop execution counts for: @sadd_exhaustive +; CHECK-NEXT: Loop %for.body: backedge-taken count is 67 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 67 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 67 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 68 +; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -47,9 +60,15 @@ for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: Classifying expressions for: @usub_exhaustive -; CHECK: Loop %for.body: backedge-taken count is 50 define void @usub_exhaustive() { +; CHECK-LABEL: 'usub_exhaustive' +; CHECK-NEXT: Determining loop execution counts for: @usub_exhaustive +; CHECK-NEXT: Loop %for.body: backedge-taken count is 50 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 50 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 50 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 51 +; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -67,9 +86,15 @@ for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: Classifying expressions for: @ssub_exhaustive -; CHECK: Loop %for.body: backedge-taken count is 68 define void @ssub_exhaustive() { +; CHECK-LABEL: 'ssub_exhaustive' +; CHECK-NEXT: Determining loop execution counts for: @ssub_exhaustive +; CHECK-NEXT: Loop %for.body: backedge-taken count is 68 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 68 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken 
count is 68 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 69 +; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -87,9 +112,15 @@ for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: Classifying expressions for: @smul_exhaustive -; CHECK: Loop %for.body: backedge-taken count is 14 define void @smul_exhaustive() { +; CHECK-LABEL: 'smul_exhaustive' +; CHECK-NEXT: Determining loop execution counts for: @smul_exhaustive +; CHECK-NEXT: Loop %for.body: backedge-taken count is 14 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 14 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 14 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 15 +; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -107,9 +138,15 @@ for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: Classifying expressions for: @umul_exhaustive -; CHECK: Loop %for.body: backedge-taken count is 15 define void @umul_exhaustive() { +; CHECK-LABEL: 'umul_exhaustive' +; CHECK-NEXT: Determining loop execution counts for: @umul_exhaustive +; CHECK-NEXT: Loop %for.body: backedge-taken count is 15 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 15 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 15 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 16 +; entry: br i1 undef, label %for.end, label %for.body.preheader From a553969712edeedbaf1b2c8c570869ec43b5dd32 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Sat, 1 Jan 2022 19:30:08 -0800 Subject: [PATCH 356/992] [lldb] Remove unused AproposAllSubCommands (NFC) --- lldb/include/lldb/Interpreter/CommandObject.h | 5 --- .../lldb/Interpreter/CommandObjectMultiword.h | 10 ------ .../Commands/CommandObjectMultiword.cpp | 35 ------------------- 3 files changed, 50 deletions(-) diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h index 89cc161993a9..bfddf559e5fe 
100644 --- a/lldb/include/lldb/Interpreter/CommandObject.h +++ b/lldb/include/lldb/Interpreter/CommandObject.h @@ -172,11 +172,6 @@ class CommandObject { return nullptr; } - virtual void AproposAllSubCommands(llvm::StringRef prefix, - llvm::StringRef search_word, - StringList &commands_found, - StringList &commands_help) {} - void FormatLongHelpText(Stream &output_strm, llvm::StringRef long_help); void GenerateHelpText(CommandReturnObject &result); diff --git a/lldb/include/lldb/Interpreter/CommandObjectMultiword.h b/lldb/include/lldb/Interpreter/CommandObjectMultiword.h index a0e8d163c4b6..ab580c11eb4f 100644 --- a/lldb/include/lldb/Interpreter/CommandObjectMultiword.h +++ b/lldb/include/lldb/Interpreter/CommandObjectMultiword.h @@ -51,11 +51,6 @@ class CommandObjectMultiword : public CommandObject { CommandObject *GetSubcommandObject(llvm::StringRef sub_cmd, StringList *matches = nullptr) override; - void AproposAllSubCommands(llvm::StringRef prefix, - llvm::StringRef search_word, - StringList &commands_found, - StringList &commands_help) override; - bool WantsRawCommandString() override { return false; } void HandleCompletion(CompletionRequest &request) override; @@ -110,11 +105,6 @@ class CommandObjectProxy : public CommandObject { CommandObject *GetSubcommandObject(llvm::StringRef sub_cmd, StringList *matches = nullptr) override; - void AproposAllSubCommands(llvm::StringRef prefix, - llvm::StringRef search_word, - StringList &commands_found, - StringList &commands_help) override; - bool LoadSubCommand(llvm::StringRef cmd_name, const lldb::CommandObjectSP &command_obj) override; diff --git a/lldb/source/Commands/CommandObjectMultiword.cpp b/lldb/source/Commands/CommandObjectMultiword.cpp index e800bcc12bd3..0629342748aa 100644 --- a/lldb/source/Commands/CommandObjectMultiword.cpp +++ b/lldb/source/Commands/CommandObjectMultiword.cpp @@ -302,31 +302,6 @@ const char *CommandObjectMultiword::GetRepeatCommand(Args ¤t_command_args, return 
sub_command_object->GetRepeatCommand(current_command_args, index); } -void CommandObjectMultiword::AproposAllSubCommands(llvm::StringRef prefix, - llvm::StringRef search_word, - StringList &commands_found, - StringList &commands_help) { - CommandObject::CommandMap::const_iterator pos; - - for (pos = m_subcommand_dict.begin(); pos != m_subcommand_dict.end(); ++pos) { - const char *command_name = pos->first.c_str(); - CommandObject *sub_cmd_obj = pos->second.get(); - StreamString complete_command_name; - - complete_command_name << prefix << " " << command_name; - - if (sub_cmd_obj->HelpTextContainsWord(search_word)) { - commands_found.AppendString(complete_command_name.GetString()); - commands_help.AppendString(sub_cmd_obj->GetHelp()); - } - - if (sub_cmd_obj->IsMultiwordObject()) - sub_cmd_obj->AproposAllSubCommands(complete_command_name.GetString(), - search_word, commands_found, - commands_help); - } -} - CommandObjectProxy::CommandObjectProxy(CommandInterpreter &interpreter, const char *name, const char *help, const char *syntax, uint32_t flags) @@ -409,16 +384,6 @@ CommandObject *CommandObjectProxy::GetSubcommandObject(llvm::StringRef sub_cmd, return nullptr; } -void CommandObjectProxy::AproposAllSubCommands(llvm::StringRef prefix, - llvm::StringRef search_word, - StringList &commands_found, - StringList &commands_help) { - CommandObject *proxy_command = GetProxyCommandObject(); - if (proxy_command) - return proxy_command->AproposAllSubCommands(prefix, search_word, - commands_found, commands_help); -} - bool CommandObjectProxy::LoadSubCommand( llvm::StringRef cmd_name, const lldb::CommandObjectSP &command_sp) { CommandObject *proxy_command = GetProxyCommandObject(); From 65035e0d06762a80aaf54a21b0e643088e44d411 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sun, 2 Jan 2022 11:37:06 -0800 Subject: [PATCH 357/992] Precommit SCEV symbolic w.overflow exit tests --- .../overflow-intrinsics-trip-count.ll | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) 
diff --git a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll index d84e0ca21178..de05551ab7df 100644 --- a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll @@ -163,3 +163,202 @@ for.body: ; preds = %for.body.preheader, for.end: ; preds = %for.body, %entry ret void } + +define void @uadd_symbolic_start(i16 %start) { +; CHECK-LABEL: 'uadd_symbolic_start' +; CHECK-NEXT: Determining loop execution counts for: @uadd_symbolic_start +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @sadd_symbolic_start(i16 %start) { +; CHECK-LABEL: 'sadd_symbolic_start' +; CHECK-NEXT: Determining loop execution counts for: @sadd_symbolic_start +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @sadd_symbolic_start2(i16 %start) { +; CHECK-LABEL: 'sadd_symbolic_start2' +; CHECK-NEXT: Determining loop execution counts for: @sadd_symbolic_start2 +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + %not = xor i1 true, %ov + br i1 %not, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @usub_symbolic_start(i16 %start) { +; CHECK-LABEL: 'usub_symbolic_start' +; CHECK-NEXT: Determining loop execution counts for: @usub_symbolic_start +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ssub_symbolic_start(i16 %start) { +; CHECK-LABEL: 'ssub_symbolic_start' +; CHECK-NEXT: Determining loop execution counts for: @ssub_symbolic_start +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @smul_symbolic_start(i16 %start) { +; CHECK-LABEL: 'smul_symbolic_start' +; CHECK-NEXT: Determining loop execution counts for: @smul_symbolic_start +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %indvars.iv, i16 2) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @umul_symbolic_start(i16 %start) { +; CHECK-LABEL: 'umul_symbolic_start' +; CHECK-NEXT: Determining loop execution counts for: @umul_symbolic_start +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %indvars.iv, i16 2) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @sadd_symbolic_non_latch(i16 %start) { +; CHECK-LABEL: 'sadd_symbolic_non_latch' +; CHECK-NEXT: Determining loop execution counts for: @sadd_symbolic_non_latch +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: exit count for for.body: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: exit count for for.latch: (230 + (-1 * %start)) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.latch ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.end, label %for.latch + +for.latch: + %cmp = icmp eq i16 %math, 231 + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} From b50fea47b6c454581fce89af359f3afe5154986c Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Sat, 25 Dec 2021 13:30:42 -0500 Subject: [PATCH 358/992] [clang] Allow using std::coroutine_traits in std::experimental This is that diff I was aiming for. When transitioning code from coroutines-ts to c++20, it can be useful to add a using declaration to std::experimental pointing to std::coroutine_traits. This permits that use by checking whether lookup in std::experimentl finds a different decl to lookup in std. You still get a warning about std::experimental::coroutine_traits being a thing, just not an error. 
Reviewed By: ChuanqiXu Differential Revision: https://reviews.llvm.org/D115943 --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/Sema/SemaCoroutine.cpp | 5 +-- .../coroutine-mixed3-exp-namespace.cpp | 30 +++++++++++++++++ .../coroutine-mixed4-exp-namespace.cpp | 32 +++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 clang/test/SemaCXX/coroutine-mixed3-exp-namespace.cpp create mode 100644 clang/test/SemaCXX/coroutine-mixed4-exp-namespace.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8ef9195944d5..afc63d480627 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11047,7 +11047,7 @@ def warn_deprecated_coroutine_namespace : Warning< "use std::%0 instead">, InGroup; def err_mixed_use_std_and_experimental_namespace_for_coroutine : Error< - "mixed use of std and std::experimental namespaces for " + "conflicting mixed use of std and std::experimental namespaces for " "coroutine components">; def err_implicit_coroutine_std_nothrow_type_not_found : Error< "std::nothrow was not found; include before defining a coroutine which " diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 3a6d9f0b9f26..f5f78c02e370 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1761,8 +1761,9 @@ ClassTemplateDecl *Sema::lookupCoroutineTraits(SourceLocation KwLoc, auto *Found = *ResExp.begin(); Diag(Found->getLocation(), diag::note_entity_declared_at) << Found; - if (InStd) { - // Also found in std + if (InStd && + StdCoroutineTraitsCache != ResExp.getAsSingle()) { + // Also found something different in std Diag(KwLoc, diag::err_mixed_use_std_and_experimental_namespace_for_coroutine); Diag(StdCoroutineTraitsCache->getLocation(), diff --git a/clang/test/SemaCXX/coroutine-mixed3-exp-namespace.cpp 
b/clang/test/SemaCXX/coroutine-mixed3-exp-namespace.cpp new file mode 100644 index 000000000000..533f9d78e278 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-mixed3-exp-namespace.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s + +#include "Inputs/std-coroutine.h" + +namespace std::experimental { +using std::coroutine_handle; +using std::coroutine_traits; // expected-note{{declared here}} +} // namespace std::experimental + +struct my_awaitable { + bool await_ready() noexcept; + void await_suspend(std::coroutine_handle<> coro) noexcept; + void await_resume() noexcept; +}; + +struct promise_void { + void get_return_object(); + my_awaitable initial_suspend(); + my_awaitable final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +template <> +struct std::coroutine_traits { using promise_type = promise_void; }; + +void test() { + co_return; + // expected-warning@-1{{support for std::experimental::coroutine_traits will be removed}} +} diff --git a/clang/test/SemaCXX/coroutine-mixed4-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-mixed4-exp-namespace.cpp new file mode 100644 index 000000000000..715282dd2df8 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-mixed4-exp-namespace.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s + +#include "Inputs/std-coroutine.h" + +namespace std::experimental { +// expected-note@+1{{declared here}} +template using coroutine_traits = std::coroutine_traits; +using std::coroutine_handle; +} // namespace std::experimental + +struct my_awaitable { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<> coro) noexcept; + void await_resume() noexcept; +}; + +struct promise_void { + void get_return_object(); + my_awaitable initial_suspend(); + my_awaitable final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +template <> +struct std::coroutine_traits { using promise_type = promise_void; }; + +void 
test() { + co_return; // expected-error {{mixed use of std and std::experimental namespaces for coroutine components}} + // expected-warning@-1{{support for std::experimental::coroutine_traits will be removed}} + // expected-note@Inputs/std-coroutine.h:8 {{'coroutine_traits' declared here}} +} From 5caee2176ae79fb923de174bf229d9b75eb37add Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:01:07 +0000 Subject: [PATCH 359/992] Apply clang-tidy fixes for bugprone-argument-comment to MLIR ArithmeticOps.cpp (NFC) --- mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 98f617082f21..91cbf4bdb528 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -905,7 +905,7 @@ OpFoldResult arith::UIToFPOp::fold(ArrayRef operands) { FloatType floatTy = getType().cast(); APFloat apf(floatTy.getFloatSemantics(), APInt::getZero(floatTy.getWidth())); - apf.convertFromAPInt(api, /*signed=*/false, APFloat::rmNearestTiesToEven); + apf.convertFromAPInt(api, /*IsSigned=*/false, APFloat::rmNearestTiesToEven); return FloatAttr::get(floatTy, apf); } return {}; @@ -925,7 +925,7 @@ OpFoldResult arith::SIToFPOp::fold(ArrayRef operands) { FloatType floatTy = getType().cast(); APFloat apf(floatTy.getFloatSemantics(), APInt::getZero(floatTy.getWidth())); - apf.convertFromAPInt(api, /*signed=*/true, APFloat::rmNearestTiesToEven); + apf.convertFromAPInt(api, /*IsSigned=*/true, APFloat::rmNearestTiesToEven); return FloatAttr::get(floatTy, apf); } return {}; @@ -943,7 +943,7 @@ OpFoldResult arith::FPToUIOp::fold(ArrayRef operands) { const APFloat &apf = lhs.getValue(); IntegerType intTy = getType().cast(); bool ignored; - APSInt api(intTy.getWidth(), /*unsigned=*/true); + APSInt api(intTy.getWidth(), /*isUnsigned=*/true); if (APFloat::opInvalidOp == 
apf.convertToInteger(api, APFloat::rmTowardZero, &ignored)) { // Undefined behavior invoked - the destination type can't represent @@ -969,7 +969,7 @@ OpFoldResult arith::FPToSIOp::fold(ArrayRef operands) { const APFloat &apf = lhs.getValue(); IntegerType intTy = getType().cast(); bool ignored; - APSInt api(intTy.getWidth(), /*unsigned=*/false); + APSInt api(intTy.getWidth(), /*isUnsigned=*/false); if (APFloat::opInvalidOp == apf.convertToInteger(api, APFloat::rmTowardZero, &ignored)) { // Undefined behavior invoked - the destination type can't represent From f829d62c219c6b880f0106a4a75c0f8640fcfe54 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:01:48 +0000 Subject: [PATCH 360/992] Apply clang-tidy fixes for modernize-use-default-member-init to MLIR ReductionNode.cpp (NFC) --- mlir/lib/Reducer/ReductionNode.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index 9f0f461d676e..83892be4d4f5 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -28,8 +28,7 @@ ReductionNode::ReductionNode( llvm::SpecificBumpPtrAllocator &allocator) /// Root node will have the parent pointer point to themselves. : parent(parentNode == nullptr ? 
this : parentNode), - size(std::numeric_limits::max()), - interesting(Tester::Interestingness::Untested), ranges(ranges), + size(std::numeric_limits::max()), ranges(ranges), startRanges(ranges), allocator(allocator) { if (parent != this) if (failed(initialize(parent->getModule(), parent->getRegion()))) From abb336d26b5e91c991f031dd7291cdac07154cb8 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:01:50 +0000 Subject: [PATCH 361/992] Apply clang-tidy fixes for modernize-use-equals-default to MLIR (NFC) --- mlir/lib/Analysis/Liveness.cpp | 2 +- .../Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp | 3 +-- .../lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp | 3 +-- mlir/lib/Pass/PassRegistry.cpp | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Analysis/Liveness.cpp b/mlir/lib/Analysis/Liveness.cpp index 64ff321cb1e6..d904ec0d3966 100644 --- a/mlir/lib/Analysis/Liveness.cpp +++ b/mlir/lib/Analysis/Liveness.cpp @@ -27,7 +27,7 @@ struct BlockInfoBuilder { using ValueSetT = Liveness::ValueSetT; /// Constructs an empty block builder. - BlockInfoBuilder() {} + BlockInfoBuilder() = default; /// Fills the block builder with initial liveness information. 
BlockInfoBuilder(Block *block) : block(block) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index 6425c058f0b8..7ee9caf004af 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -36,8 +36,7 @@ struct LinalgComprehensiveModuleBufferize LinalgComprehensiveModuleBufferize() = default; LinalgComprehensiveModuleBufferize( - const LinalgComprehensiveModuleBufferize &p) - : LinalgComprehensiveModuleBufferizeBase(p) {} + const LinalgComprehensiveModuleBufferize &p) = default; void runOnOperation() override; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index 61d90c031f3d..fbd526ba51ff 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -33,8 +33,7 @@ namespace { struct SparsificationPass : public SparsificationBase { SparsificationPass() = default; - SparsificationPass(const SparsificationPass &pass) - : SparsificationBase(pass) {} + SparsificationPass(const SparsificationPass &pass) = default; /// Returns parallelization strategy given on command line. SparseParallelizationStrategy parallelOption() { diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index 933d77bbef12..c6f501044600 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -508,7 +508,7 @@ namespace { /// This struct represents the possible data entries in a parsed pass pipeline /// list. 
struct PassArgData { - PassArgData() {} + PassArgData() = default; PassArgData(const PassRegistryEntry *registryEntry) : registryEntry(registryEntry) {} From e4853be2f130c3e27f6c84fc4ad1d66d8b9a3810 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:02:14 +0000 Subject: [PATCH 362/992] Apply clang-tidy fixes for performance-for-range-copy to MLIR (NFC) --- mlir/lib/Analysis/AffineStructures.cpp | 2 +- mlir/lib/Analysis/LoopAnalysis.cpp | 3 ++- mlir/lib/Analysis/NumberOfExecutions.cpp | 2 +- mlir/lib/Analysis/SliceAnalysis.cpp | 2 +- mlir/lib/Bindings/Python/IRCore.cpp | 8 +++---- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 6 ++--- .../GPUCommon/GPUToLLVMConversion.cpp | 2 +- mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 2 +- .../ConvertLaunchFuncToVulkanCalls.cpp | 2 +- mlir/lib/Conversion/LLVMCommon/Pattern.cpp | 4 ++-- .../Conversion/LLVMCommon/VectorPattern.cpp | 2 +- .../Conversion/MemRefToLLVM/MemRefToLLVM.cpp | 3 ++- .../PDLToPDLInterp/PDLToPDLInterp.cpp | 4 ++-- .../PDLToPDLInterp/PredicateTree.cpp | 4 ++-- mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp | 2 +- mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp | 2 +- .../ConvertLaunchFuncToLLVMCalls.cpp | 2 +- .../StandardToLLVM/StandardToLLVM.cpp | 2 +- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 4 ++-- .../Conversion/VectorToGPU/VectorToGPU.cpp | 4 ++-- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 5 +++-- mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 2 +- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 2 +- .../GPU/Transforms/KernelOutlining.cpp | 4 ++-- .../GPU/Transforms/MemoryPromotion.cpp | 2 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 2 +- .../Linalg/Analysis/DependenceAnalysis.cpp | 2 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 10 ++++----- .../Dialect/Linalg/Transforms/Bufferize.cpp | 2 +- .../Linalg/Transforms/DropUnitDims.cpp | 8 +++---- .../Linalg/Transforms/ElementwiseOpFusion.cpp | 16 +++++++------- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 8 +++---- 
.../Linalg/Transforms/FusionOnTensors.cpp | 4 ++-- .../Dialect/Linalg/Transforms/Hoisting.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 2 +- .../Dialect/Linalg/Transforms/Promotion.cpp | 4 ++-- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 2 +- .../Dialect/Linalg/Transforms/Transforms.cpp | 8 +++---- .../Linalg/Transforms/Vectorization.cpp | 2 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 12 +++++----- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 10 ++++----- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 2 +- mlir/lib/Dialect/PDL/IR/PDL.cpp | 2 +- mlir/lib/Dialect/SCF/SCF.cpp | 11 +++++----- .../lib/Dialect/SCF/Transforms/ForToWhile.cpp | 4 ++-- .../Dialect/SCF/Transforms/LoopPipelining.cpp | 6 ++--- .../SCF/Transforms/ParallelLoopTiling.cpp | 4 ++-- .../Transforms/LowerABIAttributesPass.cpp | 2 +- .../SPIRV/Transforms/SPIRVConversion.cpp | 6 ++--- mlir/lib/Dialect/Shape/IR/Shape.cpp | 2 +- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 4 ++-- .../VectorMultiDimReductionTransforms.cpp | 2 +- mlir/lib/Dialect/Vector/VectorOps.cpp | 22 +++++++++---------- ...rTransferPermutationMapRewritePatterns.cpp | 2 +- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 6 ++--- .../Dialect/Vector/VectorUnrollDistribute.cpp | 4 ++-- mlir/lib/IR/AffineExpr.cpp | 2 +- mlir/lib/IR/AffineMap.cpp | 6 ++--- mlir/lib/IR/BuiltinTypes.cpp | 2 +- mlir/lib/IR/Verifier.cpp | 2 +- mlir/lib/Interfaces/ControlFlowInterfaces.cpp | 4 ++-- mlir/lib/Interfaces/InferTypeOpInterface.cpp | 2 +- mlir/lib/Reducer/ReductionTreePass.cpp | 2 +- mlir/lib/Rewrite/ByteCode.cpp | 6 ++--- mlir/lib/Rewrite/PatternApplicator.cpp | 2 +- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 2 +- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 2 +- .../Transforms/BufferResultsToOutParams.cpp | 2 +- mlir/lib/Transforms/PipelineDataTransfer.cpp | 2 +- .../Transforms/Utils/DialectConversion.cpp | 4 ++-- mlir/lib/Transforms/Utils/InliningUtils.cpp | 2 +- mlir/lib/Transforms/Utils/LoopUtils.cpp | 2 +- 
mlir/lib/Transforms/Utils/RegionUtils.cpp | 2 +- 73 files changed, 150 insertions(+), 146 deletions(-) diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 205abe280d9b..3742a2428148 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -3346,7 +3346,7 @@ AffineMap mlir::alignAffineMapWithValues(AffineMap map, ValueRange operands, newSyms->append(syms.begin(), syms.end()); } - for (auto operand : llvm::enumerate(operands)) { + for (const auto &operand : llvm::enumerate(operands)) { // Compute replacement dim/sym of operand. AffineExpr replacement; auto dimIt = std::find(dims.begin(), dims.end(), operand.value()); diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp index 0672f25671ec..914bc1604d39 100644 --- a/mlir/lib/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Analysis/LoopAnalysis.cpp @@ -353,7 +353,8 @@ bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef shifts) { // Work backwards over the body of the block so that the shift of a use's // ancestor operation in the block gets recorded before it's looked up. DenseMap forBodyShift; - for (auto it : llvm::enumerate(llvm::reverse(forBody->getOperations()))) { + for (const auto &it : + llvm::enumerate(llvm::reverse(forBody->getOperations()))) { auto &op = it.value(); // Get the index of the current operation, note that we are iterating in diff --git a/mlir/lib/Analysis/NumberOfExecutions.cpp b/mlir/lib/Analysis/NumberOfExecutions.cpp index ad30058d3d73..ad90cee92ee8 100644 --- a/mlir/lib/Analysis/NumberOfExecutions.cpp +++ b/mlir/lib/Analysis/NumberOfExecutions.cpp @@ -52,7 +52,7 @@ static void computeRegionBlockNumberOfExecutions( // Query RegionBranchOpInterface interface if it is available. 
if (auto regionInterface = dyn_cast(parentOp)) { SmallVector operands(parentOp->getNumOperands()); - for (auto operandIt : llvm::enumerate(parentOp->getOperands())) + for (const auto &operandIt : llvm::enumerate(parentOp->getOperands())) matchPattern(operandIt.value(), m_Constant(&operands[operandIt.index()])); regionInterface.getNumRegionInvocations(operands, numRegionsInvocations); diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp index b45ee4c0faae..fa78a804175d 100644 --- a/mlir/lib/Analysis/SliceAnalysis.cpp +++ b/mlir/lib/Analysis/SliceAnalysis.cpp @@ -86,7 +86,7 @@ static void getBackwardSliceImpl(Operation *op, if (filter && !filter(op)) return; - for (auto en : llvm::enumerate(op->getOperands())) { + for (const auto &en : llvm::enumerate(op->getOperands())) { auto operand = en.value(); if (auto *definingOp = operand.getDefiningOp()) { if (backwardSlice->count(definingOp) == 0) diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index ccdd159fd438..be2abcdd501f 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -1155,7 +1155,7 @@ PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, resultTypes.reserve(resultTypeList.size()); if (resultSegmentSpecObj.is_none()) { // Non-variadic result unpacking. 
- for (auto it : llvm::enumerate(resultTypeList)) { + for (const auto &it : llvm::enumerate(resultTypeList)) { try { resultTypes.push_back(py::cast(it.value())); if (!resultTypes.back()) @@ -1179,7 +1179,7 @@ PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, .str()); } resultSegmentLengths.reserve(resultTypeList.size()); - for (auto it : + for (const auto &it : llvm::enumerate(llvm::zip(resultTypeList, resultSegmentSpec))) { int segmentSpec = std::get<1>(it.value()); if (segmentSpec == 1 || segmentSpec == 0) { @@ -1240,7 +1240,7 @@ PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, operands.reserve(operands.size()); if (operandSegmentSpecObj.is_none()) { // Non-sized operand unpacking. - for (auto it : llvm::enumerate(operandList)) { + for (const auto &it : llvm::enumerate(operandList)) { try { operands.push_back(py::cast(it.value())); if (!operands.back()) @@ -1264,7 +1264,7 @@ PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, .str()); } operandSegmentLengths.reserve(operandList.size()); - for (auto it : + for (const auto &it : llvm::enumerate(llvm::zip(operandList, operandSegmentSpec))) { int segmentSpec = std::get<1>(it.value()); if (segmentSpec == 1 || segmentSpec == 0) { diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 8c269ee8a4df..828f0ef15120 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -21,7 +21,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, SmallVector workgroupBuffers; workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); - for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { + for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { Value attribution = en.value(); auto type = attribution.getType().dyn_cast(); @@ -88,7 +88,7 @@ 
GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, if (!workgroupBuffers.empty()) zero = rewriter.create(loc, i32Type, rewriter.getI32IntegerAttr(0)); - for (auto en : llvm::enumerate(workgroupBuffers)) { + for (const auto &en : llvm::enumerate(workgroupBuffers)) { LLVM::GlobalOp global = en.value(); Value address = rewriter.create(loc, global); auto elementType = @@ -111,7 +111,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, // Rewrite private memory attributions to alloca'ed buffers. unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions(); auto int64Ty = IntegerType::get(rewriter.getContext(), 64); - for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { + for (const auto &en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { Value attribution = en.value(); auto type = attribution.getType().cast(); assert(type && type.hasStaticShape() && "unexpected type in attribution"); diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 757f3828bdc7..f7f8b6b14235 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -634,7 +634,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( arraySize, /*alignment=*/0); auto zero = builder.create(loc, llvmInt32Type, builder.getI32IntegerAttr(0)); - for (auto en : llvm::enumerate(arguments)) { + for (const auto &en : llvm::enumerate(arguments)) { auto index = builder.create( loc, llvmInt32Type, builder.getI32IntegerAttr(en.index())); auto fieldPtr = builder.create( diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp index 7405f6f91a4f..96dd32aaa99d 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp @@ -206,7 +206,7 @@ 
lowerAsEntryFunction(gpu::GPUFuncOp funcOp, TypeConverter &typeConverter, // LowerABIAttributesPass. TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs()); { - for (auto argType : enumerate(funcOp.getType().getInputs())) { + for (const auto &argType : enumerate(funcOp.getType().getInputs())) { auto convertedType = typeConverter.convertType(argType.value()); signatureConverter.addInputs(argType.index(), convertedType); } diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp index b8d1a8556395..e7e64aece05d 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp @@ -222,7 +222,7 @@ void VulkanLaunchFuncToVulkanCallsPass::createBindMemRefCalls( Value descriptorSet = builder.create( loc, getInt32Type(), builder.getI32IntegerAttr(0)); - for (auto en : + for (const auto &en : llvm::enumerate(cInterfaceVulkanLaunchCallOp.getOperands().drop_front( kVulkanLaunchNumConfigOperands))) { // Create LLVM constant for the descriptor binding index. diff --git a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp index 47dabc90bce5..0003bd859e47 100644 --- a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp @@ -213,11 +213,11 @@ MemRefDescriptor ConvertToLLVMPattern::createMemRefDescriptor( createIndexConstant(rewriter, loc, 0)); // Fields 4: Sizes. - for (auto en : llvm::enumerate(sizes)) + for (const auto &en : llvm::enumerate(sizes)) memRefDescriptor.setSize(rewriter, loc, en.index(), en.value()); // Field 5: Strides. 
- for (auto en : llvm::enumerate(strides)) + for (const auto &en : llvm::enumerate(strides)) memRefDescriptor.setStride(rewriter, loc, en.index(), en.value()); return memRefDescriptor; diff --git a/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp b/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp index ace5bec09f4e..54c5b93877ff 100644 --- a/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp @@ -101,7 +101,7 @@ LogicalResult LLVM::detail::handleMultidimensionalVectors( // For this unrolled `position` corresponding to the `linearIndex`^th // element, extract operand vectors SmallVector extractedOperands; - for (auto operand : llvm::enumerate(operands)) { + for (const auto &operand : llvm::enumerate(operands)) { extractedOperands.push_back(rewriter.create( loc, operand1DVectorTypes[operand.index()], operand.value(), position)); diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index b1f7d0452ee1..9142be183174 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -1420,7 +1420,8 @@ class TransposeOpLowering : public ConvertOpToLLVMPattern { targetMemRef.setOffset(rewriter, loc, viewMemRef.offset(rewriter, loc)); // Iterate over the dimensions and apply size/stride permutation. 
- for (auto en : llvm::enumerate(transposeOp.permutation().getResults())) { + for (const auto &en : + llvm::enumerate(transposeOp.permutation().getResults())) { int sourcePos = en.index(); int targetPos = en.value().cast().getPosition(); targetMemRef.setSize(rewriter, loc, targetPos, diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp index 7db7dc03dc80..367bbb55ee1b 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp @@ -736,7 +736,7 @@ void PatternLowering::generateRewriter( bool seenVariableLength = false; Type valueTy = builder.getType(); Type valueRangeTy = pdl::RangeType::get(valueTy); - for (auto it : llvm::enumerate(resultTys)) { + for (const auto &it : llvm::enumerate(resultTys)) { Value &type = rewriteValues[it.value()]; if (type) continue; @@ -862,7 +862,7 @@ void PatternLowering::generateOperationResultTypeRewriter( // Otherwise, handle inference for each of the result types individually. OperandRange resultTypeValues = op.types(); types.reserve(resultTypeValues.size()); - for (auto it : llvm::enumerate(resultTypeValues)) { + for (const auto &it : llvm::enumerate(resultTypeValues)) { Value resultType = it.value(); // Check for an already translated value. 
diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index 517f28c2044f..c325bfb42456 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -162,7 +162,7 @@ static void getTreePredicates(std::vector &predList, builder.getAllOperands(opPos)); } else { bool foundVariableLength = false; - for (auto operandIt : llvm::enumerate(operands)) { + for (const auto &operandIt : llvm::enumerate(operands)) { bool isVariadic = operandIt.value().getType().isa(); foundVariableLength |= isVariadic; @@ -460,7 +460,7 @@ static void buildCostGraph(ArrayRef roots, RootOrderingGraph &graph, } // Default case: visit all the operands. - for (auto p : llvm::enumerate(operationOp.operands())) + for (const auto &p : llvm::enumerate(operationOp.operands())) toVisit.emplace(p.value(), entry.value, p.index(), entry.depth + 1); }) diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp index f3547e580501..d2faff9d3238 100644 --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp @@ -261,7 +261,7 @@ void AffineLoopToGpuConverter::createLaunch(AffineForOp rootForOp, builder.setInsertionPointToStart(&launchOp.body().front()); auto *lbArgumentIt = lbs.begin(); auto *stepArgumentIt = steps.begin(); - for (auto en : llvm::enumerate(ivs)) { + for (const auto &en : llvm::enumerate(ivs)) { Value id = en.index() < numBlockDims ? getDim3Value(launchOp.getBlockIds(), en.index()) diff --git a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp index fd6ec8208620..6bb3da666ce7 100644 --- a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp +++ b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp @@ -387,7 +387,7 @@ WhileOpConversion::matchAndRewrite(scf::WhileOp whileOp, OpAdaptor adaptor, // the before region, which may not matching the whole op's result. 
Instead, // the scf.condition op returns values matching the whole op's results. So we // need to create/load/store variables according to that. - for (auto it : llvm::enumerate(condArgs)) { + for (const auto &it : llvm::enumerate(condArgs)) { auto res = it.value(); auto i = it.index(); auto pointerType = diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp index bf60f4b6a211..9f1f93f9abf7 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp @@ -208,7 +208,7 @@ class GPULaunchLowering : public ConvertOpToLLVMPattern { SmallVector copyInfo; auto numKernelOperands = launchOp.getNumKernelOperands(); auto kernelOperands = adaptor.getOperands().take_back(numKernelOperands); - for (auto operand : llvm::enumerate(kernelOperands)) { + for (const auto &operand : llvm::enumerate(kernelOperands)) { // Check if the kernel's operand is a ranked memref. 
auto memRefType = launchOp.getKernelOperand(operand.index()) .getType() diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index feaa140cc710..88c7f43b8dc5 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -254,7 +254,7 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern { rewriter.getNamedAttr(function_like_impl::getArgDictAttrName(), rewriter.getArrayAttr(newArgAttrs))); } - for (auto pair : llvm::enumerate(attributes)) { + for (const auto &pair : llvm::enumerate(attributes)) { if (pair.value().getName() == "llvm.linkage") { attributes.erase(attributes.begin() + pair.index()); break; diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 38c8276f2843..f28527d185c1 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -694,7 +694,7 @@ elementwiseMatchAndRewriteHelper(Operation *operation, SmallVector newShape; SmallVector affineExprs; newShape.reserve(type.getRank()); - for (auto it : llvm::enumerate(type.getShape())) { + for (const auto &it : llvm::enumerate(type.getShape())) { if (it.value() == resultTy.getDimSize(it.index())) { newShape.push_back(it.value()); affineExprs.push_back( @@ -1175,7 +1175,7 @@ class TransposeConverter : public OpRewritePattern { SmallVector inputExprs; inputExprs.resize(resultTy.getRank()); auto operandTy = input.getType().cast(); - for (auto permutation : llvm::enumerate(perms.getValues())) { + for (const auto &permutation : llvm::enumerate(perms.getValues())) { auto index = permutation.index(); auto value = permutation.value().getZExtValue(); if (!operandTy.hasRank() || operandTy.isDynamicDim(index)) { diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 
30bad881a319..725264d31fc7 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -449,7 +449,7 @@ static void convertForOp(scf::ForOp op, llvm::DenseMap &valueMapping) { SmallVector newOperands; SmallVector> argMapping; - for (auto operand : llvm::enumerate(op.getIterOperands())) { + for (const auto &operand : llvm::enumerate(op.getIterOperands())) { auto it = valueMapping.find(operand.value()); if (it == valueMapping.end()) continue; @@ -474,7 +474,7 @@ static void convertYieldOp(scf::YieldOp op, OpBuilder b(op); auto loop = cast(op->getParentOp()); auto yieldOperands = llvm::to_vector<4>(op.getOperands()); - for (auto operand : llvm::enumerate(op.getOperands())) { + for (const auto &operand : llvm::enumerate(op.getOperands())) { auto it = valueMapping.find(operand.value()); if (it == valueMapping.end()) continue; diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 062a54432cea..0a938430a5b9 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -497,7 +497,7 @@ class VectorShuffleOpConversion eltType = llvmType.cast().getElementType(); Value insert = rewriter.create(loc, llvmType); int64_t insPos = 0; - for (auto en : llvm::enumerate(maskArrayAttr)) { + for (const auto &en : llvm::enumerate(maskArrayAttr)) { int64_t extPos = en.value().cast().getInt(); Value value = adaptor.v1(); if (extPos >= v1Dim) { @@ -883,7 +883,8 @@ class VectorTypeCastOpConversion desc.setOffset(rewriter, loc, zero); // Fill size and stride descriptors in memref. 
- for (auto indexedSize : llvm::enumerate(targetMemRefType.getShape())) { + for (const auto &indexedSize : + llvm::enumerate(targetMemRefType.getShape())) { int64_t index = indexedSize.index(); auto sizeAttr = rewriter.getIntegerAttr(rewriter.getIndexType(), indexedSize.value()); diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index c3c1b5129480..c89588e0b87b 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -680,7 +680,7 @@ static void composeAffineMapAndOperands(AffineMap *map, for (auto *container : {&dims, &syms}) { bool isDim = (container == &dims); auto &repls = isDim ? dimReplacements : symReplacements; - for (auto en : llvm::enumerate(*container)) { + for (const auto &en : llvm::enumerate(*container)) { Value v = en.value(); if (!v) { assert(isDim ? !map->isFunctionOfDim(en.index()) diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 579c385d653c..ea8ce177848e 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -940,7 +940,7 @@ static LogicalResult verify(gpu::ReturnOp returnOp) { .attachNote(function.getLoc()) .append("return type declared here"); - for (auto pair : llvm::enumerate( + for (const auto &pair : llvm::enumerate( llvm::zip(function.getType().getResults(), returnOp.operands()))) { Type type; Value operand; diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index ac5272348438..a2e64d9c92a1 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -54,7 +54,7 @@ static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody, createForAllDimensions(builder, loc, indexOps); // Replace the leading 12 function args with the respective thread/block index // operations. 
Iterate backwards since args are erased and indices change. - for (auto indexOp : enumerate(indexOps)) + for (const auto &indexOp : enumerate(indexOps)) map.map(firstBlock.getArgument(indexOp.index()), indexOp.value()); } @@ -174,7 +174,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, // Map arguments from gpu.launch region to the arguments of the gpu.func // operation. Block &entryBlock = outlinedFuncBody.front(); - for (auto operand : enumerate(operands)) + for (const auto &operand : enumerate(operands)) map.map(operand.value(), entryBlock.getArgument(operand.index())); // Clone the region of the gpu.launch operation into the gpu.func operation. diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp index 55098a9c5b46..f7c5ca8d5a77 100644 --- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp @@ -89,7 +89,7 @@ static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) { }); // Map the innermost loops to threads in reverse order. - for (auto en : + for (const auto &en : llvm::enumerate(llvm::reverse(llvm::makeArrayRef(ivs).take_back( GPUDialect::getNumWorkgroupDimensions())))) { Value v = en.value(); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 66157371f30c..77f436f109ca 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1485,7 +1485,7 @@ static void printGlobalOp(OpAsmPrinter &p, GlobalOp op) { // list is parsed, returns -1. 
static int parseOptionalKeywordAlternative(OpAsmParser &parser, ArrayRef keywords) { - for (auto en : llvm::enumerate(keywords)) { + for (const auto &en : llvm::enumerate(keywords)) { if (succeeded(parser.parseOptionalKeyword(en.value()))) return en.index(); } diff --git a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp index 70117e533b19..ee5c6da544b0 100644 --- a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp +++ b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp @@ -103,7 +103,7 @@ LinalgDependenceGraph::buildDependenceGraph(Aliases &aliases, FuncOp f) { LinalgDependenceGraph::LinalgDependenceGraph(Aliases &aliases, ArrayRef ops) : aliases(aliases), linalgOps(ops.begin(), ops.end()) { - for (auto en : llvm::enumerate(linalgOps)) { + for (const auto &en : llvm::enumerate(linalgOps)) { linalgOpPositions.insert( std::make_pair(en.value().getOperation(), en.index())); } diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index fe3363d7d0de..083d8b75463a 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1093,7 +1093,7 @@ static LogicalResult verify(PadTensorOp op) { return op.emitError("expected the block to have ") << rank << " arguments"; // Note: the number and type of yield values are checked in the YieldOp. 
- for (auto en : llvm::enumerate(block.getArgumentTypes())) { + for (const auto &en : llvm::enumerate(block.getArgumentTypes())) { if (!en.value().isIndex()) return op.emitOpError("expected block argument ") << (en.index() + 1) << " to be an index"; @@ -1204,7 +1204,7 @@ PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad, SmallVector low, high; auto rankedTensorType = type.cast(); assert(rankedTensorType.hasStaticShape()); - for (auto en : enumerate(rankedTensorType.getShape())) { + for (const auto &en : enumerate(rankedTensorType.getShape())) { AffineExpr d0; bindDims(b.getContext(), d0); auto dimOp = b.createOrFold(loc, source, en.index()); @@ -1275,7 +1275,7 @@ SmallVector PadTensorOp::getIterationDomain(OpBuilder &b) { // Initialize all the ranges to {zero, one, one}. All the `ub`s are // overwritten. SmallVector loopRanges(reifiedShapes[0].size(), {zero, one, one}); - for (auto ub : enumerate(reifiedShapes[0])) + for (const auto &ub : enumerate(reifiedShapes[0])) loopRanges[ub.index()].size = ub.value(); return loopRanges; } @@ -2001,7 +2001,7 @@ struct TiledLoopInputsFolder : public OpRewritePattern { // Store ids of the corresponding old and new input operands. 
SmallVector oldInputIdToNew(tiledLoop.inputs().size(), kNoMatch); - for (auto en : llvm::enumerate( + for (const auto &en : llvm::enumerate( llvm::zip(tiledLoop.inputs(), tiledLoop.getRegionInputArgs()))) { Value in, bbArg; size_t index = en.index(); @@ -2215,7 +2215,7 @@ struct TiledLoopResultsFolder : public OpRewritePattern { SmallVector oldResultIdToNew(tiledLoop.getNumResults(), kNoMatch); SmallVector resultReplacement(tiledLoop.getNumResults()); - for (auto en : llvm::enumerate( + for (const auto &en : llvm::enumerate( llvm::zip(tiledLoop.outputs(), tiledLoop.getRegionOutputArgs()))) { size_t index = en.index(); Value out = std::get<0>(en.value()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp index 32d536384331..da01ec496bec 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -43,7 +43,7 @@ allocateBuffersForResults(Location loc, LinalgOp linalgOp, ValueRange outputs, // Allocate a buffer for every tensor result. 
assert(linalgOp.getNumOutputs() == linalgOp->getNumResults()); - for (auto en : llvm::enumerate(linalgOp->getResultTypes())) { + for (const auto &en : llvm::enumerate(linalgOp->getResultTypes())) { size_t resultIndex = en.index(); Type resultType = en.value(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp index 521fdd9d2e89..eaf95a3751a8 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -186,7 +186,7 @@ struct FoldUnitDimLoops : public OpRewritePattern { DenseSet unitDims; SmallVector unitDimsReductionLoops; ArrayAttr iteratorTypes = genericOp.iterator_types(); - for (auto expr : enumerate(invertedMap.getResults())) { + for (const auto &expr : enumerate(invertedMap.getResults())) { if (AffineDimExpr dimExpr = expr.value().dyn_cast()) if (dims[dimExpr.getPosition()] == 1) unitDims.insert(expr.index()); @@ -205,7 +205,7 @@ struct FoldUnitDimLoops : public OpRewritePattern { // Compute the iterator types of the modified op by dropping the one-trip // count loops. SmallVector newIteratorTypes; - for (auto attr : llvm::enumerate(iteratorTypes)) { + for (const auto &attr : llvm::enumerate(iteratorTypes)) { if (!unitDims.count(attr.index())) newIteratorTypes.push_back(attr.value()); } @@ -439,7 +439,7 @@ struct ReplaceUnitExtents : public OpRewritePattern { // If any result tensor has a modified shape, then add reshape to recover // the original shape. 
SmallVector resultReplacements; - for (auto result : llvm::enumerate(replacementOp.getResults())) { + for (const auto &result : llvm::enumerate(replacementOp.getResults())) { unsigned index = result.index() + replacementOp.getNumInputs(); auto origResultType = genericOp.getResult(result.index()).getType(); @@ -465,7 +465,7 @@ static Optional> getReassociationMapForFoldingUnitDims(ArrayRef mixedSizes) { SmallVector reassociation; ReassociationIndices curr; - for (auto it : llvm::enumerate(mixedSizes)) { + for (const auto &it : llvm::enumerate(mixedSizes)) { auto dim = it.index(); auto size = it.value(); curr.push_back(dim); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index 795a23d7b1d8..6fd3927c80ca 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -565,7 +565,7 @@ LogicalResult ExpansionInfo::compute(LinalgOp linalgOp, // dimension of the original op. SmallVector numExpandedDims(fusedIndexMap.getNumDims(), 1); expandedShapeMap.resize(fusedIndexMap.getNumDims()); - for (auto resultExpr : llvm::enumerate(fusedIndexMap.getResults())) { + for (const auto &resultExpr : llvm::enumerate(fusedIndexMap.getResults())) { unsigned pos = resultExpr.value().cast().getPosition(); AffineMap foldedDims = reassociationMaps[resultExpr.index()]; numExpandedDims[pos] = foldedDims.getNumResults(); @@ -581,7 +581,7 @@ LogicalResult ExpansionInfo::compute(LinalgOp linalgOp, // Compute reassociation map from the original op to the expanded op. 
unsigned sum = 0; reassociation.reserve(fusedIndexMap.getNumDims()); - for (auto numFoldedDim : llvm::enumerate(numExpandedDims)) { + for (const auto &numFoldedDim : llvm::enumerate(numExpandedDims)) { auto seq = llvm::seq(sum, sum + numFoldedDim.value()); reassociation.emplace_back(seq.begin(), seq.end()); sum += numFoldedDim.value(); @@ -861,7 +861,7 @@ struct FoldProducerReshapeOpByLinearization if (!genericOp.hasTensorSemantics()) return failure(); SmallVector inputOperands = genericOp.getInputOperands(); - for (auto en : llvm::enumerate(inputOperands)) { + for (const auto &en : llvm::enumerate(inputOperands)) { auto reshapeOp = en.value()->get().getDefiningOp(); if (!reshapeOp) continue; @@ -976,7 +976,7 @@ struct PushExpandingReshape : public OpRewritePattern { // 1. Look for tensor_expand_shape operands and figure out save the // dimensions merged. SmallVector inputOperands = genericOp.getInputOperands(); - for (auto en : llvm::enumerate(inputOperands)) { + for (const auto &en : llvm::enumerate(inputOperands)) { auto reshapeOp = en.value()->get().template getDefiningOp(); if (!reshapeOp) @@ -1010,7 +1010,7 @@ struct PushExpandingReshape : public OpRewritePattern { // 2. Verify that we can merge the dimensions in the linalg and that we // don't need to create new reshapes operands. Inserting new reshape // operands would defeat the purpose of the transformation. - for (auto en : llvm::enumerate(inputOperands)) { + for (const auto &en : llvm::enumerate(inputOperands)) { if (en.value()->get() == newOperands[en.index()]) { AffineMap map = genericOp.getTiedIndexingMap(en.value()); for (unsigned i : llvm::seq(unsigned(0), map.getNumResults())) { @@ -1060,7 +1060,7 @@ struct PushExpandingReshape : public OpRewritePattern { newOp.region().begin()); // 6. Reshape the so that the type matches the uses. 
SmallVector newResults; - for (auto result : llvm::enumerate(newOp->getResults())) { + for (const auto &result : llvm::enumerate(newOp->getResults())) { newResults.push_back(rewriter.create( genericOp->getLoc(), genericOp.getOutputTensorTypes()[result.index()], result.value(), reassociation)); @@ -1407,7 +1407,7 @@ class FoldConstantBase : public OpRewritePattern { // All inputs should be constants. int numInputs = genericOp.getNumInputs(); SmallVector inputValues(numInputs); - for (auto operand : llvm::enumerate(genericOp.getInputOperands())) { + for (const auto &operand : llvm::enumerate(genericOp.getInputOperands())) { if (!matchPattern(operand.value()->get(), m_Constant(&inputValues[operand.index()]))) return failure(); @@ -1712,7 +1712,7 @@ struct RemoveOutsDependency : public OpRewritePattern { continue; modifiedOutput = true; SmallVector dynamicDims; - for (auto dim : llvm::enumerate(operandType.getShape())) { + for (const auto &dim : llvm::enumerate(operandType.getShape())) { if (dim.value() != ShapedType::kDynamicSize) continue; dynamicDims.push_back(rewriter.createOrFold( diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 0e0bc1ad48d1..f426af01d872 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -87,7 +87,7 @@ getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth, LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange map: " << map << "\n"); SmallVector shapeRanges(map.getNumResults(), nullptr); - for (auto en : llvm::enumerate(map.getResults())) { + for (const auto &en : llvm::enumerate(map.getResults())) { auto dimExpr = en.value().dyn_cast(); if (!dimExpr) continue; @@ -250,7 +250,7 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producerOp, AffineMap producerMap, LLVM_DEBUG(llvm::dbgs() << "Producer map: " << producerMap << "\n"); DenseMap fusedLoopsAndRanges; Value shapedOperand = consumerOpOperand.get(); - for (auto en : 
llvm::enumerate(producerMap.getResults())) { + for (const auto &en : llvm::enumerate(producerMap.getResults())) { unsigned posInProducerLoop = en.value().cast().getPosition(); fusedLoopsAndRanges[posInProducerLoop] = getRangeFromOperandShape( b, consumerOpOperand.getOwner()->getLoc(), shapedOperand, en.index()); @@ -521,7 +521,7 @@ mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult, static AffineMap pruneReductionDimsFromMap(ArrayRef iteratorTypes, AffineMap map) { llvm::SmallDenseSet projectedDims; - for (auto attr : llvm::enumerate(iteratorTypes)) { + for (const auto &attr : llvm::enumerate(iteratorTypes)) { if (!isParallelIterator(attr.value())) projectedDims.insert(attr.index()); } @@ -810,7 +810,7 @@ fuseOperations(OpBuilder &b, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp, SmallVector fusedOps(fusionCandidates.size()); DenseMap origOpToFusedOp; origOpToFusedOp[rootOp.getOperation()] = tiledOp; - for (auto candidate : enumerate(llvm::reverse(fusionCandidates))) { + for (const auto &candidate : enumerate(llvm::reverse(fusionCandidates))) { LinalgOp origOp = candidate.value(); LinalgOp fusedOp = fuse(b, origOp, fusedLoopsAndRanges); origOpToFusedOp[origOp.getOperation()] = fusedOp; diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp index c171de854880..6bdcc192e27a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -42,7 +42,7 @@ static SmallVector getTiledSliceDims(OpOperand *consumerOperand, // Search the slice dimensions tiled by a tile loop dimension. 
DenseSet tiledSliceDimIndices; - for (auto en : enumerate(indexingMap.getResults())) { + for (const auto &en : enumerate(indexingMap.getResults())) { for (auto tiledLoopDim : tiledLoopDims) { if (en.value().isFunctionOfDim(tiledLoopDim)) tiledSliceDimIndices.insert(en.index()); @@ -304,7 +304,7 @@ LogicalResult TileLoopNest::tileRootOp(OpBuilder &b, // Update the root operation and append the loops and tile loop dimensions. rootOp = tiledRootOp->op; tileLoopOps.append(tiledRootOp->loops.begin(), tiledRootOp->loops.end()); - for (auto en : enumerate(tileSizes)) { + for (const auto &en : enumerate(tileSizes)) { // Copy only the tiled loop dimensions with non-zero tile size. if (en.value() == 0) continue; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 97b586cdf762..d8875663487d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -346,7 +346,7 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(FuncOp func) { changed = false; func.walk([&](scf::ForOp forOp) { Operation *yield = forOp.getBody()->getTerminator(); - for (auto it : llvm::enumerate(forOp.getRegionIterArgs())) { + for (const auto &it : llvm::enumerate(forOp.getRegionIterArgs())) { OpOperand &ret = yield->getOpOperand(it.index()); HoistableWrite write = getLoopInvariantTransferWriteOpDefining(forOp, ret); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 6a9fb2189855..d3936eb366cc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -277,7 +277,7 @@ struct TiledLoopToSCFPattern : public OpRewritePattern { // Collect loop control parameters for parallel and sequential dimensions. 
SmallVector seqLBs, seqUBs, seqSteps, seqIVs; SmallVector parLBs, parUBs, parSteps, parIVs; - for (auto en : llvm::enumerate( + for (const auto &en : llvm::enumerate( llvm::zip(tiledLoop.lowerBound(), tiledLoop.upperBound(), tiledLoop.step(), tiledLoop.getInductionVars()))) { Value lb, ub, step, iv; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 7fa2aed8dfd8..fb281b319f67 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -87,7 +87,7 @@ defaultAllocBufferCallBack(const LinalgPromotionOptions &options, auto one = b.createOrFold(1); Value allocSize = one; - for (auto size : llvm::enumerate(boundingSubViewSize)) + for (const auto &size : llvm::enumerate(boundingSubViewSize)) allocSize = b.createOrFold(allocSize, size.value()); Value buffer = allocBuffer(b, options, viewType.getElementType(), allocSize, layout, alignment); @@ -219,7 +219,7 @@ FailureOr mlir::linalg::promoteSubviewAsNewBuffer( SmallVector partialSizes; fullSizes.reserve(rank); partialSizes.reserve(rank); - for (auto en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { + for (const auto &en : llvm::enumerate(subView.getOrCreateRanges(b, loc))) { auto rangeValue = en.value(); // Try to extract a tight constant. 
LLVM_DEBUG(llvm::dbgs() << "Extract tightest: " << rangeValue.size << "\n"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index b4d2860101fd..cb2987973ea5 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -181,7 +181,7 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes, b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes); SmallVector iteratorTypes; - for (auto attr : + for (const auto &attr : enumerate(op.iterator_types().cast().getValue())) { if (loopIndexToRangeIndex.count(attr.index())) iteratorTypes.push_back(attr.value()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 6d2af0c1cece..8156c5d45744 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -194,7 +194,7 @@ static LogicalResult padOperandToSmallestStaticBoundingBox( SmallVector staticSizes; staticSizes.reserve(shape.size()); auto shapedOp = cast(sliceOp.getOperation()); - for (auto en : enumerate(shapedOp.getMixedSizes())) { + for (const auto &en : enumerate(shapedOp.getMixedSizes())) { // Skip dropped dimensions. if (droppedDims.contains(en.index())) continue; @@ -269,7 +269,7 @@ linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, // linalg op around because it uses the dims of the original results. 
SmallVector paddedSubviewResults; paddedSubviewResults.reserve(opToPad->getNumResults()); - for (auto en : llvm::enumerate(paddedOp->getResults())) { + for (const auto &en : llvm::enumerate(paddedOp->getResults())) { Value paddedResult = en.value(); int64_t resultNumber = en.index(); int64_t rank = paddedResult.getType().cast().getRank(); @@ -443,7 +443,7 @@ LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( // Tile the unfused loops; SmallVector unfusedLoopTileSizes; Value zero = rewriter.create(op->getLoc(), 0); - for (auto tileSize : enumerate(tileSizes)) { + for (const auto &tileSize : enumerate(tileSizes)) { if (tiledAndFusedOps->fusedLoopDims.count(tileSize.index())) unfusedLoopTileSizes.push_back(zero); else @@ -524,7 +524,7 @@ LogicalResult mlir::linalg::LinalgPaddingPattern::matchAndRewrite( } // Hoist the padding. - for (auto en : enumerate(depths)) { + for (const auto &en : enumerate(depths)) { OpOperand &opOperand = paddedOp->getOpOperand(en.index()); auto padTensorOp = opOperand.get().getDefiningOp(); if (!padTensorOp || en.value() == 0) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index ab22ee0e0de5..5fda632b2f86 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -249,7 +249,7 @@ vectorizeLinalgYield(OpBuilder &b, Operation *op, auto yieldOp = dyn_cast(op); if (!yieldOp) return VectorizationResult{VectorizationStatus::Failure, nullptr}; - for (auto outputs : llvm::enumerate(yieldOp.values())) { + for (const auto &outputs : llvm::enumerate(yieldOp.values())) { // TODO: Scan for an opportunity for reuse. // TODO: use a map. 
Value vectorValue = bvm.lookup(outputs.value()); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index a54582493a36..a197c141403b 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -169,7 +169,7 @@ Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim) { SmallVector getDynOperands(Location loc, Value val, OpBuilder &b) { SmallVector dynOperands; auto shapedType = val.getType().cast(); - for (auto dim : llvm::enumerate(shapedType.getShape())) { + for (const auto &dim : llvm::enumerate(shapedType.getShape())) { if (dim.value() == ShapedType::kDynamicSize) dynOperands.push_back(createOrFoldDimOp(b, loc, val, dim.index())); } @@ -310,7 +310,7 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp( SmallVector foldedOffsets(offsets.begin(), offsets.end()); AffineExpr dim1, dim2; bindDims(b.getContext(), dim1, dim2); - for (auto en : enumerate(producerOp.getMixedOffsets())) { + for (const auto &en : enumerate(producerOp.getMixedOffsets())) { SmallVector offsetValues = { getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]), getValueOrCreateConstantIndexOp(b, loc, en.value())}; @@ -403,7 +403,7 @@ void GenerateLoopNest::doit( if (distributionOptions.hasValue()) { // Collect loop ranges for parallel dimensions. SmallVector parallelLoopRanges; - for (auto iteratorType : enumerate(iteratorTypes)) + for (const auto &iteratorType : enumerate(iteratorTypes)) if (isParallelIterator(iteratorType.value())) parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); @@ -435,7 +435,7 @@ void GenerateLoopNest::doit( // Filter out scf.for loops that were created out of parallel dimensions. 
SmallVector loops; - for (auto iteratorType : enumerate(iteratorTypes)) + for (const auto &iteratorType : enumerate(iteratorTypes)) if (isParallelIterator(iteratorType.value())) loops.push_back(loopNest.loops[iteratorType.index()]); @@ -677,7 +677,7 @@ void GenerateLoopNest::doit( distributionMethod.assign(distributionOptions->distributionMethod.begin(), distributionOptions->distributionMethod.end()); SmallVector parallelLoopRanges; - for (auto iteratorType : enumerate(iteratorTypes)) { + for (const auto &iteratorType : enumerate(iteratorTypes)) { if (isParallelIterator(iteratorType.value())) parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); } @@ -686,7 +686,7 @@ void GenerateLoopNest::doit( SmallVector procInfo = options.procInfo(b, loc, parallelLoopRanges); unsigned index = 0; - for (auto iteratorType : enumerate(iteratorTypes)) { + for (const auto &iteratorType : enumerate(iteratorTypes)) { if (index >= procInfo.size()) break; if (isParallelIterator(iteratorType.value())) { diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index d2f989b561e4..ced119aea1a4 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -395,7 +395,7 @@ bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { }; if (!checkCompatible(aOffset, bOffset)) return false; - for (auto aStride : enumerate(aStrides)) + for (const auto &aStride : enumerate(aStrides)) if (!checkCompatible(aStride.value(), bStrides[aStride.index()])) return false; } @@ -515,7 +515,7 @@ computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType, if (originalType.getRank() == reducedType.getRank()) return unusedDims; - for (auto dim : llvm::enumerate(sizes)) + for (const auto &dim : llvm::enumerate(sizes)) if (auto attr = dim.value().dyn_cast()) if (attr.cast().getInt() == 1) unusedDims.insert(dim.index()); @@ -1851,7 +1851,7 @@ static MemRefType getCanonicalSubViewResultType( if 
(!unusedDims) return nullptr; SmallVector shape; - for (auto sizes : llvm::enumerate(nonRankReducedType.getShape())) { + for (const auto &sizes : llvm::enumerate(nonRankReducedType.getShape())) { if (unusedDims->count(sizes.index())) continue; shape.push_back(sizes.value()); @@ -1903,7 +1903,7 @@ static bool isTrivialSubViewOp(SubViewOp subViewOp) { // Check all size values are static and matches the (static) source shape. ArrayRef sourceShape = subViewOp.getSourceType().getShape(); - for (auto size : llvm::enumerate(mixedSizes)) { + for (const auto &size : llvm::enumerate(mixedSizes)) { Optional intValue = getConstantIntValue(size.value()); if (!intValue || intValue.getValue() != sourceShape[size.index()]) return false; @@ -2040,7 +2040,7 @@ static MemRefType inferTransposeResultType(MemRefType memRefType, auto originalSizes = memRefType.getShape(); // Compute permuted sizes. SmallVector sizes(rank, 0); - for (auto en : llvm::enumerate(permutationMap.getResults())) + for (const auto &en : llvm::enumerate(permutationMap.getResults())) sizes[en.index()] = originalSizes[en.value().cast().getPosition()]; diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 0b4346ddd08d..1f004c6c9950 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -249,7 +249,7 @@ verifyScheduleModifiers(OpAsmParser &parser, SmallVectorImpl> &modifiers) { if (modifiers.size() > 2) return parser.emitError(parser.getNameLoc()) << " unexpected modifier(s)"; - for (auto mod : modifiers) { + for (const auto &mod : modifiers) { // Translate the string. If it has no value, then it was not a valid // modifier! 
auto symbol = symbolizeScheduleModifier(mod); diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index b9e5415dadcc..2a399ec2169e 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -198,7 +198,7 @@ static LogicalResult verifyResultTypesAreInferrable(OperationOp op, return success(); // Otherwise, make sure each of the types can be inferred. - for (auto it : llvm::enumerate(resultTypes)) { + for (const auto &it : llvm::enumerate(resultTypes)) { Operation *resultTypeOp = it.value().getDefiningOp(); assert(resultTypeOp && "expected valid result type operation"); diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 1ca7e49c5a3a..dd47a55fe6b9 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -1247,7 +1247,7 @@ struct RemoveUnusedResults : public OpRewritePattern { // Replace the operation by the new one. SmallVector repResults(op.getNumResults()); - for (auto en : llvm::enumerate(usedResults)) + for (const auto &en : llvm::enumerate(usedResults)) repResults[en.value().getResultNumber()] = newOp.getResult(en.index()); rewriter.replaceOp(op, repResults); return success(); @@ -1296,7 +1296,8 @@ struct ConvertTrivialIfToSelect : public OpRewritePattern { SmallVector results(op->getNumResults()); assert(thenYieldArgs.size() == results.size()); assert(elseYieldArgs.size() == results.size()); - for (auto it : llvm::enumerate(llvm::zip(thenYieldArgs, elseYieldArgs))) { + for (const auto &it : + llvm::enumerate(llvm::zip(thenYieldArgs, elseYieldArgs))) { Value trueVal = std::get<0>(it.value()); Value falseVal = std::get<1>(it.value()); if (trueVal == falseVal) @@ -1564,7 +1565,7 @@ struct CombineIfs : public OpRewritePattern { SmallVector prevValues; SmallVector nextValues; - for (auto pair : llvm::enumerate(combinedIf.getResults())) { + for (const auto &pair : llvm::enumerate(combinedIf.getResults())) { if (pair.index() < prevIf.getNumResults()) 
prevValues.push_back(pair.value()); else @@ -2368,7 +2369,7 @@ struct WhileUnusedResult : public OpRewritePattern { SmallVector newResultTypes; SmallVector newTermArgs; bool needUpdate = false; - for (auto it : + for (const auto &it : llvm::enumerate(llvm::zip(op.getResults(), afterArgs, termArgs))) { auto i = static_cast(it.index()); Value result = std::get<0>(it.value()); @@ -2403,7 +2404,7 @@ struct WhileUnusedResult : public OpRewritePattern { // null). SmallVector newResults(op.getNumResults()); SmallVector newAfterBlockArgs(op.getNumResults()); - for (auto it : llvm::enumerate(newResultsIndices)) { + for (const auto &it : llvm::enumerate(newResultsIndices)) { newResults[it.value()] = newWhile.getResult(it.index()); newAfterBlockArgs[it.value()] = newAfterBlock.getArgument(it.index()); } diff --git a/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp b/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp index a3f307044052..d74b5d0457cc 100644 --- a/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp @@ -69,7 +69,7 @@ struct ForLoopLoweringPattern : public OpRewritePattern { // Rewrite uses of the for-loop block arguments to the new while-loop // "after" arguments - for (auto barg : enumerate(forOp.getBody(0)->getArguments())) + for (const auto &barg : enumerate(forOp.getBody(0)->getArguments())) barg.value().replaceAllUsesWith(afterBlock->getArgument(barg.index())); // Inline for-loop body operations into 'after' region. @@ -87,7 +87,7 @@ struct ForLoopLoweringPattern : public OpRewritePattern { // an extra value (the induction variable escapes the loop through being // carried in the set of iterargs). Instead, rewrite uses of the forOp // results. 
- for (auto arg : llvm::enumerate(forOp.getResults())) + for (const auto &arg : llvm::enumerate(forOp.getResults())) arg.value().replaceAllUsesWith(whileOp.getResult(arg.index() + 1)); rewriter.eraseOp(forOp); diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp index 321d953c17ed..3ef508275a76 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp @@ -198,7 +198,7 @@ scf::ForOp LoopPipelinerInternal::createKernelLoop( llvm::SmallVector newLoopArg; // For existing loop argument initialize them with the right version from the // prologue. - for (auto retVal : + for (const auto &retVal : llvm::enumerate(forOp.getBody()->getTerminator()->getOperands())) { Operation *def = retVal.value().getDefiningOp(); assert(def && "Only support loop carried dependencies of distance 1"); @@ -245,7 +245,7 @@ void LoopPipelinerInternal::createKernel( rewriter.setInsertionPoint(newForOp.getBody(), newForOp.getBody()->begin()); BlockAndValueMapping mapping; mapping.map(forOp.getInductionVar(), newForOp.getInductionVar()); - for (auto arg : llvm::enumerate(forOp.getRegionIterArgs())) { + for (const auto &arg : llvm::enumerate(forOp.getRegionIterArgs())) { mapping.map(arg.value(), newForOp.getRegionIterArgs()[arg.index()]); } for (Operation *op : opOrder) { @@ -325,7 +325,7 @@ void LoopPipelinerInternal::createKernel( yieldOperands.push_back(mapping.lookupOrDefault(it.first)); } // Map the yield operand to the forOp returned value. 
- for (auto retVal : + for (const auto &retVal : llvm::enumerate(forOp.getBody()->getTerminator()->getOperands())) { Operation *def = retVal.value().getDefiningOp(); assert(def && "Only support loop carried dependencies of distance 1"); diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp index 998c73624ca7..8bf32ac68c7f 100644 --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp @@ -160,8 +160,8 @@ mlir::scf::tileParallelLoop(ParallelOp op, ArrayRef tileSizes, ifInbound.getThenRegion().takeBody(op.getRegion()); Block &thenBlock = ifInbound.getThenRegion().front(); b.setInsertionPointToStart(innerLoop.getBody()); - for (auto ivs : llvm::enumerate(llvm::zip(innerLoop.getInductionVars(), - outerLoop.getInductionVars()))) { + for (const auto &ivs : llvm::enumerate(llvm::zip( + innerLoop.getInductionVars(), outerLoop.getInductionVars()))) { auto newIndex = b.create( op.getLoc(), std::get<0>(ivs.value()), std::get<1>(ivs.value())); thenBlock.getArgument(ivs.index()) diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp index 10a3ba646771..6094ad8bf224 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp @@ -182,7 +182,7 @@ LogicalResult ProcessInterfaceVarABI::matchAndRewrite( auto indexType = typeConverter.getIndexType(); auto attrName = spirv::getInterfaceVarABIAttrName(); - for (auto argType : llvm::enumerate(funcOp.getType().getInputs())) { + for (const auto &argType : llvm::enumerate(funcOp.getType().getInputs())) { auto abiInfo = funcOp.getArgAttrOfType( argType.index(), attrName); if (!abiInfo) { diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index afa26650b4c4..776f022fe260 100644 
--- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -235,7 +235,7 @@ getTypeNumBytes(const SPIRVTypeConverter::Options &options, Type type) { return llvm::None; int64_t memrefSize = -1; - for (auto shape : enumerate(dims)) + for (const auto &shape : enumerate(dims)) memrefSize = std::max(memrefSize, shape.value() * strides[shape.index()]); return (offset + memrefSize) * elementSize.getValue(); @@ -557,7 +557,7 @@ FuncOpConversion::matchAndRewrite(FuncOp funcOp, OpAdaptor adaptor, return failure(); TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs()); - for (auto argType : enumerate(fnType.getInputs())) { + for (const auto &argType : enumerate(fnType.getInputs())) { auto convertedType = getTypeConverter()->convertType(argType.value()); if (!convertedType) return failure(); @@ -778,7 +778,7 @@ Value mlir::spirv::linearizeIndex(ValueRange indices, ArrayRef strides, Value linearizedIndex = builder.create( loc, integerType, IntegerAttr::get(integerType, offset)); - for (auto index : llvm::enumerate(indices)) { + for (const auto &index : llvm::enumerate(indices)) { Value strideVal = builder.create( loc, integerType, IntegerAttr::get(integerType, strides[index.index()])); diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 4a415b456826..481e191e9df4 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -1669,7 +1669,7 @@ static LogicalResult verify(ReduceOp op) { "ReduceOp operates on an extent tensor"); } - for (auto type : llvm::enumerate(op.getInitVals())) + for (const auto &type : llvm::enumerate(op.getInitVals())) if (block.getArgument(type.index() + 2).getType() != type.value().getType()) return op.emitOpError() << "type mismatch between argument " << type.index() + 2 diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 
f7665135b5b1..665021b4c70d 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -1017,7 +1017,7 @@ llvm::SmallDenseSet ExtractSliceOp::getDroppedDims() { ArrayRef resultShape = getType().getShape(); SmallVector mixedSizes = getMixedSizes(); unsigned shapePos = 0; - for (auto size : enumerate(mixedSizes)) { + for (const auto &size : enumerate(mixedSizes)) { Optional sizeVal = getConstantIntValue(size.value()); // If the size is not 1, or if the current matched dimension of the result // is the same static shape as the size value (which is 1), then the @@ -1039,7 +1039,7 @@ LogicalResult ExtractSliceOp::reifyResultShapes( SmallVector mixedSizes = getMixedSizes(); llvm::SmallDenseSet droppedDims = getDroppedDims(); Location loc = getLoc(); - for (auto size : enumerate(mixedSizes)) { + for (const auto &size : enumerate(mixedSizes)) { if (droppedDims.count(size.index())) continue; if (auto attr = size.value().dyn_cast()) { diff --git a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp index 9a1ca53074d0..92daab5e8b8f 100644 --- a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp @@ -126,7 +126,7 @@ class ReduceMultiDimReductionRank // 1. Separate reduction and parallel dims. 
SmallVector parallelDims, parallelShapes; SmallVector reductionDims, reductionShapes; - for (auto it : llvm::enumerate(reductionMask)) { + for (const auto &it : llvm::enumerate(reductionMask)) { int64_t i = it.index(); bool isReduction = it.value(); if (isReduction) { diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index fa608113b079..3f83578caade 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -270,7 +270,7 @@ void vector::MultiDimReductionOp::build(OpBuilder &builder, result.addTypes(targetType); SmallVector reductionDims; - for (auto en : llvm::enumerate(reductionMask)) + for (const auto &en : llvm::enumerate(reductionMask)) if (en.value()) reductionDims.push_back(en.index()); result.addAttribute(getReductionDimsAttrName(), @@ -615,7 +615,7 @@ static LogicalResult verify(ContractionOp op) { // that the number of map outputs equals the rank of its associated // vector operand. unsigned numIterators = op.iterator_types().getValue().size(); - for (auto it : llvm::enumerate(op.indexing_maps())) { + for (const auto &it : llvm::enumerate(op.indexing_maps())) { auto index = it.index(); auto map = it.value().cast().getValue(); if (map.getNumSymbols() != 0) @@ -695,7 +695,7 @@ static std::vector> getDimMap(ArrayRef indexingMaps, ArrayAttr iteratorTypes, StringRef targetIteratorTypeName, MLIRContext *context) { std::vector> dimMap; - for (auto it : llvm::enumerate(iteratorTypes)) { + for (const auto &it : llvm::enumerate(iteratorTypes)) { auto iteratorTypeName = it.value().cast().getValue(); if (iteratorTypeName != targetIteratorTypeName) continue; @@ -715,7 +715,7 @@ void ContractionOp::getIterationBounds( auto resVectorType = getResultType().dyn_cast(); SmallVector indexingMaps(getIndexingMaps()); SmallVector iterationShape; - for (auto it : llvm::enumerate(iterator_types())) { + for (const auto &it : llvm::enumerate(iterator_types())) { // Search lhs/rhs map results for 
'targetExpr'. auto targetExpr = getAffineDimExpr(it.index(), getContext()); auto iteratorTypeName = it.value().cast().getValue(); @@ -738,7 +738,7 @@ void ContractionOp::getIterationIndexMap( std::vector> &iterationIndexMap) { unsigned numMaps = indexing_maps().getValue().size(); iterationIndexMap.resize(numMaps); - for (auto it : llvm::enumerate(indexing_maps())) { + for (const auto &it : llvm::enumerate(indexing_maps())) { auto index = it.index(); auto map = it.value().cast().getValue(); for (unsigned i = 0, e = map.getNumResults(); i < e; ++i) { @@ -933,7 +933,7 @@ static LogicalResult verify(vector::ExtractOp op) { if (positionAttr.size() > static_cast(op.getVectorType().getRank())) return op.emitOpError( "expected position attribute of rank smaller than vector rank"); - for (auto en : llvm::enumerate(positionAttr)) { + for (const auto &en : llvm::enumerate(positionAttr)) { auto attr = en.value().dyn_cast(); if (!attr || attr.getInt() < 0 || attr.getInt() >= op.getVectorType().getDimSize(en.index())) @@ -1511,7 +1511,7 @@ static LogicalResult verify(ShuffleOp op) { return op.emitOpError("mask length mismatch"); // Verify all indices. 
int64_t indexSize = v1Type.getDimSize(0) + v2Type.getDimSize(0); - for (auto en : llvm::enumerate(maskAttr)) { + for (const auto &en : llvm::enumerate(maskAttr)) { auto attr = en.value().dyn_cast(); if (!attr || attr.getInt() < 0 || attr.getInt() >= indexSize) return op.emitOpError("mask index #") @@ -1621,7 +1621,7 @@ static LogicalResult verify(InsertOp op) { (positionAttr.size() != static_cast(destVectorType.getRank()))) return op.emitOpError( "expected position attribute rank to match the dest vector rank"); - for (auto en : llvm::enumerate(positionAttr)) { + for (const auto &en : llvm::enumerate(positionAttr)) { auto attr = en.value().dyn_cast(); if (!attr || attr.getInt() < 0 || attr.getInt() >= destVectorType.getDimSize(en.index())) @@ -2822,7 +2822,7 @@ struct FoldExtractSliceIntoTransferRead newIndices.push_back(getValueOrCreateConstantIndexOp( rewriter, extractOp.getLoc(), offset)); } - for (auto it : llvm::enumerate(xferOp.indices())) { + for (const auto &it : llvm::enumerate(xferOp.indices())) { OpFoldResult offset = extractOp.getMixedOffsets()[it.index() + rankReduced]; newIndices.push_back(rewriter.create( @@ -3913,7 +3913,7 @@ static LogicalResult verify(vector::TransposeOp op) { if (rank != size) return op.emitOpError("transposition length mismatch: ") << size; SmallVector seen(rank, false); - for (auto ta : llvm::enumerate(transpAttr)) { + for (const auto &ta : llvm::enumerate(transpAttr)) { int64_t i = ta.value().cast().getInt(); if (i < 0 || i >= rank) return op.emitOpError("transposition index out of range: ") << i; @@ -4004,7 +4004,7 @@ static LogicalResult verify(ConstantMaskOp &op) { // result dimension size. 
auto resultShape = resultType.getShape(); SmallVector maskDimSizes; - for (auto it : llvm::enumerate(op.mask_dim_sizes())) { + for (const auto &it : llvm::enumerate(op.mask_dim_sizes())) { int64_t attrValue = it.value().cast().getInt(); if (attrValue < 0 || attrValue > resultShape[it.index()]) return op.emitOpError( diff --git a/mlir/lib/Dialect/Vector/VectorTransferPermutationMapRewritePatterns.cpp b/mlir/lib/Dialect/Vector/VectorTransferPermutationMapRewritePatterns.cpp index 36725e03ae09..c47ef94e2e23 100644 --- a/mlir/lib/Dialect/Vector/VectorTransferPermutationMapRewritePatterns.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransferPermutationMapRewritePatterns.cpp @@ -79,7 +79,7 @@ struct TransferReadPermutationLowering // Apply the reverse transpose to deduce the type of the transfer_read. ArrayRef originalShape = op.getVectorType().getShape(); SmallVector newVectorShape(originalShape.size()); - for (auto pos : llvm::enumerate(permutation)) { + for (const auto &pos : llvm::enumerate(permutation)) { newVectorShape[pos.value()] = originalShape[pos.index()]; } diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 3cac3302af32..0b49ccd58b27 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -53,7 +53,7 @@ static Optional getResultIndex(AffineMap map, int64_t index) { static SmallVector adjustIter(ArrayAttr iteratorTypes, int64_t index) { SmallVector results; - for (auto it : llvm::enumerate(iteratorTypes)) { + for (const auto &it : llvm::enumerate(iteratorTypes)) { int64_t idx = it.index(); if (idx == index) continue; @@ -871,7 +871,7 @@ struct MultiReduceToContract auto srcMap = rewriter.getMultiDimIdentityMap(reductionMask.size()); SmallVector exprs; SmallVector iteratorTypes; - for (auto isReduceDim : llvm::enumerate(reductionMask)) { + for (const auto &isReduceDim : llvm::enumerate(reductionMask)) { if (!isReduceDim.value()) { 
iteratorTypes.push_back(getParallelIteratorTypeName()); exprs.push_back(rewriter.getAffineDimExpr(isReduceDim.index())); @@ -997,7 +997,7 @@ struct CombineContractBroadcast broadcast.getVectorType().getRank() - srcType.getRank(); bool innerDimBroadcast = false; SmallVector originalDims; - for (auto dim : llvm::enumerate(srcType.getShape())) { + for (const auto &dim : llvm::enumerate(srcType.getShape())) { if (dim.value() != broadcast.getVectorType().getDimSize(rankDiff + dim.index())) { innerDimBroadcast = true; diff --git a/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp b/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp index 4c31164b433e..de5b2fdcfceb 100644 --- a/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp +++ b/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp @@ -52,7 +52,7 @@ sliceTransferIndices(int64_t index, ArrayRef originalShape, getVectorOffset(originalShape, targetShape, index); // Compute 'sliceIndices' by adding 'sliceOffsets[i]' to 'indices[i]'. SmallVector slicedIndices(indices.begin(), indices.end()); - for (auto dim : llvm::enumerate(permutationMap.getResults())) { + for (const auto &dim : llvm::enumerate(permutationMap.getResults())) { if (isBroadcast(dim.value())) continue; unsigned pos = dim.value().cast().getPosition(); @@ -429,7 +429,7 @@ struct ContractExtractPattern : public OpRewritePattern { for (unsigned i : llvm::seq(unsigned(0), affineMap.getNumResults())) map[affineMap.getDimPosition(i)] = extract.getResultType().getDimSize(i); SmallVector extractOperands; - for (auto it : llvm::enumerate(contract.getIndexingMaps())) { + for (const auto &it : llvm::enumerate(contract.getIndexingMaps())) { // For each operands calculate the new vector type after distribution. 
Value operand = contract->getOperand(it.index()); auto vecType = operand.getType().cast(); diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index 47dcff627a33..2a3b9819b320 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -1022,7 +1022,7 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // as lhs/rhs, and store the indices, constant coefficient corresponding to // the indices in `coefficients` map, and affine expression corresponding to // in indices in `indexToExprMap` map. - for (auto it : llvm::enumerate(localExprs)) { + for (const auto &it : llvm::enumerate(localExprs)) { AffineExpr expr = it.value(); if (flatExprs[numDims + numSymbols + it.index()] == 0) continue; diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index a60120637011..ecdf8376b5fc 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -121,7 +121,7 @@ bool AffineMap::isMinorIdentityWithBroadcasting( if (getNumDims() < getNumResults()) return false; unsigned suffixStart = getNumDims() - getNumResults(); - for (auto idxAndExpr : llvm::enumerate(getResults())) { + for (const auto &idxAndExpr : llvm::enumerate(getResults())) { unsigned resIdx = idxAndExpr.index(); AffineExpr expr = idxAndExpr.value(); if (auto constExpr = expr.dyn_cast()) { @@ -168,7 +168,7 @@ bool AffineMap::isPermutationOfMinorIdentityWithBroadcasting( getNumResults() > getNumInputs() ? 
getNumResults() - getNumInputs() : 0; llvm::SmallBitVector dimFound(std::max(getNumInputs(), getNumResults()), false); - for (auto idxAndExpr : llvm::enumerate(getResults())) { + for (const auto &idxAndExpr : llvm::enumerate(getResults())) { unsigned resIdx = idxAndExpr.index(); AffineExpr expr = idxAndExpr.value(); // Each result may be either a constant 0 (broadcast dimension) or a @@ -675,7 +675,7 @@ AffineMap mlir::inversePermutation(AffineMap map) { return map; assert(map.getNumSymbols() == 0 && "expected map without symbols"); SmallVector exprs(map.getNumDims()); - for (auto en : llvm::enumerate(map.getResults())) { + for (const auto &en : llvm::enumerate(map.getResults())) { auto expr = en.value(); // Skip non-permutations. if (auto d = expr.dyn_cast()) { diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp index e965afb0feaa..6efd384ad3cc 100644 --- a/mlir/lib/IR/BuiltinTypes.cpp +++ b/mlir/lib/IR/BuiltinTypes.cpp @@ -1036,7 +1036,7 @@ AffineMap mlir::makeStridedLinearLayoutMap(ArrayRef strides, } // AffineExpr for strides. - for (auto en : llvm::enumerate(strides)) { + for (const auto &en : llvm::enumerate(strides)) { auto dim = en.index(); auto stride = en.value(); assert(stride != 0 && "Invalid stride specification"); diff --git a/mlir/lib/IR/Verifier.cpp b/mlir/lib/IR/Verifier.cpp index 840a3156f283..bbc560d429d7 100644 --- a/mlir/lib/IR/Verifier.cpp +++ b/mlir/lib/IR/Verifier.cpp @@ -316,7 +316,7 @@ OperationVerifier::verifyDominanceOfContainedRegions(Operation &op, for (Operation &op : block) { if (isReachable) { // Check that operands properly dominate this use. 
- for (auto operand : llvm::enumerate(op.getOperands())) { + for (const auto &operand : llvm::enumerate(op.getOperands())) { if (domInfo.properlyDominates(operand.value(), &op)) continue; diff --git a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp index 26c80795c650..d2ab30282562 100644 --- a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp +++ b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp @@ -131,7 +131,7 @@ verifyTypesAlongAllEdges(Operation *op, Optional sourceNo, << succInputsTypes.size(); } - for (auto typesIdx : + for (const auto &typesIdx : llvm::enumerate(llvm::zip(*sourceTypes, succInputsTypes))) { Type sourceType = std::get<0>(typesIdx.value()); Type inputType = std::get<1>(typesIdx.value()); @@ -266,7 +266,7 @@ bool mlir::insideMutuallyExclusiveRegions(Operation *a, Operation *b) { return false; // Compute index of region. int64_t beginIndex = -1; - for (auto it : llvm::enumerate(branchOp->getRegions())) + for (const auto &it : llvm::enumerate(branchOp->getRegions())) if (&it.value() == begin) beginIndex = it.index(); assert(beginIndex != -1 && "could not find region in op"); diff --git a/mlir/lib/Interfaces/InferTypeOpInterface.cpp b/mlir/lib/Interfaces/InferTypeOpInterface.cpp index 67c9ccbaec5b..ff17ed0498bb 100644 --- a/mlir/lib/Interfaces/InferTypeOpInterface.cpp +++ b/mlir/lib/Interfaces/InferTypeOpInterface.cpp @@ -189,7 +189,7 @@ LogicalResult mlir::detail::inferReturnTensorTypes( if (failed(componentTypeFn(context, location, operands, attributes, regions, retComponents))) return failure(); - for (auto shapeAndType : retComponents) { + for (const auto &shapeAndType : retComponents) { assert(shapeAndType.getAttribute() == nullptr && "attribute not supported"); if (shapeAndType.hasRank()) inferredReturnTypes.push_back(RankedTensorType::get( diff --git a/mlir/lib/Reducer/ReductionTreePass.cpp b/mlir/lib/Reducer/ReductionTreePass.cpp index 859f64a01e28..a1308f936255 100644 --- 
a/mlir/lib/Reducer/ReductionTreePass.cpp +++ b/mlir/lib/Reducer/ReductionTreePass.cpp @@ -41,7 +41,7 @@ static void applyPatterns(Region ®ion, std::vector opsNotInRange; std::vector opsInRange; size_t keepIndex = 0; - for (auto op : enumerate(region.getOps())) { + for (const auto &op : enumerate(region.getOps())) { int index = op.index(); if (keepIndex < rangeToKeep.size() && index == rangeToKeep[keepIndex].second) diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp index bd98ce0c1e09..765c47b2ed0c 100644 --- a/mlir/lib/Rewrite/ByteCode.cpp +++ b/mlir/lib/Rewrite/ByteCode.cpp @@ -198,9 +198,9 @@ class Generator { maxTypeRangeMemoryIndex(maxTypeRangeMemoryIndex), maxValueRangeMemoryIndex(maxValueRangeMemoryIndex), maxLoopLevel(maxLoopLevel) { - for (auto it : llvm::enumerate(constraintFns)) + for (const auto &it : llvm::enumerate(constraintFns)) constraintToMemIndex.try_emplace(it.value().first(), it.index()); - for (auto it : llvm::enumerate(rewriteFns)) + for (const auto &it : llvm::enumerate(rewriteFns)) externalRewriterToMemIndex.try_emplace(it.value().first(), it.index()); } @@ -631,7 +631,7 @@ void Generator::allocateMemoryIndices(FuncOp matcherFunc, ByteCodeLiveRange &defRange = defIt.second; // Try to allocate to an existing index. - for (auto existingIndexIt : llvm::enumerate(allocatedIndices)) { + for (const auto &existingIndexIt : llvm::enumerate(allocatedIndices)) { ByteCodeLiveRange &existingRange = existingIndexIt.value(); if (!defRange.overlaps(existingRange)) { existingRange.unionWith(defRange); diff --git a/mlir/lib/Rewrite/PatternApplicator.cpp b/mlir/lib/Rewrite/PatternApplicator.cpp index d5a98fef09e7..edaf13e575d3 100644 --- a/mlir/lib/Rewrite/PatternApplicator.cpp +++ b/mlir/lib/Rewrite/PatternApplicator.cpp @@ -53,7 +53,7 @@ void PatternApplicator::applyCostModel(CostModel model) { // Apply the cost model to the bytecode patterns first, and then the native // patterns. 
if (const PDLByteCode *bytecode = frozenPatternList.getPDLByteCode()) { - for (auto it : llvm::enumerate(bytecode->getPatterns())) + for (const auto &it : llvm::enumerate(bytecode->getPatterns())) mutableByteCodeState->updatePatternBenefit(it.index(), model(it.value())); } diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 366a3d7ce24a..3e06f9caf7b1 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -821,7 +821,7 @@ LogicalResult Importer::processFunction(llvm::Function *f) { currentEntryBlock = blockList[0]; // Add function arguments to the entry block. - for (auto kv : llvm::enumerate(f->args())) + for (const auto &kv : llvm::enumerate(f->args())) instMap[&kv.value()] = blockList[0]->addArgument(functionType.getParamType(kv.index())); diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 7f238afd2c92..404018bebe93 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -381,7 +381,7 @@ static Value getPHISourceValue(Block *current, Block *pred, // the case branch that was taken. if (switchOp.getDefaultDestination() == current) return switchOp.getDefaultOperands()[index]; - for (auto i : llvm::enumerate(switchOp.getCaseDestinations())) + for (const auto &i : llvm::enumerate(switchOp.getCaseDestinations())) if (i.value() == current) return switchOp.getCaseOperands(i.index())[index]; } diff --git a/mlir/lib/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Transforms/BufferResultsToOutParams.cpp index 2abdccc6866c..2d74c4085e70 100644 --- a/mlir/lib/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Transforms/BufferResultsToOutParams.cpp @@ -25,7 +25,7 @@ static void updateFuncOp(FuncOp func, // Collect information about the results will become appended arguments. 
SmallVector erasedResultTypes; SmallVector erasedResultIndices; - for (auto resultType : llvm::enumerate(functionType.getResults())) { + for (const auto &resultType : llvm::enumerate(functionType.getResults())) { if (resultType.value().isa()) { erasedResultIndices.push_back(resultType.index()); erasedResultTypes.push_back(resultType.value()); diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index e32c54264c65..fd9bba81c4a2 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -84,7 +84,7 @@ static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) { OpBuilder bOuter(forOp); // Put together alloc operands for any dynamic dimensions of the memref. SmallVector allocOperands; - for (auto dim : llvm::enumerate(oldMemRefType.getShape())) { + for (const auto &dim : llvm::enumerate(oldMemRefType.getShape())) { if (dim.value() == ShapedType::kDynamicSize) allocOperands.push_back(bOuter.createOrFold( forOp.getLoc(), oldMemRef, dim.index())); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index a299b8c5b660..24711b0de132 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -252,7 +252,7 @@ class OperationTransactionState { op->setLoc(loc); op->setAttrs(attrs); op->setOperands(operands); - for (auto it : llvm::enumerate(successors)) + for (const auto &it : llvm::enumerate(successors)) op->setSuccessor(it.value(), it.index()); } @@ -1256,7 +1256,7 @@ LogicalResult ConversionPatternRewriterImpl::remapValues( remapped.reserve(llvm::size(values)); SmallVector legalTypes; - for (auto it : llvm::enumerate(values)) { + for (const auto &it : llvm::enumerate(values)) { Value operand = it.value(); Type origType = operand.getType(); diff --git a/mlir/lib/Transforms/Utils/InliningUtils.cpp b/mlir/lib/Transforms/Utils/InliningUtils.cpp index 
3eed22d8a5b4..8b2040633a1a 100644 --- a/mlir/lib/Transforms/Utils/InliningUtils.cpp +++ b/mlir/lib/Transforms/Utils/InliningUtils.cpp @@ -215,7 +215,7 @@ inlineRegionImpl(InlinerInterface &interface, Region *src, Block *inlineBlock, } else { // Otherwise, there were multiple blocks inlined. Add arguments to the post // insertion block to represent the results to replace. - for (auto resultToRepl : llvm::enumerate(resultsToReplace)) { + for (const auto &resultToRepl : llvm::enumerate(resultsToReplace)) { resultToRepl.value().replaceAllUsesWith(postInsertBlock->addArgument( regionResultTypes[resultToRepl.index()])); } diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 1700d60a9173..6328b59d9008 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -3353,7 +3353,7 @@ createFullTiles(MutableArrayRef inputNest, // Add the body for the full tile loop nest. BlockAndValueMapping operandMap; - for (auto loopEn : llvm::enumerate(inputNest)) + for (const auto &loopEn : llvm::enumerate(inputNest)) operandMap.map(loopEn.value().getInductionVar(), fullTileLoops[loopEn.index()].getInductionVar()); b = OpBuilder::atBlockTerminator(fullTileLoops.back().getBody()); diff --git a/mlir/lib/Transforms/Utils/RegionUtils.cpp b/mlir/lib/Transforms/Utils/RegionUtils.cpp index 023b1d6ed5ea..e60e7e65cd60 100644 --- a/mlir/lib/Transforms/Utils/RegionUtils.cpp +++ b/mlir/lib/Transforms/Utils/RegionUtils.cpp @@ -589,7 +589,7 @@ LogicalResult BlockMergeCluster::merge(RewriterBase &rewriter) { 1 + blocksToMerge.size(), SmallVector(operandsToMerge.size())); unsigned curOpIndex = 0; - for (auto it : llvm::enumerate(operandsToMerge)) { + for (const auto &it : llvm::enumerate(operandsToMerge)) { unsigned nextOpOffset = it.value().first - curOpIndex; curOpIndex = it.value().first; From a8877c5ccc0e05495d60f1669d47826e60f373b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 16 
Nov 2021 23:10:48 +0200 Subject: [PATCH 363/992] [clang] [MinGW] Pass --no-demangle through to the mingw linker Clang has custom handling of --no-demangle, where it is removed from the input -Wl and -Xlinker options, and readded specifically by the drivers where it's known to be supported. Both ld.bfd and lld support the --no-demangle option. This handles the option in the same way as in ToolChains/Gnu.cpp. Differential Revision: https://reviews.llvm.org/D114064 --- clang/lib/Driver/ToolChains/MinGW.cpp | 3 +++ clang/test/Driver/Xlinker-args.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 6d8bfc358dd3..0501f9737404 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -164,6 +164,9 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("--enable-auto-image-base"); } + if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) + CmdArgs.push_back("--no-demangle"); + CmdArgs.push_back("-o"); const char *OutputFile = Output.getFilename(); // GCC implicitly adds an .exe extension if it is given an output file name diff --git a/clang/test/Driver/Xlinker-args.c b/clang/test/Driver/Xlinker-args.c index 0fba8e711bd0..ad59e0beee22 100644 --- a/clang/test/Driver/Xlinker-args.c +++ b/clang/test/Driver/Xlinker-args.c @@ -12,6 +12,11 @@ // RUN: -Wl,two,--no-demangle,three -Xlinker four -z five -r %s 2> %t // RUN: FileCheck -check-prefix=LINUX < %t %s +/// Check that --no-demangle gets forwarded to the mingw linker +// RUN: %clang -target x86_64-w64-mingw32 -### \ +// RUN: -Wl,--no-demangle %s 2> %t +// RUN: FileCheck -check-prefix=MINGW < %t %s + // RUN: %clang -target powerpc-unknown-aix -### \ // RUN: -b one -b two %s 2> %t // RUN: FileCheck -check-prefix=AIX < %t %s @@ -23,6 +28,7 @@ // DARWIN-NOT: --no-demangle // DARWIN: "one" "two" "three" "four" "-z" "five" "-r" // LINUX: "--no-demangle" "-e" 
"_start" "one" "two" "three" "four" "-z" "five" "-r" {{.*}} "-T" "a.lds" +// MINGW: "--no-demangle" // AIX: "-b" "one" "-b" "two" // NOT-AIX: error: unsupported option '-b' for target 'powerpc-unknown-linux' From 7964568632c07270e7b9da8b80ff98fa98a558a7 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:02:21 +0000 Subject: [PATCH 364/992] Apply clang-tidy fixes for readability-simplify-boolean-expr to MLIR NormalizeMemRefs.cpp (NFC) --- mlir/lib/Transforms/NormalizeMemRefs.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp index 477d0b6ec304..aaf1a5717b46 100644 --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -93,9 +93,7 @@ void NormalizeMemRefs::runOnOperation() { /// TODO: Extend this for DimOps. static bool isMemRefNormalizable(Value::user_range opUsers) { return !llvm::any_of(opUsers, [](Operation *op) { - if (op->hasTrait()) - return false; - return true; + return !op->hasTrait(); }); } From 337c937ddb94a3fd1257af482cb14c02abbda709 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:02:18 +0000 Subject: [PATCH 365/992] Apply clang-tidy fixes for performance-move-const-arg to MLIR (NFC) --- mlir/lib/Analysis/PresburgerSet.cpp | 4 ++-- mlir/lib/Bindings/Python/IRAffine.cpp | 3 +-- mlir/lib/Bindings/Python/IRCore.cpp | 7 +++--- .../PDLToPDLInterp/PredicateTree.cpp | 2 +- mlir/lib/Reducer/ReductionNode.cpp | 3 +-- mlir/lib/Reducer/ReductionTreePass.cpp | 2 +- mlir/lib/Support/Timing.cpp | 6 ++--- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 8 +++---- mlir/unittests/TableGen/OpBuildGen.cpp | 24 +++++++++---------- 9 files changed, 27 insertions(+), 32 deletions(-) diff --git a/mlir/lib/Analysis/PresburgerSet.cpp b/mlir/lib/Analysis/PresburgerSet.cpp index 84be397a221d..22ea7a3a7eec 100644 --- a/mlir/lib/Analysis/PresburgerSet.cpp +++ b/mlir/lib/Analysis/PresburgerSet.cpp @@ -117,9 
+117,9 @@ PresburgerSet PresburgerSet::intersect(const PresburgerSet &set) const { for (const FlatAffineConstraints &csB : set.flatAffineConstraints) { FlatAffineConstraints csACopy = csA, csBCopy = csB; csACopy.mergeLocalIds(csBCopy); - csACopy.append(std::move(csBCopy)); + csACopy.append(csBCopy); if (!csACopy.isEmpty()) - result.unionFACInPlace(std::move(csACopy)); + result.unionFACInPlace(csACopy); } } return result; diff --git a/mlir/lib/Bindings/Python/IRAffine.cpp b/mlir/lib/Bindings/Python/IRAffine.cpp index 16c7ca335ac3..0da936e85bc3 100644 --- a/mlir/lib/Bindings/Python/IRAffine.cpp +++ b/mlir/lib/Bindings/Python/IRAffine.cpp @@ -696,8 +696,7 @@ void mlir::python::populateIRAffine(py::module &m) { DefaultingPyMlirContext context) { SmallVector affineExprs; pyListToVector( - std::move(exprs), affineExprs, - "attempting to create an AffineMap"); + exprs, affineExprs, "attempting to create an AffineMap"); MlirAffineMap map = mlirAffineMapGet(context->get(), dimCount, symbolCount, affineExprs.size(), affineExprs.data()); diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index be2abcdd501f..b9d31b27b6bc 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -1357,13 +1357,12 @@ PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, } // Delegate to create. 
- return PyOperation::create(std::move(name), + return PyOperation::create(name, /*results=*/std::move(resultTypes), /*operands=*/std::move(operands), /*attributes=*/std::move(attributes), /*successors=*/std::move(successors), - /*regions=*/*regions, location, - std::move(maybeIp)); + /*regions=*/*regions, location, maybeIp); } PyOpView::PyOpView(const py::object &operationObject) @@ -1705,7 +1704,7 @@ void PySymbolTable::walkSymbolTables(PyOperationBase &from, if (userData.gotException) { std::string message("Exception raised in callback: "); message.append(userData.exceptionWhat); - throw std::runtime_error(std::move(message)); + throw std::runtime_error(message); } } diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index c325bfb42456..43c57a8e6033 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -271,7 +271,7 @@ static void getConstraintPredicates(pdl::ApplyNativeConstraintOp op, Position *pos = *std::max_element(allPositions.begin(), allPositions.end(), comparePosDepth); PredicateBuilder::Predicate pred = - builder.getConstraint(op.name(), std::move(allPositions), parameters); + builder.getConstraint(op.name(), allPositions, parameters); predList.emplace_back(pos, pred); } diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index 83892be4d4f5..1555bce33325 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -53,8 +53,7 @@ ArrayRef ReductionNode::generateNewVariants() { int oldNumVariant = getVariants().size(); auto createNewNode = [this](std::vector ranges) { - return new (allocator.Allocate()) - ReductionNode(this, std::move(ranges), allocator); + return new (allocator.Allocate()) ReductionNode(this, ranges, allocator); }; // If we haven't created new variant, then we can create varients by removing diff --git 
a/mlir/lib/Reducer/ReductionTreePass.cpp b/mlir/lib/Reducer/ReductionTreePass.cpp index a1308f936255..05f0f749166e 100644 --- a/mlir/lib/Reducer/ReductionTreePass.cpp +++ b/mlir/lib/Reducer/ReductionTreePass.cpp @@ -92,7 +92,7 @@ static LogicalResult findOptimal(ModuleOp module, Region ®ion, {0, std::distance(region.op_begin(), region.op_end())}}; ReductionNode *root = allocator.Allocate(); - new (root) ReductionNode(nullptr, std::move(ranges), allocator); + new (root) ReductionNode(nullptr, ranges, allocator); // Duplicate the module for root node and locate the region in the copy. if (failed(root->initialize(module, region))) llvm_unreachable("unexpected initialization failure"); diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index 7277e4859582..5732710f36c3 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -196,9 +196,9 @@ class TimerImpl { TimerImpl *nest(const void *id, function_ref nameBuilder) { auto tid = llvm::get_threadid(); if (tid == threadId) - return nestTail(children[id], std::move(nameBuilder)); + return nestTail(children[id], nameBuilder); std::unique_lock lock(asyncMutex); - return nestTail(asyncChildren[tid][id], std::move(nameBuilder)); + return nestTail(asyncChildren[tid][id], nameBuilder); } /// Tail-called from `nest()`. 
@@ -524,7 +524,7 @@ void DefaultTimingManager::stopTimer(void *handle) { void *DefaultTimingManager::nestTimer(void *handle, const void *id, function_ref nameBuilder) { - return static_cast(handle)->nest(id, std::move(nameBuilder)); + return static_cast(handle)->nest(id, nameBuilder); } void DefaultTimingManager::hideTimer(void *handle) { diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 473c9ad7f171..a51bf16e3c8a 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -736,8 +736,7 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { } if (auto dense = attr.dyn_cast()) { os << '{'; - interleaveComma(dense, os, - [&](APFloat val) { printFloat(std::move(val)); }); + interleaveComma(dense, os, [&](APFloat val) { printFloat(val); }); os << '}'; return success(); } @@ -760,7 +759,7 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { .dyn_cast()) { os << '{'; interleaveComma(dense, os, [&](APInt val) { - printInt(std::move(val), shouldMapToUnsigned(iType.getSignedness())); + printInt(val, shouldMapToUnsigned(iType.getSignedness())); }); os << '}'; return success(); @@ -770,8 +769,7 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { .getElementType() .dyn_cast()) { os << '{'; - interleaveComma(dense, os, - [&](APInt val) { printInt(std::move(val), false); }); + interleaveComma(dense, os, [&](APInt val) { printInt(val, false); }); os << '}'; return success(); } diff --git a/mlir/unittests/TableGen/OpBuildGen.cpp b/mlir/unittests/TableGen/OpBuildGen.cpp index 3b6f48936cc7..9b985e32062e 100644 --- a/mlir/unittests/TableGen/OpBuildGen.cpp +++ b/mlir/unittests/TableGen/OpBuildGen.cpp @@ -140,24 +140,24 @@ TEST_F(OpBuildGenTest, BuildMethodsSingleVariadicArgAndResult) { // Test collective args, collective results method, building a unary op. 
auto op = builder.create(loc, TypeRange{i32Ty}, ValueRange{*cstI32}); - verifyOp(std::move(op), {i32Ty}, {*cstI32}, noAttrs); + verifyOp(op, {i32Ty}, {*cstI32}, noAttrs); // Test collective args, collective results method, building a unary op with // named attributes. op = builder.create(loc, TypeRange{i32Ty}, ValueRange{*cstI32}, attrs); - verifyOp(std::move(op), {i32Ty}, {*cstI32}, attrs); + verifyOp(op, {i32Ty}, {*cstI32}, attrs); // Test collective args, collective results method, building a binary op. op = builder.create(loc, TypeRange{i32Ty, f32Ty}, ValueRange{*cstI32, *cstF32}); - verifyOp(std::move(op), {i32Ty, f32Ty}, {*cstI32, *cstF32}, noAttrs); + verifyOp(op, {i32Ty, f32Ty}, {*cstI32, *cstF32}, noAttrs); // Test collective args, collective results method, building a binary op with // named attributes. op = builder.create( loc, TypeRange{i32Ty, f32Ty}, ValueRange{*cstI32, *cstF32}, attrs); - verifyOp(std::move(op), {i32Ty, f32Ty}, {*cstI32, *cstF32}, attrs); + verifyOp(op, {i32Ty, f32Ty}, {*cstI32, *cstF32}, attrs); } /// Test build methods for an Op with a single varadic arg and a non-variadic @@ -166,22 +166,22 @@ TEST_F(OpBuildGenTest, BuildMethodsSingleVariadicArgNonVariadicResults) { // Test separate arg, separate param build method. auto op = builder.create(loc, i32Ty, ValueRange{*cstI32}); - verifyOp(std::move(op), {i32Ty}, {*cstI32}, noAttrs); + verifyOp(op, {i32Ty}, {*cstI32}, noAttrs); // Test collective params build method, no attributes. op = builder.create(loc, TypeRange{i32Ty}, ValueRange{*cstI32}); - verifyOp(std::move(op), {i32Ty}, {*cstI32}, noAttrs); + verifyOp(op, {i32Ty}, {*cstI32}, noAttrs); // Test collective params build method no attributes, 2 inputs. op = builder.create(loc, TypeRange{i32Ty}, ValueRange{*cstI32, *cstF32}); - verifyOp(std::move(op), {i32Ty}, {*cstI32, *cstF32}, noAttrs); + verifyOp(op, {i32Ty}, {*cstI32, *cstF32}, noAttrs); // Test collective params build method, non-empty attributes. 
op = builder.create( loc, TypeRange{i32Ty}, ValueRange{*cstI32, *cstF32}, attrs); - verifyOp(std::move(op), {i32Ty}, {*cstI32, *cstF32}, attrs); + verifyOp(op, {i32Ty}, {*cstI32, *cstF32}, attrs); } /// Test build methods for an Op with a single varadic arg and multiple variadic @@ -191,17 +191,17 @@ TEST_F(OpBuildGenTest, // Test separate arg, separate param build method. auto op = builder.create( loc, TypeRange{i32Ty}, TypeRange{f32Ty}, ValueRange{*cstI32}); - verifyOp(std::move(op), {i32Ty, f32Ty}, {*cstI32}, noAttrs); + verifyOp(op, {i32Ty, f32Ty}, {*cstI32}, noAttrs); // Test collective params build method, no attributes. op = builder.create(loc, TypeRange{i32Ty, f32Ty}, ValueRange{*cstI32}); - verifyOp(std::move(op), {i32Ty, f32Ty}, {*cstI32}, noAttrs); + verifyOp(op, {i32Ty, f32Ty}, {*cstI32}, noAttrs); // Test collective params build method, with attributes. op = builder.create(loc, TypeRange{i32Ty, f32Ty}, ValueRange{*cstI32}, attrs); - verifyOp(std::move(op), {i32Ty, f32Ty}, {*cstI32}, attrs); + verifyOp(op, {i32Ty, f32Ty}, {*cstI32}, attrs); } // The next 2 tests test supression of ambiguous build methods for ops that @@ -223,7 +223,7 @@ TEST_F(OpBuildGenTest, BuildMethodsRegionsAndInferredType) { auto op = builder.create( loc, ValueRange{*cstI32, *cstF32}, /*attributes=*/noAttrs); ASSERT_EQ(op->getNumRegions(), 1u); - verifyOp(std::move(op), {i32Ty}, {*cstI32, *cstF32}, noAttrs); + verifyOp(op, {i32Ty}, {*cstI32, *cstF32}, noAttrs); } } // namespace mlir From 4f415216ca812fc81fddeefe48623474c35009c9 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:02:20 +0000 Subject: [PATCH 366/992] Apply clang-tidy fixes for performance-unnecessary-value-param to MLIR (NFC) --- mlir/lib/Bindings/Python/IRCore.cpp | 13 ++++++------- mlir/lib/Bindings/Python/IRModule.h | 2 +- .../Conversion/TosaToLinalg/TosaToLinalgNamed.cpp | 2 +- mlir/lib/Reducer/ReductionNode.cpp | 2 +- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 7 ++++--- 
mlir/unittests/Dialect/SparseTensor/MergerTest.cpp | 9 +++++---- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index b9d31b27b6bc..686153227203 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -1107,13 +1107,12 @@ void PyOperation::erase() { // PyOpView //------------------------------------------------------------------------------ -py::object -PyOpView::buildGeneric(const py::object &cls, py::list resultTypeList, - py::list operandList, - llvm::Optional attributes, - llvm::Optional> successors, - llvm::Optional regions, - DefaultingPyLocation location, py::object maybeIp) { +py::object PyOpView::buildGeneric( + const py::object &cls, py::list resultTypeList, py::list operandList, + llvm::Optional attributes, + llvm::Optional> successors, + llvm::Optional regions, DefaultingPyLocation location, + const py::object &maybeIp) { PyMlirContextRef context = location->getContext(); // Class level operation construction metadata. 
std::string name = py::cast(cls.attr("OPERATION_NAME")); diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index df4aaebf3036..117435d633b1 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -539,7 +539,7 @@ class PyOpView : public PyOperationBase { llvm::Optional attributes, llvm::Optional> successors, llvm::Optional regions, DefaultingPyLocation location, - pybind11::object maybeIp); + const pybind11::object &maybeIp); private: PyOperation &operation; // For efficient, cast-free access from C++ diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp index 90220ef44e97..a9c525f43aa3 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -82,7 +82,7 @@ static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, .result(); } -static SmallVector filterDynamicDims(SmallVector dynDims) { +static SmallVector filterDynamicDims(const SmallVector &dynDims) { SmallVector filteredDims; for (auto dim : dynDims) if (dim) diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index 1555bce33325..2288b0ac0be1 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -52,7 +52,7 @@ LogicalResult ReductionNode::initialize(ModuleOp parentModule, ArrayRef ReductionNode::generateNewVariants() { int oldNumVariant = getVariants().size(); - auto createNewNode = [this](std::vector ranges) { + auto createNewNode = [this](const std::vector &ranges) { return new (allocator.Allocate()) ReductionNode(this, ranges, allocator); }; diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index a51bf16e3c8a..1fc120a7dcd9 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -736,7 +736,7 @@ LogicalResult 
CppEmitter::emitAttribute(Location loc, Attribute attr) { } if (auto dense = attr.dyn_cast()) { os << '{'; - interleaveComma(dense, os, [&](APFloat val) { printFloat(val); }); + interleaveComma(dense, os, [&](const APFloat &val) { printFloat(val); }); os << '}'; return success(); } @@ -758,7 +758,7 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { .getElementType() .dyn_cast()) { os << '{'; - interleaveComma(dense, os, [&](APInt val) { + interleaveComma(dense, os, [&](const APInt &val) { printInt(val, shouldMapToUnsigned(iType.getSignedness())); }); os << '}'; @@ -769,7 +769,8 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { .getElementType() .dyn_cast()) { os << '{'; - interleaveComma(dense, os, [&](APInt val) { printInt(val, false); }); + interleaveComma(dense, os, + [&](const APInt &val) { printInt(val, false); }); os << '}'; return success(); } diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp index fe69878c8283..6ea9960428f0 100644 --- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp +++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp @@ -99,14 +99,15 @@ class MergerTestBase : public ::testing::Test { /// Wrapper over latPointWithinRange for readability of tests. void expectLatPointWithinRange(unsigned s, unsigned p, unsigned n, - std::shared_ptr pattern, - llvm::BitVector bits) { + const std::shared_ptr &pattern, + const llvm::BitVector &bits) { EXPECT_TRUE(latPointWithinRange(s, p, n, pattern, bits)); } /// Wrapper over expectLatPointWithinRange for a single lat point. 
- void expectLatPoint(unsigned s, unsigned p, std::shared_ptr pattern, - llvm::BitVector bits) { + void expectLatPoint(unsigned s, unsigned p, + const std::shared_ptr &pattern, + const llvm::BitVector &bits) { EXPECT_TRUE(latPointWithinRange(s, p, 1, pattern, bits)); } From 56f5e4abb83fb5d668e0f12e64d68a510beead03 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 22:39:57 +0000 Subject: [PATCH 367/992] Replace raw-loop with llvm::any_of() in PresburgerSet.cpp (NFC) Reported by clang-tidy. --- mlir/lib/Analysis/PresburgerSet.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Analysis/PresburgerSet.cpp b/mlir/lib/Analysis/PresburgerSet.cpp index 22ea7a3a7eec..981622140050 100644 --- a/mlir/lib/Analysis/PresburgerSet.cpp +++ b/mlir/lib/Analysis/PresburgerSet.cpp @@ -85,11 +85,10 @@ PresburgerSet PresburgerSet::unionSet(const PresburgerSet &set) const { /// A point is contained in the union iff any of the parts contain the point. bool PresburgerSet::containsPoint(ArrayRef point) const { - for (const FlatAffineConstraints &fac : flatAffineConstraints) { - if (fac.containsPoint(point)) - return true; - } - return false; + return llvm::any_of(flatAffineConstraints, + [&](const FlatAffineConstraints &fac) { + return (fac.containsPoint(point)); + }); } PresburgerSet PresburgerSet::getUniverse(unsigned nDim, unsigned nSym) { From 3f127d8aaaef2ad4fc461c13bab2ec1482ea34f0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Jan 2022 23:43:52 +0000 Subject: [PATCH 368/992] Use `= default` for the ValueShapeRange copy-constructor (NFC) This makes it trivially copyable. 
--- mlir/include/mlir/Interfaces/InferTypeOpInterface.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mlir/include/mlir/Interfaces/InferTypeOpInterface.h b/mlir/include/mlir/Interfaces/InferTypeOpInterface.h index c4f8f2d905e5..3ed9dcc9d3dd 100644 --- a/mlir/include/mlir/Interfaces/InferTypeOpInterface.h +++ b/mlir/include/mlir/Interfaces/InferTypeOpInterface.h @@ -165,10 +165,7 @@ class ValueShapeRange : public ValueRange::RangeBaseT { ValueShapeRange(const std::initializer_list &values) : ValueShapeRange(ValueRange(values)) {} - ValueShapeRange(const ValueShapeRange &other) : RangeBaseT(other) { - operandShape = other.operandShape; - valueToShape = other.valueToShape; - } + ValueShapeRange(const ValueShapeRange &) = default; /// Sets the Value to ShapeAdaptor mapping function and returns this. ValueShapeRange &setValueToShapeMapping(ValueShapeMapFn fn) { From 9a2120a6e1fcefbfabbe8d25f14e056fadc37f2d Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Sun, 2 Jan 2022 16:25:27 -0800 Subject: [PATCH 369/992] [llvm-profgen] Error out for unsupported AutoFDO profile generation with probe Error out instead of silently generating an empty profile when trying to generate an AutoFDO profile with a probe binary. 
Differential Revision: https://reviews.llvm.org/D116508 --- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index f03e407dbe7d..7155ec4a4219 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -383,6 +383,8 @@ ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { void ProfileGenerator::generateProfile() { if (Binary->usePseudoProbes()) { // TODO: Support probe based profile generation + exitWithError("Probe based profile generation not supported for AutoFDO, " + "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`."); } else { generateLineNumBasedProfile(); } From 1a0a177965e88d61b5d3cd3e7f7f89011f0827c1 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Sun, 2 Jan 2022 17:44:08 -0500 Subject: [PATCH 370/992] [MLIR] Create fold for cmp of ext This patch creates folds for cmpi( ext(%x : i1, iN) != 0) -> %x In essence this matches patterns matching an extension of a boolean, that != 0, which is equivalent to the original condition. 
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116504 --- .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 19 ++++++++++++++++++ .../test/Dialect/Arithmetic/canonicalize.mlir | 20 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 91cbf4bdb528..2a6b463bd4e8 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -1150,6 +1150,25 @@ OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { return getBoolAttribute(getType(), getContext(), val); } + if (matchPattern(getRhs(), m_Zero())) { + if (auto extOp = getLhs().getDefiningOp()) { + if (extOp.getOperand().getType().cast().getWidth() == 1) { + // extsi(%x : i1 -> iN) != 0 -> %x + if (getPredicate() == arith::CmpIPredicate::ne) { + return extOp.getOperand(); + } + } + } + if (auto extOp = getLhs().getDefiningOp()) { + if (extOp.getOperand().getType().cast().getWidth() == 1) { + // extui(%x : i1 -> iN) != 0 -> %x + if (getPredicate() == arith::CmpIPredicate::ne) { + return extOp.getOperand(); + } + } + } + } + auto lhs = operands.front().dyn_cast_or_null(); auto rhs = operands.back().dyn_cast_or_null(); if (!lhs || !rhs) diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index 834842c0f351..b4a5cf43ba82 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -50,6 +50,26 @@ func @cmpi_equal_vector_operands(%arg0: vector<1x8xi64>) // ----- +// CHECK-LABEL: @cmpOfExtSI +// CHECK-NEXT: return %arg0 +func @cmpOfExtSI(%arg0: i1) -> i1 { + %ext = arith.extsi %arg0 : i1 to i64 + %c0 = arith.constant 0 : i64 + %res = arith.cmpi ne, %ext, %c0 : i64 + return %res : i1 +} + +// CHECK-LABEL: @cmpOfExtUI +// CHECK-NEXT: return %arg0 +func @cmpOfExtUI(%arg0: i1) -> i1 { + %ext = arith.extui %arg0 : i1 to i64 + 
%c0 = arith.constant 0 : i64 + %res = arith.cmpi ne, %ext, %c0 : i64 + return %res : i1 +} + +// ----- + // CHECK-LABEL: @indexCastOfSignExtend // CHECK: %[[res:.+]] = arith.index_cast %arg0 : i8 to index // CHECK: return %[[res]] From e27b5f9371382952eb5482ad151bb6fcb4cd0d7c Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Sun, 2 Jan 2022 18:02:08 -0800 Subject: [PATCH 371/992] [clang][AST] Fix crash when printing error Clang will crash if it tries to compile the following code. This commit fixes it. ``` $ cat foo.c void foo(_Nullable int *ptr) { __auto_type _Nonnull a = ptr; }; $ clang foo.c -c -Wnullable-to-nonnull-conversion ``` Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D116342 --- clang/lib/AST/TypePrinter.cpp | 2 +- clang/test/Sema/nullability.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 2a33a69f288d..cf520fcb037e 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -280,7 +280,7 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::Attributed: { // We still want to print the address_space before the type if it is an // address_space attribute. 
- const auto *AttrTy = cast(T); + const auto *AttrTy = cast(UnderlyingType); CanPrefixQualifiers = AttrTy->getAttrKind() == attr::AddressSpace; } } diff --git a/clang/test/Sema/nullability.c b/clang/test/Sema/nullability.c index d462886de043..977b29e9bf9d 100644 --- a/clang/test/Sema/nullability.c +++ b/clang/test/Sema/nullability.c @@ -125,6 +125,7 @@ void nullable_to_nonnull(_Nullable int *ptr) { int *a = ptr; // okay _Nonnull int *b = ptr; // expected-warning{{implicit conversion from nullable pointer 'int * _Nullable' to non-nullable pointer type 'int * _Nonnull'}} b = ptr; // expected-warning{{implicit conversion from nullable pointer 'int * _Nullable' to non-nullable pointer type 'int * _Nonnull'}} + __auto_type _Nonnull c = ptr; // expected-warning{{implicit conversion from nullable pointer 'int * _Nullable' to non-nullable pointer type 'int * _Nullable _Nonnull'}} accepts_nonnull_1(ptr); // expected-warning{{implicit conversion from nullable pointer 'int * _Nullable' to non-nullable pointer type 'int * _Nonnull'}} } From 717c4bf7e364184d70a71b0b5b00615d2b835fd9 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 02:32:10 +0000 Subject: [PATCH 372/992] Apply clang-tidy fixes for misc-unused-parameters in MLIR AffineStructures.cpp (NFC) --- mlir/lib/Analysis/AffineStructures.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 3742a2428148..e475808d400c 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -47,7 +47,7 @@ struct AffineExprFlattener : public SimpleAffineExprFlattener { // inequalities. 
FlatAffineConstraints localVarCst; - AffineExprFlattener(unsigned nDims, unsigned nSymbols, MLIRContext *ctx) + AffineExprFlattener(unsigned nDims, unsigned nSymbols) : SimpleAffineExprFlattener(nDims, nSymbols) { localVarCst.reset(nDims, nSymbols, /*numLocals=*/0); } @@ -81,7 +81,7 @@ getFlattenedAffineExprs(ArrayRef exprs, unsigned numDims, return success(); } - AffineExprFlattener flattener(numDims, numSymbols, exprs[0].getContext()); + AffineExprFlattener flattener(numDims, numSymbols); // Use the same flattener to simplify each expression successively. This way // local identifiers / expressions are shared. for (auto expr : exprs) { From 6c348c1d3f5a1c36cec7b8360bd986549eee62ba Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 3 Jan 2022 14:33:23 +1100 Subject: [PATCH 373/992] [JITLink] Move AllocActions and associated types out of JITLinkMemoryManager. They're shared with LinkGraph, so having them as top-level types makes sense, and saves users from qualifying the names everywhere. --- .../llvm/ExecutionEngine/JITLink/JITLink.h | 4 +- .../JITLink/JITLinkMemoryManager.h | 83 ++++++++++--------- .../JITLink/JITLinkMemoryManager.cpp | 6 +- 3 files changed, 46 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 83d85953fce6..69106fcb4c28 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -1377,7 +1377,7 @@ class LinkGraph { /// /// Accessing this object after finalization will result in undefined /// behavior. - JITLinkMemoryManager::AllocActions &allocActions() { return AAs; } + AllocActions &allocActions() { return AAs; } /// Dump the graph. 
void dump(raw_ostream &OS); @@ -1395,7 +1395,7 @@ class LinkGraph { SectionList Sections; ExternalSymbolSet ExternalSymbols; ExternalSymbolSet AbsoluteSymbols; - JITLinkMemoryManager::AllocActions AAs; + AllocActions AAs; }; inline MutableArrayRef Block::getMutableContent(LinkGraph &G) { diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 62c271dfc0b2..7dd382facde8 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -33,52 +33,53 @@ class Block; class LinkGraph; class Section; +/// Represents a call to a graph-memory-management support function in the +/// executor. +/// +/// Support functions are called as: +/// +/// auto *Result = +/// ((char*(*)(const void*, size_t))FnAddr)( +/// (const void*)CtxAddr, (size_t)CtxSize) +/// +/// A null result is interpreted as success. +/// +/// A non-null result is interpreted as a heap-allocated string containing +/// an error message to report to the allocator (the allocator's +/// executor-side implementation code is responsible for freeing the error +/// string). +struct AllocActionCall { + JITTargetAddress FnAddr = 0; + JITTargetAddress CtxAddr = 0; + JITTargetAddress CtxSize = 0; +}; + +/// A pair of AllocActionCalls, one to be run at finalization time, one to be +/// run at deallocation time. +/// +/// AllocActionCallPairs should be constructed for paired operations (e.g. +/// __register_ehframe and __deregister_ehframe for eh-frame registration). +/// See comments for AllocActions for execution ordering. +/// +/// For unpaired operations one or the other member can be left unused, as +/// AllocationActionCalls with an FnAddr of zero will be skipped. +struct AllocActionCallPair { + AllocActionCall Finalize; + AllocActionCall Dealloc; +}; + +/// A vector of allocation actions to be run for this allocation. 
+/// +/// Finalize allocations will be run in order at finalize time. Dealloc +/// actions will be run in reverse order at deallocation time. +using AllocActions = std::vector; + /// Manages allocations of JIT memory. /// /// Instances of this class may be accessed concurrently from multiple threads /// and their implemetations should include any necessary synchronization. class JITLinkMemoryManager { public: - /// Represents a call to a graph-memory-management support function in the - /// executor. - /// - /// Support functions are called as: - /// - /// auto *Result = - /// ((char*(*)(const void*, size_t))FnAddr)( - /// (const void*)CtxAddr, (size_t)CtxSize) - /// - /// A null result is interpreted as success. - /// - /// A non-null result is interpreted as a heap-allocated string containing - /// an error message to report to the allocator (the allocator's - /// executor-side implementation code is responsible for freeing the error - /// string). - struct AllocActionCall { - JITTargetAddress FnAddr = 0; - JITTargetAddress CtxAddr = 0; - JITTargetAddress CtxSize = 0; - }; - - /// A pair of AllocActionCalls, one to be run at finalization time, one to be - /// run at deallocation time. - /// - /// AllocActionCallPairs should be constructed for paired operations (e.g. - /// __register_ehframe and __deregister_ehframe for eh-frame registration). - /// See comments for AllocActions for execution ordering. - /// - /// For unpaired operations one or the other member can be left unused, as - /// AllocationActionCalls with an FnAddr of zero will be skipped. - struct AllocActionCallPair { - AllocActionCall Finalize; - AllocActionCall Dealloc; - }; - - /// A vector of allocation actions to be run for this allocation. - /// - /// Finalize allocations will be run in order at finalize time. Dealloc - /// actions will be run in reverse order at deallocation time. - using AllocActions = std::vector; /// Represents a finalized allocation. 
/// @@ -312,7 +313,7 @@ class BasicLayout { /// Returns a reference to the AllocActions in the graph. /// This convenience function saves callers from having to #include /// LinkGraph.h if all they need are allocation actions. - JITLinkMemoryManager::AllocActions &graphAllocActions(); + AllocActions &graphAllocActions(); private: LinkGraph &G; diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 831b9b26d2fd..67fe6287e388 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -64,7 +64,7 @@ namespace jitlink { JITLinkMemoryManager::~JITLinkMemoryManager() = default; JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default; -static Error runAllocAction(JITLinkMemoryManager::AllocActionCall &C) { +static Error runAllocAction(AllocActionCall &C) { using WrapperFnTy = CWrapperFunctionResult (*)(const void *, size_t); auto *Fn = jitTargetAddressToPointer(C.FnAddr); @@ -189,9 +189,7 @@ Error BasicLayout::apply() { return Error::success(); } -JITLinkMemoryManager::AllocActions &BasicLayout::graphAllocActions() { - return G.allocActions(); -} +AllocActions &BasicLayout::graphAllocActions() { return G.allocActions(); } void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD, SegmentMap Segments, From 590a62d1b253c2348859d6b66464c2070f186c52 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 04:53:10 +0000 Subject: [PATCH 374/992] Add a script to run clang-tidy on the entire MLIR codebase --- mlir/utils/clang-tidy/README.md | 50 ++++++++++ mlir/utils/clang-tidy/apply-clang-tidy.sh | 116 ++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 mlir/utils/clang-tidy/README.md create mode 100755 mlir/utils/clang-tidy/apply-clang-tidy.sh diff --git a/mlir/utils/clang-tidy/README.md b/mlir/utils/clang-tidy/README.md new file mode 100644 index 
000000000000..f8ddcbe74f92 --- /dev/null +++ b/mlir/utils/clang-tidy/README.md @@ -0,0 +1,50 @@ +### Apply clang-tidy fixes on the repo + +This script runs clang-tidy on every C++ source file in MLIR and commits +the results of the checks individually. Be aware that it'll take over +10h to process the entire codebase. + +The advised way to use this is to build clang-tidy (in release mode) and +have another build directory for MLIR. Here is a sample invocation from +the root of the repo: + +```bash +{ time \ + CLANG_TIDY=build-clang/bin/clang-tidy \ + TIMING_TIDY=time \ + ./mlir/utils/apply-clang-tidy.sh build mlir ~/clang-tidy-fails/ +; } 2>&1 | tee ~/clang-tidy.log +``` + +- `build-clang/` contains the result of a build of clang-tidy, configured + and built somehow with: +```bash +$ cmake ../llvm \ + -DLLVM_ENABLE_PROJECTS="clang;mlir;clang-tools-extra" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_TARGETS_TO_BUILD=Native \ + -G Ninja +$ ninja clang-tidy +``` +- `build/` must be a directory with MLIR configured. It is highly advised to + use `ccache` as well, as this directory will be used to rerun + `ninja check-mlir` after every single clang-tidy fix. +```bash +$ cmake ../llvm \ + -DLLVM_ENABLE_PROJECTS="mlir" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DLLVM_TARGETS_TO_BUILD="Native;NVPTX;AMDGPU" \ + -DLLVM_CCACHE_BUILD=ON \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DLLVM_ENABLE_LLD=ON \ + -DLLVM_BUILD_EXAMPLES=OFF \ + -DMLIR_ENABLE_BINDINGS_PYTHON=ON \ + -G Ninja +``` +- `mlir/` is the directory where to find the files, it can be replaced by a + subfolder or the path to a single file. +- `mkdir -p ~/clang-tidy-fails/` will be a directory containing the patches + that clang-tidy produces but also fail the build. 
+ diff --git a/mlir/utils/clang-tidy/apply-clang-tidy.sh b/mlir/utils/clang-tidy/apply-clang-tidy.sh new file mode 100755 index 000000000000..8266de8d6af2 --- /dev/null +++ b/mlir/utils/clang-tidy/apply-clang-tidy.sh @@ -0,0 +1,116 @@ +#!/bin/bash -u + +if [[ $# -lt 2 || $# -gt 4 ]]; then + echo "Usage: $0 [rejects dir] [checks]" + echo " - has to be a LLVM build directory (you should use CCACHE!)." + echo " - is the path that contains the .cpp files to update." + echo " - [rejects dir] is a directory where rejected patch (build failure) will be stored." + echo " - [checks] is an optional space-separated list of check to use instead of auto-detecting" + echo " Also define the env var CLANG_TIDY the path to use for the clang-tidy binary (default to 'clang-tidy' in the PATH)" + echo " Also define the env var TIMING_TIDY to 'time' to prefix clang-tidy execution with it" + echo "" + echo "This tool will execute clang-tidy on every .cpp file in the provided path and" + echo "rerun the tests. On success, a commit is added to the repo for each individual" + echo "pair ." + exit 1 +fi +BUILD_DIR=$1 +SRCS=$2 +REJECT_DIR=${3:-} +PRESET_CHECKS=${4:-} +SRC_DIR=$PWD +if [[ -v CLANG_TIDY ]] && [[ ! -z "$CLANG_TIDY" ]] ; then + CLANG_TIDY=$(realpath $CLANG_TIDY) + if [[ ! -f "$CLANG_TIDY" ]]; then + echo "Invalid path '$CLANG_TIDY'" + exit 1 + fi +else + CLANG_TIDY=clang-tidy +fi +TIMING_TIDY=${TIMING_TIDY:-} +echo "Using: '$CLANG_TIDY" + +if [[ ! -z "$REJECT_DIR" ]] && [[ ! -d "$REJECT_DIR" ]]; then + echo "Expects 'rejects dir' to be a directory, got '$REJECT_DIR'" + exit 1 +fi + +ensure_clean_build() { + git reset --hard HEAD + time ninja -C $BUILD_DIR check-mlir-build-only > ${REJECT_DIR}/ninja.clean.log 2>&1 + if [[ $? 
!= 0 ]] ; then + echo "-- Build failed on clean state, cleaning TableGen files and retry" + # Reinitialize the TableGen generated file to have a clean state + find $BUILD_DIR/tools/mlir/ | grep '\.inc' | while read file ; do rm $file ; done + time ninja -C $BUILD_DIR check-mlir-build-only > ${REJECT_DIR}/ninja.clean.log 2>&1 + if [[ $? != 0 ]] ; then + echo "check-mlir-build-only failed on clean state! (see ninja.clean.log)" + git status + exit 1 + fi + fi +} + +tmpfile=$(mktemp /tmp/mhlo-temp-checks.XXXXXX) +find $SRCS | grep ".cpp$" | sort | while read file ; do + echo "================================" + echo "======= Processing $file =======" + date + echo "================================" + CHECKS= + if [[ ! -z "$PRESET_CHECKS" ]]; then + CHECKS="$PRESET_CHECKS" + else + CHECKS=$($CLANG_TIDY $file -p $BUILD_DIR --list-checks \ + | grep -v "Enabled checks:" | grep -v "^$" \ + | while read check ; do echo -n "${check} " ; done;) + fi + echo "-----------------------------------" + echo "-- Reset state before applying all checks on file $file" + ensure_clean_build + + echo "-----------------------------------" + echo "-- Apply all checks on file $file" + echo "$TIMING_TIDY $CLANG_TIDY -p $BUILD_DIR $file -fix" + $TIMING_TIDY $CLANG_TIDY -p $BUILD_DIR $file -fix \ + | grep "warning:.*\]$" | sed -r 's#.*\[(.*)]$#\1#' | sort -u > $tmpfile + git clang-format -f + if [[ $(git diff --stat) == '' ]]; then + echo 'Nothing was applied, skip' + continue + fi + echo "-----------------------------------" + echo "-- Got some diff, run one check at a time now" + cat $tmpfile | while read check ; do + echo "-----------------------------------" + echo "-- Reset state before applying check $check on file $file" + ensure_clean_build + + echo "-----------------------------------" + echo "-- Apply check $check on file $file" + echo "$TIMING_TIDY $CLANG_TIDY -p $BUILD_DIR $file --checks="-*,$check" -fix" + { $TIMING_TIDY $CLANG_TIDY -p $BUILD_DIR $file --checks="-*,$check" -fix ; } 
2>&1 + git clang-format -f + if [[ $(git diff --stat) == '' ]]; then + echo 'Nothing was applied, skip' + continue + fi + echo "-----------------------------------" + echo "-- Test check $check on file $file" + # Clang-tidy sometimes update files in the build directory, erase the .inc file generate by tablegen + # to force them to be regenerated now. + find $BUILD_DIR/tools/mlir/ | grep '\.inc' | while read file ; do rm $file ; done + ninja -C $BUILD_DIR check-mlir > ${REJECT_DIR}/ninja.${check}.$(basename $file).log 2>&1 + if [[ $? != 0 ]] ; then + echo "check-mlir failed! (see ninja.${check}.${file}.log)" + [[ ! -z "$REJECT_DIR" ]] && git diff > "${REJECT_DIR}/${check}_$(basename ${file}).reject.diff" + continue + fi + echo "-----------------------------------" + echo "-- Success, commit changes for check $check on file $file" + git clang-format -f + + git commit -a -m "Apply clang-tidy fixes for $check in $(basename $file) (NFC)" + done +done From 89af17c0c74eb9d8d11870f6510e475eff74eef4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 04:56:08 +0000 Subject: [PATCH 375/992] Define a `cppAccessorType` to const-ref in APFloatParameter and update ODS emitter to use it for verifier signatures This reduce an unnecessary amount of copy of non-trivial objects, like APFloat. 
Reviewed By: rriddle, jpienaar Differential Revision: https://reviews.llvm.org/D116505 --- mlir/include/mlir/IR/OpBase.td | 1 + mlir/lib/IR/BuiltinAttributes.cpp | 2 +- mlir/test/lib/Dialect/Test/TestAttributes.cpp | 2 +- mlir/test/mlir-tblgen/attrdefs.td | 8 ++++---- mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index f1a5446ad1f9..e46b8fb35ec7 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -3135,6 +3135,7 @@ class StringRefParameter : class APFloatParameter : AttrOrTypeParameter<"::llvm::APFloat", desc> { let comparator = "$_lhs.bitwiseIsEqual($_rhs)"; + let cppAccessorType = "const ::llvm::APFloat &"; } // For standard ArrayRefs, which require allocation. diff --git a/mlir/lib/IR/BuiltinAttributes.cpp b/mlir/lib/IR/BuiltinAttributes.cpp index 802df2dada9d..2a34ed34e7d1 100644 --- a/mlir/lib/IR/BuiltinAttributes.cpp +++ b/mlir/lib/IR/BuiltinAttributes.cpp @@ -283,7 +283,7 @@ double FloatAttr::getValueAsDouble(APFloat value) { } LogicalResult FloatAttr::verify(function_ref emitError, - Type type, APFloat value) { + Type type, const APFloat &value) { // Verify that the type is correct. 
if (!type.isa()) return emitError() << "expected floating point type"; diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 3a860994f0e8..909f5b399543 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -129,7 +129,7 @@ TestI64ElementsAttr::verify(function_ref emitError, LogicalResult TestAttrWithFormatAttr::verify(function_ref emitError, - int64_t one, std::string two, IntegerAttr three, + int64_t one, StringRef two, IntegerAttr three, ArrayRef four) { if (four.size() != static_cast(one)) return emitError() << "expected 'one' to equal 'four.size()'"; diff --git a/mlir/test/mlir-tblgen/attrdefs.td b/mlir/test/mlir-tblgen/attrdefs.td index 34c8588225f7..f53705dd662f 100644 --- a/mlir/test/mlir-tblgen/attrdefs.td +++ b/mlir/test/mlir-tblgen/attrdefs.td @@ -61,8 +61,8 @@ def B_CompoundAttrA : TestAttr<"CompoundA"> { let genVerifyDecl = 1; // DECL-LABEL: class CompoundAAttr : public ::mlir::Attribute -// DECL: static CompoundAAttr getChecked(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, ::mlir::MLIRContext *context, int widthOfSomething, ::test::SimpleTypeA exampleTdType, ::llvm::APFloat apFloat, ::llvm::ArrayRef dims, ::mlir::Type inner); -// DECL: static ::mlir::LogicalResult verify(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, int widthOfSomething, ::test::SimpleTypeA exampleTdType, ::llvm::APFloat apFloat, ::llvm::ArrayRef dims, ::mlir::Type inner); +// DECL: static CompoundAAttr getChecked(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, ::mlir::MLIRContext *context, int widthOfSomething, ::test::SimpleTypeA exampleTdType, const ::llvm::APFloat &apFloat, ::llvm::ArrayRef dims, ::mlir::Type inner); +// DECL: static ::mlir::LogicalResult verify(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, int widthOfSomething, ::test::SimpleTypeA exampleTdType, const ::llvm::APFloat &apFloat, 
::llvm::ArrayRef dims, ::mlir::Type inner); // DECL: static constexpr ::llvm::StringLiteral getMnemonic() { // DECL: return {"cmpnd_a"}; // DECL: } @@ -71,7 +71,7 @@ def B_CompoundAttrA : TestAttr<"CompoundA"> { // DECL: void print(::mlir::AsmPrinter &printer) const; // DECL: int getWidthOfSomething() const; // DECL: ::test::SimpleTypeA getExampleTdType() const; -// DECL: ::llvm::APFloat getApFloat() const; +// DECL: const ::llvm::APFloat &getApFloat() const; // Check that AttributeSelfTypeParameter is handled properly. // DEF-LABEL: struct CompoundAAttrStorage @@ -139,5 +139,5 @@ def F_ParamWithAccessorTypeAttr : TestAttr<"ParamWithAccessorType"> { // DECL-LABEL: class ParamWithAccessorTypeAttr // DECL: StringRef getParam() // DEF: ParamWithAccessorTypeAttrStorage -// DEF: ParamWithAccessorTypeAttrStorage(std::string param) +// DEF: ParamWithAccessorTypeAttrStorage(StringRef param) // DEF: StringRef ParamWithAccessorTypeAttr::getParam() diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index d90adbc47ef4..f2df6c8ae765 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -315,7 +315,7 @@ DefGen::getBuilderParams(std::initializer_list prefix) const { SmallVector builderParams; builderParams.append(prefix.begin(), prefix.end()); for (auto ¶m : params) - builderParams.emplace_back(param.getCppType(), param.getName()); + builderParams.emplace_back(param.getCppAccessorType(), param.getName()); return builderParams; } From 1bb9f4e482e9b98c05a055c8edc338a81bbeca2d Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Sun, 2 Jan 2022 22:06:57 -0500 Subject: [PATCH 376/992] [MLIR] Create folders for extsi/extui Create folders/canonicalizers for extsi/extui. 
Specifically, extui(extui(x)) -> extui(x) extsi(extsi(x)) -> extsi(x) extsi(extui(x)) -> extui(x) Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116515 --- .../Dialect/Arithmetic/IR/ArithmeticOps.td | 1 + .../IR/ArithmeticCanonicalization.td | 8 +++++ .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 15 ++++++++++ .../test/Dialect/Arithmetic/canonicalize.mlir | 29 +++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td index a08f811d92b0..fb6d32fd04f6 100644 --- a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td +++ b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td @@ -818,6 +818,7 @@ def Arith_ExtSIOp : Arith_IToICastOp<"extsi"> { }]; let hasFolder = 1; + let hasCanonicalizer = 1; let verifier = [{ return verifyExtOp(*this); }]; } diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td index a2d1aeb9b3a2..49ce6ec5ae0a 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td @@ -128,4 +128,12 @@ def IndexCastOfExtSI : def BitcastOfBitcast : Pat<(Arith_BitcastOp (Arith_BitcastOp $x)), (replaceWithValue $x)>; +//===----------------------------------------------------------------------===// +// ExtSIOp +//===----------------------------------------------------------------------===// + +// extsi(extui(x iN : iM) : iL) -> extui(x : iL) +def ExtSIOfExtUI : + Pat<(Arith_ExtSIOp (Arith_ExtUIOp $x)), (Arith_ExtUIOp $x)>; + #endif // ARITHMETIC_PATTERNS diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 2a6b463bd4e8..aa485d39e0db 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -788,6 +788,11 @@ OpFoldResult 
arith::ExtUIOp::fold(ArrayRef operands) { return IntegerAttr::get( getType(), lhs.getValue().zext(getType().getIntOrFloatBitWidth())); + if (auto lhs = getIn().getDefiningOp()) { + getInMutable().assign(lhs.getIn()); + return getResult(); + } + return {}; } @@ -804,6 +809,11 @@ OpFoldResult arith::ExtSIOp::fold(ArrayRef operands) { return IntegerAttr::get( getType(), lhs.getValue().sext(getType().getIntOrFloatBitWidth())); + if (auto lhs = getIn().getDefiningOp()) { + getInMutable().assign(lhs.getIn()); + return getResult(); + } + return {}; } @@ -811,6 +821,11 @@ bool arith::ExtSIOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { return checkWidthChangeCast(inputs, outputs); } +void arith::ExtSIOp::getCanonicalizationPatterns( + OwningRewritePatternList &patterns, MLIRContext *context) { + patterns.insert(context); +} + //===----------------------------------------------------------------------===// // ExtFOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index b4a5cf43ba82..87a198cb7c4a 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -70,6 +70,35 @@ func @cmpOfExtUI(%arg0: i1) -> i1 { // ----- +// CHECK-LABEL: @extSIOfExtUI +// CHECK: %[[res:.+]] = arith.extui %arg0 : i1 to i64 +// CHECK: return %[[res]] +func @extSIOfExtUI(%arg0: i1) -> i64 { + %ext1 = arith.extui %arg0 : i1 to i8 + %ext2 = arith.extsi %ext1 : i8 to i64 + return %ext2 : i64 +} + +// CHECK-LABEL: @extUIOfExtUI +// CHECK: %[[res:.+]] = arith.extui %arg0 : i1 to i64 +// CHECK: return %[[res]] +func @extUIOfExtUI(%arg0: i1) -> i64 { + %ext1 = arith.extui %arg0 : i1 to i8 + %ext2 = arith.extui %ext1 : i8 to i64 + return %ext2 : i64 +} + +// CHECK-LABEL: @extSIOfExtSI +// CHECK: %[[res:.+]] = arith.extsi %arg0 : i1 to i64 +// CHECK: return %[[res]] +func @extSIOfExtSI(%arg0: i1) -> 
i64 { + %ext1 = arith.extsi %arg0 : i1 to i8 + %ext2 = arith.extsi %ext1 : i8 to i64 + return %ext2 : i64 +} + +// ----- + // CHECK-LABEL: @indexCastOfSignExtend // CHECK: %[[res:.+]] = arith.index_cast %arg0 : i8 to index // CHECK: return %[[res]] From 8506c8c13bbe257270d7df69790e7f87605011af Mon Sep 17 00:00:00 2001 From: Groverkss Date: Mon, 3 Jan 2022 11:10:14 +0530 Subject: [PATCH 377/992] [MLIR] Move LinearTransform to Presburger/ This patch moves LinearTransform to Presburger/ and makes it use IntegerPolyhedron instead of FlatAffineConstraints. Also modifies its usage in `FlatAffineConstraints::findIntegerSample` to support the changes. This patch is part of a series of patches for moving presburger math functionality into Presburger directory. Reviewed By: arjunp Differential Revision: https://reviews.llvm.org/D116311 --- mlir/include/mlir/Analysis/AffineStructures.h | 7 ++--- .../Analysis/Presburger/IntegerPolyhedron.h | 4 +++ .../{ => Presburger}/LinearTransform.h | 10 +++---- mlir/lib/Analysis/AffineStructures.cpp | 30 ++++--------------- mlir/lib/Analysis/CMakeLists.txt | 2 -- mlir/lib/Analysis/Presburger/CMakeLists.txt | 1 + .../Analysis/Presburger/IntegerPolyhedron.cpp | 18 +++++++++++ .../{ => Presburger}/LinearTransform.cpp | 18 +++++------ mlir/unittests/Analysis/CMakeLists.txt | 1 - .../Analysis/Presburger/CMakeLists.txt | 1 + .../{ => Presburger}/LinearTransformTest.cpp | 2 +- 11 files changed, 48 insertions(+), 46 deletions(-) rename mlir/include/mlir/Analysis/{ => Presburger}/LinearTransform.h (83%) rename mlir/lib/Analysis/{ => Presburger}/LinearTransform.cpp (92%) rename mlir/unittests/Analysis/{ => Presburger}/LinearTransformTest.cpp (97%) diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 3f632dcc96a8..bb76b4ff13d2 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -81,6 +81,9 @@ class FlatAffineConstraints : public 
IntegerPolyhedron { 1, numDims, numSymbols, numLocals) {} + explicit FlatAffineConstraints(const IntegerPolyhedron &poly) + : IntegerPolyhedron(poly) {} + /// Return a system with no constraints, i.e., one which is satisfied by all /// points. static FlatAffineConstraints getUniverse(unsigned numDims = 0, @@ -212,10 +215,6 @@ class FlatAffineConstraints : public IntegerPolyhedron { void projectOut(unsigned pos, unsigned num); inline void projectOut(unsigned pos) { return projectOut(pos, 1); } - /// Sets the `values.size()` identifiers starting at `po`s to the specified - /// values and removes them. - void setAndEliminate(unsigned pos, ArrayRef values); - /// Changes the partition between dimensions and symbols. Depending on the new /// symbol count, either a chunk of trailing dimensional identifiers becomes /// symbols, or some of the leading symbols become dimensions. diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h index b46874fb8072..9185ea14cd53 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h @@ -185,6 +185,10 @@ class IntegerPolyhedron { /// Removes all equalities and inequalities. void clearConstraints(); + /// Sets the `values.size()` identifiers starting at `po`s to the specified + /// values and removes them. + void setAndEliminate(unsigned pos, ArrayRef values); + /// Gather positions of all lower and upper bounds of the identifier at `pos`, /// and optionally any equalities on it. In addition, the bounds are to be /// independent of identifiers in position range [`offset`, `offset` + `num`). 
diff --git a/mlir/include/mlir/Analysis/LinearTransform.h b/mlir/include/mlir/Analysis/Presburger/LinearTransform.h similarity index 83% rename from mlir/include/mlir/Analysis/LinearTransform.h rename to mlir/include/mlir/Analysis/Presburger/LinearTransform.h index 2f3aaf800ab0..a6a36f764e97 100644 --- a/mlir/include/mlir/Analysis/LinearTransform.h +++ b/mlir/include/mlir/Analysis/Presburger/LinearTransform.h @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// // -// Support for linear transforms and applying them to FlatAffineConstraints. +// Support for linear transforms and applying them to an IntegerPolyhedron. // //===----------------------------------------------------------------------===// #ifndef MLIR_ANALYSIS_LINEARTRANSFORM_H #define MLIR_ANALYSIS_LINEARTRANSFORM_H -#include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/Presburger/IntegerPolyhedron.h" #include "mlir/Analysis/Presburger/Matrix.h" #include "llvm/ADT/SmallVector.h" @@ -33,9 +33,9 @@ class LinearTransform { static std::pair makeTransformToColumnEchelon(Matrix m); - // Returns a FlatAffineConstraints having a constraint vector vT for every - // constraint vector v in fac, where T is this transform. - FlatAffineConstraints applyTo(const FlatAffineConstraints &fac) const; + // Returns an IntegerPolyhedron having a constraint vector vT for every + // constraint vector v in poly, where T is this transform. + IntegerPolyhedron applyTo(const IntegerPolyhedron &poly) const; // The given vector is interpreted as a row vector v. Post-multiply v with // this transform, say T, and return vT. 
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index e475808d400c..b6f592a71023 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Analysis/AffineStructures.h" -#include "mlir/Analysis/LinearTransform.h" +#include "mlir/Analysis/Presburger/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" #include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -1090,11 +1090,11 @@ FlatAffineConstraints::findIntegerSample() const { LinearTransform::makeTransformToColumnEchelon(std::move(m)); const LinearTransform &transform = result.second; // 1) Apply T to S to obtain S*T. - FlatAffineConstraints transformedSet = transform.applyTo(*this); + IntegerPolyhedron transformedSet = transform.applyTo(*this); // 2) Remove the unbounded dimensions and constraints involving them to // obtain a bounded set. - FlatAffineConstraints boundedSet = transformedSet; + FlatAffineConstraints boundedSet(transformedSet); unsigned numBoundedDims = result.first; unsigned numUnboundedDims = getNumIds() - numBoundedDims; removeConstraintsInvolvingSuffixDims(boundedSet, numUnboundedDims); @@ -1111,7 +1111,7 @@ FlatAffineConstraints::findIntegerSample() const { // 4) Substitute the values of the bounded dimensions into S*T to obtain a // full-dimensional cone, which necessarily contains an integer sample. transformedSet.setAndEliminate(0, *boundedSample); - FlatAffineConstraints &cone = transformedSet; + IntegerPolyhedron &cone = transformedSet; // 5) Obtain an integer sample from the cone. // @@ -1139,10 +1139,10 @@ FlatAffineConstraints::findIntegerSample() const { // negative a_i, so we accomodate this by shifting the inequality by this // amount for the shrunken cone. 
for (unsigned i = 0, e = cone.getNumInequalities(); i < e; ++i) { - for (unsigned j = 0; j < cone.numIds; ++j) { + for (unsigned j = 0; j < cone.getNumIds(); ++j) { int64_t coeff = cone.atIneq(i, j); if (coeff < 0) - cone.atIneq(i, cone.numIds) += coeff; + cone.atIneq(i, cone.getNumIds()) += coeff; } } @@ -2303,24 +2303,6 @@ static int findEqualityToConstant(const FlatAffineConstraints &cst, return -1; } -void FlatAffineConstraints::setAndEliminate(unsigned pos, - ArrayRef values) { - if (values.empty()) - return; - assert(pos + values.size() <= getNumIds() && - "invalid position or too many values"); - // Setting x_j = p in sum_i a_i x_i + c is equivalent to adding p*a_j to the - // constant term and removing the id x_j. We do this for all the ids - // pos, pos + 1, ... pos + values.size() - 1. - for (unsigned r = 0, e = getNumInequalities(); r < e; r++) - for (unsigned i = 0, numVals = values.size(); i < numVals; ++i) - atIneq(r, getNumCols() - 1) += atIneq(r, pos + i) * values[i]; - for (unsigned r = 0, e = getNumEqualities(); r < e; r++) - for (unsigned i = 0, numVals = values.size(); i < numVals; ++i) - atEq(r, getNumCols() - 1) += atEq(r, pos + i) * values[i]; - removeIdRange(pos, pos + values.size()); -} - LogicalResult FlatAffineConstraints::constantFoldId(unsigned pos) { assert(pos < getNumIds() && "invalid position"); int rowIdx; diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 9128ef9474ac..3724ea834e52 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -6,7 +6,6 @@ set(LLVM_OPTIONAL_SOURCES CallGraph.cpp DataFlowAnalysis.cpp DataLayoutAnalysis.cpp - LinearTransform.cpp Liveness.cpp LoopAnalysis.cpp NestedMatcher.cpp @@ -48,7 +47,6 @@ add_mlir_library(MLIRAnalysis add_mlir_library(MLIRLoopAnalysis AffineAnalysis.cpp AffineStructures.cpp - LinearTransform.cpp LoopAnalysis.cpp NestedMatcher.cpp PresburgerSet.cpp diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt 
b/mlir/lib/Analysis/Presburger/CMakeLists.txt index d52d4ccdb1c2..d3187278db2f 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp + LinearTransform.cpp Matrix.cpp Simplex.cpp Utils.cpp diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 4eed7ca91dd4..627e2bb8728e 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -285,6 +285,24 @@ bool IntegerPolyhedron::hasConsistentState() const { return true; } +void IntegerPolyhedron::setAndEliminate(unsigned pos, + ArrayRef values) { + if (values.empty()) + return; + assert(pos + values.size() <= getNumIds() && + "invalid position or too many values"); + // Setting x_j = p in sum_i a_i x_i + c is equivalent to adding p*a_j to the + // constant term and removing the id x_j. We do this for all the ids + // pos, pos + 1, ... pos + values.size() - 1. 
+ for (unsigned r = 0, e = getNumInequalities(); r < e; r++) + for (unsigned i = 0, numVals = values.size(); i < numVals; ++i) + atIneq(r, getNumCols() - 1) += atIneq(r, pos + i) * values[i]; + for (unsigned r = 0, e = getNumEqualities(); r < e; r++) + for (unsigned i = 0, numVals = values.size(); i < numVals; ++i) + atEq(r, getNumCols() - 1) += atEq(r, pos + i) * values[i]; + removeIdRange(pos, pos + values.size()); +} + void IntegerPolyhedron::printSpace(raw_ostream &os) const { os << "\nConstraints (" << getNumDimIds() << " dims, " << getNumSymbolIds() << " symbols, " << getNumLocalIds() << " locals), (" << getNumConstraints() diff --git a/mlir/lib/Analysis/LinearTransform.cpp b/mlir/lib/Analysis/Presburger/LinearTransform.cpp similarity index 92% rename from mlir/lib/Analysis/LinearTransform.cpp rename to mlir/lib/Analysis/Presburger/LinearTransform.cpp index c4dc0e746840..09d7eb731576 100644 --- a/mlir/lib/Analysis/LinearTransform.cpp +++ b/mlir/lib/Analysis/Presburger/LinearTransform.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Analysis/LinearTransform.h" -#include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/Presburger/LinearTransform.h" +#include "mlir/Analysis/Presburger/IntegerPolyhedron.h" namespace mlir { @@ -135,12 +135,12 @@ LinearTransform::preMultiplyColumn(ArrayRef colVec) const { return result; } -FlatAffineConstraints -LinearTransform::applyTo(const FlatAffineConstraints &fac) const { - FlatAffineConstraints result(fac.getNumIds()); +IntegerPolyhedron +LinearTransform::applyTo(const IntegerPolyhedron &poly) const { + IntegerPolyhedron result(poly.getNumIds()); - for (unsigned i = 0, e = fac.getNumEqualities(); i < e; ++i) { - ArrayRef eq = fac.getEquality(i); + for (unsigned i = 0, e = poly.getNumEqualities(); i < e; ++i) { + ArrayRef eq = poly.getEquality(i); int64_t c = eq.back(); @@ -149,8 +149,8 @@ LinearTransform::applyTo(const FlatAffineConstraints 
&fac) const { result.addEquality(newEq); } - for (unsigned i = 0, e = fac.getNumInequalities(); i < e; ++i) { - ArrayRef ineq = fac.getInequality(i); + for (unsigned i = 0, e = poly.getNumInequalities(); i < e; ++i) { + ArrayRef ineq = poly.getInequality(i); int64_t c = ineq.back(); diff --git a/mlir/unittests/Analysis/CMakeLists.txt b/mlir/unittests/Analysis/CMakeLists.txt index b6340ec72812..b70d6822ac87 100644 --- a/mlir/unittests/Analysis/CMakeLists.txt +++ b/mlir/unittests/Analysis/CMakeLists.txt @@ -2,7 +2,6 @@ add_mlir_unittest(MLIRAnalysisTests AffineStructuresParser.cpp AffineStructuresParserTest.cpp AffineStructuresTest.cpp - LinearTransformTest.cpp PresburgerSetTest.cpp ) diff --git a/mlir/unittests/Analysis/Presburger/CMakeLists.txt b/mlir/unittests/Analysis/Presburger/CMakeLists.txt index fd4957a93a86..b371e4852b35 100644 --- a/mlir/unittests/Analysis/Presburger/CMakeLists.txt +++ b/mlir/unittests/Analysis/Presburger/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_unittest(MLIRPresburgerTests IntegerPolyhedronTest.cpp + LinearTransformTest.cpp MatrixTest.cpp SimplexTest.cpp ../AffineStructuresParser.cpp diff --git a/mlir/unittests/Analysis/LinearTransformTest.cpp b/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp similarity index 97% rename from mlir/unittests/Analysis/LinearTransformTest.cpp rename to mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp index 6cdb1ba95bb4..8a6650f609f7 100644 --- a/mlir/unittests/Analysis/LinearTransformTest.cpp +++ b/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Analysis/LinearTransform.h" +#include "mlir/Analysis/Presburger/LinearTransform.h" #include #include From a1e62aa75b66e2b25e85fd98f41a8d6134192783 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 05:51:35 +0000 Subject: [PATCH 378/992] Minor reflow of FloorDivSIOp/CeilDivSIOp folder to limit the number of 
APInt API calls (NFC) Cache the result of the comparison in boolean, and check early for 0 to leverage `(a < 0) == !(a > 0)`. --- .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index aa485d39e0db..2fe32597b03f 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -357,25 +357,30 @@ OpFoldResult arith::CeilDivSIOp::fold(ArrayRef operands) { overflowOrDiv0 = true; return a; } + if (!a) + return a; + // After this point we know that neither a or b are zero. unsigned bits = a.getBitWidth(); APInt zero = APInt::getZero(bits); - if (a.sgt(zero) && b.sgt(zero)) { + bool aGtZero = a.sgt(zero); + bool bGtZero = b.sgt(zero); + if (aGtZero && bGtZero) { // Both positive, return ceil(a, b). return signedCeilNonnegInputs(a, b, overflowOrDiv0); } - if (a.slt(zero) && b.slt(zero)) { + if (!aGtZero && !bGtZero) { // Both negative, return ceil(-a, -b). APInt posA = zero.ssub_ov(a, overflowOrDiv0); APInt posB = zero.ssub_ov(b, overflowOrDiv0); return signedCeilNonnegInputs(posA, posB, overflowOrDiv0); } - if (a.slt(zero) && b.sgt(zero)) { + if (!aGtZero && bGtZero) { // A is negative, b is positive, return - ( -a / b). APInt posA = zero.ssub_ov(a, overflowOrDiv0); APInt div = posA.sdiv_ov(b, overflowOrDiv0); return zero.ssub_ov(div, overflowOrDiv0); } - // A is positive (or zero), b is negative, return - (a / -b). + // A is positive, b is negative, return - (a / -b). APInt posB = zero.ssub_ov(b, overflowOrDiv0); APInt div = a.sdiv_ov(posB, overflowOrDiv0); return zero.ssub_ov(div, overflowOrDiv0); @@ -407,19 +412,24 @@ OpFoldResult arith::FloorDivSIOp::fold(ArrayRef operands) { overflowOrDiv0 = true; return a; } + if (!a) + return a; + // After this point we know that neither a or b are zero. 
unsigned bits = a.getBitWidth(); APInt zero = APInt::getZero(bits); - if (a.sge(zero) && b.sgt(zero)) { - // Both positive (or a is zero), return a / b. + bool aGtZero = a.sgt(zero); + bool bGtZero = b.sgt(zero); + if (aGtZero && bGtZero) { + // Both positive, return a / b. return a.sdiv_ov(b, overflowOrDiv0); } - if (a.sle(zero) && b.slt(zero)) { - // Both negative (or a is zero), return -a / -b. + if (!aGtZero && !bGtZero) { + // Both negative, return -a / -b. APInt posA = zero.ssub_ov(a, overflowOrDiv0); APInt posB = zero.ssub_ov(b, overflowOrDiv0); return posA.sdiv_ov(posB, overflowOrDiv0); } - if (a.slt(zero) && b.sgt(zero)) { + if (!aGtZero && bGtZero) { // A is negative, b is positive, return - ceil(-a, b). APInt posA = zero.ssub_ov(a, overflowOrDiv0); APInt ceil = signedCeilNonnegInputs(posA, b, overflowOrDiv0); From e4e463e7476920d1f52667447bda2bc1635ae390 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:02:21 +0000 Subject: [PATCH 379/992] Remove useless nesting blok and dead return statement in TosaToLinalg.cpp (NFC) Flagged by Coverity. 
--- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 408 +++++++++--------- 1 file changed, 200 insertions(+), 208 deletions(-) diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index f28527d185c1..ed6b3847db96 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1431,248 +1431,240 @@ class ResizeConverter : public OpRewritePattern { getNParallelLoopsAttrs(resultTy.getRank())); rewriter.replaceOp(op, genericOp.getResult(0)); - { - OpBuilder::InsertionGuard regionGuard(rewriter); - rewriter.createBlock(&genericOp.region(), genericOp.region().end(), - TypeRange({resultElementTy})); - Value batch = rewriter.create(loc, 0); - Value y = rewriter.create(loc, 1); - Value x = rewriter.create(loc, 2); - Value channel = rewriter.create(loc, 3); - - auto hwMin = rewriter.create( - loc, rewriter.getI32IntegerAttr(0)); - auto hMax = rewriter.create( - loc, rewriter.getI32IntegerAttr(imageH - 1)); - auto wMax = rewriter.create( - loc, rewriter.getI32IntegerAttr(imageW - 1)); + OpBuilder::InsertionGuard regionGuard(rewriter); + rewriter.createBlock(&genericOp.region(), genericOp.region().end(), + TypeRange({resultElementTy})); + Value batch = rewriter.create(loc, 0); + Value y = rewriter.create(loc, 1); + Value x = rewriter.create(loc, 2); + Value channel = rewriter.create(loc, 3); + + auto hwMin = + rewriter.create(loc, rewriter.getI32IntegerAttr(0)); + auto hMax = rewriter.create( + loc, rewriter.getI32IntegerAttr(imageH - 1)); + auto wMax = rewriter.create( + loc, rewriter.getI32IntegerAttr(imageW - 1)); + + Value inY = + rewriter.create(loc, rewriter.getI32Type(), y); + Value inX = + rewriter.create(loc, rewriter.getI32Type(), x); + + int32_t shift = op.shift(); + bool floatingPointMode = shift == 0; + + Value yStride, xStride, yOffset, xOffset; + if (floatingPointMode) { + yStride = rewriter.create(loc, op.stride_fp()[0]); + xStride = 
rewriter.create(loc, op.stride_fp()[1]); + yOffset = rewriter.create(loc, op.offset_fp()[0]); + xOffset = rewriter.create(loc, op.offset_fp()[1]); + } else { + SmallVector stride, offset; + getValuesFromIntArrayAttribute(op.stride(), stride); + getValuesFromIntArrayAttribute(op.offset(), offset); + + yStride = rewriter.create( + loc, rewriter.getI32IntegerAttr(stride[0])); + xStride = rewriter.create( + loc, rewriter.getI32IntegerAttr(stride[1])); + yOffset = rewriter.create( + loc, rewriter.getI32IntegerAttr(offset[0])); + xOffset = rewriter.create( + loc, rewriter.getI32IntegerAttr(offset[1])); + } - Value inY = - rewriter.create(loc, rewriter.getI32Type(), y); - Value inX = - rewriter.create(loc, rewriter.getI32Type(), x); + // Compute the the integer index and partial offset. + // x = x * stride + offset; + // ix = floor(x) + // dx = x - ix + Value ix, iy, dx, dy; + if (floatingPointMode) { + Value y = + rewriter.create(loc, rewriter.getF32Type(), inY); + Value x = + rewriter.create(loc, rewriter.getF32Type(), inX); - int32_t shift = op.shift(); - bool floatingPointMode = shift == 0; + y = rewriter.create(loc, y, yStride); + x = rewriter.create(loc, x, xStride); - Value yStride, xStride, yOffset, xOffset; - if (floatingPointMode) { - yStride = rewriter.create(loc, op.stride_fp()[0]); - xStride = rewriter.create(loc, op.stride_fp()[1]); - yOffset = rewriter.create(loc, op.offset_fp()[0]); - xOffset = rewriter.create(loc, op.offset_fp()[1]); - } else { - SmallVector stride, offset; - getValuesFromIntArrayAttribute(op.stride(), stride); - getValuesFromIntArrayAttribute(op.offset(), offset); - - yStride = rewriter.create( - loc, rewriter.getI32IntegerAttr(stride[0])); - xStride = rewriter.create( - loc, rewriter.getI32IntegerAttr(stride[1])); - yOffset = rewriter.create( - loc, rewriter.getI32IntegerAttr(offset[0])); - xOffset = rewriter.create( - loc, rewriter.getI32IntegerAttr(offset[1])); - } + y = rewriter.create(loc, y, yOffset); + x = rewriter.create(loc, x, 
xOffset); - // Compute the the integer index and partial offset. - // x = x * stride + offset; - // ix = floor(x) - // dx = x - ix - Value ix, iy, dx, dy; - if (floatingPointMode) { - Value y = - rewriter.create(loc, rewriter.getF32Type(), inY); - Value x = - rewriter.create(loc, rewriter.getF32Type(), inX); + iy = rewriter.create(loc, y); + ix = rewriter.create(loc, x); - y = rewriter.create(loc, y, yStride); - x = rewriter.create(loc, x, xStride); + dy = rewriter.create(loc, y, iy); + dx = rewriter.create(loc, x, ix); - y = rewriter.create(loc, y, yOffset); - x = rewriter.create(loc, x, xOffset); + iy = rewriter.create(loc, rewriter.getI32Type(), iy); + ix = rewriter.create(loc, rewriter.getI32Type(), ix); + } else { + Value shiftVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(shift)); - iy = rewriter.create(loc, y); - ix = rewriter.create(loc, x); + Value y = rewriter.create(loc, inY, yStride); + Value x = rewriter.create(loc, inX, xStride); - dy = rewriter.create(loc, y, iy); - dx = rewriter.create(loc, x, ix); + y = rewriter.create(loc, y, yOffset); + x = rewriter.create(loc, x, xOffset); - iy = rewriter.create(loc, rewriter.getI32Type(), iy); - ix = rewriter.create(loc, rewriter.getI32Type(), ix); - } else { - Value shiftVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(shift)); + iy = rewriter.create(loc, y, shiftVal); + ix = rewriter.create(loc, x, shiftVal); + + Value yTrunc = rewriter.create(loc, iy, shiftVal); + Value xTrunc = rewriter.create(loc, ix, shiftVal); - Value y = rewriter.create(loc, inY, yStride); - Value x = rewriter.create(loc, inX, xStride); + dy = rewriter.create(loc, y, yTrunc); + dx = rewriter.create(loc, x, xTrunc); + } - y = rewriter.create(loc, y, yOffset); - x = rewriter.create(loc, x, xOffset); + if (op.mode() == "NEAREST_NEIGHBOR") { + Value yPred, xPred; + // Round the index position towards the closest pixel location. 
+ if (floatingPointMode) { + auto halfVal = rewriter.create( + loc, rewriter.getF32FloatAttr(0.5f)); + yPred = rewriter.create(loc, arith::CmpFPredicate::OGE, + dy, halfVal); + xPred = rewriter.create(loc, arith::CmpFPredicate::OGE, + dx, halfVal); + } else { + auto halfVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(1 << (shift - 1))); + yPred = rewriter.create(loc, arith::CmpIPredicate::sge, + dy, halfVal); + xPred = rewriter.create(loc, arith::CmpIPredicate::sge, + dx, halfVal); + } - iy = rewriter.create(loc, y, shiftVal); - ix = rewriter.create(loc, x, shiftVal); + auto zeroVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(0)); + auto oneVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(1)); - Value yTrunc = rewriter.create(loc, iy, shiftVal); - Value xTrunc = rewriter.create(loc, ix, shiftVal); + auto yOffset = + rewriter.create(loc, yPred, oneVal, zeroVal); + auto xOffset = + rewriter.create(loc, xPred, oneVal, zeroVal); - dy = rewriter.create(loc, y, yTrunc); - dx = rewriter.create(loc, x, xTrunc); - } + iy = rewriter.create(loc, iy, yOffset); + ix = rewriter.create(loc, ix, xOffset); - if (op.mode() == "NEAREST_NEIGHBOR") { - Value yPred, xPred; - // Round the index position towards the closest pixel location. - if (floatingPointMode) { - auto halfVal = rewriter.create( - loc, rewriter.getF32FloatAttr(0.5f)); - yPred = rewriter.create(loc, arith::CmpFPredicate::OGE, - dy, halfVal); - xPred = rewriter.create(loc, arith::CmpFPredicate::OGE, - dx, halfVal); - } else { - auto halfVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(1 << (shift - 1))); - yPred = rewriter.create(loc, arith::CmpIPredicate::sge, - dy, halfVal); - xPred = rewriter.create(loc, arith::CmpIPredicate::sge, - dx, halfVal); - } + // Clamp the to be within the bounds of the input image. 
- auto zeroVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(0)); - auto oneVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(1)); + iy = clampHelper(loc, iy, hwMin, hMax, + arith::CmpIPredicate::slt, rewriter); + ix = clampHelper(loc, ix, hwMin, wMax, + arith::CmpIPredicate::slt, rewriter); - auto yOffset = - rewriter.create(loc, yPred, oneVal, zeroVal); - auto xOffset = - rewriter.create(loc, xPred, oneVal, zeroVal); + // Read the value from the input array. + iy = + rewriter.create(loc, rewriter.getIndexType(), iy); + ix = + rewriter.create(loc, rewriter.getIndexType(), ix); - iy = rewriter.create(loc, iy, yOffset); - ix = rewriter.create(loc, ix, xOffset); + Value result = rewriter.create( + loc, input, ValueRange{batch, iy, ix, channel}); - // Clamp the to be within the bounds of the input image. + rewriter.create(loc, result); - iy = clampHelper(loc, iy, hwMin, hMax, - arith::CmpIPredicate::slt, rewriter); - ix = clampHelper(loc, ix, hwMin, wMax, - arith::CmpIPredicate::slt, rewriter); + return success(); + } - // Read the value from the input array. 
- iy = rewriter.create(loc, rewriter.getIndexType(), - iy); - ix = rewriter.create(loc, rewriter.getIndexType(), - ix); + if (op.mode() == "BILINEAR") { + Value y0 = iy; + Value x0 = ix; - Value result = rewriter.create( - loc, input, ValueRange{batch, iy, ix, channel}); + auto oneVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(1)); + Value y1 = rewriter.create(loc, y0, oneVal); + Value x1 = rewriter.create(loc, x0, oneVal); - rewriter.create(loc, result); + y0 = clampHelper(loc, y0, hwMin, hMax, + arith::CmpIPredicate::slt, rewriter); + y1 = clampHelper(loc, y1, hwMin, hMax, + arith::CmpIPredicate::slt, rewriter); - return success(); - } + x0 = clampHelper(loc, x0, hwMin, wMax, + arith::CmpIPredicate::slt, rewriter); + x1 = clampHelper(loc, x1, hwMin, wMax, + arith::CmpIPredicate::slt, rewriter); - if (op.mode() == "BILINEAR") { - Value y0 = iy; - Value x0 = ix; + y0 = + rewriter.create(loc, rewriter.getIndexType(), y0); + y1 = + rewriter.create(loc, rewriter.getIndexType(), y1); + x0 = + rewriter.create(loc, rewriter.getIndexType(), x0); + x1 = + rewriter.create(loc, rewriter.getIndexType(), x1); + + Value y0x0 = rewriter.create( + loc, input, ValueRange{batch, y0, x0, channel}); + Value y0x1 = rewriter.create( + loc, input, ValueRange{batch, y0, x1, channel}); + Value y1x0 = rewriter.create( + loc, input, ValueRange{batch, y1, x0, channel}); + Value y1x1 = rewriter.create( + loc, input, ValueRange{batch, y1, x1, channel}); + if (floatingPointMode) { auto oneVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(1)); - Value y1 = rewriter.create(loc, y0, oneVal); - Value x1 = rewriter.create(loc, x0, oneVal); - - y0 = clampHelper(loc, y0, hwMin, hMax, - arith::CmpIPredicate::slt, rewriter); - y1 = clampHelper(loc, y1, hwMin, hMax, - arith::CmpIPredicate::slt, rewriter); - - x0 = clampHelper(loc, x0, hwMin, wMax, - arith::CmpIPredicate::slt, rewriter); - x1 = clampHelper(loc, x1, hwMin, wMax, - arith::CmpIPredicate::slt, rewriter); - - y0 = 
rewriter.create(loc, rewriter.getIndexType(), - y0); - y1 = rewriter.create(loc, rewriter.getIndexType(), - y1); - x0 = rewriter.create(loc, rewriter.getIndexType(), - x0); - x1 = rewriter.create(loc, rewriter.getIndexType(), - x1); - - Value y0x0 = rewriter.create( - loc, input, ValueRange{batch, y0, x0, channel}); - Value y0x1 = rewriter.create( - loc, input, ValueRange{batch, y0, x1, channel}); - Value y1x0 = rewriter.create( - loc, input, ValueRange{batch, y1, x0, channel}); - Value y1x1 = rewriter.create( - loc, input, ValueRange{batch, y1, x1, channel}); - - if (floatingPointMode) { - auto oneVal = rewriter.create( - loc, rewriter.getF32FloatAttr(1.f)); - Value rightPart = dx; - Value leftPart = rewriter.create(loc, oneVal, dx); - - y0x0 = rewriter.create(loc, y0x0, leftPart); - y0x1 = rewriter.create(loc, y0x1, rightPart); - Value topAcc = rewriter.create(loc, y0x0, y0x1); - - y1x0 = rewriter.create(loc, y1x0, leftPart); - y1x1 = rewriter.create(loc, y1x1, rightPart); - Value bottomAcc = rewriter.create(loc, y1x0, y1x1); - - Value bottomPart = dy; - Value topPart = rewriter.create(loc, oneVal, dy); - topAcc = rewriter.create(loc, topAcc, topPart); - bottomAcc = - rewriter.create(loc, bottomAcc, bottomPart); - Value result = rewriter.create(loc, topAcc, bottomAcc); - - rewriter.create(loc, result); - return success(); - } - y0x0 = rewriter.create(loc, resultElementTy, y0x0); - y0x1 = rewriter.create(loc, resultElementTy, y0x1); - y1x0 = rewriter.create(loc, resultElementTy, y1x0); - y1x1 = rewriter.create(loc, resultElementTy, y1x1); - - if (resultElementTy.getIntOrFloatBitWidth() > 32) { - dx = rewriter.create(loc, resultElementTy, dx); - dy = rewriter.create(loc, resultElementTy, dy); - } + loc, rewriter.getF32FloatAttr(1.f)); + Value rightPart = dx; + Value leftPart = rewriter.create(loc, oneVal, dx); - auto unitVal = rewriter.create( - loc, rewriter.getIntegerAttr(resultElementTy, 1 << shift)); - Value rightPart = dx; - Value leftPart = 
rewriter.create(loc, unitVal, dx); + y0x0 = rewriter.create(loc, y0x0, leftPart); + y0x1 = rewriter.create(loc, y0x1, rightPart); + Value topAcc = rewriter.create(loc, y0x0, y0x1); - y0x0 = rewriter.create(loc, y0x0, leftPart); - y0x1 = rewriter.create(loc, y0x1, rightPart); - Value topAcc = rewriter.create(loc, y0x0, y0x1); + y1x0 = rewriter.create(loc, y1x0, leftPart); + y1x1 = rewriter.create(loc, y1x1, rightPart); + Value bottomAcc = rewriter.create(loc, y1x0, y1x1); - y1x0 = rewriter.create(loc, y1x0, leftPart); - y1x1 = rewriter.create(loc, y1x1, rightPart); - Value bottomAcc = rewriter.create(loc, y1x0, y1x1); + Value bottomPart = dy; + Value topPart = rewriter.create(loc, oneVal, dy); + topAcc = rewriter.create(loc, topAcc, topPart); + bottomAcc = rewriter.create(loc, bottomAcc, bottomPart); + Value result = rewriter.create(loc, topAcc, bottomAcc); - Value bottomPart = dy; - Value topPart = rewriter.create(loc, unitVal, dy); - topAcc = rewriter.create(loc, topAcc, topPart); - bottomAcc = - rewriter.create(loc, bottomAcc, bottomPart); - Value result = rewriter.create(loc, topAcc, bottomAcc); - - rewriter.create(loc, result); - return success(); + rewriter.create(loc, result); + return success(); + } + y0x0 = rewriter.create(loc, resultElementTy, y0x0); + y0x1 = rewriter.create(loc, resultElementTy, y0x1); + y1x0 = rewriter.create(loc, resultElementTy, y1x0); + y1x1 = rewriter.create(loc, resultElementTy, y1x1); + + if (resultElementTy.getIntOrFloatBitWidth() > 32) { + dx = rewriter.create(loc, resultElementTy, dx); + dy = rewriter.create(loc, resultElementTy, dy); } - return failure(); - } + auto unitVal = rewriter.create( + loc, rewriter.getIntegerAttr(resultElementTy, 1 << shift)); + Value rightPart = dx; + Value leftPart = rewriter.create(loc, unitVal, dx); - return success(); + y0x0 = rewriter.create(loc, y0x0, leftPart); + y0x1 = rewriter.create(loc, y0x1, rightPart); + Value topAcc = rewriter.create(loc, y0x0, y0x1); + + y1x0 = rewriter.create(loc, 
y1x0, leftPart); + y1x1 = rewriter.create(loc, y1x1, rightPart); + Value bottomAcc = rewriter.create(loc, y1x0, y1x1); + + Value bottomPart = dy; + Value topPart = rewriter.create(loc, unitVal, dy); + topAcc = rewriter.create(loc, topAcc, topPart); + bottomAcc = rewriter.create(loc, bottomAcc, bottomPart); + Value result = rewriter.create(loc, topAcc, bottomAcc); + + rewriter.create(loc, result); + return success(); + } } }; From 891a0d7ccd96bb09eacee3cf7439ac03408f2a14 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:03:34 +0000 Subject: [PATCH 380/992] Remove dead return after return (NFC) --- mlir/lib/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlir/lib/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.cpp b/mlir/lib/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.cpp index f8d59e08f983..564b8343df79 100644 --- a/mlir/lib/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.cpp +++ b/mlir/lib/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.cpp @@ -187,8 +187,6 @@ CmpIOpLowering::matchAndRewrite(arith::CmpIOp op, OpAdaptor adaptor, adaptor.getLhs(), adaptor.getRhs()); }, rewriter); - - return success(); } //===----------------------------------------------------------------------===// From 564619b786c35fc281eea5c3c45ff29c00b3c16d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:06:36 +0000 Subject: [PATCH 381/992] Use cast<> instead of dyn_cast<> when we don't check the result (NFC) --- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index f27b537d6362..638369e90c3a 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -2065,8 +2065,8 @@ void OpEmitter::genTypeInterfaceMethods() { return; // Generate 'inferReturnTypes' method declaration using the interface method // declared in 
'InferTypeOpInterface' op interface. - const auto *trait = dyn_cast( - op.getTrait("::mlir::InferTypeOpInterface::Trait")); + const auto *trait = + cast(op.getTrait("::mlir::InferTypeOpInterface::Trait")); Interface interface = trait->getInterface(); Method *method = [&]() -> Method * { for (const InterfaceMethod &interfaceMethod : interface.getMethods()) { From 29120a51307c3f114426649117060d068bc28cfe Mon Sep 17 00:00:00 2001 From: Groverkss Date: Mon, 3 Jan 2022 11:36:35 +0530 Subject: [PATCH 382/992] [MLIR][NFC] Fix clang-tidy errors in Analysis/Presburger/ This patch fixes clang-tidy errors related to different parameter names in header file and source file in Analysis/Presburger/ directory. --- mlir/include/mlir/Analysis/Presburger/Simplex.h | 6 +++--- mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h index 074353aa3a02..87486a481d30 100644 --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -181,7 +181,7 @@ class SimplexBase { void rollback(unsigned snapshot); /// Add all the constraints from the given IntegerPolyhedron. - void intersectIntegerPolyhedron(const IntegerPolyhedron &fac); + void intersectIntegerPolyhedron(const IntegerPolyhedron &poly); /// Returns a rational sample point. This should not be called when Simplex is /// empty. @@ -387,9 +387,9 @@ class Simplex : public SimplexBase { /// Check if the specified equality already holds in the polytope. bool isRedundantEquality(ArrayRef coeffs); - /// Returns true if this Simplex's polytope is a rational subset of `fac`. + /// Returns true if this Simplex's polytope is a rational subset of `poly`. /// Otherwise, returns false. 
- bool isRationalSubsetOf(const IntegerPolyhedron &fac); + bool isRationalSubsetOf(const IntegerPolyhedron &poly); private: friend class GBRSimplex; diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 627e2bb8728e..03de0e97aaee 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -168,16 +168,16 @@ void IntegerPolyhedron::removeInequality(unsigned pos) { inequalities.removeRow(pos); } -void IntegerPolyhedron::removeEqualityRange(unsigned begin, unsigned end) { - if (begin >= end) +void IntegerPolyhedron::removeEqualityRange(unsigned start, unsigned end) { + if (start >= end) return; - equalities.removeRows(begin, end - begin); + equalities.removeRows(start, end - start); } -void IntegerPolyhedron::removeInequalityRange(unsigned begin, unsigned end) { - if (begin >= end) +void IntegerPolyhedron::removeInequalityRange(unsigned start, unsigned end) { + if (start >= end) return; - inequalities.removeRows(begin, end - begin); + inequalities.removeRows(start, end - start); } void IntegerPolyhedron::swapId(unsigned posA, unsigned posB) { From a5a24c93706d5cf67ccaaaeb1b7ac13a801275b4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:17:00 +0000 Subject: [PATCH 383/992] Remove misused RAII gil_scoped_release/gil_scoped_acquire: without name they don't have any effect I'm not sure what is the right fix here, but adding a name to all these lead to many segfaults. 
Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D116506 --- mlir/lib/Bindings/Python/IRCore.cpp | 1 - mlir/lib/Bindings/Python/IRModule.cpp | 8 -------- 2 files changed, 9 deletions(-) diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 686153227203..b39a1ea844e4 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -868,7 +868,6 @@ void PyOperationBase::print(py::object fileObject, bool binary, mlirOpPrintingFlagsPrintGenericOpForm(flags); PyFileAccumulator accum(fileObject, binary); - py::gil_scoped_release(); mlirOperationPrintWithFlags(operation, flags, accum.getCallback(), accum.getUserData()); mlirOpPrintingFlagsDestroy(flags); diff --git a/mlir/lib/Bindings/Python/IRModule.cpp b/mlir/lib/Bindings/Python/IRModule.cpp index 633ffe4e111b..ba6b2d29f74b 100644 --- a/mlir/lib/Bindings/Python/IRModule.cpp +++ b/mlir/lib/Bindings/Python/IRModule.cpp @@ -35,7 +35,6 @@ PyGlobals::PyGlobals() { PyGlobals::~PyGlobals() { instance = nullptr; } void PyGlobals::loadDialectModule(llvm::StringRef dialectNamespace) { - py::gil_scoped_acquire(); if (loadedDialectModulesCache.contains(dialectNamespace)) return; // Since re-entrancy is possible, make a copy of the search prefixes. 
@@ -46,7 +45,6 @@ void PyGlobals::loadDialectModule(llvm::StringRef dialectNamespace) { moduleName.append(dialectNamespace.data(), dialectNamespace.size()); try { - py::gil_scoped_release(); loaded = py::module::import(moduleName.c_str()); } catch (py::error_already_set &e) { if (e.matches(PyExc_ModuleNotFoundError)) { @@ -64,7 +62,6 @@ void PyGlobals::loadDialectModule(llvm::StringRef dialectNamespace) { void PyGlobals::registerDialectImpl(const std::string &dialectNamespace, py::object pyClass) { - py::gil_scoped_acquire(); py::object &found = dialectClassMap[dialectNamespace]; if (found) { throw SetPyError(PyExc_RuntimeError, llvm::Twine("Dialect namespace '") + @@ -77,7 +74,6 @@ void PyGlobals::registerDialectImpl(const std::string &dialectNamespace, void PyGlobals::registerOperationImpl(const std::string &operationName, py::object pyClass, py::object rawOpViewClass) { - py::gil_scoped_acquire(); py::object &found = operationClassMap[operationName]; if (found) { throw SetPyError(PyExc_RuntimeError, llvm::Twine("Operation '") + @@ -90,7 +86,6 @@ void PyGlobals::registerOperationImpl(const std::string &operationName, llvm::Optional PyGlobals::lookupDialectClass(const std::string &dialectNamespace) { - py::gil_scoped_acquire(); loadDialectModule(dialectNamespace); // Fast match against the class map first (common case). const auto foundIt = dialectClassMap.find(dialectNamespace); @@ -109,7 +104,6 @@ PyGlobals::lookupDialectClass(const std::string &dialectNamespace) { llvm::Optional PyGlobals::lookupRawOpViewClass(llvm::StringRef operationName) { { - py::gil_scoped_acquire(); auto foundIt = rawOpViewClassMapCache.find(operationName); if (foundIt != rawOpViewClassMapCache.end()) { if (foundIt->second.is_none()) @@ -126,7 +120,6 @@ PyGlobals::lookupRawOpViewClass(llvm::StringRef operationName) { // Attempt to find from the canonical map and cache. 
{ - py::gil_scoped_acquire(); auto foundIt = rawOpViewClassMap.find(operationName); if (foundIt != rawOpViewClassMap.end()) { if (foundIt->second.is_none()) @@ -143,7 +136,6 @@ PyGlobals::lookupRawOpViewClass(llvm::StringRef operationName) { } void PyGlobals::clearImportCache() { - py::gil_scoped_acquire(); loadedDialectModulesCache.clear(); rawOpViewClassMapCache.clear(); } From 78389de4d396ee180aa87174b78996d4a83ae819 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:18:25 +0000 Subject: [PATCH 384/992] Add back missing return to non-void function It was incorrectly removed accidentally in e4e463e7476. --- mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index ed6b3847db96..e9a5c37708e6 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1665,6 +1665,7 @@ class ResizeConverter : public OpRewritePattern { rewriter.create(loc, result); return success(); } + return failure(); } }; From 834cf3be222d2acebc82b2a022f3db8918a2bfe9 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Mon, 3 Jan 2022 00:38:41 -0500 Subject: [PATCH 385/992] [MLIR][Arith] Canonicalize and/or with ext Replace and(ext(a),ext(b)) with ext(and(a,b)). This both reduces one instruction, and results in the computation (and/or) being done on a smaller type. 
Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116519 --- .../Dialect/Arithmetic/IR/ArithmeticOps.td | 2 + .../IR/ArithmeticCanonicalization.td | 28 +++++++++++ .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 18 ++++++++ .../test/Dialect/Arithmetic/canonicalize.mlir | 46 +++++++++++++++++++ 4 files changed, 94 insertions(+) diff --git a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td index fb6d32fd04f6..b57c05ab87e6 100644 --- a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td +++ b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td @@ -437,6 +437,7 @@ def Arith_AndIOp : Arith_IntBinaryOp<"andi", [Commutative, Idempotent]> { ``` }]; let hasFolder = 1; + let hasCanonicalizer = 1; } //===----------------------------------------------------------------------===// @@ -465,6 +466,7 @@ def Arith_OrIOp : Arith_IntBinaryOp<"ori", [Commutative, Idempotent]> { ``` }]; let hasFolder = 1; + let hasCanonicalizer = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td index 49ce6ec5ae0a..efe41f048ce3 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticCanonicalization.td @@ -136,4 +136,32 @@ def BitcastOfBitcast : def ExtSIOfExtUI : Pat<(Arith_ExtSIOp (Arith_ExtUIOp $x)), (Arith_ExtUIOp $x)>; +//===----------------------------------------------------------------------===// +// AndIOp +//===----------------------------------------------------------------------===// + +// and extui(x), extui(y) -> extui(and(x,y)) +def AndOfExtUI : + Pat<(Arith_AndIOp (Arith_ExtUIOp $x), (Arith_ExtUIOp $y)), (Arith_ExtUIOp (Arith_AndIOp $x, $y)), + [(Constraint> $x, $y)]>; + +// and extsi(x), extsi(y) -> extsi(and(x,y)) +def AndOfExtSI : + Pat<(Arith_AndIOp 
(Arith_ExtSIOp $x), (Arith_ExtSIOp $y)), (Arith_ExtSIOp (Arith_AndIOp $x, $y)), + [(Constraint> $x, $y)]>; + +//===----------------------------------------------------------------------===// +// OrIOp +//===----------------------------------------------------------------------===// + +// or extui(x), extui(y) -> extui(or(x,y)) +def OrOfExtUI : + Pat<(Arith_OrIOp (Arith_ExtUIOp $x), (Arith_ExtUIOp $y)), (Arith_ExtUIOp (Arith_OrIOp $x, $y)), + [(Constraint> $x, $y)]>; + +// or extsi(x), extsi(y) -> extsi(or(x,y)) +def OrOfExtSI : + Pat<(Arith_OrIOp (Arith_ExtSIOp $x), (Arith_ExtSIOp $y)), (Arith_ExtSIOp (Arith_OrIOp $x, $y)), + [(Constraint> $x, $y)]>; + #endif // ARITHMETIC_PATTERNS diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 2fe32597b03f..59af0a5c999b 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -901,6 +901,24 @@ bool arith::TruncFOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { return checkWidthChangeCast(inputs, outputs); } +//===----------------------------------------------------------------------===// +// AndIOp +//===----------------------------------------------------------------------===// + +void arith::AndIOp::getCanonicalizationPatterns( + OwningRewritePatternList &patterns, MLIRContext *context) { + patterns.insert(context); +} + +//===----------------------------------------------------------------------===// +// OrIOp +//===----------------------------------------------------------------------===// + +void arith::OrIOp::getCanonicalizationPatterns( + OwningRewritePatternList &patterns, MLIRContext *context) { + patterns.insert(context); +} + //===----------------------------------------------------------------------===// // Verifiers for casts between integers and floats. 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index 87a198cb7c4a..dff4141a3838 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -99,6 +99,52 @@ func @extSIOfExtSI(%arg0: i1) -> i64 { // ----- +// CHECK-LABEL: @andOfExtSI +// CHECK: %[[comb:.+]] = arith.andi %arg0, %arg1 : i8 +// CHECK: %[[ext:.+]] = arith.extsi %[[comb]] : i8 to i64 +// CHECK: return %[[ext]] +func @andOfExtSI(%arg0: i8, %arg1: i8) -> i64 { + %ext0 = arith.extsi %arg0 : i8 to i64 + %ext1 = arith.extsi %arg1 : i8 to i64 + %res = arith.andi %ext0, %ext1 : i64 + return %res : i64 +} + +// CHECK-LABEL: @andOfExtUI +// CHECK: %[[comb:.+]] = arith.andi %arg0, %arg1 : i8 +// CHECK: %[[ext:.+]] = arith.extui %[[comb]] : i8 to i64 +// CHECK: return %[[ext]] +func @andOfExtUI(%arg0: i8, %arg1: i8) -> i64 { + %ext0 = arith.extui %arg0 : i8 to i64 + %ext1 = arith.extui %arg1 : i8 to i64 + %res = arith.andi %ext0, %ext1 : i64 + return %res : i64 +} + +// CHECK-LABEL: @orOfExtSI +// CHECK: %[[comb:.+]] = arith.ori %arg0, %arg1 : i8 +// CHECK: %[[ext:.+]] = arith.extsi %[[comb]] : i8 to i64 +// CHECK: return %[[ext]] +func @orOfExtSI(%arg0: i8, %arg1: i8) -> i64 { + %ext0 = arith.extsi %arg0 : i8 to i64 + %ext1 = arith.extsi %arg1 : i8 to i64 + %res = arith.ori %ext0, %ext1 : i64 + return %res : i64 +} + +// CHECK-LABEL: @orOfExtUI +// CHECK: %[[comb:.+]] = arith.ori %arg0, %arg1 : i8 +// CHECK: %[[ext:.+]] = arith.extui %[[comb]] : i8 to i64 +// CHECK: return %[[ext]] +func @orOfExtUI(%arg0: i8, %arg1: i8) -> i64 { + %ext0 = arith.extui %arg0 : i8 to i64 + %ext1 = arith.extui %arg1 : i8 to i64 + %res = arith.ori %ext0, %ext1 : i64 + return %res : i64 +} + +// ----- + // CHECK-LABEL: @indexCastOfSignExtend // CHECK: %[[res:.+]] = arith.index_cast %arg0 : i8 to index // CHECK: return %[[res]] From 
93c791839a42cb5d81dc198452ef486fa712a860 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Sun, 2 Jan 2022 23:49:29 -0500 Subject: [PATCH 386/992] [MLIR] Canonicalize/fold select %x, 1, 0 to extui Two canonicalizations for select %x, 1, 0 If the return type is i1, return simply the condition %x, otherwise extui %x to the return type. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116517 --- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 42 +++++++++++++++++++- mlir/test/Dialect/Standard/canonicalize.mlir | 35 ++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index de45339f8955..a1047a58ce2c 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -840,9 +840,43 @@ struct SelectToNot : public OpRewritePattern { } }; +// select %arg, %c1, %c0 => extui %arg +struct SelectToExtUI : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SelectOp op, + PatternRewriter &rewriter) const override { + // Cannot extui i1 to i1, or i1 to f32 + if (!op.getType().isa() || op.getType().isInteger(1)) + return failure(); + + // select %x, c1, %c0 => extui %arg + if (matchPattern(op.getTrueValue(), m_One())) + if (matchPattern(op.getFalseValue(), m_Zero())) { + rewriter.replaceOpWithNewOp(op, op.getType(), + op.getCondition()); + return success(); + } + + // select %x, c0, %c1 => extui (xor %arg, true) + if (matchPattern(op.getTrueValue(), m_Zero())) + if (matchPattern(op.getFalseValue(), m_One())) { + rewriter.replaceOpWithNewOp( + op, op.getType(), + rewriter.create( + op.getLoc(), op.getCondition(), + rewriter.create( + op.getLoc(), 1, op.getCondition().getType()))); + return success(); + } + + return failure(); + } +}; + void SelectOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); + 
results.insert(context); } OpFoldResult SelectOp::fold(ArrayRef operands) { @@ -861,6 +895,12 @@ OpFoldResult SelectOp::fold(ArrayRef operands) { if (matchPattern(condition, m_Zero())) return falseVal; + // select %x, true, false => %x + if (getType().isInteger(1)) + if (matchPattern(getTrueValue(), m_One())) + if (matchPattern(getFalseValue(), m_Zero())) + return condition; + if (auto cmp = dyn_cast_or_null(condition.getDefiningOp())) { auto pred = cmp.getPredicate(); if (pred == arith::CmpIPredicate::eq || pred == arith::CmpIPredicate::ne) { diff --git a/mlir/test/Dialect/Standard/canonicalize.mlir b/mlir/test/Dialect/Standard/canonicalize.mlir index 875d9f7bc4fa..b44f78f96d07 100644 --- a/mlir/test/Dialect/Standard/canonicalize.mlir +++ b/mlir/test/Dialect/Standard/canonicalize.mlir @@ -29,6 +29,41 @@ func @select_cmp_ne_select(%arg0: i64, %arg1: i64) -> i64 { // ----- +// CHECK-LABEL: @select_extui +// CHECK: %[[res:.+]] = arith.extui %arg0 : i1 to i64 +// CHECK: return %[[res]] +func @select_extui(%arg0: i1) -> i64 { + %c0_i64 = arith.constant 0 : i64 + %c1_i64 = arith.constant 1 : i64 + %res = select %arg0, %c1_i64, %c0_i64 : i64 + return %res : i64 +} + +// CHECK-LABEL: @select_extui2 +// CHECK-DAG: %true = arith.constant true +// CHECK-DAG: %[[xor:.+]] = arith.xori %arg0, %true : i1 +// CHECK-DAG: %[[res:.+]] = arith.extui %[[xor]] : i1 to i64 +// CHECK: return %[[res]] +func @select_extui2(%arg0: i1) -> i64 { + %c0_i64 = arith.constant 0 : i64 + %c1_i64 = arith.constant 1 : i64 + %res = select %arg0, %c0_i64, %c1_i64 : i64 + return %res : i64 +} + +// ----- + +// CHECK-LABEL: @select_extui_i1 +// CHECK-NEXT: return %arg0 +func @select_extui_i1(%arg0: i1) -> i1 { + %c0_i1 = arith.constant false + %c1_i1 = arith.constant true + %res = select %arg0, %c1_i1, %c0_i1 : i1 + return %res : i1 +} + +// ----- + // CHECK-LABEL: @branchCondProp // CHECK: %[[trueval:.+]] = arith.constant true // CHECK: %[[falseval:.+]] = arith.constant false From 
1461bd13c91b78cc20b097e3dd9231f52f96ece0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:31:44 +0000 Subject: [PATCH 387/992] Revert "Define a `cppAccessorType` to const-ref in APFloatParameter and update ODS emitter to use it for verifier signatures" This reverts commit 89af17c0c74eb9d8d11870f6510e475eff74eef4. This broke the gcc5 build. --- mlir/include/mlir/IR/OpBase.td | 1 - mlir/lib/IR/BuiltinAttributes.cpp | 2 +- mlir/test/lib/Dialect/Test/TestAttributes.cpp | 2 +- mlir/test/mlir-tblgen/attrdefs.td | 8 ++++---- mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index e46b8fb35ec7..f1a5446ad1f9 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -3135,7 +3135,6 @@ class StringRefParameter : class APFloatParameter : AttrOrTypeParameter<"::llvm::APFloat", desc> { let comparator = "$_lhs.bitwiseIsEqual($_rhs)"; - let cppAccessorType = "const ::llvm::APFloat &"; } // For standard ArrayRefs, which require allocation. diff --git a/mlir/lib/IR/BuiltinAttributes.cpp b/mlir/lib/IR/BuiltinAttributes.cpp index 2a34ed34e7d1..802df2dada9d 100644 --- a/mlir/lib/IR/BuiltinAttributes.cpp +++ b/mlir/lib/IR/BuiltinAttributes.cpp @@ -283,7 +283,7 @@ double FloatAttr::getValueAsDouble(APFloat value) { } LogicalResult FloatAttr::verify(function_ref emitError, - Type type, const APFloat &value) { + Type type, APFloat value) { // Verify that the type is correct. 
if (!type.isa()) return emitError() << "expected floating point type"; diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 909f5b399543..3a860994f0e8 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -129,7 +129,7 @@ TestI64ElementsAttr::verify(function_ref emitError, LogicalResult TestAttrWithFormatAttr::verify(function_ref emitError, - int64_t one, StringRef two, IntegerAttr three, + int64_t one, std::string two, IntegerAttr three, ArrayRef four) { if (four.size() != static_cast(one)) return emitError() << "expected 'one' to equal 'four.size()'"; diff --git a/mlir/test/mlir-tblgen/attrdefs.td b/mlir/test/mlir-tblgen/attrdefs.td index f53705dd662f..34c8588225f7 100644 --- a/mlir/test/mlir-tblgen/attrdefs.td +++ b/mlir/test/mlir-tblgen/attrdefs.td @@ -61,8 +61,8 @@ def B_CompoundAttrA : TestAttr<"CompoundA"> { let genVerifyDecl = 1; // DECL-LABEL: class CompoundAAttr : public ::mlir::Attribute -// DECL: static CompoundAAttr getChecked(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, ::mlir::MLIRContext *context, int widthOfSomething, ::test::SimpleTypeA exampleTdType, const ::llvm::APFloat &apFloat, ::llvm::ArrayRef dims, ::mlir::Type inner); -// DECL: static ::mlir::LogicalResult verify(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, int widthOfSomething, ::test::SimpleTypeA exampleTdType, const ::llvm::APFloat &apFloat, ::llvm::ArrayRef dims, ::mlir::Type inner); +// DECL: static CompoundAAttr getChecked(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, ::mlir::MLIRContext *context, int widthOfSomething, ::test::SimpleTypeA exampleTdType, ::llvm::APFloat apFloat, ::llvm::ArrayRef dims, ::mlir::Type inner); +// DECL: static ::mlir::LogicalResult verify(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, int widthOfSomething, ::test::SimpleTypeA exampleTdType, ::llvm::APFloat apFloat, 
::llvm::ArrayRef dims, ::mlir::Type inner); // DECL: static constexpr ::llvm::StringLiteral getMnemonic() { // DECL: return {"cmpnd_a"}; // DECL: } @@ -71,7 +71,7 @@ def B_CompoundAttrA : TestAttr<"CompoundA"> { // DECL: void print(::mlir::AsmPrinter &printer) const; // DECL: int getWidthOfSomething() const; // DECL: ::test::SimpleTypeA getExampleTdType() const; -// DECL: const ::llvm::APFloat &getApFloat() const; +// DECL: ::llvm::APFloat getApFloat() const; // Check that AttributeSelfTypeParameter is handled properly. // DEF-LABEL: struct CompoundAAttrStorage @@ -139,5 +139,5 @@ def F_ParamWithAccessorTypeAttr : TestAttr<"ParamWithAccessorType"> { // DECL-LABEL: class ParamWithAccessorTypeAttr // DECL: StringRef getParam() // DEF: ParamWithAccessorTypeAttrStorage -// DEF: ParamWithAccessorTypeAttrStorage(StringRef param) +// DEF: ParamWithAccessorTypeAttrStorage(std::string param) // DEF: StringRef ParamWithAccessorTypeAttr::getParam() diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index f2df6c8ae765..d90adbc47ef4 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -315,7 +315,7 @@ DefGen::getBuilderParams(std::initializer_list prefix) const { SmallVector builderParams; builderParams.append(prefix.begin(), prefix.end()); for (auto ¶m : params) - builderParams.emplace_back(param.getCppAccessorType(), param.getName()); + builderParams.emplace_back(param.getCppType(), param.getName()); return builderParams; } From a3436f7340cb5bdb9dddf2b6b3e72ca6cf9f7e22 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 22:44:15 -0800 Subject: [PATCH 388/992] [API] Remove redundant member initialization (NFC) Identified with readability-redundant-member-init. 
--- lldb/source/API/SBBreakpoint.cpp | 2 +- lldb/source/API/SBBroadcaster.cpp | 2 +- .../API/SBCommandInterpreterRunOptions.cpp | 6 ++---- lldb/source/API/SBCommandReturnObject.cpp | 3 +-- lldb/source/API/SBDeclaration.cpp | 7 +++---- lldb/source/API/SBError.cpp | 4 ++-- lldb/source/API/SBEvent.cpp | 4 ++-- lldb/source/API/SBExecutionContext.cpp | 2 +- lldb/source/API/SBExpressionOptions.cpp | 3 +-- lldb/source/API/SBFileSpec.cpp | 2 +- lldb/source/API/SBFileSpecList.cpp | 3 +-- lldb/source/API/SBFrame.cpp | 2 +- lldb/source/API/SBInstruction.cpp | 2 +- lldb/source/API/SBInstructionList.cpp | 2 +- lldb/source/API/SBLaunchInfo.cpp | 3 +-- lldb/source/API/SBLineEntry.cpp | 9 +++------ lldb/source/API/SBListener.cpp | 4 +--- lldb/source/API/SBMemoryRegionInfo.cpp | 3 +-- lldb/source/API/SBModule.cpp | 9 +++------ lldb/source/API/SBModuleSpec.cpp | 2 +- lldb/source/API/SBPlatform.cpp | 16 +++++----------- lldb/source/API/SBProcess.cpp | 4 +--- lldb/source/API/SBProcessInfo.cpp | 4 ++-- lldb/source/API/SBQueue.cpp | 5 ++--- lldb/source/API/SBQueueItem.cpp | 4 +--- lldb/source/API/SBReproducerPrivate.h | 2 +- lldb/source/API/SBSection.cpp | 11 ++++------- lldb/source/API/SBSourceManager.cpp | 5 ++--- lldb/source/API/SBStringList.cpp | 9 +++------ lldb/source/API/SBSymbolContext.cpp | 4 ++-- lldb/source/API/SBSymbolContextList.cpp | 3 +-- lldb/source/API/SBTarget.cpp | 4 +--- lldb/source/API/SBThread.cpp | 2 +- lldb/source/API/SBThreadCollection.cpp | 2 +- lldb/source/API/SBType.cpp | 12 +++++------- lldb/source/API/SBTypeCategory.cpp | 4 ++-- lldb/source/API/SBTypeEnumMember.cpp | 5 ++--- lldb/source/API/SBTypeFilter.cpp | 4 +--- lldb/source/API/SBTypeFormat.cpp | 4 +--- lldb/source/API/SBTypeNameSpecifier.cpp | 4 ++-- lldb/source/API/SBTypeSummary.cpp | 2 +- lldb/source/API/SBTypeSynthetic.cpp | 2 +- lldb/source/API/SBValue.cpp | 6 +++--- lldb/source/API/SBValueList.cpp | 10 ++++------ 44 files changed, 78 insertions(+), 124 deletions(-) diff --git 
a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp index 0f0a93519993..031ad7b62788 100644 --- a/lldb/source/API/SBBreakpoint.cpp +++ b/lldb/source/API/SBBreakpoint.cpp @@ -821,7 +821,7 @@ BreakpointSP SBBreakpoint::GetSP() const { return m_opaque_wp.lock(); } // This is simple collection of breakpoint id's and their target. class SBBreakpointListImpl { public: - SBBreakpointListImpl(lldb::TargetSP target_sp) : m_target_wp() { + SBBreakpointListImpl(lldb::TargetSP target_sp) { if (target_sp && target_sp->IsValid()) m_target_wp = target_sp; } diff --git a/lldb/source/API/SBBroadcaster.cpp b/lldb/source/API/SBBroadcaster.cpp index 2e6d837f102b..46558480aa1a 100644 --- a/lldb/source/API/SBBroadcaster.cpp +++ b/lldb/source/API/SBBroadcaster.cpp @@ -16,7 +16,7 @@ using namespace lldb; using namespace lldb_private; -SBBroadcaster::SBBroadcaster() : m_opaque_sp() { +SBBroadcaster::SBBroadcaster() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBroadcaster); } diff --git a/lldb/source/API/SBCommandInterpreterRunOptions.cpp b/lldb/source/API/SBCommandInterpreterRunOptions.cpp index 317ec6d37127..4d72926137cf 100644 --- a/lldb/source/API/SBCommandInterpreterRunOptions.cpp +++ b/lldb/source/API/SBCommandInterpreterRunOptions.cpp @@ -25,8 +25,7 @@ SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions() { } SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions( - const SBCommandInterpreterRunOptions &rhs) - : m_opaque_up() { + const SBCommandInterpreterRunOptions &rhs) { LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunOptions, (const lldb::SBCommandInterpreterRunOptions &), rhs); @@ -215,8 +214,7 @@ SBCommandInterpreterRunResult::SBCommandInterpreterRunResult( } SBCommandInterpreterRunResult::SBCommandInterpreterRunResult( - const CommandInterpreterRunResult &rhs) - : m_opaque_up() { + const CommandInterpreterRunResult &rhs) { m_opaque_up = std::make_unique(rhs); } diff --git a/lldb/source/API/SBCommandReturnObject.cpp 
b/lldb/source/API/SBCommandReturnObject.cpp index 00150d198fca..d66580f33ad6 100644 --- a/lldb/source/API/SBCommandReturnObject.cpp +++ b/lldb/source/API/SBCommandReturnObject.cpp @@ -55,8 +55,7 @@ SBCommandReturnObject::SBCommandReturnObject(CommandReturnObject &ref) (lldb_private::CommandReturnObject &), ref); } -SBCommandReturnObject::SBCommandReturnObject(const SBCommandReturnObject &rhs) - : m_opaque_up() { +SBCommandReturnObject::SBCommandReturnObject(const SBCommandReturnObject &rhs) { LLDB_RECORD_CONSTRUCTOR(SBCommandReturnObject, (const lldb::SBCommandReturnObject &), rhs); diff --git a/lldb/source/API/SBDeclaration.cpp b/lldb/source/API/SBDeclaration.cpp index 1496096e46d1..f44e55af331a 100644 --- a/lldb/source/API/SBDeclaration.cpp +++ b/lldb/source/API/SBDeclaration.cpp @@ -19,18 +19,17 @@ using namespace lldb; using namespace lldb_private; -SBDeclaration::SBDeclaration() : m_opaque_up() { +SBDeclaration::SBDeclaration() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBDeclaration); } -SBDeclaration::SBDeclaration(const SBDeclaration &rhs) : m_opaque_up() { +SBDeclaration::SBDeclaration(const SBDeclaration &rhs) { LLDB_RECORD_CONSTRUCTOR(SBDeclaration, (const lldb::SBDeclaration &), rhs); m_opaque_up = clone(rhs.m_opaque_up); } -SBDeclaration::SBDeclaration(const lldb_private::Declaration *lldb_object_ptr) - : m_opaque_up() { +SBDeclaration::SBDeclaration(const lldb_private::Declaration *lldb_object_ptr) { if (lldb_object_ptr) m_opaque_up = std::make_unique(*lldb_object_ptr); } diff --git a/lldb/source/API/SBError.cpp b/lldb/source/API/SBError.cpp index 89b5f26fd80c..d80183c4200a 100644 --- a/lldb/source/API/SBError.cpp +++ b/lldb/source/API/SBError.cpp @@ -17,9 +17,9 @@ using namespace lldb; using namespace lldb_private; -SBError::SBError() : m_opaque_up() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBError); } +SBError::SBError() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBError); } -SBError::SBError(const SBError &rhs) : m_opaque_up() { +SBError::SBError(const SBError &rhs) { 
LLDB_RECORD_CONSTRUCTOR(SBError, (const lldb::SBError &), rhs); m_opaque_up = clone(rhs.m_opaque_up); diff --git a/lldb/source/API/SBEvent.cpp b/lldb/source/API/SBEvent.cpp index a0b606e3812e..f34dd17d6c9e 100644 --- a/lldb/source/API/SBEvent.cpp +++ b/lldb/source/API/SBEvent.cpp @@ -22,7 +22,7 @@ using namespace lldb; using namespace lldb_private; -SBEvent::SBEvent() : m_event_sp() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEvent); } +SBEvent::SBEvent() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEvent); } SBEvent::SBEvent(uint32_t event_type, const char *cstr, uint32_t cstr_len) : m_event_sp(new Event(event_type, new EventDataBytes(cstr, cstr_len))), @@ -36,7 +36,7 @@ SBEvent::SBEvent(EventSP &event_sp) LLDB_RECORD_CONSTRUCTOR(SBEvent, (lldb::EventSP &), event_sp); } -SBEvent::SBEvent(Event *event_ptr) : m_event_sp(), m_opaque_ptr(event_ptr) { +SBEvent::SBEvent(Event *event_ptr) : m_opaque_ptr(event_ptr) { LLDB_RECORD_CONSTRUCTOR(SBEvent, (lldb_private::Event *), event_ptr); } diff --git a/lldb/source/API/SBExecutionContext.cpp b/lldb/source/API/SBExecutionContext.cpp index caf02b4164ea..8ebb152b7063 100644 --- a/lldb/source/API/SBExecutionContext.cpp +++ b/lldb/source/API/SBExecutionContext.cpp @@ -19,7 +19,7 @@ using namespace lldb; using namespace lldb_private; -SBExecutionContext::SBExecutionContext() : m_exe_ctx_sp() { +SBExecutionContext::SBExecutionContext() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBExecutionContext); } diff --git a/lldb/source/API/SBExpressionOptions.cpp b/lldb/source/API/SBExpressionOptions.cpp index 217e8ad5c21b..2c966b2ca486 100644 --- a/lldb/source/API/SBExpressionOptions.cpp +++ b/lldb/source/API/SBExpressionOptions.cpp @@ -20,8 +20,7 @@ SBExpressionOptions::SBExpressionOptions() LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBExpressionOptions); } -SBExpressionOptions::SBExpressionOptions(const SBExpressionOptions &rhs) - : m_opaque_up() { +SBExpressionOptions::SBExpressionOptions(const SBExpressionOptions &rhs) { LLDB_RECORD_CONSTRUCTOR(SBExpressionOptions, (const 
lldb::SBExpressionOptions &), rhs); diff --git a/lldb/source/API/SBFileSpec.cpp b/lldb/source/API/SBFileSpec.cpp index 0a6b63bb460c..2b9e2d002edd 100644 --- a/lldb/source/API/SBFileSpec.cpp +++ b/lldb/source/API/SBFileSpec.cpp @@ -27,7 +27,7 @@ SBFileSpec::SBFileSpec() : m_opaque_up(new lldb_private::FileSpec()) { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFileSpec); } -SBFileSpec::SBFileSpec(const SBFileSpec &rhs) : m_opaque_up() { +SBFileSpec::SBFileSpec(const SBFileSpec &rhs) { LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const lldb::SBFileSpec &), rhs); m_opaque_up = clone(rhs.m_opaque_up); diff --git a/lldb/source/API/SBFileSpecList.cpp b/lldb/source/API/SBFileSpecList.cpp index 768ff0affd15..8f20aed7e851 100644 --- a/lldb/source/API/SBFileSpecList.cpp +++ b/lldb/source/API/SBFileSpecList.cpp @@ -25,10 +25,9 @@ SBFileSpecList::SBFileSpecList() : m_opaque_up(new FileSpecList()) { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFileSpecList); } -SBFileSpecList::SBFileSpecList(const SBFileSpecList &rhs) : m_opaque_up() { +SBFileSpecList::SBFileSpecList(const SBFileSpecList &rhs) { LLDB_RECORD_CONSTRUCTOR(SBFileSpecList, (const lldb::SBFileSpecList &), rhs); - m_opaque_up = clone(rhs.m_opaque_up); } diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp index c6bc3288c4b2..ba9b268be83f 100644 --- a/lldb/source/API/SBFrame.cpp +++ b/lldb/source/API/SBFrame.cpp @@ -64,7 +64,7 @@ SBFrame::SBFrame(const StackFrameSP &lldb_object_sp) lldb_object_sp); } -SBFrame::SBFrame(const SBFrame &rhs) : m_opaque_sp() { +SBFrame::SBFrame(const SBFrame &rhs) { LLDB_RECORD_CONSTRUCTOR(SBFrame, (const lldb::SBFrame &), rhs); m_opaque_sp = clone(rhs.m_opaque_sp); diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index 579ddf84cf45..b845d3c50c44 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -66,7 +66,7 @@ class InstructionImpl { using namespace lldb; using namespace lldb_private; -SBInstruction::SBInstruction() : 
m_opaque_sp() { +SBInstruction::SBInstruction() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBInstruction); } diff --git a/lldb/source/API/SBInstructionList.cpp b/lldb/source/API/SBInstructionList.cpp index a0c6fbe7e338..859f5212c355 100644 --- a/lldb/source/API/SBInstructionList.cpp +++ b/lldb/source/API/SBInstructionList.cpp @@ -21,7 +21,7 @@ using namespace lldb; using namespace lldb_private; -SBInstructionList::SBInstructionList() : m_opaque_sp() { +SBInstructionList::SBInstructionList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBInstructionList); } diff --git a/lldb/source/API/SBLaunchInfo.cpp b/lldb/source/API/SBLaunchInfo.cpp index 0735e62a16cf..95d9a1d935cc 100644 --- a/lldb/source/API/SBLaunchInfo.cpp +++ b/lldb/source/API/SBLaunchInfo.cpp @@ -23,8 +23,7 @@ using namespace lldb_private; class lldb_private::SBLaunchInfoImpl : public ProcessLaunchInfo { public: - SBLaunchInfoImpl() - : ProcessLaunchInfo(), m_envp(GetEnvironment().getEnvp()) {} + SBLaunchInfoImpl() : m_envp(GetEnvironment().getEnvp()) {} const char *const *GetEnvp() const { return m_envp; } void RegenerateEnvp() { m_envp = GetEnvironment().getEnvp(); } diff --git a/lldb/source/API/SBLineEntry.cpp b/lldb/source/API/SBLineEntry.cpp index 29ffda9b0471..0221b19f5bdf 100644 --- a/lldb/source/API/SBLineEntry.cpp +++ b/lldb/source/API/SBLineEntry.cpp @@ -19,18 +19,15 @@ using namespace lldb; using namespace lldb_private; -SBLineEntry::SBLineEntry() : m_opaque_up() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBLineEntry); -} +SBLineEntry::SBLineEntry() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBLineEntry); } -SBLineEntry::SBLineEntry(const SBLineEntry &rhs) : m_opaque_up() { +SBLineEntry::SBLineEntry(const SBLineEntry &rhs) { LLDB_RECORD_CONSTRUCTOR(SBLineEntry, (const lldb::SBLineEntry &), rhs); m_opaque_up = clone(rhs.m_opaque_up); } -SBLineEntry::SBLineEntry(const lldb_private::LineEntry *lldb_object_ptr) - : m_opaque_up() { +SBLineEntry::SBLineEntry(const lldb_private::LineEntry *lldb_object_ptr) { if (lldb_object_ptr) 
m_opaque_up = std::make_unique(*lldb_object_ptr); } diff --git a/lldb/source/API/SBListener.cpp b/lldb/source/API/SBListener.cpp index 6e5e15de7b3d..da5c010befcf 100644 --- a/lldb/source/API/SBListener.cpp +++ b/lldb/source/API/SBListener.cpp @@ -20,9 +20,7 @@ using namespace lldb; using namespace lldb_private; -SBListener::SBListener() : m_opaque_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBListener); -} +SBListener::SBListener() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBListener); } SBListener::SBListener(const char *name) : m_opaque_sp(Listener::MakeListener(name)), m_unused_ptr(nullptr) { diff --git a/lldb/source/API/SBMemoryRegionInfo.cpp b/lldb/source/API/SBMemoryRegionInfo.cpp index 9cf7874b54a3..9a01faf3daa3 100644 --- a/lldb/source/API/SBMemoryRegionInfo.cpp +++ b/lldb/source/API/SBMemoryRegionInfo.cpp @@ -46,8 +46,7 @@ SBMemoryRegionInfo::SBMemoryRegionInfo(const MemoryRegionInfo *lldb_object_ptr) ref() = *lldb_object_ptr; } -SBMemoryRegionInfo::SBMemoryRegionInfo(const SBMemoryRegionInfo &rhs) - : m_opaque_up() { +SBMemoryRegionInfo::SBMemoryRegionInfo(const SBMemoryRegionInfo &rhs) { LLDB_RECORD_CONSTRUCTOR(SBMemoryRegionInfo, (const lldb::SBMemoryRegionInfo &), rhs); m_opaque_up = clone(rhs.m_opaque_up); diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp index 710ee8551bd6..ed17e8fdc9c3 100644 --- a/lldb/source/API/SBModule.cpp +++ b/lldb/source/API/SBModule.cpp @@ -29,13 +29,11 @@ using namespace lldb; using namespace lldb_private; -SBModule::SBModule() : m_opaque_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModule); -} +SBModule::SBModule() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModule); } SBModule::SBModule(const lldb::ModuleSP &module_sp) : m_opaque_sp(module_sp) {} -SBModule::SBModule(const SBModuleSpec &module_spec) : m_opaque_sp() { +SBModule::SBModule(const SBModuleSpec &module_spec) { LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModuleSpec &), module_spec); ModuleSP module_sp; @@ -49,8 +47,7 @@ SBModule::SBModule(const SBModule 
&rhs) : m_opaque_sp(rhs.m_opaque_sp) { LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModule &), rhs); } -SBModule::SBModule(lldb::SBProcess &process, lldb::addr_t header_addr) - : m_opaque_sp() { +SBModule::SBModule(lldb::SBProcess &process, lldb::addr_t header_addr) { LLDB_RECORD_CONSTRUCTOR(SBModule, (lldb::SBProcess &, lldb::addr_t), process, header_addr); diff --git a/lldb/source/API/SBModuleSpec.cpp b/lldb/source/API/SBModuleSpec.cpp index 5d88272a399b..d8154bea8a1a 100644 --- a/lldb/source/API/SBModuleSpec.cpp +++ b/lldb/source/API/SBModuleSpec.cpp @@ -23,7 +23,7 @@ SBModuleSpec::SBModuleSpec() : m_opaque_up(new lldb_private::ModuleSpec()) { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModuleSpec); } -SBModuleSpec::SBModuleSpec(const SBModuleSpec &rhs) : m_opaque_up() { +SBModuleSpec::SBModuleSpec(const SBModuleSpec &rhs) { LLDB_RECORD_CONSTRUCTOR(SBModuleSpec, (const lldb::SBModuleSpec &), rhs); m_opaque_up = clone(rhs.m_opaque_up); diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index d7a86f0ad1dd..a6769428a211 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -30,10 +30,7 @@ using namespace lldb_private; // PlatformConnectOptions struct PlatformConnectOptions { - PlatformConnectOptions(const char *url = nullptr) - : m_url(), m_rsync_options(), m_rsync_remote_path_prefix(), - - m_local_cache_directory() { + PlatformConnectOptions(const char *url = nullptr) { if (url && url[0]) m_url = url; } @@ -52,7 +49,7 @@ struct PlatformConnectOptions { struct PlatformShellCommand { PlatformShellCommand(llvm::StringRef shell_interpreter, llvm::StringRef shell_command) - : m_command(), m_working_dir(), m_status(0), m_signo(0) { + : m_status(0), m_signo(0) { if (!shell_interpreter.empty()) m_shell = shell_interpreter.str(); @@ -60,8 +57,7 @@ struct PlatformShellCommand { m_command = shell_command.str(); } - PlatformShellCommand(llvm::StringRef shell_command = llvm::StringRef()) - : m_shell(), m_command(), 
m_working_dir() { + PlatformShellCommand(llvm::StringRef shell_command = llvm::StringRef()) { if (!shell_command.empty()) m_command = shell_command.str(); } @@ -313,11 +309,9 @@ const char *SBPlatformShellCommand::GetOutput() { } // SBPlatform -SBPlatform::SBPlatform() : m_opaque_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBPlatform); -} +SBPlatform::SBPlatform() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBPlatform); } -SBPlatform::SBPlatform(const char *platform_name) : m_opaque_sp() { +SBPlatform::SBPlatform(const char *platform_name) { LLDB_RECORD_CONSTRUCTOR(SBPlatform, (const char *), platform_name); Status error; diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp index 797e19462800..3da96d3466bf 100644 --- a/lldb/source/API/SBProcess.cpp +++ b/lldb/source/API/SBProcess.cpp @@ -49,9 +49,7 @@ using namespace lldb; using namespace lldb_private; -SBProcess::SBProcess() : m_opaque_wp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcess); -} +SBProcess::SBProcess() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcess); } // SBProcess constructor diff --git a/lldb/source/API/SBProcessInfo.cpp b/lldb/source/API/SBProcessInfo.cpp index cba3bdc179f3..10a149f90c34 100644 --- a/lldb/source/API/SBProcessInfo.cpp +++ b/lldb/source/API/SBProcessInfo.cpp @@ -15,11 +15,11 @@ using namespace lldb; using namespace lldb_private; -SBProcessInfo::SBProcessInfo() : m_opaque_up() { +SBProcessInfo::SBProcessInfo() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcessInfo); } -SBProcessInfo::SBProcessInfo(const SBProcessInfo &rhs) : m_opaque_up() { +SBProcessInfo::SBProcessInfo(const SBProcessInfo &rhs) { LLDB_RECORD_CONSTRUCTOR(SBProcessInfo, (const lldb::SBProcessInfo &), rhs); m_opaque_up = clone(rhs.m_opaque_up); diff --git a/lldb/source/API/SBQueue.cpp b/lldb/source/API/SBQueue.cpp index 746df9e79d61..7981d956061e 100644 --- a/lldb/source/API/SBQueue.cpp +++ b/lldb/source/API/SBQueue.cpp @@ -27,11 +27,10 @@ namespace lldb_private { class QueueImpl { public: - QueueImpl() : m_queue_wp(), 
m_threads(), m_pending_items() {} + QueueImpl() {} QueueImpl(const lldb::QueueSP &queue_sp) - : m_queue_wp(), m_threads(), m_thread_list_fetched(false), - m_pending_items(), m_pending_items_fetched(false) { + : m_thread_list_fetched(false), m_pending_items_fetched(false) { m_queue_wp = queue_sp; } diff --git a/lldb/source/API/SBQueueItem.cpp b/lldb/source/API/SBQueueItem.cpp index 6cd9e4514caf..94981e34e070 100644 --- a/lldb/source/API/SBQueueItem.cpp +++ b/lldb/source/API/SBQueueItem.cpp @@ -21,9 +21,7 @@ using namespace lldb; using namespace lldb_private; // Constructors -SBQueueItem::SBQueueItem() : m_queue_item_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBQueueItem); -} +SBQueueItem::SBQueueItem() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBQueueItem); } SBQueueItem::SBQueueItem(const QueueItemSP &queue_item_sp) : m_queue_item_sp(queue_item_sp) { diff --git a/lldb/source/API/SBReproducerPrivate.h b/lldb/source/API/SBReproducerPrivate.h index 02ac31c2ad89..7d51beec4e94 100644 --- a/lldb/source/API/SBReproducerPrivate.h +++ b/lldb/source/API/SBReproducerPrivate.h @@ -59,7 +59,7 @@ class SBProvider : public Provider { class ReplayData { public: ReplayData(std::unique_ptr memory_buffer) - : m_memory_buffer(std::move(memory_buffer)), m_registry(), + : m_memory_buffer(std::move(memory_buffer)), m_deserializer(m_memory_buffer->getBuffer()) {} Deserializer &GetDeserializer() { return m_deserializer; } Registry &GetRegistry() { return m_registry; } diff --git a/lldb/source/API/SBSection.cpp b/lldb/source/API/SBSection.cpp index bb56fa18d9ca..71e80e04a0e2 100644 --- a/lldb/source/API/SBSection.cpp +++ b/lldb/source/API/SBSection.cpp @@ -20,18 +20,15 @@ using namespace lldb; using namespace lldb_private; -SBSection::SBSection() : m_opaque_wp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSection); -} +SBSection::SBSection() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSection); } SBSection::SBSection(const SBSection &rhs) : m_opaque_wp(rhs.m_opaque_wp) { LLDB_RECORD_CONSTRUCTOR(SBSection, (const 
lldb::SBSection &), rhs); } -SBSection::SBSection(const lldb::SectionSP §ion_sp) - : m_opaque_wp() // Don't init with section_sp otherwise this will throw if - // section_sp doesn't contain a valid Section * -{ +SBSection::SBSection(const lldb::SectionSP §ion_sp) { + // Don't init with section_sp otherwise this will throw if + // section_sp doesn't contain a valid Section * if (section_sp) m_opaque_wp = section_sp; } diff --git a/lldb/source/API/SBSourceManager.cpp b/lldb/source/API/SBSourceManager.cpp index 43c3443672f7..5241d054e274 100644 --- a/lldb/source/API/SBSourceManager.cpp +++ b/lldb/source/API/SBSourceManager.cpp @@ -24,10 +24,9 @@ namespace lldb_private { class SourceManagerImpl { public: SourceManagerImpl(const lldb::DebuggerSP &debugger_sp) - : m_debugger_wp(debugger_sp), m_target_wp() {} + : m_debugger_wp(debugger_sp) {} - SourceManagerImpl(const lldb::TargetSP &target_sp) - : m_debugger_wp(), m_target_wp(target_sp) {} + SourceManagerImpl(const lldb::TargetSP &target_sp) : m_target_wp(target_sp) {} SourceManagerImpl(const SourceManagerImpl &rhs) { if (&rhs == this) diff --git a/lldb/source/API/SBStringList.cpp b/lldb/source/API/SBStringList.cpp index d9b03692ec0e..a199957e1501 100644 --- a/lldb/source/API/SBStringList.cpp +++ b/lldb/source/API/SBStringList.cpp @@ -14,17 +14,14 @@ using namespace lldb; using namespace lldb_private; -SBStringList::SBStringList() : m_opaque_up() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStringList); -} +SBStringList::SBStringList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStringList); } -SBStringList::SBStringList(const lldb_private::StringList *lldb_strings_ptr) - : m_opaque_up() { +SBStringList::SBStringList(const lldb_private::StringList *lldb_strings_ptr) { if (lldb_strings_ptr) m_opaque_up = std::make_unique(*lldb_strings_ptr); } -SBStringList::SBStringList(const SBStringList &rhs) : m_opaque_up() { +SBStringList::SBStringList(const SBStringList &rhs) { LLDB_RECORD_CONSTRUCTOR(SBStringList, (const lldb::SBStringList &), 
rhs); m_opaque_up = clone(rhs.m_opaque_up); diff --git a/lldb/source/API/SBSymbolContext.cpp b/lldb/source/API/SBSymbolContext.cpp index 89fe051658ff..9b67d5fd3396 100644 --- a/lldb/source/API/SBSymbolContext.cpp +++ b/lldb/source/API/SBSymbolContext.cpp @@ -18,7 +18,7 @@ using namespace lldb; using namespace lldb_private; -SBSymbolContext::SBSymbolContext() : m_opaque_up() { +SBSymbolContext::SBSymbolContext() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContext); } @@ -28,7 +28,7 @@ SBSymbolContext::SBSymbolContext(const SymbolContext &sc) (const lldb_private::SymbolContext &), sc); } -SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) : m_opaque_up() { +SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) { LLDB_RECORD_CONSTRUCTOR(SBSymbolContext, (const lldb::SBSymbolContext &), rhs); diff --git a/lldb/source/API/SBSymbolContextList.cpp b/lldb/source/API/SBSymbolContextList.cpp index 70a8bbe6694c..3f36ba4569c3 100644 --- a/lldb/source/API/SBSymbolContextList.cpp +++ b/lldb/source/API/SBSymbolContextList.cpp @@ -20,8 +20,7 @@ SBSymbolContextList::SBSymbolContextList() LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContextList); } -SBSymbolContextList::SBSymbolContextList(const SBSymbolContextList &rhs) - : m_opaque_up() { +SBSymbolContextList::SBSymbolContextList(const SBSymbolContextList &rhs) { LLDB_RECORD_CONSTRUCTOR(SBSymbolContextList, (const lldb::SBSymbolContextList &), rhs); diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index dc79c77fee9e..7049c90b9fc1 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -93,9 +93,7 @@ static Status AttachToProcess(ProcessAttachInfo &attach_info, Target &target) { } // SBTarget constructor -SBTarget::SBTarget() : m_opaque_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTarget); -} +SBTarget::SBTarget() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTarget); } SBTarget::SBTarget(const SBTarget &rhs) : m_opaque_sp(rhs.m_opaque_sp) { LLDB_RECORD_CONSTRUCTOR(SBTarget, (const 
lldb::SBTarget &), rhs); diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 8d5b6f2a5423..f12826b31008 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -67,7 +67,7 @@ SBThread::SBThread(const ThreadSP &lldb_object_sp) LLDB_RECORD_CONSTRUCTOR(SBThread, (const lldb::ThreadSP &), lldb_object_sp); } -SBThread::SBThread(const SBThread &rhs) : m_opaque_sp() { +SBThread::SBThread(const SBThread &rhs) { LLDB_RECORD_CONSTRUCTOR(SBThread, (const lldb::SBThread &), rhs); m_opaque_sp = clone(rhs.m_opaque_sp); diff --git a/lldb/source/API/SBThreadCollection.cpp b/lldb/source/API/SBThreadCollection.cpp index bfca864d6bcd..cbcf20d9dd66 100644 --- a/lldb/source/API/SBThreadCollection.cpp +++ b/lldb/source/API/SBThreadCollection.cpp @@ -14,7 +14,7 @@ using namespace lldb; using namespace lldb_private; -SBThreadCollection::SBThreadCollection() : m_opaque_sp() { +SBThreadCollection::SBThreadCollection() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThreadCollection); } diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp index 550c4b065914..b95edc62d56b 100644 --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -26,7 +26,7 @@ using namespace lldb; using namespace lldb_private; -SBType::SBType() : m_opaque_sp() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBType); } +SBType::SBType() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBType); } SBType::SBType(const CompilerType &type) : m_opaque_sp(new TypeImpl( @@ -38,7 +38,7 @@ SBType::SBType(const lldb::TypeSP &type_sp) SBType::SBType(const lldb::TypeImplSP &type_impl_sp) : m_opaque_sp(type_impl_sp) {} -SBType::SBType(const SBType &rhs) : m_opaque_sp() { +SBType::SBType(const SBType &rhs) { LLDB_RECORD_CONSTRUCTOR(SBType, (const lldb::SBType &), rhs); if (this != &rhs) { @@ -649,13 +649,11 @@ uint32_t SBTypeList::GetSize() { SBTypeList::~SBTypeList() = default; -SBTypeMember::SBTypeMember() : m_opaque_up() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMember); -} 
+SBTypeMember::SBTypeMember() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMember); } SBTypeMember::~SBTypeMember() = default; -SBTypeMember::SBTypeMember(const SBTypeMember &rhs) : m_opaque_up() { +SBTypeMember::SBTypeMember(const SBTypeMember &rhs) { LLDB_RECORD_CONSTRUCTOR(SBTypeMember, (const lldb::SBTypeMember &), rhs); if (this != &rhs) { @@ -780,7 +778,7 @@ TypeMemberImpl &SBTypeMember::ref() { const TypeMemberImpl &SBTypeMember::ref() const { return *m_opaque_up; } -SBTypeMemberFunction::SBTypeMemberFunction() : m_opaque_sp() { +SBTypeMemberFunction::SBTypeMemberFunction() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMemberFunction); } diff --git a/lldb/source/API/SBTypeCategory.cpp b/lldb/source/API/SBTypeCategory.cpp index e7432959b260..75acb5a14c50 100644 --- a/lldb/source/API/SBTypeCategory.cpp +++ b/lldb/source/API/SBTypeCategory.cpp @@ -26,11 +26,11 @@ using namespace lldb_private; typedef std::pair ImplType; -SBTypeCategory::SBTypeCategory() : m_opaque_sp() { +SBTypeCategory::SBTypeCategory() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeCategory); } -SBTypeCategory::SBTypeCategory(const char *name) : m_opaque_sp() { +SBTypeCategory::SBTypeCategory(const char *name) { DataVisualization::Categories::GetCategory(ConstString(name), m_opaque_sp); } diff --git a/lldb/source/API/SBTypeEnumMember.cpp b/lldb/source/API/SBTypeEnumMember.cpp index 43a4891b54b1..036b3ce7d1de 100644 --- a/lldb/source/API/SBTypeEnumMember.cpp +++ b/lldb/source/API/SBTypeEnumMember.cpp @@ -21,7 +21,7 @@ using namespace lldb; using namespace lldb_private; -SBTypeEnumMember::SBTypeEnumMember() : m_opaque_sp() { +SBTypeEnumMember::SBTypeEnumMember() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeEnumMember); } @@ -31,8 +31,7 @@ SBTypeEnumMember::SBTypeEnumMember( const lldb::TypeEnumMemberImplSP &enum_member_sp) : m_opaque_sp(enum_member_sp) {} -SBTypeEnumMember::SBTypeEnumMember(const SBTypeEnumMember &rhs) - : m_opaque_sp() { +SBTypeEnumMember::SBTypeEnumMember(const SBTypeEnumMember &rhs) { 
LLDB_RECORD_CONSTRUCTOR(SBTypeEnumMember, (const lldb::SBTypeEnumMember &), rhs); diff --git a/lldb/source/API/SBTypeFilter.cpp b/lldb/source/API/SBTypeFilter.cpp index 5f91a194f16b..8b7fa4bc5b38 100644 --- a/lldb/source/API/SBTypeFilter.cpp +++ b/lldb/source/API/SBTypeFilter.cpp @@ -16,9 +16,7 @@ using namespace lldb; using namespace lldb_private; -SBTypeFilter::SBTypeFilter() : m_opaque_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFilter); -} +SBTypeFilter::SBTypeFilter() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFilter); } SBTypeFilter::SBTypeFilter(uint32_t options) : m_opaque_sp(TypeFilterImplSP(new TypeFilterImpl(options))) { diff --git a/lldb/source/API/SBTypeFormat.cpp b/lldb/source/API/SBTypeFormat.cpp index 70289bef8db5..eeb2cbb4465f 100644 --- a/lldb/source/API/SBTypeFormat.cpp +++ b/lldb/source/API/SBTypeFormat.cpp @@ -16,9 +16,7 @@ using namespace lldb; using namespace lldb_private; -SBTypeFormat::SBTypeFormat() : m_opaque_sp() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFormat); -} +SBTypeFormat::SBTypeFormat() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFormat); } SBTypeFormat::SBTypeFormat(lldb::Format format, uint32_t options) : m_opaque_sp( diff --git a/lldb/source/API/SBTypeNameSpecifier.cpp b/lldb/source/API/SBTypeNameSpecifier.cpp index 3673a5024530..b84c42eff9e3 100644 --- a/lldb/source/API/SBTypeNameSpecifier.cpp +++ b/lldb/source/API/SBTypeNameSpecifier.cpp @@ -17,7 +17,7 @@ using namespace lldb; using namespace lldb_private; -SBTypeNameSpecifier::SBTypeNameSpecifier() : m_opaque_sp() { +SBTypeNameSpecifier::SBTypeNameSpecifier() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeNameSpecifier); } @@ -30,7 +30,7 @@ SBTypeNameSpecifier::SBTypeNameSpecifier(const char *name, bool is_regex) m_opaque_sp.reset(); } -SBTypeNameSpecifier::SBTypeNameSpecifier(SBType type) : m_opaque_sp() { +SBTypeNameSpecifier::SBTypeNameSpecifier(SBType type) { LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, (lldb::SBType), type); if (type.IsValid()) diff --git 
a/lldb/source/API/SBTypeSummary.cpp b/lldb/source/API/SBTypeSummary.cpp index 2d7f8ef340c9..1072baf4fd9c 100644 --- a/lldb/source/API/SBTypeSummary.cpp +++ b/lldb/source/API/SBTypeSummary.cpp @@ -107,7 +107,7 @@ SBTypeSummaryOptions::SBTypeSummaryOptions( lldb_object); } -SBTypeSummary::SBTypeSummary() : m_opaque_sp() { +SBTypeSummary::SBTypeSummary() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSummary); } diff --git a/lldb/source/API/SBTypeSynthetic.cpp b/lldb/source/API/SBTypeSynthetic.cpp index af5e167b9c24..502feb40120b 100644 --- a/lldb/source/API/SBTypeSynthetic.cpp +++ b/lldb/source/API/SBTypeSynthetic.cpp @@ -16,7 +16,7 @@ using namespace lldb; using namespace lldb_private; -SBTypeSynthetic::SBTypeSynthetic() : m_opaque_sp() { +SBTypeSynthetic::SBTypeSynthetic() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSynthetic); } diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index e3325b8d36fa..d61f11bb807d 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -58,8 +58,8 @@ class ValueImpl { ValueImpl(lldb::ValueObjectSP in_valobj_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, const char *name = nullptr) - : m_valobj_sp(), m_use_dynamic(use_dynamic), - m_use_synthetic(use_synthetic), m_name(name) { + : m_use_dynamic(use_dynamic), m_use_synthetic(use_synthetic), + m_name(name) { if (in_valobj_sp) { if ((m_valobj_sp = in_valobj_sp->GetQualifiedRepresentationIfAvailable( lldb::eNoDynamicValues, false))) { @@ -215,7 +215,7 @@ class ValueLocker { Status m_lock_error; }; -SBValue::SBValue() : m_opaque_sp() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValue); } +SBValue::SBValue() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValue); } SBValue::SBValue(const lldb::ValueObjectSP &value_sp) { LLDB_RECORD_CONSTRUCTOR(SBValue, (const lldb::ValueObjectSP &), value_sp); diff --git a/lldb/source/API/SBValueList.cpp b/lldb/source/API/SBValueList.cpp index 0fd2a591c321..7e797d2b7801 100644 --- a/lldb/source/API/SBValueList.cpp +++ 
b/lldb/source/API/SBValueList.cpp @@ -19,7 +19,7 @@ using namespace lldb_private; class ValueListImpl { public: - ValueListImpl() : m_values() {} + ValueListImpl() {} ValueListImpl(const ValueListImpl &rhs) : m_values(rhs.m_values) {} @@ -67,18 +67,16 @@ class ValueListImpl { std::vector m_values; }; -SBValueList::SBValueList() : m_opaque_up() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValueList); -} +SBValueList::SBValueList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValueList); } -SBValueList::SBValueList(const SBValueList &rhs) : m_opaque_up() { +SBValueList::SBValueList(const SBValueList &rhs) { LLDB_RECORD_CONSTRUCTOR(SBValueList, (const lldb::SBValueList &), rhs); if (rhs.IsValid()) m_opaque_up = std::make_unique(*rhs); } -SBValueList::SBValueList(const ValueListImpl *lldb_object_ptr) : m_opaque_up() { +SBValueList::SBValueList(const ValueListImpl *lldb_object_ptr) { if (lldb_object_ptr) m_opaque_up = std::make_unique(*lldb_object_ptr); } From b788e352abb8abd166d62066dd24fcc1a3923856 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 22:53:09 -0800 Subject: [PATCH 389/992] [Transforms] Use all_of instead of any_of (NFC) --- mlir/lib/Transforms/NormalizeMemRefs.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp index aaf1a5717b46..b6f9d1858fa3 100644 --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -92,8 +92,8 @@ void NormalizeMemRefs::runOnOperation() { /// are satisfied will the value become a candidate for replacement. /// TODO: Extend this for DimOps. 
static bool isMemRefNormalizable(Value::user_range opUsers) { - return !llvm::any_of(opUsers, [](Operation *op) { - return !op->hasTrait(); + return llvm::all_of(opUsers, [](Operation *op) { + return op->hasTrait(); }); } From 7f42c40ff2684673cc9ffb375f688316a23064b0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Jan 2022 06:58:45 +0000 Subject: [PATCH 390/992] Fix doc on how to run clang-tidy on MLIR codebase (NFC) --- mlir/utils/clang-tidy/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/utils/clang-tidy/README.md b/mlir/utils/clang-tidy/README.md index f8ddcbe74f92..ca43a77c4a0e 100644 --- a/mlir/utils/clang-tidy/README.md +++ b/mlir/utils/clang-tidy/README.md @@ -12,8 +12,8 @@ the root of the repo: { time \ CLANG_TIDY=build-clang/bin/clang-tidy \ TIMING_TIDY=time \ - ./mlir/utils/apply-clang-tidy.sh build mlir ~/clang-tidy-fails/ -; } 2>&1 | tee ~/clang-tidy.log + ./mlir/utils/clang-tidy/apply-clang-tidy.sh build mlir ~/clang-tidy-fails/ +} 2>&1 | tee ~/clang-tidy.log ``` - `build-clang/` contains the result of a build of clang-tidy, configured From bc091e08621e8038c38abf986e5f6f4fb0280b1f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 2 Jan 2022 23:00:09 -0800 Subject: [PATCH 391/992] [RISCV] Prune more unnecessary vector pseudo instructions. NFC For floating point specific vector instructions, we don't need pseudos for mf8. 
Reviewed By: khchen Differential Revision: https://reviews.llvm.org/D116460 --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 72 ++++++++-------- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 82 ++++++++++--------- 2 files changed, 83 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 9dc2bcb36322..0bcc156a5017 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1440,10 +1440,10 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, case CASE_VFMA_SPLATS(FNMSUB): case CASE_VFMA_SPLATS(FNMACC): case CASE_VFMA_SPLATS(FNMSAC): - case CASE_VFMA_OPCODE_LMULS(FMACC, VV): - case CASE_VFMA_OPCODE_LMULS(FMSAC, VV): - case CASE_VFMA_OPCODE_LMULS(FNMACC, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): case CASE_VFMA_OPCODE_LMULS(MADD, VX): case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): case CASE_VFMA_OPCODE_LMULS(MACC, VX): @@ -1464,10 +1464,10 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, return false; return true; } - case CASE_VFMA_OPCODE_LMULS(FMADD, VV): - case CASE_VFMA_OPCODE_LMULS(FMSUB, VV): - case CASE_VFMA_OPCODE_LMULS(FNMADD, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV): case CASE_VFMA_OPCODE_LMULS(MADD, VV): case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { // If the tail policy is undisturbed we can't commute. 
@@ -1585,10 +1585,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, case CASE_VFMA_SPLATS(FNMADD): case CASE_VFMA_SPLATS(FNMSAC): case CASE_VFMA_SPLATS(FNMSUB): - case CASE_VFMA_OPCODE_LMULS(FMACC, VV): - case CASE_VFMA_OPCODE_LMULS(FMSAC, VV): - case CASE_VFMA_OPCODE_LMULS(FNMACC, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): case CASE_VFMA_OPCODE_LMULS(MADD, VX): case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): case CASE_VFMA_OPCODE_LMULS(MACC, VX): @@ -1611,10 +1611,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC) CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB) CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC) - CASE_VFMA_CHANGE_OPCODE_LMULS(FMACC, FMADD, VV) - CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV) - CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV) - CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX) CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX) CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX) @@ -1628,10 +1628,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } - case CASE_VFMA_OPCODE_LMULS(FMADD, VV): - case CASE_VFMA_OPCODE_LMULS(FMSUB, VV): - case CASE_VFMA_OPCODE_LMULS(FNMADD, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV): + case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV): + case 
CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV): case CASE_VFMA_OPCODE_LMULS(MADD, VV): case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); @@ -1642,10 +1642,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode"); - CASE_VFMA_CHANGE_OPCODE_LMULS(FMADD, FMACC, VV) - CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV) - CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV) - CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) } @@ -1674,13 +1674,16 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ RISCV::PseudoV##OP##_##LMUL##_TIED -#define CASE_WIDEOP_OPCODE_LMULS(OP) \ - CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ - case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ +#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ + CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ case CASE_WIDEOP_OPCODE_COMMON(OP, M4) + +#define CASE_WIDEOP_OPCODE_LMULS(OP) \ + CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ + case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) // clang-format on #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ @@ -1688,22 +1691,25 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, NewOpc = RISCV::PseudoV##OP##_##LMUL; \ break; -#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ - CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ +#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) +#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ + CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ + CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) + MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const { switch (MI.getOpcode()) { default: break; - case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV): - case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV): + case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): + case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): @@ -1713,8 +1719,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode"); - CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV) - CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 40ab0bb20402..fe06006c9798 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -72,9 +72,13 @@ def V_MF2 : LMULInfo<0b111, 4, VR, VR, VR, VR,/*NoVReg*/VR, "M // Used to iterate over all possible LMULs. defvar MxList = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; +// For floating point which don't need MF8. +defvar MxListF = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; // Used for widening and narrowing instructions as it doesn't contain M8. defvar MxListW = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4]; +// For floating point which don't need MF8. 
+defvar MxListFW = [V_MF4, V_MF2, V_M1, V_M2, V_M4]; // Use for zext/sext.vf2 defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; @@ -1592,6 +1596,12 @@ multiclass VPseudoBinaryV_VV { defm _VV : VPseudoBinary; } +// Similar to VPseudoBinaryV_VV, but uses MxListF. +multiclass VPseudoBinaryFV_VV { + foreach m = MxListF in + defm _VV : VPseudoBinary; +} + multiclass VPseudoVGTR_VV_EEW { foreach m = MxList in { foreach sew = EEWList in { @@ -1654,8 +1664,8 @@ multiclass VPseudoVALU_MM { // * The destination EEW is greater than the source EEW, the source EMUL is // at least 1, and the overlap is in the highest-numbered part of the // destination register group is legal. Otherwise, it is illegal. -multiclass VPseudoBinaryW_VV { - foreach m = MxListW in +multiclass VPseudoBinaryW_VV mxlist = MxListW> { + foreach m = mxlist in defm _VV : VPseudoBinary; } @@ -1674,8 +1684,8 @@ multiclass VPseudoBinaryW_VF { "@earlyclobber $rd">; } -multiclass VPseudoBinaryW_WV { - foreach m = MxListW in { +multiclass VPseudoBinaryW_WV mxlist = MxListW> { + foreach m = mxlist in { defm _WV : VPseudoBinary; defm _WV : VPseudoTiedBinary, Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; @@ -1797,7 +1807,7 @@ multiclass VPseudoVCLS_V { } multiclass VPseudoVSQR_V { - foreach m = MxList in { + foreach m = MxListF in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; @@ -1808,7 +1818,7 @@ multiclass VPseudoVSQR_V { } multiclass VPseudoVRCP_V { - foreach m = MxList in { + foreach m = MxListF in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; @@ -1871,8 +1881,8 @@ multiclass PseudoVEXT_VF8 { // lowest-numbered part of the source register group". // With LMUL<=1 the source and dest occupy a single register so any overlap // is in the lowest-numbered part. 
-multiclass VPseudoBinaryM_VV { - foreach m = MxList in +multiclass VPseudoBinaryM_VV mxlist = MxList> { + foreach m = mxlist in defm _VV : VPseudoBinaryM; } @@ -1987,14 +1997,14 @@ multiclass VPseudoVDIV_VV_VX { } multiclass VPseudoVFMUL_VV_VF { - defm "" : VPseudoBinaryV_VV, + defm "" : VPseudoBinaryFV_VV, Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>; defm "" : VPseudoBinaryV_VF, Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>; } multiclass VPseudoVFDIV_VV_VF { - defm "" : VPseudoBinaryV_VV, + defm "" : VPseudoBinaryFV_VV, Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>; defm "" : VPseudoBinaryV_VF, Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; @@ -2013,21 +2023,21 @@ multiclass VPseudoVALU_VV_VX { } multiclass VPseudoVSGNJ_VV_VF { - defm "" : VPseudoBinaryV_VV, + defm "" : VPseudoBinaryFV_VV, Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>; defm "" : VPseudoBinaryV_VF, Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>; } multiclass VPseudoVMAX_VV_VF { - defm "" : VPseudoBinaryV_VV, + defm "" : VPseudoBinaryFV_VV, Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>; defm "" : VPseudoBinaryV_VF, Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; } multiclass VPseudoVALU_VV_VF { - defm "" : VPseudoBinaryV_VV, + defm "" : VPseudoBinaryFV_VV, Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>; defm "" : VPseudoBinaryV_VF, Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; @@ -2060,17 +2070,12 @@ multiclass VPseudoVWMUL_VV_VX { } multiclass VPseudoVWMUL_VV_VF { - defm "" : VPseudoBinaryW_VV, + defm "" : VPseudoBinaryW_VV, Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>; defm "" : VPseudoBinaryW_VF, Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>; } -multiclass VPseudoBinaryW_VV_VF { - defm "" : VPseudoBinaryW_VV; - defm "" : VPseudoBinaryW_VF; -} - multiclass VPseudoVWALU_WV_WX { defm "" : VPseudoBinaryW_WV, Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; 
@@ -2079,14 +2084,14 @@ multiclass VPseudoVWALU_WV_WX { } multiclass VPseudoVFWALU_VV_VF { - defm "" : VPseudoBinaryW_VV, + defm "" : VPseudoBinaryW_VV, Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>; defm "" : VPseudoBinaryW_VF, Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>; } multiclass VPseudoVFWALU_WV_WF { - defm "" : VPseudoBinaryW_WV, + defm "" : VPseudoBinaryW_WV, Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>; defm "" : VPseudoBinaryW_WF, Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>; @@ -2191,8 +2196,9 @@ multiclass VPseudoTernaryWithPolicy { - foreach m = MxList in { +multiclass VPseudoTernaryV_VV_AAXA mxlist = MxList> { + foreach m = mxlist in { defm _VV : VPseudoTernaryWithPolicy; } @@ -2217,9 +2223,9 @@ multiclass VPseudoTernaryV_VF_AAXA { /*Commutable*/1>; } -multiclass VPseudoTernaryW_VV { +multiclass VPseudoTernaryW_VV mxlist = MxListW> { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in + foreach m = mxlist in defm _VV : VPseudoTernaryWithPolicy; } @@ -2252,7 +2258,7 @@ multiclass VPseudoVMAC_VV_VX_AAXA { } multiclass VPseudoVMAC_VV_VF_AAXA { - defm "" : VPseudoTernaryV_VV_AAXA, + defm "" : VPseudoTernaryV_VV_AAXA, Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>; defm "" : VPseudoTernaryV_VF_AAXA, Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>; @@ -2278,7 +2284,7 @@ multiclass VPseudoVWMAC_VX { } multiclass VPseudoVWMAC_VV_VF { - defm "" : VPseudoTernaryW_VV, + defm "" : VPseudoTernaryW_VV, Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>; defm "" : VPseudoTernaryW_VF, Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>; @@ -2301,7 +2307,7 @@ multiclass VPseudoVCMPM_VV_VX { } multiclass VPseudoVCMPM_VV_VF { - defm "" : VPseudoBinaryM_VV, + defm "" : VPseudoBinaryM_VV, Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>; defm "" : VPseudoBinaryM_VF, 
Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; @@ -2334,21 +2340,21 @@ multiclass VPseudoVWRED_VS { } multiclass VPseudoVFRED_VS { - foreach m = MxList in { + foreach m = MxListF in { defm _VS : VPseudoTernary, Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV, ReadVFRedV, ReadVMask]>; } } multiclass VPseudoVFREDO_VS { - foreach m = MxList in { + foreach m = MxListF in { defm _VS : VPseudoTernary, Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVMask]>; } } multiclass VPseudoVFWRED_VS { - foreach m = MxList in { + foreach m = MxListF in { defm _VS : VPseudoTernary, Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVMask]>; } @@ -2366,13 +2372,13 @@ multiclass VPseudoConversion, Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>; } multiclass VPseudoVCVTF_V { - foreach m = MxList in + foreach m = MxListF in defm _V : VPseudoConversion, Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>; } @@ -2385,7 +2391,7 @@ multiclass VPseudoConversionW_V { multiclass VPseudoVWCVTI_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in + foreach m = MxListFW in defm _V : VPseudoConversion, Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>; } @@ -2399,7 +2405,7 @@ multiclass VPseudoVWCVTF_V { multiclass VPseudoVWCVTD_V { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in + foreach m = MxListFW in defm _V : VPseudoConversion, Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>; } @@ -2413,14 +2419,14 @@ multiclass VPseudoVNCVTI_W { multiclass VPseudoVNCVTF_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in + foreach m = MxListFW in defm _W : VPseudoConversion, Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>; } multiclass VPseudoVNCVTD_W { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in + foreach m = MxListFW in defm _W : VPseudoConversion, Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>; } From 42a4f5103b7087b1460463d11f7ba68d1832abfc Mon Sep 17 00:00:00 2001 
From: Kazu Hirata Date: Sun, 2 Jan 2022 23:08:40 -0800 Subject: [PATCH 392/992] [Transform] Remove redundant declaration PollyAllowFullFunction (NFC) The variable is declared in ScopDetection.h, which ScopInliner.cpp includes. Identified by readability-redundant-declaration. --- polly/lib/Transform/ScopInliner.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/polly/lib/Transform/ScopInliner.cpp b/polly/lib/Transform/ScopInliner.cpp index b35d3518e72d..c93c00f5870c 100644 --- a/polly/lib/Transform/ScopInliner.cpp +++ b/polly/lib/Transform/ScopInliner.cpp @@ -26,8 +26,6 @@ using namespace llvm; using namespace polly; -extern bool polly::PollyAllowFullFunction; - namespace { class ScopInliner : public CallGraphSCCPass { using llvm::Pass::doInitialization; From 5527139302d9b0416b9fa7f1b84760d6acacda12 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Sun, 2 Jan 2022 23:12:06 -0800 Subject: [PATCH 393/992] [RISCV][VP] Add RVV codegen for [nX]vXi1 vp.select Expand [nX]vXi1 vp.select the same way as [nX]vXi1 vselect. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D115546 --- .../SelectionDAG/LegalizeVectorOps.cpp | 36 +++++++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 + .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 70 +++++++++++++ llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 98 +++++++++++++++++++ 4 files changed, 205 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 1493f36fcd3e..96c5a79cf995 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -133,6 +133,7 @@ class VectorLegalizer { /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. 
SDValue ExpandVSELECT(SDNode *Node); + SDValue ExpandVP_SELECT(SDNode *Node); SDValue ExpandSELECT(SDNode *Node); std::pair ExpandLoad(SDNode *N); SDValue ExpandStore(SDNode *N); @@ -349,6 +350,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: + case ISD::VP_SELECT: case ISD::SELECT_CC: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -718,6 +720,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { case ISD::VSELECT: Results.push_back(ExpandVSELECT(Node)); return; + case ISD::VP_SELECT: + Results.push_back(ExpandVP_SELECT(Node)); + return; case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; @@ -1195,6 +1200,37 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } +SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { + // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which + // do not support it natively. + SDLoc DL(Node); + + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); + SDValue EVL = Node->getOperand(3); + + EVT VT = Mask.getValueType(); + + // If we can't even use the basic vector operations of + // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. + if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Node); + + // This operation also isn't safe when the operands aren't also booleans. 
+ if (Op1.getValueType().getVectorElementType() != MVT::i1) + return DAG.UnrollVectorOp(Node); + + SDValue Ones = DAG.getAllOnesConstant(DL, VT); + SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL); + + Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL); + Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL); + return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL); +} + void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl &Results) { // Attempt to expand using TargetLowering. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c782a6be4d64..6154385d6901 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -561,6 +561,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::VP_SELECT, VT, Expand); setOperationAction(ISD::VP_AND, VT, Custom); setOperationAction(ISD::VP_OR, VT, Custom); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index a6b5af9e625a..38897b32b8e5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -4,6 +4,76 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @select_v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 zeroext %evl) { +; CHECK-LABEL: select_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 
+; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.select.v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.select.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @select_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 zeroext %evl) { +; CHECK-LABEL: select_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.select.v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.select.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @select_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 zeroext %evl) { +; CHECK-LABEL: select_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.select.v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.select.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @select_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 zeroext %evl) { +; CHECK-LABEL: select_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.select.v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.select.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @select_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 zeroext %evl) { +; CHECK-LABEL: select_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm 
v0, v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.select.v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 %evl) + ret <16 x i1> %v +} + declare <2 x i8> @llvm.vp.select.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) define <2 x i8> @select_v2i8(<2 x i1> %a, <2 x i8> %b, <2 x i8> %c, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 7d3ccc391238..ca407f8436e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -4,6 +4,104 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +declare @llvm.vp.select.nxv1i1(, , , i32) + +define @select_nxv1i1( %a, %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv1i1( %a, %b, %c, i32 %evl) + ret %v +} + +declare @llvm.vp.select.nxv2i1(, , , i32) + +define @select_nxv2i1( %a, %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv2i1( %a, %b, %c, i32 %evl) + ret %v +} + +declare @llvm.vp.select.nxv4i1(, , , i32) + +define @select_nxv4i1( %a, %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv4i1( %a, %b, %c, i32 %evl) + ret %v +} + +declare @llvm.vp.select.nxv8i1(, , , i32) + +define @select_nxv8i1( %a, %b, %c, i32 zeroext %evl) { +; 
CHECK-LABEL: select_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv8i1( %a, %b, %c, i32 %evl) + ret %v +} + +declare @llvm.vp.select.nxv16i1(, , , i32) + +define @select_nxv16i1( %a, %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv16i1( %a, %b, %c, i32 %evl) + ret %v +} + +declare @llvm.vp.select.nxv32i1(, , , i32) + +define @select_nxv32i1( %a, %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv32i1( %a, %b, %c, i32 %evl) + ret %v +} + +declare @llvm.vp.select.nxv64i1(, , , i32) + +define @select_nxv64i1( %a, %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmandn.mm v9, v9, v0 +; CHECK-NEXT: vmand.mm v8, v8, v0 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv64i1( %a, %b, %c, i32 %evl) + ret %v +} + declare @llvm.vp.select.nxv1i8(, , , i32) define @select_nxv1i8( %a, %b, %c, i32 zeroext %evl) { From db81324c8da56cd498b58d0081b49b08095ec534 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 2 Jan 2022 23:20:32 -0800 Subject: [PATCH 394/992] [Support] Add KnownBits::countMaxSignedBits(). Make KnownBits::countMinSignBits() always return at least 1. Even if we don't have any known bits, we can assume that there is at least 1 sign bit. 
This is consistent with ComputeNumSignBits which always returns at least 1. Add KnownBits::countMaxSignedBits() which computes the number of bits needed to represent all signed values with those known bits. This is the signed equivalent of countMaxActiveBits(). Split from D116469. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D116500 --- llvm/include/llvm/Support/KnownBits.h | 9 ++++++++- llvm/unittests/Support/KnownBitsTest.cpp | 11 +++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 1f32760a6fd1..1674bad21e95 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -249,7 +249,14 @@ struct KnownBits { return countMinLeadingZeros(); if (isNegative()) return countMinLeadingOnes(); - return 0; + // Every value has at least 1 sign bit. + return 1; + } + + /// Returns the maximum number of bits needed to represent all possible + /// signed values with these known bits. + unsigned countMaxSignedBits() const { + return getBitWidth() - countMinSignBits() + 1; } /// Returns the maximum number of trailing zero bits possible. 
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index f9631f29902f..d41402b69fc0 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -442,6 +442,17 @@ TEST(KnownBitsTest, CountMaxActiveBits) { }); } +TEST(KnownBitsTest, CountMaxSignedBits) { + unsigned Bits = 4; + ForeachKnownBits(Bits, [&](const KnownBits &Known) { + unsigned Expected = 0; + ForeachNumInKnownBits(Known, [&](const APInt &N) { + Expected = std::max(Expected, N.getMinSignedBits()); + }); + EXPECT_EQ(Expected, Known.countMaxSignedBits()); + }); +} + TEST(KnownBitsTest, SExtOrTrunc) { const unsigned NarrowerSize = 4; const unsigned BaseSize = 6; From fc78b62cbadaff472f8deaeae3aeac101ea04851 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 3 Jan 2022 01:29:39 +0000 Subject: [PATCH 395/992] [llvm][cmake] Normalize some indent - Change a stray tab to spaces - 4 not 2 spaces in second line of `set` like above. Reviewed By: mstorsjo Differential Revision: https://reviews.llvm.org/D116510 --- llvm/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 5d6079c4c83d..672183f62da0 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -70,7 +70,7 @@ set(LLVM_EXTRA_PROJECTS "flang") # List of all known projects in the mono repo set(LLVM_KNOWN_PROJECTS "${LLVM_ALL_PROJECTS};${LLVM_EXTRA_PROJECTS}") set(LLVM_ENABLE_PROJECTS "" CACHE STRING - "Semicolon-separated list of projects to build (${LLVM_KNOWN_PROJECTS}), or \"all\".") + "Semicolon-separated list of projects to build (${LLVM_KNOWN_PROJECTS}), or \"all\".") foreach(proj ${LLVM_ENABLE_PROJECTS}) if (NOT proj STREQUAL "all" AND NOT proj STREQUAL "llvm" AND NOT "${proj}" IN_LIST LLVM_KNOWN_PROJECTS) MESSAGE(FATAL_ERROR "${proj} isn't a known project: ${LLVM_KNOWN_PROJECTS}") @@ -344,7 +344,7 @@ set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of 
targets to build, or \"all\".") set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "" - CACHE STRING "Semicolon-separated list of experimental targets to build.") + CACHE STRING "Semicolon-separated list of experimental targets to build.") option(BUILD_SHARED_LIBS "Build all libraries as shared libraries instead of static" OFF) From 63078f79db6b2ee6d90edb376f72f5e6369e2c1a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 2 Jan 2022 23:59:52 -0800 Subject: [PATCH 396/992] [llvm] Fix namespace comments (NFC) Identified with llvm-namespace-comment. --- llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h | 2 +- llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h | 2 +- 
llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h | 2 +- llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h | 2 +- llvm/include/llvm/MC/MCFixedLenDisassembler.h | 2 +- 26 files changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h index 02f999a5b913..2eae2824bec3 100644 --- a/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h @@ -50,8 +50,8 @@ static inline const Function *parentFunctionOfValue(const Value *Val) { if (auto *Arg = dyn_cast(Val)) return Arg->getParent(); return nullptr; +} } // namespace cflaa } // namespace llvm -} #endif // LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h index 296a4840b779..b5f0596fceed 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -154,7 +154,7 @@ class WritableMappedBlockStream : public WritableBinaryStream { WritableBinaryStreamRef WriteInterface; }; -} // end namespace pdb +} // namespace msf } // end namespace llvm #endif // LLVM_DEBUGINFO_MSF_MAPPEDBLOCKSTREAM_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h index 1a9fb240a248..cde66d399243 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h @@ -38,7 +38,7 @@ class PDBSymbolExe : public PDBSymbol { void dumpChildren(raw_ostream &OS, StringRef Label, PDB_SymType ChildType, int Indent) const; }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLEXE_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h index 6be27c8d3bc7..f50057c68406 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h +++ 
b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h @@ -79,7 +79,7 @@ class PDBSymbolFunc : public PDBSymbol { uint32_t getCompilandId() const; }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNC_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h index 7152249cbd03..1cdc1811bb1a 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h @@ -40,7 +40,7 @@ class PDBSymbolFuncDebugEnd : public PDBSymbol { FORWARD_SYMBOL_METHOD(getVirtualAddress) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNCDEBUGEND_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h index 3125c271d2e8..021f27c7f0f7 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h @@ -39,7 +39,7 @@ class PDBSymbolFuncDebugStart : public PDBSymbol { FORWARD_SYMBOL_METHOD(getVirtualAddress) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNCDEBUGSTART_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h index 3625e23f014f..33eb36696cc2 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h @@ -39,7 +39,7 @@ class PDBSymbolLabel : public PDBSymbol { FORWARD_SYMBOL_METHOD(getVirtualAddress) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLLABEL_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h index e2b2545d78ec..f8dcb2ba9d5f 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h @@ -37,7 
+37,7 @@ class PDBSymbolPublicSymbol : public PDBSymbol { FORWARD_SYMBOL_METHOD(getUndecoratedName) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLPUBLICSYMBOL_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h index 274de8b0b16f..a5f795cc1303 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h @@ -46,7 +46,7 @@ class PDBSymbolThunk : public PDBSymbol { FORWARD_SYMBOL_METHOD(getVirtualBaseOffset) FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTHUNK_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h index c0215c9ee4b1..d4cd6e71423e 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h @@ -34,7 +34,7 @@ class PDBSymbolTypeArray : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEARRAY_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h index bab292ee0d46..bd2dbc914725 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h @@ -53,7 +53,7 @@ class PDBSymbolTypeBaseClass : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEBASECLASS_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h index 7d94c3c97a2b..df6309b1545c 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h @@ -30,7 +30,7 @@ class 
PDBSymbolTypeBuiltin : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEBUILTIN_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h index dc647aff48d3..7bf0317ff1ca 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h @@ -26,7 +26,7 @@ class PDBSymbolTypeCustom : public PDBSymbol { FORWARD_SYMBOL_METHOD(getOemSymbolId) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPECUSTOM_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h index 7a9e43785d67..5d742237bac4 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h @@ -26,7 +26,7 @@ class PDBSymbolTypeDimension : public PDBSymbol { FORWARD_SYMBOL_METHOD(getUpperBoundId) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEDIMENSION_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h index 3ac72801b202..0aab91039509 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h @@ -46,7 +46,7 @@ class PDBSymbolTypeEnum : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEENUM_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h index c4d9dd6308a3..d56a90662dae 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h @@ -27,7 +27,7 @@ class PDBSymbolTypeFriend : public PDBSymbol { 
FORWARD_SYMBOL_ID_METHOD(getType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEFRIEND_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h index 22d3623496f2..559ceec5aace 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h @@ -27,7 +27,7 @@ class PDBSymbolTypeFunctionArg : public PDBSymbol { FORWARD_SYMBOL_ID_METHOD(getType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEFUNCTIONARG_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h index a1491ca2e415..ceb4bff5b7b4 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h @@ -41,7 +41,7 @@ class PDBSymbolTypeFunctionSig : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEFUNCTIONSIG_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h index 6bc70bca82e7..5e7b83ce8004 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h @@ -25,7 +25,7 @@ class PDBSymbolTypeManaged : public PDBSymbol { FORWARD_SYMBOL_METHOD(getName) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEMANAGED_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h index b36f459e880c..da25eab50f9b 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h @@ -37,7 +37,7 @@ class PDBSymbolTypePointer : public PDBSymbol { 
FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEPOINTER_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h index 2712d0617e0e..8dc29ca26192 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h @@ -44,7 +44,7 @@ class PDBSymbolTypeTypedef : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPETYPEDEF_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h index e8161d311ea7..d08728dafa76 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h @@ -31,7 +31,7 @@ class PDBSymbolTypeVTable : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEVTABLE_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h index 614060867042..c7e2ac148503 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h @@ -29,7 +29,7 @@ class PDBSymbolTypeVTableShape : public PDBSymbol { FORWARD_SYMBOL_METHOD(isVolatileType) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEVTABLESHAPE_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h index cc29d38c2578..5b4909b800b9 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h @@ -24,7 +24,7 @@ class PDBSymbolUnknown : public PDBSymbol { void dump(PDBSymDumper &Dumper) const 
override; }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLUNKNOWN_H diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h index fd812cb2f793..19a8f414eb43 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h @@ -27,7 +27,7 @@ class PDBSymbolUsingNamespace : public PDBSymbol { FORWARD_SYMBOL_METHOD(getName) }; +} // namespace pdb } // namespace llvm -} #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLUSINGNAMESPACE_H diff --git a/llvm/include/llvm/MC/MCFixedLenDisassembler.h b/llvm/include/llvm/MC/MCFixedLenDisassembler.h index 218ae0d13189..1edf3899c130 100644 --- a/llvm/include/llvm/MC/MCFixedLenDisassembler.h +++ b/llvm/include/llvm/MC/MCFixedLenDisassembler.h @@ -27,7 +27,7 @@ enum DecoderOps { OPC_Fail // OPC_Fail() }; -} // namespace MCDecode +} // namespace MCD } // namespace llvm #endif From 80e20f9cbdd534b49785f24aa6fd05b70a6bd79d Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 3 Jan 2022 09:10:37 +0100 Subject: [PATCH 397/992] [clang-format] [docs] Fix Mozilla coding style URL. Fixes https://github.com/llvm/llvm-project/issues/52889. --- clang/docs/ClangFormatStyleOptions.rst | 2 +- clang/include/clang/Format/Format.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 111ebd90d95b..c2f85d564ba9 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -152,7 +152,7 @@ the configuration (without a prefix: ``Auto``). 
`_ * ``Mozilla`` A style complying with `Mozilla's style guide - `_ + `_ * ``WebKit`` A style complying with `WebKit's style guide `_ diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 0f97e80d425e..f6b4926de846 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3888,7 +3888,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language); FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language); /// Returns a format style complying with Mozilla's style guide: -/// https://developer.mozilla.org/en-US/docs/Developer_Guide/Coding_Style. +/// https://firefox-source-docs.mozilla.org/code-quality/coding-style/index.html. FormatStyle getMozillaStyle(); /// Returns a format style complying with Webkit's style guide: From c36081fe49068608f05fce6c5165444236260c90 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 3 Jan 2022 00:34:20 -0800 Subject: [PATCH 398/992] [NFC][sanitizer] Allow madvise in symbolizer --- .../lib/sanitizer_common/symbolizer/scripts/global_symbols.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index 0bb38ba951a8..071dbbb279c6 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -94,6 +94,7 @@ isxdigit U log10 U lseek U lseek64 U +madvise U malloc U mbrlen U mbrtowc U From 3478d64ee4bf350d98da9734aee1bd0faab60da9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 12:09:38 +0100 Subject: [PATCH 399/992] [DSE] Check for whole object overwrite even if dead store size not known If the killing store overwrites the whole object, we know that the preceding store is dead, regardless of the accessed offset or size. 
This case was previously only handled if the size of the dead store was also known. This allows us to perform conventional DSE for calls that write to an argument (but without known size). Differential Revision: https://reviews.llvm.org/D116267 --- .../Scalar/DeadStoreElimination.cpp | 28 +++++++++---------- .../DeadStoreElimination/trivial-dse-calls.ll | 8 ++---- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index eadbb4293539..a8f7c4f95b01 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -835,6 +835,20 @@ struct DSEState { if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc)) return OW_Unknown; + const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts(); + const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); + const Value *DeadUndObj = getUnderlyingObject(DeadPtr); + const Value *KillingUndObj = getUnderlyingObject(KillingPtr); + + // Check whether the killing store overwrites the whole object, in which + // case the size/offset of the dead store does not matter. + if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) { + uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F); + if (KillingUndObjSize != MemoryLocation::UnknownSize && + KillingUndObjSize == KillingLoc.Size.getValue()) + return OW_Complete; + } + // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) { @@ -875,14 +889,6 @@ struct DSEState { return OW_Complete; } - // Check to see if the killing store is to the entire object (either a - // global, an alloca, or a byval/inalloca argument). If so, then it clearly - // overwrites any other store to the same object. 
- const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts(); - const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); - const Value *DeadUndObj = getUnderlyingObject(DeadPtr); - const Value *KillingUndObj = getUnderlyingObject(KillingPtr); - // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. if (DeadUndObj != KillingUndObj) { @@ -896,12 +902,6 @@ struct DSEState { return OW_Unknown; } - // If the KillingI store is to a recognizable object, get its size. - uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F); - if (KillingUndObjSize != MemoryLocation::UnknownSize) - if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize) - return OW_Complete; - // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. diff --git a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll index 2b3838b00b3e..6c4db3385513 100644 --- a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll @@ -252,14 +252,12 @@ define void @test_self_read() { ret void } -; TODO: We should be able to remove the call because while we don't know -; the size of the write done by the call, we do know the following store -; writes to the entire contents of the alloca. +; We can remove the call because while we don't know the size of the write done +; by the call, we do know the following store writes to the entire contents of +; the alloca. 
define i32 @test_dse_overwrite() { ; CHECK-LABEL: @test_dse_overwrite( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* -; CHECK-NEXT: call void @f(i8* nocapture writeonly [[BITCAST]]) #[[ATTR1]] ; CHECK-NEXT: store i32 0, i32* [[A]], align 4 ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: ret i32 [[V]] From b5a2627423f3e03cd82a1ec92b43c7573eb4d64a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Jan 2022 09:38:04 +0100 Subject: [PATCH 400/992] [DSE] Fix DSE test to use non-extern global (NFC) The intended transform is not legal with an extern global, because the actual global defined in a different TU might have larger size. Make it non-extern to show that the desired transform already works. --- .../Transforms/DeadStoreElimination/trivial-dse-calls.ll | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll index 6c4db3385513..1966559f5d39 100644 --- a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll @@ -305,14 +305,11 @@ define i32 @test_neg_dse_unsized(i32* %a) { ret i32 %v } +@G = global i8 0 -@G = external global i8 - -; TODO: Should be able to kill call in analogous manner to test_dse_overwrite. -; Difference is non-alloca object. +; Same as test_dse_overwrite, but with a non-alloca object. 
define void @test_dse_non_alloca() { ; CHECK-LABEL: @test_dse_non_alloca( -; CHECK-NEXT: call void @f(i8* nocapture writeonly @G) #[[ATTR1]] ; CHECK-NEXT: store i8 0, i8* @G, align 1 ; CHECK-NEXT: ret void ; From 5afbfe33e7d6ce40af0ad6d99421b443c45b351b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Dec 2021 12:27:04 +0100 Subject: [PATCH 401/992] [ConstantFold] Make icmp of gep fold offset based We can fold an equality or unsigned icmp between base+offset1 and base+offset2 with inbounds offsets by comparing the offsets directly. This replaces a pair of specialized folds that tried to reason based on the GEP structure instead. One of those folds was plain wrong (because it does not account for negative offsets), while the other is unnecessarily complicated and limited (e.g. it will fail with bitcasts involved). The disadvantage of this change is that it requires data layout, so the fold is no longer performed by datalayout-independent constant folding. I don't think this is a loss in practice, but it does regress the ConstantExprFold.ll test, which checks folding without running any passes. 
Differential Revision: https://reviews.llvm.org/D116332 --- llvm/lib/Analysis/ConstantFolding.cpp | 25 ++++- llvm/lib/IR/ConstantFold.cpp | 93 +------------------ llvm/test/Assembler/ConstantExprFold.ll | 5 +- .../InstSimplify/ConstProp/icmp-global.ll | 5 +- 4 files changed, 30 insertions(+), 98 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 922b38e92785..c8ee130d6655 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1171,10 +1171,11 @@ Constant *llvm::ConstantFoldInstOperands(Instruction *I, return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); } -Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, +Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL, const TargetLibraryInfo *TLI) { + CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate; // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -1248,10 +1249,30 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); } + + // Convert pointer comparison (base+offset1) pred (base+offset2) into + // offset1 pred offset2, for the case where the offset is inbounds. This + // only works for equality and unsigned comparison, as inbounds permits + // crossing the sign boundary. However, the offset comparison itself is + // signed. 
+ if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) { + unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType()); + APInt Offset0(IndexWidth, 0); + Value *Stripped0 = + Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0); + APInt Offset1(IndexWidth, 0); + Value *Stripped1 = + Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1); + if (Stripped0 == Stripped1) + return ConstantExpr::getCompare( + ICmpInst::getSignedPredicate(Predicate), + ConstantInt::get(CE0->getContext(), Offset0), + ConstantInt::get(CE0->getContext(), Offset1)); + } } else if (isa(Ops1)) { // If RHS is a constant expression, but the left side isn't, swap the // operands and try again. - Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate); + Predicate = ICmpInst::getSwappedPredicate(Predicate); return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 16b0880ce2f9..36d27beff814 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1316,46 +1316,6 @@ static bool isMaybeZeroSizedType(Type *Ty) { return false; } -/// Compare the two constants as though they were getelementptr indices. -/// This allows coercion of the types to be the same thing. -/// -/// If the two constants are the "same" (after coercion), return 0. If the -/// first is less than the second, return -1, if the second is less than the -/// first, return 1. If the constants are not integral, return -2. -/// -static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) { - if (C1 == C2) return 0; - - // Ok, we found a different index. If they are not ConstantInt, we can't do - // anything with them. - if (!isa(C1) || !isa(C2)) - return -2; // don't know! - - // We cannot compare the indices if they don't fit in an int64_t. - if (cast(C1)->getValue().getActiveBits() > 64 || - cast(C2)->getValue().getActiveBits() > 64) - return -2; // don't know! 
- - // Ok, we have two differing integer indices. Sign extend them to be the same - // type. - int64_t C1Val = cast(C1)->getSExtValue(); - int64_t C2Val = cast(C2)->getSExtValue(); - - if (C1Val == C2Val) return 0; // They are equal - - // If the type being indexed over is really just a zero sized type, there is - // no pointer difference being made here. - if (isMaybeZeroSizedType(ElTy)) - return -2; // dunno. - - // If they are really different, now that they are the same type, then we - // found a difference! - if (C1Val < C2Val) - return -1; - else - return 1; -} - /// This function determines if there is anything we can decide about the two /// constants provided. This doesn't need to handle simple things like /// ConstantFP comparisons, but should instead handle ConstantExprs. @@ -1614,16 +1574,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, if (!GV2->hasExternalWeakLinkage()) return ICmpInst::ICMP_ULT; } else if (const GlobalValue *GV = dyn_cast(CE1Op0)) { - if (GV == GV2) { - // If this is a getelementptr of the same global, then it must be - // different. Because the types must match, the getelementptr could - // only have at most one index, and because we fold getelementptr's - // with a single zero index, it must be nonzero. - assert(CE1->getNumOperands() == 2 && - !CE1->getOperand(1)->isNullValue() && - "Surprising getelementptr!"); - return ICmpInst::ICMP_UGT; - } else { + if (GV != GV2) { if (CE1GEP->hasAllZeroIndices()) return areGlobalsPotentiallyEqual(GV, GV2); return ICmpInst::BAD_ICMP_PREDICATE; @@ -1649,48 +1600,6 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, cast(CE2Op0)); return ICmpInst::BAD_ICMP_PREDICATE; } - // Ok, we know that both getelementptr instructions are based on the - // same global. From this, we can precisely determine the relative - // ordering of the resultant pointers. 
- unsigned i = 1; - - // The logic below assumes that the result of the comparison - // can be determined by finding the first index that differs. - // This doesn't work if there is over-indexing in any - // subsequent indices, so check for that case first. - if (!CE1->isGEPWithNoNotionalOverIndexing() || - !CE2->isGEPWithNoNotionalOverIndexing()) - return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. - - // Compare all of the operands the GEP's have in common. - gep_type_iterator GTI = gep_type_begin(CE1); - for (;i != CE1->getNumOperands() && i != CE2->getNumOperands(); - ++i, ++GTI) - switch (IdxCompare(CE1->getOperand(i), - CE2->getOperand(i), GTI.getIndexedType())) { - case -1: return ICmpInst::ICMP_ULT; - case 1: return ICmpInst::ICMP_UGT; - case -2: return ICmpInst::BAD_ICMP_PREDICATE; - } - - // Ok, we ran out of things they have in common. If any leftovers - // are non-zero then we have a difference, otherwise we are equal. - for (; i < CE1->getNumOperands(); ++i) - if (!CE1->getOperand(i)->isNullValue()) { - if (isa(CE1->getOperand(i))) - return ICmpInst::ICMP_UGT; - else - return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. - } - - for (; i < CE2->getNumOperands(); ++i) - if (!CE2->getOperand(i)->isNullValue()) { - if (isa(CE2->getOperand(i))) - return ICmpInst::ICMP_ULT; - else - return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal. - } - return ICmpInst::ICMP_EQ; } } } diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll index ab85fa525ae3..fed2d6c4fd89 100644 --- a/llvm/test/Assembler/ConstantExprFold.ll +++ b/llvm/test/Assembler/ConstantExprFold.ll @@ -21,6 +21,7 @@ %Ty = type { i32, i32 } @B = external global %Ty +; @9 and @11 will be folded by the target-dependent constant folder instead. 
@9 = global i1 icmp ult (i64* @A, i64* getelementptr (i64, i64* @A, i64 1)) ; true @10 = global i1 icmp slt (i64* @A, i64* getelementptr (i64, i64* @A, i64 0)) ; false @11 = global i1 icmp ult (i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 0), @@ -50,9 +51,9 @@ ; CHECK: @[[GLOB7:[0-9]+]] = global i64 -1 ; CHECK: @[[GLOB8:[0-9]+]] = global i64* @A ; CHECK: @[[B:[a-zA-Z0-9_$"\\.-]+]] = external global [[TY:%.*]] -; CHECK: @[[GLOB9:[0-9]+]] = global i1 true +; CHECK: @[[GLOB9:[0-9]+]] = global i1 icmp ugt (i64* getelementptr inbounds (i64, i64* @A, i64 1), i64* @A) ; CHECK: @[[GLOB10:[0-9]+]] = global i1 false -; CHECK: @[[GLOB11:[0-9]+]] = global i1 true +; CHECK: @[[GLOB11:[0-9]+]] = global i1 icmp ult (i32* getelementptr inbounds ([[TY:%.*]], %Ty* @B, i64 0, i32 0), i32* getelementptr inbounds ([[TY]], %Ty* @B, i64 0, i32 1)) ; CHECK: @[[CONS:[a-zA-Z0-9_$"\\.-]+]] = weak global i32 0, align 8 ; CHECK: @[[GLOB12:[0-9]+]] = global i64 0 ; CHECK: @[[GLOB13:[0-9]+]] = global <2 x i8*> undef diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index d22317a18924..e681c62a4a61 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -198,9 +198,10 @@ define i1 @global_gep_sgt_global() { ret i1 %cmp } +; This should not fold to true, as the offset is negative. 
define i1 @global_gep_ugt_global_neg_offset() { ; CHECK-LABEL: @global_gep_ugt_global_neg_offset( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ugt ([2 x i32]* getelementptr ([2 x i32], [2 x i32]* @g, i64 -1), [2 x i32]* @g) ; %gep = getelementptr [2 x i32], [2 x i32]* @g, i64 -1 %cmp = icmp ugt [2 x i32]* %gep, @g @@ -239,7 +240,7 @@ define i1 @global_gep_sgt_global_gep() { define i1 @global_gep_ugt_global_gep_complex() { ; CHECK-LABEL: @global_gep_ugt_global_gep_complex( -; CHECK-NEXT: ret i1 icmp ugt (i32* bitcast (i8* getelementptr inbounds (i8, i8* bitcast ([2 x i32]* @g to i8*), i64 2) to i32*), i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 0)) +; CHECK-NEXT: ret i1 true ; %gep1 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 %gep2 = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 0 From ab0bfbdaaa5189377a605e0584d9ba9b1fc49e75 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 3 Jan 2022 09:40:24 +0100 Subject: [PATCH 402/992] [clang-format] Use get*StyleWithColumns helper. NFC. --- clang/unittests/Format/FormatTest.cpp | 54 +++++++++------------------ 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 374f3865acc3..005fc4db9159 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2758,10 +2758,9 @@ TEST_F(FormatTest, FormatsLabels) { } TEST_F(FormatTest, MultiLineControlStatements) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(20); Style.BreakBeforeBraces = FormatStyle::BraceBreakingStyle::BS_Custom; Style.BraceWrapping.AfterControlStatement = FormatStyle::BWACS_MultiLine; - Style.ColumnLimit = 20; // Short lines should keep opening brace on same line. 
EXPECT_EQ("if (foo) {\n" " bar();\n" @@ -3328,8 +3327,7 @@ TEST_F(FormatTest, FormatsEnumTypes) { } TEST_F(FormatTest, FormatsTypedefEnum) { - FormatStyle Style = getLLVMStyle(); - Style.ColumnLimit = 40; + FormatStyle Style = getLLVMStyleWithColumns(40); verifyFormat("typedef enum {} EmptyEnum;"); verifyFormat("typedef enum { A, B, C } ShortEnum;"); verifyFormat("typedef enum {\n" @@ -4808,9 +4806,8 @@ TEST_F(FormatTest, LayoutMacroDefinitionsStatementsSpanningBlocks) { } TEST_F(FormatTest, IndentPreprocessorDirectives) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(40); Style.IndentPPDirectives = FormatStyle::PPDIS_None; - Style.ColumnLimit = 40; verifyFormat("#ifdef _WIN32\n" "#define A 0\n" "#ifdef VAR2\n" @@ -6000,10 +5997,9 @@ TEST_F(FormatTest, BreakingBeforeNonAssigmentOperators) { } TEST_F(FormatTest, AllowBinPackingInsideArguments) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(40); Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; Style.BinPackArguments = false; - Style.ColumnLimit = 40; verifyFormat("void test() {\n" " someFunction(\n" " this + argument + is + quite\n" @@ -6182,9 +6178,8 @@ TEST_F(FormatTest, ConstructorInitializers) { } TEST_F(FormatTest, AllowAllConstructorInitializersOnNextLine) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(60); Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; - Style.ColumnLimit = 60; Style.BinPackParameters = false; for (int i = 0; i < 4; ++i) { @@ -6312,8 +6307,7 @@ TEST_F(FormatTest, AllowAllConstructorInitializersOnNextLine) { } TEST_F(FormatTest, AllowAllArgumentsOnNextLine) { - FormatStyle Style = getLLVMStyle(); - Style.ColumnLimit = 60; + FormatStyle Style = getLLVMStyleWithColumns(60); Style.BinPackArguments = false; for (int i = 0; i < 4; ++i) { // Test all combinations of parameters that should not have an effect. 
@@ -6367,8 +6361,7 @@ TEST_F(FormatTest, AllowAllArgumentsOnNextLine) { TEST_F(FormatTest, AllowAllArgumentsOnNextLineDontAlign) { // Check that AllowAllArgumentsOnNextLine is respected for both BAS_DontAlign // and BAS_Align. - auto Style = getLLVMStyle(); - Style.ColumnLimit = 35; + FormatStyle Style = getLLVMStyleWithColumns(35); StringRef Input = "functionCall(paramA, paramB, paramC);\n" "void functionDecl(int A, int B, int C);"; Style.AllowAllArgumentsOnNextLine = false; @@ -6869,8 +6862,7 @@ TEST_F(FormatTest, DeductionGuides) { TEST_F(FormatTest, BreaksFunctionDeclarationsWithTrailingTokens) { // Avoid breaking before trailing 'const' or other trailing annotations, if // they are not function-like. - FormatStyle Style = getGoogleStyle(); - Style.ColumnLimit = 47; + FormatStyle Style = getGoogleStyleWithColumns(47); verifyFormat("void someLongFunction(\n" " int someLoooooooooooooongParameter) const {\n}", getLLVMStyleWithColumns(47)); @@ -7689,8 +7681,7 @@ TEST_F(FormatTest, BreaksConditionalExpressions) { " : a;"); // Chained conditionals - FormatStyle Style = getLLVMStyle(); - Style.ColumnLimit = 70; + FormatStyle Style = getLLVMStyleWithColumns(70); Style.AlignOperands = FormatStyle::OAS_Align; verifyFormat("return aaaaaaaaaaaaaaaa ? 1111111111111111\n" " : bbbbbbbbbbbbbb ? 
2222222222222222\n" @@ -7843,9 +7834,8 @@ TEST_F(FormatTest, BreaksConditionalExpressions) { } TEST_F(FormatTest, BreaksConditionalExpressionsAfterOperator) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(70); Style.BreakBeforeTernaryOperators = false; - Style.ColumnLimit = 70; verifyFormat( "aaaa(aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaaaaaaaa ?\n" " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa :\n" @@ -11994,8 +11984,7 @@ TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) { "};", getGoogleStyle()); - FormatStyle NoColumnLimit = getLLVMStyle(); - NoColumnLimit.ColumnLimit = 0; + FormatStyle NoColumnLimit = getLLVMStyleWithColumns(0); EXPECT_EQ("A() : b(0) {}", format("A():b(0){}", NoColumnLimit)); EXPECT_EQ("class C {\n" " A() : b(0) {}\n" @@ -12154,12 +12143,11 @@ TEST_F(FormatTest, PullInlineOnlyFunctionDefinitionsIntoSingleLine) { } TEST_F(FormatTest, SplitEmptyFunction) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(40); Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.AfterFunction = true; Style.BraceWrapping.SplitEmptyFunction = false; - Style.ColumnLimit = 40; verifyFormat("int f()\n" "{}", @@ -12222,13 +12210,12 @@ TEST_F(FormatTest, SplitEmptyFunction) { } TEST_F(FormatTest, SplitEmptyFunctionButNotRecord) { - FormatStyle Style = getLLVMStyle(); + FormatStyle Style = getLLVMStyleWithColumns(40); Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.AfterFunction = true; Style.BraceWrapping.SplitEmptyFunction = true; Style.BraceWrapping.SplitEmptyRecord = false; - Style.ColumnLimit = 40; verifyFormat("class C {};", Style); verifyFormat("struct C {};", Style); @@ -17411,14 +17398,13 @@ TEST_F(FormatTest, AllmanBraceBreaking) { } TEST_F(FormatTest, WhitesmithsBraceBreaking) { - FormatStyle WhitesmithsBraceStyle = 
getLLVMStyle(); + FormatStyle WhitesmithsBraceStyle = getLLVMStyleWithColumns(0); WhitesmithsBraceStyle.BreakBeforeBraces = FormatStyle::BS_Whitesmiths; // Make a few changes to the style for testing purposes WhitesmithsBraceStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; WhitesmithsBraceStyle.AllowShortLambdasOnASingleLine = FormatStyle::SLS_None; - WhitesmithsBraceStyle.ColumnLimit = 0; // FIXME: this test case can't decide whether there should be a blank line // after the ~D() line or not. It adds one if one doesn't exist in the test @@ -18407,8 +18393,7 @@ TEST_F(FormatTest, UnderstandPragmaOption) { } TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { - FormatStyle Style = getLLVMStyle(); - Style.ColumnLimit = 20; + FormatStyle Style = getLLVMStyleWithColumns(20); // See PR41213 EXPECT_EQ("/*\n" @@ -20865,8 +20850,7 @@ TEST_F(FormatTest, FormatsBlocks) { } TEST_F(FormatTest, FormatsBlocksWithZeroColumnWidth) { - FormatStyle ZeroColumn = getLLVMStyle(); - ZeroColumn.ColumnLimit = 0; + FormatStyle ZeroColumn = getLLVMStyleWithColumns(0); verifyFormat("[[SessionService sharedService] " "loadWindowWithCompletionBlock:^(SessionWindow *window) {\n" @@ -22266,8 +22250,7 @@ TEST_F(FormatTest, WhitespaceSensitiveMacros) { TEST_F(FormatTest, VeryLongNamespaceCommentSplit) { // These tests are not in NamespaceFixer because that doesn't // test its interaction with line wrapping - FormatStyle Style = getLLVMStyle(); - Style.ColumnLimit = 80; + FormatStyle Style = getLLVMStyleWithColumns(80); verifyFormat("namespace {\n" "int i;\n" "int j;\n" @@ -22974,8 +22957,7 @@ TEST_F(FormatTest, CoroutineCoAwait) { verifyFormat("co_await [this](int a, int b) -> Task { co_return co_await " "foo(); }(x, y);"); - FormatStyle Style = getLLVMStyle(); - Style.ColumnLimit = 40; + FormatStyle Style = getLLVMStyleWithColumns(40); verifyFormat("co_await [this](int a, int b) -> Task {\n" " co_return co_await foo();\n" "}(x, y);", From d38637a0e6012cd32d901ed349ad733610293111 
Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 3 Jan 2022 15:03:55 +0800 Subject: [PATCH 403/992] [RISCV] Fix the code alignment for GroupFloatVectors. NFC Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116520 --- .../lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index fe06006c9798..7970ad0dad60 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -232,25 +232,25 @@ defset list AllVectors = { defset list GroupFloatVectors = { def VF16M2: GroupVTypeInfo; + VRM2, V_M2, f16, FPR16>; def VF16M4: GroupVTypeInfo; + VRM4, V_M4, f16, FPR16>; def VF16M8: GroupVTypeInfo; + VRM8, V_M8, f16, FPR16>; def VF32M2: GroupVTypeInfo; + VRM2, V_M2, f32, FPR32>; def VF32M4: GroupVTypeInfo; + VRM4, V_M4, f32, FPR32>; def VF32M8: GroupVTypeInfo; + VRM8, V_M8, f32, FPR32>; def VF64M2: GroupVTypeInfo; + VRM2, V_M2, f64, FPR64>; def VF64M4: GroupVTypeInfo; + VRM4, V_M4, f64, FPR64>; def VF64M8: GroupVTypeInfo; + VRM8, V_M8, f64, FPR64>; } } } From 127d955441649e97411cc1299f39d37aa743c073 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Jan 2022 10:14:00 +0100 Subject: [PATCH 404/992] [ConstantFold] Drop unused function (NFC) isMaybeZeroSizeType() is no longer used after 5afbfe33e7d6ce40af0ad6d99421b443c45b351b. --- llvm/lib/IR/ConstantFold.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 36d27beff814..c3f3d3c4b4c1 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1299,23 +1299,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, return nullptr; } -/// This type is zero-sized if it's an array or structure of zero-sized types. 
-/// The only leaf zero-sized type is an empty structure. -static bool isMaybeZeroSizedType(Type *Ty) { - if (StructType *STy = dyn_cast(Ty)) { - if (STy->isOpaque()) return true; // Can't say. - - // If all of elements have zero size, this does too. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - if (!isMaybeZeroSizedType(STy->getElementType(i))) return false; - return true; - - } else if (ArrayType *ATy = dyn_cast(Ty)) { - return isMaybeZeroSizedType(ATy->getElementType()); - } - return false; -} - /// This function determines if there is anything we can decide about the two /// constants provided. This doesn't need to handle simple things like /// ConstantFP comparisons, but should instead handle ConstantExprs. From 330cb0326911ca7090be56c1641ba86f26b6c3c8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Jan 2022 10:55:47 +0100 Subject: [PATCH 405/992] [LoadStoreVectorizer] Check for guaranteed-to-transfer (PR52950) Rather than checking for nounwind in particular, make sure the instruction is guaranteed to transfer execution, which will also handle non-willreturn calls correctly. Fixes https://github.com/llvm/llvm-project/issues/52950. 
--- .../Vectorize/LoadStoreVectorizer.cpp | 5 +- .../NVPTX/merge-across-side-effects.ll | 8 +- .../LoadStoreVectorizer/int_sideeffect.ll | 74 ++++++++++++++++++- 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 5a4a2f0924f6..d2e0d1d474b0 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -698,8 +698,9 @@ Vectorizer::getVectorizablePrefix(ArrayRef Chain) { ChainInstrs.push_back(&I); continue; } - if (I.mayThrow()) { - LLVM_DEBUG(dbgs() << "LSV: Found may-throw operation: " << I << '\n'); + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) { + LLVM_DEBUG(dbgs() << "LSV: Found instruction may not transfer execution: " + << I << '\n'); break; } if (I.mayReadOrWriteMemory()) diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll index 72c13b4d12e5..73623dade42f 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll @@ -200,10 +200,10 @@ define void @store_fn_readnone(i32* %p) #0 { } -attributes #0 = { nounwind } -attributes #1 = { nounwind writeonly } -attributes #2 = { nounwind readonly } +attributes #0 = { nounwind willreturn } +attributes #1 = { nounwind willreturn writeonly } +attributes #2 = { nounwind readonly willreturn } attributes #3 = { writeonly } attributes #4 = { readonly } ; readnone implies nounwind, so no need to test separately -attributes #5 = { nounwind readnone } +attributes #5 = { nounwind willreturn readnone } diff --git a/llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll b/llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll index 23108e308f81..bd47d66ddaaa 100644 --- 
a/llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll @@ -44,8 +44,8 @@ define void @test_sideeffect(float* %p) { declare void @foo() -define void @test_inaccessiblememonly(float* %p) { -; CHECK-LABEL: @test_inaccessiblememonly( +define void @test_inaccessiblememonly_nounwind_willreturn(float* %p) { +; CHECK-LABEL: @test_inaccessiblememonly_nounwind_willreturn( ; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 @@ -62,6 +62,41 @@ define void @test_inaccessiblememonly(float* %p) { ; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[P0]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16 ; CHECK-NEXT: ret void +; + %p0 = getelementptr float, float* %p, i64 0 + %p1 = getelementptr float, float* %p, i64 1 + %p2 = getelementptr float, float* %p, i64 2 + %p3 = getelementptr float, float* %p, i64 3 + %l0 = load float, float* %p0, align 16 + %l1 = load float, float* %p1 + %l2 = load float, float* %p2 + call void @foo() inaccessiblememonly nounwind willreturn + %l3 = load float, float* %p3 + store float %l0, float* %p0, align 16 + call void @foo() inaccessiblememonly nounwind willreturn + store float %l1, float* %p1 + store float %l2, float* %p2 + store float %l3, float* %p3 + ret void +} + +define void @test_inaccessiblememonly_not_willreturn(float* %p) { +; CHECK-LABEL: @test_inaccessiblememonly_not_willreturn( +; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 +; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[P0]], align 16 +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[P1]], 
align 4 +; CHECK-NEXT: [[L2:%.*]] = load float, float* [[P2]], align 4 +; CHECK-NEXT: call void @foo() #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[L3:%.*]] = load float, float* [[P3]], align 4 +; CHECK-NEXT: store float [[L0]], float* [[P0]], align 16 +; CHECK-NEXT: call void @foo() #[[ATTR2]] +; CHECK-NEXT: store float [[L1]], float* [[P1]], align 4 +; CHECK-NEXT: store float [[L2]], float* [[P2]], align 4 +; CHECK-NEXT: store float [[L3]], float* [[P3]], align 4 +; CHECK-NEXT: ret void ; %p0 = getelementptr float, float* %p, i64 0 %p1 = getelementptr float, float* %p, i64 1 @@ -79,3 +114,38 @@ define void @test_inaccessiblememonly(float* %p) { store float %l3, float* %p3 ret void } + +define void @test_inaccessiblememonly_not_nounwind(float* %p) { +; CHECK-LABEL: @test_inaccessiblememonly_not_nounwind( +; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 +; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[P0]], align 16 +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[P1]], align 4 +; CHECK-NEXT: [[L2:%.*]] = load float, float* [[P2]], align 4 +; CHECK-NEXT: call void @foo() #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[L3:%.*]] = load float, float* [[P3]], align 4 +; CHECK-NEXT: store float [[L0]], float* [[P0]], align 16 +; CHECK-NEXT: call void @foo() #[[ATTR3]] +; CHECK-NEXT: store float [[L1]], float* [[P1]], align 4 +; CHECK-NEXT: store float [[L2]], float* [[P2]], align 4 +; CHECK-NEXT: store float [[L3]], float* [[P3]], align 4 +; CHECK-NEXT: ret void +; + %p0 = getelementptr float, float* %p, i64 0 + %p1 = getelementptr float, float* %p, i64 1 + %p2 = getelementptr float, float* %p, i64 2 + %p3 = getelementptr float, float* %p, i64 3 + %l0 = load float, float* %p0, align 16 + %l1 = load float, float* %p1 + %l2 = load float, float* %p2 + call void @foo() 
inaccessiblememonly willreturn + %l3 = load float, float* %p3 + store float %l0, float* %p0, align 16 + call void @foo() inaccessiblememonly willreturn + store float %l1, float* %p1 + store float %l2, float* %p2 + store float %l3, float* %p3 + ret void +} From 791523bae6153b13bb41ba05c9fc89e502cc4a1a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 3 Jan 2022 09:59:39 +0000 Subject: [PATCH 406/992] [LV] Set loop metadata after VPlan execution (NFC). Setting the loop metadata for the vector loop after VPlan execution allows generating the full loop body during VPlan execution. This is in preparation for D113224. --- .../Transforms/Vectorize/LoopVectorize.cpp | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bb5691842111..a277ee37d12c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3547,25 +3547,6 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L, "Inconsistent vector loop preheader"); Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); - Optional VectorizedLoopID = - makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, - LLVMLoopVectorizeFollowupVectorized}); - if (VectorizedLoopID.hasValue()) { - L->setLoopID(VectorizedLoopID.getValue()); - - // Do not setAlreadyVectorized if loop attributes have been defined - // explicitly. - return LoopVectorPreHeader; - } - - // Keep all loop hints from the original loop on the vector loop (we'll - // replace the vectorizer-specific hints below). 
- if (MDNode *LID = OrigLoop->getLoopID()) - L->setLoopID(LID); - - LoopVectorizeHints Hints(L, true, *ORE, TTI); - Hints.setAlreadyVectorized(); - #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); LI->verify(*DT); @@ -7971,6 +7952,27 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), State); BestVPlan.execute(&State); + // Keep all loop hints from the original loop on the vector loop (we'll + // replace the vectorizer-specific hints below). + MDNode *OrigLoopID = OrigLoop->getLoopID(); + + Optional VectorizedLoopID = + makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, + LLVMLoopVectorizeFollowupVectorized}); + + Loop *L = LI->getLoopFor(State.CFG.PrevBB); + if (VectorizedLoopID.hasValue()) + L->setLoopID(VectorizedLoopID.getValue()); + else { + // Keep all loop hints from the original loop on the vector loop (we'll + // replace the vectorizer-specific hints below). + if (MDNode *LID = OrigLoop->getLoopID()) + L->setLoopID(LID); + + LoopVectorizeHints Hints(L, true, *ORE); + Hints.setAlreadyVectorized(); + } + // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. ILV.fixVectorizedLoop(State); From 0090cd4e7a24bedeb24dfe5b3b55167ad74e231e Mon Sep 17 00:00:00 2001 From: Zhao Wei Liew Date: Mon, 3 Jan 2022 11:30:24 +0100 Subject: [PATCH 407/992] [clang-format] Support inheriting from more than 1 parents in the fallback case Currently, we are unable to inherit from a chain of parent configs where the outermost parent config has `BasedOnStyle: InheritParentConfig` set. This patch adds a test case for this scenario, and adds support for it. 
To illustrate, suppose we have the following directory structure: ``` - e/ |- .clang-format (BasedOnStyle: InheritParentConfig) <-- outermost config |- sub/ |- .clang-format (BasedOnStyle: InheritParentConfig) |- sub/ |- .clang-format (BasedOnStyle: InheritParentConfig) |- code.cpp ``` Now consider what happens when we run `clang-format --style=file /e/sub/sub/code.cpp`. Without this patch, on a release build, only the innermost config will be applied. On a debug build, clang-format crashes due to an assertion failure. With this patch, clang-format behaves as we'd expect, applying all 3 configs. Reviewed By: HazardyKnusperkeks, curdeius Differential Revision: https://reviews.llvm.org/D116371 --- clang/lib/Format/Format.cpp | 31 +++++++++++-------------- clang/unittests/Format/FormatTest.cpp | 33 ++++++++++++++++++--------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index be01daa38929..f3c337a92822 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3288,6 +3288,16 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, auto dropDiagnosticHandler = [](const llvm::SMDiagnostic &, void *) {}; + auto applyChildFormatTexts = [&](FormatStyle *Style) { + for (const auto &MemBuf : llvm::reverse(ChildFormatTextToApply)) { + auto EC = parseConfiguration(*MemBuf, Style, AllowUnknownOptions, + dropDiagnosticHandler); + // It was already correctly parsed. 
+ assert(!EC); + static_cast(EC); + } + }; + for (StringRef Directory = Path; !Directory.empty(); Directory = llvm::sys::path::parent_path(Directory)) { @@ -3330,14 +3340,7 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, return Style; LLVM_DEBUG(llvm::dbgs() << "Applying child configurations\n"); - - for (const auto &MemBuf : llvm::reverse(ChildFormatTextToApply)) { - auto Ec = parseConfiguration(*MemBuf, &Style, AllowUnknownOptions, - dropDiagnosticHandler); - // It was already correctly parsed. - assert(!Ec); - static_cast(Ec); - } + applyChildFormatTexts(&Style); return Style; } @@ -3363,17 +3366,9 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, UnsuitableConfigFiles); if (!ChildFormatTextToApply.empty()) { - assert(ChildFormatTextToApply.size() == 1); - LLVM_DEBUG(llvm::dbgs() - << "Applying child configuration on fallback style\n"); - - auto Ec = - parseConfiguration(*ChildFormatTextToApply.front(), &FallbackStyle, - AllowUnknownOptions, dropDiagnosticHandler); - // It was already correctly parsed. - assert(!Ec); - static_cast(Ec); + << "Applying child configurations on fallback style\n"); + applyChildFormatTexts(&FallbackStyle); } return FallbackStyle; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 005fc4db9159..bb344d4383ea 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -21464,8 +21464,8 @@ TEST(FormatStyle, GetStyleOfFile) { ASSERT_TRUE((bool)StyleTd); ASSERT_EQ(*StyleTd, getLLVMStyle(FormatStyle::LK_TableGen)); - // Test 9.1: overwriting a file style, when parent no file exists with no - // fallback style + // Test 9.1.1: overwriting a file style, when no parent file exists with no + // fallback style. 
ASSERT_TRUE(FS.addFile( "/e/sub/.clang-format", 0, llvm::MemoryBuffer::getMemBuffer("BasedOnStyle: InheritParentConfig\n" @@ -21480,6 +21480,25 @@ TEST(FormatStyle, GetStyleOfFile) { return Style; }()); + // Test 9.1.2: propagate more than one level with no parent file. + ASSERT_TRUE(FS.addFile("/e/sub/sub/code.cpp", 0, + llvm::MemoryBuffer::getMemBuffer("int i;"))); + ASSERT_TRUE(FS.addFile("/e/sub/sub/.clang-format", 0, + llvm::MemoryBuffer::getMemBuffer( + "BasedOnStyle: InheritParentConfig\n" + "WhitespaceSensitiveMacros: ['FOO', 'BAR']"))); + std::vector NonDefaultWhiteSpaceMacros{"FOO", "BAR"}; + + ASSERT_NE(Style9->WhitespaceSensitiveMacros, NonDefaultWhiteSpaceMacros); + Style9 = getStyle("file", "/e/sub/sub/code.cpp", "none", "", &FS); + ASSERT_TRUE(static_cast(Style9)); + ASSERT_EQ(*Style9, [&NonDefaultWhiteSpaceMacros] { + auto Style = getNoStyle(); + Style.ColumnLimit = 20; + Style.WhitespaceSensitiveMacros = NonDefaultWhiteSpaceMacros; + return Style; + }()); + // Test 9.2: with LLVM fallback style Style9 = getStyle("file", "/e/sub/code.cpp", "LLVM", "", &FS); ASSERT_TRUE(static_cast(Style9)); @@ -21503,15 +21522,7 @@ TEST(FormatStyle, GetStyleOfFile) { return Style; }()); - // Test 9.4: propagate more than one level - ASSERT_TRUE(FS.addFile("/e/sub/sub/code.cpp", 0, - llvm::MemoryBuffer::getMemBuffer("int i;"))); - ASSERT_TRUE(FS.addFile("/e/sub/sub/.clang-format", 0, - llvm::MemoryBuffer::getMemBuffer( - "BasedOnStyle: InheritParentConfig\n" - "WhitespaceSensitiveMacros: ['FOO', 'BAR']"))); - std::vector NonDefaultWhiteSpaceMacros{"FOO", "BAR"}; - + // Test 9.4: propagate more than one level with a parent file. 
const auto SubSubStyle = [&NonDefaultWhiteSpaceMacros] { auto Style = getGoogleStyle(); Style.ColumnLimit = 20; From b9e173fcd46b336b5589f577a74de9472d4deae3 Mon Sep 17 00:00:00 2001 From: Zhao Wei Liew Date: Mon, 3 Jan 2022 11:37:20 +0100 Subject: [PATCH 408/992] [clang-format] Add option to explicitly specify a config file This diff extends the -style=file option to allow a config file to be specified explicitly. This is useful (for instance) when adding IDE commands to reformat code to a personal style. Usage: `clang-format -style=file: ...` Reviewed By: HazardyKnusperkeks, curdeius, MyDeveloperDay, zwliew Differential Revision: https://reviews.llvm.org/D72326 --- clang/docs/ClangFormat.rst | 4 ++ clang/docs/ClangFormatStyleOptions.rst | 4 ++ clang/docs/ReleaseNotes.rst | 4 ++ clang/include/clang/Format/Format.h | 2 + clang/lib/Format/Format.cpp | 47 ++++++++++++++++--- clang/unittests/Format/FormatTest.cpp | 64 ++++++++++++++++++++++++++ 6 files changed, 118 insertions(+), 7 deletions(-) diff --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst index 4a1422e85b06..8c0273c8eb3c 100644 --- a/clang/docs/ClangFormat.rst +++ b/clang/docs/ClangFormat.rst @@ -82,6 +82,10 @@ to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code. .clang-format file located in one of the parent directories of the source file (or current directory for stdin). + Use -style=file: to load style + configuration from a format file located at + . This path can be absolute or + relative to the working directory. Use -style="{key: value, ...}" to set specific parameters, e.g.: -style="{BasedOnStyle: llvm, IndentWidth: 8}" diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index c2f85d564ba9..07c77acb8481 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -32,6 +32,10 @@ try to find the ``.clang-format`` file located in the closest parent directory of the input file. 
When the standard input is used, the search is started from the current directory. +When using ``-style=file:``, :program:`clang-format` for +each input file will use the format file located at ``. +The path may be absolute or relative to the working directory. + The ``.clang-format`` file uses YAML format: .. code-block:: yaml diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ce9b3547155a..20e7e6cc26ce 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -301,6 +301,10 @@ clang-format space before parentheses. The custom options can be set using ``SpaceBeforeParensOptions``. +- The command line argument `-style=` has been extended so that a specific + format file at location can be selected. This is supported + via the syntax: `-style=file:`. + - Improved C++20 Modules and Coroutines support. libclang diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index f6b4926de846..dbc406417ba1 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -4066,6 +4066,8 @@ extern const char *DefaultFallbackStyle; /// * "file" - Load style configuration from a file called ``.clang-format`` /// located in one of the parent directories of ``FileName`` or the current /// directory if ``FileName`` is empty. +/// * "file:" to explicitly specify the configuration file to +/// use. /// /// \param[in] StyleName Style name to interpret according to the description /// above. 
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index f3c337a92822..0ae9fa60d337 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3181,6 +3181,8 @@ const char *StyleOptionHelpDescription = ".clang-format file located in one of the parent\n" "directories of the source file (or current\n" "directory for stdin).\n" + "Use -style=file: to explicitly specify" + "the configuration file.\n" "Use -style=\"{key: value, ...}\" to set specific\n" "parameters, e.g.:\n" " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; @@ -3233,6 +3235,18 @@ const char *DefaultFormatStyle = "file"; const char *DefaultFallbackStyle = "LLVM"; +llvm::ErrorOr> +loadAndParseConfigFile(StringRef ConfigFile, llvm::vfs::FileSystem *FS, + FormatStyle *Style, bool AllowUnknownOptions) { + llvm::ErrorOr> Text = + FS->getBufferForFile(ConfigFile.str()); + if (auto EC = Text.getError()) + return EC; + if (auto EC = parseConfiguration(*Text.get(), Style, AllowUnknownOptions)) + return EC; + return Text; +} + llvm::Expected getStyle(StringRef StyleName, StringRef FileName, StringRef FallbackStyleName, StringRef Code, llvm::vfs::FileSystem *FS, @@ -3263,6 +3277,28 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, return Style; } + // User provided clang-format file using -style=file:path/to/format/file. + if (!Style.InheritsParentConfig && + StyleName.startswith_insensitive("file:")) { + auto ConfigFile = StyleName.substr(5); + llvm::ErrorOr> Text = + loadAndParseConfigFile(ConfigFile, FS, &Style, AllowUnknownOptions); + if (auto EC = Text.getError()) + return make_string_error("Error reading " + ConfigFile + ": " + + EC.message()); + + LLVM_DEBUG(llvm::dbgs() + << "Using configuration file " << ConfigFile << "\n"); + + if (!Style.InheritsParentConfig) + return Style; + + // Search for parent configs starting from the parent directory of + // ConfigFile. 
+ FileName = ConfigFile; + ChildFormatTextToApply.emplace_back(std::move(*Text)); + } + // If the style inherits the parent configuration it is a command line // configuration, which wants to inherit, so we have to skip the check of the // StyleName. @@ -3318,19 +3354,16 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, if (Status && (Status->getType() == llvm::sys::fs::file_type::regular_file)) { llvm::ErrorOr> Text = - FS->getBufferForFile(ConfigFile.str()); - if (std::error_code EC = Text.getError()) - return make_string_error(EC.message()); - if (std::error_code ec = - parseConfiguration(*Text.get(), &Style, AllowUnknownOptions)) { - if (ec == ParseError::Unsuitable) { + loadAndParseConfigFile(ConfigFile, FS, &Style, AllowUnknownOptions); + if (auto EC = Text.getError()) { + if (EC == ParseError::Unsuitable) { if (!UnsuitableConfigFiles.empty()) UnsuitableConfigFiles.append(", "); UnsuitableConfigFiles.append(ConfigFile); continue; } return make_string_error("Error reading " + ConfigFile + ": " + - ec.message()); + EC.message()); } LLVM_DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index bb344d4383ea..7160c7a90073 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -21579,6 +21579,70 @@ TEST(FormatStyle, GetStyleOfFile) { Style.IndentWidth = 7; return Style; }()); + + // Test 9.9: use inheritance from a specific config file. + Style9 = getStyle("file:/e/sub/sub/.clang-format", "/e/sub/sub/code.cpp", + "none", "", &FS); + ASSERT_TRUE(static_cast(Style9)); + ASSERT_EQ(*Style9, SubSubStyle); +} + +TEST(FormatStyle, GetStyleOfSpecificFile) { + llvm::vfs::InMemoryFileSystem FS; + // Specify absolute path to a format file in a parent directory. 
+ ASSERT_TRUE( + FS.addFile("/e/.clang-format", 0, + llvm::MemoryBuffer::getMemBuffer("BasedOnStyle: LLVM"))); + ASSERT_TRUE( + FS.addFile("/e/explicit.clang-format", 0, + llvm::MemoryBuffer::getMemBuffer("BasedOnStyle: Google"))); + ASSERT_TRUE(FS.addFile("/e/sub/sub/sub/test.cpp", 0, + llvm::MemoryBuffer::getMemBuffer("int i;"))); + auto Style = getStyle("file:/e/explicit.clang-format", + "/e/sub/sub/sub/test.cpp", "LLVM", "", &FS); + ASSERT_TRUE(static_cast(Style)); + ASSERT_EQ(*Style, getGoogleStyle()); + + // Specify relative path to a format file. + ASSERT_TRUE( + FS.addFile("../../e/explicit.clang-format", 0, + llvm::MemoryBuffer::getMemBuffer("BasedOnStyle: Google"))); + Style = getStyle("file:../../e/explicit.clang-format", + "/e/sub/sub/sub/test.cpp", "LLVM", "", &FS); + ASSERT_TRUE(static_cast(Style)); + ASSERT_EQ(*Style, getGoogleStyle()); + + // Specify path to a format file that does not exist. + Style = getStyle("file:/e/missing.clang-format", "/e/sub/sub/sub/test.cpp", + "LLVM", "", &FS); + ASSERT_FALSE(static_cast(Style)); + llvm::consumeError(Style.takeError()); + + // Specify path to a file on the filesystem. 
+ SmallString<128> FormatFilePath; + std::error_code ECF = llvm::sys::fs::createTemporaryFile( + "FormatFileTest", "tpl", FormatFilePath); + EXPECT_FALSE((bool)ECF); + llvm::raw_fd_ostream FormatFileTest(FormatFilePath, ECF); + EXPECT_FALSE((bool)ECF); + FormatFileTest << "BasedOnStyle: Google\n"; + FormatFileTest.close(); + + SmallString<128> TestFilePath; + std::error_code ECT = + llvm::sys::fs::createTemporaryFile("CodeFileTest", "cc", TestFilePath); + EXPECT_FALSE((bool)ECT); + llvm::raw_fd_ostream CodeFileTest(TestFilePath, ECT); + CodeFileTest << "int i;\n"; + CodeFileTest.close(); + + std::string format_file_arg = std::string("file:") + FormatFilePath.c_str(); + Style = getStyle(format_file_arg, TestFilePath, "LLVM", "", nullptr); + + llvm::sys::fs::remove(FormatFilePath.c_str()); + llvm::sys::fs::remove(TestFilePath.c_str()); + ASSERT_TRUE(static_cast(Style)); + ASSERT_EQ(*Style, getGoogleStyle()); } TEST_F(ReplacementTest, FormatCodeAfterReplacements) { From 694e6bcd525f9ceae6f377faa43115e22e839244 Mon Sep 17 00:00:00 2001 From: Gabriel Smith Date: Mon, 3 Jan 2022 11:50:31 +0100 Subject: [PATCH 409/992] [clang-format][NFC] Correct comment about checking merging of blocks Reviewed By: HazardyKnusperkeks, MyDeveloperDay, owenpan Differential Revision: https://reviews.llvm.org/D116189 --- clang/lib/Format/UnwrappedLineFormatter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index f652a4e7088f..89f87f0375cd 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -391,7 +391,7 @@ class LineJoiner { } } - // Try to merge a block with left brace wrapped that wasn't yet covered + // Try to merge a block with left brace unwrapped that wasn't yet covered if (TheLine->Last->is(tok::l_brace)) { const FormatToken *Tok = TheLine->First; bool ShouldMerge = false; From 
4ca510b1d1ea826a357ae75c36618d52016200c7 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Mon, 3 Jan 2022 16:23:07 +0530 Subject: [PATCH 410/992] [MLIR] Remove dependency on IR for Simplex This patch removes unnecessary dependency on IR for Simplex. This patch allows users to use Presburger library without depending on MLIRIR. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D116530 --- mlir/include/mlir/Analysis/Presburger/Simplex.h | 1 - mlir/lib/Analysis/Presburger/CMakeLists.txt | 4 ---- 2 files changed, 5 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h index 87486a481d30..2773a6319079 100644 --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -17,7 +17,6 @@ #include "mlir/Analysis/Presburger/Fraction.h" #include "mlir/Analysis/Presburger/IntegerPolyhedron.h" #include "mlir/Analysis/Presburger/Matrix.h" -#include "mlir/IR/Location.h" #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index d3187278db2f..a40ae6e53ccb 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -5,10 +5,6 @@ add_mlir_library(MLIRPresburger Simplex.cpp Utils.cpp - DEPENDS - MLIRBuiltinLocationAttributesIncGen - LINK_LIBS PUBLIC - MLIRIR MLIRSupport ) From cd2b050fa4995b75b9c36fae16c0d9f105b67585 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Mon, 3 Jan 2022 11:19:02 +0000 Subject: [PATCH 411/992] [clang-format] spacesRequiredBetween is not honouring clang-format off/on https://github.com/llvm/llvm-project/issues/52881 It seems that clang-format off/on is not being honoured in regard to adding spaces. 
My understanding of clang-format off/on is that it marks the token as finalized based on whether formatting is currently enabled or disabled. This was causing a space to be added between the `<` and `<<` in the Cuda kernel `foo<<<1, 1>>>();` This if doesn't solve this actual issue but ensure that clang-format is at least honoured. Reviewed By: curdeius, owenpan Differential Revision: https://reviews.llvm.org/D116494 --- clang/lib/Format/TokenAnnotator.cpp | 5 +++++ clang/unittests/Format/FormatTest.cpp | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 505a7250572b..914997a54989 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3294,6 +3294,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); }; + // If the token is finalized don't touch it (as it could be in a + // clang-format-off section). + if (Left.Finalized) + return HasExistingWhitespace(); + if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. 
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 7160c7a90073..7a7976c8b081 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -21156,6 +21156,16 @@ TEST_F(FormatTest, SpacesInAngles) { verifyFormat("A< A< int > >();", Spaces); verifyFormat("A >();", Spaces); verifyFormat("A< A< int>>();", Spaces); + + Spaces.SpacesInAngles = FormatStyle::SIAS_Always; + verifyFormat("// clang-format off\n" + "foo<<<1, 1>>>();\n" + "// clang-format on\n", + Spaces); + verifyFormat("// clang-format off\n" + "foo< < <1, 1> > >();\n" + "// clang-format on\n", + Spaces); } TEST_F(FormatTest, SpaceAfterTemplateKeyword) { From 8c1e520c903e0b7e758f8fbf4f1c3824f0d3efad Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 11 Feb 2021 11:19:44 +0000 Subject: [PATCH 412/992] [AArch64] Adding "armv8.8-a" BC instruction. This instruction is described in the Arm A64 Instruction Set Architecture documentation available here: https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BC-cond--Branch-Consistent-conditionally-?lang=en FEAT_HBC "Hinted Conditional Branches" is listed in the 2021 A-Profile Architecture Extensions: https://developer.arm.com/architectures/cpu-architecture/a-profile/exploration-tools/feature-names-for-a-profile 'BC.cc', where 'cc' is any ordinary condition code, is an instruction that looks exactly like B.cc (the normal conditional branch), except that bit 4 of the encoding is 1 rather than 0, which hints something to the branch predictor (specifically, that this branch is expected to be highly consistent, even though _which way_ it will consistently go is not known at compile time). This commit introduces a special subtarget feature for HBC, which is a dependency of the top-level 8.8-A feature, and uses that to enable the new BC instruction. 
Differential Revision: https://reviews.llvm.org/D116156 --- llvm/lib/Target/AArch64/AArch64.td | 5 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 10 +-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 ++- llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 + .../AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- llvm/test/MC/AArch64/armv8.8a-hbc.s | 75 +++++++++++++++++++ .../MC/Disassembler/AArch64/armv8.8a-hbc.txt | 49 ++++++++++++ 7 files changed, 146 insertions(+), 8 deletions(-) create mode 100644 llvm/test/MC/AArch64/armv8.8a-hbc.s create mode 100644 llvm/test/MC/Disassembler/AArch64/armv8.8a-hbc.txt diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index bc5f0c090d43..7a5dd4f6ae9c 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -416,6 +416,9 @@ def FeatureHCX : SubtargetFeature< def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; +def FeatureHBC : SubtargetFeature<"hbc", "HasHBC", + "true", "Enable Armv8.8-A Hinted Conditional Branches Extension">; + def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", "true", "Enable Branch Record Buffer Extension">; @@ -499,7 +502,7 @@ def HasV8_7aOps : SubtargetFeature< def HasV8_8aOps : SubtargetFeature< "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions", - [HasV8_7aOps]>; + [HasV8_7aOps, FeatureHBC]>; def HasV9_0aOps : SubtargetFeature< "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions", diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index f8d492188744..7433552f7a53 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1816,10 +1816,10 @@ def am_brcond : Operand { let OperandType = "OPERAND_PCREL"; } -class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), - "b", ".$cond\t$target", "", - [(AArch64brcond bb:$target, imm:$cond, 
NZCV)]>, - Sched<[WriteBr]> { +class BranchCond + : I<(outs), (ins ccode:$cond, am_brcond:$target), + mnemonic, ".$cond\t$target", "", + [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; let Uses = [NZCV]; @@ -1828,7 +1828,7 @@ class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), bits<19> target; let Inst{31-24} = 0b01010100; let Inst{23-5} = target; - let Inst{4} = 0; + let Inst{4} = bit4; let Inst{3-0} = cond; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index efdc8e6f1be8..3fc94a0114c3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -198,6 +198,8 @@ def HasBRBE : Predicate<"Subtarget->hasBRBE()">, AssemblerPredicate<(all_of FeatureBRBE), "brbe">; def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">; +def HasHBC : Predicate<"Subtarget->hasHBC()">, + AssemblerPredicate<(all_of FeatureHBC), "hbc">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -2362,7 +2364,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// -def Bcc : BranchCond; +def Bcc : BranchCond<0, "b">; + +// Armv8.8-A variant form which hints to the branch predictor that +// this branch is very likely to go the same way nearly all the time +// (even though it is not known at compile time _which_ way that is). +def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 336c92d73e3e..27cc99424ec3 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -189,6 +189,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasHCX = false; bool HasLS64 = false; + // Armv8.8-A Extensions + bool HasHBC = false; + // Arm SVE2 extensions bool HasSVE2 = false; bool HasSVE2AES = false; @@ -573,6 +576,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } bool hasEL2VMSA() const { return HasEL2VMSA; } bool hasEL3() const { return HasEL3; } + bool hasHBC() const { return HasHBC; } bool fixCortexA53_835769() const { return FixCortexA53_835769; } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4f8f8078b69d..13ff5e5b1d7e 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4533,7 +4533,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, Mnemonic = Head; // Handle condition codes for a branch mnemonic - if (Head == "b" && Next != StringRef::npos) { + if ((Head == "b" || Head == "bc") && Next != StringRef::npos) { Start = Next; Next = Name.find('.', Start + 1); Head = Name.slice(Start + 1, Next); diff --git a/llvm/test/MC/AArch64/armv8.8a-hbc.s b/llvm/test/MC/AArch64/armv8.8a-hbc.s new file mode 100644 index 000000000000..c4c8d5feb34a --- /dev/null +++ b/llvm/test/MC/AArch64/armv8.8a-hbc.s @@ -0,0 +1,75 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+hbc < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.8a < %s | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-HBC-ERR %s + +lbl: + bc.eq lbl + bc.ne 
lbl + bc.cs lbl + bc.hs lbl + bc.lo lbl + bc.cc lbl + bc.mi lbl + bc.pl lbl + bc.vs lbl + bc.vc lbl + bc.hi lbl + bc.ls lbl + bc.ge lbl + bc.lt lbl + bc.gt lbl + bc.le lbl + bc.al lbl + +// CHECK: bc.eq lbl // encoding: [0bAAA10000,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.ne lbl // encoding: [0bAAA10001,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.hs lbl // encoding: [0bAAA10010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.hs lbl // encoding: [0bAAA10010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.lo lbl // encoding: [0bAAA10011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.lo lbl // encoding: [0bAAA10011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.mi lbl // encoding: [0bAAA10100,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.pl lbl // encoding: [0bAAA10101,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.vs lbl // encoding: [0bAAA10110,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.vc lbl // encoding: [0bAAA10111,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.hi lbl // encoding: [0bAAA11000,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.ls lbl // encoding: [0bAAA11001,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.ge lbl // encoding: [0bAAA11010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.lt 
lbl // encoding: [0bAAA11011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.gt lbl // encoding: [0bAAA11100,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.le lbl // encoding: [0bAAA11101,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: bc.al lbl // encoding: [0bAAA11110,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 + +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc +// CHECK-NO-HBC-ERR: [[@LINE-53]]:9: error: instruction requires: hbc diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.8a-hbc.txt b/llvm/test/MC/Disassembler/AArch64/armv8.8a-hbc.txt new file mode 100644 index 000000000000..c58c292785b0 --- /dev/null +++ b/llvm/test/MC/Disassembler/AArch64/armv8.8a-hbc.txt @@ -0,0 +1,49 
@@ +# RUN: llvm-mc -triple=aarch64 -mattr=+hbc -disassemble %s | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=+v8.8a -disassemble %s | FileCheck %s +# RUN: not llvm-mc -triple=aarch64 -disassemble %s 2>&1 | FileCheck --check-prefix=ERROR-NO-HBC %s + +[0x30,0x00,0x00,0x54] +# CHECK: bc.eq #4 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x51,0x00,0x00,0x54] +# CHECK: bc.ne #8 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x92,0x00,0x00,0x54] +# CHECK: bc.hs #16 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x13,0x01,0x00,0x54] +# CHECK: bc.lo #32 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x14,0x02,0x00,0x54] +# CHECK: bc.mi #64 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x15,0x04,0x00,0x54] +# CHECK: bc.pl #128 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x16,0x08,0x00,0x54] +# CHECK: bc.vs #256 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x17,0x10,0x00,0x54] +# CHECK: bc.vc #512 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x18,0x20,0x00,0x54] +# CHECK: bc.hi #1024 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x19,0x40,0x00,0x54] +# CHECK: bc.ls #2048 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x1a,0x80,0x00,0x54] +# CHECK: bc.ge #4096 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x1b,0x00,0x01,0x54] +# CHECK: bc.lt #8192 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x1c,0x00,0x02,0x54] +# CHECK: bc.gt #16384 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x1d,0x00,0x04,0x54] +# CHECK: bc.le #32768 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding +[0x1e,0x00,0x08,0x54] +# CHECK: bc.al #65536 +# ERROR-NO-HBC: [[@LINE-2]]:2: warning: invalid instruction encoding From 
cd7f621a0aa474d6953a50226aca38d35fcac895 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Thu, 2 Sep 2021 16:12:55 +0100 Subject: [PATCH 413/992] [ARM][AArch64] Introduce Armv9.3-A This patch introduces support for targetting the Armv9.3-A architecture, which should map to the existing Armv8.8-A extensions. Differential Revision: https://reviews.llvm.org/D116158 --- llvm/include/llvm/ADT/Triple.h | 1 + .../llvm/Support/AArch64TargetParser.def | 6 ++++++ llvm/include/llvm/Support/ARMTargetParser.def | 5 +++++ llvm/lib/Support/AArch64TargetParser.cpp | 2 ++ llvm/lib/Support/ARMTargetParser.cpp | 3 +++ llvm/lib/Support/Triple.cpp | 2 ++ llvm/lib/Target/AArch64/AArch64.td | 4 ++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 ++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 3 +++ llvm/lib/Target/ARM/ARM.td | 17 +++++++++++++++++ llvm/lib/Target/ARM/ARMSubtarget.h | 3 +++ .../Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 1 + llvm/unittests/Support/TargetParserTest.cpp | 1 + 14 files changed, 52 insertions(+) diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 45a8b8d92714..0f0a7b08b5d3 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -107,6 +107,7 @@ class Triple { enum SubArchType { NoSubArch, + ARMSubArch_v9_3a, ARMSubArch_v9_2a, ARMSubArch_v9_1a, ARMSubArch_v9, diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 4a5a41eefed2..9d45f6abae6b 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -83,6 +83,12 @@ AARCH64_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a", AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_SVE2)) +AARCH64_ARCH("armv9.3-a", ARMV9_3A, "9.3-A", "v9.3", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + 
(AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SVE2)) // For v8-R, we do not enable crypto and align with GCC that enables a more // minimal set of optional architecture extensions. AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r", diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 13841036d9bd..433d7fdc2c3b 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -143,6 +143,11 @@ ARM_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a", (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM)) +ARM_ARCH("armv9.3-a", ARMV9_3A, "9.3-A", "v9.3a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM)) ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R, FK_NEON_FP_ARMV8, (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp index b436b9b2ca24..42a941ca08e6 100644 --- a/llvm/lib/Support/AArch64TargetParser.cpp +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -144,6 +144,8 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK, Features.push_back("+v9.1a"); if (AK == AArch64::ArchKind::ARMV9_2A) Features.push_back("+v9.2a"); + if (AK == AArch64::ArchKind::ARMV9_3A) + Features.push_back("+v9.3a"); if(AK == AArch64::ArchKind::ARMV8R) Features.push_back("+v8r"); diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp index 7521d3e4c147..d7294b5b1074 100644 --- 
a/llvm/lib/Support/ARMTargetParser.cpp +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -86,6 +86,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) { case ArchKind::ARMV9A: case ArchKind::ARMV9_1A: case ArchKind::ARMV9_2A: + case ArchKind::ARMV9_3A: return 9; case ArchKind::INVALID: return 0; @@ -122,6 +123,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) { case ArchKind::ARMV9A: case ArchKind::ARMV9_1A: case ArchKind::ARMV9_2A: + case ArchKind::ARMV9_3A: return ProfileKind::A; case ArchKind::ARMV2: case ArchKind::ARMV2A: @@ -171,6 +173,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) { .Cases("v9", "v9a", "v9-a") .Case("v9.1a", "v9.1-a") .Case("v9.2a", "v9.2-a") + .Case("v9.3a", "v9.3-a") .Case("v8m.base", "v8-m.base") .Case("v8m.main", "v8-m.main") .Case("v8.1m.main", "v8.1-m.main") diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index dca39716a5f5..1452fa62f5fd 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -671,6 +671,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { return Triple::ARMSubArch_v9_1a; case ARM::ArchKind::ARMV9_2A: return Triple::ARMSubArch_v9_2a; + case ARM::ArchKind::ARMV9_3A: + return Triple::ARMSubArch_v9_3a; case ARM::ArchKind::ARMV8R: return Triple::ARMSubArch_v8r; case ARM::ArchKind::ARMV8MBaseline: diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 7a5dd4f6ae9c..22e0c490b506 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -516,6 +516,10 @@ def HasV9_2aOps : SubtargetFeature< "v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions", [HasV8_7aOps, HasV9_1aOps]>; +def HasV9_3aOps : SubtargetFeature< + "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions", + [HasV8_8aOps, HasV9_2aOps]>; + def HasV8_0rOps : SubtargetFeature< "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", [//v8.1 diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 3fc94a0114c3..7ee2a74491c5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -33,6 +33,8 @@ def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">, AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">; def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">, AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">; +def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">, + AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">; def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 27cc99424ec3..bcd3d873985f 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -98,6 +98,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasV9_0aOps = false; bool HasV9_1aOps = false; bool HasV9_2aOps = false; + bool HasV9_3aOps = false; bool HasV8_0rOps = false; bool HasCONTEXTIDREL2 = false; @@ -369,6 +370,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasV9_0aOps() const { return HasV9_0aOps; } bool hasV9_1aOps() const { return HasV9_1aOps; } bool hasV9_2aOps() const { return HasV9_2aOps; } + bool hasV9_3aOps() const { return HasV9_3aOps; } bool hasV8_0rOps() const { return HasV8_0rOps; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 13ff5e5b1d7e..4bd08f29864b 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3315,6 +3315,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { Str += "ARMv9.1a"; else if (FBS[AArch64::HasV9_2aOps]) Str += "ARMv9.2a"; + else if 
(FBS[AArch64::HasV9_3aOps]) + Str += "ARMv9.3a"; else if (FBS[AArch64::HasV8_0rOps]) Str += "ARMv8r"; else { @@ -5937,6 +5939,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, case AArch64::ArchKind::ARMV9A: case AArch64::ArchKind::ARMV9_1A: case AArch64::ArchKind::ARMV9_2A: + case AArch64::ArchKind::ARMV9_3A: case AArch64::ArchKind::ARMV8R: RequestedExtensions.push_back("sm4"); RequestedExtensions.push_back("sha3"); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index fa7b415447c5..101d3c160ba5 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -569,6 +569,10 @@ def HasV9_2aOps : SubtargetFeature<"v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions", [HasV8_7aOps, HasV9_1aOps]>; +def HasV9_3aOps : SubtargetFeature<"v9.3a", "HasV9_3aOps", "true", + "Support ARM v9.3a instructions", + [HasV8_8aOps, HasV9_2aOps]>; + def HasV8_1MMainlineOps : SubtargetFeature< "v8.1m.main", "HasV8_1MMainlineOps", "true", "Support ARM v8-1M Mainline instructions", @@ -948,6 +952,19 @@ def ARMv92a : Architecture<"armv9.2-a", "ARMv92a", [HasV9_2aOps, FeatureCRC, FeatureRAS, FeatureDotProd]>; +def ARMv93a : Architecture<"armv9.3-a", "ARMv93a", [HasV9_3aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 0a4dc099bd84..1c2b7ee6ba35 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -130,6 +130,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { ARMv9a, ARMv91a, ARMv92a, + ARMv93a, }; public: @@ -180,6 +181,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool HasV9_0aOps = false; bool HasV9_1aOps = false; bool HasV9_2aOps = false; + bool HasV9_3aOps = false; bool 
HasV8MBaselineOps = false; bool HasV8MMainlineOps = false; bool HasV8_1MMainlineOps = false; @@ -641,6 +643,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool hasV9_0aOps() const { return HasV9_0aOps; } bool hasV9_1aOps() const { return HasV9_1aOps; } bool hasV9_2aOps() const { return HasV9_2aOps; } + bool hasV9_3aOps() const { return HasV9_3aOps; } bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 0de5bf5d2d49..16bc0ca179a7 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -786,6 +786,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::ArchKind::ARMV9A: case ARM::ArchKind::ARMV9_1A: case ARM::ArchKind::ARMV9_2A: + case ARM::ArchKind::ARMV9_3A: S.setAttributeItem(CPU_arch_profile, ApplicationProfile, false); S.setAttributeItem(ARM_ISA_use, Allowed, false); S.setAttributeItem(THUMB_ISA_use, AllowThumb32, false); diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index b21ead171a64..3ea7ed4f2c30 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -847,6 +847,7 @@ TEST(TargetParserTest, ARMparseArchProfile) { case ARM::ArchKind::ARMV9A: case ARM::ArchKind::ARMV9_1A: case ARM::ArchKind::ARMV9_2A: + case ARM::ArchKind::ARMV9_3A: EXPECT_EQ(ARM::ProfileKind::A, ARM::parseArchProfile(ARMArch[i])); break; default: From d76279404073e676a31f592d87a2f60306a00a12 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 6 Dec 2021 15:49:23 +0000 Subject: [PATCH 414/992] [IR] Allow the 'align' param attr on vectors of pointers This patch extends the available uses of the 'align' parameter attribute to include 
vectors of pointers. The attribute specifies pointer alignment element-wise. This change was previously requested and discussed in D87304. The vector predication (VP) intrinsics intend to use this for scatter and gather operations, as they lack the explicit alignment parameter that the masked versions use. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D115161 --- llvm/docs/LangRef.rst | 14 ++++++++------ llvm/lib/IR/Attributes.cpp | 9 ++++++--- llvm/test/Bitcode/attributes.ll | 6 ++++++ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ce3af423f9f0..8c72e3255ab5 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1210,12 +1210,14 @@ Currently, only the following parameter attributes are defined: .. _attr_align: ``align `` or ``align()`` - This indicates that the pointer value has the specified alignment. - If the pointer value does not have the specified alignment, - :ref:`poison value ` is returned or passed instead. The - ``align`` attribute should be combined with the ``noundef`` attribute to - ensure a pointer is aligned, or otherwise the behavior is undefined. Note - that ``align 1`` has no effect on non-byval, non-preallocated arguments. + This indicates that the pointer value or vector of pointers has the + specified alignment. If applied to a vector of pointers, *all* pointers + (elements) have the specified alignment. If the pointer value does not have + the specified alignment, :ref:`poison value ` is returned or + passed instead. The ``align`` attribute should be combined with the + ``noundef`` attribute to ensure a pointer is aligned, or otherwise the + behavior is undefined. Note that ``align 1`` has no effect on non-byval, + non-preallocated arguments. Note that this attribute has additional semantics when combined with the ``byval`` or ``preallocated`` attribute, which are documented there. 
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index c899afae6cce..2c917e46dfde 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1839,12 +1839,12 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { AttrBuilder Incompatible; if (!Ty->isIntegerTy()) - // Attribute that only apply to integers. + // Attributes that only apply to integers. Incompatible.addAttribute(Attribute::SExt) .addAttribute(Attribute::ZExt); if (!Ty->isPointerTy()) - // Attribute that only apply to pointers. + // Attributes that only apply to pointers. Incompatible.addAttribute(Attribute::Nest) .addAttribute(Attribute::NoAlias) .addAttribute(Attribute::NoCapture) @@ -1852,7 +1852,6 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::SwiftError) - .addAlignmentAttr(1) // the int here is ignored .addDereferenceableAttr(1) // the int here is ignored .addDereferenceableOrNullAttr(1) // the int here is ignored .addPreallocatedAttr(Ty) @@ -1862,6 +1861,10 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { .addByRefAttr(Ty) .addTypeAttr(Attribute::ElementType, Ty); + if (!Ty->isPtrOrPtrVectorTy()) + // Attributes that only apply to pointers or vectors of pointers. + Incompatible.addAlignmentAttr(1); // the int here is ignored + // Some attributes can apply to all "values" but there are no `void` values. 
if (Ty->isVoidTy()) Incompatible.addAttribute(Attribute::NoUndef); diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index ec4f903e710e..b2b92bb6e12d 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -510,6 +510,12 @@ define void @f82(i32* %0) ret void; } +; CHECK-TYPED: define void @f83(<4 x i8*> align 32 %0, align 64 %1) +; CHECK-OPQUE: define void @f83(<4 x ptr> align 32 %0, align 64 %1) +define void @f83(<4 x i8*> align 32 %0, align 64 %1) { + ret void +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } From 587495ffa1a4240179c0f0959c1709f23cc52513 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Jan 2022 14:06:01 +0100 Subject: [PATCH 415/992] [CodeExtractor] Separate function from param/ret attributes (NFC) This list is confusing because it conflates functions attributes (which are either extractable or not) and other attribute kinds, which are simply irrelevant for this code. --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 58 +++++++++++---------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 96aff563aa9b..4669f4bb4a18 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -893,53 +893,26 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } else switch (Attr.getKindAsEnum()) { // Those attributes cannot be propagated safely. Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. - case Attribute::Alignment: + // here so we get a warning if new attributes are added. 
case Attribute::AllocSize: case Attribute::ArgMemOnly: case Attribute::Builtin: - case Attribute::ByVal: case Attribute::Convergent: - case Attribute::Dereferenceable: - case Attribute::DereferenceableOrNull: - case Attribute::ElementType: - case Attribute::InAlloca: - case Attribute::InReg: case Attribute::InaccessibleMemOnly: case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::JumpTable: case Attribute::Naked: - case Attribute::Nest: - case Attribute::NoAlias: case Attribute::NoBuiltin: - case Attribute::NoCapture: case Attribute::NoMerge: case Attribute::NoReturn: case Attribute::NoSync: - case Attribute::NoUndef: - case Attribute::None: - case Attribute::NonNull: - case Attribute::Preallocated: case Attribute::ReadNone: case Attribute::ReadOnly: - case Attribute::Returned: case Attribute::ReturnsTwice: - case Attribute::SExt: case Attribute::Speculatable: case Attribute::StackAlignment: - case Attribute::StructRet: - case Attribute::SwiftError: - case Attribute::SwiftSelf: - case Attribute::SwiftAsync: case Attribute::WillReturn: case Attribute::WriteOnly: - case Attribute::ZExt: - case Attribute::ImmArg: - case Attribute::ByRef: - case Attribute::EndAttrKinds: - case Attribute::EmptyKey: - case Attribute::TombstoneKey: continue; // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: @@ -980,6 +953,35 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::MustProgress: case Attribute::NoProfile: break; + // These attributes cannot be applied to functions. 
+ case Attribute::Alignment: + case Attribute::ByVal: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::ElementType: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoCapture: + case Attribute::NoUndef: + case Attribute::NonNull: + case Attribute::Preallocated: + case Attribute::Returned: + case Attribute::SExt: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::SwiftAsync: + case Attribute::ZExt: + case Attribute::ImmArg: + case Attribute::ByRef: + // These are not really attributes. + case Attribute::None: + case Attribute::EndAttrKinds: + case Attribute::EmptyKey: + case Attribute::TombstoneKey: + llvm_unreachable("Not a function attribute"); } newFunction->addFnAttr(Attr); From b5c84626bb650235fc0902f02621de928e5faf80 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Mon, 3 Jan 2022 13:04:51 +0000 Subject: [PATCH 416/992] [clang-format] NFC update LLVM overall clang-formatted status A 1% increase in the number of clang-formatted files. An additional 235 files have been added to LLVM, and an additional 268 files are now clang-format clean. Raising the overall % to 52% There are now 8407 files clean out of 15902 (ignoring lit tests) --- clang/docs/ClangFormattedStatus.rst | 1007 +++++++++++--------- clang/docs/tools/clang-formatted-files.txt | 395 ++++++-- 2 files changed, 888 insertions(+), 514 deletions(-) diff --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst index c0f8cb8bd3b1..14c3638468ac 100644 --- a/clang/docs/ClangFormattedStatus.rst +++ b/clang/docs/ClangFormattedStatus.rst @@ -17,7 +17,7 @@ Clang Formatted Status ====================== :doc:`ClangFormattedStatus` describes the state of LLVM source -tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:36:33 (`976bb4724c04 `_). 
+tree in terms of conformance to :doc:`ClangFormat` as of: January 03, 2022 11:33:59 (`cd2b050fa499 `_). .. list-table:: LLVM Clang-Format Status @@ -80,10 +80,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - :none:`0%` * - clang/include/clang/Analysis/FlowSensitive - - `2` - - `1` + - `7` + - `6` - `1` - - :part:`50%` + - :part:`85%` * - clang/include/clang/Analysis/Support - `1` - `0` @@ -266,9 +266,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`80%` * - clang/include/clang/Tooling/Refactoring/Extract - `2` - - `1` - - `1` - - :part:`50%` + - `2` + - `0` + - :good:`100%` * - clang/include/clang/Tooling/Refactoring/Rename - `6` - `5` @@ -299,6 +299,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `4` - `24` - :part:`14%` + * - clang/lib/Analysis/FlowSensitive + - `2` + - `2` + - `0` + - :good:`100%` * - clang/lib/Analysis/plugins/CheckerDependencyHandling - `1` - `1` @@ -395,10 +400,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `12` - :part:`14%` * - clang/lib/Driver/ToolChains - - `91` - - `36` - - `55` - - :part:`39%` + - `95` + - `41` + - `54` + - :part:`43%` * - clang/lib/Driver/ToolChains/Arch - `20` - `7` @@ -430,15 +435,15 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - clang/lib/Headers - - `145` - - `15` + - `146` + - `16` - `130` - :part:`10%` * - clang/lib/Headers/openmp_wrappers - `5` - - `4` - - `1` - - :part:`80%` + - `5` + - `0` + - :good:`100%` * - clang/lib/Headers/ppc_wrappers - `7` - `2` @@ -476,9 +481,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - clang/lib/Sema - `55` - - `4` - - `51` - - :part:`7%` + - `5` + - `50` + - :part:`9%` * - clang/lib/Serialization - `17` - `2` @@ -546,9 +551,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 
25, 2021 16:3 - :none:`0%` * - clang/lib/Tooling/DependencyScanning - `5` - - `2` - `3` - - :part:`40%` + - `2` + - :part:`60%` * - clang/lib/Tooling/DumpTool - `4` - `3` @@ -729,6 +734,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - `4` - :part:`33%` + * - clang/unittests/Analysis/FlowSensitive + - `5` + - `5` + - `0` + - :good:`100%` * - clang/unittests/AST - `30` - `8` @@ -901,9 +911,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - clang-tools-extra/clang-include-fixer - `13` - - `7` - - `6` - - :part:`53%` + - `8` + - `5` + - :part:`61%` * - clang-tools-extra/clang-include-fixer/find-all-symbols - `17` - `13` @@ -956,14 +966,14 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - clang-tools-extra/clang-tidy - `18` - - `11` - - `7` - - :part:`61%` + - `12` + - `6` + - :part:`66%` * - clang-tools-extra/clang-tidy/abseil - `42` - - `30` - - `12` - - :part:`71%` + - `31` + - `11` + - :part:`73%` * - clang-tools-extra/clang-tidy/altera - `11` - `9` @@ -980,8 +990,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - clang-tools-extra/clang-tidy/bugprone - - `119` - - `99` + - `121` + - `101` - `20` - :part:`83%` * - clang-tools-extra/clang-tidy/cert @@ -1050,10 +1060,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - clang-tools-extra/clang-tidy/objc - - `15` - - `10` + - `17` + - `12` - `5` - - :part:`66%` + - :part:`70%` * - clang-tools-extra/clang-tidy/openmp - `5` - `5` @@ -1180,8 +1190,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - clang-tools-extra/clangd/unittests - - `79` - - `66` + - `78` + - `65` - `13` - :part:`83%` * - clang-tools-extra/clangd/unittests/decision_forest_model @@ -1276,9 +1286,9 @@ tree in terms of conformance 
to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - clang-tools-extra/unittests/clang-tidy - `16` - - `7` - - `9` - - :part:`43%` + - `8` + - `8` + - :part:`50%` * - clang-tools-extra/unittests/include/common - `1` - `0` @@ -1301,9 +1311,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`66%` * - compiler-rt/lib/asan - `56` - - `3` - - `53` - - :part:`5%` + - `4` + - `52` + - :part:`7%` * - compiler-rt/lib/asan/tests - `17` - `1` @@ -1401,9 +1411,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`33%` * - compiler-rt/lib/lsan - `20` - - `2` - - `18` - - :part:`10%` + - `4` + - `16` + - :part:`20%` * - compiler-rt/lib/memprof - `32` - `31` @@ -1445,20 +1455,20 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - :part:`33%` * - compiler-rt/lib/sanitizer_common - - `164` - - `24` - - `140` - - :part:`14%` + - `167` + - `28` + - `139` + - :part:`16%` * - compiler-rt/lib/sanitizer_common/symbolizer - `2` - `2` - `0` - :good:`100%` * - compiler-rt/lib/sanitizer_common/tests - - `44` - - `9` - - `35` - - :part:`20%` + - `46` + - `12` + - `34` + - :part:`26%` * - compiler-rt/lib/scudo - `20` - `0` @@ -1486,9 +1496,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - compiler-rt/lib/scudo/standalone/tests - `25` - - `24` - - `1` - - :part:`96%` + - `25` + - `0` + - :good:`100%` * - compiler-rt/lib/scudo/standalone/tools - `1` - `1` @@ -1515,6 +1525,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - compiler-rt/lib/tsan/rtl + - `59` + - `14` + - `45` + - :part:`23%` + * - compiler-rt/lib/tsan/rtl-old - `61` - `13` - `48` @@ -1525,10 +1540,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `10` - :none:`0%` * - compiler-rt/lib/tsan/tests/unit - - `12` + - `11` - `3` - - `9` - - 
:part:`25%` + - `8` + - :part:`27%` * - compiler-rt/lib/ubsan - `27` - `7` @@ -1540,10 +1555,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - compiler-rt/lib/xray - - `39` + - `40` - `27` - - `12` - - :part:`69%` + - `13` + - :part:`67%` * - compiler-rt/lib/xray/tests/unit - `10` - `8` @@ -1560,15 +1575,20 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty - - `7` + - `8` - `0` - - `7` + - `8` - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect - `5` - `0` - `5` - :none:`0%` + * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address + - `7` + - `0` + - `7` + - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source - `1` - `1` @@ -1626,9 +1646,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb - `2` - - `2` - - `0` - - :good:`100%` + - `1` + - `1` + - :part:`50%` * - flang/examples - `1` - `1` @@ -1685,8 +1705,13 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - flang/include/flang/Optimizer/Builder - - `6` - - `6` + - `7` + - `7` + - `0` + - :good:`100%` + * - flang/include/flang/Optimizer/Builder/Runtime + - `8` + - `8` - `0` - :good:`100%` * - flang/include/flang/Optimizer/CodeGen @@ -1715,10 +1740,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :part:`94%` * - flang/include/flang/Runtime - - `24` - - `23` + - `27` + - `26` - `1` - - :part:`95%` + - :part:`96%` * - flang/include/flang/Semantics - `9` - `8` @@ -1759,6 +1784,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `6` - 
`0` - :good:`100%` + * - flang/lib/Optimizer/Builder/Runtime + - `7` + - `7` + - `0` + - :good:`100%` * - flang/lib/Optimizer/CodeGen - `10` - `9` @@ -1775,8 +1805,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - flang/lib/Optimizer/Transforms - - `8` - - `8` + - `11` + - `11` - `0` - :good:`100%` * - flang/lib/Parser @@ -1795,8 +1825,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - flang/runtime - - `70` - - `69` + - `73` + - `72` - `1` - :part:`98%` * - flang/tools/f18 @@ -1849,9 +1879,14 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `4` - `0` - :good:`100%` + * - flang/unittests/Optimizer/Builder/Runtime + - `8` + - `8` + - `0` + - :good:`100%` * - flang/unittests/Runtime - - `20` - - `20` + - `21` + - `21` - `0` - :good:`100%` * - libc/AOR_v20.02/math @@ -1960,8 +1995,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - libc/src/math - - `87` - - `87` + - `88` + - `88` - `0` - :good:`100%` * - libc/src/math/aarch64 @@ -1970,8 +2005,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - libc/src/math/generic - - `88` - - `88` + - `89` + - `89` - `0` - :good:`100%` * - libc/src/math/x86_64 @@ -1995,8 +2030,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - libc/src/stdlib - - `39` - - `39` + - `41` + - `41` - `0` - :good:`100%` * - libc/src/stdlib/linux @@ -2010,8 +2045,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - libc/src/string/memory_utils - - `7` - - `7` + - `8` + - `8` - `0` - :good:`100%` * - libc/src/sys/mman @@ -2031,9 +2066,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - libc/src/threads/linux - `16` - 
- `8` - - `8` - - :part:`50%` + - `9` + - `7` + - :part:`56%` * - libc/src/time - `12` - `12` @@ -2060,8 +2095,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - libc/src/__support/FPUtil - - `19` - - `19` + - `16` + - `16` - `0` - :good:`100%` * - libc/src/__support/FPUtil/aarch64 @@ -2075,8 +2110,23 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - libc/src/__support/FPUtil/x86_64 - - `2` - - `2` + - `6` + - `6` + - `0` + - :good:`100%` + * - libc/src/__support/OSUtil + - `3` + - `3` + - `0` + - :good:`100%` + * - libc/src/__support/OSUtil/linux + - `3` + - `3` + - `0` + - :good:`100%` + * - libc/src/__support/OSUtil/linux/x86_64 + - `1` + - `1` - `0` - :good:`100%` * - libc/utils/HdrGen @@ -2255,10 +2305,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - libcxx/benchmarks - - `23` - - `5` + - `27` + - `9` - `18` - - :part:`21%` + - :part:`33%` * - libcxx/include - `23` - `0` @@ -2294,6 +2344,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - `4` - :none:`0%` + * - libcxx/include/__filesystem + - `16` + - `3` + - `13` + - :part:`18%` * - libcxx/include/__format - `15` - `0` @@ -2310,14 +2365,19 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `32` - :none:`0%` * - libcxx/include/__memory - - `15` - - `0` - - `15` - - :none:`0%` + - `18` + - `1` + - `17` + - :part:`5%` + * - libcxx/include/__numeric + - `13` + - `4` + - `9` + - :part:`30%` * - libcxx/include/__random - - `35` + - `36` - `0` - - `35` + - `36` - :none:`0%` * - libcxx/include/__ranges - `25` @@ -2375,9 +2435,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - libcxx/include/__utility - - `15` + - `16` - `0` - - `15` + - `16` - :none:`0%` * - libcxx/include/__variant - `1` @@ -2385,25 
+2445,35 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - libcxx/src - - `39` - - `3` + - `41` + - `5` - `36` - - :part:`7%` + - :part:`12%` * - libcxx/src/experimental + - `2` - `1` - - `0` - `1` - - :none:`0%` + - :part:`50%` * - libcxx/src/filesystem - `5` - `0` - `5` - :none:`0%` * - libcxx/src/include + - `6` + - `1` - `5` - - `0` - - `5` - - :none:`0%` + - :part:`16%` + * - libcxx/src/include/ryu + - `9` + - `8` + - `1` + - :part:`88%` + * - libcxx/src/ryu + - `3` + - `3` + - `0` + - :good:`100%` * - libcxx/src/support/ibm - `3` - `0` @@ -2419,21 +2489,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - `3` - :none:`0%` - * - libcxx/utils/google-benchmark/cmake - - `5` - - `1` - - `4` - - :part:`20%` - * - libcxx/utils/google-benchmark/include/benchmark - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/utils/google-benchmark/src - - `21` - - `18` - - `3` - - :part:`85%` * - libcxxabi/fuzz - `1` - `0` @@ -2481,9 +2536,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`80%` * - lld/ELF - `48` - - `24` - - `24` - - :part:`50%` + - `25` + - `23` + - :part:`52%` * - lld/ELF/Arch - `14` - `4` @@ -2499,36 +2554,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `4` - `16` - :part:`20%` - * - lld/include/lld/ReaderWriter - - `2` - - `0` - - `2` - - :none:`0%` - * - lld/lib/Core - - `8` - - `2` - - `6` - - :part:`25%` - * - lld/lib/Driver - - `1` - - `0` - - `1` - - :none:`0%` - * - lld/lib/ReaderWriter - - `1` - - `0` - - `1` - - :none:`0%` - * - lld/lib/ReaderWriter/MachO - - `30` - - `1` - - `29` - - :part:`3%` - * - lld/lib/ReaderWriter/YAML - - `1` - - `0` - - `1` - - :none:`0%` * - lld/MachO - `43` - `41` @@ -2549,16 +2574,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` - * - lld/unittests/DriverTests - - `1` - - `0` - 
- `1` - - :none:`0%` - * - lld/unittests/MachOTests - - `4` - - `0` - - `4` - - :none:`0%` * - lld/wasm - `29` - `15` @@ -2615,10 +2630,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `16` - :part:`36%` * - lldb/include/lldb/Core - - `60` - - `31` + - `61` + - `32` - `29` - - :part:`51%` + - :part:`52%` * - lldb/include/lldb/DataFormatters - `18` - `10` @@ -2704,7 +2719,7 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `40` - `23` - :part:`63%` - * - lldb/source + * - lldb/include/lldb/Version - `1` - `1` - `0` @@ -2725,10 +2740,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `13` - :part:`81%` * - lldb/source/Core - - `48` - - `26` - - `22` - - :part:`54%` + - `49` + - `25` + - `24` + - :part:`51%` * - lldb/source/DataFormatters - `16` - `3` @@ -2960,10 +2975,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - lldb/source/Plugins/Language/CPlusPlus - - `29` - - `18` + - `30` + - `19` - `11` - - :part:`62%` + - :part:`63%` * - lldb/source/Plugins/Language/ObjC - `21` - `14` @@ -3104,6 +3119,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - `2` - :none:`0%` + * - lldb/source/Plugins/Platform/QemuUser + - `2` + - `2` + - `0` + - :good:`100%` * - lldb/source/Plugins/Platform/Windows - `2` - `1` @@ -3119,6 +3139,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `12` - `4` - :part:`75%` + * - lldb/source/Plugins/Process/FreeBSDKernel + - `10` + - `8` + - `2` + - :part:`80%` * - lldb/source/Plugins/Process/gdb-remote - `26` - `14` @@ -3195,10 +3220,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :part:`50%` * - lldb/source/Plugins/ScriptInterpreter/Lua - - `4` - - `3` - - `1` - - :part:`75%` + - `5` + - `5` + - `0` + - :good:`100%` * - 
lldb/source/Plugins/ScriptInterpreter/None - `2` - `2` @@ -3206,9 +3231,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - lldb/source/Plugins/ScriptInterpreter/Python - `16` - - `11` - - `5` - - :part:`68%` + - `12` + - `4` + - :part:`75%` * - lldb/source/Plugins/StructuredData/DarwinLog - `2` - `0` @@ -3221,14 +3246,14 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - lldb/source/Plugins/SymbolFile/DWARF - `65` - - `37` - - `28` - - :part:`56%` + - `38` + - `27` + - :part:`58%` * - lldb/source/Plugins/SymbolFile/NativePDB - `20` - - `11` - - `9` - - :part:`55%` + - `10` + - `10` + - :part:`50%` * - lldb/source/Plugins/SymbolFile/PDB - `6` - `4` @@ -3296,9 +3321,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`50%` * - lldb/source/Symbol - `31` - - `17` - - `14` - - :part:`54%` + - `18` + - `13` + - :part:`58%` * - lldb/source/Target - `68` - `33` @@ -3309,6 +3334,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `45` - `13` - :part:`77%` + * - lldb/source/Version + - `1` + - `1` + - `0` + - :good:`100%` * - lldb/tools/argdumper - `1` - `1` @@ -3401,9 +3431,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - lldb/unittests/Core - `10` - - `8` - - `2` - - :part:`80%` + - `9` + - `1` + - :part:`90%` * - lldb/unittests/DataFormatter - `3` - `3` @@ -3426,9 +3456,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - lldb/unittests/Expression - `5` - - `2` - `3` - - :part:`40%` + - `2` + - :part:`60%` * - lldb/unittests/Host - `15` - `10` @@ -3536,29 +3566,29 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`66%` * - lldb/unittests/ScriptInterpreter/Lua - `2` - - `1` - - `1` - - :part:`50%` + - `2` + - `0` + - :good:`100%` * - 
lldb/unittests/ScriptInterpreter/Python - `3` - - `1` - `2` - - :part:`33%` + - `1` + - :part:`66%` * - lldb/unittests/Signals - `1` - `1` - `0` - :good:`100%` * - lldb/unittests/Symbol - - `8` - - `4` + - `11` + - `7` - `4` - - :part:`50%` + - :part:`63%` * - lldb/unittests/SymbolFile/DWARF - - `5` - - `3` + - `6` + - `4` - `2` - - :part:`60%` + - :part:`66%` * - lldb/unittests/SymbolFile/DWARF/Inputs - `1` - `1` @@ -3630,10 +3660,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - lldb/unittests/Utility - - `45` + - `46` - `32` - - `13` - - :part:`71%` + - `14` + - :part:`69%` * - lldb/utils/lit-cpuid - `1` - `0` @@ -3865,15 +3895,15 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `6` - :part:`25%` * - llvm/include/llvm/ADT - - `88` - - `24` - - `64` - - :part:`27%` + - `91` + - `26` + - `65` + - :part:`28%` * - llvm/include/llvm/Analysis - - `125` - - `46` - - `79` - - :part:`36%` + - `129` + - `51` + - `78` + - :part:`39%` * - llvm/include/llvm/Analysis/Utils - `3` - `1` @@ -3900,10 +3930,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `3` - :none:`0%` * - llvm/include/llvm/CodeGen - - `153` - - `47` + - `156` + - `50` - `106` - - :part:`30%` + - :part:`32%` * - llvm/include/llvm/CodeGen/GlobalISel - `29` - `9` @@ -3931,9 +3961,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`70%` * - llvm/include/llvm/DebugInfo/DWARF - `32` - - `15` - - `17` - - :part:`46%` + - `14` + - `18` + - :part:`43%` * - llvm/include/llvm/DebugInfo/GSYM - `14` - `4` @@ -3946,9 +3976,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`80%` * - llvm/include/llvm/DebugInfo/PDB - `50` - - `7` - - `43` - - :part:`14%` + - `30` + - `20` + - :part:`60%` * - llvm/include/llvm/DebugInfo/PDB/DIA - `20` - `9` @@ -3964,6 +3994,11 @@ tree in terms of conformance to 
:doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - `1` - :part:`66%` + * - llvm/include/llvm/Debuginfod + - `2` + - `2` + - `0` + - :good:`100%` * - llvm/include/llvm/Demangle - `7` - `3` @@ -3986,14 +4021,14 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`16%` * - llvm/include/llvm/ExecutionEngine/JITLink - `16` - - `14` - - `2` - - :part:`87%` + - `13` + - `3` + - :part:`81%` * - llvm/include/llvm/ExecutionEngine/Orc - - `37` - - `27` + - `38` + - `28` - `10` - - :part:`72%` + - :part:`73%` * - llvm/include/llvm/ExecutionEngine/Orc/Shared - `7` - `4` @@ -4010,10 +4045,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - llvm/include/llvm/Frontend/OpenMP + - `5` - `4` - - `4` - - `0` - - :good:`100%` + - `1` + - :part:`80%` * - llvm/include/llvm/FuzzMutate - `6` - `0` @@ -4025,10 +4060,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - llvm/include/llvm/IR - - `91` - - `25` - - `66` - - :part:`27%` + - `92` + - `28` + - `64` + - :part:`30%` * - llvm/include/llvm/IRReader - `1` - `0` @@ -4056,9 +4091,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - llvm/include/llvm/MC - `74` - - `22` - - `52` - - :part:`29%` + - `23` + - `51` + - :part:`31%` * - llvm/include/llvm/MC/MCDisassembler - `4` - `1` @@ -4081,9 +4116,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`66%` * - llvm/include/llvm/MCA/Stages - `8` - - `7` - - `1` - - :part:`87%` + - `8` + - `0` + - :good:`100%` * - llvm/include/llvm/Object - `31` - `12` @@ -4105,10 +4140,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - :part:`50%` * - llvm/include/llvm/ProfileData - - `8` - - `3` + - `10` - `5` - - :part:`37%` + - `5` + - :part:`50%` * - llvm/include/llvm/ProfileData/Coverage - `3` - `2` @@ -4121,9 
+4156,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`91%` * - llvm/include/llvm/Support - `182` - - `65` - - `117` - - :part:`35%` + - `67` + - `115` + - :part:`36%` * - llvm/include/llvm/Support/FileSystem - `1` - `1` @@ -4200,15 +4235,15 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `11` - :part:`71%` * - llvm/include/llvm/Transforms/Scalar - - `74` - - `46` + - `75` + - `47` - `28` - :part:`62%` * - llvm/include/llvm/Transforms/Utils - - `71` - - `41` + - `73` + - `43` - `30` - - :part:`57%` + - :part:`58%` * - llvm/include/llvm/Transforms/Vectorize - `5` - `1` @@ -4230,20 +4265,20 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `4` - :part:`76%` * - llvm/include/llvm-c - - `26` + - `27` - `12` - - `14` - - :part:`46%` + - `15` + - :part:`44%` * - llvm/include/llvm-c/Transforms - `9` - `3` - `6` - :part:`33%` * - llvm/lib/Analysis - - `117` - - `38` + - `119` + - `40` - `79` - - :part:`32%` + - :part:`33%` * - llvm/lib/AsmParser - `3` - `1` @@ -4256,9 +4291,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`75%` * - llvm/lib/Bitcode/Reader - `7` - - `1` - - `6` - - :part:`14%` + - `2` + - `5` + - :part:`28%` * - llvm/lib/Bitcode/Writer - `5` - `0` @@ -4270,15 +4305,15 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - llvm/lib/CodeGen - - `209` - - `51` + - `215` + - `57` - `158` - - :part:`24%` + - :part:`26%` * - llvm/lib/CodeGen/AsmPrinter - `45` - - `17` - - `28` - - :part:`37%` + - `18` + - `27` + - :part:`40%` * - llvm/lib/CodeGen/GlobalISel - `26` - `9` @@ -4339,6 +4374,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - `2` - :part:`50%` + * - llvm/lib/Debuginfod + - `2` + - `2` + - `0` + - :good:`100%` * - llvm/lib/Demangle - `6` - `4` @@ -4346,9 +4386,9 @@ tree in terms of conformance to 
:doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`66%` * - llvm/lib/DWARFLinker - `4` - - `4` - - `0` - - :good:`100%` + - `3` + - `1` + - :part:`75%` * - llvm/lib/DWP - `2` - `2` @@ -4385,10 +4425,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - :none:`0%` * - llvm/lib/ExecutionEngine/Orc - - `36` - - `20` - - `16` - - :part:`55%` + - `37` + - `22` + - `15` + - :part:`59%` * - llvm/lib/ExecutionEngine/Orc/Shared - `3` - `3` @@ -4445,10 +4485,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - llvm/lib/IR - - `68` - - `19` + - `69` + - `20` - `49` - - :part:`27%` + - :part:`28%` * - llvm/lib/IRReader - `1` - `0` @@ -4516,14 +4556,14 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - llvm/lib/Passes - `6` - - `4` - - `2` - - :part:`66%` + - `3` + - `3` + - :part:`50%` * - llvm/lib/ProfileData - - `8` - - `2` + - `10` + - `4` - `6` - - :part:`25%` + - :part:`40%` * - llvm/lib/ProfileData/Coverage - `3` - `0` @@ -4536,9 +4576,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`76%` * - llvm/lib/Support - `141` - - `57` - - `84` - - :part:`40%` + - `58` + - `83` + - :part:`41%` * - llvm/lib/Support/Unix - `1` - `0` @@ -4556,9 +4596,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - llvm/lib/Target/AArch64 - `60` - - `7` - - `53` - - :part:`11%` + - `6` + - `54` + - :part:`10%` * - llvm/lib/Target/AArch64/AsmParser - `1` - `0` @@ -4645,10 +4685,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - llvm/lib/Target/ARM - - `75` - - `8` - - `67` - - :part:`10%` + - `76` + - `10` + - `66` + - :part:`13%` * - llvm/lib/Target/ARM/AsmParser - `1` - `0` @@ -4701,9 +4741,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - 
:good:`100%` * - llvm/lib/Target/BPF - `32` - - `8` - - `24` - - :part:`25%` + - `9` + - `23` + - :part:`28%` * - llvm/lib/Target/BPF/AsmParser - `1` - `0` @@ -4746,9 +4786,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - llvm/lib/Target/Hexagon - `80` - - `4` - - `76` - - :part:`5%` + - `6` + - `74` + - :part:`7%` * - llvm/lib/Target/Hexagon/AsmParser - `1` - `0` @@ -4825,10 +4865,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - llvm/lib/Target/Mips - - `69` - - `11` + - `70` + - `12` - `58` - - :part:`15%` + - :part:`17%` * - llvm/lib/Target/Mips/AsmParser - `1` - `0` @@ -4946,9 +4986,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - llvm/lib/Target/Sparc - `23` - - `2` - - `21` - - :part:`8%` + - `3` + - `20` + - :part:`13%` * - llvm/lib/Target/Sparc/AsmParser - `1` - `0` @@ -5011,9 +5051,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - llvm/lib/Target/VE/MCTargetDesc - `14` - - `13` - - `1` - - :part:`92%` + - `14` + - `0` + - :good:`100%` * - llvm/lib/Target/VE/TargetInfo - `2` - `1` @@ -5146,9 +5186,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`36%` * - llvm/lib/Transforms/IPO - `44` - - `11` - - `33` - - :part:`25%` + - `10` + - `34` + - :part:`22%` * - llvm/lib/Transforms/ObjCARC - `15` - `4` @@ -5156,14 +5196,14 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`26%` * - llvm/lib/Transforms/Scalar - `79` - - `15` - - `64` - - :part:`18%` - * - llvm/lib/Transforms/Utils - - `75` - `16` + - `63` + - :part:`20%` + * - llvm/lib/Transforms/Utils + - `77` + - `18` - `59` - - :part:`21%` + - :part:`23%` * - llvm/lib/Transforms/Vectorize - `22` - `13` @@ -5284,6 +5324,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 
16:3 - `0` - `1` - :none:`0%` + * - llvm/tools/llvm-debuginfod-find + - `1` + - `1` + - `0` + - :good:`100%` * - llvm/tools/llvm-diff - `1` - `0` @@ -5516,9 +5561,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - llvm/tools/llvm-profgen - `11` - - `8` - - `3` - - :part:`72%` + - `7` + - `4` + - :part:`63%` * - llvm/tools/llvm-rc - `12` - `6` @@ -5675,10 +5720,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `46` - :part:`41%` * - llvm/unittests/Analysis - - `37` - - `11` - - `26` - - :part:`29%` + - `38` + - `13` + - `25` + - :part:`34%` * - llvm/unittests/AsmParser - `1` - `1` @@ -5700,10 +5745,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :part:`50%` * - llvm/unittests/CodeGen - - `18` - - `8` + - `19` + - `9` - `10` - - :part:`44%` + - :part:`47%` * - llvm/unittests/CodeGen/GlobalISel - `13` - `2` @@ -5739,6 +5784,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - llvm/unittests/Debuginfod + - `2` + - `2` + - `0` + - :good:`100%` * - llvm/unittests/Demangle - `7` - `5` @@ -5771,9 +5821,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :none:`0%` * - llvm/unittests/Frontend - `4` - - `3` - - `1` - - :part:`75%` + - `4` + - `0` + - :good:`100%` * - llvm/unittests/FuzzMutate - `4` - `0` @@ -5855,10 +5905,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `3` - :part:`62%` * - llvm/unittests/Support - - `97` - - `32` + - `98` + - `33` - `65` - - :part:`32%` + - :part:`33%` * - llvm/unittests/Support/CommandLineInit - `1` - `1` @@ -5979,21 +6029,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `7` - `1` - :part:`87%` - * - llvm/utils/benchmark/cmake - - `5` - - `3` - - `2` - - :part:`60%` - * - llvm/utils/benchmark/include/benchmark - - `1` - - 
`0` - - `1` - - :none:`0%` - * - llvm/utils/benchmark/src - - `19` - - `0` - - `19` - - :none:`0%` * - llvm/utils/FileCheck - `1` - `0` @@ -6250,18 +6285,18 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/include/mlir/Analysis - - `15` - - `13` + - `14` + - `12` - `2` - - :part:`86%` + - :part:`85%` * - mlir/include/mlir/Analysis/AliasAnalysis - `1` - `1` - `0` - :good:`100%` * - mlir/include/mlir/Analysis/Presburger - - `3` - - `3` + - `6` + - `6` - `0` - :good:`100%` * - mlir/include/mlir/Bindings/Python @@ -6304,6 +6339,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - mlir/include/mlir/Conversion/BufferizationToMemRef + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/include/mlir/Conversion/ComplexToLLVM - `1` - `1` @@ -6325,8 +6365,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :none:`0%` * - mlir/include/mlir/Conversion/GPUToROCDL - - `1` - - `1` + - `2` + - `2` - `0` - :good:`100%` * - mlir/include/mlir/Conversion/GPUToSPIRV @@ -6530,8 +6570,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Async - - `1` - - `1` + - `2` + - `2` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Async/IR @@ -6544,6 +6584,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - `0` - :good:`100%` + * - mlir/include/mlir/Dialect/Bufferization/Transforms + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/Complex/IR - `1` - `1` @@ -6575,13 +6620,13 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize - - `7` - - `7` - - `0` - - :good:`100%` + - `10` + - `9` + - `1` + - :part:`90%` * - mlir/include/mlir/Dialect/Linalg/IR - - `3` - - `3` + - `2` + - 
`2` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Linalg/Transforms @@ -6655,10 +6700,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/include/mlir/Dialect/SCF + - `5` - `4` - - `3` - `1` - - :part:`75%` + - :part:`80%` * - mlir/include/mlir/Dialect/Shape/IR - `1` - `1` @@ -6720,8 +6765,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Tensor/IR - - `1` - - `1` + - `2` + - `2` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Tensor/Transforms @@ -6801,9 +6846,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :part:`60%` * - mlir/include/mlir/TableGen - `21` - - `18` - - `3` - - :part:`85%` + - `19` + - `2` + - :part:`90%` * - mlir/include/mlir/Target/Cpp - `1` - `1` @@ -6879,11 +6924,21 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - mlir/include/mlir/Tools/PDLL/AST + - `4` + - `2` + - `2` + - :part:`50%` + * - mlir/include/mlir/Tools/PDLL/Parser + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/include/mlir/Transforms - - `13` - - `11` + - `12` + - `10` - `2` - - :part:`84%` + - :part:`83%` * - mlir/include/mlir-c - `15` - `15` @@ -6900,8 +6955,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Analysis - - `15` - - `15` + - `14` + - `14` - `0` - :good:`100%` * - mlir/lib/Analysis/AliasAnalysis @@ -6910,15 +6965,15 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Analysis/Presburger - - `2` - - `2` + - `5` + - `5` - `0` - :good:`100%` * - mlir/lib/Bindings/Python - `22` - - `22` - - `0` - - :good:`100%` + - `21` + - `1` + - :part:`95%` * - mlir/lib/Bindings/Python/Conversions - `1` - `1` @@ -6999,6 +7054,11 @@ tree in terms of conformance to :doc:`ClangFormat` as 
of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - mlir/lib/Conversion/BufferizationToMemRef + - `1` + - `0` + - `1` + - :none:`0%` * - mlir/lib/Conversion/ComplexToLLVM - `1` - `1` @@ -7036,9 +7096,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - mlir/lib/Conversion/LinalgToLLVM - `1` - - `0` - `1` - - :none:`0%` + - `0` + - :good:`100%` * - mlir/lib/Conversion/LinalgToSPIRV - `2` - `1` @@ -7095,8 +7155,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Conversion/PDLToPDLInterp - - `5` - - `5` + - `7` + - `7` - `0` - :good:`100%` * - mlir/lib/Conversion/ReconcileUnrealizedCasts @@ -7150,8 +7210,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Conversion/TosaToLinalg - - `2` - - `2` + - `4` + - `4` - `0` - :good:`100%` * - mlir/lib/Conversion/TosaToSCF @@ -7259,6 +7319,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `3` - `0` - :good:`100%` + * - mlir/lib/Dialect/Bufferization/Transforms + - `3` + - `3` + - `0` + - :good:`100%` * - mlir/lib/Dialect/Complex/IR - `2` - `2` @@ -7290,18 +7355,18 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Dialect/Linalg/ComprehensiveBufferize - - `7` - - `7` - - `0` - - :good:`100%` + - `10` + - `9` + - `1` + - :part:`90%` * - mlir/lib/Dialect/Linalg/IR - `3` - `3` - `0` - :good:`100%` * - mlir/lib/Dialect/Linalg/Transforms - - `22` - - `22` + - `23` + - `23` - `0` - :good:`100%` * - mlir/lib/Dialect/Linalg/Utils @@ -7385,10 +7450,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Dialect/SCF/Transforms + - `12` - `11` - - `10` - `1` - - :part:`90%` + - :part:`91%` * - mlir/lib/Dialect/Shape/IR - `1` - `1` @@ -7450,8 +7515,8 @@ tree in terms of 
conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Dialect/Tensor/IR - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - mlir/lib/Dialect/Tensor/Transforms @@ -7461,12 +7526,12 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - mlir/lib/Dialect/Tosa/IR - `1` - - `0` - `1` - - :none:`0%` + - `0` + - :good:`100%` * - mlir/lib/Dialect/Tosa/Transforms - - `3` - - `3` + - `4` + - `4` - `0` - :good:`100%` * - mlir/lib/Dialect/Tosa/Utils @@ -7480,10 +7545,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/lib/Dialect/Vector + - `9` - `8` - - `7` - `1` - - :part:`87%` + - :part:`88%` * - mlir/lib/Dialect/X86Vector/IR - `1` - `1` @@ -7506,9 +7571,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - mlir/lib/IR - `37` - - `35` - - `2` - - :part:`94%` + - `34` + - `3` + - :part:`91%` * - mlir/lib/Parser - `14` - `14` @@ -7516,9 +7581,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - mlir/lib/Pass - `8` - - `7` - - `1` - - :part:`87%` + - `6` + - `2` + - :part:`75%` * - mlir/lib/Reducer - `4` - `4` @@ -7624,16 +7689,26 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - mlir/lib/Tools/PDLL/AST + - `6` + - `5` + - `1` + - :part:`83%` + * - mlir/lib/Tools/PDLL/Parser + - `3` + - `1` + - `2` + - :part:`33%` * - mlir/lib/Transforms - - `21` - - `17` - - `4` - - :part:`80%` + - `19` + - `16` + - `3` + - :part:`84%` * - mlir/lib/Transforms/Utils - `8` - - `7` - - `1` - - :part:`87%` + - `8` + - `0` + - :good:`100%` * - mlir/lib/Translation - `1` - `1` @@ -7659,6 +7734,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - mlir/tools/mlir-pdll + - `1` + - `1` + - `0` + - :good:`100%` 
* - mlir/tools/mlir-reduce - `1` - `1` @@ -7675,8 +7755,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/tools/mlir-tblgen - - `27` - - `26` + - `29` + - `28` - `1` - :part:`96%` * - mlir/tools/mlir-translate @@ -7690,13 +7770,18 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/unittests/Analysis - - `6` - - `6` + - `5` + - `5` - `0` - :good:`100%` * - mlir/unittests/Analysis/Presburger - - `2` - - `2` + - `4` + - `4` + - `0` + - :good:`100%` + * - mlir/unittests/Conversion/PDLToPDLInterp + - `1` + - `1` - `0` - :good:`100%` * - mlir/unittests/Dialect @@ -7709,6 +7794,11 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - `0` - :good:`100%` + * - mlir/unittests/Dialect/SCF + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/unittests/Dialect/SparseTensor - `1` - `1` @@ -7735,8 +7825,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - mlir/unittests/IR - - `6` - - `6` + - `7` + - `7` - `0` - :good:`100%` * - mlir/unittests/Pass @@ -7760,8 +7850,8 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `2` - :part:`60%` * - mlir/unittests/Transforms - - `1` - - `1` + - `2` + - `2` - `0` - :good:`100%` * - openmp/libompd/src @@ -7776,9 +7866,9 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - :good:`100%` * - openmp/libomptarget/DeviceRTL/src - `12` - - `11` - - `1` - - :part:`91%` + - `10` + - `2` + - :part:`83%` * - openmp/libomptarget/deviceRTLs - `2` - `2` @@ -7815,15 +7905,15 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - openmp/libomptarget/include - - `6` - - `6` + - `8` + - `8` - `0` - :good:`100%` * - openmp/libomptarget/plugins/amdgpu/dynamic_hsa - `3` - - `3` - - `0` - - :good:`100%` + - `2` + - `1` + - 
:part:`66%` * - openmp/libomptarget/plugins/amdgpu/impl - `13` - `10` @@ -7885,10 +7975,10 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `0` - :good:`100%` * - openmp/libomptarget/src - - `8` - `6` + - `4` - `2` - - :part:`75%` + - :part:`66%` * - openmp/libomptarget/tools/deviceinfo - `1` - `1` @@ -8069,6 +8159,21 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `8` - `3` - :part:`72%` + * - third-party/benchmark/cmake + - `5` + - `1` + - `4` + - :part:`20%` + * - third-party/benchmark/include/benchmark + - `1` + - `0` + - `1` + - :none:`0%` + * - third-party/benchmark/src + - `21` + - `21` + - `0` + - :good:`100%` * - utils/bazel/llvm-project-overlay/clang/include/clang/Config - `1` - `1` @@ -8080,7 +8185,7 @@ tree in terms of conformance to :doc:`ClangFormat` as of: November 25, 2021 16:3 - `1` - :part:`50%` * - Total - - :total:`15667` - - :total:`8139` - - :total:`7528` - - :total:`51%` + - :total:`15902` + - :total:`8407` + - :total:`7495` + - :total:`52%` diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 96a12ff7ae85..c7defa9cd88c 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -11,7 +11,12 @@ clang/include/clang/Analysis/MacroExpansionContext.h clang/include/clang/Analysis/Analyses/CalledOnceCheck.h clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h +clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h clang/include/clang/APINotes/APINotesYAMLCompiler.h 
clang/include/clang/APINotes/Types.h clang/include/clang/AST/AST.h @@ -150,6 +155,7 @@ clang/include/clang/Tooling/Refactoring/RefactoringOptions.h clang/include/clang/Tooling/Refactoring/RefactoringOptionVisitor.h clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h clang/include/clang/Tooling/Refactoring/Extract/Extract.h +clang/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h clang/include/clang/Tooling/Refactoring/Rename/SymbolOccurrences.h clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h @@ -173,6 +179,8 @@ clang/lib/Analysis/CalledOnceCheck.cpp clang/lib/Analysis/CloneDetection.cpp clang/lib/Analysis/CodeInjector.cpp clang/lib/Analysis/ExprMutationAnalyzer.cpp +clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp clang/lib/APINotes/APINotesFormat.h @@ -265,6 +273,11 @@ clang/lib/Driver/ToolChains/CrossWindows.h clang/lib/Driver/ToolChains/DragonFly.h clang/lib/Driver/ToolChains/FreeBSD.cpp clang/lib/Driver/ToolChains/FreeBSD.h +clang/lib/Driver/ToolChains/HIPAMD.h +clang/lib/Driver/ToolChains/HIPSPV.cpp +clang/lib/Driver/ToolChains/HIPSPV.h +clang/lib/Driver/ToolChains/HIPUtility.cpp +clang/lib/Driver/ToolChains/HIPUtility.h clang/lib/Driver/ToolChains/Hurd.cpp clang/lib/Driver/ToolChains/Hurd.h clang/lib/Driver/ToolChains/InterfaceStubs.cpp @@ -332,6 +345,7 @@ clang/lib/Frontend/FrontendOptions.cpp clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp clang/lib/Frontend/SerializedDiagnosticReader.cpp clang/lib/Headers/amxintrin.h +clang/lib/Headers/arm_neon_sve_bridge.h clang/lib/Headers/avx512fp16intrin.h clang/lib/Headers/avx512vlfp16intrin.h clang/lib/Headers/builtins.h @@ -350,6 +364,7 @@ clang/lib/Headers/openmp_wrappers/complex.h 
clang/lib/Headers/openmp_wrappers/complex_cmath.h clang/lib/Headers/openmp_wrappers/math.h clang/lib/Headers/openmp_wrappers/time.h +clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h clang/lib/Headers/ppc_wrappers/mmintrin.h clang/lib/Headers/ppc_wrappers/smmintrin.h clang/lib/Index/FileIndexRecord.cpp @@ -364,6 +379,7 @@ clang/lib/Lex/PreprocessorLexer.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/CodeCompleteConsumer.cpp clang/lib/Sema/CoroutineStmtBuilder.h +clang/lib/Sema/SemaOpenMP.cpp clang/lib/Sema/SemaSYCL.cpp clang/lib/Sema/UsedDeclVisitor.h clang/lib/Serialization/InMemoryModuleCache.cpp @@ -416,6 +432,7 @@ clang/lib/Tooling/FixIt.cpp clang/lib/Tooling/GuessTargetAndModeCompilationDatabase.cpp clang/lib/Tooling/NodeIntrospection.cpp clang/lib/Tooling/StandaloneExecution.cpp +clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp clang/lib/Tooling/DumpTool/APIData.h @@ -466,6 +483,11 @@ clang/tools/libclang/CXCursor.h clang/tools/scan-build-py/tests/functional/src/include/clean-one.h clang/unittests/Analysis/CFGBuildResult.h clang/unittests/Analysis/MacroExpansionContextTest.cpp +clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp +clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp +clang/unittests/Analysis/FlowSensitive/TestingSupport.h +clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp +clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp clang/unittests/AST/ASTImporterFixtures.cpp clang/unittests/AST/ASTImporterFixtures.h clang/unittests/AST/ASTImporterObjCTest.cpp @@ -585,6 +607,7 @@ clang-tools-extra/clang-include-fixer/IncludeFixerContext.h clang-tools-extra/clang-include-fixer/InMemorySymbolIndex.cpp clang-tools-extra/clang-include-fixer/InMemorySymbolIndex.h clang-tools-extra/clang-include-fixer/SymbolIndex.h 
+clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp clang-tools-extra/clang-include-fixer/YamlSymbolIndex.h clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.cpp clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.h @@ -618,6 +641,7 @@ clang-tools-extra/clang-tidy/ClangTidyOptions.h clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp clang-tools-extra/clang-tidy/ClangTidyProfiling.h clang-tools-extra/clang-tidy/GlobList.cpp +clang-tools-extra/clang-tidy/GlobList.h clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h @@ -641,6 +665,7 @@ clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.h clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.h +clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.cpp clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.h @@ -749,6 +774,8 @@ clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.h clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.h +clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp +clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.h clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.cpp clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h @@ -997,6 +1024,8 @@ clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h clang-tools-extra/clang-tidy/mpi/MPITidyModule.cpp 
clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.cpp clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h +clang-tools-extra/clang-tidy/objc/AssertEquals.cpp +clang-tools-extra/clang-tidy/objc/AssertEquals.h clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.cpp clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.h clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h @@ -1381,7 +1410,6 @@ clang-tools-extra/clangd/unittests/TestFS.cpp clang-tools-extra/clangd/unittests/TestFS.h clang-tools-extra/clangd/unittests/TestIndex.cpp clang-tools-extra/clangd/unittests/TestIndex.h -clang-tools-extra/clangd/unittests/TestScheme.h clang-tools-extra/clangd/unittests/TestTU.cpp clang-tools-extra/clangd/unittests/TestTU.h clang-tools-extra/clangd/unittests/TestWorkspace.cpp @@ -1447,6 +1475,7 @@ clang-tools-extra/unittests/clang-tidy/AddConstTest.cpp clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp clang-tools-extra/unittests/clang-tidy/ClangTidyTest.h clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp +clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp clang-tools-extra/unittests/clang-tidy/OptionsProviderTest.cpp clang-tools-extra/unittests/clang-tidy/OverlappingReplacementsTest.cpp clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp @@ -1457,6 +1486,7 @@ compiler-rt/include/xray/xray_interface.h compiler-rt/include/xray/xray_log_interface.h compiler-rt/lib/asan/asan_activation.h compiler-rt/lib/asan/asan_lock.h +compiler-rt/lib/asan/asan_mapping.h compiler-rt/lib/asan/asan_mapping_sparc64.h compiler-rt/lib/asan/tests/asan_globals_test.cpp compiler-rt/lib/builtins/fp_extend.h @@ -1549,6 +1579,8 @@ compiler-rt/lib/hwasan/hwasan_poisoning.cpp compiler-rt/lib/hwasan/hwasan_poisoning.h compiler-rt/lib/interception/interception_mac.cpp compiler-rt/lib/interception/tests/interception_test_main.cpp +compiler-rt/lib/lsan/lsan.h +compiler-rt/lib/lsan/lsan_common.cpp 
compiler-rt/lib/lsan/lsan_thread.cpp compiler-rt/lib/lsan/lsan_thread.h compiler-rt/lib/memprof/memprof_allocator.cpp @@ -1622,15 +1654,19 @@ compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h compiler-rt/lib/sanitizer_common/sanitizer_errno.h compiler-rt/lib/sanitizer_common/sanitizer_errno_codes.h compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h +compiler-rt/lib/sanitizer_common/sanitizer_leb128.h compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h +compiler-rt/lib/sanitizer_common/sanitizer_lzw.h compiler-rt/lib/sanitizer_common/sanitizer_openbsd.cpp compiler-rt/lib/sanitizer_common/sanitizer_placement_new.h +compiler-rt/lib/sanitizer_common/sanitizer_platform.h compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_openbsd.cpp compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_openbsd.h compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_fuchsia.h +compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_win.cpp compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_fuchsia.h compiler-rt/lib/sanitizer_common/sanitizer_thread_safety.h compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.h @@ -1643,8 +1679,11 @@ compiler-rt/lib/sanitizer_common/tests/sanitizer_chained_origin_depot_test.cpp compiler-rt/lib/sanitizer_common/tests/sanitizer_dense_map_test.cpp compiler-rt/lib/sanitizer_common/tests/sanitizer_flat_map_test.cpp compiler-rt/lib/sanitizer_common/tests/sanitizer_hash_test.cpp +compiler-rt/lib/sanitizer_common/tests/sanitizer_leb128_test.cpp +compiler-rt/lib/sanitizer_common/tests/sanitizer_lzw_test.cpp compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp compiler-rt/lib/sanitizer_common/tests/sanitizer_stack_store_test.cpp +compiler-rt/lib/sanitizer_common/tests/sanitizer_stoptheworld_test.cpp 
compiler-rt/lib/sanitizer_common/tests/sanitizer_test_main.cpp compiler-rt/lib/sanitizer_common/tests/sanitizer_type_traits_test.cpp compiler-rt/lib/scudo/standalone/allocator_config.h @@ -1722,6 +1761,7 @@ compiler-rt/lib/scudo/standalone/tests/stats_test.cpp compiler-rt/lib/scudo/standalone/tests/strings_test.cpp compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp compiler-rt/lib/scudo/standalone/tests/vector_test.cpp +compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp compiler-rt/lib/scudo/standalone/tools/compute_size_class_config.cpp compiler-rt/lib/tsan/rtl/tsan_fd.h @@ -1736,7 +1776,21 @@ compiler-rt/lib/tsan/rtl/tsan_shadow.h compiler-rt/lib/tsan/rtl/tsan_stack_trace.h compiler-rt/lib/tsan/rtl/tsan_suppressions.h compiler-rt/lib/tsan/rtl/tsan_symbolize.h +compiler-rt/lib/tsan/rtl/tsan_trace.h compiler-rt/lib/tsan/rtl/tsan_vector_clock.h +compiler-rt/lib/tsan/rtl-old/tsan_fd.h +compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.h +compiler-rt/lib/tsan/rtl-old/tsan_ilist.h +compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.h +compiler-rt/lib/tsan/rtl-old/tsan_mman.h +compiler-rt/lib/tsan/rtl-old/tsan_mutexset.h +compiler-rt/lib/tsan/rtl-old/tsan_ppc_regs.h +compiler-rt/lib/tsan/rtl-old/tsan_rtl_access.cpp +compiler-rt/lib/tsan/rtl-old/tsan_shadow.h +compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.h +compiler-rt/lib/tsan/rtl-old/tsan_suppressions.h +compiler-rt/lib/tsan/rtl-old/tsan_symbolize.h +compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.h compiler-rt/lib/tsan/tests/unit/tsan_ilist_test.cpp compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp compiler-rt/lib/tsan/tests/unit/tsan_vector_clock_test.cpp @@ -1793,7 +1847,6 @@ cross-project-tests/debuginfo-tests/dexter-tests/global-constant.cpp cross-project-tests/debuginfo-tests/dexter-tests/nrvo.cpp cross-project-tests/debuginfo-tests/dexter-tests/realigned-frame.cpp cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp 
-cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp flang/examples/external-hello.cpp flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h @@ -1880,8 +1933,17 @@ flang/include/flang/Optimizer/Builder/BoxValue.h flang/include/flang/Optimizer/Builder/Character.h flang/include/flang/Optimizer/Builder/Complex.h flang/include/flang/Optimizer/Builder/DoLoopHelper.h +flang/include/flang/Optimizer/Builder/Factory.h flang/include/flang/Optimizer/Builder/FIRBuilder.h flang/include/flang/Optimizer/Builder/MutableBox.h +flang/include/flang/Optimizer/Builder/Runtime/Assign.h +flang/include/flang/Optimizer/Builder/Runtime/Character.h +flang/include/flang/Optimizer/Builder/Runtime/Derived.h +flang/include/flang/Optimizer/Builder/Runtime/Numeric.h +flang/include/flang/Optimizer/Builder/Runtime/Ragged.h +flang/include/flang/Optimizer/Builder/Runtime/Reduction.h +flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h +flang/include/flang/Optimizer/Builder/Runtime/Transformational.h flang/include/flang/Optimizer/CodeGen/CodeGen.h flang/include/flang/Optimizer/Dialect/FIRAttr.h flang/include/flang/Optimizer/Dialect/FIRDialect.h @@ -1922,6 +1984,7 @@ flang/include/flang/Runtime/cpp-type.h flang/include/flang/Runtime/derived-api.h flang/include/flang/Runtime/descriptor.h flang/include/flang/Runtime/entry-names.h +flang/include/flang/Runtime/extensions.h flang/include/flang/Runtime/io-api.h flang/include/flang/Runtime/iostat.h flang/include/flang/Runtime/main.h @@ -1930,9 +1993,11 @@ flang/include/flang/Runtime/memory.h flang/include/flang/Runtime/misc-intrinsic.h flang/include/flang/Runtime/numeric.h flang/include/flang/Runtime/pointer.h +flang/include/flang/Runtime/ragged.h flang/include/flang/Runtime/random.h flang/include/flang/Runtime/reduction.h flang/include/flang/Runtime/stop.h +flang/include/flang/Runtime/support.h flang/include/flang/Runtime/time-intrinsic.h 
flang/include/flang/Runtime/transformational.h flang/include/flang/Runtime/type-code.h @@ -2011,6 +2076,13 @@ flang/lib/Optimizer/Builder/Complex.cpp flang/lib/Optimizer/Builder/DoLoopHelper.cpp flang/lib/Optimizer/Builder/FIRBuilder.cpp flang/lib/Optimizer/Builder/MutableBox.cpp +flang/lib/Optimizer/Builder/Runtime/Assign.cpp +flang/lib/Optimizer/Builder/Runtime/Character.cpp +flang/lib/Optimizer/Builder/Runtime/Derived.cpp +flang/lib/Optimizer/Builder/Runtime/Numeric.cpp +flang/lib/Optimizer/Builder/Runtime/Ragged.cpp +flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +flang/lib/Optimizer/Builder/Runtime/Transformational.cpp flang/lib/Optimizer/CodeGen/CGOps.cpp flang/lib/Optimizer/CodeGen/CGOps.h flang/lib/Optimizer/CodeGen/DescriptorModel.h @@ -2029,9 +2101,12 @@ flang/lib/Optimizer/Support/KindMapping.cpp flang/lib/Optimizer/Transforms/AbstractResult.cpp flang/lib/Optimizer/Transforms/AffineDemotion.cpp flang/lib/Optimizer/Transforms/AffinePromotion.cpp +flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp flang/lib/Optimizer/Transforms/CharacterConversion.cpp flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp flang/lib/Optimizer/Transforms/Inliner.cpp +flang/lib/Optimizer/Transforms/MemoryAllocation.cpp +flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp flang/lib/Optimizer/Transforms/PassDetail.h flang/lib/Optimizer/Transforms/RewriteLoop.cpp flang/lib/Parser/basic-parsers.h @@ -2166,6 +2241,7 @@ flang/runtime/edit-output.cpp flang/runtime/edit-output.h flang/runtime/environment.cpp flang/runtime/environment.h +flang/runtime/extensions.cpp flang/runtime/extrema.cpp flang/runtime/file.cpp flang/runtime/file.h @@ -2192,6 +2268,7 @@ flang/runtime/namelist.h flang/runtime/numeric.cpp flang/runtime/pointer.cpp flang/runtime/product.cpp +flang/runtime/ragged.cpp flang/runtime/random.cpp flang/runtime/reduction-templates.h flang/runtime/reduction.cpp @@ -2199,6 +2276,7 @@ flang/runtime/stat.cpp flang/runtime/stat.h flang/runtime/stop.cpp flang/runtime/sum.cpp 
+flang/runtime/support.cpp flang/runtime/terminator.cpp flang/runtime/terminator.h flang/runtime/time-intrinsic.cpp @@ -2245,6 +2323,14 @@ flang/unittests/Optimizer/Builder/CharacterTest.cpp flang/unittests/Optimizer/Builder/ComplexTest.cpp flang/unittests/Optimizer/Builder/DoLoopHelperTest.cpp flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp +flang/unittests/Optimizer/Builder/Runtime/AssignTest.cpp +flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp +flang/unittests/Optimizer/Builder/Runtime/DerivedTest.cpp +flang/unittests/Optimizer/Builder/Runtime/NumericTest.cpp +flang/unittests/Optimizer/Builder/Runtime/RaggedTest.cpp +flang/unittests/Optimizer/Builder/Runtime/ReductionTest.cpp +flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h +flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp flang/unittests/Runtime/BufferTest.cpp flang/unittests/Runtime/CharacterTest.cpp flang/unittests/Runtime/CommandTest.cpp @@ -2258,6 +2344,7 @@ flang/unittests/Runtime/MiscIntrinsic.cpp flang/unittests/Runtime/Namelist.cpp flang/unittests/Runtime/Numeric.cpp flang/unittests/Runtime/NumericalFormatTest.cpp +flang/unittests/Runtime/Ragged.cpp flang/unittests/Runtime/Random.cpp flang/unittests/Runtime/Reduction.cpp flang/unittests/Runtime/RuntimeCrashTest.cpp @@ -2429,6 +2516,7 @@ libc/src/math/llroundl.h libc/src/math/logb.h libc/src/math/logbf.h libc/src/math/logbl.h +libc/src/math/logf.h libc/src/math/lrint.h libc/src/math/lrintf.h libc/src/math/lrintl.h @@ -2525,6 +2613,7 @@ libc/src/math/generic/llroundl.cpp libc/src/math/generic/logb.cpp libc/src/math/generic/logbf.cpp libc/src/math/generic/logbl.cpp +libc/src/math/generic/logf.cpp libc/src/math/generic/lrint.cpp libc/src/math/generic/lrintf.cpp libc/src/math/generic/lrintl.cpp @@ -2623,6 +2712,8 @@ libc/src/stdlib/strtof.cpp libc/src/stdlib/strtof.h libc/src/stdlib/strtol.cpp libc/src/stdlib/strtol.h +libc/src/stdlib/strtold.cpp +libc/src/stdlib/strtold.h libc/src/stdlib/strtoll.cpp 
libc/src/stdlib/strtoll.h libc/src/stdlib/strtoul.cpp @@ -2692,6 +2783,7 @@ libc/src/string/strtok.cpp libc/src/string/strtok.h libc/src/string/strtok_r.cpp libc/src/string/strtok_r.h +libc/src/string/memory_utils/bcmp_implementations.h libc/src/string/memory_utils/elements.h libc/src/string/memory_utils/elements_aarch64.h libc/src/string/memory_utils/elements_x86.h @@ -2721,6 +2813,7 @@ libc/src/threads/linux/Futex.h libc/src/threads/linux/mtx_destroy.cpp libc/src/threads/linux/mtx_init.cpp libc/src/threads/linux/Mutex.h +libc/src/threads/linux/thrd_create.cpp libc/src/threads/linux/thrd_join.cpp libc/src/threads/linux/Thread.h libc/src/time/asctime.cpp @@ -2756,21 +2849,18 @@ libc/src/__support/CPP/StringView.h libc/src/__support/CPP/TypeTraits.h libc/src/__support/FPUtil/BasicOperations.h libc/src/__support/FPUtil/DivisionAndRemainderOperations.h -libc/src/__support/FPUtil/DummyFEnvImpl.h +libc/src/__support/FPUtil/FEnvImpl.h libc/src/__support/FPUtil/FEnvUtils.h libc/src/__support/FPUtil/FloatProperties.h libc/src/__support/FPUtil/FMA.h libc/src/__support/FPUtil/FPBits.h libc/src/__support/FPUtil/Hypot.h -libc/src/__support/FPUtil/LongDoubleBitsX86.h libc/src/__support/FPUtil/ManipulationFunctions.h libc/src/__support/FPUtil/NearestIntegerOperations.h -libc/src/__support/FPUtil/NextAfterLongDoubleX86.h libc/src/__support/FPUtil/NormalFloat.h libc/src/__support/FPUtil/PlatformDefs.h libc/src/__support/FPUtil/PolyEval.h libc/src/__support/FPUtil/Sqrt.h -libc/src/__support/FPUtil/SqrtLongDoubleX86.h libc/src/__support/FPUtil/UInt.h libc/src/__support/FPUtil/XFloat.h libc/src/__support/FPUtil/aarch64/FEnvImpl.h @@ -2778,6 +2868,17 @@ libc/src/__support/FPUtil/aarch64/FMA.h libc/src/__support/FPUtil/generic/FMA.h libc/src/__support/FPUtil/x86_64/FEnvImpl.h libc/src/__support/FPUtil/x86_64/FMA.h +libc/src/__support/FPUtil/x86_64/LongDoubleBits.h +libc/src/__support/FPUtil/x86_64/NextAfterLongDouble.h +libc/src/__support/FPUtil/x86_64/PolyEval.h 
+libc/src/__support/FPUtil/x86_64/SqrtLongDouble.h +libc/src/__support/OSUtil/io.h +libc/src/__support/OSUtil/quick_exit.h +libc/src/__support/OSUtil/syscall.h +libc/src/__support/OSUtil/linux/io.h +libc/src/__support/OSUtil/linux/quick_exit.h +libc/src/__support/OSUtil/linux/syscall.h +libc/src/__support/OSUtil/linux/x86_64/syscall.h libc/utils/HdrGen/Command.cpp libc/utils/HdrGen/Command.h libc/utils/HdrGen/Generator.cpp @@ -3017,38 +3118,46 @@ libclc/generic/include/math/clc_sqrt.h libclc/generic/include/math/clc_tan.h libclc/generic/include/math/clc_tanpi.h libclc/generic/lib/math/ep_log.h +libcxx/benchmarks/format.bench.cpp +libcxx/benchmarks/formatted_size.bench.cpp +libcxx/benchmarks/format_to.bench.cpp +libcxx/benchmarks/format_to_n.bench.cpp libcxx/benchmarks/to_chars.bench.cpp libcxx/benchmarks/util_smartptr.bench.cpp libcxx/benchmarks/variant_visit_1.bench.cpp libcxx/benchmarks/variant_visit_2.bench.cpp libcxx/benchmarks/variant_visit_3.bench.cpp +libcxx/include/__filesystem/file_time_type.h +libcxx/include/__filesystem/file_type.h +libcxx/include/__filesystem/space_info.h +libcxx/include/__memory/voidify.h +libcxx/include/__numeric/exclusive_scan.h +libcxx/include/__numeric/inclusive_scan.h +libcxx/include/__numeric/reduce.h +libcxx/include/__numeric/transform_reduce.h libcxx/include/__support/ibm/gettod_zos.h libcxx/include/__support/ibm/nanosleep.h libcxx/include/__support/openbsd/xlocale.h libcxx/include/__support/solaris/floatingpoint.h libcxx/include/__support/solaris/wchar.h +libcxx/src/chrono_system_time_init.h libcxx/src/format.cpp +libcxx/src/iostream_init.h libcxx/src/legacy_pointer_safety.cpp libcxx/src/utility.cpp -libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp -libcxx/utils/google-benchmark/src/arraysize.h -libcxx/utils/google-benchmark/src/benchmark_api_internal.h -libcxx/utils/google-benchmark/src/benchmark_runner.h -libcxx/utils/google-benchmark/src/check.h -libcxx/utils/google-benchmark/src/colorprint.h 
-libcxx/utils/google-benchmark/src/commandlineflags.h -libcxx/utils/google-benchmark/src/complexity.h -libcxx/utils/google-benchmark/src/counter.h -libcxx/utils/google-benchmark/src/internal_macros.h -libcxx/utils/google-benchmark/src/log.h -libcxx/utils/google-benchmark/src/mutex.h -libcxx/utils/google-benchmark/src/perf_counters.h -libcxx/utils/google-benchmark/src/re.h -libcxx/utils/google-benchmark/src/sleep.h -libcxx/utils/google-benchmark/src/statistics.h -libcxx/utils/google-benchmark/src/thread_manager.h -libcxx/utils/google-benchmark/src/thread_timer.h -libcxx/utils/google-benchmark/src/timers.h +libcxx/src/experimental/memory_resource_init_helper.h +libcxx/src/include/to_chars_floating_point.h +libcxx/src/include/ryu/common.h +libcxx/src/include/ryu/d2fixed.h +libcxx/src/include/ryu/d2fixed_full_table.h +libcxx/src/include/ryu/d2s.h +libcxx/src/include/ryu/d2s_full_table.h +libcxx/src/include/ryu/d2s_intrinsics.h +libcxx/src/include/ryu/digit_table.h +libcxx/src/include/ryu/f2s.h +libcxx/src/ryu/d2fixed.cpp +libcxx/src/ryu/d2s.cpp +libcxx/src/ryu/f2s.cpp libcxxabi/src/cxa_guard_impl.h libcxxabi/src/demangle/StringView.h libcxxabi/src/demangle/Utility.h @@ -3083,6 +3192,7 @@ lld/ELF/DWARF.cpp lld/ELF/DWARF.h lld/ELF/EhFrame.h lld/ELF/ICF.h +lld/ELF/LinkerScript.cpp lld/ELF/LTO.h lld/ELF/MapFile.h lld/ELF/MarkLive.cpp @@ -3112,9 +3222,6 @@ lld/include/lld/Core/Pass.h lld/include/lld/Core/SharedLibraryAtom.h lld/include/lld/Core/UndefinedAtom.h lld/include/lld/Core/Writer.h -lld/lib/Core/File.cpp -lld/lib/Core/Writer.cpp -lld/lib/ReaderWriter/MachO/MachOPasses.h lld/MachO/ConcatOutputSection.cpp lld/MachO/ConcatOutputSection.h lld/MachO/Config.h @@ -3260,6 +3367,7 @@ lldb/include/lldb/Breakpoint/WatchpointList.h lldb/include/lldb/Breakpoint/WatchpointOptions.h lldb/include/lldb/Core/AddressResolver.h lldb/include/lldb/Core/AddressResolverFileLine.h +lldb/include/lldb/Core/DataFileCache.h lldb/include/lldb/Core/Debugger.h lldb/include/lldb/Core/Declaration.h 
lldb/include/lldb/Core/DumpRegisterValue.h @@ -3390,7 +3498,7 @@ lldb/include/lldb/Symbol/LocateSymbolFile.h lldb/include/lldb/Symbol/SourceModule.h lldb/include/lldb/Symbol/SymbolContextScope.h lldb/include/lldb/Symbol/SymbolVendor.h -lldb/include/lldb/Symbol/Symtab.h +lldb/include/lldb/Symbol/Type.h lldb/include/lldb/Symbol/TypeList.h lldb/include/lldb/Symbol/TypeMap.h lldb/include/lldb/Symbol/UnwindTable.h @@ -3485,7 +3593,7 @@ lldb/include/lldb/Utility/UserID.h lldb/include/lldb/Utility/UserIDResolver.h lldb/include/lldb/Utility/VASPrintf.h lldb/include/lldb/Utility/VMRange.h -lldb/source/lldb.cpp +lldb/include/lldb/Version/Version.h lldb/source/API/SBCommandInterpreterRunOptions.cpp lldb/source/API/SBModule.cpp lldb/source/API/SBReproducerPrivate.h @@ -3567,7 +3675,6 @@ lldb/source/Core/FormatEntity.cpp lldb/source/Core/Highlighter.cpp lldb/source/Core/IOHandler.cpp lldb/source/Core/IOHandlerCursesGUI.cpp -lldb/source/Core/Mangled.cpp lldb/source/Core/ModuleChild.cpp lldb/source/Core/Opcode.cpp lldb/source/Core/Progress.cpp @@ -3743,14 +3850,15 @@ lldb/source/Plugins/Language/CPlusPlus/BlockPointer.h lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.h +lldb/source/Plugins/Language/CPlusPlus/Generic.h lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxx.h lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.h lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.h 
lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h @@ -3822,6 +3930,8 @@ lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.h lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h +lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp +lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h lldb/source/Plugins/Platform/Windows/PlatformWindows.h lldb/source/Plugins/Process/elf-core/ProcessElfCore.h lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp @@ -3853,6 +3963,14 @@ lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86_64.cpp lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.h +lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp +lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h +lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_arm64.cpp +lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_arm64.h +lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_i386.h +lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_x86_64.cpp +lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_x86_64.h +lldb/source/Plugins/Process/FreeBSDKernel/ThreadFreeBSDKernel.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationHistory.h @@ -4039,9 +4157,11 @@ lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.h lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.h lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.h 
lldb/source/Plugins/REPL/Clang/ClangREPL.h +lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h +lldb/source/Plugins/ScriptInterpreter/Lua/SWIGLuaBridge.h lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h @@ -4053,6 +4173,7 @@ lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h +lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.cpp lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -4087,9 +4208,10 @@ lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h -lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp lldb/source/Plugins/SymbolFile/NativePDB/CodeViewRegisterMapping.cpp @@ -4102,7 +4224,6 @@ lldb/source/Plugins/SymbolFile/NativePDB/PdbIndex.h lldb/source/Plugins/SymbolFile/NativePDB/PdbSymUid.cpp 
lldb/source/Plugins/SymbolFile/NativePDB/PdbSymUid.h lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp -lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.h lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp @@ -4158,6 +4279,7 @@ lldb/source/Symbol/LineEntry.cpp lldb/source/Symbol/LocateSymbolFile.cpp lldb/source/Symbol/PostfixExpression.cpp lldb/source/Symbol/SymbolContext.cpp +lldb/source/Symbol/SymbolFile.cpp lldb/source/Symbol/SymbolVendor.cpp lldb/source/Symbol/TypeList.cpp lldb/source/Symbol/TypeMap.cpp @@ -4243,6 +4365,7 @@ lldb/source/Utility/UuidCompatibility.h lldb/source/Utility/VASprintf.cpp lldb/source/Utility/VMRange.cpp lldb/source/Utility/XcodeSDK.cpp +lldb/source/Version/Version.cpp lldb/tools/argdumper/argdumper.cpp lldb/tools/darwin-debug/darwin-debug.cpp lldb/tools/debugserver/source/ARM_DWARF_Registers.h @@ -4348,6 +4471,7 @@ lldb/unittests/Breakpoint/BreakpointIDTest.cpp lldb/unittests/Core/CommunicationTest.cpp lldb/unittests/Core/DumpDataExtractorTest.cpp lldb/unittests/Core/FormatEntityTest.cpp +lldb/unittests/Core/MangledTest.cpp lldb/unittests/Core/ModuleSpecTest.cpp lldb/unittests/Core/RichManglingContextTest.cpp lldb/unittests/Core/SourceLocationSpecTest.cpp @@ -4360,6 +4484,7 @@ lldb/unittests/debugserver/JSONTest.cpp lldb/unittests/debugserver/RNBSocketTest.cpp lldb/unittests/Editline/EditlineTest.cpp lldb/unittests/Expression/ClangExpressionDeclMapTest.cpp +lldb/unittests/Expression/CppModuleConfigurationTest.cpp lldb/unittests/Expression/DiagnosticManagerTest.cpp lldb/unittests/Host/ConnectionFileDescriptorTest.cpp lldb/unittests/Host/FileActionTest.cpp @@ -4391,15 +4516,21 @@ lldb/unittests/Process/Utility/LinuxProcMapsTest.cpp lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp lldb/unittests/Process/Utility/RegisterContextFreeBSDTest.cpp 
lldb/unittests/Process/Utility/RegisterContextTest.cpp +lldb/unittests/ScriptInterpreter/Lua/LuaTests.cpp lldb/unittests/ScriptInterpreter/Lua/ScriptInterpreterTests.cpp +lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.h lldb/unittests/Signals/UnixSignalsTest.cpp lldb/unittests/Symbol/LocateSymbolFileTest.cpp +lldb/unittests/Symbol/MangledTest.cpp +lldb/unittests/Symbol/SymbolTest.cpp +lldb/unittests/Symbol/SymtabTest.cpp lldb/unittests/Symbol/TestClangASTImporter.cpp lldb/unittests/Symbol/TestDWARFCallFrameInfo.cpp lldb/unittests/Symbol/TestTypeSystem.cpp lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp +lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp lldb/unittests/SymbolFile/DWARF/DWARFUnitTest.cpp lldb/unittests/SymbolFile/DWARF/Inputs/test-dwarf.cpp lldb/unittests/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpressionTests.cpp @@ -4499,6 +4630,8 @@ llvm/include/llvm/ADT/CombinationGenerator.h llvm/include/llvm/ADT/DAGDeltaAlgorithm.h llvm/include/llvm/ADT/EnumeratedArray.h llvm/include/llvm/ADT/EpochTracker.h +llvm/include/llvm/ADT/GenericCycleInfo.h +llvm/include/llvm/ADT/GenericSSAContext.h llvm/include/llvm/ADT/ilist_base.h llvm/include/llvm/ADT/ilist_iterator.h llvm/include/llvm/ADT/ilist_node.h @@ -4515,10 +4648,12 @@ llvm/include/llvm/ADT/StringSet.h llvm/include/llvm/ADT/TypeSwitch.h llvm/include/llvm/ADT/Waymarking.h llvm/include/llvm/Analysis/BlockFrequencyInfo.h +llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h llvm/include/llvm/Analysis/ConstraintSystem.h llvm/include/llvm/Analysis/CostModel.h +llvm/include/llvm/Analysis/CycleAnalysis.h llvm/include/llvm/Analysis/DDGPrinter.h llvm/include/llvm/Analysis/Delinearization.h llvm/include/llvm/Analysis/DependenceGraphBuilder.h @@ -4542,13 +4677,16 @@ 
llvm/include/llvm/Analysis/LoopNestAnalysis.h llvm/include/llvm/Analysis/MemDerefPrinter.h llvm/include/llvm/Analysis/MLInlineAdvisor.h llvm/include/llvm/Analysis/MLModelRunner.h +llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h +llvm/include/llvm/Analysis/NoInferenceModelRunner.h llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h llvm/include/llvm/Analysis/ObjCARCInstKind.h llvm/include/llvm/Analysis/ObjCARCUtil.h llvm/include/llvm/Analysis/OverflowInstAnalysis.h llvm/include/llvm/Analysis/PhiValues.h +llvm/include/llvm/Analysis/ReleaseModeModelRunner.h llvm/include/llvm/Analysis/ReplayInlineAdvisor.h llvm/include/llvm/Analysis/ScalarEvolutionDivision.h llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -4595,6 +4733,7 @@ llvm/include/llvm/CodeGen/LiveStacks.h llvm/include/llvm/CodeGen/LoopTraversal.h llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h llvm/include/llvm/CodeGen/MachineCombinerPattern.h +llvm/include/llvm/CodeGen/MachineCycleAnalysis.h llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h llvm/include/llvm/CodeGen/MachineLoopUtils.h llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -4602,6 +4741,7 @@ llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h llvm/include/llvm/CodeGen/MachineOutliner.h llvm/include/llvm/CodeGen/MachinePassManager.h llvm/include/llvm/CodeGen/MachineRegionInfo.h +llvm/include/llvm/CodeGen/MachineSSAContext.h llvm/include/llvm/CodeGen/MachineStableHash.h llvm/include/llvm/CodeGen/MIRFormatter.h llvm/include/llvm/CodeGen/MIRFSDiscriminator.h @@ -4619,6 +4759,7 @@ llvm/include/llvm/CodeGen/StableHashing.h llvm/include/llvm/CodeGen/TargetOpcodes.h llvm/include/llvm/CodeGen/TileShapeInfo.h llvm/include/llvm/CodeGen/UnreachableBlockElim.h +llvm/include/llvm/CodeGen/VLIWMachineScheduler.h llvm/include/llvm/CodeGen/WasmEHFuncInfo.h llvm/include/llvm/CodeGen/WinEHFuncInfo.h 
llvm/include/llvm/CodeGen/GlobalISel/Combiner.h @@ -4676,7 +4817,6 @@ llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -4702,6 +4842,29 @@ llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h llvm/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h llvm/include/llvm/DebugInfo/PDB/PDB.h llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h +llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h 
llvm/include/llvm/DebugInfo/PDB/UDTLayout.h llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h @@ -4749,6 +4912,8 @@ llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +llvm/include/llvm/Debuginfod/Debuginfod.h +llvm/include/llvm/Debuginfod/HTTPClient.h llvm/include/llvm/Demangle/Demangle.h llvm/include/llvm/Demangle/StringView.h llvm/include/llvm/Demangle/Utility.h @@ -4767,7 +4932,6 @@ llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h -llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h llvm/include/llvm/ExecutionEngine/JITLink/MachO.h llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h @@ -4796,6 +4960,7 @@ llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h llvm/include/llvm/ExecutionEngine/Orc/Mangling.h +llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -4814,20 +4979,22 @@ llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager. 
llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h llvm/include/llvm/FileCheck/FileCheck.h +llvm/include/llvm/Frontend/OpenMP/OMPAssume.h llvm/include/llvm/Frontend/OpenMP/OMPConstants.h llvm/include/llvm/Frontend/OpenMP/OMPContext.h llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h -llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/include/llvm/InterfaceStub/ELFObjHandler.h llvm/include/llvm/InterfaceStub/IFSHandler.h llvm/include/llvm/InterfaceStub/IFSStub.h llvm/include/llvm/IR/Assumptions.h llvm/include/llvm/IR/BuiltinGCs.h llvm/include/llvm/IR/Comdat.h +llvm/include/llvm/IR/Constants.h llvm/include/llvm/IR/DebugInfo.h llvm/include/llvm/IR/DebugInfoMetadata.h llvm/include/llvm/IR/DiagnosticHandler.h llvm/include/llvm/IR/DiagnosticPrinter.h +llvm/include/llvm/IR/GlobalIFunc.h llvm/include/llvm/IR/GlobalObject.h llvm/include/llvm/IR/GVMaterializer.h llvm/include/llvm/IR/IRPrintingPasses.h @@ -4843,6 +5010,7 @@ llvm/include/llvm/IR/PrintPasses.h llvm/include/llvm/IR/ProfileSummary.h llvm/include/llvm/IR/PseudoProbe.h llvm/include/llvm/IR/ReplaceConstant.h +llvm/include/llvm/IR/SSAContext.h llvm/include/llvm/IR/StructuralHash.h llvm/include/llvm/IR/TrackingMDRef.h llvm/include/llvm/IR/UseListOrder.h @@ -4855,6 +5023,7 @@ llvm/include/llvm/MC/MCAsmInfoWasm.h llvm/include/llvm/MC/MCAsmInfoXCOFF.h llvm/include/llvm/MC/MCAsmLayout.h llvm/include/llvm/MC/MCCodeView.h +llvm/include/llvm/MC/MCFixedLenDisassembler.h llvm/include/llvm/MC/MCLabel.h llvm/include/llvm/MC/MCObjectWriter.h llvm/include/llvm/MC/MCPseudoProbe.h @@ -4888,6 +5057,7 @@ llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h llvm/include/llvm/MCA/Stages/DispatchStage.h +llvm/include/llvm/MCA/Stages/EntryStage.h llvm/include/llvm/MCA/Stages/ExecuteStage.h 
llvm/include/llvm/MCA/Stages/InOrderIssueStage.h llvm/include/llvm/MCA/Stages/InstructionTables.h @@ -4922,8 +5092,10 @@ llvm/include/llvm/Option/OptSpecifier.h llvm/include/llvm/Passes/OptimizationLevel.h llvm/include/llvm/Passes/StandardInstrumentations.h llvm/include/llvm/ProfileData/GCOV.h +llvm/include/llvm/ProfileData/InstrProfCorrelator.h llvm/include/llvm/ProfileData/InstrProfWriter.h llvm/include/llvm/ProfileData/ProfileCommon.h +llvm/include/llvm/ProfileData/RawMemProfReader.h llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h llvm/include/llvm/Remarks/BitstreamRemarkContainer.h @@ -4985,6 +5157,7 @@ llvm/include/llvm/Support/RISCVAttributeParser.h llvm/include/llvm/Support/RISCVAttributes.h llvm/include/llvm/Support/RISCVISAInfo.h llvm/include/llvm/Support/RWMutex.h +llvm/include/llvm/Support/ScopedPrinter.h llvm/include/llvm/Support/SHA256.h llvm/include/llvm/Support/Signposts.h llvm/include/llvm/Support/SmallVectorMemoryBuffer.h @@ -4994,6 +5167,7 @@ llvm/include/llvm/Support/SourceMgr.h llvm/include/llvm/Support/SuffixTree.h llvm/include/llvm/Support/SymbolRemappingReader.h llvm/include/llvm/Support/SystemUtils.h +llvm/include/llvm/Support/TargetParser.h llvm/include/llvm/Support/TimeProfiler.h llvm/include/llvm/Support/TrailingObjects.h llvm/include/llvm/Support/Unicode.h @@ -5072,6 +5246,7 @@ llvm/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h llvm/include/llvm/Transforms/Scalar/EarlyCSE.h +llvm/include/llvm/Transforms/Scalar/FlattenCFG.h llvm/include/llvm/Transforms/Scalar/GVNExpression.h llvm/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h @@ -5119,6 +5294,7 @@ llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h 
llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h +llvm/include/llvm/Transforms/Utils/CodeLayout.h llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h llvm/include/llvm/Transforms/Utils/Debugify.h llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h @@ -5139,6 +5315,7 @@ llvm/include/llvm/Transforms/Utils/Mem2Reg.h llvm/include/llvm/Transforms/Utils/MetaRenamer.h llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h +llvm/include/llvm/Transforms/Utils/SampleProfileInference.h llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -5186,6 +5363,7 @@ llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp llvm/lib/Analysis/CFLGraph.h llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp llvm/lib/Analysis/CodeMetrics.cpp +llvm/lib/Analysis/CycleAnalysis.cpp llvm/lib/Analysis/DDGPrinter.cpp llvm/lib/Analysis/Delinearization.cpp llvm/lib/Analysis/DependenceGraphBuilder.cpp @@ -5203,12 +5381,13 @@ llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp llvm/lib/Analysis/LoopNestAnalysis.cpp llvm/lib/Analysis/LoopUnrollAnalyzer.cpp llvm/lib/Analysis/MLInlineAdvisor.cpp +llvm/lib/Analysis/ModelUnderTrainingRunner.cpp +llvm/lib/Analysis/NoInferenceModelRunner.cpp llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp llvm/lib/Analysis/ObjCARCInstKind.cpp llvm/lib/Analysis/OptimizationRemarkEmitter.cpp llvm/lib/Analysis/OverflowInstAnalysis.cpp -llvm/lib/Analysis/ReleaseModeModelRunner.cpp llvm/lib/Analysis/ReplayInlineAdvisor.cpp llvm/lib/Analysis/ScalarEvolutionDivision.cpp llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -5231,6 +5410,7 @@ llvm/lib/BinaryFormat/MsgPackWriter.cpp llvm/lib/BinaryFormat/Wasm.cpp llvm/lib/BinaryFormat/XCOFF.cpp llvm/lib/Bitcode/Reader/MetadataLoader.cpp 
+llvm/lib/Bitcode/Reader/ValueList.cpp llvm/lib/CodeGen/AllocationOrder.cpp llvm/lib/CodeGen/AllocationOrder.h llvm/lib/CodeGen/CFGuardLongjmp.cpp @@ -5252,6 +5432,7 @@ llvm/lib/CodeGen/LoopTraversal.cpp llvm/lib/CodeGen/LowLevelType.cpp llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp llvm/lib/CodeGen/MachineCheckDebugify.cpp +llvm/lib/CodeGen/MachineCycleAnalysis.cpp llvm/lib/CodeGen/MachineDebugify.cpp llvm/lib/CodeGen/MachineFunctionPass.cpp llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -5259,6 +5440,7 @@ llvm/lib/CodeGen/MachineModuleInfoImpls.cpp llvm/lib/CodeGen/MachineModuleSlotTracker.cpp llvm/lib/CodeGen/MachineOutliner.cpp llvm/lib/CodeGen/MachinePassManager.cpp +llvm/lib/CodeGen/MachineSSAContext.cpp llvm/lib/CodeGen/MachineStableHash.cpp llvm/lib/CodeGen/MachineStripDebug.cpp llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -5273,13 +5455,17 @@ llvm/lib/CodeGen/ParallelCG.cpp llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp llvm/lib/CodeGen/PseudoProbeInserter.cpp llvm/lib/CodeGen/RegAllocBase.cpp +llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +llvm/lib/CodeGen/RegAllocScore.cpp +llvm/lib/CodeGen/RegAllocScore.h llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp llvm/lib/CodeGen/ReplaceWithVeclib.cpp llvm/lib/CodeGen/SafeStackLayout.cpp llvm/lib/CodeGen/SafeStackLayout.h llvm/lib/CodeGen/SpillPlacement.h llvm/lib/CodeGen/TargetOptionsImpl.cpp +llvm/lib/CodeGen/VLIWMachineScheduler.cpp llvm/lib/CodeGen/WasmEHPrepare.cpp llvm/lib/CodeGen/XRayInstrumentation.cpp llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -5288,6 +5474,7 @@ llvm/lib/CodeGen/AsmPrinter/AIXException.cpp llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp llvm/lib/CodeGen/AsmPrinter/DwarfException.h +llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -5438,11 +5625,12 @@ 
llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +llvm/lib/Debuginfod/Debuginfod.cpp +llvm/lib/Debuginfod/HTTPClient.cpp llvm/lib/Demangle/Demangle.cpp llvm/lib/Demangle/DLangDemangle.cpp llvm/lib/Demangle/MicrosoftDemangleNodes.cpp llvm/lib/Demangle/RustDemangle.cpp -llvm/lib/DWARFLinker/DWARFLinker.cpp llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -5482,6 +5670,8 @@ llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp llvm/lib/ExecutionEngine/Orc/Layer.cpp llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp +llvm/lib/ExecutionEngine/Orc/Mangling.cpp +llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -5524,6 +5714,7 @@ llvm/lib/IR/PassManager.cpp llvm/lib/IR/PrintPasses.cpp llvm/lib/IR/PseudoProbe.cpp llvm/lib/IR/ReplaceConstant.cpp +llvm/lib/IR/SSAContext.cpp llvm/lib/IR/Statepoint.cpp llvm/lib/IR/StructuralHash.cpp llvm/lib/IR/ValueSymbolTable.cpp @@ -5600,9 +5791,10 @@ llvm/lib/ObjectYAML/WasmYAML.cpp llvm/lib/ObjectYAML/yaml2obj.cpp llvm/lib/Passes/OptimizationLevel.cpp llvm/lib/Passes/PassBuilderBindings.cpp -llvm/lib/Passes/PassBuilderPipelines.cpp llvm/lib/Passes/PassPlugin.cpp llvm/lib/ProfileData/GCOV.cpp +llvm/lib/ProfileData/InstrProfCorrelator.cpp +llvm/lib/ProfileData/RawMemProfReader.cpp llvm/lib/ProfileData/SampleProfWriter.cpp llvm/lib/Remarks/BitstreamRemarkParser.h llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -5643,6 +5835,7 @@ llvm/lib/Support/InstructionCost.cpp llvm/lib/Support/IntEqClasses.cpp llvm/lib/Support/LineIterator.cpp llvm/lib/Support/LowLevelType.cpp +llvm/lib/Support/MemAlloc.cpp llvm/lib/Support/Memory.cpp 
llvm/lib/Support/MemoryBufferRef.cpp llvm/lib/Support/MSP430AttributeParser.cpp @@ -5675,7 +5868,6 @@ llvm/lib/TableGen/TableGenBackendSkeleton.cpp llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp -llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp llvm/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -5770,11 +5962,13 @@ llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.h llvm/lib/Target/ARM/ARMBlockPlacement.cpp +llvm/lib/Target/ARM/ARMBranchTargets.cpp llvm/lib/Target/ARM/ARMCallingConv.h llvm/lib/Target/ARM/ARMHazardRecognizer.h llvm/lib/Target/ARM/ARMInstrInfo.cpp llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp llvm/lib/Target/ARM/ARMTargetMachine.h +llvm/lib/Target/ARM/ARMTargetObjectFile.h llvm/lib/Target/ARM/MVETailPredUtils.h llvm/lib/Target/ARM/MVEVPTBlockPass.cpp llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -5835,6 +6029,7 @@ llvm/lib/Target/BPF/BPFCORE.h llvm/lib/Target/BPF/BPFFrameLowering.cpp llvm/lib/Target/BPF/BPFIRPeephole.cpp llvm/lib/Target/BPF/BPFMCInstLower.cpp +llvm/lib/Target/BPF/BPFPreserveDIType.cpp llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp llvm/lib/Target/BPF/BPFSubtarget.cpp llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -5878,6 +6073,8 @@ llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h +llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +llvm/lib/Target/Hexagon/HexagonMachineScheduler.h llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h @@ 
-5975,6 +6172,7 @@ llvm/lib/Target/Mips/Mips16RegisterInfo.h llvm/lib/Target/Mips/MipsCallLowering.h llvm/lib/Target/Mips/MipsLegalizerInfo.h llvm/lib/Target/Mips/MipsMCInstLower.h +llvm/lib/Target/Mips/MipsMulMulBugPass.cpp llvm/lib/Target/Mips/MipsOptionRecord.h llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp llvm/lib/Target/Mips/MipsRegisterBankInfo.h @@ -6066,6 +6264,7 @@ llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h llvm/lib/Target/Sparc/LeonPasses.h llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp +llvm/lib/Target/Sparc/SparcTargetObjectFile.h llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp @@ -6111,6 +6310,7 @@ llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp +llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -6235,7 +6435,6 @@ llvm/lib/Transforms/IPO/Annotation2Metadata.cpp llvm/lib/Transforms/IPO/Attributor.cpp llvm/lib/Transforms/IPO/AttributorAttributes.cpp llvm/lib/Transforms/IPO/ElimAvailExtern.cpp -llvm/lib/Transforms/IPO/Inliner.cpp llvm/lib/Transforms/IPO/ModuleInliner.cpp llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -6250,6 +6449,7 @@ llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp llvm/lib/Transforms/Scalar/ConstraintElimination.cpp llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp llvm/lib/Transforms/Scalar/DivRemPairs.cpp +llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp llvm/lib/Transforms/Scalar/GVNHoist.cpp llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp @@ -6264,6 +6464,7 @@ 
llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp llvm/lib/Transforms/Utils/CallGraphUpdater.cpp llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +llvm/lib/Transforms/Utils/CodeLayout.cpp llvm/lib/Transforms/Utils/CodeMoverUtils.cpp llvm/lib/Transforms/Utils/EscapeEnumerator.cpp llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -6273,6 +6474,7 @@ llvm/lib/Transforms/Utils/HelloWorld.cpp llvm/lib/Transforms/Utils/InjectTLIMappings.cpp llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp llvm/lib/Transforms/Utils/MatrixUtils.cpp +llvm/lib/Transforms/Utils/SampleProfileInference.cpp llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp llvm/lib/Transforms/Utils/SCCPSolver.cpp llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -6341,6 +6543,7 @@ llvm/tools/llvm-cov/TestingSupport.cpp llvm/tools/llvm-cxxdump/Error.cpp llvm/tools/llvm-cxxdump/llvm-cxxdump.h llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp +llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp llvm/tools/llvm-dlang-demangle-fuzzer/DummyDemanglerFuzzer.cpp llvm/tools/llvm-dlang-demangle-fuzzer/llvm-dlang-demangle-fuzzer.cpp llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -6491,7 +6694,6 @@ llvm/tools/llvm-profgen/CSPreInliner.h llvm/tools/llvm-profgen/llvm-profgen.cpp llvm/tools/llvm-profgen/PerfReader.cpp llvm/tools/llvm-profgen/PerfReader.h -llvm/tools/llvm-profgen/ProfileGenerator.cpp llvm/tools/llvm-profgen/ProfileGenerator.h llvm/tools/llvm-rc/ResourceScriptCppFilter.cpp llvm/tools/llvm-rc/ResourceScriptCppFilter.h @@ -6631,7 +6833,9 @@ llvm/unittests/Analysis/DomTreeUpdaterTest.cpp llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp llvm/unittests/Analysis/GlobalsModRefTest.cpp llvm/unittests/Analysis/InlineCostTest.cpp +llvm/unittests/Analysis/MLModelRunnerTest.cpp llvm/unittests/Analysis/SparsePropagation.cpp +llvm/unittests/Analysis/TargetLibraryInfoTest.cpp llvm/unittests/Analysis/ValueLatticeTest.cpp 
llvm/unittests/AsmParser/AsmParserTest.cpp llvm/unittests/BinaryFormat/DwarfTest.cpp @@ -6646,6 +6850,7 @@ llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp llvm/unittests/CodeGen/DIETest.cpp llvm/unittests/CodeGen/LexicalScopesTest.cpp llvm/unittests/CodeGen/MachineInstrBundleIteratorTest.cpp +llvm/unittests/CodeGen/RegAllocScoreTest.cpp llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp llvm/unittests/CodeGen/TestAsmPrinter.cpp llvm/unittests/CodeGen/TestAsmPrinter.h @@ -6671,6 +6876,8 @@ llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp llvm/unittests/DebugInfo/PDB/NativeSymbolReuseTest.cpp llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp llvm/unittests/DebugInfo/PDB/Inputs/SimpleTest.cpp +llvm/unittests/Debuginfod/DebuginfodTests.cpp +llvm/unittests/Debuginfod/HTTPClientTests.cpp llvm/unittests/Demangle/DemangleTest.cpp llvm/unittests/Demangle/DLangDemangleTest.cpp llvm/unittests/Demangle/OutputBufferTest.cpp @@ -6693,6 +6900,7 @@ llvm/unittests/ExecutionEngine/Orc/TaskDispatchTest.cpp llvm/unittests/ExecutionEngine/Orc/ThreadSafeModuleTest.cpp llvm/unittests/Frontend/OpenACCTest.cpp llvm/unittests/Frontend/OpenMPContextTest.cpp +llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp llvm/unittests/Frontend/OpenMPParsingTest.cpp llvm/unittests/InterfaceStub/ELFYAMLTest.cpp llvm/unittests/IR/DemandedBitsTest.cpp @@ -6749,6 +6957,7 @@ llvm/unittests/Support/raw_fd_stream_test.cpp llvm/unittests/Support/raw_sha1_ostream_test.cpp llvm/unittests/Support/RISCVAttributeParserTest.cpp llvm/unittests/Support/ScaledNumberTest.cpp +llvm/unittests/Support/ScopedPrinterTest.cpp llvm/unittests/Support/SHA256.cpp llvm/unittests/Support/SuffixTreeTest.cpp llvm/unittests/Support/SymbolRemappingReaderTest.cpp @@ -6808,9 +7017,6 @@ llvm/unittests/XRay/FDRRecordPrinterTest.cpp llvm/unittests/XRay/FDRRecordsTest.cpp llvm/unittests/XRay/FDRTraceWriterTest.cpp llvm/unittests/XRay/ProfileTest.cpp -llvm/utils/benchmark/cmake/gnu_posix_regex.cpp 
-llvm/utils/benchmark/cmake/posix_regex.cpp -llvm/utils/benchmark/cmake/thread_safety_attributes.cpp llvm/utils/not/not.cpp llvm/utils/TableGen/CodeBeadsGen.cpp llvm/utils/TableGen/CompressInstEmitter.cpp @@ -6924,7 +7130,6 @@ mlir/include/mlir/Analysis/AffineStructures.h mlir/include/mlir/Analysis/BufferViewFlowAnalysis.h mlir/include/mlir/Analysis/DataFlowAnalysis.h mlir/include/mlir/Analysis/DataLayoutAnalysis.h -mlir/include/mlir/Analysis/LinearTransform.h mlir/include/mlir/Analysis/Liveness.h mlir/include/mlir/Analysis/LoopAnalysis.h mlir/include/mlir/Analysis/NestedMatcher.h @@ -6934,8 +7139,11 @@ mlir/include/mlir/Analysis/SliceAnalysis.h mlir/include/mlir/Analysis/Utils.h mlir/include/mlir/Analysis/AliasAnalysis/LocalAliasAnalysis.h mlir/include/mlir/Analysis/Presburger/Fraction.h +mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +mlir/include/mlir/Analysis/Presburger/LinearTransform.h mlir/include/mlir/Analysis/Presburger/Matrix.h mlir/include/mlir/Analysis/Presburger/Simplex.h +mlir/include/mlir/Analysis/Presburger/Utils.h mlir/include/mlir/CAPI/AffineExpr.h mlir/include/mlir/CAPI/AffineMap.h mlir/include/mlir/CAPI/Diagnostics.h @@ -6954,10 +7162,12 @@ mlir/include/mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h mlir/include/mlir/Conversion/ArithmeticToSPIRV/ArithmeticToSPIRV.h mlir/include/mlir/Conversion/ArmNeon2dToIntr/ArmNeon2dToIntr.h mlir/include/mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h +mlir/include/mlir/Conversion/BufferizationToMemRef/BufferizationToMemRef.h mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h mlir/include/mlir/Conversion/ComplexToStandard/ComplexToStandard.h mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h +mlir/include/mlir/Conversion/GPUToROCDL/Runtimes.h mlir/include/mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h mlir/include/mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h mlir/include/mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h @@ -7021,10 +7231,13 @@ 
mlir/include/mlir/Dialect/ArmNeon/ArmNeonDialect.h mlir/include/mlir/Dialect/ArmSVE/ArmSVEDialect.h mlir/include/mlir/Dialect/ArmSVE/Transforms.h mlir/include/mlir/Dialect/Async/Passes.h +mlir/include/mlir/Dialect/Async/Transforms.h mlir/include/mlir/Dialect/Async/IR/Async.h mlir/include/mlir/Dialect/Async/IR/AsyncTypes.h mlir/include/mlir/Dialect/Bufferization/IR/AllocationOpInterface.h mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h +mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h +mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h mlir/include/mlir/Dialect/Complex/IR/Complex.h mlir/include/mlir/Dialect/DLTI/DLTI.h mlir/include/mlir/Dialect/DLTI/Traits.h @@ -7036,15 +7249,17 @@ mlir/include/mlir/Dialect/GPU/Passes.h mlir/include/mlir/Dialect/GPU/Utils.h mlir/include/mlir/Dialect/Linalg/Passes.h mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h +mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h +mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h +mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h mlir/include/mlir/Dialect/Linalg/IR/Linalg.h +mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h 
mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h @@ -7075,6 +7290,7 @@ mlir/include/mlir/Dialect/Quant/QuantizeUtils.h mlir/include/mlir/Dialect/Quant/QuantOps.h mlir/include/mlir/Dialect/Quant/QuantTypes.h mlir/include/mlir/Dialect/Quant/UniformSupport.h +mlir/include/mlir/Dialect/SCF/AffineCanonicalizationUtils.h mlir/include/mlir/Dialect/SCF/Passes.h mlir/include/mlir/Dialect/SCF/SCF.h mlir/include/mlir/Dialect/SCF/Transforms.h @@ -7103,6 +7319,7 @@ mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h mlir/include/mlir/Dialect/StandardOps/Utils/Utils.h mlir/include/mlir/Dialect/Tensor/IR/Tensor.h +mlir/include/mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h @@ -7187,6 +7404,7 @@ mlir/include/mlir/TableGen/Argument.h mlir/include/mlir/TableGen/Attribute.h mlir/include/mlir/TableGen/AttrOrTypeDef.h mlir/include/mlir/TableGen/Builder.h +mlir/include/mlir/TableGen/Class.h mlir/include/mlir/TableGen/Constraint.h mlir/include/mlir/TableGen/Dialect.h mlir/include/mlir/TableGen/GenInfo.h @@ -7223,7 +7441,9 @@ mlir/include/mlir/Target/SPIRV/Serialization.h mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h mlir/include/mlir/Tools/mlir-lsp-server/MlirLspServerMain.h mlir/include/mlir/Tools/mlir-reduce/MlirReduceMain.h -mlir/include/mlir/Transforms/Bufferize.h +mlir/include/mlir/Tools/PDLL/AST/Context.h +mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h +mlir/include/mlir/Tools/PDLL/Parser/Parser.h mlir/include/mlir/Transforms/BufferUtils.h mlir/include/mlir/Transforms/DialectConversion.h mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h @@ -7266,7 +7486,6 @@ mlir/lib/Analysis/BufferViewFlowAnalysis.cpp mlir/lib/Analysis/CallGraph.cpp mlir/lib/Analysis/DataFlowAnalysis.cpp mlir/lib/Analysis/DataLayoutAnalysis.cpp 
-mlir/lib/Analysis/LinearTransform.cpp mlir/lib/Analysis/Liveness.cpp mlir/lib/Analysis/LoopAnalysis.cpp mlir/lib/Analysis/NestedMatcher.cpp @@ -7275,8 +7494,11 @@ mlir/lib/Analysis/PresburgerSet.cpp mlir/lib/Analysis/SliceAnalysis.cpp mlir/lib/Analysis/Utils.cpp mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp +mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +mlir/lib/Analysis/Presburger/LinearTransform.cpp mlir/lib/Analysis/Presburger/Matrix.cpp mlir/lib/Analysis/Presburger/Simplex.cpp +mlir/lib/Analysis/Presburger/Utils.cpp mlir/lib/Bindings/Python/AllPassesRegistration.cpp mlir/lib/Bindings/Python/AsyncPasses.cpp mlir/lib/Bindings/Python/DialectLinalg.cpp @@ -7287,7 +7509,6 @@ mlir/lib/Bindings/Python/Globals.h mlir/lib/Bindings/Python/GPUPasses.cpp mlir/lib/Bindings/Python/IRAffine.cpp mlir/lib/Bindings/Python/IRAttributes.cpp -mlir/lib/Bindings/Python/IRCore.cpp mlir/lib/Bindings/Python/IRInterfaces.cpp mlir/lib/Bindings/Python/IRModule.cpp mlir/lib/Bindings/Python/IRModule.h @@ -7350,6 +7571,7 @@ mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp +mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp @@ -7376,6 +7598,8 @@ mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp mlir/lib/Conversion/PDLToPDLInterp/Predicate.h mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.h +mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp +mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h mlir/lib/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.cpp mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp @@ -7393,6 +7617,8 @@ 
mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp mlir/lib/Conversion/TosaToSCF/TosaToSCF.cpp mlir/lib/Conversion/TosaToSCF/TosaToSCFPass.cpp @@ -7439,6 +7665,9 @@ mlir/lib/Dialect/Async/Transforms/PassDetail.h mlir/lib/Dialect/Bufferization/IR/AllocationOpInterface.cpp mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp +mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp mlir/lib/Dialect/Complex/IR/ComplexOps.cpp mlir/lib/Dialect/DLTI/DLTI.cpp @@ -7453,16 +7682,18 @@ mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp mlir/lib/Dialect/GPU/Transforms/PassDetail.h mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp +mlir/lib/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp +mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.cpp +mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp 
mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp -mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -7480,6 +7711,7 @@ mlir/lib/Dialect/Linalg/Transforms/InlineScalarOperands.cpp mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +mlir/lib/Dialect/Linalg/Transforms/NamedOpConversions.cpp mlir/lib/Dialect/Linalg/Transforms/PassDetail.h mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -7521,6 +7753,7 @@ mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp mlir/lib/Dialect/Quant/Utils/QuantizeUtils.cpp mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp mlir/lib/Dialect/SCF/SCF.cpp +mlir/lib/Dialect/SCF/Transforms/AffineCanonicalizationUtils.cpp mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp @@ -7566,12 +7799,15 @@ mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp mlir/lib/Dialect/StandardOps/Utils/Utils.cpp mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp +mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp mlir/lib/Dialect/Tensor/IR/TensorOps.cpp mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp mlir/lib/Dialect/Tensor/Transforms/PassDetail.h +mlir/lib/Dialect/Tosa/IR/TosaOps.cpp mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp +mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp mlir/lib/Dialect/Utils/StaticValueUtils.cpp @@ -7579,9 
+7815,10 @@ mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp mlir/lib/Dialect/Vector/VectorDropLeadUnitDim.cpp mlir/lib/Dialect/Vector/VectorInsertExtractStridedSliceRewritePatterns.cpp mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp +mlir/lib/Dialect/Vector/VectorOps.cpp mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp mlir/lib/Dialect/Vector/VectorTransferPermutationMapRewritePatterns.cpp -mlir/lib/Dialect/Vector/VectorTransforms.cpp +mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp mlir/lib/Dialect/Vector/VectorUtils.cpp mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp @@ -7606,7 +7843,6 @@ mlir/lib/Interfaces/SideEffectInterfaces.cpp mlir/lib/Interfaces/TilingInterface.cpp mlir/lib/Interfaces/VectorInterfaces.cpp mlir/lib/Interfaces/ViewLikeInterface.cpp -mlir/lib/IR/AffineExpr.cpp mlir/lib/IR/AffineExprDetail.h mlir/lib/IR/AffineMap.cpp mlir/lib/IR/AffineMapDetail.h @@ -7659,7 +7895,6 @@ mlir/lib/Pass/Pass.cpp mlir/lib/Pass/PassCrashRecovery.cpp mlir/lib/Pass/PassDetail.h mlir/lib/Pass/PassManagerOptions.cpp -mlir/lib/Pass/PassRegistry.cpp mlir/lib/Pass/PassStatistics.cpp mlir/lib/Pass/PassTiming.cpp mlir/lib/Reducer/OptReductionPass.cpp @@ -7730,13 +7965,18 @@ mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.cpp mlir/lib/Tools/mlir-lsp-server/lsp/Transport.cpp mlir/lib/Tools/mlir-lsp-server/lsp/Transport.h mlir/lib/Tools/mlir-reduce/MlirReduceMain.cpp -mlir/lib/Transforms/BufferDeallocation.cpp -mlir/lib/Transforms/Bufferize.cpp +mlir/lib/Tools/PDLL/AST/Context.cpp +mlir/lib/Tools/PDLL/AST/Diagnostic.cpp +mlir/lib/Tools/PDLL/AST/NodePrinter.cpp +mlir/lib/Tools/PDLL/AST/TypeDetail.h +mlir/lib/Tools/PDLL/AST/Types.cpp +mlir/lib/Tools/PDLL/Parser/Parser.cpp mlir/lib/Transforms/BufferOptimizations.cpp mlir/lib/Transforms/BufferResultsToOutParams.cpp mlir/lib/Transforms/BufferUtils.cpp mlir/lib/Transforms/Canonicalizer.cpp mlir/lib/Transforms/CSE.cpp +mlir/lib/Transforms/Inliner.cpp 
mlir/lib/Transforms/LocationSnapshot.cpp mlir/lib/Transforms/LoopCoalescing.cpp mlir/lib/Transforms/LoopFusion.cpp @@ -7749,6 +7989,7 @@ mlir/lib/Transforms/StripDebugInfo.cpp mlir/lib/Transforms/SymbolDCE.cpp mlir/lib/Transforms/Utils/DialectConversion.cpp mlir/lib/Transforms/Utils/FoldUtils.cpp +mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp mlir/lib/Transforms/Utils/InliningUtils.cpp mlir/lib/Transforms/Utils/LoopFusionUtils.cpp mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -7758,6 +7999,7 @@ mlir/lib/Translation/Translation.cpp mlir/tools/mlir-cpu-runner/mlir-cpu-runner.cpp mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp mlir/tools/mlir-opt/mlir-opt.cpp +mlir/tools/mlir-pdll/mlir-pdll.cpp mlir/tools/mlir-reduce/mlir-reduce.cpp mlir/tools/mlir-shlib/mlir-shlib.cpp mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp @@ -7774,6 +8016,8 @@ mlir/tools/mlir-tblgen/FormatGen.h mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp mlir/tools/mlir-tblgen/mlir-tblgen.cpp +mlir/tools/mlir-tblgen/OpClass.cpp +mlir/tools/mlir-tblgen/OpClass.h mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp mlir/tools/mlir-tblgen/OpDocGen.cpp mlir/tools/mlir-tblgen/OpFormatGen.h @@ -7796,12 +8040,15 @@ mlir/unittests/Analysis/AffineStructuresParser.cpp mlir/unittests/Analysis/AffineStructuresParser.h mlir/unittests/Analysis/AffineStructuresParserTest.cpp mlir/unittests/Analysis/AffineStructuresTest.cpp -mlir/unittests/Analysis/LinearTransformTest.cpp mlir/unittests/Analysis/PresburgerSetTest.cpp +mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp +mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp mlir/unittests/Analysis/Presburger/MatrixTest.cpp mlir/unittests/Analysis/Presburger/SimplexTest.cpp +mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp mlir/unittests/Dialect/BroadcastShapeTest.cpp mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp +mlir/unittests/Dialect/SCF/SCFOps.cpp 
mlir/unittests/Dialect/SparseTensor/MergerTest.cpp mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp mlir/unittests/Dialect/SPIRV/SerializationTest.cpp @@ -7814,6 +8061,7 @@ mlir/unittests/IR/AttributeTest.cpp mlir/unittests/IR/DialectTest.cpp mlir/unittests/IR/InterfaceAttachmentTest.cpp mlir/unittests/IR/OperationSupportTest.cpp +mlir/unittests/IR/PatternMatchTest.cpp mlir/unittests/IR/ShapedTypeTest.cpp mlir/unittests/IR/SubElementInterfaceTest.cpp mlir/unittests/Pass/AnalysisManagerTest.cpp @@ -7826,6 +8074,7 @@ mlir/unittests/Support/StorageUniquerTest.cpp mlir/unittests/TableGen/EnumsGenTest.cpp mlir/unittests/TableGen/FormatTest.cpp mlir/unittests/TableGen/PassGenTest.cpp +mlir/unittests/Transforms/Canonicalizer.cpp mlir/unittests/Transforms/DialectConversion.cpp openmp/libompd/src/Debug.h openmp/libompd/src/omp-debug.cpp @@ -7845,7 +8094,6 @@ openmp/libomptarget/DeviceRTL/include/Synchronization.h openmp/libomptarget/DeviceRTL/include/Types.h openmp/libomptarget/DeviceRTL/include/Utils.h openmp/libomptarget/DeviceRTL/src/Configuration.cpp -openmp/libomptarget/DeviceRTL/src/Debug.cpp openmp/libomptarget/DeviceRTL/src/Kernel.cpp openmp/libomptarget/DeviceRTL/src/Mapping.cpp openmp/libomptarget/DeviceRTL/src/Misc.cpp @@ -7871,14 +8119,15 @@ openmp/libomptarget/deviceRTLs/common/src/shuffle.cpp openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h openmp/libomptarget/include/Debug.h +openmp/libomptarget/include/device.h openmp/libomptarget/include/DeviceEnvironment.h openmp/libomptarget/include/dlwrap.h openmp/libomptarget/include/omptarget.h openmp/libomptarget/include/omptargetplugin.h +openmp/libomptarget/include/rtl.h openmp/libomptarget/include/SourceInfo.h openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h -openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp 
openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h openmp/libomptarget/plugins/amdgpu/impl/hsa_api.h @@ -7906,11 +8155,9 @@ openmp/libomptarget/plugins/remote/src/Client.cpp openmp/libomptarget/plugins/remote/src/Client.h openmp/libomptarget/plugins/ve/src/rtl.cpp openmp/libomptarget/src/api.cpp -openmp/libomptarget/src/device.h openmp/libomptarget/src/interface.cpp openmp/libomptarget/src/private.h openmp/libomptarget/src/rtl.cpp -openmp/libomptarget/src/rtl.h openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp openmp/runtime/doc/doxygen/libomp_interface.h openmp/runtime/src/extractExternal.cpp @@ -8134,5 +8381,27 @@ pstl/include/pstl/internal/omp/parallel_scan.h pstl/include/pstl/internal/omp/parallel_stable_partial_sort.h pstl/include/pstl/internal/omp/parallel_transform_scan.h pstl/include/pstl/internal/omp/util.h +third-party/benchmark/cmake/thread_safety_attributes.cpp +third-party/benchmark/src/arraysize.h +third-party/benchmark/src/benchmark_api_internal.h +third-party/benchmark/src/benchmark_register.h +third-party/benchmark/src/benchmark_runner.h +third-party/benchmark/src/check.h +third-party/benchmark/src/colorprint.h +third-party/benchmark/src/commandlineflags.h +third-party/benchmark/src/complexity.h +third-party/benchmark/src/counter.h +third-party/benchmark/src/cycleclock.h +third-party/benchmark/src/internal_macros.h +third-party/benchmark/src/log.h +third-party/benchmark/src/mutex.h +third-party/benchmark/src/perf_counters.h +third-party/benchmark/src/re.h +third-party/benchmark/src/sleep.h +third-party/benchmark/src/statistics.h +third-party/benchmark/src/string_util.h +third-party/benchmark/src/thread_manager.h +third-party/benchmark/src/thread_timer.h +third-party/benchmark/src/timers.h utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h From 730414b3419e0b0124fa1f683ca904640727d455 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 
Jan 2022 14:11:56 +0100 Subject: [PATCH 417/992] [CodeExtractor] Remove unnecessary explicit attribute handling (NFC) The nounwind and uwtable attributes will get handled as part of the loop below as well, there is no need to special-case them here. --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 4669f4bb4a18..f577643f81b0 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -871,13 +871,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Function *newFunction = Function::Create( funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), oldFunction->getName() + "." + SuffixToUse, M); - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); // Inherit all of the target dependent attributes and white-listed // target independent attributes. From ca271f4ef5a2a4bf115ac11ada70bbd7c737d77d Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 29 Dec 2021 13:48:16 +0100 Subject: [PATCH 418/992] [lldb-server/linux] Fix waitpid for multithreaded forks The lldb-server code is currently set up in a way that each NativeProcess instance does its own waitpid handling. This works fine for BSDs, where the code can do a waitpid(process_id), and get information for all threads in that process. The situation is trickier on linux, because waitpid(pid) will only return information for the main thread of the process (one whose tid == pid). For this reason the linux code does a waitpid(-1), to get information for all threads. 
This was fine while we were supporting just a single process, but becomes a problem when we have multiple processes as they end up stealing each other's events. There are two possible solutions to this problem: - call waitpid(-1) centrally, and then dispatch the events to the appropriate process - have each process call waitpid(tid) for all the threads it manages This patch implements the second approach. Besides fitting better into the existing design, it also has the added benefit of ensuring predictable ordering for thread/process creation events (which come in pairs -- one for the parent and one for the child). The first approach OTOH, would make this ordering even more complicated since we would have to keep the half-threads hanging in mid-air until we find the process we should attach them to. The downside to this approach is an increased number of syscalls (one waitpid for each thread), but I think we're pretty far from optimizing things like this, and so the cleanliness of the design is worth it. The included test reproduces the circumstances which should demonstrate the bug (which manifests as a hung test), but I have not been able to get it to fail. The only place I've seen this failure mode is in very rare hangs in the thread sanitizer tests (tsan forks an addr2line process to produce its error messages).
Differential Revision: https://reviews.llvm.org/D116372 --- .../Process/Linux/NativeProcessLinux.cpp | 188 +++++++----------- .../Process/Linux/NativeProcessLinux.h | 21 +- .../tools/lldb-server/TestGdbRemoteFork.py | 34 ++++ 3 files changed, 109 insertions(+), 134 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index 8f5496d9f4e5..4a77e791343c 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -426,22 +426,24 @@ Status NativeProcessLinux::SetDefaultPtraceOpts(lldb::pid_t pid) { } // Handles all waitpid events from the inferior process. -void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, WaitStatus status) { +void NativeProcessLinux::MonitorCallback(NativeThreadLinux &thread, + WaitStatus status) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); // Certain activities differ based on whether the pid is the tid of the main // thread. - const bool is_main_thread = (pid == GetID()); + const bool is_main_thread = (thread.GetID() == GetID()); // Handle when the thread exits. if (status.type == WaitStatus::Exit || status.type == WaitStatus::Signal) { LLDB_LOG(log, "got exit status({0}) , tid = {1} ({2} main thread), process " "state = {3}", - status, pid, is_main_thread ? "is" : "is not", GetState()); + status, thread.GetID(), is_main_thread ? "is" : "is not", + GetState()); // This is a thread that exited. Ensure we're not tracking it anymore. - StopTrackingThread(pid); + StopTrackingThread(thread); if (is_main_thread) { // The main thread exited. We're done monitoring. Report to delegate. 
@@ -454,37 +456,15 @@ void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, WaitStatus status) { } siginfo_t info; - const auto info_err = GetSignalInfo(pid, &info); - auto thread_sp = GetThreadByID(pid); - - if (!thread_sp) { - // Normally, the only situation when we cannot find the thread is if we - // have just received a new thread notification. This is indicated by - // GetSignalInfo() returning si_code == SI_USER and si_pid == 0 - LLDB_LOG(log, "received notification about an unknown tid {0}.", pid); - - if (info_err.Fail()) { - LLDB_LOG(log, - "(tid {0}) GetSignalInfo failed ({1}). " - "Ingoring this notification.", - pid, info_err); - return; - } - - LLDB_LOG(log, "tid {0}, si_code: {1}, si_pid: {2}", pid, info.si_code, - info.si_pid); - - MonitorClone(pid, llvm::None); - return; - } + const auto info_err = GetSignalInfo(thread.GetID(), &info); // Get details on the signal raised. if (info_err.Success()) { // We have retrieved the signal info. Dispatch appropriately. if (info.si_signo == SIGTRAP) - MonitorSIGTRAP(info, *thread_sp); + MonitorSIGTRAP(info, thread); else - MonitorSignal(info, *thread_sp); + MonitorSignal(info, thread); } else { if (info_err.GetError() == EINVAL) { // This is a group stop reception for this tid. We can reach here if we @@ -500,9 +480,8 @@ void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, WaitStatus status) { "received a group stop for pid {0} tid {1}. Transparent " "handling of group stops not supported, resuming the " "thread.", - GetID(), pid); - ResumeThread(*thread_sp, thread_sp->GetState(), - LLDB_INVALID_SIGNAL_NUMBER); + GetID(), thread.GetID()); + ResumeThread(thread, thread.GetState(), LLDB_INVALID_SIGNAL_NUMBER); } else { // ptrace(GETSIGINFO) failed (but not due to group-stop). @@ -512,12 +491,12 @@ void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, WaitStatus status) { // Stop tracking the metadata for the thread since it's entirely off the // system now. 
- const bool thread_found = StopTrackingThread(pid); + StopTrackingThread(thread); LLDB_LOG(log, "GetSignalInfo failed: {0}, tid = {1}, status = {2}, " - "status = {3}, main_thread = {4}, thread_found: {5}", - info_err, pid, status, status, is_main_thread, thread_found); + "status = {3}, main_thread = {4}", + info_err, thread.GetID(), status, status, is_main_thread); if (is_main_thread) { // Notify the delegate - our process is not available but appears to @@ -532,7 +511,7 @@ void NativeProcessLinux::MonitorCallback(lldb::pid_t pid, WaitStatus status) { "pid {0} tid {1} non-main thread exit occurred, didn't " "tell delegate anything since thread disappeared out " "from underneath us", - GetID(), pid); + GetID(), thread.GetID()); } } } @@ -549,29 +528,14 @@ void NativeProcessLinux::WaitForCloneNotification(::pid_t pid) { pid); ::pid_t wait_pid = llvm::sys::RetryAfterSignal(-1, ::waitpid, pid, &status, __WALL); - // Since we are waiting on a specific pid, this must be the creation event. - // But let's do some checks just in case. - if (wait_pid != pid) { - LLDB_LOG(log, - "waiting for pid {0} failed. Assuming the pid has " - "disappeared in the meantime", - pid); - // The only way I know of this could happen is if the whole process was - // SIGKILLed in the mean time. In any case, we can't do anything about that - // now. - return; - } - if (WIFEXITED(status)) { - LLDB_LOG(log, - "waiting for pid {0} returned an 'exited' event. Not " - "tracking it.", - pid); - // Also a very improbable event. - m_pending_pid_map.erase(pid); - return; - } - MonitorClone(pid, llvm::None); + // It's theoretically possible to get other events if the entire process was + // SIGKILLed before we got a chance to check this. In that case, we'll just + // clean everything up when we get the process exit event. 
+ + LLDB_LOG(log, + "waitpid({0}, &status, __WALL) => {1} (errno: {2}, status = {3})", + pid, wait_pid, errno, WaitStatus::Decode(status)); } void NativeProcessLinux::MonitorSIGTRAP(const siginfo_t &info, @@ -598,8 +562,7 @@ void NativeProcessLinux::MonitorSIGTRAP(const siginfo_t &info, thread.GetID()); ResumeThread(thread, thread.GetState(), LLDB_INVALID_SIGNAL_NUMBER); } else { - if (!MonitorClone(event_message, {{(info.si_code >> 8), thread.GetID()}})) - WaitForCloneNotification(event_message); + MonitorClone(thread, event_message, info.si_code >> 8); } break; @@ -886,36 +849,15 @@ void NativeProcessLinux::MonitorSignal(const siginfo_t &info, StopRunningThreads(thread.GetID()); } -bool NativeProcessLinux::MonitorClone( - lldb::pid_t child_pid, - llvm::Optional clone_info) { +bool NativeProcessLinux::MonitorClone(NativeThreadLinux &parent, + lldb::pid_t child_pid, int event) { Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS)); - LLDB_LOG(log, "clone, child_pid={0}, clone info?={1}", child_pid, - clone_info.hasValue()); + LLDB_LOG(log, "parent_tid={0}, child_pid={1}, event={2}", parent.GetID(), + child_pid, event); - auto find_it = m_pending_pid_map.find(child_pid); - if (find_it == m_pending_pid_map.end()) { - // not in the map, so this is the first signal for the PID - m_pending_pid_map.insert({child_pid, clone_info}); - return false; - } - m_pending_pid_map.erase(find_it); - - // second signal for the pid - assert(clone_info.hasValue() != find_it->second.hasValue()); - if (!clone_info) { - // child signal does not indicate the event, so grab the one stored - // earlier - clone_info = find_it->second; - } - - LLDB_LOG(log, "second signal for child_pid={0}, parent_tid={1}, event={2}", - child_pid, clone_info->parent_tid, clone_info->event); + WaitForCloneNotification(child_pid); - auto *parent_thread = GetThreadByID(clone_info->parent_tid); - assert(parent_thread); - - switch (clone_info->event) { + switch (event) { case PTRACE_EVENT_CLONE: { 
// PTRACE_EVENT_CLONE can either mean a new thread or a new process. // Try to grab the new process' PGID to figure out which one it is. @@ -930,15 +872,14 @@ bool NativeProcessLinux::MonitorClone( ThreadWasCreated(child_thread); // Resume the parent. - ResumeThread(*parent_thread, parent_thread->GetState(), - LLDB_INVALID_SIGNAL_NUMBER); + ResumeThread(parent, parent.GetState(), LLDB_INVALID_SIGNAL_NUMBER); break; } } LLVM_FALLTHROUGH; case PTRACE_EVENT_FORK: case PTRACE_EVENT_VFORK: { - bool is_vfork = clone_info->event == PTRACE_EVENT_VFORK; + bool is_vfork = event == PTRACE_EVENT_VFORK; std::unique_ptr child_process{new NativeProcessLinux( static_cast<::pid_t>(child_pid), m_terminal_fd, m_delegate, m_arch, m_main_loop, {static_cast<::pid_t>(child_pid)})}; @@ -949,12 +890,11 @@ bool NativeProcessLinux::MonitorClone( if (bool(m_enabled_extensions & expected_ext)) { m_delegate.NewSubprocess(this, std::move(child_process)); // NB: non-vfork clone() is reported as fork - parent_thread->SetStoppedByFork(is_vfork, child_pid); - StopRunningThreads(parent_thread->GetID()); + parent.SetStoppedByFork(is_vfork, child_pid); + StopRunningThreads(parent.GetID()); } else { child_process->Detach(); - ResumeThread(*parent_thread, parent_thread->GetState(), - LLDB_INVALID_SIGNAL_NUMBER); + ResumeThread(parent, parent.GetState(), LLDB_INVALID_SIGNAL_NUMBER); } break; } @@ -1729,24 +1669,19 @@ bool NativeProcessLinux::HasThreadNoLock(lldb::tid_t thread_id) { return false; } -bool NativeProcessLinux::StopTrackingThread(lldb::tid_t thread_id) { +void NativeProcessLinux::StopTrackingThread(NativeThreadLinux &thread) { Log *const log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD); - LLDB_LOG(log, "tid: {0})", thread_id); - - bool found = false; - for (auto it = m_threads.begin(); it != m_threads.end(); ++it) { - if (*it && ((*it)->GetID() == thread_id)) { - m_threads.erase(it); - found = true; - break; - } - } + lldb::tid_t thread_id = thread.GetID(); + LLDB_LOG(log, 
"tid: {0}", thread_id); - if (found) - NotifyTracersOfThreadDestroyed(thread_id); + auto it = llvm::find_if(m_threads, [&](const auto &thread_up) { + return thread_up.get() == &thread; + }); + assert(it != m_threads.end()); + m_threads.erase(it); + NotifyTracersOfThreadDestroyed(thread_id); SignalIfAllThreadsStopped(); - return found; } Status NativeProcessLinux::NotifyTracersOfNewThread(lldb::tid_t tid) { @@ -1945,27 +1880,44 @@ void NativeProcessLinux::ThreadWasCreated(NativeThreadLinux &thread) { void NativeProcessLinux::SigchldHandler() { Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS)); - // Process all pending waitpid notifications. - while (true) { + + // Threads can appear or disappear as a result of event processing, so gather + // the events upfront. + llvm::DenseMap tid_events; + for (const auto &thread_up : m_threads) { int status = -1; - ::pid_t wait_pid = llvm::sys::RetryAfterSignal(-1, ::waitpid, -1, &status, - __WALL | __WNOTHREAD | WNOHANG); + ::pid_t wait_pid = + llvm::sys::RetryAfterSignal(-1, ::waitpid, thread_up->GetID(), &status, + __WALL | __WNOTHREAD | WNOHANG); if (wait_pid == 0) - break; // We are done. + continue; // Nothing to do for this thread. 
if (wait_pid == -1) { Status error(errno, eErrorTypePOSIX); - LLDB_LOG(log, "waitpid (-1, &status, _) failed: {0}", error); - break; + LLDB_LOG(log, "waitpid({0}, &status, _) failed: {1}", thread_up->GetID(), + error); + continue; } + assert(wait_pid == static_cast<::pid_t>(thread_up->GetID())); + WaitStatus wait_status = WaitStatus::Decode(status); - LLDB_LOG(log, "waitpid (-1, &status, _) => pid = {0}, status = {1}", - wait_pid, wait_status); + LLDB_LOG(log, "waitpid({0}) got status = {1}", thread_up->GetID(), + wait_status); + tid_events.try_emplace(thread_up->GetID(), wait_status); + } - MonitorCallback(wait_pid, wait_status); + for (auto &KV : tid_events) { + LLDB_LOG(log, "processing {0}({1}) ...", KV.first, KV.second); + NativeThreadLinux *thread = GetThreadByID(KV.first); + if (thread) { + MonitorCallback(*thread, KV.second); + } else { + // This can happen if one of the events is an main thread exit. + LLDB_LOG(log, "... but the thread has disappeared"); + } } } diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h index 5d33c4753ca8..65f455a10968 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.h @@ -164,7 +164,7 @@ class NativeProcessLinux : public NativeProcessELF, static Status SetDefaultPtraceOpts(const lldb::pid_t); - void MonitorCallback(lldb::pid_t pid, WaitStatus status); + void MonitorCallback(NativeThreadLinux &thread, WaitStatus status); void WaitForCloneNotification(::pid_t pid); @@ -180,7 +180,7 @@ class NativeProcessLinux : public NativeProcessELF, bool HasThreadNoLock(lldb::tid_t thread_id); - bool StopTrackingThread(lldb::tid_t thread_id); + void StopTrackingThread(NativeThreadLinux &thread); /// Create a new thread. /// @@ -243,20 +243,9 @@ class NativeProcessLinux : public NativeProcessELF, /// Manages Intel PT process and thread traces. 
IntelPTManager m_intel_pt_manager; - struct CloneInfo { - int event; - lldb::tid_t parent_tid; - }; - - // Map of child processes that have been signaled once, and we are - // waiting for the second signal. - llvm::DenseMap> m_pending_pid_map; - - // Handle a clone()-like event. If received by parent, clone_info contains - // additional info. Returns true if the event is handled, or false if it - // is pending second notification. - bool MonitorClone(lldb::pid_t child_pid, - llvm::Optional clone_info); + // Handle a clone()-like event. + bool MonitorClone(NativeThreadLinux &parent, lldb::pid_t child_pid, + int event); }; } // namespace process_linux diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py index 8937621fb601..88ef72a06a6d 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py @@ -6,6 +6,40 @@ class TestGdbRemoteFork(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) + @add_test_categories(["fork"]) + def test_fork_multithreaded(self): + self.build() + self.prep_debug_monitor_and_inferior(inferior_args=["thread:new"]*2 + ["fork"]) + self.add_qSupported_packets(["multiprocess+", "fork-events+"]) + ret = self.expect_gdbremote_sequence() + self.assertIn("fork-events+", ret["qSupported_response"]) + self.reset_test_sequence() + + # continue and expect fork + fork_regex = "[$]T.*;fork:p([0-9a-f]+)[.]([0-9a-f]+).*" + self.test_sequence.add_log_lines([ + "read packet: $c#00", + {"direction": "send", "regex": fork_regex, + "capture": {1: "pid", 2: "tid"}}, + ], True) + ret = self.expect_gdbremote_sequence() + pid = int(ret["pid"], 16) + self.reset_test_sequence() + + # detach the forked child + self.test_sequence.add_log_lines([ + "read packet: $D;{:x}#00".format(pid), + {"direction": "send", "regex": r"[$]OK#.*"}, + ], True) + ret = self.expect_gdbremote_sequence() + self.reset_test_sequence() 
+ + # resume the parent + self.test_sequence.add_log_lines([ + "read packet: $k#00", + ], True) + self.expect_gdbremote_sequence() + def fork_and_detach_test(self, variant): self.build() self.prep_debug_monitor_and_inferior(inferior_args=[variant]) From 862fffd8231c8c44a8ea8071041eac8919aed346 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 3 Jan 2022 14:48:13 +0100 Subject: [PATCH 419/992] [lldb/qemu] Set qemu's "ld prefix" based on the platform sysroot Both serve the same purpose (finding shared libraries) and allow one to launch a dynamically linked executable by just specifying the platform sysroot. --- .../source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp | 2 ++ lldb/test/API/qemu/TestQemuLaunch.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp index 84e10042a97c..dd7546d8fa15 100644 --- a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp @@ -191,6 +191,8 @@ lldb::ProcessSP PlatformQemuUser::DebugProcess(ProcessLaunchInfo &launch_info, launch_info.SetArguments(args, true); Environment emulator_env = Host::GetEnvironment(); + if (ConstString sysroot = GetSDKRootDirectory()) + emulator_env["QEMU_LD_PREFIX"] = sysroot.GetStringRef().str(); for (const auto &KV : GetGlobalProperties().GetEmulatorEnvVars()) emulator_env[KV.first()] = KV.second; launch_info.GetEnvironment() = ComputeLaunchEnvironment( diff --git a/lldb/test/API/qemu/TestQemuLaunch.py b/lldb/test/API/qemu/TestQemuLaunch.py index e27d7a70fa0b..afa158339b6e 100644 --- a/lldb/test/API/qemu/TestQemuLaunch.py +++ b/lldb/test/API/qemu/TestQemuLaunch.py @@ -249,3 +249,11 @@ def test_arg0(self): self.assertEqual(state["program"], self.getBuildArtifact()) self.assertEqual(state["0"], "ARG0") + + def test_sysroot(self): + sysroot = self.getBuildArtifact("sysroot") + self.runCmd("platform select 
qemu-user --sysroot %s" % sysroot) + state = self._run_and_get_state() + self.assertEqual(state["environ"]["QEMU_LD_PREFIX"], sysroot) + self.assertIn("QEMU_LD_PREFIX", + state["environ"]["QEMU_UNSET_ENV"].split(",")) From 21aa2a1b09118c4678c198672ef8eb23e0cfd8e7 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Sat, 1 Jan 2022 00:39:49 -0500 Subject: [PATCH 420/992] [MLIR] Create add of sub folder Create folders for add(sub(a, b), b) -> a and add(b, sub(a, b)) -> a Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D116471 --- mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp | 10 ++++++++++ mlir/test/Dialect/Arithmetic/canonicalize.mlir | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index 59af0a5c999b..1536eeaf48af 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -194,6 +194,16 @@ OpFoldResult arith::AddIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_Zero())) return getLhs(); + // add(sub(a, b), b) -> a + if (auto sub = getLhs().getDefiningOp()) + if (getRhs() == sub.getRhs()) + return sub.getLhs(); + + // add(b, sub(a, b)) -> a + if (auto sub = getRhs().getDefiningOp()) + if (getLhs() == sub.getRhs()) + return sub.getLhs(); + return constFoldBinaryOp( operands, [](APInt a, const APInt &b) { return std::move(a) + b; }); } diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index dff4141a3838..e4cbe21710ba 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -299,6 +299,22 @@ func @tripleSubSub3(%arg0: index) -> index { return %add2 : index } +// CHECK-LABEL: @doubleAddSub1 +// CHECK-NEXT: return %arg0 +func @doubleAddSub1(%arg0: index, %arg1 : index) -> index { + %sub = arith.subi %arg0, %arg1 : index + %add = arith.addi 
%sub, %arg1 : index + return %add : index +} + +// CHECK-LABEL: @doubleAddSub2 +// CHECK-NEXT: return %arg0 +func @doubleAddSub2(%arg0: index, %arg1 : index) -> index { + %sub = arith.subi %arg0, %arg1 : index + %add = arith.addi %arg1, %sub : index + return %add : index +} + // CHECK-LABEL: @notCmpEQ // CHECK: %[[cres:.+]] = arith.cmpi ne, %arg0, %arg1 : i8 // CHECK: return %[[cres]] From e49c0e483fd751aa60c943eb9f573e4bd28d2a47 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Wed, 22 Dec 2021 16:04:55 +0530 Subject: [PATCH 421/992] [MLIR] Fix confusing diagnostic during dialect conversion Fix confusing diagnostic during partial dialect conversion. A failure to legalize is not the same as an operation being illegal: for eg. an operation neither explicity marked legal nor explicitly marked illegal could have been generated and may have failed to legalize further. The op isn't an illegal one per https://mlir.llvm.org/docs/DialectConversion/#conversion-target which is an op that is explicitly marked illegal. Differential Revision: https://reviews.llvm.org/D116152 --- mlir/docs/DialectConversion.md | 4 ++++ mlir/lib/Transforms/Utils/DialectConversion.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index 394b15cda362..39cee4b822b5 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -75,6 +75,10 @@ legality actions below: conversion to be successful. This action also allows for selectively marking specific operations as illegal in an otherwise legal dialect. +Operations and dialects that are neither explicitly marked legal nor illegal are +separate from the above ("unknown" operations) and are treated differently, for +example, for the purposes of partial conversion as mentioned above. 
+ An example conversion target is shown below: ```c++ diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 24711b0de132..657b0eb834e6 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1931,7 +1931,7 @@ OperationLegalizer::legalizeWithFold(Operation *op, Operation *cstOp = rewriterImpl.createdOps[i]; if (failed(legalize(cstOp, rewriter))) { LLVM_DEBUG(logFailure(rewriterImpl.logger, - "generated constant '{0}' was illegal", + "failed to legalize generated constant '{0}'", cstOp->getName())); rewriterImpl.resetState(curState); return failure(); @@ -2106,7 +2106,7 @@ LogicalResult OperationLegalizer::legalizePatternCreatedOperations( Operation *op = impl.createdOps[i]; if (failed(legalize(op, rewriter))) { LLVM_DEBUG(logFailure(impl.logger, - "generated operation '{0}'({1}) was illegal", + "failed to legalize generated operation '{0}'({1})", op->getName(), op)); return failure(); } @@ -2120,9 +2120,9 @@ LogicalResult OperationLegalizer::legalizePatternRootUpdates( for (int i = state.numRootUpdates, e = newState.numRootUpdates; i != e; ++i) { Operation *op = impl.rootUpdates[i].getOperation(); if (failed(legalize(op, rewriter))) { - LLVM_DEBUG(logFailure(impl.logger, - "operation updated in-place '{0}' was illegal", - op->getName())); + LLVM_DEBUG(logFailure( + impl.logger, "failed to legalize operation updated in-place '{0}'", + op->getName())); return failure(); } } From a8ae6828a98dcd5ea083eb07be8ad6db77b688a2 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 3 Jan 2022 16:12:47 +0100 Subject: [PATCH 422/992] [lldb] Delete GDBRemoteCommunicationReplayServer This survived the reproducer deletion. 
--- .../gdb-server/PlatformRemoteGDBServer.h | 2 - .../Plugins/Process/gdb-remote/CMakeLists.txt | 1 - .../GDBRemoteCommunicationReplayServer.cpp | 314 ------------------ .../GDBRemoteCommunicationReplayServer.h | 88 ----- .../Process/gdb-remote/ProcessGDBRemote.cpp | 37 +-- .../Process/gdb-remote/ProcessGDBRemote.h | 5 - 6 files changed, 9 insertions(+), 438 deletions(-) delete mode 100644 lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.cpp delete mode 100644 lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.h diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h index f594f43b3f13..425839c883a4 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h @@ -14,7 +14,6 @@ #include "Plugins/Process/Utility/GDBRemoteSignals.h" #include "Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h" -#include "Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.h" #include "lldb/Target/Platform.h" namespace lldb_private { @@ -155,7 +154,6 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver { protected: process_gdb_remote::GDBRemoteCommunicationClient m_gdb_client; - process_gdb_remote::GDBRemoteCommunicationReplayServer m_gdb_replay_server; std::string m_platform_description; // After we connect we can get a more // complete description of what we are // connected to diff --git a/lldb/source/Plugins/Process/gdb-remote/CMakeLists.txt b/lldb/source/Plugins/Process/gdb-remote/CMakeLists.txt index 448d032b381f..d578033e1c41 100644 --- a/lldb/source/Plugins/Process/gdb-remote/CMakeLists.txt +++ b/lldb/source/Plugins/Process/gdb-remote/CMakeLists.txt @@ -20,7 +20,6 @@ add_lldb_library(lldbPluginProcessGDBRemote PLUGIN GDBRemoteCommunication.cpp GDBRemoteCommunicationClient.cpp GDBRemoteCommunicationHistory.cpp - 
GDBRemoteCommunicationReplayServer.cpp GDBRemoteCommunicationServer.cpp GDBRemoteCommunicationServerCommon.cpp GDBRemoteCommunicationServerLLGS.cpp diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.cpp deleted file mode 100644 index c91d7cb5ac30..000000000000 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.cpp +++ /dev/null @@ -1,314 +0,0 @@ -//===-- GDBRemoteCommunicationReplayServer.cpp ----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "lldb/Host/Config.h" -#include "llvm/ADT/ScopeExit.h" - -#include "GDBRemoteCommunicationReplayServer.h" -#include "ProcessGDBRemoteLog.h" - -// C Includes -// C++ Includes -#include - -// Project includes -#include "lldb/Host/ThreadLauncher.h" -#include "lldb/Utility/ConstString.h" -#include "lldb/Utility/Event.h" -#include "lldb/Utility/FileSpec.h" -#include "lldb/Utility/StreamString.h" -#include "lldb/Utility/StringExtractorGDBRemote.h" - -using namespace llvm; -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::process_gdb_remote; - -/// Check if the given expected packet matches the actual packet. -static bool unexpected(llvm::StringRef expected, llvm::StringRef actual) { - // The 'expected' string contains the raw data, including the leading $ and - // trailing checksum. The 'actual' string contains only the packet's content. - if (expected.contains(actual)) - return false; - // Contains a PID which might be different. - if (expected.contains("vAttach")) - return false; - // Contains a ascii-hex-path. 
- if (expected.contains("QSetSTD")) - return false; - // Contains environment values. - if (expected.contains("QEnvironment")) - return false; - - return true; -} - -/// Check if we should reply to the given packet. -static bool skip(llvm::StringRef data) { - assert(!data.empty() && "Empty packet?"); - - // We've already acknowledge the '+' packet so we're done here. - if (data == "+") - return true; - - /// Don't 't reply to ^C. We need this because of stop reply packets, which - /// are only returned when the target halts. Reproducers synchronize these - /// 'asynchronous' replies, by recording them as a regular replies to the - /// previous packet (e.g. vCont). As a result, we should ignore real - /// asynchronous requests. - if (data.data()[0] == 0x03) - return true; - - return false; -} - -GDBRemoteCommunicationReplayServer::GDBRemoteCommunicationReplayServer() - : GDBRemoteCommunication("gdb-replay", "gdb-replay.rx_packet"), - m_async_broadcaster(nullptr, "lldb.gdb-replay.async-broadcaster"), - m_async_listener_sp( - Listener::MakeListener("lldb.gdb-replay.async-listener")), - m_async_thread_state_mutex() { - m_async_broadcaster.SetEventName(eBroadcastBitAsyncContinue, - "async thread continue"); - m_async_broadcaster.SetEventName(eBroadcastBitAsyncThreadShouldExit, - "async thread should exit"); - - const uint32_t async_event_mask = - eBroadcastBitAsyncContinue | eBroadcastBitAsyncThreadShouldExit; - m_async_listener_sp->StartListeningForEvents(&m_async_broadcaster, - async_event_mask); -} - -GDBRemoteCommunicationReplayServer::~GDBRemoteCommunicationReplayServer() { - StopAsyncThread(); -} - -GDBRemoteCommunication::PacketResult -GDBRemoteCommunicationReplayServer::GetPacketAndSendResponse( - Timeout timeout, Status &error, bool &interrupt, bool &quit) { - std::lock_guard guard(m_async_thread_state_mutex); - - StringExtractorGDBRemote packet; - PacketResult packet_result = WaitForPacketNoLock(packet, timeout, false); - - if (packet_result != 
PacketResult::Success) { - if (!IsConnected()) { - error.SetErrorString("lost connection"); - quit = true; - } else { - error.SetErrorString("timeout"); - } - return packet_result; - } - - m_async_broadcaster.BroadcastEvent(eBroadcastBitAsyncContinue); - - // Check if we should reply to this packet. - if (skip(packet.GetStringRef())) - return PacketResult::Success; - - // This completes the handshake. Since m_send_acks was true, we can unset it - // already. - if (packet.GetStringRef() == "QStartNoAckMode") - m_send_acks = false; - - // A QEnvironment packet is sent for every environment variable. If the - // number of environment variables is different during replay, the replies - // become out of sync. - if (packet.GetStringRef().find("QEnvironment") == 0) - return SendRawPacketNoLock("$OK#9a"); - - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); - while (!m_packet_history.empty()) { - // Pop last packet from the history. - GDBRemotePacket entry = m_packet_history.back(); - m_packet_history.pop_back(); - - // Decode run-length encoding. - const std::string expanded_data = - GDBRemoteCommunication::ExpandRLE(entry.packet.data); - - // We've handled the handshake implicitly before. Skip the packet and move - // on. - if (entry.packet.data == "+") - continue; - - if (entry.type == GDBRemotePacket::ePacketTypeSend) { - if (unexpected(expanded_data, packet.GetStringRef())) { - LLDB_LOG(log, - "GDBRemoteCommunicationReplayServer expected packet: '{0}'", - expanded_data); - LLDB_LOG(log, "GDBRemoteCommunicationReplayServer actual packet: '{0}'", - packet.GetStringRef()); -#ifndef NDEBUG - // This behaves like a regular assert, but prints the expected and - // received packet before aborting. 
- printf("Reproducer expected packet: '%s'\n", expanded_data.c_str()); - printf("Reproducer received packet: '%s'\n", - packet.GetStringRef().data()); - llvm::report_fatal_error("Encountered unexpected packet during replay"); -#endif - return PacketResult::ErrorSendFailed; - } - - // Ignore QEnvironment packets as they're handled earlier. - if (expanded_data.find("QEnvironment") == 1) { - assert(m_packet_history.back().type == - GDBRemotePacket::ePacketTypeRecv); - m_packet_history.pop_back(); - } - - continue; - } - - if (entry.type == GDBRemotePacket::ePacketTypeInvalid) { - LLDB_LOG( - log, - "GDBRemoteCommunicationReplayServer skipped invalid packet: '{0}'", - packet.GetStringRef()); - continue; - } - - LLDB_LOG(log, - "GDBRemoteCommunicationReplayServer replied to '{0}' with '{1}'", - packet.GetStringRef(), entry.packet.data); - return SendRawPacketNoLock(entry.packet.data); - } - - quit = true; - - return packet_result; -} - -llvm::Error -GDBRemoteCommunicationReplayServer::LoadReplayHistory(const FileSpec &path) { - auto error_or_file = MemoryBuffer::getFile(path.GetPath()); - if (auto err = error_or_file.getError()) - return errorCodeToError(err); - - yaml::Input yin((*error_or_file)->getBuffer()); - yin >> m_packet_history; - - if (auto err = yin.error()) - return errorCodeToError(err); - - // We want to manipulate the vector like a stack so we need to reverse the - // order of the packets to have the oldest on at the back. - std::reverse(m_packet_history.begin(), m_packet_history.end()); - - return Error::success(); -} - -bool GDBRemoteCommunicationReplayServer::StartAsyncThread() { - std::lock_guard guard(m_async_thread_state_mutex); - if (!m_async_thread.IsJoinable()) { - // Create a thread that watches our internal state and controls which - // events make it to clients (into the DCProcess event queue). 
- llvm::Expected async_thread = ThreadLauncher::LaunchThread( - "", - GDBRemoteCommunicationReplayServer::AsyncThread, this); - if (!async_thread) { - LLDB_LOG_ERROR(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_HOST), - async_thread.takeError(), - "failed to launch host thread: {}"); - return false; - } - m_async_thread = *async_thread; - } - - // Wait for handshake. - m_async_broadcaster.BroadcastEvent(eBroadcastBitAsyncContinue); - - return m_async_thread.IsJoinable(); -} - -void GDBRemoteCommunicationReplayServer::StopAsyncThread() { - std::lock_guard guard(m_async_thread_state_mutex); - - if (!m_async_thread.IsJoinable()) - return; - - // Request thread to stop. - m_async_broadcaster.BroadcastEvent(eBroadcastBitAsyncThreadShouldExit); - - // Disconnect client. - Disconnect(); - - // Stop the thread. - m_async_thread.Join(nullptr); - m_async_thread.Reset(); -} - -void GDBRemoteCommunicationReplayServer::ReceivePacket( - GDBRemoteCommunicationReplayServer &server, bool &done) { - Status error; - bool interrupt; - auto packet_result = server.GetPacketAndSendResponse(std::chrono::seconds(1), - error, interrupt, done); - if (packet_result != GDBRemoteCommunication::PacketResult::Success && - packet_result != - GDBRemoteCommunication::PacketResult::ErrorReplyTimeout) { - done = true; - } else { - server.m_async_broadcaster.BroadcastEvent(eBroadcastBitAsyncContinue); - } -} - -thread_result_t GDBRemoteCommunicationReplayServer::AsyncThread(void *arg) { - GDBRemoteCommunicationReplayServer *server = - (GDBRemoteCommunicationReplayServer *)arg; - auto D = make_scope_exit([&]() { server->Disconnect(); }); - EventSP event_sp; - bool done = false; - while (!done) { - if (server->m_async_listener_sp->GetEvent(event_sp, llvm::None)) { - const uint32_t event_type = event_sp->GetType(); - if (event_sp->BroadcasterIs(&server->m_async_broadcaster)) { - switch (event_type) { - case eBroadcastBitAsyncContinue: - ReceivePacket(*server, done); - if (done) - return {}; - break; 
- case eBroadcastBitAsyncThreadShouldExit: - default: - return {}; - } - } - } - } - - return {}; -} - -Status GDBRemoteCommunicationReplayServer::Connect( - process_gdb_remote::GDBRemoteCommunicationClient &client) { - repro::Loader *loader = repro::Reproducer::Instance().GetLoader(); - if (!loader) - return Status("No loader provided."); - - static std::unique_ptr> - multi_loader = repro::MultiLoader::Create( - repro::Reproducer::Instance().GetLoader()); - if (!multi_loader) - return Status("No gdb remote provider found."); - - llvm::Optional history_file = multi_loader->GetNextFile(); - if (!history_file) - return Status("No gdb remote packet log found."); - - if (auto error = LoadReplayHistory(FileSpec(*history_file))) - return Status("Unable to load replay history"); - - if (auto error = GDBRemoteCommunication::ConnectLocally(client, *this)) - return Status("Unable to connect to replay server"); - - return {}; -} diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.h deleted file mode 100644 index 2f8770d0accf..000000000000 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.h +++ /dev/null @@ -1,88 +0,0 @@ -//===-- GDBRemoteCommunicationReplayServer.h --------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLDB_SOURCE_PLUGINS_PROCESS_GDB_REMOTE_GDBREMOTECOMMUNICATIONREPLAYSERVER_H -#define LLDB_SOURCE_PLUGINS_PROCESS_GDB_REMOTE_GDBREMOTECOMMUNICATIONREPLAYSERVER_H - -// Other libraries and framework includes -#include "GDBRemoteCommunication.h" -#include "GDBRemoteCommunicationClient.h" -#include "GDBRemoteCommunicationHistory.h" - -// Project includes -#include "lldb/Host/HostThread.h" -#include "lldb/Utility/Broadcaster.h" -#include "lldb/lldb-private-forward.h" -#include "llvm/Support/Error.h" - -// C Includes -// C++ Includes -#include -#include -#include - -class StringExtractorGDBRemote; - -namespace lldb_private { -namespace process_gdb_remote { - -class ProcessGDBRemote; - -/// Dummy GDB server that replays packets from the GDB Remote Communication -/// history. This is used to replay GDB packets. -class GDBRemoteCommunicationReplayServer : public GDBRemoteCommunication { -public: - GDBRemoteCommunicationReplayServer(); - - ~GDBRemoteCommunicationReplayServer() override; - - PacketResult GetPacketAndSendResponse(Timeout timeout, - Status &error, bool &interrupt, - bool &quit); - - bool HandshakeWithClient() { return GetAck() == PacketResult::Success; } - - llvm::Error LoadReplayHistory(const FileSpec &path); - - bool StartAsyncThread(); - void StopAsyncThread(); - - Status Connect(process_gdb_remote::GDBRemoteCommunicationClient &client); - -protected: - enum { - eBroadcastBitAsyncContinue = (1 << 0), - eBroadcastBitAsyncThreadShouldExit = (1 << 1), - }; - - static void ReceivePacket(GDBRemoteCommunicationReplayServer &server, - bool &done); - static lldb::thread_result_t AsyncThread(void *arg); - - /// Replay history with the oldest packet at the end. - std::vector m_packet_history; - - /// Server thread. 
- Broadcaster m_async_broadcaster; - lldb::ListenerSP m_async_listener_sp; - HostThread m_async_thread; - std::recursive_mutex m_async_thread_state_mutex; - - bool m_skip_acks = false; - -private: - GDBRemoteCommunicationReplayServer( - const GDBRemoteCommunicationReplayServer &) = delete; - const GDBRemoteCommunicationReplayServer & - operator=(const GDBRemoteCommunicationReplayServer &) = delete; -}; - -} // namespace process_gdb_remote -} // namespace lldb_private - -#endif // LLDB_SOURCE_PLUGINS_PROCESS_GDB_REMOTE_GDBREMOTECOMMUNICATIONREPLAYSERVER_H diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 93fe36c0d9d6..cb5ec7f18d19 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -23,13 +23,6 @@ #include #include -#include -#include -#include -#include -#include -#include - #include "lldb/Breakpoint/Watchpoint.h" #include "lldb/Core/Debugger.h" #include "lldb/Core/Module.h" @@ -70,6 +63,13 @@ #include "lldb/Utility/State.h" #include "lldb/Utility/StreamString.h" #include "lldb/Utility/Timer.h" +#include +#include +#include +#include +#include +#include +#include #include "GDBRemoteRegisterContext.h" #include "Plugins/Platform/MacOSX/PlatformRemoteiOS.h" @@ -253,9 +253,8 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp, m_addr_to_mmap_size(), m_thread_create_bp_sp(), m_waiting_for_attach(false), m_destroy_tried_resuming(false), m_command_sp(), m_breakpoint_pc_offset(0), - m_initial_tid(LLDB_INVALID_THREAD_ID), m_replay_mode(false), - m_allow_flash_writes(false), m_erased_flash_ranges(), - m_vfork_in_progress(false) { + m_initial_tid(LLDB_INVALID_THREAD_ID), m_allow_flash_writes(false), + m_erased_flash_ranges(), m_vfork_in_progress(false) { m_async_broadcaster.SetEventName(eBroadcastBitAsyncThreadShouldExit, "async thread should exit"); 
m_async_broadcaster.SetEventName(eBroadcastBitAsyncContinue, @@ -3316,24 +3315,6 @@ Status ProcessGDBRemote::DoSignal(int signo) { return error; } -Status ProcessGDBRemote::ConnectToReplayServer() { - Status status = m_gdb_replay_server.Connect(m_gdb_comm); - if (status.Fail()) - return status; - - // Enable replay mode. - m_replay_mode = true; - - // Start server thread. - m_gdb_replay_server.StartAsyncThread(); - - // Start client thread. - StartAsyncThread(); - - // Do the usual setup. - return ConnectToDebugserver(""); -} - Status ProcessGDBRemote::EstablishConnectionIfNeeded(const ProcessInfo &process_info) { // Make sure we aren't already connected? diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 488336b8c1b8..bdf130e3ec11 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -34,7 +34,6 @@ #include "lldb/lldb-private-forward.h" #include "GDBRemoteCommunicationClient.h" -#include "GDBRemoteCommunicationReplayServer.h" #include "GDBRemoteRegisterContext.h" #include "llvm/ADT/DenseMap.h" @@ -251,7 +250,6 @@ class ProcessGDBRemote : public Process, }; GDBRemoteCommunicationClient m_gdb_comm; - GDBRemoteCommunicationReplayServer m_gdb_replay_server; std::atomic m_debugserver_pid; llvm::Optional m_last_stop_packet; @@ -292,7 +290,6 @@ class ProcessGDBRemote : public Process, lldb::tid_t m_initial_tid; // The initial thread ID, given by stub on attach bool m_use_g_packet_for_reading; - bool m_replay_mode; bool m_allow_flash_writes; using FlashRangeVector = lldb_private::RangeVector; using FlashRange = FlashRangeVector::Entry; @@ -320,8 +317,6 @@ class ProcessGDBRemote : public Process, bool DoUpdateThreadList(ThreadList &old_thread_list, ThreadList &new_thread_list) override; - Status ConnectToReplayServer(); - Status EstablishConnectionIfNeeded(const ProcessInfo &process_info); Status 
LaunchAndConnectToDebugserver(const ProcessInfo &process_info); From 3a33c0b1ce0db465c9d85c493674efc6c5005dbe Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 3 Jan 2022 15:14:39 +0000 Subject: [PATCH 423/992] [gn build] Port a8ae6828a98d --- .../gn/secondary/lldb/source/Plugins/Process/gdb-remote/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/gdb-remote/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/gdb-remote/BUILD.gn index e5daaa0a3019..eff3e6a1e18a 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/gdb-remote/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/gdb-remote/BUILD.gn @@ -36,7 +36,6 @@ static_library("gdb-remote") { "GDBRemoteCommunication.cpp", "GDBRemoteCommunicationClient.cpp", "GDBRemoteCommunicationHistory.cpp", - "GDBRemoteCommunicationReplayServer.cpp", "GDBRemoteCommunicationServer.cpp", "GDBRemoteCommunicationServerCommon.cpp", "GDBRemoteCommunicationServerLLGS.cpp", From cd45e8c7bc16dec2eeec9cc71eb3ba87d1bd6bab Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 29 Dec 2021 04:16:47 +0100 Subject: [PATCH 424/992] [CodeCompletion] Signature help for template argument lists Provide signature while typing template arguments: Foo< ^here > Here the parameters are e.g. "typename x", and the result type is e.g. "struct" (class template) or "int" (variable template) or "bool (std::string)" (function template). Multiple overloads are possible when a template name is used for several overloaded function templates. 
Fixes https://github.com/clangd/clangd/issues/299 Differential Revision: https://reviews.llvm.org/D116352 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 2 +- clang-tools-extra/clangd/CodeComplete.cpp | 24 +-- .../clangd/test/initialize-params.test | 4 +- .../clangd/unittests/CodeCompleteTests.cpp | 19 +++ clang/include/clang/Parse/Parser.h | 6 +- .../include/clang/Sema/CodeCompleteConsumer.h | 21 ++- clang/include/clang/Sema/Sema.h | 2 + clang/lib/Parse/ParseExprCXX.cpp | 4 +- clang/lib/Parse/ParseTemplate.cpp | 40 +++-- clang/lib/Sema/CodeCompleteConsumer.cpp | 11 ++ clang/lib/Sema/SemaCodeComplete.cpp | 138 +++++++++++++++++- .../CodeCompletion/template-signature.cpp | 28 ++++ 12 files changed, 266 insertions(+), 33 deletions(-) create mode 100644 clang/test/CodeCompletion/template-signature.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 18539877ec97..774cdea218d0 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -555,7 +555,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, }}, {"signatureHelpProvider", llvm::json::Object{ - {"triggerCharacters", {"(", ",", ")"}}, + {"triggerCharacters", {"(", ",", ")", "<", ">"}}, }}, {"declarationProvider", true}, {"definitionProvider", true}, diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 1c6b91e34828..bdfa1df19453 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -895,14 +895,12 @@ struct ScoredSignature { // part of it. 
int paramIndexForArg(const CodeCompleteConsumer::OverloadCandidate &Candidate, int Arg) { - int NumParams = 0; + int NumParams = Candidate.getNumParams(); if (const auto *F = Candidate.getFunction()) { - NumParams = F->getNumParams(); if (F->isVariadic()) ++NumParams; } else if (auto *T = Candidate.getFunctionType()) { if (auto *Proto = T->getAs()) { - NumParams = Proto->getNumParams(); if (Proto->isVariadic()) ++NumParams; } @@ -1016,6 +1014,9 @@ class SignatureHelpCollector final : public CodeCompleteConsumer { return R.Quality.Kind != OC::CK_Function; case OC::CK_FunctionTemplate: return false; + case OC::CK_Template: + assert(false && "Never see templates and other overloads mixed"); + return false; } llvm_unreachable("Unknown overload candidate type."); } @@ -1168,13 +1169,18 @@ class ParamNameCollector final : public CodeCompleteConsumer { for (unsigned I = 0; I < NumCandidates; ++I) { OverloadCandidate Candidate = Candidates[I]; - auto *Func = Candidate.getFunction(); - if (!Func || Func->getNumParams() <= CurrentArg) - continue; - auto *PVD = Func->getParamDecl(CurrentArg); - if (!PVD) + NamedDecl *Param = nullptr; + if (auto *Func = Candidate.getFunction()) { + if (CurrentArg < Func->getNumParams()) + Param = Func->getParamDecl(CurrentArg); + } else if (auto *Template = Candidate.getTemplate()) { + if (CurrentArg < Template->getTemplateParameters()->size()) + Param = Template->getTemplateParameters()->getParam(CurrentArg); + } + + if (!Param) continue; - auto *Ident = PVD->getIdentifier(); + auto *Ident = Param->getIdentifier(); if (!Ident) continue; auto Name = Ident->getName(); diff --git a/clang-tools-extra/clangd/test/initialize-params.test b/clang-tools-extra/clangd/test/initialize-params.test index a79f1075118a..72823f3a0683 100644 --- a/clang-tools-extra/clangd/test/initialize-params.test +++ b/clang-tools-extra/clangd/test/initialize-params.test @@ -108,7 +108,9 @@ # CHECK-NEXT: "triggerCharacters": [ # CHECK-NEXT: "(", # CHECK-NEXT: ",", -# 
CHECK-NEXT: ")" +# CHECK-NEXT: ")", +# CHECK-NEXT: "<", +# CHECK-NEXT: ">" # CHECK-NEXT: ] # CHECK-NEXT: }, # CHECK-NEXT: "textDocumentSync": { diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 2d4236d0763f..d32950fd6e13 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -3453,6 +3453,25 @@ TEST(SignatureHelp, DocFormat) { } } +TEST(SignatureHelp, TemplateArguments) { + std::string Top = R"cpp( + template bool foo(char); + template bool foo(float); + )cpp"; + + auto First = signatures(Top + "bool x = foo<^"); + EXPECT_THAT( + First.signatures, + UnorderedElementsAre(Sig("foo<[[typename T]], [[int]]>() -> bool"), + Sig("foo<[[int I]], [[int]]>() -> bool"))); + EXPECT_EQ(First.activeParameter, 0); + + auto Second = signatures(Top + "bool x = foo<1, ^"); + EXPECT_THAT(Second.signatures, + ElementsAre(Sig("foo<[[int I]], [[int]]>() -> bool"))); + EXPECT_EQ(Second.activeParameter, 1); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 741a484390b2..fd2221f03086 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3454,7 +3454,8 @@ class Parser : public CodeCompletionHandler { bool ParseTemplateIdAfterTemplateName(bool ConsumeLastToken, SourceLocation &LAngleLoc, TemplateArgList &TemplateArgs, - SourceLocation &RAngleLoc); + SourceLocation &RAngleLoc, + TemplateTy NameHint = nullptr); bool AnnotateTemplateIdToken(TemplateTy Template, TemplateNameKind TNK, CXXScopeSpec &SS, @@ -3464,7 +3465,8 @@ class Parser : public CodeCompletionHandler { bool TypeConstraint = false); void AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS, bool IsClassName = false); - bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs); + bool ParseTemplateArgumentList(TemplateArgList 
&TemplateArgs, + TemplateTy Template, SourceLocation OpenLoc); ParsedTemplateArgument ParseTemplateTemplateArgument(); ParsedTemplateArgument ParseTemplateArgument(); Decl *ParseExplicitInstantiation(DeclaratorContext Context, diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h index 6b37e3c50dba..7a369dfd6a43 100644 --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -1009,12 +1009,15 @@ class CodeCompleteConsumer { /// The candidate is a function declaration. CK_Function, - /// The candidate is a function template. + /// The candidate is a function template, arguments are being completed. CK_FunctionTemplate, /// The "candidate" is actually a variable, expression, or block /// for which we only have a function prototype. - CK_FunctionType + CK_FunctionType, + + /// The candidate is a template, template arguments are being completed. + CK_Template, }; private: @@ -1033,6 +1036,10 @@ class CodeCompleteConsumer { /// The function type that describes the entity being called, /// when Kind == CK_FunctionType. const FunctionType *Type; + + /// The template overload candidate, available when + /// Kind == CK_Template. + const TemplateDecl *Template; }; public: @@ -1045,6 +1052,9 @@ class CodeCompleteConsumer { OverloadCandidate(const FunctionType *Type) : Kind(CK_FunctionType), Type(Type) {} + OverloadCandidate(const TemplateDecl *Template) + : Kind(CK_Template), Template(Template) {} + /// Determine the kind of overload candidate. CandidateKind getKind() const { return Kind; } @@ -1062,6 +1072,13 @@ class CodeCompleteConsumer { /// function is stored. 
const FunctionType *getFunctionType() const; + const TemplateDecl *getTemplate() const { + assert(getKind() == CK_Template && "Not a template"); + return Template; + } + + unsigned getNumParams() const; + /// Create a new code-completion string that describes the function /// signature of this overload candidate. CodeCompletionString *CreateSignatureString(unsigned CurrentArg, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f97a785c7426..bb13d9527175 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12549,6 +12549,8 @@ class Sema final { ArrayRef ArgExprs, IdentifierInfo *II, SourceLocation OpenParLoc); + QualType ProduceTemplateArgumentSignatureHelp( + TemplateTy, ArrayRef, SourceLocation LAngleLoc); void CodeCompleteInitializer(Scope *S, Decl *D); /// Trigger code completion for a record of \p BaseType. \p InitExprs are /// expressions in the initializer list seen so far and \p D is the current diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 76c510ddd36c..9cdc16f8ce8d 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -2454,8 +2454,8 @@ bool Parser::ParseUnqualifiedIdTemplateId( // Parse the enclosed template argument list. SourceLocation LAngleLoc, RAngleLoc; TemplateArgList TemplateArgs; - if (ParseTemplateIdAfterTemplateName(true, LAngleLoc, TemplateArgs, - RAngleLoc)) + if (ParseTemplateIdAfterTemplateName(true, LAngleLoc, TemplateArgs, RAngleLoc, + Template)) return true; // If this is a non-template, we already issued a diagnostic. 
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index 45af61a3926a..204b53441ab4 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -1222,7 +1222,6 @@ bool Parser::ParseGreaterThanInTemplateList(SourceLocation LAngleLoc, return false; } - /// Parses a template-id that after the template name has /// already been parsed. /// @@ -1234,11 +1233,13 @@ bool Parser::ParseGreaterThanInTemplateList(SourceLocation LAngleLoc, /// token that forms the template-id. Otherwise, we will leave the /// last token in the stream (e.g., so that it can be replaced with an /// annotation token). -bool -Parser::ParseTemplateIdAfterTemplateName(bool ConsumeLastToken, - SourceLocation &LAngleLoc, - TemplateArgList &TemplateArgs, - SourceLocation &RAngleLoc) { +/// +/// \param NameHint is not required, and merely affects code completion. +bool Parser::ParseTemplateIdAfterTemplateName(bool ConsumeLastToken, + SourceLocation &LAngleLoc, + TemplateArgList &TemplateArgs, + SourceLocation &RAngleLoc, + TemplateTy Template) { assert(Tok.is(tok::less) && "Must have already parsed the template-name"); // Consume the '<'. @@ -1251,7 +1252,7 @@ Parser::ParseTemplateIdAfterTemplateName(bool ConsumeLastToken, if (!Tok.isOneOf(tok::greater, tok::greatergreater, tok::greatergreatergreater, tok::greaterequal, tok::greatergreaterequal)) - Invalid = ParseTemplateArgumentList(TemplateArgs); + Invalid = ParseTemplateArgumentList(TemplateArgs, Template, LAngleLoc); if (Invalid) { // Try to find the closing '>'. 
@@ -1332,8 +1333,8 @@ bool Parser::AnnotateTemplateIdToken(TemplateTy Template, TemplateNameKind TNK, TemplateArgList TemplateArgs; bool ArgsInvalid = false; if (!TypeConstraint || Tok.is(tok::less)) { - ArgsInvalid = ParseTemplateIdAfterTemplateName(false, LAngleLoc, - TemplateArgs, RAngleLoc); + ArgsInvalid = ParseTemplateIdAfterTemplateName( + false, LAngleLoc, TemplateArgs, RAngleLoc, Template); // If we couldn't recover from invalid arguments, don't form an annotation // token -- we don't know how much to annotate. // FIXME: This can lead to duplicate diagnostics if we retry parsing this @@ -1585,19 +1586,34 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() { /// template-argument-list: [C++ 14.2] /// template-argument /// template-argument-list ',' template-argument -bool -Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs) { +/// +/// \param Template is only used for code completion, and may be null. +bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs, + TemplateTy Template, + SourceLocation OpenLoc) { ColonProtectionRAIIObject ColonProtection(*this, false); + auto RunSignatureHelp = [&] { + if (!Template) + return QualType(); + CalledSignatureHelp = true; + return Actions.ProduceTemplateArgumentSignatureHelp(Template, TemplateArgs, + OpenLoc); + }; + do { + PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp); ParsedTemplateArgument Arg = ParseTemplateArgument(); SourceLocation EllipsisLoc; if (TryConsumeToken(tok::ellipsis, EllipsisLoc)) Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc); - if (Arg.isInvalid()) + if (Arg.isInvalid()) { + if (PP.isCodeCompletionReached() && !CalledSignatureHelp) + RunSignatureHelp(); return true; + } // Save this template argument. 
TemplateArgs.push_back(Arg); diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index 0a2ca54e244a..f0968ed0e503 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -506,11 +506,22 @@ CodeCompleteConsumer::OverloadCandidate::getFunctionType() const { case CK_FunctionType: return Type; + + case CK_Template: + return nullptr; } llvm_unreachable("Invalid CandidateKind!"); } +unsigned CodeCompleteConsumer::OverloadCandidate::getNumParams() const { + if (Kind == CK_Template) + return Template->getTemplateParameters()->size(); + if (const auto *FPT = dyn_cast_or_null(getFunctionType())) + return FPT->getNumParams(); + return 0; +} + //===----------------------------------------------------------------------===// // Code completion consumer implementation //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index be492b5f3607..e81faf6d2a93 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -36,6 +36,7 @@ #include "clang/Sema/Lookup.h" #include "clang/Sema/Overload.h" #include "clang/Sema/ParsedAttr.h" +#include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/Sema.h" @@ -3757,6 +3758,78 @@ static void AddOverloadParameterChunks(ASTContext &Context, } } +static std::string +formatTemplateParameterPlaceholder(const NamedDecl *Param, bool &Optional, + const PrintingPolicy &Policy) { + if (const auto *Type = dyn_cast(Param)) { + Optional = Type->hasDefaultArgument(); + } else if (const auto *NonType = dyn_cast(Param)) { + Optional = NonType->hasDefaultArgument(); + } else if (const auto *Template = dyn_cast(Param)) { + Optional = Template->hasDefaultArgument(); + } + std::string Result; + llvm::raw_string_ostream OS(Result); + Param->print(OS, Policy); + return 
Result; +} + +static std::string templateResultType(const TemplateDecl *TD, + const PrintingPolicy &Policy) { + if (const auto *CTD = dyn_cast(TD)) + return CTD->getTemplatedDecl()->getKindName().str(); + if (const auto *VTD = dyn_cast(TD)) + return VTD->getTemplatedDecl()->getType().getAsString(Policy); + if (const auto *FTD = dyn_cast(TD)) + return FTD->getTemplatedDecl()->getReturnType().getAsString(Policy); + if (isa(TD)) + return "type"; + if (isa(TD)) + return "class"; + if (isa(TD)) + return "concept"; + return ""; +} + +static CodeCompletionString *createTemplateSignatureString( + const TemplateDecl *TD, CodeCompletionBuilder &Builder, unsigned CurrentArg, + const PrintingPolicy &Policy) { + llvm::ArrayRef Params = TD->getTemplateParameters()->asArray(); + CodeCompletionBuilder OptionalBuilder(Builder.getAllocator(), + Builder.getCodeCompletionTUInfo()); + std::string ResultType = templateResultType(TD, Policy); + if (!ResultType.empty()) + Builder.AddResultTypeChunk(Builder.getAllocator().CopyString(ResultType)); + Builder.AddTextChunk( + Builder.getAllocator().CopyString(TD->getNameAsString())); + Builder.AddChunk(CodeCompletionString::CK_LeftAngle); + // Initially we're writing into the main string. Once we see an optional arg + // (with default), we're writing into the nested optional chunk. + CodeCompletionBuilder *Current = &Builder; + for (unsigned I = 0; I < Params.size(); ++I) { + bool Optional = false; + std::string Placeholder = + formatTemplateParameterPlaceholder(Params[I], Optional, Policy); + if (Optional) + Current = &OptionalBuilder; + if (I > 0) + Current->AddChunk(CodeCompletionString::CK_Comma); + Current->AddChunk(I == CurrentArg + ? CodeCompletionString::CK_CurrentParameter + : CodeCompletionString::CK_Placeholder, + Current->getAllocator().CopyString(Placeholder)); + } + // Add the optional chunk to the main string if we ever used it. 
+ if (Current == &OptionalBuilder) + Builder.AddOptionalChunk(OptionalBuilder.TakeString()); + Builder.AddChunk(CodeCompletionString::CK_RightAngle); + // For function templates, ResultType was the function's return type. + // Give some clue this is a function. (Don't show the possibly-bulky params). + if (isa(TD)) + Builder.AddInformativeChunk("()"); + return Builder.TakeString(); +} + CodeCompletionString * CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( unsigned CurrentArg, Sema &S, CodeCompletionAllocator &Allocator, @@ -3770,6 +3843,11 @@ CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( // FIXME: Set priority, availability appropriately. CodeCompletionBuilder Result(Allocator, CCTUInfo, 1, CXAvailability_Available); + + if (getKind() == CK_Template) + return createTemplateSignatureString(getTemplate(), Result, CurrentArg, + Policy); + FunctionDecl *FDecl = getFunction(); const FunctionProtoType *Proto = dyn_cast(getFunctionType()); @@ -5843,6 +5921,7 @@ static QualType getParamType(Sema &SemaRef, // overload candidates. QualType ParamType; for (auto &Candidate : Candidates) { + // FIXME: handle non-type-template-parameters by merging with D116326 if (const auto *FType = Candidate.getFunctionType()) if (const auto *Proto = dyn_cast(FType)) if (N < Proto->getNumParams()) { @@ -5860,8 +5939,7 @@ static QualType getParamType(Sema &SemaRef, } static QualType -ProduceSignatureHelp(Sema &SemaRef, Scope *S, - MutableArrayRef Candidates, +ProduceSignatureHelp(Sema &SemaRef, MutableArrayRef Candidates, unsigned CurrentArg, SourceLocation OpenParLoc) { if (Candidates.empty()) return QualType(); @@ -5970,7 +6048,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, } mergeCandidatesWithResults(*this, Results, CandidateSet, Loc, Args.size()); QualType ParamType = - ProduceSignatureHelp(*this, S, Results, Args.size(), OpenParLoc); + ProduceSignatureHelp(*this, Results, Args.size(), OpenParLoc); return !CandidateSet.empty() ? 
ParamType : QualType(); } @@ -6010,7 +6088,7 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, SmallVector Results; mergeCandidatesWithResults(*this, Results, CandidateSet, Loc, Args.size()); - return ProduceSignatureHelp(*this, S, Results, Args.size(), OpenParLoc); + return ProduceSignatureHelp(*this, Results, Args.size(), OpenParLoc); } QualType Sema::ProduceCtorInitMemberSignatureHelp( @@ -6032,6 +6110,58 @@ QualType Sema::ProduceCtorInitMemberSignatureHelp( return QualType(); } +static bool argMatchesTemplateParams(const ParsedTemplateArgument &Arg, + unsigned Index, + const TemplateParameterList &Params) { + const NamedDecl *Param; + if (Index < Params.size()) + Param = Params.getParam(Index); + else if (Params.hasParameterPack()) + Param = Params.asArray().back(); + else + return false; // too many args + + switch (Arg.getKind()) { + case ParsedTemplateArgument::Type: + return llvm::isa(Param); // constraints not checked + case ParsedTemplateArgument::NonType: + return llvm::isa(Param); // type not checked + case ParsedTemplateArgument::Template: + return llvm::isa(Param); // signature not checked + } +} + +QualType Sema::ProduceTemplateArgumentSignatureHelp( + TemplateTy ParsedTemplate, ArrayRef Args, + SourceLocation LAngleLoc) { + if (!CodeCompleter || !ParsedTemplate) + return QualType(); + + SmallVector Results; + auto Consider = [&](const TemplateDecl *TD) { + // Only add if the existing args are compatible with the template. 
+ bool Matches = true; + for (unsigned I = 0; I < Args.size(); ++I) { + if (!argMatchesTemplateParams(Args[I], I, *TD->getTemplateParameters())) { + Matches = false; + break; + } + } + if (Matches) + Results.emplace_back(TD); + }; + + TemplateName Template = ParsedTemplate.get(); + if (const auto *TD = Template.getAsTemplateDecl()) { + Consider(TD); + } else if (const auto *OTS = Template.getAsOverloadedTemplate()) { + for (const NamedDecl *ND : *OTS) + if (const auto *TD = llvm::dyn_cast(ND)) + Consider(TD); + } + return ProduceSignatureHelp(*this, Results, Args.size(), LAngleLoc); +} + static QualType getDesignatedType(QualType BaseType, const Designation &Desig) { for (unsigned I = 0; I < Desig.getNumDesignators(); ++I) { if (BaseType.isNull()) diff --git a/clang/test/CodeCompletion/template-signature.cpp b/clang/test/CodeCompletion/template-signature.cpp new file mode 100644 index 000000000000..4425faf24912 --- /dev/null +++ b/clang/test/CodeCompletion/template-signature.cpp @@ -0,0 +1,28 @@ +template float overloaded(int); +template bool overloaded(char); + +auto m = overloaded<1, 2>(0); +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:4:21 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s +// CHECK-CC1: OPENING_PAREN_LOC: {{.*}}4:20 +// CHECK-CC1-DAG: OVERLOAD: [#float#]overloaded<<#int#>, char y>[#()#] +// CHECK-CC1-DAG: OVERLOAD: [#bool#]overloaded<<#class#>, int x>[#()#] +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:4:24 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s +// CHECK-CC2-NOT: OVERLOAD: {{.*}}int x +// CHECK-CC2: OVERLOAD: [#float#]overloaded>[#()#] + +template int n = 0; +int val = n; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:14:18 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s +// CHECK-CC3: OVERLOAD: [#int#]n> +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:14:24 %s -o - | FileCheck -check-prefix=CHECK-CC4 %s +// CHECK-CC4: OVERLOAD: [#int#]n + +template struct Vector {}; +template class Container = 
Vector> +struct Collection { Container container; }; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:22:31 %s -o - | FileCheck -check-prefix=CHECK-CC5 %s +// CHECK-CC5: OVERLOAD: [#class#]Container<<#typename E#>> +Collection collection; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:25:12 %s -o - | FileCheck -check-prefix=CHECK-CC6 %s +// CHECK-CC6: OVERLOAD: [#struct#]Collection<<#typename Element#>> + From 550ea385abc2805fd3e0a539bf55bc82edb5c13e Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 3 Jan 2022 16:25:00 +0100 Subject: [PATCH 425/992] [mlir] Remove unnecessary canonicalization from Linalg Detensorize.cpp After https://reviews.llvm.org/D115821 it became possible to create `tensor` with a single `tensor.from_elements` operation without collapsing tensor shape from `tensor<1xelem_type>` to `tensor` Differential Revision: https://reviews.llvm.org/D115891 --- .../Dialect/Linalg/Transforms/Detensorize.cpp | 47 ++----------------- mlir/test/Dialect/Linalg/detensorize_0d.mlir | 12 ++--- .../Linalg/detensorize_br_operands.mlir | 16 +++---- mlir/test/Dialect/Linalg/detensorize_if.mlir | 44 +++++++---------- .../Dialect/Linalg/detensorize_trivial.mlir | 6 +-- .../Dialect/Linalg/detensorize_while.mlir | 4 +- .../Linalg/detensorize_while_impure_cf.mlir | 6 +-- 7 files changed, 37 insertions(+), 98 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp index aa8a3b9f4771..5aebbe08fcd7 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -24,18 +24,14 @@ using namespace mlir::linalg; static Value sourceMaterializationCallback(OpBuilder &builder, Type type, ValueRange inputs, Location loc) { assert(inputs.size() == 1); - if (inputs[0].getType().isa()) + auto inputType = inputs[0].getType(); + if (inputType.isa()) return nullptr; // A detensored value is converted back by creating a new tensor 
from its // element(s). - auto createNewTensorOp = - builder.create(loc, inputs[0]); - - // FromElementsOp results in a tensor<1xdtype>, we need to reshape that to - // a tensor instead. - return builder.create( - loc, type, createNewTensorOp, ArrayRef{}); + return builder.create( + loc, RankedTensorType::get({}, inputType), inputs[0]); } namespace { @@ -161,39 +157,6 @@ class DetensorizeTypeConverter : public TypeConverter { } }; -/// Canonicalizes the pattern of the form -/// -/// %tensor = tensor.from_elements(%element) : (i32) -> tensor<1xi32> -/// %reshaped_tensor = tensor.collapse_shape %tensor [] -/// : tensor<1xi32> into tensor -/// %extracted_element = tensor.extract %reshaped_tensor[] : tensor -/// -/// to just %element. -struct ExtractFromReshapeFromElements - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tensor::ExtractOp extract, - PatternRewriter &rewriter) const final { - if (!extract.indices().empty()) - return failure(); - - auto tensorReshape = - extract.tensor().getDefiningOp(); - if (tensorReshape == nullptr) - return failure(); - - auto tensorFromElements = - tensorReshape.getOperand() - .getDefiningOp(); - if (tensorFromElements == nullptr) - return failure(); - - rewriter.replaceOp(extract, tensorFromElements.getOperand(0)); - return success(); - } -}; - /// @see LinalgDetensorize in Linalg/Passes.td for more details. 
struct LinalgDetensorize : public LinalgDetensorizeBase { LinalgDetensorize() = default; @@ -591,7 +554,7 @@ struct LinalgDetensorize : public LinalgDetensorizeBase { signalPassFailure(); RewritePatternSet canonPatterns(context); - canonPatterns.add(context); + tensor::FromElementsOp::getCanonicalizationPatterns(canonPatterns, context); if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(canonPatterns)))) signalPassFailure(); diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir index 2d73c5b97c45..9ce2f8ccfa5a 100644 --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -19,8 +19,7 @@ func @detensor_simple(%arg1: tensor, %arg2: tensor) -> tensor att // CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]] // CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]], %[[arg2_val]] // CHECK: %[[new_tensor_res:.*]] = tensor.from_elements %[[detensored_res]] -// CHECK: %[[reshaped_tensor_res:.*]] = tensor.collapse_shape %[[new_tensor_res]] -// CHECK: return %[[reshaped_tensor_res]] +// CHECK: return %[[new_tensor_res]] func @detensor_op_sequence(%arg1: tensor, %arg2: tensor) -> tensor attributes {iree.module.export} { %0 = linalg.init_tensor [] : tensor @@ -60,8 +59,7 @@ func @detensor_op_sequence(%arg1: tensor, %arg2: tensor) -> tensor, %arg2: tensor) -> tensor attributes {iree.module.export} { %0 = linalg.init_tensor [] : tensor @@ -82,8 +80,7 @@ func @detensor_multiple_ops(%arg1: tensor, %arg2: tensor) -> tensor, %arg2: tensor) -> tensor attributes {iree.module.export} { %0 = linalg.init_tensor [] : tensor @@ -102,5 +99,4 @@ func @detensor_foreign_op(%arg1: tensor, %arg2: tensor) -> tensor // CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]] // CHECK: %[[detensored_res:.*]] = "foreign.do_something"(%[[arg1_val]], %[[arg2_val]]) // CHECK: %[[new_tensor_res:.*]] = tensor.from_elements %[[detensored_res]] -// CHECK: %[[reshaped_tensor_res:.*]] = 
tensor.collapse_shape %[[new_tensor_res]] -// CHECK: return %[[reshaped_tensor_res]] +// CHECK: return %[[new_tensor_res]] diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir index 2682a298dd2a..ff7cd003ad4a 100644 --- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir @@ -2,17 +2,14 @@ // TODO: Detensoring breaks if %arg0 or %arg1 are passed directly as tensors. Fix that. func @if_true_test(%arg0: i1, %arg1: i32) -> tensor attributes {} { - %arg0_t = tensor.from_elements %arg0 : tensor<1xi1> - %arg0_t2 = tensor.collapse_shape %arg0_t [] : tensor<1xi1> into tensor - - %arg1_t = tensor.from_elements %arg1 : tensor<1xi32> - %arg1_t2 = tensor.collapse_shape %arg1_t [] : tensor<1xi32> into tensor + %arg0_t = tensor.from_elements %arg0 : tensor + %arg1_t = tensor.from_elements %arg1 : tensor %cst = arith.constant dense<10> : tensor %2 = linalg.init_tensor [] : tensor %3 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg0_t2 : tensor) + ins(%arg0_t : tensor) outs(%2 : tensor) { ^bb0(%arg2: i1, %arg3: i8): // no predecessors %10 = arith.extui %arg2 : i1 to i8 @@ -20,12 +17,12 @@ func @if_true_test(%arg0: i1, %arg1: i32) -> tensor attributes {} { } -> tensor %4 = tensor.extract %3[] : tensor %5 = arith.trunci %4 : i8 to i1 - cond_br %5, ^bb1, ^bb2(%arg1_t2 : tensor) + cond_br %5, ^bb1, ^bb2(%arg1_t : tensor) ^bb1: %6 = linalg.init_tensor [] : tensor %7 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg1_t2, %cst : tensor, tensor) + ins(%arg1_t, %cst : tensor, tensor) outs(%6 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): // no predecessors %10 = arith.addi %arg2, %arg3 : i32 @@ -44,6 +41,5 @@ func @if_true_test(%arg0: i1, %arg1: i32) -> tensor attributes {} { // CHECK-NEXT: %[[add_res:.*]] = 
arith.addi // CHECK-NEXT: br ^[[bb2]](%[[add_res]] : i32) // CHECK-NEXT: ^[[bb2]] -// CHECK-NEXT: tensor.from_elements -// CHECK-NEXT: %[[func_res:.*]] = tensor.collapse_shape +// CHECK-NEXT: %[[func_res:.*]] = tensor.from_elements // CHECK-NEXT: return %[[func_res]] diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir index c9e843bc7d69..4341cf262fb6 100644 --- a/mlir/test/Dialect/Linalg/detensorize_if.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir @@ -9,17 +9,15 @@ func @main() -> (tensor) attributes {} { %c0 = arith.constant 0 : i32 - %0 = tensor.from_elements %c0 : tensor<1xi32> - %reshaped0 = tensor.collapse_shape %0 [] : tensor<1xi32> into tensor + %0 = tensor.from_elements %c0 : tensor %c10 = arith.constant 10 : i32 - %1 = tensor.from_elements %c10 : tensor<1xi32> - %reshaped1 = tensor.collapse_shape %1 [] : tensor<1xi32> into tensor - br ^bb1(%reshaped0 : tensor) + %1 = tensor.from_elements %c10 : tensor + br ^bb1(%0 : tensor) ^bb1(%2: tensor): // 2 preds: ^bb0, ^bb2 %3 = linalg.init_tensor [] : tensor %4 = linalg.generic #attrs - ins(%2, %reshaped1 : tensor, tensor) + ins(%2, %1 : tensor, tensor) outs(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors %8 = arith.cmpi slt, %arg0, %arg1 : i32 @@ -54,8 +52,7 @@ func @main() -> (tensor) attributes {} { // CHECK-NEXT: arith.addi %{{.*}}, %{{.*}} // CHECK-NEXT: br ^[[bb3:.*]](%{{.*}} : i32) // CHECK-NEXT: ^[[bb3]](%{{.*}}: i32) -// CHECK-NEXT: tensor.from_elements %{{.*}} : tensor<1xi32> -// CHECK-NEXT: tensor.collapse_shape %{{.*}} [] : tensor<1xi32> into tensor +// CHECK-NEXT: tensor.from_elements %{{.*}} : tensor // CHECK-NEXT: return %{{.*}} // CHECK-NEXT: } @@ -73,17 +70,15 @@ func @main() -> (tensor) attributes {} { func @main() -> (tensor) attributes {} { %c0 = arith.constant 0 : i32 - %0 = tensor.from_elements %c0 : tensor<1xi32> - %reshaped0 = tensor.collapse_shape %0 [] : tensor<1xi32> into tensor + %0 = 
tensor.from_elements %c0 : tensor %c10 = arith.constant 10 : i32 - %1 = tensor.from_elements %c10 : tensor<1xi32> - %reshaped1 = tensor.collapse_shape %1 [] : tensor<1xi32> into tensor - br ^bb1(%reshaped0 : tensor) + %1 = tensor.from_elements %c10 : tensor + br ^bb1(%0 : tensor) ^bb1(%2: tensor): // 2 preds: ^bb0, ^bb2 %3 = linalg.init_tensor [] : tensor %4 = linalg.generic #attrs - ins(%2, %reshaped1 : tensor, tensor) + ins(%2, %1 : tensor, tensor) outs(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors %8 = arith.cmpi slt, %arg0, %arg1 : i32 @@ -123,8 +118,7 @@ func @main() -> (tensor) attributes {} { // CHECK-NEXT: ^[[bb3]](%{{.*}}: i32) // CHECK-NEXT: br ^[[bb4:.*]](%{{.*}} : i32) // CHECK-NEXT: ^[[bb4]](%{{.*}}: i32) -// CHECK-NEXT: tensor.from_elements %{{.*}} : tensor<1xi32> -// CHECK-NEXT: tensor.collapse_shape %{{.*}} [] : tensor<1xi32> into tensor +// CHECK-NEXT: tensor.from_elements %{{.*}} : tensor // CHECK-NEXT: return %{{.*}} // CHECK-NEXT: } @@ -139,17 +133,15 @@ func @main() -> (tensor) attributes {} { func @main() -> (tensor) attributes {} { %c0 = arith.constant 0 : i32 - %0 = tensor.from_elements %c0 : tensor<1xi32> - %reshaped0 = tensor.collapse_shape %0 [] : tensor<1xi32> into tensor + %0 = tensor.from_elements %c0 : tensor %c10 = arith.constant 10 : i32 - %1 = tensor.from_elements %c10 : tensor<1xi32> - %reshaped1 = tensor.collapse_shape %1 [] : tensor<1xi32> into tensor - br ^bb1(%reshaped0 : tensor) + %1 = tensor.from_elements %c10 : tensor + br ^bb1(%0 : tensor) ^bb1(%2: tensor): // 2 preds: ^bb0, ^bb2 %3 = linalg.init_tensor [] : tensor %4 = linalg.generic #attrs - ins(%2, %reshaped1 : tensor, tensor) + ins(%2, %1 : tensor, tensor) outs(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors %8 = arith.cmpi slt, %arg0, %arg1 : i32 @@ -163,11 +155,10 @@ func @main() -> (tensor) attributes {} { cond_br %5, ^bb2(%2 : tensor), ^bb2(%2 : tensor) ^bb2(%6: tensor): // pred: ^bb1 - %12 = 
tensor.from_elements %c10 : tensor<1xi32> - %reshaped12 = tensor.collapse_shape %12 [] : tensor<1xi32> into tensor + %12 = tensor.from_elements %c10 : tensor %7 = linalg.init_tensor [] : tensor %8 = linalg.generic #attrs - ins(%6, %reshaped12 : tensor, tensor) + ins(%6, %12 : tensor, tensor) outs(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors %9 = arith.addi %arg0, %arg1 : i32 @@ -190,7 +181,6 @@ func @main() -> (tensor) attributes {} { // CHECK-NEXT: arith.addi %{{.*}}, %{{.*}} // CHECK-NEXT: br ^[[bb3:.*]](%{{.*}} : i32) // CHECK-NEXT: ^[[bb3]](%{{.*}}: i32) -// CHECK-NEXT: tensor.from_elements %{{.*}} : tensor<1xi32> -// CHECK-NEXT: tensor.collapse_shape %{{.*}} [] : tensor<1xi32> into tensor +// CHECK-NEXT: tensor.from_elements %{{.*}} : tensor // CHECK-NEXT: return %{{.*}} // CHECK-NEXT: } diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir index 5862327ebe6c..76b99d916acb 100644 --- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir @@ -11,11 +11,10 @@ func @main(%farg0 : tensor) -> (tensor) attributes {} { %c10 = arith.constant 10 : i32 - %1 = tensor.from_elements %c10 : tensor<1xi32> - %reshaped1 = tensor.collapse_shape %1 [] : tensor<1xi32> into tensor + %1 = tensor.from_elements %c10 : tensor %3 = linalg.init_tensor [] : tensor %4 = linalg.generic #attrs - ins(%farg0, %reshaped1 : tensor, tensor) + ins(%farg0, %1 : tensor, tensor) outs(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 @@ -30,7 +29,6 @@ func @main(%farg0 : tensor) -> (tensor) attributes {} { // DET-ALL-NEXT: tensor.extract %{{.*}}[] // DET-ALL-NEXT: arith.cmpi slt, %{{.*}}, %{{.*}} // DET-ALL-NEXT: tensor.from_elements %{{.*}} -// DET-ALL-NEXT: tensor.collapse_shape %{{.*}} // DET-ALL-NEXT: return %{{.*}} : tensor // DET-ALL-NEXT: } diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir 
b/mlir/test/Dialect/Linalg/detensorize_while.mlir index 9ece0029737c..6ae4c1ddef2d 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir @@ -52,7 +52,6 @@ func @main(%farg0: tensor, %farg1: tensor) -> tensor attributes { // DET-ALL: br ^[[bb1]](%{{.*}} : i32) // DET-ALL: ^[[bb3]](%{{.*}}: i32) // DET-ALL: tensor.from_elements {{.*}} -// DET-ALL: tensor.collapse_shape {{.*}} // DET-ALL: return %{{.*}} : tensor // Test detensoring only ops involed in control-flow. @@ -68,6 +67,5 @@ func @main(%farg0: tensor, %farg1: tensor) -> tensor attributes { // DET-CF: arith.addi {{.*}} // DET-CF: br ^[[bb1]](%{{.*}} : i32) // DET-CF: ^[[bb3]](%{{.*}}: i32) -// DET-CF: tensor.from_elements %{{.*}} : tensor<1xi32> -// DET-CF: tensor.collapse_shape %{{.*}} [] : tensor<1xi32> into tensor +// DET-CF: tensor.from_elements %{{.*}} : tensor // DET-CF: return %{{.*}} : tensor diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir index 765692fa2d3d..a464fb1a90e8 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir @@ -76,8 +76,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor) -> tensor attribute // DET-ALL: cmpi slt, %{{.*}}, %{{.*}} : i32 // DET-ALL: cond_br %{{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32) // DET-ALL: ^[[bb2]](%{{.*}}: i32) -// DET-ALL: tensor.from_elements %{{.*}} : tensor<1xi32> -// DET-ALL: tensor.collapse_shape %{{.*}} [] : tensor<1xi32> into tensor +// DET-ALL: tensor.from_elements %{{.*}} : tensor // DET-ALL: linalg.init_tensor [10] : tensor<10xi32> // DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor) outs(%{{.*}} : tensor<10xi32>) { // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32): @@ -85,8 +84,7 @@ func @main(%farg0: tensor<10xi32>, %farg1: tensor) -> tensor attribute // DET-ALL: } -> tensor<10xi32> // DET-ALL: br 
^[[bb1]](%{{.*}} : tensor<10xi32>) // DET-ALL: ^[[bb3]](%{{.*}}: i32) -// DET-ALL: tensor.from_elements %{{.*}} : tensor<1xi32> -// DET-ALL: tensor.collapse_shape %{{.*}} [] : tensor<1xi32> into tensor +// DET-ALL: tensor.from_elements %{{.*}} : tensor // DET-ALL: return %{{.*}} : tensor // DET-ALL: } From 4435d1819efec06e11461799fe83d6f148b098f4 Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Tue, 21 Dec 2021 16:22:44 +0000 Subject: [PATCH 426/992] [ARM][AArch64] clang support for Armv9.3-A This patch introduces support for targetting the Armv9.3-A architecture, which should map to the existing Armv8.8-A extensions. Differential Revision: https://reviews.llvm.org/D116159 --- clang/lib/Basic/Targets/AArch64.cpp | 12 ++++++++++++ clang/lib/Basic/Targets/AArch64.h | 2 ++ clang/lib/Basic/Targets/ARM.cpp | 3 +++ clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 9 ++++++--- clang/test/Driver/aarch64-cpus.c | 16 ++++++++++++++++ clang/test/Driver/arm-cortex-cpus.c | 16 ++++++++++++++++ clang/test/Preprocessor/arm-target-features.c | 5 +++++ 7 files changed, 60 insertions(+), 3 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index d7eb770995cb..334ff584c9e3 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -45,6 +45,7 @@ static StringRef getArchVersionString(llvm::AArch64::ArchKind Kind) { case llvm::AArch64::ArchKind::ARMV9A: case llvm::AArch64::ArchKind::ARMV9_1A: case llvm::AArch64::ArchKind::ARMV9_2A: + case llvm::AArch64::ArchKind::ARMV9_3A: return "9"; default: return "8"; @@ -247,6 +248,12 @@ void AArch64TargetInfo::getTargetDefinesARMV92A(const LangOptions &Opts, getTargetDefinesARMV87A(Opts, Builder); } +void AArch64TargetInfo::getTargetDefinesARMV93A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Armv9.3-A maps to Armv8.8-A + getTargetDefinesARMV88A(Opts, Builder); +} + void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder 
&Builder) const { // Target identification. @@ -464,6 +471,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::AArch64::ArchKind::ARMV9_2A: getTargetDefinesARMV92A(Opts, Builder); break; + case llvm::AArch64::ArchKind::ARMV9_3A: + getTargetDefinesARMV93A(Opts, Builder); + break; } // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work. @@ -620,6 +630,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, ArchKind = llvm::AArch64::ArchKind::ARMV9_1A; if (Feature == "+v9.2a") ArchKind = llvm::AArch64::ArchKind::ARMV9_2A; + if (Feature == "+v9.3a") + ArchKind = llvm::AArch64::ArchKind::ARMV9_3A; if (Feature == "+v8r") ArchKind = llvm::AArch64::ArchKind::ARMV8R; if (Feature == "+fullfp16") diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 6bc0ea4eb5e1..43e5b6fe2de0 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -100,6 +100,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { MacroBuilder &Builder) const; void getTargetDefinesARMV92A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV93A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index bb0044bdc596..478a0233398d 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -220,6 +220,8 @@ StringRef ARMTargetInfo::getCPUAttr() const { return "9_1A"; case llvm::ARM::ArchKind::ARMV9_2A: return "9_2A"; + case llvm::ARM::ArchKind::ARMV9_3A: + return "9_3A"; case llvm::ARM::ArchKind::ARMV8MBaseline: return "8M_BASE"; case llvm::ARM::ArchKind::ARMV8MMainline: @@ -936,6 +938,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::ARM::ArchKind::ARMV9A: case llvm::ARM::ArchKind::ARMV9_1A: case 
llvm::ARM::ArchKind::ARMV9_2A: + case llvm::ARM::ArchKind::ARMV9_3A: getTargetDefinesARMV83A(Opts, Builder); break; } diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 8cb9318df4cd..9ffb5d73b2aa 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -104,7 +104,8 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text, ArchKind == llvm::AArch64::ArchKind::ARMV8_7A || ArchKind == llvm::AArch64::ArchKind::ARMV8_8A || ArchKind == llvm::AArch64::ArchKind::ARMV9_1A || - ArchKind == llvm::AArch64::ArchKind::ARMV9_2A) && + ArchKind == llvm::AArch64::ArchKind::ARMV9_2A || + ArchKind == llvm::AArch64::ArchKind::ARMV9_3A) && Feature == "sve") Features.push_back("+f32mm"); } @@ -394,7 +395,8 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, std::find(ItBegin, ItEnd, "+v8.8a") != ItEnd || std::find(ItBegin, ItEnd, "+v9a") != ItEnd || std::find(ItBegin, ItEnd, "+v9.1a") != ItEnd || - std::find(ItBegin, ItEnd, "+v9.2a") != ItEnd) { + std::find(ItBegin, ItEnd, "+v9.2a") != ItEnd || + std::find(ItBegin, ItEnd, "+v9.3a") != ItEnd) { if (HasCrypto && !NoCrypto) { // Check if we have NOT disabled an algorithm with something like: // +crypto, -algorithm @@ -453,7 +455,8 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, } } - const char *Archs[] = {"+v8.6a", "+v8.7a", "+v8.8a", "+v9.1a", "+v9.2a"}; + const char *Archs[] = {"+v8.6a", "+v8.7a", "+v8.8a", + "+v9.1a", "+v9.2a", "+v9.3a"}; auto Pos = std::find_first_of(Features.begin(), Features.end(), std::begin(Archs), std::end(Archs)); if (Pos != std::end(Features)) diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index 71b7139ca266..3e9923e8d185 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -876,6 +876,22 @@ // RUN: %clang -target aarch64_be -mbig-endian -march=armv9.2-a -### -c %s 2>&1 | FileCheck 
-check-prefix=GENERICV92A-BE %s // GENERICV92A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v9.2a" "-target-feature" "+i8mm" "-target-feature" "+bf16" "-target-feature" "+sve" "-target-feature" "+sve2" +// RUN: %clang -target aarch64 -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A %s +// RUN: %clang -target aarch64 -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A %s +// RUN: %clang -target aarch64 -mlittle-endian -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A %s +// RUN: %clang -target aarch64 -mlittle-endian -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A %s +// RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A %s +// RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A %s +// GENERICV93A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v9.3a" + +// RUN: %clang -target aarch64_be -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s +// RUN: %clang -target aarch64_be -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s +// RUN: %clang -target aarch64 -mbig-endian -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s +// RUN: %clang -target aarch64 -mbig-endian -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s +// GENERICV93A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v9.3a" + // fullfp16 is off by default 
for v8a, feature must not be mentioned // RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s // RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index 0c4a79bb773f..7b7a4b355dcc 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c @@ -437,6 +437,22 @@ // RUN: %clang -target arm -march=armebv9.2-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V92A %s // CHECK-BE-V92A: "-cc1"{{.*}} "-triple" "armebv9.2{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V93A %s +// RUN: %clang -target arm -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V93A %s +// RUN: %clang -target arm -march=armv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V93A %s +// RUN: %clang -target armv9.3a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V93A %s +// RUN: %clang -target arm -march=armv9.3a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V93A %s +// RUN: %clang -target arm -march=armv9.3-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V93A %s +// CHECK-V93A: "-cc1"{{.*}} "-triple" "armv9.3{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target armebv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V93A %s +// RUN: %clang -target armv9.3a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V93A %s +// RUN: %clang -target armeb -march=armebv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V93A %s +// RUN: %clang -target armeb -march=armebv9.3-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V93A %s +// RUN: %clang -target arm -march=armebv9.3a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V93A %s +// RUN: %clang -target arm 
-march=armebv9.3-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V93A %s +// CHECK-BE-V93A: "-cc1"{{.*}} "-triple" "armebv9.3{{.*}}" "-target-cpu" "generic" + // Once we have CPUs with optional v8.2-A FP16, we will need a way to turn it // on and off. Cortex-A53 is a placeholder for now. // RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index 88203535c71e..81531a39f29f 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -879,6 +879,11 @@ // CHECK-V92A: #define __ARM_ARCH_9_2A__ 1 // CHECK-V92A: #define __ARM_ARCH_PROFILE 'A' +// RUN: %clang -target armv9.3a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V93A %s +// CHECK-V93A: #define __ARM_ARCH 9 +// CHECK-V93A: #define __ARM_ARCH_9_3A__ 1 +// CHECK-V93A: #define __ARM_ARCH_PROFILE 'A' + // RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s // CHECK-SOFTVFP-NOT: #define __ARM_FP 0x From 7505aeefc4e615520e2c822b9647dad4a48276b9 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 3 Jan 2022 10:58:40 -0500 Subject: [PATCH 427/992] [clang] Pacify GCC warning. NFC. - This partially reverts d677a7cb056b17145a50ec8ca2ab6d5f4c494749 to pacify GCC warnings like ``` base class should be explicitly initialized in the copy constructor ``` - Shall we keep turning on option `IgnoreBaseInCopyConstructors` when enabling `readability-redundant-member-init` check? 
--- clang/include/clang/Basic/Diagnostic.h | 2 +- clang/include/clang/Basic/PartialDiagnostic.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index 6a80823d1242..e5577e74fa63 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -1326,7 +1326,7 @@ class DiagnosticBuilder : public StreamingDiagnostic { public: /// Copy constructor. When copied, this "takes" the diagnostic info from the /// input and neuters it. - DiagnosticBuilder(const DiagnosticBuilder &D) { + DiagnosticBuilder(const DiagnosticBuilder &D) : StreamingDiagnostic() { DiagObj = D.DiagObj; DiagStorage = D.DiagStorage; IsActive = D.IsActive; diff --git a/clang/include/clang/Basic/PartialDiagnostic.h b/clang/include/clang/Basic/PartialDiagnostic.h index 217441979869..9fb70bff7fee 100644 --- a/clang/include/clang/Basic/PartialDiagnostic.h +++ b/clang/include/clang/Basic/PartialDiagnostic.h @@ -49,7 +49,8 @@ class PartialDiagnostic : public StreamingDiagnostic { PartialDiagnostic(unsigned DiagID, DiagStorageAllocator &Allocator_) : StreamingDiagnostic(Allocator_), DiagID(DiagID) {} - PartialDiagnostic(const PartialDiagnostic &Other) : DiagID(Other.DiagID) { + PartialDiagnostic(const PartialDiagnostic &Other) + : StreamingDiagnostic(), DiagID(Other.DiagID) { Allocator = Other.Allocator; if (Other.DiagStorage) { DiagStorage = getStorage(); From 9e6f88b31a7f7957a850d3541ffa759f2993ffc1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 22 Dec 2021 11:07:50 +0100 Subject: [PATCH 428/992] [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics This mechanism is used almost exclusively to enable extra warnings in clang-tidy using ExtraArgs=-Wfoo, Checks="clang-diagnostic-foo". Its presence is a strong signal that these flags are useful. 
We choose not to actually emit them as clang-tidy diagnostics, but under their "main" name - this ensures we show the same diagnostic in a consistent way. We don't add the ExtraArgs to the compile command in general, but rather just handle the -W flags, which is the common case and avoids unexpected side-effects. And we only do this for the main file parse, when producing diagnostics. Differential Revision: https://reviews.llvm.org/D116147 --- clang-tools-extra/clangd/ParsedAST.cpp | 74 +++++++++++++++++-- .../clangd/unittests/DiagnosticsTests.cpp | 74 +++++++++++++++++++ 2 files changed, 143 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 4b96725de441..732c36813800 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -246,6 +246,49 @@ class ReplayPreamble : private PPCallbacks { std::vector MainFileTokens; }; +// Find -W and -Wno- options in ExtraArgs and apply them to Diags. +// +// This is used to handle ExtraArgs in clang-tidy configuration. +// We don't use clang's standard handling of this as we want slightly different +// behavior (e.g. we want to exclude these from -Wno-error). +void applyWarningOptions(llvm::ArrayRef ExtraArgs, + DiagnosticsEngine &Diags) { + for (llvm::StringRef Group : ExtraArgs) { + // Only handle args that are of the form -W[no-]. + // Other flags are possible but rare and deliberately out of scope. + llvm::SmallVector Members; + if (!Group.consume_front("-W") || Group.empty()) + continue; + bool Enable = !Group.consume_front("no-"); + if (Diags.getDiagnosticIDs()->getDiagnosticsInGroup( + diag::Flavor::WarningOrError, Group, Members)) + continue; + + // Upgrade (or downgrade) the severity of each diagnostic in the group. + // If -Werror is on, newly added warnings will be treated as errors. + // We don't want this, so keep track of them to fix afterwards. 
+ bool NeedsWerrorExclusion = false; + for (diag::kind ID : Members) { + if (Enable) { + if (Diags.getDiagnosticLevel(ID, SourceLocation()) < + DiagnosticsEngine::Warning) { + Diags.setSeverity(ID, diag::Severity::Warning, SourceLocation()); + if (Diags.getWarningsAsErrors()) + NeedsWerrorExclusion = true; + } + } else { + Diags.setSeverity(ID, diag::Severity::Ignored, SourceLocation()); + } + } + if (NeedsWerrorExclusion) { + // FIXME: there's no API to suppress -Werror for single diagnostics. + // In some cases with sub-groups, we may end up erroneously + // downgrading diagnostics that were -Werror in the compile command. + Diags.setDiagnosticGroupWarningAsError(Group, false); + } + } +} + } // namespace llvm::Optional @@ -311,7 +354,32 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, : "unknown error"); return None; } - if (!PreserveDiags) { + tidy::ClangTidyOptions ClangTidyOpts; + if (PreserveDiags) { + trace::Span Tracer("ClangTidyOpts"); + ClangTidyOpts = getTidyOptionsForFile(Inputs.ClangTidyProvider, Filename); + dlog("ClangTidy configuration for file {0}: {1}", Filename, + tidy::configurationAsText(ClangTidyOpts)); + + // If clang-tidy is configured to emit clang warnings, we should too. + // + // Such clang-tidy configuration consists of two parts: + // - ExtraArgs: ["-Wfoo"] causes clang to produce the warnings + // - Checks: "clang-diagnostic-foo" prevents clang-tidy filtering them out + // + // We treat these as clang warnings, so the Checks part is not relevant. + // We must enable the warnings specified in ExtraArgs. + // + // We *don't* want to change the compile command directly. this can have + // too many unexpected effects: breaking the command, interactions with + // -- and -Werror, etc. Besides, we've already parsed the command. + // Instead we parse the -W flags and handle them directly. 
+ auto &Diags = Clang->getDiagnostics(); + if (ClangTidyOpts.ExtraArgsBefore) + applyWarningOptions(*ClangTidyOpts.ExtraArgsBefore, Diags); + if (ClangTidyOpts.ExtraArgs) + applyWarningOptions(*ClangTidyOpts.ExtraArgs, Diags); + } else { // Skips some analysis. Clang->getDiagnosticOpts().IgnoreWarnings = true; } @@ -348,10 +416,6 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, // diagnostics. if (PreserveDiags) { trace::Span Tracer("ClangTidyInit"); - tidy::ClangTidyOptions ClangTidyOpts = - getTidyOptionsForFile(Inputs.ClangTidyProvider, Filename); - dlog("ClangTidy configuration for file {0}: {1}", Filename, - tidy::configurationAsText(ClangTidyOpts)); tidy::ClangTidyCheckFactories CTFactories; for (const auto &E : tidy::ClangTidyModuleRegistry::entries()) E.instantiate()->addCheckFactories(CTFactories); diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 5cf68c9c8e9c..3d4bc1cea87c 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -517,6 +517,80 @@ TEST(DiagnosticTest, ClangTidyWarningAsError) { DiagSeverity(DiagnosticsEngine::Error))))); } +TidyProvider addClangArgs(std::vector ExtraArgs) { + return [ExtraArgs = std::move(ExtraArgs)](tidy::ClangTidyOptions &Opts, + llvm::StringRef) { + if (!Opts.ExtraArgs) + Opts.ExtraArgs.emplace(); + for (llvm::StringRef Arg : ExtraArgs) + Opts.ExtraArgs->emplace_back(Arg); + }; +} + +TEST(DiagnosticTest, ClangTidyEnablesClangWarning) { + Annotations Main(R"cpp( // error-ok + static void [[foo]]() {} + )cpp"); + TestTU TU = TestTU::withCode(Main.code()); + // This is always emitted as a clang warning, not a clang-tidy diagnostic. 
+ auto UnusedFooWarning = + AllOf(Diag(Main.range(), "unused function 'foo'"), + DiagName("-Wunused-function"), DiagSource(Diag::Clang), + DiagSeverity(DiagnosticsEngine::Warning)); + + // Check the -Wunused warning isn't initially on. + EXPECT_THAT(*TU.build().getDiagnostics(), IsEmpty()); + + // We enable warnings based on clang-tidy extra args. + TU.ClangTidyProvider = addClangArgs({"-Wunused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), ElementsAre(UnusedFooWarning)); + + // But we don't respect other args. + TU.ClangTidyProvider = addClangArgs({"-Wunused", "-Dfoo=bar"}); + EXPECT_THAT(*TU.build().getDiagnostics(), ElementsAre(UnusedFooWarning)) + << "Not unused function 'bar'!"; + + // -Werror doesn't apply to warnings enabled by clang-tidy extra args. + TU.ExtraArgs = {"-Werror"}; + TU.ClangTidyProvider = addClangArgs({"-Wunused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), + ElementsAre(DiagSeverity(DiagnosticsEngine::Warning))); + + // But clang-tidy extra args won't *downgrade* errors to warnings either. + TU.ExtraArgs = {"-Wunused", "-Werror"}; + TU.ClangTidyProvider = addClangArgs({"-Wunused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), + ElementsAre(DiagSeverity(DiagnosticsEngine::Error))); + + // FIXME: we're erroneously downgrading the whole group, this should be Error. + TU.ExtraArgs = {"-Wunused-function", "-Werror"}; + TU.ClangTidyProvider = addClangArgs({"-Wunused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), + ElementsAre(DiagSeverity(DiagnosticsEngine::Warning))); + + // This looks silly, but it's the typical result if a warning is enabled by a + // high-level .clang-tidy file and disabled by a low-level one. + TU.ExtraArgs = {}; + TU.ClangTidyProvider = addClangArgs({"-Wunused", "-Wno-unused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), IsEmpty()); + + // Overriding only works in the proper order. 
+ TU.ClangTidyProvider = addClangArgs({"-Wno-unused", "-Wunused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), SizeIs(1)); + + // More specific vs less-specific: match clang behavior + TU.ClangTidyProvider = addClangArgs({"-Wunused", "-Wno-unused-function"}); + EXPECT_THAT(*TU.build().getDiagnostics(), IsEmpty()); + TU.ClangTidyProvider = addClangArgs({"-Wunused-function", "-Wno-unused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), IsEmpty()); + + // We do allow clang-tidy config to disable warnings from the compile command. + // It's unclear this is ideal, but it's hard to avoid. + TU.ExtraArgs = {"-Wunused"}; + TU.ClangTidyProvider = addClangArgs({"-Wno-unused"}); + EXPECT_THAT(*TU.build().getDiagnostics(), IsEmpty()); +} + TEST(DiagnosticTest, LongFixMessages) { // We limit the size of printed code. Annotations Source(R"cpp( From fe68088d44f760c7d3d8ee6735d396d97cb55478 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Sun, 2 Jan 2022 23:22:18 +0100 Subject: [PATCH 429/992] [clangd] Helper for determining member insertion point. To be used in D116490 and D116385, and an upcoming patch to generate C++ constructors. 
Differential Revision: https://reviews.llvm.org/D116502 --- clang-tools-extra/clangd/CMakeLists.txt | 1 + .../clangd/refactor/InsertionPoint.cpp | 157 +++++++++++++ .../clangd/refactor/InsertionPoint.h | 53 +++++ .../clangd/unittests/CMakeLists.txt | 1 + .../clangd/unittests/InsertionPointTests.cpp | 210 ++++++++++++++++++ 5 files changed, 422 insertions(+) create mode 100644 clang-tools-extra/clangd/refactor/InsertionPoint.cpp create mode 100644 clang-tools-extra/clangd/refactor/InsertionPoint.h create mode 100644 clang-tools-extra/clangd/unittests/InsertionPointTests.cpp diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 056a3272ebd1..9c37cfe7b700 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -131,6 +131,7 @@ add_clang_library(clangDaemon index/dex/PostingList.cpp index/dex/Trigram.cpp + refactor/InsertionPoint.cpp refactor/Rename.cpp refactor/Tweak.cpp diff --git a/clang-tools-extra/clangd/refactor/InsertionPoint.cpp b/clang-tools-extra/clangd/refactor/InsertionPoint.cpp new file mode 100644 index 000000000000..ce5f3a5a1d5f --- /dev/null +++ b/clang-tools-extra/clangd/refactor/InsertionPoint.cpp @@ -0,0 +1,157 @@ +//===--- InsertionPoint.cpp - Where should we add new code? ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "refactor/InsertionPoint.h" +#include "support/Logger.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/Basic/SourceManager.h" + +namespace clang { +namespace clangd { +namespace { + +// Choose the decl to insert before, according to an anchor. 
+// Nullptr means insert at end of DC. +// None means no valid place to insert. +llvm::Optional insertionDecl(const DeclContext &DC, + const Anchor &A) { + bool LastMatched = false; + bool ReturnNext = false; + for (const auto *D : DC.decls()) { + if (D->isImplicit()) + continue; + if (ReturnNext) + return D; + + const Decl *NonTemplate = D; + if (auto *TD = llvm::dyn_cast(D)) + NonTemplate = TD->getTemplatedDecl(); + bool Matches = A.Match(NonTemplate); + dlog(" {0} {1} {2}", Matches, D->getDeclKindName(), D); + + switch (A.Direction) { + case Anchor::Above: + if (Matches && !LastMatched) { + // Special case: if "above" matches an access specifier, we actually + // want to insert below it! + if (llvm::isa(D)) { + ReturnNext = true; + continue; + } + return D; + } + break; + case Anchor::Below: + if (LastMatched && !Matches) + return D; + break; + } + + LastMatched = Matches; + } + if (ReturnNext || (LastMatched && A.Direction == Anchor::Below)) + return nullptr; + return llvm::None; +} + +SourceLocation beginLoc(const Decl &D) { + auto Loc = D.getBeginLoc(); + if (RawComment *Comment = D.getASTContext().getRawCommentForDeclNoCache(&D)) { + auto CommentLoc = Comment->getBeginLoc(); + if (CommentLoc.isValid() && Loc.isValid() && + D.getASTContext().getSourceManager().isBeforeInTranslationUnit( + CommentLoc, Loc)) + Loc = CommentLoc; + } + return Loc; +} + +bool any(const Decl *D) { return true; } + +SourceLocation endLoc(const DeclContext &DC) { + const Decl *D = llvm::cast(&DC); + if (auto *OCD = llvm::dyn_cast(D)) + return OCD->getAtEndRange().getBegin(); + return D->getEndLoc(); +} + +AccessSpecifier getAccessAtEnd(const CXXRecordDecl &C) { + AccessSpecifier Spec = (C.getTagKind() == TTK_Class ? 
AS_private : AS_public); + for (const auto *D : C.decls()) + if (const auto *ASD = llvm::dyn_cast(D)) + Spec = ASD->getAccess(); + return Spec; +} + +} // namespace + +SourceLocation insertionPoint(const DeclContext &DC, + llvm::ArrayRef Anchors) { + dlog("Looking for insertion point in {0}", DC.getDeclKindName()); + for (const auto &A : Anchors) { + dlog(" anchor ({0})", A.Direction == Anchor::Above ? "above" : "below"); + if (auto D = insertionDecl(DC, A)) { + dlog(" anchor matched before {0}", *D); + return *D ? beginLoc(**D) : endLoc(DC); + } + } + dlog("no anchor matched"); + return SourceLocation(); +} + +llvm::Expected +insertDecl(llvm::StringRef Code, const DeclContext &DC, + llvm::ArrayRef Anchors) { + auto Loc = insertionPoint(DC, Anchors); + // Fallback: insert at the end. + if (Loc.isInvalid()) + Loc = endLoc(DC); + const auto &SM = DC.getParentASTContext().getSourceManager(); + if (!SM.isWrittenInSameFile(Loc, cast(DC).getLocation())) + return error("{0} body in wrong file: {1}", DC.getDeclKindName(), + Loc.printToString(SM)); + return tooling::Replacement(SM, Loc, 0, Code); +} + +SourceLocation insertionPoint(const CXXRecordDecl &InClass, + std::vector Anchors, + AccessSpecifier Protection) { + for (auto &A : Anchors) + A.Match = [Inner(std::move(A.Match)), Protection](const Decl *D) { + return D->getAccess() == Protection && Inner(D); + }; + return insertionPoint(InClass, Anchors); +} + +llvm::Expected insertDecl(llvm::StringRef Code, + const CXXRecordDecl &InClass, + std::vector Anchors, + AccessSpecifier Protection) { + // Fallback: insert at the bottom of the relevant access section. + Anchors.push_back({any, Anchor::Below}); + auto Loc = insertionPoint(InClass, std::move(Anchors), Protection); + std::string CodeBuffer; + auto &SM = InClass.getASTContext().getSourceManager(); + // Fallback: insert at the end of the class. Check if protection matches! 
+ if (Loc.isInvalid()) { + Loc = InClass.getBraceRange().getEnd(); + if (Protection != getAccessAtEnd(InClass)) { + CodeBuffer = (getAccessSpelling(Protection) + ":\n" + Code).str(); + Code = CodeBuffer; + } + } + if (!SM.isWrittenInSameFile(Loc, InClass.getLocation())) + return error("Class body in wrong file: {0}", Loc.printToString(SM)); + return tooling::Replacement(SM, Loc, 0, Code); +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/refactor/InsertionPoint.h b/clang-tools-extra/clangd/refactor/InsertionPoint.h new file mode 100644 index 000000000000..eee158b77e1f --- /dev/null +++ b/clang-tools-extra/clangd/refactor/InsertionPoint.h @@ -0,0 +1,53 @@ +//===--- InsertionPoint.h - Where should we add new code? --------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DeclCXX.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Tooling/Core/Replacement.h" + +namespace clang { +namespace clangd { + +// An anchor describes where to insert code into a decl sequence. +// +// It allows inserting above or below a block of decls matching some criterion. +// For example, "insert after existing constructors". +struct Anchor { + // A predicate describing which decls are considered part of a block. + // Match need not handle TemplateDecls, which are unwrapped before matching. + std::function Match; + // Whether the insertion point should be before or after the matching block. + enum Dir { Above, Below } Direction = Below; +}; + +// Returns the point to insert a declaration according to Anchors. +// Anchors are tried in order. For each, the first matching location is chosen. 
+SourceLocation insertionPoint(const DeclContext &Ctx, + llvm::ArrayRef Anchors); + +// Returns an edit inserting Code inside Ctx. +// Location is chosen according to Anchors, falling back to the end of Ctx. +// Fails if the chosen insertion point is in a different file than Ctx itself. +llvm::Expected insertDecl(llvm::StringRef Code, + const DeclContext &Ctx, + llvm::ArrayRef Anchors); + +// Variant for C++ classes that ensures the right access control. +SourceLocation insertionPoint(const CXXRecordDecl &InClass, + std::vector Anchors, + AccessSpecifier Protection); + +// Variant for C++ classes that ensures the right access control. +// May insert a new access specifier if needed. +llvm::Expected insertDecl(llvm::StringRef Code, + const CXXRecordDecl &InClass, + std::vector Anchors, + AccessSpecifier Protection); + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 3c17bcdbc17a..29d177435f48 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -62,6 +62,7 @@ add_unittest(ClangdUnitTests ClangdTests IndexActionTests.cpp IndexTests.cpp InlayHintTests.cpp + InsertionPointTests.cpp JSONTransportTests.cpp LoggerTests.cpp LSPBinderTests.cpp diff --git a/clang-tools-extra/clangd/unittests/InsertionPointTests.cpp b/clang-tools-extra/clangd/unittests/InsertionPointTests.cpp new file mode 100644 index 000000000000..2a2756a703ef --- /dev/null +++ b/clang-tools-extra/clangd/unittests/InsertionPointTests.cpp @@ -0,0 +1,210 @@ +//===-- InsertionPointTests.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Annotations.h" +#include "Protocol.h" +#include "SourceCode.h" +#include "TestTU.h" +#include "TestWorkspace.h" +#include "XRefs.h" +#include "refactor/InsertionPoint.h" +#include "clang/AST/DeclBase.h" +#include "llvm/Testing/Support/Error.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { +using llvm::HasValue; + +TEST(InsertionPointTests, Generic) { + Annotations Code(R"cpp( + namespace ns { + $a^int a1; + $b^// leading comment + int b; + $c^int c1; // trailing comment + int c2; + $a2^int a2; + $end^}; + )cpp"); + + auto StartsWith = + [&](llvm::StringLiteral S) -> std::function { + return [S](const Decl *D) { + if (const auto *ND = llvm::dyn_cast(D)) + return llvm::StringRef(ND->getNameAsString()).startswith(S); + return false; + }; + }; + + auto AST = TestTU::withCode(Code.code()).build(); + auto &NS = cast(findDecl(AST, "ns")); + + // Test single anchors. + auto Point = [&](llvm::StringLiteral Prefix, Anchor::Dir Direction) { + auto Loc = insertionPoint(NS, {Anchor{StartsWith(Prefix), Direction}}); + return sourceLocToPosition(AST.getSourceManager(), Loc); + }; + EXPECT_EQ(Point("a", Anchor::Above), Code.point("a")); + EXPECT_EQ(Point("a", Anchor::Below), Code.point("b")); + EXPECT_EQ(Point("b", Anchor::Above), Code.point("b")); + EXPECT_EQ(Point("b", Anchor::Below), Code.point("c")); + EXPECT_EQ(Point("c", Anchor::Above), Code.point("c")); + EXPECT_EQ(Point("c", Anchor::Below), Code.point("a2")); + EXPECT_EQ(Point("", Anchor::Above), Code.point("a")); + EXPECT_EQ(Point("", Anchor::Below), Code.point("end")); + EXPECT_EQ(Point("no_match", Anchor::Below), Position{}); + + // Test anchor chaining. 
+ auto Chain = [&](llvm::StringLiteral P1, llvm::StringLiteral P2) { + auto Loc = insertionPoint(NS, {Anchor{StartsWith(P1), Anchor::Above}, + Anchor{StartsWith(P2), Anchor::Above}}); + return sourceLocToPosition(AST.getSourceManager(), Loc); + }; + EXPECT_EQ(Chain("a", "b"), Code.point("a")); + EXPECT_EQ(Chain("b", "a"), Code.point("b")); + EXPECT_EQ(Chain("no_match", "a"), Code.point("a")); + + // Test edit generation. + auto Edit = insertDecl("foo;", NS, {Anchor{StartsWith("a"), Anchor::Below}}); + ASSERT_THAT_EXPECTED(Edit, llvm::Succeeded()); + EXPECT_EQ(offsetToPosition(Code.code(), Edit->getOffset()), Code.point("b")); + EXPECT_EQ(Edit->getReplacementText(), "foo;"); + // If no match, the edit is inserted at the end. + Edit = insertDecl("x;", NS, {Anchor{StartsWith("no_match"), Anchor::Below}}); + ASSERT_THAT_EXPECTED(Edit, llvm::Succeeded()); + EXPECT_EQ(offsetToPosition(Code.code(), Edit->getOffset()), + Code.point("end")); +} + +// For CXX, we should check: +// - special handling for access specifiers +// - unwrapping of template decls +TEST(InsertionPointTests, CXX) { + Annotations Code(R"cpp( + class C { + public: + $Method^void pubMethod(); + $Field^int PubField; + + $private^private: + $field^int PrivField; + $method^void privMethod(); + template void privTemplateMethod(); + $end^}; + )cpp"); + + auto AST = TestTU::withCode(Code.code()).build(); + const CXXRecordDecl &C = cast(findDecl(AST, "C")); + + auto IsMethod = [](const Decl *D) { return llvm::isa(D); }; + auto Any = [](const Decl *D) { return true; }; + + // Test single anchors. 
+ auto Point = [&](Anchor A, AccessSpecifier Protection) { + auto Loc = insertionPoint(C, {A}, Protection); + return sourceLocToPosition(AST.getSourceManager(), Loc); + }; + EXPECT_EQ(Point({IsMethod, Anchor::Above}, AS_public), Code.point("Method")); + EXPECT_EQ(Point({IsMethod, Anchor::Below}, AS_public), Code.point("Field")); + EXPECT_EQ(Point({Any, Anchor::Above}, AS_public), Code.point("Method")); + EXPECT_EQ(Point({Any, Anchor::Below}, AS_public), Code.point("private")); + EXPECT_EQ(Point({IsMethod, Anchor::Above}, AS_private), Code.point("method")); + EXPECT_EQ(Point({IsMethod, Anchor::Below}, AS_private), Code.point("end")); + EXPECT_EQ(Point({Any, Anchor::Above}, AS_private), Code.point("field")); + EXPECT_EQ(Point({Any, Anchor::Below}, AS_private), Code.point("end")); + EXPECT_EQ(Point({IsMethod, Anchor::Above}, AS_protected), Position{}); + EXPECT_EQ(Point({IsMethod, Anchor::Below}, AS_protected), Position{}); + EXPECT_EQ(Point({Any, Anchor::Above}, AS_protected), Position{}); + EXPECT_EQ(Point({Any, Anchor::Below}, AS_protected), Position{}); + + // Edits when there's no match --> end of matching access control section. 
+ auto Edit = insertDecl("x", C, {}, AS_public); + ASSERT_THAT_EXPECTED(Edit, llvm::Succeeded()); + EXPECT_EQ(offsetToPosition(Code.code(), Edit->getOffset()), + Code.point("private")); + + Edit = insertDecl("x", C, {}, AS_private); + ASSERT_THAT_EXPECTED(Edit, llvm::Succeeded()); + EXPECT_EQ(offsetToPosition(Code.code(), Edit->getOffset()), + Code.point("end")); + + Edit = insertDecl("x", C, {}, AS_protected); + ASSERT_THAT_EXPECTED(Edit, llvm::Succeeded()); + EXPECT_EQ(offsetToPosition(Code.code(), Edit->getOffset()), + Code.point("end")); + EXPECT_EQ(Edit->getReplacementText(), "protected:\nx"); +} + +MATCHER_P(replacementText, Text, "") { + if (arg.getReplacementText() != Text) { + *result_listener << "replacement is " << arg.getReplacementText().str(); + return false; + } + return true; +} + +TEST(InsertionPointTests, CXXAccessProtection) { + // Empty class uses default access. + auto AST = TestTU::withCode("struct S{};").build(); + const CXXRecordDecl &S = cast(findDecl(AST, "S")); + ASSERT_THAT_EXPECTED(insertDecl("x", S, {}, AS_public), + HasValue(replacementText("x"))); + ASSERT_THAT_EXPECTED(insertDecl("x", S, {}, AS_private), + HasValue(replacementText("private:\nx"))); + + // We won't insert above the first access specifier if there's nothing there. + AST = TestTU::withCode("struct T{private:};").build(); + const CXXRecordDecl &T = cast(findDecl(AST, "T")); + ASSERT_THAT_EXPECTED(insertDecl("x", T, {}, AS_public), + HasValue(replacementText("public:\nx"))); + ASSERT_THAT_EXPECTED(insertDecl("x", T, {}, AS_private), + HasValue(replacementText("x"))); + + // But we will if there are declarations. 
+ AST = TestTU::withCode("struct U{int i;private:};").build(); + const CXXRecordDecl &U = cast(findDecl(AST, "U")); + ASSERT_THAT_EXPECTED(insertDecl("x", U, {}, AS_public), + HasValue(replacementText("x"))); + ASSERT_THAT_EXPECTED(insertDecl("x", U, {}, AS_private), + HasValue(replacementText("x"))); +} + +// In ObjC we need to take care to get the @end fallback right. +TEST(InsertionPointTests, ObjC) { + Annotations Code(R"objc( + @interface Foo + -(void) v; + $endIface^@end + @implementation Foo + -(void) v {} + $endImpl^@end + )objc"); + auto TU = TestTU::withCode(Code.code()); + TU.Filename = "TestTU.m"; + auto AST = TU.build(); + + auto &Impl = + cast(findDecl(AST, [&](const NamedDecl &D) { + return llvm::isa(D); + })); + auto &Iface = *Impl.getClassInterface(); + Anchor End{[](const Decl *) { return true; }, Anchor::Below}; + + const auto &SM = AST.getSourceManager(); + EXPECT_EQ(sourceLocToPosition(SM, insertionPoint(Iface, {End})), + Code.point("endIface")); + EXPECT_EQ(sourceLocToPosition(SM, insertionPoint(Impl, {End})), + Code.point("endImpl")); +} + +} // namespace +} // namespace clangd +} // namespace clang From b9ed95afc4b18f8e1e16305720c72ed4411a3f71 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 3 Jan 2022 17:01:27 +0000 Subject: [PATCH 430/992] [gn build] Port fe68088d44f7 --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 + .../gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index 8e2cd1cf73df..42b74319e1cf 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -142,6 +142,7 @@ static_library("clangd") { "index/dex/Iterator.cpp", "index/dex/PostingList.cpp", "index/dex/Trigram.cpp", + "refactor/InsertionPoint.cpp", "refactor/Rename.cpp", "refactor/Tweak.cpp", ] 
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn index 7f37a360efeb..826e1c7bbd8b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn @@ -80,6 +80,7 @@ unittest("ClangdTests") { "IndexActionTests.cpp", "IndexTests.cpp", "InlayHintTests.cpp", + "InsertionPointTests.cpp", "JSONTransportTests.cpp", "LSPBinderTests.cpp", "LSPClient.cpp", From 478863ef58c7f7314e0669d332a90d6e233d44fb Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 17 Nov 2021 13:27:58 +0100 Subject: [PATCH 431/992] [clangd] Basic IncludeCleaner support for c/c++ standard library There are some limitations here, so this is behind a flag for now (in addition to the config setting for the overall feature). - symbols without exactly one associated header aren't handled right - no macro support - referencing std::size_t usually doesn't leave any trace in the AST that the alias in std was used, so we associate with stddef.h instead of cstddef. 
(An AST issue not specific to stdlib, but much worse there) Differential Revision: https://reviews.llvm.org/D114077 --- clang-tools-extra/clangd/Headers.cpp | 163 +++++++++++++++++- clang-tools-extra/clangd/Headers.h | 110 +++++++++++- clang-tools-extra/clangd/IncludeCleaner.cpp | 72 +++++--- clang-tools-extra/clangd/IncludeCleaner.h | 28 ++- clang-tools-extra/clangd/tool/ClangdMain.cpp | 11 ++ .../clangd/unittests/HeadersTests.cpp | 53 ++++++ .../clangd/unittests/IncludeCleanerTests.cpp | 90 +++++++++- 7 files changed, 487 insertions(+), 40 deletions(-) diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp index 30cca7448be2..72da1be99283 100644 --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -61,10 +61,19 @@ class IncludeStructure::RecordHeaders : public PPCallbacks, SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1; Inc.FileKind = FileKind; Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID(); - if (File) - Inc.HeaderID = static_cast(Out->getOrCreateID(File)); if (LastPragmaKeepInMainFileLine == Inc.HashLine) Inc.BehindPragmaKeep = true; + if (File) { + IncludeStructure::HeaderID HID = Out->getOrCreateID(File); + Inc.HeaderID = static_cast(HID); + if (IsAngled) + if (auto StdlibHeader = stdlib::Header::named(Inc.Written)) { + auto &IDs = Out->StdlibHeaders[*StdlibHeader]; + // Few physical files for one stdlib header name, linear scan is ok. + if (!llvm::is_contained(IDs, HID)) + IDs.push_back(HID); + } + } } // Record include graph (not just for main-file includes) @@ -340,5 +349,155 @@ bool operator==(const Inclusion &LHS, const Inclusion &RHS) { std::tie(RHS.Directive, RHS.FileKind, RHS.HashOffset, RHS.HashLine, RHS.Resolved, RHS.Written); } + +namespace stdlib { +static llvm::StringRef *HeaderNames; +static std::pair *SymbolNames; +static unsigned *SymbolHeaderIDs; +static llvm::DenseMap *HeaderIDs; +// Maps symbol name -> Symbol::ID, within a namespace. 
+using NSSymbolMap = llvm::DenseMap; +static llvm::DenseMap *NamespaceSymbols; + +static int initialize() { + unsigned SymCount = 0; +#define SYMBOL(Name, NS, Header) ++SymCount; +#include "CSymbolMap.inc" +#include "StdSymbolMap.inc" +#undef SYMBOL + SymbolNames = new std::remove_reference_t[SymCount]; + SymbolHeaderIDs = + new std::remove_reference_t[SymCount]; + NamespaceSymbols = new std::remove_reference_t; + HeaderIDs = new std::remove_reference_t; + + auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & { + auto R = NamespaceSymbols->try_emplace(NS, nullptr); + if (R.second) + R.first->second = new NSSymbolMap(); + return *R.first->second; + }; + + auto AddHeader = [&](llvm::StringRef Header) -> unsigned { + return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second; + }; + + auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS, + llvm::StringRef HeaderName) mutable { + if (NS == "None") + NS = ""; + + SymbolNames[SymIndex] = {NS, Name}; + SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName); + + NSSymbolMap &NSSymbols = AddNS(NS); + NSSymbols.try_emplace(Name, SymIndex); + + ++SymIndex; + }; +#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header); +#include "CSymbolMap.inc" +#include "StdSymbolMap.inc" +#undef SYMBOL + + HeaderNames = new llvm::StringRef[HeaderIDs->size()]; + for (const auto &E : *HeaderIDs) + HeaderNames[E.second] = E.first; + + return 0; +} + +static void ensureInitialized() { + static int Dummy = initialize(); + (void)Dummy; +} + +llvm::Optional
Header::named(llvm::StringRef Name) { + ensureInitialized(); + auto It = HeaderIDs->find(Name); + if (It == HeaderIDs->end()) + return llvm::None; + return Header(It->second); +} +llvm::StringRef Header::name() const { return HeaderNames[ID]; } +llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; } +llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; } +llvm::Optional Symbol::named(llvm::StringRef Scope, + llvm::StringRef Name) { + ensureInitialized(); + if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) { + auto It = NSSymbols->find(Name); + if (It != NSSymbols->end()) + return Symbol(It->second); + } + return llvm::None; +} +Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); } +llvm::SmallVector
Symbol::headers() const { + return {header()}; // FIXME: multiple in case of ambiguity +} + +Recognizer::Recognizer() { ensureInitialized(); } + +NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) { + auto It = NamespaceCache.find(D); + if (It != NamespaceCache.end()) + return It->second; + + NSSymbolMap *Result = [&]() -> NSSymbolMap * { + if (!D) // Nullptr means the global namespace + return NamespaceSymbols->lookup(""); + if (D->isAnonymousNamespace()) + return nullptr; + if (D->isInlineNamespace()) { + if (auto *Parent = llvm::dyn_cast_or_null(D->getParent())) + return namespaceSymbols(Parent); + return nullptr; + } + return NamespaceSymbols->lookup(printNamespaceScope(*D)); + }(); + NamespaceCache.try_emplace(D, Result); + return Result; +} + +llvm::Optional Recognizer::operator()(const Decl *D) { + // If D is std::vector::iterator, `vector` is the outer symbol to look up. + // We keep all the candidate DCs as some may turn out to be anon enums. + // Do this resolution lazily as we may turn out not to have a std namespace. + llvm::SmallVector IntermediateDecl; + const DeclContext *DC = D->getDeclContext(); + while (DC && !DC->isNamespace()) { + if (NamedDecl::classofKind(DC->getDeclKind())) + IntermediateDecl.push_back(DC); + DC = DC->getParent(); + } + NSSymbolMap *Symbols = namespaceSymbols(cast_or_null(DC)); + if (!Symbols) + return llvm::None; + + llvm::StringRef Name = [&]() -> llvm::StringRef { + for (const auto *SymDC : llvm::reverse(IntermediateDecl)) { + DeclarationName N = cast(SymDC)->getDeclName(); + if (const auto *II = N.getAsIdentifierInfo()) + return II->getName(); + if (!N.isEmpty()) + return ""; // e.g. 
operator<: give up + } + if (const auto *ND = llvm::dyn_cast(D)) + if (const auto *II = ND->getIdentifier()) + return II->getName(); + return ""; + }(); + if (Name.empty()) + return llvm::None; + + auto It = Symbols->find(Name); + if (It == Symbols->end()) + return llvm::None; + return Symbol(It->second); +} + +} // namespace stdlib + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h index c5f5746f2577..9612ce8def46 100644 --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -32,8 +32,78 @@ #include namespace clang { +class Decl; +class NamespaceDecl; namespace clangd { +// clangd has a built-in database of standard library symbols. +namespace stdlib { + +// A standard library header, such as +// Lightweight class, in fact just an index into a table. +class Header { +public: + static llvm::Optional
named(llvm::StringRef Name); + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) { + return OS << H.name(); + } + llvm::StringRef name() const; + +private: + Header(unsigned ID) : ID(ID) {} + unsigned ID; + friend class Symbol; + friend llvm::DenseMapInfo
; + friend bool operator==(const Header &L, const Header &R) { + return L.ID == R.ID; + } +}; + +// A top-level standard library symbol, such as std::vector +// Lightweight class, in fact just an index into a table. +class Symbol { +public: + static llvm::Optional named(llvm::StringRef Scope, + llvm::StringRef Name); + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) { + return OS << S.scope() << S.name(); + } + llvm::StringRef scope() const; + llvm::StringRef name() const; + // The preferred header for this symbol (e.g. the suggested insertion). + Header header() const; + // Some symbols may be provided by multiple headers. + llvm::SmallVector
headers() const; + +private: + Symbol(unsigned ID) : ID(ID) {} + unsigned ID; + friend class Recognizer; + friend llvm::DenseMapInfo; + friend bool operator==(const Symbol &L, const Symbol &R) { + return L.ID == R.ID; + } +}; + +// A functor to find the stdlib::Symbol associated with a decl. +// +// For non-top-level decls (std::vector::iterator), returns the top-level +// symbol (std::vector). +class Recognizer { +public: + Recognizer(); + llvm::Optional operator()(const Decl *D); + +private: + using NSSymbolMap = llvm::DenseMap; + NSSymbolMap *namespaceSymbols(const NamespaceDecl *D); + llvm::DenseMap NamespaceCache; +}; + +} // namespace stdlib + /// Returns true if \p Include is literal include like "path" or . bool isLiteralInclude(llvm::StringRef Include); @@ -160,6 +230,8 @@ class IncludeStructure { // Maps HeaderID to the ids of the files included from it. llvm::DenseMap> IncludeChildren; + llvm::DenseMap> StdlibHeaders; + std::vector MainFileIncludes; // We reserve HeaderID(0) for the main file and will manually check for that @@ -250,13 +322,11 @@ namespace llvm { // Support HeaderIDs as DenseMap keys. 
template <> struct DenseMapInfo { static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() { - return static_cast( - DenseMapInfo::getEmptyKey()); + return static_cast(-1); } static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() { - return static_cast( - DenseMapInfo::getTombstoneKey()); + return static_cast(-2); } static unsigned @@ -270,6 +340,38 @@ template <> struct DenseMapInfo { } }; +template <> struct DenseMapInfo { + static inline clang::clangd::stdlib::Header getEmptyKey() { + return clang::clangd::stdlib::Header(-1); + } + static inline clang::clangd::stdlib::Header getTombstoneKey() { + return clang::clangd::stdlib::Header(-2); + } + static unsigned getHashValue(const clang::clangd::stdlib::Header &H) { + return hash_value(H.ID); + } + static bool isEqual(const clang::clangd::stdlib::Header &LHS, + const clang::clangd::stdlib::Header &RHS) { + return LHS == RHS; + } +}; + +template <> struct DenseMapInfo { + static inline clang::clangd::stdlib::Symbol getEmptyKey() { + return clang::clangd::stdlib::Symbol(-1); + } + static inline clang::clangd::stdlib::Symbol getTombstoneKey() { + return clang::clangd::stdlib::Symbol(-2); + } + static unsigned getHashValue(const clang::clangd::stdlib::Symbol &S) { + return hash_value(S.ID); + } + static bool isEqual(const clang::clangd::stdlib::Symbol &LHS, + const clang::clangd::stdlib::Symbol &RHS) { + return LHS == RHS; + } +}; + } // namespace llvm #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp index 9e51f5430be8..85ba59519e8a 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -26,6 +26,10 @@ namespace clang { namespace clangd { + +static bool AnalyzeStdlib = false; +void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; } + namespace { /// Crawler traverses the AST and feeds in the locations of (sometimes @@ 
-127,6 +131,10 @@ class ReferencedLocationCrawler void add(const Decl *D) { if (!D || !isNew(D->getCanonicalDecl())) return; + if (auto SS = StdRecognizer(D)) { + Result.Stdlib.insert(*SS); + return; + } // Special case RecordDecls, as it is common for them to be forward // declared multiple times. The most common cases are: // - Definition available in TU, only mark that one as usage. The rest is @@ -136,14 +144,14 @@ class ReferencedLocationCrawler // redecls. if (const auto *RD = llvm::dyn_cast(D)) { if (const auto *Definition = RD->getDefinition()) { - Result.insert(Definition->getLocation()); + Result.User.insert(Definition->getLocation()); return; } if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) return; } for (const Decl *Redecl : D->redecls()) - Result.insert(Redecl->getLocation()); + Result.User.insert(Redecl->getLocation()); } bool isNew(const void *P) { return P && Visited.insert(P).second; } @@ -151,13 +159,14 @@ class ReferencedLocationCrawler ReferencedLocations &Result; llvm::DenseSet Visited; const SourceManager &SM; + stdlib::Recognizer StdRecognizer; }; // Given a set of referenced FileIDs, determines all the potentially-referenced // files and macros by traversing expansion/spelling locations of macro IDs. // This is used to map the referenced SourceLocations onto real files. 
-struct ReferencedFiles { - ReferencedFiles(const SourceManager &SM) : SM(SM) {} +struct ReferencedFilesBuilder { + ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} llvm::DenseSet Files; llvm::DenseSet Macros; const SourceManager &SM; @@ -218,18 +227,23 @@ void findReferencedMacros(ParsedAST &AST, ReferencedLocations &Result) { continue; auto Loc = Macro->Info->getDefinitionLoc(); if (Loc.isValid()) - Result.insert(Loc); + Result.User.insert(Loc); + // FIXME: support stdlib macros } } -bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) { +static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) { + if (Inc.BehindPragmaKeep) + return false; + // FIXME(kirillbobyrev): We currently do not support the umbrella headers. - // Standard Library headers are typically umbrella headers, and system - // headers are likely to be the Standard Library headers. Until we have a - // good support for umbrella headers and Standard Library headers, don't warn - // about them. - if (Inc.Written.front() == '<' || Inc.BehindPragmaKeep) + // System headers are likely to be standard library headers. + // Until we have good support for umbrella headers, don't warn about them. + if (Inc.Written.front() == '<') { + if (AnalyzeStdlib && stdlib::Header::named(Inc.Written)) + return true; return false; + } // Headers without include guards have side effects and are not // self-contained, skip them. assert(Inc.HeaderID); @@ -282,29 +296,36 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST) { return Result; } -llvm::DenseSet -findReferencedFiles(const llvm::DenseSet &Locs, - const IncludeStructure &Includes, const SourceManager &SM) { - std::vector Sorted{Locs.begin(), Locs.end()}; +ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, + const IncludeStructure &Includes, + const SourceManager &SM) { + std::vector Sorted{Locs.User.begin(), Locs.User.end()}; llvm::sort(Sorted); // Group by FileID. 
- ReferencedFiles Files(SM); + ReferencedFilesBuilder Builder(SM); for (auto It = Sorted.begin(); It < Sorted.end();) { FileID FID = SM.getFileID(*It); - Files.add(FID, *It); + Builder.add(FID, *It); // Cheaply skip over all the other locations from the same FileID. // This avoids lots of redundant Loc->File lookups for the same file. do ++It; while (It != Sorted.end() && SM.isInFileID(*It, FID)); } + // If a header is not self-contained, we consider its symbols a logical part // of the including file. Therefore, mark the parents of all used // non-self-contained FileIDs as used. Perform this on FileIDs rather than // HeaderIDs, as each inclusion of a non-self-contained file is distinct. - llvm::DenseSet Result; - for (FileID ID : Files.Files) - Result.insert(headerResponsible(ID, SM, Includes)); - return Result; + llvm::DenseSet UserFiles; + for (FileID ID : Builder.Files) + UserFiles.insert(headerResponsible(ID, SM, Includes)); + + llvm::DenseSet StdlibFiles; + for (const auto &Symbol : Locs.Stdlib) + for (const auto &Header : Symbol.headers()) + StdlibFiles.insert(Header); + + return {std::move(UserFiles), std::move(StdlibFiles)}; } std::vector @@ -338,13 +359,13 @@ static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { #endif llvm::DenseSet -translateToHeaderIDs(const llvm::DenseSet &Files, +translateToHeaderIDs(const ReferencedFiles &Files, const IncludeStructure &Includes, const SourceManager &SM) { trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); llvm::DenseSet TranslatedHeaderIDs; - TranslatedHeaderIDs.reserve(Files.size()); - for (FileID FID : Files) { + TranslatedHeaderIDs.reserve(Files.User.size()); + for (FileID FID : Files.User) { const FileEntry *FE = SM.getFileEntryForID(FID); if (!FE) { assert(isSpecialBuffer(FID, SM)); @@ -354,6 +375,9 @@ translateToHeaderIDs(const llvm::DenseSet &Files, assert(File); TranslatedHeaderIDs.insert(*File); } + for (stdlib::Header StdlibUsed : Files.Stdlib) + for (auto HID : 
Includes.StdlibHeaders.lookup(StdlibUsed)) + TranslatedHeaderIDs.insert(HID); return TranslatedHeaderIDs; } diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h index 368cc7032327..198de95ea2fd 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -30,7 +30,11 @@ namespace clang { namespace clangd { -using ReferencedLocations = llvm::DenseSet; +struct ReferencedLocations { + llvm::DenseSet User; + llvm::DenseSet Stdlib; +}; + /// Finds locations of all symbols used in the main file. /// /// - RecursiveASTVisitor finds references to symbols and records their @@ -48,17 +52,22 @@ using ReferencedLocations = llvm::DenseSet; /// - err on the side of reporting all possible locations ReferencedLocations findReferencedLocations(ParsedAST &AST); +struct ReferencedFiles { + llvm::DenseSet User; + llvm::DenseSet Stdlib; +}; + /// Retrieves IDs of all files containing SourceLocations from \p Locs. /// The output only includes things SourceManager sees as files (not macro IDs). /// This can include , etc that are not true files. -llvm::DenseSet findReferencedFiles(const ReferencedLocations &Locs, - const IncludeStructure &Includes, - const SourceManager &SM); +ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, + const IncludeStructure &Includes, + const SourceManager &SM); /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs). /// FileIDs that are not true files ( etc) are dropped. llvm::DenseSet -translateToHeaderIDs(const llvm::DenseSet &Files, +translateToHeaderIDs(const ReferencedFiles &Files, const IncludeStructure &Includes, const SourceManager &SM); /// Retrieves headers that are referenced from the main file but not used. 
@@ -72,6 +81,15 @@ std::vector computeUnusedIncludes(ParsedAST &AST); std::vector issueUnusedIncludesDiagnostics(ParsedAST &AST, llvm::StringRef Code); +/// Affects whether standard library includes should be considered for removal. +/// This is off by default for now due to implementation limitations: +/// - macros are not tracked +/// - symbol names without a unique associated header are not tracked +/// - references to std-namespaced C types are not properly tracked: +/// instead of std::size_t -> we see ::size_t -> +/// FIXME: remove this hack once the implementation is good enough. +void setIncludeCleanerAnalyzesStdlib(bool B); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 08631a31eda6..ba38aeed87a1 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -12,6 +12,7 @@ #include "Config.h" #include "ConfigProvider.h" #include "Feature.h" +#include "IncludeCleaner.h" #include "PathMapping.h" #include "Protocol.h" #include "TidyProvider.h" @@ -251,6 +252,15 @@ opt HeaderInsertion{ "Never insert #include directives as part of code completion")), }; +opt IncludeCleanerStdlib{ + "include-cleaner-stdlib", + cat(Features), + desc("Apply include-cleaner analysis to standard library headers " + "(immature!)"), + init(false), + Hidden, +}; + opt HeaderInsertionDecorators{ "header-insertion-decorators", cat(Features), @@ -932,6 +942,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var }; if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding) Opts.Encoding = ForceOffsetEncoding; + setIncludeCleanerAnalyzesStdlib(IncludeCleanerStdlib); if (CheckFile.getNumOccurrences()) { llvm::SmallString<256> Path; diff --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp index 22caa59e3320..738a2fc18b2d 100644 --- 
a/clang-tools-extra/clangd/unittests/HeadersTests.cpp +++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -408,6 +409,58 @@ void foo(); EXPECT_FALSE(Includes.isSelfContained(getID("pp_depend.h", Includes))); } +TEST(StdlibTest, All) { + auto VectorH = stdlib::Header::named(""); + EXPECT_TRUE(VectorH); + EXPECT_EQ(llvm::to_string(*VectorH), ""); + EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp")); + + auto Vector = stdlib::Symbol::named("std::", "vector"); + EXPECT_TRUE(Vector); + EXPECT_EQ(llvm::to_string(*Vector), "std::vector"); + EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle")); + EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext")); + + EXPECT_EQ(Vector->header(), *VectorH); + EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH)); +} + +TEST(StdlibTest, Recognizer) { + auto TU = TestTU::withCode(R"cpp( + namespace std { + inline namespace inl { + + template + struct vector { class nested {}; }; + + class secret {}; + + } // inl + } // std + + class vector {}; + std::vector vec; + std::vector::nested nest; + std::secret sec; + )cpp"); + + auto AST = TU.build(); + auto &vector_nonstd = findDecl(AST, "vector"); + auto *vec = + cast(findDecl(AST, "vec")).getType()->getAsCXXRecordDecl(); + auto *nest = + cast(findDecl(AST, "nest")).getType()->getAsCXXRecordDecl(); + auto *sec = + cast(findDecl(AST, "sec")).getType()->getAsCXXRecordDecl(); + + stdlib::Recognizer recognizer; + + EXPECT_EQ(recognizer(&vector_nonstd), llvm::None); + EXPECT_EQ(recognizer(vec), stdlib::Symbol::named("std::", "vector")); + EXPECT_EQ(recognizer(nest), stdlib::Symbol::named("std::", "vector")); + EXPECT_EQ(recognizer(sec), llvm::None); +} + } // namespace } // namespace clangd } // namespace clang diff --git 
a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp index 12bcc9440f2d..b7792ca6f90d 100644 --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -8,7 +8,9 @@ #include "Annotations.h" #include "IncludeCleaner.h" +#include "SourceCode.h" #include "TestTU.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Testing/Support/SupportHelpers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -18,7 +20,9 @@ namespace clangd { namespace { using ::testing::ElementsAre; +using ::testing::ElementsAreArray; using ::testing::IsEmpty; +using ::testing::Pointee; using ::testing::UnorderedElementsAre; std::string guard(llvm::StringRef Code) { @@ -211,7 +215,7 @@ TEST(IncludeCleaner, ReferencedLocations) { auto AST = TU.build(); std::vector Points; - for (const auto &Loc : findReferencedLocations(AST)) { + for (const auto &Loc : findReferencedLocations(AST).User) { if (AST.getSourceManager().getBufferName(Loc).endswith( TU.HeaderFilename)) { Points.push_back(offsetToPosition( @@ -225,6 +229,82 @@ TEST(IncludeCleaner, ReferencedLocations) { } } +TEST(IncludeCleaner, Stdlib) { + // Smoke tests only for finding used symbols/headers. + // Details of Decl -> stdlib::Symbol -> stdlib::Headers mapping tested there. 
+ auto TU = TestTU::withHeaderCode(R"cpp( + namespace std { class error_code {}; } + class error_code {}; + namespace nonstd { class error_code {}; } + )cpp"); + struct { + llvm::StringRef Code; + std::vector Symbols; + std::vector Headers; + } Tests[] = { + {"std::error_code x;", {"std::error_code"}, {""}}, + {"error_code x;", {}, {}}, + {"nonstd::error_code x;", {}, {}}, + }; + + for (const auto &Test : Tests) { + TU.Code = Test.Code.str(); + ParsedAST AST = TU.build(); + std::vector WantSyms; + for (const auto &SymName : Test.Symbols) { + auto QName = splitQualifiedName(SymName); + auto Sym = stdlib::Symbol::named(QName.first, QName.second); + EXPECT_TRUE(Sym) << SymName; + WantSyms.push_back(*Sym); + } + std::vector WantHeaders; + for (const auto &HeaderName : Test.Headers) { + auto Header = stdlib::Header::named(HeaderName); + EXPECT_TRUE(Header) << HeaderName; + WantHeaders.push_back(*Header); + } + + ReferencedLocations Locs = findReferencedLocations(AST); + EXPECT_THAT(Locs.Stdlib, ElementsAreArray(WantSyms)); + ReferencedFiles Files = findReferencedFiles(Locs, AST.getIncludeStructure(), + AST.getSourceManager()); + EXPECT_THAT(Files.Stdlib, ElementsAreArray(WantHeaders)); + } +} + +MATCHER_P(WrittenInclusion, Written, "") { + if (arg.Written != Written) + *result_listener << arg.Written; + return arg.Written == Written; +} + +TEST(IncludeCleaner, StdlibUnused) { + setIncludeCleanerAnalyzesStdlib(true); + auto Cleanup = + llvm::make_scope_exit([] { setIncludeCleanerAnalyzesStdlib(false); }); + + auto TU = TestTU::withCode(R"cpp( + #include + #include + std::list x; + )cpp"); + // Layout of std library impl is not relevant. 
+ TU.AdditionalFiles["bits"] = R"cpp( + #pragma once + namespace std { + template class list {}; + template class queue {}; + } + )cpp"; + TU.AdditionalFiles["list"] = "#include "; + TU.AdditionalFiles["queue"] = "#include "; + TU.ExtraArgs = {"-isystem", testRoot()}; + auto AST = TU.build(); + + auto Unused = computeUnusedIncludes(AST); + EXPECT_THAT(Unused, ElementsAre(Pointee(WrittenInclusion("")))); +} + TEST(IncludeCleaner, GetUnusedHeaders) { llvm::StringLiteral MainFile = R"cpp( #include "a.h" @@ -301,7 +381,7 @@ TEST(IncludeCleaner, VirtualBuffers) { auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), Includes, SM); llvm::StringSet<> ReferencedFileNames; - for (FileID FID : ReferencedFiles) + for (FileID FID : ReferencedFiles.User) ReferencedFileNames.insert( SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); // Note we deduped the names as _number_ of s is uninteresting. @@ -352,7 +432,7 @@ TEST(IncludeCleaner, DistinctUnguardedInclusions) { AST.getIncludeStructure(), AST.getSourceManager()); llvm::StringSet<> ReferencedFileNames; auto &SM = AST.getSourceManager(); - for (FileID FID : ReferencedFiles) + for (FileID FID : ReferencedFiles.User) ReferencedFileNames.insert( SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); // Note that we have uplifted the referenced files from non self-contained @@ -386,7 +466,7 @@ TEST(IncludeCleaner, NonSelfContainedHeaders) { AST.getIncludeStructure(), AST.getSourceManager()); llvm::StringSet<> ReferencedFileNames; auto &SM = AST.getSourceManager(); - for (FileID FID : ReferencedFiles) + for (FileID FID : ReferencedFiles.User) ReferencedFileNames.insert( SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); // Note that we have uplifted the referenced files from non self-contained @@ -406,7 +486,7 @@ TEST(IncludeCleaner, IWYUPragmas) { auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), AST.getIncludeStructure(), AST.getSourceManager()); - 
EXPECT_TRUE(ReferencedFiles.empty()); + EXPECT_TRUE(ReferencedFiles.User.empty()); EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty())); } From 378b0ac17984edc1a040e3a084cb80af78075bda Mon Sep 17 00:00:00 2001 From: RitanyaB Date: Mon, 3 Jan 2022 11:22:50 -0600 Subject: [PATCH 432/992] SIGSEGV in ompt_tsan_dependences with for-ordered Segmentation fault in ompt_tsan_dependences function due to an unchecked NULL pointer dereference is as follows: ``` ThreadSanitizer:DEADLYSIGNAL ==140865==ERROR: ThreadSanitizer: SEGV on unknown address 0x000000000050 (pc 0x7f217c2d3652 bp 0x7ffe8cfc7e00 sp 0x7ffe8cfc7d90 T140865) ==140865==The signal is caused by a READ memory access. ==140865==Hint: address points to the zero page. /usr/bin/addr2line: DWARF error: could not find variable specification at offset 1012a /usr/bin/addr2line: DWARF error: could not find variable specification at offset 133b5 /usr/bin/addr2line: DWARF error: could not find variable specification at offset 1371a /usr/bin/addr2line: DWARF error: could not find variable specification at offset 13a58 #0 ompt_tsan_dependences(ompt_data_t*, ompt_dependence_t const*, int) /ptmp/bhararit/llvm-project/openmp/tools/archer/ompt-tsan.cpp:1004 (libarcher.so+0x15652) #1 __kmpc_doacross_post /ptmp/bhararit/llvm-project/openmp/runtime/src/kmp_csupport.cpp:4280 (libomp.so+0x74d98) #2 .omp_outlined. for_ordered_01.c:? (for_ordered_01.exe+0x5186cb) #3 __kmp_invoke_microtask /ptmp/bhararit/llvm-project/openmp/runtime/src/z_Linux_asm.S:1166 (libomp.so+0x14e592) #4 __kmp_invoke_task_func /ptmp/bhararit/llvm-project/openmp/runtime/src/kmp_runtime.cpp:7556 (libomp.so+0x909ad) #5 __kmp_fork_call /ptmp/bhararit/llvm-project/openmp/runtime/src/kmp_runtime.cpp:2284 (libomp.so+0x8461a) #6 __kmpc_fork_call /ptmp/bhararit/llvm-project/openmp/runtime/src/kmp_csupport.cpp:308 (libomp.so+0x6db55) #7 main ??:? (for_ordered_01.exe+0x51828f) #8 __libc_start_main ??:? 
(libc.so.6+0x24349) #9 _start /home/abuild/rpmbuild/BUILD/glibc-2.26/csu/../sysdeps/x86_64/start.S:120 (for_ordered_01.exe+0x4214e9) ThreadSanitizer can not provide additional info. SUMMARY: ThreadSanitizer: SEGV /ptmp/bhararit/llvm-project/openmp/tools/archer/ompt-tsan.cpp:1004 in ompt_tsan_dependences(ompt_data_t*, ompt_dependence_t const*, int) ==140865==ABORTING ``` To reproduce the error, use the following openmp code snippet: ``` /* initialise testMatrixInt Matrix, cols, r and c */ #pragma omp parallel private(r,c) shared(testMatrixInt) { #pragma omp for ordered(2) for (r=1; r < rows; r++) { for (c=1; c < cols; c++) { #pragma omp ordered depend(sink:r-1, c+1) depend(sink:r-1,c-1) testMatrixInt[r][c] = (testMatrixInt[r-1][c] + testMatrixInt[r-1][c-1]) % cols ; #pragma omp ordered depend (source) } } } ``` Compilation: ``` clang -g -stdlib=libc++ -fsanitize=thread -fopenmp -larcher test_case.c ``` It seems like the changes introduced by the commit https://reviews.llvm.org/D114005 causes this particular SEGV while using Archer. Reviewed By: protze.joachim Differential Revision: https://reviews.llvm.org/D115328 --- openmp/tools/archer/ompt-tsan.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp index bcd8417aa789..7e384ddfeaa1 100644 --- a/openmp/tools/archer/ompt-tsan.cpp +++ b/openmp/tools/archer/ompt-tsan.cpp @@ -1001,6 +1001,10 @@ static void ompt_tsan_dependences(ompt_data_t *task_data, if (ndeps > 0) { // Copy the data to use it in task_switch and task_end. TaskData *Data = ToTaskData(task_data); + if (!Data->Parent) { + // Return since doacross dependences are not supported yet. 
+ return; + } if (!Data->Parent->DependencyMap) Data->Parent->DependencyMap = new std::unordered_map(); From 89f4a18f371d813fc65d04945f02590235c30d9c Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 3 Jan 2022 12:27:04 -0500 Subject: [PATCH 433/992] [libc++][CI] Temporarily disable several Linux jobs There is an ongoing CI outage with our Linux nodes, so I temporarily set up a couple of nodes. These nodes will be much slower than the usual ones and there's only a few of them, so I am temporarily disabling most of our CI to keep things working. --- libcxx/utils/ci/buildkite-pipeline.yml | 636 +++++++++++++------------ 1 file changed, 319 insertions(+), 317 deletions(-) diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index f72d26d53452..11ad8b442047 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -160,152 +160,154 @@ steps: # - wait - # Tests with the supported compilers. - - label: "GCC 11 / C++11" - command: "libcxx/utils/ci/run-buildbot generic-gcc-cxx11" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Clang 12" - command: "libcxx/utils/ci/run-buildbot generic-clang-12" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Clang 13" - command: "libcxx/utils/ci/run-buildbot generic-clang-13" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - # Tests with the sanitizers. 
- - label: "ASAN" - command: "libcxx/utils/ci/run-buildbot generic-asan" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "TSAN" - command: "libcxx/utils/ci/run-buildbot generic-tsan" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "UBSAN" - command: "libcxx/utils/ci/run-buildbot generic-ubsan" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - # Tests with the various supported ways to build libc++. - - label: "Bootstrapping build" - command: "libcxx/utils/ci/run-buildbot bootstrapping-build" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Legacy Lit configuration" - command: "libcxx/utils/ci/run-buildbot legacy-test-config" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Legacy standalone build" - command: "libcxx/utils/ci/run-buildbot legacy-standalone" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Legacy LLVM_ENABLE_PROJECTS build" - command: "libcxx/utils/ci/run-buildbot legacy-project-build" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - 
limit: 2 - timeout_in_minutes: 120 - - # Tests with various build configurations. - - label: "-fno-exceptions" - command: "libcxx/utils/ci/run-buildbot generic-noexceptions" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 + # TODO: Due to ongoing CI outage on our Linux nodes, most configurations running on Linux + # are disabled. We are currently running off of a much smaller fleet than normally. + # # Tests with the supported compilers. + # - label: "GCC 11 / C++11" + # command: "libcxx/utils/ci/run-buildbot generic-gcc-cxx11" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Clang 12" + # command: "libcxx/utils/ci/run-buildbot generic-clang-12" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Clang 13" + # command: "libcxx/utils/ci/run-buildbot generic-clang-13" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # # Tests with the sanitizers. 
+ # - label: "ASAN" + # command: "libcxx/utils/ci/run-buildbot generic-asan" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "TSAN" + # command: "libcxx/utils/ci/run-buildbot generic-tsan" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "UBSAN" + # command: "libcxx/utils/ci/run-buildbot generic-ubsan" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # # Tests with the various supported ways to build libc++. + # - label: "Bootstrapping build" + # command: "libcxx/utils/ci/run-buildbot bootstrapping-build" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Legacy Lit configuration" + # command: "libcxx/utils/ci/run-buildbot legacy-test-config" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Legacy standalone build" + # command: "libcxx/utils/ci/run-buildbot legacy-standalone" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Legacy LLVM_ENABLE_PROJECTS build" + # command: "libcxx/utils/ci/run-buildbot legacy-project-build" + # 
artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # # Tests with various build configurations. + # - label: "-fno-exceptions" + # command: "libcxx/utils/ci/run-buildbot generic-noexceptions" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 - label: "Modular build" command: "libcxx/utils/ci/run-buildbot generic-modules" @@ -320,177 +322,177 @@ steps: limit: 2 timeout_in_minutes: 120 - - label: "Static libraries" - command: "libcxx/utils/ci/run-buildbot generic-static" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Assertions enabled" - command: "libcxx/utils/ci/run-buildbot generic-assertions" - artifact_paths: - - "**/test-results.xml" - - "**/*.abilist" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Debug iterators" - command: "libcxx/utils/ci/run-buildbot generic-debug-iterators" - artifact_paths: - - "**/test-results.xml" - - "**/*.abilist" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "With LLVM's libunwind" - command: "libcxx/utils/ci/run-buildbot generic-with_llvm_unwinder" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Single-threaded" - command: "libcxx/utils/ci/run-buildbot generic-singlethreaded" - 
artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "No debug mode" - command: "libcxx/utils/ci/run-buildbot generic-no-debug" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "No filesystem" - command: "libcxx/utils/ci/run-buildbot generic-no-filesystem" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "No random device" - command: "libcxx/utils/ci/run-buildbot generic-no-random_device" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "No locale" - command: "libcxx/utils/ci/run-buildbot generic-no-localization" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "No Unicode" - command: "libcxx/utils/ci/run-buildbot generic-no-unicode" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "No wide characters" - command: "libcxx/utils/ci/run-buildbot generic-no-wide-characters" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - # Other non-testing CI jobs - - label: "Benchmarks" - command: "libcxx/utils/ci/run-buildbot 
benchmarks" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 - - - label: "Documentation" - command: "libcxx/utils/ci/run-buildbot documentation" - artifact_paths: - - "**/test-results.xml" - agents: - queue: "libcxx-builders" - os: "linux" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 2 - timeout_in_minutes: 120 + # - label: "Static libraries" + # command: "libcxx/utils/ci/run-buildbot generic-static" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Assertions enabled" + # command: "libcxx/utils/ci/run-buildbot generic-assertions" + # artifact_paths: + # - "**/test-results.xml" + # - "**/*.abilist" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Debug iterators" + # command: "libcxx/utils/ci/run-buildbot generic-debug-iterators" + # artifact_paths: + # - "**/test-results.xml" + # - "**/*.abilist" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "With LLVM's libunwind" + # command: "libcxx/utils/ci/run-buildbot generic-with_llvm_unwinder" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Single-threaded" + # command: "libcxx/utils/ci/run-buildbot generic-singlethreaded" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + 
# - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "No debug mode" + # command: "libcxx/utils/ci/run-buildbot generic-no-debug" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "No filesystem" + # command: "libcxx/utils/ci/run-buildbot generic-no-filesystem" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "No random device" + # command: "libcxx/utils/ci/run-buildbot generic-no-random_device" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "No locale" + # command: "libcxx/utils/ci/run-buildbot generic-no-localization" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "No Unicode" + # command: "libcxx/utils/ci/run-buildbot generic-no-unicode" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "No wide characters" + # command: "libcxx/utils/ci/run-buildbot generic-no-wide-characters" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # # Other non-testing CI jobs + # - label: "Benchmarks" + # 
command: "libcxx/utils/ci/run-buildbot benchmarks" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 + + # - label: "Documentation" + # command: "libcxx/utils/ci/run-buildbot documentation" + # artifact_paths: + # - "**/test-results.xml" + # agents: + # queue: "libcxx-builders" + # os: "linux" + # retry: + # automatic: + # - exit_status: -1 # Agent was lost + # limit: 2 + # timeout_in_minutes: 120 # Tests on non-Unix platforms - label: "Clang-cl (DLL)" From f6e90fac35553be15829a114595ab042335d914f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 3 Jan 2022 09:39:26 -0800 Subject: [PATCH 434/992] Remove loop invariant exit conditions from tests in advance of D116496 Reviewer suggested this was more in spirit of the original tests. --- llvm/test/Transforms/LoopUnroll/pr31718.ll | 27 ++- .../runtime-loop-multiexit-dom-verify.ll | 198 +++++++++++------- 2 files changed, 145 insertions(+), 80 deletions(-) diff --git a/llvm/test/Transforms/LoopUnroll/pr31718.ll b/llvm/test/Transforms/LoopUnroll/pr31718.ll index 15dca4845248..be25809b410c 100644 --- a/llvm/test/Transforms/LoopUnroll/pr31718.ll +++ b/llvm/test/Transforms/LoopUnroll/pr31718.ll @@ -6,8 +6,9 @@ target triple = "x86_64-unknown-linux-gnu" @b = external local_unnamed_addr global i32, align 4 +declare i1 @unknown(i32) readonly nounwind willreturn -define void @main(i1 %c) local_unnamed_addr #0 { +define void @main() local_unnamed_addr #0 { ; CHECK-LABEL: @main( ; CHECK-NEXT: ph1: ; CHECK-NEXT: br label [[H1:%.*]] @@ -19,21 +20,26 @@ define void @main(i1 %c) local_unnamed_addr #0 { ; CHECK: h2: ; CHECK-NEXT: br label [[H3:%.*]] ; CHECK: h3: -; CHECK-NEXT: br i1 [[C:%.*]], label [[LATCH3:%.*]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[C1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1]], label [[LATCH3:%.*]], label [[EXIT_LOOPEXIT:%.*]] 
; CHECK: latch3: -; CHECK-NEXT: br i1 false, label [[EXIT3:%.*]], label [[H3]] +; CHECK-NEXT: [[C2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2]], label [[EXIT3:%.*]], label [[H3]] ; CHECK: exit3: ; CHECK-NEXT: br label [[LATCH2:%.*]] ; CHECK: latch2: ; CHECK-NEXT: br label [[H3_1:%.*]] ; CHECK: h3.1: -; CHECK-NEXT: br i1 [[C]], label [[LATCH3_1:%.*]], label [[EXIT_LOOPEXIT1:%.*]] +; CHECK-NEXT: [[C1_1:%.*]] = call i1 @unknown(i32 1) +; CHECK-NEXT: br i1 [[C1_1]], label [[LATCH3_1:%.*]], label [[EXIT_LOOPEXIT1:%.*]] ; CHECK: latch3.1: -; CHECK-NEXT: br i1 false, label [[EXIT3_1:%.*]], label [[H3_1]] +; CHECK-NEXT: [[C2_1:%.*]] = call i1 @unknown(i32 1) +; CHECK-NEXT: br i1 [[C2_1]], label [[EXIT3_1:%.*]], label [[H3_1]] ; CHECK: exit3.1: ; CHECK-NEXT: br label [[LATCH2_1:%.*]] ; CHECK: latch2.1: -; CHECK-NEXT: br i1 [[C]], label [[LATCH1]], label [[PH2]] +; CHECK-NEXT: [[C3:%.*]] = call i1 @unknown(i32 [[D_0]]) +; CHECK-NEXT: br i1 [[C3]], label [[LATCH1]], label [[PH2]] ; CHECK: latch1: ; CHECK-NEXT: [[TMP0]] = load i32, i32* @b, align 4 ; CHECK-NEXT: br label [[H1]] @@ -62,10 +68,12 @@ h2: br label %h3 h3: - br i1 %c, label %latch3, label %exit + %c1 = call i1 @unknown(i32 %0) + br i1 %c1, label %latch3, label %exit latch3: - br i1 false, label %exit3, label %h3 + %c2 = call i1 @unknown(i32 %0) + br i1 %c2, label %exit3, label %h3 exit3: br label %latch2 @@ -76,7 +84,8 @@ latch2: br i1 %cmp, label %h2, label %exit2 exit2: - br i1 %c, label %latch1, label %ph2 + %c3 = call i1 @unknown(i32 %d.0) + br i1 %c3, label %latch1, label %ph2 latch1: ; preds = %exit2 %1 = load i32, i32* @b, align 4 diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index f8dab0fa2c3c..d8ad6fe3a831 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -7,6 +7,8 @@ ; We 
explicitly set the unroll count so that expensiveTripCount computation is allowed. +declare i1 @unknown(i32) readonly nounwind willreturn + ; mergedexit block has edges from loop exit blocks. define i64 @test1() { ; CHECK-LABEL: @test1( @@ -366,7 +368,7 @@ otherexit: ; preds = %exiting ; exit block (%exitB) has an exiting block and another exit block as predecessors. ; exiting block comes from inner loop. -define void @test5(i1 %c) { +define void @test5() { ; CHECK-LABEL: @test5( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i32 undef, 79 @@ -380,27 +382,35 @@ define void @test5(i1 %c) { ; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i32 [ 0, [[OUTERH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[OUTERLATCH_PROL]] ] ; CHECK-NEXT: br label [[INNERH_PROL:%.*]] ; CHECK: innerH.prol: -; CHECK-NEXT: br i1 [[C:%.*]], label [[INNEREXITING_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1:%.*]] +; CHECK-NEXT: [[C1_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_PROL]], label [[INNEREXITING_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1:%.*]] ; CHECK: innerexiting.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2:%.*]] +; CHECK-NEXT: [[C2_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_PROL]], label [[INNERLATCH_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2:%.*]] ; CHECK: innerLatch.prol: ; CHECK-NEXT: br i1 false, label [[INNERH_1_PROL:%.*]], label [[OUTERLATCH_PROL]] ; CHECK: innerH.1.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] +; CHECK-NEXT: [[C1_1_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_1_PROL]], label [[INNEREXITING_1_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] ; CHECK: innerexiting.1.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] +; CHECK-NEXT: [[C2_1_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_1_PROL]], label [[INNERLATCH_1_PROL:%.*]], label 
[[EXITB_LOOPEXIT_LOOPEXIT2]] ; CHECK: innerLatch.1.prol: ; CHECK-NEXT: br i1 false, label [[INNERH_2_PROL:%.*]], label [[OUTERLATCH_PROL]] ; CHECK: innerH.2.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] +; CHECK-NEXT: [[C1_2_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_2_PROL]], label [[INNEREXITING_2_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] ; CHECK: innerexiting.2.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] +; CHECK-NEXT: [[C2_2_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_2_PROL]], label [[INNERLATCH_2_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] ; CHECK: innerLatch.2.prol: ; CHECK-NEXT: br i1 false, label [[INNERH_3_PROL:%.*]], label [[OUTERLATCH_PROL]] ; CHECK: innerH.3.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] +; CHECK-NEXT: [[C1_3_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_3_PROL]], label [[INNEREXITING_3_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] ; CHECK: innerexiting.3.prol: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] +; CHECK-NEXT: [[C2_3_PROL:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_3_PROL]], label [[INNERLATCH_3_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] ; CHECK: innerLatch.3.prol: ; CHECK-NEXT: br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: outerLatch.prol: @@ -421,110 +431,142 @@ define void @test5(i1 %c) { ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP4_UNR]], [[BB1_NEW]] ], [ [[TMP6_3:%.*]], [[OUTERLATCH_3:%.*]] ] ; CHECK-NEXT: br label [[INNERH:%.*]] ; CHECK: innerH: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[C1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1]], label [[INNEREXITING:%.*]], label 
[[OTHEREXITB_LOOPEXIT_LOOPEXIT:%.*]] ; CHECK: innerexiting: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[C2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2]], label [[INNERLATCH:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT:%.*]] ; CHECK: innerLatch: ; CHECK-NEXT: br i1 false, label [[INNERH_1:%.*]], label [[OUTERLATCH:%.*]] ; CHECK: innerH.1: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] +; CHECK-NEXT: [[C1_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_1]], label [[INNEREXITING_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] ; CHECK: innerexiting.1: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] +; CHECK-NEXT: [[C2_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_1]], label [[INNERLATCH_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] ; CHECK: innerLatch.1: ; CHECK-NEXT: br i1 false, label [[INNERH_2:%.*]], label [[OUTERLATCH]] ; CHECK: innerH.2: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] +; CHECK-NEXT: [[C1_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_2]], label [[INNEREXITING_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] ; CHECK: innerexiting.2: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] +; CHECK-NEXT: [[C2_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_2]], label [[INNERLATCH_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] ; CHECK: innerLatch.2: ; CHECK-NEXT: br i1 false, label [[INNERH_3:%.*]], label [[OUTERLATCH]] ; CHECK: innerH.3: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] +; CHECK-NEXT: [[C1_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_3]], label [[INNEREXITING_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] ; CHECK: innerexiting.3: -; 
CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] +; CHECK-NEXT: [[C2_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_3]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] ; CHECK: innerLatch.3: ; CHECK-NEXT: br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP6]] ; CHECK: outerLatch: ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1 -; CHECK-NEXT: br label [[INNERH_13:%.*]] -; CHECK: innerH.13: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_14:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12:%.*]] -; CHECK: innerexiting.14: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_15:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13:%.*]] -; CHECK: innerLatch.15: +; CHECK-NEXT: br label [[INNERH_14:%.*]] +; CHECK: innerH.14: +; CHECK-NEXT: [[C1_13:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_13]], label [[INNEREXITING_16:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT18:%.*]] +; CHECK: innerexiting.16: +; CHECK-NEXT: [[C2_15:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_15]], label [[INNERLATCH_17:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19:%.*]] +; CHECK: innerLatch.17: ; CHECK-NEXT: br i1 false, label [[INNERH_1_1:%.*]], label [[OUTERLATCH_1:%.*]] ; CHECK: innerH.1.1: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12]] +; CHECK-NEXT: [[C1_1_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_1_1]], label [[INNEREXITING_1_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT18]] ; CHECK: innerexiting.1.1: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13]] +; CHECK-NEXT: [[C2_1_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_1_1]], label [[INNERLATCH_1_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19]] ; CHECK: innerLatch.1.1: ; CHECK-NEXT: br i1 false, label [[INNERH_2_1:%.*]], label [[OUTERLATCH_1]] ; CHECK: innerH.2.1: -; CHECK-NEXT: br 
i1 [[C]], label [[INNEREXITING_2_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12]] +; CHECK-NEXT: [[C1_2_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_2_1]], label [[INNEREXITING_2_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT18]] ; CHECK: innerexiting.2.1: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13]] +; CHECK-NEXT: [[C2_2_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_2_1]], label [[INNERLATCH_2_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19]] ; CHECK: innerLatch.2.1: ; CHECK-NEXT: br i1 false, label [[INNERH_3_1:%.*]], label [[OUTERLATCH_1]] ; CHECK: innerH.3.1: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12]] +; CHECK-NEXT: [[C1_3_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_3_1]], label [[INNEREXITING_3_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT18]] ; CHECK: innerexiting.3.1: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13]] +; CHECK-NEXT: [[C2_3_1:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_3_1]], label [[INNERLATCH_3_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19]] ; CHECK: innerLatch.3.1: -; CHECK-NEXT: br i1 false, label [[INNERH_13]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP6]] ; CHECK: outerLatch.1: ; CHECK-NEXT: [[TMP6_1:%.*]] = add i32 [[TMP6]], 1 -; CHECK-NEXT: br label [[INNERH_26:%.*]] -; CHECK: innerH.26: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_27:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14:%.*]] -; CHECK: innerexiting.27: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_28:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15:%.*]] -; CHECK: innerLatch.28: +; CHECK-NEXT: br label [[INNERH_29:%.*]] +; CHECK: innerH.29: +; CHECK-NEXT: [[C1_28:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_28]], label 
[[INNEREXITING_211:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT20:%.*]] +; CHECK: innerexiting.211: +; CHECK-NEXT: [[C2_210:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_210]], label [[INNERLATCH_212:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21:%.*]] +; CHECK: innerLatch.212: ; CHECK-NEXT: br i1 false, label [[INNERH_1_2:%.*]], label [[OUTERLATCH_2:%.*]] ; CHECK: innerH.1.2: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14]] +; CHECK-NEXT: [[C1_1_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_1_2]], label [[INNEREXITING_1_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT20]] ; CHECK: innerexiting.1.2: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15]] +; CHECK-NEXT: [[C2_1_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_1_2]], label [[INNERLATCH_1_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21]] ; CHECK: innerLatch.1.2: ; CHECK-NEXT: br i1 false, label [[INNERH_2_2:%.*]], label [[OUTERLATCH_2]] ; CHECK: innerH.2.2: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14]] +; CHECK-NEXT: [[C1_2_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_2_2]], label [[INNEREXITING_2_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT20]] ; CHECK: innerexiting.2.2: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15]] +; CHECK-NEXT: [[C2_2_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_2_2]], label [[INNERLATCH_2_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21]] ; CHECK: innerLatch.2.2: ; CHECK-NEXT: br i1 false, label [[INNERH_3_2:%.*]], label [[OUTERLATCH_2]] ; CHECK: innerH.3.2: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14]] +; CHECK-NEXT: [[C1_3_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_3_2]], label [[INNEREXITING_3_2:%.*]], label 
[[OTHEREXITB_LOOPEXIT_LOOPEXIT20]] ; CHECK: innerexiting.3.2: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15]] +; CHECK-NEXT: [[C2_3_2:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_3_2]], label [[INNERLATCH_3_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21]] ; CHECK: innerLatch.3.2: -; CHECK-NEXT: br i1 false, label [[INNERH_26]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP6]] ; CHECK: outerLatch.2: ; CHECK-NEXT: [[TMP6_2:%.*]] = add i32 [[TMP6_1]], 1 -; CHECK-NEXT: br label [[INNERH_39:%.*]] -; CHECK: innerH.39: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_310:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16:%.*]] -; CHECK: innerexiting.310: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_311:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17:%.*]] -; CHECK: innerLatch.311: +; CHECK-NEXT: br label [[INNERH_314:%.*]] +; CHECK: innerH.314: +; CHECK-NEXT: [[C1_313:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_313]], label [[INNEREXITING_316:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT22:%.*]] +; CHECK: innerexiting.316: +; CHECK-NEXT: [[C2_315:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_315]], label [[INNERLATCH_317:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23:%.*]] +; CHECK: innerLatch.317: ; CHECK-NEXT: br i1 false, label [[INNERH_1_3:%.*]], label [[OUTERLATCH_3]] ; CHECK: innerH.1.3: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16]] +; CHECK-NEXT: [[C1_1_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_1_3]], label [[INNEREXITING_1_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT22]] ; CHECK: innerexiting.1.3: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17]] +; CHECK-NEXT: [[C2_1_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_1_3]], label 
[[INNERLATCH_1_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23]] ; CHECK: innerLatch.1.3: ; CHECK-NEXT: br i1 false, label [[INNERH_2_3:%.*]], label [[OUTERLATCH_3]] ; CHECK: innerH.2.3: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16]] +; CHECK-NEXT: [[C1_2_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_2_3]], label [[INNEREXITING_2_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT22]] ; CHECK: innerexiting.2.3: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17]] +; CHECK-NEXT: [[C2_2_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_2_3]], label [[INNERLATCH_2_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23]] ; CHECK: innerLatch.2.3: ; CHECK-NEXT: br i1 false, label [[INNERH_3_3:%.*]], label [[OUTERLATCH_3]] ; CHECK: innerH.3.3: -; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16]] +; CHECK-NEXT: [[C1_3_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C1_3_3]], label [[INNEREXITING_3_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT22]] ; CHECK: innerexiting.3.3: -; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17]] +; CHECK-NEXT: [[C2_3_3:%.*]] = call i1 @unknown(i32 0) +; CHECK-NEXT: br i1 [[C2_3_3]], label [[INNERLATCH_3_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23]] ; CHECK: innerLatch.3.3: -; CHECK-NEXT: br i1 false, label [[INNERH_39]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP6]] +; CHECK-NEXT: br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP6]] ; CHECK: outerLatch.3: ; CHECK-NEXT: [[TMP6_3]] = add i32 [[TMP6_2]], 1 ; CHECK-NEXT: [[TMP7_3:%.*]] = icmp sgt i32 [[TMP6_3]], 79 @@ -537,11 +579,11 @@ define void @test5(i1 %c) { ; CHECK-NEXT: ret void ; CHECK: exitB.loopexit.loopexit.loopexit: ; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT:%.*]] -; CHECK: exitB.loopexit.loopexit.loopexit13: +; CHECK: 
exitB.loopexit.loopexit.loopexit19: ; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT]] -; CHECK: exitB.loopexit.loopexit.loopexit15: +; CHECK: exitB.loopexit.loopexit.loopexit21: ; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT]] -; CHECK: exitB.loopexit.loopexit.loopexit17: +; CHECK: exitB.loopexit.loopexit.loopexit23: ; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT]] ; CHECK: exitB.loopexit.loopexit: ; CHECK-NEXT: br label [[EXITB_LOOPEXIT:%.*]] @@ -553,11 +595,11 @@ define void @test5(i1 %c) { ; CHECK-NEXT: ret void ; CHECK: otherexitB.loopexit.loopexit: ; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT:%.*]] -; CHECK: otherexitB.loopexit.loopexit12: +; CHECK: otherexitB.loopexit.loopexit18: ; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT]] -; CHECK: otherexitB.loopexit.loopexit14: +; CHECK: otherexitB.loopexit.loopexit20: ; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT]] -; CHECK: otherexitB.loopexit.loopexit16: +; CHECK: otherexitB.loopexit.loopexit22: ; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT]] ; CHECK: otherexitB.loopexit: ; CHECK-NEXT: br label [[OTHEREXITB:%.*]] @@ -578,10 +620,12 @@ outerH: ; preds = %outerLatch, %bb1 br label %innerH innerH: ; preds = %innerLatch, %outerH - br i1 %c, label %innerexiting, label %otherexitB + %c1 = call i1 @unknown(i32 0) + br i1 %c1, label %innerexiting, label %otherexitB innerexiting: ; preds = %innerH - br i1 %c, label %innerLatch, label %exitB + %c2 = call i1 @unknown(i32 0) + br i1 %c2, label %innerLatch, label %exitB innerLatch: ; preds = %innerexiting %tmp13 = fcmp olt double undef, 2.000000e+00 @@ -605,7 +649,7 @@ otherexitB: ; preds = %innerH ; Blocks reachable from exits (not_zero44) have the IDom as the block within the loop (Header). ; Update the IDom to the preheader. 
-define void @test6(i1 %c) { +define void @test6() { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 undef, i64 616) @@ -621,7 +665,9 @@ define void @test6(i1 %c) { ; CHECK: header.prol: ; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ undef, [[HEADER_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ] ; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[LATCH_PROL]] ] -; CHECK-NEXT: br i1 [[C:%.*]], label [[LATCH_PROL]], label [[OTHEREXIT_LOOPEXIT1:%.*]] +; CHECK-NEXT: [[IV_I32_PROL:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 +; CHECK-NEXT: [[C1_PROL:%.*]] = call i1 @unknown(i32 [[IV_I32_PROL]]) +; CHECK-NEXT: br i1 [[C1_PROL]], label [[LATCH_PROL]], label [[OTHEREXIT_LOOPEXIT1:%.*]] ; CHECK: latch.prol: ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616 @@ -639,16 +685,24 @@ define void @test6(i1 %c) { ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[LATCH_3:%.*]] ] -; CHECK-NEXT: br i1 [[C]], label [[LATCH:%.*]], label [[OTHEREXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[IV_I32:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[C1:%.*]] = call i1 @unknown(i32 [[IV_I32]]) +; CHECK-NEXT: br i1 [[C1]], label [[LATCH:%.*]], label [[OTHEREXIT_LOOPEXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: br i1 [[C]], label [[LATCH_1:%.*]], label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: [[IV_I32_1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[C1_1:%.*]] = call i1 @unknown(i32 [[IV_I32_1]]) +; CHECK-NEXT: br i1 [[C1_1]], label [[LATCH_1:%.*]], label [[OTHEREXIT_LOOPEXIT]] ; CHECK: latch.1: ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], 2 -; 
CHECK-NEXT: br i1 [[C]], label [[LATCH_2:%.*]], label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: [[IV_I32_2:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 +; CHECK-NEXT: [[C1_2:%.*]] = call i1 @unknown(i32 [[IV_I32_2]]) +; CHECK-NEXT: br i1 [[C1_2]], label [[LATCH_2:%.*]], label [[OTHEREXIT_LOOPEXIT]] ; CHECK: latch.2: ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], 2 -; CHECK-NEXT: br i1 [[C]], label [[LATCH_3]], label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: [[IV_I32_3:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 +; CHECK-NEXT: [[C1_3:%.*]] = call i1 @unknown(i32 [[IV_I32_3]]) +; CHECK-NEXT: br i1 [[C1_3]], label [[LATCH_3]], label [[OTHEREXIT_LOOPEXIT]] ; CHECK: latch.3: ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV_NEXT_2]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616 @@ -675,7 +729,9 @@ entry: header: ; preds = %latch, %entry %indvars.iv = phi i64 [ undef, %entry ], [ %indvars.iv.next, %latch ] - br i1 %c, label %latch, label %otherexit + %iv.i32 = trunc i64 %indvars.iv to i32 + %c1 = call i1 @unknown(i32 %iv.i32) + br i1 %c1, label %latch, label %otherexit latch: ; preds = %header %indvars.iv.next = add nsw i64 %indvars.iv, 2 From 9bd22595bad36cd19f5e7ae18ccd9f41cba29dc5 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 3 Jan 2022 09:55:19 -0800 Subject: [PATCH 435/992] [unroll] Prune all but first copy of invariant exit If we have an exit which is controlled by a loop invariant condition and which dominates the latch, we know only the copy in the first unrolled iteration can be taken. All other copies are dead. The change itself is pretty straight forward, but let me add two points of context: * I'd have expected other transform passes to catch this after unrolling, but I'm seeing multiple examples where we get to the end of O2/O3 without simplifying. 
* I'd like to do a stronger change which did CSE during unroll and accounted for invariant expressions (as defined by SCEV instead of trivial ones from LoopInfo), but that doesn't fit cleanly into the current code structure. Differential Revision: https://reviews.llvm.org/D116496 --- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 5 + .../Transforms/LoopUnroll/nonlatchcondbr.ll | 12 +- .../LoopUnroll/runtime-loop-multiple-exits.ll | 436 +++++++++--------- llvm/test/Transforms/LoopUnroll/scevunroll.ll | 8 +- 4 files changed, 231 insertions(+), 230 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index b0c622b98d5e..0a530f5292c5 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -310,6 +310,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, unsigned TripMultiple; unsigned BreakoutTrip; bool ExitOnTrue; + bool InvariantExit; SmallVector ExitingBlocks; }; DenseMap ExitInfos; @@ -333,6 +334,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, (unsigned)GreatestCommonDivisor64(ULO.Count, Info.TripMultiple); } Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); + Info.InvariantExit = L->isLoopInvariant(BI->getCondition()) && + DT->dominates(ExitingBlock, LatchBlock); Info.ExitingBlocks.push_back(ExitingBlock); LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() << ": TripCount=" << Info.TripCount @@ -685,6 +688,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, bool IsLatch) -> Optional { + if (Info.InvariantExit && i != 0) + return false; if (CompletelyUnroll) { if (PreserveOnlyFirst) { if (i == 0) diff --git a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll index eca86b4cd9c3..de3b0e769abd 100644 --- 
a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll +++ b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll @@ -173,21 +173,21 @@ define void @test3(i32* noalias %A, i1 %cond) { ; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE]]) -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_BODY_1:%.*]] ; CHECK: for.body.1: ; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]] ; CHECK: for.body.for.body_crit_edge.1: ; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]]) -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_BODY_2:%.*]] ; CHECK: for.body.2: ; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]] ; CHECK: for.body.for.body_crit_edge.2: ; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 ; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]]) -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_BODY_3:%.*]] ; CHECK: for.body.3: ; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.3: @@ -229,11 +229,7 @@ define void @test4(i32 %arg) { ; CHECK: bb1: ; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB1_1:%.*]] ; CHECK: bb1.1: -; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1_2:%.*]] -; CHECK: bb1.2: -; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1_3:%.*]] -; CHECK: bb1.3: -; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1]], !llvm.loop 
[[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: bb4: ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index 9863116137dd..9c408e07e279 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -403,7 +403,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-NEXT: br label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -413,7 +413,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 +; EPILOG-NEXT: br label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %for.body.2 @@ -423,7 +423,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 +; EPILOG-NEXT: br label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label 
%for.body.3 @@ -433,7 +433,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 +; EPILOG-NEXT: br label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %for.body.4 @@ -443,7 +443,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 +; EPILOG-NEXT: br label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %for.body.5 @@ -453,7 +453,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 +; EPILOG-NEXT: br label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %for.body.6 @@ -463,7 +463,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 +; EPILOG-NEXT: br label %for.exiting_block.7 ; EPILOG: 
for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %for.body.7 @@ -512,7 +512,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %sum.0.lcssa.ph1, %for.end.epilog-lcssa ] ; EPILOG-NEXT: ret i32 %sum.0.lcssa ; EPILOG: for.exit2.loopexit: -; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ] +; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] ; EPILOG-NEXT: br label %for.exit2 ; EPILOG: for.exit2.loopexit2: ; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] @@ -544,7 +544,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -583,7 +583,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ 
%sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ] ; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; EPILOG-BLOCK: for.exit2.loopexit: -; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ] +; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %for.exit2 ; EPILOG-BLOCK: for.exit2: ; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] @@ -639,7 +639,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-NEXT: br label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -648,7 +648,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 +; PROLOG-NEXT: br label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %for.body.2 @@ -657,7 +657,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 +; PROLOG-NEXT: br label %for.exiting_block.3 ; 
PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %for.body.3 @@ -666,7 +666,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 +; PROLOG-NEXT: br label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %for.body.4 @@ -675,7 +675,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 +; PROLOG-NEXT: br label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %for.body.5 @@ -684,7 +684,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 +; PROLOG-NEXT: br label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %for.body.6 @@ -693,7 +693,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label 
%for.exiting_block.7 +; PROLOG-NEXT: br label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %for.body.7 @@ -711,7 +711,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ] ; PROLOG-NEXT: ret i32 %sum.0.lcssa ; PROLOG: for.exit2.loopexit: -; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ] +; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] ; PROLOG-NEXT: br label %for.exit2 ; PROLOG: for.exit2.loopexit1: ; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ] @@ -756,7 +756,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -774,7 +774,7 @@ define i32 @test2(i32* 
nocapture %a, i64 %n) { ; PROLOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ] ; PROLOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; PROLOG-BLOCK: for.exit2.loopexit: -; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ] +; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] ; PROLOG-BLOCK-NEXT: br label %for.exit2 ; PROLOG-BLOCK: for.exit2: ; PROLOG-BLOCK-NEXT: %retval = phi i32 [ 0, %header.prol ], [ 42, %for.exiting_block.prol ], [ %retval.ph, %for.exit2.loopexit ] @@ -1265,7 +1265,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1 +; EPILOG-NEXT: br label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1275,7 +1275,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.2 +; EPILOG-NEXT: br label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -1285,7 +1285,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 
%indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.3 +; EPILOG-NEXT: br label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -1295,7 +1295,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.4 +; EPILOG-NEXT: br label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -1305,7 +1305,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.5 +; EPILOG-NEXT: br label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -1315,7 +1315,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.6 +; EPILOG-NEXT: br label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; 
EPILOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -1325,7 +1325,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.7 +; EPILOG-NEXT: br label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -1368,7 +1368,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter ; EPILOG-NEXT: br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !4 ; EPILOG: latchExit.epilog-lcssa.loopexit: -; EPILOG-NEXT: %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ] +; EPILOG-NEXT: %result.ph1.ph = phi i32 [ 0, %header ] ; EPILOG-NEXT: br label %latchExit.epilog-lcssa ; EPILOG: latchExit.epilog-lcssa.loopexit2: ; EPILOG-NEXT: %result.ph1.ph3 = phi i32 [ 0, %header.epil ], [ %add.epil, %latch.epil ] @@ -1409,7 +1409,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1445,7 +1445,7 @@ define i32 
@hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %4, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit: -; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ] +; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 0, %header ] ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa: ; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ 0, %header.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ] @@ -1508,7 +1508,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.1 +; PROLOG-NEXT: br label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1517,7 +1517,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.2 +; PROLOG-NEXT: br label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -1526,7 +1526,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br i1 %cond, label 
%latchExit.unr-lcssa.loopexit, label %for.exiting_block.3 +; PROLOG-NEXT: br label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -1535,7 +1535,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.4 +; PROLOG-NEXT: br label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -1544,7 +1544,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.5 +; PROLOG-NEXT: br label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -1553,7 +1553,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.6 +; PROLOG-NEXT: br label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -1562,7 +1562,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %9 = 
load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.7 +; PROLOG-NEXT: br label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -1574,7 +1574,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n ; PROLOG-NEXT: br i1 %exitcond.7, label %latchExit.unr-lcssa.loopexit, label %header ; PROLOG: latchExit.unr-lcssa.loopexit: -; PROLOG-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ], [ %add.7, %latch.7 ] +; PROLOG-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ %add.7, %latch.7 ] ; PROLOG-NEXT: br label %latchExit.unr-lcssa ; PROLOG: latchExit.unr-lcssa.loopexit1: ; PROLOG-NEXT: %result.ph.ph2 = phi i32 [ 0, %header.prol ] @@ -1628,7 +1628,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1640,7 +1640,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n ; PROLOG-BLOCK-NEXT: br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4 ; PROLOG-BLOCK: 
latchExit.unr-lcssa.loopexit: -; PROLOG-BLOCK-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ %add.1, %latch.1 ] +; PROLOG-BLOCK-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ %add.1, %latch.1 ] ; PROLOG-BLOCK-NEXT: br label %latchExit.unr-lcssa ; PROLOG-BLOCK: latchExit.unr-lcssa: ; PROLOG-BLOCK-NEXT: %result.ph = phi i32 [ 0, %header.prol ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ] @@ -1709,7 +1709,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-NEXT: br label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -1719,7 +1719,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 +; EPILOG-NEXT: br label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2 @@ -1729,7 +1729,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 +; EPILOG-NEXT: br label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = 
icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3 @@ -1739,7 +1739,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 +; EPILOG-NEXT: br label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4 @@ -1749,7 +1749,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 +; EPILOG-NEXT: br label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5 @@ -1759,7 +1759,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 +; EPILOG-NEXT: br label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6 @@ -1769,7 +1769,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 
= add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 +; EPILOG-NEXT: br label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7 @@ -1853,7 +1853,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -1952,7 +1952,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-NEXT: br label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -1961,7 +1961,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 +; PROLOG-NEXT: br label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 
42 ; PROLOG-NEXT: br i1 %cmp.2, label %latchExit.unr-lcssa.loopexit, label %latch.2 @@ -1970,7 +1970,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 +; PROLOG-NEXT: br label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %latchExit.unr-lcssa.loopexit, label %latch.3 @@ -1979,7 +1979,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 +; PROLOG-NEXT: br label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %latchExit.unr-lcssa.loopexit, label %latch.4 @@ -1988,7 +1988,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 +; PROLOG-NEXT: br label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %latchExit.unr-lcssa.loopexit, label %latch.5 @@ -1997,7 +1997,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 
%indvars.iv.next.4, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 +; PROLOG-NEXT: br label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %latchExit.unr-lcssa.loopexit, label %latch.6 @@ -2006,7 +2006,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 +; PROLOG-NEXT: br label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %latchExit.unr-lcssa.loopexit, label %latch.7 @@ -2072,7 +2072,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -2154,7 +2154,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-NEXT: br label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label 
%latch.1 @@ -2164,7 +2164,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 +; EPILOG-NEXT: br label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2 @@ -2174,7 +2174,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 +; EPILOG-NEXT: br label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3 @@ -2184,7 +2184,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 +; EPILOG-NEXT: br label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4 @@ -2194,7 +2194,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 
%niter.next.3, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 +; EPILOG-NEXT: br label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5 @@ -2204,7 +2204,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 +; EPILOG-NEXT: br label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6 @@ -2214,7 +2214,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 +; EPILOG-NEXT: br label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7 @@ -2298,7 +2298,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, 
label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -2397,7 +2397,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-NEXT: br label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -2406,7 +2406,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 +; PROLOG-NEXT: br label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %latchExit.unr-lcssa.loopexit, label %latch.2 @@ -2415,7 +2415,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 +; PROLOG-NEXT: br label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %latchExit.unr-lcssa.loopexit, label %latch.3 @@ -2424,7 +2424,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br i1 %cond, 
label %for.exit2.loopexit, label %for.exiting_block.4 +; PROLOG-NEXT: br label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %latchExit.unr-lcssa.loopexit, label %latch.4 @@ -2433,7 +2433,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 +; PROLOG-NEXT: br label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %latchExit.unr-lcssa.loopexit, label %latch.5 @@ -2442,7 +2442,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 +; PROLOG-NEXT: br label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %latchExit.unr-lcssa.loopexit, label %latch.6 @@ -2451,7 +2451,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 +; PROLOG-NEXT: br label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %latchExit.unr-lcssa.loopexit, label %latch.7 @@ -2517,7 +2517,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, 
i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -2600,7 +2600,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch: ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-NEXT: br label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; EPILOG-NEXT: %3 = load i32, i32* %arrayidx.1, align 4 @@ -2610,7 +2610,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.1: ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 +; EPILOG-NEXT: br label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1 ; EPILOG-NEXT: %4 = load i32, i32* %arrayidx.2, align 4 @@ -2620,7 +2620,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.2: ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 +; EPILOG-NEXT: br label %for.exiting_block.3 ; 
EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2 ; EPILOG-NEXT: %5 = load i32, i32* %arrayidx.3, align 4 @@ -2630,7 +2630,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.3: ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 +; EPILOG-NEXT: br label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3 ; EPILOG-NEXT: %6 = load i32, i32* %arrayidx.4, align 4 @@ -2640,7 +2640,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.4: ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 +; EPILOG-NEXT: br label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4 ; EPILOG-NEXT: %7 = load i32, i32* %arrayidx.5, align 4 @@ -2650,7 +2650,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.5: ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 +; EPILOG-NEXT: br label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5 ; EPILOG-NEXT: %8 = load i32, i32* %arrayidx.6, align 4 @@ -2660,7 +2660,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.6: ; EPILOG-NEXT: 
%indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 +; EPILOG-NEXT: br label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6 ; EPILOG-NEXT: %9 = load i32, i32* %arrayidx.7, align 4 @@ -2744,7 +2744,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.1, align 4 @@ -2843,7 +2843,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp, label %latchExit.unr-lcssa.loopexit, label %latch ; PROLOG: latch: ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-NEXT: br label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 @@ -2852,7 +2852,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 +; PROLOG-NEXT: br label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; 
PROLOG-NEXT: %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1 ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 @@ -2861,7 +2861,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.2, label %latchExit.unr-lcssa.loopexit, label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 +; PROLOG-NEXT: br label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2 ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 @@ -2870,7 +2870,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.3, label %latchExit.unr-lcssa.loopexit, label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 +; PROLOG-NEXT: br label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3 ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 @@ -2879,7 +2879,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.4, label %latchExit.unr-lcssa.loopexit, label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 +; PROLOG-NEXT: br label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4 ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 @@ -2888,7 +2888,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.5, label 
%latchExit.unr-lcssa.loopexit, label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 +; PROLOG-NEXT: br label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5 ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 @@ -2897,7 +2897,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.6, label %latchExit.unr-lcssa.loopexit, label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 +; PROLOG-NEXT: br label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6 ; PROLOG-NEXT: %10 = load i32, i32* %arrayidx.7, align 4 @@ -2963,7 +2963,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: br i1 %cmp, label %latchExit.unr-lcssa.loopexit, label %latch ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; PROLOG-BLOCK-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 @@ -3837,7 +3837,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add = add nsw i32 %load, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-NEXT: br label %for.exiting_block.1 ; 
EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -3847,7 +3847,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.1 = add nsw i32 %load.1, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 +; EPILOG-NEXT: br label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -3857,7 +3857,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.2 = add nsw i32 %load.2, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 +; EPILOG-NEXT: br label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -3867,7 +3867,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.3 = add nsw i32 %load.3, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 +; EPILOG-NEXT: br label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -3877,7 +3877,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.4 = add nsw i32 %load.4, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 
%indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 +; EPILOG-NEXT: br label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -3887,7 +3887,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.5 = add nsw i32 %load.5, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 +; EPILOG-NEXT: br label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -3897,7 +3897,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.6 = add nsw i32 %load.6, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 +; EPILOG-NEXT: br label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -3946,7 +3946,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %sum.0.lcssa.ph1, %latch_exit.epilog-lcssa ] ; EPILOG-NEXT: ret i32 %sum.0.lcssa ; EPILOG: for.exit2.loopexit: -; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, 
%latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ] +; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] ; EPILOG-NEXT: br label %for.exit2 ; EPILOG: for.exit2.loopexit2: ; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] @@ -3983,7 +3983,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %load, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -4022,7 +4022,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch.epil ] ; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; EPILOG-BLOCK: for.exit2.loopexit: -; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ] +; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %for.exit2 ; EPILOG-BLOCK: for.exit2: ; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] @@ -4083,7 
+4083,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %load, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-NEXT: br label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -4092,7 +4092,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.1 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %load.1, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 +; PROLOG-NEXT: br label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -4101,7 +4101,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.2 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %load.2, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 +; PROLOG-NEXT: br label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -4110,7 +4110,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.3 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %load.3, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 +; PROLOG-NEXT: br label %for.exiting_block.4 ; PROLOG: 
for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -4119,7 +4119,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.4 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %load.4, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 +; PROLOG-NEXT: br label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -4128,7 +4128,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.5 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %load.5, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 +; PROLOG-NEXT: br label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -4137,7 +4137,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.6 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %load.6, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 +; PROLOG-NEXT: br label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -4155,7 +4155,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ] ; PROLOG-NEXT: ret i32 
%sum.0.lcssa ; PROLOG: for.exit2.loopexit: -; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ] +; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] ; PROLOG-NEXT: br label %for.exit2 ; PROLOG: for.exit2.loopexit1: ; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ] @@ -4205,7 +4205,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-BLOCK-NEXT: %load = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %load, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -4223,7 +4223,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ] ; PROLOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; PROLOG-BLOCK: for.exit2.loopexit: -; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ] +; PROLOG-BLOCK-NEXT: %retval.ph = phi 
i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] ; PROLOG-BLOCK-NEXT: br label %for.exit2 ; PROLOG-BLOCK: for.exit2: ; PROLOG-BLOCK-NEXT: %retval = phi i32 [ 0, %header.prol ], [ 42, %for.exiting_block.prol ], [ %retval.ph, %for.exit2.loopexit ] @@ -4295,31 +4295,31 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG: latch: ; EPILOG-NEXT: %add = add nuw nsw i64 %i6, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 +; EPILOG-NEXT: br label %latch.1 ; EPILOG: latch.1: ; EPILOG-NEXT: %add.1 = add nuw nsw i64 %add, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.2 +; EPILOG-NEXT: br label %latch.2 ; EPILOG: latch.2: ; EPILOG-NEXT: %add.2 = add nuw nsw i64 %add.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.3 +; EPILOG-NEXT: br label %latch.3 ; EPILOG: latch.3: ; EPILOG-NEXT: %add.3 = add nuw nsw i64 %add.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.4 +; EPILOG-NEXT: br label %latch.4 ; EPILOG: latch.4: ; EPILOG-NEXT: %add.4 = add nuw nsw i64 %add.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.5 +; EPILOG-NEXT: br label %latch.5 ; EPILOG: latch.5: ; EPILOG-NEXT: %add.5 = add nuw nsw i64 %add.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.6 +; EPILOG-NEXT: br label %latch.6 ; EPILOG: latch.6: ; EPILOG-NEXT: %add.6 = add nuw nsw i64 %add.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.7 +; EPILOG-NEXT: br label 
%latch.7 ; EPILOG: latch.7: ; EPILOG-NEXT: %add.7 = add nuw nsw i64 %add.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 @@ -4351,7 +4351,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG: loopexit2: ; EPILOG-NEXT: ret i32 %shft ; EPILOG: loopexit1.loopexit: -; EPILOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ] +; EPILOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ] ; EPILOG-NEXT: br label %loopexit1 ; EPILOG: loopexit1.loopexit1: ; EPILOG-NEXT: %sext3.ph2 = phi i32 [ %shft, %header.epil ] @@ -4382,7 +4382,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %add = add nuw nsw i64 %i6, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 +; EPILOG-BLOCK-NEXT: br label %latch.1 ; EPILOG-BLOCK: latch.1: ; EPILOG-BLOCK-NEXT: %add.1 = add nuw nsw i64 %add, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 @@ -4404,7 +4404,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG-BLOCK: loopexit2: ; EPILOG-BLOCK-NEXT: ret i32 %shft ; EPILOG-BLOCK: loopexit1.loopexit: -; EPILOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ] +; EPILOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ] ; EPILOG-BLOCK-NEXT: br label %loopexit1 ; EPILOG-BLOCK: loopexit1: ; EPILOG-BLOCK-NEXT: %sext3 = phi i32 [ %shft, %header.epil ], [ %sext3.ph, %loopexit1.loopexit ] @@ -4448,25 +4448,25 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch ; PROLOG: latch: ; PROLOG-NEXT: %add = add nuw nsw i64 %i6, 1 -; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 +; PROLOG-NEXT: br label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %add.1 = add nuw nsw i64 %add, 1 -; 
PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.2 +; PROLOG-NEXT: br label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %add.2 = add nuw nsw i64 %add.1, 1 -; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.3 +; PROLOG-NEXT: br label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %add.3 = add nuw nsw i64 %add.2, 1 -; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.4 +; PROLOG-NEXT: br label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %add.4 = add nuw nsw i64 %add.3, 1 -; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.5 +; PROLOG-NEXT: br label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %add.5 = add nuw nsw i64 %add.4, 1 -; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.6 +; PROLOG-NEXT: br label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %add.6 = add nuw nsw i64 %add.5, 1 -; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.7 +; PROLOG-NEXT: br label %latch.7 ; PROLOG: latch.7: ; PROLOG-NEXT: %add.7 = add nuw nsw i64 %add.6, 1 ; PROLOG-NEXT: %i9.7 = icmp slt i64 %add.7, %sext @@ -4478,7 +4478,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG: loopexit2: ; PROLOG-NEXT: ret i32 %shft ; PROLOG: loopexit1.loopexit: -; PROLOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ] +; PROLOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ] ; PROLOG-NEXT: br label %loopexit1 ; PROLOG: loopexit1.loopexit1: ; PROLOG-NEXT: %sext3.ph2 = phi i32 [ %shft, %header.prol ] @@ -4516,7 +4516,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %add = add nuw nsw i64 %i6, 1 -; PROLOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 +; PROLOG-BLOCK-NEXT: br label %latch.1 ; PROLOG-BLOCK: latch.1: ; 
PROLOG-BLOCK-NEXT: %add.1 = add nuw nsw i64 %add, 1 ; PROLOG-BLOCK-NEXT: %i9.1 = icmp slt i64 %add.1, %sext @@ -4528,7 +4528,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-BLOCK: loopexit2: ; PROLOG-BLOCK-NEXT: ret i32 %shft ; PROLOG-BLOCK: loopexit1.loopexit: -; PROLOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ] +; PROLOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ] ; PROLOG-BLOCK-NEXT: br label %loopexit1 ; PROLOG-BLOCK: loopexit1: ; PROLOG-BLOCK-NEXT: %sext3 = phi i32 [ %shft, %header.prol ], [ %sext3.ph, %loopexit1.loopexit ] @@ -4594,31 +4594,31 @@ define void @test8() { ; EPILOG: latch: ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 +; EPILOG-NEXT: br label %latch.1 ; EPILOG: latch.1: ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: %i4.2 = add nuw nsw i64 %i4.1, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.2 +; EPILOG-NEXT: br label %latch.2 ; EPILOG: latch.2: ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: %i4.3 = add nuw nsw i64 %i4.2, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.3 +; EPILOG-NEXT: br label %latch.3 ; EPILOG: latch.3: ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: %i4.4 = add nuw nsw i64 %i4.3, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.4 +; EPILOG-NEXT: br label %latch.4 ; EPILOG: latch.4: ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: %i4.5 = add nuw nsw i64 %i4.4, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.5 +; EPILOG-NEXT: br label %latch.5 ; EPILOG: latch.5: ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: %i4.6 = add nuw nsw i64 %i4.5, 1 -; EPILOG-NEXT: br i1 
false, label %outerloop.loopexit.loopexit, label %latch.6 +; EPILOG-NEXT: br label %latch.6 ; EPILOG: latch.6: ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 ; EPILOG-NEXT: %i4.7 = add nuw nsw i64 %i4.6, 1 -; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 +; EPILOG-NEXT: br label %latch.7 ; EPILOG: latch.7: ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 ; EPILOG-NEXT: %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter @@ -4666,7 +4666,7 @@ define void @test8() { ; EPILOG-BLOCK: latch.14: ; EPILOG-BLOCK-NEXT: %niter.next.13 = add nuw nsw i64 %niter.1, 1 ; EPILOG-BLOCK-NEXT: %i4.1.1 = add nuw nsw i64 %i4.12, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1 +; EPILOG-BLOCK-NEXT: br label %latch.1.1 ; EPILOG-BLOCK: latch.1.1: ; EPILOG-BLOCK-NEXT: %niter.next.1.1 = add i64 %niter.next.13, 1 ; EPILOG-BLOCK-NEXT: %niter.ncmp.1.1 = icmp ne i64 %niter.next.1.1, 100 @@ -4701,7 +4701,7 @@ define void @test8() { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-BLOCK-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 +; EPILOG-BLOCK-NEXT: br label %latch.1 ; EPILOG-BLOCK: latch.1: ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 ; EPILOG-BLOCK-NEXT: %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter @@ -4765,25 +4765,25 @@ define void @test8() { ; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; PROLOG: latch: ; PROLOG-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 +; PROLOG-NEXT: br label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %i4.2 = add nuw nsw i64 %i4.1, 1 -; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.2 +; PROLOG-NEXT: br label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %i4.3 = add nuw nsw i64 %i4.2, 1 -; 
PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.3 +; PROLOG-NEXT: br label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %i4.4 = add nuw nsw i64 %i4.3, 1 -; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.4 +; PROLOG-NEXT: br label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %i4.5 = add nuw nsw i64 %i4.4, 1 -; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.5 +; PROLOG-NEXT: br label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %i4.6 = add nuw nsw i64 %i4.5, 1 -; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.6 +; PROLOG-NEXT: br label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %i4.7 = add nuw nsw i64 %i4.6, 1 -; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 +; PROLOG-NEXT: br label %latch.7 ; PROLOG: latch.7: ; PROLOG-NEXT: %i6.7 = icmp ult i64 %i4.7, 100 ; PROLOG-NEXT: br i1 %i6.7, label %innerH, label %exit.unr-lcssa @@ -4818,7 +4818,7 @@ define void @test8() { ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12 ; PROLOG-BLOCK: latch.12: ; PROLOG-BLOCK-NEXT: %i4.1.1 = add nuw nsw i64 %i4.11, 1 -; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1 +; PROLOG-BLOCK-NEXT: br label %latch.1.1 ; PROLOG-BLOCK: latch.1.1: ; PROLOG-BLOCK-NEXT: %i6.1.1 = icmp ult i64 %i4.1.1, 100 ; PROLOG-BLOCK-NEXT: br i1 %i6.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit3, !llvm.loop !12 @@ -4852,7 +4852,7 @@ define void @test8() { ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 +; PROLOG-BLOCK-NEXT: br label %latch.1 ; PROLOG-BLOCK: latch.1: ; PROLOG-BLOCK-NEXT: %i6.1 = icmp ult i64 %i4.1, 100 ; PROLOG-BLOCK-NEXT: br i1 %i6.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop 
!12 @@ -4915,7 +4915,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-NEXT: %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.7, %latch.7 ] ; EPILOG-NEXT: br i1 true, label %latch, label %innerexit.loopexit ; EPILOG: innerexit.loopexit: -; EPILOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ] +; EPILOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ] ; EPILOG-NEXT: br label %innerexit ; EPILOG: innerexit.loopexit1: ; EPILOG-NEXT: %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ] @@ -4927,31 +4927,31 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG: latch: ; EPILOG-NEXT: %iv.next = add nuw nsw i64 %phi, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i32 %niter, 1 -; EPILOG-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.1 ; EPILOG: latch.1: ; EPILOG-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i32 %niter.next, 1 -; EPILOG-NEXT: br i1 true, label %latch.2, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.2 ; EPILOG: latch.2: ; EPILOG-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i32 %niter.next.1, 1 -; EPILOG-NEXT: br i1 true, label %latch.3, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.3 ; EPILOG: latch.3: ; EPILOG-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i32 %niter.next.2, 1 -; EPILOG-NEXT: br i1 true, label %latch.4, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.4 ; EPILOG: latch.4: ; EPILOG-NEXT: %iv.next.4 = add nuw nsw i64 %iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i32 %niter.next.3, 1 -; EPILOG-NEXT: br i1 true, label %latch.5, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.5 ; EPILOG: latch.5: ; 
EPILOG-NEXT: %iv.next.5 = add nuw nsw i64 %iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i32 %niter.next.4, 1 -; EPILOG-NEXT: br i1 true, label %latch.6, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.6 ; EPILOG: latch.6: ; EPILOG-NEXT: %iv.next.6 = add nuw nsw i64 %iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i32 %niter.next.5, 1 -; EPILOG-NEXT: br i1 true, label %latch.7, label %innerexit.loopexit +; EPILOG-NEXT: br label %latch.7 ; EPILOG: latch.7: ; EPILOG-NEXT: %iv.next.7 = add nuw nsw i64 %iv.next.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i32 %niter.next.6, 1 @@ -5006,10 +5006,10 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK-NEXT: %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.1, %latch.1 ] ; EPILOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit.loopexit: -; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ] +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %header ] ; EPILOG-BLOCK-NEXT: br label %innerexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit.loopexit6: -; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph7 = phi i32 [ %trip.1, %latch.15 ], [ %trip.1, %header.1 ] +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph7 = phi i32 [ %trip.1, %header.1 ] ; EPILOG-BLOCK-NEXT: br label %innerexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit: ; EPILOG-BLOCK-NEXT: %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph7, %innerexit.loopexit.loopexit6 ] @@ -5024,7 +5024,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %iv.next = add nuw nsw i64 %phi, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i32 %niter, 1 -; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit.loopexit +; EPILOG-BLOCK-NEXT: br label %latch.1 ; EPILOG-BLOCK: latch.1: ; EPILOG-BLOCK-NEXT: %iv.next.1 = add nuw 
nsw i64 %iv.next, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i32 %niter.next, 1 @@ -5061,7 +5061,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK: latch.15: ; EPILOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1 ; EPILOG-BLOCK-NEXT: %niter.next.14 = add nuw nsw i32 %niter.1, 1 -; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit6 +; EPILOG-BLOCK-NEXT: br label %latch.1.1 ; EPILOG-BLOCK: latch.1.1: ; EPILOG-BLOCK-NEXT: %iv.next.1.1 = add nuw nsw i64 %iv.next.13, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1.1 = add i32 %niter.next.14, 1 @@ -5124,7 +5124,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-NEXT: %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.7, %latch.7 ] ; PROLOG-NEXT: br i1 true, label %latch, label %innerexit.loopexit ; PROLOG: innerexit.loopexit: -; PROLOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ] +; PROLOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ] ; PROLOG-NEXT: br label %innerexit ; PROLOG: innerexit.loopexit1: ; PROLOG-NEXT: %trip.lcssa.ph2 = phi i32 [ %trip, %header.prol ] @@ -5135,26 +5135,26 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-NEXT: ret i8 addrspace(1)* %i9 ; PROLOG: latch: ; PROLOG-NEXT: %iv.next = add nuw nsw i64 %phi, 1 -; PROLOG-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 -; PROLOG-NEXT: br i1 true, label %latch.2, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 1 -; PROLOG-NEXT: br i1 true, label %latch.3, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %iv.next.3 = add 
nuw nsw i64 %iv.next.2, 1 -; PROLOG-NEXT: br i1 true, label %latch.4, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %iv.next.4 = add nuw nsw i64 %iv.next.3, 1 -; PROLOG-NEXT: br i1 true, label %latch.5, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %iv.next.5 = add nuw nsw i64 %iv.next.4, 1 -; PROLOG-NEXT: br i1 true, label %latch.6, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %iv.next.6 = add nuw nsw i64 %iv.next.5, 1 ; PROLOG-NEXT: %i7.7 = trunc i64 %iv.next.6 to i32 -; PROLOG-NEXT: br i1 true, label %latch.7, label %innerexit.loopexit +; PROLOG-NEXT: br label %latch.7 ; PROLOG: latch.7: ; PROLOG-NEXT: %i11.7 = add nsw i32 %i7.7, 1 ; PROLOG-NEXT: %innercnd.7 = icmp slt i32 %i11.7, %trip @@ -5196,10 +5196,10 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK-NEXT: %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.1, %latch.1 ] ; PROLOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit.loopexit: -; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ] +; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %header ] ; PROLOG-BLOCK-NEXT: br label %innerexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit.loopexit5: -; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph6 = phi i32 [ %trip.1, %latch.14 ], [ %trip.1, %header.1 ] +; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph6 = phi i32 [ %trip.1, %header.1 ] ; PROLOG-BLOCK-NEXT: br label %innerexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit: ; PROLOG-BLOCK-NEXT: %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph6, %innerexit.loopexit.loopexit5 ] @@ -5214,7 +5214,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %iv.next = add nuw nsw i64 %phi, 1 ; 
PROLOG-BLOCK-NEXT: %i7.1 = trunc i64 %iv.next to i32 -; PROLOG-BLOCK-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit.loopexit +; PROLOG-BLOCK-NEXT: br label %latch.1 ; PROLOG-BLOCK: latch.1: ; PROLOG-BLOCK-NEXT: %i11.1 = add nsw i32 %i7.1, 1 ; PROLOG-BLOCK-NEXT: %innercnd.1 = icmp slt i32 %i11.1, %trip @@ -5250,7 +5250,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK: latch.14: ; PROLOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1 ; PROLOG-BLOCK-NEXT: %i7.1.1 = trunc i64 %iv.next.13 to i32 -; PROLOG-BLOCK-NEXT: br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit5 +; PROLOG-BLOCK-NEXT: br label %latch.1.1 ; PROLOG-BLOCK: latch.1.1: ; PROLOG-BLOCK-NEXT: %i11.1.1 = add nsw i32 %i7.1.1, 1 ; PROLOG-BLOCK-NEXT: %innercnd.1.1 = icmp slt i32 %i11.1.1, %trip.1 @@ -5611,37 +5611,37 @@ define void @test11(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit +; EPILOG-NEXT: br label %loop_latch.1 ; EPILOG: loop_latch.1: ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.loopexit +; EPILOG-NEXT: br label %loop_latch.2 ; EPILOG: loop_latch.2: ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.loopexit +; EPILOG-NEXT: br label %loop_latch.3 ; EPILOG: loop_latch.3: ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.loopexit +; EPILOG-NEXT: br label 
%loop_latch.4 ; EPILOG: loop_latch.4: ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.loopexit +; EPILOG-NEXT: br label %loop_latch.5 ; EPILOG: loop_latch.5: ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.loopexit +; EPILOG-NEXT: br label %loop_latch.6 ; EPILOG: loop_latch.6: ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.loopexit +; EPILOG-NEXT: br label %loop_latch.7 ; EPILOG: loop_latch.7: ; EPILOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 @@ -5696,7 +5696,7 @@ define void @test11(i64 %trip, i1 %cond) { ; EPILOG-BLOCK-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit +; EPILOG-BLOCK-NEXT: br label %loop_latch.1 ; EPILOG-BLOCK: loop_latch.1: ; EPILOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 @@ -5756,31 +5756,31 @@ define void @test11(i64 %trip, i1 %cond) { ; PROLOG: loop_latch: ; PROLOG-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.1 ; PROLOG: loop_latch.1: ; PROLOG-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.2 ; PROLOG: 
loop_latch.2: ; PROLOG-NEXT: %iv_next.2 = add i64 %iv_next.1, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.3 ; PROLOG: loop_latch.3: ; PROLOG-NEXT: %iv_next.3 = add i64 %iv_next.2, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.4 ; PROLOG: loop_latch.4: ; PROLOG-NEXT: %iv_next.4 = add i64 %iv_next.3, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.5 ; PROLOG: loop_latch.5: ; PROLOG-NEXT: %iv_next.5 = add i64 %iv_next.4, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.6 ; PROLOG: loop_latch.6: ; PROLOG-NEXT: %iv_next.6 = add i64 %iv_next.5, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.loopexit +; PROLOG-NEXT: br label %loop_latch.7 ; PROLOG: loop_latch.7: ; PROLOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; PROLOG-NEXT: %cmp.7 = icmp ne i64 %iv_next.7, %trip @@ -5822,7 +5822,7 @@ define void @test11(i64 %trip, i1 %cond) { ; PROLOG-BLOCK: loop_latch: ; PROLOG-BLOCK-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-BLOCK-NEXT: call void @bar() -; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit +; PROLOG-BLOCK-NEXT: br label %loop_latch.1 ; PROLOG-BLOCK: loop_latch.1: ; PROLOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp ne i64 %iv_next.1, %trip @@ -5883,7 +5883,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.1: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit +; 
EPILOG-NEXT: br label %loop_latch.1 ; EPILOG: loop_latch.1: ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 @@ -5891,7 +5891,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.2: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_latch.2 ; EPILOG: loop_latch.2: ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 @@ -5899,7 +5899,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.3: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_latch.3 ; EPILOG: loop_latch.3: ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 @@ -5907,7 +5907,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.4: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_latch.4 ; EPILOG: loop_latch.4: ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 @@ -5915,7 +5915,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.5, label %loop_exiting_bb2.5, label 
%exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.5: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_latch.5 ; EPILOG: loop_latch.5: ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 @@ -5923,7 +5923,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.6: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_latch.6 ; EPILOG: loop_latch.6: ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 @@ -5931,7 +5931,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.7, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.7: -; EPILOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_latch.7 ; EPILOG: loop_latch.7: ; EPILOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 @@ -5993,7 +5993,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-BLOCK-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; EPILOG-BLOCK-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG-BLOCK: loop_exiting_bb2.1: -; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit +; EPILOG-BLOCK-NEXT: br label %loop_latch.1 ; EPILOG-BLOCK: loop_latch.1: ; EPILOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 @@ -6067,49 +6067,49 @@ define void @test12(i64 %trip, 
i64 %trip2, i1 %cond) { ; PROLOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.1: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.1 ; PROLOG: loop_latch.1: ; PROLOG-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.2: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.2 ; PROLOG: loop_latch.2: ; PROLOG-NEXT: %iv_next.2 = add i64 %iv_next.1, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.3: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.3 ; PROLOG: loop_latch.3: ; PROLOG-NEXT: %iv_next.3 = add i64 %iv_next.2, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.4: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.4 ; PROLOG: loop_latch.4: ; PROLOG-NEXT: %iv_next.4 = add i64 %iv_next.3, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.5, label %loop_exiting_bb2.5, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.5: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.5 ; PROLOG: loop_latch.5: ; 
PROLOG-NEXT: %iv_next.5 = add i64 %iv_next.4, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.6: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.6 ; PROLOG: loop_latch.6: ; PROLOG-NEXT: %iv_next.6 = add i64 %iv_next.5, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.7, label %loop_exiting_bb2.7, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.7: -; PROLOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_latch.7 ; PROLOG: loop_latch.7: ; PROLOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; PROLOG-NEXT: %cmp.7 = icmp ne i64 %iv_next.7, %trip @@ -6158,7 +6158,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; PROLOG-BLOCK-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; PROLOG-BLOCK-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit ; PROLOG-BLOCK: loop_exiting_bb2.1: -; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.unr-lcssa.loopexit +; PROLOG-BLOCK-NEXT: br label %loop_latch.1 ; PROLOG-BLOCK: loop_latch.1: ; PROLOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp ne i64 %iv_next.1, %trip @@ -6575,7 +6575,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.1 ; EPILOG: loop_exiting_bb2.1: ; EPILOG-NEXT: %unknown.1 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit @@ -6583,7 
+6583,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.2 ; EPILOG: loop_exiting_bb2.2: ; EPILOG-NEXT: %unknown.2 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.2, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit @@ -6591,7 +6591,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.3 ; EPILOG: loop_exiting_bb2.3: ; EPILOG-NEXT: %unknown.3 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.3, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit @@ -6599,7 +6599,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.4 ; EPILOG: loop_exiting_bb2.4: ; EPILOG-NEXT: %unknown.4 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.4, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit @@ -6607,7 +6607,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.5 ; EPILOG: loop_exiting_bb2.5: ; EPILOG-NEXT: %unknown.5 = call i1 @unknown_cond() 
; EPILOG-NEXT: br i1 %unknown.5, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit @@ -6615,7 +6615,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.6 ; EPILOG: loop_exiting_bb2.6: ; EPILOG-NEXT: %unknown.6 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.6, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit @@ -6623,7 +6623,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit +; EPILOG-NEXT: br label %loop_exiting_bb2.7 ; EPILOG: loop_exiting_bb2.7: ; EPILOG-NEXT: %unknown.7 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.7, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit @@ -6685,7 +6685,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-BLOCK-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit +; EPILOG-BLOCK-NEXT: br label %loop_exiting_bb2.1 ; EPILOG-BLOCK: loop_exiting_bb2.1: ; EPILOG-BLOCK-NEXT: %unknown.1 = call i1 @unknown_cond() ; EPILOG-BLOCK-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit @@ -6757,49 +6757,49 @@ define void @test14(i64 %trip, i1 %cond) { ; PROLOG: loop_latch: ; PROLOG-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_exiting_bb2.1 ; 
PROLOG: loop_exiting_bb2.1: ; PROLOG-NEXT: %unknown.1 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.1: ; PROLOG-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.2, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_exiting_bb2.2 ; PROLOG: loop_exiting_bb2.2: ; PROLOG-NEXT: %unknown.2 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.2, label %loop_latch.2, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.2: ; PROLOG-NEXT: %iv_next.2 = add i64 %iv_next.1, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.3, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_exiting_bb2.3 ; PROLOG: loop_exiting_bb2.3: ; PROLOG-NEXT: %unknown.3 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.3, label %loop_latch.3, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.3: ; PROLOG-NEXT: %iv_next.3 = add i64 %iv_next.2, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.4, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_exiting_bb2.4 ; PROLOG: loop_exiting_bb2.4: ; PROLOG-NEXT: %unknown.4 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.4, label %loop_latch.4, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.4: ; PROLOG-NEXT: %iv_next.4 = add i64 %iv_next.3, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.5, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_exiting_bb2.5 ; PROLOG: loop_exiting_bb2.5: ; PROLOG-NEXT: %unknown.5 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.5, label %loop_latch.5, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.5: ; PROLOG-NEXT: %iv_next.5 = add i64 %iv_next.4, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.6, label %exit1.unr-lcssa.loopexit +; 
PROLOG-NEXT: br label %loop_exiting_bb2.6 ; PROLOG: loop_exiting_bb2.6: ; PROLOG-NEXT: %unknown.6 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.6, label %loop_latch.6, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.6: ; PROLOG-NEXT: %iv_next.6 = add i64 %iv_next.5, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.7, label %exit1.unr-lcssa.loopexit +; PROLOG-NEXT: br label %loop_exiting_bb2.7 ; PROLOG: loop_exiting_bb2.7: ; PROLOG-NEXT: %unknown.7 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.7, label %loop_latch.7, label %exit1.unr-lcssa.loopexit @@ -6848,7 +6848,7 @@ define void @test14(i64 %trip, i1 %cond) { ; PROLOG-BLOCK: loop_latch: ; PROLOG-BLOCK-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-BLOCK-NEXT: call void @bar() -; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit +; PROLOG-BLOCK-NEXT: br label %loop_exiting_bb2.1 ; PROLOG-BLOCK: loop_exiting_bb2.1: ; PROLOG-BLOCK-NEXT: %unknown.1 = call i1 @unknown_cond() ; PROLOG-BLOCK-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll index 70c8d29e4a82..7f395e700e10 100644 --- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -213,7 +213,7 @@ define i32 @multiExitIncomplete(i32* %base) nounwind { ; CHECK: l1.5: ; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]] ; CHECK: l2.5: -; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]] +; CHECK-NEXT: br label [[L3_5:%.*]] ; CHECK: l3.5: ; CHECK-NEXT: br label [[EXIT3]] ; CHECK: exit1: @@ -316,10 +316,10 @@ define void @nsw_latch(i32* %a) nounwind { ; CHECK: for.body.1: ; CHECK-NEXT: br i1 false, label [[FOR_COND_1:%.*]], label [[RETURN]] ; CHECK: for.cond.1: -; CHECK-NEXT: br label [[RETURN]] +; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: return: -; CHECK-NEXT: 
[[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] +; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ] ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; From 14849fe55432110cfdcfed225f6ffda919da10f4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 3 Jan 2022 10:07:37 -0800 Subject: [PATCH 436/992] [SimplifyCFG] Make use of ComputeMinSignedBits and KnownBits::getBitWidth. NFC --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 1046998c26de..02bd0bd8cc5f 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4935,14 +4935,12 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); - unsigned Bits = Cond->getType()->getIntegerBitWidth(); KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) // bits are in the condition value. - unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1; - unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits; + unsigned MaxSignificantBitsInCond = ComputeMinSignedBits(Cond, DL, 0, AC, SI); // Gather dead cases. 
SmallVector DeadCases; @@ -4973,8 +4971,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, bool HasDefault = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = - Bits - (Known.Zero | Known.One).countPopulation(); - assert(NumUnknownBits <= Bits); + Known.getBitWidth() - (Known.Zero | Known.One).countPopulation(); + assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { From 361216f3c4812e4359b576c8912ccf828c4d02f0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 3 Jan 2022 10:09:50 -0800 Subject: [PATCH 437/992] [AMDGPU] Use ComputeMinSignedBits and KnownBits::countMaxActiveBits to simplify some code. NFC Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D116516 --- .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index a55729586b8d..a5923c82bfef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -150,13 +150,13 @@ class AMDGPUCodeGenPrepare : public FunctionPass, /// \returns The minimum number of bits needed to store the value of \Op as an /// unsigned integer. Truncating to this size and then zero-extending to - /// ScalarSize will not change the value. - unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const; + /// the original will not change the value. + unsigned numBitsUnsigned(Value *Op) const; /// \returns The minimum number of bits needed to store the value of \Op as a /// signed integer. Truncating to this size and then sign-extending to - /// ScalarSize will not change the value. - unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const; + /// the original size will not change the value. 
+ unsigned numBitsSigned(Value *Op) const; /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24. /// SelectionDAG has an issue where an and asserting the bits are known @@ -445,17 +445,12 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32( return true; } -unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op, - unsigned ScalarSize) const { - KnownBits Known = computeKnownBits(Op, *DL, 0, AC); - return ScalarSize - Known.countMinLeadingZeros(); +unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op) const { + return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits(); } -unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op, - unsigned ScalarSize) const { - // In order for this to be a signed 24-bit value, bit 23, must - // be a sign bit. - return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC) + 1; +unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op) const { + return ComputeMinSignedBits(Op, *DL, 0, AC); } static void extractValues(IRBuilder<> &Builder, @@ -532,12 +527,12 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const { unsigned LHSBits = 0, RHSBits = 0; bool IsSigned = false; - if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 && - (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) { + if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 && + (RHSBits = numBitsUnsigned(RHS)) <= 24) { IsSigned = false; - } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS, Size)) <= 24 && - (RHSBits = numBitsSigned(RHS, Size)) <= 24) { + } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 && + (RHSBits = numBitsSigned(RHS)) <= 24) { IsSigned = true; } else From a44ef999fb5e9243df9c4459df1b17b66aa3fd23 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Mon, 3 Jan 2022 11:52:44 -0600 Subject: [PATCH 438/992] [NFC][CMake] Add FOLDER to utility targets As Visual Studio's CMake support is getting better and better the line between IDE generator and non-IDE generators is 
blurring. Visual Studio 2019 and later have a very useful UI that can handle all of the various targets we create, but if they are unsorted it is wildly unwieldy. This change sorts the lit testsuite targets and per-component install targets into folders, which are not generated for IDE generators but are generated by default under Visual Studio's CMake + Ninja integration. --- llvm/cmake/modules/AddLLVM.cmake | 9 ++++++++- llvm/test/CMakeLists.txt | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 327b8e0ba2e7..4d19d9edecf4 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -731,6 +731,7 @@ function(add_llvm_install_targets target) ${prefix_option} -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" USES_TERMINAL) + set_target_properties(${target} PROPERTIES FOLDER "Component Install Targets") add_custom_target(${target}-stripped DEPENDS ${file_dependencies} COMMAND "${CMAKE_COMMAND}" @@ -739,6 +740,7 @@ function(add_llvm_install_targets target) -DCMAKE_INSTALL_DO_STRIP=1 -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" USES_TERMINAL) + set_target_properties(${target}-stripped PROPERTIES FOLDER "Component Install Targets (Stripped)") if(target_dependencies) add_dependencies(${target} ${target_dependencies}) add_dependencies(${target}-stripped ${target_dependencies}) @@ -1844,7 +1846,11 @@ endfunction() function(add_lit_testsuites project directory) if (NOT LLVM_ENABLE_IDE) - cmake_parse_arguments(ARG "EXCLUDE_FROM_CHECK_ALL" "" "PARAMS;DEPENDS;ARGS" ${ARGN}) + cmake_parse_arguments(ARG "EXCLUDE_FROM_CHECK_ALL" "FOLDER" "PARAMS;DEPENDS;ARGS" ${ARGN}) + + if (NOT ARG_FOLDER) + set(ARG_FOLDER "Test Subdirectories") + endif() # Search recursively for test directories by assuming anything not # in a directory called Inputs contains tests. 
@@ -1872,6 +1878,7 @@ function(add_lit_testsuites project directory) DEPENDS ${ARG_DEPENDS} ARGS ${ARG_ARGS} ) + set_target_properties(check-${name_var} PROPERTIES FOLDER ${ARG_FOLDER}) endif() endforeach() endif() diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index e588fd2f6159..6dbede411813 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -228,6 +228,7 @@ set_target_properties(check-llvm PROPERTIES FOLDER "Tests") add_lit_testsuites(LLVM ${CMAKE_CURRENT_SOURCE_DIR} ${exclude_from_check_all} DEPENDS ${LLVM_TEST_DEPENDS} + FOLDER "Tests/Subdirectories" ) # Setup an alias for 'check-all'. From a3ab2c94a2715c45e39abdb36b4d78e028b051d2 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 2 Jan 2022 06:29:26 +0000 Subject: [PATCH 439/992] [clang][cmake] Rearrange top-level CMakeLists.txt for D116492 In that revision, I make LLD match Clang in deprecating `llvm-config`. This patch isn't to worthwhile on its own --- there isn't a sense in which the new order is "better" in isolation --- but by putting the steps that LLD also neeeds to do first, I make the diff between LLD and Clang's top-level `CMakeLists.txt` very legible. Longer term I hope: 1. We can remove calling `llvm-config` altogether, and just go strait to finding the CMake config file. This is what Flang does, at least. 2. Hopefully the diffable part is smaller then --- i.e. there is less duplicated boilerplate. 3. Any duplicate boilerplate that remains can be factored out. I didn't both trying to factor anything out in e.g. the top level common CMake Utility modules because this deprecated-but-not-removed state is a merely transitional. 
Reviewed By: beanz Differential Revision: https://reviews.llvm.org/D116548 --- clang/CMakeLists.txt | 46 +++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 69d639fcec1b..aa21d0588243 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.13.4) # If we are not building as a part of LLVM, build Clang as an # standalone project, using LLVM as an external library: -if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(Clang) set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to") @@ -10,7 +10,7 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) set(CMAKE_CXX_EXTENSIONS NO) # Rely on llvm-config. - set(CONFIG_OUTPUT) + set(LLVM_CONFIG_OUTPUT) if(LLVM_CONFIG) set (LLVM_CONFIG_FOUND 1) message(STATUS "Found LLVM_CONFIG as ${LLVM_CONFIG}") @@ -20,35 +20,36 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) automatically, but you can also use LLVM_DIR to specify \ the path containing LLVMConfig.cmake.") set(CONFIG_COMMAND ${LLVM_CONFIG} - "--assertion-mode" - "--bindir" - "--libdir" "--includedir" "--prefix" "--src-root" - "--cmakedir") + "--cmakedir" + "--bindir" + "--libdir" + "--assertion-mode" + ) execute_process( COMMAND ${CONFIG_COMMAND} RESULT_VARIABLE HAD_ERROR - OUTPUT_VARIABLE CONFIG_OUTPUT + OUTPUT_VARIABLE LLVM_CONFIG_OUTPUT ) if(NOT HAD_ERROR) string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" - CONFIG_OUTPUT ${CONFIG_OUTPUT}) + LLVM_CONFIG_OUTPUT ${LLVM_CONFIG_OUTPUT}) else() string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}") message(STATUS "${CONFIG_COMMAND_STR}") message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") endif() - list(GET CONFIG_OUTPUT 0 ENABLE_ASSERTIONS) - list(GET CONFIG_OUTPUT 1 TOOLS_BINARY_DIR) - list(GET CONFIG_OUTPUT 2 LIBRARY_DIR) - list(GET CONFIG_OUTPUT 3 
INCLUDE_DIR) - list(GET CONFIG_OUTPUT 4 LLVM_OBJ_ROOT) - list(GET CONFIG_OUTPUT 5 MAIN_SRC_DIR) - list(GET CONFIG_OUTPUT 6 LLVM_CONFIG_CMAKE_DIR) + list(GET LLVM_CONFIG_OUTPUT 0 MAIN_INCLUDE_DIR) + list(GET LLVM_CONFIG_OUTPUT 1 LLVM_OBJ_ROOT) + list(GET LLVM_CONFIG_OUTPUT 2 MAIN_SRC_DIR) + list(GET LLVM_CONFIG_OUTPUT 3 LLVM_CONFIG_CMAKE_DIR) + list(GET LLVM_CONFIG_OUTPUT 4 TOOLS_BINARY_DIR) + list(GET LLVM_CONFIG_OUTPUT 5 LIBRARY_DIR) + list(GET LLVM_CONFIG_OUTPUT 6 ENABLE_ASSERTIONS) # Normalize LLVM_CMAKE_DIR. --cmakedir might contain backslashes. # CMake assumes slashes as PATH. @@ -71,17 +72,17 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) if (NOT LLVM_CONFIG_FOUND) # Pull values from LLVMConfig.cmake. We can drop this once the llvm-config # path is removed. + set(MAIN_INCLUDE_DIR ${LLVM_INCLUDE_DIR}) + set(LLVM_OBJ_DIR ${LLVM_BINARY_DIR}) set(TOOLS_BINARY_DIR ${LLVM_TOOLS_BINARY_DIR}) set(LIBRARY_DIR ${LLVM_LIBRARY_DIR}) - set(INCLUDE_DIR ${LLVM_INCLUDE_DIR}) - set(LLVM_OBJ_DIR ${LLVM_BINARY_DIR}) endif() - set(LLVM_TOOLS_BINARY_DIR ${TOOLS_BINARY_DIR} CACHE PATH "Path to llvm/bin") - set(LLVM_LIBRARY_DIR ${LIBRARY_DIR} CACHE PATH "Path to llvm/lib") - set(LLVM_MAIN_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include") + set(LLVM_MAIN_INCLUDE_DIR ${MAIN_INCLUDE_DIR} CACHE PATH "Path to llvm/include") set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") + set(LLVM_TOOLS_BINARY_DIR ${TOOLS_BINARY_DIR} CACHE PATH "Path to llvm/bin") + set(LLVM_LIBRARY_DIR ${LIBRARY_DIR} CACHE PATH "Path to llvm/lib") find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) @@ -184,11 +185,12 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) endif() endif() - set( CLANG_BUILT_STANDALONE 1 ) + set(CLANG_BUILT_STANDALONE TRUE) + set(BACKEND_PACKAGE_STRING "LLVM ${LLVM_PACKAGE_VERSION}") else() set(BACKEND_PACKAGE_STRING 
"${PACKAGE_STRING}") -endif() +endif() # standalone # Make sure that our source directory is on the current cmake module path so that # we can include cmake files from this directory. From e32936aef4a2e7da471e84b72d3be3499adf0a21 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Mon, 3 Jan 2022 13:22:26 -0500 Subject: [PATCH 440/992] [MSVC] Silence -Wnon-virtual-dtor on DIA APIs Differential Revision: https://reviews.llvm.org/D116313 --- clang/lib/Driver/ToolChains/MSVCSetupApi.h | 9 +++++++++ llvm/lib/DebugInfo/PDB/CMakeLists.txt | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/MSVCSetupApi.h b/clang/lib/Driver/ToolChains/MSVCSetupApi.h index a890b85fd5e9..28e6e3e08e37 100644 --- a/clang/lib/Driver/ToolChains/MSVCSetupApi.h +++ b/clang/lib/Driver/ToolChains/MSVCSetupApi.h @@ -28,6 +28,11 @@ #pragma once +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#endif + // Constants // #ifndef E_NOTFOUND @@ -512,3 +517,7 @@ STDMETHODIMP GetSetupConfiguration(_Out_ ISetupConfiguration **ppConfiguration, #ifdef __cplusplus } #endif + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index 090711c83404..851c5c3dc03c 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -4,7 +4,7 @@ macro(add_pdb_impl_folder group) endmacro() if(LLVM_ENABLE_DIA_SDK) - include_directories(${MSVC_DIA_SDK_DIR}/include) + include_directories(SYSTEM ${MSVC_DIA_SDK_DIR}/include) set(LIBPDB_LINK_FOLDERS "${MSVC_DIA_SDK_DIR}\\lib") if ("$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64") From 364eb371012b2641b07194ae67a4f685f1da65e4 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Sun, 2 Jan 2022 14:19:12 -0500 Subject: [PATCH 441/992] [CodeView] Remove unnecessary property that was commited by accident. 
--- llvm/include/llvm/MC/MCTargetOptions.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 3510eeca8953..db50dc6749e2 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -62,7 +62,6 @@ class MCTargetOptions { std::string ABIName; std::string AssemblyLanguage; std::string SplitDwarfFile; - std::string COFFOutputFilename; const char *Argv0 = nullptr; ArrayRef CommandLineArgs; From 7972b2e42276346e85bb6d4fb7e03bbd5a9af53f Mon Sep 17 00:00:00 2001 From: Michael Zimmermann Date: Mon, 3 Jan 2022 12:16:11 +0100 Subject: [PATCH 442/992] [clang-format] respect AfterEnum for enums There is some similar looking code in `TokenAnnotator.cpp` but given that I've never worked on clang-format before I don't know what the purpose of that code is and how it's related to `UnwrappedLineParser.cpp`. Either way, it fixes clang-format with `BraceWrapping.AfterEnum=true` and `AllowShortEnumsOnASingleLine=false` to behave like the documentation says. Before this patch: ``` enum { A, B } myEnum; ``` After this patch: ``` enum { A, B } myEnum; ``` According to the unittests which I had to modify this would change the LLVM style. Please evaluate if you want to change the defaults or if you consider the current style a bug. 
Reviewed By: curdeius, HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D106349 --- clang/lib/Format/UnwrappedLineParser.cpp | 6 ++++-- clang/unittests/Format/FormatTest.cpp | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index b6e55aab708f..0579acf36391 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -782,6 +782,8 @@ static bool ShouldBreakBeforeBrace(const FormatStyle &Style, return Style.BraceWrapping.AfterUnion; if (InitialToken.is(tok::kw_struct)) return Style.BraceWrapping.AfterStruct; + if (InitialToken.is(tok::kw_enum)) + return Style.BraceWrapping.AfterEnum; return false; } @@ -2606,12 +2608,12 @@ void UnwrappedLineParser::parseRequires() { } bool UnwrappedLineParser::parseEnum() { + const FormatToken &InitialToken = *FormatTok; + // Won't be 'enum' for NS_ENUMs. if (FormatTok->Tok.is(tok::kw_enum)) nextToken(); - const FormatToken &InitialToken = *FormatTok; - // In TypeScript, "enum" can also be used as property name, e.g. in interface // declarations. An "enum" keyword followed by a colon would be a syntax // error and thus assume it is just an identifier. 
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 7a7976c8b081..470b0c7a19e5 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2535,6 +2535,13 @@ TEST_F(FormatTest, ShortEnums) { " C\n" "} ShortEnum1, ShortEnum2;", Style); + verifyFormat("typedef enum\n" + "{\n" + " A,\n" + " B,\n" + " C\n" + "} ShortEnum1, ShortEnum2;", + Style); } TEST_F(FormatTest, ShortCaseLabels) { From 95cf30401cecf63897d203c6ee6d54a988aaff67 Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Mon, 3 Jan 2022 10:58:26 -0800 Subject: [PATCH 443/992] [X86] autogen segmented stacks tests (NFC) Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116420 --- .../CodeGen/X86/segmented-stacks-dynamic.ll | 212 +- .../X86/segmented-stacks-standalone.ll | 4 + llvm/test/CodeGen/X86/segmented-stacks.ll | 2632 +++++++++++++---- 3 files changed, 2186 insertions(+), 662 deletions(-) diff --git a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll index 3af7902c7874..4f9c2d77d171 100644 --- a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X86 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI @@ -9,6 +10,174 @@ declare void @dummy_use(i32*, i32) define i32 @test_basic(i32 %l) #0 { +; X86-LABEL: test_basic: +; X86: # %bb.0: +; X86-NEXT: cmpl %gs:48, %esp +; X86-NEXT: jbe .LBB0_1 +; X86-NEXT: .LBB0_2: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; 
X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_offset %esi, -12 +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal 15(,%esi,4), %ecx +; X86-NEXT: andl $-16, %ecx +; X86-NEXT: movl %esp, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmpl %eax, %gs:48 +; X86-NEXT: jg .LBB0_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: movl %eax, %esp +; X86-NEXT: jmp .LBB0_5 +; X86-NEXT: .LBB0_4: +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl %ecx +; X86-NEXT: calll __morestack_allocate_stack_space +; X86-NEXT: addl $16, %esp +; X86-NEXT: .LBB0_5: +; X86-NEXT: subl $8, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax +; X86-NEXT: calll dummy_use@PLT +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %esi, %esi +; X86-NEXT: je .LBB0_6 +; X86-NEXT: # %bb.8: # %false +; X86-NEXT: addl $-1, %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: calll test_basic@PLT +; X86-NEXT: jmp .LBB0_7 +; X86-NEXT: .LBB0_6: # %true +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB0_7: # %true +; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; X86-NEXT: .LBB0_1: +; X86-NEXT: .cfi_restore %ebp +; X86-NEXT: .cfi_restore %esi +; X86-NEXT: pushl $4 +; X86-NEXT: pushl $12 +; X86-NEXT: calll __morestack +; X86-NEXT: retl +; X86-NEXT: jmp .LBB0_2 +; +; X64-LABEL: test_basic: +; X64: # %bb.0: +; X64-NEXT: cmpq %fs:112, %rsp +; X64-NEXT: jbe .LBB0_1 +; X64-NEXT: .LBB0_2: +; X64-NEXT: pushq %rbp +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: .cfi_def_cfa_register %rbp +; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_offset %rbx, -24 +; X64-NEXT: movl %edi, %ebx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: leaq 15(,%rax,4), %rax +; X64-NEXT: andq $-16, %rax +; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: cmpq %rdi, %fs:112 +; X64-NEXT: jg .LBB0_4 +; X64-NEXT: # %bb.3: +; 
X64-NEXT: movq %rdi, %rsp +; X64-NEXT: jmp .LBB0_5 +; X64-NEXT: .LBB0_4: +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: callq __morestack_allocate_stack_space +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: .LBB0_5: +; X64-NEXT: movl %ebx, %esi +; X64-NEXT: callq dummy_use@PLT +; X64-NEXT: testl %ebx, %ebx +; X64-NEXT: je .LBB0_6 +; X64-NEXT: # %bb.8: # %false +; X64-NEXT: addl $-1, %ebx +; X64-NEXT: movl %ebx, %edi +; X64-NEXT: callq test_basic@PLT +; X64-NEXT: jmp .LBB0_7 +; X64-NEXT: .LBB0_6: # %true +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: .LBB0_7: # %true +; X64-NEXT: leaq -8(%rbp), %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa %rsp, 8 +; X64-NEXT: retq +; X64-NEXT: .LBB0_1: +; X64-NEXT: .cfi_restore %rbx +; X64-NEXT: .cfi_restore %rbp +; X64-NEXT: movabsq $24, %r10 +; X64-NEXT: movabsq $0, %r11 +; X64-NEXT: callq __morestack +; X64-NEXT: retq +; X64-NEXT: jmp .LBB0_2 +; +; X32ABI-LABEL: test_basic: +; X32ABI: # %bb.0: +; X32ABI-NEXT: cmpl %fs:64, %esp +; X32ABI-NEXT: jbe .LBB0_1 +; X32ABI-NEXT: .LBB0_2: +; X32ABI-NEXT: pushq %rbp +; X32ABI-NEXT: .cfi_def_cfa_offset 16 +; X32ABI-NEXT: .cfi_offset %rbp, -16 +; X32ABI-NEXT: movl %esp, %ebp +; X32ABI-NEXT: .cfi_def_cfa_register %rbp +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: pushq %rax +; X32ABI-NEXT: .cfi_offset %rbx, -24 +; X32ABI-NEXT: movl %edi, %ebx +; X32ABI-NEXT: leal 15(,%rbx,4), %eax +; X32ABI-NEXT: andl $-16, %eax +; X32ABI-NEXT: movl %esp, %edi +; X32ABI-NEXT: subl %eax, %edi +; X32ABI-NEXT: cmpl %edi, %fs:64 +; X32ABI-NEXT: jg .LBB0_4 +; X32ABI-NEXT: # %bb.3: +; X32ABI-NEXT: movl %edi, %esp +; X32ABI-NEXT: jmp .LBB0_5 +; X32ABI-NEXT: .LBB0_4: +; X32ABI-NEXT: movl %eax, %edi +; X32ABI-NEXT: callq __morestack_allocate_stack_space +; X32ABI-NEXT: movl %eax, %edi +; X32ABI-NEXT: .LBB0_5: +; X32ABI-NEXT: movl %ebx, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: testl %ebx, %ebx +; X32ABI-NEXT: je .LBB0_6 +; X32ABI-NEXT: # %bb.8: # %false +; X32ABI-NEXT: addl $-1, %ebx +; 
X32ABI-NEXT: movl %ebx, %edi +; X32ABI-NEXT: callq test_basic@PLT +; X32ABI-NEXT: jmp .LBB0_7 +; X32ABI-NEXT: .LBB0_6: # %true +; X32ABI-NEXT: xorl %eax, %eax +; X32ABI-NEXT: .LBB0_7: # %true +; X32ABI-NEXT: leal -8(%ebp), %esp +; X32ABI-NEXT: popq %rbx +; X32ABI-NEXT: popq %rbp +; X32ABI-NEXT: .cfi_def_cfa %rsp, 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB0_1: +; X32ABI-NEXT: .cfi_def_cfa_register 4294967294 +; X32ABI-NEXT: .cfi_restore %rbx +; X32ABI-NEXT: .cfi_restore %rbp +; X32ABI-NEXT: movl $24, %r10d +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB0_2 %mem = alloca i32, i32 %l call void @dummy_use (i32* %mem, i32 %l) %terminate = icmp eq i32 %l, 0 @@ -22,66 +191,23 @@ false: %retvalue = call i32 @test_basic(i32 %newlen) ret i32 %retvalue -; X86-LABEL: test_basic: -; X86: cmpl %gs:48, %esp -; X86-NEXT: jbe .LBB0_1 -; X86: movl %esp, %eax -; X86: subl %ecx, %eax -; X86-NEXT: cmpl %eax, %gs:48 -; X86: movl %eax, %esp -; X86: subl $12, %esp -; X86-NEXT: pushl %ecx -; X86-NEXT: calll __morestack_allocate_stack_space -; X86-NEXT: addl $16, %esp -; X86: pushl $4 -; X86-NEXT: pushl $12 -; X86-NEXT: calll __morestack -; X86-NEXT: ret -; X64-LABEL: test_basic: -; X64: cmpq %fs:112, %rsp -; X64-NEXT: jbe .LBB0_1 -; X64: movq %rsp, %[[RDI:rdi|rax]] -; X64: subq %{{.*}}, %[[RDI]] -; X64-NEXT: cmpq %[[RDI]], %fs:112 -; X64: movq %[[RDI]], %rsp -; X64: movq %{{.*}}, %rdi -; X64-NEXT: callq __morestack_allocate_stack_space -; X64: movq %rax, %rdi -; X64: movabsq $24, %r10 -; X64-NEXT: movabsq $0, %r11 -; X64-NEXT: callq __morestack -; X64-NEXT: ret -; X32ABI-LABEL: test_basic: -; X32ABI: cmpl %fs:64, %esp -; X32ABI-NEXT: jbe .LBB0_1 -; X32ABI: movl %esp, %[[EDI:edi|eax]] -; X32ABI: subl %{{.*}}, %[[EDI]] -; X32ABI-NEXT: cmpl %[[EDI]], %fs:64 -; X32ABI: movl %[[EDI]], %esp -; X32ABI: movl %{{.*}}, %edi -; X32ABI-NEXT: callq __morestack_allocate_stack_space -; X32ABI: movl %eax, %edi -; X32ABI: movl $24, %r10d -; 
X32ABI-NEXT: movl $0, %r11d -; X32ABI-NEXT: callq __morestack -; X32ABI-NEXT: ret } diff --git a/llvm/test/CodeGen/X86/segmented-stacks-standalone.ll b/llvm/test/CodeGen/X86/segmented-stacks-standalone.ll index 3c9598ecb534..cbd01229eaa4 100644 --- a/llvm/test/CodeGen/X86/segmented-stacks-standalone.ll +++ b/llvm/test/CodeGen/X86/segmented-stacks-standalone.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s @@ -9,6 +10,9 @@ ; of any other contents of the compilation unit. define void @test_nostack() #0 { +; CHECK-LABEL: test_nostack: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ret void } diff --git a/llvm/test/CodeGen/X86/segmented-stacks.ll b/llvm/test/CodeGen/X86/segmented-stacks.ll index d17582131333..627825c78e27 100644 --- a/llvm/test/CodeGen/X86/segmented-stacks.ll +++ b/llvm/test/CodeGen/X86/segmented-stacks.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X86-Linux ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -code-model=large -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux-Large @@ -34,611 +35,1678 @@ declare void @dummy_use(i32*, i32) define void @test_basic() #0 { +; X86-Linux-LABEL: test_basic: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: cmpl %gs:48, %esp +; X86-Linux-NEXT: jbe .LBB0_1 +; X86-Linux-NEXT: .LBB0_2: +; X86-Linux-NEXT: subl $52, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 52 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl $10 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: 
.cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $60, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -60 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB0_1: +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: pushl $44 +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB0_2 +; +; X64-Linux-LABEL: test_basic: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: cmpq %fs:112, %rsp +; X64-Linux-NEXT: jbe .LBB0_1 +; X64-Linux-NEXT: .LBB0_2: +; X64-Linux-NEXT: subq $40, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 48 +; X64-Linux-NEXT: movq %rsp, %rdi +; X64-Linux-NEXT: movl $10, %esi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: addq $40, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB0_1: +; X64-Linux-NEXT: movabsq $40, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: jmp .LBB0_2 +; +; X64-Linux-Large-LABEL: test_basic: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: cmpq %fs:112, %rsp +; X64-Linux-Large-NEXT: jbe .LBB0_1 +; X64-Linux-Large-NEXT: .LBB0_2: +; X64-Linux-Large-NEXT: subq $40, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 48 +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: movq %rsp, %rdi +; X64-Linux-Large-NEXT: movl $10, %esi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: addq $40, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB0_1: +; X64-Linux-Large-NEXT: movabsq $40, %r10 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: jmp .LBB0_2 +; +; X32ABI-LABEL: test_basic: +; X32ABI: # %bb.0: +; X32ABI-NEXT: cmpl %fs:64, %esp +; X32ABI-NEXT: jbe .LBB0_1 +; X32ABI-NEXT: .LBB0_2: +; X32ABI-NEXT: subl $40, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 48 +; X32ABI-NEXT: 
movl %esp, %edi +; X32ABI-NEXT: movl $10, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: addl $40, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB0_1: +; X32ABI-NEXT: movl $40, %r10d +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB0_2 +; +; X86-Darwin-LABEL: test_basic: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%ecx), %esp +; X86-Darwin-NEXT: jbe LBB0_1 +; X86-Darwin-NEXT: LBB0_2: +; X86-Darwin-NEXT: subl $60, %esp +; X86-Darwin-NEXT: .cfi_def_cfa_offset 64 +; X86-Darwin-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) +; X86-Darwin-NEXT: movl $10, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: addl $60, %esp +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB0_1: +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: pushl $60 +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB0_2 +; +; X64-Darwin-LABEL: test_basic: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: jbe LBB0_1 +; X64-Darwin-NEXT: LBB0_2: +; X64-Darwin-NEXT: subq $40, %rsp +; X64-Darwin-NEXT: .cfi_def_cfa_offset 48 +; X64-Darwin-NEXT: movq %rsp, %rdi +; X64-Darwin-NEXT: movl $10, %esi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: addq $40, %rsp +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB0_1: +; X64-Darwin-NEXT: movabsq $40, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp LBB0_2 +; +; X86-MinGW-LABEL: test_basic: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: cmpl %fs:20, %esp +; X86-MinGW-NEXT: jbe LBB0_1 +; X86-MinGW-NEXT: LBB0_2: +; X86-MinGW-NEXT: subl $40, %esp +; X86-MinGW-NEXT: .cfi_def_cfa_offset 44 +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl $10 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; 
X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $48, %esp +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -48 +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB0_1: +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: pushl $40 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp LBB0_2 +; +; X64-FreeBSD-LABEL: test_basic: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp +; X64-FreeBSD-NEXT: jbe .LBB0_1 +; X64-FreeBSD-NEXT: .LBB0_2: +; X64-FreeBSD-NEXT: subq $40, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 48 +; X64-FreeBSD-NEXT: movq %rsp, %rdi +; X64-FreeBSD-NEXT: movl $10, %esi +; X64-FreeBSD-NEXT: callq dummy_use@PLT +; X64-FreeBSD-NEXT: addq $40, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB0_1: +; X64-FreeBSD-NEXT: movabsq $40, %r10 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp .LBB0_2 +; +; X86-DFlyBSD-LABEL: test_basic: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp +; X86-DFlyBSD-NEXT: jbe .LBB0_1 +; X86-DFlyBSD-NEXT: .LBB0_2: +; X86-DFlyBSD-NEXT: subl $40, %esp +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 44 +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl $10 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $48, %esp +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset -48 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB0_1: +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: pushl $40 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB0_2 +; +; X64-DFlyBSD-LABEL: test_basic: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp +; X64-DFlyBSD-NEXT: jbe .LBB0_1 +; X64-DFlyBSD-NEXT: .LBB0_2: +; 
X64-DFlyBSD-NEXT: subq $40, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 48 +; X64-DFlyBSD-NEXT: movq %rsp, %rdi +; X64-DFlyBSD-NEXT: movl $10, %esi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: addq $40, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: .LBB0_1: +; X64-DFlyBSD-NEXT: movabsq $40, %r10 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp .LBB0_2 +; +; X64-MinGW-LABEL: test_basic: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: cmpq %gs:40, %rsp +; X64-MinGW-NEXT: jbe .LBB0_1 +; X64-MinGW-NEXT: .LBB0_2: +; X64-MinGW-NEXT: subq $72, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 72 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: movl $10, %edx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: nop +; X64-MinGW-NEXT: addq $72, %rsp +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB0_1: +; X64-MinGW-NEXT: movabsq $72, %r10 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp .LBB0_2 +; X64-MinGW-NEXT: .seh_endproc %mem = alloca i32, i32 10 call void @dummy_use (i32* %mem, i32 10) ret void - -; X86-Linux-LABEL: test_basic: - -; X86-Linux: cmpl %gs:48, %esp -; X86-Linux-NEXT: jbe .LBB0_1 - -; X86-Linux: pushl $0 -; X86-Linux-NEXT: pushl $44 -; X86-Linux-NEXT: calll __morestack -; X86-Linux-NEXT: ret - -; X64-Linux-LABEL: test_basic: - -; X64-Linux: cmpq %fs:112, %rsp -; X64-Linux-NEXT: jbe .LBB0_1 - -; X64-Linux: movabsq $40, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 -; X64-Linux-NEXT: callq __morestack -; X64-Linux-NEXT: ret - -; X64-Linux-Large-LABEL: test_basic: - -; X64-Linux-Large: cmpq %fs:112, %rsp -; X64-Linux-Large-NEXT: jbe .LBB0_1 - -; X64-Linux-Large: movabsq $40, %r10 -; X64-Linux-Large-NEXT: movabsq $0, %r11 -; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) -; X64-Linux-Large-NEXT: ret - -; 
X32ABI-LABEL: test_basic: - -; X32ABI: cmpl %fs:64, %esp -; X32ABI-NEXT: jbe .LBB0_1 - -; X32ABI: movl $40, %r10d -; X32ABI-NEXT: movl $0, %r11d -; X32ABI-NEXT: callq __morestack -; X32ABI-NEXT: ret - -; X86-Darwin-LABEL: test_basic: - -; X86-Darwin: movl $432, %ecx -; X86-Darwin-NEXT: cmpl %gs:(%ecx), %esp -; X86-Darwin-NEXT: jbe LBB0_1 - -; X86-Darwin: pushl $0 -; X86-Darwin-NEXT: pushl $60 -; X86-Darwin-NEXT: calll ___morestack -; X86-Darwin-NEXT: ret - -; X64-Darwin-LABEL: test_basic: - -; X64-Darwin: cmpq %gs:816, %rsp -; X64-Darwin-NEXT: jbe LBB0_1 - -; X64-Darwin: movabsq $40, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 -; X64-Darwin-NEXT: callq ___morestack -; X64-Darwin-NEXT: ret - -; X86-MinGW-LABEL: test_basic: - -; X86-MinGW: cmpl %fs:20, %esp -; X86-MinGW-NEXT: jbe LBB0_1 - -; X86-MinGW: pushl $0 -; X86-MinGW-NEXT: pushl $40 -; X86-MinGW-NEXT: calll ___morestack -; X86-MinGW-NEXT: ret - -; X64-MinGW-LABEL: test_basic: - -; X64-MinGW: cmpq %gs:40, %rsp -; X64-MinGW-NEXT: jbe .LBB0_1 - -; X64-MinGW: movabsq $72, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 -; X64-MinGW-NEXT: callq __morestack -; X64-MinGW-NEXT: retq - -; X64-FreeBSD-LABEL: test_basic: - -; X64-FreeBSD: cmpq %fs:24, %rsp -; X64-FreeBSD-NEXT: jbe .LBB0_1 - -; X64-FreeBSD: movabsq $40, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 -; X64-FreeBSD-NEXT: callq __morestack -; X64-FreeBSD-NEXT: ret - -; X86-DFlyBSD-LABEL: test_basic: - -; X86-DFlyBSD: cmpl %fs:16, %esp -; X86-DFlyBSD-NEXT: jbe .LBB0_1 - -; X86-DFlyBSD: pushl $0 -; X86-DFlyBSD-NEXT: pushl $40 -; X86-DFlyBSD-NEXT: calll __morestack -; X86-DFlyBSD-NEXT: ret - -; X64-DFlyBSD-LABEL: test_basic: - -; X64-DFlyBSD: cmpq %fs:32, %rsp -; X64-DFlyBSD-NEXT: jbe .LBB0_1 - -; X64-DFlyBSD: movabsq $40, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 -; X64-DFlyBSD-NEXT: callq __morestack -; X64-DFlyBSD-NEXT: ret - } define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { +; X86-Linux-LABEL: test_nested: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: cmpl 
%gs:48, %esp +; X86-Linux-NEXT: jbe .LBB1_1 +; X86-Linux-NEXT: .LBB1_2: +; X86-Linux-NEXT: pushl %esi +; X86-Linux-NEXT: .cfi_def_cfa_offset 8 +; X86-Linux-NEXT: subl $40, %esp +; X86-Linux-NEXT: .cfi_def_cfa_offset 48 +; X86-Linux-NEXT: .cfi_offset %esi, -8 +; X86-Linux-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-Linux-NEXT: addl (%ecx), %esi +; X86-Linux-NEXT: subl $8, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 8 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl $10 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $16, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -16 +; X86-Linux-NEXT: movl %esi, %eax +; X86-Linux-NEXT: addl $40, %esp +; X86-Linux-NEXT: .cfi_def_cfa_offset 8 +; X86-Linux-NEXT: popl %esi +; X86-Linux-NEXT: .cfi_def_cfa_offset 4 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB1_1: +; X86-Linux-NEXT: .cfi_restore %esi +; X86-Linux-NEXT: pushl $4 +; X86-Linux-NEXT: pushl $44 +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB1_2 +; +; X64-Linux-LABEL: test_nested: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: cmpq %fs:112, %rsp +; X64-Linux-NEXT: jbe .LBB1_1 +; X64-Linux-NEXT: .LBB1_2: +; X64-Linux-NEXT: pushq %rbx +; X64-Linux-NEXT: .cfi_def_cfa_offset 16 +; X64-Linux-NEXT: subq $48, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 64 +; X64-Linux-NEXT: .cfi_offset %rbx, -16 +; X64-Linux-NEXT: movl %edi, %ebx +; X64-Linux-NEXT: addl (%r10), %ebx +; X64-Linux-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-NEXT: movl $10, %esi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: movl %ebx, %eax +; X64-Linux-NEXT: addq $48, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 16 +; X64-Linux-NEXT: popq %rbx +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB1_1: +; X64-Linux-NEXT: .cfi_restore %rbx +; X64-Linux-NEXT: movq %r10, %rax +; 
X64-Linux-NEXT: movabsq $56, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: movq %rax, %r10 +; X64-Linux-NEXT: jmp .LBB1_2 +; +; X64-Linux-Large-LABEL: test_nested: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: cmpq %fs:112, %rsp +; X64-Linux-Large-NEXT: jbe .LBB1_1 +; X64-Linux-Large-NEXT: .LBB1_2: +; X64-Linux-Large-NEXT: pushq %rbx +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 16 +; X64-Linux-Large-NEXT: subq $48, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 64 +; X64-Linux-Large-NEXT: .cfi_offset %rbx, -16 +; X64-Linux-Large-NEXT: movl %edi, %ebx +; X64-Linux-Large-NEXT: addl (%r10), %ebx +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-Large-NEXT: movl $10, %esi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: movl %ebx, %eax +; X64-Linux-Large-NEXT: addq $48, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 16 +; X64-Linux-Large-NEXT: popq %rbx +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB1_1: +; X64-Linux-Large-NEXT: .cfi_restore %rbx +; X64-Linux-Large-NEXT: movq %r10, %rax +; X64-Linux-Large-NEXT: movabsq $56, %r10 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: movq %rax, %r10 +; X64-Linux-Large-NEXT: jmp .LBB1_2 +; +; X32ABI-LABEL: test_nested: +; X32ABI: # %bb.0: +; X32ABI-NEXT: cmpl %fs:64, %esp +; X32ABI-NEXT: jbe .LBB1_1 +; X32ABI-NEXT: .LBB1_2: +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: .cfi_def_cfa_offset 16 +; X32ABI-NEXT: subl $48, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 64 +; X32ABI-NEXT: .cfi_offset %rbx, -16 +; X32ABI-NEXT: movl %edi, %ebx +; X32ABI-NEXT: addl (%r10d), %ebx +; X32ABI-NEXT: leal {{[0-9]+}}(%rsp), %edi +; X32ABI-NEXT: movl $10, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: movl %ebx, %eax +; 
X32ABI-NEXT: addl $48, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 16 +; X32ABI-NEXT: popq %rbx +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB1_1: +; X32ABI-NEXT: .cfi_restore %rbx +; X32ABI-NEXT: movl %r10d, %eax +; X32ABI-NEXT: movl $56, %r10d +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: movq %rax, %r10 +; X32ABI-NEXT: jmp .LBB1_2 +; +; X86-Darwin-LABEL: test_nested: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: movl $432, %edx ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%edx), %esp +; X86-Darwin-NEXT: jbe LBB1_1 +; X86-Darwin-NEXT: LBB1_2: +; X86-Darwin-NEXT: pushl %esi +; X86-Darwin-NEXT: .cfi_def_cfa_offset 8 +; X86-Darwin-NEXT: subl $56, %esp +; X86-Darwin-NEXT: .cfi_def_cfa_offset 64 +; X86-Darwin-NEXT: .cfi_offset %esi, -8 +; X86-Darwin-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-Darwin-NEXT: addl (%ecx), %esi +; X86-Darwin-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) +; X86-Darwin-NEXT: movl $10, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: movl %esi, %eax +; X86-Darwin-NEXT: addl $56, %esp +; X86-Darwin-NEXT: popl %esi +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB1_1: +; X86-Darwin-NEXT: pushl $4 +; X86-Darwin-NEXT: pushl $60 +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB1_2 +; +; X64-Darwin-LABEL: test_nested: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: jbe LBB1_1 +; X64-Darwin-NEXT: LBB1_2: +; X64-Darwin-NEXT: pushq %rbx +; X64-Darwin-NEXT: .cfi_def_cfa_offset 16 +; X64-Darwin-NEXT: subq $48, %rsp +; X64-Darwin-NEXT: .cfi_def_cfa_offset 64 +; X64-Darwin-NEXT: .cfi_offset %rbx, -16 +; X64-Darwin-NEXT: movl %edi, %ebx +; X64-Darwin-NEXT: addl (%r10), %ebx +; X64-Darwin-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Darwin-NEXT: movl $10, %esi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: movl %ebx, %eax +; X64-Darwin-NEXT: 
addq $48, %rsp +; X64-Darwin-NEXT: popq %rbx +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB1_1: +; X64-Darwin-NEXT: movq %r10, %rax +; X64-Darwin-NEXT: movabsq $56, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: movq %rax, %r10 +; X64-Darwin-NEXT: jmp LBB1_2 +; +; X86-MinGW-LABEL: test_nested: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: cmpl %fs:20, %esp +; X86-MinGW-NEXT: jbe LBB1_1 +; X86-MinGW-NEXT: LBB1_2: +; X86-MinGW-NEXT: pushl %esi +; X86-MinGW-NEXT: .cfi_def_cfa_offset 8 +; X86-MinGW-NEXT: subl $40, %esp +; X86-MinGW-NEXT: .cfi_def_cfa_offset 48 +; X86-MinGW-NEXT: .cfi_offset %esi, -8 +; X86-MinGW-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-MinGW-NEXT: addl (%ecx), %esi +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl $10 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $8, %esp +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -8 +; X86-MinGW-NEXT: movl %esi, %eax +; X86-MinGW-NEXT: addl $40, %esp +; X86-MinGW-NEXT: popl %esi +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB1_1: +; X86-MinGW-NEXT: .cfi_def_cfa_offset 4 +; X86-MinGW-NEXT: .cfi_restore %esi +; X86-MinGW-NEXT: pushl $4 +; X86-MinGW-NEXT: pushl $44 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp LBB1_2 +; +; X64-FreeBSD-LABEL: test_nested: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp +; X64-FreeBSD-NEXT: jbe .LBB1_1 +; X64-FreeBSD-NEXT: .LBB1_2: +; X64-FreeBSD-NEXT: pushq %rbx +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 16 +; X64-FreeBSD-NEXT: subq $48, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 64 +; X64-FreeBSD-NEXT: .cfi_offset %rbx, -16 +; X64-FreeBSD-NEXT: movl %edi, %ebx +; X64-FreeBSD-NEXT: addl (%r10), %ebx +; X64-FreeBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-FreeBSD-NEXT: movl $10, %esi +; X64-FreeBSD-NEXT: callq 
dummy_use@PLT +; X64-FreeBSD-NEXT: movl %ebx, %eax +; X64-FreeBSD-NEXT: addq $48, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 16 +; X64-FreeBSD-NEXT: popq %rbx +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB1_1: +; X64-FreeBSD-NEXT: .cfi_restore %rbx +; X64-FreeBSD-NEXT: movq %r10, %rax +; X64-FreeBSD-NEXT: movabsq $56, %r10 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: movq %rax, %r10 +; X64-FreeBSD-NEXT: jmp .LBB1_2 +; +; X86-DFlyBSD-LABEL: test_nested: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp +; X86-DFlyBSD-NEXT: jbe .LBB1_1 +; X86-DFlyBSD-NEXT: .LBB1_2: +; X86-DFlyBSD-NEXT: pushl %esi +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X86-DFlyBSD-NEXT: subl $40, %esp +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 48 +; X86-DFlyBSD-NEXT: .cfi_offset %esi, -8 +; X86-DFlyBSD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-DFlyBSD-NEXT: addl (%ecx), %esi +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl $10 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $8, %esp +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset -8 +; X86-DFlyBSD-NEXT: movl %esi, %eax +; X86-DFlyBSD-NEXT: addl $40, %esp +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X86-DFlyBSD-NEXT: popl %esi +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 4 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB1_1: +; X86-DFlyBSD-NEXT: .cfi_restore %esi +; X86-DFlyBSD-NEXT: pushl $4 +; X86-DFlyBSD-NEXT: pushl $44 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB1_2 +; +; X64-DFlyBSD-LABEL: test_nested: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp +; X64-DFlyBSD-NEXT: jbe .LBB1_1 +; X64-DFlyBSD-NEXT: .LBB1_2: +; X64-DFlyBSD-NEXT: pushq %rbx +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 16 
+; X64-DFlyBSD-NEXT: subq $48, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 64 +; X64-DFlyBSD-NEXT: .cfi_offset %rbx, -16 +; X64-DFlyBSD-NEXT: movl %edi, %ebx +; X64-DFlyBSD-NEXT: addl (%r10), %ebx +; X64-DFlyBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-DFlyBSD-NEXT: movl $10, %esi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: movl %ebx, %eax +; X64-DFlyBSD-NEXT: addq $48, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 16 +; X64-DFlyBSD-NEXT: popq %rbx +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: .LBB1_1: +; X64-DFlyBSD-NEXT: .cfi_restore %rbx +; X64-DFlyBSD-NEXT: movq %r10, %rax +; X64-DFlyBSD-NEXT: movabsq $56, %r10 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: movq %rax, %r10 +; X64-DFlyBSD-NEXT: jmp .LBB1_2 +; +; X64-MinGW-LABEL: test_nested: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: cmpq %gs:40, %rsp +; X64-MinGW-NEXT: jbe .LBB1_1 +; X64-MinGW-NEXT: .LBB1_2: +; X64-MinGW-NEXT: pushq %rsi +; X64-MinGW-NEXT: .seh_pushreg %rsi +; X64-MinGW-NEXT: subq $80, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 80 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: movl %ecx, %esi +; X64-MinGW-NEXT: addl (%r10), %esi +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: movl $10, %edx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: movl %esi, %eax +; X64-MinGW-NEXT: addq $80, %rsp +; X64-MinGW-NEXT: popq %rsi +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB1_1: +; X64-MinGW-NEXT: movq %r10, %rax +; X64-MinGW-NEXT: movabsq $88, %r10 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: movq %rax, %r10 +; X64-MinGW-NEXT: jmp .LBB1_2 +; X64-MinGW-NEXT: .seh_endproc %addend = load i32 , i32 * %closure %result = add i32 %other, %addend %mem = alloca i32, i32 10 call void @dummy_use (i32* %mem, i32 10) ret i32 %result - -; X86-Linux: cmpl %gs:48, %esp -; 
X86-Linux-NEXT: jbe .LBB1_1 - -; X86-Linux: pushl $4 -; X86-Linux-NEXT: pushl $44 -; X86-Linux-NEXT: calll __morestack -; X86-Linux-NEXT: ret - -; X64-Linux: cmpq %fs:112, %rsp -; X64-Linux-NEXT: jbe .LBB1_1 - -; X64-Linux: movq %r10, %rax -; X64-Linux-NEXT: movabsq $56, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 -; X64-Linux-NEXT: callq __morestack -; X64-Linux-NEXT: ret -; X64-Linux-NEXT: movq %rax, %r10 - -; X32ABI: cmpl %fs:64, %esp -; X32ABI-NEXT: jbe .LBB1_1 - -; X32ABI: movl %r10d, %eax -; X32ABI-NEXT: movl $56, %r10d -; X32ABI-NEXT: movl $0, %r11d -; X32ABI-NEXT: callq __morestack -; X32ABI-NEXT: ret -; X32ABI-NEXT: movq %rax, %r10 - -; X86-Darwin: movl $432, %edx -; X86-Darwin-NEXT: cmpl %gs:(%edx), %esp -; X86-Darwin-NEXT: jbe LBB1_1 - -; X86-Darwin: pushl $4 -; X86-Darwin-NEXT: pushl $60 -; X86-Darwin-NEXT: calll ___morestack -; X86-Darwin-NEXT: ret - -; X64-Darwin: cmpq %gs:816, %rsp -; X64-Darwin-NEXT: jbe LBB1_1 - -; X64-Darwin: movq %r10, %rax -; X64-Darwin-NEXT: movabsq $56, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 -; X64-Darwin-NEXT: callq ___morestack -; X64-Darwin-NEXT: ret -; X64-Darwin-NEXT: movq %rax, %r10 - -; X86-MinGW: cmpl %fs:20, %esp -; X86-MinGW-NEXT: jbe LBB1_1 - -; X86-MinGW: pushl $4 -; X86-MinGW-NEXT: pushl $44 -; X86-MinGW-NEXT: calll ___morestack -; X86-MinGW-NEXT: ret - -; X64-MinGW-LABEL: test_nested: -; X64-MinGW: cmpq %gs:40, %rsp -; X64-MinGW-NEXT: jbe .LBB1_1 - -; X64-MinGW: movq %r10, %rax -; X64-MinGW-NEXT: movabsq $88, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 -; X64-MinGW-NEXT: callq __morestack -; X64-MinGW-NEXT: retq -; X64-MinGW-NEXT: movq %rax, %r10 - -; X64-FreeBSD: cmpq %fs:24, %rsp -; X64-FreeBSD-NEXT: jbe .LBB1_1 - -; X64-FreeBSD: movq %r10, %rax -; X64-FreeBSD-NEXT: movabsq $56, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 -; X64-FreeBSD-NEXT: callq __morestack -; X64-FreeBSD-NEXT: ret -; X64-FreeBSD-NEXT: movq %rax, %r10 - -; X86-DFlyBSD: cmpl %fs:16, %esp -; X86-DFlyBSD-NEXT: jbe .LBB1_1 - -; X86-DFlyBSD: pushl 
$4 -; X86-DFlyBSD-NEXT: pushl $44 -; X86-DFlyBSD-NEXT: calll __morestack -; X86-DFlyBSD-NEXT: ret - -; X64-DFlyBSD: cmpq %fs:32, %rsp -; X64-DFlyBSD-NEXT: jbe .LBB1_1 - -; X64-DFlyBSD: movq %r10, %rax -; X64-DFlyBSD-NEXT: movabsq $56, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 -; X64-DFlyBSD-NEXT: callq __morestack -; X64-DFlyBSD-NEXT: ret -; X64-DFlyBSD-NEXT: movq %rax, %r10 - } define void @test_large() #0 { +; X86-Linux-LABEL: test_large: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: leal -{{[0-9]+}}(%esp), %ecx +; X86-Linux-NEXT: cmpl %gs:48, %ecx +; X86-Linux-NEXT: jbe .LBB2_1 +; X86-Linux-NEXT: .LBB2_2: +; X86-Linux-NEXT: subl $40020, %esp # imm = 0x9C54 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 40020 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl $3 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $40028, %esp # imm = 0x9C5C +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -40028 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB2_1: +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: pushl $40012 # imm = 0x9C4C +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB2_2 +; +; X64-Linux-LABEL: test_large: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Linux-NEXT: cmpq %fs:112, %r11 +; X64-Linux-NEXT: jbe .LBB2_1 +; X64-Linux-NEXT: .LBB2_2: +; X64-Linux-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-Linux-NEXT: .cfi_def_cfa_offset 40016 +; X64-Linux-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-NEXT: movl $3, %esi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB2_1: +; X64-Linux-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: 
jmp .LBB2_2 +; +; X64-Linux-Large-LABEL: test_large: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Linux-Large-NEXT: cmpq %fs:112, %r11 +; X64-Linux-Large-NEXT: jbe .LBB2_1 +; X64-Linux-Large-NEXT: .LBB2_2: +; X64-Linux-Large-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 40016 +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-Large-NEXT: movl $3, %esi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB2_1: +; X64-Linux-Large-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: jmp .LBB2_2 +; +; X32ABI-LABEL: test_large: +; X32ABI: # %bb.0: +; X32ABI-NEXT: leal -{{[0-9]+}}(%rsp), %r11d +; X32ABI-NEXT: cmpl %fs:64, %r11d +; X32ABI-NEXT: jbe .LBB2_1 +; X32ABI-NEXT: .LBB2_2: +; X32ABI-NEXT: subl $40008, %esp # imm = 0x9C48 +; X32ABI-NEXT: .cfi_def_cfa_offset 40016 +; X32ABI-NEXT: leal {{[0-9]+}}(%rsp), %edi +; X32ABI-NEXT: movl $3, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: addl $40008, %esp # imm = 0x9C48 +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB2_1: +; X32ABI-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB2_2 +; +; X86-Darwin-LABEL: test_large: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: leal -{{[0-9]+}}(%esp), %ecx +; X86-Darwin-NEXT: movl $432, %eax ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%eax), %ecx +; X86-Darwin-NEXT: jbe LBB2_1 +; X86-Darwin-NEXT: LBB2_2: +; X86-Darwin-NEXT: subl $40012, %esp ## imm = 0x9C4C +; X86-Darwin-NEXT: .cfi_def_cfa_offset 40016 +; X86-Darwin-NEXT: 
leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) +; X86-Darwin-NEXT: movl $3, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: addl $40012, %esp ## imm = 0x9C4C +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB2_1: +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: pushl $40012 ## imm = 0x9C4C +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB2_2 +; +; X64-Darwin-LABEL: test_large: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Darwin-NEXT: cmpq %gs:816, %r11 +; X64-Darwin-NEXT: jbe LBB2_1 +; X64-Darwin-NEXT: LBB2_2: +; X64-Darwin-NEXT: subq $40008, %rsp ## imm = 0x9C48 +; X64-Darwin-NEXT: .cfi_def_cfa_offset 40016 +; X64-Darwin-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Darwin-NEXT: movl $3, %esi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: addq $40008, %rsp ## imm = 0x9C48 +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB2_1: +; X64-Darwin-NEXT: movabsq $40008, %r10 ## imm = 0x9C48 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp LBB2_2 +; +; X86-MinGW-LABEL: test_large: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: leal -{{[0-9]+}}(%esp), %ecx +; X86-MinGW-NEXT: cmpl %fs:20, %ecx +; X86-MinGW-NEXT: jbe LBB2_1 +; X86-MinGW-NEXT: LBB2_2: +; X86-MinGW-NEXT: movl $40000, %eax # imm = 0x9C40 +; X86-MinGW-NEXT: calll __alloca +; X86-MinGW-NEXT: .cfi_def_cfa_offset 40004 +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl $3 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $40008, %esp # imm = 0x9C48 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -40008 +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB2_1: +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: pushl $40000 # imm = 0x9C40 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; 
X86-MinGW-NEXT: jmp LBB2_2 +; +; X64-FreeBSD-LABEL: test_large: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-FreeBSD-NEXT: cmpq %fs:24, %r11 +; X64-FreeBSD-NEXT: jbe .LBB2_1 +; X64-FreeBSD-NEXT: .LBB2_2: +; X64-FreeBSD-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 40016 +; X64-FreeBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-FreeBSD-NEXT: movl $3, %esi +; X64-FreeBSD-NEXT: callq dummy_use@PLT +; X64-FreeBSD-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB2_1: +; X64-FreeBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp .LBB2_2 +; +; X86-DFlyBSD-LABEL: test_large: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: leal -{{[0-9]+}}(%esp), %ecx +; X86-DFlyBSD-NEXT: cmpl %fs:16, %ecx +; X86-DFlyBSD-NEXT: jbe .LBB2_1 +; X86-DFlyBSD-NEXT: .LBB2_2: +; X86-DFlyBSD-NEXT: subl $40000, %esp # imm = 0x9C40 +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 40004 +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl $3 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $40008, %esp # imm = 0x9C48 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset -40008 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB2_1: +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: pushl $40000 # imm = 0x9C40 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB2_2 +; +; X64-DFlyBSD-LABEL: test_large: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-DFlyBSD-NEXT: cmpq %fs:32, %r11 +; X64-DFlyBSD-NEXT: jbe .LBB2_1 +; X64-DFlyBSD-NEXT: .LBB2_2: +; X64-DFlyBSD-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-DFlyBSD-NEXT: 
.cfi_def_cfa_offset 40016 +; X64-DFlyBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-DFlyBSD-NEXT: movl $3, %esi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: .LBB2_1: +; X64-DFlyBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp .LBB2_2 +; +; X64-MinGW-LABEL: test_large: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-MinGW-NEXT: cmpq %gs:40, %r11 +; X64-MinGW-NEXT: jbe .LBB2_1 +; X64-MinGW-NEXT: .LBB2_2: +; X64-MinGW-NEXT: movl $40040, %eax # imm = 0x9C68 +; X64-MinGW-NEXT: callq ___chkstk_ms +; X64-MinGW-NEXT: subq %rax, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 40040 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: movl $3, %edx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: nop +; X64-MinGW-NEXT: addq $40040, %rsp # imm = 0x9C68 +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB2_1: +; X64-MinGW-NEXT: movabsq $40040, %r10 # imm = 0x9C68 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp .LBB2_2 +; X64-MinGW-NEXT: .seh_endproc %mem = alloca i32, i32 10000 call void @dummy_use (i32* %mem, i32 3) ret void - -; X86-Linux-LABEL: test_large: - -; X86-Linux: leal -40012(%esp), %ecx -; X86-Linux-NEXT: cmpl %gs:48, %ecx -; X86-Linux-NEXT: jbe .LBB2_1 - -; X86-Linux: pushl $0 -; X86-Linux-NEXT: pushl $40012 -; X86-Linux-NEXT: calll __morestack -; X86-Linux-NEXT: ret - -; X64-Linux: leaq -40008(%rsp), %r11 -; X64-Linux-NEXT: cmpq %fs:112, %r11 -; X64-Linux-NEXT: jbe .LBB2_1 - -; X64-Linux: movabsq $40008, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 -; X64-Linux-NEXT: callq __morestack -; X64-Linux-NEXT: ret - -; X32ABI: leal -40008(%rsp), %r11d -; X32ABI-NEXT: cmpl 
%fs:64, %r11d -; X32ABI-NEXT: jbe .LBB2_1 - -; X32ABI: movl $40008, %r10d -; X32ABI-NEXT: movl $0, %r11d -; X32ABI-NEXT: callq __morestack -; X32ABI-NEXT: ret - -; X86-Darwin: leal -40012(%esp), %ecx -; X86-Darwin-NEXT: movl $432, %eax -; X86-Darwin-NEXT: cmpl %gs:(%eax), %ecx -; X86-Darwin-NEXT: jbe LBB2_1 - -; X86-Darwin: pushl $0 -; X86-Darwin-NEXT: pushl $40012 -; X86-Darwin-NEXT: calll ___morestack -; X86-Darwin-NEXT: ret - -; X64-Darwin: leaq -40008(%rsp), %r11 -; X64-Darwin-NEXT: cmpq %gs:816, %r11 -; X64-Darwin-NEXT: jbe LBB2_1 - -; X64-Darwin: movabsq $40008, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 -; X64-Darwin-NEXT: callq ___morestack -; X64-Darwin-NEXT: ret - -; X86-MinGW: leal -40000(%esp), %ecx -; X86-MinGW-NEXT: cmpl %fs:20, %ecx -; X86-MinGW-NEXT: jbe LBB2_1 - -; X86-MinGW: pushl $0 -; X86-MinGW-NEXT: pushl $40000 -; X86-MinGW-NEXT: calll ___morestack -; X86-MinGW-NEXT: ret - -; X64-MinGW-LABEL: test_large: -; X64-MinGW: leaq -40040(%rsp), %r11 -; X64-MinGW-NEXT: cmpq %gs:40, %r11 -; X64-MinGW-NEXT: jbe .LBB2_1 - -; X64-MinGW: movabsq $40040, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 -; X64-MinGW-NEXT: callq __morestack -; X64-MinGW-NEXT: retq - -; X64-FreeBSD: leaq -40008(%rsp), %r11 -; X64-FreeBSD-NEXT: cmpq %fs:24, %r11 -; X64-FreeBSD-NEXT: jbe .LBB2_1 - -; X64-FreeBSD: movabsq $40008, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 -; X64-FreeBSD-NEXT: callq __morestack -; X64-FreeBSD-NEXT: ret - -; X86-DFlyBSD: leal -40000(%esp), %ecx -; X86-DFlyBSD-NEXT: cmpl %fs:16, %ecx -; X86-DFlyBSD-NEXT: jbe .LBB2_1 - -; X86-DFlyBSD: pushl $0 -; X86-DFlyBSD-NEXT: pushl $40000 -; X86-DFlyBSD-NEXT: calll __morestack -; X86-DFlyBSD-NEXT: ret - -; X64-DFlyBSD: leaq -40008(%rsp), %r11 -; X64-DFlyBSD-NEXT: cmpq %fs:32, %r11 -; X64-DFlyBSD-NEXT: jbe .LBB2_1 - -; X64-DFlyBSD: movabsq $40008, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 -; X64-DFlyBSD-NEXT: callq __morestack -; X64-DFlyBSD-NEXT: ret - } define fastcc void @test_fastcc() #0 { +; X86-Linux-LABEL: 
test_fastcc: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: cmpl %gs:48, %esp +; X86-Linux-NEXT: jbe .LBB3_1 +; X86-Linux-NEXT: .LBB3_2: +; X86-Linux-NEXT: subl $52, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 52 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl $10 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $60, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -60 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB3_1: +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: pushl $44 +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB3_2 +; +; X64-Linux-LABEL: test_fastcc: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: cmpq %fs:112, %rsp +; X64-Linux-NEXT: jbe .LBB3_1 +; X64-Linux-NEXT: .LBB3_2: +; X64-Linux-NEXT: subq $40, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 48 +; X64-Linux-NEXT: movq %rsp, %rdi +; X64-Linux-NEXT: movl $10, %esi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: addq $40, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB3_1: +; X64-Linux-NEXT: movabsq $40, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: jmp .LBB3_2 +; +; X64-Linux-Large-LABEL: test_fastcc: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: cmpq %fs:112, %rsp +; X64-Linux-Large-NEXT: jbe .LBB3_1 +; X64-Linux-Large-NEXT: .LBB3_2: +; X64-Linux-Large-NEXT: subq $40, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 48 +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: movq %rsp, %rdi +; X64-Linux-Large-NEXT: movl $10, %esi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: addq $40, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB3_1: +; X64-Linux-Large-NEXT: movabsq $40, %r10 +; 
X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: jmp .LBB3_2 +; +; X32ABI-LABEL: test_fastcc: +; X32ABI: # %bb.0: +; X32ABI-NEXT: cmpl %fs:64, %esp +; X32ABI-NEXT: jbe .LBB3_1 +; X32ABI-NEXT: .LBB3_2: +; X32ABI-NEXT: subl $40, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 48 +; X32ABI-NEXT: movl %esp, %edi +; X32ABI-NEXT: movl $10, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: addl $40, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB3_1: +; X32ABI-NEXT: movl $40, %r10d +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB3_2 +; +; X86-Darwin-LABEL: test_fastcc: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: movl $432, %eax ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%eax), %esp +; X86-Darwin-NEXT: jbe LBB3_1 +; X86-Darwin-NEXT: LBB3_2: +; X86-Darwin-NEXT: subl $60, %esp +; X86-Darwin-NEXT: .cfi_def_cfa_offset 64 +; X86-Darwin-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) +; X86-Darwin-NEXT: movl $10, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: addl $60, %esp +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB3_1: +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: pushl $60 +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB3_2 +; +; X64-Darwin-LABEL: test_fastcc: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: jbe LBB3_1 +; X64-Darwin-NEXT: LBB3_2: +; X64-Darwin-NEXT: subq $40, %rsp +; X64-Darwin-NEXT: .cfi_def_cfa_offset 48 +; X64-Darwin-NEXT: movq %rsp, %rdi +; X64-Darwin-NEXT: movl $10, %esi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: addq $40, %rsp +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB3_1: +; X64-Darwin-NEXT: movabsq $40, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; 
X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp LBB3_2 +; +; X86-MinGW-LABEL: test_fastcc: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: cmpl %fs:20, %esp +; X86-MinGW-NEXT: jbe LBB3_1 +; X86-MinGW-NEXT: LBB3_2: +; X86-MinGW-NEXT: subl $40, %esp +; X86-MinGW-NEXT: .cfi_def_cfa_offset 44 +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl $10 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $48, %esp +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -48 +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB3_1: +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: pushl $40 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp LBB3_2 +; +; X64-FreeBSD-LABEL: test_fastcc: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp +; X64-FreeBSD-NEXT: jbe .LBB3_1 +; X64-FreeBSD-NEXT: .LBB3_2: +; X64-FreeBSD-NEXT: subq $40, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 48 +; X64-FreeBSD-NEXT: movq %rsp, %rdi +; X64-FreeBSD-NEXT: movl $10, %esi +; X64-FreeBSD-NEXT: callq dummy_use@PLT +; X64-FreeBSD-NEXT: addq $40, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB3_1: +; X64-FreeBSD-NEXT: movabsq $40, %r10 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp .LBB3_2 +; +; X86-DFlyBSD-LABEL: test_fastcc: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp +; X86-DFlyBSD-NEXT: jbe .LBB3_1 +; X86-DFlyBSD-NEXT: .LBB3_2: +; X86-DFlyBSD-NEXT: subl $40, %esp +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 44 +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl $10 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $48, %esp +; X86-DFlyBSD-NEXT: 
.cfi_adjust_cfa_offset -48 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB3_1: +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: pushl $40 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB3_2 +; +; X64-DFlyBSD-LABEL: test_fastcc: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp +; X64-DFlyBSD-NEXT: jbe .LBB3_1 +; X64-DFlyBSD-NEXT: .LBB3_2: +; X64-DFlyBSD-NEXT: subq $40, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 48 +; X64-DFlyBSD-NEXT: movq %rsp, %rdi +; X64-DFlyBSD-NEXT: movl $10, %esi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: addq $40, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: .LBB3_1: +; X64-DFlyBSD-NEXT: movabsq $40, %r10 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp .LBB3_2 +; +; X64-MinGW-LABEL: test_fastcc: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: cmpq %gs:40, %rsp +; X64-MinGW-NEXT: jbe .LBB3_1 +; X64-MinGW-NEXT: .LBB3_2: +; X64-MinGW-NEXT: subq $72, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 72 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: movl $10, %edx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: nop +; X64-MinGW-NEXT: addq $72, %rsp +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB3_1: +; X64-MinGW-NEXT: movabsq $72, %r10 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp .LBB3_2 +; X64-MinGW-NEXT: .seh_endproc %mem = alloca i32, i32 10 call void @dummy_use (i32* %mem, i32 10) ret void - -; X86-Linux-LABEL: test_fastcc: - -; X86-Linux: cmpl %gs:48, %esp -; X86-Linux-NEXT: jbe .LBB3_1 - -; X86-Linux: pushl $0 -; X86-Linux-NEXT: pushl $44 -; X86-Linux-NEXT: calll __morestack -; X86-Linux-NEXT: ret - -; X64-Linux-LABEL: test_fastcc: - -; X64-Linux: cmpq %fs:112, %rsp -; X64-Linux-NEXT: jbe .LBB3_1 - -; 
X64-Linux: movabsq $40, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 -; X64-Linux-NEXT: callq __morestack -; X64-Linux-NEXT: ret - -; X32ABI-LABEL: test_fastcc: - -; X32ABI: cmpl %fs:64, %esp -; X32ABI-NEXT: jbe .LBB3_1 - -; X32ABI: movl $40, %r10d -; X32ABI-NEXT: movl $0, %r11d -; X32ABI-NEXT: callq __morestack -; X32ABI-NEXT: ret - -; X86-Darwin-LABEL: test_fastcc: - -; X86-Darwin: movl $432, %eax -; X86-Darwin-NEXT: cmpl %gs:(%eax), %esp -; X86-Darwin-NEXT: jbe LBB3_1 - -; X86-Darwin: pushl $0 -; X86-Darwin-NEXT: pushl $60 -; X86-Darwin-NEXT: calll ___morestack -; X86-Darwin-NEXT: ret - -; X64-Darwin-LABEL: test_fastcc: - -; X64-Darwin: cmpq %gs:816, %rsp -; X64-Darwin-NEXT: jbe LBB3_1 - -; X64-Darwin: movabsq $40, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 -; X64-Darwin-NEXT: callq ___morestack -; X64-Darwin-NEXT: ret - -; X86-MinGW-LABEL: test_fastcc: - -; X86-MinGW: cmpl %fs:20, %esp -; X86-MinGW-NEXT: jbe LBB3_1 - -; X86-MinGW: pushl $0 -; X86-MinGW-NEXT: pushl $40 -; X86-MinGW-NEXT: calll ___morestack -; X86-MinGW-NEXT: ret - -; X64-MinGW-LABEL: test_fastcc: - -; X64-MinGW: cmpq %gs:40, %rsp -; X64-MinGW-NEXT: jbe .LBB3_1 - -; X64-MinGW: movabsq $72, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 -; X64-MinGW-NEXT: callq __morestack -; X64-MinGW-NEXT: retq - -; X64-FreeBSD-LABEL: test_fastcc: - -; X64-FreeBSD: cmpq %fs:24, %rsp -; X64-FreeBSD-NEXT: jbe .LBB3_1 - -; X64-FreeBSD: movabsq $40, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 -; X64-FreeBSD-NEXT: callq __morestack -; X64-FreeBSD-NEXT: ret - -; X86-DFlyBSD-LABEL: test_fastcc: - -; X86-DFlyBSD: cmpl %fs:16, %esp -; X86-DFlyBSD-NEXT: jbe .LBB3_1 - -; X86-DFlyBSD: pushl $0 -; X86-DFlyBSD-NEXT: pushl $40 -; X86-DFlyBSD-NEXT: calll __morestack -; X86-DFlyBSD-NEXT: ret - -; X64-DFlyBSD-LABEL: test_fastcc: - -; X64-DFlyBSD: cmpq %fs:32, %rsp -; X64-DFlyBSD-NEXT: jbe .LBB3_1 - -; X64-DFlyBSD: movabsq $40, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 -; X64-DFlyBSD-NEXT: callq __morestack -; X64-DFlyBSD-NEXT: ret - } 
define fastcc void @test_fastcc_large() #0 { +; X86-Linux-LABEL: test_fastcc_large: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: cmpl %gs:48, %eax +; X86-Linux-NEXT: jbe .LBB4_1 +; X86-Linux-NEXT: .LBB4_2: +; X86-Linux-NEXT: subl $40020, %esp # imm = 0x9C54 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 40020 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl $3 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $40028, %esp # imm = 0x9C5C +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -40028 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB4_1: +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: pushl $40012 # imm = 0x9C4C +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB4_2 +; +; X64-Linux-LABEL: test_fastcc_large: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Linux-NEXT: cmpq %fs:112, %r11 +; X64-Linux-NEXT: jbe .LBB4_1 +; X64-Linux-NEXT: .LBB4_2: +; X64-Linux-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-Linux-NEXT: .cfi_def_cfa_offset 40016 +; X64-Linux-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-NEXT: movl $3, %esi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB4_1: +; X64-Linux-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: jmp .LBB4_2 +; +; X64-Linux-Large-LABEL: test_fastcc_large: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Linux-Large-NEXT: cmpq %fs:112, %r11 +; X64-Linux-Large-NEXT: jbe .LBB4_1 +; X64-Linux-Large-NEXT: .LBB4_2: +; X64-Linux-Large-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-Linux-Large-NEXT: 
.cfi_def_cfa_offset 40016 +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-Large-NEXT: movl $3, %esi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB4_1: +; X64-Linux-Large-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: jmp .LBB4_2 +; +; X32ABI-LABEL: test_fastcc_large: +; X32ABI: # %bb.0: +; X32ABI-NEXT: leal -{{[0-9]+}}(%rsp), %r11d +; X32ABI-NEXT: cmpl %fs:64, %r11d +; X32ABI-NEXT: jbe .LBB4_1 +; X32ABI-NEXT: .LBB4_2: +; X32ABI-NEXT: subl $40008, %esp # imm = 0x9C48 +; X32ABI-NEXT: .cfi_def_cfa_offset 40016 +; X32ABI-NEXT: leal {{[0-9]+}}(%rsp), %edi +; X32ABI-NEXT: movl $3, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: addl $40008, %esp # imm = 0x9C48 +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB4_1: +; X32ABI-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB4_2 +; +; X86-Darwin-LABEL: test_fastcc_large: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%ecx), %eax +; X86-Darwin-NEXT: jbe LBB4_1 +; X86-Darwin-NEXT: LBB4_2: +; X86-Darwin-NEXT: subl $40012, %esp ## imm = 0x9C4C +; X86-Darwin-NEXT: .cfi_def_cfa_offset 40016 +; X86-Darwin-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) +; X86-Darwin-NEXT: movl $3, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: addl $40012, %esp ## imm = 0x9C4C +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB4_1: +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: pushl $40012 ## 
imm = 0x9C4C +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB4_2 +; +; X64-Darwin-LABEL: test_fastcc_large: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Darwin-NEXT: cmpq %gs:816, %r11 +; X64-Darwin-NEXT: jbe LBB4_1 +; X64-Darwin-NEXT: LBB4_2: +; X64-Darwin-NEXT: subq $40008, %rsp ## imm = 0x9C48 +; X64-Darwin-NEXT: .cfi_def_cfa_offset 40016 +; X64-Darwin-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Darwin-NEXT: movl $3, %esi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: addq $40008, %rsp ## imm = 0x9C48 +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB4_1: +; X64-Darwin-NEXT: movabsq $40008, %r10 ## imm = 0x9C48 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp LBB4_2 +; +; X86-MinGW-LABEL: test_fastcc_large: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-MinGW-NEXT: cmpl %fs:20, %eax +; X86-MinGW-NEXT: jbe LBB4_1 +; X86-MinGW-NEXT: LBB4_2: +; X86-MinGW-NEXT: movl $40000, %eax # imm = 0x9C40 +; X86-MinGW-NEXT: calll __alloca +; X86-MinGW-NEXT: .cfi_def_cfa_offset 40004 +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl $3 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $40008, %esp # imm = 0x9C48 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -40008 +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB4_1: +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: pushl $40000 # imm = 0x9C40 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp LBB4_2 +; +; X64-FreeBSD-LABEL: test_fastcc_large: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-FreeBSD-NEXT: cmpq %fs:24, %r11 +; X64-FreeBSD-NEXT: jbe .LBB4_1 +; X64-FreeBSD-NEXT: .LBB4_2: +; X64-FreeBSD-NEXT: subq $40008, %rsp # imm = 0x9C48 +; 
X64-FreeBSD-NEXT: .cfi_def_cfa_offset 40016 +; X64-FreeBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-FreeBSD-NEXT: movl $3, %esi +; X64-FreeBSD-NEXT: callq dummy_use@PLT +; X64-FreeBSD-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB4_1: +; X64-FreeBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp .LBB4_2 +; +; X86-DFlyBSD-LABEL: test_fastcc_large: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-DFlyBSD-NEXT: cmpl %fs:16, %eax +; X86-DFlyBSD-NEXT: jbe .LBB4_1 +; X86-DFlyBSD-NEXT: .LBB4_2: +; X86-DFlyBSD-NEXT: subl $40000, %esp # imm = 0x9C40 +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 40004 +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl $3 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $40008, %esp # imm = 0x9C48 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset -40008 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB4_1: +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: pushl $40000 # imm = 0x9C40 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB4_2 +; +; X64-DFlyBSD-LABEL: test_fastcc_large: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-DFlyBSD-NEXT: cmpq %fs:32, %r11 +; X64-DFlyBSD-NEXT: jbe .LBB4_1 +; X64-DFlyBSD-NEXT: .LBB4_2: +; X64-DFlyBSD-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 40016 +; X64-DFlyBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-DFlyBSD-NEXT: movl $3, %esi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: 
.LBB4_1: +; X64-DFlyBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp .LBB4_2 +; +; X64-MinGW-LABEL: test_fastcc_large: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-MinGW-NEXT: cmpq %gs:40, %r11 +; X64-MinGW-NEXT: jbe .LBB4_1 +; X64-MinGW-NEXT: .LBB4_2: +; X64-MinGW-NEXT: movl $40040, %eax # imm = 0x9C68 +; X64-MinGW-NEXT: callq ___chkstk_ms +; X64-MinGW-NEXT: subq %rax, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 40040 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: movl $3, %edx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: nop +; X64-MinGW-NEXT: addq $40040, %rsp # imm = 0x9C68 +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB4_1: +; X64-MinGW-NEXT: movabsq $40040, %r10 # imm = 0x9C68 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp .LBB4_2 +; X64-MinGW-NEXT: .seh_endproc %mem = alloca i32, i32 10000 call void @dummy_use (i32* %mem, i32 3) ret void - -; X86-Linux-LABEL: test_fastcc_large: - -; X86-Linux: leal -40012(%esp), %eax -; X86-Linux-NEXT: cmpl %gs:48, %eax -; X86-Linux-NEXT: jbe .LBB4_1 - -; X86-Linux: pushl $0 -; X86-Linux-NEXT: pushl $40012 -; X86-Linux-NEXT: calll __morestack -; X86-Linux-NEXT: ret - -; X64-Linux-LABEL: test_fastcc_large: - -; X64-Linux: leaq -40008(%rsp), %r11 -; X64-Linux-NEXT: cmpq %fs:112, %r11 -; X64-Linux-NEXT: jbe .LBB4_1 - -; X64-Linux: movabsq $40008, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 -; X64-Linux-NEXT: callq __morestack -; X64-Linux-NEXT: ret - -; X32ABI-LABEL: test_fastcc_large: - -; X32ABI: leal -40008(%rsp), %r11d -; X32ABI-NEXT: cmpl %fs:64, %r11d -; X32ABI-NEXT: jbe .LBB4_1 - -; X32ABI: movl $40008, %r10d -; X32ABI-NEXT: movl $0, %r11d -; X32ABI-NEXT: callq __morestack -; X32ABI-NEXT: ret - -; X86-Darwin-LABEL: test_fastcc_large: - 
-; X86-Darwin: leal -40012(%esp), %eax -; X86-Darwin-NEXT: movl $432, %ecx -; X86-Darwin-NEXT: cmpl %gs:(%ecx), %eax -; X86-Darwin-NEXT: jbe LBB4_1 - -; X86-Darwin: pushl $0 -; X86-Darwin-NEXT: pushl $40012 -; X86-Darwin-NEXT: calll ___morestack -; X86-Darwin-NEXT: ret - -; X64-Darwin-LABEL: test_fastcc_large: - -; X64-Darwin: leaq -40008(%rsp), %r11 -; X64-Darwin-NEXT: cmpq %gs:816, %r11 -; X64-Darwin-NEXT: jbe LBB4_1 - -; X64-Darwin: movabsq $40008, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 -; X64-Darwin-NEXT: callq ___morestack -; X64-Darwin-NEXT: ret - -; X86-MinGW-LABEL: test_fastcc_large: - -; X86-MinGW: leal -40000(%esp), %eax -; X86-MinGW-NEXT: cmpl %fs:20, %eax -; X86-MinGW-NEXT: jbe LBB4_1 - -; X86-MinGW: pushl $0 -; X86-MinGW-NEXT: pushl $40000 -; X86-MinGW-NEXT: calll ___morestack -; X86-MinGW-NEXT: ret - -; X64-MinGW-LABEL: test_fastcc_large: - -; X64-MinGW: leaq -40040(%rsp), %r11 -; X64-MinGW-NEXT: cmpq %gs:40, %r11 -; X64-MinGW-NEXT: jbe .LBB4_1 - -; X64-MinGW: movabsq $40040, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 -; X64-MinGW-NEXT: callq __morestack -; X64-MinGW-NEXT: retq - -; X64-FreeBSD-LABEL: test_fastcc_large: - -; X64-FreeBSD: leaq -40008(%rsp), %r11 -; X64-FreeBSD-NEXT: cmpq %fs:24, %r11 -; X64-FreeBSD-NEXT: jbe .LBB4_1 - -; X64-FreeBSD: movabsq $40008, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 -; X64-FreeBSD-NEXT: callq __morestack -; X64-FreeBSD-NEXT: ret - -; X86-DFlyBSD-LABEL: test_fastcc_large: - -; X86-DFlyBSD: leal -40000(%esp), %eax -; X86-DFlyBSD-NEXT: cmpl %fs:16, %eax -; X86-DFlyBSD-NEXT: jbe .LBB4_1 - -; X86-DFlyBSD: pushl $0 -; X86-DFlyBSD-NEXT: pushl $40000 -; X86-DFlyBSD-NEXT: calll __morestack -; X86-DFlyBSD-NEXT: ret - -; X64-DFlyBSD-LABEL: test_fastcc_large: - -; X64-DFlyBSD: leaq -40008(%rsp), %r11 -; X64-DFlyBSD-NEXT: cmpq %fs:32, %r11 -; X64-DFlyBSD-NEXT: jbe .LBB4_1 - -; X64-DFlyBSD: movabsq $40008, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 -; X64-DFlyBSD-NEXT: callq __morestack -; X64-DFlyBSD-NEXT: ret - } +; 
This is testing that the Mac implementation preserves ecx + define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { +; X86-Linux-LABEL: test_fastcc_large_with_ecx_arg: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: cmpl %gs:48, %eax +; X86-Linux-NEXT: jbe .LBB5_1 +; X86-Linux-NEXT: .LBB5_2: +; X86-Linux-NEXT: subl $40020, %esp # imm = 0x9C54 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 40020 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl %ecx +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $40028, %esp # imm = 0x9C5C +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -40028 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB5_1: +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: pushl $40012 # imm = 0x9C4C +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB5_2 +; +; X64-Linux-LABEL: test_fastcc_large_with_ecx_arg: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Linux-NEXT: cmpq %fs:112, %r11 +; X64-Linux-NEXT: jbe .LBB5_1 +; X64-Linux-NEXT: .LBB5_2: +; X64-Linux-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-Linux-NEXT: .cfi_def_cfa_offset 40016 +; X64-Linux-NEXT: movl %edi, %esi +; X64-Linux-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB5_1: +; X64-Linux-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: jmp .LBB5_2 +; +; X64-Linux-Large-LABEL: test_fastcc_large_with_ecx_arg: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Linux-Large-NEXT: cmpq %fs:112, %r11 +; X64-Linux-Large-NEXT: jbe .LBB5_1 +; 
X64-Linux-Large-NEXT: .LBB5_2: +; X64-Linux-Large-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 40016 +; X64-Linux-Large-NEXT: movl %edi, %esi +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB5_1: +; X64-Linux-Large-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: jmp .LBB5_2 +; +; X32ABI-LABEL: test_fastcc_large_with_ecx_arg: +; X32ABI: # %bb.0: +; X32ABI-NEXT: leal -{{[0-9]+}}(%rsp), %r11d +; X32ABI-NEXT: cmpl %fs:64, %r11d +; X32ABI-NEXT: jbe .LBB5_1 +; X32ABI-NEXT: .LBB5_2: +; X32ABI-NEXT: subl $40008, %esp # imm = 0x9C48 +; X32ABI-NEXT: .cfi_def_cfa_offset 40016 +; X32ABI-NEXT: movl %edi, %esi +; X32ABI-NEXT: leal {{[0-9]+}}(%rsp), %edi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: addl $40008, %esp # imm = 0x9C48 +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB5_1: +; X32ABI-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB5_2 +; +; X86-Darwin-LABEL: test_fastcc_large_with_ecx_arg: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: pushl %ecx +; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%ecx), %eax +; X86-Darwin-NEXT: popl %ecx +; X86-Darwin-NEXT: jbe LBB5_1 +; X86-Darwin-NEXT: LBB5_2: +; X86-Darwin-NEXT: subl $40012, %esp ## imm = 0x9C4C +; X86-Darwin-NEXT: .cfi_def_cfa_offset 40016 +; X86-Darwin-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) 
+; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: addl $40012, %esp ## imm = 0x9C4C +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB5_1: +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: pushl $40012 ## imm = 0x9C4C +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB5_2 +; +; X64-Darwin-LABEL: test_fastcc_large_with_ecx_arg: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-Darwin-NEXT: cmpq %gs:816, %r11 +; X64-Darwin-NEXT: jbe LBB5_1 +; X64-Darwin-NEXT: LBB5_2: +; X64-Darwin-NEXT: subq $40008, %rsp ## imm = 0x9C48 +; X64-Darwin-NEXT: .cfi_def_cfa_offset 40016 +; X64-Darwin-NEXT: movl %edi, %esi +; X64-Darwin-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: addq $40008, %rsp ## imm = 0x9C48 +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB5_1: +; X64-Darwin-NEXT: movabsq $40008, %r10 ## imm = 0x9C48 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp LBB5_2 +; +; X86-MinGW-LABEL: test_fastcc_large_with_ecx_arg: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-MinGW-NEXT: cmpl %fs:20, %eax +; X86-MinGW-NEXT: jbe LBB5_1 +; X86-MinGW-NEXT: LBB5_2: +; X86-MinGW-NEXT: movl $40000, %eax # imm = 0x9C40 +; X86-MinGW-NEXT: calll __alloca +; X86-MinGW-NEXT: .cfi_def_cfa_offset 40004 +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl %ecx +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $40008, %esp # imm = 0x9C48 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -40008 +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB5_1: +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: pushl $40000 # imm = 0x9C40 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp LBB5_2 +; +; X64-FreeBSD-LABEL: 
test_fastcc_large_with_ecx_arg: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-FreeBSD-NEXT: cmpq %fs:24, %r11 +; X64-FreeBSD-NEXT: jbe .LBB5_1 +; X64-FreeBSD-NEXT: .LBB5_2: +; X64-FreeBSD-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 40016 +; X64-FreeBSD-NEXT: movl %edi, %esi +; X64-FreeBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-FreeBSD-NEXT: callq dummy_use@PLT +; X64-FreeBSD-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB5_1: +; X64-FreeBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp .LBB5_2 +; +; X86-DFlyBSD-LABEL: test_fastcc_large_with_ecx_arg: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: leal -{{[0-9]+}}(%esp), %eax +; X86-DFlyBSD-NEXT: cmpl %fs:16, %eax +; X86-DFlyBSD-NEXT: jbe .LBB5_1 +; X86-DFlyBSD-NEXT: .LBB5_2: +; X86-DFlyBSD-NEXT: subl $40000, %esp # imm = 0x9C40 +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 40004 +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl %ecx +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $40008, %esp # imm = 0x9C48 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset -40008 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB5_1: +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: pushl $40000 # imm = 0x9C40 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB5_2 +; +; X64-DFlyBSD-LABEL: test_fastcc_large_with_ecx_arg: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-DFlyBSD-NEXT: cmpq %fs:32, %r11 +; X64-DFlyBSD-NEXT: jbe .LBB5_1 +; X64-DFlyBSD-NEXT: .LBB5_2: +; X64-DFlyBSD-NEXT: subq $40008, %rsp # imm = 0x9C48 +; X64-DFlyBSD-NEXT: 
.cfi_def_cfa_offset 40016 +; X64-DFlyBSD-NEXT: movl %edi, %esi +; X64-DFlyBSD-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: addq $40008, %rsp # imm = 0x9C48 +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: .LBB5_1: +; X64-DFlyBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp .LBB5_2 +; +; X64-MinGW-LABEL: test_fastcc_large_with_ecx_arg: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: leaq -{{[0-9]+}}(%rsp), %r11 +; X64-MinGW-NEXT: cmpq %gs:40, %r11 +; X64-MinGW-NEXT: jbe .LBB5_1 +; X64-MinGW-NEXT: .LBB5_2: +; X64-MinGW-NEXT: movl $40040, %eax # imm = 0x9C68 +; X64-MinGW-NEXT: callq ___chkstk_ms +; X64-MinGW-NEXT: subq %rax, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 40040 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: movl %ecx, %edx +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: nop +; X64-MinGW-NEXT: addq $40040, %rsp # imm = 0x9C68 +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB5_1: +; X64-MinGW-NEXT: movabsq $40040, %r10 # imm = 0x9C68 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp .LBB5_2 +; X64-MinGW-NEXT: .seh_endproc %mem = alloca i32, i32 10000 call void @dummy_use (i32* %mem, i32 %a) ret void - -; This is testing that the Mac implementation preserves ecx - -; X86-Darwin-LABEL: test_fastcc_large_with_ecx_arg: - -; X86-Darwin: leal -40012(%esp), %eax -; X86-Darwin-NEXT: pushl %ecx -; X86-Darwin-NEXT: movl $432, %ecx -; X86-Darwin-NEXT: cmpl %gs:(%ecx), %eax -; X86-Darwin-NEXT: popl %ecx -; X86-Darwin-NEXT: jbe LBB5_1 - -; X86-Darwin: pushl $0 -; X86-Darwin-NEXT: pushl $40012 -; X86-Darwin-NEXT: calll ___morestack -; X86-Darwin-NEXT: ret - } define void @test_nostack() #0 { - ret void - ; X86-Linux-LABEL: test_nostack: -; 
X86-Linux-NOT: calll __morestack - +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: retl +; ; X64-Linux-LABEL: test_nostack: -; X64-Linux-NOT: callq __morestack - +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: retq +; +; X64-Linux-Large-LABEL: test_nostack: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: retq +; ; X32ABI-LABEL: test_nostack: -; X32ABI-NOT: callq __morestack - +; X32ABI: # %bb.0: +; X32ABI-NEXT: retq +; ; X86-Darwin-LABEL: test_nostack: -; X86-Darwin-NOT: calll __morestack - +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: retl +; ; X64-Darwin-LABEL: test_nostack: -; X64-Darwin-NOT: callq __morestack - +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: retq +; ; X86-MinGW-LABEL: test_nostack: -; X86-MinGW-NOT: calll __morestack - -; X64-MinGW-LABEL: test_nostack: -; X64-MinGW-NOT: callq __morestack - +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: retl +; ; X64-FreeBSD-LABEL: test_nostack: -; X64-FreeBSD-NOT: callq __morestack - +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: retq +; ; X86-DFlyBSD-LABEL: test_nostack: -; X86-DFlyBSD-NOT: calll __morestack - +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: retl +; ; X64-DFlyBSD-LABEL: test_nostack: -; X64-DFlyBSD-NOT: callq __morestack +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: retq +; +; X64-MinGW-LABEL: test_nostack: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: retq + ret void } define void @test_nosplitstck() { +; X86-Linux-LABEL: test_nosplitstck: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: retl +; +; X64-Linux-LABEL: test_nosplitstck: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: retq +; +; X64-Linux-Large-LABEL: test_nosplitstck: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: retq +; +; X32ABI-LABEL: test_nosplitstck: +; X32ABI: # %bb.0: +; X32ABI-NEXT: retq +; +; X86-Darwin-LABEL: test_nosplitstck: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: retl +; +; X64-Darwin-LABEL: test_nosplitstck: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: retq +; +; X86-MinGW-LABEL: test_nosplitstck: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: 
retl +; +; X64-FreeBSD-LABEL: test_nosplitstck: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: retq +; +; X86-DFlyBSD-LABEL: test_nosplitstck: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: retl +; +; X64-DFlyBSD-LABEL: test_nosplitstck: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: retq +; +; X64-MinGW-LABEL: test_nosplitstck: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: retq ret void } @@ -649,82 +1717,408 @@ define void @test_nosplitstck() { declare i32 @callee(i32) define i32 @test_sibling_call_empty_frame(i32 %x) #0 { +; X86-Linux-LABEL: test_sibling_call_empty_frame: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: cmpl %gs:48, %esp +; X86-Linux-NEXT: jbe .LBB8_1 +; X86-Linux-NEXT: # %bb.2: +; X86-Linux-NEXT: jmp callee@PLT # TAILCALL +; X86-Linux-NEXT: .LBB8_1: +; X86-Linux-NEXT: pushl $4 +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp callee@PLT # TAILCALL +; +; X64-Linux-LABEL: test_sibling_call_empty_frame: +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: cmpq %fs:112, %rsp +; X64-Linux-NEXT: jbe .LBB8_1 +; X64-Linux-NEXT: # %bb.2: +; X64-Linux-NEXT: jmp callee@PLT # TAILCALL +; X64-Linux-NEXT: .LBB8_1: +; X64-Linux-NEXT: movabsq $0, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: jmp callee@PLT # TAILCALL +; +; X64-Linux-Large-LABEL: test_sibling_call_empty_frame: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: cmpq %fs:112, %rsp +; X64-Linux-Large-NEXT: jbe .LBB8_1 +; X64-Linux-Large-NEXT: # %bb.2: +; X64-Linux-Large-NEXT: movabsq $callee, %rax +; X64-Linux-Large-NEXT: jmpq *%rax # TAILCALL +; X64-Linux-Large-NEXT: .LBB8_1: +; X64-Linux-Large-NEXT: movabsq $0, %r10 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: movabsq $callee, %rax +; X64-Linux-Large-NEXT: jmpq *%rax # TAILCALL +; +; X32ABI-LABEL: test_sibling_call_empty_frame: 
+; X32ABI: # %bb.0: +; X32ABI-NEXT: cmpl %fs:64, %esp +; X32ABI-NEXT: jbe .LBB8_1 +; X32ABI-NEXT: # %bb.2: +; X32ABI-NEXT: jmp callee@PLT # TAILCALL +; X32ABI-NEXT: .LBB8_1: +; X32ABI-NEXT: movl $0, %r10d +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp callee@PLT # TAILCALL +; +; X86-Darwin-LABEL: test_sibling_call_empty_frame: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%ecx), %esp +; X86-Darwin-NEXT: jbe LBB8_1 +; X86-Darwin-NEXT: ## %bb.2: +; X86-Darwin-NEXT: jmp _callee ## TAILCALL +; X86-Darwin-NEXT: LBB8_1: +; X86-Darwin-NEXT: pushl $4 +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp _callee ## TAILCALL +; +; X64-Darwin-LABEL: test_sibling_call_empty_frame: +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: jbe LBB8_1 +; X64-Darwin-NEXT: ## %bb.2: +; X64-Darwin-NEXT: jmp _callee ## TAILCALL +; X64-Darwin-NEXT: LBB8_1: +; X64-Darwin-NEXT: movabsq $0, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp _callee ## TAILCALL +; +; X86-MinGW-LABEL: test_sibling_call_empty_frame: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: cmpl %fs:20, %esp +; X86-MinGW-NEXT: jbe LBB8_1 +; X86-MinGW-NEXT: # %bb.2: +; X86-MinGW-NEXT: jmp _callee # TAILCALL +; X86-MinGW-NEXT: LBB8_1: +; X86-MinGW-NEXT: pushl $4 +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp _callee # TAILCALL +; +; X64-FreeBSD-LABEL: test_sibling_call_empty_frame: +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp +; X64-FreeBSD-NEXT: jbe .LBB8_1 +; X64-FreeBSD-NEXT: # %bb.2: +; X64-FreeBSD-NEXT: jmp callee@PLT # TAILCALL +; X64-FreeBSD-NEXT: .LBB8_1: +; X64-FreeBSD-NEXT: movabsq $0, %r10 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: 
callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp callee@PLT # TAILCALL +; +; X86-DFlyBSD-LABEL: test_sibling_call_empty_frame: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp +; X86-DFlyBSD-NEXT: jbe .LBB8_1 +; X86-DFlyBSD-NEXT: # %bb.2: +; X86-DFlyBSD-NEXT: jmp callee@PLT # TAILCALL +; X86-DFlyBSD-NEXT: .LBB8_1: +; X86-DFlyBSD-NEXT: pushl $4 +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp callee@PLT # TAILCALL +; +; X64-DFlyBSD-LABEL: test_sibling_call_empty_frame: +; X64-DFlyBSD: # %bb.0: +; X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp +; X64-DFlyBSD-NEXT: jbe .LBB8_1 +; X64-DFlyBSD-NEXT: # %bb.2: +; X64-DFlyBSD-NEXT: jmp callee@PLT # TAILCALL +; X64-DFlyBSD-NEXT: .LBB8_1: +; X64-DFlyBSD-NEXT: movabsq $0, %r10 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp callee@PLT # TAILCALL +; +; X64-MinGW-LABEL: test_sibling_call_empty_frame: +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: cmpq %gs:40, %rsp +; X64-MinGW-NEXT: jbe .LBB8_1 +; X64-MinGW-NEXT: # %bb.2: +; X64-MinGW-NEXT: jmp callee # TAILCALL +; X64-MinGW-NEXT: .LBB8_1: +; X64-MinGW-NEXT: movabsq $0, %r10 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp callee # TAILCALL %call = tail call i32 @callee(i32 %x) #0 ret i32 %call - -; X86-Linux-LABEL: test_sibling_call_empty_frame: -; X86-Linux: calll __morestack - -; X64-Linux-LABEL: test_sibling_call_empty_frame: -; X64-Linux: callq __morestack - -; X64-Linux-Large-LABEL: test_sibling_call_empty_frame: -; X64-Linux-Large: callq *__morestack_addr(%rip) - -; X32ABI-LABEL: test_sibling_call_empty_frame: -; X32ABI: callq __morestack - -; X86-Darwin-LABEL: test_sibling_call_empty_frame: -; X86-Darwin: calll ___morestack - -; X64-Darwin-LABEL: test_sibling_call_empty_frame: -; X64-Darwin: callq ___morestack - -; X86-MinGW-LABEL: 
test_sibling_call_empty_frame: -; X86-MinGW: calll ___morestack - -; X64-MinGW-LABEL: test_sibling_call_empty_frame: -; X64-MinGW: callq __morestack - -; X64-FreeBSD-LABEL: test_sibling_call_empty_frame: -; X64-FreeBSD: callq __morestack - -; X86-DFlyBSD-LABEL: test_sibling_call_empty_frame: -; X86-DFlyBSD: calll __morestack -; X86-DFlyBSD-NEXT: ret - -; X64-DFlyBSD-LABEL: test_sibling_call_empty_frame: -; X64-DFlyBSD: callq __morestack - } ; Test that unused nested argument doesn't need saving/restoring. define i32 @test_nested_unused(i32 * nest %unused) #0 { - %mem = alloca i32, i32 10 - call void @dummy_use (i32* %mem, i32 10) - ret i32 123 - +; X86-Linux-LABEL: test_nested_unused: +; X86-Linux: # %bb.0: +; X86-Linux-NEXT: cmpl %gs:48, %esp +; X86-Linux-NEXT: jbe .LBB9_1 +; X86-Linux-NEXT: .LBB9_2: +; X86-Linux-NEXT: subl $52, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 52 +; X86-Linux-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Linux-NEXT: pushl $10 +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: pushl %eax +; X86-Linux-NEXT: .cfi_adjust_cfa_offset 4 +; X86-Linux-NEXT: calll dummy_use@PLT +; X86-Linux-NEXT: addl $16, %esp +; X86-Linux-NEXT: .cfi_adjust_cfa_offset -16 +; X86-Linux-NEXT: movl $123, %eax +; X86-Linux-NEXT: addl $44, %esp +; X86-Linux-NEXT: .cfi_def_cfa_offset 4 +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: .LBB9_1: +; X86-Linux-NEXT: pushl $0 +; X86-Linux-NEXT: pushl $44 +; X86-Linux-NEXT: calll __morestack +; X86-Linux-NEXT: retl +; X86-Linux-NEXT: jmp .LBB9_2 +; ; X64-Linux-LABEL: test_nested_unused: -; X64-Linux-NOT: movq %r10, %rax -; X64-Linux: callq __morestack -; X64-Linux-NOT: movq %rax, %r10 - +; X64-Linux: # %bb.0: +; X64-Linux-NEXT: cmpq %fs:112, %rsp +; X64-Linux-NEXT: jbe .LBB9_1 +; X64-Linux-NEXT: .LBB9_2: +; X64-Linux-NEXT: subq $40, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 48 +; X64-Linux-NEXT: movq %rsp, %rdi +; X64-Linux-NEXT: movl $10, %esi +; X64-Linux-NEXT: callq dummy_use@PLT +; X64-Linux-NEXT: movl $123, %eax 
+; X64-Linux-NEXT: addq $40, %rsp +; X64-Linux-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: .LBB9_1: +; X64-Linux-NEXT: movabsq $40, %r10 +; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: callq __morestack +; X64-Linux-NEXT: retq +; X64-Linux-NEXT: jmp .LBB9_2 +; +; X64-Linux-Large-LABEL: test_nested_unused: +; X64-Linux-Large: # %bb.0: +; X64-Linux-Large-NEXT: cmpq %fs:112, %rsp +; X64-Linux-Large-NEXT: jbe .LBB9_1 +; X64-Linux-Large-NEXT: .LBB9_2: +; X64-Linux-Large-NEXT: subq $40, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 48 +; X64-Linux-Large-NEXT: movabsq $dummy_use, %rax +; X64-Linux-Large-NEXT: movq %rsp, %rdi +; X64-Linux-Large-NEXT: movl $10, %esi +; X64-Linux-Large-NEXT: callq *%rax +; X64-Linux-Large-NEXT: movl $123, %eax +; X64-Linux-Large-NEXT: addq $40, %rsp +; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: .LBB9_1: +; X64-Linux-Large-NEXT: movabsq $40, %r10 +; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) +; X64-Linux-Large-NEXT: retq +; X64-Linux-Large-NEXT: jmp .LBB9_2 +; +; X32ABI-LABEL: test_nested_unused: +; X32ABI: # %bb.0: +; X32ABI-NEXT: cmpl %fs:64, %esp +; X32ABI-NEXT: jbe .LBB9_1 +; X32ABI-NEXT: .LBB9_2: +; X32ABI-NEXT: subl $40, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 48 +; X32ABI-NEXT: movl %esp, %edi +; X32ABI-NEXT: movl $10, %esi +; X32ABI-NEXT: callq dummy_use@PLT +; X32ABI-NEXT: movl $123, %eax +; X32ABI-NEXT: addl $40, %esp +; X32ABI-NEXT: .cfi_def_cfa_offset 8 +; X32ABI-NEXT: retq +; X32ABI-NEXT: .LBB9_1: +; X32ABI-NEXT: movl $40, %r10d +; X32ABI-NEXT: movl $0, %r11d +; X32ABI-NEXT: callq __morestack +; X32ABI-NEXT: retq +; X32ABI-NEXT: jmp .LBB9_2 +; +; X86-Darwin-LABEL: test_nested_unused: +; X86-Darwin: ## %bb.0: +; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0 +; X86-Darwin-NEXT: cmpl %gs:(%ecx), %esp +; X86-Darwin-NEXT: jbe LBB9_1 +; X86-Darwin-NEXT: LBB9_2: +; X86-Darwin-NEXT: subl $60, 
%esp +; X86-Darwin-NEXT: .cfi_def_cfa_offset 64 +; X86-Darwin-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-Darwin-NEXT: movl %eax, (%esp) +; X86-Darwin-NEXT: movl $10, {{[0-9]+}}(%esp) +; X86-Darwin-NEXT: calll _dummy_use +; X86-Darwin-NEXT: movl $123, %eax +; X86-Darwin-NEXT: addl $60, %esp +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: LBB9_1: +; X86-Darwin-NEXT: pushl $0 +; X86-Darwin-NEXT: pushl $60 +; X86-Darwin-NEXT: calll ___morestack +; X86-Darwin-NEXT: retl +; X86-Darwin-NEXT: jmp LBB9_2 +; ; X64-Darwin-LABEL: test_nested_unused: -; X64-Darwin-NOT: movq %r10, %rax -; X64-Darwin: callq ___morestack -; X64-Darwin-NOT: movq %rax, %r10 - +; X64-Darwin: ## %bb.0: +; X64-Darwin-NEXT: cmpq %gs:816, %rsp +; X64-Darwin-NEXT: jbe LBB9_1 +; X64-Darwin-NEXT: LBB9_2: +; X64-Darwin-NEXT: subq $40, %rsp +; X64-Darwin-NEXT: .cfi_def_cfa_offset 48 +; X64-Darwin-NEXT: movq %rsp, %rdi +; X64-Darwin-NEXT: movl $10, %esi +; X64-Darwin-NEXT: callq _dummy_use +; X64-Darwin-NEXT: movl $123, %eax +; X64-Darwin-NEXT: addq $40, %rsp +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: LBB9_1: +; X64-Darwin-NEXT: movabsq $40, %r10 +; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: callq ___morestack +; X64-Darwin-NEXT: retq +; X64-Darwin-NEXT: jmp LBB9_2 +; +; X86-MinGW-LABEL: test_nested_unused: +; X86-MinGW: # %bb.0: +; X86-MinGW-NEXT: cmpl %fs:20, %esp +; X86-MinGW-NEXT: jbe LBB9_1 +; X86-MinGW-NEXT: LBB9_2: +; X86-MinGW-NEXT: subl $40, %esp +; X86-MinGW-NEXT: .cfi_def_cfa_offset 44 +; X86-MinGW-NEXT: movl %esp, %eax +; X86-MinGW-NEXT: pushl $10 +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: pushl %eax +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset 4 +; X86-MinGW-NEXT: calll _dummy_use +; X86-MinGW-NEXT: addl $8, %esp +; X86-MinGW-NEXT: .cfi_adjust_cfa_offset -8 +; X86-MinGW-NEXT: movl $123, %eax +; X86-MinGW-NEXT: addl $40, %esp +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: LBB9_1: +; X86-MinGW-NEXT: .cfi_def_cfa_offset 4 +; X86-MinGW-NEXT: pushl $0 +; X86-MinGW-NEXT: pushl $40 
+; X86-MinGW-NEXT: calll ___morestack +; X86-MinGW-NEXT: retl +; X86-MinGW-NEXT: jmp LBB9_2 +; ; X64-FreeBSD-LABEL: test_nested_unused: -; X64-FreeBSD-NOT: movq %r10, %rax -; X64-FreeBSD: callq __morestack -; X64-FreeBSD-NOT: movq %rax, %r10 - +; X64-FreeBSD: # %bb.0: +; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp +; X64-FreeBSD-NEXT: jbe .LBB9_1 +; X64-FreeBSD-NEXT: .LBB9_2: +; X64-FreeBSD-NEXT: subq $40, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 48 +; X64-FreeBSD-NEXT: movq %rsp, %rdi +; X64-FreeBSD-NEXT: movl $10, %esi +; X64-FreeBSD-NEXT: callq dummy_use@PLT +; X64-FreeBSD-NEXT: movl $123, %eax +; X64-FreeBSD-NEXT: addq $40, %rsp +; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: .LBB9_1: +; X64-FreeBSD-NEXT: movabsq $40, %r10 +; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: callq __morestack +; X64-FreeBSD-NEXT: retq +; X64-FreeBSD-NEXT: jmp .LBB9_2 +; +; X86-DFlyBSD-LABEL: test_nested_unused: +; X86-DFlyBSD: # %bb.0: +; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp +; X86-DFlyBSD-NEXT: jbe .LBB9_1 +; X86-DFlyBSD-NEXT: .LBB9_2: +; X86-DFlyBSD-NEXT: subl $40, %esp +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 44 +; X86-DFlyBSD-NEXT: movl %esp, %eax +; X86-DFlyBSD-NEXT: pushl $10 +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: pushl %eax +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset 4 +; X86-DFlyBSD-NEXT: calll dummy_use@PLT +; X86-DFlyBSD-NEXT: addl $8, %esp +; X86-DFlyBSD-NEXT: .cfi_adjust_cfa_offset -8 +; X86-DFlyBSD-NEXT: movl $123, %eax +; X86-DFlyBSD-NEXT: addl $40, %esp +; X86-DFlyBSD-NEXT: .cfi_def_cfa_offset 4 +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: .LBB9_1: +; X86-DFlyBSD-NEXT: pushl $0 +; X86-DFlyBSD-NEXT: pushl $40 +; X86-DFlyBSD-NEXT: calll __morestack +; X86-DFlyBSD-NEXT: retl +; X86-DFlyBSD-NEXT: jmp .LBB9_2 +; ; X64-DFlyBSD-LABEL: test_nested_unused: -; X64-DFlyBSD-NOT: movq %r10, %rax -; X64-DFlyBSD: callq __morestack -; X64-DFlyBSD-NOT: movq %rax, %r10 - +; X64-DFlyBSD: # %bb.0: +; 
X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp +; X64-DFlyBSD-NEXT: jbe .LBB9_1 +; X64-DFlyBSD-NEXT: .LBB9_2: +; X64-DFlyBSD-NEXT: subq $40, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 48 +; X64-DFlyBSD-NEXT: movq %rsp, %rdi +; X64-DFlyBSD-NEXT: movl $10, %esi +; X64-DFlyBSD-NEXT: callq dummy_use@PLT +; X64-DFlyBSD-NEXT: movl $123, %eax +; X64-DFlyBSD-NEXT: addq $40, %rsp +; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: .LBB9_1: +; X64-DFlyBSD-NEXT: movabsq $40, %r10 +; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: callq __morestack +; X64-DFlyBSD-NEXT: retq +; X64-DFlyBSD-NEXT: jmp .LBB9_2 +; ; X64-MinGW-LABEL: test_nested_unused: -; X64-MinGW-NOT: movq %r10, %rax -; X64-MinGW: callq __morestack -; X64-MinGW-NOT: movq %rax, %r10 - -; X32ABI-LABEL: test_nested_unused: -; X32ABI-NOT: movl %r10d, %eax -; X32ABI: callq __morestack -; X32ABI-NOT: movq %rax, %r10 - +; X64-MinGW: # %bb.0: +; X64-MinGW-NEXT: cmpq %gs:40, %rsp +; X64-MinGW-NEXT: jbe .LBB9_1 +; X64-MinGW-NEXT: .LBB9_2: +; X64-MinGW-NEXT: subq $72, %rsp +; X64-MinGW-NEXT: .seh_stackalloc 72 +; X64-MinGW-NEXT: .seh_endprologue +; X64-MinGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; X64-MinGW-NEXT: movl $10, %edx +; X64-MinGW-NEXT: callq dummy_use +; X64-MinGW-NEXT: movl $123, %eax +; X64-MinGW-NEXT: addq $72, %rsp +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: .LBB9_1: +; X64-MinGW-NEXT: movabsq $72, %r10 +; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: callq __morestack +; X64-MinGW-NEXT: retq +; X64-MinGW-NEXT: jmp .LBB9_2 +; X64-MinGW-NEXT: .seh_endproc + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret i32 123 } attributes #0 = { "split-stack" } From a390c9905d4d1e7a7437fc1ab57f720c06618d79 Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Mon, 3 Jan 2022 11:09:50 -0800 Subject: [PATCH 444/992] [X86] Improve selection of the mov instruction in FrameLowering MOV64ri results in a significantly longer encoding, and use of this operator is 
fairly avoidable as we can always check the size of the immediate we're using. This is an updated version of D99045. Co-authored-by: Simonas Kazlauskas Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116458 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 43 ++-- .../CodeGen/X86/segmented-stacks-dynamic.ll | 4 +- llvm/test/CodeGen/X86/segmented-stacks.ll | 192 +++++++++--------- 3 files changed, 120 insertions(+), 119 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 0a7aea467809..82cd060f1c7f 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -149,6 +149,17 @@ static unsigned getLEArOpcode(bool IsLP64) { return IsLP64 ? X86::LEA64r : X86::LEA32r; } +static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) { + if (Use64BitReg) { + if (isUInt<32>(Imm)) + return X86::MOV32ri64; + if (isInt<32>(Imm)) + return X86::MOV64ri32; + return X86::MOV64ri; + } + return X86::MOV32ri; +} + static bool isEAXLiveIn(MachineBasicBlock &MBB) { for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { unsigned Reg = RegMask.PhysReg; @@ -237,11 +248,10 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, else Reg = TRI->findDeadCallerSavedReg(MBB, MBBI); - unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri; unsigned AddSubRROpc = isSub ? 
getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit); if (Reg) { - BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg) + BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg) .addImm(Offset) .setMIFlag(Flag); MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr) @@ -267,7 +277,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, Offset = -(Offset - SlotSize); else Offset = Offset + SlotSize; - BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax) + BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax) .addImm(Offset) .setMIFlag(Flag); MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax) @@ -1705,19 +1715,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; - if (isUInt<32>(Alloc)) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(Alloc) - .setMIFlag(MachineInstr::FrameSetup); - } else if (isInt<32>(Alloc)) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX) - .addImm(Alloc) - .setMIFlag(MachineInstr::FrameSetup); - } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) - .addImm(Alloc) - .setMIFlag(MachineInstr::FrameSetup); - } + BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX) + .addImm(Alloc) + .setMIFlag(MachineInstr::FrameSetup); } else { // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. @@ -2944,15 +2944,16 @@ void X86FrameLowering::adjustForSegmentedStacks( const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; - const unsigned MOVri = IsLP64 ? 
X86::MOV64ri : X86::MOV32ri; if (IsNested) BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10); - BuildMI(allocMBB, DL, TII.get(MOVri), Reg10) - .addImm(StackSize); - BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) - .addImm(X86FI->getArgumentStackSize()); + BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10) + .addImm(StackSize); + BuildMI(allocMBB, DL, + TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())), + Reg11) + .addImm(X86FI->getArgumentStackSize()); } else { BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); diff --git a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll index 4f9c2d77d171..60e0092060fd 100644 --- a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -118,8 +118,8 @@ define i32 @test_basic(i32 %l) #0 { ; X64-NEXT: .LBB0_1: ; X64-NEXT: .cfi_restore %rbx ; X64-NEXT: .cfi_restore %rbp -; X64-NEXT: movabsq $24, %r10 -; X64-NEXT: movabsq $0, %r11 +; X64-NEXT: movl $24, %r10d +; X64-NEXT: movl $0, %r11d ; X64-NEXT: callq __morestack ; X64-NEXT: retq ; X64-NEXT: jmp .LBB0_2 diff --git a/llvm/test/CodeGen/X86/segmented-stacks.ll b/llvm/test/CodeGen/X86/segmented-stacks.ll index 627825c78e27..f9dcc9faf831 100644 --- a/llvm/test/CodeGen/X86/segmented-stacks.ll +++ b/llvm/test/CodeGen/X86/segmented-stacks.ll @@ -72,8 +72,8 @@ define void @test_basic() #0 { ; X64-Linux-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: .LBB0_1: -; X64-Linux-NEXT: movabsq $40, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $40, %r10d +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp .LBB0_2 @@ -93,8 +93,8 @@ define void @test_basic() #0 { ; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: .LBB0_1: -; X64-Linux-Large-NEXT: movabsq $40, %r10 -; 
X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $40, %r10d +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: jmp .LBB0_2 @@ -153,8 +153,8 @@ define void @test_basic() #0 { ; X64-Darwin-NEXT: addq $40, %rsp ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB0_1: -; X64-Darwin-NEXT: movabsq $40, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $40, %r10d +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp LBB0_2 @@ -196,8 +196,8 @@ define void @test_basic() #0 { ; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: .LBB0_1: -; X64-FreeBSD-NEXT: movabsq $40, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $40, %r10d +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp .LBB0_2 @@ -239,8 +239,8 @@ define void @test_basic() #0 { ; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: .LBB0_1: -; X64-DFlyBSD-NEXT: movabsq $40, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $40, %r10d +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp .LBB0_2 @@ -260,8 +260,8 @@ define void @test_basic() #0 { ; X64-MinGW-NEXT: addq $72, %rsp ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB0_1: -; X64-MinGW-NEXT: movabsq $72, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $72, %r10d +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: jmp .LBB0_2 @@ -332,8 +332,8 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-Linux-NEXT: .LBB1_1: ; X64-Linux-NEXT: .cfi_restore %rbx ; X64-Linux-NEXT: movq %r10, %rax -; X64-Linux-NEXT: movabsq $56, %r10 -; X64-Linux-NEXT: 
movabsq $0, %r11 +; X64-Linux-NEXT: movl $56, %r10d +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: movq %rax, %r10 @@ -364,8 +364,8 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-Linux-Large-NEXT: .LBB1_1: ; X64-Linux-Large-NEXT: .cfi_restore %rbx ; X64-Linux-Large-NEXT: movq %r10, %rax -; X64-Linux-Large-NEXT: movabsq $56, %r10 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $56, %r10d +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: movq %rax, %r10 @@ -451,8 +451,8 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB1_1: ; X64-Darwin-NEXT: movq %r10, %rax -; X64-Darwin-NEXT: movabsq $56, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $56, %r10d +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: movq %rax, %r10 @@ -515,8 +515,8 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-FreeBSD-NEXT: .LBB1_1: ; X64-FreeBSD-NEXT: .cfi_restore %rbx ; X64-FreeBSD-NEXT: movq %r10, %rax -; X64-FreeBSD-NEXT: movabsq $56, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $56, %r10d +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: movq %rax, %r10 @@ -580,8 +580,8 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; X64-DFlyBSD-NEXT: .LBB1_1: ; X64-DFlyBSD-NEXT: .cfi_restore %rbx ; X64-DFlyBSD-NEXT: movq %r10, %rax -; X64-DFlyBSD-NEXT: movabsq $56, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $56, %r10d +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: movq %rax, %r10 @@ -608,8 +608,8 @@ define i32 @test_nested(i32 * nest 
%closure, i32 %other) #0 { ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB1_1: ; X64-MinGW-NEXT: movq %r10, %rax -; X64-MinGW-NEXT: movabsq $88, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $88, %r10d +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: movq %rax, %r10 @@ -662,8 +662,8 @@ define void @test_large() #0 { ; X64-Linux-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: .LBB2_1: -; X64-Linux-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp .LBB2_2 @@ -684,8 +684,8 @@ define void @test_large() #0 { ; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: .LBB2_1: -; X64-Linux-Large-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: jmp .LBB2_2 @@ -747,8 +747,8 @@ define void @test_large() #0 { ; X64-Darwin-NEXT: addq $40008, %rsp ## imm = 0x9C48 ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB2_1: -; X64-Darwin-NEXT: movabsq $40008, %r10 ## imm = 0x9C48 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $40008, %r10d ## imm = 0x9C48 +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp LBB2_2 @@ -793,8 +793,8 @@ define void @test_large() #0 { ; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: .LBB2_1: -; X64-FreeBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-FreeBSD-NEXT: movl $0, %r11d 
; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp .LBB2_2 @@ -838,8 +838,8 @@ define void @test_large() #0 { ; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: .LBB2_1: -; X64-DFlyBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp .LBB2_2 @@ -862,8 +862,8 @@ define void @test_large() #0 { ; X64-MinGW-NEXT: addq $40040, %rsp # imm = 0x9C68 ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB2_1: -; X64-MinGW-NEXT: movabsq $40040, %r10 # imm = 0x9C68 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $40040, %r10d # imm = 0x9C68 +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: jmp .LBB2_2 @@ -911,8 +911,8 @@ define fastcc void @test_fastcc() #0 { ; X64-Linux-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: .LBB3_1: -; X64-Linux-NEXT: movabsq $40, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $40, %r10d +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp .LBB3_2 @@ -932,8 +932,8 @@ define fastcc void @test_fastcc() #0 { ; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: .LBB3_1: -; X64-Linux-Large-NEXT: movabsq $40, %r10 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $40, %r10d +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: jmp .LBB3_2 @@ -992,8 +992,8 @@ define fastcc void @test_fastcc() #0 { ; X64-Darwin-NEXT: addq $40, %rsp ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB3_1: -; X64-Darwin-NEXT: movabsq $40, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 +; 
X64-Darwin-NEXT: movl $40, %r10d +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp LBB3_2 @@ -1035,8 +1035,8 @@ define fastcc void @test_fastcc() #0 { ; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: .LBB3_1: -; X64-FreeBSD-NEXT: movabsq $40, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $40, %r10d +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp .LBB3_2 @@ -1078,8 +1078,8 @@ define fastcc void @test_fastcc() #0 { ; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: .LBB3_1: -; X64-DFlyBSD-NEXT: movabsq $40, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $40, %r10d +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp .LBB3_2 @@ -1099,8 +1099,8 @@ define fastcc void @test_fastcc() #0 { ; X64-MinGW-NEXT: addq $72, %rsp ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB3_1: -; X64-MinGW-NEXT: movabsq $72, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $72, %r10d +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: jmp .LBB3_2 @@ -1150,8 +1150,8 @@ define fastcc void @test_fastcc_large() #0 { ; X64-Linux-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: .LBB4_1: -; X64-Linux-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp .LBB4_2 @@ -1172,8 +1172,8 @@ define fastcc void @test_fastcc_large() #0 { ; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: .LBB4_1: -; X64-Linux-Large-NEXT: movabsq $40008, %r10 # imm = 
0x9C48 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: jmp .LBB4_2 @@ -1235,8 +1235,8 @@ define fastcc void @test_fastcc_large() #0 { ; X64-Darwin-NEXT: addq $40008, %rsp ## imm = 0x9C48 ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB4_1: -; X64-Darwin-NEXT: movabsq $40008, %r10 ## imm = 0x9C48 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $40008, %r10d ## imm = 0x9C48 +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp LBB4_2 @@ -1281,8 +1281,8 @@ define fastcc void @test_fastcc_large() #0 { ; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: .LBB4_1: -; X64-FreeBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp .LBB4_2 @@ -1326,8 +1326,8 @@ define fastcc void @test_fastcc_large() #0 { ; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: .LBB4_1: -; X64-DFlyBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp .LBB4_2 @@ -1350,8 +1350,8 @@ define fastcc void @test_fastcc_large() #0 { ; X64-MinGW-NEXT: addq $40040, %rsp # imm = 0x9C68 ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB4_1: -; X64-MinGW-NEXT: movabsq $40040, %r10 # imm = 0x9C68 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $40040, %r10d # imm = 0x9C68 +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; 
X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: jmp .LBB4_2 @@ -1403,8 +1403,8 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { ; X64-Linux-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: .LBB5_1: -; X64-Linux-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp .LBB5_2 @@ -1425,8 +1425,8 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { ; X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: .LBB5_1: -; X64-Linux-Large-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: jmp .LBB5_2 @@ -1490,8 +1490,8 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { ; X64-Darwin-NEXT: addq $40008, %rsp ## imm = 0x9C48 ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB5_1: -; X64-Darwin-NEXT: movabsq $40008, %r10 ## imm = 0x9C48 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $40008, %r10d ## imm = 0x9C48 +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp LBB5_2 @@ -1536,8 +1536,8 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { ; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: .LBB5_1: -; X64-FreeBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp .LBB5_2 @@ -1581,8 +1581,8 @@ define fastcc void 
@test_fastcc_large_with_ecx_arg(i32 %a) #0 { ; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: .LBB5_1: -; X64-DFlyBSD-NEXT: movabsq $40008, %r10 # imm = 0x9C48 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $40008, %r10d # imm = 0x9C48 +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp .LBB5_2 @@ -1605,8 +1605,8 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 { ; X64-MinGW-NEXT: addq $40040, %rsp # imm = 0x9C68 ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB5_1: -; X64-MinGW-NEXT: movabsq $40040, %r10 # imm = 0x9C68 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $40040, %r10d # imm = 0x9C68 +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: jmp .LBB5_2 @@ -1737,8 +1737,8 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { ; X64-Linux-NEXT: # %bb.2: ; X64-Linux-NEXT: jmp callee@PLT # TAILCALL ; X64-Linux-NEXT: .LBB8_1: -; X64-Linux-NEXT: movabsq $0, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $0, %r10d +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp callee@PLT # TAILCALL @@ -1751,8 +1751,8 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { ; X64-Linux-Large-NEXT: movabsq $callee, %rax ; X64-Linux-Large-NEXT: jmpq *%rax # TAILCALL ; X64-Linux-Large-NEXT: .LBB8_1: -; X64-Linux-Large-NEXT: movabsq $0, %r10 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $0, %r10d +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: movabsq $callee, %rax @@ -1792,8 +1792,8 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { ; X64-Darwin-NEXT: ## %bb.2: ; X64-Darwin-NEXT: jmp _callee ## TAILCALL ; X64-Darwin-NEXT: LBB8_1: -; X64-Darwin-NEXT: movabsq $0, 
%r10 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $0, %r10d +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp _callee ## TAILCALL @@ -1818,8 +1818,8 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { ; X64-FreeBSD-NEXT: # %bb.2: ; X64-FreeBSD-NEXT: jmp callee@PLT # TAILCALL ; X64-FreeBSD-NEXT: .LBB8_1: -; X64-FreeBSD-NEXT: movabsq $0, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $0, %r10d +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp callee@PLT # TAILCALL @@ -1844,8 +1844,8 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { ; X64-DFlyBSD-NEXT: # %bb.2: ; X64-DFlyBSD-NEXT: jmp callee@PLT # TAILCALL ; X64-DFlyBSD-NEXT: .LBB8_1: -; X64-DFlyBSD-NEXT: movabsq $0, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $0, %r10d +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp callee@PLT # TAILCALL @@ -1857,8 +1857,8 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { ; X64-MinGW-NEXT: # %bb.2: ; X64-MinGW-NEXT: jmp callee # TAILCALL ; X64-MinGW-NEXT: .LBB8_1: -; X64-MinGW-NEXT: movabsq $0, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $0, %r10d +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: jmp callee # TAILCALL @@ -1910,8 +1910,8 @@ define i32 @test_nested_unused(i32 * nest %unused) #0 { ; X64-Linux-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: .LBB9_1: -; X64-Linux-NEXT: movabsq $40, %r10 -; X64-Linux-NEXT: movabsq $0, %r11 +; X64-Linux-NEXT: movl $40, %r10d +; X64-Linux-NEXT: movl $0, %r11d ; X64-Linux-NEXT: callq __morestack ; X64-Linux-NEXT: retq ; X64-Linux-NEXT: jmp .LBB9_2 @@ -1932,8 +1932,8 @@ define i32 @test_nested_unused(i32 * nest %unused) #0 { ; 
X64-Linux-Large-NEXT: .cfi_def_cfa_offset 8 ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: .LBB9_1: -; X64-Linux-Large-NEXT: movabsq $40, %r10 -; X64-Linux-Large-NEXT: movabsq $0, %r11 +; X64-Linux-Large-NEXT: movl $40, %r10d +; X64-Linux-Large-NEXT: movl $0, %r11d ; X64-Linux-Large-NEXT: callq *__morestack_addr(%rip) ; X64-Linux-Large-NEXT: retq ; X64-Linux-Large-NEXT: jmp .LBB9_2 @@ -1995,8 +1995,8 @@ define i32 @test_nested_unused(i32 * nest %unused) #0 { ; X64-Darwin-NEXT: addq $40, %rsp ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: LBB9_1: -; X64-Darwin-NEXT: movabsq $40, %r10 -; X64-Darwin-NEXT: movabsq $0, %r11 +; X64-Darwin-NEXT: movl $40, %r10d +; X64-Darwin-NEXT: movl $0, %r11d ; X64-Darwin-NEXT: callq ___morestack ; X64-Darwin-NEXT: retq ; X64-Darwin-NEXT: jmp LBB9_2 @@ -2042,8 +2042,8 @@ define i32 @test_nested_unused(i32 * nest %unused) #0 { ; X64-FreeBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: .LBB9_1: -; X64-FreeBSD-NEXT: movabsq $40, %r10 -; X64-FreeBSD-NEXT: movabsq $0, %r11 +; X64-FreeBSD-NEXT: movl $40, %r10d +; X64-FreeBSD-NEXT: movl $0, %r11d ; X64-FreeBSD-NEXT: callq __morestack ; X64-FreeBSD-NEXT: retq ; X64-FreeBSD-NEXT: jmp .LBB9_2 @@ -2089,8 +2089,8 @@ define i32 @test_nested_unused(i32 * nest %unused) #0 { ; X64-DFlyBSD-NEXT: .cfi_def_cfa_offset 8 ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: .LBB9_1: -; X64-DFlyBSD-NEXT: movabsq $40, %r10 -; X64-DFlyBSD-NEXT: movabsq $0, %r11 +; X64-DFlyBSD-NEXT: movl $40, %r10d +; X64-DFlyBSD-NEXT: movl $0, %r11d ; X64-DFlyBSD-NEXT: callq __morestack ; X64-DFlyBSD-NEXT: retq ; X64-DFlyBSD-NEXT: jmp .LBB9_2 @@ -2110,8 +2110,8 @@ define i32 @test_nested_unused(i32 * nest %unused) #0 { ; X64-MinGW-NEXT: addq $72, %rsp ; X64-MinGW-NEXT: retq ; X64-MinGW-NEXT: .LBB9_1: -; X64-MinGW-NEXT: movabsq $72, %r10 -; X64-MinGW-NEXT: movabsq $32, %r11 +; X64-MinGW-NEXT: movl $72, %r10d +; X64-MinGW-NEXT: movl $32, %r11d ; X64-MinGW-NEXT: callq __morestack ; X64-MinGW-NEXT: retq ; 
X64-MinGW-NEXT: jmp .LBB9_2 From 92417eaf3329dc823c905ec6a608b83ac62b4f7c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 27 Dec 2021 20:42:11 +0100 Subject: [PATCH 445/992] [CodeCompletion] Signature help for braced constructor calls Implementation is based on the "expected type" as used for designated-initializers in braced init lists. This means it can deduce the type in some cases where it's not written: void foo(Widget); foo({ /*help here*/ }); Only basic constructor calls are in scope of this patch, excluded are: - aggregate initialization (no help is offered for aggregates) - initializer_list initialization (no help is offered for these constructors) Fixes https://github.com/clangd/clangd/issues/306 Differential Revision: https://reviews.llvm.org/D116317 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 2 +- clang-tools-extra/clangd/CodeComplete.cpp | 11 ++-- .../clangd/test/initialize-params.test | 4 +- .../clangd/unittests/CodeCompleteTests.cpp | 34 +++++++++++++ .../include/clang/Sema/CodeCompleteConsumer.h | 16 +++--- clang/include/clang/Sema/Sema.h | 13 +++-- clang/lib/Frontend/ASTUnit.cpp | 5 +- clang/lib/Parse/ParseDecl.cpp | 6 ++- clang/lib/Parse/ParseDeclCXX.cpp | 4 +- clang/lib/Parse/ParseExprCXX.cpp | 5 +- clang/lib/Parse/ParseInit.cpp | 16 ++++-- clang/lib/Parse/ParseOpenMP.cpp | 2 +- clang/lib/Sema/CodeCompleteConsumer.cpp | 4 +- clang/lib/Sema/SemaCodeComplete.cpp | 50 +++++++++++++------ clang/test/CodeCompletion/ctor-signature.cpp | 37 ++++++++++++++ clang/tools/libclang/CIndexCodeCompletion.cpp | 11 ++-- .../Clang/ClangExpressionParser.cpp | 3 +- 17 files changed, 169 insertions(+), 54 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 774cdea218d0..edde19f96202 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -555,7 +555,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, }}, 
{"signatureHelpProvider", llvm::json::Object{ - {"triggerCharacters", {"(", ",", ")", "<", ">"}}, + {"triggerCharacters", {"(", ")", "{", "}", "<", ">", ","}}, }}, {"declarationProvider", true}, {"definitionProvider", true}, diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index bdfa1df19453..53d8f0d6cdeb 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -921,7 +921,8 @@ class SignatureHelpCollector final : public CodeCompleteConsumer { void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) override { + SourceLocation OpenParLoc, + bool Braced) override { assert(!OpenParLoc.isInvalid()); SourceManager &SrcMgr = S.getSourceManager(); OpenParLoc = SrcMgr.getFileLoc(OpenParLoc); @@ -961,8 +962,9 @@ class SignatureHelpCollector final : public CodeCompleteConsumer { paramIndexForArg(Candidate, SigHelp.activeParameter); } - const auto *CCS = Candidate.CreateSignatureString( - CurrentArg, S, *Allocator, CCTUInfo, true); + const auto *CCS = + Candidate.CreateSignatureString(CurrentArg, S, *Allocator, CCTUInfo, + /*IncludeBriefComment=*/true, Braced); assert(CCS && "Expected the CodeCompletionString to be non-null"); ScoredSignatures.push_back(processOverloadCandidate( Candidate, *CCS, @@ -1163,7 +1165,8 @@ class ParamNameCollector final : public CodeCompleteConsumer { void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) override { + SourceLocation OpenParLoc, + bool Braced) override { assert(CurrentArg <= (unsigned)std::numeric_limits::max() && "too many arguments"); diff --git a/clang-tools-extra/clangd/test/initialize-params.test b/clang-tools-extra/clangd/test/initialize-params.test index 72823f3a0683..2affc8b2466d 100644 --- a/clang-tools-extra/clangd/test/initialize-params.test +++ 
b/clang-tools-extra/clangd/test/initialize-params.test @@ -107,10 +107,12 @@ # CHECK-NEXT: "signatureHelpProvider": { # CHECK-NEXT: "triggerCharacters": [ # CHECK-NEXT: "(", -# CHECK-NEXT: ",", # CHECK-NEXT: ")", +# CHECK-NEXT: "{", +# CHECK-NEXT: "}", # CHECK-NEXT: "<", # CHECK-NEXT: ">" +# CHECK-NEXT: "," # CHECK-NEXT: ] # CHECK-NEXT: }, # CHECK-NEXT: "textDocumentSync": { diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index d32950fd6e13..9d5c57670be1 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1212,6 +1212,10 @@ struct ExpectedParameter { std::string Text; std::pair Offsets; }; +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const ExpectedParameter &P) { + return OS << P.Text; +} MATCHER_P(ParamsAre, P, "") { if (P.size() != arg.parameters.size()) return false; @@ -1260,6 +1264,36 @@ TEST(SignatureHelpTest, Overloads) { EXPECT_EQ(0, Results.activeParameter); } +TEST(SignatureHelpTest, Constructors) { + std::string Top = R"cpp( + struct S { + S(int); + S(const S &) = delete; + }; + )cpp"; + + auto CheckParenInit = [&](std::string Init) { + EXPECT_THAT(signatures(Top + Init).signatures, + UnorderedElementsAre(Sig("S([[int]])"))) + << Init; + }; + CheckParenInit("S s(^);"); + CheckParenInit("auto s = S(^);"); + CheckParenInit("auto s = new S(^);"); + + auto CheckBracedInit = [&](std::string Init) { + EXPECT_THAT(signatures(Top + Init).signatures, + UnorderedElementsAre(Sig("S{[[int]]}"))) + << Init; + }; + CheckBracedInit("S s{^};"); + CheckBracedInit("S s = {^};"); + CheckBracedInit("auto s = S{^};"); + // FIXME: doesn't work: no ExpectedType set in ParseCXXNewExpression. 
+ // CheckBracedInit("auto s = new S{^};"); + CheckBracedInit("int x(S); int i = x({^});"); +} + TEST(SignatureHelpTest, OverloadInitListRegression) { auto Results = signatures(R"cpp( struct A {int x;}; diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h index 7a369dfd6a43..70c34703f0a0 100644 --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -1081,11 +1081,11 @@ class CodeCompleteConsumer { /// Create a new code-completion string that describes the function /// signature of this overload candidate. - CodeCompletionString *CreateSignatureString(unsigned CurrentArg, - Sema &S, - CodeCompletionAllocator &Allocator, - CodeCompletionTUInfo &CCTUInfo, - bool IncludeBriefComments) const; + CodeCompletionString * + CreateSignatureString(unsigned CurrentArg, Sema &S, + CodeCompletionAllocator &Allocator, + CodeCompletionTUInfo &CCTUInfo, + bool IncludeBriefComments, bool Braced) const; }; CodeCompleteConsumer(const CodeCompleteOptions &CodeCompleteOpts) @@ -1159,7 +1159,8 @@ class CodeCompleteConsumer { virtual void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) {} + SourceLocation OpenParLoc, + bool Braced) {} //@} /// Retrieve the allocator that will be used to allocate @@ -1210,7 +1211,8 @@ class PrintingCodeCompleteConsumer : public CodeCompleteConsumer { void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) override; + SourceLocation OpenParLoc, + bool Braced) override; bool isResultFilteredOut(StringRef Filter, CodeCompletionResult Results) override; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index bb13d9527175..016a12e3b1fe 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12542,13 +12542,12 @@ 
class Sema final { QualType ProduceConstructorSignatureHelp(Scope *S, QualType Type, SourceLocation Loc, ArrayRef Args, - SourceLocation OpenParLoc); - QualType ProduceCtorInitMemberSignatureHelp(Scope *S, Decl *ConstructorDecl, - CXXScopeSpec SS, - ParsedType TemplateTypeTy, - ArrayRef ArgExprs, - IdentifierInfo *II, - SourceLocation OpenParLoc); + SourceLocation OpenParLoc, + bool Braced); + QualType ProduceCtorInitMemberSignatureHelp( + Scope *S, Decl *ConstructorDecl, CXXScopeSpec SS, + ParsedType TemplateTypeTy, ArrayRef ArgExprs, IdentifierInfo *II, + SourceLocation OpenParLoc, bool Braced); QualType ProduceTemplateArgumentSignatureHelp( TemplateTy, ArrayRef, SourceLocation LAngleLoc); void CodeCompleteInitializer(Scope *S, Decl *D); diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 52589677ca28..e82b7fe6db9e 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -1922,9 +1922,10 @@ namespace { void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) override { + SourceLocation OpenParLoc, + bool Braced) override { Next.ProcessOverloadCandidates(S, CurrentArg, Candidates, NumCandidates, - OpenParLoc); + OpenParLoc, Braced); } CodeCompletionAllocator &getAllocator() override { diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 0c1f88bc51d1..5900075e5a90 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2420,7 +2420,8 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( auto RunSignatureHelp = [&]() { QualType PreferredType = Actions.ProduceConstructorSignatureHelp( getCurScope(), ThisVarDecl->getType()->getCanonicalTypeInternal(), - ThisDecl->getLocation(), Exprs, T.getOpenLocation()); + ThisDecl->getLocation(), Exprs, T.getOpenLocation(), + /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; @@ -2440,7 +2441,8 @@ Decl 
*Parser::ParseDeclarationAfterDeclaratorAndAttributes( if (ThisVarDecl && PP.isCodeCompletionReached() && !CalledSignatureHelp) { Actions.ProduceConstructorSignatureHelp( getCurScope(), ThisVarDecl->getType()->getCanonicalTypeInternal(), - ThisDecl->getLocation(), Exprs, T.getOpenLocation()); + ThisDecl->getLocation(), Exprs, T.getOpenLocation(), + /*Braced=*/false); CalledSignatureHelp = true; } Actions.ActOnInitializerError(ThisDecl); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index f5a6ffcff9e9..942b813b3935 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -3740,8 +3740,8 @@ MemInitResult Parser::ParseMemInitializer(Decl *ConstructorDecl) { if (TemplateTypeTy.isInvalid()) return QualType(); QualType PreferredType = Actions.ProduceCtorInitMemberSignatureHelp( - getCurScope(), ConstructorDecl, SS, TemplateTypeTy.get(), ArgExprs, II, - T.getOpenLocation()); + getCurScope(), ConstructorDecl, SS, TemplateTypeTy.get(), ArgExprs, + II, T.getOpenLocation(), /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 9cdc16f8ce8d..0ba0fd529002 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1878,7 +1878,7 @@ Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) { if (TypeRep) PreferredType = Actions.ProduceConstructorSignatureHelp( getCurScope(), TypeRep.get()->getCanonicalTypeInternal(), - DS.getEndLoc(), Exprs, T.getOpenLocation()); + DS.getEndLoc(), Exprs, T.getOpenLocation(), /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; @@ -3168,7 +3168,8 @@ Parser::ParseCXXNewExpression(bool UseGlobal, SourceLocation Start) { if (TypeRep) PreferredType = Actions.ProduceConstructorSignatureHelp( getCurScope(), TypeRep.get()->getCanonicalTypeInternal(), - DeclaratorInfo.getEndLoc(), ConstructorArgs, ConstructorLParen); + 
DeclaratorInfo.getEndLoc(), ConstructorArgs, ConstructorLParen, + /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp index 9d9c03d28a97..efb162af642d 100644 --- a/clang/lib/Parse/ParseInit.cpp +++ b/clang/lib/Parse/ParseInit.cpp @@ -459,12 +459,22 @@ ExprResult Parser::ParseBraceInitializer() { Actions, EnterExpressionEvaluationContext::InitList); bool InitExprsOk = true; - DesignatorCompletionInfo DesignatorCompletion{ - InitExprs, - PreferredType.get(T.getOpenLocation()), + QualType LikelyType = PreferredType.get(T.getOpenLocation()); + DesignatorCompletionInfo DesignatorCompletion{InitExprs, LikelyType}; + bool CalledSignatureHelp = false; + auto RunSignatureHelp = [&] { + QualType PreferredType; + if (!LikelyType.isNull()) + PreferredType = Actions.ProduceConstructorSignatureHelp( + getCurScope(), LikelyType->getCanonicalTypeInternal(), + T.getOpenLocation(), InitExprs, T.getOpenLocation(), /*Braced=*/true); + CalledSignatureHelp = true; + return PreferredType; }; while (1) { + PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp); + // Handle Microsoft __if_exists/if_not_exists if necessary. 
if (getLangOpts().MicrosoftExt && (Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists))) { diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 7c783ef0b02b..2500cf834a34 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -471,7 +471,7 @@ void Parser::ParseOpenMPReductionInitializerForDecl(VarDecl *OmpPrivParm) { auto RunSignatureHelp = [this, OmpPrivParm, LParLoc, &Exprs]() { QualType PreferredType = Actions.ProduceConstructorSignatureHelp( getCurScope(), OmpPrivParm->getType()->getCanonicalTypeInternal(), - OmpPrivParm->getLocation(), Exprs, LParLoc); + OmpPrivParm->getLocation(), Exprs, LParLoc, /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index f0968ed0e503..bb088fd5fe97 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -656,7 +656,7 @@ static std::string getOverloadAsString(const CodeCompletionString &CCS) { void PrintingCodeCompleteConsumer::ProcessOverloadCandidates( Sema &SemaRef, unsigned CurrentArg, OverloadCandidate *Candidates, - unsigned NumCandidates, SourceLocation OpenParLoc) { + unsigned NumCandidates, SourceLocation OpenParLoc, bool Braced) { OS << "OPENING_PAREN_LOC: "; OpenParLoc.print(OS, SemaRef.getSourceManager()); OS << "\n"; @@ -664,7 +664,7 @@ void PrintingCodeCompleteConsumer::ProcessOverloadCandidates( for (unsigned I = 0; I != NumCandidates; ++I) { if (CodeCompletionString *CCS = Candidates[I].CreateSignatureString( CurrentArg, SemaRef, getAllocator(), CCTUInfo, - includeBriefComments())) { + includeBriefComments(), Braced)) { OS << "OVERLOAD: " << getOverloadAsString(*CCS) << "\n"; } } diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index e81faf6d2a93..d3c154f18937 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ 
b/clang/lib/Sema/SemaCodeComplete.cpp @@ -3833,7 +3833,8 @@ static CodeCompletionString *createTemplateSignatureString( CodeCompletionString * CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( unsigned CurrentArg, Sema &S, CodeCompletionAllocator &Allocator, - CodeCompletionTUInfo &CCTUInfo, bool IncludeBriefComments) const { + CodeCompletionTUInfo &CCTUInfo, bool IncludeBriefComments, + bool Braced) const { PrintingPolicy Policy = getCompletionPrintingPolicy(S); // Show signatures of constructors as they are declared: // vector(int n) rather than vector(int n) @@ -3857,9 +3858,11 @@ CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( const FunctionType *FT = getFunctionType(); Result.AddResultTypeChunk(Result.getAllocator().CopyString( FT->getReturnType().getAsString(Policy))); - Result.AddChunk(CodeCompletionString::CK_LeftParen); + Result.AddChunk(Braced ? CodeCompletionString::CK_LeftBrace + : CodeCompletionString::CK_LeftParen); Result.AddChunk(CodeCompletionString::CK_CurrentParameter, "..."); - Result.AddChunk(CodeCompletionString::CK_RightParen); + Result.AddChunk(Braced ? CodeCompletionString::CK_RightBrace + : CodeCompletionString::CK_RightParen); return Result.TakeString(); } @@ -3879,10 +3882,12 @@ CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( Proto->getReturnType().getAsString(Policy))); } - Result.AddChunk(CodeCompletionString::CK_LeftParen); + Result.AddChunk(Braced ? CodeCompletionString::CK_LeftBrace + : CodeCompletionString::CK_LeftParen); AddOverloadParameterChunks(S.getASTContext(), Policy, FDecl, Proto, Result, CurrentArg); - Result.AddChunk(CodeCompletionString::CK_RightParen); + Result.AddChunk(Braced ? 
CodeCompletionString::CK_RightBrace + : CodeCompletionString::CK_RightParen); return Result.TakeString(); } @@ -5940,12 +5945,14 @@ static QualType getParamType(Sema &SemaRef, static QualType ProduceSignatureHelp(Sema &SemaRef, MutableArrayRef Candidates, - unsigned CurrentArg, SourceLocation OpenParLoc) { + unsigned CurrentArg, SourceLocation OpenParLoc, + bool Braced) { if (Candidates.empty()) return QualType(); if (SemaRef.getPreprocessor().isCodeCompletionReached()) SemaRef.CodeCompleter->ProcessOverloadCandidates( - SemaRef, CurrentArg, Candidates.data(), Candidates.size(), OpenParLoc); + SemaRef, CurrentArg, Candidates.data(), Candidates.size(), OpenParLoc, + Braced); return getParamType(SemaRef, Candidates, CurrentArg); } @@ -6047,15 +6054,16 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, } } mergeCandidatesWithResults(*this, Results, CandidateSet, Loc, Args.size()); - QualType ParamType = - ProduceSignatureHelp(*this, Results, Args.size(), OpenParLoc); + QualType ParamType = ProduceSignatureHelp(*this, Results, Args.size(), + OpenParLoc, /*Braced=*/false); return !CandidateSet.empty() ? ParamType : QualType(); } QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, SourceLocation Loc, ArrayRef Args, - SourceLocation OpenParLoc) { + SourceLocation OpenParLoc, + bool Braced) { if (!CodeCompleter) return QualType(); @@ -6064,6 +6072,10 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, isCompleteType(Loc, Type) ? Type->getAsCXXRecordDecl() : nullptr; if (!RD) return Type; + // FIXME: we don't support signature help for aggregate initialization, so + // don't offer a confusing partial list (e.g. the copy constructor). + if (Braced && RD->isAggregate()) + return Type; // FIXME: Provide support for member initializers. // FIXME: Provide support for variadic template constructors. 
@@ -6072,12 +6084,20 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, for (NamedDecl *C : LookupConstructors(RD)) { if (auto *FD = dyn_cast(C)) { + // FIXME: we can't yet provide correct signature help for initializer + // list constructors, so skip them entirely. + if (Braced && LangOpts.CPlusPlus && isInitListConstructor(FD)) + continue; AddOverloadCandidate(FD, DeclAccessPair::make(FD, C->getAccess()), Args, CandidateSet, /*SuppressUserConversions=*/false, /*PartialOverloading=*/true, /*AllowExplicit*/ true); } else if (auto *FTD = dyn_cast(C)) { + if (Braced && LangOpts.CPlusPlus && + isInitListConstructor(FTD->getTemplatedDecl())) + continue; + AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, C->getAccess()), /*ExplicitTemplateArgs=*/nullptr, Args, CandidateSet, @@ -6088,12 +6108,13 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, SmallVector Results; mergeCandidatesWithResults(*this, Results, CandidateSet, Loc, Args.size()); - return ProduceSignatureHelp(*this, Results, Args.size(), OpenParLoc); + return ProduceSignatureHelp(*this, Results, Args.size(), OpenParLoc, Braced); } QualType Sema::ProduceCtorInitMemberSignatureHelp( Scope *S, Decl *ConstructorDecl, CXXScopeSpec SS, ParsedType TemplateTypeTy, - ArrayRef ArgExprs, IdentifierInfo *II, SourceLocation OpenParLoc) { + ArrayRef ArgExprs, IdentifierInfo *II, SourceLocation OpenParLoc, + bool Braced) { if (!CodeCompleter) return QualType(); @@ -6106,7 +6127,7 @@ QualType Sema::ProduceCtorInitMemberSignatureHelp( Constructor->getParent(), SS, TemplateTypeTy, II)) return ProduceConstructorSignatureHelp(getCurScope(), MemberDecl->getType(), MemberDecl->getLocation(), ArgExprs, - OpenParLoc); + OpenParLoc, Braced); return QualType(); } @@ -6159,7 +6180,8 @@ QualType Sema::ProduceTemplateArgumentSignatureHelp( if (const auto *TD = llvm::dyn_cast(ND)) Consider(TD); } - return ProduceSignatureHelp(*this, Results, Args.size(), LAngleLoc); + return 
ProduceSignatureHelp(*this, Results, Args.size(), LAngleLoc, + /*Braced=*/false); } static QualType getDesignatedType(QualType BaseType, const Designation &Desig) { diff --git a/clang/test/CodeCompletion/ctor-signature.cpp b/clang/test/CodeCompletion/ctor-signature.cpp index 4dbd92300566..b02c8811bbcf 100644 --- a/clang/test/CodeCompletion/ctor-signature.cpp +++ b/clang/test/CodeCompletion/ctor-signature.cpp @@ -15,3 +15,40 @@ void foo() { // CHECK-CC2: OVERLOAD: Foo(<#const Foo &#>) // CHECK-CC2: OVERLOAD: Foo(<#Foo &&#> } + +namespace std { +template struct initializer_list {}; +} // namespace std + +struct Bar { + // CHECK-BRACED: OVERLOAD: Bar{<#int#>} + Bar(int); + // CHECK-BRACED: OVERLOAD: Bar{<#double#>, double} + Bar(double, double); + // FIXME: no support for init-list constructors yet. + // CHECK-BRACED-NOT: OVERLOAD: {{.*}}char + Bar(std::initializer_list C); + // CHECK-BRACED: OVERLOAD: Bar{<#const Bar &#>} + // CHECK-BRACED: OVERLOAD: Bar{<#T *Pointer#>} + template Bar(T *Pointer); +}; + +auto b1 = Bar{}; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:36:15 %s | FileCheck -check-prefix=CHECK-BRACED %s +Bar b2{}; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:38:8 %s | FileCheck -check-prefix=CHECK-BRACED %s +static int consumeBar(Bar) { return 0; } +int b3 = consumeBar({}); +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:41:22 %s | FileCheck -check-prefix=CHECK-BRACED %s + +struct Aggregate { + // FIXME: no support for aggregates yet. 
+ // CHECK-AGGREGATE-NOT: OVERLOAD: Aggregate{<#const Aggregate &#>} + // CHECK-AGGREGATE-NOT: OVERLOAD: {{.*}}first + int first; + int second; +}; + +Aggregate a{}; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:52:13 %s | FileCheck -check-prefix=CHECK-AGGREGATE %s + diff --git a/clang/tools/libclang/CIndexCodeCompletion.cpp b/clang/tools/libclang/CIndexCodeCompletion.cpp index 044cac93563b..0d75970f2f65 100644 --- a/clang/tools/libclang/CIndexCodeCompletion.cpp +++ b/clang/tools/libclang/CIndexCodeCompletion.cpp @@ -656,14 +656,15 @@ namespace { void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) override { + SourceLocation OpenParLoc, + bool Braced) override { StoredResults.reserve(StoredResults.size() + NumCandidates); for (unsigned I = 0; I != NumCandidates; ++I) { - CodeCompletionString *StoredCompletion - = Candidates[I].CreateSignatureString(CurrentArg, S, getAllocator(), + CodeCompletionString *StoredCompletion = + Candidates[I].CreateSignatureString(CurrentArg, S, getAllocator(), getCodeCompletionTUInfo(), - includeBriefComments()); - + includeBriefComments(), Braced); + CXCompletionResult R; R.CursorKind = CXCursor_OverloadCandidate; R.CompletionString = StoredCompletion; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index a0cff3cc9bf8..51f34369c383 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -995,7 +995,8 @@ class CodeComplete : public CodeCompleteConsumer { void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg, OverloadCandidate *Candidates, unsigned NumCandidates, - SourceLocation OpenParLoc) override { + SourceLocation OpenParLoc, + bool Braced) override { // At the moment we don't filter out any overloaded candidates. 
} From a61f34ea2502d900c57a332174d4c103b6963c80 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 3 Jan 2022 20:17:20 +0100 Subject: [PATCH 446/992] [clangd] Fix windows build after 478863ef58c7f7314e06 http://45.33.8.238/win/51774/step_4.txt MS extension causes the wrong class to be friended. --- clang-tools-extra/clangd/Headers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h index 9612ce8def46..3b510325da04 100644 --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -38,6 +38,7 @@ namespace clangd { // clangd has a built-in database of standard library symbols. namespace stdlib { +class Symbol; // A standard library header, such as // Lightweight class, in fact just an index into a table. @@ -53,7 +54,7 @@ class Header { private: Header(unsigned ID) : ID(ID) {} unsigned ID; - friend class Symbol; + friend Symbol; friend llvm::DenseMapInfo
; friend bool operator==(const Header &L, const Header &R) { return L.ID == R.ID; From db77f7a074d70f0092ba0870ad0ce42417095348 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Mon, 3 Jan 2022 14:18:45 -0500 Subject: [PATCH 447/992] Silence a "not all control paths return a value" warning; NFC --- clang/lib/Sema/SemaCodeComplete.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index d3c154f18937..f9f20af1497a 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -6150,6 +6150,7 @@ static bool argMatchesTemplateParams(const ParsedTemplateArgument &Arg, case ParsedTemplateArgument::Template: return llvm::isa(Param); // signature not checked } + llvm_unreachable("Unhandled switch case"); } QualType Sema::ProduceTemplateArgumentSignatureHelp( From e5947760c2e568c2090841a452aa8469cc66f6c8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 3 Jan 2022 11:28:47 -0800 Subject: [PATCH 448/992] Revert "[llvm] Remove redundant member initialization (NFC)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit fd4808887ee47f3ec8a030e9211169ef4fb094c3. 
This patch causes gcc to issue a lot of warnings like: warning: base class ‘class llvm::MCParsedAsmOperand’ should be explicitly initialized in the copy constructor [-Wextra] --- llvm/include/llvm/ADT/Triple.h | 4 +++- llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 2 +- llvm/include/llvm/Analysis/DDG.h | 2 +- llvm/include/llvm/Analysis/LazyCallGraph.h | 2 +- llvm/include/llvm/Analysis/MemoryLocation.h | 3 ++- llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h | 2 +- .../llvm/Analysis/ScalarEvolutionAliasAnalysis.h | 2 +- llvm/include/llvm/CodeGen/CodeGenPassBuilder.h | 2 +- .../include/llvm/CodeGen/GlobalISel/CallLowering.h | 2 +- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 2 +- llvm/include/llvm/CodeGen/MachinePassManager.h | 6 +++--- .../llvm/CodeGen/SelectionDAGAddressAnalysis.h | 2 +- llvm/include/llvm/DWARFLinker/DWARFLinker.h | 4 ++-- llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 2 +- llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h | 4 ++-- llvm/include/llvm/FileCheck/FileCheck.h | 3 ++- llvm/include/llvm/IR/LegacyPassManagers.h | 3 ++- llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 4 +++- llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 2 +- .../llvm/MCA/HardwareUnits/ResourceManager.h | 4 ++-- llvm/include/llvm/MCA/Stages/EntryStage.h | 2 +- llvm/include/llvm/MCA/Stages/ExecuteStage.h | 2 +- llvm/include/llvm/MCA/Stages/InOrderIssueStage.h | 2 +- llvm/include/llvm/MCA/Stages/InstructionTables.h | 2 +- llvm/include/llvm/MCA/Stages/RetireStage.h | 2 +- .../llvm/ProfileData/Coverage/CoverageMapping.h | 2 +- llvm/include/llvm/Remarks/RemarkSerializer.h | 2 +- llvm/include/llvm/Support/ScopedPrinter.h | 4 ++-- llvm/include/llvm/Transforms/IPO/Attributor.h | 2 +- .../llvm/Transforms/Scalar/LoopPassManager.h | 3 ++- llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 2 +- llvm/lib/Analysis/CallGraphSCCPass.cpp | 2 +- llvm/lib/Analysis/DDG.cpp | 2 +- llvm/lib/Analysis/GlobalsModRef.cpp | 4 ++-- llvm/lib/Analysis/IVUsers.cpp | 2 +- 
llvm/lib/Analysis/LoopCacheAnalysis.cpp | 5 +++-- llvm/lib/Analysis/LoopPass.cpp | 3 ++- llvm/lib/Analysis/RegionPass.cpp | 3 ++- .../lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2 +- llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 10 +++++----- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 3 ++- .../CodeGen/MachineOptimizationRemarkEmitter.cpp | 3 ++- llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 3 ++- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 9 ++++++--- llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp | 2 +- .../ExecutionEngine/GDBRegistrationListener.cpp | 2 +- llvm/lib/IR/LegacyPassManager.cpp | 14 ++++++++------ llvm/lib/IR/Module.cpp | 3 ++- llvm/lib/InterfaceStub/IFSStub.cpp | 4 ++-- llvm/lib/MC/MCParser/AsmParser.cpp | 2 +- llvm/lib/MC/MCParser/MasmParser.cpp | 2 +- llvm/lib/MCA/Stages/DispatchStage.cpp | 2 +- llvm/lib/MCA/Stages/InOrderIssueStage.cpp | 2 +- llvm/lib/Remarks/BitstreamRemarkSerializer.cpp | 2 +- llvm/lib/Remarks/RemarkStreamer.cpp | 2 +- llvm/lib/Remarks/RemarkStringTable.cpp | 2 +- llvm/lib/Remarks/YAMLRemarkParser.cpp | 2 +- llvm/lib/Support/YAMLParser.cpp | 4 ++-- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + .../lib/Target/AArch64/AArch64TargetObjectFile.cpp | 3 ++- .../Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- .../AArch64/GISel/AArch64InstructionSelector.cpp | 4 ++-- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h | 2 +- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 3 ++- llvm/lib/Target/AMDGPU/AMDGPULibFunc.h | 2 +- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 +++- .../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2 +- .../Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 3 ++- llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 2 +- llvm/lib/Target/ARM/ARMHazardRecognizer.h | 2 +- llvm/lib/Target/ARM/ARMInstrInfo.cpp | 3 ++- llvm/lib/Target/ARM/ARMInstructionSelector.cpp | 4 ++-- llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 3 ++- 
llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 2 +- llvm/lib/Target/ARM/ARMTargetObjectFile.h | 3 ++- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +- llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 2 +- llvm/lib/Target/AVR/AVRSubtarget.cpp | 2 ++ llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 8 ++++---- llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 4 ++-- llvm/lib/Target/BPF/BPFSubtarget.cpp | 2 +- .../Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 6 ++++-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2 +- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 5 +++-- llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 2 +- llvm/lib/Target/Lanai/LanaiSubtarget.cpp | 2 +- .../Target/MSP430/AsmParser/MSP430AsmParser.cpp | 11 +++++------ llvm/lib/Target/MSP430/MSP430Subtarget.cpp | 2 +- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 3 ++- llvm/lib/Target/Mips/Mips16RegisterInfo.cpp | 2 +- llvm/lib/Target/Mips/MipsInstructionSelector.cpp | 4 ++-- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 3 ++- llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 4 ++-- llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h | 2 +- llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 5 ++--- .../PowerPC/GISel/PPCInstructionSelector.cpp | 3 ++- .../Target/PowerPC/GISel/PPCRegisterBankInfo.cpp | 3 ++- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 4 ++-- llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp | 3 ++- llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp | 3 ++- llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 2 +- llvm/lib/Target/Sparc/SparcTargetObjectFile.h | 4 +++- llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 3 ++- llvm/lib/Target/SystemZ/SystemZSubtarget.cpp | 2 +- llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp | 2 +- llvm/lib/Target/VE/VEMachineFunctionInfo.h | 5 +++-- .../Target/WebAssembly/WebAssemblySubtarget.cpp | 5 +++-- 
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 6 +++--- llvm/lib/Target/X86/X86InstructionSelector.cpp | 4 ++-- llvm/lib/Target/X86/X86RegisterBankInfo.cpp | 3 ++- llvm/lib/Target/XCore/XCoreSubtarget.cpp | 4 ++-- llvm/lib/Transforms/IPO/Inliner.cpp | 3 ++- llvm/lib/Transforms/IPO/PartialInlining.cpp | 3 ++- .../Transforms/Instrumentation/InstrProfiling.cpp | 3 ++- .../Transforms/Scalar/LowerMatrixIntrinsics.cpp | 6 ++++-- llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++-- llvm/tools/dsymutil/BinaryHolder.h | 2 +- llvm/tools/dsymutil/Reproducer.cpp | 2 +- llvm/tools/llvm-cov/CoverageSummaryInfo.h | 7 +++++-- llvm/tools/llvm-mca/CodeRegion.h | 2 +- llvm/tools/llvm-mca/PipelinePrinter.h | 2 +- llvm/tools/llvm-objcopy/ELF/Object.h | 6 ++++-- llvm/tools/llvm-objdump/SourcePrinter.h | 2 +- llvm/tools/llvm-profdata/llvm-profdata.cpp | 7 ++++--- llvm/tools/llvm-readobj/llvm-readobj.cpp | 4 ++-- llvm/utils/TableGen/GlobalISel/GIMatchDag.h | 4 +++- llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp | 1 + llvm/utils/TableGen/GlobalISelEmitter.cpp | 4 +++- llvm/utils/TableGen/PredicateExpander.h | 2 +- llvm/utils/TableGen/RegisterBankEmitter.cpp | 2 +- 134 files changed, 244 insertions(+), 184 deletions(-) diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 0f0a7b08b5d3..89961f67222d 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -272,7 +272,9 @@ class Triple { /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. 
- Triple() : Arch(), SubArch(), Vendor(), OS(), Environment(), ObjectFormat() {} + Triple() + : Data(), Arch(), SubArch(), Vendor(), OS(), Environment(), + ObjectFormat() {} explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 361765d85257..ed9d1ba4c5a7 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -58,7 +58,7 @@ class BasicAAResult : public AAResultBase { BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree *DT = nullptr, PhiValues *PV = nullptr) - : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {} + : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), PV(PV) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index 4ea589ec7efc..51dd4a738f00 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -52,7 +52,7 @@ class DDGNode : public DDGNodeBase { }; DDGNode() = delete; - DDGNode(const NodeKind K) : Kind(K) {} + DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {} DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} virtual ~DDGNode() = 0; diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index 5828274cc02b..0580f4d7b226 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -1190,7 +1190,7 @@ class LazyCallGraph { } }; -inline LazyCallGraph::Edge::Edge() {} +inline LazyCallGraph::Edge::Edge() : Value() {} inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} inline 
LazyCallGraph::Edge::operator bool() const { diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index 23e50f601e04..833fce1b1726 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -284,7 +284,8 @@ class MemoryLocation { return T.isScalable() ? UnknownSize : T.getFixedSize(); } - MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {} + MemoryLocation() + : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()), AATags() {} explicit MemoryLocation(const Value *Ptr, LocationSize Size, const AAMDNodes &AATags = AAMDNodes()) diff --git a/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h index d19a6394bd48..b4f4e5f29768 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h @@ -40,7 +40,7 @@ class ObjCARCAAResult : public AAResultBase { const DataLayout &DL; public: - explicit ObjCARCAAResult(const DataLayout &DL) : DL(DL) {} + explicit ObjCARCAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} ObjCARCAAResult(ObjCARCAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL) {} diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h index ebd427354cee..20acb407ead0 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -27,7 +27,7 @@ class SCEVAAResult : public AAResultBase { ScalarEvolution &SE; public: - explicit SCEVAAResult(ScalarEvolution &SE) : SE(SE) {} + explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {} SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {} AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h 
b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index f6563971f981..1fd07ca2c8d4 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -159,7 +159,7 @@ template class CodeGenPassBuilder { class AddIRPass { public: AddIRPass(ModulePassManager &MPM, bool DebugPM, bool Check = true) - : MPM(MPM) { + : MPM(MPM), FPM() { if (Check) AddingFunctionPasses = false; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 82c125993ec3..9c878d4b087b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -95,7 +95,7 @@ class CallLowering { bool IsFixed = true) : ArgInfo(Regs, OrigValue.getType(), OrigIndex, Flags, IsFixed, &OrigValue) {} - ArgInfo() {} + ArgInfo() : BaseArgInfo() {} }; struct CallLoweringInfo { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 9507c3411b5c..a02b15639946 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -556,7 +556,7 @@ class LegalizeRuleSet { } public: - LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false) {} + LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false), Rules() {} bool isAliasedByAnother() { return IsAliasedByAnother; } void setIsAliasedByAnother() { IsAliasedByAnother = true; } diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h index 75b8a89c812e..f967167c65e1 100644 --- a/llvm/include/llvm/CodeGen/MachinePassManager.h +++ b/llvm/include/llvm/CodeGen/MachinePassManager.h @@ -40,10 +40,10 @@ class MachineFunctionAnalysisManager : public AnalysisManager { public: using Base = AnalysisManager; - MachineFunctionAnalysisManager() : FAM(nullptr), MAM(nullptr) {} + MachineFunctionAnalysisManager() : Base(), 
FAM(nullptr), MAM(nullptr) {} MachineFunctionAnalysisManager(FunctionAnalysisManager &FAM, ModuleAnalysisManager &MAM) - : FAM(&FAM), MAM(&MAM) {} + : Base(), FAM(&FAM), MAM(&MAM) {} MachineFunctionAnalysisManager(MachineFunctionAnalysisManager &&) = default; MachineFunctionAnalysisManager & operator=(MachineFunctionAnalysisManager &&) = default; @@ -135,7 +135,7 @@ class MachineFunctionPassManager MachineFunctionPassManager(bool DebugLogging = false, bool RequireCodeGenSCCOrder = false, bool VerifyMachineFunction = false) - : RequireCodeGenSCCOrder(RequireCodeGenSCCOrder), + : Base(), RequireCodeGenSCCOrder(RequireCodeGenSCCOrder), VerifyMachineFunction(VerifyMachineFunction) {} MachineFunctionPassManager(MachineFunctionPassManager &&) = default; MachineFunctionPassManager & diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 0f3af915da64..6a3d76be0ed6 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -39,7 +39,7 @@ class BaseIndexOffset { public: BaseIndexOffset() = default; BaseIndexOffset(SDValue Base, SDValue Index, bool IsIndexSignExt) - : Base(Base), Index(Index), IsIndexSignExt(IsIndexSignExt) {} + : Base(Base), Index(Index), Offset(), IsIndexSignExt(IsIndexSignExt) {} BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, bool IsIndexSignExt) : Base(Base), Index(Index), Offset(Offset), diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index 4f1c666df35f..1c6d0b1ead86 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -385,8 +385,8 @@ class DWARFLinker { : Die(Die), Type(T), CU(CU), Flags(0), OtherInfo(OtherInfo) {} WorklistItem(unsigned AncestorIdx, CompileUnit &CU, unsigned Flags) - : Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), Flags(Flags), - 
AncestorIdx(AncestorIdx) {} + : Die(), Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), + Flags(Flags), AncestorIdx(AncestorIdx) {} }; /// returns true if we need to translate strings. diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h index 6dd90499c203..045c9e3f3ebd 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h @@ -20,7 +20,7 @@ namespace gsym { /// string at offset zero. Strings must be UTF8 NULL terminated strings. struct StringTable { StringRef Data; - StringTable() {} + StringTable() : Data() {} StringTable(StringRef D) : Data(D) {} StringRef operator[](size_t Offset) const { return getString(Offset); } StringRef getString(uint32_t Offset) const { diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h index 779dc885372d..4bb11bf62593 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -87,7 +87,7 @@ class PlainPrinterBase : public DIPrinter { public: PlainPrinterBase(raw_ostream &OS, raw_ostream &ES, PrinterConfig &Config) - : OS(OS), ES(ES), Config(Config) {} + : DIPrinter(), OS(OS), ES(ES), Config(Config) {} void print(const Request &Request, const DILineInfo &Info) override; void print(const Request &Request, const DIInliningInfo &Info) override; @@ -138,7 +138,7 @@ class JSONPrinter : public DIPrinter { public: JSONPrinter(raw_ostream &OS, PrinterConfig &Config) - : OS(OS), Config(Config) {} + : DIPrinter(), OS(OS), Config(Config) {} void print(const Request &Request, const DILineInfo &Info) override; void print(const Request &Request, const DIInliningInfo &Info) override; diff --git a/llvm/include/llvm/FileCheck/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h index 7a6c98db3029..6ed75e14ccb6 100644 --- a/llvm/include/llvm/FileCheck/FileCheck.h +++ b/llvm/include/llvm/FileCheck/FileCheck.h 
@@ -80,7 +80,8 @@ class FileCheckType { std::bitset Modifiers; public: - FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {} + FileCheckType(FileCheckKind Kind = CheckNone) + : Kind(Kind), Count(1), Modifiers() {} FileCheckType(const FileCheckType &) = default; FileCheckType &operator=(const FileCheckType &) = default; diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index e161bdee961a..0bcb408d4929 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -460,7 +460,8 @@ class PMDataManager { class FPPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit FPPassManager() : ModulePass(ID) {} + explicit FPPassManager() + : ModulePass(ID), PMDataManager() { } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 2f57b85a9232..908ee30e4060 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -68,7 +68,9 @@ struct IntelExpr { StringRef OffsetName; unsigned Scale; - IntelExpr() : NeedBracs(false), Imm(0), Scale(1) {} + IntelExpr() + : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), + OffsetName(StringRef()), Scale(1) {} // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, StringRef offsetName, int64_t imm, bool needBracs) diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index c05f770df8eb..7eddd067aa0c 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -55,7 +55,7 @@ class MemoryGroup { MemoryGroup() : 
NumPredecessors(0), NumExecutingPredecessors(0), NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0), - NumExecuted(0), CriticalPredecessor() {} + NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {} MemoryGroup(MemoryGroup &&) = default; size_t getNumSuccessors() const { diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h index 7467fd6754f0..b679b0d7d537 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -118,8 +118,8 @@ class DefaultResourceStrategy final : public ResourceStrategy { public: DefaultResourceStrategy(uint64_t UnitMask) - : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask), - RemovedFromNextInSequence(0) {} + : ResourceStrategy(), ResourceUnitMask(UnitMask), + NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} virtual ~DefaultResourceStrategy() = default; uint64_t select(uint64_t ReadyMask) override; diff --git a/llvm/include/llvm/MCA/Stages/EntryStage.h b/llvm/include/llvm/MCA/Stages/EntryStage.h index 4c50838bef4b..1c133898d603 100644 --- a/llvm/include/llvm/MCA/Stages/EntryStage.h +++ b/llvm/include/llvm/MCA/Stages/EntryStage.h @@ -36,7 +36,7 @@ class EntryStage final : public Stage { EntryStage &operator=(const EntryStage &Other) = delete; public: - EntryStage(SourceMgr &SM) : SM(SM), NumRetired(0) {} + EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { } bool isAvailable(const InstRef &IR) const override; bool hasWorkToComplete() const override; diff --git a/llvm/include/llvm/MCA/Stages/ExecuteStage.h b/llvm/include/llvm/MCA/Stages/ExecuteStage.h index 03a78a8b6b85..4c09ca8255ff 100644 --- a/llvm/include/llvm/MCA/Stages/ExecuteStage.h +++ b/llvm/include/llvm/MCA/Stages/ExecuteStage.h @@ -49,7 +49,7 @@ class ExecuteStage final : public Stage { public: ExecuteStage(Scheduler &S) : ExecuteStage(S, false) {} ExecuteStage(Scheduler 
&S, bool ShouldPerformBottleneckAnalysis) - : HWS(S), NumDispatchedOpcodes(0), NumIssuedOpcodes(0), + : Stage(), HWS(S), NumDispatchedOpcodes(0), NumIssuedOpcodes(0), EnablePressureEvents(ShouldPerformBottleneckAnalysis) {} // This stage works under the assumption that the Pipeline will eventually diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h index 40bc3b5aed94..42f386a13d85 100644 --- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h +++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h @@ -38,7 +38,7 @@ struct StallInfo { unsigned CyclesLeft; StallKind Kind; - StallInfo() : CyclesLeft(), Kind(StallKind::DEFAULT) {} + StallInfo() : IR(), CyclesLeft(), Kind(StallKind::DEFAULT) {} StallKind getStallKind() const { return Kind; } unsigned getCyclesLeft() const { return CyclesLeft; } diff --git a/llvm/include/llvm/MCA/Stages/InstructionTables.h b/llvm/include/llvm/MCA/Stages/InstructionTables.h index 9617fd49db6e..35b21b0ba94d 100644 --- a/llvm/include/llvm/MCA/Stages/InstructionTables.h +++ b/llvm/include/llvm/MCA/Stages/InstructionTables.h @@ -32,7 +32,7 @@ class InstructionTables final : public Stage { public: InstructionTables(const MCSchedModel &Model) - : SM(Model), Masks(Model.getNumProcResourceKinds()) { + : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) { computeProcResourceMasks(Model, Masks); } diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h index aafe2815df15..b635a01db85e 100644 --- a/llvm/include/llvm/MCA/Stages/RetireStage.h +++ b/llvm/include/llvm/MCA/Stages/RetireStage.h @@ -36,7 +36,7 @@ class RetireStage final : public Stage { public: RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) - : RCU(R), PRF(F), LSU(LS) {} + : Stage(), RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { return !RCU.isEmpty(); } Error cycleStart() override; diff --git 
a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index e1f45019b1a9..d3a5d44ce8dd 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -702,7 +702,7 @@ class LineCoverageIterator LineCoverageIterator(const CoverageData &CD, unsigned Line) : CD(CD), WrappedSegment(nullptr), Next(CD.begin()), Ended(false), - Line(Line) { + Line(Line), Segments(), Stats() { this->operator++(); } diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h index 90e556df87e7..97fd224ea082 100644 --- a/llvm/include/llvm/Remarks/RemarkSerializer.h +++ b/llvm/include/llvm/Remarks/RemarkSerializer.h @@ -48,7 +48,7 @@ struct RemarkSerializer { RemarkSerializer(Format SerializerFormat, raw_ostream &OS, SerializerMode Mode) - : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode) {} + : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode), StrTab() {} /// This is just an interface. 
virtual ~RemarkSerializer() = default; diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h index 803ae47793df..865337e3cc7f 100644 --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -799,7 +799,7 @@ struct DelimitedScope { }; struct DictScope : DelimitedScope { - explicit DictScope() {} + explicit DictScope() : DelimitedScope() {} explicit DictScope(ScopedPrinter &W) : DelimitedScope(W) { W.objectBegin(); } DictScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) { @@ -818,7 +818,7 @@ struct DictScope : DelimitedScope { }; struct ListScope : DelimitedScope { - explicit ListScope() {} + explicit ListScope() : DelimitedScope() {} explicit ListScope(ScopedPrinter &W) : DelimitedScope(W) { W.arrayBegin(); } ListScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) { diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 233f1be04f56..1a9dde03aabc 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -2365,7 +2365,7 @@ struct BooleanState : public IntegerStateBase { using super = IntegerStateBase; using base_t = IntegerStateBase::base_t; - BooleanState() {} + BooleanState() : super() {} BooleanState(base_t Assumed) : super(Assumed) {} /// Set the assumed value to \p Value but never below the known one. 
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 7ba9d65cae55..419729271a23 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -435,7 +435,8 @@ class FunctionToLoopPassAdaptor bool UseBlockFrequencyInfo = false, bool UseBranchProbabilityInfo = false, bool LoopNestMode = false) - : Pass(std::move(Pass)), UseMemorySSA(UseMemorySSA), + : Pass(std::move(Pass)), LoopCanonicalizationFPM(), + UseMemorySSA(UseMemorySSA), UseBlockFrequencyInfo(UseBlockFrequencyInfo), UseBranchProbabilityInfo(UseBranchProbabilityInfo), LoopNestMode(LoopNestMode) { diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index 090dccc53b6e..9467bb3c9b2d 100644 --- a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -63,7 +63,7 @@ using namespace llvm::cflaa; CFLSteensAAResult::CFLSteensAAResult( std::function GetTLI) - : GetTLI(std::move(GetTLI)) {} + : AAResultBase(), GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp index 930cb13c0cb3..f2e5eab72bf2 100644 --- a/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -61,7 +61,7 @@ class CGPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit CGPassManager() : ModulePass(ID) {} + explicit CGPassManager() : ModulePass(ID), PMDataManager() {} /// Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. 
diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index 7e1357959a3f..da5de75a038c 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -106,7 +106,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { //===--------------------------------------------------------------------===// SimpleDDGNode::SimpleDDGNode(Instruction &I) - : DDGNode(NodeKind::SingleInstruction) { + : DDGNode(NodeKind::SingleInstruction), InstList() { assert(InstList.empty() && "Expected empty list."); InstList.push_back(&I); } diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index 53262d88ba51..d00a7c944f10 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -102,7 +102,7 @@ class GlobalsAAResult::FunctionInfo { "Insufficient low bits to store our flag and ModRef info."); public: - FunctionInfo() {} + FunctionInfo() : Info() {} ~FunctionInfo() { delete Info.getPointer(); } @@ -963,7 +963,7 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, GlobalsAAResult::GlobalsAAResult( const DataLayout &DL, std::function GetTLI) - : DL(DL), GetTLI(std::move(GetTLI)) {} + : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp index 0f3929f45506..d7b202f83189 100644 --- a/llvm/lib/Analysis/IVUsers.cpp +++ b/llvm/lib/Analysis/IVUsers.cpp @@ -254,7 +254,7 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { IVUsers::IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE) - : L(L), AC(AC), LI(LI), DT(DT), SE(SE) { + : L(L), AC(AC), LI(LI), DT(DT), SE(SE), IVUses() { // Collect ephemeral values so that AddUsersIfInteresting skips them. 
EphValues.clear(); CodeMetrics::collectEphemeralValues(L, AC, EphValues); diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp index ba014bd08c98..7b895d8a5dc2 100644 --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -477,8 +477,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, - AAResults &AA, DependenceInfo &DI, Optional TRT) - : Loops(Loops), + AAResults &AA, DependenceInfo &DI, + Optional TRT) + : Loops(Loops), TripCounts(), LoopCosts(), TRT((TRT == None) ? Optional(TemporalReuseThreshold) : TRT), LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { assert(!Loops.empty() && "Expecting a non-empty loop vector."); diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp index b720bab454e9..9e470e998e67 100644 --- a/llvm/lib/Analysis/LoopPass.cpp +++ b/llvm/lib/Analysis/LoopPass.cpp @@ -69,7 +69,8 @@ char PrintLoopPassWrapper::ID = 0; char LPPassManager::ID = 0; -LPPassManager::LPPassManager() : FunctionPass(ID) { +LPPassManager::LPPassManager() + : FunctionPass(ID), PMDataManager() { LI = nullptr; CurrentLoop = nullptr; } diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp index 10c8569096c6..c20ecff5f912 100644 --- a/llvm/lib/Analysis/RegionPass.cpp +++ b/llvm/lib/Analysis/RegionPass.cpp @@ -30,7 +30,8 @@ using namespace llvm; char RGPassManager::ID = 0; -RGPassManager::RGPassManager() : FunctionPass(ID) { +RGPassManager::RGPassManager() + : FunctionPass(ID), PMDataManager() { RI = nullptr; CurrentRegion = nullptr; } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 1d0c106fd5db..dc5a4d8f85aa 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -29,7 +29,7 
@@ using namespace llvm; InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers) {} + : Renderers(MaxRenderers), MIs() {} InstructionSelector::InstructionSelector() = default; diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 2ee9379cb286..7e43a0cbbe73 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -185,7 +185,7 @@ class Polynomial { APInt A; public: - Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) { + Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() { IntegerType *Ty = dyn_cast(V->getType()); if (Ty) { ErrorMSBs = 0; @@ -195,12 +195,12 @@ class Polynomial { } Polynomial(const APInt &A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(A) {} Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {} + : ErrorMSBs(ErrorMSBs), V(nullptr), B(), A(BitWidth, A) {} - Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {} + Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr), B(), A() {} /// Increment and clamp the number of undefined bits. 
void incErrorMSBs(unsigned amt) { @@ -677,7 +677,7 @@ struct VectorInfo { FixedVectorType *const VTy; VectorInfo(FixedVectorType *VTy) - : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) { + : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index f144639770bc..d0323eaf3d78 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -182,7 +182,8 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { MIRParserImpl::MIRParserImpl(std::unique_ptr Contents, StringRef Filename, LLVMContext &Context, std::function Callback) - : Context(Context), + : SM(), + Context(Context), In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc())) ->getBuffer(), nullptr, handleYAMLDiag, this), diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 5347a7b0d890..59fc23983d3d 100644 --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -22,7 +22,8 @@ using namespace llvm; DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( - StringRef MKey, const MachineInstr &MI) { + StringRef MKey, const MachineInstr &MI) + : Argument() { Key = std::string(MKey); raw_string_ostream OS(Val); diff --git a/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index a61a2b2728fa..6e05de888cc0 100644 --- a/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -30,7 +30,8 @@ using namespace llvm; ScoreboardHazardRecognizer::ScoreboardHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *SchedDAG, const char *ParentDebugType) - : DebugType(ParentDebugType), ItinData(II), DAG(SchedDAG) { + : 
ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II), + DAG(SchedDAG) { (void)DebugType; // Determine the maximum depth of any itinerary. This determines the depth of // the scoreboard. We always make the scoreboard at least 1 cycle deep to diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 697d9df54779..d1c2cdeb133b 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -108,7 +108,8 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, // ELF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() { +TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() + : TargetLoweringObjectFile() { SupportDSOLocalEquivalentLowering = true; } @@ -1138,7 +1139,8 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// -TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() { +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() + : TargetLoweringObjectFile() { SupportIndirectSymViaGOTPCRel = true; } @@ -2541,7 +2543,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( //===----------------------------------------------------------------------===// // GOFF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {} +TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() + : TargetLoweringObjectFile() {} MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp index 2524e10cb6c5..ac217df1ee48 
100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp @@ -23,7 +23,7 @@ using namespace llvm::pdb; NativeEnumTypes::NativeEnumTypes(NativeSession &PDBSession, LazyRandomTypeCollection &Types, std::vector Kinds) - : Index(0), Session(PDBSession) { + : Matches(), Index(0), Session(PDBSession) { Optional TI = Types.getFirst(); while (TI) { CVType CVT = Types.getType(*TI); diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp index 1fb37ce7c57c..e15bce0d6c4b 100644 --- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -96,7 +96,7 @@ class GDBJITRegistrationListener : public JITEventListener { public: /// Instantiates the JIT service. - GDBJITRegistrationListener() {} + GDBJITRegistrationListener() : ObjectBufferMap() {} /// Unregisters each object that was previously registered and releases all /// internal resources. diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 4357c95aa9f6..bb72bec93066 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -256,9 +256,9 @@ class FunctionPassManagerImpl : public Pass, bool wasRun; public: static char ID; - explicit FunctionPassManagerImpl() - : Pass(PT_PassManager, ID), PMTopLevelManager(new FPPassManager()), - wasRun(false) {} + explicit FunctionPassManagerImpl() : + Pass(PT_PassManager, ID), PMDataManager(), + PMTopLevelManager(new FPPassManager()), wasRun(false) {} /// \copydoc FunctionPassManager::add() void add(Pass *P) { @@ -387,7 +387,8 @@ namespace { class MPPassManager : public Pass, public PMDataManager { public: static char ID; - explicit MPPassManager() : Pass(PT_PassManager, ID) {} + explicit MPPassManager() : + Pass(PT_PassManager, ID), PMDataManager() { } // Delete on the fly managers. 
~MPPassManager() override { @@ -477,8 +478,9 @@ class PassManagerImpl : public Pass, public: static char ID; - explicit PassManagerImpl() - : Pass(PT_PassManager, ID), PMTopLevelManager(new MPPassManager()) {} + explicit PassManagerImpl() : + Pass(PT_PassManager, ID), PMDataManager(), + PMTopLevelManager(new MPPassManager()) {} /// \copydoc PassManager::add() void add(Pass *P) { diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index b3b4b8a80a1c..a0485a59d0e0 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -73,7 +73,8 @@ template class llvm::SymbolTableListTraits; Module::Module(StringRef MID, LLVMContext &C) : Context(C), ValSymTab(std::make_unique(-1)), - ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL("") { + Materializer(), ModuleID(std::string(MID)), + SourceFileName(std::string(MID)), DL("") { Context.addModule(this); } diff --git a/llvm/lib/InterfaceStub/IFSStub.cpp b/llvm/lib/InterfaceStub/IFSStub.cpp index bbc91ada1ded..008263f8db9f 100644 --- a/llvm/lib/InterfaceStub/IFSStub.cpp +++ b/llvm/lib/InterfaceStub/IFSStub.cpp @@ -29,7 +29,7 @@ IFSStub::IFSStub(IFSStub &&Stub) { Symbols = std::move(Stub.Symbols); } -IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) { +IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) : IFSStub() { IfsVersion = Stub.IfsVersion; Target = Stub.Target; SoName = Stub.SoName; @@ -37,7 +37,7 @@ IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) { Symbols = Stub.Symbols; } -IFSStubTriple::IFSStubTriple(IFSStub const &Stub) { +IFSStubTriple::IFSStubTriple(IFSStub const &Stub) : IFSStub() { IfsVersion = Stub.IfsVersion; Target = Stub.Target; SoName = Stub.SoName; diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 5c94174aa161..705f7159d55b 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -159,7 +159,7 @@ class AsmParser : public MCAsmParser { int64_t LineNumber; SMLoc Loc; unsigned Buf; - 
CppHashInfoTy() : LineNumber(0), Buf(0) {} + CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {} }; CppHashInfoTy CppHashInfo; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index e2dfd339e93e..f1704cef46ac 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -424,7 +424,7 @@ class MasmParser : public MCAsmParser { int64_t LineNumber; SMLoc Loc; unsigned Buf; - CppHashInfoTy() : LineNumber(0), Buf(0) {} + CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {} }; CppHashInfoTy CppHashInfo; diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp index 66228bd5a862..5385142698e6 100644 --- a/llvm/lib/MCA/Stages/DispatchStage.cpp +++ b/llvm/lib/MCA/Stages/DispatchStage.cpp @@ -30,7 +30,7 @@ DispatchStage::DispatchStage(const MCSubtargetInfo &Subtarget, unsigned MaxDispatchWidth, RetireControlUnit &R, RegisterFile &F) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) { + CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) { if (!DispatchWidth) DispatchWidth = Subtarget.getSchedModel().IssueWidth; } diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp index abfbc80f17c9..fa5c0fc66b9e 100644 --- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp +++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp @@ -47,7 +47,7 @@ InOrderIssueStage::InOrderIssueStage(const MCSubtargetInfo &STI, RegisterFile &PRF, CustomBehaviour &CB, LSUnit &LSU) : STI(STI), PRF(PRF), RM(STI.getSchedModel()), CB(CB), LSU(LSU), - NumIssued(), CarryOver(), Bandwidth(), LastWriteBackCycle() {} + NumIssued(), SI(), CarryOver(), Bandwidth(), LastWriteBackCycle() {} unsigned InOrderIssueStage::getIssueWidth() const { return STI.getSchedModel().IssueWidth; diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp index 
0810bf531db8..36ba93564771 100644 --- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -18,7 +18,7 @@ using namespace llvm::remarks; BitstreamRemarkSerializerHelper::BitstreamRemarkSerializerHelper( BitstreamRemarkContainerType ContainerType) - : Bitstream(Encoded), ContainerType(ContainerType) {} + : Encoded(), R(), Bitstream(Encoded), ContainerType(ContainerType) {} static void push(SmallVectorImpl &R, StringRef Str) { append_range(R, Str); diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp index 543b00723659..2f00b8e73670 100644 --- a/llvm/lib/Remarks/RemarkStreamer.cpp +++ b/llvm/lib/Remarks/RemarkStreamer.cpp @@ -26,7 +26,7 @@ static cl::opt EnableRemarksSection( RemarkStreamer::RemarkStreamer( std::unique_ptr RemarkSerializer, Optional FilenameIn) - : RemarkSerializer(std::move(RemarkSerializer)), + : PassFilter(), RemarkSerializer(std::move(RemarkSerializer)), Filename(FilenameIn ? Optional(FilenameIn->str()) : None) {} Error RemarkStreamer::setFilter(StringRef Filter) { diff --git a/llvm/lib/Remarks/RemarkStringTable.cpp b/llvm/lib/Remarks/RemarkStringTable.cpp index 03d93baba038..5f462f01bb9a 100644 --- a/llvm/lib/Remarks/RemarkStringTable.cpp +++ b/llvm/lib/Remarks/RemarkStringTable.cpp @@ -20,7 +20,7 @@ using namespace llvm; using namespace llvm::remarks; -StringTable::StringTable(const ParsedStringTable &Other) { +StringTable::StringTable(const ParsedStringTable &Other) : StrTab() { for (unsigned i = 0, e = Other.size(); i < e; ++i) if (Expected MaybeStr = Other[i]) add(*MaybeStr); diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index a32629c9f557..3d9996c931ae 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -171,7 +171,7 @@ YAMLRemarkParser::YAMLRemarkParser(StringRef Buf) YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, Optional StrTab) - : RemarkParser{Format::YAML}, 
StrTab(std::move(StrTab)), + : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(), SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {} Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) { diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index 0ba019b3c46a..2adf37a511d1 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -1841,11 +1841,11 @@ bool Scanner::fetchMoreTokens() { Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, std::error_code *EC) - : scanner(new Scanner(Input, SM, ShowColors, EC)) {} + : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, std::error_code *EC) - : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {} + : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} Stream::~Stream() = default; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 672739f25599..f7d3dd0bc222 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -228,6 +228,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, IsLittle(LittleEndian), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), + FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(), TLInfo(TM, *this) { if (AArch64::isX18ReservedByDefault(TT)) diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 7ed934cfabc0..dfc66f0cb4c1 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -25,7 +25,8 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, 
SupportDebugThreadLocalLocation = false; } -AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() { +AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() + : TargetLoweringObjectFileMachO() { SupportGOTPCRelWithOffset = false; } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4bd08f29864b..fb709b92de64 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -491,7 +491,7 @@ class AArch64Operand : public MCParsedAsmOperand { public: AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {} - AArch64Operand(const AArch64Operand &o) : Ctx(o.Ctx) { + AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index ea8a7c7b83da..3d9a626d3ac3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -472,8 +472,8 @@ class AArch64InstructionSelector : public InstructionSelector { AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) - : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), - RBI(RBI), + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 92d22881f328..515a5c63a559 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ 
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -42,8 +42,8 @@ using namespace llvm; -AArch64RegisterBankInfo::AArch64RegisterBankInfo( - const TargetRegisterInfo &TRI) { +AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) + : AArch64GenRegisterBankInfo() { static llvm::once_flag InitializeRegisterBankFlag; static auto InitializeRegisterBankOnce = [&]() { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h index 5ba9b2cd187e..22be014813b0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -26,7 +26,7 @@ class AMDGPUAAResult : public AAResultBase { const DataLayout &DL; public: - explicit AMDGPUAAResult(const DataLayout &DL) : DL(DL) {} + explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} AMDGPUAAResult(AMDGPUAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL) {} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e5c5d36d1d4f..e16bead81b65 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -46,7 +46,8 @@ static cl::opt AllowRiskySelect( AMDGPUInstructionSelector::AMDGPUInstructionSelector( const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM) - : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), + : InstructionSelector(), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), STI(STI), EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), #define GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h index fb6a64b75c20..c97223b047e8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h @@ -356,7 +356,7 @@ class 
AMDGPULibFuncImpl : public AMDGPULibFuncBase { /// Wrapper class for AMDGPULIbFuncImpl class AMDGPULibFunc : public AMDGPULibFuncBase { public: - explicit AMDGPULibFunc() {} + explicit AMDGPULibFunc() : Impl(std::unique_ptr()) {} AMDGPULibFunc(const AMDGPULibFunc &F); /// Clone a mangled library func with the Id \p Id and argument info from \p /// CopyFrom. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ab463ce8940d..c60012bcfe2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -193,7 +193,9 @@ class ApplyRegBankMapping final : public GISelChangeObserver { } AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) - : Subtarget(ST), TRI(Subtarget.getRegisterInfo()), + : AMDGPUGenRegisterBankInfo(), + Subtarget(ST), + TRI(Subtarget.getRegisterInfo()), TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c71205b17a1a..2bb59086f391 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -62,7 +62,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { public: AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) - : Kind(Kind_), AsmParser(AsmParser_) {} + : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} using Ptr = std::unique_ptr; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index ded3fb7ab8d9..7708579a4491 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, - const MCTargetOptions &Options) { + const MCTargetOptions &Options) 
+ : MCAsmInfoELF() { CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4; StackGrowsUp = true; HasSingleParameterDotFile = false; diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 0d201a67af46..f083fa6662e9 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -164,7 +164,7 @@ static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer( const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict) - : MF(DAG->MF), DL(DAG->MF.getDataLayout()), + : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()), DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask) : CPUBankMask), AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences() diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/llvm/lib/Target/ARM/ARMHazardRecognizer.h index 66a1477e5e08..c1f1bcd0a629 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -34,7 +34,7 @@ class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer { unsigned FpMLxStalls = 0; public: - ARMHazardRecognizerFPMLx() { MaxLookAhead = 1; } + ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; } HazardType getHazardType(SUnit *SU, int Stalls) override; void Reset() override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 00db13f2eb52..5dee5e04af81 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -28,7 +28,8 @@ #include "llvm/MC/MCInst.h" using namespace llvm; -ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {} +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI() {} /// Return the noop instruction to use for a noop. 
MCInst ARMInstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 188b5562cac9..8be4e3f160e3 100644 --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -171,8 +171,8 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM, ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI) - : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), - STI(STI), Opcodes(STI), + : InstructionSelector(), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), Opcodes(STI), #define GET_GLOBALISEL_PREDICATES_INIT #include "ARMGenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 2523752a717e..1a7f10a13ed3 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -129,7 +129,8 @@ static void checkValueMappings() { } // end namespace arm } // end namespace llvm -ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) { +ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) + : ARMGenRegisterBankInfo() { // We have only one set of register banks, whatever the subtarget // is. Therefore, the initialization of the RegBanks table should be // done only once. 
Indeed the table of all register banks diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index ff4647dd46fd..6649750bb388 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -15,4 +15,4 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo() {} +ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {} diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/llvm/lib/Target/ARM/ARMTargetObjectFile.h index 47334b9a8a45..f86774beb397 100644 --- a/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -17,7 +17,8 @@ namespace llvm { class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { public: - ARMElfTargetObjectFile() { + ARMElfTargetObjectFile() + : TargetLoweringObjectFileELF() { PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31; } diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c7734cc2cf11..c8cec88d6e11 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -921,7 +921,7 @@ class ARMOperand : public MCParsedAsmOperand { }; public: - ARMOperand(KindTy K) : Kind(K) {} + ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const override { return StartLoc; } diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 1a36c2ca9152..4b18f5e20d40 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -21,7 +21,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) {} + : ARMBaseInstrInfo(STI), RI() {} /// Return the noop instruction to use for a noop. 
MCInst Thumb1InstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 5d2bc4ebe191..4da6f6ab6994 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -37,7 +37,7 @@ extern cl::opt ReuseFrameIndexVals; using namespace llvm; -ThumbRegisterInfo::ThumbRegisterInfo() {} +ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {} const TargetRegisterClass * ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp index 820efe79bf8a..990e1c57e63f 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.cpp +++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp @@ -39,6 +39,8 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false), m_hasTinyEncoding(false), m_hasMemMappedGPR(false), m_FeatureSetDummy(false), + + InstrInfo(), FrameLowering(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() { // Parse features string. 
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index f19e7840eb31..95c737aa272e 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -107,13 +107,13 @@ class AVROperand : public MCParsedAsmOperand { public: AVROperand(StringRef Tok, SMLoc const &S) - : Kind(k_Token), Tok(Tok), Start(S), End(S) {} + : Base(), Kind(k_Token), Tok(Tok), Start(S), End(S) {} AVROperand(unsigned Reg, SMLoc const &S, SMLoc const &E) - : Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {} + : Base(), Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {} AVROperand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E) - : Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {} + : Base(), Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {} AVROperand(unsigned Reg, MCExpr const *Imm, SMLoc const &S, SMLoc const &E) - : Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {} + : Base(), Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {} struct RegisterImmediate { unsigned Reg; diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index d55510a2455c..50298bf5e943 100644 --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -101,10 +101,10 @@ struct BPFOperand : public MCParsedAsmOperand { ImmOp Imm; }; - BPFOperand(KindTy K) : Kind(K) {} + BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} public: - BPFOperand(const BPFOperand &o) { + BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp index e4d98b85e58b..77e3cd393f87 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -59,6 +59,6 @@ void 
BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), FrameLowering(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 58f5ea78c541..d131cf896834 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -211,10 +211,12 @@ struct HexagonOperand : public MCParsedAsmOperand { struct ImmTy Imm; }; - HexagonOperand(KindTy K, MCContext &Context) : Kind(K), Context(Context) {} + HexagonOperand(KindTy K, MCContext &Context) + : MCParsedAsmOperand(), Kind(K), Context(Context) {} public: - HexagonOperand(const HexagonOperand &o) : Context(o.Context) { + HexagonOperand(const HexagonOperand &o) + : MCParsedAsmOperand(), Context(o.Context) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 091542f2e76a..2679e399852f 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1652,7 +1652,7 @@ struct WeightedLeaf { int Weight; int InsertionOrder; - WeightedLeaf() {} + WeightedLeaf() : Value(SDValue()) { } WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index a47d414af831..5f094dfeb95c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -204,7 
+204,7 @@ HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCRegisterInfo const &ri, bool ReportErrors) : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI), - ReportErrors(ReportErrors) { + ReportErrors(ReportErrors), ReversePairs() { init(); } @@ -212,7 +212,8 @@ HexagonMCChecker::HexagonMCChecker(HexagonMCChecker const &Other, MCSubtargetInfo const &STI, bool CopyReportErrors) : Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII), - STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) { + STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false), + ReversePairs() { init(); } diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 660215ca7435..a994bd7e57a4 100644 --- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -141,7 +141,7 @@ struct LanaiOperand : public MCParsedAsmOperand { struct MemOp Mem; }; - explicit LanaiOperand(KindTy Kind) : Kind(Kind) {} + explicit LanaiOperand(KindTy Kind) : MCParsedAsmOperand(), Kind(Kind) {} public: // The functions below are used by the autogenerated ASM matcher and hence to diff --git a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp index 37a4843e1bc4..d9d7847a0c5a 100644 --- a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp +++ b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp @@ -43,4 +43,4 @@ LanaiSubtarget::LanaiSubtarget(const Triple &TargetTriple, StringRef Cpu, CodeGenOpt::Level /*OptLevel*/) : LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString), FrameLowering(initializeSubtargetDependencies(Cpu, FeatureString)), - TLInfo(TM, *this) {} + InstrInfo(), TLInfo(TM, *this), TSInfo() {} diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index 
13cba8b079a9..c1677baf52a7 100644 --- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -114,14 +114,13 @@ class MSP430Operand : public MCParsedAsmOperand { public: MSP430Operand(StringRef Tok, SMLoc const &S) - : Kind(k_Tok), Tok(Tok), Start(S), End(S) {} + : Base(), Kind(k_Tok), Tok(Tok), Start(S), End(S) {} MSP430Operand(KindTy Kind, unsigned Reg, SMLoc const &S, SMLoc const &E) - : Kind(Kind), Reg(Reg), Start(S), End(E) {} + : Base(), Kind(Kind), Reg(Reg), Start(S), End(E) {} MSP430Operand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E) - : Kind(k_Imm), Imm(Imm), Start(S), End(E) {} - MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, - SMLoc const &E) - : Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {} + : Base(), Kind(k_Imm), Imm(Imm), Start(S), End(E) {} + MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, SMLoc const &E) + : Base(), Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {} void addRegOperands(MCInst &Inst, unsigned N) const { assert((Kind == k_Reg || Kind == k_IndReg || Kind == k_PostIndReg) && diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp index 0604d47597e2..2fd58717c4db 100644 --- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp @@ -57,5 +57,5 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 736c41f8ac03..01b5dff2e448 100644 --- 
a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -827,7 +827,8 @@ class MipsOperand : public MCParsedAsmOperand { } Kind; public: - MipsOperand(KindTy K, MipsAsmParser &Parser) : Kind(K), AsmParser(Parser) {} + MipsOperand(KindTy K, MipsAsmParser &Parser) + : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} ~MipsOperand() override { switch (Kind) { diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index 563118dfe627..f6f43da9abf8 100644 --- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips16-registerinfo" -Mips16RegisterInfo::Mips16RegisterInfo() {} +Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {} bool Mips16RegisterInfo::requiresRegisterScavenging (const MachineFunction &MF) const { diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp index 59f158688b16..6d44ce2ab563 100644 --- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp @@ -80,8 +80,8 @@ class MipsInstructionSelector : public InstructionSelector { MipsInstructionSelector::MipsInstructionSelector( const MipsTargetMachine &TM, const MipsSubtarget &STI, const MipsRegisterBankInfo &RBI) - : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), - RBI(RBI), + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "MipsGenGlobalISel.inc" diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 2cb59e696031..04b69c66bc0d 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -73,7 +73,8 @@ RegisterBankInfo::ValueMapping 
ValueMappings[] = { using namespace llvm; -MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) {} +MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) + : MipsGenRegisterBankInfo() {} const RegisterBank & MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index d6481793ef49..b05e9ad827c4 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -38,7 +38,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-reg-info" -MipsSERegisterInfo::MipsSERegisterInfo() {} +MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {} bool MipsSERegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 8df6f13aa68e..953d95e55f65 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; // Pin the vtable to this file. 
void NVPTXInstrInfo::anchor() {} -NVPTXInstrInfo::NVPTXInstrInfo() : RegInfo() {} +NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {} void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 5a6440c91fca..05c20369abf4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -49,8 +49,8 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - SmVersion(20), TM(TM), - TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {} + SmVersion(20), TM(TM), InstrInfo(), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {} bool NVPTXSubtarget::hasImageHandles() const { // Enable handles for Kepler+, where CUDA supports indirect surfaces and diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 4645671a0cd8..366d92a5a805 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -17,7 +17,7 @@ namespace llvm { class NVPTXTargetObjectFile : public TargetLoweringObjectFile { public: - NVPTXTargetObjectFile() {} + NVPTXTargetObjectFile() : TargetLoweringObjectFile() {} ~NVPTXTargetObjectFile() override; diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 1f509afb723b..ded922329ebf 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -201,10 +201,9 @@ struct PPCOperand : public MCParsedAsmOperand { struct TLSRegOp TLSReg; }; - PPCOperand(KindTy K) : Kind(K) {} - + PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} public: - PPCOperand(const PPCOperand &o) { + 
PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp index 0cd8350e3fdd..7d64816ed6c7 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp @@ -65,7 +65,8 @@ class PPCInstructionSelector : public InstructionSelector { PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI, const PPCRegisterBankInfo &RBI) - : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), + : InstructionSelector(), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "PPCGenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 58165fcaac03..6af79324919c 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -23,4 +23,5 @@ using namespace llvm; -PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) {} +PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) + : PPCGenRegisterBankInfo() {} diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 858e78076b56..75592dd4c6f5 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -302,10 +302,10 @@ struct RISCVOperand : public MCParsedAsmOperand { struct VTypeOp VType; }; - RISCVOperand(KindTy K) : Kind(K) {} + RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} public: - RISCVOperand(const RISCVOperand &o) { + RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; IsRV64 = 
o.IsRV64; StartLoc = o.StartLoc; diff --git a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp index 8dfd71ac0b6b..4d1f47da209d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp @@ -69,7 +69,8 @@ class RISCVInstructionSelector : public InstructionSelector { RISCVInstructionSelector::RISCVInstructionSelector( const RISCVTargetMachine &TM, const RISCVSubtarget &STI, const RISCVRegisterBankInfo &RBI) - : STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), + : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "RISCVGenGlobalISel.inc" diff --git a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp index 4ff3a44f3511..bd3b95a98b9f 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp @@ -22,4 +22,5 @@ using namespace llvm; -RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) {} +RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) + : RISCVGenRegisterBankInfo() {} diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index af3304f0907d..48e6903bd1b1 100644 --- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -257,7 +257,7 @@ class SparcOperand : public MCParsedAsmOperand { }; public: - SparcOperand(KindTy K) : Kind(K) {} + SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} bool isToken() const override { return Kind == k_Token; } bool isReg() const override { return Kind == k_Register; } diff --git a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h index f30ddc7b4955..9bbe602b32b3 100644 --- 
a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h +++ b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h @@ -18,7 +18,9 @@ class TargetMachine; class SparcELFTargetObjectFile : public TargetLoweringObjectFileELF { public: - SparcELFTargetObjectFile() {} + SparcELFTargetObjectFile() : + TargetLoweringObjectFileELF() + {} void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index cf55318d328d..39a82e2c07e0 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -62,7 +62,8 @@ struct SystemZAddressingMode { bool IncludesDynAlloc; SystemZAddressingMode(AddrForm form, DispRange dr) - : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {} + : Form(form), DR(dr), Base(), Disp(0), Index(), + IncludesDynAlloc(false) {} // True if the address can have an index register. bool hasIndexField() { return Form != FormBD; } diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 75c0d454d904..0f03d96655bf 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -89,7 +89,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasSoftFloat(false), TargetTriple(TT), SpecialRegisters(initializeSpecialRegisters()), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(SystemZFrameLowering::create(*this)) {} + TSInfo(), FrameLowering(SystemZFrameLowering::create(*this)) {} bool SystemZSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp index 4a318e493c52..fd9dc32b04f5 100644 --- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp +++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -210,7 +210,7 @@ class VEOperand : public 
MCParsedAsmOperand { }; public: - VEOperand(KindTy K) : Kind(K) {} + VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} bool isToken() const override { return Kind == k_Token; } bool isReg() const override { return Kind == k_Register; } diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h index 3160f6a552d7..16b25fed3f11 100644 --- a/llvm/lib/Target/VE/VEMachineFunctionInfo.h +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -29,9 +29,10 @@ class VEMachineFunctionInfo : public MachineFunctionInfo { bool IsLeafProc; public: - VEMachineFunctionInfo() : VarArgsFrameOffset(0), IsLeafProc(false) {} + VEMachineFunctionInfo() + : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {} explicit VEMachineFunctionInfo(MachineFunction &MF) - : VarArgsFrameOffset(0), IsLeafProc(false) {} + : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {} Register getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 103b634ecf5b..add3c799f4aa 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -42,8 +42,9 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const std::string &FS, const TargetMachine &TM) : WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), - TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TSInfo(), TLInfo(TM, *this) {} + TargetTriple(TT), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TSInfo(), + TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableAtomicExpand() const { // If atomics are disabled, atomic ops are lowered instead of expanded diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 5b90c67deae6..7ed05fd0331d 100644 --- 
a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -80,9 +80,9 @@ namespace { bool NegateIndex = false; X86ISelAddressMode() - : BaseType(RegBase), Base_FrameIndex(0), Scale(1), Disp(0), GV(nullptr), - CP(nullptr), BlockAddr(nullptr), ES(nullptr), MCSym(nullptr), JT(-1), - SymbolFlags(X86II::MO_NO_FLAG) {} + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), + Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), + MCSym(nullptr), JT(-1), SymbolFlags(X86II::MO_NO_FLAG) {} bool hasSymbolicDisplacement() const { return GV != nullptr || CP != nullptr || ES != nullptr || diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp index 28d57ca9ae3c..8abbaa92c8cf 100644 --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -153,8 +153,8 @@ class X86InstructionSelector : public InstructionSelector { X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), - RBI(RBI), + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), #define GET_GLOBALISEL_PREDICATES_INIT #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp index 497a8f6e065f..9c076d2d6769 100644 --- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp @@ -25,7 +25,8 @@ using namespace llvm; #define GET_TARGET_REGBANK_INFO_IMPL #include "X86GenRegisterBankInfo.def" -X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) { +X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) + : X86GenRegisterBankInfo() { // validate RegBank 
initialization. const RegisterBank &RBGPR = getRegBank(X86::GPRRegBankID); diff --git a/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/llvm/lib/Target/XCore/XCoreSubtarget.cpp index 051d51178baa..1be707cb488c 100644 --- a/llvm/lib/Target/XCore/XCoreSubtarget.cpp +++ b/llvm/lib/Target/XCore/XCoreSubtarget.cpp @@ -26,5 +26,5 @@ void XCoreSubtarget::anchor() { } XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(*this), - TLInfo(TM, *this), TSInfo() {} + : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), + FrameLowering(*this), TLInfo(TM, *this), TSInfo() {} diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 0fef01a47b04..4e3689f09536 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -1073,7 +1073,8 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, bool MandatoryFirst, InliningAdvisorMode Mode, unsigned MaxDevirtIterations) - : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) { + : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations), + PM(), MPM() { // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. 
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index fe9586ce75a6..2d717475ce7f 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -169,7 +169,8 @@ struct FunctionOutliningInfo { }; struct FunctionOutliningMultiRegionInfo { - FunctionOutliningMultiRegionInfo() {} + FunctionOutliningMultiRegionInfo() + : ORI() {} // Container for outline regions struct OutlineRegionInfo { diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index e9c4a56a90c2..73f208abcb07 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -248,7 +248,8 @@ class PGOCounterPromoter { PGOCounterPromoter( DenseMap> &LoopToCands, Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) - : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) { + : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), + LI(LI), BFI(BFI) { // Skip collection of ExitBlocks and InsertPts for loops that will not be // able to have counters promoted. 
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index accaa1088d6f..4e4097e13271 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -220,7 +220,9 @@ class LowerMatrixIntrinsics { bool IsColumnMajor = true; public: - MatrixTy() : IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {} + MatrixTy() + : Vectors(), + IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {} MatrixTy(ArrayRef Vectors) : Vectors(Vectors.begin(), Vectors.end()), IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {} @@ -1830,7 +1832,7 @@ class LowerMatrixIntrinsics { const DenseMap> &Shared, const SmallSetVector &ExprsInSubprogram, Value *Leaf) - : Stream(Str), DL(DL), Inst2Matrix(Inst2Matrix), Shared(Shared), + : Str(), Stream(Str), DL(DL), Inst2Matrix(Inst2Matrix), Shared(Shared), ExprsInSubprogram(ExprsInSubprogram), Leaf(Leaf) {} void indent(unsigned N) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 503cb1123e4e..a8102c0b07b8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -198,8 +198,8 @@ struct VPTransformState { VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilder<> &Builder, InnerLoopVectorizer *ILV, VPlan *Plan) - : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan) { - } + : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV), + Plan(Plan) {} /// The chosen Vectorization and Unroll Factors of the loop being vectorized. 
ElementCount VF; diff --git a/llvm/tools/dsymutil/BinaryHolder.h b/llvm/tools/dsymutil/BinaryHolder.h index 6245e4924733..5e81fe4b93b1 100644 --- a/llvm/tools/dsymutil/BinaryHolder.h +++ b/llvm/tools/dsymutil/BinaryHolder.h @@ -103,7 +103,7 @@ class BinaryHolder { std::string Filename; TimestampTy Timestamp; - KeyTy() {} + KeyTy() : Filename(), Timestamp() {} KeyTy(StringRef Filename, TimestampTy Timestamp) : Filename(Filename.str()), Timestamp(Timestamp) {} }; diff --git a/llvm/tools/dsymutil/Reproducer.cpp b/llvm/tools/dsymutil/Reproducer.cpp index 4f2e0db297e5..5c60758c6f80 100644 --- a/llvm/tools/dsymutil/Reproducer.cpp +++ b/llvm/tools/dsymutil/Reproducer.cpp @@ -27,7 +27,7 @@ Reproducer::Reproducer() : VFS(vfs::getRealFileSystem()) {} Reproducer::~Reproducer() = default; ReproducerGenerate::ReproducerGenerate(std::error_code &EC) - : Root(createReproducerDir(EC)) { + : Root(createReproducerDir(EC)), FC() { if (!Root.empty()) FC = std::make_shared(Root, Root); VFS = FileCollector::createCollectorVFS(vfs::getRealFileSystem(), FC); diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/llvm/tools/llvm-cov/CoverageSummaryInfo.h index 84a3228f22b9..62e7cad1012b 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.h +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.h @@ -191,7 +191,8 @@ struct FunctionCoverageSummary { BranchCoverageInfo BranchCoverage; FunctionCoverageSummary(const std::string &Name) - : Name(Name), ExecutionCount(0) {} + : Name(Name), ExecutionCount(0), RegionCoverage(), LineCoverage(), + BranchCoverage() {} FunctionCoverageSummary(const std::string &Name, uint64_t ExecutionCount, const RegionCoverageInfo &RegionCoverage, @@ -222,7 +223,9 @@ struct FileCoverageSummary { FunctionCoverageInfo FunctionCoverage; FunctionCoverageInfo InstantiationCoverage; - FileCoverageSummary(StringRef Name) : Name(Name) {} + FileCoverageSummary(StringRef Name) + : Name(Name), RegionCoverage(), LineCoverage(), FunctionCoverage(), + InstantiationCoverage() {} 
FileCoverageSummary &operator+=(const FileCoverageSummary &RHS) { RegionCoverage += RHS.RegionCoverage; diff --git a/llvm/tools/llvm-mca/CodeRegion.h b/llvm/tools/llvm-mca/CodeRegion.h index 0e1e02a533d8..0b2590767dfa 100644 --- a/llvm/tools/llvm-mca/CodeRegion.h +++ b/llvm/tools/llvm-mca/CodeRegion.h @@ -63,7 +63,7 @@ class CodeRegion { public: CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start) - : Description(Desc), RangeStart(Start) {} + : Description(Desc), RangeStart(Start), RangeEnd() {} void addInstruction(const llvm::MCInst &Instruction) { Instructions.emplace_back(Instruction); diff --git a/llvm/tools/llvm-mca/PipelinePrinter.h b/llvm/tools/llvm-mca/PipelinePrinter.h index d89e913f979f..fd262f0a8a5d 100644 --- a/llvm/tools/llvm-mca/PipelinePrinter.h +++ b/llvm/tools/llvm-mca/PipelinePrinter.h @@ -53,7 +53,7 @@ class PipelinePrinter { public: PipelinePrinter(Pipeline &Pipe, const CodeRegion &R, unsigned Idx, const MCSubtargetInfo &STI, const PipelineOptions &PO) - : P(Pipe), Region(R), RegionIdx(Idx), STI(STI), PO(PO) {} + : P(Pipe), Region(R), RegionIdx(Idx), STI(STI), PO(PO), Views() {} void addView(std::unique_ptr V) { P.addEventListener(V.get()); diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h index 681ab8f56381..439380fc725b 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.h +++ b/llvm/tools/llvm-objcopy/ELF/Object.h @@ -934,7 +934,8 @@ class BinaryELFBuilder : public BasicELFBuilder { public: BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) - : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + : BasicELFBuilder(), MemBuf(MB), + NewSymbolVisibility(NewSymbolVisibility) {} Expected> build(); }; @@ -945,7 +946,8 @@ class IHexELFBuilder : public BasicELFBuilder { void addDataSections(); public: - IHexELFBuilder(const std::vector &Records) : Records(Records) {} + IHexELFBuilder(const std::vector &Records) + : BasicELFBuilder(), Records(Records) {} Expected> build(); }; diff --git 
a/llvm/tools/llvm-objdump/SourcePrinter.h b/llvm/tools/llvm-objdump/SourcePrinter.h index 31d46e3108f6..21d5bdcf8a49 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.h +++ b/llvm/tools/llvm-objdump/SourcePrinter.h @@ -80,7 +80,7 @@ class LiveVariablePrinter { public: LiveVariablePrinter(const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) - : ActiveCols(Column()), MRI(MRI), STI(STI) {} + : LiveVariables(), ActiveCols(Column()), MRI(MRI), STI(STI) {} void dump() const; diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 0d7eabd6d158..6c12750a9ddf 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -204,8 +204,8 @@ struct WriterContext { WriterContext(bool IsSparse, std::mutex &ErrLock, SmallSet &WriterErrorCodes) - : Writer(IsSparse), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) { - } + : Lock(), Writer(IsSparse), Errors(), ErrLock(ErrLock), + WriterErrorCodes(WriterErrorCodes) {} }; /// Computer the overlap b/w profile BaseFilename and TestFileName, @@ -2303,7 +2303,8 @@ struct HotFuncInfo { uint64_t EntryCount; HotFuncInfo() - : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {} + : FuncName(), TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), + EntryCount(0) {} HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES) : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP), diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index eea486abe0a1..46862bbad7cb 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -286,8 +286,8 @@ static void parseOptions(const opt::InputArgList &Args) { namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() - : IDTable(Allocator), TypeTable(Allocator), GlobalIDTable(Allocator), - GlobalTypeTable(Allocator) {} + : Allocator(), IDTable(Allocator), 
TypeTable(Allocator), + GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {} llvm::BumpPtrAllocator Allocator; llvm::codeview::MergingTypeTableBuilder IDTable; diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h index 37570648cad1..567580540877 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h +++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h @@ -84,7 +84,9 @@ class GIMatchDag { bool HasPostMatchPredicate = false; public: - GIMatchDag(GIMatchDagContext &Ctx) : Ctx(Ctx) {} + GIMatchDag(GIMatchDagContext &Ctx) + : Ctx(Ctx), InstrNodes(), PredicateNodes(), Edges(), + PredicateDependencies() {} GIMatchDag(const GIMatchDag &) = delete; GIMatchDagContext &getContext() const { return Ctx; } diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp index 00d57404b069..d08a83333c30 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp +++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp @@ -82,6 +82,7 @@ GIMatchTreeBuilderLeafInfo::GIMatchTreeBuilderLeafInfo( GIMatchTreeBuilder &Builder, StringRef Name, unsigned RootIdx, const GIMatchDag &MatchDag, void *Data) : Builder(Builder), Info(Name, RootIdx, Data), MatchDag(MatchDag), + InstrNodeToInfo(), RemainingInstrNodes(BitVector(MatchDag.getNumInstrNodes(), true)), RemainingEdges(BitVector(MatchDag.getNumEdges(), true)), RemainingPredicates(BitVector(MatchDag.getNumPredicates(), true)), diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 25bc0adc2a81..7b1bd41a951b 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -883,7 +883,9 @@ class RuleMatcher : public Matcher { public: RuleMatcher(ArrayRef SrcLoc) - : NextInsnVarID(0), NextOutputInsnID(0), NextTempRegID(0), SrcLoc(SrcLoc), + : Matchers(), Actions(), InsnVariableIDs(), MutatableInsns(), + DefinedOperands(), NextInsnVarID(0), NextOutputInsnID(0), + 
NextTempRegID(0), SrcLoc(SrcLoc), ComplexSubOperands(), RuleID(NextRuleID++) {} RuleMatcher(RuleMatcher &&Other) = default; RuleMatcher &operator=(RuleMatcher &&Other) = default; diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h index 9e7a4a3925ac..29cca92d902c 100644 --- a/llvm/utils/TableGen/PredicateExpander.h +++ b/llvm/utils/TableGen/PredicateExpander.h @@ -111,7 +111,7 @@ class STIPredicateExpander : public PredicateExpander { public: STIPredicateExpander(StringRef Target) - : PredicateExpander(Target), ExpandDefinition(false) {} + : PredicateExpander(Target), ClassPrefix(), ExpandDefinition(false) {} bool shouldExpandDefinition() const { return ExpandDefinition; } StringRef getClassPrefix() const { return ClassPrefix; } diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp index 61f71309b6fb..0725657150f8 100644 --- a/llvm/utils/TableGen/RegisterBankEmitter.cpp +++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp @@ -42,7 +42,7 @@ class RegisterBank { public: RegisterBank(const Record &TheDef) - : TheDef(TheDef), RCWithLargestRegsSize(nullptr) {} + : TheDef(TheDef), RCs(), RCWithLargestRegsSize(nullptr) {} /// Get the human-readable name for the bank. StringRef getName() const { return TheDef.getValueAsString("Name"); } From cbcbbd6ac8ece43eaff65d40401c75144bf2631f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 3 Jan 2022 11:13:25 -0800 Subject: [PATCH 449/992] [ValueTracking][SelectionDAG] Rename ComputeMinSignedBits->ComputeMaxSignificantBits. NFC This function returns an upper bound on the number of bits needed to represent the signed value. Use "Max" to match similar functions in KnownBits like countMaxActiveBits. Rename APInt::getMinSignedBits->getSignificantBits. Keeping the old name around to keep this patch size down. Will do a bulk rename as follow up. Rename KnownBits::countMaxSignedBits->countMaxSignificantBits. 
Reviewed By: lebedev.ri, RKSimon, spatel Differential Revision: https://reviews.llvm.org/D116522 --- llvm/include/llvm/ADT/APInt.h | 21 ++++++++++++------- llvm/include/llvm/Analysis/ValueTracking.h | 16 +++++++------- llvm/include/llvm/CodeGen/SelectionDAG.h | 18 ++++++++-------- llvm/include/llvm/Support/KnownBits.h | 10 +++++++-- .../Transforms/InstCombine/InstCombiner.h | 6 +++--- llvm/lib/Analysis/ValueTracking.cpp | 8 +++---- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++++--- .../SelectionDAG/LegalizeIntegerTypes.cpp | 4 ++-- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 ++++---- .../CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++++++------ .../InstCombine/InstCombineCompares.cpp | 4 ++-- .../InstCombine/InstCombineSelect.cpp | 5 +++-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 3 ++- llvm/unittests/Support/KnownBitsTest.cpp | 6 +++--- 17 files changed, 77 insertions(+), 59 deletions(-) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index c2660502a419..b1fc85d3c09d 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -417,7 +417,7 @@ class LLVM_NODISCARD APInt { bool isIntN(unsigned N) const { return getActiveBits() <= N; } /// Check if this APInt has an N-bits signed integer value. - bool isSignedIntN(unsigned N) const { return getMinSignedBits() <= N; } + bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; } /// Check if this APInt's value is a power of two greater than zero. /// @@ -1069,8 +1069,9 @@ class LLVM_NODISCARD APInt { /// /// \returns true if *this < RHS when considered signed. bool slt(int64_t RHS) const { - return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative() - : getSExtValue() < RHS; + return (!isSingleWord() && getSignificantBits() > 64) + ? 
isNegative() + : getSExtValue() < RHS; } /// Unsigned less or equal comparison @@ -1139,8 +1140,9 @@ class LLVM_NODISCARD APInt { /// /// \returns true if *this > RHS when considered signed. bool sgt(int64_t RHS) const { - return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative() - : getSExtValue() > RHS; + return (!isSingleWord() && getSignificantBits() > 64) + ? !isNegative() + : getSExtValue() > RHS; } /// Unsigned greater or equal comparison @@ -1450,7 +1452,12 @@ class LLVM_NODISCARD APInt { /// returns the smallest bit width that will retain the negative value. For /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so /// for -1, this function will always return 1. - unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; } + unsigned getSignificantBits() const { + return BitWidth - getNumSignBits() + 1; + } + + /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead. + unsigned getMinSignedBits() const { return getSignificantBits(); } /// Get zero extended value /// @@ -1472,7 +1479,7 @@ class LLVM_NODISCARD APInt { int64_t getSExtValue() const { if (isSingleWord()) return SignExtend64(U.VAL, BitWidth); - assert(getMinSignedBits() <= 64 && "Too many bits for int64_t"); + assert(getSignificantBits() <= 64 && "Too many bits for int64_t"); return int64_t(U.pVal[0]); } diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index b943ec1cebd5..f2988b4e9414 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -202,14 +202,14 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, bool UseInstrInfo = true); - /// Get the minimum bit size for this Value \p Op as a signed integer. - /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). - /// Similar to the APInt::getMinSignedBits function. 
- unsigned ComputeMinSignedBits(const Value *Op, const DataLayout &DL, - unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + /// Get the upper bound on bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MaxSignificantBits) to bitwidth(x)). + /// Similar to the APInt::getSignificantBits function. + unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. If diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index d21844555f5b..c6f45ae25eed 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1833,18 +1833,18 @@ class SelectionDAG { unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Depth = 0) const; - /// Get the minimum bit size for this Value \p Op as a signed integer. - /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). - /// Similar to the APInt::getMinSignedBits function. + /// Get the upper bound on bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MaxSignedBits) to bitwidth(x)). + /// Similar to the APInt::getSignificantBits function. /// Helper wrapper to ComputeNumSignBits. - unsigned ComputeMinSignedBits(SDValue Op, unsigned Depth = 0) const; + unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth = 0) const; - /// Get the minimum bit size for this Value \p Op as a signed integer. - /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). - /// Similar to the APInt::getMinSignedBits function. + /// Get the upper bound on bit size for this Value \p Op as a signed integer. + /// i.e. 
x == sext(trunc(x to MaxSignedBits) to bitwidth(x)). + /// Similar to the APInt::getSignificantBits function. /// Helper wrapper to ComputeNumSignBits. - unsigned ComputeMinSignedBits(SDValue Op, const APInt &DemandedElts, - unsigned Depth = 0) const; + unsigned ComputeMaxSignificantBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth = 0) const; /// Return true if this function can prove that \p Op is never poison /// and, if \p PoisonOnly is false, does not have undef bits. diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 1674bad21e95..5ef0ba31f785 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -254,8 +254,11 @@ struct KnownBits { } /// Returns the maximum number of bits needed to represent all possible - /// signed values with these known bits. - unsigned countMaxSignedBits() const { + /// signed values with these known bits. This is the inverse of the minimum + /// number of known sign bits. Examples for bitwidth 5: + /// 110?? --> 4 + /// 0000? --> 2 + unsigned countMaxSignificantBits() const { return getBitWidth() - countMinSignBits() + 1; } @@ -289,6 +292,9 @@ struct KnownBits { return getBitWidth() - Zero.countPopulation(); } + /// Returns the maximum number of bits needed to represent all possible + /// unsigned values with these known bits. This is the inverse of the + /// minimum number of leading zeros. 
unsigned countMaxActiveBits() const { return getBitWidth() - countMinLeadingZeros(); } diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index c6aee439b5a0..f8cb6dc73a6f 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -480,9 +480,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); } - unsigned ComputeMinSignedBits(const Value *Op, unsigned Depth = 0, - const Instruction *CxtI = nullptr) const { - return llvm::ComputeMinSignedBits(Op, DL, Depth, &AC, CxtI, &DT); + unsigned ComputeMaxSignificantBits(const Value *Op, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { + return llvm::ComputeMaxSignificantBits(Op, DL, Depth, &AC, CxtI, &DT); } OverflowResult computeOverflowForUnsignedMul(const Value *LHS, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 7876e209acc6..16bd0adff983 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -396,10 +396,10 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); } -unsigned llvm::ComputeMinSignedBits(const Value *V, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT); return V->getType()->getScalarSizeInBits() - SignBits + 1; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8153b7061094..9ac937664642 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12320,7 +12320,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (ExtVTBits >= DAG.ComputeMinSignedBits(N0)) + if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12336,7 +12336,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) && + if ((N00Bits <= ExtVTBits || + DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -12355,7 +12356,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || - DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) && + DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8ce6ad1b66a0..32086a79acdf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1751,8 +1751,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, // duplicated sign bits is no greater than the width of LHS/RHS, we can avoid // inserting a zext_inreg operation that we might not be able to remove. 
if (ISD::isIntEqualitySetCC(CCCode)) { - unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); - unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); + unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(OpR); if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { LHS = OpL; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 56253909cb53..da3beaee81f1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4297,14 +4297,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(FirstAnswer, Known.countMinSignBits()); } -unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { +unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, + unsigned Depth) const { unsigned SignBits = ComputeNumSignBits(Op, Depth); return Op.getScalarValueSizeInBits() - SignBits + 1; } -unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, - const APInt &DemandedElts, - unsigned Depth) const { +unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); return Op.getScalarValueSizeInBits() - SignBits + 1; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 03163c896799..2781c760d297 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1833,7 +1833,7 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. 
if (DemandedBits.isSignMask()) { unsigned MinSignedBits = - TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); + TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1); bool AlreadySignExtended = ExVTBits >= MinSignedBits; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index a5923c82bfef..1920684d8f1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -450,7 +450,7 @@ unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op) const { } unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op) const { - return ComputeMinSignedBits(Op, *DL, 0, AC); + return ComputeMaxSignificantBits(Op, *DL, 0, AC); } static void extractValues(IRBuilder<> &Builder, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 54177564afbc..148265afa391 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -51,7 +51,7 @@ unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { // In order for this to be a signed 24-bit value, bit 23, must // be a sign bit. 
- return DAG.ComputeMinSignedBits(Op); + return DAG.ComputeMaxSignificantBits(Op); } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6f6361b6757b..7d14ed79e1a9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6850,8 +6850,8 @@ static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget, DAG.computeKnownBits(RHS).countMaxActiveBits() <= EltSizeInBits) return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS); - if (DAG.ComputeMinSignedBits(LHS) <= EltSizeInBits && - DAG.ComputeMinSignedBits(RHS) <= EltSizeInBits) + if (DAG.ComputeMaxSignificantBits(LHS) <= EltSizeInBits && + DAG.ComputeMaxSignificantBits(RHS) <= EltSizeInBits) return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS); } @@ -23157,10 +23157,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // For equality comparisons try to use SIGN_EXTEND if the input was // truncate from something with enough sign bits. if (Op0.getOpcode() == ISD::TRUNCATE) { - if (DAG.ComputeMinSignedBits(Op0.getOperand(0)) <= 16) + if (DAG.ComputeMaxSignificantBits(Op0.getOperand(0)) <= 16) ExtendOp = ISD::SIGN_EXTEND; } else if (Op1.getOpcode() == ISD::TRUNCATE) { - if (DAG.ComputeMinSignedBits(Op1.getOperand(0)) <= 16) + if (DAG.ComputeMaxSignificantBits(Op1.getOperand(0)) <= 16) ExtendOp = ISD::SIGN_EXTEND; } } @@ -44732,7 +44732,8 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, return SDValue(); // Sign bits must extend down to the lowest i16. 
- if (DAG.ComputeMinSignedBits(N1) > 16 || DAG.ComputeMinSignedBits(N0) > 16) + if (DAG.ComputeMaxSignificantBits(N1) > 16 || + DAG.ComputeMaxSignificantBits(N0) > 16) return SDValue(); // At least one of the elements must be zero in the upper 17 bits, or can be @@ -48714,7 +48715,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, // sequence or using AVX512 truncations. If the inputs are sext/zext then the // truncations may actually be free by peeking through to the ext source. auto IsSext = [&DAG](SDValue V) { - return DAG.ComputeMinSignedBits(V) <= 16; + return DAG.ComputeMaxSignificantBits(V) <= 16; }; auto IsZext = [&DAG](SDValue V) { return DAG.computeKnownBits(V).countMaxActiveBits() <= 16; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5b6728e466fc..c14e28dc4032 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1263,8 +1263,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // This is only really a signed overflow check if the inputs have been // sign-extended; check for that condition. For example, if CI2 is 2^31 and // the operands of the add are 64 bits wide, we need at least 33 sign bits. 
- if (IC.ComputeMinSignedBits(A, 0, &I) > NewWidth || - IC.ComputeMinSignedBits(B, 0, &I) > NewWidth) + if (IC.ComputeMaxSignificantBits(A, 0, &I) > NewWidth || + IC.ComputeMaxSignificantBits(B, 0, &I) > NewWidth) return nullptr; // In order to replace the original add with a narrower diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index a6d6b5199105..cbdf04572042 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2325,8 +2325,9 @@ Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) { // The two operands of the add/sub must be nsw-truncatable to the NewTy. This // is usually achieved via a sext from a smaller type. - if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth || - ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) + if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) > + NewBitWidth || + ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) return nullptr; // Finally create and return the sat intrinsic, truncated to the new type diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 02bd0bd8cc5f..303038dbd53d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4940,7 +4940,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) // bits are in the condition value. - unsigned MaxSignificantBitsInCond = ComputeMinSignedBits(Cond, DL, 0, AC, SI); + unsigned MaxSignificantBitsInCond = + ComputeMaxSignificantBits(Cond, DL, 0, AC, SI); // Gather dead cases. 
SmallVector DeadCases; diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index d41402b69fc0..c8d27b9a2460 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -442,14 +442,14 @@ TEST(KnownBitsTest, CountMaxActiveBits) { }); } -TEST(KnownBitsTest, CountMaxSignedBits) { +TEST(KnownBitsTest, CountMaxSignificantBits) { unsigned Bits = 4; ForeachKnownBits(Bits, [&](const KnownBits &Known) { unsigned Expected = 0; ForeachNumInKnownBits(Known, [&](const APInt &N) { - Expected = std::max(Expected, N.getMinSignedBits()); + Expected = std::max(Expected, N.getSignificantBits()); }); - EXPECT_EQ(Expected, Known.countMaxSignedBits()); + EXPECT_EQ(Expected, Known.countMaxSignificantBits()); }); } From 7203140748cec7185479cc413ea68f34d2eced99 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 3 Jan 2022 11:56:02 -0800 Subject: [PATCH 450/992] Revert "[unroll] Prune all but first copy of invariant exit" This reverts commit 9bd22595bad36cd19f5e7ae18ccd9f41cba29dc5. Seeing some bot failures which look plausibly connected. Revert while investigating/waiting for bots to stablize. e.g. 
https://lab.llvm.org/buildbot#builders/36/builds/15933 --- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 5 - .../Transforms/LoopUnroll/nonlatchcondbr.ll | 12 +- .../LoopUnroll/runtime-loop-multiple-exits.ll | 436 +++++++++--------- llvm/test/Transforms/LoopUnroll/scevunroll.ll | 8 +- 4 files changed, 230 insertions(+), 231 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 0a530f5292c5..b0c622b98d5e 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -310,7 +310,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, unsigned TripMultiple; unsigned BreakoutTrip; bool ExitOnTrue; - bool InvariantExit; SmallVector ExitingBlocks; }; DenseMap ExitInfos; @@ -334,8 +333,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, (unsigned)GreatestCommonDivisor64(ULO.Count, Info.TripMultiple); } Info.ExitOnTrue = !L->contains(BI->getSuccessor(0)); - Info.InvariantExit = L->isLoopInvariant(BI->getCondition()) && - DT->dominates(ExitingBlock, LatchBlock); Info.ExitingBlocks.push_back(ExitingBlock); LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName() << ": TripCount=" << Info.TripCount @@ -688,8 +685,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, bool IsLatch) -> Optional { - if (Info.InvariantExit && i != 0) - return false; if (CompletelyUnroll) { if (PreserveOnlyFirst) { if (i == 0) diff --git a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll index de3b0e769abd..eca86b4cd9c3 100644 --- a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll +++ b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll @@ -173,21 +173,21 @@ define void @test3(i32* noalias %A, i1 %cond) { ; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, 
i32* [[A]], i64 1 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE]]) -; CHECK-NEXT: br label [[FOR_BODY_1:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]] ; CHECK: for.body.1: ; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]] ; CHECK: for.body.for.body_crit_edge.1: ; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]]) -; CHECK-NEXT: br label [[FOR_BODY_2:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]] ; CHECK: for.body.2: ; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]] ; CHECK: for.body.for.body_crit_edge.2: ; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 ; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]]) -; CHECK-NEXT: br label [[FOR_BODY_3:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]] ; CHECK: for.body.3: ; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.3: @@ -229,7 +229,11 @@ define void @test4(i32 %arg) { ; CHECK: bb1: ; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB1_1:%.*]] ; CHECK: bb1.1: -; CHECK-NEXT: br label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1_2:%.*]] +; CHECK: bb1.2: +; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1_3:%.*]] +; CHECK: bb1.3: +; CHECK-NEXT: br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: bb4: ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll 
b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index 9c408e07e279..9863116137dd 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -403,7 +403,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %for.exiting_block.1 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -413,7 +413,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %for.exiting_block.2 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %for.body.2 @@ -423,7 +423,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %for.exiting_block.3 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %for.body.3 @@ -433,7 +433,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; 
EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %for.exiting_block.4 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %for.body.4 @@ -443,7 +443,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %for.exiting_block.5 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %for.body.5 @@ -453,7 +453,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %for.exiting_block.6 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %for.body.6 @@ -463,7 +463,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %for.exiting_block.7 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %for.body.7 @@ -512,7 +512,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; 
EPILOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %sum.0.lcssa.ph1, %for.end.epilog-lcssa ] ; EPILOG-NEXT: ret i32 %sum.0.lcssa ; EPILOG: for.exit2.loopexit: -; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] +; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ] ; EPILOG-NEXT: br label %for.exit2 ; EPILOG: for.exit2.loopexit2: ; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] @@ -544,7 +544,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -583,7 +583,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ] ; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; EPILOG-BLOCK: for.exit2.loopexit: -; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, 
%for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] +; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %for.exit2 ; EPILOG-BLOCK: for.exit2: ; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] @@ -639,7 +639,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br label %for.exiting_block.1 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -648,7 +648,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br label %for.exiting_block.2 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %for.body.2 @@ -657,7 +657,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br label %for.exiting_block.3 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %for.body.3 @@ -666,7 +666,7 @@ define i32 @test2(i32* nocapture 
%a, i64 %n) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br label %for.exiting_block.4 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %for.body.4 @@ -675,7 +675,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br label %for.exiting_block.5 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %for.body.5 @@ -684,7 +684,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br label %for.exiting_block.6 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %for.body.6 @@ -693,7 +693,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br label %for.exiting_block.7 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %for.body.7 
@@ -711,7 +711,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ] ; PROLOG-NEXT: ret i32 %sum.0.lcssa ; PROLOG: for.exit2.loopexit: -; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] +; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ] ; PROLOG-NEXT: br label %for.exit2 ; PROLOG: for.exit2.loopexit1: ; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ] @@ -756,7 +756,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1 @@ -774,7 +774,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) { ; PROLOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ] ; PROLOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; PROLOG-BLOCK: 
for.exit2.loopexit: -; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] +; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ] ; PROLOG-BLOCK-NEXT: br label %for.exit2 ; PROLOG-BLOCK: for.exit2: ; PROLOG-BLOCK-NEXT: %retval = phi i32 [ 0, %header.prol ], [ 42, %for.exiting_block.prol ], [ %retval.ph, %for.exit2.loopexit ] @@ -1265,7 +1265,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %for.exiting_block.1 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1275,7 +1275,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %for.exiting_block.2 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -1285,7 +1285,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %for.exiting_block.3 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label 
%for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -1295,7 +1295,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %for.exiting_block.4 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -1305,7 +1305,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %for.exiting_block.5 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -1315,7 +1315,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %for.exiting_block.6 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -1325,7 +1325,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, 
%add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %for.exiting_block.7 +; EPILOG-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -1368,7 +1368,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter ; EPILOG-NEXT: br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !4 ; EPILOG: latchExit.epilog-lcssa.loopexit: -; EPILOG-NEXT: %result.ph1.ph = phi i32 [ 0, %header ] +; EPILOG-NEXT: %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ] ; EPILOG-NEXT: br label %latchExit.epilog-lcssa ; EPILOG: latchExit.epilog-lcssa.loopexit2: ; EPILOG-NEXT: %result.ph1.ph3 = phi i32 [ 0, %header.epil ], [ %add.epil, %latch.epil ] @@ -1409,7 +1409,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1445,7 +1445,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %4, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: 
latchExit.epilog-lcssa.loopexit: -; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 0, %header ] +; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ] ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa: ; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ 0, %header.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ] @@ -1508,7 +1508,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br label %for.exiting_block.1 +; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1517,7 +1517,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br label %for.exiting_block.2 +; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -1526,7 +1526,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br label %for.exiting_block.3 +; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label 
%for.exit2.loopexit, label %latch.3 @@ -1535,7 +1535,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br label %for.exiting_block.4 +; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -1544,7 +1544,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br label %for.exiting_block.5 +; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -1553,7 +1553,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br label %for.exiting_block.6 +; PROLOG-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -1562,7 +1562,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br label %for.exiting_block.7 +; PROLOG-NEXT: 
br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -1574,7 +1574,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n ; PROLOG-NEXT: br i1 %exitcond.7, label %latchExit.unr-lcssa.loopexit, label %header ; PROLOG: latchExit.unr-lcssa.loopexit: -; PROLOG-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ %add.7, %latch.7 ] +; PROLOG-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ], [ %add.7, %latch.7 ] ; PROLOG-NEXT: br label %latchExit.unr-lcssa ; PROLOG: latchExit.unr-lcssa.loopexit1: ; PROLOG-NEXT: %result.ph.ph2 = phi i32 [ 0, %header.prol ] @@ -1628,7 +1628,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -1640,7 +1640,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n ; PROLOG-BLOCK-NEXT: br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4 ; PROLOG-BLOCK: latchExit.unr-lcssa.loopexit: -; PROLOG-BLOCK-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ %add.1, %latch.1 ] +; PROLOG-BLOCK-NEXT: %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ %add.1, %latch.1 ] 
; PROLOG-BLOCK-NEXT: br label %latchExit.unr-lcssa ; PROLOG-BLOCK: latchExit.unr-lcssa: ; PROLOG-BLOCK-NEXT: %result.ph = phi i32 [ 0, %header.prol ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ] @@ -1709,7 +1709,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %for.exiting_block.1 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -1719,7 +1719,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %for.exiting_block.2 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2 @@ -1729,7 +1729,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %for.exiting_block.3 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3 @@ -1739,7 +1739,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; 
EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %for.exiting_block.4 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4 @@ -1749,7 +1749,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %for.exiting_block.5 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5 @@ -1759,7 +1759,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %for.exiting_block.6 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6 @@ -1769,7 +1769,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %for.exiting_block.7 +; EPILOG-NEXT: br i1 %cond, label 
%for.exit2.loopexit, label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7 @@ -1853,7 +1853,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -1952,7 +1952,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br label %for.exiting_block.1 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -1961,7 +1961,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br label %for.exiting_block.2 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %latchExit.unr-lcssa.loopexit, label %latch.2 @@ -1970,7 +1970,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; 
PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br label %for.exiting_block.3 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %latchExit.unr-lcssa.loopexit, label %latch.3 @@ -1979,7 +1979,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br label %for.exiting_block.4 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %latchExit.unr-lcssa.loopexit, label %latch.4 @@ -1988,7 +1988,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br label %for.exiting_block.5 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %latchExit.unr-lcssa.loopexit, label %latch.5 @@ -1997,7 +1997,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br label %for.exiting_block.6 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; 
PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %latchExit.unr-lcssa.loopexit, label %latch.6 @@ -2006,7 +2006,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br label %for.exiting_block.7 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %latchExit.unr-lcssa.loopexit, label %latch.7 @@ -2072,7 +2072,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -2154,7 +2154,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %for.exiting_block.1 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -2164,7 +2164,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.1 = add nsw i32 %3, %add ; EPILOG-NEXT: %indvars.iv.next.1 = 
add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %for.exiting_block.2 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2 @@ -2174,7 +2174,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.2 = add nsw i32 %4, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %for.exiting_block.3 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3 @@ -2184,7 +2184,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.3 = add nsw i32 %5, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %for.exiting_block.4 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4 @@ -2194,7 +2194,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.4 = add nsw i32 %6, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %for.exiting_block.5 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 
= icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5 @@ -2204,7 +2204,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.5 = add nsw i32 %7, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %for.exiting_block.6 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6 @@ -2214,7 +2214,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-NEXT: %add.6 = add nsw i32 %8, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %for.exiting_block.7 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7 @@ -2298,7 +2298,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %2, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1 @@ -2397,7 +2397,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %3 = load i32, i32* %arrayidx, 
align 4 ; PROLOG-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br label %for.exiting_block.1 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -2406,7 +2406,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %4, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br label %for.exiting_block.2 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %latchExit.unr-lcssa.loopexit, label %latch.2 @@ -2415,7 +2415,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %5, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br label %for.exiting_block.3 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %latchExit.unr-lcssa.loopexit, label %latch.3 @@ -2424,7 +2424,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %6, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br label %for.exiting_block.4 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 
%cmp.4, label %latchExit.unr-lcssa.loopexit, label %latch.4 @@ -2433,7 +2433,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %7, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br label %for.exiting_block.5 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %latchExit.unr-lcssa.loopexit, label %latch.5 @@ -2442,7 +2442,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %8, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br label %for.exiting_block.6 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %latchExit.unr-lcssa.loopexit, label %latch.6 @@ -2451,7 +2451,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %9, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br label %for.exiting_block.7 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %latchExit.unr-lcssa.loopexit, label %latch.7 @@ -2517,7 +2517,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %3, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 
-; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 @@ -2600,7 +2600,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch: ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %for.exiting_block.1 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; EPILOG-NEXT: %3 = load i32, i32* %arrayidx.1, align 4 @@ -2610,7 +2610,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.1: ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %for.exiting_block.2 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1 ; EPILOG-NEXT: %4 = load i32, i32* %arrayidx.2, align 4 @@ -2620,7 +2620,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.2: ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %for.exiting_block.3 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2 ; EPILOG-NEXT: %5 = load i32, i32* %arrayidx.3, align 4 @@ -2630,7 +2630,7 @@ 
define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.3: ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %for.exiting_block.4 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3 ; EPILOG-NEXT: %6 = load i32, i32* %arrayidx.4, align 4 @@ -2640,7 +2640,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.4: ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %for.exiting_block.5 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4 ; EPILOG-NEXT: %7 = load i32, i32* %arrayidx.5, align 4 @@ -2650,7 +2650,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.5: ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %for.exiting_block.6 +; EPILOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5 ; EPILOG-NEXT: %8 = load i32, i32* %arrayidx.6, align 4 @@ -2660,7 +2660,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: latch.6: ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %for.exiting_block.7 +; EPILOG-NEXT: br i1 %cond, 
label %for.exit2.loopexit, label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6 ; EPILOG-NEXT: %9 = load i32, i32* %arrayidx.7, align 4 @@ -2744,7 +2744,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.1, align 4 @@ -2843,7 +2843,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp, label %latchExit.unr-lcssa.loopexit, label %latch ; PROLOG: latch: ; PROLOG-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br label %for.exiting_block.1 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; PROLOG-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 @@ -2852,7 +2852,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br label %for.exiting_block.2 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1 ; PROLOG-NEXT: %5 = load i32, i32* %arrayidx.2, align 4 @@ -2861,7 +2861,7 @@ define i32 
@otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.2, label %latchExit.unr-lcssa.loopexit, label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br label %for.exiting_block.3 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2 ; PROLOG-NEXT: %6 = load i32, i32* %arrayidx.3, align 4 @@ -2870,7 +2870,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.3, label %latchExit.unr-lcssa.loopexit, label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br label %for.exiting_block.4 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3 ; PROLOG-NEXT: %7 = load i32, i32* %arrayidx.4, align 4 @@ -2879,7 +2879,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.4, label %latchExit.unr-lcssa.loopexit, label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br label %for.exiting_block.5 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4 ; PROLOG-NEXT: %8 = load i32, i32* %arrayidx.5, align 4 @@ -2888,7 +2888,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.5, label %latchExit.unr-lcssa.loopexit, label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br label %for.exiting_block.6 +; 
PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5 ; PROLOG-NEXT: %9 = load i32, i32* %arrayidx.6, align 4 @@ -2897,7 +2897,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-NEXT: br i1 %cmp.6, label %latchExit.unr-lcssa.loopexit, label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br label %for.exiting_block.7 +; PROLOG-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6 ; PROLOG-NEXT: %10 = load i32, i32* %arrayidx.7, align 4 @@ -2963,7 +2963,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; PROLOG-BLOCK-NEXT: br i1 %cmp, label %latchExit.unr-lcssa.loopexit, label %latch ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next ; PROLOG-BLOCK-NEXT: %4 = load i32, i32* %arrayidx.1, align 4 @@ -3837,7 +3837,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add = add nsw i32 %load, %sum.02 ; EPILOG-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %for.exiting_block.1 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG: for.exiting_block.1: ; EPILOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -3847,7 +3847,7 @@ define i32 
@test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.1 = add nsw i32 %load.1, %add ; EPILOG-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %for.exiting_block.2 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 ; EPILOG: for.exiting_block.2: ; EPILOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -3857,7 +3857,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.2 = add nsw i32 %load.2, %add.1 ; EPILOG-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %for.exiting_block.3 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 ; EPILOG: for.exiting_block.3: ; EPILOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -3867,7 +3867,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.3 = add nsw i32 %load.3, %add.2 ; EPILOG-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %for.exiting_block.4 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 ; EPILOG: for.exiting_block.4: ; EPILOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -3877,7 +3877,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.4 = add nsw i32 %load.4, %add.3 ; EPILOG-NEXT: %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %for.exiting_block.5 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label 
%for.exiting_block.5 ; EPILOG: for.exiting_block.5: ; EPILOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -3887,7 +3887,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.5 = add nsw i32 %load.5, %add.4 ; EPILOG-NEXT: %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %for.exiting_block.6 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 ; EPILOG: for.exiting_block.6: ; EPILOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -3897,7 +3897,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %add.6 = add nsw i32 %load.6, %add.5 ; EPILOG-NEXT: %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %for.exiting_block.7 +; EPILOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 ; EPILOG: for.exiting_block.7: ; EPILOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; EPILOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -3946,7 +3946,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %sum.0.lcssa.ph1, %latch_exit.epilog-lcssa ] ; EPILOG-NEXT: ret i32 %sum.0.lcssa ; EPILOG: for.exit2.loopexit: -; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, %for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] +; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 
], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ] ; EPILOG-NEXT: br label %for.exit2 ; EPILOG: for.exit2.loopexit2: ; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ] @@ -3983,7 +3983,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-BLOCK-NEXT: %add = add nsw i32 %load, %sum.02 ; EPILOG-BLOCK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %for.exiting_block.1 +; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -4022,7 +4022,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch.epil ] ; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; EPILOG-BLOCK: for.exit2.loopexit: -; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] +; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %for.exit2 ; EPILOG-BLOCK: for.exit2: ; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] @@ -4083,7 +4083,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load = load i32, i32* %arrayidx, align 4 ; PROLOG-NEXT: %add = add nsw i32 %load, %sum.02 ; PROLOG-NEXT: 
%indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-NEXT: br label %for.exiting_block.1 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG: for.exiting_block.1: ; PROLOG-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -4092,7 +4092,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.1 = load i32, i32* %arrayidx.1, align 4 ; PROLOG-NEXT: %add.1 = add nsw i32 %load.1, %add ; PROLOG-NEXT: %indvars.iv.next.1 = add i64 %indvars.iv.next, 1 -; PROLOG-NEXT: br label %for.exiting_block.2 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2 ; PROLOG: for.exiting_block.2: ; PROLOG-NEXT: %cmp.2 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2 @@ -4101,7 +4101,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.2 = load i32, i32* %arrayidx.2, align 4 ; PROLOG-NEXT: %add.2 = add nsw i32 %load.2, %add.1 ; PROLOG-NEXT: %indvars.iv.next.2 = add i64 %indvars.iv.next.1, 1 -; PROLOG-NEXT: br label %for.exiting_block.3 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3 ; PROLOG: for.exiting_block.3: ; PROLOG-NEXT: %cmp.3 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3 @@ -4110,7 +4110,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.3 = load i32, i32* %arrayidx.3, align 4 ; PROLOG-NEXT: %add.3 = add nsw i32 %load.3, %add.2 ; PROLOG-NEXT: %indvars.iv.next.3 = add i64 %indvars.iv.next.2, 1 -; PROLOG-NEXT: br label %for.exiting_block.4 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4 ; PROLOG: for.exiting_block.4: ; PROLOG-NEXT: %cmp.4 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4 @@ -4119,7 +4119,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, 
i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.4 = load i32, i32* %arrayidx.4, align 4 ; PROLOG-NEXT: %add.4 = add nsw i32 %load.4, %add.3 ; PROLOG-NEXT: %indvars.iv.next.4 = add i64 %indvars.iv.next.3, 1 -; PROLOG-NEXT: br label %for.exiting_block.5 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5 ; PROLOG: for.exiting_block.5: ; PROLOG-NEXT: %cmp.5 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5 @@ -4128,7 +4128,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.5 = load i32, i32* %arrayidx.5, align 4 ; PROLOG-NEXT: %add.5 = add nsw i32 %load.5, %add.4 ; PROLOG-NEXT: %indvars.iv.next.5 = add i64 %indvars.iv.next.4, 1 -; PROLOG-NEXT: br label %for.exiting_block.6 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6 ; PROLOG: for.exiting_block.6: ; PROLOG-NEXT: %cmp.6 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6 @@ -4137,7 +4137,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %load.6 = load i32, i32* %arrayidx.6, align 4 ; PROLOG-NEXT: %add.6 = add nsw i32 %load.6, %add.5 ; PROLOG-NEXT: %indvars.iv.next.6 = add i64 %indvars.iv.next.5, 1 -; PROLOG-NEXT: br label %for.exiting_block.7 +; PROLOG-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7 ; PROLOG: for.exiting_block.7: ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %n, 42 ; PROLOG-NEXT: br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7 @@ -4155,7 +4155,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ] ; PROLOG-NEXT: ret i32 %sum.0.lcssa ; PROLOG: for.exit2.loopexit: -; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ], [ 42, %for.exiting_block.2 ], [ 42, 
%for.exiting_block.3 ], [ 42, %for.exiting_block.4 ], [ 42, %for.exiting_block.5 ], [ 42, %for.exiting_block.6 ], [ 42, %for.exiting_block.7 ] +; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ] ; PROLOG-NEXT: br label %for.exit2 ; PROLOG: for.exit2.loopexit1: ; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ] @@ -4205,7 +4205,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-BLOCK-NEXT: %load = load i32, i32* %arrayidx, align 4 ; PROLOG-BLOCK-NEXT: %add = add nsw i32 %load, %sum.02 ; PROLOG-BLOCK-NEXT: %indvars.iv.next = add i64 %indvars.iv, 1 -; PROLOG-BLOCK-NEXT: br label %for.exiting_block.1 +; PROLOG-BLOCK-NEXT: br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1 ; PROLOG-BLOCK: for.exiting_block.1: ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1 @@ -4223,7 +4223,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { ; PROLOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ] ; PROLOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; PROLOG-BLOCK: for.exit2.loopexit: -; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ 42, %for.exiting_block.1 ] +; PROLOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ] ; PROLOG-BLOCK-NEXT: br label %for.exit2 ; PROLOG-BLOCK: for.exit2: ; PROLOG-BLOCK-NEXT: %retval = phi i32 [ 0, 
%header.prol ], [ 42, %for.exiting_block.prol ], [ %retval.ph, %for.exit2.loopexit ] @@ -4295,31 +4295,31 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG: latch: ; EPILOG-NEXT: %add = add nuw nsw i64 %i6, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-NEXT: br label %latch.1 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 ; EPILOG: latch.1: ; EPILOG-NEXT: %add.1 = add nuw nsw i64 %add, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 -; EPILOG-NEXT: br label %latch.2 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.2 ; EPILOG: latch.2: ; EPILOG-NEXT: %add.2 = add nuw nsw i64 %add.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 -; EPILOG-NEXT: br label %latch.3 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.3 ; EPILOG: latch.3: ; EPILOG-NEXT: %add.3 = add nuw nsw i64 %add.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 -; EPILOG-NEXT: br label %latch.4 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.4 ; EPILOG: latch.4: ; EPILOG-NEXT: %add.4 = add nuw nsw i64 %add.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 -; EPILOG-NEXT: br label %latch.5 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.5 ; EPILOG: latch.5: ; EPILOG-NEXT: %add.5 = add nuw nsw i64 %add.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 -; EPILOG-NEXT: br label %latch.6 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.6 ; EPILOG: latch.6: ; EPILOG-NEXT: %add.6 = add nuw nsw i64 %add.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 -; EPILOG-NEXT: br label %latch.7 +; EPILOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.7 ; EPILOG: latch.7: ; EPILOG-NEXT: %add.7 = add nuw nsw i64 %add.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 @@ -4351,7 +4351,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 
%arg2) { ; EPILOG: loopexit2: ; EPILOG-NEXT: ret i32 %shft ; EPILOG: loopexit1.loopexit: -; EPILOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ] +; EPILOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ] ; EPILOG-NEXT: br label %loopexit1 ; EPILOG: loopexit1.loopexit1: ; EPILOG-NEXT: %sext3.ph2 = phi i32 [ %shft, %header.epil ] @@ -4382,7 +4382,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %add = add nuw nsw i64 %i6, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %latch.1 +; EPILOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 ; EPILOG-BLOCK: latch.1: ; EPILOG-BLOCK-NEXT: %add.1 = add nuw nsw i64 %add, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 @@ -4404,7 +4404,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; EPILOG-BLOCK: loopexit2: ; EPILOG-BLOCK-NEXT: ret i32 %shft ; EPILOG-BLOCK: loopexit1.loopexit: -; EPILOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ] +; EPILOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ] ; EPILOG-BLOCK-NEXT: br label %loopexit1 ; EPILOG-BLOCK: loopexit1: ; EPILOG-BLOCK-NEXT: %sext3 = phi i32 [ %shft, %header.epil ], [ %sext3.ph, %loopexit1.loopexit ] @@ -4448,25 +4448,25 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch ; PROLOG: latch: ; PROLOG-NEXT: %add = add nuw nsw i64 %i6, 1 -; PROLOG-NEXT: br label %latch.1 +; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %add.1 = add nuw nsw i64 %add, 1 -; PROLOG-NEXT: br label %latch.2 +; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %add.2 = add nuw nsw i64 %add.1, 1 -; PROLOG-NEXT: br label %latch.3 
+; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %add.3 = add nuw nsw i64 %add.2, 1 -; PROLOG-NEXT: br label %latch.4 +; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %add.4 = add nuw nsw i64 %add.3, 1 -; PROLOG-NEXT: br label %latch.5 +; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %add.5 = add nuw nsw i64 %add.4, 1 -; PROLOG-NEXT: br label %latch.6 +; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %add.6 = add nuw nsw i64 %add.5, 1 -; PROLOG-NEXT: br label %latch.7 +; PROLOG-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.7 ; PROLOG: latch.7: ; PROLOG-NEXT: %add.7 = add nuw nsw i64 %add.6, 1 ; PROLOG-NEXT: %i9.7 = icmp slt i64 %add.7, %sext @@ -4478,7 +4478,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG: loopexit2: ; PROLOG-NEXT: ret i32 %shft ; PROLOG: loopexit1.loopexit: -; PROLOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ] +; PROLOG-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ] ; PROLOG-NEXT: br label %loopexit1 ; PROLOG: loopexit1.loopexit1: ; PROLOG-NEXT: %sext3.ph2 = phi i32 [ %shft, %header.prol ] @@ -4516,7 +4516,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %add = add nuw nsw i64 %i6, 1 -; PROLOG-BLOCK-NEXT: br label %latch.1 +; PROLOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 ; PROLOG-BLOCK: latch.1: ; PROLOG-BLOCK-NEXT: %add.1 = add nuw nsw i64 %add, 1 ; PROLOG-BLOCK-NEXT: %i9.1 = icmp slt i64 %add.1, %sext @@ -4528,7 +4528,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) { ; PROLOG-BLOCK: loopexit2: ; 
PROLOG-BLOCK-NEXT: ret i32 %shft ; PROLOG-BLOCK: loopexit1.loopexit: -; PROLOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ] +; PROLOG-BLOCK-NEXT: %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ] ; PROLOG-BLOCK-NEXT: br label %loopexit1 ; PROLOG-BLOCK: loopexit1: ; PROLOG-BLOCK-NEXT: %sext3 = phi i32 [ %shft, %header.prol ], [ %sext3.ph, %loopexit1.loopexit ] @@ -4594,31 +4594,31 @@ define void @test8() { ; EPILOG: latch: ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; EPILOG-NEXT: br label %latch.1 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 ; EPILOG: latch.1: ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: %i4.2 = add nuw nsw i64 %i4.1, 1 -; EPILOG-NEXT: br label %latch.2 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.2 ; EPILOG: latch.2: ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: %i4.3 = add nuw nsw i64 %i4.2, 1 -; EPILOG-NEXT: br label %latch.3 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.3 ; EPILOG: latch.3: ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: %i4.4 = add nuw nsw i64 %i4.3, 1 -; EPILOG-NEXT: br label %latch.4 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.4 ; EPILOG: latch.4: ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: %i4.5 = add nuw nsw i64 %i4.4, 1 -; EPILOG-NEXT: br label %latch.5 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.5 ; EPILOG: latch.5: ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: %i4.6 = add nuw nsw i64 %i4.5, 1 -; EPILOG-NEXT: br label %latch.6 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.6 ; EPILOG: latch.6: ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 ; EPILOG-NEXT: %i4.7 = add nuw nsw i64 
%i4.6, 1 -; EPILOG-NEXT: br label %latch.7 +; EPILOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 ; EPILOG: latch.7: ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 ; EPILOG-NEXT: %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter @@ -4666,7 +4666,7 @@ define void @test8() { ; EPILOG-BLOCK: latch.14: ; EPILOG-BLOCK-NEXT: %niter.next.13 = add nuw nsw i64 %niter.1, 1 ; EPILOG-BLOCK-NEXT: %i4.1.1 = add nuw nsw i64 %i4.12, 1 -; EPILOG-BLOCK-NEXT: br label %latch.1.1 +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1 ; EPILOG-BLOCK: latch.1.1: ; EPILOG-BLOCK-NEXT: %niter.next.1.1 = add i64 %niter.next.13, 1 ; EPILOG-BLOCK-NEXT: %niter.ncmp.1.1 = icmp ne i64 %niter.next.1.1, 100 @@ -4701,7 +4701,7 @@ define void @test8() { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-BLOCK-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; EPILOG-BLOCK-NEXT: br label %latch.1 +; EPILOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 ; EPILOG-BLOCK: latch.1: ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 ; EPILOG-BLOCK-NEXT: %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter @@ -4765,25 +4765,25 @@ define void @test8() { ; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; PROLOG: latch: ; PROLOG-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; PROLOG-NEXT: br label %latch.1 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 ; PROLOG: latch.1: ; PROLOG-NEXT: %i4.2 = add nuw nsw i64 %i4.1, 1 -; PROLOG-NEXT: br label %latch.2 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.2 ; PROLOG: latch.2: ; PROLOG-NEXT: %i4.3 = add nuw nsw i64 %i4.2, 1 -; PROLOG-NEXT: br label %latch.3 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.3 ; PROLOG: latch.3: ; PROLOG-NEXT: %i4.4 = add nuw nsw i64 %i4.3, 1 -; PROLOG-NEXT: br label %latch.4 +; 
PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.4 ; PROLOG: latch.4: ; PROLOG-NEXT: %i4.5 = add nuw nsw i64 %i4.4, 1 -; PROLOG-NEXT: br label %latch.5 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.5 ; PROLOG: latch.5: ; PROLOG-NEXT: %i4.6 = add nuw nsw i64 %i4.5, 1 -; PROLOG-NEXT: br label %latch.6 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.6 ; PROLOG: latch.6: ; PROLOG-NEXT: %i4.7 = add nuw nsw i64 %i4.6, 1 -; PROLOG-NEXT: br label %latch.7 +; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 ; PROLOG: latch.7: ; PROLOG-NEXT: %i6.7 = icmp ult i64 %i4.7, 100 ; PROLOG-NEXT: br i1 %i6.7, label %innerH, label %exit.unr-lcssa @@ -4818,7 +4818,7 @@ define void @test8() { ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12 ; PROLOG-BLOCK: latch.12: ; PROLOG-BLOCK-NEXT: %i4.1.1 = add nuw nsw i64 %i4.11, 1 -; PROLOG-BLOCK-NEXT: br label %latch.1.1 +; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1 ; PROLOG-BLOCK: latch.1.1: ; PROLOG-BLOCK-NEXT: %i6.1.1 = icmp ult i64 %i4.1.1, 100 ; PROLOG-BLOCK-NEXT: br i1 %i6.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit3, !llvm.loop !12 @@ -4852,7 +4852,7 @@ define void @test8() { ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %i4.1 = add nuw nsw i64 %i4, 1 -; PROLOG-BLOCK-NEXT: br label %latch.1 +; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.1 ; PROLOG-BLOCK: latch.1: ; PROLOG-BLOCK-NEXT: %i6.1 = icmp ult i64 %i4.1, 100 ; PROLOG-BLOCK-NEXT: br i1 %i6.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !12 @@ -4915,7 +4915,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-NEXT: %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.7, %latch.7 ] ; EPILOG-NEXT: br i1 true, label 
%latch, label %innerexit.loopexit ; EPILOG: innerexit.loopexit: -; EPILOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ] +; EPILOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ] ; EPILOG-NEXT: br label %innerexit ; EPILOG: innerexit.loopexit1: ; EPILOG-NEXT: %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ] @@ -4927,31 +4927,31 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG: latch: ; EPILOG-NEXT: %iv.next = add nuw nsw i64 %phi, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i32 %niter, 1 -; EPILOG-NEXT: br label %latch.1 +; EPILOG-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit ; EPILOG: latch.1: ; EPILOG-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i32 %niter.next, 1 -; EPILOG-NEXT: br label %latch.2 +; EPILOG-NEXT: br i1 true, label %latch.2, label %innerexit.loopexit ; EPILOG: latch.2: ; EPILOG-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i32 %niter.next.1, 1 -; EPILOG-NEXT: br label %latch.3 +; EPILOG-NEXT: br i1 true, label %latch.3, label %innerexit.loopexit ; EPILOG: latch.3: ; EPILOG-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i32 %niter.next.2, 1 -; EPILOG-NEXT: br label %latch.4 +; EPILOG-NEXT: br i1 true, label %latch.4, label %innerexit.loopexit ; EPILOG: latch.4: ; EPILOG-NEXT: %iv.next.4 = add nuw nsw i64 %iv.next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i32 %niter.next.3, 1 -; EPILOG-NEXT: br label %latch.5 +; EPILOG-NEXT: br i1 true, label %latch.5, label %innerexit.loopexit ; EPILOG: latch.5: ; EPILOG-NEXT: %iv.next.5 = add nuw nsw i64 %iv.next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i32 %niter.next.4, 1 -; EPILOG-NEXT: br label %latch.6 +; EPILOG-NEXT: br i1 true, label %latch.6, label 
%innerexit.loopexit ; EPILOG: latch.6: ; EPILOG-NEXT: %iv.next.6 = add nuw nsw i64 %iv.next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i32 %niter.next.5, 1 -; EPILOG-NEXT: br label %latch.7 +; EPILOG-NEXT: br i1 true, label %latch.7, label %innerexit.loopexit ; EPILOG: latch.7: ; EPILOG-NEXT: %iv.next.7 = add nuw nsw i64 %iv.next.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i32 %niter.next.6, 1 @@ -5006,10 +5006,10 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK-NEXT: %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.1, %latch.1 ] ; EPILOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit.loopexit: -; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %header ] +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ] ; EPILOG-BLOCK-NEXT: br label %innerexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit.loopexit6: -; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph7 = phi i32 [ %trip.1, %header.1 ] +; EPILOG-BLOCK-NEXT: %trip.lcssa.ph.ph7 = phi i32 [ %trip.1, %latch.15 ], [ %trip.1, %header.1 ] ; EPILOG-BLOCK-NEXT: br label %innerexit.loopexit ; EPILOG-BLOCK: innerexit.loopexit: ; EPILOG-BLOCK-NEXT: %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph7, %innerexit.loopexit.loopexit6 ] @@ -5024,7 +5024,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK: latch: ; EPILOG-BLOCK-NEXT: %iv.next = add nuw nsw i64 %phi, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i32 %niter, 1 -; EPILOG-BLOCK-NEXT: br label %latch.1 +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit.loopexit ; EPILOG-BLOCK: latch.1: ; EPILOG-BLOCK-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i32 %niter.next, 1 @@ -5061,7 +5061,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; EPILOG-BLOCK: latch.15: ; 
EPILOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1 ; EPILOG-BLOCK-NEXT: %niter.next.14 = add nuw nsw i32 %niter.1, 1 -; EPILOG-BLOCK-NEXT: br label %latch.1.1 +; EPILOG-BLOCK-NEXT: br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit6 ; EPILOG-BLOCK: latch.1.1: ; EPILOG-BLOCK-NEXT: %iv.next.1.1 = add nuw nsw i64 %iv.next.13, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1.1 = add i32 %niter.next.14, 1 @@ -5124,7 +5124,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-NEXT: %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.7, %latch.7 ] ; PROLOG-NEXT: br i1 true, label %latch, label %innerexit.loopexit ; PROLOG: innerexit.loopexit: -; PROLOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ] +; PROLOG-NEXT: %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ] ; PROLOG-NEXT: br label %innerexit ; PROLOG: innerexit.loopexit1: ; PROLOG-NEXT: %trip.lcssa.ph2 = phi i32 [ %trip, %header.prol ] @@ -5135,26 +5135,26 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-NEXT: ret i8 addrspace(1)* %i9 ; PROLOG: latch: ; PROLOG-NEXT: %iv.next = add nuw nsw i64 %phi, 1 -; PROLOG-NEXT: br label %latch.1 +; PROLOG-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit ; PROLOG: latch.1: ; PROLOG-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 1 -; PROLOG-NEXT: br label %latch.2 +; PROLOG-NEXT: br i1 true, label %latch.2, label %innerexit.loopexit ; PROLOG: latch.2: ; PROLOG-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 1 -; PROLOG-NEXT: br label %latch.3 +; PROLOG-NEXT: br i1 true, label %latch.3, label %innerexit.loopexit ; PROLOG: latch.3: ; PROLOG-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 1 -; PROLOG-NEXT: br label %latch.4 +; PROLOG-NEXT: br i1 true, label %latch.4, label %innerexit.loopexit ; PROLOG: latch.4: ; PROLOG-NEXT: %iv.next.4 = add nuw nsw i64 
%iv.next.3, 1 -; PROLOG-NEXT: br label %latch.5 +; PROLOG-NEXT: br i1 true, label %latch.5, label %innerexit.loopexit ; PROLOG: latch.5: ; PROLOG-NEXT: %iv.next.5 = add nuw nsw i64 %iv.next.4, 1 -; PROLOG-NEXT: br label %latch.6 +; PROLOG-NEXT: br i1 true, label %latch.6, label %innerexit.loopexit ; PROLOG: latch.6: ; PROLOG-NEXT: %iv.next.6 = add nuw nsw i64 %iv.next.5, 1 ; PROLOG-NEXT: %i7.7 = trunc i64 %iv.next.6 to i32 -; PROLOG-NEXT: br label %latch.7 +; PROLOG-NEXT: br i1 true, label %latch.7, label %innerexit.loopexit ; PROLOG: latch.7: ; PROLOG-NEXT: %i11.7 = add nsw i32 %i7.7, 1 ; PROLOG-NEXT: %innercnd.7 = icmp slt i32 %i11.7, %trip @@ -5196,10 +5196,10 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK-NEXT: %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.1, %latch.1 ] ; PROLOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit.loopexit: -; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %header ] +; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ] ; PROLOG-BLOCK-NEXT: br label %innerexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit.loopexit5: -; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph6 = phi i32 [ %trip.1, %header.1 ] +; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph6 = phi i32 [ %trip.1, %latch.14 ], [ %trip.1, %header.1 ] ; PROLOG-BLOCK-NEXT: br label %innerexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit: ; PROLOG-BLOCK-NEXT: %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph6, %innerexit.loopexit.loopexit5 ] @@ -5214,7 +5214,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK: latch: ; PROLOG-BLOCK-NEXT: %iv.next = add nuw nsw i64 %phi, 1 ; PROLOG-BLOCK-NEXT: %i7.1 = trunc i64 %iv.next to i32 -; PROLOG-BLOCK-NEXT: br label %latch.1 +; PROLOG-BLOCK-NEXT: br i1 true, label %latch.1, label %innerexit.loopexit.loopexit ; PROLOG-BLOCK: 
latch.1: ; PROLOG-BLOCK-NEXT: %i11.1 = add nsw i32 %i7.1, 1 ; PROLOG-BLOCK-NEXT: %innercnd.1 = icmp slt i32 %i11.1, %trip @@ -5250,7 +5250,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) { ; PROLOG-BLOCK: latch.14: ; PROLOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1 ; PROLOG-BLOCK-NEXT: %i7.1.1 = trunc i64 %iv.next.13 to i32 -; PROLOG-BLOCK-NEXT: br label %latch.1.1 +; PROLOG-BLOCK-NEXT: br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit5 ; PROLOG-BLOCK: latch.1.1: ; PROLOG-BLOCK-NEXT: %i11.1.1 = add nsw i32 %i7.1.1, 1 ; PROLOG-BLOCK-NEXT: %innercnd.1.1 = icmp slt i32 %i11.1.1, %trip.1 @@ -5611,37 +5611,37 @@ define void @test11(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_latch.1 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit ; EPILOG: loop_latch.1: ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_latch.2 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.loopexit ; EPILOG: loop_latch.2: ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_latch.3 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.loopexit ; EPILOG: loop_latch.3: ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_latch.4 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.loopexit ; EPILOG: loop_latch.4: ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; 
EPILOG-NEXT: br label %loop_latch.5 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.loopexit ; EPILOG: loop_latch.5: ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_latch.6 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.loopexit ; EPILOG: loop_latch.6: ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_latch.7 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.loopexit ; EPILOG: loop_latch.7: ; EPILOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 @@ -5696,7 +5696,7 @@ define void @test11(i64 %trip, i1 %cond) { ; EPILOG-BLOCK-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: br label %loop_latch.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit ; EPILOG-BLOCK: loop_latch.1: ; EPILOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 @@ -5756,31 +5756,31 @@ define void @test11(i64 %trip, i1 %cond) { ; PROLOG: loop_latch: ; PROLOG-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.1 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit ; PROLOG: loop_latch.1: ; PROLOG-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.2 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.loopexit ; PROLOG: loop_latch.2: ; PROLOG-NEXT: %iv_next.2 = add i64 %iv_next.1, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.3 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.3, label 
%exit1.loopexit ; PROLOG: loop_latch.3: ; PROLOG-NEXT: %iv_next.3 = add i64 %iv_next.2, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.4 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.loopexit ; PROLOG: loop_latch.4: ; PROLOG-NEXT: %iv_next.4 = add i64 %iv_next.3, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.5 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.loopexit ; PROLOG: loop_latch.5: ; PROLOG-NEXT: %iv_next.5 = add i64 %iv_next.4, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.6 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.loopexit ; PROLOG: loop_latch.6: ; PROLOG-NEXT: %iv_next.6 = add i64 %iv_next.5, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_latch.7 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.loopexit ; PROLOG: loop_latch.7: ; PROLOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; PROLOG-NEXT: %cmp.7 = icmp ne i64 %iv_next.7, %trip @@ -5822,7 +5822,7 @@ define void @test11(i64 %trip, i1 %cond) { ; PROLOG-BLOCK: loop_latch: ; PROLOG-BLOCK-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-BLOCK-NEXT: call void @bar() -; PROLOG-BLOCK-NEXT: br label %loop_latch.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.loopexit ; PROLOG-BLOCK: loop_latch.1: ; PROLOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp ne i64 %iv_next.1, %trip @@ -5883,7 +5883,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.1: -; EPILOG-NEXT: br label %loop_latch.1 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.1: ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 @@ -5891,7 
+5891,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.2: -; EPILOG-NEXT: br label %loop_latch.2 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.2: ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 @@ -5899,7 +5899,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.3: -; EPILOG-NEXT: br label %loop_latch.3 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.3: ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 @@ -5907,7 +5907,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.4: -; EPILOG-NEXT: br label %loop_latch.4 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.4: ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 @@ -5915,7 +5915,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.5, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.5: -; EPILOG-NEXT: br label %loop_latch.5 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit ; EPILOG: 
loop_latch.5: ; EPILOG-NEXT: %iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 @@ -5923,7 +5923,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.6: -; EPILOG-NEXT: br label %loop_latch.6 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.6: ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 @@ -5931,7 +5931,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-NEXT: %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2 ; EPILOG-NEXT: br i1 %cmp_early.7, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.7: -; EPILOG-NEXT: br label %loop_latch.7 +; EPILOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_latch.7: ; EPILOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; EPILOG-NEXT: %niter.next.7 = add i64 %niter.next.6, 1 @@ -5993,7 +5993,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; EPILOG-BLOCK-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; EPILOG-BLOCK-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG-BLOCK: loop_exiting_bb2.1: -; EPILOG-BLOCK-NEXT: br label %loop_latch.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit ; EPILOG-BLOCK: loop_latch.1: ; EPILOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; EPILOG-BLOCK-NEXT: %niter.next.1 = add i64 %niter.next, 1 @@ -6067,49 +6067,49 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; PROLOG-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit ; PROLOG: 
loop_exiting_bb2.1: -; PROLOG-NEXT: br label %loop_latch.1 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.1: ; PROLOG-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.2: -; PROLOG-NEXT: br label %loop_latch.2 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.2, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.2: ; PROLOG-NEXT: %iv_next.2 = add i64 %iv_next.1, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.3: -; PROLOG-NEXT: br label %loop_latch.3 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.3, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.3: ; PROLOG-NEXT: %iv_next.3 = add i64 %iv_next.2, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.4: -; PROLOG-NEXT: br label %loop_latch.4 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.4, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.4: ; PROLOG-NEXT: %iv_next.4 = add i64 %iv_next.3, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.5, label %loop_exiting_bb2.5, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.5: -; PROLOG-NEXT: br label %loop_latch.5 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.5, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.5: ; PROLOG-NEXT: %iv_next.5 = add i64 %iv_next.4, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.6, label 
%loop_exiting_bb2.6, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.6: -; PROLOG-NEXT: br label %loop_latch.6 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.6, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.6: ; PROLOG-NEXT: %iv_next.6 = add i64 %iv_next.5, 1 ; PROLOG-NEXT: call void @bar() ; PROLOG-NEXT: %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2 ; PROLOG-NEXT: br i1 %cmp_early.7, label %loop_exiting_bb2.7, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.7: -; PROLOG-NEXT: br label %loop_latch.7 +; PROLOG-NEXT: br i1 %cond, label %loop_latch.7, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.7: ; PROLOG-NEXT: %iv_next.7 = add i64 %iv_next.6, 1 ; PROLOG-NEXT: %cmp.7 = icmp ne i64 %iv_next.7, %trip @@ -6158,7 +6158,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) { ; PROLOG-BLOCK-NEXT: %cmp_early.1 = icmp ne i64 %iv_next, %trip2 ; PROLOG-BLOCK-NEXT: br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit ; PROLOG-BLOCK: loop_exiting_bb2.1: -; PROLOG-BLOCK-NEXT: br label %loop_latch.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.1, label %exit1.unr-lcssa.loopexit ; PROLOG-BLOCK: loop_latch.1: ; PROLOG-BLOCK-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-BLOCK-NEXT: %cmp.1 = icmp ne i64 %iv_next.1, %trip @@ -6575,7 +6575,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.1 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.1: ; EPILOG-NEXT: %unknown.1 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit @@ -6583,7 +6583,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.1 = add nuw nsw i64 %iv_next, 1 ; EPILOG-NEXT: %niter.next.1 = add nuw nsw i64 %niter.next, 1 ; 
EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.2 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.2: ; EPILOG-NEXT: %unknown.2 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.2, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit @@ -6591,7 +6591,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.2 = add nuw nsw i64 %iv_next.1, 1 ; EPILOG-NEXT: %niter.next.2 = add nuw nsw i64 %niter.next.1, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.3 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.3: ; EPILOG-NEXT: %unknown.3 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.3, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit @@ -6599,7 +6599,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.3 = add nuw nsw i64 %iv_next.2, 1 ; EPILOG-NEXT: %niter.next.3 = add nuw nsw i64 %niter.next.2, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.4 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.4: ; EPILOG-NEXT: %unknown.4 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.4, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit @@ -6607,7 +6607,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.4 = add nuw nsw i64 %iv_next.3, 1 ; EPILOG-NEXT: %niter.next.4 = add nuw nsw i64 %niter.next.3, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.5 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.5: ; EPILOG-NEXT: %unknown.5 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.5, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit @@ -6615,7 +6615,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: 
%iv_next.5 = add nuw nsw i64 %iv_next.4, 1 ; EPILOG-NEXT: %niter.next.5 = add nuw nsw i64 %niter.next.4, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.6 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.6: ; EPILOG-NEXT: %unknown.6 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.6, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit @@ -6623,7 +6623,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-NEXT: %iv_next.6 = add nuw nsw i64 %iv_next.5, 1 ; EPILOG-NEXT: %niter.next.6 = add nuw nsw i64 %niter.next.5, 1 ; EPILOG-NEXT: call void @bar() -; EPILOG-NEXT: br label %loop_exiting_bb2.7 +; EPILOG-NEXT: br i1 %cond, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit ; EPILOG: loop_exiting_bb2.7: ; EPILOG-NEXT: %unknown.7 = call i1 @unknown_cond() ; EPILOG-NEXT: br i1 %unknown.7, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit @@ -6685,7 +6685,7 @@ define void @test14(i64 %trip, i1 %cond) { ; EPILOG-BLOCK-NEXT: %iv_next = add nuw nsw i64 %iv, 1 ; EPILOG-BLOCK-NEXT: %niter.next = add nuw nsw i64 %niter, 1 ; EPILOG-BLOCK-NEXT: call void @bar() -; EPILOG-BLOCK-NEXT: br label %loop_exiting_bb2.1 +; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit ; EPILOG-BLOCK: loop_exiting_bb2.1: ; EPILOG-BLOCK-NEXT: %unknown.1 = call i1 @unknown_cond() ; EPILOG-BLOCK-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit @@ -6757,49 +6757,49 @@ define void @test14(i64 %trip, i1 %cond) { ; PROLOG: loop_latch: ; PROLOG-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.1 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.1: ; PROLOG-NEXT: %unknown.1 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit ; 
PROLOG: loop_latch.1: ; PROLOG-NEXT: %iv_next.1 = add i64 %iv_next, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.2 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.2, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.2: ; PROLOG-NEXT: %unknown.2 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.2, label %loop_latch.2, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.2: ; PROLOG-NEXT: %iv_next.2 = add i64 %iv_next.1, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.3 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.3, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.3: ; PROLOG-NEXT: %unknown.3 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.3, label %loop_latch.3, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.3: ; PROLOG-NEXT: %iv_next.3 = add i64 %iv_next.2, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.4 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.4, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.4: ; PROLOG-NEXT: %unknown.4 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.4, label %loop_latch.4, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.4: ; PROLOG-NEXT: %iv_next.4 = add i64 %iv_next.3, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.5 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.5, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.5: ; PROLOG-NEXT: %unknown.5 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.5, label %loop_latch.5, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.5: ; PROLOG-NEXT: %iv_next.5 = add i64 %iv_next.4, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.6 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.6, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.6: ; PROLOG-NEXT: %unknown.6 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.6, label %loop_latch.6, 
label %exit1.unr-lcssa.loopexit ; PROLOG: loop_latch.6: ; PROLOG-NEXT: %iv_next.6 = add i64 %iv_next.5, 1 ; PROLOG-NEXT: call void @bar() -; PROLOG-NEXT: br label %loop_exiting_bb2.7 +; PROLOG-NEXT: br i1 %cond, label %loop_exiting_bb2.7, label %exit1.unr-lcssa.loopexit ; PROLOG: loop_exiting_bb2.7: ; PROLOG-NEXT: %unknown.7 = call i1 @unknown_cond() ; PROLOG-NEXT: br i1 %unknown.7, label %loop_latch.7, label %exit1.unr-lcssa.loopexit @@ -6848,7 +6848,7 @@ define void @test14(i64 %trip, i1 %cond) { ; PROLOG-BLOCK: loop_latch: ; PROLOG-BLOCK-NEXT: %iv_next = add i64 %iv, 1 ; PROLOG-BLOCK-NEXT: call void @bar() -; PROLOG-BLOCK-NEXT: br label %loop_exiting_bb2.1 +; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit ; PROLOG-BLOCK: loop_exiting_bb2.1: ; PROLOG-BLOCK-NEXT: %unknown.1 = call i1 @unknown_cond() ; PROLOG-BLOCK-NEXT: br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll index 7f395e700e10..70c8d29e4a82 100644 --- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -213,7 +213,7 @@ define i32 @multiExitIncomplete(i32* %base) nounwind { ; CHECK: l1.5: ; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]] ; CHECK: l2.5: -; CHECK-NEXT: br label [[L3_5:%.*]] +; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]] ; CHECK: l3.5: ; CHECK-NEXT: br label [[EXIT3]] ; CHECK: exit1: @@ -316,10 +316,10 @@ define void @nsw_latch(i32* %a) nounwind { ; CHECK: for.body.1: ; CHECK-NEXT: br i1 false, label [[FOR_COND_1:%.*]], label [[RETURN]] ; CHECK: for.cond.1: -; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ] +; CHECK-NEXT: 
[[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; From cfe3180742adfc72ad6f5de55cbfc84befb90c97 Mon Sep 17 00:00:00 2001 From: "G. Pery" Date: Mon, 3 Jan 2022 20:55:13 +0100 Subject: [PATCH 451/992] [clang-format] Add penalty for breaking after '(' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My team has a vendetta against lines ending with an open parenthesis, thought it might be useful for others too 😊 Reviewed By: HazardyKnusperkeks, curdeius Differential Revision: https://reviews.llvm.org/D116170 --- clang/docs/ClangFormatStyleOptions.rst | 3 ++ clang/include/clang/Format/Format.h | 5 +++ clang/lib/Format/Format.cpp | 3 ++ clang/lib/Format/TokenAnnotator.cpp | 2 + clang/unittests/Format/FormatTest.cpp | 62 ++++++++++++++++++++++++++ 5 files changed, 75 insertions(+) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 07c77acb8481..777398f460e0 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3198,6 +3198,9 @@ the configuration (without a prefix: ``Auto``). **PenaltyBreakTemplateDeclaration** (``Unsigned``) :versionbadge:`clang-format 7` The penalty for breaking after template declaration. +**PenaltyBreakOpenParenthesis** (``Unsigned``) :versionbadge:`clang-format 14` + The penalty for breaking after ``(``. + **PenaltyExcessCharacter** (``Unsigned``) :versionbadge:`clang-format 3.7` The penalty for each character outside of the column limit. 
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index dbc406417ba1..5044158a2015 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2887,6 +2887,10 @@ struct FormatStyle { /// \version 3.7 unsigned PenaltyBreakFirstLessLess; + /// The penalty for breaking after ``(``. + /// \version 14 + unsigned PenaltyBreakOpenParenthesis; + /// The penalty for each line break introduced inside a string literal. /// \version 3.7 unsigned PenaltyBreakString; @@ -3781,6 +3785,7 @@ struct FormatStyle { R.PenaltyBreakBeforeFirstCallParameter && PenaltyBreakComment == R.PenaltyBreakComment && PenaltyBreakFirstLessLess == R.PenaltyBreakFirstLessLess && + PenaltyBreakOpenParenthesis == R.PenaltyBreakOpenParenthesis && PenaltyBreakString == R.PenaltyBreakString && PenaltyExcessCharacter == R.PenaltyExcessCharacter && PenaltyReturnTypeOnItsOwnLine == R.PenaltyReturnTypeOnItsOwnLine && diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 0ae9fa60d337..fdccb8b15e82 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -756,6 +756,8 @@ template <> struct MappingTraits { IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); IO.mapOptional("PenaltyBreakFirstLessLess", Style.PenaltyBreakFirstLessLess); + IO.mapOptional("PenaltyBreakOpenParenthesis", + Style.PenaltyBreakOpenParenthesis); IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); IO.mapOptional("PenaltyBreakTemplateDeclaration", Style.PenaltyBreakTemplateDeclaration); @@ -1232,6 +1234,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.PenaltyExcessCharacter = 1000000; LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; + LLVMStyle.PenaltyBreakOpenParenthesis = 0; LLVMStyle.PenaltyBreakTemplateDeclaration = prec::Relational; LLVMStyle.PenaltyIndentedWhitespace = 0; diff --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp index 914997a54989..a161ee87e6b5 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2857,6 +2857,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, Left.Previous->isOneOf(tok::identifier, tok::greater)) return 500; + if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0) + return Style.PenaltyBreakOpenParenthesis; if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) return 100; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 470b0c7a19e5..49635f3f15ea 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -18524,6 +18524,66 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { format("int a = /* long block comment */ 42;", Style)); } +TEST_F(FormatTest, BreakPenaltyAfterLParen) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 8; + Style.PenaltyExcessCharacter = 15; + verifyFormat("int foo(\n" + " int aaaaaaaaaaaaaaaaaaaaaaaa);", + Style); + Style.PenaltyBreakOpenParenthesis = 200; + EXPECT_EQ("int foo(int aaaaaaaaaaaaaaaaaaaaaaaa);", + format("int foo(\n" + " int aaaaaaaaaaaaaaaaaaaaaaaa);", + Style)); +} + +TEST_F(FormatTest, BreakPenaltyAfterCastLParen) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 5; + Style.PenaltyExcessCharacter = 150; + verifyFormat("foo((\n" + " int)aaaaaaaaaaaaaaaaaaaaaaaa);", + + Style); + Style.PenaltyBreakOpenParenthesis = 100000; + EXPECT_EQ("foo((int)\n" + " aaaaaaaaaaaaaaaaaaaaaaaa);", + format("foo((\n" + "int)aaaaaaaaaaaaaaaaaaaaaaaa);", + Style)); +} + +TEST_F(FormatTest, BreakPenaltyAfterForLoopLParen) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 4; + Style.PenaltyExcessCharacter = 100; + verifyFormat("for (\n" + " int iiiiiiiiiiiiiiiii =\n" + " 0;\n" + " iiiiiiiiiiiiiiiii <\n" + " 2;\n" + " iiiiiiiiiiiiiiiii++) {\n" + "}", + + 
Style); + Style.PenaltyBreakOpenParenthesis = 1250; + EXPECT_EQ("for (int iiiiiiiiiiiiiiiii =\n" + " 0;\n" + " iiiiiiiiiiiiiiiii <\n" + " 2;\n" + " iiiiiiiiiiiiiiiii++) {\n" + "}", + format("for (\n" + " int iiiiiiiiiiiiiiiii =\n" + " 0;\n" + " iiiiiiiiiiiiiiiii <\n" + " 2;\n" + " iiiiiiiiiiiiiiiii++) {\n" + "}", + Style)); +} + #define EXPECT_ALL_STYLES_EQUAL(Styles) \ for (size_t i = 1; i < Styles.size(); ++i) \ EXPECT_EQ(Styles[0], Styles[i]) \ @@ -18729,6 +18789,8 @@ TEST_F(FormatTest, ParsesConfiguration) { PenaltyBreakBeforeFirstCallParameter, 1234u); CHECK_PARSE("PenaltyBreakTemplateDeclaration: 1234", PenaltyBreakTemplateDeclaration, 1234u); + CHECK_PARSE("PenaltyBreakOpenParenthesis: 1234", PenaltyBreakOpenParenthesis, + 1234u); CHECK_PARSE("PenaltyExcessCharacter: 1234", PenaltyExcessCharacter, 1234u); CHECK_PARSE("PenaltyReturnTypeOnItsOwnLine: 1234", PenaltyReturnTypeOnItsOwnLine, 1234u); From 6f6f88ffdae1e12e5f950ef418827a77a55c09c7 Mon Sep 17 00:00:00 2001 From: ksyx <18738953+ksyx@users.noreply.github.com> Date: Tue, 28 Dec 2021 10:01:26 -0500 Subject: [PATCH 452/992] [clang-format] Style to separate definition blocks This commit resolves GitHub issue #45895 (Bugzilla #46550), to add or remove empty line between definition blocks including namespaces, classes, structs, enums and functions. 
Reviewed By: MyDeveloperDay, curdeius, HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D116314 --- clang/docs/ClangFormatStyleOptions.rst | 59 ++++ clang/docs/ReleaseNotes.rst | 4 + clang/include/clang/Format/Format.h | 68 ++++ clang/lib/Format/CMakeLists.txt | 1 + clang/lib/Format/DefinitionBlockSeparator.cpp | 157 +++++++++ clang/lib/Format/DefinitionBlockSeparator.h | 41 +++ clang/lib/Format/Format.cpp | 35 +- clang/lib/Format/WhitespaceManager.cpp | 6 + clang/lib/Format/WhitespaceManager.h | 3 + clang/unittests/Format/CMakeLists.txt | 1 + .../Format/DefinitionBlockSeparatorTest.cpp | 309 ++++++++++++++++++ 11 files changed, 677 insertions(+), 7 deletions(-) create mode 100644 clang/lib/Format/DefinitionBlockSeparator.cpp create mode 100644 clang/lib/Format/DefinitionBlockSeparator.h create mode 100644 clang/unittests/Format/DefinitionBlockSeparatorTest.cpp diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 777398f460e0..4f3a9eb9f4a6 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3402,6 +3402,65 @@ the configuration (without a prefix: ``Auto``). /* second veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongComment with plenty of * information */ +**SeparateDefinitionBlocks** (``SeparateDefinitionStyle``) :versionbadge:`clang-format 14` + Specifies the use of empty lines to separate definition blocks, including classes, + structs, enums, and functions. + + Possible values: + + * ``SDS_Leave`` (in configuration: ``Leave``) + Leave definition blocks as they are. + + * ``SDS_Always`` (in configuration: ``Always``) + Insert an empty line between definition blocks. + + * ``SDS_Never`` (in configuration: ``Never``) + Remove any empty line between definition blocks. + + .. code-block:: c++ + + Never v.s. 
Always + #include #include + struct Foo { + int a, b, c; struct Foo { + }; int a, b, c; + namespace Ns { }; + class Bar { + public: namespace Ns { + struct Foobar { class Bar { + int a; public: + int b; struct Foobar { + }; int a; + private: int b; + int t; }; + int method1() { + // ... private: + } int t; + enum List { + ITEM1, int method1() { + ITEM2 // ... + }; } + template + int method2(T x) { enum List { + // ... ITEM1, + } ITEM2 + int i, j, k; }; + int method3(int par) { + // ... template + } int method2(T x) { + }; // ... + class C {}; } + } + int i, j, k; + + int method3(int par) { + // ... + } + }; + + class C {}; + } + **ShortNamespaceLines** (``Unsigned``) :versionbadge:`clang-format 14` The maximal number of unwrapped lines that a short namespace spans. Defaults to 1. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 20e7e6cc26ce..2f48b1424d09 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -297,6 +297,10 @@ clang-format `const` `volatile` `static` `inline` `constexpr` `restrict` to be controlled relative to the `type`. +- Option ``SeparateDefinitionBlocks`` has been added to insert or remove empty + lines between definition blocks including functions, classes, structs, enums, + and namespaces. + - Add a ``Custom`` style to ``SpaceBeforeParens``, to better configure the space before parentheses. The custom options can be set using ``SpaceBeforeParensOptions``. diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 5044158a2015..24c245642e6a 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3054,6 +3054,63 @@ struct FormatStyle { bool ReflowComments; // clang-format on + enum SeparateDefinitionStyle { + /// Leave definition blocks as they are. + SDS_Leave, + /// Insert an empty line between definition blocks. + SDS_Always, + /// Remove any empty line between definition blocks. 
+ SDS_Never + }; + + /// Specifies the use of empty lines to separate definition blocks, including + /// classes, structs, enums, and functions. + /// \code + /// Never v.s. Always + /// #include #include + /// struct Foo { + /// int a, b, c; struct Foo { + /// }; int a, b, c; + /// namespace Ns { }; + /// class Bar { + /// public: namespace Ns { + /// struct Foobar { class Bar { + /// int a; public: + /// int b; struct Foobar { + /// }; int a; + /// private: int b; + /// int t; }; + /// int method1() { + /// // ... private: + /// } int t; + /// enum List { + /// ITEM1, int method1() { + /// ITEM2 // ... + /// }; } + /// template + /// int method2(T x) { enum List { + /// // ... ITEM1, + /// } ITEM2 + /// int i, j, k; }; + /// int method3(int par) { + /// // ... template + /// } int method2(T x) { + /// }; // ... + /// class C {}; } + /// } + /// int i, j, k; + /// + /// int method3(int par) { + /// // ... + /// } + /// }; + /// + /// class C {}; + /// } + /// \endcode + /// \version 14 + SeparateDefinitionStyle SeparateDefinitionBlocks; + /// The maximal number of unwrapped lines that a short namespace spans. /// Defaults to 1. /// @@ -4033,6 +4090,17 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, ArrayRef Ranges, StringRef FileName = ""); +/// Inserts or removes empty lines separating definition blocks including +/// classes, structs, functions, namespaces, and enums in the given \p Ranges in +/// \p Code. +/// +/// Returns the ``Replacements`` that inserts or removes empty lines separating +/// definition blocks in all \p Ranges in \p Code. +tooling::Replacements separateDefinitionBlocks(const FormatStyle &Style, + StringRef Code, + ArrayRef Ranges, + StringRef FileName = ""); + /// Sort consecutive using declarations in the given \p Ranges in /// \p Code. 
/// diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt index 4ff6a532119d..ca455157ae44 100644 --- a/clang/lib/Format/CMakeLists.txt +++ b/clang/lib/Format/CMakeLists.txt @@ -4,6 +4,7 @@ add_clang_library(clangFormat AffectedRangeManager.cpp BreakableToken.cpp ContinuationIndenter.cpp + DefinitionBlockSeparator.cpp Format.cpp FormatToken.cpp FormatTokenLexer.cpp diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp new file mode 100644 index 000000000000..ba51594f3f69 --- /dev/null +++ b/clang/lib/Format/DefinitionBlockSeparator.cpp @@ -0,0 +1,157 @@ +//===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts +/// or removes empty lines separating definition blocks like classes, structs, +/// functions, enums, and namespaces in between. 
+/// +//===----------------------------------------------------------------------===// + +#include "DefinitionBlockSeparator.h" +#include "llvm/Support/Debug.h" +#define DEBUG_TYPE "definition-block-separator" + +namespace clang { +namespace format { +std::pair DefinitionBlockSeparator::analyze( + TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens) { + assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); + tooling::Replacements Result; + separateBlocks(AnnotatedLines, Result); + return {Result, 0}; +} + +void DefinitionBlockSeparator::separateBlocks( + SmallVectorImpl &Lines, tooling::Replacements &Result) { + auto LikelyDefinition = [this](const AnnotatedLine *Line) { + if (Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) + return true; + FormatToken *CurrentToken = Line->First; + while (CurrentToken) { + if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct, + tok::kw_namespace, tok::kw_enum) || + (Style.Language == FormatStyle::LK_JavaScript && + CurrentToken->TokenText == "function")) + return true; + CurrentToken = CurrentToken->Next; + } + return false; + }; + unsigned NewlineCount = + (Style.SeparateDefinitionBlocks == FormatStyle::SDS_Always ? 1 : 0) + 1; + WhitespaceManager Whitespaces( + Env.getSourceManager(), Style, + Style.DeriveLineEnding + ? WhitespaceManager::inputUsesCRLF( + Env.getSourceManager().getBufferData(Env.getFileID()), + Style.UseCRLF) + : Style.UseCRLF); + for (unsigned I = 0; I < Lines.size(); I++) { + const auto &CurrentLine = Lines[I]; + FormatToken *TargetToken = nullptr; + AnnotatedLine *TargetLine; + auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex; + const auto InsertReplacement = [&](const int NewlineToInsert) { + assert(TargetLine); + assert(TargetToken); + + // Do not handle EOF newlines. 
+ if (TargetToken->is(tok::eof) && NewlineToInsert > 0) + return; + if (!TargetLine->Affected) + return; + Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert, + TargetToken->SpacesRequiredBefore - 1, + TargetToken->StartsColumn); + }; + const auto FollowingOtherOpening = [&]() { + return OpeningLineIndex == 0 || + Lines[OpeningLineIndex - 1]->Last->opensScope(); + }; + const auto HasEnumOnLine = [CurrentLine]() { + FormatToken *CurrentToken = CurrentLine->First; + while (CurrentToken) { + if (CurrentToken->is(tok::kw_enum)) + return true; + CurrentToken = CurrentToken->Next; + } + return false; + }; + + bool IsDefBlock = 0; + + if (HasEnumOnLine()) { + // We have no scope opening/closing information for enum. + IsDefBlock = 1; + OpeningLineIndex = I; + TargetLine = CurrentLine; + TargetToken = CurrentLine->First; + if (!FollowingOtherOpening()) + InsertReplacement(NewlineCount); + else + InsertReplacement(OpeningLineIndex != 0); + while (TargetToken && !TargetToken->is(tok::r_brace)) + TargetToken = TargetToken->Next; + if (!TargetToken) { + while (I < Lines.size() && !Lines[I]->First->is(tok::r_brace)) + I++; + } + } else if (CurrentLine->First->closesScope()) { + if (OpeningLineIndex > Lines.size()) + continue; + // Handling the case that opening bracket has its own line. + OpeningLineIndex -= Lines[OpeningLineIndex]->First->TokenText == "{"; + AnnotatedLine *OpeningLine = Lines[OpeningLineIndex]; + // Closing a function definition. + if (LikelyDefinition(OpeningLine)) { + IsDefBlock = 1; + if (OpeningLineIndex > 0) { + OpeningLineIndex -= + Style.Language == FormatStyle::LK_CSharp && + Lines[OpeningLineIndex - 1]->First->is(tok::l_square); + OpeningLine = Lines[OpeningLineIndex]; + } + TargetLine = OpeningLine; + TargetToken = TargetLine->First; + if (!FollowingOtherOpening()) { + // Avoid duplicated replacement. 
+ if (!TargetToken->opensScope()) + InsertReplacement(NewlineCount); + } else + InsertReplacement(OpeningLineIndex != 0); + } + } + + // Not the last token. + if (IsDefBlock && I + 1 < Lines.size()) { + TargetLine = Lines[I + 1]; + TargetToken = TargetLine->First; + + // No empty line for continuously closing scopes. The token will be + // handled in another case if the line following is opening a + // definition. + if (!TargetToken->closesScope()) { + if (!LikelyDefinition(TargetLine)) + InsertReplacement(NewlineCount); + } else { + InsertReplacement(OpeningLineIndex != 0); + } + } + } + for (const auto &R : Whitespaces.generateReplacements()) + // The add method returns an Error instance which simulates program exit + // code through overloading boolean operator, thus false here indicates + // success. + if (Result.add(R)) + return; +} +} // namespace format +} // namespace clang diff --git a/clang/lib/Format/DefinitionBlockSeparator.h b/clang/lib/Format/DefinitionBlockSeparator.h new file mode 100644 index 000000000000..13b90c5ab083 --- /dev/null +++ b/clang/lib/Format/DefinitionBlockSeparator.h @@ -0,0 +1,41 @@ +//===--- DefinitionBlockSeparator.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares DefinitionBlockSeparator, a TokenAnalyzer that inserts or +/// removes empty lines separating definition blocks like classes, structs, +/// functions, enums, and namespaces in between. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_DEFINITIONBLOCKSEPARATOR_H +#define LLVM_CLANG_LIB_FORMAT_DEFINITIONBLOCKSEPARATOR_H + +#include "TokenAnalyzer.h" +#include "WhitespaceManager.h" + +namespace clang { +namespace format { +class DefinitionBlockSeparator : public TokenAnalyzer { +public: + DefinitionBlockSeparator(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} + + std::pair + analyze(TokenAnnotator &Annotator, + SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens) override; + +private: + void separateBlocks(SmallVectorImpl &Lines, + tooling::Replacements &Result); +}; +} // namespace format +} // namespace clang + +#endif diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index fdccb8b15e82..11c190ebfba7 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -16,6 +16,7 @@ #include "AffectedRangeManager.h" #include "BreakableToken.h" #include "ContinuationIndenter.h" +#include "DefinitionBlockSeparator.h" #include "FormatInternal.h" #include "FormatTokenLexer.h" #include "NamespaceEndCommentsFixer.h" @@ -429,6 +430,15 @@ template <> struct ScalarEnumerationTraits { } }; +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &IO, FormatStyle::SeparateDefinitionStyle &Value) { + IO.enumCase(Value, "Leave", FormatStyle::SDS_Leave); + IO.enumCase(Value, "Always", FormatStyle::SDS_Always); + IO.enumCase(Value, "Never", FormatStyle::SDS_Never); + } +}; + template <> struct ScalarEnumerationTraits { static void @@ -771,6 +781,7 @@ template <> struct MappingTraits { IO.mapOptional("RawStringFormats", Style.RawStringFormats); IO.mapOptional("ReferenceAlignment", Style.ReferenceAlignment); IO.mapOptional("ReflowComments", Style.ReflowComments); + IO.mapOptional("SeparateDefinitionBlocks", Style.SeparateDefinitionBlocks); IO.mapOptional("ShortNamespaceLines", Style.ShortNamespaceLines); 
IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SortJavaStaticImport", Style.SortJavaStaticImport); @@ -1195,6 +1206,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.ObjCSpaceBeforeProtocolList = true; LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; LLVMStyle.ReferenceAlignment = FormatStyle::RAS_Pointer; + LLVMStyle.SeparateDefinitionBlocks = FormatStyle::SDS_Leave; LLVMStyle.ShortNamespaceLines = 1; LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Latest; @@ -1843,7 +1855,7 @@ class Formatter : public TokenAnalyzer { WhitespaceManager Whitespaces( Env.getSourceManager(), Style, Style.DeriveLineEnding - ? inputUsesCRLF( + ? WhitespaceManager::inputUsesCRLF( Env.getSourceManager().getBufferData(Env.getFileID()), Style.UseCRLF) : Style.UseCRLF); @@ -1867,12 +1879,6 @@ class Formatter : public TokenAnalyzer { } private: - static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF) { - size_t LF = Text.count('\n'); - size_t CR = Text.count('\r') * 2; - return LF == CR ? 
DefaultToCRLF : CR > LF; - } - bool hasCpp03IncompatibleFormat(const SmallVectorImpl &Lines) { for (const AnnotatedLine *Line : Lines) { @@ -3053,6 +3059,11 @@ reformat(const FormatStyle &Style, StringRef Code, }); } + if (Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave) + Passes.emplace_back([&](const Environment &Env) { + return DefinitionBlockSeparator(Env, Expanded).process(); + }); + if (Style.isJavaScript() && Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) Passes.emplace_back([&](const Environment &Env) { return JavaScriptRequoter(Env, Expanded).process(); @@ -3141,6 +3152,16 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, return NamespaceEndCommentsFixer(*Env, Style).process().first; } +tooling::Replacements separateDefinitionBlocks(const FormatStyle &Style, + StringRef Code, + ArrayRef Ranges, + StringRef FileName) { + auto Env = Environment::make(Code, FileName, Ranges); + if (!Env) + return {}; + return DefinitionBlockSeparator(*Env, Style).process().first; +} + tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 96a66da0f82b..f0e0247ce33e 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -74,6 +74,12 @@ WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) { return Replaces.add(Replacement); } +bool WhitespaceManager::inputUsesCRLF(StringRef Text, bool DefaultToCRLF) { + size_t LF = Text.count('\n'); + size_t CR = Text.count('\r') * 2; + return LF == CR ? 
DefaultToCRLF : CR > LF; +} + void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, diff --git a/clang/lib/Format/WhitespaceManager.h b/clang/lib/Format/WhitespaceManager.h index 029f4159b748..e6943b7d167b 100644 --- a/clang/lib/Format/WhitespaceManager.h +++ b/clang/lib/Format/WhitespaceManager.h @@ -45,6 +45,9 @@ class WhitespaceManager { bool useCRLF() const { return UseCRLF; } + /// Infers whether the input is using CRLF. + static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF); + /// Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. /// diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt index 47075807c3b0..a4ece033d607 100644 --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(FormatTests CleanupTest.cpp + DefinitionBlockSeparatorTest.cpp FormatTest.cpp FormatTestComments.cpp FormatTestCSharp.cpp diff --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp new file mode 100644 index 000000000000..91933956c174 --- /dev/null +++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp @@ -0,0 +1,309 @@ +//===- DefinitionBlockSeparatorTest.cpp - Formatting unit tests -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatTestUtils.h" +#include "clang/Format/Format.h" + +#include "llvm/Support/Debug.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "definition-block-separator-test" + +namespace clang { +namespace format { +namespace { + +class DefinitionBlockSeparatorTest : public ::testing::Test { +protected: + static std::string + separateDefinitionBlocks(llvm::StringRef Code, + const std::vector &Ranges, + const FormatStyle &Style = getLLVMStyle()) { + LLVM_DEBUG(llvm::errs() << "---\n"); + LLVM_DEBUG(llvm::errs() << Code << "\n\n"); + tooling::Replacements Replaces = reformat(Style, Code, Ranges, ""); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + static std::string + separateDefinitionBlocks(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + return separateDefinitionBlocks( + Code, + /*Ranges=*/{1, tooling::Range(0, Code.size())}, Style); + } + + static void verifyFormat(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle(), + llvm::StringRef ExpectedCode = "") { + bool HasOriginalCode = true; + if (ExpectedCode == "") { + ExpectedCode = Code; + HasOriginalCode = false; + } + + FormatStyle InverseStyle = Style; + if (Style.SeparateDefinitionBlocks == FormatStyle::SDS_Always) + InverseStyle.SeparateDefinitionBlocks = FormatStyle::SDS_Never; + else + InverseStyle.SeparateDefinitionBlocks = FormatStyle::SDS_Always; + EXPECT_EQ(ExpectedCode.str(), separateDefinitionBlocks(ExpectedCode, Style)) + << "Expected code is not stable"; + std::string InverseResult = separateDefinitionBlocks(Code, InverseStyle); + EXPECT_NE(Code.str(), InverseResult) + << "Inverse formatting makes no difference"; + std::string CodeToFormat = + HasOriginalCode ? 
Code.str() : removeEmptyLines(Code); + std::string Result = separateDefinitionBlocks(CodeToFormat, Style); + EXPECT_EQ(ExpectedCode.str(), Result) << "Test failed. Formatted:\n" + << Result; + } + + static std::string removeEmptyLines(llvm::StringRef Code) { + std::string Result = ""; + for (auto Char : Code.str()) { + if (Result.size()) { + auto LastChar = Result.back(); + if ((Char == '\n' && LastChar == '\n') || + (Char == '\r' && (LastChar == '\r' || LastChar == '\n'))) + continue; + } + Result.push_back(Char); + } + return Result; + } +}; + +TEST_F(DefinitionBlockSeparatorTest, Basic) { + FormatStyle Style = getLLVMStyle(); + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always; + verifyFormat("int foo(int i, int j) {\n" + " int r = i + j;\n" + " return r;\n" + "}\n" + "\n" + "int bar(int j, int k) {\n" + " int r = j + k;\n" + " return r;\n" + "}", + Style); + + verifyFormat("struct foo {\n" + " int i, j;\n" + "};\n" + "\n" + "struct bar {\n" + " int j, k;\n" + "};", + Style); + + verifyFormat("class foo {\n" + " int i, j;\n" + "};\n" + "\n" + "class bar {\n" + " int j, k;\n" + "};", + Style); + + verifyFormat("namespace foo {\n" + "int i, j;\n" + "}\n" + "\n" + "namespace bar {\n" + "int j, k;\n" + "}", + Style); + + verifyFormat("enum Foo { FOO, BAR };\n" + "\n" + "enum Bar { FOOBAR, BARFOO };\n", + Style); +} + +TEST_F(DefinitionBlockSeparatorTest, Always) { + FormatStyle Style = getLLVMStyle(); + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always; + std::string Prefix = "namespace {\n"; + std::string Postfix = "enum Foo { FOO, BAR };\n" + "\n" + "int foo(int i, int j) {\n" + " int r = i + j;\n" + " return r;\n" + "}\n" + "\n" + "int i, j, k;\n" + "\n" + "int bar(int j, int k) {\n" + " int r = j * k;\n" + " return r;\n" + "}\n" + "\n" + "enum Bar { FOOBAR, BARFOO };\n" + "} // namespace"; + verifyFormat(Prefix + "\n\n\n" + removeEmptyLines(Postfix), Style, + Prefix + Postfix); +} + +TEST_F(DefinitionBlockSeparatorTest, Never) { + FormatStyle Style 
= getLLVMStyle(); + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Never; + std::string Prefix = "namespace {\n"; + std::string Postfix = "enum Foo { FOO, BAR };\n" + "\n" + "int foo(int i, int j) {\n" + " int r = i + j;\n" + " return r;\n" + "}\n" + "\n" + "int i, j, k;\n" + "\n" + "int bar(int j, int k) {\n" + " int r = j * k;\n" + " return r;\n" + "}\n" + "\n" + "enum Bar { FOOBAR, BARFOO };\n" + "} // namespace"; + verifyFormat(Prefix + "\n\n\n" + Postfix, Style, + Prefix + removeEmptyLines(Postfix)); +} + +TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) { + FormatStyle Style = getLLVMStyle(); + Style.BreakBeforeBraces = FormatStyle::BS_Allman; + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always; + verifyFormat("enum Foo\n" + "{\n" + " FOO,\n" + " BAR\n" + "};\n" + "\n" + "int foo(int i, int j)\n" + "{\n" + " int r = i + j;\n" + " return r;\n" + "}\n" + "\n" + "int i, j, k;\n" + "\n" + "int bar(int j, int k)\n" + "{\n" + " int r = j * k;\n" + " return r;\n" + "}\n" + "\n" + "enum Bar\n" + "{\n" + " FOOBAR,\n" + " BARFOO\n" + "};", + Style); +} + +TEST_F(DefinitionBlockSeparatorTest, Leave) { + FormatStyle Style = getLLVMStyle(); + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Leave; + Style.MaxEmptyLinesToKeep = 3; + std::string LeaveAs = "namespace {\n" + "\n" + "enum Foo { FOO, BAR };\n" + "\n\n\n" + "int foo(int i, int j) {\n" + " int r = i + j;\n" + " return r;\n" + "}\n" + "\n" + "int i, j, k;\n" + "\n" + "int bar(int j, int k) {\n" + " int r = j * k;\n" + " return r;\n" + "}\n" + "\n" + "enum Bar { FOOBAR, BARFOO };\n" + "} // namespace"; + verifyFormat(LeaveAs, Style, LeaveAs); +} + +TEST_F(DefinitionBlockSeparatorTest, CSharp) { + FormatStyle Style = getLLVMStyle(FormatStyle::LK_CSharp); + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always; + Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + Style.AllowShortEnumsOnASingleLine = false; + verifyFormat("namespace {\r\n" + "public class SomeTinyClass {\r\n" + 
" int X;\r\n" + "}\r\n" + "\r\n" + "public class AnotherTinyClass {\r\n" + " int Y;\r\n" + "}\r\n" + "\r\n" + "internal static String toString() {\r\n" + "}\r\n" + "\r\n" + "public enum var {\r\n" + " none,\r\n" + " @string,\r\n" + " bool,\r\n" + " @enum\r\n" + "}\r\n" + "\r\n" + "[STAThread]\r\n" + "static void Main(string[] args) {\r\n" + " Console.WriteLine(\"HelloWorld\");\r\n" + "}\r\n" + "\r\n" + "static decimal Test() {\r\n" + "}\r\n" + "}\r\n" + "\r\n" + "public class FoobarClass {\r\n" + " int foobar;\r\n" + "}", + Style); +} + +TEST_F(DefinitionBlockSeparatorTest, JavaScript) { + FormatStyle Style = getLLVMStyle(FormatStyle::LK_JavaScript); + Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always; + Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + Style.AllowShortEnumsOnASingleLine = false; + verifyFormat("export const enum Foo {\n" + " A = 1,\n" + " B\n" + "}\n" + "\n" + "export function A() {\n" + "}\n" + "\n" + "export default function B() {\n" + "}\n" + "\n" + "export function C() {\n" + "}\n" + "\n" + "var t, p, q;\n" + "\n" + "export abstract class X {\n" + " y: number;\n" + "}\n" + "\n" + "export const enum Bar {\n" + " D = 1,\n" + " E\n" + "}", + Style); +} +} // namespace +} // namespace format +} // namespace clang From a361320d75ad51fcca6522dc6caa4673457d6ea7 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 3 Jan 2022 20:50:31 +0000 Subject: [PATCH 453/992] [gn build] Port 6f6f88ffdae1 --- llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn | 1 + llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn index a4c15b31fd11..375129e23557 100644 --- a/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Format/BUILD.gn @@ -12,6 +12,7 @@ static_library("Format") { "AffectedRangeManager.cpp", "BreakableToken.cpp", 
"ContinuationIndenter.cpp", + "DefinitionBlockSeparator.cpp", "Format.cpp", "FormatToken.cpp", "FormatTokenLexer.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn index dda5a6ce65bb..c5ea4691750b 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Format/BUILD.gn @@ -12,6 +12,7 @@ unittest("FormatTests") { ] sources = [ "CleanupTest.cpp", + "DefinitionBlockSeparatorTest.cpp", "FormatTest.cpp", "FormatTestCSharp.cpp", "FormatTestComments.cpp", From 949bbd0a689286d0b2f73ae4f212673bc0bd688c Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 1 Jan 2022 07:03:31 +0000 Subject: [PATCH 454/992] [CMake] Use `LLVM_COMMON_CMAKE_UTILS` in runtimes just for clarity In D116472 we created conditionally defined variables for the tools to unbreak the legacy build where they are in `llvm/tools`. The runtimes are not tools, so that flexibility doesn't matter. Still, it might be nice to define (unconditionally) and use the variable for the runtimes simply to make the code a bit clearer and document what is going on. Also, consistently put project dirs at the beginning, not end of `CMAKE_MODULE_PATH`. This ensures they will properly shadow similarly named stuff that happens to be later on the path. 
Reviewed By: mstorsjo, #libunwind, #libc, #libc_abi, ldionne Differential Revision: https://reviews.llvm.org/D116477 --- compiler-rt/CMakeLists.txt | 6 ++++-- compiler-rt/lib/builtins/CMakeLists.txt | 16 ++++++++++++---- libcxx/CMakeLists.txt | 9 +++++---- libcxxabi/CMakeLists.txt | 9 +++++---- libunwind/CMakeLists.txt | 9 +++++---- runtimes/CMakeLists.txt | 6 ++++-- 6 files changed, 35 insertions(+), 20 deletions(-) diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index bcab7e006108..1e721a046a2f 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -12,12 +12,14 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE set_property(GLOBAL PROPERTY USE_FOLDERS ON) endif() +set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + # Add path for custom compiler-rt modules. list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules" + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) if(CMAKE_CONFIGURATION_TYPES) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 2c0477f89235..e2489f1a3ed0 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -9,11 +9,19 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(CompilerRTBuiltins C ASM) set(COMPILER_RT_STANDALONE_BUILD TRUE) set(COMPILER_RT_BUILTINS_STANDALONE_BUILD TRUE) + + set(COMPILER_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") + + set(LLVM_COMMON_CMAKE_UTILS "${COMPILER_RT_SOURCE_DIR}/../cmake") + + # Add path for custom modules list(INSERT CMAKE_MODULE_PATH 0 - "${CMAKE_SOURCE_DIR}/../../cmake" - "${CMAKE_SOURCE_DIR}/../../cmake/Modules" - "${CMAKE_SOURCE_DIR}/../../../cmake" - "${CMAKE_SOURCE_DIR}/../../../cmake/Modules") + "${COMPILER_RT_SOURCE_DIR}/cmake" + 
"${COMPILER_RT_SOURCE_DIR}/cmake/Modules" + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" + ) + include(base-config-ix) include(CompilerRTUtils) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 03a6a0781b5a..b0569a4a54ca 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -10,13 +10,14 @@ endif() #=============================================================================== cmake_minimum_required(VERSION 3.13.4) +set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + # Add path for custom modules -set(CMAKE_MODULE_PATH +list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules" - ${CMAKE_MODULE_PATH} + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) set(CMAKE_FOLDER "libc++") diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 12bcd2eee099..858f5d5cfd7f 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -10,13 +10,14 @@ endif() cmake_minimum_required(VERSION 3.13.4) +set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + # Add path for custom modules -set(CMAKE_MODULE_PATH +list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules" - ${CMAKE_MODULE_PATH} + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) set(CMAKE_FOLDER "libc++") diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index eb478e4e7730..e3cc66dd2226 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -8,13 +8,14 @@ endif() cmake_minimum_required(VERSION 3.13.4) +set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + # Add path for custom modules -set(CMAKE_MODULE_PATH +list(INSERT CMAKE_MODULE_PATH 0 
"${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules" - ${CMAKE_MODULE_PATH} + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 8f37d0e59feb..cedce7b3541e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -28,12 +28,14 @@ endfunction() find_package(LLVM PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) find_package(Clang PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) +set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + # Add path for custom and the LLVM build's modules to the CMake module path. list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules" + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" "${CMAKE_CURRENT_SOURCE_DIR}/../llvm/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/../llvm/cmake/modules" ) From 4f7f7284c2948e4375567c9f4b04ea8383674310 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 3 Jan 2022 15:08:05 -0500 Subject: [PATCH 455/992] [libc++][NFC] Fix comment for running Docker container --- libcxx/utils/ci/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index ba440040213d..1aeeeb73d78b 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -12,7 +12,7 @@ # can also just start the image with a shell to debug CI failures. # # To start a Buildkite Agent, run it as: -# $ docker run --env-file -it $(docker build -q .) 
+# $ docker run --env-file -it $(docker build -q libcxx/utils/ci) # # The environment variables in `` should be the ones necessary # to run a BuildKite agent. From 4aba7e901e60d168bdb32ab37e9f281f551d47d3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 3 Jan 2022 15:44:36 -0500 Subject: [PATCH 456/992] [libc++][CI] Don't install libc6-dev-i386 in the Docker image We don't cross-compile to 32 bits in the CI anymore. --- libcxx/utils/ci/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index 1aeeeb73d78b..034fe7b87a0b 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -43,7 +43,6 @@ RUN apt-get update && apt-get install -y bash curl # Install various tools used by the build or the test suite RUN apt-get update && apt-get install -y ninja-build python3 python3-sphinx python3-distutils git gdb -RUN apt-get update && apt-get install -y libc6-dev-i386 # Required to cross-compile to 32 bits # Locales for gdb and localization tests RUN apt-get update && apt-get install -y language-pack-en language-pack-fr \ From 1bb65bd58fb24db0f48df830274581fa3be7e489 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 3 Jan 2022 13:41:32 -0800 Subject: [PATCH 457/992] Add IgnoreBaseInCopyConstructors to .clang-tidy gcc issues warnings on copy constructors that do not explicitly initialize the base class. 
--- .clang-tidy | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 3f2f2c054eb3..1d4438dbfda0 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -16,4 +16,5 @@ CheckOptions: value: CamelCase - key: readability-identifier-naming.IgnoreMainLikeFunctions value: 1 - + - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors + value: 1 From c40049d6d7f18c3703b038e1e2517c9a551c1678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Mon, 3 Jan 2022 23:00:39 +0100 Subject: [PATCH 458/992] [lld][MinGW] Remove `--no-as-needed` from ignored flags In the post commit discussion of https://reviews.llvm.org/D116484 it was concluded that `--no-as-needed` should not be ignored. `--as-needed` stays ignored as it is already the default behaviour on COFF, which cannot be changed. --- lld/MinGW/Options.td | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index c38b0710d39b..019ff74ad971 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -154,7 +154,6 @@ def: F<"end-group">; def: Flag<["--"], "full-shutdown">; defm: EqNoHelp<"major-image-version">; defm: EqNoHelp<"minor-image-version">; -def: F<"no-as-needed">; def: F<"no-undefined">; def: F<"pic-executable">; defm: EqNoHelp<"plugin">; From f014ab933f35805159021d2d0c856a3c9af21a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Fri, 3 Dec 2021 08:13:57 +0100 Subject: [PATCH 459/992] [clang-format][NFC] Code Tidies in UnwrappedLineFormatter * Give I[1] and I[-1] a name: - Easier to understand - Easier to debug (since you don't go through operator[] every time) * TheLine->First != TheLine->Last follows since last is an l brace and first isn't. * Factor the check for is(tok::l_brace) out. * Drop else after return.
Differential Revision: https://reviews.llvm.org/D115060 --- clang/lib/Format/UnwrappedLineFormatter.cpp | 110 ++++++++++---------- 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 89f87f0375cd..2d71a939d7b7 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -211,10 +211,12 @@ class LineJoiner { const AnnotatedLine *TheLine = *I; if (TheLine->Last->is(TT_LineComment)) return 0; - if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) + const auto &NextLine = *I[1]; + const auto &PreviousLine = *I[-1]; + if (NextLine.Type == LT_Invalid || NextLine.First->MustBreakBefore) return 0; if (TheLine->InPPDirective && - (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)) + (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) return 0; if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) @@ -231,15 +233,15 @@ class LineJoiner { if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First == TheLine->Last && !Style.BraceWrapping.SplitEmptyFunction && - I[1]->First->is(tok::r_brace)) + NextLine.First->is(tok::r_brace)) return tryMergeSimpleBlock(I, E, Limit); // Handle empty record blocks where the brace has already been wrapped if (TheLine->Last->is(tok::l_brace) && TheLine->First == TheLine->Last && I != AnnotatedLines.begin()) { - bool EmptyBlock = I[1]->First->is(tok::r_brace); + bool EmptyBlock = NextLine.First->is(tok::r_brace); - const FormatToken *Tok = I[-1]->First; + const FormatToken *Tok = PreviousLine.First; if (Tok && Tok->is(tok::comment)) Tok = Tok->getNextNonComment(); @@ -267,7 +269,7 @@ class LineJoiner { bool MergeShortFunctions = Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - I[1]->First->is(tok::r_brace)) || + NextLine.First->is(tok::r_brace)) || 
(Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly && TheLine->Level != 0); @@ -312,73 +314,75 @@ class LineJoiner { return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; } // Try to merge a control statement block with left brace unwrapped - if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last && + if (TheLine->Last->is(tok::l_brace) && TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { return Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never ? tryMergeSimpleBlock(I, E, Limit) : 0; } // Try to merge a control statement block with left brace wrapped - if (I[1]->First->is(tok::l_brace) && - (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, - tok::kw_for, tok::kw_switch, tok::kw_try, - tok::kw_do, TT_ForEachMacro) || - (TheLine->First->is(tok::r_brace) && TheLine->First->Next && - TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) && - Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_MultiLine) { - // If possible, merge the next line's wrapped left brace with the current - // line. Otherwise, leave it on the next line, as this is a multi-line - // control statement. - return (Style.ColumnLimit == 0 || - TheLine->Last->TotalLength <= Style.ColumnLimit) - ? 1 - : 0; - } else if (I[1]->First->is(tok::l_brace) && - TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, - tok::kw_for)) { - return (Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_Always) - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - } else if (I[1]->First->is(tok::l_brace) && - TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) && - Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_MultiLine) { - // This case if different from the upper BWACS_MultiLine processing - // in that a preceding r_brace is not on the same line as else/catch - // most likely because of BeforeElse/BeforeCatch set to true. 
- // If the line length doesn't fit ColumnLimit, leave l_brace on the - // next line to respect the BWACS_MultiLine. - return (Style.ColumnLimit == 0 || - TheLine->Last->TotalLength <= Style.ColumnLimit) - ? 1 - : 0; + if (NextLine.First->is(tok::l_brace)) { + if ((TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, + tok::kw_for, tok::kw_switch, tok::kw_try, + tok::kw_do, TT_ForEachMacro) || + (TheLine->First->is(tok::r_brace) && TheLine->First->Next && + TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) && + Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_MultiLine) { + // If possible, merge the next line's wrapped left brace with the + // current line. Otherwise, leave it on the next line, as this is a + // multi-line control statement. + return (Style.ColumnLimit == 0 || + TheLine->Last->TotalLength <= Style.ColumnLimit) + ? 1 + : 0; + } + if (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, + tok::kw_for)) { + return (Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_Always) + ? tryMergeSimpleBlock(I, E, Limit) + : 0; + } + if (TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) && + Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_MultiLine) { + // This case if different from the upper BWACS_MultiLine processing + // in that a preceding r_brace is not on the same line as else/catch + // most likely because of BeforeElse/BeforeCatch set to true. + // If the line length doesn't fit ColumnLimit, leave l_brace on the + // next line to respect the BWACS_MultiLine. + return (Style.ColumnLimit == 0 || + TheLine->Last->TotalLength <= Style.ColumnLimit) + ? 
1 + : 0; + } } // Don't merge block with left brace wrapped after ObjC special blocks if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && - I[-1]->First->is(tok::at) && I[-1]->First->Next) { - tok::ObjCKeywordKind kwId = I[-1]->First->Next->Tok.getObjCKeywordID(); + PreviousLine.First->is(tok::at) && PreviousLine.First->Next) { + tok::ObjCKeywordKind kwId = + PreviousLine.First->Next->Tok.getObjCKeywordID(); if (kwId == clang::tok::objc_autoreleasepool || kwId == clang::tok::objc_synchronized) return 0; } // Don't merge block with left brace wrapped after case labels if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && - I[-1]->First->isOneOf(tok::kw_case, tok::kw_default)) + PreviousLine.First->isOneOf(tok::kw_case, tok::kw_default)) return 0; // Don't merge an empty template class or struct if SplitEmptyRecords // is defined. if (Style.BraceWrapping.SplitEmptyRecord && TheLine->Last->is(tok::l_brace) && I != AnnotatedLines.begin() && - I[-1]->Last) { - const FormatToken *Previous = I[-1]->Last; + PreviousLine.Last) { + const FormatToken *Previous = PreviousLine.Last; if (Previous) { if (Previous->is(tok::comment)) Previous = Previous->getPreviousNonComment(); if (Previous) { - if (Previous->is(tok::greater) && !I[-1]->InPPDirective) + if (Previous->is(tok::greater) && !PreviousLine.InPPDirective) return 0; if (Previous->is(tok::identifier)) { const FormatToken *PreviousPrevious = @@ -401,21 +405,21 @@ class LineJoiner { } if (Tok->isOneOf(tok::kw_class, tok::kw_struct)) { ShouldMerge = !Style.BraceWrapping.AfterClass || - (I[1]->First->is(tok::r_brace) && + (NextLine.First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyRecord); } else if (Tok->is(tok::kw_enum)) { ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else { ShouldMerge = !Style.BraceWrapping.AfterFunction || - (I[1]->First->is(tok::r_brace) && + (NextLine.First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyFunction); } return ShouldMerge ? 
tryMergeSimpleBlock(I, E, Limit) : 0; } // Try to merge a function block with left brace wrapped - if (I[1]->First->is(TT_FunctionLBrace) && + if (NextLine.First->is(TT_FunctionLBrace) && Style.BraceWrapping.AfterFunction) { - if (I[1]->Last->is(TT_LineComment)) + if (NextLine.Last->is(TT_LineComment)) return 0; // Check for Limit <= 2 to account for the " {". @@ -426,7 +430,7 @@ class LineJoiner { unsigned MergedLines = 0; if (MergeShortFunctions || (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - I[1]->First == I[1]->Last && I + 2 != E && + NextLine.First == NextLine.Last && I + 2 != E && I[2]->First->is(tok::r_brace))) { MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); // If we managed to merge the block, count the function header, which is From d48d1f8ee84577a1ca38d4fe03956ee27884e399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Fri, 3 Dec 2021 16:37:02 +0100 Subject: [PATCH 460/992] [clang-format][NFC] Merge another two calls to isOneOf Differential Revision: https://reviews.llvm.org/D115069 --- clang/lib/Format/ContinuationIndenter.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 4225d6b67b0e..31f5de673362 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1288,10 +1288,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack[i].NoLineBreak = true; State.Stack[State.Stack.size() - 2].NestedBlockInlined = false; } - if (Previous && - (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && - !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { + if (Previous && (Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) || + (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) && + !Previous->isOneOf(TT_DictLiteral, 
TT_ObjCMethodExpr)))) { State.Stack.back().NestedBlockInlined = !Newline && hasNestedBlockInlined(Previous, Current, Style); } From 1188f241acb78dacef00b7b6b3ec0b04cb43c786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Sun, 5 Dec 2021 12:31:56 +0100 Subject: [PATCH 461/992] Revert "[clang-format][NFC] Prefer pass by reference" This reverts commit 25f637913fe31b6d23e78ff07c725bb537dd3b97. Differential Revision: https://reviews.llvm.org/D115061 --- clang/lib/Format/UnwrappedLineFormatter.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 2d71a939d7b7..303150348ad8 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -1064,9 +1064,9 @@ class OptimizingLineFormatter : public LineFormatter { FormatDecision LastFormat = Node->State.NextToken->getDecision(); if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) - addNextStateToQueue(Penalty, Node, /*NewLine=*/false, Count, Queue); + addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); if (LastFormat == FD_Unformatted || LastFormat == FD_Break) - addNextStateToQueue(Penalty, Node, /*NewLine=*/true, Count, Queue); + addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); } if (Queue.empty()) { @@ -1092,7 +1092,7 @@ class OptimizingLineFormatter : public LineFormatter { /// Assume the current state is \p PreviousNode and has been reached with a /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 
void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, - bool NewLine, unsigned &Count, QueueType &Queue) { + bool NewLine, unsigned *Count, QueueType *Queue) { if (NewLine && !Indenter->canBreak(PreviousNode->State)) return; if (!NewLine && Indenter->mustBreak(PreviousNode->State)) @@ -1105,8 +1105,8 @@ class OptimizingLineFormatter : public LineFormatter { Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); - ++Count; + Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); + ++(*Count); } /// Applies the best formatting by reconstructing the path in the From f1f5a85af8be484c8d0d31bb643e10af0efd01d8 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 3 Jan 2022 17:17:11 -0500 Subject: [PATCH 462/992] [mlir] NFC - Format ExecutionEngine.cpp --- mlir/lib/ExecutionEngine/ExecutionEngine.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index d0556e13cf3b..ead15152162e 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -180,10 +180,10 @@ static void packFunctionArguments(Module *module) { for (auto &indexedArg : llvm::enumerate(func.args())) { llvm::Value *argIndex = llvm::Constant::getIntegerValue( builder.getInt64Ty(), APInt(64, indexedArg.index())); - llvm::Value *argPtrPtr = builder.CreateGEP( - builder.getInt8PtrTy(), argList, argIndex); - llvm::Value *argPtr = builder.CreateLoad(builder.getInt8PtrTy(), - argPtrPtr); + llvm::Value *argPtrPtr = + builder.CreateGEP(builder.getInt8PtrTy(), argList, argIndex); + llvm::Value *argPtr = + builder.CreateLoad(builder.getInt8PtrTy(), argPtrPtr); llvm::Type *argTy = indexedArg.value().getType(); argPtr = builder.CreateBitCast(argPtr, argTy->getPointerTo()); llvm::Value *arg = builder.CreateLoad(argTy, argPtr); @@ -199,8 +199,8 @@ static 
void packFunctionArguments(Module *module) { builder.getInt64Ty(), APInt(64, llvm::size(func.args()))); llvm::Value *retPtrPtr = builder.CreateGEP(builder.getInt8PtrTy(), argList, retIndex); - llvm::Value *retPtr = builder.CreateLoad(builder.getInt8PtrTy(), - retPtrPtr); + llvm::Value *retPtr = + builder.CreateLoad(builder.getInt8PtrTy(), retPtrPtr); retPtr = builder.CreateBitCast(retPtr, result->getType()->getPointerTo()); builder.CreateStore(result, retPtr); } From c343c200ea0dd321270d03438c9ea4240fd17400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Mon, 3 Jan 2022 23:45:26 +0100 Subject: [PATCH 463/992] [mlir][LLVM] Fix mapping of result values of `llvm.invoke` during export The result value of a llvm.invoke operation is currently not mapped to the corresponding llvm::Value* when exporting to LLVM IR. This leads to any later operations using the result to crash as it receives a nullptr. Differential Revision: https://reviews.llvm.org/D116564 --- .../LLVMIR/LLVMToLLVMIRTranslation.cpp | 25 +++++++++------ mlir/test/Target/LLVMIR/llvmir.mlir | 32 +++++++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 8d19f466fe5b..4f5e636c0a8e 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -342,22 +342,29 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, if (auto invOp = dyn_cast(opInst)) { auto operands = moduleTranslation.lookupValues(opInst.getOperands()); ArrayRef operandsRef(operands); + llvm::Value *result; if (auto attr = opInst.getAttrOfType("callee")) { - builder.CreateInvoke(moduleTranslation.lookupFunction(attr.getValue()), - moduleTranslation.lookupBlock(invOp.getSuccessor(0)), - moduleTranslation.lookupBlock(invOp.getSuccessor(1)), - operandsRef); + 
result = builder.CreateInvoke( + moduleTranslation.lookupFunction(attr.getValue()), + moduleTranslation.lookupBlock(invOp.getSuccessor(0)), + moduleTranslation.lookupBlock(invOp.getSuccessor(1)), operandsRef); } else { auto *calleePtrType = cast(operandsRef.front()->getType()); auto *calleeType = cast(calleePtrType->getElementType()); - builder.CreateInvoke(calleeType, operandsRef.front(), - moduleTranslation.lookupBlock(invOp.getSuccessor(0)), - moduleTranslation.lookupBlock(invOp.getSuccessor(1)), - operandsRef.drop_front()); + result = builder.CreateInvoke( + calleeType, operandsRef.front(), + moduleTranslation.lookupBlock(invOp.getSuccessor(0)), + moduleTranslation.lookupBlock(invOp.getSuccessor(1)), + operandsRef.drop_front()); } - return success(); + // InvokeOp can only have 0 or 1 result + if (invOp->getNumResults() != 0) { + moduleTranslation.mapValue(opInst.getResult(0), result); + return success(); + } + return success(result->getType()->isVoidTy()); } if (auto lpOp = dyn_cast(opInst)) { diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index f5b6d60662ad..e87eebaca515 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1296,6 +1296,38 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali %8 = llvm.invoke @bar(%6) to ^bb2 unwind ^bb1 : (!llvm.ptr) -> !llvm.ptr } +// ----- + +llvm.mlir.global external constant @_ZTIi() : !llvm.ptr +llvm.func @foo() -> i8 +llvm.func @__gxx_personality_v0(...) 
-> i32 + +// CHECK-LABEL: @invoke_result +// CHECK-SAME: %[[a0:[0-9]+]] +llvm.func @invoke_result(%arg0 : !llvm.ptr) attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> +// CHECK: %[[a1:[0-9]+]] = invoke i8 @foo() +// CHECK-NEXT: to label %[[normal:[0-9]+]] unwind label %[[unwind:[0-9]+]] + %1 = llvm.invoke @foo() to ^bb1 unwind ^bb2 : () -> i8 + +// CHECK: [[normal]]: +// CHECK-NEXT: store i8 %[[a1]], i8* %[[a0]] +// CHECK-NEXT: ret void +^bb1: + llvm.store %1, %arg0 : !llvm.ptr + llvm.return + +// CHECK: [[unwind]]: +// CHECK-NEXT: landingpad { i8*, i32 } +// CHECK-NEXT: catch i8** @_ZTIi +// CHECK-NEXT: ret void +^bb2: + %7 = llvm.landingpad (catch %0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> + llvm.return +} + +// ----- + // CHECK-LABEL: @callFreezeOp llvm.func @callFreezeOp(%x : i32) { // CHECK: freeze i32 %{{[0-9]+}} From 67c937f846b18e3113e126c37c69a222c0e99c1c Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Sun, 2 Jan 2022 14:35:52 -0800 Subject: [PATCH 464/992] [lldb] Use std::move in StringList (NFC) --- lldb/source/Utility/StringList.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lldb/source/Utility/StringList.cpp b/lldb/source/Utility/StringList.cpp index baff34ae3a5e..f78681c05a3d 100644 --- a/lldb/source/Utility/StringList.cpp +++ b/lldb/source/Utility/StringList.cpp @@ -42,7 +42,9 @@ void StringList::AppendString(const char *str) { void StringList::AppendString(const std::string &s) { m_strings.push_back(s); } -void StringList::AppendString(std::string &&s) { m_strings.push_back(s); } +void StringList::AppendString(std::string &&s) { + m_strings.push_back(std::move(s)); +} void StringList::AppendString(const char *str, size_t str_len) { if (str) @@ -133,9 +135,9 @@ void StringList::InsertStringAtIndex(size_t idx, const std::string &str) { void StringList::InsertStringAtIndex(size_t idx, std::string &&str) { if (idx < m_strings.size()) - m_strings.insert(m_strings.begin() + 
idx, str); + m_strings.insert(m_strings.begin() + idx, std::move(str)); else - m_strings.push_back(str); + m_strings.push_back(std::move(str)); } void StringList::DeleteStringAtIndex(size_t idx) { From d9cf9bd4b3c30221a2ec348cdfb01a24d84927b6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Jan 2022 17:24:54 -0800 Subject: [PATCH 465/992] [llvm-readobj][test] Rename ELF/reloc-types-elf-*.test to avoid redundant "elf-" infix --- .../{reloc-types-elf-aarch64.test => reloc-types-aarch64.test} | 0 .../ELF/{reloc-types-elf-amdgpu.test => reloc-types-amdgpu.test} | 0 .../ELF/{reloc-types-elf-arm.test => reloc-types-arm.test} | 0 .../ELF/{reloc-types-elf-i386.test => reloc-types-i386.test} | 0 .../ELF/{reloc-types-elf-lanai.test => reloc-types-lanai.test} | 0 .../ELF/{reloc-types-elf-mips.test => reloc-types-mips.test} | 0 .../ELF/{reloc-types-elf-mips64.test => reloc-types-mips64.test} | 0 .../ELF/{reloc-types-elf-ppc64.test => reloc-types-ppc64.test} | 0 .../ELF/{reloc-types-elf-x64.test => reloc-types-x64.test} | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-aarch64.test => reloc-types-aarch64.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-amdgpu.test => reloc-types-amdgpu.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-arm.test => reloc-types-arm.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-i386.test => reloc-types-i386.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-lanai.test => reloc-types-lanai.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-mips.test => reloc-types-mips.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-mips64.test => reloc-types-mips64.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-ppc64.test => reloc-types-ppc64.test} (100%) rename llvm/test/tools/llvm-readobj/ELF/{reloc-types-elf-x64.test => reloc-types-x64.test} (100%) 
diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-aarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-aarch64.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-aarch64.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-aarch64.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-amdgpu.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-amdgpu.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-amdgpu.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-amdgpu.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-arm.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-arm.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-arm.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-arm.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-i386.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-i386.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-i386.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-i386.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-lanai.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-lanai.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-lanai.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-lanai.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-mips.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-mips.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-mips64.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips64.test rename to 
llvm/test/tools/llvm-readobj/ELF/reloc-types-mips64.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-ppc64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-ppc64.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-ppc64.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-ppc64.test diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-x64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-x64.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-x64.test rename to llvm/test/tools/llvm-readobj/ELF/reloc-types-x64.test From 6bf22ae4d31c8ae6171cbcdfee488136257d341f Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 4 Jan 2022 10:14:01 +0800 Subject: [PATCH 466/992] [M68k][NFC] Fix file header Let all file header have the same style. NFC. --- llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp | 2 +- llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp | 2 +- llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp | 2 +- llvm/lib/Target/M68k/GISel/M68kCallLowering.h | 2 +- llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp | 2 +- llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp | 2 +- llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h | 2 +- llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp | 2 +- llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h | 2 +- llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td | 2 +- llvm/lib/Target/M68k/M68k.h | 2 +- llvm/lib/Target/M68k/M68k.td | 2 +- llvm/lib/Target/M68k/M68kAsmPrinter.cpp | 2 +- llvm/lib/Target/M68k/M68kAsmPrinter.h | 2 +- llvm/lib/Target/M68k/M68kCallingConv.h | 2 +- llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp | 2 +- llvm/lib/Target/M68k/M68kExpandPseudo.cpp | 2 +- llvm/lib/Target/M68k/M68kFrameLowering.cpp | 2 +- llvm/lib/Target/M68k/M68kFrameLowering.h | 2 +- llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp | 2 +- llvm/lib/Target/M68k/M68kISelLowering.cpp | 2 +- llvm/lib/Target/M68k/M68kISelLowering.h | 2 +- 
llvm/lib/Target/M68k/M68kInstrBits.td | 2 +- llvm/lib/Target/M68k/M68kInstrBuilder.h | 2 +- llvm/lib/Target/M68k/M68kInstrCompiler.td | 2 +- llvm/lib/Target/M68k/M68kInstrControl.td | 2 +- llvm/lib/Target/M68k/M68kInstrData.td | 2 +- llvm/lib/Target/M68k/M68kInstrFormats.td | 2 +- llvm/lib/Target/M68k/M68kInstrInfo.cpp | 2 +- llvm/lib/Target/M68k/M68kInstrInfo.h | 2 +- llvm/lib/Target/M68k/M68kInstrInfo.td | 2 +- llvm/lib/Target/M68k/M68kInstrShiftRotate.td | 2 +- llvm/lib/Target/M68k/M68kMCInstLower.cpp | 2 +- llvm/lib/Target/M68k/M68kMCInstLower.h | 2 +- llvm/lib/Target/M68k/M68kMachineFunction.cpp | 2 +- llvm/lib/Target/M68k/M68kMachineFunction.h | 2 +- llvm/lib/Target/M68k/M68kRegisterInfo.cpp | 2 +- llvm/lib/Target/M68k/M68kRegisterInfo.h | 2 +- llvm/lib/Target/M68k/M68kRegisterInfo.td | 2 +- llvm/lib/Target/M68k/M68kSchedule.td | 2 +- llvm/lib/Target/M68k/M68kSubtarget.cpp | 2 +- llvm/lib/Target/M68k/M68kSubtarget.h | 2 +- llvm/lib/Target/M68k/M68kTargetMachine.cpp | 2 +- llvm/lib/Target/M68k/M68kTargetMachine.h | 2 +- llvm/lib/Target/M68k/M68kTargetObjectFile.cpp | 2 +- llvm/lib/Target/M68k/M68kTargetObjectFile.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h | 2 +- llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp | 2 +- 59 files changed, 59 insertions(+), 59 deletions(-) diff --git 
a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index 4db879c34ad9..dcd581875f60 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -1,4 +1,4 @@ -//===---- M68kAsmParser.cpp - Parse M68k assembly to MCInst instructions --===// +//===-- M68kAsmParser.cpp - Parse M68k assembly to MCInst instructions ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp index a08ffa787095..a565ff4e004d 100644 --- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp +++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp @@ -1,4 +1,4 @@ -//===- M68kDisassembler.cpp - Disassembler for M68k -------------*- C++ -*-===// +//===-- M68kDisassembler.cpp - Disassembler for M68k ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp index 9cd959012e6f..b3d17184f1fe 100644 --- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp @@ -1,4 +1,4 @@ -//===-- M68kCallLowering.cpp - Call lowering -------------------*- C++ -*-===// +//===-- M68kCallLowering.cpp - Call lowering --------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h index 47cdefdba100..24212e6dd9c6 100644 --- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h +++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h @@ -1,4 +1,4 @@ -//===-- M68kCallLowering.h - Call lowering -------------------*- C++ -*-===// +//===-- M68kCallLowering.h - Call lowering ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp index 9ac4ab9a5ba1..a627eccd110d 100644 --- a/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp @@ -1,4 +1,4 @@ -//===- M68kInstructionSelector.cpp ------------------------------*- C++ -*-===// +//===-- M68kInstructionSelector.cpp -----------------------------*- C++ -*-===// //===----------------------------------------------------------------------===// /// \file /// This file implements the targeting of the InstructionSelector class for diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp index bcbe62816beb..860c0ce29326 100644 --- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp @@ -1,4 +1,4 @@ -//===-- M68kLegalizerInfo.cpp ----------------------------------*- C++ -*-===// +//===-- M68kLegalizerInfo.cpp -----------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h index 205aa81aedcc..a10401ed1a9a 100644 --- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h +++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h @@ -1,4 +1,4 @@ -//===- M68kLegalizerInfo --------------------------------------*- C++ -*-==// +//===-- M68kLegalizerInfo ---------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp index 5c0f5dae8e37..b6ed6ab28a5d 100644 --- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp @@ -1,4 +1,4 @@ -//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===// +//===-- M68kRegisterBankInfo.cpp --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h index 853c75df2bb3..69693c049a55 100644 --- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h +++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h @@ -1,4 +1,4 @@ -//===-- M68kRegisterBankInfo.h ---------------------------------*- C++ -*-===// +//===-- M68kRegisterBankInfo.h ----------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td index 942677a60e6c..2a00ec065cd4 100644 --- a/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td +++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td @@ -1,4 +1,4 @@ -//===-- M68kRegisterBanks.td - Describe the M68k Banks -------*- tablegen -*-===// +//===-- M68kRegisterBanks.td - Describe the M68k Banks -----*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68k.h b/llvm/lib/Target/M68k/M68k.h index cef40bee7d93..1c0d0af40dc2 100644 --- a/llvm/lib/Target/M68k/M68k.h +++ b/llvm/lib/Target/M68k/M68k.h @@ -1,4 +1,4 @@ -//===- M68k.h - Top-level interface for M68k representation -*- C++ -*-===// +//===-- M68k.h - Top-level interface for M68k representation ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68k.td b/llvm/lib/Target/M68k/M68k.td index fde491e1b6d5..de7a6c82d110 100644 --- a/llvm/lib/Target/M68k/M68k.td +++ b/llvm/lib/Target/M68k/M68k.td @@ -1,4 +1,4 @@ -//===-- M68k.td - Motorola 680x0 target definitions ------*- tablegen -*-===// +//===-- M68k.td - Motorola 680x0 target definitions --------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp index 08b7153632b4..3bcce9e3ba3b 100644 --- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp +++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp @@ -1,4 +1,4 @@ -//===----- M68kAsmPrinter.cpp - M68k LLVM Assembly Printer -----*- C++ -*-===// +//===-- M68kAsmPrinter.cpp - M68k LLVM Assembly Printer ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.h b/llvm/lib/Target/M68k/M68kAsmPrinter.h index dff3bb876336..8e96e598ed47 100644 --- a/llvm/lib/Target/M68k/M68kAsmPrinter.h +++ b/llvm/lib/Target/M68k/M68kAsmPrinter.h @@ -1,4 +1,4 @@ -//===----- M68kAsmPrinter.h - M68k LLVM Assembly Printer -------- C++ -*--===// +//===-- M68kAsmPrinter.h - M68k LLVM Assembly Printer -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kCallingConv.h b/llvm/lib/Target/M68k/M68kCallingConv.h index 20ffa993897f..efd3cbffaa6d 100644 --- a/llvm/lib/Target/M68k/M68kCallingConv.h +++ b/llvm/lib/Target/M68k/M68kCallingConv.h @@ -1,4 +1,4 @@ -//===-- M68kCallingConv.h - M68k Custom CC Routines ---------*- C++ -*-===// +//===-- M68kCallingConv.h - M68k Custom CC Routines -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp index 4149ae92ffe9..7f0c0dd92dbb 100644 --- a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp +++ b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp @@ -1,4 +1,4 @@ -//===----- M68kCollapseMOVEMPass.cpp - Expand MOVEM pass --------*- C++ -*-===// +//===-- M68kCollapseMOVEMPass.cpp - Expand MOVEM pass -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp index 6a4aeaab518a..acfa30f28c2b 100644 --- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -1,4 +1,4 @@ -//===--M68kExpandPseudo.cpp - Expand pseudo instructions ------*- C++ -*-===// +//===-- M68kExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp index 66ea6ae38f43..6b0413c61463 100644 --- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp +++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp @@ -1,4 +1,4 @@ -//===-- M68kFrameLowering.cpp - M68k Frame Information ------*- C++ -*-===// +//===-- M68kFrameLowering.cpp - M68k Frame Information ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.h b/llvm/lib/Target/M68k/M68kFrameLowering.h index 0eba9e08d858..6948d18118cc 100644 --- a/llvm/lib/Target/M68k/M68kFrameLowering.h +++ b/llvm/lib/Target/M68k/M68kFrameLowering.h @@ -1,4 +1,4 @@ -//===- M68kFrameLowering.h - Define frame lowering for M68k -*- C++ -*-===// +//===-- M68kFrameLowering.h - Define frame lowering for M68k ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp index 0076c2647df3..9ef97b96ea9a 100644 --- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp +++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp @@ -1,4 +1,4 @@ -//===- M68kISelDAGToDAG.cpp - M68k Dag to Dag Inst Selector -*- C++ -*-===// +//===-- M68kISelDAGToDAG.cpp - M68k Dag to Dag Inst Selector ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index 79b395f8f984..d99020f7c751 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- M68kISelLowering.cpp - M68k DAG Lowering Impl ------*- C++ -*--===// +//===-- M68kISelLowering.cpp - M68k DAG Lowering Impl -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h index 6a5a40a8815b..e8e0196d7423 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.h +++ b/llvm/lib/Target/M68k/M68kISelLowering.h @@ -1,4 +1,4 @@ -//===-- M68kISelLowering.h - M68k DAG Lowering Interface ----*- C++ -*-===// +//===-- M68kISelLowering.h - M68k DAG Lowering Interface --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td index d97ca50f74a9..5543bccd108c 100644 --- a/llvm/lib/Target/M68k/M68kInstrBits.td +++ b/llvm/lib/Target/M68k/M68kInstrBits.td @@ -1,4 +1,4 @@ -//===------- M68kInstrBits.td - Bit Manipulation Instrs --*- tablegen -*-===// +//===-- M68kInstrBits.td - Bit Manipulation Instrs ---------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrBuilder.h b/llvm/lib/Target/M68k/M68kInstrBuilder.h index e32b1b047a2b..435a437ede64 100644 --- a/llvm/lib/Target/M68k/M68kInstrBuilder.h +++ b/llvm/lib/Target/M68k/M68kInstrBuilder.h @@ -1,4 +1,4 @@ -//===-- M68kInstrBuilder.h - Functions to build M68k insts --*- C++ -*-===// +//===-- M68kInstrBuilder.h - Functions to build M68k insts ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kInstrCompiler.td b/llvm/lib/Target/M68k/M68kInstrCompiler.td index 8fb331dec0e9..2ecf5ca0e6d0 100644 --- a/llvm/lib/Target/M68k/M68kInstrCompiler.td +++ b/llvm/lib/Target/M68k/M68kInstrCompiler.td @@ -1,4 +1,4 @@ -//===-- M68kInstrCompiler.td - Pseudos and Patterns ------*- tablegen -*-===// +//===-- M68kInstrCompiler.td - Pseudos and Patterns --------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td b/llvm/lib/Target/M68k/M68kInstrControl.td index 9f87833ab0e2..be9045b6e0d2 100644 --- a/llvm/lib/Target/M68k/M68kInstrControl.td +++ b/llvm/lib/Target/M68k/M68kInstrControl.td @@ -1,4 +1,4 @@ -//===-- M68kInstrControl.td - Control Flow Instructions --*- tablegen -*-===// +//===-- M68kInstrControl.td - Control Flow Instructions ----*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td index 40b9e4a2a7fa..3dd5d9f8c7ac 100644 --- a/llvm/lib/Target/M68k/M68kInstrData.td +++ b/llvm/lib/Target/M68k/M68kInstrData.td @@ -1,4 +1,4 @@ -//== M68kInstrData.td - M68k Data Movement Instructions -*- tablegen --===// +//===-- M68kInstrData.td - M68k Data Movement Instructions -*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td index 99b7ffd17971..7e0c96a5b1f6 100644 --- a/llvm/lib/Target/M68k/M68kInstrFormats.td +++ b/llvm/lib/Target/M68k/M68kInstrFormats.td @@ -1,4 +1,4 @@ -//=== M68kInstrFormats.td - M68k Instruction Formats ---*- tablegen -*-===// +//===-- M68kInstrFormats.td - M68k Instruction Formats -----*- tablegen -*-===// // The LLVM Compiler Infrastructure // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index 639bcd455687..009ec52307f7 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- M68kInstrInfo.cpp - M68k Instruction Information ----*- C++ -*-===// +//===-- M68kInstrInfo.cpp - M68k Instruction Information --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h index 6aced1487365..06ae8a876151 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.h +++ b/llvm/lib/Target/M68k/M68kInstrInfo.h @@ -1,4 +1,4 @@ -//===-- M68kInstrInfo.h - M68k Instruction Information ------*- C++ -*-===// +//===-- M68kInstrInfo.h - M68k Instruction Information ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td index ed6cd9ecf442..e2a7fb48ddc0 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.td +++ b/llvm/lib/Target/M68k/M68kInstrInfo.td @@ -1,4 +1,4 @@ -//== M68kInstrInfo.td - Main M68k Instruction Definition -*- tablegen -*-=// +//===-- M68kInstrInfo.td - Main M68k Instruction Definition -*- tablegen -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td index cab687638076..f1967ec11928 100644 --- a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td +++ b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td @@ -1,4 +1,4 @@ -//===------ M68kInstrShiftRotate.td - Logical Instrs -----*- tablegen -*-===// +//===-- M68kInstrShiftRotate.td - Logical Instrs -----------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kMCInstLower.cpp b/llvm/lib/Target/M68k/M68kMCInstLower.cpp index f14361559b13..a0b1452ee663 100644 --- a/llvm/lib/Target/M68k/M68kMCInstLower.cpp +++ b/llvm/lib/Target/M68k/M68kMCInstLower.cpp @@ -1,4 +1,4 @@ -//===-- M68kMCInstLower.cpp - M68k MachineInstr to MCInst ---*- C++ -*-===// +//===-- M68kMCInstLower.cpp - M68k MachineInstr to MCInst -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kMCInstLower.h b/llvm/lib/Target/M68k/M68kMCInstLower.h index d6160629545e..ef7729a7deae 100644 --- a/llvm/lib/Target/M68k/M68kMCInstLower.h +++ b/llvm/lib/Target/M68k/M68kMCInstLower.h @@ -1,4 +1,4 @@ -//===-- M68kMCInstLower.h - Lower MachineInstr to MCInst -----*- C++ -*--===// +//===-- M68kMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.cpp b/llvm/lib/Target/M68k/M68kMachineFunction.cpp index 3d048df7ba49..b1e7369116d7 100644 --- a/llvm/lib/Target/M68k/M68kMachineFunction.cpp +++ b/llvm/lib/Target/M68k/M68kMachineFunction.cpp @@ -1,4 +1,4 @@ -//===-- M68kMachineFunctionInfo.cpp - M68k private data ----*- C++ -*--===// +//===-- M68kMachineFunctionInfo.cpp - M68k private data ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.h b/llvm/lib/Target/M68k/M68kMachineFunction.h index 5760bdd4b9e3..fa2859ab7ed0 100644 --- a/llvm/lib/Target/M68k/M68kMachineFunction.h +++ b/llvm/lib/Target/M68k/M68kMachineFunction.h @@ -1,4 +1,4 @@ -//===-- M68kMachineFunctionInfo.h - M68k private data ---------*- C++ -*-=// +//===-- M68kMachineFunctionInfo.h - M68k private data -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp index 69d16035b1d9..0cae7ac4e312 100644 --- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp +++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- M68kRegisterInfo.cpp - CPU0 Register Information -----*- C++ -*--===// +//===-- M68kRegisterInfo.cpp - CPU0 Register Information --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.h b/llvm/lib/Target/M68k/M68kRegisterInfo.h index 51b94294772c..c15e9c1ac56b 100644 --- a/llvm/lib/Target/M68k/M68kRegisterInfo.h +++ b/llvm/lib/Target/M68k/M68kRegisterInfo.h @@ -1,4 +1,4 @@ -//===-- M68kRegisterInfo.h - M68k Register Information Impl --*- C++ --===// +//===-- M68kRegisterInfo.h - M68k Register Information Impl -----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.td b/llvm/lib/Target/M68k/M68kRegisterInfo.td index e2ea2967f75b..49874a2b1099 100644 --- a/llvm/lib/Target/M68k/M68kRegisterInfo.td +++ b/llvm/lib/Target/M68k/M68kRegisterInfo.td @@ -1,4 +1,4 @@ -//== M68kRegisterInfo.td - M68k register definitions ----*- tablegen -*-==// +//==-- M68kRegisterInfo.td - M68k register definitions ------*- tablegen -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kSchedule.td b/llvm/lib/Target/M68k/M68kSchedule.td index a94cd8f31e2e..6a1bf0c6a020 100644 --- a/llvm/lib/Target/M68k/M68kSchedule.td +++ b/llvm/lib/Target/M68k/M68kSchedule.td @@ -1,4 +1,4 @@ -//===-- M68kSchedule.td - M68k Scheduling Definitions --*- tablegen -*-===// +//===-- M68kSchedule.td - M68k Scheduling Definitions ------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp index 991889706e67..ec3830243daf 100644 --- a/llvm/lib/Target/M68k/M68kSubtarget.cpp +++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- M68kSubtarget.cpp - M68k Subtarget Information ------*- C++ -*-===// +//===-- M68kSubtarget.cpp - M68k Subtarget Information ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kSubtarget.h b/llvm/lib/Target/M68k/M68kSubtarget.h index f45cb7edca1f..75cd8418fc0b 100644 --- a/llvm/lib/Target/M68k/M68kSubtarget.h +++ b/llvm/lib/Target/M68k/M68kSubtarget.h @@ -1,4 +1,4 @@ -//===-- M68kSubtarget.h - Define Subtarget for the M68k -----*- C++ -*-===// +//===-- M68kSubtarget.h - Define Subtarget for the M68k ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp index e8126c6219e8..fd21fe6bcea8 100644 --- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp +++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp @@ -1,4 +1,4 @@ -//===-- M68kTargetMachine.cpp - M68k target machine ---------*- C++ -*-===// +//===-- M68kTargetMachine.cpp - M68k Target Machine -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.h b/llvm/lib/Target/M68k/M68kTargetMachine.h index 34fae8e45504..5e27f1f5e3ba 100644 --- a/llvm/lib/Target/M68k/M68kTargetMachine.h +++ b/llvm/lib/Target/M68k/M68kTargetMachine.h @@ -1,4 +1,4 @@ -//===-- M68kTargetMachine.h - Define TargetMachine for M68k ----- C++ -===// +//===-- M68kTargetMachine.h - Define TargetMachine for M68k -----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp b/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp index 3e26b37e7760..4986d5dbebb9 100644 --- a/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp +++ b/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- M68kELFTargetObjectFile.cpp - M68k Object Files -----*- C++ -*-===// +//===-- M68kELFTargetObjectFile.cpp - M68k Object Files ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/M68kTargetObjectFile.h b/llvm/lib/Target/M68k/M68kTargetObjectFile.h index dbc5375d5423..4c46cf8e63b8 100644 --- a/llvm/lib/Target/M68k/M68kTargetObjectFile.h +++ b/llvm/lib/Target/M68k/M68kTargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- M68kELFTargetObjectFile.h - M68k Object Info ---------*- C++ -====// +//===-- M68kELFTargetObjectFile.h - M68k Object Info ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp index c1f88fb78ee1..b66557ec6c3a 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp @@ -1,4 +1,4 @@ -//===-- M68kAsmBackend.cpp - M68k Assembler Backend ---------*- C++ -*-===// +//===-- M68kAsmBackend.cpp - M68k Assembler Backend -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h index 7c56cfdf3123..3c280ca4f074 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h @@ -1,4 +1,4 @@ -//===-- M68kBaseInfo.h - Top level definitions for M68k MC --*- C++ -*-----===// +//===-- M68kBaseInfo.h - Top level definitions for M68k MC ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp index 4c9a3297424d..27f1b3a3fac8 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp @@ -1,4 +1,4 @@ -//===---------- M68kELFObjectWriter.cpp - M68k ELF Writer ---*- C++ -*-===// +//===-- M68kELFObjectWriter.cpp - M68k ELF Writer ---------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h index 2b760dec9e41..5ef9afd0c2d7 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h @@ -1,4 +1,4 @@ -//===-- M68kFixupKinds.h - M68k Specific Fixup Entries ------*- C++ -*-===// +//===-- M68kFixupKinds.h - M68k Specific Fixup Entries ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp index a2e41437ee21..9ba28622b5b5 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp @@ -1,4 +1,4 @@ -//===-- M68kInstPrinter.cpp - Convert M68k MCInst to asm ----*- C++ -*-===// +//===-- M68kInstPrinter.cpp - Convert M68k MCInst to asm --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h index ec26bc4ddbfd..8b9d6514512d 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h @@ -1,4 +1,4 @@ -//===-- M68kInstPrinter.h - Convert M68k MCInst to asm ------*- C++ -*-===// +//===-- M68kInstPrinter.h - Convert M68k MCInst to asm ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp index ee2041012bb9..005d2d38f53d 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- M68kMCAsmInfo.cpp - M68k Asm Properties -------------*- C++ -*-===// +//===-- M68kMCAsmInfo.cpp - M68k Asm Properties -----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h index b3a58cc61223..562370012ea8 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h @@ -1,4 +1,4 @@ -//===-- M68kMCAsmInfo.h - M68k Asm Info --------------------*- C++ -*--===// +//===-- M68kMCAsmInfo.h - M68k Asm Info -------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp index 9708abaadf98..9227bd6c3a78 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- M68kMCCodeEmitter.cpp - Convert M68k code emitter ---*- C++ -*-===// +//===-- M68kMCCodeEmitter.cpp - Convert M68k code emitter -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h index 242a1297206a..2d0eb230cb6a 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h @@ -1,4 +1,4 @@ -//===-- M68kMCCodeEmitter.h - M68k Code Emitter ----------------*- C++ -*--===// +//===-- M68kMCCodeEmitter.h - M68k Code Emitter -----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp index 9f4db895a821..2606e22410fc 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- M68kMCTargetDesc.cpp - M68k Target Descriptions -----*- C++ -*-===// +//===-- M68kMCTargetDesc.cpp - M68k Target Descriptions ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h index a0ebca0ce36c..64193e5b30fe 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- M68kMCTargetDesc.h - M68k Target Descriptions -------*- C++ -*-===// +//===-- M68kMCTargetDesc.h - M68k Target Descriptions -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp index 2a225b8a43cd..4701f46b0298 100644 --- a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp +++ b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- M68kTargetInfo.cpp - M68k Target Implementation -----*- C++ -*-===// +//===-- M68kTargetInfo.cpp - M68k Target Implementation ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 2692eae57428e1136ab58ac4004883245d0623ca Mon Sep 17 00:00:00 2001 From: Stanislav Funiak Date: Tue, 4 Jan 2022 08:03:18 +0530 Subject: [PATCH 467/992] [MLIR][PDL] Refactor the positions for multi-root patterns. When the original version of multi-root patterns was reviewed, several improvements were made to the pdl_interp operations during the review process. Specifically, the "get users of a value at the specified operand index" was split up into "get users" and "compare the users' operands with that value". The iterative execution was also cleaned up to `pdl_interp.foreach`. However, the positions in the pdl-to-pdl_interp lowering were not similarly refactored. This introduced several problems, including hard-to-detect bugs in the lowering and duplicate evaluation of `pdl_interp.get_users`. 
This diff cleans up the positions. The "upward" `OperationPosition` was split out into `UsersPosition` and `ForEachPosition`, and the operand comparison was replaced with a simple predicate. In the process, I fixed three bugs: 1. When multiple roots had the same connector (i.e., a node that they shared with a subtree at the previously visited root), we would generate a single foreach loop rather than one foreach loop for each such root. The reason for this is that such connectors shared the position. The solution for this is to add root index as an id to the newly introduced `ForEachPosition`. 2. Previously, we would use `pdl_interp.get_operands` indiscriminately, whether or not the operand was variadic. We now correctly detect variadic operands and insert `pdl_interp.get_operand` when needed. 3. In certain corner cases, we would trigger the "connector has not been traversed yet" assertion. This was caused by not inserting the values during the upward traversal correctly. This has now been fixed. 
Reviewed By: Mogball Differential Revision: https://reviews.llvm.org/D116080 --- .../PDLToPDLInterp/PDLToPDLInterp.cpp | 40 ++++---- .../Conversion/PDLToPDLInterp/Predicate.cpp | 4 +- .../lib/Conversion/PDLToPDLInterp/Predicate.h | 94 +++++++++++++------ .../PDLToPDLInterp/PredicateTree.cpp | 82 ++++++++++++---- .../pdl-to-pdl-interp-matcher.mlir | 87 ++++++++++++++++- 5 files changed, 233 insertions(+), 74 deletions(-) diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp index 367bbb55ee1b..9362a29ddb6f 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp @@ -248,45 +248,43 @@ Value PatternLowering::getValueAt(Block *¤tBlock, Position *pos) { switch (pos->getKind()) { case Predicates::OperationPos: { auto *operationPos = cast(pos); - if (!operationPos->isUpward()) { + if (operationPos->isOperandDefiningOp()) // Standard (downward) traversal which directly follows the defining op. value = builder.create( loc, builder.getType(), parentVal); - break; - } + else + // A passthrough operation position. + value = parentVal; + break; + } + case Predicates::UsersPos: { + auto *usersPos = cast(pos); // The first operation retrieves the representative value of a range. - // This applies only when the parent is a range of values. - if (parentVal.getType().isa()) + // This applies only when the parent is a range of values and we were + // requested to use a representative value (e.g., upward traversal). + if (parentVal.getType().isa() && + usersPos->useRepresentative()) value = builder.create(loc, parentVal, 0); else value = parentVal; // The second operation retrieves the users. value = builder.create(loc, value); - - // The third operation iterates over them. 
+ break; + } + case Predicates::ForEachPos: { assert(!failureBlockStack.empty() && "expected valid failure block"); auto foreach = builder.create( - loc, value, failureBlockStack.back(), /*initLoop=*/true); + loc, parentVal, failureBlockStack.back(), /*initLoop=*/true); value = foreach.getLoopVariable(); - // Create the success and continuation blocks. - Block *successBlock = builder.createBlock(&foreach.region()); - Block *continueBlock = builder.createBlock(successBlock); + // Create the continuation block. + Block *continueBlock = builder.createBlock(&foreach.region()); builder.create(loc); failureBlockStack.push_back(continueBlock); - // The fourth operation extracts the operand(s) of the user at the specified - // index (which can be None, indicating all operands). - builder.setInsertionPointToStart(&foreach.region().front()); - Value operands = builder.create( - loc, parentVal.getType(), value, operationPos->getIndex()); - - // The fifth operation compares the operands to the parent value / range. 
- builder.create(loc, parentVal, operands, - successBlock, continueBlock); - currentBlock = successBlock; + currentBlock = &foreach.region().front(); break; } case Predicates::OperandPos: { diff --git a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp index 07fa5c77c13f..a12f3171e7af 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp @@ -48,4 +48,6 @@ OperandGroupPosition::OperandGroupPosition(const KeyTy &key) : Base(key) { //===----------------------------------------------------------------------===// // OperationPosition -constexpr unsigned OperationPosition::kDown; +bool OperationPosition::isOperandDefiningOp() const { + return isa_and_nonnull(parent); +} diff --git a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.h b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.h index 266580bd41f5..1d723996f8c3 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.h +++ b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.h @@ -52,6 +52,8 @@ enum Kind : unsigned { TypePos, AttributeLiteralPos, TypeLiteralPos, + UsersPos, + ForEachPos, // Questions, ordered by dependency and decreasing priority. IsNotNullQuestion, @@ -185,6 +187,20 @@ struct AttributeLiteralPosition using PredicateBase::PredicateBase; }; +//===----------------------------------------------------------------------===// +// ForEachPosition + +/// A position describing an iterative choice of an operation. +struct ForEachPosition : public PredicateBase, + Predicates::ForEachPos> { + explicit ForEachPosition(const KeyTy &key) : Base(key) { parent = key.first; } + + /// Returns the ID, for differentiating various loops. + /// For upward traversals, this is the index of the root. 
+ unsigned getID() const { return key.second; } +}; + //===----------------------------------------------------------------------===// // OperandPosition @@ -229,14 +245,11 @@ struct OperandGroupPosition /// An operation position describes an operation node in the IR. Other position /// kinds are formed with respect to an operation position. -struct OperationPosition - : public PredicateBase, unsigned>, - Predicates::OperationPos> { - static constexpr unsigned kDown = std::numeric_limits::max(); - +struct OperationPosition : public PredicateBase, + Predicates::OperationPos> { explicit OperationPosition(const KeyTy &key) : Base(key) { - parent = std::get<0>(key); + parent = key.first; } /// Returns a hash suitable for the given keytype. @@ -246,31 +259,22 @@ struct OperationPosition /// Gets the root position. static OperationPosition *getRoot(StorageUniquer &uniquer) { - return Base::get(uniquer, nullptr, kDown, 0); + return Base::get(uniquer, nullptr, 0); } - /// Gets an downward operation position with the given parent. + /// Gets an operation position with the given parent. static OperationPosition *get(StorageUniquer &uniquer, Position *parent) { - return Base::get(uniquer, parent, kDown, parent->getOperationDepth() + 1); - } - - /// Gets an upward operation position with the given parent and operand. - static OperationPosition *get(StorageUniquer &uniquer, Position *parent, - Optional operand) { - return Base::get(uniquer, parent, operand, parent->getOperationDepth() + 1); + return Base::get(uniquer, parent, parent->getOperationDepth() + 1); } - /// Returns the operand index for an upward operation position. - Optional getIndex() const { return std::get<1>(key); } - - /// Returns if this operation position is upward, accepting an input. - bool isUpward() const { return getIndex().getValueOr(0) != kDown; } - /// Returns the depth of this position. 
- unsigned getDepth() const { return std::get<2>(key); } + unsigned getDepth() const { return key.second; } /// Returns if this operation position corresponds to the root. bool isRoot() const { return getDepth() == 0; } + + /// Returns if this operation represents an operand defining op. + bool isOperandDefiningOp() const; }; //===----------------------------------------------------------------------===// @@ -340,6 +344,26 @@ struct TypeLiteralPosition using PredicateBase::PredicateBase; }; +//===----------------------------------------------------------------------===// +// UsersPosition + +/// A position describing the users of a value or a range of values. The second +/// value in the key indicates whether we choose users of a representative for +/// a range (this is true, e.g., in the upward traversals). +struct UsersPosition + : public PredicateBase, + Predicates::UsersPos> { + explicit UsersPosition(const KeyTy &key) : Base(key) { parent = key.first; } + + /// Returns a hash suitable for the given keytype. + static llvm::hash_code hashKey(const KeyTy &key) { + return llvm::hash_value(key); + } + + /// Indicates whether to compute a range of a representative. + bool useRepresentative() const { return key.second; } +}; + //===----------------------------------------------------------------------===// // Qualifiers //===----------------------------------------------------------------------===// @@ -496,6 +520,7 @@ class PredicateUniquer : public StorageUniquer { // Register the types of Positions with the uniquer. registerParametricStorageType(); registerParametricStorageType(); + registerParametricStorageType(); registerParametricStorageType(); registerParametricStorageType(); registerParametricStorageType(); @@ -503,6 +528,7 @@ class PredicateUniquer : public StorageUniquer { registerParametricStorageType(); registerParametricStorageType(); registerParametricStorageType(); + registerParametricStorageType(); // Register the types of Questions with the uniquer. 
registerParametricStorageType(); @@ -550,12 +576,10 @@ class PredicateBuilder { return OperationPosition::get(uniquer, p); } - /// Returns the position of operation using the value at the given index. - OperationPosition *getUsersOp(Position *p, Optional operand) { - assert((isa(p)) && - "expected result position"); - return OperationPosition::get(uniquer, p, operand); + /// Returns the operation position equivalent to the given position. + OperationPosition *getPassthroughOp(Position *p) { + assert((isa(p)) && "expected users position"); + return OperationPosition::get(uniquer, p); } /// Returns an attribute position for an attribute of the given operation. @@ -568,6 +592,10 @@ class PredicateBuilder { return AttributeLiteralPosition::get(uniquer, attr); } + Position *getForEach(Position *p, unsigned id) { + return ForEachPosition::get(uniquer, p, id); + } + /// Returns an operand position for an operand of the given operation. Position *getOperand(OperationPosition *p, unsigned operand) { return OperandPosition::get(uniquer, p, operand); @@ -605,6 +633,14 @@ class PredicateBuilder { return TypeLiteralPosition::get(uniquer, attr); } + /// Returns the users of a position using the value at the given operand. 
+ UsersPosition *getUsers(Position *p, bool useRepresentative) { + assert((isa(p)) && + "expected result position"); + return UsersPosition::get(uniquer, p, useRepresentative); + } + //===--------------------------------------------------------------------===// // Qualifiers //===--------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index 43c57a8e6033..24b2f19e58c2 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -158,8 +158,11 @@ static void getTreePredicates(std::vector &predList, // group, we treat it as all of the operands/results of the operation. /// Operands. if (operands.size() == 1 && operands[0].getType().isa()) { - getTreePredicates(predList, operands.front(), builder, inputs, - builder.getAllOperands(opPos)); + // Ignore the operands if we are performing an upward traversal (in that + // case, they have already been visited). + if (opPos->isRoot() || opPos->isOperandDefiningOp()) + getTreePredicates(predList, operands.front(), builder, inputs, + builder.getAllOperands(opPos)); } else { bool foundVariableLength = false; for (const auto &operandIt : llvm::enumerate(operands)) { @@ -502,23 +505,47 @@ static void buildCostGraph(ArrayRef roots, RootOrderingGraph &graph, "the pattern contains a candidate root disconnected from the others"); } +/// Returns true if the operand at the given index needs to be queried using an +/// operand group, i.e., if it is variadic itself or follows a variadic operand. +static bool useOperandGroup(pdl::OperationOp op, unsigned index) { + OperandRange operands = op.operands(); + assert(index < operands.size() && "operand index out of range"); + for (unsigned i = 0; i <= index; ++i) + if (operands[i].getType().isa()) + return true; + return false; +} + /// Visit a node during upward traversal. 
-void visitUpward(std::vector &predList, OpIndex opIndex, - PredicateBuilder &builder, - DenseMap &valueToPosition, Position *&pos, - bool &first) { +static void visitUpward(std::vector &predList, + OpIndex opIndex, PredicateBuilder &builder, + DenseMap &valueToPosition, + Position *&pos, unsigned rootID) { Value value = opIndex.parent; TypeSwitch(value.getDefiningOp()) .Case([&](auto operationOp) { LLVM_DEBUG(llvm::dbgs() << " * Value: " << value << "\n"); - OperationPosition *opPos = builder.getUsersOp(pos, opIndex.index); - // Guard against traversing back to where we came from. - if (first) { - Position *parent = pos->getParent(); - predList.emplace_back(opPos, builder.getNotEqualTo(parent)); - first = false; + // Get users and iterate over them. + Position *usersPos = builder.getUsers(pos, /*useRepresentative=*/true); + Position *foreachPos = builder.getForEach(usersPos, rootID); + OperationPosition *opPos = builder.getPassthroughOp(foreachPos); + + // Compare the operand(s) of the user against the input value(s). + Position *operandPos; + if (!opIndex.index) { + // We are querying all the operands of the operation. + operandPos = builder.getAllOperands(opPos); + } else if (useOperandGroup(operationOp, *opIndex.index)) { + // We are querying an operand group. + Type type = operationOp.operands()[*opIndex.index].getType(); + bool variadic = type.isa(); + operandPos = builder.getOperandGroup(opPos, opIndex.index, variadic); + } else { + // We are querying an individual operand. + operandPos = builder.getOperand(opPos, *opIndex.index); } + predList.emplace_back(operandPos, builder.getEqualTo(pos)); // Guard against duplicate upward visits. 
These are not possible, // because if this value was already visited, it would have been @@ -540,6 +567,9 @@ void visitUpward(std::vector &predList, OpIndex opIndex, auto *opPos = dyn_cast(pos); assert(opPos && "operations and results must be interleaved"); pos = builder.getResult(opPos, *opIndex.index); + + // Insert the result position in case we have not visited it yet. + valueToPosition.try_emplace(value, pos); }) .Case([&](auto resultOp) { // Traverse up a group of results. @@ -550,6 +580,9 @@ void visitUpward(std::vector &predList, OpIndex opIndex, pos = builder.getResultGroup(opPos, opIndex.index, isVariadic); else pos = builder.getAllResults(opPos); + + // Insert the result position in case we have not visited it yet. + valueToPosition.try_emplace(value, pos); }); } @@ -568,7 +601,8 @@ static Value buildPredicateList(pdl::PatternOp pattern, LLVM_DEBUG({ llvm::dbgs() << "Graph:\n"; for (auto &target : graph) { - llvm::dbgs() << " * " << target.first << "\n"; + llvm::dbgs() << " * " << target.first.getLoc() << " " << target.first + << "\n"; for (auto &source : target.second) { RootOrderingEntry &entry = source.second; llvm::dbgs() << " <- " << source.first << ": " << entry.cost.first @@ -601,6 +635,17 @@ static Value buildPredicateList(pdl::PatternOp pattern, bestEdges = solver.preOrderTraversal(roots); } + // Print the best solution. + LLVM_DEBUG({ + llvm::dbgs() << "Best tree:\n"; + for (const std::pair &edge : bestEdges) { + llvm::dbgs() << " * " << edge.first; + if (edge.second) + llvm::dbgs() << " <- " << edge.second; + llvm::dbgs() << "\n"; + } + }); + LLVM_DEBUG(llvm::dbgs() << "Calling key getTreePredicates:\n"); LLVM_DEBUG(llvm::dbgs() << " * Value: " << bestRoot << "\n"); @@ -612,9 +657,9 @@ static Value buildPredicateList(pdl::PatternOp pattern, // Traverse the selected optimal branching. 
For all edges in order, traverse // up starting from the connector, until the candidate root is reached, and // call getTreePredicates at every node along the way. - for (const std::pair &edge : bestEdges) { - Value target = edge.first; - Value source = edge.second; + for (auto it : llvm::enumerate(bestEdges)) { + Value target = it.value().first; + Value source = it.value().second; // Check if we already visited the target root. This happens in two cases: // 1) the initial root (bestRoot); @@ -629,14 +674,13 @@ static Value buildPredicateList(pdl::PatternOp pattern, LLVM_DEBUG(llvm::dbgs() << " * Connector: " << connector.getLoc() << "\n"); DenseMap parentMap = parentMaps.lookup(target); Position *pos = valueToPosition.lookup(connector); - assert(pos && "The value has not been traversed yet"); - bool first = true; + assert(pos && "connector has not been traversed yet"); // Traverse from the connector upwards towards the target root. for (Value value = connector; value != target;) { OpIndex opIndex = parentMap.lookup(value); assert(opIndex.parent && "missing parent"); - visitUpward(predList, opIndex, builder, valueToPosition, pos, first); + visitUpward(predList, opIndex, builder, valueToPosition, pos, it.index()); value = opIndex.parent; } } diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir index 984a31790a8b..fd6cfe5fa7c5 100644 --- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir +++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir @@ -423,8 +423,8 @@ module @multi_root { // CHECK-DAG: %[[OP1:.*]] = pdl_interp.get_defining_op of %[[VAL1]] // CHECK-DAG: %[[OPS:.*]] = pdl_interp.get_users of %[[VAL1]] : !pdl.value // CHECK-DAG: pdl_interp.foreach %[[ROOT2:.*]] : !pdl.operation in %[[OPS]] - // CHECK-DAG: %[[OPERANDS:.*]] = pdl_interp.get_operands 0 of %[[ROOT2]] - // CHECK-DAG: pdl_interp.are_equal %[[VAL1]], %[[OPERANDS]] : 
!pdl.value -> ^{{.*}}, ^[[CONTINUE:.*]] + // CHECK-DAG: %[[OPERANDS:.*]] = pdl_interp.get_operand 0 of %[[ROOT2]] + // CHECK-DAG: pdl_interp.are_equal %[[OPERANDS]], %[[VAL1]] : !pdl.value -> ^{{.*}}, ^[[CONTINUE:.*]] // CHECK-DAG: pdl_interp.continue // CHECK-DAG: %[[VAL2:.*]] = pdl_interp.get_operand 1 of %[[ROOT2]] // CHECK-DAG: %[[OP2:.*]] = pdl_interp.get_defining_op of %[[VAL2]] @@ -433,7 +433,6 @@ module @multi_root { // CHECK-DAG: pdl_interp.is_not_null %[[VAL1]] : !pdl.value // CHECK-DAG: pdl_interp.is_not_null %[[VAL2]] : !pdl.value // CHECK-DAG: pdl_interp.is_not_null %[[ROOT2]] : !pdl.operation - // CHECK-DAG: pdl_interp.are_equal %[[ROOT2]], %[[ROOT1]] : !pdl.operation -> ^[[CONTINUE]] pdl.pattern @rewrite_multi_root : benefit(1) { %input1 = pdl.operand @@ -556,7 +555,7 @@ module @variadic_results_at { // CHECK-DAG: %[[ROOTS2:.*]] = pdl_interp.get_users of %[[VAL0]] : !pdl.value // CHECK-DAG: pdl_interp.foreach %[[ROOT2:.*]] : !pdl.operation in %[[ROOTS2]] { // CHECK-DAG: %[[OPERANDS:.*]] = pdl_interp.get_operands 1 of %[[ROOT2]] - // CHECK-DAG: pdl_interp.are_equal %[[VALS]], %[[OPERANDS]] : !pdl.range -> ^{{.*}}, ^[[CONTINUE:.*]] + // CHECK-DAG: pdl_interp.are_equal %[[OPERANDS]], %[[VALS]] : !pdl.range -> ^{{.*}}, ^[[CONTINUE:.*]] // CHECK-DAG: pdl_interp.is_not_null %[[ROOT2]] // CHECK-DAG: pdl_interp.check_operand_count of %[[ROOT2]] is at_least 1 // CHECK-DAG: pdl_interp.check_result_count of %[[ROOT2]] is 0 @@ -612,3 +611,83 @@ module @type_literal { } } +// ----- + +// CHECK-LABEL: module @common_connector +module @common_connector { + // Check the correct lowering when multiple roots are using the same + // connector. 
+ + // CHECK: func @matcher(%[[ROOTC:.*]]: !pdl.operation) + // CHECK-DAG: %[[VAL2:.*]] = pdl_interp.get_operand 0 of %[[ROOTC]] + // CHECK-DAG: %[[INTER:.*]] = pdl_interp.get_defining_op of %[[VAL2]] : !pdl.value + // CHECK-DAG: pdl_interp.is_not_null %[[INTER]] : !pdl.operation -> ^bb2, ^bb1 + // CHECK-DAG: %[[VAL1:.*]] = pdl_interp.get_operand 0 of %[[INTER]] + // CHECK-DAG: %[[OP:.*]] = pdl_interp.get_defining_op of %[[VAL1]] : !pdl.value + // CHECK-DAG: pdl_interp.is_not_null %[[OP]] + // CHECK-DAG: %[[VAL0:.*]] = pdl_interp.get_result 0 of %[[OP]] + // CHECK-DAG: %[[ROOTS:.*]] = pdl_interp.get_users of %[[VAL0]] : !pdl.value + // CHECK-DAG: pdl_interp.foreach %[[ROOTA:.*]] : !pdl.operation in %[[ROOTS]] { + // CHECK-DAG: pdl_interp.is_not_null %[[ROOTA]] : !pdl.operation -> ^{{.*}}, ^[[CONTA:.*]] + // CHECK-DAG: pdl_interp.continue + // CHECK-DAG: pdl_interp.foreach %[[ROOTB:.*]] : !pdl.operation in %[[ROOTS]] { + // CHECK-DAG: pdl_interp.is_not_null %[[ROOTB]] : !pdl.operation -> ^{{.*}}, ^[[CONTB:.*]] + // CHECK-DAG: %[[ROOTA_OP:.*]] = pdl_interp.get_operand 0 of %[[ROOTA]] + // CHECK-DAG: pdl_interp.are_equal %[[ROOTA_OP]], %[[VAL0]] : !pdl.value + // CHECK-DAG: %[[ROOTB_OP:.*]] = pdl_interp.get_operand 0 of %[[ROOTB]] + // CHECK-DAG: pdl_interp.are_equal %[[ROOTB_OP]], %[[VAL0]] : !pdl.value + // CHECK-DAG } -> ^[[CONTA:.*]] + pdl.pattern @common_connector : benefit(1) { + %type = pdl.type + %op = pdl.operation -> (%type, %type : !pdl.type, !pdl.type) + %val0 = pdl.result 0 of %op + %val1 = pdl.result 1 of %op + %rootA = pdl.operation (%val0 : !pdl.value) + %rootB = pdl.operation (%val0 : !pdl.value) + %inter = pdl.operation (%val1 : !pdl.value) -> (%type : !pdl.type) + %val2 = pdl.result 0 of %inter + %rootC = pdl.operation (%val2 : !pdl.value) + pdl.rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation) + } +} + +// ----- + +// CHECK-LABEL: module @common_connector_range +module @common_connector_range { + // 
Check the correct lowering when multiple roots are using the same + // connector range. + + // CHECK: func @matcher(%[[ROOTC:.*]]: !pdl.operation) + // CHECK-DAG: %[[VALS2:.*]] = pdl_interp.get_operands of %[[ROOTC]] : !pdl.range + // CHECK-DAG: %[[INTER:.*]] = pdl_interp.get_defining_op of %[[VALS2]] : !pdl.range + // CHECK-DAG: pdl_interp.is_not_null %[[INTER]] : !pdl.operation -> ^bb2, ^bb1 + // CHECK-DAG: %[[VALS1:.*]] = pdl_interp.get_operands of %[[INTER]] : !pdl.range + // CHECK-DAG: %[[OP:.*]] = pdl_interp.get_defining_op of %[[VALS1]] : !pdl.range + // CHECK-DAG: pdl_interp.is_not_null %[[OP]] + // CHECK-DAG: %[[VALS0:.*]] = pdl_interp.get_results 0 of %[[OP]] + // CHECK-DAG: %[[VAL0:.*]] = pdl_interp.extract 0 of %[[VALS0]] : !pdl.value + // CHECK-DAG: %[[ROOTS:.*]] = pdl_interp.get_users of %[[VAL0]] : !pdl.value + // CHECK-DAG: pdl_interp.foreach %[[ROOTA:.*]] : !pdl.operation in %[[ROOTS]] { + // CHECK-DAG: pdl_interp.is_not_null %[[ROOTA]] : !pdl.operation -> ^{{.*}}, ^[[CONTA:.*]] + // CHECK-DAG: pdl_interp.continue + // CHECK-DAG: pdl_interp.foreach %[[ROOTB:.*]] : !pdl.operation in %[[ROOTS]] { + // CHECK-DAG: pdl_interp.is_not_null %[[ROOTB]] : !pdl.operation -> ^{{.*}}, ^[[CONTB:.*]] + // CHECK-DAG: %[[ROOTA_OPS:.*]] = pdl_interp.get_operands of %[[ROOTA]] + // CHECK-DAG: pdl_interp.are_equal %[[ROOTA_OPS]], %[[VALS0]] : !pdl.range + // CHECK-DAG: %[[ROOTB_OPS:.*]] = pdl_interp.get_operands of %[[ROOTB]] + // CHECK-DAG: pdl_interp.are_equal %[[ROOTB_OPS]], %[[VALS0]] : !pdl.range + // CHECK-DAG } -> ^[[CONTA:.*]] + pdl.pattern @common_connector_range : benefit(1) { + %types = pdl.types + %op = pdl.operation -> (%types, %types : !pdl.range, !pdl.range) + %vals0 = pdl.results 0 of %op -> !pdl.range + %vals1 = pdl.results 1 of %op -> !pdl.range + %rootA = pdl.operation (%vals0 : !pdl.range) + %rootB = pdl.operation (%vals0 : !pdl.range) + %inter = pdl.operation (%vals1 : !pdl.range) -> (%types : !pdl.range) + %vals2 = pdl.results of %inter + %rootC 
= pdl.operation (%vals2 : !pdl.range) + pdl.rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation) + } +} From 138803e017739c81b43b73631c7096bfc4d097d8 Mon Sep 17 00:00:00 2001 From: Stanislav Funiak Date: Tue, 4 Jan 2022 08:03:26 +0530 Subject: [PATCH 468/992] [MLIR][PDL] Make predicate order deterministic. The tree merging of pattern predicates places the predicates in an unordered set. When the predicates are sorted, they are taken in the set order, not the insertion order. This results in nondeterministic behavior. One solution to this problem would be to use `SetVector`. However, the value `SetVector` does not provide a `find` function for fast O(1) lookups and stores the predicates twice -- once in the set and once in the vector, which is undesirable, because we store patternToAnswer in each predicate. A simpler solution is to store the tie breaking ID (which follows the insertion order), and use this ID to break any ties when comparing predicates. Reviewed By: Mogball Differential Revision: https://reviews.llvm.org/D116081 --- .../PDLToPDLInterp/PredicateTree.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index 24b2f19e58c2..9fd5de11a83d 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -721,6 +721,11 @@ struct OrderedPredicate { /// opposed to those shared across patterns. unsigned secondary = 0; + /// The tie breaking ID, used to preserve a deterministic (insertion) order + /// among all the predicates with the same priority, depth, and position / + /// predicate dependency. + unsigned id = 0; + /// A map between a pattern operation and the answer to the predicate question /// within that pattern. 
DenseMap patternToAnswer; @@ -733,12 +738,13 @@ struct OrderedPredicate { // * lower depth // * lower position dependency // * lower predicate dependency + // * lower tie breaking ID auto *rhsPos = rhs.position; return std::make_tuple(primary, secondary, rhsPos->getOperationDepth(), - rhsPos->getKind(), rhs.question->getKind()) > + rhsPos->getKind(), rhs.question->getKind(), rhs.id) > std::make_tuple(rhs.primary, rhs.secondary, position->getOperationDepth(), position->getKind(), - question->getKind()); + question->getKind(), id); } }; @@ -903,6 +909,9 @@ MatcherNode::generateMatcherTree(ModuleOp module, PredicateBuilder &builder, auto it = uniqued.insert(predicate); it.first->patternToAnswer.try_emplace(patternAndPredList.pattern, predicate.answer); + // Mark the insertion order (0-based indexing). + if (it.second) + it.first->id = uniqued.size() - 1; } } @@ -939,9 +948,9 @@ MatcherNode::generateMatcherTree(ModuleOp module, PredicateBuilder &builder, ordered.reserve(uniqued.size()); for (auto &ip : uniqued) ordered.push_back(&ip); - std::stable_sort( - ordered.begin(), ordered.end(), - [](OrderedPredicate *lhs, OrderedPredicate *rhs) { return *lhs < *rhs; }); + llvm::sort(ordered, [](OrderedPredicate *lhs, OrderedPredicate *rhs) { + return *lhs < *rhs; + }); // Build the matchers for each of the pattern predicate lists. std::unique_ptr root; From b4130e9eadfe46b4d3380c40ce8c3e900a0fd21b Mon Sep 17 00:00:00 2001 From: Stanislav Funiak Date: Tue, 4 Jan 2022 08:03:29 +0530 Subject: [PATCH 469/992] [MLIR][PDL] Integration test of multi-root matching and related fixes. This diff adds an integration test to multi-root PDL matching. It consists of two subtests: 1) A 1-layer perceptron with split forward / backward operations. 2) A 2-layer perceptron with fused forward / backward operations. These tests use a collection of hand-written patterns and TensorFlow operations to be matched. 
The first test has a DAG / SSA dominant resulting match; the second does not and is therefore stored in a graph region. This diff also includes two bug fixes: 1) Mark the pdl_interp dialect as a dependent in the TestPDLByteCodePass. This is needed, because we create ops from that dialect as a part of the PDL-to-PDLInterp lowering. 2) Fix of the starting index in the liveness range for the ForEach operations (bug exposed by the integration test). Reviewed By: Mogball Differential Revision: https://reviews.llvm.org/D116082 --- mlir/lib/Rewrite/ByteCode.cpp | 24 +- .../Dialect/PDL/CPU/multiroot.mlir | 294 ++++++++++++++++++ mlir/test/lib/Rewrite/TestPDLByteCode.cpp | 6 + 3 files changed, 318 insertions(+), 6 deletions(-) create mode 100644 mlir/test/Integration/Dialect/PDL/CPU/multiroot.mlir diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp index 765c47b2ed0c..d6a07f9067fe 100644 --- a/mlir/lib/Rewrite/ByteCode.cpp +++ b/mlir/lib/Rewrite/ByteCode.cpp @@ -551,10 +551,22 @@ void Generator::allocateMemoryIndices(FuncOp matcherFunc, // finding the minimal number of overlapping live ranges. This is essentially // a simplified form of register allocation where we don't necessarily have a // limited number of registers, but we still want to minimize the number used. - DenseMap opToIndex; - matcherFunc.getBody().walk([&](Operation *op) { - opToIndex.insert(std::make_pair(op, opToIndex.size())); - }); + DenseMap opToFirstIndex; + DenseMap opToLastIndex; + + // A custom walk that marks the first and the last index of each operation. + // The entry marks the beginning of the liveness range for this operation, + // followed by nested operations, followed by the end of the liveness range. 
+ unsigned index = 0; + llvm::unique_function walk = [&](Operation *op) { + opToFirstIndex.try_emplace(op, index++); + for (Region ®ion : op->getRegions()) + for (Block &block : region.getBlocks()) + for (Operation &nested : block) + walk(&nested); + opToLastIndex.try_emplace(op, index++); + }; + walk(matcherFunc); // Liveness info for each of the defs within the matcher. ByteCodeLiveRange::Allocator allocator; @@ -578,8 +590,8 @@ void Generator::allocateMemoryIndices(FuncOp matcherFunc, // Set indices for the range of this block that the value is used. auto defRangeIt = valueDefRanges.try_emplace(value, allocator).first; defRangeIt->second.liveness->insert( - opToIndex[firstUseOrDef], - opToIndex[info->getEndOperation(value, firstUseOrDef)], + opToFirstIndex[firstUseOrDef], + opToLastIndex[info->getEndOperation(value, firstUseOrDef)], /*dummyValue*/ 0); // Check to see if this value is a range type. diff --git a/mlir/test/Integration/Dialect/PDL/CPU/multiroot.mlir b/mlir/test/Integration/Dialect/PDL/CPU/multiroot.mlir new file mode 100644 index 000000000000..be496ed3a675 --- /dev/null +++ b/mlir/test/Integration/Dialect/PDL/CPU/multiroot.mlir @@ -0,0 +1,294 @@ +// RUN: mlir-opt %s -allow-unregistered-dialect -test-pdl-bytecode-pass -split-input-file | FileCheck %s + +// ----- + +//===----------------------------------------------------------------------===// +// 1-layer perceptron with split fwd/bwd operations +//===----------------------------------------------------------------------===// + +module @patterns { + // fc_fwd + pdl.pattern : benefit(1) { + %in_type = pdl.type + %out_type = pdl.type + %weight_type = pdl.type + %rxact = pdl.operand : %in_type + %weight = pdl.operand : %weight_type + + %attr0 = pdl.attribute false + %op0 = pdl.operation "tf.MatMul" (%rxact, %weight : !pdl.value, !pdl.value) {"transpose_a" = %attr0, "transpose_b" = %attr0} -> (%out_type : !pdl.type) + + pdl.rewrite %op0 { + %op1 = pdl.operation "kernel.FcFwd" (%rxact, %weight : 
!pdl.value, !pdl.value) -> (%out_type : !pdl.type) + %val1 = pdl.result 0 of %op1 // txact + pdl.replace %op0 with (%val1 : !pdl.value) // tf.MatMul + } + } + + // fc_bwd + pdl.pattern : benefit(4) { + %in_type = pdl.type + %out_type = pdl.type + %weight_type = pdl.type + %const_type = pdl.type + %rxact = pdl.operand : %in_type + %rxdelta = pdl.operand : %out_type + %weight = pdl.operand : %weight_type + + %attr0 = pdl.attribute true + %attr1 = pdl.attribute false + %op0 = pdl.operation "tf.MatMul" (%rxact, %rxdelta : !pdl.value, !pdl.value) {"transpose_a" = %attr0, "transpose_b" = %attr1} -> (%weight_type : !pdl.type) + %val0 = pdl.result 0 of %op0 + %op1 = pdl.operation "tf.Const" -> (%const_type : !pdl.type) + %val1 = pdl.result 0 of %op1 + %op2 = pdl.operation "tf.Mul" (%val0, %val1 : !pdl.value, !pdl.value) -> (%weight_type : !pdl.type) + %val2 = pdl.result 0 of %op2 + %op3 = pdl.operation "tf.Sub" (%weight, %val2 : !pdl.value, !pdl.value) -> (%weight_type : !pdl.type) + + pdl.rewrite %op3 { + %op4 = pdl.operation "kernel.FcBwd" (%rxact, %rxdelta, %weight : !pdl.value, !pdl.value, !pdl.value) -> (%weight_type : !pdl.type) + %val4 = pdl.result 0 of %op4 // weight_out + pdl.replace %op3 with (%val4 : !pdl.value) // tf.Sub + pdl.erase %op2 // tf.Mul + pdl.erase %op1 // tf.Const + pdl.erase %op0 // tf.MatMul + } + } + + // softmax_cross_entropy + pdl.pattern : benefit(6) { + %in_type = pdl.type + %label_type = pdl.type + %loss_type = pdl.type + %mean_loss_type = pdl.type + %mean_const_type = pdl.type + %mul_const_type = pdl.type + %rxact = pdl.operand : %in_type + %rxlabel = pdl.operand : %label_type + + %op0 = pdl.operation "tf.SparseSoftmaxCrossEntropyWithLogits" (%rxact, %rxlabel : !pdl.value, !pdl.value) -> (%loss_type, %in_type : !pdl.type, !pdl.type) + %val0_0 = pdl.result 0 of %op0 // loss + %val0_1 = pdl.result 1 of %op0 // gradient + %op1 = pdl.operation "tf.Const" -> (%mean_const_type : !pdl.type) + %val1 = pdl.result 0 of %op1 + %op2 = pdl.operation 
"tf.Mean" (%val0_0, %val1 : !pdl.value, !pdl.value) -> (%mean_loss_type : !pdl.type) + %val2 = pdl.result 0 of %op2 + %op3 = pdl.operation "tf.PreventGradient" (%val0_1 : !pdl.value) -> (%in_type : !pdl.type) + %val3 = pdl.result 0 of %op3 + %op4 = pdl.operation "tf.Const" -> (%mul_const_type : !pdl.type) + %val4 = pdl.result 0 of %op4 + %op5 = pdl.operation "tf.Mul" (%val3, %val4 : !pdl.value, !pdl.value) -> (%in_type : !pdl.type) + + pdl.rewrite { // roots: %op2, %op5 + %op6 = pdl.operation "kernel.SoftmaxCrossEntropy" (%rxact, %rxlabel : !pdl.value, !pdl.value) -> (%mean_loss_type, %in_type : !pdl.type, !pdl.type) + %val6_0 = pdl.result 0 of %op6 // txloss + %val6_1 = pdl.result 1 of %op6 // txdelta + pdl.replace %op5 with (%val6_1 : !pdl.value) // tf.Mul + pdl.erase %op4 // tf.Const + pdl.erase %op3 // tf.PreventGradient + pdl.replace %op2 with (%val6_0 : !pdl.value) // tf.Mean + pdl.erase %op1 // tf.Const + pdl.erase %op0 // tf.SparseSoftmaxCrossEntropyWithLogits + } + } +} + +// CHECK-LABEL: test.mlp_split +// CHECK: %[[FWD:.*]] = "kernel.FcFwd"(%arg0, %arg2) : (tensor<2x20xf32>, tensor<20x10xf32>) -> tensor<2x10xf32> +// CHECK: %[[SM:.*]]:2 = "kernel.SoftmaxCrossEntropy"(%[[FWD]], %arg1) : (tensor<2x10xf32>, tensor<2xi32>) -> (tensor, tensor<2x10xf32>) +// CHECK: %[[BWD:.*]] = "kernel.FcBwd"(%arg0, %[[SM]]#1, %arg2) : (tensor<2x20xf32>, tensor<2x10xf32>, tensor<20x10xf32>) -> tensor<20x10xf32> +// CHECK: return %[[SM:.*]]#0, %[[BWD]] : tensor, tensor<20x10xf32> +module @ir attributes { test.mlp_split } { + func @main(%arg0: tensor<2x20xf32>, %arg1: tensor<2xi32>, %arg2: tensor<20x10xf32>) -> (tensor, tensor<20x10xf32>) { + %0 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %1 = "tf.Const"() {value = dense<1.000000e-01> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<5.000000e-01> : tensor<2x1xf32>} : () -> tensor<2x1xf32> + %3 = "tf.MatMul"(%arg0, %arg2) {transpose_a = false, transpose_b = false} : (tensor<2x20xf32>, 
tensor<20x10xf32>) -> tensor<2x10xf32> + %loss, %backprop = "tf.SparseSoftmaxCrossEntropyWithLogits"(%3, %arg1) : (tensor<2x10xf32>, tensor<2xi32>) -> (tensor<2xf32>, tensor<2x10xf32>) + %4 = "tf.Mean"(%loss, %0) {keep_dims = false} : (tensor<2xf32>, tensor<1xi32>) -> tensor + %5 = "tf.PreventGradient"(%backprop) : (tensor<2x10xf32>) -> tensor<2x10xf32> + %6 = "tf.Mul"(%5, %2) : (tensor<2x10xf32>, tensor<2x1xf32>) -> tensor<2x10xf32> + %7 = "tf.MatMul"(%arg0, %6) {transpose_a = true, transpose_b = false} : (tensor<2x20xf32>, tensor<2x10xf32>) -> tensor<20x10xf32> + %8 = "tf.Mul"(%7, %1) : (tensor<20x10xf32>, tensor) -> tensor<20x10xf32> + %9 = "tf.Sub"(%arg2, %8) : (tensor<20x10xf32>, tensor<20x10xf32>) -> tensor<20x10xf32> + return %4, %9 : tensor, tensor<20x10xf32> + } +} + +// ----- + +//===----------------------------------------------------------------------===// +// 2-layer perceptron with fused fwd/bwd operations +//===----------------------------------------------------------------------===// + +module @patterns { + + // gradient descent + pdl.pattern : benefit(3) { + %const_type = pdl.type + %param_type = pdl.type + %param = pdl.operand : %param_type + %gradient = pdl.operand : %param_type + + %attr0 = pdl.attribute + %op0 = pdl.operation "tf.Const" {"value" = %attr0} -> (%const_type : !pdl.type) + %val0 = pdl.result 0 of %op0 + %op1 = pdl.operation "tf.Mul" (%gradient, %val0 : !pdl.value, !pdl.value) -> (%param_type : !pdl.type) + %val1 = pdl.result 0 of %op1 + %op2 = pdl.operation "tf.Sub" (%param, %val1 : !pdl.value, !pdl.value) -> (%param_type : !pdl.type) + + pdl.rewrite %op2 { + %op3 = pdl.operation "kernel.GD" (%param, %gradient : !pdl.value, !pdl.value) -> (%param_type : !pdl.type) + %val3 = pdl.result 0 of %op3 + pdl.replace %op2 with (%val3 : !pdl.value) // tf.Sub + pdl.erase %op1 // tf.Mul + } + } + + // first FC + pdl.pattern : benefit(8) { + %in_type = pdl.type + %out_type = pdl.type + %weight_type = pdl.type + %bias_type = pdl.type + %rxact = 
pdl.operand : %in_type + %rxdelta = pdl.operand : %out_type + %weight = pdl.operand : %weight_type + %bias = pdl.operand : %bias_type + + %attr0 = pdl.attribute false + %op0 = pdl.operation "tf.MatMul" (%rxact, %weight : !pdl.value, !pdl.value) {"transpose_a" = %attr0, "transpose_b" = %attr0} -> (%out_type : !pdl.type) + %val0 = pdl.result 0 of %op0 + %op1 = pdl.operation "tf.BiasAdd" (%val0, %bias : !pdl.value, !pdl.value) -> (%out_type : !pdl.type) + %val1 = pdl.result 0 of %op1 + %op2 = pdl.operation "tf.Relu" (%val1 : !pdl.value) -> (%out_type : !pdl.type) + %val2 = pdl.result 0 of %op2 + %op3 = pdl.operation "tf.ReluGrad" (%rxdelta, %val2 : !pdl.value, !pdl.value) -> (%out_type : !pdl.type) + %val3 = pdl.result 0 of %op3 + %attr1 = pdl.attribute true + %op4 = pdl.operation "tf.MatMul" (%rxact, %val3 : !pdl.value, !pdl.value) {"transpose_a" = %attr1, "transpose_b" = %attr0} -> (%weight_type : !pdl.type) + %val4 = pdl.result 0 of %op4 + %op5 = pdl.operation "kernel.GD" (%weight, %val4 : !pdl.value, !pdl.value) -> (%weight_type : !pdl.type) + %op6 = pdl.operation "tf.BiasAddGrad" (%val3 : !pdl.value) -> (%bias_type : !pdl.type) + %val6 = pdl.result 0 of %op6 + %op7 = pdl.operation "kernel.GD" (%bias, %val6 : !pdl.value, !pdl.value) -> (%bias_type : !pdl.type) + + pdl.rewrite { // roots: %op2, %op5, %op7 + %op8 = pdl.operation "kernel.FcWithBias" (%rxact, %rxdelta, %weight, %bias : !pdl.value, !pdl.value, !pdl.value, !pdl.value) -> (%out_type, %weight_type, %bias_type : !pdl.type, !pdl.type, !pdl.type) + %val8_0 = pdl.result 0 of %op8 // txact + %val8_1 = pdl.result 1 of %op8 // weight_out + %val8_2 = pdl.result 2 of %op8 // bias_out + pdl.replace %op7 with (%val8_2 : !pdl.value) // kernel.GD + pdl.erase %op6 // tf.BiasAddGrad + pdl.replace %op5 with (%val8_1 : !pdl.value) // kernel.GD + pdl.erase %op4 // tf.MatMul + pdl.erase %op3 // tf.ReluGrad + pdl.replace %op2 with (%val8_0 : !pdl.value) // tf.Relu + pdl.erase %op1 // tf.BiasAdd + pdl.erase %op0 // tf.MatMul 
+ } + } + + // second FC + pdl.pattern : benefit(4) { + %in_type = pdl.type + %out_type = pdl.type + %weight_type = pdl.type + %rxact = pdl.operand : %in_type + %rxdelta = pdl.operand : %out_type + %weight = pdl.operand : %weight_type + + %attr0 = pdl.attribute false + %op0 = pdl.operation "tf.MatMul" (%rxact, %weight : !pdl.value, !pdl.value) {"transpose_a" = %attr0, "transpose_b" = %attr0} -> (%out_type : !pdl.type) + %attr1 = pdl.attribute true + %op1 = pdl.operation "tf.MatMul" (%rxdelta, %weight : !pdl.value, !pdl.value) {"transpose_a" = %attr0, "transpose_b" = %attr1} -> (%in_type : !pdl.type) + %op2 = pdl.operation "tf.MatMul" (%rxact, %rxdelta : !pdl.value, !pdl.value) {"transpose_a" = %attr1, "transpose_b" = %attr0} -> (%weight_type : !pdl.type) + %val2 = pdl.result 0 of %op2 + %op3 = pdl.operation "kernel.GD" (%weight, %val2 : !pdl.value, !pdl.value) -> (%weight_type : !pdl.type) + + pdl.rewrite { // roots: %op0, %op1, %op3 + %op4 = pdl.operation "kernel.Fc" (%rxact, %rxdelta, %weight : !pdl.value, !pdl.value, !pdl.value) -> (%out_type, %in_type, %weight_type : !pdl.type, !pdl.type, !pdl.type) + %val4_0 = pdl.result 0 of %op4 // txact + %val4_1 = pdl.result 1 of %op4 // txdelta + %val4_2 = pdl.result 2 of %op4 // weight_out + pdl.replace %op3 with (%val4_2 : !pdl.value) // Sgd + pdl.erase %op2 // tf.MatMul + pdl.replace %op1 with (%val4_1 : !pdl.value) // tf.MatMul + pdl.replace %op0 with (%val4_0 : !pdl.value) // tf.MatMul + } + } + + // softmax_cross_entropy + pdl.pattern : benefit(6) { + %in_type = pdl.type + %label_type = pdl.type + %loss_type = pdl.type + %mean_loss_type = pdl.type + %mean_const_type = pdl.type + %mul_const_type = pdl.type + %rxact = pdl.operand : %in_type + %rxlabel = pdl.operand : %label_type + + %op0 = pdl.operation "tf.SparseSoftmaxCrossEntropyWithLogits" (%rxact, %rxlabel : !pdl.value, !pdl.value) -> (%loss_type, %in_type : !pdl.type, !pdl.type) + %val0_0 = pdl.result 0 of %op0 // loss + %val0_1 = pdl.result 1 of %op0 // 
gradient + %op1 = pdl.operation "tf.Const" -> (%mean_const_type : !pdl.type) + %val1 = pdl.result 0 of %op1 + %op2 = pdl.operation "tf.Mean" (%val0_0, %val1 : !pdl.value, !pdl.value) -> (%mean_loss_type : !pdl.type) + %val2 = pdl.result 0 of %op2 + %op3 = pdl.operation "tf.PreventGradient" (%val0_1 : !pdl.value) -> (%in_type : !pdl.type) + %val3 = pdl.result 0 of %op3 + %op4 = pdl.operation "tf.Const" -> (%mul_const_type : !pdl.type) + %val4 = pdl.result 0 of %op4 + %op5 = pdl.operation "tf.Mul" (%val3, %val4 : !pdl.value, !pdl.value) -> (%in_type : !pdl.type) + + pdl.rewrite { // roots: %op2, %op5 + %op6 = pdl.operation "kernel.SoftmaxCrossEntropy" (%rxact, %rxlabel : !pdl.value, !pdl.value) -> (%mean_loss_type, %in_type : !pdl.type, !pdl.type) + %val6_0 = pdl.result 0 of %op6 // txloss + %val6_1 = pdl.result 1 of %op6 // txdelta + pdl.replace %op5 with (%val6_1 : !pdl.value) // tf.Mul + pdl.erase %op4 // tf.Const + pdl.erase %op3 // tf.PreventGradient + pdl.replace %op2 with (%val6_0 : !pdl.value) // tf.Mean + pdl.erase %op1 // tf.Const + pdl.erase %op0 // tf.SparseSoftmaxCrossEntropyWithLogits + } + } +} + +// CHECK-LABEL: test.mlp_fused +// CHECK: %[[FC2:.*]]:3 = "kernel.Fc"(%[[FC1:.*]]#0, %[[SM:.*]]#1, %arg4) : (tensor<2x256xf32>, tensor<2x10xf32>, tensor<256x10xf32>) -> (tensor<2x10xf32>, tensor<2x256xf32>, tensor<256x10xf32>) +// CHECK: %[[SM]]:2 = "kernel.SoftmaxCrossEntropy"(%[[FC2]]#0, %arg1) : (tensor<2x10xf32>, tensor<2xi32>) -> (tensor, tensor<2x10xf32>) +// CHECK: %[[FC1]]:3 = "kernel.FcWithBias"(%arg0, %[[FC2]]#1, %arg3, %arg2) : (tensor<2x20xf32>, tensor<2x256xf32>, tensor<20x256xf32>, tensor<256xf32>) -> (tensor<2x256xf32>, tensor<20x256xf32>, tensor<256xf32>) +module @ir attributes { test.mlp_fused } { + func @main(%arg0: tensor<2x20xf32>, %arg1: tensor<2xi32>, %arg2: tensor<256xf32>, %arg3: tensor<20x256xf32>, %arg4: tensor<256x10xf32>) -> () { // tensor, tensor<256xf32>, tensor<20x256xf32>, tensor<256x10xf32>) { + // The replacement operations 
fuse forward and backward pass; therefore, the + // resulting graph is not a DAG. To address this, we wrap the operations in + // a graph region. + "test.graph_region"() ({ + %0 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %1 = "tf.Const"() {value = dense<1.000000e-01> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<5.000000e-01> : tensor<2x1xf32>} : () -> tensor<2x1xf32> + %3 = "tf.MatMul"(%arg0, %arg3) {transpose_a = false, transpose_b = false} : (tensor<2x20xf32>, tensor<20x256xf32>) -> tensor<2x256xf32> + %4 = "tf.BiasAdd"(%3, %arg2) {data_format = "NHWC"} : (tensor<2x256xf32>, tensor<256xf32>) -> tensor<2x256xf32> + %5 = "tf.Relu"(%4) : (tensor<2x256xf32>) -> tensor<2x256xf32> + %6 = "tf.MatMul"(%5, %arg4) {transpose_a = false, transpose_b = false} : (tensor<2x256xf32>, tensor<256x10xf32>) -> tensor<2x10xf32> + %loss, %backprop = "tf.SparseSoftmaxCrossEntropyWithLogits"(%6, %arg1) : (tensor<2x10xf32>, tensor<2xi32>) -> (tensor<2xf32>, tensor<2x10xf32>) + %7 = "tf.Mean"(%loss, %0) {keep_dims = false} : (tensor<2xf32>, tensor<1xi32>) -> tensor + %8 = "tf.PreventGradient"(%backprop) : (tensor<2x10xf32>) -> tensor<2x10xf32> + %9 = "tf.Mul"(%8, %2) : (tensor<2x10xf32>, tensor<2x1xf32>) -> tensor<2x10xf32> + %10 = "tf.MatMul"(%9, %arg4) {transpose_a = false, transpose_b = true} : (tensor<2x10xf32>, tensor<256x10xf32>) -> tensor<2x256xf32> + %11 = "tf.MatMul"(%5, %9) {transpose_a = true, transpose_b = false} : (tensor<2x256xf32>, tensor<2x10xf32>) -> tensor<256x10xf32> + %12 = "tf.ReluGrad"(%10, %5) : (tensor<2x256xf32>, tensor<2x256xf32>) -> tensor<2x256xf32> + %13 = "tf.BiasAddGrad"(%12) {data_format = "NHWC"} : (tensor<2x256xf32>) -> tensor<256xf32> + %14 = "tf.MatMul"(%arg0, %12) {transpose_a = true, transpose_b = false} : (tensor<2x20xf32>, tensor<2x256xf32>) -> tensor<20x256xf32> + %15 = "tf.Mul"(%14, %1) : (tensor<20x256xf32>, tensor) -> tensor<20x256xf32> + %16 = "tf.Sub"(%arg3, %15) : (tensor<20x256xf32>, 
tensor<20x256xf32>) -> tensor<20x256xf32> + %17 = "tf.Mul"(%13, %1) : (tensor<256xf32>, tensor) -> tensor<256xf32> + %18 = "tf.Sub"(%arg2, %17) : (tensor<256xf32>, tensor<256xf32>) -> tensor<256xf32> + %19 = "tf.Mul"(%11, %1) : (tensor<256x10xf32>, tensor) -> tensor<256x10xf32> + %20 = "tf.Sub"(%arg4, %19) : (tensor<256x10xf32>, tensor<256x10xf32>) -> tensor<256x10xf32> + }) : () -> () + return + } +} diff --git a/mlir/test/lib/Rewrite/TestPDLByteCode.cpp b/mlir/test/lib/Rewrite/TestPDLByteCode.cpp index ef62d73978d8..748e54822718 100644 --- a/mlir/test/lib/Rewrite/TestPDLByteCode.cpp +++ b/mlir/test/lib/Rewrite/TestPDLByteCode.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/PDLInterp/IR/PDLInterp.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -75,6 +76,11 @@ struct TestPDLByteCodePass StringRef getDescription() const final { return "Test PDL ByteCode functionality"; } + void getDependentDialects(DialectRegistry ®istry) const override { + // Mark the pdl_interp dialect as a dependent. This is needed, because we + // create ops from that dialect as a part of the PDL-to-PDLInterp lowering. + registry.insert(); + } void runOnOperation() final { ModuleOp module = getOperation(); From de6c82d6fdb9a80b50a415bcc0fa9518fa964d40 Mon Sep 17 00:00:00 2001 From: Stanislav Funiak Date: Tue, 4 Jan 2022 08:11:35 +0530 Subject: [PATCH 470/992] [MLIR][PDL] Generalize result type verification Presently the result type verification checks if the type is used by a `pdl::OperationOp` inside the matcher. This is unnecessarily restrictive; the type could come from a `pdl::OperandOp or `pdl::OperandsOp` and still be inferrable. 
Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116083 --- mlir/lib/Dialect/PDL/IR/PDL.cpp | 13 +++++++------ mlir/test/Dialect/PDL/ops.mlir | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index 2a399ec2169e..95a3fb742fa1 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -207,16 +207,17 @@ static LogicalResult verifyResultTypesAreInferrable(OperationOp op, if (isa(resultTypeOp)) continue; - // If the type operation was defined in the matcher and constrains the - // result of an input operation, it can be used. - auto constrainsInputOp = [rewriterBlock](Operation *user) { - return user->getBlock() != rewriterBlock && isa(user); + // If the type operation was defined in the matcher and constrains an + // operand or the result of an input operation, it can be used. + auto constrainsInput = [rewriterBlock](Operation *user) { + return user->getBlock() != rewriterBlock && + isa(user); }; if (TypeOp typeOp = dyn_cast(resultTypeOp)) { - if (typeOp.type() || llvm::any_of(typeOp->getUsers(), constrainsInputOp)) + if (typeOp.type() || llvm::any_of(typeOp->getUsers(), constrainsInput)) continue; } else if (TypesOp typeOp = dyn_cast(resultTypeOp)) { - if (typeOp.types() || llvm::any_of(typeOp->getUsers(), constrainsInputOp)) + if (typeOp.types() || llvm::any_of(typeOp->getUsers(), constrainsInput)) continue; } diff --git a/mlir/test/Dialect/PDL/ops.mlir b/mlir/test/Dialect/PDL/ops.mlir index 758d5c6ac031..9c7daf46a090 100644 --- a/mlir/test/Dialect/PDL/ops.mlir +++ b/mlir/test/Dialect/PDL/ops.mlir @@ -88,7 +88,7 @@ pdl.pattern @infer_type_from_operation_replace : benefit(1) { // ----- // Check that the result type of an operation within a rewrite can be inferred -// from types used within the match block. +// from the result types of an operation within the match block. 
pdl.pattern @infer_type_from_type_used_in_match : benefit(1) { %type1 = pdl.type : i32 %type2 = pdl.type @@ -101,7 +101,7 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) { // ----- // Check that the result type of an operation within a rewrite can be inferred -// from types used within the match block. +// from the result types of an operation within the match block. pdl.pattern @infer_type_from_type_used_in_match : benefit(1) { %types = pdl.types %root = pdl.operation -> (%types : !pdl.range) @@ -113,6 +113,34 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) { // ----- +// Check that the result type of an operation within a rewrite can be inferred +// from the type of an operand within the match block. +pdl.pattern @infer_type_from_type_used_in_match : benefit(1) { + %type1 = pdl.type + %type2 = pdl.type + %operand1 = pdl.operand : %type1 + %operand2 = pdl.operand : %type2 + %root = pdl.operation (%operand1, %operand2 : !pdl.value, !pdl.value) + pdl.rewrite %root { + %newOp = pdl.operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type) + } +} + +// ----- + +// Check that the result type of an operation within a rewrite can be inferred +// from the types of operands within the match block. +pdl.pattern @infer_type_from_type_used_in_match : benefit(1) { + %types = pdl.types + %operands = pdl.operands : %types + %root = pdl.operation (%operands : !pdl.range) + pdl.rewrite %root { + %newOp = pdl.operation "foo.op" -> (%types : !pdl.range) + } +} + +// ----- + pdl.pattern @apply_rewrite_with_no_results : benefit(1) { %root = pdl.operation pdl.rewrite %root { From 7de8488c3d7e9f4a5e2d05007e5ea17482a02410 Mon Sep 17 00:00:00 2001 From: Stanislav Funiak Date: Tue, 4 Jan 2022 08:12:51 +0530 Subject: [PATCH 471/992] [MLIR] Printing a null Value. This diff adds support to printing a Value when it is null. We encounter this situation when debugging the PDL bytcode execution (where a null Value is perfectly valid). 
Currently, the AsmPrinter crashes (with an assert in a cast) when it encounters such Value. We follow the same format used in other printed entities (e.g., null attribute). Reviewed By: mehdi_amini, bondhugula Differential Revision: https://reviews.llvm.org/D116084 --- mlir/lib/IR/AsmPrinter.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 376e5c16fe6c..7be787f51f8f 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -935,7 +935,7 @@ SSANameState::SSANameState( void SSANameState::printValueID(Value value, bool printResultNo, raw_ostream &stream) const { if (!value) { - stream << "<>"; + stream << "<>"; return; } @@ -2826,6 +2826,11 @@ void IntegerSet::print(raw_ostream &os) const { } void Value::print(raw_ostream &os) { + if (!impl) { + os << "<>"; + return; + } + if (auto *op = getDefiningOp()) return op->print(os); // TODO: Improve BlockArgument print'ing. @@ -2834,6 +2839,11 @@ void Value::print(raw_ostream &os) { << "' at index: " << arg.getArgNumber(); } void Value::print(raw_ostream &os, AsmState &state) { + if (!impl) { + os << "<>"; + return; + } + if (auto *op = getDefiningOp()) return op->print(os, state); From 80b3f08eeee2f0e50ab3d9295e9c1d3f251592fb Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Tue, 4 Jan 2022 08:24:14 +0530 Subject: [PATCH 472/992] [MLIR[PDL] NFC. Fix unused variable warning in PDLToPDLInterp.cpp NFC. Fix unused variable warning in PDLToPDLInterp.cpp. 
Differential Revision: https://reviews.llvm.org/D116571 --- mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp index 9362a29ddb6f..b9c4dda9d649 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp @@ -239,7 +239,7 @@ Value PatternLowering::getValueAt(Block *¤tBlock, Position *pos) { // Get the value for the parent position. Value parentVal; if (Position *parent = pos->getParent()) - parentVal = getValueAt(currentBlock, pos->getParent()); + parentVal = getValueAt(currentBlock, parent); // TODO: Use a location from the position. Location loc = parentVal ? parentVal.getLoc() : builder.getUnknownLoc(); From ea6a3f9f960e52ea39edd5edddf5afad3c11f7a0 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 4 Jan 2022 11:12:17 +0800 Subject: [PATCH 473/992] [NFC] [Coroutines] Fix incorrect use of coroutine intrinsics The inlined llvm.coro.id should contain the function it refers to. The modifed test would caused the compiler crash under O2. See issue52912 for example. 
--- llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll b/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll index fd1d7c3b4ecd..dbd5a9fa79a1 100644 --- a/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll +++ b/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll @@ -7,6 +7,8 @@ declare void @g.dummy(%g.Frame*) +declare i8* @g() + define i8* @f() "coroutine.presplit"="1" { entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) @@ -14,7 +16,7 @@ entry: %alloc = call i8* @malloc(i32 %size) %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) - %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*)) + %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* ()* @g to i8*), i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*)) %innerhdl = call noalias nonnull i8* @llvm.coro.begin(token %innerid, i8* null) %gframe = bitcast i8* %innerhdl to %g.Frame* @@ -41,7 +43,7 @@ suspend: ; See if the g's coro.begin was spilled into the frame ; CHECK-LABEL: @f( -; CHECK: %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*)) +; CHECK: %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* ()* @g to i8*), i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*)) ; CHECK: %innerhdl = call noalias nonnull i8* @llvm.coro.begin(token %innerid, i8* null) ; CHECK: %[[spilladdr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i8* %innerhdl, i8** %[[spilladdr]] From 8b014ad725eb7a132819f138abd34e886bdb98df Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 4 Jan 2022 11:11:00 +0800 Subject: [PATCH 474/992] [M68k][NFC] Fix typo in comment. PCD->PCI. 
--- llvm/lib/Target/M68k/M68kInstrInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td index e2a7fb48ddc0..87ae7be9578a 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.td +++ b/llvm/lib/Target/M68k/M68kInstrInfo.td @@ -587,8 +587,8 @@ class MxType Date: Tue, 4 Jan 2022 03:20:29 +0000 Subject: [PATCH 475/992] [AVR] Optimize int8 arithmetic right shift 6 bits Reviewed By: aykevl Differential Revision: https://reviews.llvm.org/D115593 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 46 ++++++++++++++++++++ llvm/lib/Target/AVR/AVRISelLowering.cpp | 5 +++ llvm/test/CodeGen/AVR/shift.ll | 10 +++++ 3 files changed, 61 insertions(+) diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index cb85d73772c5..16c94981947e 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -92,6 +92,7 @@ class AVRExpandPseudo : public MachineFunctionPass { /// Specific shift implementation. 
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); + bool expandASRB6Rd(Block &MBB, BlockIt MBBI); bool expandASRB7Rd(Block &MBB, BlockIt MBBI); bool expandLSLW4Rd(Block &MBB, BlockIt MBBI); bool expandLSRW4Rd(Block &MBB, BlockIt MBBI); @@ -1921,6 +1922,49 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { } } +bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(3).isDead(); + + // bst r24, 6 + // lsl r24 + // sbc r24, r24 + // bld r24, 0 + + buildMI(MBB, MBBI, AVR::BST) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addImm(6) + ->getOperand(2) + .setIsUndef(true); + + buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rd + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + auto MISBC = + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + buildMI(MBB, MBBI, AVR::BLD) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addImm(0) + ->getOperand(3) + .setIsKill(); + + if (ImpIsDead) + MISBC->getOperand(3).setIsDead(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); @@ -1957,6 +2001,8 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned Imm = MI.getOperand(2).getImm(); switch (Imm) { + case 6: + return expandASRB6Rd(MBB, MBBI); case 7: return expandASRB7Rd(MBB, MBBI); default: diff --git 
a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 39fba74a1ec7..f3e74e843695 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -359,6 +359,11 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim, DAG.getConstant(7, dl, VT)); ShiftAmount = 0; + } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) { + // Optimize ASR when ShiftAmount == 6. + Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim, + DAG.getConstant(6, dl, VT)); + ShiftAmount = 0; } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) { // Optimize ASR when ShiftAmount == 7. Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim, diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll index 24bc369cf614..d36655a2cb75 100644 --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -171,6 +171,16 @@ define i8 @lsr_i8_7(i8 %a) { ret i8 %result } +define i8 @asr_i8_6(i8 %a) { +; CHECK-LABEL: asr_i8_6 +; CHECK: bst r24, 6 +; CHECK-NEXT: lsl r24 +; CHECK-NEXT: sbc r24, r24 +; CHECK-NEXT: bld r24, 0 + %result = ashr i8 %a, 6 + ret i8 %result +} + define i8 @asr_i8_7(i8 %a) { ; CHECK-LABEL: asr_i8_7 ; CHECK: lsl r24 From 0bab7428057048d94774a91c329ae902fcffc170 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Tue, 4 Jan 2022 09:34:45 +0800 Subject: [PATCH 476/992] [X86] Add missing CET intrinsics support These two intrinsics are documented o SDM and intrinsic guide. 
Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D116325 --- clang/lib/Headers/cetintrin.h | 10 ++++++++++ clang/test/CodeGen/X86/cetintrin.c | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/clang/lib/Headers/cetintrin.h b/clang/lib/Headers/cetintrin.h index 4290e9d7355b..019cab0261e7 100644 --- a/clang/lib/Headers/cetintrin.h +++ b/clang/lib/Headers/cetintrin.h @@ -42,10 +42,20 @@ static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) { return __builtin_ia32_rdsspd(__a); } +static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd_i32() { + unsigned int t; + return __builtin_ia32_rdsspd(t); +} + #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) { return __builtin_ia32_rdsspq(__a); } + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq_i64() { + unsigned long long t; + return __builtin_ia32_rdsspq(t); +} #endif /* __x86_64__ */ #ifdef __x86_64__ diff --git a/clang/test/CodeGen/X86/cetintrin.c b/clang/test/CodeGen/X86/cetintrin.c index f70d1c80e1cf..dc55745ec450 100644 --- a/clang/test/CodeGen/X86/cetintrin.c +++ b/clang/test/CodeGen/X86/cetintrin.c @@ -37,6 +37,12 @@ unsigned int test_rdsspd(unsigned int a) { return _rdsspd(a); } +unsigned int test_rdsspd_i32() { + // CHECK-LABEL: @test_rdsspd_i32 + // CHECK: call i32 @llvm.x86.rdsspd(i32 %{{[a-z0-9.]+}}) + return _rdsspd_i32(); +} + #ifdef __x86_64__ unsigned long long test_rdsspq(unsigned long long a) { // X86_64-LABEL: @test_rdsspq @@ -44,6 +50,12 @@ unsigned long long test_rdsspq(unsigned long long a) { return _rdsspq(a); } +unsigned long long test_rdsspq_i64() { + // X86_64-LABEL: @test_rdsspq_i64 + // X86_64: call i64 @llvm.x86.rdsspq(i64 %{{[a-z0-9.]+}}) + return _rdsspq_i64(); +} + unsigned long long test_get_ssp(void) { // X86_64-LABEL: @test_get_ssp // X86_64: call i64 @llvm.x86.rdsspq(i64 0) From 9fb4e79d06aa690c611c0ef601c134f57788590a Mon Sep 17 00:00:00 2001 From: Ben Shi 
Date: Tue, 4 Jan 2022 04:14:15 +0000 Subject: [PATCH 477/992] Revert "[AVR] Optimize int8 arithmetic right shift 6 bits" This reverts commit 5723261370b45fa4d0d295845c6ef9e223f2ff4a. There are failures as reported in https://lab.llvm.org/buildbot#builders/16/builds/21638 https://lab.llvm.org/buildbot#builders/104/builds/5394 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 46 -------------------- llvm/lib/Target/AVR/AVRISelLowering.cpp | 5 --- llvm/test/CodeGen/AVR/shift.ll | 10 ----- 3 files changed, 61 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 16c94981947e..cb85d73772c5 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -92,7 +92,6 @@ class AVRExpandPseudo : public MachineFunctionPass { /// Specific shift implementation. bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); - bool expandASRB6Rd(Block &MBB, BlockIt MBBI); bool expandASRB7Rd(Block &MBB, BlockIt MBBI); bool expandLSLW4Rd(Block &MBB, BlockIt MBBI); bool expandLSRW4Rd(Block &MBB, BlockIt MBBI); @@ -1922,49 +1921,6 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { } } -bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) { - MachineInstr &MI = *MBBI; - Register DstReg = MI.getOperand(0).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); - bool DstIsKill = MI.getOperand(1).isKill(); - bool ImpIsDead = MI.getOperand(3).isDead(); - - // bst r24, 6 - // lsl r24 - // sbc r24, r24 - // bld r24, 0 - - buildMI(MBB, MBBI, AVR::BST) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addImm(6) - ->getOperand(2) - .setIsUndef(true); - - buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rd - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); - - auto MISBC = - buildMI(MBB, MBBI, AVR::SBCRdRr) - 
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addReg(DstReg, getKillRegState(DstIsKill)); - - buildMI(MBB, MBBI, AVR::BLD) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg, getKillRegState(DstIsKill)) - .addImm(0) - ->getOperand(3) - .setIsKill(); - - if (ImpIsDead) - MISBC->getOperand(3).setIsDead(); - - MI.eraseFromParent(); - return true; -} - bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); @@ -2001,8 +1957,6 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned Imm = MI.getOperand(2).getImm(); switch (Imm) { - case 6: - return expandASRB6Rd(MBB, MBBI); case 7: return expandASRB7Rd(MBB, MBBI); default: diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index f3e74e843695..39fba74a1ec7 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -359,11 +359,6 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim, DAG.getConstant(7, dl, VT)); ShiftAmount = 0; - } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) { - // Optimize ASR when ShiftAmount == 6. - Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim, - DAG.getConstant(6, dl, VT)); - ShiftAmount = 0; } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) { // Optimize ASR when ShiftAmount == 7. 
Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim, diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll index d36655a2cb75..24bc369cf614 100644 --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -171,16 +171,6 @@ define i8 @lsr_i8_7(i8 %a) { ret i8 %result } -define i8 @asr_i8_6(i8 %a) { -; CHECK-LABEL: asr_i8_6 -; CHECK: bst r24, 6 -; CHECK-NEXT: lsl r24 -; CHECK-NEXT: sbc r24, r24 -; CHECK-NEXT: bld r24, 0 - %result = ashr i8 %a, 6 - ret i8 %result -} - define i8 @asr_i8_7(i8 %a) { ; CHECK-LABEL: asr_i8_7 ; CHECK: lsl r24 From 304d30bc594bf99bba9ee780007ac78755a9ff7a Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Tue, 4 Jan 2022 07:28:16 +0100 Subject: [PATCH 478/992] [clang] Fix warning about unused variable [NFC] --- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3a4e9153689e..d4afefcb24a9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1929,7 +1929,7 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, } } - if (Arg *A = Args.getLastArg(options::OPT_mfix4300)) { + if (Args.getLastArg(options::OPT_mfix4300)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mfix4300"); } From c8e988fa78c6533d59dd2c065ca0393eb244e675 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Tue, 4 Jan 2022 07:59:16 +0100 Subject: [PATCH 479/992] [mlir] Fix bazel build after b4130e9eadfe46b4d3380c40ce8c3e900a0fd21b. 
https://github.com/llvm/llvm-project/commit/b4130e9eadfe46b4d3380c40ce8c3e900a0fd21b --- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index c69a315e05bc..3d3d023610f3 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -260,6 +260,7 @@ cc_library( ], deps = [ "//mlir:IR", + "//mlir:PDLInterpDialect", "//mlir:Pass", "//mlir:Support", "//mlir:TransformUtils", From c64ffa22d143fc58858bdb1105a22a5fc73ad26e Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 4 Jan 2022 14:40:31 +0800 Subject: [PATCH 480/992] [M68k][NFC] Fix unused argument warnings in M68kInstrArithmetic.td --- llvm/lib/Target/M68k/M68kInstrArithmetic.td | 49 ++++++++++----------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td index b2c05365d30b..ef50de576641 100644 --- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td +++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td @@ -150,8 +150,7 @@ let mayLoad = 1, mayStore = 1 in { // FIXME MxBiArOp_FMR/FMI cannot consume CCR from MxAdd/MxSub which leads for // MxAdd to survive the match and subsequent mismatch. 
-class MxBiArOp_FMR CMD, MxEncEA EA, MxEncExt EXT> : MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$opd), MN#"."#TYPE.Prefix#"\t$opd, $dst", @@ -160,8 +159,7 @@ class MxBiArOp_FMR("MxOpMode"#TYPE.Size#"EA"#TYPE.RLet), MxBeadDReg<1>, EA, EXT>>; -class MxBiArOp_FMI CMD, MxEncEA MEMEA, MxEncExt MEMExt> : MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$opd), MN#"."#TYPE.Prefix#"\t$opd, $dst", @@ -218,47 +216,47 @@ multiclass MxBiArOp_DF; // op $reg, $mem - def NAME#"8pd" : MxBiArOp_FMR; - def NAME#"16pd" : MxBiArOp_FMR; - def NAME#"32pd" : MxBiArOp_FMR; - def NAME#"8fd" : MxBiArOp_FMR; - def NAME#"16fd" : MxBiArOp_FMR; - def NAME#"32fd" : MxBiArOp_FMR; - def NAME#"8jd" : MxBiArOp_FMR; - def NAME#"16jd" : MxBiArOp_FMR; - def NAME#"32jd" : MxBiArOp_FMR; // op $imm, $mem - def NAME#"8pi" : MxBiArOp_FMI; - def NAME#"16pi" : MxBiArOp_FMI; - def NAME#"32pi" : MxBiArOp_FMI; - def NAME#"8fi" : MxBiArOp_FMI; - def NAME#"16fi" : MxBiArOp_FMI; - def NAME#"32fi" : MxBiArOp_FMI; - def NAME#"8ji" : MxBiArOp_FMI; - def NAME#"16ji" : MxBiArOp_FMI; - def NAME#"32ji" : MxBiArOp_FMI; def NAME#"16dr" : MxBiArOp_RFRR_xEA CMD, bits<4> CMDI> { +multiclass MxBiArOp_AF CMD> { def NAME#"32ak" : MxBiArOp_RFRM; @@ -307,9 +304,9 @@ multiclass MxBiArOp_AF; -defm ADD : MxBiArOp_AF<"adda", MxAdd, 1, 0xD, 0x6>; +defm ADD : MxBiArOp_AF<"adda", MxAdd, 0xD>; defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>; -defm SUB : MxBiArOp_AF<"suba", MxSub, 0, 0x9, 0x4>; +defm SUB : MxBiArOp_AF<"suba", MxSub, 0x9>; let Uses = [CCR], Defs = [CCR] in { From f68ecdd45812021b32b738df3bee602ca5042bb4 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 3 Jan 2022 11:38:59 -0500 Subject: [PATCH 481/992] [mlir] Add CMake flags to properly enable Jit event listeners. By default, the listeners do nothing unless linked in. This revision allows the "Perf" and "Intel" Jit event listeners to be used. The "OProfile" event listener is not enabled at this time, the associated library structure is not well-isolated. 
Differential Revision: https://reviews.llvm.org/D116552 --- mlir/lib/ExecutionEngine/CMakeLists.txt | 11 +++++++++++ mlir/lib/ExecutionEngine/ExecutionEngine.cpp | 12 +++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index c52837c2e9ad..7d758fdbdd61 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -13,6 +13,16 @@ set(LLVM_OPTIONAL_SOURCES JitRunner.cpp ) +if(LLVM_USE_INTEL_JITEVENTS) + set(LLVM_JIT_LISTENER_LIB + IntelJITEvents) +endif(LLVM_USE_INTEL_JITEVENTS) + +if(LLVM_USE_PERF) + set(LLVM_JIT_LISTENER_LIB + PerfJITEvents) +endif(LLVM_USE_PERF) + add_mlir_library(MLIRExecutionEngine ExecutionEngine.cpp OptUtils.cpp @@ -42,6 +52,7 @@ add_mlir_library(MLIRExecutionEngine TransformUtils nativecodegen IPO + ${LLVM_JIT_LISTENER_LIB} LINK_LIBS PUBLIC MLIRLLVMIR diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index ead15152162e..00569e1d4242 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -217,9 +217,15 @@ ExecutionEngine::ExecutionEngine(bool enableObjectCache, gdbListener(enableGDBNotificationListener ? llvm::JITEventListener::createGDBRegistrationListener() : nullptr), - perfListener(enablePerfNotificationListener - ? 
llvm::JITEventListener::createPerfJITEventListener() - : nullptr) {} + perfListener(nullptr) { + if (enablePerfNotificationListener) { + if (auto *listener = llvm::JITEventListener::createPerfJITEventListener()) + perfListener = listener; + else if (auto *listener = + llvm::JITEventListener::createIntelJITEventListener()) + perfListener = listener; + } +} Expected> ExecutionEngine::create( ModuleOp m, From 2a0e05100c26473b3ce94507200b55f71a9c9482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Mon, 3 Jan 2022 23:52:13 +0100 Subject: [PATCH 482/992] [mlir][LLVM] Set cleanup flag on `llvm.landingpad` when exporting to LLVM IR Exporting a llvm.landingpad operation with the cleanup flag set is currently ignored by the export code. Differential Revision: https://reviews.llvm.org/D116565 --- .../LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp | 1 + mlir/test/Target/LLVMIR/llvmir.mlir | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 4f5e636c0a8e..3254aed6c341 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -371,6 +371,7 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, llvm::Type *ty = moduleTranslation.convertType(lpOp.getType()); llvm::LandingPadInst *lpi = builder.CreateLandingPad(ty, lpOp.getNumOperands()); + lpi->setCleanup(lpOp.getCleanup()); // Add clauses for (llvm::Value *operand : diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index e87eebaca515..6741d51b5f39 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1298,31 +1298,29 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali // ----- -llvm.mlir.global external constant @_ZTIi() : 
!llvm.ptr llvm.func @foo() -> i8 llvm.func @__gxx_personality_v0(...) -> i32 // CHECK-LABEL: @invoke_result // CHECK-SAME: %[[a0:[0-9]+]] llvm.func @invoke_result(%arg0 : !llvm.ptr) attributes { personality = @__gxx_personality_v0 } { - %0 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> // CHECK: %[[a1:[0-9]+]] = invoke i8 @foo() // CHECK-NEXT: to label %[[normal:[0-9]+]] unwind label %[[unwind:[0-9]+]] - %1 = llvm.invoke @foo() to ^bb1 unwind ^bb2 : () -> i8 + %0 = llvm.invoke @foo() to ^bb1 unwind ^bb2 : () -> i8 // CHECK: [[normal]]: // CHECK-NEXT: store i8 %[[a1]], i8* %[[a0]] // CHECK-NEXT: ret void ^bb1: - llvm.store %1, %arg0 : !llvm.ptr + llvm.store %0, %arg0 : !llvm.ptr llvm.return // CHECK: [[unwind]]: // CHECK-NEXT: landingpad { i8*, i32 } -// CHECK-NEXT: catch i8** @_ZTIi +// CHECK-NEXT: cleanup // CHECK-NEXT: ret void ^bb2: - %7 = llvm.landingpad (catch %0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> + %7 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> llvm.return } From e2b6e21f19da6fe0da9349264e43286f0441b4ca Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 3 Jan 2022 17:32:20 +0100 Subject: [PATCH 483/992] [clang-format] Fix incorrect formatting of lambdas inside brace initialisation Fixes https://github.com/llvm/llvm-project/issues/27146. Fixes https://github.com/llvm/llvm-project/issues/52943. 
Before: ``` namespace ns { void foo() { std::variant v; std::visit(overloaded{[](auto &&) -> int (*)[] { return nullptr; }}, v); } } // namespace ns int break_me() { int x = 42; return int{[x = x]() { return x; }()}; } ``` got formatted as: ``` namespace ns { void foo() { std::variant v; std::visit(overloaded{[](auto &&) -> int (*)[] { return nullptr; } } // namespace ns , v); } } // namespace ns int break_me() { int x = 42; return int{[x = x](){return x; } () } ; } ``` Reviewed By: HazardyKnusperkeks, owenpan Differential Revision: https://reviews.llvm.org/D116553 --- clang/lib/Format/UnwrappedLineParser.cpp | 3 +++ clang/unittests/Format/FormatTest.cpp | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 0579acf36391..17187b7996aa 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1786,6 +1786,9 @@ bool UnwrappedLineParser::tryToParseLambda() { case tok::l_paren: parseParens(); break; + case tok::l_square: + parseSquare(); + break; case tok::amp: case tok::star: case tok::kw_const: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 49635f3f15ea..71f07412a3b6 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -20232,6 +20232,11 @@ TEST_F(FormatTest, FormatsLambdas) { "};"); verifyFormat("[]() -> Void {};"); verifyFormat("[a, b]() -> Tuple { return {}; };"); + verifyFormat("SomeFunction({[]() -> int[] { return {}; }});"); + verifyFormat("SomeFunction({[]() -> int *[] { return {}; }});"); + verifyFormat("SomeFunction({[]() -> int (*)[] { return {}; }});"); + verifyFormat("SomeFunction({[]() -> ns::type { return {}; }});"); + verifyFormat("return int{[x = x]() { return x; }()};"); // Lambdas with explicit template argument lists. 
verifyFormat( From bbeaf2aac678633749e7385466da10a1c0120b3b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 9 Dec 2021 16:57:33 +0100 Subject: [PATCH 484/992] [GlobalOpt][Evaluator] Rewrite global ctor evaluation (fixes PR51879) Global ctor evaluation currently models memory as a map from Constant* to Constant*. For this to be correct, it is required that there is only a single Constant* referencing a given memory location. The Evaluator tries to ensure this by imposing certain limitations that could result in ambiguities (by limiting types, casts and GEP formats), but ultimately still fails, as can be seen in PR51879. The approach is fundamentally fragile and will get more so with opaque pointers. My original thought was to instead store memory for each global as an offset => value representation. However, we also need to make sure that we can actually rematerialize the modified global initializer into a Constant in the end, which may not be possible if we allow arbitrary writes. What this patch does instead is to represent globals as a MutableValue, which is either a Constant* or a MutableAggregate*. The mutable aggregate exists to allow efficient mutation of individual aggregate elements, as mutating an element on a Constant would require interning a new constant. When a write to the Constant* is made, it is converted into a MutableAggregate* as needed. I believe this should make the evaluator more robust, compatible with opaque pointers, and a bit simpler as well. Fixes https://github.com/llvm/llvm-project/issues/51221. 
Differential Revision: https://reviews.llvm.org/D115530 --- .../include/llvm/Transforms/Utils/Evaluator.h | 52 +++- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 196 +------------ llvm/lib/Transforms/Utils/Evaluator.cpp | 259 +++++++----------- llvm/test/Transforms/GlobalOpt/pr51879.ll | 5 +- 4 files changed, 158 insertions(+), 354 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h index 1b93b0af86e2..9968cfb47cef 100644 --- a/llvm/include/llvm/Transforms/Utils/Evaluator.h +++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -36,6 +36,49 @@ class TargetLibraryInfo; /// be iterated over after the evaluation is complete. Once an evaluation call /// fails, the evaluation object should not be reused. class Evaluator { + class MutableAggregate; + + /// The evaluator represents values either as a Constant*, or as a + /// MutableAggregate, which allows changing individual aggregate elements + /// without creating a new interned Constant. 
+ class MutableValue { + PointerUnion Val; + void clear(); + bool makeMutable(); + + public: + MutableValue(Constant *C) { Val = C; } + MutableValue(const MutableValue &) = delete; + MutableValue(MutableValue &&Other) { + Val = Other.Val; + Other.Val = nullptr; + } + ~MutableValue() { clear(); } + + Type *getType() const { + if (auto *C = Val.dyn_cast()) + return C->getType(); + return Val.get()->Ty; + } + + Constant *toConstant() const { + if (auto *C = Val.dyn_cast()) + return C; + return Val.get()->toConstant(); + } + + Constant *read(Type *Ty, APInt Offset, const DataLayout &DL) const; + bool write(Constant *V, APInt Offset, const DataLayout &DL); + }; + + struct MutableAggregate { + Type *Ty; + SmallVector Elements; + + MutableAggregate(Type *Ty) : Ty(Ty) {} + Constant *toConstant() const; + }; + public: Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) : DL(DL), TLI(TLI) { @@ -57,8 +100,11 @@ class Evaluator { bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs); - const DenseMap &getMutatedMemory() const { - return MutatedMemory; + DenseMap getMutatedInitializers() const { + DenseMap Result; + for (auto &Pair : MutatedMemory) + Result[Pair.first] = Pair.second.toConstant(); + return Result; } const SmallPtrSetImpl &getInvariants() const { @@ -106,7 +152,7 @@ class Evaluator { /// For each store we execute, we update this map. Loads check this to get /// the most up-to-date value. If evaluation is successful, this state is /// committed to the process. - DenseMap MutatedMemory; + DenseMap MutatedMemory; /// To 'execute' an alloca, we create a temporary global variable to represent /// its body. 
This vector is needed so we can delete the temporary globals diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index b1f3ff15c97b..04f2b918425c 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2066,194 +2066,6 @@ OptimizeGlobalVars(Module &M, return Changed; } -/// Evaluate a piece of a constantexpr store into a global initializer. This -/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the -/// GEP operands of Addr [0, OpNo) have been stepped into. -static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, - ConstantExpr *Addr, unsigned OpNo) { - // Base case of the recursion. - if (OpNo == Addr->getNumOperands()) { - assert(Val->getType() == Init->getType() && "Type mismatch!"); - return Val; - } - - SmallVector Elts; - if (StructType *STy = dyn_cast(Init->getType())) { - // Break up the constant into its elements. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Elts.push_back(Init->getAggregateElement(i)); - - // Replace the element that we are supposed to. - ConstantInt *CU = cast(Addr->getOperand(OpNo)); - unsigned Idx = CU->getZExtValue(); - assert(Idx < STy->getNumElements() && "Struct index out of range!"); - Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - - // Return the modified struct. - return ConstantStruct::get(STy, Elts); - } - - ConstantInt *CI = cast(Addr->getOperand(OpNo)); - uint64_t NumElts; - if (ArrayType *ATy = dyn_cast(Init->getType())) - NumElts = ATy->getNumElements(); - else - NumElts = cast(Init->getType())->getNumElements(); - - // Break up the array into elements. 
- for (uint64_t i = 0, e = NumElts; i != e; ++i) - Elts.push_back(Init->getAggregateElement(i)); - - assert(CI->getZExtValue() < NumElts); - Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - - if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast(Init->getType()), Elts); - return ConstantVector::get(Elts); -} - -/// We have decided that Addr (which satisfies the predicate -/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. -static void CommitValueTo(Constant *Val, Constant *Addr) { - if (GlobalVariable *GV = dyn_cast(Addr)) { - assert(GV->hasInitializer()); - GV->setInitializer(Val); - return; - } - - ConstantExpr *CE = cast(Addr); - GlobalVariable *GV = cast(CE->getOperand(0)); - GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); -} - -/// Given a map of address -> value, where addresses are expected to be some form -/// of either a global or a constant GEP, set the initializer for the address to -/// be the value. This performs mostly the same function as CommitValueTo() -/// and EvaluateStoreInto() but is optimized to be more efficient for the common -/// case where the set of addresses are GEPs sharing the same underlying global, -/// processing the GEPs in batches rather than individually. -/// -/// To give an example, consider the following C++ code adapted from the clang -/// regression tests: -/// struct S { -/// int n = 10; -/// int m = 2 * n; -/// S(int a) : n(a) {} -/// }; -/// -/// template -/// struct U { -/// T *r = &q; -/// T q = 42; -/// U *p = this; -/// }; -/// -/// U e; -/// -/// The global static constructor for 'e' will need to initialize 'r' and 'p' of -/// the outer struct, while also initializing the inner 'q' structs 'n' and 'm' -/// members. 
This batch algorithm will simply use general CommitValueTo() method -/// to handle the complex nested S struct initialization of 'q', before -/// processing the outermost members in a single batch. Using CommitValueTo() to -/// handle member in the outer struct is inefficient when the struct/array is -/// very large as we end up creating and destroy constant arrays for each -/// initialization. -/// For the above case, we expect the following IR to be generated: -/// -/// %struct.U = type { %struct.S*, %struct.S, %struct.U* } -/// %struct.S = type { i32, i32 } -/// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e, -/// i64 0, i32 1), -/// %struct.S { i32 42, i32 84 }, %struct.U* @e } -/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex -/// constant expression, while the other two elements of @e are "simple". -static void BatchCommitValueTo(const DenseMap &Mem) { - SmallVector, 32> GVs; - SmallVector, 32> ComplexCEs; - SmallVector, 32> SimpleCEs; - SimpleCEs.reserve(Mem.size()); - - for (const auto &I : Mem) { - if (auto *GV = dyn_cast(I.first)) { - GVs.push_back(std::make_pair(GV, I.second)); - } else { - ConstantExpr *GEP = cast(I.first); - // We don't handle the deeply recursive case using the batch method. - if (GEP->getNumOperands() > 3) - ComplexCEs.push_back(std::make_pair(GEP, I.second)); - else - SimpleCEs.push_back(std::make_pair(GEP, I.second)); - } - } - - // The algorithm below doesn't handle cases like nested structs, so use the - // slower fully general method if we have to. - for (auto ComplexCE : ComplexCEs) - CommitValueTo(ComplexCE.second, ComplexCE.first); - - for (auto GVPair : GVs) { - assert(GVPair.first->hasInitializer()); - GVPair.first->setInitializer(GVPair.second); - } - - if (SimpleCEs.empty()) - return; - - // We cache a single global's initializer elements in the case where the - // subsequent address/val pair uses the same one. 
This avoids throwing away and - // rebuilding the constant struct/vector/array just because one element is - // modified at a time. - SmallVector Elts; - Elts.reserve(SimpleCEs.size()); - GlobalVariable *CurrentGV = nullptr; - - auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) { - Constant *Init = GV->getInitializer(); - Type *Ty = Init->getType(); - if (Update) { - if (CurrentGV) { - assert(CurrentGV && "Expected a GV to commit to!"); - Type *CurrentInitTy = CurrentGV->getInitializer()->getType(); - // We have a valid cache that needs to be committed. - if (StructType *STy = dyn_cast(CurrentInitTy)) - CurrentGV->setInitializer(ConstantStruct::get(STy, Elts)); - else if (ArrayType *ArrTy = dyn_cast(CurrentInitTy)) - CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts)); - else - CurrentGV->setInitializer(ConstantVector::get(Elts)); - } - if (CurrentGV == GV) - return; - // Need to clear and set up cache for new initializer. - CurrentGV = GV; - Elts.clear(); - unsigned NumElts; - if (auto *STy = dyn_cast(Ty)) - NumElts = STy->getNumElements(); - else if (auto *ATy = dyn_cast(Ty)) - NumElts = ATy->getNumElements(); - else - NumElts = cast(Ty)->getNumElements(); - for (unsigned i = 0, e = NumElts; i != e; ++i) - Elts.push_back(Init->getAggregateElement(i)); - } - }; - - for (auto CEPair : SimpleCEs) { - ConstantExpr *GEP = CEPair.first; - Constant *Val = CEPair.second; - - GlobalVariable *GV = cast(GEP->getOperand(0)); - commitAndSetupCache(GV, GV != CurrentGV); - ConstantInt *CI = cast(GEP->getOperand(2)); - Elts[CI->getZExtValue()] = Val; - } - // The last initializer in the list needs to be committed, others - // will be committed on a new initializer being processed. - commitAndSetupCache(CurrentGV, true); -} - /// Evaluate static constructors in the function, if we can. Return true if we /// can, false otherwise. 
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, @@ -2268,10 +2080,12 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, ++NumCtorsEvaluated; // We succeeded at evaluation: commit the result. + auto NewInitializers = Eval.getMutatedInitializers(); LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" - << F->getName() << "' to " - << Eval.getMutatedMemory().size() << " stores.\n"); - BatchCommitValueTo(Eval.getMutatedMemory()); + << F->getName() << "' to " << NewInitializers.size() + << " stores.\n"); + for (const auto &Pair : NewInitializers) + Pair.first->setInitializer(Pair.second); for (GlobalVariable *GV : Eval.getInvariants()) GV->setConstant(true); } diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp index 91630d876fc8..b1d2747d8045 100644 --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -122,129 +122,112 @@ isSimpleEnoughValueToCommit(Constant *C, return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); } -/// Return true if this constant is simple enough for us to understand. In -/// particular, if it is a cast to anything other than from one pointer type to -/// another pointer type, we punt. We basically just support direct accesses to -/// globals and GEP's of globals. This should be kept up to date with -/// CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) { - if (GlobalVariable *GV = dyn_cast(C)) - // Do not allow weak/*_odr/linkonce linkage or external globals. - return GV->hasUniqueInitializer(); - - if (ConstantExpr *CE = dyn_cast(C)) { - // Handle a constantexpr gep. - if (CE->getOpcode() == Instruction::GetElementPtr && - isa(CE->getOperand(0)) && - cast(CE)->isInBounds()) { - GlobalVariable *GV = cast(CE->getOperand(0)); - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. 
- if (!GV->hasUniqueInitializer()) - return false; +void Evaluator::MutableValue::clear() { + if (auto *Agg = Val.dyn_cast()) + delete Agg; + Val = nullptr; +} - // The first index must be zero. - ConstantInt *CI = dyn_cast(*std::next(CE->op_begin())); - if (!CI || !CI->isZero()) return false; +Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset, + const DataLayout &DL) const { + TypeSize TySize = DL.getTypeStoreSize(Ty); + const MutableValue *V = this; + while (const auto *Agg = V->Val.dyn_cast()) { + Type *AggTy = Agg->Ty; + Optional Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->ugt(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return nullptr; + + V = &Agg->Elements[Index->getZExtValue()]; + } - // The remaining indices must be compile-time known integers within the - // notional bounds of the corresponding static array types. - if (!CE->isGEPWithNoNotionalOverIndexing()) - return false; + return ConstantFoldLoadFromConst(V->Val.get(), Ty, Offset, DL); +} - return ConstantFoldLoadThroughGEPConstantExpr( - GV->getInitializer(), CE, - cast(CE)->getResultElementType(), DL); - } else if (CE->getOpcode() == Instruction::BitCast && - isa(CE->getOperand(0))) { - // A constantexpr bitcast from a pointer to another pointer is a no-op, - // and we know how to evaluate it by moving the bitcast from the pointer - // operand to the value operand. - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. 
- return cast(CE->getOperand(0))->hasUniqueInitializer(); - } - } +bool Evaluator::MutableValue::makeMutable() { + Constant *C = Val.get(); + Type *Ty = C->getType(); + unsigned NumElements; + if (auto *VT = dyn_cast(Ty)) { + NumElements = VT->getNumElements(); + } else if (auto *AT = dyn_cast(Ty)) + NumElements = AT->getNumElements(); + else if (auto *ST = dyn_cast(Ty)) + NumElements = ST->getNumElements(); + else + return false; - return false; + MutableAggregate *MA = new MutableAggregate(Ty); + MA->Elements.reserve(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + MA->Elements.push_back(C->getAggregateElement(I)); + Val = MA; + return true; } -/// Apply \p TryLoad to Ptr. If this returns \p nullptr, introspect the -/// pointer's type and walk down through the initial elements to obtain -/// additional pointers to try. Returns the first non-null return value from -/// \p TryLoad, or \p nullptr if the type can't be introspected further. -static Constant * -evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, - const TargetLibraryInfo *TLI, - std::function TryLoad) { - Constant *Val; - while (!(Val = TryLoad(Ptr))) { - // If Ty is a non-opaque struct, we can convert the pointer to the struct - // into a pointer to its first member. - // FIXME: This could be extended to support arrays as well. 
- Type *Ty = cast(Ptr->getType())->getElementType(); - if (!isa(Ty) || cast(Ty)->isOpaque()) - break; - - IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); - Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); - Constant *const IdxList[] = {IdxZero, IdxZero}; - - Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList); - Ptr = ConstantFoldConstant(Ptr, DL, TLI); +bool Evaluator::MutableValue::write(Constant *V, APInt Offset, + const DataLayout &DL) { + Type *Ty = V->getType(); + TypeSize TySize = DL.getTypeStoreSize(Ty); + MutableValue *MV = this; + while (Offset != 0 || + !CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) { + if (MV->Val.is() && !MV->makeMutable()) + return false; + + MutableAggregate *Agg = MV->Val.get(); + Type *AggTy = Agg->Ty; + Optional Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->ugt(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return false; + + MV = &Agg->Elements[Index->getZExtValue()]; } - return Val; + + Type *MVType = MV->getType(); + MV->clear(); + if (Ty->isIntegerTy() && MVType->isPointerTy()) + MV->Val = ConstantExpr::getIntToPtr(V, MVType); + else if (Ty->isPointerTy() && MVType->isIntegerTy()) + MV->Val = ConstantExpr::getPtrToInt(V, MVType); + else + MV->Val = ConstantExpr::getBitCast(V, MVType); + return true; } -static Constant *getInitializer(Constant *C) { - auto *GV = dyn_cast(C); - return GV && GV->hasDefinitiveInitializer() ? 
GV->getInitializer() : nullptr; +Constant *Evaluator::MutableAggregate::toConstant() const { + SmallVector Consts; + for (const MutableValue &MV : Elements) + Consts.push_back(MV.toConstant()); + + if (auto *ST = dyn_cast(Ty)) + return ConstantStruct::get(ST, Consts); + if (auto *AT = dyn_cast(Ty)) + return ConstantArray::get(AT, Consts); + assert(isa(Ty) && "Must be vector"); + return ConstantVector::get(Consts); } /// Return the value that would be computed by a load from P after the stores /// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { - // If this memory location has been recently stored, use the stored value: it - // is the most up-to-date. - auto TryFindMemLoc = [this](Constant *Ptr) { - return MutatedMemory.lookup(Ptr); - }; - - if (Constant *Val = TryFindMemLoc(P)) - return Val; - - // Access it. - if (GlobalVariable *GV = dyn_cast(P)) { - if (GV->hasDefinitiveInitializer()) - return GV->getInitializer(); + APInt Offset(DL.getIndexTypeSizeInBits(P->getType()), 0); + P = cast(P->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); + auto *GV = dyn_cast(P); + if (!GV) return nullptr; - } - if (ConstantExpr *CE = dyn_cast(P)) { - switch (CE->getOpcode()) { - // Handle a constantexpr getelementptr. - case Instruction::GetElementPtr: - if (auto *I = getInitializer(CE->getOperand(0))) - return ConstantFoldLoadThroughGEPConstantExpr(I, CE, Ty, DL); - break; - // Handle a constantexpr bitcast. - case Instruction::BitCast: - // We're evaluating a load through a pointer that was bitcast to a - // different type. See if the "from" pointer has recently been stored. - // If it hasn't, we may still be able to find a stored pointer by - // introspecting the type. 
- Constant *Val = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryFindMemLoc); - if (!Val) - Val = getInitializer(CE->getOperand(0)); - if (Val) - return ConstantFoldLoadThroughBitcast( - Val, P->getType()->getPointerElementType(), DL); - break; - } - } + auto It = MutatedMemory.find(GV); + if (It != MutatedMemory.end()) + return It->second.read(Ty, Offset, DL); - return nullptr; // don't know how to evaluate. + if (!GV->hasDefinitiveInitializer()) + return nullptr; + return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL); } static Function *getFunction(Constant *C) { @@ -337,68 +320,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, Ptr = FoldedPtr; LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } - // Conservatively, avoid aggregate types. This is because we don't - // want to worry about them partially overlapping other stores. - if (!SI->getValueOperand()->getType()->isSingleValueType() || - !isSimpleEnoughPointerToCommit(Ptr, DL)) { - // If this is too complex for us to commit, reject it. - LLVM_DEBUG( - dbgs() << "Pointer is too complex for us to evaluate store."); + + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType())); + auto *GV = dyn_cast(Ptr); + if (!GV || !GV->hasUniqueInitializer()) { + LLVM_DEBUG(dbgs() << "Store is not to global with unique initializer: " + << *Ptr << "\n"); return false; } - Constant *Val = getVal(SI->getOperand(0)); - // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. + Constant *Val = getVal(SI->getOperand(0)); if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. 
" << *Val << "\n"); return false; } - if (ConstantExpr *CE = dyn_cast(Ptr)) { - if (CE->getOpcode() == Instruction::BitCast) { - LLVM_DEBUG(dbgs() - << "Attempting to resolve bitcast on constant ptr.\n"); - // If we're evaluating a store through a bitcast, then we need - // to pull the bitcast off the pointer type and push it onto the - // stored value. In order to push the bitcast onto the stored value, - // a bitcast from the pointer's element type to Val's type must be - // legal. If it's not, we can try introspecting the type to find a - // legal conversion. - - auto TryCastValTy = [&](Constant *P) -> Constant * { - // The conversion is illegal if the store is wider than the - // pointee proposed by `evaluateBitcastFromPtr`, since that would - // drop stores to other struct elements when the caller attempts to - // look through a struct's 0th element. - Type *NewTy = cast(P->getType())->getElementType(); - Type *STy = Val->getType(); - if (DL.getTypeSizeInBits(NewTy) < DL.getTypeSizeInBits(STy)) - return nullptr; - - if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, NewTy, DL)) { - Ptr = P; - return FV; - } - return nullptr; - }; - - Constant *NewVal = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryCastValTy); - if (!NewVal) { - LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); - return false; - } - - Val = NewVal; - LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); - } - } - - MutatedMemory[Ptr] = Val; + auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer()); + if (!Res.first->second.write(Val, Offset, DL)) + return false; } else if (BinaryOperator *BO = dyn_cast(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), diff --git a/llvm/test/Transforms/GlobalOpt/pr51879.ll b/llvm/test/Transforms/GlobalOpt/pr51879.ll index 0fe1a9f754a7..e827ae39a230 100644 --- a/llvm/test/Transforms/GlobalOpt/pr51879.ll +++ b/llvm/test/Transforms/GlobalOpt/pr51879.ll @@ -1,8 +1,6 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -globalopt < %s | FileCheck %s -; TODO: This currently computes an incorrect initializer value. - %type = type { { i8** } } @g = internal global %type zeroinitializer @@ -11,7 +9,8 @@ @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }] ;. -; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global [[TYPE:%.*]] zeroinitializer +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global [[TYPE:%.*]] { { i8** } { i8** @g2 } } +; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = external global i8* ; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [0 x { i32, void ()*, i8* }] zeroinitializer ;. define internal void @ctor() { From 4ef560ec6026a2cef71748852c68cd4a79c0ac0a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Jan 2022 11:21:05 +0100 Subject: [PATCH 485/992] [ELF] Handle .init_array prefix consistently Currently, the code in TargetLoweringObjectFile only assigns @init_array section type to plain .init_array sections, but not prioritized sections like .init_array.00001. This is inconsistent with the interpretation in the AsmParser (see https://github.com/llvm/llvm-project/blob/791523bae6153b13bb41ba05c9fc89e502cc4a1a/llvm/lib/MC/MCParser/ELFAsmParser.cpp#L621-L632) and upcoming expectations in LLD (see https://github.com/rust-lang/rust/issues/92181 for context). This patch assigns @init_array section type to all sections with an .init_array prefix. The same is done for .fini_array and .preinit_array as well. With that, the logic matches the AsmParser. 
Differential Revision: https://reviews.llvm.org/D116528 --- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 11 ++++-- llvm/test/CodeGen/X86/attribute-sections.ll | 38 ++++++++++++++----- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d1c2cdeb133b..29482a316c8a 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -478,6 +478,11 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { return K; } +static bool hasPrefix(StringRef SectionName, StringRef Prefix) { + return SectionName.consume_front(Prefix) && + (SectionName.empty() || SectionName[0] == '.'); +} + static unsigned getELFSectionType(StringRef Name, SectionKind K) { // Use SHT_NOTE for section whose name starts with ".note" to allow // emitting ELF notes from C variable declaration. @@ -485,13 +490,13 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { if (Name.startswith(".note")) return ELF::SHT_NOTE; - if (Name == ".init_array") + if (hasPrefix(Name, ".init_array")) return ELF::SHT_INIT_ARRAY; - if (Name == ".fini_array") + if (hasPrefix(Name, ".fini_array")) return ELF::SHT_FINI_ARRAY; - if (Name == ".preinit_array") + if (hasPrefix(Name, ".preinit_array")) return ELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) diff --git a/llvm/test/CodeGen/X86/attribute-sections.ll b/llvm/test/CodeGen/X86/attribute-sections.ll index 30353346b5c9..c5d5ea682f00 100644 --- a/llvm/test/CodeGen/X86/attribute-sections.ll +++ b/llvm/test/CodeGen/X86/attribute-sections.ll @@ -1,18 +1,36 @@ -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s declare i32 @foo() -@G0 = global i32 ()* @foo, section ".init_array" -; LINUX: .section .init_array,"aw" -; LINUX: .globl G0 +@init_array1 = global i32 
()* @foo, section ".init_array" +@init_array2 = global i32 ()* @foo, section ".init_array.00001" +@init_array3 = global i32 ()* @foo, section ".init_arrayfoo" -@G1 = global i32 ()* @foo, section ".fini_array" +; CHECK-LABEL: .section .init_array,"aw",@init_array +; CHECK: init_array1: +; CHECK-LABEL: .section .init_array.00001,"aw",@init_array +; CHECK: init_array2: +; CHECK-LABEL: .section .init_arrayfoo,"aw",@progbits +; CHECK: init_array3: -; LINUX: .section .fini_array,"aw" -; LINUX: .globl G1 +@fini_array1 = global i32 ()* @foo, section ".fini_array" +@fini_array2 = global i32 ()* @foo, section ".fini_array.00001" +@fini_array3 = global i32 ()* @foo, section ".fini_arrayfoo" -@G2 = global i32 ()* @foo, section ".preinit_array" +; CHECK-LABEL: .section .fini_array,"aw",@fini_array +; CHECK: fini_array1: +; CHECK-LABEL: .section .fini_array.00001,"aw",@fini_array +; CHECK: fini_array2: +; CHECK-LABEL: .section .fini_arrayfoo,"aw",@progbits +; CHECK: fini_array3: -; LINUX: .section .preinit_array,"aw" -; LINUX: .globl G2 +@preinit_array1 = global i32 ()* @foo, section ".preinit_array" +@preinit_array2 = global i32 ()* @foo, section ".preinit_array.00001" +@preinit_array3 = global i32 ()* @foo, section ".preinit_arrayfoo" +; CHECK-LABEL: .section .preinit_array,"aw",@preinit_array +; CHECK: preinit_array1: +; CHECK-LABEL: .section .preinit_array.00001,"aw",@preinit_array +; CHECK: preinit_array2: +; CHECK-LABEL: .section .preinit_arrayfoo,"aw",@progbits +; CHECK: preinit_array3: From 29e6e522a488b0d32d9ab4829ec701aeecfc0995 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 09:46:33 +0100 Subject: [PATCH 486/992] [Evaluator] Make forward declaration consistent (NFC) Fix a build warning. 
--- llvm/include/llvm/Transforms/Utils/Evaluator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h index 9968cfb47cef..9346212dd888 100644 --- a/llvm/include/llvm/Transforms/Utils/Evaluator.h +++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -36,7 +36,7 @@ class TargetLibraryInfo; /// be iterated over after the evaluation is complete. Once an evaluation call /// fails, the evaluation object should not be reused. class Evaluator { - class MutableAggregate; + struct MutableAggregate; /// The evaluator represents values either as a Constant*, or as a /// MutableAggregate, which allows changing individual aggregate elements From 8484bab9cd5e5af11acf64e68c2f82e250e08dbe Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 3 Jan 2022 11:44:39 +0100 Subject: [PATCH 487/992] [LangRef] Require elementtype attribute for indirect inline asm operands Indirect inline asm operands may require the materialization of a memory access according to the pointer element type. As this will no longer be available with opaque pointers, we require it to be explicitly annotated using the elementtype attribute, for example: define void @test(i32* %p, i32 %x) { call void asm "addl $1, $0", "=*rm,r"(i32* elementtype(i32) %p, i32 %x) ret void } This patch only includes the LangRef change and Verifier updates to allow adding the elementtype attribute in this position. It does not yet enforce this, as this will require changes on the clang side (and test updates) first. Something I'm a bit unsure about is whether we really need the elementtype for all indirect constraints, rather than only indirect register constraints. I think indirect memory constraints might not strictly need it (though the backend code is written in a way that does require it). 
I think it's okay to just make this a general requirement though, as this means we don't need to carefully deal with multiple or alternative constraints. In addition, I believe that MemorySanitizer benefits from having the element type even in cases where it may not be strictly necessary for normal lowering (https://github.com/llvm/llvm-project/blob/cd2b050fa4995b75b9c36fae16c0d9f105b67585/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp#L4066). Differential Revision: https://reviews.llvm.org/D116531 --- llvm/docs/LangRef.rst | 3 ++ llvm/lib/IR/Verifier.cpp | 47 ++++++++++++++--- llvm/test/Verifier/elementtype.ll | 2 +- .../Verifier/inline-asm-indirect-operand.ll | 52 +++++++++++++++++++ 4 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Verifier/inline-asm-indirect-operand.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8c72e3255ab5..389c90937bb0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4568,6 +4568,9 @@ functionality provides, compared to writing the store explicitly after the asm statement, and it can only produce worse code, since it bypasses many optimization passes. I would recommend not using it.) +Call arguments for indirect constraints must have pointer type and must specify +the :ref:`elementtype ` attribute to indicate the pointer +element type. 
Clobber constraints """"""""""""""""""" diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index fb7c423e54e2..9ce37db9ea6c 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -551,11 +551,12 @@ class Verifier : public InstVisitor, VerifierSupport { void checkUnsignedBaseTenFuncAttr(AttributeList Attrs, StringRef Attr, const Value *V); void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, - const Value *V, bool IsIntrinsic); + const Value *V, bool IsIntrinsic, bool IsInlineAsm); void verifyFunctionMetadata(ArrayRef> MDs); void visitConstantExprsRecursively(const Constant *EntryC); void visitConstantExpr(const ConstantExpr *CE); + void verifyInlineAsmCall(const CallBase &Call); void verifyStatepoint(const CallBase &Call); void verifyFrameRecoverIndices(); void verifySiblingFuncletUnwinds(); @@ -1870,7 +1871,8 @@ void Verifier::checkUnsignedBaseTenFuncAttr(AttributeList Attrs, StringRef Attr, // Check parameter attributes against a function type. // The value V is printed in error messages. 
void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, - const Value *V, bool IsIntrinsic) { + const Value *V, bool IsIntrinsic, + bool IsInlineAsm) { if (Attrs.isEmpty()) return; @@ -1913,8 +1915,10 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, if (!IsIntrinsic) { Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg), "immarg attribute only applies to intrinsics",V); - Assert(!ArgAttrs.hasAttribute(Attribute::ElementType), - "Attribute 'elementtype' can only be applied to intrinsics.", V); + if (!IsInlineAsm) + Assert(!ArgAttrs.hasAttribute(Attribute::ElementType), + "Attribute 'elementtype' can only be applied to intrinsics" + " and inline asm.", V); } verifyParameterAttrs(ArgAttrs, Ty, V); @@ -2141,6 +2145,33 @@ bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { return Attrs.getNumAttrSets() <= Params + 2; } +void Verifier::verifyInlineAsmCall(const CallBase &Call) { + const InlineAsm *IA = cast(Call.getCalledOperand()); + unsigned ArgNo = 0; + for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + // Only deal with constraints that correspond to call arguments. + bool HasArg = CI.Type == InlineAsm::isInput || + (CI.Type == InlineAsm::isOutput && CI.isIndirect); + if (!HasArg) + continue; + + if (CI.isIndirect) { + const Value *Arg = Call.getArgOperand(ArgNo); + Assert(Arg->getType()->isPointerTy(), + "Operand for indirect constraint must have pointer type", + &Call); + + // TODO: Require elementtype attribute here. + } else { + Assert(!Call.paramHasAttr(ArgNo, Attribute::ElementType), + "Elementtype attribute can only be applied for indirect " + "constraints", &Call); + } + + ArgNo++; + } +} + /// Verify that statepoint intrinsic is well formed. void Verifier::verifyStatepoint(const CallBase &Call) { assert(Call.getCalledFunction() && @@ -2364,7 +2395,7 @@ void Verifier::visitFunction(const Function &F) { bool IsIntrinsic = F.isIntrinsic(); // Check function attributes. 
- verifyFunctionAttrs(FT, Attrs, &F, IsIntrinsic); + verifyFunctionAttrs(FT, Attrs, &F, IsIntrinsic, /* IsInlineAsm */ false); // On function declarations/definitions, we do not support the builtin // attribute. We do not check this in VerifyFunctionAttrs since that is @@ -2779,6 +2810,7 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) { Assert(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI); } + verifyInlineAsmCall(CBI); visitTerminator(CBI); } @@ -3123,7 +3155,7 @@ void Verifier::visitCallBase(CallBase &Call) { } // Verify call attributes. - verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic); + verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic, Call.isInlineAsm()); // Conservatively check the inalloca argument. // We have a bug if we can find that there is an underlying alloca without @@ -3316,6 +3348,9 @@ void Verifier::visitCallBase(CallBase &Call) { "debug info must have a !dbg location", Call); + if (Call.isInlineAsm()) + verifyInlineAsmCall(Call); + visitInstruction(Call); } diff --git a/llvm/test/Verifier/elementtype.ll b/llvm/test/Verifier/elementtype.ll index e092e0f54c93..22bfe720c748 100644 --- a/llvm/test/Verifier/elementtype.ll +++ b/llvm/test/Verifier/elementtype.ll @@ -14,7 +14,7 @@ define void @type_mismatch2() { ret void } -; CHECK: Attribute 'elementtype' can only be applied to intrinsics. +; CHECK: Attribute 'elementtype' can only be applied to intrinsics and inline asm. 
define void @not_intrinsic() { call void @some_function(i32* elementtype(i32) null) ret void diff --git a/llvm/test/Verifier/inline-asm-indirect-operand.ll b/llvm/test/Verifier/inline-asm-indirect-operand.ll new file mode 100644 index 000000000000..4be6f50b9ef5 --- /dev/null +++ b/llvm/test/Verifier/inline-asm-indirect-operand.ll @@ -0,0 +1,52 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +define void @okay(i32* %p, i32 %x) { + call void asm "addl $1, $0", "=*rm,r"(i32* elementtype(i32) %p, i32 %x) + ret void +} + +; CHECK: Attribute 'elementtype' type does not match parameter! +; CHECK-NEXT: call void asm "addl $1, $0", "=*rm,r"(i32* elementtype(i64) %p, i32 %x) +define void @wrong_element_type(i32* %p, i32 %x) { + call void asm "addl $1, $0", "=*rm,r"(i32* elementtype(i64) %p, i32 %x) + ret void +} + +; CHECK: Operand for indirect constraint must have pointer type +; CHECK-NEXT: call void asm "addl $1, $0", "=*rm,r"(i32 %p, i32 %x) +define void @not_pointer_arg(i32 %p, i32 %x) { + call void asm "addl $1, $0", "=*rm,r"(i32 %p, i32 %x) + ret void +} + +; CHECK: Elementtype attribute can only be applied for indirect constraints +; CHECK-NEXT: call void asm "addl $1, $0", "=*rm,r"(i32* %p, i32* elementtype(i32) %x) +define void @not_indirect(i32* %p, i32* %x) { + call void asm "addl $1, $0", "=*rm,r"(i32* %p, i32* elementtype(i32) %x) + ret void +} + +; CHECK: Operand for indirect constraint must have pointer type +; CHECK-NEXT: invoke void asm "addl $1, $0", "=*rm,r"(i32 %p, i32 %x) +define void @not_pointer_arg_invoke(i32 %p, i32 %x) personality i8* null { + invoke void asm "addl $1, $0", "=*rm,r"(i32 %p, i32 %x) + to label %cont unwind label %lpad + +lpad: + %lp = landingpad i32 + cleanup + ret void + +cont: + ret void +} + +; CHECK: Operand for indirect constraint must have pointer type +; CHECK-NEXT: callbr void asm "addl $1, $0", "=*rm,r"(i32 %p, i32 %x) +define void @not_pointer_arg_callbr(i32 %p, i32 %x) { + callbr void asm "addl $1, $0", 
"=*rm,r"(i32 %p, i32 %x) + to label %cont [] + +cont: + ret void +} From af7bc39ba17d8c5250830e96881fb7211c7576bb Mon Sep 17 00:00:00 2001 From: Stanislav Gatev Date: Wed, 29 Dec 2021 11:31:02 +0000 Subject: [PATCH 488/992] [clang][dataflow] Add transfer function for VarDecl statements This is part of the implementation of the dataflow analysis framework. See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev. Reviewed-by: xazax.hun Differential Revision: https://reviews.llvm.org/D116368 --- .../FlowSensitive/DataflowAnalysisContext.h | 94 +++ .../FlowSensitive/DataflowEnvironment.h | 86 ++- .../Analysis/FlowSensitive/StorageLocation.h | 89 +++ .../clang/Analysis/FlowSensitive/Transfer.h | 33 ++ .../TypeErasedDataflowAnalysis.h | 4 + .../clang/Analysis/FlowSensitive/Value.h | 117 ++++ .../lib/Analysis/FlowSensitive/CMakeLists.txt | 2 + .../FlowSensitive/DataflowEnvironment.cpp | 191 +++++++ clang/lib/Analysis/FlowSensitive/Transfer.cpp | 72 +++ .../TypeErasedDataflowAnalysis.cpp | 38 +- .../Analysis/FlowSensitive/CMakeLists.txt | 1 + .../Analysis/FlowSensitive/NoopAnalysis.h | 55 ++ .../Analysis/FlowSensitive/TestingSupport.h | 3 +- .../FlowSensitive/TestingSupportTest.cpp | 27 +- .../Analysis/FlowSensitive/TransferTest.cpp | 540 ++++++++++++++++++ .../TypeErasedDataflowAnalysisTest.cpp | 30 +- 16 files changed, 1314 insertions(+), 68 deletions(-) create mode 100644 clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h create mode 100644 clang/include/clang/Analysis/FlowSensitive/StorageLocation.h create mode 100644 clang/include/clang/Analysis/FlowSensitive/Transfer.h create mode 100644 clang/include/clang/Analysis/FlowSensitive/Value.h create mode 100644 clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp create mode 100644 clang/lib/Analysis/FlowSensitive/Transfer.cpp create mode 100644 clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h create mode 100644 clang/unittests/Analysis/FlowSensitive/TransferTest.cpp diff --git 
a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h new file mode 100644 index 000000000000..a99d32df018b --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -0,0 +1,94 @@ +//===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a DataflowAnalysisContext class that owns objects that +// encompass the state of a program and stores context that is used during +// dataflow analysis. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H + +#include "clang/AST/Decl.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseMap.h" +#include +#include +#include +#include + +namespace clang { +namespace dataflow { + +/// Owns objects that encompass the state of a program and stores context that +/// is used during dataflow analysis. +class DataflowAnalysisContext { +public: + /// Takes ownership of `Loc` and returns a reference to it. + /// + /// Requirements: + /// + /// `Loc` must not be null. + StorageLocation &takeOwnership(std::unique_ptr Loc) { + assert(Loc != nullptr); + Locs.push_back(std::move(Loc)); + return *Locs.back().get(); + } + + /// Takes ownership of `Val` and returns a reference to it. + /// + /// Requirements: + /// + /// `Val` must not be null. 
+ Value &takeOwnership(std::unique_ptr Val) { + assert(Val != nullptr); + Vals.push_back(std::move(Val)); + return *Vals.back().get(); + } + + /// Assigns `Loc` as the storage location of `D`. + /// + /// Requirements: + /// + /// `D` must not be assigned a storage location. + void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { + assert(DeclToLoc.find(&D) == DeclToLoc.end()); + DeclToLoc[&D] = &Loc; + } + + /// Returns the storage location assigned to `D` or null if `D` has no + /// assigned storage location. + StorageLocation *getStorageLocation(const ValueDecl &D) const { + auto It = DeclToLoc.find(&D); + return It == DeclToLoc.end() ? nullptr : It->second; + } + +private: + // Storage for the state of a program. + std::vector> Locs; + std::vector> Vals; + + // Maps from program declarations and statements to storage locations that are + // assigned to them. These assignments are global (aggregated across all basic + // blocks) and are used to produce stable storage locations when the same + // basic blocks are evaluated multiple times. The storage locations that are + // in scope for a particular basic block are stored in `Environment`. + llvm::DenseMap DeclToLoc; + // FIXME: Add `Expr` to `StorageLocation` map. + + // FIXME: Add `StorageLocation` for `this`. + + // FIXME: Add support for boolean expressions. 
+}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 4a3c0239f8e1..2a0ad6dad123 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -15,7 +15,15 @@ #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "clang/AST/TypeOrdering.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" namespace clang { namespace dataflow { @@ -23,11 +31,81 @@ namespace dataflow { /// Holds the state of the program (store and heap) at a given program point. class Environment { public: - bool operator==(const Environment &) const { return true; } + Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx) {} - LatticeJoinEffect join(const Environment &) { - return LatticeJoinEffect::Unchanged; - } + bool operator==(const Environment &) const; + + LatticeJoinEffect join(const Environment &); + + /// Creates a storage location appropriate for `Type`. Does not assign a value + /// to the returned storage location in the environment. + /// + /// Requirements: + /// + /// `Type` must not be null. + StorageLocation &createStorageLocation(QualType Type); + + /// Creates a storage location for `D`. Does not assign the returned storage + /// location to `D` in the environment. Does not assign a value to the + /// returned storage location in the environment. 
+ StorageLocation &createStorageLocation(const VarDecl &D); + + /// Assigns `Loc` as the storage location of `D` in the environment. + /// + /// Requirements: + /// + /// `D` must not be assigned a storage location in the environment. + void setStorageLocation(const ValueDecl &D, StorageLocation &Loc); + + /// Returns the storage location assigned to `D` in the environment or null if + /// `D` isn't assigned a storage location in the environment. + StorageLocation *getStorageLocation(const ValueDecl &D) const; + + /// Creates a value appropriate for `Type`, assigns it to `Loc`, and returns + /// it, if `Type` is supported, otherwise return null. If `Type` is a pointer + /// or reference type, creates all the necessary storage locations and values + /// for indirections until it finds a non-pointer/non-reference type. + /// + /// Requirements: + /// + /// `Type` must not be null. + Value *initValueInStorageLocation(const StorageLocation &Loc, QualType Type); + + /// Assigns `Val` as the value of `Loc` in the environment. + void setValue(const StorageLocation &Loc, Value &Val); + + /// Returns the value assigned to `Loc` in the environment or null if `Loc` + /// isn't assigned a value in the environment. + Value *getValue(const StorageLocation &Loc) const; + +private: + /// Returns the value assigned to `Loc` in the environment or null if `Type` + /// isn't supported. + /// + /// Recursively initializes storage locations and values until it sees a + /// self-referential pointer or reference type. `Visited` is used to track + /// which types appeared in the reference/pointer chain in order to avoid + /// creating a cyclic dependency with self-referential pointers/references. + /// + /// Requirements: + /// + /// `Type` must not be null. 
+ Value *initValueInStorageLocationUnlessSelfReferential( + const StorageLocation &Loc, QualType Type, + llvm::DenseSet &Visited); + + DataflowAnalysisContext *DACtx; + + // Maps from program declarations and statements to storage locations that are + // assigned to them. Unlike the maps in `DataflowAnalysisContext`, these + // include only storage locations that are in scope for a particular basic + // block. + llvm::DenseMap DeclToLoc; + // FIXME: Add `Expr` to `StorageLocation` map. + + llvm::DenseMap LocToVal; + + // FIXME: Add flow condition constraints. }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h new file mode 100644 index 000000000000..5532813d6d29 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h @@ -0,0 +1,89 @@ +//===-- StorageLocation.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines classes that represent elements of the local variable store +// and of the heap during dataflow analysis. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_STORAGELOCATION_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_STORAGELOCATION_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "llvm/ADT/DenseMap.h" + +namespace clang { +namespace dataflow { + +/// Base class for elements of the local variable store and of the heap. +/// +/// Each storage location holds a value. The mapping from storage locations to +/// values is stored in the environment. 
+class StorageLocation { +public: + enum class Kind { Scalar, Aggregate }; + + StorageLocation(Kind LocKind, QualType Type) : LocKind(LocKind), Type(Type) {} + + virtual ~StorageLocation() = default; + + Kind getKind() const { return LocKind; } + + QualType getType() const { return Type; } + +private: + Kind LocKind; + QualType Type; +}; + +/// A storage location that is not subdivided further for the purposes of +/// abstract interpretation. For example: `int`, `int*`, `int&`. +class ScalarStorageLocation final : public StorageLocation { +public: + explicit ScalarStorageLocation(QualType Type) + : StorageLocation(Kind::Scalar, Type) {} + + static bool classof(const StorageLocation *Loc) { + return Loc->getKind() == Kind::Scalar; + } +}; + +/// A storage location which is subdivided into smaller storage locations that +/// can be traced independently by abstract interpretation. For example: a +/// struct with public members. +class AggregateStorageLocation final : public StorageLocation { +public: + explicit AggregateStorageLocation(QualType Type) + : AggregateStorageLocation( + Type, llvm::DenseMap()) {} + + AggregateStorageLocation( + QualType Type, + llvm::DenseMap Children) + : StorageLocation(Kind::Aggregate, Type), Children(std::move(Children)) {} + + static bool classof(const StorageLocation *Loc) { + return Loc->getKind() == Kind::Aggregate; + } + + /// Returns the child storage location for `D`. 
+ StorageLocation &getChild(const ValueDecl &D) const { + auto It = Children.find(&D); + assert(It != Children.end()); + return *It->second; + } + +private: + llvm::DenseMap Children; +}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_STORAGELOCATION_H diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h new file mode 100644 index 000000000000..a12674a173be --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h @@ -0,0 +1,33 @@ +//===-- Transfer.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a transfer function that evaluates a program statement and +// updates an environment accordingly. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TRANSFER_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TRANSFER_H + +#include "clang/AST/Stmt.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" + +namespace clang { +namespace dataflow { + +/// Evaluates `S` and updates `Env` accordingly. +/// +/// Requirements: +/// +/// The type of `S` must not be `ParenExpr`. 
+void transfer(const Stmt &S, Environment &Env); + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TRANSFER_H diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h index 9290609068d7..65875445a86b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H +#include #include #include "clang/AST/ASTContext.h" @@ -75,6 +76,9 @@ struct TypeErasedDataflowAnalysisState { /// Model of the state of the program (store and heap). Environment Env; + + TypeErasedDataflowAnalysisState(TypeErasedLattice Lattice, Environment Env) + : Lattice(std::move(Lattice)), Env(std::move(Env)) {} }; /// Transfers the state of a basic block by evaluating each of its statements in diff --git a/clang/include/clang/Analysis/FlowSensitive/Value.h b/clang/include/clang/Analysis/FlowSensitive/Value.h new file mode 100644 index 000000000000..d1de2b64fd95 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/Value.h @@ -0,0 +1,117 @@ +//===-- Value.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for values computed by abstract interpretation +// during dataflow analysis. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_VALUE_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_VALUE_H + +#include "clang/AST/Decl.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "llvm/ADT/DenseMap.h" +#include +#include + +namespace clang { +namespace dataflow { + +/// Base class for all values computed by abstract interpretation. +class Value { +public: + enum class Kind { Integer, Reference, Pointer, Struct }; + + explicit Value(Kind ValKind) : ValKind(ValKind) {} + + virtual ~Value() = default; + + Kind getKind() const { return ValKind; } + +private: + Kind ValKind; +}; + +/// Models an integer. +class IntegerValue : public Value { +public: + explicit IntegerValue() : Value(Kind::Integer) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Integer; + } +}; + +/// Base class for values that refer to storage locations. +class IndirectionValue : public Value { +public: + /// Constructs a value that refers to `PointeeLoc`. + explicit IndirectionValue(Kind ValueKind, StorageLocation &PointeeLoc) + : Value(ValueKind), PointeeLoc(PointeeLoc) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Reference || Val->getKind() == Kind::Pointer; + } + + StorageLocation &getPointeeLoc() const { return PointeeLoc; } + +private: + StorageLocation &PointeeLoc; +}; + +/// Models a dereferenced pointer. For example, a reference in C++ or an lvalue +/// in C. +class ReferenceValue final : public IndirectionValue { +public: + explicit ReferenceValue(StorageLocation &PointeeLoc) + : IndirectionValue(Kind::Reference, PointeeLoc) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Reference; + } +}; + +/// Models a symbolic pointer. Specifically, any value of type `T*`. 
+class PointerValue final : public IndirectionValue { +public: + explicit PointerValue(StorageLocation &PointeeLoc) + : IndirectionValue(Kind::Pointer, PointeeLoc) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Pointer; + } +}; + +/// Models a value of `struct` or `class` type. +class StructValue final : public Value { +public: + StructValue() : StructValue(llvm::DenseMap()) {} + + explicit StructValue(llvm::DenseMap Children) + : Value(Kind::Struct), Children(std::move(Children)) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Struct; + } + + /// Returns the child value for `D`. + Value &getChild(const ValueDecl &D) const { + auto It = Children.find(&D); + assert(It != Children.end()); + return *It->second; + } + +private: + const llvm::DenseMap Children; +}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_VALUE_H diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt index e5a8f73c961d..6ac9b97d7e98 100644 --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -1,5 +1,7 @@ add_clang_library(clangAnalysisFlowSensitive ControlFlowContext.cpp + DataflowEnvironment.cpp + Transfer.cpp TypeErasedDataflowAnalysis.cpp LINK_LIBS diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp new file mode 100644 index 000000000000..4d1e5477422e --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -0,0 +1,191 @@ +//===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an Environment class that is used by dataflow analyses +// that run over Control-Flow Graphs (CFGs) to keep track of the state of the +// program at given program points. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include +#include + +namespace clang { +namespace dataflow { + +/// Returns a map consisting of key-value entries that are present in both maps. +template +llvm::DenseMap intersectDenseMaps(const llvm::DenseMap &Map1, + const llvm::DenseMap &Map2) { + llvm::DenseMap Result; + for (auto &Entry : Map1) { + auto It = Map2.find(Entry.first); + if (It != Map2.end() && Entry.second == It->second) + Result.insert({Entry.first, Entry.second}); + } + return Result; +} + +bool Environment::operator==(const Environment &Other) const { + assert(DACtx == Other.DACtx); + return DeclToLoc == Other.DeclToLoc && LocToVal == Other.LocToVal; +} + +LatticeJoinEffect Environment::join(const Environment &Other) { + assert(DACtx == Other.DACtx); + + auto Effect = LatticeJoinEffect::Unchanged; + + const unsigned DeclToLocSizeBefore = DeclToLoc.size(); + DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc); + if (DeclToLocSizeBefore != DeclToLoc.size()) + Effect = LatticeJoinEffect::Changed; + + // FIXME: Add support for joining distinct values that are assigned to the + // same storage locations in `LocToVal` and `Other.LocToVal`. 
+ const unsigned LocToValSizeBefore = LocToVal.size(); + LocToVal = intersectDenseMaps(LocToVal, Other.LocToVal); + if (LocToValSizeBefore != LocToVal.size()) + Effect = LatticeJoinEffect::Changed; + + return Effect; +} + +StorageLocation &Environment::createStorageLocation(QualType Type) { + assert(!Type.isNull()); + if (Type->isStructureOrClassType()) { + // FIXME: Explore options to avoid eager initialization of fields as some of + // them might not be needed for a particular analysis. + llvm::DenseMap FieldLocs; + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) { + FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); + } + return DACtx->takeOwnership( + std::make_unique(Type, std::move(FieldLocs))); + } + return DACtx->takeOwnership(std::make_unique(Type)); +} + +StorageLocation &Environment::createStorageLocation(const VarDecl &D) { + // Evaluated declarations are always assigned the same storage locations to + // ensure that the environment stabilizes across loop iterations. Storage + // locations for evaluated declarations are stored in the analysis context. + if (auto *Loc = DACtx->getStorageLocation(D)) + return *Loc; + auto &Loc = createStorageLocation(D.getType()); + DACtx->setStorageLocation(D, Loc); + return Loc; +} + +void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { + assert(DeclToLoc.find(&D) == DeclToLoc.end()); + DeclToLoc[&D] = &Loc; +} + +StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { + auto It = DeclToLoc.find(&D); + return It == DeclToLoc.end() ? nullptr : It->second; +} + +void Environment::setValue(const StorageLocation &Loc, Value &Value) { + LocToVal[&Loc] = &Value; +} + +Value *Environment::getValue(const StorageLocation &Loc) const { + auto It = LocToVal.find(&Loc); + return It == LocToVal.end() ? 
nullptr : It->second; +} + +Value *Environment::initValueInStorageLocation(const StorageLocation &Loc, + QualType Type) { + llvm::DenseSet Visited; + return initValueInStorageLocationUnlessSelfReferential(Loc, Type, Visited); +} + +Value *Environment::initValueInStorageLocationUnlessSelfReferential( + const StorageLocation &Loc, QualType Type, + llvm::DenseSet &Visited) { + assert(!Type.isNull()); + + if (Type->isIntegerType()) { + auto &Value = DACtx->takeOwnership(std::make_unique()); + setValue(Loc, Value); + return &Value; + } + + if (Type->isReferenceType()) { + QualType PointeeType = Type->getAs()->getPointeeType(); + auto &PointeeLoc = createStorageLocation(PointeeType); + + if (!Visited.contains(PointeeType.getCanonicalType())) { + Visited.insert(PointeeType.getCanonicalType()); + initValueInStorageLocationUnlessSelfReferential(PointeeLoc, PointeeType, + Visited); + Visited.erase(PointeeType.getCanonicalType()); + } + + auto &Value = + DACtx->takeOwnership(std::make_unique(PointeeLoc)); + setValue(Loc, Value); + return &Value; + } + + if (Type->isPointerType()) { + QualType PointeeType = Type->getAs()->getPointeeType(); + auto &PointeeLoc = createStorageLocation(PointeeType); + + if (!Visited.contains(PointeeType.getCanonicalType())) { + Visited.insert(PointeeType.getCanonicalType()); + initValueInStorageLocationUnlessSelfReferential(PointeeLoc, PointeeType, + Visited); + Visited.erase(PointeeType.getCanonicalType()); + } + + auto &Value = + DACtx->takeOwnership(std::make_unique(PointeeLoc)); + setValue(Loc, Value); + return &Value; + } + + if (Type->isStructureOrClassType()) { + auto *AggregateLoc = cast(&Loc); + + llvm::DenseMap FieldValues; + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) { + assert(Field != nullptr); + + QualType FieldType = Field->getType(); + if (Visited.contains(FieldType.getCanonicalType())) + continue; + + Visited.insert(FieldType.getCanonicalType()); + FieldValues.insert( + {Field, 
initValueInStorageLocationUnlessSelfReferential( + AggregateLoc->getChild(*Field), FieldType, Visited)}); + Visited.erase(FieldType.getCanonicalType()); + } + + auto &Value = DACtx->takeOwnership( + std::make_unique(std::move(FieldValues))); + setValue(Loc, Value); + return &Value; + } + + return nullptr; +} + +} // namespace dataflow +} // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp new file mode 100644 index 000000000000..80005d1de9a1 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -0,0 +1,72 @@ +//===-- Transfer.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines transfer functions that evaluate program statements and +// update an environment accordingly. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Transfer.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "llvm/Support/Casting.h" +#include + +namespace clang { +namespace dataflow { + +class TransferVisitor : public ConstStmtVisitor { +public: + TransferVisitor(Environment &Env) : Env(Env) {} + + void VisitDeclStmt(const DeclStmt *S) { + // FIXME: Add support for group decls, e.g: `int a, b;` + if (S->isSingleDecl()) { + if (const auto *D = dyn_cast(S->getSingleDecl())) { + visitVarDecl(*D); + } + } + } + + // FIXME: Add support for: + // - BinaryOperator + // - CallExpr + // - CXXBindTemporaryExpr + // - CXXBoolLiteralExpr + // - CXXConstructExpr + // - CXXFunctionalCastExpr + // - CXXOperatorCallExpr + // - CXXStaticCastExpr + // - CXXThisExpr + // - DeclRefExpr + // - ImplicitCastExpr + // - MaterializeTemporaryExpr + // - MemberExpr + // - UnaryOperator + +private: + void visitVarDecl(const VarDecl &D) { + auto &Loc = Env.createStorageLocation(D); + Env.setStorageLocation(D, Loc); + Env.initValueInStorageLocation(Loc, D.getType()); + } + + Environment &Env; +}; + +void transfer(const Stmt &S, Environment &Env) { + assert(!isa(&S)); + TransferVisitor(Env).Visit(&S); +} + +} // namespace dataflow +} // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index ef967c501bf7..deb73b5265ed 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -18,6 +18,7 @@ #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 
+#include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -39,12 +40,6 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( std::vector> &BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis) { - // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` - // to enable building analyses like computation of dominators that initialize - // the state of each basic block differently. - TypeErasedDataflowAnalysisState State = {Analysis.typeErasedInitialElement(), - InitEnv}; - llvm::DenseSet Preds; Preds.insert(Block.pred_begin(), Block.pred_end()); if (Block.getTerminator().isTemporaryDtorsBranch()) { @@ -77,6 +72,7 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( } } + llvm::Optional MaybeState; for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. if (!Pred || Pred->hasNoReturnElement()) @@ -91,10 +87,20 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( const TypeErasedDataflowAnalysisState &PredState = MaybePredState.getValue(); - Analysis.joinTypeErased(State.Lattice, PredState.Lattice); - State.Env.join(PredState.Env); + if (MaybeState.hasValue()) { + Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); + MaybeState->Env.join(PredState.Env); + } else { + MaybeState = PredState; + } } - return State; + if (!MaybeState.hasValue()) { + // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` + // to enable building analyses like computation of dominators that + // initialize the state of each basic block differently. 
+ MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv); + } + return *MaybeState; } TypeErasedDataflowAnalysisState transferBlock( @@ -109,16 +115,18 @@ TypeErasedDataflowAnalysisState transferBlock( computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); for (const CFGElement &Element : Block) { // FIXME: Evaluate other kinds of `CFGElement`. - const llvm::Optional Stmt = Element.getAs(); - if (!Stmt.hasValue()) + const llvm::Optional CfgStmt = Element.getAs(); + if (!CfgStmt.hasValue()) continue; - // FIXME: Evaluate the statement contained in `Stmt`. + const Stmt *S = CfgStmt.getValue().getStmt(); + assert(S != nullptr); + + transfer(*S, State.Env); + State.Lattice = Analysis.transferTypeErased(S, State.Lattice, State.Env); - State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(), - State.Lattice, State.Env); if (HandleTransferredStmt != nullptr) - HandleTransferredStmt(Stmt.getValue(), State); + HandleTransferredStmt(CfgStmt.getValue(), State); } return State; } diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index 85e1c4473c6d..90c7be6b9068 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -7,6 +7,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests SingleVarConstantPropagationTest.cpp TestingSupport.cpp TestingSupportTest.cpp + TransferTest.cpp TypeErasedDataflowAnalysisTest.cpp ) diff --git a/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h b/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h new file mode 100644 index 000000000000..eb045a24d2e4 --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h @@ -0,0 +1,55 @@ +//===-- NoopAnalysis.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a NoopAnalysis class that is used by dataflow analysis +// tests. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_UNITTESTS_ANALYSIS_FLOWSENSITIVE_NOOPANALYSIS_H +#define LLVM_CLANG_UNITTESTS_ANALYSIS_FLOWSENSITIVE_NOOPANALYSIS_H + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include + +namespace clang { +namespace dataflow { + +class NoopLattice { +public: + bool operator==(const NoopLattice &) const { return true; } + + LatticeJoinEffect join(const NoopLattice &) { + return LatticeJoinEffect::Unchanged; + } +}; + +inline std::ostream &operator<<(std::ostream &OS, const NoopLattice &) { + return OS << "noop"; +} + +class NoopAnalysis : public DataflowAnalysis { +public: + NoopAnalysis(ASTContext &Context) + : DataflowAnalysis(Context) {} + + static NoopLattice initialElement() { return {}; } + + NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) { + return {}; + } +}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_UNITTESTS_ANALYSIS_FLOWSENSITIVE_NOOPANALYSIS_H diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index 632fe73b26b0..f5137959cfa9 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -98,7 +98,8 @@ void checkDataflow( auto CFCtx = ControlFlowContext::build(F, F->getBody(), &F->getASTContext()); ASSERT_TRUE((bool)CFCtx) << "Could not build ControlFlowContext."; 
- Environment Env; + DataflowAnalysisContext DACtx; + Environment Env(DACtx); auto Analysis = MakeAnalysis(Context, Env); llvm::Expected> diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp index 5ee54865b305..ff9d65bd7ea2 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp @@ -1,4 +1,5 @@ #include "TestingSupport.h" +#include "NoopAnalysis.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -19,32 +20,6 @@ using ::testing::IsEmpty; using ::testing::Pair; using ::testing::UnorderedElementsAre; -class NoopLattice { -public: - bool operator==(const NoopLattice &) const { return true; } - - LatticeJoinEffect join(const NoopLattice &) { - return LatticeJoinEffect::Unchanged; - } -}; - -std::ostream &operator<<(std::ostream &OS, const NoopLattice &S) { - OS << "noop"; - return OS; -} - -class NoopAnalysis : public DataflowAnalysis { -public: - NoopAnalysis(ASTContext &Context) - : DataflowAnalysis(Context) {} - - static NoopLattice initialElement() { return {}; } - - NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) { - return {}; - } -}; - template const FunctionDecl *findTargetFunc(ASTContext &Context, T FunctionMatcher) { auto TargetMatcher = diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp new file mode 100644 index 000000000000..1f8d6392355e --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -0,0 +1,540 @@ +//===- unittests/Analysis/FlowSensitive/TransferTest.cpp ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "NoopAnalysis.h" +#include "TestingSupport.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include +#include + +namespace { + +using namespace clang; +using namespace dataflow; +using ::testing::_; +using ::testing::ElementsAre; +using ::testing::IsNull; +using ::testing::NotNull; +using ::testing::Pair; + +class TransferTest : public ::testing::Test { +protected: + template + void runDataflow(llvm::StringRef Code, Matcher Match) { + test::checkDataflow( + Code, "target", + [](ASTContext &C, Environment &) { return NoopAnalysis(C); }, + [&Match](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { Match(Results, ASTCtx); }, + {"-fsyntax-only", "-std=c++17"}); + } +}; + +/// Returns the `ValueDecl` for the given identifier. +/// +/// Requirements: +/// +/// `Name` must be unique in `ASTCtx`. 
+static const ValueDecl *findValueDecl(ASTContext &ASTCtx, + llvm::StringRef Name) { + auto TargetNodes = ast_matchers::match( + ast_matchers::valueDecl(ast_matchers::hasName(Name)).bind("v"), ASTCtx); + assert(TargetNodes.size() == 1 && "Name must be unique"); + auto *const Result = ast_matchers::selectFirst("v", TargetNodes); + assert(Result != nullptr); + return Result; +} + +TEST_F(TransferTest, IntVarDecl) { + std::string Code = R"( + void target() { + int foo; + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const StorageLocation *FooLoc = Env.getStorageLocation(*FooDecl); + ASSERT_TRUE(isa_and_nonnull(FooLoc)); + + const Value *FooVal = Env.getValue(*FooLoc); + ASSERT_TRUE(isa_and_nonnull(FooVal)); + }); +} + +TEST_F(TransferTest, StructVarDecl) { + std::string Code = R"( + struct Foo { + int Bar; + }; + + void target() { + Foo foo; + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + ASSERT_TRUE(FooDecl->getType()->isStructureType()); + auto FooFields = FooDecl->getType()->getAsRecordDecl()->fields(); + + FieldDecl *BarDecl = nullptr; + for (FieldDecl *Field : FooFields) { + if (Field->getNameAsString() == "Bar") { + BarDecl = Field; + } else { + FAIL() << "Unexpected field: " << Field->getNameAsString(); + } + } + ASSERT_THAT(BarDecl, NotNull()); + + const auto *FooLoc = + cast(Env.getStorageLocation(*FooDecl)); + const auto *BarLoc = + cast(&FooLoc->getChild(*BarDecl)); + + const auto *FooVal = cast(Env.getValue(*FooLoc)); + const auto 
*BarVal = cast(&FooVal->getChild(*BarDecl)); + ASSERT_EQ(Env.getValue(*BarLoc), BarVal); + }); +} + +TEST_F(TransferTest, ClassVarDecl) { + std::string Code = R"( + class Foo { + int Bar; + }; + + void target() { + Foo foo; + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + ASSERT_TRUE(FooDecl->getType()->isClassType()); + auto FooFields = FooDecl->getType()->getAsRecordDecl()->fields(); + + FieldDecl *BarDecl = nullptr; + for (FieldDecl *Field : FooFields) { + if (Field->getNameAsString() == "Bar") { + BarDecl = Field; + } else { + FAIL() << "Unexpected field: " << Field->getNameAsString(); + } + } + ASSERT_THAT(BarDecl, NotNull()); + + const auto *FooLoc = + cast(Env.getStorageLocation(*FooDecl)); + const auto *BarLoc = + cast(&FooLoc->getChild(*BarDecl)); + + const auto *FooVal = cast(Env.getValue(*FooLoc)); + const auto *BarVal = cast(&FooVal->getChild(*BarDecl)); + ASSERT_EQ(Env.getValue(*BarLoc), BarVal); + }); +} + +TEST_F(TransferTest, ReferenceVarDecl) { + std::string Code = R"( + struct Foo {}; + + Foo& getFoo(); + + void target() { + Foo& foo = getFoo(); + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const StorageLocation *FooLoc = Env.getStorageLocation(*FooDecl); + ASSERT_TRUE(isa_and_nonnull(FooLoc)); + + const ReferenceValue *FooVal = + cast(Env.getValue(*FooLoc)); + const StorageLocation &FooPointeeLoc = FooVal->getPointeeLoc(); + ASSERT_TRUE(isa(&FooPointeeLoc)); + + const Value *FooPointeeVal = Env.getValue(FooPointeeLoc); + 
ASSERT_TRUE(isa_and_nonnull(FooPointeeVal)); + }); +} + +TEST_F(TransferTest, SelfReferentialReferenceVarDecl) { + std::string Code = R"( + struct Foo; + + struct Baz {}; + + struct Bar { + Foo& FooRef; + Foo* FooPtr; + Baz& BazRef; + Baz* BazPtr; + }; + + struct Foo { + Bar& Bar; + }; + + Foo& getFoo(); + + void target() { + Foo& foo = getFoo(); + // [[p]] + } + )"; + runDataflow(Code, [](llvm::ArrayRef>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + ASSERT_TRUE(FooDecl->getType()->isReferenceType()); + ASSERT_TRUE(FooDecl->getType().getNonReferenceType()->isStructureType()); + const auto FooFields = + FooDecl->getType().getNonReferenceType()->getAsRecordDecl()->fields(); + + FieldDecl *BarDecl = nullptr; + for (FieldDecl *Field : FooFields) { + if (Field->getNameAsString() == "Bar") { + BarDecl = Field; + } else { + FAIL() << "Unexpected field: " << Field->getNameAsString(); + } + } + ASSERT_THAT(BarDecl, NotNull()); + + ASSERT_TRUE(BarDecl->getType()->isReferenceType()); + ASSERT_TRUE(BarDecl->getType().getNonReferenceType()->isStructureType()); + const auto BarFields = + BarDecl->getType().getNonReferenceType()->getAsRecordDecl()->fields(); + + FieldDecl *FooRefDecl = nullptr; + FieldDecl *FooPtrDecl = nullptr; + FieldDecl *BazRefDecl = nullptr; + FieldDecl *BazPtrDecl = nullptr; + for (FieldDecl *Field : BarFields) { + if (Field->getNameAsString() == "FooRef") { + FooRefDecl = Field; + } else if (Field->getNameAsString() == "FooPtr") { + FooPtrDecl = Field; + } else if (Field->getNameAsString() == "BazRef") { + BazRefDecl = Field; + } else if (Field->getNameAsString() == "BazPtr") { + BazPtrDecl = Field; + } else { + FAIL() << "Unexpected field: " << Field->getNameAsString(); + } + } + ASSERT_THAT(FooRefDecl, NotNull()); + ASSERT_THAT(FooPtrDecl, NotNull()); + 
ASSERT_THAT(BazRefDecl, NotNull()); + ASSERT_THAT(BazPtrDecl, NotNull()); + + const auto *FooLoc = + cast(Env.getStorageLocation(*FooDecl)); + const auto *FooVal = cast(Env.getValue(*FooLoc)); + const auto *FooPointeeVal = + cast(Env.getValue(FooVal->getPointeeLoc())); + + const auto *BarVal = + cast(&FooPointeeVal->getChild(*BarDecl)); + const auto *BarPointeeVal = + cast(Env.getValue(BarVal->getPointeeLoc())); + + const auto *FooRefVal = + cast(&BarPointeeVal->getChild(*FooRefDecl)); + const StorageLocation &FooRefPointeeLoc = FooRefVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(FooRefPointeeLoc), IsNull()); + + const auto *FooPtrVal = + cast(&BarPointeeVal->getChild(*FooPtrDecl)); + const StorageLocation &FooPtrPointeeLoc = FooPtrVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(FooPtrPointeeLoc), IsNull()); + + const auto *BazRefVal = + cast(&BarPointeeVal->getChild(*BazRefDecl)); + const StorageLocation &BazRefPointeeLoc = BazRefVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(BazRefPointeeLoc), NotNull()); + + const auto *BazPtrVal = + cast(&BarPointeeVal->getChild(*BazPtrDecl)); + const StorageLocation &BazPtrPointeeLoc = BazPtrVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(BazPtrPointeeLoc), NotNull()); + }); +} + +TEST_F(TransferTest, PointerVarDecl) { + std::string Code = R"( + struct Foo {}; + + Foo* getFoo(); + + void target() { + Foo* foo = getFoo(); + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const StorageLocation *FooLoc = Env.getStorageLocation(*FooDecl); + ASSERT_TRUE(isa_and_nonnull(FooLoc)); + + const PointerValue *FooVal = cast(Env.getValue(*FooLoc)); + const StorageLocation &FooPointeeLoc = FooVal->getPointeeLoc(); + ASSERT_TRUE(isa(&FooPointeeLoc)); + + const Value 
*FooPointeeVal = Env.getValue(FooPointeeLoc); + ASSERT_TRUE(isa_and_nonnull(FooPointeeVal)); + }); +} + +TEST_F(TransferTest, SelfReferentialPointerVarDecl) { + std::string Code = R"( + struct Foo; + + struct Baz {}; + + struct Bar { + Foo& FooRef; + Foo* FooPtr; + Baz& BazRef; + Baz* BazPtr; + }; + + struct Foo { + Bar* Bar; + }; + + Foo* getFoo(); + + void target() { + Foo* foo = getFoo(); + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + ASSERT_TRUE(FooDecl->getType()->isPointerType()); + ASSERT_TRUE(FooDecl->getType() + ->getAs() + ->getPointeeType() + ->isStructureType()); + const auto FooFields = FooDecl->getType() + ->getAs() + ->getPointeeType() + ->getAsRecordDecl() + ->fields(); + + FieldDecl *BarDecl = nullptr; + for (FieldDecl *Field : FooFields) { + if (Field->getNameAsString() == "Bar") { + BarDecl = Field; + } else { + FAIL() << "Unexpected field: " << Field->getNameAsString(); + } + } + ASSERT_THAT(BarDecl, NotNull()); + + ASSERT_TRUE(BarDecl->getType()->isPointerType()); + ASSERT_TRUE(BarDecl->getType() + ->getAs() + ->getPointeeType() + ->isStructureType()); + const auto BarFields = BarDecl->getType() + ->getAs() + ->getPointeeType() + ->getAsRecordDecl() + ->fields(); + + FieldDecl *FooRefDecl = nullptr; + FieldDecl *FooPtrDecl = nullptr; + FieldDecl *BazRefDecl = nullptr; + FieldDecl *BazPtrDecl = nullptr; + for (FieldDecl *Field : BarFields) { + if (Field->getNameAsString() == "FooRef") { + FooRefDecl = Field; + } else if (Field->getNameAsString() == "FooPtr") { + FooPtrDecl = Field; + } else if (Field->getNameAsString() == "BazRef") { + BazRefDecl = Field; + } else if (Field->getNameAsString() == "BazPtr") { + BazPtrDecl = Field; + } else { + FAIL() << "Unexpected field: " << 
Field->getNameAsString(); + } + } + ASSERT_THAT(FooRefDecl, NotNull()); + ASSERT_THAT(FooPtrDecl, NotNull()); + ASSERT_THAT(BazRefDecl, NotNull()); + ASSERT_THAT(BazPtrDecl, NotNull()); + + const auto *FooLoc = + cast(Env.getStorageLocation(*FooDecl)); + const auto *FooVal = cast(Env.getValue(*FooLoc)); + const auto *FooPointeeVal = + cast(Env.getValue(FooVal->getPointeeLoc())); + + const auto *BarVal = + cast(&FooPointeeVal->getChild(*BarDecl)); + const auto *BarPointeeVal = + cast(Env.getValue(BarVal->getPointeeLoc())); + + const auto *FooRefVal = + cast(&BarPointeeVal->getChild(*FooRefDecl)); + const StorageLocation &FooRefPointeeLoc = FooRefVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(FooRefPointeeLoc), IsNull()); + + const auto *FooPtrVal = + cast(&BarPointeeVal->getChild(*FooPtrDecl)); + const StorageLocation &FooPtrPointeeLoc = FooPtrVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(FooPtrPointeeLoc), IsNull()); + + const auto *BazRefVal = + cast(&BarPointeeVal->getChild(*BazRefDecl)); + const StorageLocation &BazRefPointeeLoc = BazRefVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(BazRefPointeeLoc), NotNull()); + + const auto *BazPtrVal = + cast(&BarPointeeVal->getChild(*BazPtrDecl)); + const StorageLocation &BazPtrPointeeLoc = BazPtrVal->getPointeeLoc(); + ASSERT_THAT(Env.getValue(BazPtrPointeeLoc), NotNull()); + }); +} + +TEST_F(TransferTest, JoinVarDecl) { + std::string Code = R"( + void target(bool b) { + int foo; + // [[p1]] + if (b) { + int bar; + // [[p2]] + } else { + int baz; + // [[p3]] + } + (void)0; + // [[p4]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p4", _), Pair("p3", _), + Pair("p2", _), Pair("p1", _))); + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "bar"); + ASSERT_THAT(BarDecl, NotNull()); + + const ValueDecl *BazDecl = 
findValueDecl(ASTCtx, "baz"); + ASSERT_THAT(BazDecl, NotNull()); + + const Environment &Env1 = Results[3].second.Env; + const StorageLocation *FooLoc = Env1.getStorageLocation(*FooDecl); + ASSERT_THAT(FooLoc, NotNull()); + ASSERT_THAT(Env1.getStorageLocation(*BarDecl), IsNull()); + ASSERT_THAT(Env1.getStorageLocation(*BazDecl), IsNull()); + + const Environment &Env2 = Results[2].second.Env; + ASSERT_EQ(Env2.getStorageLocation(*FooDecl), FooLoc); + ASSERT_THAT(Env2.getStorageLocation(*BarDecl), NotNull()); + ASSERT_THAT(Env2.getStorageLocation(*BazDecl), IsNull()); + + const Environment &Env3 = Results[1].second.Env; + ASSERT_EQ(Env3.getStorageLocation(*FooDecl), FooLoc); + ASSERT_THAT(Env3.getStorageLocation(*BarDecl), IsNull()); + ASSERT_THAT(Env3.getStorageLocation(*BazDecl), NotNull()); + + const Environment &Env4 = Results[0].second.Env; + ASSERT_EQ(Env4.getStorageLocation(*FooDecl), FooLoc); + ASSERT_THAT(Env4.getStorageLocation(*BarDecl), IsNull()); + ASSERT_THAT(Env4.getStorageLocation(*BazDecl), IsNull()); + }); +} + +} // namespace diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index bbd840ca6743..77e48972a484 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -6,12 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "NoopAnalysis.h" #include "TestingSupport.h" #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include 
"clang/Tooling/Tooling.h" @@ -28,6 +30,8 @@ #include #include +namespace { + using namespace clang; using namespace dataflow; using ::testing::IsEmpty; @@ -50,7 +54,8 @@ class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback { ControlFlowContext::build(nullptr, Body, Result.Context)); AnalysisT Analysis(*Result.Context); - Environment Env; + DataflowAnalysisContext DACtx; + Environment Env(DACtx); BlockStates = runDataflowAnalysis(CFCtx, Analysis, Env); } @@ -75,27 +80,6 @@ runAnalysis(llvm::StringRef Code) { return Callback.BlockStates; } -class NoopLattice { -public: - bool operator==(const NoopLattice &) const { return true; } - - LatticeJoinEffect join(const NoopLattice &) { - return LatticeJoinEffect::Unchanged; - } -}; - -class NoopAnalysis : public DataflowAnalysis { -public: - NoopAnalysis(ASTContext &Context) - : DataflowAnalysis(Context) {} - - static NoopLattice initialElement() { return {}; } - - NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) { - return {}; - } -}; - TEST(DataflowAnalysisTest, NoopAnalysis) { auto BlockStates = runAnalysis(R"( void target() {} @@ -314,3 +298,5 @@ TEST_F(NoreturnDestructorTest, ConditionalOperatorNestedBranchReturns) { UnorderedElementsAre("baz", "foo")))))); // FIXME: Called functions at point `p` should contain only "foo". 
} + +} // namespace From fb7bea0a5ab96b8ba3f73653fefea881bfeae350 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 4 Jan 2022 09:23:00 +0000 Subject: [PATCH 489/992] [gn build] Port af7bc39ba17d --- .../gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn | 2 ++ .../secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn | 1 + 2 files changed, 3 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn index 26b620ea3f39..82fff377a7c1 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn @@ -7,6 +7,8 @@ static_library("FlowSensitive") { ] sources = [ "ControlFlowContext.cpp", + "DataflowEnvironment.cpp", + "Transfer.cpp", "TypeErasedDataflowAnalysis.cpp", ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn index 18cd5e4bc000..f89ec8081312 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn @@ -19,6 +19,7 @@ unittest("ClangAnalysisFlowSensitiveTests") { "SingleVarConstantPropagationTest.cpp", "TestingSupport.cpp", "TestingSupportTest.cpp", + "TransferTest.cpp", "TypeErasedDataflowAnalysisTest.cpp", ] } From c9dbf0f2a1e937283b0435b76ce41bcb343fffb8 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sun, 2 Jan 2022 17:25:40 +0100 Subject: [PATCH 490/992] [libc++] Fix __wrap_iter copy-assignment in constexpr contexts Fixes https://github.com/llvm/llvm-project/issues/52902 In debug mode during constant evaluation the iterator was never assigned. There seem to be no other instances of this bug. 
Reviewed By: Quuxplusone, Mordante, #libc, ldionne Spies: ldionne, libcxx-commits Differential Revision: https://reviews.llvm.org/D116346 --- libcxx/include/__iterator/wrap_iter.h | 5 ++-- .../string.iterators/iterators.pass.cpp | 23 ++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h index cfcc9857b3fc..5a386eec4b22 100644 --- a/libcxx/include/__iterator/wrap_iter.h +++ b/libcxx/include/__iterator/wrap_iter.h @@ -69,9 +69,10 @@ class __wrap_iter _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator=(const __wrap_iter& __x) { - if (this != _VSTD::addressof(__x) && !__libcpp_is_constant_evaluated()) + if (this != _VSTD::addressof(__x)) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); + if (!__libcpp_is_constant_evaluated()) + __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); __i = __x.__i; } return *this; diff --git a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp index c7c1eb25eeb6..187452b6020e 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp @@ -23,12 +23,13 @@ #include "test_macros.h" template -void test() +TEST_CONSTEXPR_CXX20 void test() { { // N3644 testing typename C::iterator ii1{}, ii2{}; typename C::iterator ii4 = ii1; typename C::const_iterator cii{}; + assert ( ii1 == ii2 ); assert ( ii1 == ii4 ); @@ -49,10 +50,17 @@ void test() assert (cii - ii1 == 0); assert (ii1 - cii == 0); } + { + C a; + typename C::iterator i1 = a.begin(); + typename C::iterator i2; + assert ( i1 != i2 ); + i2 = i1; + assert ( i1 == i2 ); + } } -int main(int, char**) -{ +TEST_CONSTEXPR_CXX20 bool test() { test(); #ifndef TEST_HAS_NO_WIDE_CHARACTERS test(); @@ -65,5 +73,14 @@ int main(int, char**) test(); test(); + return 
true; +} + +int main(int, char**) +{ + test(); +#if defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L + static_assert(test()); +#endif return 0; } From 961f51fdf04fd14f5dc5e7a6d53a5460249d947c Mon Sep 17 00:00:00 2001 From: Rosie Sumpter Date: Mon, 8 Nov 2021 13:15:45 +0000 Subject: [PATCH 491/992] [LoopVectorize][CostModel] Choose smaller VFs for in-loop reductions without loads/stores For loops that contain in-loop reductions but no loads or stores, large VFs are chosen because LoopVectorizationCostModel::getSmallestAndWidestTypes has no element types to check through and so returns the default widths (-1U for the smallest and 8 for the widest). This results in the widest VF being chosen for the following example, float s = 0; for (int i = 0; i < N; ++i) s += (float) i*i; which, for more computationally intensive loops, leads to large loop sizes when the operations end up being scalarized. In this patch, for the case where ElementTypesInLoop is empty, the widest type is determined by finding the smallest type used by recurrences in the loop instead of falling back to a default value of 8 bits. This results in the cost model choosing a more sensible VF for loops like the one above. 
Differential Revision: https://reviews.llvm.org/D113973 --- llvm/include/llvm/Analysis/IVDescriptors.h | 13 +++- llvm/lib/Analysis/IVDescriptors.cpp | 57 ++++++++++----- .../Transforms/Vectorize/LoopVectorize.cpp | 28 +++++-- .../AArch64/smallest-and-widest-types.ll | 73 ++++++++++++++++++- .../Transforms/LoopVectorize/X86/funclet.ll | 2 +- 5 files changed, 144 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 9858a46d16a2..dec488a6f26d 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -77,10 +77,12 @@ class RecurrenceDescriptor { RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, FastMathFlags FMF, Instruction *ExactFP, Type *RT, bool Signed, bool Ordered, - SmallPtrSetImpl &CI) + SmallPtrSetImpl &CI, + unsigned MinWidthCastToRecurTy) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), - IsOrdered(Ordered) { + IsOrdered(Ordered), + MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { CastInsts.insert(CI.begin(), CI.end()); } @@ -251,6 +253,11 @@ class RecurrenceDescriptor { /// recurrence. const SmallPtrSet &getCastInsts() const { return CastInsts; } + /// Returns the minimum width used by the recurrence in bits. + unsigned getMinWidthCastToRecurrenceTypeInBits() const { + return MinWidthCastToRecurrenceType; + } + /// Returns true if all source operands of the recurrence are SExtInsts. bool isSigned() const { return IsSigned; } @@ -291,6 +298,8 @@ class RecurrenceDescriptor { bool IsOrdered = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; + // The minimum width used by the recurrence. + unsigned MinWidthCastToRecurrenceType; }; /// A struct for saving information about induction variables. 
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index f5fa6748d053..9551eb48e231 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -161,19 +161,22 @@ static std::pair computeRecurrenceType(Instruction *Exit, /// Collect cast instructions that can be ignored in the vectorizer's cost /// model, given a reduction exit value and the minimal type in which the -/// reduction can be represented. -static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, - Type *RecurrenceType, - SmallPtrSetImpl &Casts) { +// reduction can be represented. Also search casts to the recurrence type +// to find the minimum width used by the recurrence. +static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, + Type *RecurrenceType, + SmallPtrSetImpl &Casts, + unsigned &MinWidthCastToRecurTy) { SmallVector Worklist; SmallPtrSet Visited; Worklist.push_back(Exit); + MinWidthCastToRecurTy = -1U; while (!Worklist.empty()) { Instruction *Val = Worklist.pop_back_val(); Visited.insert(Val); - if (auto *Cast = dyn_cast(Val)) + if (auto *Cast = dyn_cast(Val)) { if (Cast->getSrcTy() == RecurrenceType) { // If the source type of a cast instruction is equal to the recurrence // type, it will be eliminated, and should be ignored in the vectorizer @@ -181,7 +184,16 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, Casts.insert(Cast); continue; } - + if (Cast->getDestTy() == RecurrenceType) { + // The minimum width used by the recurrence is found by checking for + // casts on its operands. The minimum width is used by the vectorizer + // when finding the widest type for in-loop reductions without any + // loads/stores. + MinWidthCastToRecurTy = std::min( + MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits()); + continue; + } + } // Add all operands to the work list if they are loop-varying values that // we haven't yet visited. 
for (Value *O : cast(Val)->operands()) @@ -265,6 +277,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // Data used for determining if the recurrence has been type-promoted. Type *RecurrenceType = Phi->getType(); SmallPtrSet CastInsts; + unsigned MinWidthCastToRecurrenceType; Instruction *Start = Phi; bool IsSigned = false; @@ -500,21 +513,24 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, computeRecurrenceType(ExitInstruction, DB, AC, DT); if (ComputedType != RecurrenceType) return false; - - // The recurrence expression will be represented in a narrower type. If - // there are any cast instructions that will be unnecessary, collect them - // in CastInsts. Note that the 'and' instruction was already included in - // this list. - // - // TODO: A better way to represent this may be to tag in some way all the - // instructions that are a part of the reduction. The vectorizer cost - // model could then apply the recurrence type to these instructions, - // without needing a white list of instructions to ignore. - // This may also be useful for the inloop reductions, if it can be - // kept simple enough. - collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); } + // Collect cast instructions and the minimum width used by the recurrence. + // If the starting value is not the same as the phi node and the computed + // recurrence type is equal to the recurrence type, the recurrence expression + // will be represented in a narrower or wider type. If there are any cast + // instructions that will be unnecessary, collect them in CastsFromRecurTy. + // Note that the 'and' instruction was already included in this list. + // + // TODO: A better way to represent this may be to tag in some way all the + // instructions that are a part of the reduction. The vectorizer cost + // model could then apply the recurrence type to these instructions, + // without needing a white list of instructions to ignore. 
+ // This may also be useful for the inloop reductions, if it can be + // kept simple enough. + collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts, + MinWidthCastToRecurrenceType); + // We found a reduction var if we have reached the original phi node and we // only have a single instruction with out-of-loop users. @@ -524,7 +540,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // Save the description of this reduction variable. RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getExactFPMathInst(), RecurrenceType, - IsSigned, IsOrdered, CastInsts); + IsSigned, IsOrdered, CastInsts, + MinWidthCastToRecurrenceType); RedDes = RD; return true; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a277ee37d12c..a62bd4884fd6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5960,11 +5960,29 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { unsigned MinWidth = -1U; unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); - for (Type *T : ElementTypesInLoop) { - MinWidth = std::min( - MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); - MaxWidth = std::max( - MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); + // For in-loop reductions, no element types are added to ElementTypesInLoop + // if there are no loads/stores in the loop. In this case, check through the + // reduction variables to determine the maximum width. + if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) { + // Reset MaxWidth so that we can find the smallest type used by recurrences + // in the loop. 
+ MaxWidth = -1U; + for (auto &PhiDescriptorPair : Legal->getReductionVars()) { + const RecurrenceDescriptor &RdxDesc = PhiDescriptorPair.second; + // When finding the min width used by the recurrence we need to account + // for casts on the input operands of the recurrence. + MaxWidth = std::min( + MaxWidth, std::min( + RdxDesc.getMinWidthCastToRecurrenceTypeInBits(), + RdxDesc.getRecurrenceType()->getScalarSizeInBits())); + } + } else { + for (Type *T : ElementTypesInLoop) { + MinWidth = std::min( + MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); + MaxWidth = std::max( + MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); + } } return {MinWidth, MaxWidth}; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll index 1ae7dadeffd7..fec056ad7c12 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-target-instruction-cost=1 -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @@ -31,3 +31,74 @@ for.body: for.end: ret void } + +; For in-loop reductions with no loads or stores in the loop the widest type is +; determined by looking through the recurrences, which allows a sensible VF to be +; chosen. The following 3 cases check different combinations of widths. 
+ +; CHECK-LABEL: Checking a loop in "no_loads_stores_32" +; CHECK: The Smallest and Widest types: 4294967295 / 32 bits +; CHECK: Selecting VF: 4 + +define double @no_loads_stores_32(i32 %n) { +entry: + br label %for.body + +for.body: + %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %conv = sitofp i32 %i.08 to float + %conv1 = fpext float %conv to double + %add = fadd double %s.09, %conv1 + %inc = add nuw i32 %i.08, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + %.lcssa = phi double [ %add, %for.body ] + ret double %.lcssa +} + +; CHECK-LABEL: Checking a loop in "no_loads_stores_16" +; CHECK: The Smallest and Widest types: 4294967295 / 16 bits +; CHECK: Selecting VF: 8 + +define double @no_loads_stores_16() { +entry: + br label %for.body + +for.body: + %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.08 = phi i16 [ 0, %entry ], [ %inc, %for.body ] + %conv = sitofp i16 %i.08 to double + %add = fadd double %s.09, %conv + %inc = add nuw nsw i16 %i.08, 1 + %exitcond.not = icmp eq i16 %inc, 12345 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + %.lcssa = phi double [ %add, %for.body ] + ret double %.lcssa +} + +; CHECK-LABEL: Checking a loop in "no_loads_stores_8" +; CHECK: The Smallest and Widest types: 4294967295 / 8 bits +; CHECK: Selecting VF: 16 + +define float @no_loads_stores_8() { +entry: + br label %for.body + +for.body: + %s.09 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.08 = phi i8 [ 0, %entry ], [ %inc, %for.body ] + %conv = sitofp i8 %i.08 to float + %add = fadd float %s.09, %conv + %inc = add nuw nsw i8 %i.08, 1 + %exitcond.not = icmp eq i8 %inc, 12345 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + %.lcssa = phi float [ %add, %for.body ] + ret float %.lcssa +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll 
b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll index 88f15e7e1485..87df85a69195 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll @@ -33,7 +33,7 @@ unreachable: ; preds = %entry ; CHECK-LABEL: define void @test1( ; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null] -; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] +; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] declare x86_stdcallcc void @_CxxThrowException(i8*, i8*) From f4ef79306cee2b5866aff681174f16b816810c4a Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Tue, 4 Jan 2022 03:20:29 +0000 Subject: [PATCH 492/992] [AVR] Optimize int8 arithmetic right shift 6 bits Reviewed By: aykevl Differential Revision: https://reviews.llvm.org/D115593 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 42 ++++++++++++++++++++ llvm/lib/Target/AVR/AVRISelLowering.cpp | 5 +++ llvm/test/CodeGen/AVR/shift.ll | 12 +++++- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index cb85d73772c5..eaff501ebe3b 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -92,6 +92,7 @@ class AVRExpandPseudo : public MachineFunctionPass { /// Specific shift implementation. 
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); + bool expandASRB6Rd(Block &MBB, BlockIt MBBI); bool expandASRB7Rd(Block &MBB, BlockIt MBBI); bool expandLSLW4Rd(Block &MBB, BlockIt MBBI); bool expandLSRW4Rd(Block &MBB, BlockIt MBBI); @@ -1921,6 +1922,45 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { } } +bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(3).isDead(); + + // bst r24, 6 + // lsl r24 + // sbc r24, r24 + // bld r24, 0 + + buildMI(MBB, MBBI, AVR::BST) + .addReg(DstReg) + .addImm(6) + ->getOperand(2) + .setIsUndef(true); + + buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rd + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + buildMI(MBB, MBBI, AVR::SBCRdRr) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg, getKillRegState(DstIsKill)); + + buildMI(MBB, MBBI, AVR::BLD) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg, getKillRegState(DstIsKill)) + .addImm(0) + ->getOperand(3) + .setIsKill(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); @@ -1957,6 +1997,8 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned Imm = MI.getOperand(2).getImm(); switch (Imm) { + case 6: + return expandASRB6Rd(MBB, MBBI); case 7: return expandASRB7Rd(MBB, MBBI); default: diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 39fba74a1ec7..f3e74e843695 100644 --- 
a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -359,6 +359,11 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim, DAG.getConstant(7, dl, VT)); ShiftAmount = 0; + } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) { + // Optimize ASR when ShiftAmount == 6. + Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim, + DAG.getConstant(6, dl, VT)); + ShiftAmount = 0; } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) { // Optimize ASR when ShiftAmount == 7. Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim, diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll index 24bc369cf614..90e1b25bd762 100644 --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc < %s -march=avr -verify-machineinstrs | FileCheck %s ; Optimize for speed. ; CHECK-LABEL: shift_i8_i8_speed @@ -171,6 +171,16 @@ define i8 @lsr_i8_7(i8 %a) { ret i8 %result } +define i8 @asr_i8_6(i8 %a) { +; CHECK-LABEL: asr_i8_6 +; CHECK: bst r24, 6 +; CHECK-NEXT: lsl r24 +; CHECK-NEXT: sbc r24, r24 +; CHECK-NEXT: bld r24, 0 + %result = ashr i8 %a, 6 + ret i8 %result +} + define i8 @asr_i8_7(i8 %a) { ; CHECK-LABEL: asr_i8_7 ; CHECK: lsl r24 From f2b3e25f860ed029ddb9d96744a8158ba2a95b23 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 23 Dec 2021 01:26:49 +0100 Subject: [PATCH 493/992] [clangd] Add CompileFlags.Compiler option to override argv0 This is separate from --query-driver but can combine with it. 
Fixes https://github.com/clangd/clangd/issues/642 Differential Revision: https://reviews.llvm.org/D116196 --- clang-tools-extra/clangd/ConfigCompile.cpp | 10 ++++++++++ clang-tools-extra/clangd/ConfigFragment.h | 10 ++++++++++ clang-tools-extra/clangd/ConfigYAML.cpp | 4 ++++ .../clangd/unittests/ConfigCompileTests.cpp | 5 +++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index 4f7f90847433..18afdeb3cb5c 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -253,6 +253,16 @@ struct FragmentCompiler { } void compile(Fragment::CompileFlagsBlock &&F) { + if (F.Compiler) + Out.Apply.push_back( + [Compiler(std::move(**F.Compiler))](const Params &, Config &C) { + C.CompileFlags.Edits.push_back( + [Compiler](std::vector &Args) { + if (!Args.empty()) + Args.front() = Compiler; + }); + }); + if (!F.Remove.empty()) { auto Remove = std::make_shared(); for (auto &A : F.Remove) diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index 63d2d75a2262..31c4636efa0b 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -134,6 +134,16 @@ struct Fragment { /// /// This section modifies how the compile command is constructed. struct CompileFlagsBlock { + /// Override the compiler executable name to simulate. + /// + /// The name can affect how flags are parsed (clang++ vs clang). + /// If the executable name is in the --query-driver allowlist, then it will + /// be invoked to extract include paths. + /// + /// (That this simply replaces argv[0], and may mangle commands that use + /// more complicated drivers like ccache). + llvm::Optional> Compiler; + /// List of flags to append to the compile command. std::vector> Add; /// List of flags to remove from the compile command. 
diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index 6be11f199b48..0487c3281576 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -90,6 +90,10 @@ class Parser { void parse(Fragment::CompileFlagsBlock &F, Node &N) { DictParser Dict("CompileFlags", this); + Dict.handle("Compiler", [&](Node &N) { + if (auto Value = scalarValue(N, "Compiler")) + F.Compiler = std::move(*Value); + }); Dict.handle("Add", [&](Node &N) { if (auto Values = scalarValues(N)) F.Add = std::move(*Values); diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index 87d8b9d976f0..661784256af8 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -121,14 +121,15 @@ TEST_F(ConfigCompileTests, Condition) { } TEST_F(ConfigCompileTests, CompileCommands) { + Frag.CompileFlags.Compiler.emplace("tpc.exe"); Frag.CompileFlags.Add.emplace_back("-foo"); Frag.CompileFlags.Remove.emplace_back("--include-directory="); std::vector Argv = {"clang", "-I", "bar/", "--", "a.cc"}; EXPECT_TRUE(compileAndApply()); - EXPECT_THAT(Conf.CompileFlags.Edits, SizeIs(2)); + EXPECT_THAT(Conf.CompileFlags.Edits, SizeIs(3)); for (auto &Edit : Conf.CompileFlags.Edits) Edit(Argv); - EXPECT_THAT(Argv, ElementsAre("clang", "-foo", "--", "a.cc")); + EXPECT_THAT(Argv, ElementsAre("tpc.exe", "-foo", "--", "a.cc")); } TEST_F(ConfigCompileTests, CompilationDatabase) { From 20f8f46c60b39fb2c6b4371a03e580d0711e8d82 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 4 Jan 2022 11:50:17 +0100 Subject: [PATCH 494/992] [clangd] Fix selection on multi-dimensional array. This involves separating out the concepts of "which tokens should we descend into this node for" vs "which tokens should this node claim". 
Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D116218 --- clang-tools-extra/clangd/Selection.cpp | 25 ++++++++++++++++++- .../clangd/unittests/SelectionTests.cpp | 8 ++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index 0b10c7a3a6f9..2024228e2b58 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -58,6 +58,7 @@ void recordMetrics(const SelectionTree &S, const LangOptions &Lang) { SelectionUsedRecovery.record(0, LanguageLabel); // unused. } +// Return the range covering a node and all its children. SourceRange getSourceRange(const DynTypedNode &N) { // MemberExprs to implicitly access anonymous fields should not claim any // tokens for themselves. Given: @@ -702,7 +703,7 @@ class SelectionVisitor : public RecursiveASTVisitor { void pop() { Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); - claimRange(getSourceRange(N.ASTNode), N.Selected); + claimTokensFor(N.ASTNode, N.Selected); if (N.Selected == NoTokens) N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { @@ -744,6 +745,28 @@ class SelectionVisitor : public RecursiveASTVisitor { return SourceRange(); } + // Claim tokens for N, after processing its children. + // By default this claims all unclaimed tokens in getSourceRange(). + // We override this if we want to claim fewer tokens (e.g. there are gaps). + void claimTokensFor(const DynTypedNode &N, SelectionTree::Selection &Result) { + if (const auto *TL = N.get()) { + // e.g. EltType Foo[OuterSize][InnerSize]; + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ArrayTypeLoc (Outer) + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |-ArrayTypeLoc (Inner) + // ~~~~~~~ | |-RecordType + // ~~~~~~~~~ | `-Expr (InnerSize) + // ~~~~~~~~~ `-Expr (OuterSize) + // Inner ATL must not claim its whole SourceRange, or it clobbers Outer. 
+ if (TL->getAs()) { + claimRange(TL->getLocalSourceRange(), Result); + return; + } + // FIXME: maybe LocalSourceRange is a better default for TypeLocs. + // It doesn't seem to be usable for FunctionTypeLocs. + } + claimRange(getSourceRange(N), Result); + } + // Perform hit-testing of a complete Node against the selection. // This runs for every node in the AST, and must be fast in common cases. // This is usually called from pop(), so we can take children into account. diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index 6c6782a097db..971487c9cd27 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -318,6 +318,14 @@ TEST(SelectionTest, CommonAncestor) { {"[[st^ruct {int x;}]] y;", "CXXRecordDecl"}, {"[[struct {int x;} ^y]];", "VarDecl"}, {"struct {[[int ^x]];} y;", "FieldDecl"}, + + // Tricky case: nested ArrayTypeLocs have the same token range. + {"const int x = 1, y = 2; int array[^[[x]]][10][y];", "DeclRefExpr"}, + {"const int x = 1, y = 2; int array[x][10][^[[y]]];", "DeclRefExpr"}, + {"const int x = 1, y = 2; int array[x][^[[10]]][y];", "IntegerLiteral"}, + {"const int x = 1, y = 2; [[i^nt]] array[x][10][y];", "BuiltinTypeLoc"}, + {"void func(int x) { int v_array[^[[x]]][10]; }", "DeclRefExpr"}, + // FIXME: the AST has no location info for qualifiers. {"const [[a^uto]] x = 42;", "AutoTypeLoc"}, {"[[co^nst auto x = 42]];", "VarDecl"}, From ca044f5369c7c156c1c7d35601b09fe610cc73d3 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Tue, 4 Jan 2022 12:09:10 +0100 Subject: [PATCH 495/992] Revert "[clang-format][NFC] Code Tidies in UnwrappedLineFormatter" This reverts commit f014ab933f35805159021d2d0c856a3c9af21a85. 
These tests are failing with asan: clang/unittests:format_tests clang/unittests:format_tests clang-tools-extra/unittests:clang_move_tests clang/unittests:tooling_tests clang-tools-extra/test/clang-move:move-template-class.cpp.test clang-tools-extra/test/clang-move:move-multiple-classes.cpp.test clang-tools-extra/test/clang-move:move-used-helper-decls.cpp.test clang-tools-extra/clangd/unittests:clangd_tests clang/test/Format:access-modifiers.cpp.test clang/unittests:rename_tests clang/unittests:rename_tests --- clang/lib/Format/UnwrappedLineFormatter.cpp | 110 ++++++++++---------- 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 303150348ad8..5ba5958fbd53 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -211,12 +211,10 @@ class LineJoiner { const AnnotatedLine *TheLine = *I; if (TheLine->Last->is(TT_LineComment)) return 0; - const auto &NextLine = *I[1]; - const auto &PreviousLine = *I[-1]; - if (NextLine.Type == LT_Invalid || NextLine.First->MustBreakBefore) + if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) return 0; if (TheLine->InPPDirective && - (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) + (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)) return 0; if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) @@ -233,15 +231,15 @@ class LineJoiner { if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First == TheLine->Last && !Style.BraceWrapping.SplitEmptyFunction && - NextLine.First->is(tok::r_brace)) + I[1]->First->is(tok::r_brace)) return tryMergeSimpleBlock(I, E, Limit); // Handle empty record blocks where the brace has already been wrapped if (TheLine->Last->is(tok::l_brace) && TheLine->First == TheLine->Last && I != AnnotatedLines.begin()) { - bool EmptyBlock = NextLine.First->is(tok::r_brace); + bool EmptyBlock = I[1]->First->is(tok::r_brace); 
- const FormatToken *Tok = PreviousLine.First; + const FormatToken *Tok = I[-1]->First; if (Tok && Tok->is(tok::comment)) Tok = Tok->getNextNonComment(); @@ -269,7 +267,7 @@ class LineJoiner { bool MergeShortFunctions = Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - NextLine.First->is(tok::r_brace)) || + I[1]->First->is(tok::r_brace)) || (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly && TheLine->Level != 0); @@ -314,75 +312,73 @@ class LineJoiner { return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; } // Try to merge a control statement block with left brace unwrapped - if (TheLine->Last->is(tok::l_brace) && + if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last && TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { return Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never ? tryMergeSimpleBlock(I, E, Limit) : 0; } // Try to merge a control statement block with left brace wrapped - if (NextLine.First->is(tok::l_brace)) { - if ((TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, - tok::kw_for, tok::kw_switch, tok::kw_try, - tok::kw_do, TT_ForEachMacro) || - (TheLine->First->is(tok::r_brace) && TheLine->First->Next && - TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) && - Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_MultiLine) { - // If possible, merge the next line's wrapped left brace with the - // current line. Otherwise, leave it on the next line, as this is a - // multi-line control statement. - return (Style.ColumnLimit == 0 || - TheLine->Last->TotalLength <= Style.ColumnLimit) - ? 1 - : 0; - } - if (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, - tok::kw_for)) { - return (Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_Always) - ? 
tryMergeSimpleBlock(I, E, Limit) - : 0; - } - if (TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) && - Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_MultiLine) { - // This case if different from the upper BWACS_MultiLine processing - // in that a preceding r_brace is not on the same line as else/catch - // most likely because of BeforeElse/BeforeCatch set to true. - // If the line length doesn't fit ColumnLimit, leave l_brace on the - // next line to respect the BWACS_MultiLine. - return (Style.ColumnLimit == 0 || - TheLine->Last->TotalLength <= Style.ColumnLimit) - ? 1 - : 0; - } + if (I[1]->First->is(tok::l_brace) && + (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, + tok::kw_for, tok::kw_switch, tok::kw_try, + tok::kw_do, TT_ForEachMacro) || + (TheLine->First->is(tok::r_brace) && TheLine->First->Next && + TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) && + Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_MultiLine) { + // If possible, merge the next line's wrapped left brace with the current + // line. Otherwise, leave it on the next line, as this is a multi-line + // control statement. + return (Style.ColumnLimit == 0 || + TheLine->Last->TotalLength <= Style.ColumnLimit) + ? 1 + : 0; + } else if (I[1]->First->is(tok::l_brace) && + TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, + tok::kw_for)) { + return (Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_Always) + ? tryMergeSimpleBlock(I, E, Limit) + : 0; + } else if (I[1]->First->is(tok::l_brace) && + TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) && + Style.BraceWrapping.AfterControlStatement == + FormatStyle::BWACS_MultiLine) { + // This case if different from the upper BWACS_MultiLine processing + // in that a preceding r_brace is not on the same line as else/catch + // most likely because of BeforeElse/BeforeCatch set to true. 
+ // If the line length doesn't fit ColumnLimit, leave l_brace on the + // next line to respect the BWACS_MultiLine. + return (Style.ColumnLimit == 0 || + TheLine->Last->TotalLength <= Style.ColumnLimit) + ? 1 + : 0; } // Don't merge block with left brace wrapped after ObjC special blocks if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && - PreviousLine.First->is(tok::at) && PreviousLine.First->Next) { - tok::ObjCKeywordKind kwId = - PreviousLine.First->Next->Tok.getObjCKeywordID(); + I[-1]->First->is(tok::at) && I[-1]->First->Next) { + tok::ObjCKeywordKind kwId = I[-1]->First->Next->Tok.getObjCKeywordID(); if (kwId == clang::tok::objc_autoreleasepool || kwId == clang::tok::objc_synchronized) return 0; } // Don't merge block with left brace wrapped after case labels if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && - PreviousLine.First->isOneOf(tok::kw_case, tok::kw_default)) + I[-1]->First->isOneOf(tok::kw_case, tok::kw_default)) return 0; // Don't merge an empty template class or struct if SplitEmptyRecords // is defined. 
if (Style.BraceWrapping.SplitEmptyRecord && TheLine->Last->is(tok::l_brace) && I != AnnotatedLines.begin() && - PreviousLine.Last) { - const FormatToken *Previous = PreviousLine.Last; + I[-1]->Last) { + const FormatToken *Previous = I[-1]->Last; if (Previous) { if (Previous->is(tok::comment)) Previous = Previous->getPreviousNonComment(); if (Previous) { - if (Previous->is(tok::greater) && !PreviousLine.InPPDirective) + if (Previous->is(tok::greater) && !I[-1]->InPPDirective) return 0; if (Previous->is(tok::identifier)) { const FormatToken *PreviousPrevious = @@ -405,21 +401,21 @@ class LineJoiner { } if (Tok->isOneOf(tok::kw_class, tok::kw_struct)) { ShouldMerge = !Style.BraceWrapping.AfterClass || - (NextLine.First->is(tok::r_brace) && + (I[1]->First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyRecord); } else if (Tok->is(tok::kw_enum)) { ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else { ShouldMerge = !Style.BraceWrapping.AfterFunction || - (NextLine.First->is(tok::r_brace) && + (I[1]->First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyFunction); } return ShouldMerge ? tryMergeSimpleBlock(I, E, Limit) : 0; } // Try to merge a function block with left brace wrapped - if (NextLine.First->is(TT_FunctionLBrace) && + if (I[1]->First->is(TT_FunctionLBrace) && Style.BraceWrapping.AfterFunction) { - if (NextLine.Last->is(TT_LineComment)) + if (I[1]->Last->is(TT_LineComment)) return 0; // Check for Limit <= 2 to account for the " {". 
@@ -430,7 +426,7 @@ class LineJoiner { unsigned MergedLines = 0; if (MergeShortFunctions || (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - NextLine.First == NextLine.Last && I + 2 != E && + I[1]->First == I[1]->Last && I + 2 != E && I[2]->First->is(tok::r_brace))) { MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); // If we managed to merge the block, count the function header, which is From cb9ccd38c55f729a6bd7986bbdcb34755b774240 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 4 Jan 2022 12:07:37 +0100 Subject: [PATCH 496/992] [clangd] Move the selection decltype hack to getSourceRange. Previously, it was in canSafelySkipNode, which is only used to decide whether we should descend into it and its children, and we still used the incomplete Decltypeloc.getSourceRange() to claim tokens, which will cause some tokens were not claimed correctly. Separate a change of https://reviews.llvm.org/D116536 Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D116586 --- clang-tools-extra/clangd/Selection.cpp | 26 +++++++++++-------- .../clangd/unittests/SelectionTests.cpp | 1 + 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index 2024228e2b58..40021c62d9e2 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -60,6 +60,21 @@ void recordMetrics(const SelectionTree &S, const LangOptions &Lang) { // Return the range covering a node and all its children. SourceRange getSourceRange(const DynTypedNode &N) { + // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to + // failing to descend into the child expression. 
+ // decltype(2+2); + // ~~~~~~~~~~~~~ <-- correct range + // ~~~~~~~~ <-- range reported by getSourceRange() + // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren) + // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get + // rid of this patch. + if (const auto *TL = N.get()) { + if (auto DT = TL->getAs()) { + SourceRange S = DT.getSourceRange(); + S.setEnd(DT.getUnderlyingExpr()->getEndLoc()); + return S; + } + } // MemberExprs to implicitly access anonymous fields should not claim any // tokens for themselves. Given: // struct A { struct { int b; }; }; @@ -647,17 +662,6 @@ class SelectionVisitor : public RecursiveASTVisitor { // heuristics. We should consider only pruning critical TypeLoc nodes, to // be more robust. - // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to - // failing - // to descend into the child expression. - // decltype(2+2); - // ~~~~~~~~~~~~~ <-- correct range - // ~~~~~~~~ <-- range reported by getSourceRange() - // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren) - // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get - // rid of this patch. - if (auto DT = TL->getAs()) - S.setEnd(DT.getUnderlyingExpr()->getEndLoc()); // AttributedTypeLoc may point to the attribute's range, NOT the modified // type's range. if (auto AT = TL->getAs()) diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index 971487c9cd27..6583d89af695 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -385,6 +385,7 @@ TEST(SelectionTest, CommonAncestor) { decltype([[^a]] + a) b; )cpp", "DeclRefExpr"}, + {"[[decltype]]^(1) b;", "DecltypeTypeLoc"}, // Not the VarDecl. // Objective-C nullability attributes. 
{ From 30ad1742c08315498e5627fc4b01194564494cb3 Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Tue, 4 Jan 2022 11:14:30 +0000 Subject: [PATCH 497/992] [Docs] Document C++ for OpenCL 2021 support in clang. Along with the new language mode this commit contains misc small updates for OpenCL 3 and GitHub issues for OpenCL. Differential Revision: https://reviews.llvm.org/D116271 --- clang/docs/OpenCLSupport.rst | 35 ++++++++++++++++++++------------ clang/docs/UsersManual.rst | 39 ++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/clang/docs/OpenCLSupport.rst b/clang/docs/OpenCLSupport.rst index 7ac5707a9901..c1202601d48d 100644 --- a/clang/docs/OpenCLSupport.rst +++ b/clang/docs/OpenCLSupport.rst @@ -18,15 +18,16 @@ OpenCL Support ================== Clang has complete support of OpenCL C versions from 1.0 to 2.0. +There is an ongoing work to support :ref:`OpenCL 3.0 `. Clang also supports :ref:`the C++ for OpenCL kernel language `. -There is an ongoing work to support :ref:`OpenCL 3.0 `. - -There are also other :ref:`new and experimental features ` available. +There are also other :ref:`new and experimental features ` +available. -For general issues and bugs with OpenCL in clang refer to `Bugzilla -`__. +For general issues and bugs with OpenCL in clang refer to `the GitHub issue +list +`__. Internals Manual ================ @@ -127,7 +128,7 @@ To enable modules for OpenCL: .. code-block:: console - $ clang -target spir-unknown-unknown -c -emit-llvm -Xclang -finclude-default-header -fmodules -fimplicit-module-maps -fm odules-cache-path= test.cl + $ clang -target spir-unknown-unknown -c -emit-llvm -Xclang -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path= test.cl Another way to circumvent long parsing latency for the OpenCL builtin declarations is to use mechanism enabled by :ref:`-fdeclare-opencl-builtins @@ -319,24 +320,32 @@ specified in the Clang's source code. 
C++ for OpenCL Implementation Status ==================================== -Clang implements language version 1.0 published in `the official +Clang implements language versions 1.0 and 2021 published in `the official release of C++ for OpenCL Documentation -`_. +`_. Limited support of experimental C++ libraries is described in the :ref:`experimental features `. -Bugzilla bugs for this functionality are typically prefixed +GitHub issues for this functionality are typically prefixed with '[C++4OpenCL]' - click `here -`__ +`__ to view the full bug list. Missing features or with limited support ---------------------------------------- -- IR generation for global destructors is incomplete (See: +- Support of C++ for OpenCL 2021 is currently in experimental phase. Refer to + :ref:`OpenCL 3.0 status ` for details of common missing + functionality from OpenCL 3.0. + +- IR generation for non-trivial global destructors is incomplete (See: `PR48047 `_). +- Support of `destrutors with non-default address spaces + `_ + is incomplete (See: `D109609 `_). + .. _opencl_300: OpenCL C 3.0 Usage @@ -408,8 +417,8 @@ Experimental features Clang provides the following new WIP features for the developers to experiment and provide early feedback or contribute with further improvements. Feel free to contact us on `cfe-dev -`_ or via `Bugzilla -`__. +`_ or file `a GitHub issue +`_. .. _opencl_experimental_cxxlibs: diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 1173fd337841..d83b7a27bb3b 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -41,8 +41,8 @@ specific section: variants depending on base language. - :ref:`C++ Language ` - :ref:`Objective C++ Language ` -- :ref:`OpenCL Kernel Language `: OpenCL C v1.0, v1.1, v1.2, v2.0, - plus C++ for OpenCL. +- :ref:`OpenCL Kernel Language `: OpenCL C 1.0, 1.1, 1.2, 2.0, 3.0, + and C++ for OpenCL 1.0 and 2021. 
In addition to these base languages and their dialects, Clang supports a broad variety of language extensions, which are documented in the @@ -3321,20 +3321,25 @@ implementation of `OpenCL C++ `_ and there is no plan to support it in clang in any new releases in the near future. - -Clang currently supports C++ for OpenCL v1.0. +Clang currently supports C++ for OpenCL 1.0 and 2021. For detailed information about this language refer to the C++ for OpenCL Programming Language Documentation available in `the latest build `_ or in `the official release -`_. +`_. To enable the C++ for OpenCL mode, pass one of following command line options when -compiling ``.cl`` file ``-cl-std=clc++``, ``-cl-std=CLC++``, ``-cl-std=clc++1.0``, -``-cl-std=CLC++1.0``, ``-std=clc++``, ``-std=CLC++``, ``-std=clc++1.0`` or -``-std=CLC++1.0``. +compiling ``.clcpp`` file: + +- C++ for OpenCL 1.0: ``-cl-std=clc++``, ``-cl-std=CLC++``, ``-cl-std=clc++1.0``, + ``-cl-std=CLC++1.0``, ``-std=clc++``, ``-std=CLC++``, ``-std=clc++1.0`` or + ``-std=CLC++1.0``. + +- C++ for OpenCL 2021: ``-cl-std=clc++2021``, ``-cl-std=CLC++2021``, + ``-std=clc++2021``, ``-std=CLC++2021``. +Example of use: .. code-block:: c++ template T add( T x, T y ) @@ -3351,15 +3356,27 @@ compiling ``.cl`` file ``-cl-std=clc++``, ``-cl-std=CLC++``, ``-cl-std=clc++1.0` .. code-block:: console - clang -cl-std=clc++ test.cl + clang -cl-std=clc++1.0 test.clcpp + -Alternatively, files with ``.clcpp`` extension are compiled with the C++ for OpenCL -mode. +By default, files with ``.clcpp`` extension are compiled with the C++ for +OpenCL 1.0 mode. .. code-block:: console clang test.clcpp +For backward compatibility files with ``.cl`` extensions can also be compiled +in C++ for OpenCL mode but the desirable language mode must be activated with +a flag. + + .. code-block:: console + + clang -cl-std=clc++ test.cl + +Support of C++ for OpenCL 2021 is currently in experimental phase, refer to +:doc:`OpenCLSupport` for more details. 
+ C++ for OpenCL kernel sources can also be compiled online in drivers supporting `cl_ext_cxx_for_opencl `_ From 6231ef2624159bdedac346621a52a6ae9ae8e2a5 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 4 Jan 2022 12:19:49 +0100 Subject: [PATCH 498/992] Remove an unused variable, NFC. --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index eaff501ebe3b..3c93da8e2039 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -1927,7 +1927,6 @@ bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) { Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool DstIsKill = MI.getOperand(1).isKill(); - bool ImpIsDead = MI.getOperand(3).isDead(); // bst r24, 6 // lsl r24 From aefab6f8d5b13c41d000feaa8f0e567d4b6a4681 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 12:10:45 +0100 Subject: [PATCH 499/992] [InstSimplify] Use weak symbol in test to show miscompile (NFC) This fold is incorrect, because it assumes that all indices are non-zero. This happens to be true for the test as written, but doesn't hold if we use an extern weak global instead, for which ptrtoint might be zero. Add separate tests for the simple constant int case. 
--- .../InstSimplify/ConstProp/icmp-global.ll | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index e681c62a4a61..b2d6b3f1c7fb 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -65,6 +65,7 @@ define i1 @ult_constexpr_constexpr_one(i8* %x) { @g = global [2 x i32] [i32 1, i32 2] @g2 = global i32 0 +@g2_weak = extern_weak global i32 define i1 @global_ne_null() { ; CHECK-LABEL: @global_ne_null( @@ -121,7 +122,7 @@ define i1 @null_gep_ne_null() { ; CHECK-LABEL: @null_gep_ne_null( ; CHECK-NEXT: ret i1 true ; - %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2 to i64) + %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64) %cmp = icmp ne i8* %gep, null ret i1 %cmp } @@ -130,20 +131,38 @@ define i1 @null_gep_ugt_null() { ; CHECK-LABEL: @null_gep_ugt_null( ; CHECK-NEXT: ret i1 true ; - %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2 to i64) + %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64) %cmp = icmp ugt i8* %gep, null ret i1 %cmp } define i1 @null_gep_sgt_null() { ; CHECK-LABEL: @null_gep_sgt_null( -; CHECK-NEXT: ret i1 icmp sgt (i8* getelementptr (i8, i8* null, i64 ptrtoint (i32* @g2 to i64)), i8* null) +; CHECK-NEXT: ret i1 icmp sgt (i8* getelementptr (i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64)), i8* null) ; - %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2 to i64) + %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64) %cmp = icmp sgt i8* %gep, null ret i1 %cmp } +define i1 @null_gep_ne_null_constant_int() { +; CHECK-LABEL: @null_gep_ne_null_constant_int( +; CHECK-NEXT: ret i1 true +; + %gep = getelementptr i8, i8* null, i64 1 + %cmp = icmp ne i8* %gep, null + ret i1 %cmp +} + +define i1 @null_gep_ugt_null_constant_int() { +; 
CHECK-LABEL: @null_gep_ugt_null_constant_int( +; CHECK-NEXT: ret i1 true +; + %gep = getelementptr i8, i8* null, i64 1 + %cmp = icmp ugt i8* %gep, null + ret i1 %cmp +} + define i1 @null_gep_ne_global() { ; CHECK-LABEL: @null_gep_ne_global( ; CHECK-NEXT: ret i1 true From 75db002725156fba9e9c38b7cefe57b7ed713734 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 12:23:06 +0100 Subject: [PATCH 500/992] [ConstantFold] Remove another incorrect icmp of GEP fold This fold is not correct, because indices might evaluate to zero even if they are not a literal zero integer. Additionally, this fold would be wrong (in the general case) for non-i8 types as well, due to index overflow. Drop this fold and instead let the target-dependent constant folder compute the actual offset and fold the comparison based on that. --- llvm/lib/IR/ConstantFold.cpp | 10 ---------- .../Transforms/InstSimplify/ConstProp/icmp-global.ll | 6 ++++-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index c3f3d3c4b4c1..f32d26ba0978 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1539,17 +1539,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // so the result is greater-than if (!GV->hasExternalWeakLinkage()) return ICmpInst::ICMP_UGT; - } else if (isa(CE1Op0)) { - // If we are indexing from a null pointer, check to see if we have any - // non-zero indices. - for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i) - if (!CE1->getOperand(i)->isNullValue()) - // Offsetting from null, must not be equal. - return ICmpInst::ICMP_UGT; - // Only zero indexes from null, must still be zero. - return ICmpInst::ICMP_EQ; } - // Otherwise, we can't really say if the first operand is null or not. 
} else if (const GlobalValue *GV2 = dyn_cast(V2)) { if (isa(CE1Op0)) { // If its not weak linkage, the GVal must have a non-zero address diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index b2d6b3f1c7fb..99f1a16c5427 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -118,9 +118,11 @@ define i1 @global_gep_sgt_null() { ret i1 %cmp } +; @g2_weak may be null, in which case this is a zero-index GEP and the pointers +; are equal. define i1 @null_gep_ne_null() { ; CHECK-LABEL: @null_gep_ne_null( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ne (i8* getelementptr (i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64)), i8* null) ; %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64) %cmp = icmp ne i8* %gep, null @@ -129,7 +131,7 @@ define i1 @null_gep_ne_null() { define i1 @null_gep_ugt_null() { ; CHECK-LABEL: @null_gep_ugt_null( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ugt (i8* getelementptr (i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64)), i8* null) ; %gep = getelementptr i8, i8* null, i64 ptrtoint (i32* @g2_weak to i64) %cmp = icmp ugt i8* %gep, null From 1379eb577607dc7a070272e02bcb6712cbb8feed Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 12:33:38 +0100 Subject: [PATCH 501/992] [ConstFold] Slightly clean up icmp of two geps fold (NFC) As we're only dealing with one type of constant expression here, try to directly cast to GEPOperator. 
--- llvm/lib/IR/ConstantFold.cpp | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index f32d26ba0978..c9ee60d684f9 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1553,26 +1553,17 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, return ICmpInst::BAD_ICMP_PREDICATE; } } - } else { - ConstantExpr *CE2 = cast(V2); - Constant *CE2Op0 = CE2->getOperand(0); - - // There are MANY other foldings that we could perform here. They will - // probably be added on demand, as they seem needed. - switch (CE2->getOpcode()) { - default: break; - case Instruction::GetElementPtr: - // By far the most common case to handle is when the base pointers are - // obviously to the same global. - if (isa(CE1Op0) && isa(CE2Op0)) { - // Don't know relative ordering, but check for inequality. - if (CE1Op0 != CE2Op0) { - GEPOperator *CE2GEP = cast(CE2); - if (CE1GEP->hasAllZeroIndices() && CE2GEP->hasAllZeroIndices()) - return areGlobalsPotentiallyEqual(cast(CE1Op0), - cast(CE2Op0)); - return ICmpInst::BAD_ICMP_PREDICATE; - } + } else if (const auto *CE2GEP = dyn_cast(V2)) { + // By far the most common case to handle is when the base pointers are + // obviously to the same global. + const Constant *CE2Op0 = cast(CE2GEP->getPointerOperand()); + if (isa(CE1Op0) && isa(CE2Op0)) { + // Don't know relative ordering, but check for inequality. + if (CE1Op0 != CE2Op0) { + if (CE1GEP->hasAllZeroIndices() && CE2GEP->hasAllZeroIndices()) + return areGlobalsPotentiallyEqual(cast(CE1Op0), + cast(CE2Op0)); + return ICmpInst::BAD_ICMP_PREDICATE; } } } From 2a92efd0a23984c910f0ce3a6b515cc9e15ba87c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 4 Jan 2022 12:36:08 +0100 Subject: [PATCH 502/992] [CodeComplete] drop unused Scope param. 
NFC --- clang/include/clang/Sema/Sema.h | 11 +++++------ clang/lib/Parse/ParseDecl.cpp | 4 ++-- clang/lib/Parse/ParseDeclCXX.cpp | 4 ++-- clang/lib/Parse/ParseExpr.cpp | 2 +- clang/lib/Parse/ParseExprCXX.cpp | 6 +++--- clang/lib/Parse/ParseInit.cpp | 4 ++-- clang/lib/Parse/ParseOpenMP.cpp | 2 +- clang/lib/Sema/SemaCodeComplete.cpp | 9 ++++----- 8 files changed, 20 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 016a12e3b1fe..1b3944b35cb4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12537,17 +12537,16 @@ class Sema final { /// signatures that were considered. /// /// FIXME: rename to GuessCallArgumentType to reduce confusion. - QualType ProduceCallSignatureHelp(Scope *S, Expr *Fn, ArrayRef Args, + QualType ProduceCallSignatureHelp(Expr *Fn, ArrayRef Args, SourceLocation OpenParLoc); - QualType ProduceConstructorSignatureHelp(Scope *S, QualType Type, - SourceLocation Loc, + QualType ProduceConstructorSignatureHelp(QualType Type, SourceLocation Loc, ArrayRef Args, SourceLocation OpenParLoc, bool Braced); QualType ProduceCtorInitMemberSignatureHelp( - Scope *S, Decl *ConstructorDecl, CXXScopeSpec SS, - ParsedType TemplateTypeTy, ArrayRef ArgExprs, IdentifierInfo *II, - SourceLocation OpenParLoc, bool Braced); + Decl *ConstructorDecl, CXXScopeSpec SS, ParsedType TemplateTypeTy, + ArrayRef ArgExprs, IdentifierInfo *II, SourceLocation OpenParLoc, + bool Braced); QualType ProduceTemplateArgumentSignatureHelp( TemplateTy, ArrayRef, SourceLocation LAngleLoc); void CodeCompleteInitializer(Scope *S, Decl *D); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 5900075e5a90..dd2da7720828 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2419,7 +2419,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( auto ThisVarDecl = dyn_cast_or_null(ThisDecl); auto RunSignatureHelp = [&]() { QualType 
PreferredType = Actions.ProduceConstructorSignatureHelp( - getCurScope(), ThisVarDecl->getType()->getCanonicalTypeInternal(), + ThisVarDecl->getType()->getCanonicalTypeInternal(), ThisDecl->getLocation(), Exprs, T.getOpenLocation(), /*Braced=*/false); CalledSignatureHelp = true; @@ -2440,7 +2440,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( if (ParseExpressionList(Exprs, CommaLocs, ExpressionStarts)) { if (ThisVarDecl && PP.isCodeCompletionReached() && !CalledSignatureHelp) { Actions.ProduceConstructorSignatureHelp( - getCurScope(), ThisVarDecl->getType()->getCanonicalTypeInternal(), + ThisVarDecl->getType()->getCanonicalTypeInternal(), ThisDecl->getLocation(), Exprs, T.getOpenLocation(), /*Braced=*/false); CalledSignatureHelp = true; diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 942b813b3935..7c9d1965d117 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -3740,8 +3740,8 @@ MemInitResult Parser::ParseMemInitializer(Decl *ConstructorDecl) { if (TemplateTypeTy.isInvalid()) return QualType(); QualType PreferredType = Actions.ProduceCtorInitMemberSignatureHelp( - getCurScope(), ConstructorDecl, SS, TemplateTypeTy.get(), ArgExprs, - II, T.getOpenLocation(), /*Braced=*/false); + ConstructorDecl, SS, TemplateTypeTy.get(), ArgExprs, II, + T.getOpenLocation(), /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 09a3842f5809..b8649f9f0c21 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -2019,7 +2019,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { CommaLocsTy CommaLocs; auto RunSignatureHelp = [&]() -> QualType { QualType PreferredType = Actions.ProduceCallSignatureHelp( - getCurScope(), LHS.get(), ArgExprs, PT.getOpenLocation()); + LHS.get(), ArgExprs, PT.getOpenLocation()); CalledSignatureHelp = true; return PreferredType; }; diff 
--git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 0ba0fd529002..8adb6378a214 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1877,8 +1877,8 @@ Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) { QualType PreferredType; if (TypeRep) PreferredType = Actions.ProduceConstructorSignatureHelp( - getCurScope(), TypeRep.get()->getCanonicalTypeInternal(), - DS.getEndLoc(), Exprs, T.getOpenLocation(), /*Braced=*/false); + TypeRep.get()->getCanonicalTypeInternal(), DS.getEndLoc(), Exprs, + T.getOpenLocation(), /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; }; @@ -3167,7 +3167,7 @@ Parser::ParseCXXNewExpression(bool UseGlobal, SourceLocation Start) { // `new decltype(invalid) (^)`. if (TypeRep) PreferredType = Actions.ProduceConstructorSignatureHelp( - getCurScope(), TypeRep.get()->getCanonicalTypeInternal(), + TypeRep.get()->getCanonicalTypeInternal(), DeclaratorInfo.getEndLoc(), ConstructorArgs, ConstructorLParen, /*Braced=*/false); CalledSignatureHelp = true; diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp index efb162af642d..e7b444ef043e 100644 --- a/clang/lib/Parse/ParseInit.cpp +++ b/clang/lib/Parse/ParseInit.cpp @@ -466,8 +466,8 @@ ExprResult Parser::ParseBraceInitializer() { QualType PreferredType; if (!LikelyType.isNull()) PreferredType = Actions.ProduceConstructorSignatureHelp( - getCurScope(), LikelyType->getCanonicalTypeInternal(), - T.getOpenLocation(), InitExprs, T.getOpenLocation(), /*Braced=*/true); + LikelyType->getCanonicalTypeInternal(), T.getOpenLocation(), + InitExprs, T.getOpenLocation(), /*Braced=*/true); CalledSignatureHelp = true; return PreferredType; }; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 2500cf834a34..4af4d321807a 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -470,7 +470,7 @@ void 
Parser::ParseOpenMPReductionInitializerForDecl(VarDecl *OmpPrivParm) { SourceLocation LParLoc = T.getOpenLocation(); auto RunSignatureHelp = [this, OmpPrivParm, LParLoc, &Exprs]() { QualType PreferredType = Actions.ProduceConstructorSignatureHelp( - getCurScope(), OmpPrivParm->getType()->getCanonicalTypeInternal(), + OmpPrivParm->getType()->getCanonicalTypeInternal(), OmpPrivParm->getLocation(), Exprs, LParLoc, /*Braced=*/false); CalledSignatureHelp = true; return PreferredType; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index f9f20af1497a..e2bf5edc7b5e 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5956,8 +5956,7 @@ ProduceSignatureHelp(Sema &SemaRef, MutableArrayRef Candidates, return getParamType(SemaRef, Candidates, CurrentArg); } -QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, - ArrayRef Args, +QualType Sema::ProduceCallSignatureHelp(Expr *Fn, ArrayRef Args, SourceLocation OpenParLoc) { Fn = unwrapParenList(Fn); if (!CodeCompleter || !Fn) @@ -6059,7 +6058,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, return !CandidateSet.empty() ? ParamType : QualType(); } -QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, +QualType Sema::ProduceConstructorSignatureHelp(QualType Type, SourceLocation Loc, ArrayRef Args, SourceLocation OpenParLoc, @@ -6112,7 +6111,7 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, } QualType Sema::ProduceCtorInitMemberSignatureHelp( - Scope *S, Decl *ConstructorDecl, CXXScopeSpec SS, ParsedType TemplateTypeTy, + Decl *ConstructorDecl, CXXScopeSpec SS, ParsedType TemplateTypeTy, ArrayRef ArgExprs, IdentifierInfo *II, SourceLocation OpenParLoc, bool Braced) { if (!CodeCompleter) @@ -6125,7 +6124,7 @@ QualType Sema::ProduceCtorInitMemberSignatureHelp( // FIXME: Add support for Base class constructors as well. 
if (ValueDecl *MemberDecl = tryLookupCtorInitMemberDecl( Constructor->getParent(), SS, TemplateTypeTy, II)) - return ProduceConstructorSignatureHelp(getCurScope(), MemberDecl->getType(), + return ProduceConstructorSignatureHelp(MemberDecl->getType(), MemberDecl->getLocation(), ArgExprs, OpenParLoc, Braced); return QualType(); From 71b2c4a3cf5c4709fcab66194b3277a31b849c22 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 12:36:10 +0100 Subject: [PATCH 503/992] [ConstantFolding] Remove unused ConstantFoldLoadThroughGEPConstantExpr() This API is no longer used since bbeaf2aac678633749e7385466da10a1c0120b3b. --- llvm/include/llvm/Analysis/ConstantFolding.h | 7 ------- llvm/lib/Analysis/ConstantFolding.cpp | 17 ----------------- 2 files changed, 24 deletions(-) diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index 45fb879f0c1f..7a68cd728f60 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -148,13 +148,6 @@ Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL); -/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a -/// getelementptr constantexpr, return the constant value being addressed by the -/// constant expression, or null if something is funny and we can't decide. -Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE, - Type *Ty, - const DataLayout &DL); - /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. 
bool canConstantFoldCallTo(const CallBase *Call, const Function *F); diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index c8ee130d6655..7f7c9a56b1b7 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1368,23 +1368,6 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, } } -Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, - ConstantExpr *CE, - Type *Ty, - const DataLayout &DL) { - if (!CE->getOperand(1)->isNullValue()) - return nullptr; // Do not allow stepping over the value! - - // Loop over all of the operands, tracking down which value we are - // addressing. - for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { - C = C->getAggregateElement(CE->getOperand(i)); - if (!C) - return nullptr; - } - return ConstantFoldLoadThroughBitcast(C, Ty, DL); -} - //===----------------------------------------------------------------------===// // Constant Folding for Calls // From 5c57e6aa5777bddf9ddaca5d927f1b47a1a9d381 Mon Sep 17 00:00:00 2001 From: Jun Zhan Date: Tue, 4 Jan 2022 11:47:40 +0000 Subject: [PATCH 504/992] [Clang] Extend emitUnaryBuiltin to avoid duplicate logic. This patch extends `emitUnaryBuiltin` so that we can better emitting IR when implement builtins specified in D111529. Also contains some NFC, applying it to existing code. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D116161 --- clang/lib/CodeGen/CGBuiltin.cpp | 89 +++++++++++++++------------------ 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1982b40ff667..c1541ff0c846 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -532,13 +532,13 @@ static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. 
-static Value *emitUnaryBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { +static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, + unsigned IntrinsicID, + llvm::StringRef Name = "") { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0); + return CGF.Builder.CreateCall(F, Src0, Name); } // Emit an intrinsic that has 2 operands of the same type as its result. @@ -3122,24 +3122,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_elementwise_abs: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Result; - if (Op0->getType()->isIntOrIntVectorTy()) + QualType QT = E->getArg(0)->getType(); + + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isIntegerType()) Result = Builder.CreateBinaryIntrinsic( - llvm::Intrinsic::abs, Op0, Builder.getFalse(), nullptr, "elt.abs"); + llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), + Builder.getFalse(), nullptr, "elt.abs"); else - Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::fabs, Op0, nullptr, - "elt.abs"); - return RValue::get(Result); - } + Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs"); - case Builtin::BI__builtin_elementwise_ceil: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::ceil, Op0, - nullptr, "elt.ceil"); return RValue::get(Result); } + case Builtin::BI__builtin_elementwise_ceil: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + case Builtin::BI__builtin_elementwise_max: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); @@ -3174,50 +3175,40 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_reduce_max: { - auto GetIntrinsicID = [](QualType QT, 
llvm::Type *IrTy) { - if (IrTy->isIntOrIntVectorTy()) { - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isSignedIntegerType()) - return llvm::Intrinsic::vector_reduce_smax; - else - return llvm::Intrinsic::vector_reduce_umax; - } + auto GetIntrinsicID = [](QualType QT) { + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smax; + if (QT->isUnsignedIntegerType()) + return llvm::Intrinsic::vector_reduce_umax; + assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmax; }; - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic( - GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, - "rdx.min"); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin( + *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_min: { - auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { - if (IrTy->isIntOrIntVectorTy()) { - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isSignedIntegerType()) - return llvm::Intrinsic::vector_reduce_smin; - else - return llvm::Intrinsic::vector_reduce_umin; - } + auto GetIntrinsicID = [](QualType QT) { + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smin; + if (QT->isUnsignedIntegerType()) + return llvm::Intrinsic::vector_reduce_umin; + assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmin; }; - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic( - GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, - "rdx.min"); - return RValue::get(Result); - } - case Builtin::BI__builtin_reduce_xor: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = 
Builder.CreateUnaryIntrinsic( - llvm::Intrinsic::vector_reduce_xor, Op0, nullptr, "rdx.xor"); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin( + *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } + case Builtin::BI__builtin_reduce_xor: + return RValue::get(emitUnaryBuiltin( + *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs(); Value *MatValue = EmitScalarExpr(E->getArg(0)); From 99e7bf46c9e34644a8c0033684798dcafc790200 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Tue, 4 Jan 2022 11:14:30 +0000 Subject: [PATCH 505/992] [AVR] Optimize int16 shift operation for shift amount greater than 8 Skip operation on the lower byte in int16 logical left shift when shift amount is greater than 8. Skip operation on the higher byte in int16 logical & arithmetic right shift when shift amount is greater than 8. Reviewed By: aykevl Differential Revision: https://reviews.llvm.org/D115594 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 73 ++++++++++++++++++++ llvm/lib/Target/AVR/AVRISelLowering.cpp | 17 +++++ llvm/lib/Target/AVR/AVRISelLowering.h | 3 + llvm/lib/Target/AVR/AVRInstrInfo.td | 12 ++++ llvm/test/CodeGen/AVR/shift.ll | 29 +++++--- 5 files changed, 124 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 3c93da8e2039..7d101f6cfb14 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -1412,6 +1412,30 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return true; } +template <> +bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(2).isDead(); + 
TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // add hireg, hireg <==> lsl hireg + auto MILSL = + buildMI(MBB, MBBI, AVR::ADDRdRr) + .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead)) + .addReg(DstHiReg, getKillRegState(DstIsKill)) + .addReg(DstHiReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) + MILSL->getOperand(3).setIsDead(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandLSLW4Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -1587,6 +1611,29 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return true; } +template <> +bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(2).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // lsr loreg + auto MILSR = + buildMI(MBB, MBBI, AVR::LSRRd) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, getKillRegState(DstIsKill)); + + if (ImpIsDead) + MILSR->getOperand(2).setIsDead(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandLSRW4Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -1774,6 +1821,29 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return true; } +template <> +bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { + MachineInstr &MI = *MBBI; + Register DstLoReg, DstHiReg; + Register DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool DstIsKill = MI.getOperand(1).isKill(); + bool ImpIsDead = MI.getOperand(2).isDead(); + TRI->splitReg(DstReg, DstLoReg, DstHiReg); + + // asr loreg + auto MIASR = + buildMI(MBB, MBBI, AVR::ASRRd) + .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstLoReg, 
getKillRegState(DstIsKill)); + + if (ImpIsDead) + MIASR->getOperand(2).setIsDead(); + + MI.eraseFromParent(); + return true; +} + bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; @@ -2230,6 +2300,9 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) { EXPAND(AVR::RORWRd); EXPAND(AVR::ROLWRd); EXPAND(AVR::ASRWRd); + EXPAND(AVR::LSLWHiRd); + EXPAND(AVR::LSRWLoRd); + EXPAND(AVR::ASRWLoRd); EXPAND(AVR::LSLWNRd); EXPAND(AVR::LSRWNRd); EXPAND(AVR::ASRWNRd); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index f3e74e843695..f7f560ff5db3 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -392,16 +392,22 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim, DAG.getConstant(8, dl, VT)); ShiftAmount -= 8; + // Only operate on the higher byte for remaining shift bits. + Opc8 = AVRISD::LSLHI; break; case ISD::SRL: Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim, DAG.getConstant(8, dl, VT)); ShiftAmount -= 8; + // Only operate on the lower byte for remaining shift bits. + Opc8 = AVRISD::LSRLO; break; case ISD::SRA: Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, DAG.getConstant(8, dl, VT)); ShiftAmount -= 8; + // Only operate on the lower byte for remaining shift bits. + Opc8 = AVRISD::ASRLO; break; default: break; @@ -412,11 +418,22 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim, DAG.getConstant(12, dl, VT)); ShiftAmount -= 12; + // Only operate on the higher byte for remaining shift bits. + Opc8 = AVRISD::LSLHI; break; case ISD::SRL: Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim, DAG.getConstant(12, dl, VT)); ShiftAmount -= 12; + // Only operate on the lower byte for remaining shift bits. 
+ Opc8 = AVRISD::LSRLO; + break; + case ISD::SRA: + Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim, + DAG.getConstant(8, dl, VT)); + ShiftAmount -= 8; + // Only operate on the lower byte for remaining shift bits. + Opc8 = AVRISD::ASRLO; break; default: break; diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h index 3ae036b66bcb..223a47372ef7 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -38,12 +38,15 @@ enum NodeType { LSL, ///< Logical shift left. LSLBN, ///< Byte logical shift left N bits. LSLWN, ///< Word logical shift left N bits. + LSLHI, ///< Higher 8-bit of word logical shift left. LSR, ///< Logical shift right. LSRBN, ///< Byte logical shift right N bits. LSRWN, ///< Word logical shift right N bits. + LSRLO, ///< Lower 8-bit of word logical shift right. ASR, ///< Arithmetic shift right. ASRBN, ///< Byte arithmetic shift right N bits. ASRWN, ///< Word arithmetic shift right N bits. + ASRLO, ///< Lower 8-bit of word arithmetic shift right. ROR, ///< Bit rotate right. ROL, ///< Bit rotate left. LSLLOOP, ///< A loop of single logical shift left instructions. 
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index c7f423292da0..c695cde04590 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -60,6 +60,9 @@ def AVRlsr : SDNode<"AVRISD::LSR", SDTIntUnaryOp>; def AVRrol : SDNode<"AVRISD::ROL", SDTIntUnaryOp>; def AVRror : SDNode<"AVRISD::ROR", SDTIntUnaryOp>; def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>; +def AVRlslhi : SDNode<"AVRISD::LSLHI", SDTIntUnaryOp>; +def AVRlsrlo : SDNode<"AVRISD::LSRLO", SDTIntUnaryOp>; +def AVRasrlo : SDNode<"AVRISD::ASRLO", SDTIntUnaryOp>; def AVRlslbn : SDNode<"AVRISD::LSLBN", SDTIntBinOp>; def AVRlsrbn : SDNode<"AVRISD::LSRBN", SDTIntBinOp>; def AVRasrbn : SDNode<"AVRISD::ASRBN", SDTIntBinOp>; @@ -1848,6 +1851,9 @@ let Constraints = "$src = $rd", Defs = [SREG] in { : $src)), (implicit SREG)]>; + def LSLWHiRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "lslwhi\t$rd", + [(set i16:$rd, (AVRlslhi i16:$src)), (implicit SREG)]>; + def LSLWNRd : Pseudo<(outs DLDREGS : $rd), (ins DREGS @@ -1895,6 +1901,9 @@ let Constraints = "$src = $rd", Defs = [SREG] in { : $src)), (implicit SREG)]>; + def LSRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "lsrwlo\t$rd", + [(set i16:$rd, (AVRlsrlo i16:$src)), (implicit SREG)]>; + def LSRWNRd : Pseudo<(outs DLDREGS : $rd), (ins DREGS @@ -1968,6 +1977,9 @@ let Constraints = "$src = $rd", Defs = [SREG] in { : $src)), (implicit SREG)]>; + def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd", + [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>; + def ROLBRd : Pseudo<(outs GPR8 : $rd), (ins GPR8 diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll index 90e1b25bd762..beba537cd104 100644 --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -227,8 +227,7 @@ define i16 @lsl_i16_9(i16 %a) { ; CHECK-LABEL: lsl_i16_9 ; CHECK: mov r25, r24 ; CHECK-NEXT: clr r24 -; CHECK-NEXT: lsl r24 -; CHECK-NEXT: rol r25 +; 
CHECK-NEXT: lsl r25 ; CHECK-NEXT: ret %result = shl i16 %a, 9 ret i16 %result @@ -240,8 +239,7 @@ define i16 @lsl_i16_13(i16 %a) { ; CHECK-NEXT: swap r25 ; CHECK-NEXT: andi r25, 240 ; CHECK-NEXT: clr r24 -; CHECK-NEXT: lsl r24 -; CHECK-NEXT: rol r25 +; CHECK-NEXT: lsl r25 ; CHECK-NEXT: ret %result = shl i16 %a, 13 ret i16 %result @@ -285,8 +283,7 @@ define i16 @lsr_i16_9(i16 %a) { ; CHECK-LABEL: lsr_i16_9 ; CHECK: mov r24, r25 ; CHECK-NEXT: clr r25 -; CHECK-NEXT: lsr r25 -; CHECK-NEXT: ror r24 +; CHECK-NEXT: lsr r24 ; CHECK-NEXT: ret %result = lshr i16 %a, 9 ret i16 %result @@ -298,8 +295,7 @@ define i16 @lsr_i16_13(i16 %a) { ; CHECK-NEXT: swap r24 ; CHECK-NEXT: andi r24, 15 ; CHECK-NEXT: clr r25 -; CHECK-NEXT: lsr r25 -; CHECK-NEXT: ror r24 +; CHECK-NEXT: lsr r24 ; CHECK-NEXT: ret %result = lshr i16 %a, 13 ret i16 %result @@ -310,9 +306,22 @@ define i16 @asr_i16_9(i16 %a) { ; CHECK: mov r24, r25 ; CHECK-NEXT: lsl r25 ; CHECK-NEXT: sbc r25, r25 -; CHECK-NEXT: asr r25 -; CHECK-NEXT: ror r24 +; CHECK-NEXT: asr r24 ; CHECK-NEXT: ret %result = ashr i16 %a, 9 ret i16 %result } + +define i16 @asr_i16_12(i16 %a) { +; CHECK-LABEL: asr_i16_12 +; CHECK: mov r24, r25 +; CHECK-NEXT: lsl r25 +; CHECK-NEXT: sbc r25, r25 +; CHECK-NEXT: asr r24 +; CHECK-NEXT: asr r24 +; CHECK-NEXT: asr r24 +; CHECK-NEXT: asr r24 +; CHECK-NEXT: ret + %result = ashr i16 %a, 12 + ret i16 %result +} From 32357266fd055e0eba63fc321f31a1c88eae0ea8 Mon Sep 17 00:00:00 2001 From: Saiyedul Islam Date: Mon, 3 Jan 2022 15:12:04 +0000 Subject: [PATCH 506/992] [Clang][NFC] Fix multiline comment prefixes in function headers Cleanup of D105191 after latest clang-format changes. 
Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D111545 --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 5 ++--- clang/lib/Driver/ToolChains/Cuda.cpp | 10 ++++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index f282f04b7931..198e3546d4fa 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -131,9 +131,8 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( } AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn", - SubArchName, - /* bitcode SDL?*/ true, - /* PostClang Link? */ false); + SubArchName, /*isBitCodeSDL=*/true, + /*postClangLink=*/false); // Add an intermediate output file. CmdArgs.push_back("-o"); const char *OutputFileName = diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index ee573b89bed1..7324339efaa6 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -612,8 +612,9 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(CubinF); } - AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch, - false, false); + AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", + GPUArch, /*isBitCodeSDL=*/false, + /*postClangLink=*/false); // Find nvlink and pass it as "--nvlink-path=" argument of // clang-nvlink-wrapper. @@ -752,8 +753,9 @@ void CudaToolChain::addClangTargetOptions( addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, getTriple()); - AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch, - /* bitcode SDL?*/ true, /* PostClang Link? 
*/ true); + AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", + GpuArch, /*isBitCodeSDL=*/true, + /*postClangLink=*/true); } } From 1c66691ea770c2049bbc5a27fa6e998e01a7eaf8 Mon Sep 17 00:00:00 2001 From: Egor Zhdan Date: Tue, 4 Jan 2022 12:54:17 +0100 Subject: [PATCH 507/992] [Clang][Sema] Adjust formatting (NFC) This is a preparation for another change in the watchOS/tvOS availability logic. It is extracted into a separate commit to simplify reviewing and to keep the linter happy at the same time. rdar://81491680 Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D116459 --- clang/lib/Sema/SemaDeclAttr.cpp | 58 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index b6bd2e69629d..642b878e0270 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2625,37 +2625,37 @@ static void handleAvailabilityAttr(Sema &S, Decl *D, const ParsedAttr &AL) { NewII = &S.Context.Idents.get("watchos_app_extension"); if (NewII) { - auto adjustWatchOSVersion = [](VersionTuple Version) -> VersionTuple { - if (Version.empty()) - return Version; - auto Major = Version.getMajor(); - auto NewMajor = Major >= 9 ? Major - 7 : 0; - if (NewMajor >= 2) { - if (Version.getMinor().hasValue()) { - if (Version.getSubminor().hasValue()) - return VersionTuple(NewMajor, Version.getMinor().getValue(), - Version.getSubminor().getValue()); - else - return VersionTuple(NewMajor, Version.getMinor().getValue()); - } - return VersionTuple(NewMajor); + auto adjustWatchOSVersion = [](VersionTuple Version) -> VersionTuple { + if (Version.empty()) + return Version; + auto Major = Version.getMajor(); + auto NewMajor = Major >= 9 ? 
Major - 7 : 0; + if (NewMajor >= 2) { + if (Version.getMinor().hasValue()) { + if (Version.getSubminor().hasValue()) + return VersionTuple(NewMajor, Version.getMinor().getValue(), + Version.getSubminor().getValue()); + else + return VersionTuple(NewMajor, Version.getMinor().getValue()); } + return VersionTuple(NewMajor); + } - return VersionTuple(2, 0); - }; + return VersionTuple(2, 0); + }; - auto NewIntroduced = adjustWatchOSVersion(Introduced.Version); - auto NewDeprecated = adjustWatchOSVersion(Deprecated.Version); - auto NewObsoleted = adjustWatchOSVersion(Obsoleted.Version); - - AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr( - ND, AL, NewII, true /*Implicit*/, NewIntroduced, NewDeprecated, - NewObsoleted, IsUnavailable, Str, IsStrict, Replacement, - Sema::AMK_None, - PriorityModifier + Sema::AP_InferredFromOtherPlatform); - if (NewAttr) - D->addAttr(NewAttr); - } + auto NewIntroduced = adjustWatchOSVersion(Introduced.Version); + auto NewDeprecated = adjustWatchOSVersion(Deprecated.Version); + auto NewObsoleted = adjustWatchOSVersion(Obsoleted.Version); + + AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr( + ND, AL, NewII, true /*Implicit*/, NewIntroduced, NewDeprecated, + NewObsoleted, IsUnavailable, Str, IsStrict, Replacement, + Sema::AMK_None, + PriorityModifier + Sema::AP_InferredFromOtherPlatform); + if (NewAttr) + D->addAttr(NewAttr); + } } else if (S.Context.getTargetInfo().getTriple().isTvOS()) { // Transcribe "ios" to "tvos" (and add a new attribute) if the versioning // matches before the start of the tvOS platform. 
@@ -2673,7 +2673,7 @@ static void handleAvailabilityAttr(Sema &S, Decl *D, const ParsedAttr &AL) { PriorityModifier + Sema::AP_InferredFromOtherPlatform); if (NewAttr) D->addAttr(NewAttr); - } + } } else if (S.Context.getTargetInfo().getTriple().getOS() == llvm::Triple::IOS && S.Context.getTargetInfo().getTriple().isMacCatalystEnvironment()) { From 95b74d4db0686a8d55fdae1af4e985ea52b2c572 Mon Sep 17 00:00:00 2001 From: luxufan <932494295@qq.com> Date: Tue, 4 Jan 2022 19:39:07 +0800 Subject: [PATCH 508/992] [JITLink] Improve extractBits function Address the advice proposed at patch D105429 . Use [Low, Low+size) to represent bits. Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D107250 --- llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index b057788ce3ef..26ec79ea50cf 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -157,8 +157,8 @@ static Expected getRISCVPCRelHi20(const Edge &E) { "No HI20 PCREL relocation type be found for LO12 PCREL relocation type"); } -static uint32_t extractBits(uint64_t Num, unsigned High, unsigned Low) { - return (Num & ((1ULL << (High + 1)) - 1)) >> Low; +static uint32_t extractBits(uint32_t Num, unsigned Low, unsigned Size) { + return (Num & (((1ULL << (Size + 1)) - 1) << Low)) >> Low; } class ELFJITLinker_riscv : public JITLinker { @@ -238,8 +238,8 @@ class ELFJITLinker_riscv : public JITLinker { int64_t Value = RelHI20->getTarget().getAddress() + RelHI20->getAddend() - E.getTarget().getAddress(); int64_t Lo = Value & 0xFFF; - uint32_t Imm31_25 = extractBits(Lo, 11, 5) << 25; - uint32_t Imm11_7 = extractBits(Lo, 4, 0) << 7; + uint32_t Imm31_25 = extractBits(Lo, 5, 7) << 25; + uint32_t Imm11_7 = extractBits(Lo, 0, 5) << 7; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t 
*)FixupPtr = (RawInstr & 0x1FFF07F) | Imm31_25 | Imm11_7; From 051847cfecaea3f55fc4f822facfbf5d21bde8dd Mon Sep 17 00:00:00 2001 From: Oleg Smolsky Date: Tue, 4 Jan 2022 07:27:02 -0500 Subject: [PATCH 509/992] Improve the 'modernize-use-default-member-init' We want to deal with non-default constructors that just happen to contain constant initializers. There was already a negative test case, it is now a positive one. We find and refactor this case: struct PositiveNotDefaultInt { PositiveNotDefaultInt(int) : i(7) {} int i; }; --- .../modernize/UseDefaultMemberInitCheck.cpp | 27 +++++++----- clang-tools-extra/docs/ReleaseNotes.rst | 3 ++ .../modernize-use-default-member-init.rst | 2 +- .../modernize-use-default-member-init.cpp | 42 ++++++++++++++++--- 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp index 6e7e37236b19..d57a88d66883 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.cpp @@ -212,17 +212,14 @@ void UseDefaultMemberInitCheck::registerMatchers(MatchFinder *Finder) { InitBase); Finder->addMatcher( - cxxConstructorDecl( - isDefaultConstructor(), - forEachConstructorInitializer( - cxxCtorInitializer( - forField(unless(anyOf(getLangOpts().CPlusPlus20 - ? unless(anything()) - : isBitField(), - hasInClassInitializer(anything()), - hasParent(recordDecl(isUnion()))))), - withInitializer(Init)) - .bind("default"))), + cxxConstructorDecl(forEachConstructorInitializer( + cxxCtorInitializer( + forField(unless(anyOf( + getLangOpts().CPlusPlus20 ? 
unless(anything()) : isBitField(), + hasInClassInitializer(anything()), + hasParent(recordDecl(isUnion()))))), + withInitializer(Init)) + .bind("default"))), this); Finder->addMatcher( @@ -248,6 +245,14 @@ void UseDefaultMemberInitCheck::checkDefaultInit( const MatchFinder::MatchResult &Result, const CXXCtorInitializer *Init) { const FieldDecl *Field = Init->getAnyMember(); + // Check whether we have multiple hand-written constructors and bomb out, as + // it is hard to reconcile their sets of member initializers. + const auto *ClassDecl = dyn_cast(Field->getParent()); + if (llvm::count_if(ClassDecl->ctors(), [](const CXXConstructorDecl *Ctor) { + return !Ctor->isCopyOrMoveConstructor(); + }) > 1) + return; + SourceLocation StartLoc = Field->getBeginLoc(); if (StartLoc.isMacroID() && IgnoreMacros) return; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 6f8a25e0d04f..5d330d1d2e3c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -73,6 +73,9 @@ Improvements to clang-tidy - Added support for `NOLINTBEGIN` ... `NOLINTEND` comments to suppress Clang-Tidy warnings over multiple lines. +- Generalized the `modernize-use-default-member-init` check to handle non-default + constructors. + New checks ^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-use-default-member-init.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-use-default-member-init.rst index a77415301ef6..2d3ed3801493 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize-use-default-member-init.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-use-default-member-init.rst @@ -3,7 +3,7 @@ modernize-use-default-member-init ================================= -This check converts a default constructor's member initializers into the new +This check converts constructors' member initializers into the new default member initializers in C++11. 
Other member initializers that match the default member initializer are removed. This can reduce repeated code or allow use of '= default'. diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-default-member-init.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-default-member-init.cpp index 27c947820d9c..464cfbdeacb8 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-default-member-init.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-default-member-init.cpp @@ -45,6 +45,42 @@ struct PositiveInt { // CHECK-FIXES: int j{1}; }; +struct PositiveNotDefaultInt { + PositiveNotDefaultInt(int) : i(7) {} + // CHECK-FIXES: PositiveNotDefaultInt(int) {} + int i; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use default member initializer for 'i' + // CHECK-FIXES: int i{7}; +}; + +// We cannot reconcile these initializers. +struct TwoConstructors { + TwoConstructors(int) : i(7) {} + TwoConstructors(int, int) : i(8) {} + int i; +}; + +struct PositiveNotDefaultOOLInt { + PositiveNotDefaultOOLInt(int); + int i; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use default member initializer for 'i' + // CHECK-FIXES: int i{7}; +}; + +PositiveNotDefaultOOLInt::PositiveNotDefaultOOLInt(int) : i(7) {} +// CHECK-FIXES: PositiveNotDefaultOOLInt::PositiveNotDefaultOOLInt(int) {} + +struct PositiveNotDefaultOOLInt2 { + PositiveNotDefaultOOLInt2(int, int); + int i; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use default member initializer for 'i' + // CHECK-FIXES: int i{7}; + int j; +}; + +PositiveNotDefaultOOLInt2::PositiveNotDefaultOOLInt2(int, int arg) : i(7), j(arg) {} +// CHECK-FIXES: PositiveNotDefaultOOLInt2::PositiveNotDefaultOOLInt2(int, int arg) : j(arg) {} + struct PositiveUnaryMinusInt { PositiveUnaryMinusInt() : j(-1) {} // CHECK-FIXES: PositiveUnaryMinusInt() {} @@ -234,12 +270,6 @@ struct NegativeBitField int i : 5; }; -struct NegativeNotDefaultInt -{ - NegativeNotDefaultInt(int) : i(7) {} - int i; -}; - 
struct NegativeDefaultArg { NegativeDefaultArg(int i = 4) : i(i) {} From 49f23afdc3453ad6834f32f69b48aa88b5d17338 Mon Sep 17 00:00:00 2001 From: Saiyedul Islam Date: Mon, 3 Jan 2022 16:45:47 +0000 Subject: [PATCH 510/992] [OpenMP] Add nec and nvidia as compiler vendors for OpenMP OpenMP Specs 5.0[1] and 5.1[2] recognizes nec and nvidia as known compiler vendors and their absence is causing compilation error in one of the vendor based metadirective test of sollve_vv project[3]. [1] https://www.openmp.org/wp-content/uploads/Context-Definitions-5.0-v1.0.pdf [2] https://www.openmp.org/wp-content/uploads/OpenMP-API-Additional-Definitions-2-0.pdf [3] https://github.com/SOLLVE/sollve_vv/blob/master/tests/5.0/metadirective/test_metadirective_arch_nvidia_or_amd.c Differential Revision: https://reviews.llvm.org/D116540 --- clang/test/OpenMP/begin_declare_variant_messages.c | 8 ++++---- clang/test/OpenMP/declare_variant_messages.c | 8 ++++---- clang/test/OpenMP/declare_variant_messages.cpp | 12 ++++++------ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 2 ++ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/clang/test/OpenMP/begin_declare_variant_messages.c b/clang/test/OpenMP/begin_declare_variant_messages.c index 5922153b2445..e419ec2345d6 100644 --- a/clang/test/OpenMP/begin_declare_variant_messages.c +++ b/clang/test/OpenMP/begin_declare_variant_messages.c @@ -54,15 +54,15 @@ const int var; #pragma omp end declare variant #pragma omp begin declare variant match(implementation={vendor}) // expected-warning {{the context selector 'vendor' in context set 'implementation' requires a context property defined in parentheses; selector ignored}} expected-note {{the ignored selector spans until here}} #pragma omp end declare variant -#pragma omp begin declare variant match(implementation={vendor(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context 
property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp begin declare variant match(implementation={vendor(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} #pragma omp end declare variant -#pragma omp begin declare variant match(implementation={vendor()}) // expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} +#pragma omp begin declare variant match(implementation={vendor()}) // expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} #pragma omp end declare variant #pragma omp begin declare variant match(implementation={vendor(score ibm)}) // expected-error {{expected '(' after 'score'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} #pragma omp end declare variant -#pragma omp begin declare variant match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp 
begin declare variant match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} #pragma omp end declare variant -#pragma omp begin declare variant match(implementation={vendor(score(2 ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp begin declare variant match(implementation={vendor(score(2 ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} #pragma omp end declare variant #pragma omp begin declare variant match(implementation={vendor(score(foo()) ibm)}) // expected-warning {{expected '':'' after the score expression; '':'' assumed}} #pragma omp end declare variant diff --git a/clang/test/OpenMP/declare_variant_messages.c b/clang/test/OpenMP/declare_variant_messages.c index 
0178ca15cabf..b53361f11b9f 100644 --- a/clang/test/OpenMP/declare_variant_messages.c +++ b/clang/test/OpenMP/declare_variant_messages.c @@ -28,11 +28,11 @@ int foo(void); #pragma omp declare variant(foo) match(xxx={vvv}) xxx // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-error {{expected 'match' clause on 'omp declare variant' directive}} #pragma omp declare variant(foo) match(implementation={xxx}) // expected-warning {{'xxx' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}} #pragma omp declare variant(foo) match(implementation={vendor}) // expected-warning {{the context selector 'vendor' in context set 'implementation' requires a context property defined in parentheses; selector ignored}} expected-note {{the ignored selector spans until here}} -#pragma omp declare variant(foo) match(implementation={vendor(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} -#pragma omp declare variant(foo) match(implementation={vendor()}) // expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} +#pragma omp declare variant(foo) match(implementation={vendor(}) // expected-error {{expected 
')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(implementation={vendor()}) // expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} #pragma omp declare variant(foo) match(implementation={vendor(score ibm)}) // expected-error {{expected '(' after 'score'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} -#pragma omp declare variant(foo) match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} -#pragma omp declare variant(foo) match(implementation={vendor(score(2 ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 
'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(implementation={vendor(score(2 ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} #pragma omp declare variant(foo) match(implementation={vendor(score(foo()) ibm)}) // expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{score expressions in the OpenMP context selector need to be constant; foo() is not and will be ignored}} #pragma omp declare variant(foo) match(implementation={vendor(score(5): ibm), vendor(llvm)}) // expected-warning {{the context selector 'vendor' was used already in the same 'omp declare variant' directive; selector ignored}} expected-note {{the previous context selector 'vendor' used here}} expected-note {{the ignored selector spans until here}} #pragma omp declare variant(foo) match(implementation={vendor(score(5): ibm), kind(cpu)}) // expected-warning {{the context selector 'kind' is not valid for the context set 'implementation'; selector ignored}} expected-note {{the context selector 'kind' can be nested in the context set 'device'; try 'match(device={kind(property)})'}} expected-note {{the ignored selector spans until here}} diff --git 
a/clang/test/OpenMP/declare_variant_messages.cpp b/clang/test/OpenMP/declare_variant_messages.cpp index 9671df189e31..c93fa4f91159 100644 --- a/clang/test/OpenMP/declare_variant_messages.cpp +++ b/clang/test/OpenMP/declare_variant_messages.cpp @@ -31,11 +31,11 @@ T foofoo(); #pragma omp declare variant(foofoo ) match(implementation = {vvv}) implementation // expected-warning {{'vvv' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}} expected-error {{expected 'match' clause on 'omp declare variant' directive}} #pragma omp declare variant(foofoo ) match(implementation={xxx}) // expected-warning {{'xxx' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}} #pragma omp declare variant(foofoo ) match(implementation={vendor}) // expected-warning {{the context selector 'vendor' in context set 'implementation' requires a context property defined in parentheses; selector ignored}} expected-note {{the ignored selector spans until here}} -#pragma omp declare variant(foofoo ) match(implementation={vendor(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} -#pragma omp declare variant(foofoo ) match(implementation={vendor()}) // expected-warning {{expected identifier or string literal 
describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} +#pragma omp declare variant(foofoo ) match(implementation={vendor(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foofoo ) match(implementation={vendor()}) // expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score ibm)}) // expected-error {{expected '(' after 'score'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} -#pragma omp declare variant(foofoo ) match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} -#pragma omp declare variant(foofoo ) match(implementation={vendor(score(2 ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} 
expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foofoo ) match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foofoo ) match(implementation={vendor(score(2 ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score(foofoo ()) ibm)}) // expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{score expressions in the OpenMP context selector need to be constant; foofoo() is not and will be ignored}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score(5): ibm), vendor(llvm)}) // expected-warning {{the context selector 'vendor' was used already in the same 'omp declare variant' directive; selector ignored}} expected-note {{the previous context selector 'vendor' used here}} expected-note {{the ignored selector spans until here}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score(5): ibm), 
kind(cpu)}) // expected-warning {{the context selector 'kind' is not valid for the context set 'implementation'; selector ignored}} expected-note {{the context selector 'kind' can be nested in the context set 'device'; try 'match(device={kind(property)})'}} expected-note {{the ignored selector spans until here}} @@ -74,8 +74,8 @@ int bar(); #pragma omp declare variant(foofoo ) match(implementation = {vvv} implementation) // expected-error {{expected ')'}} expected-warning {{'vvv' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}} expected-note {{to match this '('}} #pragma omp declare variant(foofoo ) match(implementation = {vvv}) xxx // expected-warning {{'vvv' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}} expected-error {{expected 'match' clause on 'omp declare variant' directive}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score ibm)}) // expected-error {{expected '(' after 'score'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} -#pragma omp declare variant(foofoo ) match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 
'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} -#pragma omp declare variant(foofoo ) match(implementation={vendor(score(C ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foofoo ) match(implementation={vendor(score( ibm)}) // expected-error {{use of undeclared identifier 'ibm'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} +#pragma omp declare variant(foofoo ) match(implementation={vendor(score(C ibm)}) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-warning {{expected '':'' after the score expression; '':'' assumed}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{to match this '('}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score(foofoo ()) ibm)}) // expected-warning {{expected '':'' after the score expression; '':'' assumed}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score(C+5): ibm), vendor(llvm)}) // expected-warning {{the 
context selector 'vendor' was used already in the same 'omp declare variant' directive; selector ignored}} expected-note {{the previous context selector 'vendor' used here}} expected-note {{the ignored selector spans until here}} #pragma omp declare variant(foofoo ) match(implementation={vendor(score(5): ibm), kind(cpu)}) // expected-warning {{the context selector 'kind' is not valid for the context set 'implementation'; selector ignored}} expected-note {{the context selector 'kind' can be nested in the context set 'device'; try 'match(device={kind(property)})'}} expected-note {{the ignored selector spans until here}} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 08bf5981cdc3..5e82925f1b83 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -1130,6 +1130,8 @@ __OMP_TRAIT_PROPERTY(implementation, vendor, gnu) __OMP_TRAIT_PROPERTY(implementation, vendor, ibm) __OMP_TRAIT_PROPERTY(implementation, vendor, intel) __OMP_TRAIT_PROPERTY(implementation, vendor, llvm) +__OMP_TRAIT_PROPERTY(implementation, vendor, nec) +__OMP_TRAIT_PROPERTY(implementation, vendor, nvidia) __OMP_TRAIT_PROPERTY(implementation, vendor, pgi) __OMP_TRAIT_PROPERTY(implementation, vendor, ti) __OMP_TRAIT_PROPERTY(implementation, vendor, unknown) From f552ba6e84057cad56e91e7c54170a60349d3330 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 Jan 2022 13:44:01 +0000 Subject: [PATCH 511/992] Revert "[Clang] Extend emitUnaryBuiltin to avoid duplicate logic." This reverts commit 5c57e6aa5777bddf9ddaca5d927f1b47a1a9d381. Reverted due to a typo in the authors name. Will recommit soon with fixed authorship. 
--- clang/lib/CodeGen/CGBuiltin.cpp | 89 ++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c1541ff0c846..1982b40ff667 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -532,13 +532,13 @@ static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. -static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, - unsigned IntrinsicID, - llvm::StringRef Name = "") { +static Value *emitUnaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0, Name); + return CGF.Builder.CreateCall(F, Src0); } // Emit an intrinsic that has 2 operands of the same type as its result. 
@@ -3122,24 +3122,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_elementwise_abs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Result; - QualType QT = E->getArg(0)->getType(); - - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isIntegerType()) + if (Op0->getType()->isIntOrIntVectorTy()) Result = Builder.CreateBinaryIntrinsic( - llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), - Builder.getFalse(), nullptr, "elt.abs"); + llvm::Intrinsic::abs, Op0, Builder.getFalse(), nullptr, "elt.abs"); else - Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs"); - + Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::fabs, Op0, nullptr, + "elt.abs"); return RValue::get(Result); } - case Builtin::BI__builtin_elementwise_ceil: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + case Builtin::BI__builtin_elementwise_ceil: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::ceil, Op0, + nullptr, "elt.ceil"); + return RValue::get(Result); + } case Builtin::BI__builtin_elementwise_max: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -3175,39 +3174,49 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_reduce_max: { - auto GetIntrinsicID = [](QualType QT) { - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isSignedIntegerType()) - return llvm::Intrinsic::vector_reduce_smax; - if (QT->isUnsignedIntegerType()) - return llvm::Intrinsic::vector_reduce_umax; - assert(QT->isFloatingType() && "must have a float here"); + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smax; + else + return 
llvm::Intrinsic::vector_reduce_umax; + } return llvm::Intrinsic::vector_reduce_fmax; }; - return RValue::get(emitUnaryBuiltin( - *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.min"); + return RValue::get(Result); } case Builtin::BI__builtin_reduce_min: { - auto GetIntrinsicID = [](QualType QT) { - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isSignedIntegerType()) - return llvm::Intrinsic::vector_reduce_smin; - if (QT->isUnsignedIntegerType()) - return llvm::Intrinsic::vector_reduce_umin; - assert(QT->isFloatingType() && "must have a float here"); + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smin; + else + return llvm::Intrinsic::vector_reduce_umin; + } return llvm::Intrinsic::vector_reduce_fmin; }; - - return RValue::get(emitUnaryBuiltin( - *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.min"); + return RValue::get(Result); } - case Builtin::BI__builtin_reduce_xor: - return RValue::get(emitUnaryBuiltin( - *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); + case Builtin::BI__builtin_reduce_xor: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + llvm::Intrinsic::vector_reduce_xor, Op0, nullptr, "rdx.xor"); + return RValue::get(Result); + } case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs(); From 82020de532108969294abd47991c8a08bbee1737 Mon Sep 17 00:00:00 2001 From: Jun Zhang 
Date: Tue, 4 Jan 2022 13:46:16 +0000 Subject: [PATCH 512/992] Recommit "[Clang] Extend emitUnaryBuiltin to avoid duplicate logic."" This reverts the revert commit f552ba6e84057cad56e91e7c54170a60349d3330. Recommit with fixed author name. --- clang/lib/CodeGen/CGBuiltin.cpp | 89 +++++++++++++++------------------ 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1982b40ff667..c1541ff0c846 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -532,13 +532,13 @@ static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. -static Value *emitUnaryBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { +static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, + unsigned IntrinsicID, + llvm::StringRef Name = "") { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0); + return CGF.Builder.CreateCall(F, Src0, Name); } // Emit an intrinsic that has 2 operands of the same type as its result. 
@@ -3122,24 +3122,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_elementwise_abs: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Result; - if (Op0->getType()->isIntOrIntVectorTy()) + QualType QT = E->getArg(0)->getType(); + + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isIntegerType()) Result = Builder.CreateBinaryIntrinsic( - llvm::Intrinsic::abs, Op0, Builder.getFalse(), nullptr, "elt.abs"); + llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), + Builder.getFalse(), nullptr, "elt.abs"); else - Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::fabs, Op0, nullptr, - "elt.abs"); - return RValue::get(Result); - } + Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs"); - case Builtin::BI__builtin_elementwise_ceil: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::ceil, Op0, - nullptr, "elt.ceil"); return RValue::get(Result); } + case Builtin::BI__builtin_elementwise_ceil: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + case Builtin::BI__builtin_elementwise_max: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); @@ -3174,50 +3175,40 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_reduce_max: { - auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { - if (IrTy->isIntOrIntVectorTy()) { - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isSignedIntegerType()) - return llvm::Intrinsic::vector_reduce_smax; - else - return llvm::Intrinsic::vector_reduce_umax; - } + auto GetIntrinsicID = [](QualType QT) { + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smax; + if (QT->isUnsignedIntegerType()) + return 
llvm::Intrinsic::vector_reduce_umax; + assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmax; }; - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic( - GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, - "rdx.min"); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin( + *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_min: { - auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { - if (IrTy->isIntOrIntVectorTy()) { - if (auto *VecTy = QT->getAs()) - QT = VecTy->getElementType(); - if (QT->isSignedIntegerType()) - return llvm::Intrinsic::vector_reduce_smin; - else - return llvm::Intrinsic::vector_reduce_umin; - } + auto GetIntrinsicID = [](QualType QT) { + if (auto *VecTy = QT->getAs()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smin; + if (QT->isUnsignedIntegerType()) + return llvm::Intrinsic::vector_reduce_umin; + assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmin; }; - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic( - GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, - "rdx.min"); - return RValue::get(Result); - } - case Builtin::BI__builtin_reduce_xor: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Result = Builder.CreateUnaryIntrinsic( - llvm::Intrinsic::vector_reduce_xor, Op0, nullptr, "rdx.xor"); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin( + *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } + case Builtin::BI__builtin_reduce_xor: + return RValue::get(emitUnaryBuiltin( + *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs(); Value *MatValue = 
EmitScalarExpr(E->getArg(0)); From 882c083889e6d56231e0efc59080376b2c96698a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 31 Dec 2021 23:15:53 +0000 Subject: [PATCH 513/992] [DAG] TargetLowering::SimplifySetCC - use APInt::getMinSignedBits() helper. NFC. --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2781c760d297..9cca6c895196 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3678,9 +3678,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Figure out how many bits we need to preserve this constant. - unsigned ReqdBits = Signed ? - C1.getBitWidth() - C1.getNumSignBits() + 1 : - C1.getActiveBits(); + unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits(); // Make sure we're not losing bits from the constant. if (MinBits > 0 && From 0a07c9662e67124b00e375aa4a395998d218b220 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 20 Dec 2021 14:36:27 +0100 Subject: [PATCH 514/992] [lldb/python] Fix dangling Event and CommandReturnObject references Unlike the rest of our SB objects, SBEvent and SBCommandReturnObject have the ability to hold non-owning pointers to their non-SB counterparts. This makes it hard to ensure the SB objects do not become dangling once their backing object goes away. While we could make these two objects behave like others, that would require plumbing even more shared pointers through our internal code (Event objects are mostly prepared for it, CommandReturnObject are not). Doing so seems unnecessarily disruptive, given that (unlike for some of the other objects) I don't see any good reason why would someone want to hold onto these objects after the function terminates.
For that reason, this patch implements a different approach -- the SB objects will still hold non-owning pointers, but they will be reset to the empty/default state as soon as the function terminates. This python code will not crash if the user decides to store these objects -- but the objects themselves will be useless/empty. Differential Revision: https://reviews.llvm.org/D116162 --- lldb/bindings/python/python-swigsafecast.swig | 43 +++++++++++++++---- lldb/bindings/python/python-wrapper.swig | 19 +++----- .../command/script/TestCommandScript.py | 10 +++++ .../commands/command/script/persistence.py | 4 +- 4 files changed, 54 insertions(+), 22 deletions(-) diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index 7d639e664f53..eb684133abef 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -1,19 +1,32 @@ namespace lldb_private { namespace python { -PyObject *SBTypeToSWIGWrapper(lldb::SBEvent &event_sb) { - return SWIG_NewPointerObj(&event_sb, SWIGTYPE_p_lldb__SBEvent, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBCommandReturnObject &cmd_ret_obj_sb) { - return SWIG_NewPointerObj(&cmd_ret_obj_sb, - SWIGTYPE_p_lldb__SBCommandReturnObject, 0); -} - PythonObject ToSWIGHelper(void *obj, swig_type_info *info) { return {PyRefType::Owned, SWIG_NewPointerObj(obj, info, SWIG_POINTER_OWN)}; } +/// A class that automatically clears an SB object when it goes out of scope. +/// Use for cases where the SB object points to a temporary/unowned entity. 
+template class ScopedPythonObject : PythonObject { +public: + ScopedPythonObject(T *sb, swig_type_info *info) + : PythonObject(ToSWIGHelper(sb, info)), m_sb(sb) {} + ~ScopedPythonObject() { + if (m_sb) + *m_sb = T(); + } + ScopedPythonObject(ScopedPythonObject &&rhs) + : PythonObject(std::move(rhs)), m_sb(std::exchange(rhs.m_sb, nullptr)) {} + ScopedPythonObject(const ScopedPythonObject &) = delete; + ScopedPythonObject &operator=(const ScopedPythonObject &) = delete; + ScopedPythonObject &operator=(ScopedPythonObject &&) = delete; + + const PythonObject &obj() const { return *this; } + +private: + T *m_sb; +}; + PythonObject ToSWIGWrapper(std::unique_ptr value_sb) { return ToSWIGHelper(value_sb.release(), SWIGTYPE_p_lldb__SBValue); } @@ -94,5 +107,17 @@ PythonObject ToSWIGWrapper(const SymbolContext &sym_ctx) { SWIGTYPE_p_lldb__SBSymbolContext); } +ScopedPythonObject +ToSWIGWrapper(CommandReturnObject &cmd_retobj) { + return ScopedPythonObject( + new lldb::SBCommandReturnObject(cmd_retobj), + SWIGTYPE_p_lldb__SBCommandReturnObject); +} + +ScopedPythonObject ToSWIGWrapper(Event *event) { + return ScopedPythonObject(new lldb::SBEvent(event), + SWIGTYPE_p_lldb__SBEvent); +} + } // namespace python } // namespace lldb_private diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index a2c1f756a0a2..4f1d65200b10 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -376,9 +376,8 @@ bool lldb_private::LLDBSWIGPythonCallThreadPlan( PythonObject result; if (event != nullptr) { - lldb::SBEvent sb_event(event); - PythonObject event_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_event)); - result = pfunc(event_arg); + ScopedPythonObject event_arg = ToSWIGWrapper(event); + result = pfunc(event_arg.obj()); } else result = pfunc(); @@ -795,7 +794,6 @@ bool lldb_private::LLDBSwigPythonCallCommand( lldb::DebuggerSP debugger, const char *args, lldb_private::CommandReturnObject &cmd_retobj, 
lldb::ExecutionContextRefSP exe_ctx_ref_sp) { - lldb::SBCommandReturnObject cmd_retobj_sb(cmd_retobj); PyErr_Cleaner py_err_cleaner(true); auto dict = PythonModule::MainModule().ResolveName( @@ -812,14 +810,13 @@ bool lldb_private::LLDBSwigPythonCallCommand( return false; } PythonObject debugger_arg = ToSWIGWrapper(std::move(debugger)); - PythonObject cmd_retobj_arg(PyRefType::Owned, - SBTypeToSWIGWrapper(cmd_retobj_sb)); + auto cmd_retobj_arg = ToSWIGWrapper(cmd_retobj); if (argc.get().max_positional_args < 5u) - pfunc(debugger_arg, PythonString(args), cmd_retobj_arg, dict); + pfunc(debugger_arg, PythonString(args), cmd_retobj_arg.obj(), dict); else pfunc(debugger_arg, PythonString(args), - ToSWIGWrapper(std::move(exe_ctx_ref_sp)), cmd_retobj_arg, dict); + ToSWIGWrapper(std::move(exe_ctx_ref_sp)), cmd_retobj_arg.obj(), dict); return true; } @@ -828,7 +825,6 @@ bool lldb_private::LLDBSwigPythonCallCommandObject( PyObject *implementor, lldb::DebuggerSP debugger, const char *args, lldb_private::CommandReturnObject &cmd_retobj, lldb::ExecutionContextRefSP exe_ctx_ref_sp) { - lldb::SBCommandReturnObject cmd_retobj_sb(cmd_retobj); PyErr_Cleaner py_err_cleaner(true); @@ -838,11 +834,10 @@ bool lldb_private::LLDBSwigPythonCallCommandObject( if (!pfunc.IsAllocated()) return false; - PythonObject cmd_retobj_arg(PyRefType::Owned, - SBTypeToSWIGWrapper(cmd_retobj_sb)); + auto cmd_retobj_arg = ToSWIGWrapper(cmd_retobj); pfunc(ToSWIGWrapper(std::move(debugger)), PythonString(args), - ToSWIGWrapper(exe_ctx_ref_sp), cmd_retobj_arg); + ToSWIGWrapper(exe_ctx_ref_sp), cmd_retobj_arg.obj()); return true; } diff --git a/lldb/test/API/commands/command/script/TestCommandScript.py b/lldb/test/API/commands/command/script/TestCommandScript.py index 33e6a00a404f..eed36c95ec32 100644 --- a/lldb/test/API/commands/command/script/TestCommandScript.py +++ b/lldb/test/API/commands/command/script/TestCommandScript.py @@ -167,7 +167,17 @@ def cleanup(): self.runCmd('bug11569', check=False) def 
test_persistence(self): + """ + Ensure that function arguments meaningfully persist (and do not crash!) + even after the function terminates. + """ self.runCmd("command script import persistence.py") self.runCmd("command script add -f persistence.save_debugger save_debugger") self.expect("save_debugger", substrs=[str(self.dbg)]) + + # After the command completes, the debugger object should still be + # valid. self.expect("script str(persistence.debugger_copy)", substrs=[str(self.dbg)]) + # The result object will be replaced by an empty result object (in the + # "Started" state). + self.expect("script str(persistence.result_copy)", substrs=["Started"]) diff --git a/lldb/test/API/commands/command/script/persistence.py b/lldb/test/API/commands/command/script/persistence.py index bc08b4f4dcf4..5c0be34a7d18 100644 --- a/lldb/test/API/commands/command/script/persistence.py +++ b/lldb/test/API/commands/command/script/persistence.py @@ -1,9 +1,11 @@ import lldb debugger_copy = None +result_copy = None def save_debugger(debugger, command, context, result, internal_dict): - global debugger_copy + global debugger_copy, result_copy debugger_copy = debugger + result_copy = result result.AppendMessage(str(debugger)) result.SetStatus(lldb.eReturnStatusSuccessFinishResult) From 4c2aba999e54270871d5ff8f6c765e8b834dfe27 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 4 Jan 2022 14:50:37 +0100 Subject: [PATCH 515/992] [VP][ISel] use LEGALPOS for legalization action Use the VPIntrinsics.def's LEGALPOS that is specified with every VP SDNode to determine which return or operand value type shall be used to infer the legalization action. 
Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D116594 --- llvm/include/llvm/IR/VPIntrinsics.def | 6 +++--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 9 ++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 121c8bbc6c27..3b360d8d1506 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -39,9 +39,9 @@ // same name. Since the operands are also the same, we open the property // scopes for both the VPIntrinsic and the SDNode at once. // \p VPSD The SelectionDAG Node id (eg VP_ADD). -// \p LEGALPOS The operand position of the SDNode that is used for legalizing -// this SDNode. This can be `-1`, in which case the return type of -// the SDNode is used. +// \p LEGALPOS The operand position of the SDNode that is used for legalizing. +// If LEGALPOS < 0, then the return type given by +// TheNode->getValueType(-1-LEGALPOS) is used. // \p TDNAME The name of the TableGen definition of this SDNode. // \p MASKPOS The mask operand position. // \p EVLPOS The explicit vector length operand position. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 96c5a79cf995..cbb28863850f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -350,7 +350,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: - case ISD::VP_SELECT: case ISD::SELECT_CC: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -459,6 +458,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; } + +#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ + case ISD::VPID: { \ + EVT LegalizeVT = LEGALPOS < 0 ? 
Node->getValueType(-(1 + LEGALPOS)) \ + : Node->getOperand(LEGALPOS).getValueType(); \ + Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ + } break; +#include "llvm/IR/VPIntrinsics.def" } LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); From 21d299172e23a37fc251ce69dc31032a6a4a2bfe Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Tue, 4 Jan 2022 13:43:40 +0000 Subject: [PATCH 516/992] [Flang] Enable support for conversion of recursive record types Uses the recursive type conversion implemented in D113579, D113580. Tests check for recursive and mutually recursive types. Note: The downstream implementation for recursive types is a bit old and is based on a static map. This was removed while upstreaming (https://reviews.llvm.org/D112961) based on review comments. Since the recursive type conversion is now available in MLIR we are using that. If this patch is accepted we can use the same in the downstream implementation. Part of upstreaming flang from fir-dev branch of https://github.com/flang-compiler/f18-llvm-project. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D115937 Co-authored-by: Eric Schweitz --- flang/lib/Optimizer/CodeGen/TypeConverter.h | 21 +++++++++++++++------ flang/test/Fir/recursive-type.fir | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 flang/test/Fir/recursive-type.fir diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.h b/flang/lib/Optimizer/CodeGen/TypeConverter.h index 91e8ebfea7d7..d96b752be1b8 100644 --- a/flang/lib/Optimizer/CodeGen/TypeConverter.h +++ b/flang/lib/Optimizer/CodeGen/TypeConverter.h @@ -80,8 +80,10 @@ class LLVMTypeConverter : public mlir::LLVMTypeConverter { }); addConversion( [&](fir::PointerType pointer) { return convertPointerLike(pointer); }); - addConversion( - [&](fir::RecordType derived) { return convertRecordType(derived); }); + addConversion([&](fir::RecordType derived, SmallVectorImpl &results, + ArrayRef callStack) { + return convertRecordType(derived, results, callStack); + }); addConversion([&](fir::FieldType field) { // Convert to i32 because of LLVM GEP indexing restriction. return mlir::IntegerType::get(field.getContext(), 32); @@ -127,16 +129,23 @@ class LLVMTypeConverter : public mlir::LLVMTypeConverter { mlir::Type indexType() { return mlir::IntegerType::get(&getContext(), 64); } // fir.type --> llvm<"%name = { ty... 
}"> - mlir::Type convertRecordType(fir::RecordType derived) { + llvm::Optional + convertRecordType(fir::RecordType derived, SmallVectorImpl &results, + ArrayRef callStack) { auto name = derived.getName(); auto st = mlir::LLVM::LLVMStructType::getIdentified(&getContext(), name); + if (llvm::count(callStack, derived) > 1) { + results.push_back(st); + return success(); + } llvm::SmallVector members; for (auto mem : derived.getTypeList()) { members.push_back(convertType(mem.second).cast()); } - if (mlir::succeeded(st.setBody(members, /*isPacked=*/false))) - return st; - return mlir::Type(); + if (mlir::failed(st.setBody(members, /*isPacked=*/false))) + return failure(); + results.push_back(st); + return success(); } // Is an extended descriptor needed given the element type of a fir.box type ? diff --git a/flang/test/Fir/recursive-type.fir b/flang/test/Fir/recursive-type.fir new file mode 100644 index 000000000000..bd97f9e6225e --- /dev/null +++ b/flang/test/Fir/recursive-type.fir @@ -0,0 +1,19 @@ +// Test lowering FIR to LLVM IR for recursive types + +// RUN: fir-opt --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s +// RUN: fir-opt --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s +// RUN: fir-opt --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s +// RUN: fir-opt --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s + +!t1 = type !fir.type>}> +!t2 = type !fir.type>,b3:i32,b4:!fir.ptr>}> +!t3 = type !fir.type>}> +!t4 = type !fir.type>}> + +// CHECK-LABEL: llvm.func @recursiveTypes +// CHECK-SAME: %{{.*}}: !llvm.struct<"[[T1:.*]]", (ptr>)> +// CHECK-SAME: %{{.*}}: !llvm.struct<"[[T2:.*]]", (f32, ptr>, i32, ptr>)> +// CHECK-SAME: %{{.*}}: !llvm.struct<"[[T3:.*]]", (ptr>)>>)>, %{{.*}}: !llvm.struct<"[[T4]]", (ptr>)>>)>) +func @recursiveTypes(%a : !t1, %b : !t2, %c : !t3, %d : !t4) { + return +} From 73205feb00f57e8a976c385b1e7523683ec26d00 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 
Jan 2022 14:55:32 +0100 Subject: [PATCH 517/992] [CodeGen] Regenerate test checks (NFC) Switch these tests to use update_cc_test_checks.py to simplify future updates. --- clang/test/CodeGen/clear_cache.c | 9 +- clang/test/CodeGenCXX/for-range.cpp | 189 +++++++++++++++++----------- 2 files changed, 121 insertions(+), 77 deletions(-) diff --git a/clang/test/CodeGen/clear_cache.c b/clang/test/CodeGen/clear_cache.c index 7bbcc03e21ca..e50951e18818 100644 --- a/clang/test/CodeGen/clear_cache.c +++ b/clang/test/CodeGen/clear_cache.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s char buffer[32] = "This is a largely unused buffer"; @@ -5,8 +6,14 @@ char buffer[32] = "This is a largely unused buffer"; // __builtin___clear_cache always maps to @llvm.clear_cache, but what // each back-end produces is different, and this is tested in LLVM +// CHECK-LABEL: @main( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 0, i64 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 0, i64 0), i64 32)) +// CHECK-NEXT: ret i32 0 +// int main() { __builtin___clear_cache(buffer, buffer+32); -// CHECK: @llvm.clear_cache(i8* getelementptr inbounds ({{.*}}, i8* getelementptr inbounds (i8, i8* getelementptr inbounds ({{.*}} 32)) return 0; } diff --git a/clang/test/CodeGenCXX/for-range.cpp b/clang/test/CodeGenCXX/for-range.cpp index 98265074adf7..c3b6eaa3eafc 100644 --- a/clang/test/CodeGenCXX/for-range.cpp +++ b/clang/test/CodeGenCXX/for-range.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -std=c++11 -emit-llvm -o - %s | opt -instnamer -S | FileCheck %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: 
%clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm -o - %s | FileCheck %s struct A { A(); @@ -32,95 +33,131 @@ B *end(C&); extern B array[5]; -// CHECK-LABEL: define{{.*}} void @_Z9for_arrayv( +// CHECK-LABEL: @_Z9for_arrayv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1 +// CHECK-NEXT: [[__RANGE1:%.*]] = alloca [5 x %struct.B]*, align 8 +// CHECK-NEXT: [[__BEGIN1:%.*]] = alloca %struct.B*, align 8 +// CHECK-NEXT: [[__END1:%.*]] = alloca %struct.B*, align 8 +// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1 +// CHECK-NEXT: call void @_ZN1AC1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) +// CHECK-NEXT: store [5 x %struct.B]* @array, [5 x %struct.B]** [[__RANGE1]], align 8 +// CHECK-NEXT: store %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 0, i64 0), %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: store %struct.B* getelementptr inbounds ([[STRUCT_B]], %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 0, i64 0), i64 5), %struct.B** [[__END1]], align 8 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load %struct.B*, %struct.B** [[__END1]], align 8 +// CHECK-NEXT: [[CMP:%.*]] = icmp ne %struct.B* [[TMP0]], [[TMP1]] +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: call void @_ZN1BC1ERKS_(%struct.B* nonnull align 1 dereferenceable(1) [[B]], %struct.B* nonnull align 1 dereferenceable(1) [[TMP2]]) +// CHECK-NEXT: call void @_ZN1BD1Ev(%struct.B* nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: 
[[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[TMP3]], i32 1 +// CHECK-NEXT: store %struct.B* [[INCDEC_PTR]], %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: br label [[FOR_COND]] +// CHECK: for.end: +// CHECK-NEXT: call void @_ZN1AD1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]] +// CHECK-NEXT: ret void +// void for_array() { - // CHECK: call void @_ZN1AC1Ev(%struct.A* [[A:.*]]) A a; for (B b : array) { - // CHECK-NOT: 5begin - // CHECK-NOT: 3end - // CHECK: getelementptr {{.*}}, i64 0 - // CHECK: getelementptr {{.*}}, i64 5 - // CHECK: br label %[[COND:.*]] - - // CHECK: [[COND]]: - // CHECK: %[[CMP:.*]] = icmp ne - // CHECK: br i1 %[[CMP]], label %[[BODY:.*]], label %[[END:.*]] - - // CHECK: [[BODY]]: - // CHECK: call void @_ZN1BC1ERKS_( - // CHECK: call void @_ZN1BD1Ev( - // CHECK: br label %[[INC:.*]] - - // CHECK: [[INC]]: - // CHECK: getelementptr {{.*}} i32 1 - // CHECK: br label %[[COND]] } - // CHECK: [[END]]: - // CHECK: call void @_ZN1AD1Ev(%struct.A* [[A]]) - // CHECK: ret void } -// CHECK-LABEL: define{{.*}} void @_Z9for_rangev( +// CHECK-LABEL: @_Z9for_rangev( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1 +// CHECK-NEXT: [[__RANGE1:%.*]] = alloca %struct.C*, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_C:%.*]], align 1 +// CHECK-NEXT: [[__BEGIN1:%.*]] = alloca %struct.B*, align 8 +// CHECK-NEXT: [[__END1:%.*]] = alloca %struct.B*, align 8 +// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1 +// CHECK-NEXT: call void @_ZN1AC1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) +// CHECK-NEXT: call void @_ZN1CC1Ev(%struct.C* nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK-NEXT: store %struct.C* [[REF_TMP]], %struct.C** [[__RANGE1]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.C*, %struct.C** [[__RANGE1]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call %struct.B* @_Z5beginR1C(%struct.C* nonnull align 1 dereferenceable(1) [[TMP0]]) 
+// CHECK-NEXT: store %struct.B* [[CALL]], %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load %struct.C*, %struct.C** [[__RANGE1]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call %struct.B* @_Z3endR1C(%struct.C* nonnull align 1 dereferenceable(1) [[TMP1]]) +// CHECK-NEXT: store %struct.B* [[CALL1]], %struct.B** [[__END1]], align 8 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.B*, %struct.B** [[__END1]], align 8 +// CHECK-NEXT: [[CMP:%.*]] = icmp ne %struct.B* [[TMP2]], [[TMP3]] +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: call void @_ZN1CD1Ev(%struct.C* nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR3]] +// CHECK-NEXT: br label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: call void @_ZN1BC1ERKS_(%struct.B* nonnull align 1 dereferenceable(1) [[B]], %struct.B* nonnull align 1 dereferenceable(1) [[TMP4]]) +// CHECK-NEXT: call void @_ZN1BD1Ev(%struct.B* nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3]] +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP5:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[TMP5]], i32 1 +// CHECK-NEXT: store %struct.B* [[INCDEC_PTR]], %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: br label [[FOR_COND]] +// CHECK: for.end: +// CHECK-NEXT: call void @_ZN1AD1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]] +// CHECK-NEXT: ret void +// void for_range() { - // CHECK: call void @_ZN1AC1Ev(%struct.A* [[A:.*]]) A a; for (B b : C()) { - // CHECK: call void @_ZN1CC1Ev( - // CHECK: = call %struct.B* @_Z5beginR1C( - // CHECK: = call %struct.B* @_Z3endR1C( - // 
CHECK: br label %[[COND:.*]] - - // CHECK: [[COND]]: - // CHECK: %[[CMP:.*]] = icmp ne - // CHECK: br i1 %[[CMP]], label %[[BODY:.*]], label %[[CLEANUP:.*]] - - // CHECK: [[CLEANUP]]: - // CHECK: call void @_ZN1CD1Ev( - // CHECK: br label %[[END:.*]] - - // CHECK: [[BODY]]: - // CHECK: call void @_ZN1BC1ERKS_( - // CHECK: call void @_ZN1BD1Ev( - // CHECK: br label %[[INC:.*]] - - // CHECK: [[INC]]: - // CHECK: getelementptr {{.*}} i32 1 - // CHECK: br label %[[COND]] } - // CHECK: [[END]]: - // CHECK: call void @_ZN1AD1Ev(%struct.A* [[A]]) - // CHECK: ret void } -// CHECK-LABEL: define{{.*}} void @_Z16for_member_rangev( +// CHECK-LABEL: @_Z16for_member_rangev( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1 +// CHECK-NEXT: [[__RANGE1:%.*]] = alloca %struct.D*, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_D:%.*]], align 1 +// CHECK-NEXT: [[__BEGIN1:%.*]] = alloca %struct.B*, align 8 +// CHECK-NEXT: [[__END1:%.*]] = alloca %struct.B*, align 8 +// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1 +// CHECK-NEXT: call void @_ZN1AC1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) +// CHECK-NEXT: call void @_ZN1DC1Ev(%struct.D* nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK-NEXT: store %struct.D* [[REF_TMP]], %struct.D** [[__RANGE1]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.D*, %struct.D** [[__RANGE1]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call %struct.B* @_ZN1D5beginEv(%struct.D* nonnull align 1 dereferenceable(1) [[TMP0]]) +// CHECK-NEXT: store %struct.B* [[CALL]], %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load %struct.D*, %struct.D** [[__RANGE1]], align 8 +// CHECK-NEXT: [[CALL1:%.*]] = call %struct.B* @_ZN1D3endEv(%struct.D* nonnull align 1 dereferenceable(1) [[TMP1]]) +// CHECK-NEXT: store %struct.B* [[CALL1]], %struct.B** [[__END1]], align 8 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.B*, 
%struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.B*, %struct.B** [[__END1]], align 8 +// CHECK-NEXT: [[CMP:%.*]] = icmp ne %struct.B* [[TMP2]], [[TMP3]] +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +// CHECK: for.cond.cleanup: +// CHECK-NEXT: call void @_ZN1DD1Ev(%struct.D* nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR3]] +// CHECK-NEXT: br label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: call void @_ZN1BC1ERKS_(%struct.B* nonnull align 1 dereferenceable(1) [[B]], %struct.B* nonnull align 1 dereferenceable(1) [[TMP4]]) +// CHECK-NEXT: call void @_ZN1BD1Ev(%struct.B* nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3]] +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP5:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[TMP5]], i32 1 +// CHECK-NEXT: store %struct.B* [[INCDEC_PTR]], %struct.B** [[__BEGIN1]], align 8 +// CHECK-NEXT: br label [[FOR_COND]] +// CHECK: for.end: +// CHECK-NEXT: call void @_ZN1AD1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]] +// CHECK-NEXT: ret void +// void for_member_range() { - // CHECK: call void @_ZN1AC1Ev(%struct.A* [[A:.*]]) A a; for (B b : D()) { - // CHECK: call void @_ZN1DC1Ev( - // CHECK: = call %struct.B* @_ZN1D5beginEv( - // CHECK: = call %struct.B* @_ZN1D3endEv( - // CHECK: br label %[[COND:.*]] - - // CHECK: [[COND]]: - // CHECK: %[[CMP:.*]] = icmp ne - // CHECK: br i1 %[[CMP]], label %[[BODY:.*]], label %[[CLEANUP:.*]] - - // CHECK: [[CLEANUP]]: - // CHECK: call void @_ZN1DD1Ev( - // CHECK: br label %[[END:.*]] - - // CHECK: [[BODY]]: - // CHECK: call void @_ZN1BC1ERKS_( - // CHECK: call void @_ZN1BD1Ev( - // CHECK: br label %[[INC:.*]] - - // CHECK: [[INC]]: - // CHECK: getelementptr {{.*}} i32 1 - // CHECK: 
br label %[[COND]] } - // CHECK: [[END]]: - // CHECK: call void @_ZN1AD1Ev(%struct.A* [[A]]) - // CHECK: ret void } From c31cf74c3c3ca8ddd6d695ae7591f6cbfee54a6a Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 4 Jan 2022 09:17:37 -0500 Subject: [PATCH 518/992] [libc++] Add `return 0` to some main() functions This unbreaks the tests when running in freestanding mode. --- .../std/utilities/optional/optional.monadic/and_then.pass.cpp | 1 + .../std/utilities/optional/optional.monadic/or_else.pass.cpp | 1 + .../std/utilities/optional/optional.monadic/transform.pass.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp index b2de9c7188db..61c61ff73785 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp @@ -259,4 +259,5 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp index 2d9a81ac883c..ccc94ab9be2c 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp @@ -69,4 +69,5 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp index 59c559d149af..209c8a34da05 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp @@ -202,4 +202,5 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } From 
d74212987b35c94ddef3aa5c18096fc1f4ee5112 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 11:53:28 +0100 Subject: [PATCH 519/992] [ConstantFold] Remove unnecessary bounded index restriction The fold for merging a GEP of GEP into a single GEP currently bails if doing so would result in notional overindexing. The justification given in the comment above this check is dangerously incorrect: GEPs with notional overindexing are perfectly fine, and if some code treats them incorrectly, then that code is broken, not the GEP. Such a GEP might legally appear in source IR, so only preventing its creation cannot be sufficient. (The constant folder also ends up canonicalizing the GEP to remove the notional overindexing, but that's neither here nor there.) This check dates back to https://github.com/llvm/llvm-project/commit/bd4fef4a8939db18f39b108e19097b25e2c7c47a, and as far as I can tell the original issue this was trying to patch around has since been resolved. Differential Revision: https://reviews.llvm.org/D116587 --- clang/test/CodeGen/clear_cache.c | 2 +- .../cxx11-initializer-aggregate.cpp | 2 +- clang/test/CodeGenCXX/for-range.cpp | 2 +- .../CodeGenCXX/global-array-destruction.cpp | 6 +- .../CodeGenCXX/template-param-objects.cpp | 2 +- .../test/OpenMP/for_firstprivate_codegen.cpp | 17 +- clang/test/OpenMP/parallel_copyin_codegen.cpp | 113 +- .../OpenMP/sections_firstprivate_codegen.cpp | 17 +- clang/test/OpenMP/single_codegen.cpp | 30 +- .../OpenMP/single_firstprivate_codegen.cpp | 17 +- ..._teams_distribute_firstprivate_codegen.cpp | 24 +- ...bute_parallel_for_firstprivate_codegen.cpp | 24 +- ...istribute_parallel_for_private_codegen.cpp | 25 +- ...parallel_for_simd_firstprivate_codegen.cpp | 68 +- ...bute_parallel_for_simd_private_codegen.cpp | 954 +++++----- ...arget_teams_distribute_private_codegen.cpp | 25 +- ...s_distribute_simd_firstprivate_codegen.cpp | 48 +- ..._teams_distribute_simd_private_codegen.cpp | 48 +- 
.../teams_distribute_firstprivate_codegen.cpp | 24 +- ...bute_parallel_for_firstprivate_codegen.cpp | 24 +- ...istribute_parallel_for_private_codegen.cpp | 25 +- ...parallel_for_simd_firstprivate_codegen.cpp | 48 +- ...bute_parallel_for_simd_private_codegen.cpp | 516 +++--- .../teams_distribute_private_codegen.cpp | 25 +- ...s_distribute_simd_firstprivate_codegen.cpp | 48 +- .../teams_distribute_simd_private_codegen.cpp | 336 ++-- clang/test/OpenMP/threadprivate_codegen.cpp | 1638 ++++++++--------- llvm/lib/IR/ConstantFold.cpp | 24 +- llvm/test/Transforms/SCCP/apint-bigint2.ll | 2 +- ...eferenceable-ptr-with-undereferenceable.ll | 10 +- 30 files changed, 2059 insertions(+), 2085 deletions(-) diff --git a/clang/test/CodeGen/clear_cache.c b/clang/test/CodeGen/clear_cache.c index e50951e18818..2ba0abe06254 100644 --- a/clang/test/CodeGen/clear_cache.c +++ b/clang/test/CodeGen/clear_cache.c @@ -10,7 +10,7 @@ char buffer[32] = "This is a largely unused buffer"; // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK-NEXT: call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 0, i64 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 0, i64 0), i64 32)) +// CHECK-NEXT: call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 1, i64 0)) // CHECK-NEXT: ret i32 0 // int main() { diff --git a/clang/test/CodeGenCXX/cxx11-initializer-aggregate.cpp b/clang/test/CodeGenCXX/cxx11-initializer-aggregate.cpp index 4e858c54b769..6263fdff5caa 100644 --- a/clang/test/CodeGenCXX/cxx11-initializer-aggregate.cpp +++ b/clang/test/CodeGenCXX/cxx11-initializer-aggregate.cpp @@ -77,7 +77,7 @@ int &fn2(int &v) { // CHECK: br i1 // // CHECK: getelementptr inbounds {{.*}}, i64 1 -// CHECK: icmp eq {{.*}}, i64 30 +// CHECK: icmp 
eq {{.*}}, getelementptr inbounds {{.*}}, i64 1, i64 0 // CHECK: br i1 // // CHECK: call i32 @__cxa_atexit( diff --git a/clang/test/CodeGenCXX/for-range.cpp b/clang/test/CodeGenCXX/for-range.cpp index c3b6eaa3eafc..52951ea9652c 100644 --- a/clang/test/CodeGenCXX/for-range.cpp +++ b/clang/test/CodeGenCXX/for-range.cpp @@ -43,7 +43,7 @@ extern B array[5]; // CHECK-NEXT: call void @_ZN1AC1Ev(%struct.A* nonnull align 1 dereferenceable(1) [[A]]) // CHECK-NEXT: store [5 x %struct.B]* @array, [5 x %struct.B]** [[__RANGE1]], align 8 // CHECK-NEXT: store %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 0, i64 0), %struct.B** [[__BEGIN1]], align 8 -// CHECK-NEXT: store %struct.B* getelementptr inbounds ([[STRUCT_B]], %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 0, i64 0), i64 5), %struct.B** [[__END1]], align 8 +// CHECK-NEXT: store %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 1, i64 0), %struct.B** [[__END1]], align 8 // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[TMP0:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8 diff --git a/clang/test/CodeGenCXX/global-array-destruction.cpp b/clang/test/CodeGenCXX/global-array-destruction.cpp index 1ae7b72838bd..0f280e000112 100644 --- a/clang/test/CodeGenCXX/global-array-destruction.cpp +++ b/clang/test/CodeGenCXX/global-array-destruction.cpp @@ -39,7 +39,7 @@ struct T { T t[2][3] = { 1.0, 2, 3.0, 4, 5.0, 6, 7.0, 8, 9.0, 10, 11.0, 12 }; // CHECK: call {{.*}} @__cxa_atexit -// CHECK: getelementptr inbounds ({{.*}} getelementptr inbounds ([2 x [3 x {{.*}}]], [2 x [3 x {{.*}}]]* @t, i32 0, i32 0, i32 0), i64 6) +// CHECK: getelementptr inbounds ([2 x [3 x {{.*}}]], [2 x [3 x {{.*}}]]* @t, i64 1, i64 0, i64 0) // CHECK: call void @_ZN1TD1Ev // CHECK: icmp eq {{.*}} @t // CHECK: br i1 {{.*}} @@ -47,7 +47,7 @@ T t[2][3] = { 1.0, 2, 3.0, 4, 5.0, 6, 7.0, 8, 9.0, 10, 11.0, 12 }; static T t2[2][3] = { 
1.0, 2, 3.0, 4, 5.0, 6, 7.0, 8, 9.0, 10, 11.0, 12 }; // CHECK: call {{.*}} @__cxa_atexit -// CHECK: getelementptr inbounds ({{.*}} getelementptr inbounds ([2 x [3 x {{.*}}]], [2 x [3 x {{.*}}]]* @_ZL2t2, i32 0, i32 0, i32 0), i64 6) +// CHECK: getelementptr inbounds ([2 x [3 x {{.*}}]], [2 x [3 x {{.*}}]]* @_ZL2t2, i64 1, i64 0, i64 0) // CHECK: call void @_ZN1TD1Ev // CHECK: icmp eq {{.*}} @_ZL2t2 // CHECK: br i1 {{.*}} @@ -56,7 +56,7 @@ using U = T[2][3]; U &&u = U{ {{1.0, 2}, {3.0, 4}, {5.0, 6}}, {{7.0, 8}, {9.0, 10}, {11.0, 12}} }; // CHECK: call {{.*}} @__cxa_atexit -// CHECK: getelementptr inbounds ({{.*}}* getelementptr inbounds ([2 x [3 x {{.*}}]], [2 x [3 x {{.*}}]]* @_ZGR1u_, i32 0, i32 0, i32 0), i64 6) +// CHECK: getelementptr inbounds ([2 x [3 x {{.*}}]], [2 x [3 x {{.*}}]]* @_ZGR1u_, i64 1, i64 0, i64 0) // CHECK: call void @_ZN1TD1Ev // CHECK: icmp eq {{.*}} @_ZGR1u_ // CHECK: br i1 {{.*}} diff --git a/clang/test/CodeGenCXX/template-param-objects.cpp b/clang/test/CodeGenCXX/template-param-objects.cpp index fa50c1f6c2d7..bbff51b0c8c1 100644 --- a/clang/test/CodeGenCXX/template-param-objects.cpp +++ b/clang/test/CodeGenCXX/template-param-objects.cpp @@ -15,5 +15,5 @@ template constexpr const char *end() { return s.buf + __builtin_strlen(s.bu const char *p = begin(); // ITANIUM: @q // MSABI: @"?q@@3PEBDEB" -// CHECK-SAME: global i8* getelementptr (i8, i8* getelementptr inbounds ({{.*}}* [[HELLO]], i32 0, i32 0, i32 0, i32 0), i64 11) +// CHECK-SAME: global i8* getelementptr ({{.*}}* [[HELLO]], i32 0, i32 0, i32 0, i64 11) const char *q = end(); diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp index 88538c93c302..6beeda328c06 100644 --- a/clang/test/OpenMP/for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp @@ -227,8 +227,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: 
arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -820,8 +820,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] 
+// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1413,8 +1413,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1638,8 +1638,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK4-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1848,4 +1848,3 @@ int main() { // CHECK4-NEXT: call void @__cxx_global_var_init.2() // CHECK4-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp index e087090553ce..5d5a58f723f6 100644 --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -222,7 +222,7 @@ void foo() { // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE5s_arr to i8*) acquire, align 8 // CHECK1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], 
label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK1: init.check: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE5s_arr) #[[ATTR3]] // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -238,7 +238,7 @@ void foo() { // CHECK1: init.end: // CHECK1-NEXT: [[TMP4:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE3var to i8*) acquire, align 8 // CHECK1-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF2]] +// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF3]] // CHECK1: init.check2: // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE3var) #[[ATTR3]] // CHECK1-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[TMP5]], 0 @@ -345,8 +345,8 @@ void foo() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: 
call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -474,7 +474,7 @@ void foo() { // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*) acquire, align 8 // CHECK1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2]] +// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK1: init.check: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ5tmainIiET_vE5s_arr) #[[ATTR3]] // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -490,7 +490,7 @@ void foo() { // CHECK1: init.end: // CHECK1-NEXT: [[TMP4:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*) acquire, align 8 // CHECK1-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF2]] +// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF3]] // CHECK1: init.check2: // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ5tmainIiET_vE3var) #[[ATTR3]] // CHECK1-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[TMP5]], 0 @@ -639,8 +639,8 @@ void foo() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([[STRUCT_S_0:%.*]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -808,7 +808,7 @@ void foo() { // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE5s_arr to i8*) acquire, align 8 // CHECK2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK2: init.check: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE5s_arr) #[[ATTR3]] // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -824,7 +824,7 
@@ void foo() { // CHECK2: init.end: // CHECK2-NEXT: [[TMP4:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE3var to i8*) acquire, align 8 // CHECK2-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF2]] +// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF3]] // CHECK2: init.check2: // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE3var) #[[ATTR3]] // CHECK2-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[TMP5]], 0 @@ -931,8 +931,8 @@ void foo() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] 
@@ -1060,7 +1060,7 @@ void foo() { // CHECK2-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR3]] // CHECK2-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*) acquire, align 8 // CHECK2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2]] +// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK2: init.check: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ5tmainIiET_vE5s_arr) #[[ATTR3]] // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1076,7 +1076,7 @@ void foo() { // CHECK2: init.end: // CHECK2-NEXT: [[TMP4:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*) acquire, align 8 // CHECK2-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF2]] +// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END5:%.*]], !prof [[PROF3]] // CHECK2: init.check2: // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ5tmainIiET_vE3var) #[[ATTR3]] // CHECK2-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[TMP5]], 0 @@ -1225,8 +1225,8 @@ void foo() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([[STRUCT_S_0:%.*]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1491,7 +1491,7 @@ void foo() { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ10array_funcvE1s to i8*) acquire, align 8 // CHECK5-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK5-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK5-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK5: init.check: // CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ10array_funcvE1s) #[[ATTR1:[0-9]+]] // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1504,7 +1504,7 @@ void foo() { // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), [[INIT]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYCTOR_CUR]]) // CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[ARRAYCTOR_CUR]], i64 1 -// 
CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.St* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[STRUCT_ST]], %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), i64 2) +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.St* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0) // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: // CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR1]] @@ -1582,8 +1582,8 @@ void foo() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([[STRUCT_ST:%.*]], %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR1]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.St* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0) // CHECK5-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1675,7 +1675,7 @@ void foo() { // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4:[0-9]+]] // CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE5s_arr, align 1 // CHECK11-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK11: init.check: // CHECK11-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 0, i64 0), float 1.000000e+00) // CHECK11-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 0, i64 1), float 2.000000e+00) @@ -1685,7 +1685,7 @@ void foo() { // CHECK11: init.end: // CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* @_ZGVZ4mainE3var, align 1 // CHECK11-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP2]], 0 -// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF2]] +// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF3]] // CHECK11: init.check2: // CHECK11-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, float 3.000000e+00) // CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S*)* @_ZN1SIfED1Ev to void (i8*)*), i8* bitcast (%struct.S* @_ZZ4mainE3var to i8*), i8* @__dso_handle) #[[ATTR4]] @@ -1752,8 +1752,8 @@ void foo() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // 
CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1789,14 +1789,14 @@ void foo() { // CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i8* align 4 [[TMP7]], i64 8, i1 false) // CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK11-NEXT: br i1 icmp eq (%struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), %struct.S* getelementptr ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: br i1 icmp eq (%struct.S* getelementptr inbounds ([2 x 
%struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: // CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr ([[STRUCT_S]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0) // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: // CHECK11-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* 
nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, %struct.S* nonnull align 4 dereferenceable(4) [[TMP3]]) @@ -1849,7 +1849,7 @@ void foo() { // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*), align 8 // CHECK11-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2]] +// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK11: init.check: // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), i32 1) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 1), i32 2) @@ -1859,7 +1859,7 @@ void foo() { // CHECK11: init.end: // CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*), align 8 // CHECK11-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP2]], 0 -// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF2]] +// CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF3]] // CHECK11: init.check2: // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, i32 3) // CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S.0*)* @_ZN1SIiED1Ev to void (i8*)*), i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i8* @__dso_handle) #[[ATTR4]] @@ -1962,8 +1962,8 @@ void foo() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], 
align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([[STRUCT_S_0:%.*]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1999,14 +1999,14 @@ void foo() { // CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i8* align 128 [[TMP7]], i64 8, i1 false) // CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK11-NEXT: br i1 icmp eq (%struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.0* getelementptr ([[STRUCT_S_0:%.*]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, 
i32 0), i64 2)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: br i1 icmp eq (%struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: // CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr ([[STRUCT_S_0]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0) // 
CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: // CHECK11-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]]) @@ -2100,7 +2100,7 @@ void foo() { // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4:[0-9]+]] // CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE5s_arr, align 1 // CHECK12-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK12: init.check: // CHECK12-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 0, i64 0), float 1.000000e+00) // CHECK12-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 0, i64 1), float 2.000000e+00) @@ -2110,7 +2110,7 @@ void foo() { // CHECK12: init.end: // CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* @_ZGVZ4mainE3var, align 1 // CHECK12-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP2]], 0 -// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF2]] +// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF3]] // CHECK12: init.check2: // CHECK12-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, float 3.000000e+00) // CHECK12-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void 
(%struct.S*)* @_ZN1SIfED1Ev to void (i8*)*), i8* bitcast (%struct.S* @_ZZ4mainE3var to i8*), i8* @__dso_handle) #[[ATTR4]] @@ -2177,8 +2177,8 @@ void foo() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2214,14 +2214,14 @@ void foo() { // CHECK12-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* // CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i8* align 4 [[TMP7]], i64 8, i1 false) // CHECK12-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK12-NEXT: br i1 icmp eq (%struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), %struct.S* 
getelementptr ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK12-NEXT: br i1 icmp eq (%struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK12: omp.arraycpy.body: // CHECK12-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK12-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK12-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK12-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK12-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK12-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK12-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr ([[STRUCT_S]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), i64 2) +// CHECK12-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* 
@_ZZ4mainE5s_arr, i64 1, i64 0) // CHECK12-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK12: omp.arraycpy.done1: // CHECK12-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, %struct.S* nonnull align 4 dereferenceable(4) [[TMP3]]) @@ -2274,7 +2274,7 @@ void foo() { // CHECK12-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] // CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*), align 8 // CHECK12-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2]] +// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK12: init.check: // CHECK12-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), i32 1) // CHECK12-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 1), i32 2) @@ -2284,7 +2284,7 @@ void foo() { // CHECK12: init.end: // CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*), align 8 // CHECK12-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP2]], 0 -// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF2]] +// CHECK12-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF3]] // CHECK12: init.check2: // CHECK12-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, i32 3) // CHECK12-NEXT: [[TMP3:%.*]] = 
call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S.0*)* @_ZN1SIiED1Ev to void (i8*)*), i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i8* @__dso_handle) #[[ATTR4]] @@ -2387,8 +2387,8 @@ void foo() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([[STRUCT_S_0:%.*]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2424,14 +2424,14 @@ void foo() { // CHECK12-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* // CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i8* align 128 [[TMP7]], i64 8, i1 false) // CHECK12-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK12-NEXT: br 
i1 icmp eq (%struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.0* getelementptr ([[STRUCT_S_0:%.*]], %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK12-NEXT: br i1 icmp eq (%struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK12: omp.arraycpy.body: // CHECK12-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK12-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK12-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK12-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK12-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK12-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK12-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr ([[STRUCT_S_0]], %struct.S.0* getelementptr inbounds ([2 x 
%struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), i64 2) +// CHECK12-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0) // CHECK12-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK12: omp.arraycpy.done1: // CHECK12-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]]) @@ -2627,14 +2627,14 @@ void foo() { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK15-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK15-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK15: init.check: // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), [[INIT_CHECK]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYCTOR_CUR]]) // CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[ARRAYCTOR_CUR]], i64 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.St* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[STRUCT_ST]], %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), i64 2) +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.St* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x 
%struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0) // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3:[0-9]+]] @@ -2662,8 +2662,8 @@ void foo() { // CHECK15-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([[STRUCT_ST:%.*]], %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.St* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0) // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2701,14 +2701,14 @@ void foo() { // CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i8* align 4 [[TMP4]], i64 
8, i1 false) // CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.St]* [[TMP1]] to %struct.St* -// CHECK15-NEXT: br i1 icmp eq (%struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), %struct.St* getelementptr ([[STRUCT_ST:%.*]], %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), i64 2)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: br i1 icmp eq (%struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: // CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.St* [ [[TMP5]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.St* @_ZN2StaSERKS_(%struct.St* nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.St* nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], %struct.St* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], %struct.St* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.St* [[OMP_ARRAYCPY_DEST_ELEMENT]], 
getelementptr ([[STRUCT_ST]], %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), i64 2) +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.St* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0) // CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK15: omp.arraycpy.done1: // CHECK15-NEXT: br label [[COPYIN_NOT_MASTER_END]] @@ -2820,7 +2820,7 @@ void foo() { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1 // CHECK16-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK16-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF4:![0-9]+]] +// CHECK16-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF5:![0-9]+]] // CHECK16: init: // CHECK16-NEXT: store i8 1, i8* @__tls_guard, align 1 // CHECK16-NEXT: call void @__cxx_global_var_init() @@ -2828,4 +2828,3 @@ void foo() { // CHECK16: exit: // CHECK16-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/sections_firstprivate_codegen.cpp b/clang/test/OpenMP/sections_firstprivate_codegen.cpp index 22fc096043c5..50240253dce3 100644 --- a/clang/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_firstprivate_codegen.cpp @@ -226,8 +226,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -777,8 +777,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds 
([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1328,8 +1328,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1533,8 +1533,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1720,4 +1720,3 @@ int main() { // CHECK4-NEXT: call void @__cxx_global_var_init.2() // CHECK4-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp index 7cb75113c13a..890451cfafe8 100644 --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -289,7 +289,7 @@ void array_func(int n, int a[n], St s[2]) { // CHECK1-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2) +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0) // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label 
[[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]] @@ -326,8 +326,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1192,7 +1192,7 @@ void array_func(int n, int a[n], St s[2]) { // CHECK2-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]] // CHECK2: invoke.cont: // CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: 
[[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2) +// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0) // CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK2: arrayctor.cont: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]] @@ -1229,8 +1229,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x 
%class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2307,7 +2307,7 @@ void array_func(int n, int a[n], St s[2]) { // CHECK3-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]] // CHECK3: invoke.cont: // CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2) +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0) // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]] @@ -2344,8 +2344,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK3-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3237,7 +3237,7 @@ void array_func(int n, int a[n], St s[2]) { // CHECK4-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]] // CHECK4: invoke.cont: // CHECK4-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1 -// CHECK4-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2) +// CHECK4-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0) // CHECK4-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK4: arrayctor.cont: // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]] @@ -3341,8 +3341,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x 
%class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK4-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4311,7 +4311,7 @@ void array_func(int n, int a[n], St s[2]) { // CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG37]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG37]] -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), !dbg [[DBG37]] +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0), !dbg [[DBG37]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG37]] 
// CHECK5: arrayctor.cont: // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG39:![0-9]+]] @@ -4348,8 +4348,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG42:![0-9]+]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG42]] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG42]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG42]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG42]] // CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG42]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG42]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG42]] diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp index c159d6f8f2d4..1d532a397e99 100644 --- 
a/clang/test/OpenMP/single_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp @@ -212,8 +212,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -694,8 +694,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], 
%struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1176,8 +1176,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x 
%struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1347,8 +1347,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1500,4 +1500,3 @@ int main() { // CHECK4-NEXT: call void @__cxx_global_var_init.2() // CHECK4-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp index 3af5b56cf7ed..f468026953f3 100644 --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -218,8 
+218,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1008,8 +1008,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ 
getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1798,8 +1798,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2572,8 +2572,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3346,8 +3346,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// 
CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3603,8 +3603,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) 
// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp index 643cdf2cd375..6c0ab2ecd6dd 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -279,8 +279,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1341,8 +1341,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: 
arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2403,8 +2403,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] 
] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3435,8 +3435,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4467,8 +4467,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br 
label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4821,8 +4821,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp index fbb8f1fbbda7..98dc3322ca4c 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp @@ -259,8 +259,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -952,8 +952,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1645,8 +1645,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], 
[2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2326,8 +2326,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void 
@_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3007,8 +3007,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3310,8 +3310,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds 
([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5729,4 +5729,3 @@ int main() { // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK17-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index e900eb9a3323..c1d23170594e 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -277,8 +277,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x 
%struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1367,8 +1367,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void 
@_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2457,8 +2457,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3517,8 +3517,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds 
([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4577,8 +4577,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4945,8 +4945,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5291,8 +5291,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5618,8 +5618,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5945,8 +5945,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6268,8 +6268,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK10-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6591,8 +6591,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6723,8 +6723,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -10098,8 +10098,8 @@ int main() { // CHECK18-NEXT: 
store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK18-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK18: arraydestroy.body: -// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK18-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK18-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK18-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -10425,8 +10425,8 @@ int main() { // CHECK19-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK19-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK19: arraydestroy.body: -// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 
x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK19-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK19-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK19-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -10752,8 +10752,8 @@ int main() { // CHECK20-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK20-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK20: arraydestroy.body: -// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK20-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK20-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK20-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label 
[[ARRAYDESTROY_BODY]] @@ -11075,8 +11075,8 @@ int main() { // CHECK21-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK21-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK21: arraydestroy.body: -// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK21-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK21-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK21-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -11398,8 +11398,8 @@ int main() { // CHECK22-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK22-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK22: arraydestroy.body: -// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK22-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK22-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK22-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK22-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp index 2f7978381a4a..1507c97e5679 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp @@ -259,8 +259,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -370,25 +370,25 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -478,40 +478,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -669,25 +669,25 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: 
omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -780,37 +780,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -980,8 +980,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* 
@s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1091,25 +1091,25 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: 
[[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1199,40 +1199,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* // 
CHECK2-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1390,25 +1390,25 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1501,37 +1501,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* // CHECK2-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // 
CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1701,8 +1701,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1812,23 +1812,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1916,38 +1916,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] // CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// 
CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2105,23 +2105,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2212,35 +2212,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK3-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK3-NEXT: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2410,8 +2410,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // 
CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2521,23 +2521,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2625,38 +2625,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] // CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK4-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !10 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// 
CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2814,23 +2814,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2921,35 +2921,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] // CHECK4-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK4-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK4-NEXT: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3119,8 +3119,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // 
CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3213,23 +3213,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3298,35 +3298,35 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 +// CHECK5-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), 
!llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3436,8 +3436,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr 
inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3530,23 +3530,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3615,35 +3615,35 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK6-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 +// CHECK6-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK6-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK6-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK6-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK6-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), 
!llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3731,8 +3731,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr 
inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4114,8 +4114,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4497,8 +4497,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4876,8 +4876,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq 
%struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5255,8 +5255,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5387,8 +5387,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5521,25 +5521,25 @@ int main() { // CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 
-// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -5639,40 +5639,40 @@ int main() { // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK13-NEXT: [[TMP13:%.*]] = bitcast %struct.S* 
[[ARRAYIDX4]] to i8* // CHECK13-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !10 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK13-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 +// CHECK13-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -5772,25 +5772,25 @@ int main() { // CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -5893,37 +5893,37 @@ int main() { // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 // CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] // CHECK13-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* // CHECK13-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !18 // CHECK13-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -6063,25 +6063,25 @@ int main() { // CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !6 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -6181,40 +6181,40 @@ int main() { // CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// 
CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 // CHECK14-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK14-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* // CHECK14-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 
[[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !10 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK14-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 +// CHECK14-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -6314,25 +6314,25 @@ int main() { // CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK14-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -6435,37 +6435,37 @@ int main() { // CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 // CHECK14-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] // CHECK14-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* // CHECK14-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !18 // CHECK14-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -6605,23 +6605,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -6719,38 +6719,38 @@ int main() { // CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !11 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] // CHECK15-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK15-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], 
align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !11 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -6850,23 +6850,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: 
omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -6967,35 +6967,35 @@ int main() { // CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], 
i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] // CHECK15-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK15-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -7135,23 +7135,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !7 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -7249,38 +7249,38 @@ int main() { // CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK16-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] // CHECK16-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK16-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !11 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// 
CHECK16-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK16-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -7380,23 +7380,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK16-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, 
i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !16 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -7497,35 +7497,35 @@ int main() { // CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK16-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK16-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] // CHECK16-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK16-NEXT: 
[[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -7656,23 +7656,23 @@ int main() { // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], 
align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -7741,35 +7741,35 @@ int main() { // CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// 
CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 +// CHECK17-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !9 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], 
align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !9 // CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] +// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -7841,8 +7841,8 @@ int main() { // CHECK18-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK18-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK18: arraydestroy.body: -// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK18-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK18-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK18-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK18-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -8224,8 +8224,8 @@ int main() { // CHECK19-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK19-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK19: arraydestroy.body: -// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK19-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK19-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK19-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK19-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -8607,8 +8607,8 @@ int main() { // CHECK20-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK20-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK20: arraydestroy.body: -// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK20-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK20-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK20-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK20-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -8986,8 +8986,8 @@ int main() { // CHECK21-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK21-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK21: arraydestroy.body: -// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds 
([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK21-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK21-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK21-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK21-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -9365,8 +9365,8 @@ int main() { // CHECK22-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK22-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK22: arraydestroy.body: -// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK22-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK22-NEXT: 
call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK22-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK22-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp index 11c3c307298c..eeb7d9e02b93 100644 --- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp @@ -198,8 +198,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -686,8 +686,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1174,8 +1174,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// 
CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1658,8 +1658,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // 
CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2142,8 +2142,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2368,8 +2368,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], 
%struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2514,4 +2514,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp index 138ff30b199a..b12b2e6aacdc 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -218,8 +218,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds 
([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1022,8 +1022,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label 
[[ARRAYDESTROY_BODY]] @@ -1826,8 +1826,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2614,8 +2614,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3380,8 +3380,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3706,8 +3706,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4032,8 +4032,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* 
[[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4354,8 +4354,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x 
%struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4698,8 +4698,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4962,8 +4962,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5204,8 +5204,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* 
[[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5336,8 +5336,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp index 55c84fa004e9..50c3ea929399 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -198,8 +198,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -700,8 +700,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), 
[[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1202,8 +1202,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1700,8 +1700,8 @@ int main() { // 
CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2176,8 +2176,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x 
%struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2559,8 +2559,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label 
[[ARRAYDESTROY_BODY]] @@ -2942,8 +2942,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3321,8 +3321,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK8-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3722,8 +3722,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3955,8 +3955,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4166,8 +4166,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4298,8 +4298,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x 
%struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp index d428cf999b1e..b6f98f66b9f1 100644 --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -221,8 +221,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1013,8 +1013,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// 
CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1805,8 +1805,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2581,8 +2581,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3357,8 +3357,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3614,8 +3614,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp index bf2459ced3e8..620526f3f0a0 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -255,8 +255,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1319,8 +1319,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2383,8 +2383,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], 
[2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3417,8 +3417,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void 
@_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4451,8 +4451,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4805,8 +4805,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds 
([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp index 47548ae90c50..2816867938be 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp @@ -221,8 +221,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -914,8 +914,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x 
%struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1607,8 +1607,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2288,8 +2288,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2969,8 +2969,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], 
getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3277,8 +3277,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3505,4 +3505,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index b0a0e22909a8..3791bd06e172 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ 
b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -258,8 +258,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1350,8 +1350,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2442,8 +2442,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x 
%struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3504,8 +3504,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4544,8 +4544,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4873,8 +4873,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], 
getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5202,8 +5202,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5527,8 +5527,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// 
CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5874,8 +5874,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp 
eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6242,8 +6242,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6588,8 +6588,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6720,8 +6720,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp index 70aab1a2a1b0..e40d75ffbf79 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp @@ -223,8 +223,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label 
[[ARRAYDESTROY_BODY]] @@ -334,25 +334,25 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -442,40 +442,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 // 
CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -633,25 +633,25 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // 
CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -744,37 +744,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to 
i64 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -944,8 +944,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ 
getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1055,25 +1055,25 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1163,40 +1163,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], 
[[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// 
CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1354,25 +1354,25 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: 
omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1465,37 +1465,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// 
CHECK2-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* // CHECK2-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1665,8 +1665,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds 
([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1776,23 +1776,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1880,38 +1880,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] // CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK3-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2069,23 +2069,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP3:%.*]] = icmp 
sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // 
CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2176,35 +2176,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 
+// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK3-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2374,8 +2374,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// 
CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2485,23 +2485,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2589,38 +2589,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// 
CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] // CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK4-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, 
i32* [[SIVAR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !10 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2778,23 +2778,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP3]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: 
omp.loop.exit: @@ -2885,35 +2885,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.S.0*, 
%struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] // CHECK4-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK4-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3061,8 +3061,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3444,8 +3444,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr 
inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3827,8 +3827,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4206,8 +4206,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4607,8 +4607,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* 
[[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4706,23 +4706,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4791,35 +4791,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), 
!llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4929,8 +4929,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], 
getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5028,23 +5028,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5113,35 +5113,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 
// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK10-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 +// CHECK10-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK10-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK10-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK10-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK10-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 
dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK10-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5229,8 +5229,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq 
%struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5361,8 +5361,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp index 01f76e7bf2bf..e4ddc0d33be8 100644 --- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp @@ -201,8 +201,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -689,8 +689,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1177,8 +1177,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1661,8 +1661,8 @@ int main() { // CHECK4-NEXT: store i8* 
[[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2145,8 +2145,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x 
%struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2376,8 +2376,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ 
-2527,4 +2527,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp index e04322d0eaaf..2c0294bc0194 100644 --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -221,8 +221,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1027,8 +1027,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: 
arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1833,8 +1833,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] 
] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2623,8 +2623,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3391,8 +3391,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br 
label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3719,8 +3719,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4047,8 +4047,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4371,8 +4371,8 @@ int main() { // CHECK8-NEXT: store i8* 
[[TMP0]], i8** [[DOTADDR]], align 4 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4717,8 +4717,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x 
%struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4981,8 +4981,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ 
-5223,8 +5223,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -5355,8 +5355,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] 
= phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp index 1af206060597..568dc705208f 100644 --- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -202,8 +202,8 @@ int main() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void 
@_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -313,40 +313,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 
x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store 
i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -504,37 +504,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// 
CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] // CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* // CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -704,8 +704,8 @@ int main() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: 
-// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -815,40 +815,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 // CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* // CHECK2-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[SIVAR]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1006,37 +1006,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !11 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] // CHECK2-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* // CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// 
CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1206,8 +1206,8 @@ int main() { // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1317,38 
+1317,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* 
[[S_ARR]], i32 0, i32 [[TMP10]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1506,35 +1506,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] // 
CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1704,8 +1704,8 @@ int main() { // CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK4: arraydestroy.body: -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// CHECK4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK4-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1815,38 +1815,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP10]] // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* // CHECK4-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2004,35 +2004,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK4-NEXT: store i32 [[TMP8]], i32* 
[[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] // CHECK4-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* // CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !12 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2180,8 +2180,8 @@ int main() { // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2563,8 +2563,8 @@ int main() { // CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK6-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK6: arraydestroy.body: -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK6-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK6-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK6-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK6-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2946,8 +2946,8 @@ int main() { // CHECK7-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3325,8 +3325,8 @@ int main() { // CHECK8-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 // CHECK8-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3726,8 +3726,8 @@ int main() { // CHECK9-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3826,35 +3826,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !4 
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3964,8 +3964,8 @@ int main() { // CHECK10-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4064,35 +4064,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK10-NEXT: store i32* [[G]], i32** [[TMP9]], align 8 +// CHECK10-NEXT: store i32* [[G]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK10-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32* [[TMP11]], i32** 
[[TMP10]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK10-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8 -// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK10-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4180,8 +4180,8 @@ int main() { // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// 
CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4312,8 +4312,8 @@ int main() { // CHECK12-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK12-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK12: arraydestroy.body: -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK12-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK12-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK12-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @s_arr, i32 0, 
i32 0) // CHECK12-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp index f1ccb80cb3ab..000ed6e2cb43 100644 --- a/clang/test/OpenMP/threadprivate_codegen.cpp +++ b/clang/test/OpenMP/threadprivate_codegen.cpp @@ -1394,8 +1394,8 @@ int foobar() { // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1414,7 +1414,7 @@ int foobar() { // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8 // CHECK1-NEXT: 
[[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0 -// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK1: init.check: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]] // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 @@ -1940,8 +1940,8 @@ int foobar() { // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0) // CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2093,7 +2093,7 @@ int foobar() { // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: 
[[TMP1:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8 // CHECK2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0 -// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK2: init.check: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]] // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 @@ -2638,8 +2638,8 @@ int foobar() { // SIMD1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // SIMD1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // SIMD1: arraydestroy.body: -// SIMD1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// SIMD1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // SIMD1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // SIMD1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0) // SIMD1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2937,485 
+2937,485 @@ int foobar() { // SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init // SIMD2-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] { // SIMD2-NEXT: entry: -// SIMD2-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG119:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG121:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG122:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG118:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG120:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG121:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG123:![0-9]+]] { +// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void 
@llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG129:![0-9]+]] -// SIMD2-NEXT: call void @_ZN2S1C2Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG129]] -// SIMD2-NEXT: ret void, !dbg [[DBG130:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S1C2Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]] +// SIMD2-NEXT: ret void, !dbg [[DBG129:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG131:![0-9]+]] { +// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @_ZN2S1D2Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG134:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg 
[[DBG135:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S1D2Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG134:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 -// SIMD2-SAME: () #[[ATTR0]] !dbg [[DBG136:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR0]] !dbg [[DBG135:![0-9]+]] { // SIMD2-NEXT: entry: -// SIMD2-NEXT: call void @_ZN2S2C1Ei(%struct.S2* nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG137:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG139:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG140:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S2C1Ei(%struct.S2* nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG136:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG138:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG139:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S2C1Ei -// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG141:![0-9]+]] { +// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG140:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata 
%struct.S2** [[THIS_ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] -// SIMD2-NEXT: call void @_ZN2S2C2Ei(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG147]] -// SIMD2-NEXT: ret void, !dbg [[DBG148:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG146:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S2C2Ei(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG146]] +// SIMD2-NEXT: ret void, !dbg [[DBG147:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S2D1Ev -// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG149:![0-9]+]] { +// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG148:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD2-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 
8 -// SIMD2-NEXT: call void @_ZN2S2D2Ev(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG152:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG153:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S2D2Ev(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG151:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG152:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 -// SIMD2-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG154:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG153:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8 // SIMD2-NEXT: [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8 // SIMD2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // SIMD2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // SIMD2-NEXT: [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8 -// SIMD2-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG155:![0-9]+]] -// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157:![0-9]+]] +// SIMD2-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154:![0-9]+]] +// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156:![0-9]+]] // SIMD2-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x 
%struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1) -// SIMD2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG158:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG157:![0-9]+]] // SIMD2: invoke.cont: -// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157]] +// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]] // SIMD2-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2) -// SIMD2-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG159:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG158:![0-9]+]] // SIMD2: invoke.cont2: -// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157]] +// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]] // SIMD2-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3) -// SIMD2-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG160:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG159:![0-9]+]] // SIMD2: invoke.cont3: -// SIMD2-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x 
%struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG155]] -// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161:![0-9]+]] +// SIMD2-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]] +// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160:![0-9]+]] // SIMD2-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4) -// SIMD2-NEXT: to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG162:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG161:![0-9]+]] // SIMD2: invoke.cont7: -// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161]] +// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]] // SIMD2-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5) -// SIMD2-NEXT: to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG163:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG162:![0-9]+]] // SIMD2: invoke.cont8: -// SIMD2-NEXT: store %struct.S1* getelementptr 
inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161]] +// SIMD2-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]] // SIMD2-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6) -// SIMD2-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG164:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG163:![0-9]+]] // SIMD2: invoke.cont9: -// SIMD2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG165:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG165]] +// SIMD2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG164:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG164]] // SIMD2: lpad: // SIMD2-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } -// SIMD2-NEXT: cleanup, !dbg [[DBG166:![0-9]+]] -// SIMD2-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG166]] -// SIMD2-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG166]] -// SIMD2-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG166]] -// SIMD2-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG166]] -// SIMD2-NEXT: [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157]] -// SIMD2-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG157]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label 
[[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG157]] +// SIMD2-NEXT: cleanup, !dbg [[DBG165:![0-9]+]] +// SIMD2-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG165]] +// SIMD2-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]] +// SIMD2-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG165]] +// SIMD2-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]] +// SIMD2-NEXT: [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]] +// SIMD2-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG156]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG156]] // SIMD2: arraydestroy.body: -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG157]] -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG157]] -// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG157]] -// SIMD2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG157]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG157]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG156]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG156]] +// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG156]] +// SIMD2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG156]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG156]] // SIMD2: arraydestroy.done4: -// SIMD2-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG157]] +// SIMD2-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG156]] // SIMD2: lpad6: // SIMD2-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// SIMD2-NEXT: cleanup, !dbg [[DBG166]] -// SIMD2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG166]] -// SIMD2-NEXT: store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG166]] -// SIMD2-NEXT: [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG166]] -// SIMD2-NEXT: store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG166]] -// SIMD2-NEXT: [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161]] -// SIMD2-NEXT: [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG161]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG161]] +// SIMD2-NEXT: cleanup, !dbg [[DBG165]] +// SIMD2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG165]] +// SIMD2-NEXT: store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]] +// SIMD2-NEXT: [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG165]] +// SIMD2-NEXT: store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]] +// SIMD2-NEXT: [[TMP8:%.*]] = load %struct.S1*, 
%struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]] +// SIMD2-NEXT: [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG160]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG160]] // SIMD2: arraydestroy.body11: -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG161]] -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG161]] -// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG161]] -// SIMD2-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG161]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG161]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG160]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG160]] +// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG160]] +// SIMD2-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG160]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label 
[[ARRAYDESTROY_BODY11]], !dbg [[DBG160]] // SIMD2: arraydestroy.done15: -// SIMD2-NEXT: br label [[EHCLEANUP]], !dbg [[DBG161]] +// SIMD2-NEXT: br label [[EHCLEANUP]], !dbg [[DBG160]] // SIMD2: ehcleanup: -// SIMD2-NEXT: [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG155]] -// SIMD2-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG155]] -// SIMD2-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG155]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG155]] +// SIMD2-NEXT: [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]] +// SIMD2-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG154]] +// SIMD2-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG154]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG154]] // SIMD2: arraydestroy.body17: -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG155]] -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG155]] -// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG155]] -// SIMD2-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* 
[[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG155]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG155]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG154]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG154]] +// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG154]] +// SIMD2-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG154]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG154]] // SIMD2: arraydestroy.done21: -// SIMD2-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG155]] +// SIMD2-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG154]] // SIMD2: eh.resume: -// SIMD2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG155]] -// SIMD2-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG155]] -// SIMD2-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG155]] -// SIMD2-NEXT: [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG155]] -// SIMD2-NEXT: resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG155]] +// SIMD2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG154]] +// SIMD2-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG154]] +// SIMD2-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG154]] +// SIMD2-NEXT: 
[[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG154]] +// SIMD2-NEXT: resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG154]] // // // SIMD2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor -// SIMD2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG167:![0-9]+]] { +// SIMD2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG166:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // SIMD2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] -// SIMD2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG172]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]] +// SIMD2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG171]] // SIMD2: arraydestroy.body: -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG172]] -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG172]] -// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG172]] -// SIMD2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG172]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG172]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr 
inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG171]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG171]] +// SIMD2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG171]] +// SIMD2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG171]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG171]] // SIMD2: arraydestroy.done1: -// SIMD2-NEXT: ret void, !dbg [[DBG172]] +// SIMD2-NEXT: ret void, !dbg [[DBG171]] // // // SIMD2-LABEL: define {{[^@]+}}@main -// SIMD2-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG52:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // SIMD2-NEXT: [[RES:%.*]] = alloca i32, align 4 // SIMD2-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // SIMD2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG175:![0-9]+]] -// SIMD2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG175]] -// SIMD2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG175]], !prof 
[[PROF176:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG174:![0-9]+]] +// SIMD2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG174]] +// SIMD2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG174]], !prof [[PROF175:![0-9]+]] // SIMD2: init.check: -// SIMD2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG175]] -// SIMD2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0, !dbg [[DBG175]] -// SIMD2-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG175]] +// SIMD2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]] +// SIMD2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0, !dbg [[DBG174]] +// SIMD2-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG174]] // SIMD2: init: -// SIMD2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG177:![0-9]+]] +// SIMD2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG176:![0-9]+]] // SIMD2-NEXT: invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]]) -// SIMD2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// SIMD2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG177:![0-9]+]] // SIMD2: invoke.cont: -// SIMD2-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg 
[[DBG175]] -// SIMD2-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG175]] -// SIMD2-NEXT: br label [[INIT_END]], !dbg [[DBG175]] +// SIMD2-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG174]] +// SIMD2-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]] +// SIMD2-NEXT: br label [[INIT_END]], !dbg [[DBG174]] // SIMD2: init.end: -// SIMD2-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG179:![0-9]+]] -// SIMD2-NEXT: store i32 [[TMP4]], i32* [[RES]], align 4, !dbg [[DBG180:![0-9]+]] -// SIMD2-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG181:![0-9]+]] -// SIMD2-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG182:![0-9]+]] -// SIMD2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG182]] -// SIMD2-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG182]] -// SIMD2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG183:![0-9]+]] -// SIMD2-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG184:![0-9]+]] -// SIMD2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG184]] -// SIMD2-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG184]] -// SIMD2-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG185:![0-9]+]] -// SIMD2-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG186:![0-9]+]] -// SIMD2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG186]] -// SIMD2-NEXT: store i32 [[ADD2]], i32* 
[[RES]], align 4, !dbg [[DBG186]] -// SIMD2-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG187:![0-9]+]] -// SIMD2-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG188:![0-9]+]] -// SIMD2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG188]] -// SIMD2-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG188]] -// SIMD2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG189:![0-9]+]] -// SIMD2-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG190:![0-9]+]] -// SIMD2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG190]] -// SIMD2-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG190]] -// SIMD2-NEXT: [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG191:![0-9]+]] -// SIMD2-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG192:![0-9]+]] -// SIMD2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG192]] -// SIMD2-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG192]] -// SIMD2-NEXT: [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG193:![0-9]+]] -// SIMD2-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG193]] -// SIMD2-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG194:![0-9]+]] -// SIMD2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG194]] -// SIMD2-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG194]] -// SIMD2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG195:![0-9]+]] -// SIMD2-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG196:![0-9]+]] -// SIMD2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG196]] -// SIMD2-NEXT: store i32 
[[ADD7]], i32* [[RES]], align 4, !dbg [[DBG196]] -// SIMD2-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG197:![0-9]+]] -// SIMD2-NEXT: ret i32 [[TMP21]], !dbg [[DBG198:![0-9]+]] +// SIMD2-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG178:![0-9]+]] +// SIMD2-NEXT: store i32 [[TMP4]], i32* [[RES]], align 4, !dbg [[DBG179:![0-9]+]] +// SIMD2-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG180:![0-9]+]] +// SIMD2-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG181:![0-9]+]] +// SIMD2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG181]] +// SIMD2-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG181]] +// SIMD2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG182:![0-9]+]] +// SIMD2-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG183:![0-9]+]] +// SIMD2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG183]] +// SIMD2-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG183]] +// SIMD2-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG184:![0-9]+]] +// SIMD2-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]] +// SIMD2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG185]] +// SIMD2-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG185]] +// SIMD2-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG186:![0-9]+]] +// SIMD2-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG187:![0-9]+]] +// SIMD2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG187]] +// SIMD2-NEXT: 
store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG187]] +// SIMD2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG188:![0-9]+]] +// SIMD2-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG189:![0-9]+]] +// SIMD2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG189]] +// SIMD2-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG189]] +// SIMD2-NEXT: [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG190:![0-9]+]] +// SIMD2-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG191:![0-9]+]] +// SIMD2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG191]] +// SIMD2-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG191]] +// SIMD2-NEXT: [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG192:![0-9]+]] +// SIMD2-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG192]] +// SIMD2-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG193:![0-9]+]] +// SIMD2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG193]] +// SIMD2-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG193]] +// SIMD2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG194:![0-9]+]] +// SIMD2-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG195:![0-9]+]] +// SIMD2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG195]] +// SIMD2-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG195]] +// SIMD2-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG196:![0-9]+]] +// SIMD2-NEXT: ret i32 [[TMP21]], !dbg [[DBG197:![0-9]+]] // SIMD2: lpad: // SIMD2-NEXT: [[TMP22:%.*]] = landingpad { i8*, i32 } -// SIMD2-NEXT: cleanup, !dbg [[DBG199:![0-9]+]] -// SIMD2-NEXT: [[TMP23:%.*]] = extractvalue { i8*, i32 } 
[[TMP22]], 0, !dbg [[DBG199]] -// SIMD2-NEXT: store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG199]] -// SIMD2-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1, !dbg [[DBG199]] -// SIMD2-NEXT: store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG199]] -// SIMD2-NEXT: call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG175]] -// SIMD2-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG175]] +// SIMD2-NEXT: cleanup, !dbg [[DBG198:![0-9]+]] +// SIMD2-NEXT: [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0, !dbg [[DBG198]] +// SIMD2-NEXT: store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG198]] +// SIMD2-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1, !dbg [[DBG198]] +// SIMD2-NEXT: store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG198]] +// SIMD2-NEXT: call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]] +// SIMD2-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG174]] // SIMD2: eh.resume: -// SIMD2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG175]] -// SIMD2-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG175]] -// SIMD2-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG175]] -// SIMD2-NEXT: [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG175]] -// SIMD2-NEXT: resume { i8*, i32 } [[LPAD_VAL8]], !dbg [[DBG175]] +// SIMD2-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG174]] +// SIMD2-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG174]] +// SIMD2-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG174]] +// SIMD2-NEXT: [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG174]] +// SIMD2-NEXT: resume { i8*, i32 } [[LPAD_VAL8]], !dbg [[DBG174]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// SIMD2-SAME: (%struct.Smain* 
nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG200:![0-9]+]] { +// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG199:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG206:![0-9]+]] -// SIMD2-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG206]] -// SIMD2-NEXT: ret void, !dbg [[DBG207:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]] +// SIMD2-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG205]] +// SIMD2-NEXT: ret void, !dbg [[DBG206:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) 
unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG208:![0-9]+]] { +// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG207:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD2-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG211:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG212:![0-9]+]] +// SIMD2-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG210:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG211:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_Z6foobarv -// SIMD2-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG213:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG212:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[RES:%.*]] = alloca i32, align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG216:![0-9]+]] -// SIMD2-NEXT: store i32 [[TMP0]], i32* [[RES]], align 4, !dbg [[DBG217:![0-9]+]] -// SIMD2-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG218:![0-9]+]] -// 
SIMD2-NEXT: [[TMP2:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG219:![0-9]+]] -// SIMD2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]], !dbg [[DBG219]] -// SIMD2-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG219]] -// SIMD2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG220:![0-9]+]] -// SIMD2-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG221:![0-9]+]] -// SIMD2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG221]] -// SIMD2-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG221]] -// SIMD2-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG222:![0-9]+]] -// SIMD2-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG223:![0-9]+]] -// SIMD2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG223]] -// SIMD2-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG223]] -// SIMD2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG224:![0-9]+]] -// SIMD2-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG225:![0-9]+]] -// SIMD2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG225]] -// SIMD2-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG225]] -// SIMD2-NEXT: [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG226:![0-9]+]] -// SIMD2-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG227:![0-9]+]] -// SIMD2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG227]] -// SIMD2-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG227]] -// SIMD2-NEXT: [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG228:![0-9]+]] -// SIMD2-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32, !dbg [[DBG228]] -// SIMD2-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG229:![0-9]+]] -// SIMD2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]], !dbg [[DBG229]] -// SIMD2-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG229]] -// SIMD2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG230:![0-9]+]] -// SIMD2-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]] -// SIMD2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG231]] -// SIMD2-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG231]] -// SIMD2-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG232:![0-9]+]] -// SIMD2-NEXT: ret i32 [[TMP15]], !dbg [[DBG233:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG215:![0-9]+]] +// SIMD2-NEXT: store i32 [[TMP0]], i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]] +// SIMD2-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG217:![0-9]+]] +// SIMD2-NEXT: [[TMP2:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG218:![0-9]+]] +// SIMD2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]], !dbg [[DBG218]] +// SIMD2-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG218]] +// SIMD2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG219:![0-9]+]] +// SIMD2-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG220:![0-9]+]] +// SIMD2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG220]] +// SIMD2-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG220]] +// 
SIMD2-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG221:![0-9]+]] +// SIMD2-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG222:![0-9]+]] +// SIMD2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG222]] +// SIMD2-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG222]] +// SIMD2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG223:![0-9]+]] +// SIMD2-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]] +// SIMD2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG224]] +// SIMD2-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG224]] +// SIMD2-NEXT: [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG225:![0-9]+]] +// SIMD2-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG226:![0-9]+]] +// SIMD2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG226]] +// SIMD2-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG226]] +// SIMD2-NEXT: [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG227:![0-9]+]] +// SIMD2-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32, !dbg [[DBG227]] +// SIMD2-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG228:![0-9]+]] +// SIMD2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]], !dbg [[DBG228]] +// SIMD2-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG228]] +// SIMD2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG229:![0-9]+]] +// SIMD2-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG230:![0-9]+]] +// SIMD2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG230]] +// SIMD2-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG230]] 
+// SIMD2-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]] +// SIMD2-NEXT: ret i32 [[TMP15]], !dbg [[DBG232:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 -// SIMD2-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG234:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG233:![0-9]+]] { // SIMD2-NEXT: entry: -// SIMD2-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG235:![0-9]+]] -// SIMD2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG235]] -// SIMD2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG235]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234:![0-9]+]] +// SIMD2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG234]] +// SIMD2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG234]] // SIMD2: init.check: -// SIMD2-NEXT: call void @_ZN2S4C1Ei(%struct.S4* nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG236:![0-9]+]] -// SIMD2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG235]] -// SIMD2-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG235]] -// SIMD2-NEXT: br label [[INIT_END]], !dbg [[DBG235]] +// SIMD2-NEXT: call void @_ZN2S4C1Ei(%struct.S4* nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG235:![0-9]+]] +// SIMD2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG234]] +// SIMD2-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), 
align 8, !dbg [[DBG234]] +// SIMD2-NEXT: br label [[INIT_END]], !dbg [[DBG234]] // SIMD2: init.end: -// SIMD2-NEXT: ret void, !dbg [[DBG238:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG237:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG239:![0-9]+]] { +// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG238:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META243:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] -// SIMD2-NEXT: call void @_ZN2S4C2Ei(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG245]] -// SIMD2-NEXT: ret void, !dbg [[DBG246:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG244:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S4C2Ei(%struct.S4* nonnull align 4 dereferenceable(8) 
[[THIS1]], i32 [[TMP0]]), !dbg [[DBG244]] +// SIMD2-NEXT: ret void, !dbg [[DBG245:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG247:![0-9]+]] { +// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG246:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD2-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META248:![0-9]+]], metadata !DIExpression()), !dbg [[DBG249:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @_ZN2S4D2Ev(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG250:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG251:![0-9]+]] +// SIMD2-NEXT: call void @_ZN2S4D2Ev(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG249:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG250:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S1C2Ei -// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG252:![0-9]+]] { +// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG251:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata 
%struct.S1** [[THIS_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META255:![0-9]+]], metadata !DIExpression()), !dbg [[DBG256:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG258:![0-9]+]] -// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG257]] -// SIMD2-NEXT: ret void, !dbg [[DBG259:![0-9]+]] +// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG256:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG257:![0-9]+]] +// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG256]] +// SIMD2-NEXT: ret void, !dbg [[DBG258:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S1D2Ev -// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG260:![0-9]+]] { +// SIMD2-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG259:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata 
[[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG263:![0-9]+]] -// SIMD2-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG265:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG266:![0-9]+]] +// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]] +// SIMD2-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG264:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG265:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S2C2Ei -// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG267:![0-9]+]] { +// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG266:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata 
i32* [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG273:![0-9]+]] -// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG272]] -// SIMD2-NEXT: ret void, !dbg [[DBG274:![0-9]+]] +// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG272:![0-9]+]] +// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG271]] +// SIMD2-NEXT: ret void, !dbg [[DBG273:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S2D2Ev -// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG275:![0-9]+]] { +// SIMD2-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG274:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD2-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG278:![0-9]+]] -// SIMD2-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG280:![0-9]+]] -// 
SIMD2-NEXT: ret void, !dbg [[DBG281:![0-9]+]] +// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]] +// SIMD2-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG279:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG280:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG282:![0-9]+]] { +// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG281:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG284:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG286:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG287:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG288:![0-9]+]] -// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG287]] -// SIMD2-NEXT: ret 
void, !dbg [[DBG289:![0-9]+]] +// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] +// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG286]] +// SIMD2-NEXT: ret void, !dbg [[DBG288:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG290:![0-9]+]] { +// SIMD2-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG289:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD2-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG293:![0-9]+]] -// SIMD2-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG295:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG296:![0-9]+]] +// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG292:![0-9]+]] +// SIMD2-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG294:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr 
#[[ATTR2]] comdat align 2 !dbg [[DBG297:![0-9]+]] { +// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG296:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG299:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] // SIMD2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG302:![0-9]+]] -// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG303:![0-9]+]] -// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG302]] -// SIMD2-NEXT: ret void, !dbg [[DBG304:![0-9]+]] +// SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG301:![0-9]+]] +// SIMD2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]] +// SIMD2-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG301]] +// SIMD2-NEXT: ret void, !dbg [[DBG303:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// SIMD2-SAME: (%struct.S4* nonnull align 4 
dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG305:![0-9]+]] { +// SIMD2-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG304:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD2-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG307:![0-9]+]] +// SIMD2-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]] // SIMD2-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG308:![0-9]+]] -// SIMD2-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG310:![0-9]+]] -// SIMD2-NEXT: ret void, !dbg [[DBG311:![0-9]+]] +// SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] +// SIMD2-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG309:![0-9]+]] +// SIMD2-NEXT: ret void, !dbg [[DBG310:![0-9]+]] // // // SIMD2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp -// SIMD2-SAME: () #[[ATTR0]] !dbg [[DBG312:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR0]] !dbg [[DBG311:![0-9]+]] { // SIMD2-NEXT: entry: -// SIMD2-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG314:![0-9]+]] -// SIMD2-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG314]] -// SIMD2-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG314]] +// SIMD2-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG313:![0-9]+]] +// SIMD2-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG313]] +// SIMD2-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG313]] // SIMD2-NEXT: ret void // // 
@@ -3631,8 +3631,8 @@ int foobar() { // CHECK-TLS1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK-TLS1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK-TLS1: arraydestroy.body: -// CHECK-TLS1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK-TLS1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-TLS1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-TLS1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK-TLS1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK-TLS1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0) // CHECK-TLS1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3648,7 +3648,7 @@ int foobar() { // CHECK-TLS1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK-TLS1-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1 // CHECK-TLS1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-TLS1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK-TLS1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK-TLS1: init.check: // CHECK-TLS1-NEXT: 
[[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1() // CHECK-TLS1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0 @@ -3921,7 +3921,7 @@ int foobar() { // CHECK-TLS1-NEXT: entry: // CHECK-TLS1-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1 // CHECK-TLS1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-TLS1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF2]] +// CHECK-TLS1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]] // CHECK-TLS1: init: // CHECK-TLS1-NEXT: store i8 1, i8* @__tls_guard, align 1 // CHECK-TLS1-NEXT: call void @__cxx_global_var_init() @@ -3939,7 +3939,7 @@ int foobar() { // CHECK-TLS2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1 // CHECK-TLS2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-TLS2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]] +// CHECK-TLS2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] // CHECK-TLS2: init.check: // CHECK-TLS2-NEXT: [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1() // CHECK-TLS2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0 @@ -4338,8 +4338,8 @@ int foobar() { // CHECK-TLS2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK-TLS2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK-TLS2: arraydestroy.body: -// CHECK-TLS2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK-TLS2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], 
%struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-TLS2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-TLS2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK-TLS2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK-TLS2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0) // CHECK-TLS2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -4447,7 +4447,7 @@ int foobar() { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1 // CHECK-TLS2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 -// CHECK-TLS2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF2]] +// CHECK-TLS2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]] // CHECK-TLS2: init: // CHECK-TLS2-NEXT: store i8 1, i8* @__tls_guard, align 1 // CHECK-TLS2-NEXT: call void @__cxx_global_var_init() @@ -4458,7 +4458,7 @@ int foobar() { // // // CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK-TLS3-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG116:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG119:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to 
i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG121:![0-9]+]] @@ -4682,8 +4682,8 @@ int foobar() { // CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] // CHECK-TLS3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG202]] // CHECK-TLS3: arraydestroy.body: -// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG202]] -// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG202]] +// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG202]] +// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG202]] // CHECK-TLS3-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG202]] // CHECK-TLS3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG202]] // CHECK-TLS3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG202]] @@ -4692,7 +4692,7 @@ int foobar() { // // // CHECK-TLS3-LABEL: define {{[^@]+}}@main -// CHECK-TLS3-SAME: () #[[ATTR5:[0-9]+]] !dbg [[DBG53:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR5:[0-9]+]] !dbg [[DBG52:![0-9]+]] { // CHECK-TLS3-NEXT: entry: 
// CHECK-TLS3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: [[RES:%.*]] = alloca i32, align 4 @@ -4997,75 +4997,75 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@main -// CHECK-TLS4-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG10:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG9:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: [[RES:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG117:![0-9]+]] -// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG117]] -// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG117]], !prof [[PROF118:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG118:![0-9]+]] +// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG118]] +// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG118]], !prof [[PROF119:![0-9]+]] // CHECK-TLS4: init.check: -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG119:![0-9]+]] -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0, !dbg [[DBG120:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG120]] -// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]]), !dbg [[DBG121:![0-9]+]] 
-// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR5:[0-9]+]], !dbg [[DBG117]] -// CHECK-TLS4-NEXT: store i8 1, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG117]] -// CHECK-TLS4-NEXT: br label [[INIT_END]], !dbg [[DBG117]] +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG120:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0, !dbg [[DBG121:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG121]] +// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]]), !dbg [[DBG122:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR5:[0-9]+]], !dbg [[DBG118]] +// CHECK-TLS4-NEXT: store i8 1, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG118]] +// CHECK-TLS4-NEXT: br label [[INIT_END]], !dbg [[DBG118]] // CHECK-TLS4: init.end: -// CHECK-TLS4-NEXT: [[TMP4:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG122:![0-9]+]] -// CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG123:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG123]] -// CHECK-TLS4-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG124:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG125:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG126:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP7]], [[TMP6]], !dbg [[DBG126]] -// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG126]] -// CHECK-TLS4-NEXT: [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG127:![0-9]+]] -// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0, !dbg [[DBG128:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A2]], align 4, !dbg [[DBG128]] -// CHECK-TLS4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG129:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG129]] -// CHECK-TLS4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG129]] -// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG130:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG131:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG131]] -// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG131]] -// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG132:![0-9]+]] -// CHECK-TLS4-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0, !dbg [[DBG133:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[A5]], align 4, !dbg [[DBG133]] -// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG134:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG134]] -// CHECK-TLS4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG134]] -// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG135:![0-9]+]] -// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1, !dbg [[DBG135]] -// CHECK-TLS4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds 
[3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG135]] -// CHECK-TLS4-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0, !dbg [[DBG136:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load i32, i32* [[A8]], align 4, !dbg [[DBG136]] -// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG137:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG137]] -// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG137]] -// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG138:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG138]] -// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG139:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG139]] -// CHECK-TLS4-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG139]] -// CHECK-TLS4-NEXT: [[TMP22:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG140:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4, !dbg [[DBG140]] -// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP23]] to i32, !dbg [[DBG140]] -// CHECK-TLS4-NEXT: [[TMP24:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG141:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[CONV]], !dbg [[DBG141]] -// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG141]] -// CHECK-TLS4-NEXT: [[TMP25:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG142:![0-9]+]] -// CHECK-TLS4-NEXT: [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP25]], i32 0, i32 0, !dbg [[DBG143:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP26:%.*]] = load i32, i32* [[A12]], align 4, !dbg [[DBG143]] -// CHECK-TLS4-NEXT: [[TMP27:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG144:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD13:%.*]] = add nsw i32 
[[TMP27]], [[TMP26]], !dbg [[DBG144]] -// CHECK-TLS4-NEXT: store i32 [[ADD13]], i32* [[RES]], align 4, !dbg [[DBG144]] -// CHECK-TLS4-NEXT: [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG145:![0-9]+]] -// CHECK-TLS4-NEXT: ret i32 [[TMP28]], !dbg [[DBG146:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP4:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG123:![0-9]+]] +// CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG124:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG124]] +// CHECK-TLS4-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG125:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG126:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG127:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG127]] +// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG127]] +// CHECK-TLS4-NEXT: [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG128:![0-9]+]] +// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A2]], align 4, !dbg [[DBG129]] +// CHECK-TLS4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG130:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG130]] +// CHECK-TLS4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG130]] +// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG131:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG132:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 
[[TMP11]], !dbg [[DBG132]] +// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG132]] +// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG133:![0-9]+]] +// CHECK-TLS4-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0, !dbg [[DBG134:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[A5]], align 4, !dbg [[DBG134]] +// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG135:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG135]] +// CHECK-TLS4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG135]] +// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG136:![0-9]+]] +// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1, !dbg [[DBG136]] +// CHECK-TLS4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG136]] +// CHECK-TLS4-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0, !dbg [[DBG137:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load i32, i32* [[A8]], align 4, !dbg [[DBG137]] +// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG138:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG138]] +// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG138]] +// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG139:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG139]] +// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG140:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG140]] +// CHECK-TLS4-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg 
[[DBG140]] +// CHECK-TLS4-NEXT: [[TMP22:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG141:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4, !dbg [[DBG141]] +// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP23]] to i32, !dbg [[DBG141]] +// CHECK-TLS4-NEXT: [[TMP24:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG142:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[CONV]], !dbg [[DBG142]] +// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG142]] +// CHECK-TLS4-NEXT: [[TMP25:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG143:![0-9]+]] +// CHECK-TLS4-NEXT: [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP25]], i32 0, i32 0, !dbg [[DBG144:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP26:%.*]] = load i32, i32* [[A12]], align 4, !dbg [[DBG144]] +// CHECK-TLS4-NEXT: [[TMP27:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG145:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP27]], [[TMP26]], !dbg [[DBG145]] +// CHECK-TLS4-NEXT: store i32 [[ADD13]], i32* [[RES]], align 4, !dbg [[DBG145]] +// CHECK-TLS4-NEXT: [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG146:![0-9]+]] +// CHECK-TLS4-NEXT: ret i32 [[TMP28]], !dbg [[DBG147:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWL3gs1 @@ -5075,29 +5075,29 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// CHECK-TLS4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG147:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG148:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** 
[[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG152:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG153:![0-9]+]] -// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG153]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG154:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG154:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG154]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// CHECK-TLS4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] align 2 !dbg [[DBG155:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] align 2 !dbg [[DBG156:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// 
CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR5]], !dbg [[DBG158:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG159:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR5]], !dbg [[DBG159:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG160:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN6Static1sE @@ -5143,61 +5143,61 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_Z6foobarv -// CHECK-TLS4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG160:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG161:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG163:![0-9]+]] -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG164:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG164]] -// CHECK-TLS4-NEXT: store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG165:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG166:![0-9]+]] -// CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG167:![0-9]+]] 
-// CHECK-TLS4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG167]] -// CHECK-TLS4-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG168]] -// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG168]] -// CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG169:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG170:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG170]] -// CHECK-TLS4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG170]] -// CHECK-TLS4-NEXT: [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG171:![0-9]+]] -// CHECK-TLS4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG172:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG172]] -// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG173]] -// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG173]] -// CHECK-TLS4-NEXT: [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG174:![0-9]+]] -// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG174]] -// CHECK-TLS4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG174]] -// CHECK-TLS4-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG175]] -// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, 
i32* [[RES]], align 4, !dbg [[DBG176:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG176]] -// CHECK-TLS4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG176]] -// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG177:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG177]] -// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG178:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG178]] -// CHECK-TLS4-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG178]] -// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG179:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG179]] -// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG179]] -// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG180:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG180]] -// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG180]] -// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG181:![0-9]+]] -// CHECK-TLS4-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0, !dbg [[DBG182:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG182]] -// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG183:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG183]] -// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG183]] -// CHECK-TLS4-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG184:![0-9]+]] -// CHECK-TLS4-NEXT: ret i32 [[TMP22]], !dbg [[DBG185:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], 
metadata [[META162:![0-9]+]], metadata !DIExpression()), !dbg [[DBG163:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG164:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG165:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG165]] +// CHECK-TLS4-NEXT: store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG167:![0-9]+]] +// CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG168:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG168]] +// CHECK-TLS4-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG169:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG169]] +// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG169]] +// CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG170:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG171:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG171]] +// CHECK-TLS4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG171]] +// CHECK-TLS4-NEXT: [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG172:![0-9]+]] +// CHECK-TLS4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG173:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG173]] +// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG174]] +// CHECK-TLS4-NEXT: store i32 [[ADD4]], 
i32* [[RES]], align 4, !dbg [[DBG174]] +// CHECK-TLS4-NEXT: [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG175:![0-9]+]] +// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG175]] +// CHECK-TLS4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG175]] +// CHECK-TLS4-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG176:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG176]] +// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG177:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG177]] +// CHECK-TLS4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG177]] +// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG178:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG178]] +// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG179:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG179]] +// CHECK-TLS4-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG179]] +// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG180:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG180]] +// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG180]] +// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG181:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG181]] +// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG181]] +// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG182:![0-9]+]] +// 
CHECK-TLS4-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0, !dbg [[DBG183:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG183]] +// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG184]] +// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG184]] +// CHECK-TLS4-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]] +// CHECK-TLS4-NEXT: ret i32 [[TMP22]], !dbg [[DBG186:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK-TLS4-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG186:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG187:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG190:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG192:![0-9]+]] @@ -5421,8 +5421,8 @@ int foobar() { // CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META272:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273:![0-9]+]] // CHECK-TLS4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG273]] // CHECK-TLS4: arraydestroy.body: -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG273]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, 
!dbg [[DBG273]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG273]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG273]] // CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG273]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG273]] // CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG273]] @@ -5539,7 +5539,7 @@ int foobar() { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG326:![0-9]+]] // CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG326]] -// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG326]], !prof [[PROF118]] +// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG326]], !prof [[PROF119]] // CHECK-TLS4: init: // CHECK-TLS4-NEXT: store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG326]] // CHECK-TLS4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG326]] @@ -5711,8 +5711,8 @@ int foobar() { // SIMD3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // SIMD3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // SIMD3: arraydestroy.body: -// SIMD3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 
6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// SIMD3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // SIMD3-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // SIMD3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0) // SIMD3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -6010,549 +6010,549 @@ int foobar() { // SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init // SIMD4-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] { // SIMD4-NEXT: entry: -// SIMD4-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG119:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG121:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG122:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG118:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG120:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG121:![0-9]+]] // // 
// SIMD4-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG123:![0-9]+]] { +// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG129:![0-9]+]] -// SIMD4-NEXT: call void @_ZN2S1C2Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG129]] -// SIMD4-NEXT: ret void, !dbg [[DBG130:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S1C2Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]] +// SIMD4-NEXT: ret void, !dbg [[DBG129:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// SIMD4-SAME: (%struct.S1* nonnull align 4 
dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG131:![0-9]+]] { +// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @_ZN2S1D2Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG134:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG135:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S1D2Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG134:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 -// SIMD4-SAME: () #[[ATTR0]] !dbg [[DBG136:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR0]] !dbg [[DBG135:![0-9]+]] { // SIMD4-NEXT: entry: -// SIMD4-NEXT: call void @_ZN2S2C1Ei(%struct.S2* nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG137:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG139:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG140:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S2C1Ei(%struct.S2* nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG136:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void 
(%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG138:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG139:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S2C1Ei -// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG141:![0-9]+]] { +// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG140:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] -// SIMD4-NEXT: call void @_ZN2S2C2Ei(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG147]] -// SIMD4-NEXT: ret void, !dbg [[DBG148:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG146:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S2C2Ei(%struct.S2* nonnull align 8 dereferenceable(16) 
[[THIS1]], i32 [[TMP0]]), !dbg [[DBG146]] +// SIMD4-NEXT: ret void, !dbg [[DBG147:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S2D1Ev -// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG149:![0-9]+]] { +// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG148:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @_ZN2S2D2Ev(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG152:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG153:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S2D2Ev(%struct.S2* nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG151:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG152:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 -// SIMD4-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG154:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG153:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8 // SIMD4-NEXT: [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8 // SIMD4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // SIMD4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // SIMD4-NEXT: 
[[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8 -// SIMD4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG155:![0-9]+]] -// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157:![0-9]+]] +// SIMD4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154:![0-9]+]] +// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156:![0-9]+]] // SIMD4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1) -// SIMD4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG158:![0-9]+]] +// SIMD4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG157:![0-9]+]] // SIMD4: invoke.cont: -// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157]] +// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]] // SIMD4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2) -// SIMD4-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG159:![0-9]+]] 
+// SIMD4-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG158:![0-9]+]] // SIMD4: invoke.cont2: -// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157]] +// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]] // SIMD4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3) -// SIMD4-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG160:![0-9]+]] +// SIMD4-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG159:![0-9]+]] // SIMD4: invoke.cont3: -// SIMD4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG155]] -// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161:![0-9]+]] +// SIMD4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]] +// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160:![0-9]+]] // SIMD4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4) -// SIMD4-NEXT: to label [[INVOKE_CONT7:%.*]] unwind label 
[[LPAD6:%.*]], !dbg [[DBG162:![0-9]+]] +// SIMD4-NEXT: to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG161:![0-9]+]] // SIMD4: invoke.cont7: -// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161]] +// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]] // SIMD4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5) -// SIMD4-NEXT: to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG163:![0-9]+]] +// SIMD4-NEXT: to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG162:![0-9]+]] // SIMD4: invoke.cont8: -// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161]] +// SIMD4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]] // SIMD4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6) -// SIMD4-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG164:![0-9]+]] +// SIMD4-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG163:![0-9]+]] // SIMD4: invoke.cont9: -// SIMD4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG165:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG165]] +// SIMD4-NEXT: 
[[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG164:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG164]] // SIMD4: lpad: // SIMD4-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } -// SIMD4-NEXT: cleanup, !dbg [[DBG166:![0-9]+]] -// SIMD4-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG166]] -// SIMD4-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG166]] -// SIMD4-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG166]] -// SIMD4-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG166]] -// SIMD4-NEXT: [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG157]] -// SIMD4-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG157]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG157]] +// SIMD4-NEXT: cleanup, !dbg [[DBG165:![0-9]+]] +// SIMD4-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG165]] +// SIMD4-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]] +// SIMD4-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG165]] +// SIMD4-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]] +// SIMD4-NEXT: [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]] +// SIMD4-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG156]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG156]] // SIMD4: arraydestroy.body: -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
%struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG157]] -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG157]] -// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG157]] -// SIMD4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG157]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG157]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG156]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG156]] +// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG156]] +// SIMD4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG156]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG156]] // SIMD4: arraydestroy.done4: -// SIMD4-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG157]] +// SIMD4-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG156]] // SIMD4: lpad6: // SIMD4-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// SIMD4-NEXT: cleanup, !dbg [[DBG166]] -// SIMD4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG166]] -// SIMD4-NEXT: store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG166]] -// SIMD4-NEXT: [[TMP7:%.*]] = extractvalue { i8*, i32 } 
[[TMP5]], 1, !dbg [[DBG166]] -// SIMD4-NEXT: store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG166]] -// SIMD4-NEXT: [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG161]] -// SIMD4-NEXT: [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG161]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG161]] +// SIMD4-NEXT: cleanup, !dbg [[DBG165]] +// SIMD4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG165]] +// SIMD4-NEXT: store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]] +// SIMD4-NEXT: [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG165]] +// SIMD4-NEXT: store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]] +// SIMD4-NEXT: [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]] +// SIMD4-NEXT: [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG160]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG160]] // SIMD4: arraydestroy.body11: -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG161]] -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG161]] -// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG161]] -// SIMD4-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds 
([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG161]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG161]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG160]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG160]] +// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG160]] +// SIMD4-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG160]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG160]] // SIMD4: arraydestroy.done15: -// SIMD4-NEXT: br label [[EHCLEANUP]], !dbg [[DBG161]] +// SIMD4-NEXT: br label [[EHCLEANUP]], !dbg [[DBG160]] // SIMD4: ehcleanup: -// SIMD4-NEXT: [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG155]] -// SIMD4-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG155]] -// SIMD4-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG155]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG155]] +// SIMD4-NEXT: [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]] +// SIMD4-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x 
%struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG154]] +// SIMD4-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG154]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG154]] // SIMD4: arraydestroy.body17: -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG155]] -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG155]] -// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG155]] -// SIMD4-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG155]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG155]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG154]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG154]] +// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG154]] +// SIMD4-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG154]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label 
[[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG154]] // SIMD4: arraydestroy.done21: -// SIMD4-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG155]] +// SIMD4-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG154]] // SIMD4: eh.resume: -// SIMD4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG155]] -// SIMD4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG155]] -// SIMD4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG155]] -// SIMD4-NEXT: [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG155]] -// SIMD4-NEXT: resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG155]] +// SIMD4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG154]] +// SIMD4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG154]] +// SIMD4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG154]] +// SIMD4-NEXT: [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG154]] +// SIMD4-NEXT: resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG154]] // // // SIMD4-LABEL: define {{[^@]+}}@__cxx_global_array_dtor -// SIMD4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG167:![0-9]+]] { +// SIMD4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG166:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // SIMD4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] -// SIMD4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG172]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]] +// SIMD4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG171]] // SIMD4: arraydestroy.body: -// SIMD4-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG172]] -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG172]] -// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG172]] -// SIMD4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG172]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG172]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG171]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG171]] +// SIMD4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG171]] +// SIMD4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG171]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG171]] // SIMD4: arraydestroy.done1: -// SIMD4-NEXT: ret void, !dbg [[DBG172]] +// SIMD4-NEXT: ret void, !dbg [[DBG171]] // // // SIMD4-LABEL: define {{[^@]+}}@main -// SIMD4-SAME: () 
#[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG52:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // SIMD4-NEXT: [[RES:%.*]] = alloca i32, align 4 // SIMD4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // SIMD4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG175:![0-9]+]] -// SIMD4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG175]] -// SIMD4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG175]], !prof [[PROF176:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG174:![0-9]+]] +// SIMD4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG174]] +// SIMD4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG174]], !prof [[PROF175:![0-9]+]] // SIMD4: init.check: -// SIMD4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG175]] -// SIMD4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0, !dbg [[DBG175]] -// SIMD4-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG175]] +// SIMD4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]] +// SIMD4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 
[[TMP1]], 0, !dbg [[DBG174]] +// SIMD4-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG174]] // SIMD4: init: -// SIMD4-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG177:![0-9]+]] +// SIMD4-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG176:![0-9]+]] // SIMD4-NEXT: invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]]) -// SIMD4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// SIMD4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG177:![0-9]+]] // SIMD4: invoke.cont: -// SIMD4-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG175]] -// SIMD4-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG175]] -// SIMD4-NEXT: br label [[INIT_END]], !dbg [[DBG175]] +// SIMD4-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG174]] +// SIMD4-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]] +// SIMD4-NEXT: br label [[INIT_END]], !dbg [[DBG174]] // SIMD4: init.end: -// SIMD4-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG179:![0-9]+]] -// SIMD4-NEXT: store i32 [[TMP4]], i32* [[RES]], align 4, !dbg [[DBG180:![0-9]+]] -// SIMD4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg 
[[DBG181:![0-9]+]] -// SIMD4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG182:![0-9]+]] -// SIMD4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG182]] -// SIMD4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG182]] -// SIMD4-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG183:![0-9]+]] -// SIMD4-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG184:![0-9]+]] -// SIMD4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG184]] -// SIMD4-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG184]] -// SIMD4-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG185:![0-9]+]] -// SIMD4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG186:![0-9]+]] -// SIMD4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG186]] -// SIMD4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG186]] -// SIMD4-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG187:![0-9]+]] -// SIMD4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG188:![0-9]+]] -// SIMD4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG188]] -// SIMD4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG188]] -// SIMD4-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG189:![0-9]+]] -// SIMD4-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG190:![0-9]+]] -// SIMD4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG190]] -// SIMD4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG190]] -// SIMD4-NEXT: [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG191:![0-9]+]] -// 
SIMD4-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG192:![0-9]+]] -// SIMD4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG192]] -// SIMD4-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG192]] -// SIMD4-NEXT: [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG193:![0-9]+]] -// SIMD4-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG193]] -// SIMD4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG194:![0-9]+]] -// SIMD4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG194]] -// SIMD4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG194]] -// SIMD4-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG195:![0-9]+]] -// SIMD4-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG196:![0-9]+]] -// SIMD4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG196]] -// SIMD4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG196]] -// SIMD4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG197:![0-9]+]] -// SIMD4-NEXT: ret i32 [[TMP21]], !dbg [[DBG198:![0-9]+]] +// SIMD4-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG178:![0-9]+]] +// SIMD4-NEXT: store i32 [[TMP4]], i32* [[RES]], align 4, !dbg [[DBG179:![0-9]+]] +// SIMD4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG180:![0-9]+]] +// SIMD4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG181:![0-9]+]] +// SIMD4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG181]] +// SIMD4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG181]] +// SIMD4-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 
0), align 4, !dbg [[DBG182:![0-9]+]] +// SIMD4-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG183:![0-9]+]] +// SIMD4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG183]] +// SIMD4-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG183]] +// SIMD4-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG184:![0-9]+]] +// SIMD4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]] +// SIMD4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG185]] +// SIMD4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG185]] +// SIMD4-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG186:![0-9]+]] +// SIMD4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG187:![0-9]+]] +// SIMD4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG187]] +// SIMD4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG187]] +// SIMD4-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG188:![0-9]+]] +// SIMD4-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG189:![0-9]+]] +// SIMD4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG189]] +// SIMD4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG189]] +// SIMD4-NEXT: [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG190:![0-9]+]] +// SIMD4-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG191:![0-9]+]] +// SIMD4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG191]] +// SIMD4-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG191]] +// SIMD4-NEXT: [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG192:![0-9]+]] +// SIMD4-NEXT: [[CONV:%.*]] = fptosi 
float [[TMP17]] to i32, !dbg [[DBG192]] +// SIMD4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG193:![0-9]+]] +// SIMD4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG193]] +// SIMD4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG193]] +// SIMD4-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG194:![0-9]+]] +// SIMD4-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG195:![0-9]+]] +// SIMD4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG195]] +// SIMD4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG195]] +// SIMD4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG196:![0-9]+]] +// SIMD4-NEXT: ret i32 [[TMP21]], !dbg [[DBG197:![0-9]+]] // SIMD4: lpad: // SIMD4-NEXT: [[TMP22:%.*]] = landingpad { i8*, i32 } -// SIMD4-NEXT: cleanup, !dbg [[DBG199:![0-9]+]] -// SIMD4-NEXT: [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0, !dbg [[DBG199]] -// SIMD4-NEXT: store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG199]] -// SIMD4-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1, !dbg [[DBG199]] -// SIMD4-NEXT: store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG199]] -// SIMD4-NEXT: call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG175]] -// SIMD4-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG175]] +// SIMD4-NEXT: cleanup, !dbg [[DBG198:![0-9]+]] +// SIMD4-NEXT: [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0, !dbg [[DBG198]] +// SIMD4-NEXT: store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG198]] +// SIMD4-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1, !dbg [[DBG198]] +// SIMD4-NEXT: store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG198]] +// SIMD4-NEXT: call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]] +// SIMD4-NEXT: br label [[EH_RESUME:%.*]], 
!dbg [[DBG174]] // SIMD4: eh.resume: -// SIMD4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG175]] -// SIMD4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG175]] -// SIMD4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG175]] -// SIMD4-NEXT: [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG175]] -// SIMD4-NEXT: resume { i8*, i32 } [[LPAD_VAL8]], !dbg [[DBG175]] +// SIMD4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG174]] +// SIMD4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG174]] +// SIMD4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG174]] +// SIMD4-NEXT: [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG174]] +// SIMD4-NEXT: resume { i8*, i32 } [[LPAD_VAL8]], !dbg [[DBG174]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG200:![0-9]+]] { +// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG199:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata 
[[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG206:![0-9]+]] -// SIMD4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG206]] -// SIMD4-NEXT: ret void, !dbg [[DBG207:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]] +// SIMD4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG205]] +// SIMD4-NEXT: ret void, !dbg [[DBG206:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG208:![0-9]+]] { +// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG207:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG211:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg 
[[DBG212:![0-9]+]] +// SIMD4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG210:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG211:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_Z6foobarv -// SIMD4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG213:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG212:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG216:![0-9]+]] -// SIMD4-NEXT: store i32 [[TMP0]], i32* [[RES]], align 4, !dbg [[DBG217:![0-9]+]] -// SIMD4-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG218:![0-9]+]] -// SIMD4-NEXT: [[TMP2:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG219:![0-9]+]] -// SIMD4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]], !dbg [[DBG219]] -// SIMD4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG219]] -// SIMD4-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG220:![0-9]+]] -// SIMD4-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG221:![0-9]+]] -// SIMD4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG221]] -// SIMD4-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG221]] -// SIMD4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG222:![0-9]+]] -// SIMD4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG223:![0-9]+]] -// SIMD4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG223]] -// SIMD4-NEXT: 
store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG223]] -// SIMD4-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG224:![0-9]+]] -// SIMD4-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG225:![0-9]+]] -// SIMD4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG225]] -// SIMD4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG225]] -// SIMD4-NEXT: [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG226:![0-9]+]] -// SIMD4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG227:![0-9]+]] -// SIMD4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG227]] -// SIMD4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG227]] -// SIMD4-NEXT: [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG228:![0-9]+]] -// SIMD4-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32, !dbg [[DBG228]] -// SIMD4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG229:![0-9]+]] -// SIMD4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]], !dbg [[DBG229]] -// SIMD4-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG229]] -// SIMD4-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG230:![0-9]+]] -// SIMD4-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]] -// SIMD4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG231]] -// SIMD4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG231]] -// SIMD4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG232:![0-9]+]] -// SIMD4-NEXT: ret i32 [[TMP15]], !dbg [[DBG233:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, 
i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG215:![0-9]+]] +// SIMD4-NEXT: store i32 [[TMP0]], i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]] +// SIMD4-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG217:![0-9]+]] +// SIMD4-NEXT: [[TMP2:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG218:![0-9]+]] +// SIMD4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]], !dbg [[DBG218]] +// SIMD4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG218]] +// SIMD4-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG219:![0-9]+]] +// SIMD4-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG220:![0-9]+]] +// SIMD4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG220]] +// SIMD4-NEXT: store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG220]] +// SIMD4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG221:![0-9]+]] +// SIMD4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG222:![0-9]+]] +// SIMD4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG222]] +// SIMD4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG222]] +// SIMD4-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG223:![0-9]+]] +// SIMD4-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]] +// SIMD4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG224]] +// SIMD4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG224]] +// SIMD4-NEXT: [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG225:![0-9]+]] +// SIMD4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg 
[[DBG226:![0-9]+]] +// SIMD4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG226]] +// SIMD4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG226]] +// SIMD4-NEXT: [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG227:![0-9]+]] +// SIMD4-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32, !dbg [[DBG227]] +// SIMD4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG228:![0-9]+]] +// SIMD4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]], !dbg [[DBG228]] +// SIMD4-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG228]] +// SIMD4-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG229:![0-9]+]] +// SIMD4-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG230:![0-9]+]] +// SIMD4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG230]] +// SIMD4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG230]] +// SIMD4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]] +// SIMD4-NEXT: ret i32 [[TMP15]], !dbg [[DBG232:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 -// SIMD4-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG234:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG233:![0-9]+]] { // SIMD4-NEXT: entry: -// SIMD4-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG235:![0-9]+]] -// SIMD4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG235]] -// SIMD4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG235]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234:![0-9]+]] +// SIMD4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG234]] +// SIMD4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], 
label [[INIT_END:%.*]], !dbg [[DBG234]] // SIMD4: init.check: -// SIMD4-NEXT: call void @_ZN2S4C1Ei(%struct.S4* nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG236:![0-9]+]] -// SIMD4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG235]] -// SIMD4-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG235]] -// SIMD4-NEXT: br label [[INIT_END]], !dbg [[DBG235]] +// SIMD4-NEXT: call void @_ZN2S4C1Ei(%struct.S4* nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG235:![0-9]+]] +// SIMD4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG234]] +// SIMD4-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234]] +// SIMD4-NEXT: br label [[INIT_END]], !dbg [[DBG234]] // SIMD4: init.end: -// SIMD4-NEXT: ret void, !dbg [[DBG238:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG237:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG239:![0-9]+]] { +// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG238:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242:![0-9]+]] +// SIMD4-NEXT: call void 
@llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META243:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] -// SIMD4-NEXT: call void @_ZN2S4C2Ei(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG245]] -// SIMD4-NEXT: ret void, !dbg [[DBG246:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG244:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S4C2Ei(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG244]] +// SIMD4-NEXT: ret void, !dbg [[DBG245:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG247:![0-9]+]] { +// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG246:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META248:![0-9]+]], metadata !DIExpression()), !dbg [[DBG249:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S4*, 
%struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @_ZN2S4D2Ev(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG250:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG251:![0-9]+]] +// SIMD4-NEXT: call void @_ZN2S4D2Ev(%struct.S4* nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG249:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG250:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S1C2Ei -// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG252:![0-9]+]] { +// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG251:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META255:![0-9]+]], metadata !DIExpression()), !dbg [[DBG256:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG258:![0-9]+]] -// SIMD4-NEXT: store i32 
[[TMP0]], i32* [[A2]], align 4, !dbg [[DBG257]] -// SIMD4-NEXT: ret void, !dbg [[DBG259:![0-9]+]] +// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG256:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG257:![0-9]+]] +// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG256]] +// SIMD4-NEXT: ret void, !dbg [[DBG258:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S1D2Ev -// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG260:![0-9]+]] { +// SIMD4-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG259:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // SIMD4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG263:![0-9]+]] -// SIMD4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG265:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG266:![0-9]+]] +// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]] +// SIMD4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG264:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG265:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S2C2Ei -// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 
[[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG267:![0-9]+]] { +// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG266:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG273:![0-9]+]] -// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG272]] -// SIMD4-NEXT: ret void, !dbg [[DBG274:![0-9]+]] +// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG272:![0-9]+]] +// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG271]] +// SIMD4-NEXT: ret void, !dbg [[DBG273:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S2D2Ev -// SIMD4-SAME: (%struct.S2* nonnull 
align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG275:![0-9]+]] { +// SIMD4-SAME: (%struct.S2* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG274:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // SIMD4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG278:![0-9]+]] -// SIMD4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG280:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG281:![0-9]+]] +// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]] +// SIMD4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG279:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG280:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG282:![0-9]+]] { +// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG281:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** 
[[THIS_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG284:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG286:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG287:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG288:![0-9]+]] -// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG287]] -// SIMD4-NEXT: ret void, !dbg [[DBG289:![0-9]+]] +// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] +// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG286]] +// SIMD4-NEXT: ret void, !dbg [[DBG288:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG290:![0-9]+]] { +// SIMD4-SAME: (%struct.Smain* nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG289:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // SIMD4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** 
[[THIS_ADDR]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG293:![0-9]+]] -// SIMD4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG295:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG296:![0-9]+]] +// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG292:![0-9]+]] +// SIMD4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG294:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG297:![0-9]+]] { +// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG296:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG299:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] // SIMD4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] +// 
SIMD4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG302:![0-9]+]] -// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG303:![0-9]+]] -// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG302]] -// SIMD4-NEXT: ret void, !dbg [[DBG304:![0-9]+]] +// SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG301:![0-9]+]] +// SIMD4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]] +// SIMD4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG301]] +// SIMD4-NEXT: ret void, !dbg [[DBG303:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG305:![0-9]+]] { +// SIMD4-SAME: (%struct.S4* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG304:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // SIMD4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG307:![0-9]+]] +// SIMD4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]] // SIMD4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG308:![0-9]+]] -// SIMD4-NEXT: store i32 0, i32* 
[[A]], align 4, !dbg [[DBG310:![0-9]+]] -// SIMD4-NEXT: ret void, !dbg [[DBG311:![0-9]+]] +// SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] +// SIMD4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG309:![0-9]+]] +// SIMD4-NEXT: ret void, !dbg [[DBG310:![0-9]+]] // // // SIMD4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp -// SIMD4-SAME: () #[[ATTR0]] !dbg [[DBG312:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR0]] !dbg [[DBG311:![0-9]+]] { // SIMD4-NEXT: entry: -// SIMD4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG314:![0-9]+]] -// SIMD4-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG314]] -// SIMD4-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG314]] +// SIMD4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG313:![0-9]+]] +// SIMD4-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG313]] +// SIMD4-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG313]] // SIMD4-NEXT: ret void // // // DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_. 
-// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] { +// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG116:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // DEBUG1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG120:![0-9]+]] -// DEBUG1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG120]] -// DEBUG1-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[TMP2]], i32 5), !dbg [[DBG121:![0-9]+]] -// DEBUG1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG120]] -// DEBUG1-NEXT: ret i8* [[TMP3]], !dbg [[DBG120]] +// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG121:![0-9]+]] +// DEBUG1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG121]] +// DEBUG1-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[TMP2]], i32 5), !dbg [[DBG122:![0-9]+]] +// DEBUG1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG121]] +// DEBUG1-NEXT: ret i8* [[TMP3]], !dbg [[DBG121]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// DEBUG1-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] { +// DEBUG1-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG123:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca 
i32, align 4 // DEBUG1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] +// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] // DEBUG1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // DEBUG1-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]] -// DEBUG1-NEXT: call void @_ZN2S1C2Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]] -// DEBUG1-NEXT: ret void, !dbg [[DBG129:![0-9]+]] +// DEBUG1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG129:![0-9]+]] +// DEBUG1-NEXT: call void @_ZN2S1C2Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG129]] +// DEBUG1-NEXT: ret void, !dbg [[DBG130:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_. 
-// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG130:![0-9]+]] { +// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG131:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // DEBUG1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG132]] -// DEBUG1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG132]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR4:[0-9]+]], !dbg [[DBG132]] -// DEBUG1-NEXT: ret void, !dbg [[DBG133:![0-9]+]] +// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG133]] +// DEBUG1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG133]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR4:[0-9]+]], !dbg [[DBG133]] +// DEBUG1-NEXT: ret void, !dbg [[DBG134:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// DEBUG1-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 !dbg [[DBG134:![0-9]+]] { +// DEBUG1-SAME: (%struct.S1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 !dbg [[DBG135:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // DEBUG1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +// DEBUG1-NEXT: call 
void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] // DEBUG1-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @_ZN2S1D2Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG137:![0-9]+]] -// DEBUG1-NEXT: ret void, !dbg [[DBG138:![0-9]+]] +// DEBUG1-NEXT: call void @_ZN2S1D2Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG138:![0-9]+]] +// DEBUG1-NEXT: ret void, !dbg [[DBG139:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_. -// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG139:![0-9]+]] { +// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG140:![0-9]+]] { // DEBUG1-NEXT: entry: -// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG140:![0-9]+]] -// DEBUG1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG140]] -// DEBUG1-NEXT: ret void, !dbg [[DBG140]] +// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG141:![0-9]+]] +// DEBUG1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG141]] +// DEBUG1-NEXT: ret void, !dbg [[DBG141]] // // // DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1 -// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG141:![0-9]+]] { +// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG142:![0-9]+]] { // DEBUG1-NEXT: entry: // 
DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // DEBUG1-NEXT: [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8 @@ -6561,133 +6561,133 @@ int foobar() { // DEBUG1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[ARRAYINIT_ENDOFINIT9:%.*]] = alloca %struct.S1*, align 8 // DEBUG1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG144:![0-9]+]] -// DEBUG1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG144]] -// DEBUG1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG145:![0-9]+]] -// DEBUG1-NEXT: store [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG145]] -// DEBUG1-NEXT: [[ARRAYINIT_BEGIN1:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG146:![0-9]+]] -// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_BEGIN1]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG145:![0-9]+]] +// DEBUG1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG145]] +// DEBUG1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG146:![0-9]+]] +// DEBUG1-NEXT: store [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYINIT_BEGIN1:%.*]] = getelementptr inbounds [3 x 
%struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG147:![0-9]+]] +// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_BEGIN1]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG147]] // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN1]], i32 1) -// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG147:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG148:![0-9]+]] // DEBUG1: invoke.cont: -// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYINIT_BEGIN1]], i64 1, !dbg [[DBG146]] -// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYINIT_BEGIN1]], i64 1, !dbg [[DBG147]] +// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG147]] // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) -// DEBUG1-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG148:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG149:![0-9]+]] // DEBUG1: invoke.cont3: -// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT]], i64 1, !dbg [[DBG146]] -// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT4]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT]], i64 1, !dbg [[DBG147]] +// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT4]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG147]] // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull 
align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT4]], i32 3) -// DEBUG1-NEXT: to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]], !dbg [[DBG149:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]], !dbg [[DBG150:![0-9]+]] // DEBUG1: invoke.cont5: -// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 1, !dbg [[DBG145]] -// DEBUG1-NEXT: store [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG145]] -// DEBUG1-NEXT: [[ARRAYINIT_BEGIN8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], i64 0, i64 0, !dbg [[DBG150:![0-9]+]] -// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_BEGIN8]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]] +// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 1, !dbg [[DBG146]] +// DEBUG1-NEXT: store [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYINIT_BEGIN8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], i64 0, i64 0, !dbg [[DBG151:![0-9]+]] +// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_BEGIN8]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG151]] // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN8]], i32 4) -// DEBUG1-NEXT: to label [[INVOKE_CONT11:%.*]] unwind label [[LPAD10:%.*]], !dbg [[DBG151:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT11:%.*]] unwind label [[LPAD10:%.*]], !dbg [[DBG152:![0-9]+]] // DEBUG1: invoke.cont11: -// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT12:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_BEGIN8]], i64 1, !dbg [[DBG150]] -// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT12]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 
8, !dbg [[DBG150]] +// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT12:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_BEGIN8]], i64 1, !dbg [[DBG151]] +// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT12]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG151]] // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT12]], i32 5) -// DEBUG1-NEXT: to label [[INVOKE_CONT13:%.*]] unwind label [[LPAD10]], !dbg [[DBG152:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT13:%.*]] unwind label [[LPAD10]], !dbg [[DBG153:![0-9]+]] // DEBUG1: invoke.cont13: -// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT14:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT12]], i64 1, !dbg [[DBG150]] -// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT14]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]] +// DEBUG1-NEXT: [[ARRAYINIT_ELEMENT14:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT12]], i64 1, !dbg [[DBG151]] +// DEBUG1-NEXT: store %struct.S1* [[ARRAYINIT_ELEMENT14]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG151]] // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT14]], i32 6) -// DEBUG1-NEXT: to label [[INVOKE_CONT15:%.*]] unwind label [[LPAD10]], !dbg [[DBG153:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT15:%.*]] unwind label [[LPAD10]], !dbg [[DBG154:![0-9]+]] // DEBUG1: invoke.cont15: -// DEBUG1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG144]] -// DEBUG1-NEXT: ret i8* [[TMP3]], !dbg [[DBG144]] +// DEBUG1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG145]] +// DEBUG1-NEXT: ret i8* [[TMP3]], !dbg [[DBG145]] // DEBUG1: lpad: // DEBUG1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// DEBUG1-NEXT: cleanup, !dbg [[DBG143]] -// DEBUG1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG143]] -// DEBUG1-NEXT: store 
i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG143]] -// DEBUG1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG143]] -// DEBUG1-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG143]] -// DEBUG1-NEXT: [[TMP7:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN1]], [[TMP7]], !dbg [[DBG146]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG146]] +// DEBUG1-NEXT: cleanup, !dbg [[DBG144]] +// DEBUG1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG144]] +// DEBUG1-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG144]] +// DEBUG1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG144]] +// DEBUG1-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG144]] +// DEBUG1-NEXT: [[TMP7:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG147]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN1]], [[TMP7]], !dbg [[DBG147]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG147]] // DEBUG1: arraydestroy.body: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP7]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG146]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG146]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG146]] -// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAYINIT_BEGIN1]], !dbg [[DBG146]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], 
label [[ARRAYDESTROY_DONE6]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP7]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG147]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG147]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG147]] +// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAYINIT_BEGIN1]], !dbg [[DBG147]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG147]] // DEBUG1: arraydestroy.done6: -// DEBUG1-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG146]] +// DEBUG1-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG147]] // DEBUG1: lpad10: // DEBUG1-NEXT: [[TMP8:%.*]] = landingpad { i8*, i32 } -// DEBUG1-NEXT: cleanup, !dbg [[DBG143]] -// DEBUG1-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0, !dbg [[DBG143]] -// DEBUG1-NEXT: store i8* [[TMP9]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG143]] -// DEBUG1-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 1, !dbg [[DBG143]] -// DEBUG1-NEXT: store i32 [[TMP10]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG143]] -// DEBUG1-NEXT: [[TMP11:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN8]], [[TMP11]], !dbg [[DBG150]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG150]] +// DEBUG1-NEXT: cleanup, !dbg [[DBG144]] +// DEBUG1-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0, !dbg [[DBG144]] +// DEBUG1-NEXT: store i8* [[TMP9]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG144]] +// DEBUG1-NEXT: 
[[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 1, !dbg [[DBG144]] +// DEBUG1-NEXT: store i32 [[TMP10]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG144]] +// DEBUG1-NEXT: [[TMP11:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG151]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN8]], [[TMP11]], !dbg [[DBG151]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG151]] // DEBUG1: arraydestroy.body17: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[TMP11]], [[LPAD10]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG150]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG150]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG150]] -// DEBUG1-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], [[ARRAYINIT_BEGIN8]], !dbg [[DBG150]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG150]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[TMP11]], [[LPAD10]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG151]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG151]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG151]] +// DEBUG1-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], [[ARRAYINIT_BEGIN8]], !dbg [[DBG151]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label 
[[ARRAYDESTROY_BODY17]], !dbg [[DBG151]] // DEBUG1: arraydestroy.done21: -// DEBUG1-NEXT: br label [[EHCLEANUP]], !dbg [[DBG150]] +// DEBUG1-NEXT: br label [[EHCLEANUP]], !dbg [[DBG151]] // DEBUG1: ehcleanup: -// DEBUG1-NEXT: [[TMP12:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG145]] -// DEBUG1-NEXT: [[PAD_ARRAYBEGIN:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG145]] -// DEBUG1-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP12]], i64 0, i64 0, !dbg [[DBG145]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY22:%.*]] = icmp eq %struct.S1* [[PAD_ARRAYBEGIN]], [[PAD_ARRAYEND]], !dbg [[DBG145]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY22]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY23:%.*]], !dbg [[DBG145]] +// DEBUG1-NEXT: [[TMP12:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: [[PAD_ARRAYBEGIN:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG146]] +// DEBUG1-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP12]], i64 0, i64 0, !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY22:%.*]] = icmp eq %struct.S1* [[PAD_ARRAYBEGIN]], [[PAD_ARRAYEND]], !dbg [[DBG146]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY22]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY23:%.*]], !dbg [[DBG146]] // DEBUG1: arraydestroy.body23: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST24:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT25:%.*]], [[ARRAYDESTROY_BODY23]] ], !dbg [[DBG145]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT25]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST24]], i64 -1, !dbg [[DBG145]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 
4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT25]]) #[[ATTR4]], !dbg [[DBG145]] -// DEBUG1-NEXT: [[ARRAYDESTROY_DONE26:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT25]], [[PAD_ARRAYBEGIN]], !dbg [[DBG145]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE26]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY23]], !dbg [[DBG145]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST24:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT25:%.*]], [[ARRAYDESTROY_BODY23]] ], !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT25]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST24]], i64 -1, !dbg [[DBG146]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT25]]) #[[ATTR4]], !dbg [[DBG146]] +// DEBUG1-NEXT: [[ARRAYDESTROY_DONE26:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT25]], [[PAD_ARRAYBEGIN]], !dbg [[DBG146]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE26]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY23]], !dbg [[DBG146]] // DEBUG1: arraydestroy.done27: -// DEBUG1-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG145]] +// DEBUG1-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG146]] // DEBUG1: eh.resume: -// DEBUG1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG145]] -// DEBUG1-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG145]] -// DEBUG1-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG145]] -// DEBUG1-NEXT: [[LPAD_VAL28:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG145]] -// DEBUG1-NEXT: resume { i8*, i32 } [[LPAD_VAL28]], !dbg [[DBG145]] +// DEBUG1-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG146]] +// DEBUG1-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG146]] +// DEBUG1-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG146]] +// DEBUG1-NEXT: 
[[LPAD_VAL28:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG146]] +// DEBUG1-NEXT: resume { i8*, i32 } [[LPAD_VAL28]], !dbg [[DBG146]] // // // DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2 -// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG154:![0-9]+]] { +// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG155:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // DEBUG1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG156]] -// DEBUG1-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG156]] -// DEBUG1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAY_BEGIN]], i64 6, !dbg [[DBG156]] -// DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG156]] +// DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG157]] +// DEBUG1-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG157]] +// DEBUG1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAY_BEGIN]], i64 6, !dbg [[DBG157]] +// DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG157]] // DEBUG1: arraydestroy.body: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG156]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG156]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG156]] -// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG156]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG156]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG157]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG157]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG157]] +// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG157]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG157]] // DEBUG1: arraydestroy.done1: -// DEBUG1-NEXT: ret void, !dbg [[DBG157:![0-9]+]] +// DEBUG1-NEXT: ret void, !dbg [[DBG158:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3 -// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG158:![0-9]+]] { +// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG159:![0-9]+]] { // DEBUG1-NEXT: entry: -// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG159:![0-9]+]] -// DEBUG1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG159]] -// DEBUG1-NEXT: ret void, !dbg [[DBG159]] +// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG160:![0-9]+]] +// DEBUG1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* 
bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG160]] +// DEBUG1-NEXT: ret void, !dbg [[DBG160]] // // // DEBUG1-LABEL: define {{[^@]+}}@__cxx_global_var_init -// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG160:![0-9]+]] { +// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG161:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: call void @_ZN2S1C1Ei(%struct.S1* nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG164:![0-9]+]] // DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG166:![0-9]+]] @@ -6885,8 +6885,8 @@ int foobar() { // DEBUG1-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] // DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG233]] // DEBUG1: arraydestroy.body: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG233]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG233]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG233]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG233]] // DEBUG1-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG233]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG233]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG233]] @@ -6895,7 +6895,7 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@main -// DEBUG1-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] { +// DEBUG1-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG52:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[RES:%.*]] = alloca i32, align 4 @@ -7257,7 +7257,7 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@__cxx_global_var_init -// DEBUG2-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] { +// DEBUG2-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG116:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG119:![0-9]+]] // DEBUG2-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG119]] @@ -7581,8 +7581,8 @@ int foobar() { // DEBUG2-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG199:![0-9]+]] // DEBUG2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG199]] // DEBUG2: arraydestroy.body: -// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 
0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG199]] -// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG199]] +// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG199]] +// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG199]] // DEBUG2-NEXT: call void @_ZN2S1D1Ev(%struct.S1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG199]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG199]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG199]] @@ -7591,7 +7591,7 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@main -// DEBUG2-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] { +// DEBUG2-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG52:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: [[RES:%.*]] = alloca i32, align 4 diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index c9ee60d684f9..66e3a75833e2 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1979,32 +1979,14 @@ static Constant *foldGEPOfGEP(GEPOperator *GEP, Type *PointeeTy, bool InBounds, I != E; ++I) LastI = I; - // We cannot combine indices if doing so would take us outside of an - // array or 
vector. Doing otherwise could trick us if we evaluated such a - // GEP as part of a load. - // - // e.g. Consider if the original GEP was: - // i8* getelementptr ({ [2 x i8], i32, i8, [3 x i8] }* @main.c, - // i32 0, i32 0, i64 0) - // - // If we then tried to offset it by '8' to get to the third element, - // an i8, we should *not* get: - // i8* getelementptr ({ [2 x i8], i32, i8, [3 x i8] }* @main.c, - // i32 0, i32 0, i64 8) - // - // This GEP tries to index array element '8 which runs out-of-bounds. - // Subsequent evaluation would get confused and produce erroneous results. - // - // The following prohibits such a GEP from being formed by checking to see - // if the index is in-range with respect to an array. + // We can't combine GEPs if the last index is a struct type. if (!LastI.isSequential()) return nullptr; + // We could perform the transform with non-constant index, but prefer leaving + // it as GEP of GEP rather than GEP of add for now. ConstantInt *CI = dyn_cast(Idx0); if (!CI) return nullptr; - if (LastI.isBoundedSequential() && - !isIndexInRangeOfArrayType(LastI.getSequentialNumElements(), CI)) - return nullptr; // TODO: This code may be extended to handle vectors as well. 
auto *LastIdx = cast(GEP->getOperand(GEP->getNumOperands()-1)); diff --git a/llvm/test/Transforms/SCCP/apint-bigint2.ll b/llvm/test/Transforms/SCCP/apint-bigint2.ll index 45b6a068b45d..b8b5f80d5067 100644 --- a/llvm/test/Transforms/SCCP/apint-bigint2.ll +++ b/llvm/test/Transforms/SCCP/apint-bigint2.ll @@ -54,7 +54,7 @@ define i101 @large_aggregate_2() { define void @index_too_large() { ; CHECK-LABEL: @index_too_large( -; CHECK-NEXT: store i101* getelementptr (i101, i101* getelementptr ([6 x i101], [6 x i101]* @Y, i32 0, i32 -1), i101 9224497936761618431), i101** undef, align 8 +; CHECK-NEXT: store i101* getelementptr ([6 x i101], [6 x i101]* @Y, i101 1537416322793603071, i101 4), i101** undef, align 8 ; CHECK-NEXT: ret void ; %ptr1 = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 -1 diff --git a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll index 2da20d53878e..0a894c08b94b 100644 --- a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll +++ b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll @@ -8,10 +8,10 @@ define i32 @eq_undereferenceable(i32* %p) { ; CHECK-LABEL: @eq_undereferenceable( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 1, i64 0) ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds ([1 x i32], [1 
x i32]* @x, i64 1, i64 0), align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 @@ -62,13 +62,13 @@ if.end: ; preds = %if.then, %entry define i1 @eq_undereferenceable_cmp_simp(i32* %p) { ; CHECK-LABEL: @eq_undereferenceable_cmp_simp( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 1, i64 0) ; CHECK-NEXT: br i1 [[CMP_0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 1, i64 0), align 4 ; CHECK-NEXT: ret i1 true ; CHECK: if.end: -; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32* [[P]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32* [[P]], getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 1, i64 0) ; CHECK-NEXT: ret i1 [[CMP_2]] ; entry: From fd6d3e65dfc3ab444fae0a04f5afbe0f595ea541 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 15:24:14 +0100 Subject: [PATCH 520/992] [CodeGen] Add target triple to test (NFC) Exact IR may depend on target. 
--- clang/test/CodeGen/clear_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/clear_cache.c b/clang/test/CodeGen/clear_cache.c index 2ba0abe06254..1caa33f6bf55 100644 --- a/clang/test/CodeGen/clear_cache.c +++ b/clang/test/CodeGen/clear_cache.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s char buffer[32] = "This is a largely unused buffer"; From 4dcc47aaeaf015c4f1315a13a41819560b9946ab Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Tue, 28 Dec 2021 19:34:26 +0000 Subject: [PATCH 521/992] [clang][dataflow] Add parameterized map lattice. This patchs adds a `MapLattice` template for lifting a lattice to a keyed map. A typical use is for modeling variables in a scope with a partcular lattice. Differential Revision: https://reviews.llvm.org/D116369 --- .../clang/Analysis/FlowSensitive/MapLattice.h | 140 ++++++++++++++++ .../Analysis/FlowSensitive/CMakeLists.txt | 1 + .../Analysis/FlowSensitive/MapLatticeTest.cpp | 156 ++++++++++++++++++ 3 files changed, 297 insertions(+) create mode 100644 clang/include/clang/Analysis/FlowSensitive/MapLattice.h create mode 100644 clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp diff --git a/clang/include/clang/Analysis/FlowSensitive/MapLattice.h b/clang/include/clang/Analysis/FlowSensitive/MapLattice.h new file mode 100644 index 000000000000..ff403f68b7c5 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/MapLattice.h @@ -0,0 +1,140 @@ +//===------------------------ MapLattice.h ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a parameterized lattice that maps keys to individual +// lattice elements (of the parameter lattice type). A typical usage is lifting +// a particular lattice to all variables in a lexical scope. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE__MAPLATTICE_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE__MAPLATTICE_H + +#include +#include +#include + +#include "DataflowAnalysis.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace dataflow { + +/// A lattice that maps keys to individual lattice elements. When instantiated +/// with an `ElementLattice` that is a bounded semi-lattice, `MapLattice` is +/// itself a bounded semi-lattice, so long as the user limits themselves to a +/// finite number of keys. In that case, `top` is (implicitly), the map +/// containing all valid keys mapped to `top` of `ElementLattice`. +/// +/// Requirements on `ElementLattice`: +/// * Provides standard declarations of a bounded semi-lattice. +template class MapLattice { + using Container = llvm::DenseMap; + Container C; + +public: + using key_type = Key; + using mapped_type = ElementLattice; + using value_type = typename Container::value_type; + using iterator = typename Container::iterator; + using const_iterator = typename Container::const_iterator; + + MapLattice() = default; + + explicit MapLattice(Container C) { C = std::move(C); } + + // The `bottom` element is the empty map. 
+ static MapLattice bottom() { return MapLattice(); } + + void insert(const std::pair &P) { C.insert(P); } + + void insert(std::pair &&P) { + C.insert(std::move(P)); + } + + unsigned size() const { return C.size(); } + bool empty() const { return C.empty(); } + + iterator begin() { return C.begin(); } + iterator end() { return C.end(); } + const_iterator begin() const { return C.begin(); } + const_iterator end() const { return C.end(); } + + // Equality is direct equality of underlying map entries. One implication of + // this definition is that a map with (only) keys that map to bottom is not + // equal to the empty map. + friend bool operator==(const MapLattice &LHS, const MapLattice &RHS) { + return LHS.C == RHS.C; + } + + friend bool operator!=(const MapLattice &LHS, const MapLattice &RHS) { + return !(LHS == RHS); + } + + bool contains(const key_type &K) const { return C.find(K) != C.end(); } + + iterator find(const key_type &K) { return C.find(K); } + const_iterator find(const key_type &K) const { return C.find(K); } + + mapped_type &operator[](const key_type &K) { return C[K]; } + + /// If an entry exists in one map but not the other, the missing entry is + /// treated as implicitly mapping to `bottom`. So, the joined map contains the + /// entry as it was in the source map. + LatticeJoinEffect join(const MapLattice &Other) { + LatticeJoinEffect Effect = LatticeJoinEffect::Unchanged; + for (const auto &O : Other.C) { + auto It = C.find(O.first); + if (It == C.end()) { + C.insert(O); + Effect = LatticeJoinEffect::Changed; + } else if (It->second.join(O.second) == LatticeJoinEffect::Changed) + Effect = LatticeJoinEffect::Changed; + } + return Effect; + } +}; + +/// Convenience alias that captures the common use of map lattices to model +/// in-scope variables. 
+template +using VarMapLattice = MapLattice; + +template +std::ostream & +operator<<(std::ostream &Os, + const clang::dataflow::MapLattice &M) { + std::string Separator = ""; + Os << "{"; + for (const auto &E : M) { + Os << std::exchange(Separator, ", ") << E.first << " => " << E.second; + } + Os << "}"; + return Os; +} + +template +std::ostream & +operator<<(std::ostream &Os, + const clang::dataflow::VarMapLattice &M) { + std::string Separator = ""; + Os << "{"; + for (const auto &E : M) { + Os << std::exchange(Separator, ", ") << E.first->getName().str() << " => " + << E.second; + } + Os << "}"; + return Os; +} +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE__MAPLATTICE_H diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index 90c7be6b9068..753cf486953e 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(ClangAnalysisFlowSensitiveTests + MapLatticeTest.cpp SingleVarConstantPropagationTest.cpp TestingSupport.cpp TestingSupportTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp b/clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp new file mode 100644 index 000000000000..d3436e8f9496 --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp @@ -0,0 +1,156 @@ +#include "clang/Analysis/FlowSensitive/MapLattice.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "llvm/Support/Error.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +using namespace clang; +using namespace dataflow; + +namespace { +// A simple lattice for basic tests. 
+class BooleanLattice { +public: + BooleanLattice() : Value(false) {} + explicit BooleanLattice(bool B) : Value(B) {} + + static BooleanLattice bottom() { return BooleanLattice(false); } + + static BooleanLattice top() { return BooleanLattice(true); } + + LatticeJoinEffect join(BooleanLattice Other) { + auto Prev = Value; + Value = Value || Other.Value; + return Prev == Value ? LatticeJoinEffect::Unchanged + : LatticeJoinEffect::Changed; + } + + friend bool operator==(BooleanLattice LHS, BooleanLattice RHS) { + return LHS.Value == RHS.Value; + } + + friend bool operator!=(BooleanLattice LHS, BooleanLattice RHS) { + return LHS.Value != RHS.Value; + } + + friend std::ostream &operator<<(std::ostream &Os, const BooleanLattice &B) { + Os << B.Value; + return Os; + } + + bool value() const { return Value; } + +private: + bool Value; +}; +} // namespace + +static constexpr int Key1 = 0; +static constexpr int Key2 = 1; + +namespace { +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +TEST(MapLatticeTest, InsertWorks) { + MapLattice Lattice; + Lattice.insert({Key1, BooleanLattice(false)}); + Lattice.insert({Key2, BooleanLattice(false)}); + + EXPECT_THAT(Lattice, UnorderedElementsAre(Pair(Key1, BooleanLattice(false)), + Pair(Key2, BooleanLattice(false)))); +} + +TEST(MapLatticeTest, ComparisonWorks) { + MapLattice Lattice1; + Lattice1.insert({Key1, BooleanLattice(true)}); + Lattice1.insert({Key2, BooleanLattice(false)}); + MapLattice Lattice2 = Lattice1; + EXPECT_EQ(Lattice1, Lattice2); + + Lattice2.find(Key2)->second = BooleanLattice(true); + EXPECT_NE(Lattice1, Lattice2); +} + +TEST(MapLatticeTest, JoinChange) { + MapLattice Lattice1; + Lattice1.insert({Key1, BooleanLattice(false)}); + Lattice1.insert({Key2, BooleanLattice(false)}); + + MapLattice Lattice2; + Lattice2.insert({Key1, BooleanLattice(true)}); + Lattice2.insert({Key2, BooleanLattice(true)}); + + ASSERT_THAT(Lattice1, + UnorderedElementsAre(Pair(Key1, BooleanLattice(false)), + Pair(Key2, 
BooleanLattice(false)))); + + ASSERT_EQ(Lattice1.join(Lattice2), LatticeJoinEffect::Changed); + EXPECT_THAT(Lattice1, UnorderedElementsAre(Pair(Key1, BooleanLattice(true)), + Pair(Key2, BooleanLattice(true)))); +} + +TEST(MapLatticeTest, JoinEqNoChange) { + MapLattice Lattice; + Lattice.insert({Key1, BooleanLattice(false)}); + Lattice.insert({Key2, BooleanLattice(false)}); + + ASSERT_EQ(Lattice.join(Lattice), LatticeJoinEffect::Unchanged); + EXPECT_THAT(Lattice, UnorderedElementsAre(Pair(Key1, BooleanLattice(false)), + Pair(Key2, BooleanLattice(false)))); +} + +TEST(MapLatticeTest, JoinLtNoChange) { + MapLattice Lattice1; + Lattice1.insert({Key1, BooleanLattice(false)}); + Lattice1.insert({Key2, BooleanLattice(false)}); + + MapLattice Lattice2; + Lattice2.insert({Key1, BooleanLattice(true)}); + Lattice2.insert({Key2, BooleanLattice(true)}); + + ASSERT_THAT(Lattice1, + UnorderedElementsAre(Pair(Key1, BooleanLattice(false)), + Pair(Key2, BooleanLattice(false)))); + + ASSERT_THAT(Lattice2, UnorderedElementsAre(Pair(Key1, BooleanLattice(true)), + Pair(Key2, BooleanLattice(true)))); + + ASSERT_EQ(Lattice2.join(Lattice1), LatticeJoinEffect::Unchanged); + EXPECT_THAT(Lattice2, UnorderedElementsAre(Pair(Key1, BooleanLattice(true)), + Pair(Key2, BooleanLattice(true)))); +} + +TEST(MapLatticeTest, JoinDifferentDomainsProducesUnion) { + MapLattice Lattice1; + Lattice1.insert({Key1, BooleanLattice(true)}); + MapLattice Lattice2; + Lattice2.insert({Key2, BooleanLattice(true)}); + + ASSERT_EQ(Lattice1.join(Lattice2), LatticeJoinEffect::Changed); + EXPECT_THAT(Lattice1, UnorderedElementsAre(Pair(Key1, BooleanLattice(true)), + Pair(Key2, BooleanLattice(true)))); +} + +TEST(MapLatticeTest, FindWorks) { + MapLattice Lattice; + Lattice.insert({Key1, BooleanLattice(true)}); + Lattice.insert({Key2, BooleanLattice(false)}); + + auto It = Lattice.find(Key1); + ASSERT_NE(It, Lattice.end()); + EXPECT_EQ(It->second, BooleanLattice(true)); + + It = Lattice.find(Key2); + ASSERT_NE(It, 
Lattice.end()); + EXPECT_EQ(It->second, BooleanLattice(false)); +} + +TEST(MapLatticeTest, ContainsWorks) { + MapLattice Lattice; + Lattice.insert({Key1, BooleanLattice(true)}); + EXPECT_TRUE(Lattice.contains(Key1)); + EXPECT_FALSE(Lattice.contains(Key2)); +} +} // namespace From 4950198116a5b243b8e7b4267e0397e118a27c43 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Tue, 28 Dec 2021 21:10:56 +0000 Subject: [PATCH 522/992] [clang][dataflow] Add multi-variable constant propagation example. Adds another constant-propagation analysis that covers all variables in the scope (vs the existing single-variable demo). But, the analysis is still unsuited to use, in that ignores issues of escaping variables. Differential Revision: https://reviews.llvm.org/D116370 --- .../Analysis/FlowSensitive/CMakeLists.txt | 1 + .../MultiVarConstantPropagationTest.cpp | 486 ++++++++++++++++++ 2 files changed, 487 insertions(+) create mode 100644 clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index 753cf486953e..414f5c8810c7 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(ClangAnalysisFlowSensitiveTests MapLatticeTest.cpp + MultiVarConstantPropagationTest.cpp SingleVarConstantPropagationTest.cpp TestingSupport.cpp TestingSupportTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp b/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp new file mode 100644 index 000000000000..c5b792a64d55 --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp @@ -0,0 +1,486 @@ +//===- unittests/Analysis/FlowSensitive/SingelVarConstantPropagation.cpp --===// +// +// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a simplistic version of Constant Propagation as an example +// of a forward, monotonic dataflow analysis. The analysis tracks all +// variables in the scope, but lacks escape analysis. +// +//===----------------------------------------------------------------------===// + +#include "TestingSupport.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/MapLattice.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/Annotations.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include +#include +#include +#include + +namespace clang { +namespace dataflow { +namespace { +using namespace ast_matchers; + +// Models the value of an expression at a program point, for all paths through +// the program. +struct ValueLattice { + // FIXME: change the internal representation to use a `std::variant`, once + // clang admits C++17 constructs. + enum class ValueState : bool { + Undefined, + Defined, + }; + // `State` determines the meaning of the lattice when `Value` is `None`: + // * `Undefined` -> bottom, + // * `Defined` -> top. + ValueState State; + + // When `None`, the lattice is either at top or bottom, based on `State`. 
+ llvm::Optional Value; + + constexpr ValueLattice() : State(ValueState::Undefined), Value(llvm::None) {} + constexpr ValueLattice(int64_t V) : State(ValueState::Defined), Value(V) {} + constexpr ValueLattice(ValueState S) : State(S), Value(llvm::None) {} + + static constexpr ValueLattice bottom() { + return ValueLattice(ValueState::Undefined); + } + static constexpr ValueLattice top() { + return ValueLattice(ValueState::Defined); + } + + friend bool operator==(const ValueLattice &Lhs, const ValueLattice &Rhs) { + return Lhs.State == Rhs.State && Lhs.Value == Rhs.Value; + } + friend bool operator!=(const ValueLattice &Lhs, const ValueLattice &Rhs) { + return !(Lhs == Rhs); + } + + LatticeJoinEffect join(const ValueLattice &Other) { + if (*this == Other || Other == bottom() || *this == top()) + return LatticeJoinEffect::Unchanged; + + if (*this == bottom()) { + *this = Other; + return LatticeJoinEffect::Changed; + } + + *this = top(); + return LatticeJoinEffect::Changed; + } +}; + +std::ostream &operator<<(std::ostream &OS, const ValueLattice &L) { + if (L.Value.hasValue()) + return OS << *L.Value; + switch (L.State) { + case ValueLattice::ValueState::Undefined: + return OS << "None"; + case ValueLattice::ValueState::Defined: + return OS << "Any"; + } +} + +using ConstantPropagationLattice = VarMapLattice; + +constexpr char kDecl[] = "decl"; +constexpr char kVar[] = "var"; +constexpr char kInit[] = "init"; +constexpr char kJustAssignment[] = "just-assignment"; +constexpr char kAssignment[] = "assignment"; +constexpr char kRHS[] = "rhs"; + +auto refToVar() { return declRefExpr(to(varDecl().bind(kVar))); } + +// N.B. This analysis is deliberately simplistic, leaving out many important +// details needed for a real analysis. Most notably, the transfer function does +// not account for the variable's address possibly escaping, which would +// invalidate the analysis. It also could be optimized to drop out-of-scope +// variables from the map. 
+class ConstantPropagationAnalysis + : public DataflowAnalysis { +public: + explicit ConstantPropagationAnalysis(ASTContext &Context) + : DataflowAnalysis(Context) {} + + static ConstantPropagationLattice initialElement() { + return ConstantPropagationLattice::bottom(); + } + + ConstantPropagationLattice + transfer(const Stmt *S, ConstantPropagationLattice Vars, Environment &Env) { + auto matcher = + stmt(anyOf(declStmt(hasSingleDecl( + varDecl(decl().bind(kVar), hasType(isInteger()), + optionally(hasInitializer(expr().bind(kInit)))) + .bind(kDecl))), + binaryOperator(hasOperatorName("="), hasLHS(refToVar()), + hasRHS(expr().bind(kRHS))) + .bind(kJustAssignment), + binaryOperator(isAssignmentOperator(), hasLHS(refToVar())) + .bind(kAssignment))); + + ASTContext &Context = getASTContext(); + auto Results = match(matcher, *S, Context); + if (Results.empty()) + return Vars; + const BoundNodes &Nodes = Results[0]; + + const auto *Var = Nodes.getNodeAs(kVar); + assert(Var != nullptr); + + if (Nodes.getNodeAs(kDecl) != nullptr) { + if (const auto *E = Nodes.getNodeAs(kInit)) { + Expr::EvalResult R; + Vars[Var] = (E->EvaluateAsInt(R, Context) && R.Val.isInt()) + ? ValueLattice(R.Val.getInt().getExtValue()) + : ValueLattice::top(); + } else { + // An unitialized variable holds *some* value, but we don't know what it + // is (it is implementation defined), so we set it to top. + Vars[Var] = ValueLattice::top(); + } + return Vars; + } + + if (Nodes.getNodeAs(kJustAssignment)) { + const auto *E = Nodes.getNodeAs(kRHS); + assert(E != nullptr); + + Expr::EvalResult R; + Vars[Var] = (E->EvaluateAsInt(R, Context) && R.Val.isInt()) + ? ValueLattice(R.Val.getInt().getExtValue()) + : ValueLattice::top(); + return Vars; + } + + // Any assignment involving the expression itself resets the variable to + // "unknown". A more advanced analysis could try to evaluate the compound + // assignment. For example, `x += 0` need not invalidate `x`. 
+ if (Nodes.getNodeAs(kAssignment)) { + Vars[Var] = ValueLattice::top(); + return Vars; + } + + llvm_unreachable("expected at least one bound identifier"); + } +}; + +using ::testing::IsEmpty; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +MATCHER_P(Var, name, + (llvm::Twine(negation ? "isn't" : "is") + " a variable named `" + + name + "`") + .str()) { + return arg->getName() == name; +} + +MATCHER_P(HasConstantVal, v, "") { + return arg.Value.hasValue() && *arg.Value == v; +} + +MATCHER(Varies, "") { return arg == arg.top(); } + +MATCHER_P(HoldsCPLattice, m, + ((negation ? "doesn't hold" : "holds") + + llvm::StringRef(" a lattice element that ") + + ::testing::DescribeMatcher(m, negation)) + .str()) { + return ExplainMatchResult(m, arg.Lattice, result_listener); +} + +class MultiVarConstantPropagationTest : public ::testing::Test { +protected: + template + void RunDataflow(llvm::StringRef Code, Matcher Expectations) { + test::checkDataflow( + Code, "fun", + [](ASTContext &C, Environment &) { + return ConstantPropagationAnalysis(C); + }, + [&Expectations]( + llvm::ArrayRef>> + Results, + ASTContext &) { EXPECT_THAT(Results, Expectations); }, + {"-fsyntax-only", "-std=c++17"}); + } +}; + +TEST_F(MultiVarConstantPropagationTest, JustInit) { + std::string Code = R"( + void fun() { + int target = 1; + // [[p]] + } + )"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))))); +} + +TEST_F(MultiVarConstantPropagationTest, Assignment) { + std::string Code = R"( + void fun() { + int target = 1; + // [[p1]] + target = 2; + // [[p2]] + } + )"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(2))))))); +} + +TEST_F(MultiVarConstantPropagationTest, AssignmentCall) { + std::string Code = R"( + int g(); + 
void fun() { + int target; + target = g(); + // [[p]] + } + )"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))))); +} + +TEST_F(MultiVarConstantPropagationTest, AssignmentBinOp) { + std::string Code = R"( + void fun() { + int target; + target = 2 + 3; + // [[p]] + } + )"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(5))))))); +} + +TEST_F(MultiVarConstantPropagationTest, PlusAssignment) { + std::string Code = R"( + void fun() { + int target = 1; + // [[p1]] + target += 2; + // [[p2]] + } + )"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))))); +} + +TEST_F(MultiVarConstantPropagationTest, SameAssignmentInBranches) { + std::string Code = R"cc( + void fun(bool b) { + int target; + // [[p1]] + if (b) { + target = 2; + // [[pT]] + } else { + target = 2; + // [[pF]] + } + (void)0; + // [[p2]] + } + )cc"; + RunDataflow(Code, + UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))), + Pair("pT", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(2))))), + Pair("pF", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(2))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(2))))))); +} + +// Verifies that the analysis tracks multiple variables simultaneously. 
+TEST_F(MultiVarConstantPropagationTest, TwoVariables) { + std::string Code = R"( + void fun() { + int target = 1; + // [[p1]] + int other = 2; + // [[p2]] + target = 3; + // [[p3]] + } + )"; + RunDataflow(Code, + UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(1))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(1)), + Pair(Var("other"), HasConstantVal(2))))), + Pair("p3", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(3)), + Pair(Var("other"), HasConstantVal(2))))))); +} + +TEST_F(MultiVarConstantPropagationTest, TwoVariablesInBranches) { + std::string Code = R"cc( + void fun(bool b) { + int target; + int other; + // [[p1]] + if (b) { + target = 2; + // [[pT]] + } else { + other = 3; + // [[pF]] + } + (void)0; + // [[p2]] + } + )cc"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies()), + Pair(Var("other"), Varies())))), + Pair("pT", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), HasConstantVal(2)), + Pair(Var("other"), Varies())))), + Pair("pF", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("other"), HasConstantVal(3)), + Pair(Var("target"), Varies())))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies()), + Pair(Var("other"), Varies())))))); +} + +TEST_F(MultiVarConstantPropagationTest, SameAssignmentInBranch) { + std::string Code = R"cc( + void fun(bool b) { + int target = 1; + // [[p1]] + if (b) { + target = 1; + } + (void)0; + // [[p2]] + } + )cc"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))))); +} + +TEST_F(MultiVarConstantPropagationTest, NewVarInBranch) { + std::string Code = R"cc( + void fun(bool b) { + if (b) { + int target; + // 
[[p1]] + target = 1; + // [[p2]] + } else { + int target; + // [[p3]] + target = 1; + // [[p4]] + } + } + )cc"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))), + Pair("p3", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))), + Pair("p4", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))))); +} + +TEST_F(MultiVarConstantPropagationTest, DifferentAssignmentInBranches) { + std::string Code = R"cc( + void fun(bool b) { + int target; + // [[p1]] + if (b) { + target = 1; + // [[pT]] + } else { + target = 2; + // [[pF]] + } + (void)0; + // [[p2]] + } + )cc"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))), + Pair("pT", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))), + Pair("pF", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(2))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))))); +} + +TEST_F(MultiVarConstantPropagationTest, DifferentAssignmentInBranch) { + std::string Code = R"cc( + void fun(bool b) { + int target = 1; + // [[p1]] + if (b) { + target = 3; + } + (void)0; + // [[p2]] + } + )cc"; + RunDataflow(Code, UnorderedElementsAre( + Pair("p1", HoldsCPLattice(UnorderedElementsAre(Pair( + Var("target"), HasConstantVal(1))))), + Pair("p2", HoldsCPLattice(UnorderedElementsAre( + Pair(Var("target"), Varies())))))); +} + +} // namespace +} // namespace dataflow +} // namespace clang From ba70fb6460ce6b0b4bd365b33306c12e51e0b459 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 4 Jan 2022 14:32:07 +0000 Subject: [PATCH 523/992] [gn build] Port 4950198116a5 --- .../gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff 
--git a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn index f89ec8081312..22bbd2b9a62d 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn @@ -16,6 +16,7 @@ unittest("ClangAnalysisFlowSensitiveTests") { "//llvm/lib/Testing/Support", ] sources = [ + "MultiVarConstantPropagationTest.cpp", "SingleVarConstantPropagationTest.cpp", "TestingSupport.cpp", "TestingSupportTest.cpp", From 2b1c38f737d490c09efc60e2c3e17b8568173097 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 4 Jan 2022 14:32:08 +0000 Subject: [PATCH 524/992] [gn build] Port 4dcc47aaeaf0 --- .../gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn index 22bbd2b9a62d..ce510c257a36 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn @@ -16,6 +16,7 @@ unittest("ClangAnalysisFlowSensitiveTests") { "//llvm/lib/Testing/Support", ] sources = [ + "MapLatticeTest.cpp", "MultiVarConstantPropagationTest.cpp", "SingleVarConstantPropagationTest.cpp", "TestingSupport.cpp", From 9290ccc3c1a17a7874de020656db38183a20f6b0 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Tue, 4 Jan 2022 09:44:47 +0100 Subject: [PATCH 525/992] Introduce the AttributeMask class This class is solely used as a lightweight and clean way to build a set of attributes to be removed from an AttrBuilder. Previously AttrBuilder was used both for building and removing, which introduced odd situation like creation of Attribute with dummy value because the only relevant part was the attribute kind. 
Differential Revision: https://reviews.llvm.org/D116110 --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- llvm/include/llvm/IR/Argument.h | 2 +- llvm/include/llvm/IR/Attributes.h | 98 ++++++++++++++++--- llvm/include/llvm/IR/Function.h | 6 +- llvm/include/llvm/IR/InstrTypes.h | 6 +- llvm/lib/IR/Attributes.cpp | 72 +++++++------- llvm/lib/IR/Function.cpp | 12 +-- llvm/lib/IR/Instruction.cpp | 3 +- llvm/lib/IR/Verifier.cpp | 2 +- .../AMDGPU/AMDGPURewriteOutArguments.cpp | 2 +- .../IPO/DeadArgumentElimination.cpp | 3 +- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 2 +- .../Instrumentation/DataFlowSanitizer.cpp | 2 +- .../Instrumentation/MemorySanitizer.cpp | 4 +- .../Scalar/RewriteStatepointsForGC.cpp | 14 +-- llvm/lib/Transforms/Scalar/SCCP.cpp | 2 +- llvm/unittests/IR/AttributesTest.cpp | 4 +- 17 files changed, 153 insertions(+), 83 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 36b7ce87336c..bf74f4d3f698 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2097,7 +2097,7 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, // We know that GetCPUAndFeaturesAttributes will always have the // newest set, since it has the newest possible FunctionDecl, so the // new ones should replace the old. - llvm::AttrBuilder RemoveAttrs; + llvm::AttributeMask RemoveAttrs; RemoveAttrs.addAttribute("target-cpu"); RemoveAttrs.addAttribute("target-features"); RemoveAttrs.addAttribute("tune-cpu"); diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index 396ab6a9d01d..7cbfa2a7b6ce 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -162,7 +162,7 @@ class Argument final : public Value { /// Remove attributes from an argument. void removeAttr(Attribute::AttrKind Kind); - void removeAttrs(const AttrBuilder &B); + void removeAttrs(const AttributeMask &AM); /// Check if an argument has a given attribute. 
bool hasAttribute(Attribute::AttrKind Kind) const; diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index f64f15bd38ba..0e75371037bf 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -28,12 +28,14 @@ #include #include #include +#include #include #include namespace llvm { class AttrBuilder; +class AttributeMask; class AttributeImpl; class AttributeListImpl; class AttributeSetNode; @@ -320,7 +322,7 @@ class AttributeSet { /// Remove the specified attributes from this set. Returns a new set because /// attribute sets are immutable. LLVM_NODISCARD AttributeSet - removeAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const; + removeAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const; /// Return the number of attributes in this set. unsigned getNumAttributes() const; @@ -580,7 +582,7 @@ class AttributeList { /// Remove the specified attributes at the specified index from this /// attribute list. Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList removeAttributesAtIndex( - LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const; + LLVMContext &C, unsigned Index, const AttributeMask &AttrsToRemove) const; /// Remove all attributes at the specified index from this /// attribute list. Returns a new list because attribute lists are immutable. @@ -604,7 +606,7 @@ class AttributeList { /// Remove the specified attribute at the function index from this /// attribute list. Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList - removeFnAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const { + removeFnAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const { return removeAttributesAtIndex(C, FunctionIndex, AttrsToRemove); } @@ -630,8 +632,8 @@ class AttributeList { /// Remove the specified attribute at the return value index from this /// attribute list. 
Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList - removeRetAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const { + LLVM_NODISCARD AttributeList removeRetAttributes( + LLVMContext &C, const AttributeMask &AttrsToRemove) const { return removeAttributesAtIndex(C, ReturnIndex, AttrsToRemove); } @@ -652,8 +654,9 @@ class AttributeList { /// Remove the specified attribute at the specified arg index from this /// attribute list. Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList removeParamAttributes( - LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const { + LLVM_NODISCARD AttributeList + removeParamAttributes(LLVMContext &C, unsigned ArgNo, + const AttributeMask &AttrsToRemove) const { return removeAttributesAtIndex(C, ArgNo + FirstArgIndex, AttrsToRemove); } @@ -927,6 +930,65 @@ template <> struct DenseMapInfo { } }; +//===----------------------------------------------------------------------===// +/// \class +/// This class stores enough information to efficiently remove some attributes +/// from an existing AttrBuilder, AttributeSet or AttributeList. +class AttributeMask { + std::bitset Attrs; + std::set, std::less<>> TargetDepAttrs; + +public: + AttributeMask() = default; + AttributeMask(const AttributeMask &) = delete; + AttributeMask(AttributeMask &&) = default; + + AttributeMask(AttributeSet AS) { + for (Attribute A : AS) + addAttribute(A); + } + + /// Add an attribute to the mask. + AttributeMask &addAttribute(Attribute::AttrKind Val) { + assert((unsigned)Val < Attribute::EndAttrKinds && + "Attribute out of range!"); + Attrs[Val] = true; + return *this; + } + + /// Add the Attribute object to the builder. + AttributeMask &addAttribute(Attribute A) { + if (A.isStringAttribute()) + addAttribute(A.getKindAsString()); + else + addAttribute(A.getKindAsEnum()); + return *this; + } + + /// Add the target-dependent attribute to the builder. 
+ AttributeMask &addAttribute(StringRef A) { + TargetDepAttrs.insert(A); + return *this; + } + + /// Return true if the builder has the specified attribute. + bool contains(Attribute::AttrKind A) const { + assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!"); + return Attrs[A]; + } + + /// Return true if the builder has the specified target-dependent + /// attribute. + bool contains(StringRef A) const { return TargetDepAttrs.count(A); } + + using td_const_iterator = decltype(TargetDepAttrs)::const_iterator; + using td_const_range = iterator_range; + td_const_range td_attrs() const { + return {TargetDepAttrs.begin(), TargetDepAttrs.end()}; + } + auto const &attrs() const { return Attrs; } +}; + //===----------------------------------------------------------------------===// /// \class /// This class is used in conjunction with the Attribute::get method to @@ -975,21 +1037,29 @@ class AttrBuilder { /// Remove an attribute from the builder. AttrBuilder &removeAttribute(Attribute::AttrKind Val); + /// Remove the target-dependent attribute from the builder. + AttrBuilder &removeAttribute(StringRef A); + + /// Remove the target-dependent attribute from the builder. + AttrBuilder &removeAttribute(Attribute A) { + if (A.isStringAttribute()) + return removeAttribute(A.getKindAsString()); + else + return removeAttribute(A.getKindAsEnum()); + } + /// Remove the attributes from the builder. AttrBuilder &removeAttributes(AttributeList A, uint64_t WithoutIndex); - /// Remove the target-dependent attribute to the builder. - AttrBuilder &removeAttribute(StringRef A); - /// Add the attributes from the builder. AttrBuilder &merge(const AttrBuilder &B); /// Remove the attributes from the builder. - AttrBuilder &remove(const AttrBuilder &B); + AttrBuilder &remove(const AttributeMask &AM); /// Return true if the builder has any attribute that's in the /// specified builder. 
- bool overlaps(const AttrBuilder &B) const; + bool overlaps(const AttributeMask &AM) const; /// Return true if the builder has the specified attribute. bool contains(Attribute::AttrKind A) const { @@ -1168,14 +1238,14 @@ class AttrBuilder { namespace AttributeFuncs { /// Which attributes cannot be applied to a type. -AttrBuilder typeIncompatible(Type *Ty); +AttributeMask typeIncompatible(Type *Ty); /// Get param/return attributes which imply immediate undefined behavior if an /// invalid value is passed. For example, this includes noundef (where undef /// implies UB), but not nonnull (where null implies poison). It also does not /// include attributes like nocapture, which constrain the function /// implementation rather than the passed value. -AttrBuilder getUBImplyingAttributes(); +AttributeMask getUBImplyingAttributes(); /// \returns Return true if the two functions have compatible target-independent /// attributes for inlining purposes. diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 669418eacbb0..2c94897c61dc 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -364,7 +364,7 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, /// Remove function attribute from this function. void removeFnAttr(StringRef Kind); - void removeFnAttrs(const AttrBuilder &Attrs); + void removeFnAttrs(const AttributeMask &Attrs); /// removes the attribute from the return value list of attributes. void removeRetAttr(Attribute::AttrKind Kind); @@ -373,7 +373,7 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, void removeRetAttr(StringRef Kind); /// removes the attributes from the return value list of attributes. - void removeRetAttrs(const AttrBuilder &Attrs); + void removeRetAttrs(const AttributeMask &Attrs); /// removes the attribute from the list of attributes. 
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); @@ -382,7 +382,7 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, void removeParamAttr(unsigned ArgNo, StringRef Kind); /// removes the attribute from the list of attributes. - void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); + void removeParamAttrs(unsigned ArgNo, const AttributeMask &Attrs); /// Return true if the function has the attribute. bool hasFnAttribute(Attribute::AttrKind Kind) const; diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 143a87f4997d..3eedb762d124 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1544,7 +1544,7 @@ class CallBase : public Instruction { } /// Removes the attributes from the function - void removeFnAttrs(const AttrBuilder &AttrsToRemove) { + void removeFnAttrs(const AttributeMask &AttrsToRemove) { Attrs = Attrs.removeFnAttributes(getContext(), AttrsToRemove); } @@ -1559,7 +1559,7 @@ class CallBase : public Instruction { } /// Removes the attributes from the return value - void removeRetAttrs(const AttrBuilder &AttrsToRemove) { + void removeRetAttrs(const AttributeMask &AttrsToRemove) { Attrs = Attrs.removeRetAttributes(getContext(), AttrsToRemove); } @@ -1576,7 +1576,7 @@ class CallBase : public Instruction { } /// Removes the attributes from the given argument - void removeParamAttrs(unsigned ArgNo, const AttrBuilder &AttrsToRemove) { + void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove) { Attrs = Attrs.removeParamAttributes(getContext(), ArgNo, AttrsToRemove); } diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 2c917e46dfde..c1b63c036ef0 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -651,7 +651,7 @@ AttributeSet AttributeSet::removeAttribute(LLVMContext &C, } AttributeSet AttributeSet::removeAttributes(LLVMContext &C, - const AttrBuilder &Attrs) const { + const AttributeMask 
&Attrs) const { AttrBuilder B(*this); // If there is nothing to remove, directly return the original set. if (!B.overlaps(Attrs)) @@ -1314,9 +1314,8 @@ AttributeList AttributeList::removeAttributeAtIndex(LLVMContext &C, return getImpl(C, AttrSets); } -AttributeList -AttributeList::removeAttributesAtIndex(LLVMContext &C, unsigned Index, - const AttrBuilder &AttrsToRemove) const { +AttributeList AttributeList::removeAttributesAtIndex( + LLVMContext &C, unsigned Index, const AttributeMask &AttrsToRemove) const { AttributeSet Attrs = getAttributes(Index); AttributeSet NewAttrs = Attrs.removeAttributes(C, AttrsToRemove); // If nothing was removed, return the original list. @@ -1604,6 +1603,11 @@ AttrBuilder &AttrBuilder::addAttribute(StringRef A, StringRef V) { return *this; } +AttrBuilder &AttrBuilder::removeAttributes(AttributeList AL, uint64_t Index) { + remove(AttributeMask(AL.getAttributes(Index))); + return *this; +} + AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!"); Attrs[Val] = false; @@ -1616,11 +1620,6 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { return *this; } -AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) { - remove(A.getAttributes(Index)); - return *this; -} - AttrBuilder &AttrBuilder::removeAttribute(StringRef A) { TargetDepAttrs.erase(A); return *this; @@ -1760,34 +1759,33 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { return *this; } -AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) { +AttrBuilder &AttrBuilder::remove(const AttributeMask &AM) { // FIXME: What if both have an int/type attribute, but they don't match?! 
for (unsigned Index = 0; Index < Attribute::NumIntAttrKinds; ++Index) - if (B.IntAttrs[Index]) + if (AM.contains((Attribute::AttrKind)Index)) IntAttrs[Index] = 0; for (unsigned Index = 0; Index < Attribute::NumTypeAttrKinds; ++Index) - if (B.TypeAttrs[Index]) + if (AM.contains((Attribute::AttrKind)Index)) TypeAttrs[Index] = nullptr; - Attrs &= ~B.Attrs; + Attrs &= ~AM.attrs(); - for (const auto &I : B.td_attrs()) - TargetDepAttrs.erase(I.first); + for (const auto &I : AM.td_attrs()) + TargetDepAttrs.erase(I); return *this; } -bool AttrBuilder::overlaps(const AttrBuilder &B) const { +bool AttrBuilder::overlaps(const AttributeMask &AM) const { // First check if any of the target independent attributes overlap. - if ((Attrs & B.Attrs).any()) + if ((Attrs & AM.attrs()).any()) return true; // Then check if any target dependent ones do. for (const auto &I : td_attrs()) - if (B.contains(I.first)) + if (AM.contains(I.first)) return true; - return false; } @@ -1835,8 +1833,8 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const { //===----------------------------------------------------------------------===// /// Which attributes cannot be applied to a type. -AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { - AttrBuilder Incompatible; +AttributeMask AttributeFuncs::typeIncompatible(Type *Ty) { + AttributeMask Incompatible; if (!Ty->isIntegerTy()) // Attributes that only apply to integers. 
@@ -1852,18 +1850,18 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::SwiftError) - .addDereferenceableAttr(1) // the int here is ignored - .addDereferenceableOrNullAttr(1) // the int here is ignored - .addPreallocatedAttr(Ty) - .addInAllocaAttr(Ty) - .addByValAttr(Ty) - .addStructRetAttr(Ty) - .addByRefAttr(Ty) - .addTypeAttr(Attribute::ElementType, Ty); + .addAttribute(Attribute::Dereferenceable) + .addAttribute(Attribute::DereferenceableOrNull) + .addAttribute(Attribute::Preallocated) + .addAttribute(Attribute::InAlloca) + .addAttribute(Attribute::ByVal) + .addAttribute(Attribute::StructRet) + .addAttribute(Attribute::ByRef) + .addAttribute(Attribute::ElementType); if (!Ty->isPtrOrPtrVectorTy()) // Attributes that only apply to pointers or vectors of pointers. - Incompatible.addAlignmentAttr(1); // the int here is ignored + Incompatible.addAttribute(Attribute::Alignment); // Some attributes can apply to all "values" but there are no `void` values. if (Ty->isVoidTy()) @@ -1872,12 +1870,12 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { return Incompatible; } -AttrBuilder AttributeFuncs::getUBImplyingAttributes() { - AttrBuilder B; - B.addAttribute(Attribute::NoUndef); - B.addDereferenceableAttr(1); - B.addDereferenceableOrNullAttr(1); - return B; +AttributeMask AttributeFuncs::getUBImplyingAttributes() { + AttributeMask AM; + AM.addAttribute(Attribute::NoUndef); + AM.addAttribute(Attribute::Dereferenceable); + AM.addAttribute(Attribute::DereferenceableOrNull); + return AM; } template @@ -1916,7 +1914,7 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) { // If upgrading the SSP attribute, clear out the old SSP Attributes first. // Having multiple SSP attributes doesn't actually hurt, but it adds useless // clutter to the IR. 
- AttrBuilder OldSSPAttr; + AttributeMask OldSSPAttr; OldSSPAttr.addAttribute(Attribute::StackProtect) .addAttribute(Attribute::StackProtectStrong) .addAttribute(Attribute::StackProtectReq); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index f1a6402fb11b..93e15e43845c 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -300,9 +300,9 @@ void Argument::removeAttr(Attribute::AttrKind Kind) { getParent()->removeParamAttr(getArgNo(), Kind); } -void Argument::removeAttrs(const AttrBuilder &B) { +void Argument::removeAttrs(const AttributeMask &AM) { AttributeList AL = getParent()->getAttributes(); - AL = AL.removeParamAttributes(Parent->getContext(), getArgNo(), B); + AL = AL.removeParamAttributes(Parent->getContext(), getArgNo(), AM); getParent()->setAttributes(AL); } @@ -589,8 +589,8 @@ void Function::removeFnAttr(StringRef Kind) { AttributeSets = AttributeSets.removeFnAttribute(getContext(), Kind); } -void Function::removeFnAttrs(const AttrBuilder &Attrs) { - AttributeSets = AttributeSets.removeFnAttributes(getContext(), Attrs); +void Function::removeFnAttrs(const AttributeMask &AM) { + AttributeSets = AttributeSets.removeFnAttributes(getContext(), AM); } void Function::removeRetAttr(Attribute::AttrKind Kind) { @@ -601,7 +601,7 @@ void Function::removeRetAttr(StringRef Kind) { AttributeSets = AttributeSets.removeRetAttribute(getContext(), Kind); } -void Function::removeRetAttrs(const AttrBuilder &Attrs) { +void Function::removeRetAttrs(const AttributeMask &Attrs) { AttributeSets = AttributeSets.removeRetAttributes(getContext(), Attrs); } @@ -613,7 +613,7 @@ void Function::removeParamAttr(unsigned ArgNo, StringRef Kind) { AttributeSets = AttributeSets.removeParamAttribute(getContext(), ArgNo, Kind); } -void Function::removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) { +void Function::removeParamAttrs(unsigned ArgNo, const AttributeMask &Attrs) { AttributeSets = AttributeSets.removeParamAttributes(getContext(), ArgNo, 
Attrs); } diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 4480ec799c35..770fd8aa918b 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -186,7 +186,8 @@ void Instruction::dropUndefImplyingAttrsAndUnknownMetadata( AttributeList AL = CB->getAttributes(); if (AL.isEmpty()) return; - AttrBuilder UBImplyingAttributes = AttributeFuncs::getUBImplyingAttributes(); + AttributeMask UBImplyingAttributes = + AttributeFuncs::getUBImplyingAttributes(); for (unsigned ArgNo = 0; ArgNo < CB->arg_size(); ArgNo++) CB->removeParamAttrs(ArgNo, UBImplyingAttributes); CB->removeRetAttrs(UBImplyingAttributes); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 9ce37db9ea6c..46da9cfbc6b5 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1793,7 +1793,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, "'noinline and alwaysinline' are incompatible!", V); - AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty); + AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty); for (Attribute Attr : Attrs) { if (!Attr.isStringAttribute() && IncompatibleAttrs.contains(Attr.getKindAsEnum())) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp index 45f7c2f369bd..3d578a9b891e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp @@ -353,7 +353,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { // off any return attributes, e.g. zeroext doesn't make sense with a struct. 
NewFunc->stealArgumentListFrom(F); - AttrBuilder RetAttrs; + AttributeMask RetAttrs; RetAttrs.addAttribute(Attribute::SExt); RetAttrs.addAttribute(Attribute::ZExt); RetAttrs.addAttribute(Attribute::NoAlias); diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index fb9ab7954e36..d71e69a538d7 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -287,7 +287,8 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { SmallVector UnusedArgs; bool Changed = false; - AttrBuilder UBImplyingAttributes = AttributeFuncs::getUBImplyingAttributes(); + AttributeMask UBImplyingAttributes = + AttributeFuncs::getUBImplyingAttributes(); for (Argument &Arg : Fn.args()) { if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasPassPointeeByValueCopyAttr()) { diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 321d4a19a585..8fb0c2dc7613 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -301,7 +301,7 @@ static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, Changed.insert(F); // Clear out any existing attributes. 
- AttrBuilder AttrsToRemove; + AttributeMask AttrsToRemove; AttrsToRemove.addAttribute(Attribute::ReadOnly); AttrsToRemove.addAttribute(Attribute::ReadNone); AttrsToRemove.addAttribute(Attribute::WriteOnly); diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 9f26b37bbc79..1e5688828d30 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -454,7 +454,7 @@ class DataFlowSanitizer { MDNode *OriginStoreWeights; DFSanABIList ABIList; DenseMap UnwrappedFnMap; - AttrBuilder ReadOnlyNoneAttrs; + AttributeMask ReadOnlyNoneAttrs; /// Memory map parameters used in calculation mapping application addresses /// to shadow addresses and origin addresses. diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 446e601cd4d7..0ae425117fc0 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3664,7 +3664,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // will become a non-readonly function after it is instrumented by us. To // prevent this code from being optimized out, mark that function // non-readonly in advance. - AttrBuilder B; + AttributeMask B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::WriteOnly) @@ -5359,7 +5359,7 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { MemorySanitizerVisitor Visitor(F, *this, TLI); // Clear out readonly/readnone attributes. 
- AttrBuilder B; + AttributeMask B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::WriteOnly) diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index e12eca0ed287..5f4e8f1dad0a 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1373,7 +1373,7 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx, for (Attribute A : AL.getFnAttrs()) { if (isStatepointDirectiveAttr(A)) - FnAttrs.remove(A); + FnAttrs.removeAttribute(A); } // Just skip parameter and return attributes for now @@ -2643,10 +2643,10 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // List of all parameter and return attributes which must be stripped when // lowering from the abstract machine model. Note that we list attributes // here which aren't valid as return attributes, that is okay. -static AttrBuilder getParamAndReturnAttributesToRemove() { - AttrBuilder R; - R.addDereferenceableAttr(1); - R.addDereferenceableOrNullAttr(1); +static AttributeMask getParamAndReturnAttributesToRemove() { + AttributeMask R; + R.addAttribute(Attribute::Dereferenceable); + R.addAttribute(Attribute::DereferenceableOrNull); R.addAttribute(Attribute::ReadNone); R.addAttribute(Attribute::ReadOnly); R.addAttribute(Attribute::WriteOnly); @@ -2668,7 +2668,7 @@ static void stripNonValidAttributesFromPrototype(Function &F) { return; } - AttrBuilder R = getParamAndReturnAttributesToRemove(); + AttributeMask R = getParamAndReturnAttributesToRemove(); for (Argument &A : F.args()) if (isa(A.getType())) F.removeParamAttrs(A.getArgNo(), R); @@ -2742,7 +2742,7 @@ static void stripNonValidDataFromBody(Function &F) { stripInvalidMetadataFromInstruction(I); - AttrBuilder R = getParamAndReturnAttributesToRemove(); + AttributeMask R = getParamAndReturnAttributesToRemove(); if (auto *Call = dyn_cast(&I)) { for (int 
i = 0, e = Call->arg_size(); i != e; i++) if (isa(Call->getArgOperand(i)->getType())) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index ff2f8a25f379..c34da51e6dc1 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -486,7 +486,7 @@ bool llvm::runIPSCCP( // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove // them from both the function and callsites. if (ReplacedPointerArg) { - AttrBuilder AttributesToRemove; + AttributeMask AttributesToRemove; AttributesToRemove.addAttribute(Attribute::ArgMemOnly); AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); F.removeFnAttrs(AttributesToRemove); diff --git a/llvm/unittests/IR/AttributesTest.cpp b/llvm/unittests/IR/AttributesTest.cpp index 188d4e342e85..9bc5e474a272 100644 --- a/llvm/unittests/IR/AttributesTest.cpp +++ b/llvm/unittests/IR/AttributesTest.cpp @@ -81,12 +81,12 @@ TEST(Attributes, RemoveAlign) { AttrBuilder B_align_readonly; B_align_readonly.addAttribute(AlignAttr); B_align_readonly.addAttribute(Attribute::ReadOnly); - AttrBuilder B_align; + AttributeMask B_align; B_align.addAttribute(AlignAttr); AttrBuilder B_stackalign_optnone; B_stackalign_optnone.addAttribute(StackAlignAttr); B_stackalign_optnone.addAttribute(Attribute::OptimizeNone); - AttrBuilder B_stackalign; + AttributeMask B_stackalign; B_stackalign.addAttribute(StackAlignAttr); AttributeSet AS = AttributeSet::get(C, B_align_readonly); From 17af06ba8005d6d14b0ac79ece01ecb028de9f90 Mon Sep 17 00:00:00 2001 From: luxufan <932494295@qq.com> Date: Tue, 4 Jan 2022 20:18:19 +0800 Subject: [PATCH 526/992] [JITLink] Add fixup value range check This patch makes jitlink to report an out of range error when the fixup value out of range Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D107328 --- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 25 +++++++++++++++++-- .../JITLink/RISCV/ELF_pc_indirect.s | 4 +-- 2 files 
changed, 25 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 26ec79ea50cf..94b659c02092 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -161,6 +161,15 @@ static uint32_t extractBits(uint32_t Num, unsigned Low, unsigned Size) { return (Num & (((1ULL << (Size + 1)) - 1) << Low)) >> Low; } +static inline bool isInRangeForImmS32(int64_t Value) { + return (Value >= std::numeric_limits::min() && + Value <= std::numeric_limits::max()); +} + +static inline bool isInRangeForImmU32(uint64_t Value) { + return Value <= std::numeric_limits::max(); +} + class ELFJITLinker_riscv : public JITLinker { friend class JITLinker; @@ -189,14 +198,18 @@ class ELFJITLinker_riscv : public JITLinker { break; } case R_RISCV_HI20: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (LLVM_UNLIKELY(!isInRangeForImmU32(Value))) + return makeTargetOutOfRangeError(G, B, E); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); break; } case R_RISCV_LO12_I: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (LLVM_UNLIKELY(!isInRangeForImmU32(Value))) + return makeTargetOutOfRangeError(G, B, E); int32_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -205,6 +218,8 @@ class ELFJITLinker_riscv : public JITLinker { } case R_RISCV_CALL: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; + if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) + return makeTargetOutOfRangeError(G, B, E); int32_t Hi = (Value + 0x800) & 0xFFFFF000; int32_t Lo = Value & 0xFFF; uint32_t RawInstrAuipc = *(little32_t *)FixupPtr; @@ -216,6 +231,8 @@ 
class ELFJITLinker_riscv : public JITLinker { } case R_RISCV_PCREL_HI20: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; + if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) + return makeTargetOutOfRangeError(G, B, E); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); @@ -227,6 +244,8 @@ class ELFJITLinker_riscv : public JITLinker { return RelHI20.takeError(); int64_t Value = RelHI20->getTarget().getAddress() + RelHI20->getAddend() - E.getTarget().getAddress(); + if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) + return makeTargetOutOfRangeError(G, B, E); int64_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -237,6 +256,8 @@ class ELFJITLinker_riscv : public JITLinker { auto RelHI20 = getRISCVPCRelHi20(E); int64_t Value = RelHI20->getTarget().getAddress() + RelHI20->getAddend() - E.getTarget().getAddress(); + if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) + return makeTargetOutOfRangeError(G, B, E); int64_t Lo = Value & 0xFFF; uint32_t Imm31_25 = extractBits(Lo, 5, 7) << 25; uint32_t Imm11_7 = extractBits(Lo, 0, 5) << 7; diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s index 539da2b1e81d..32897e32bc9f 100644 --- a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s @@ -4,11 +4,11 @@ # RUN: llvm-mc -triple=riscv32 -position-independent -filetype=obj \ # RUN: -o %t/elf_riscv32_sm_pic_reloc.o %s # RUN: llvm-jitlink -noexec \ -# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ +# RUN: -slab-allocate 100Kb -slab-address 0x1ff00000 -slab-page-size 4096 \ # RUN: -define-abs external_func=0x1 -define-abs external_data=0x2 \ # RUN: -check %s %t/elf_riscv64_sm_pic_reloc.o # RUN: llvm-jitlink -noexec \ -# RUN: -slab-allocate 100Kb 
-slab-address 0xfff00000 -slab-page-size 4096 \ +# RUN: -slab-allocate 100Kb -slab-address 0x1ff00000 -slab-page-size 4096 \ # RUN: -define-abs external_func=0x1 -define-abs external_data=0x2 \ # RUN: -check %s %t/elf_riscv32_sm_pic_reloc.o # From 229c95ab661d89d29a64bff014229b7c6d3ee8a1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 28 Dec 2021 03:58:13 +0100 Subject: [PATCH 527/992] [CodeCompletion] Signature help for aggregate initialization. The "parameter list" is the list of fields which should be initialized. We introduce a new OverloadCandidate kind for this. It starts to become harder for CC consumers to handle all the cases for params, so I added some extra APIs on OverloadCandidate to abstract them. Includes some basic support for designated initializers. The same aggregate signature is shown, the current arg jumps after the one you just initialized. This follows C99 semantics for mixed designated/positional initializers (which clang supports in C++ as an extension) and is also a useful prompt for C++ as C++ designated initializers must be in order. 
Related bugs: - https://github.com/clangd/clangd/issues/965 - https://github.com/clangd/clangd/issues/306 Differential Revision: https://reviews.llvm.org/D116326 --- clang-tools-extra/clangd/CodeComplete.cpp | 57 +- .../clangd/unittests/CodeCompleteTests.cpp | 19 + .../include/clang/Sema/CodeCompleteConsumer.h | 39 +- clang/lib/Sema/CodeCompleteConsumer.cpp | 74 +- clang/lib/Sema/SemaCodeComplete.cpp | 251 ++- clang/test/CodeCompletion/ctor-signature.cpp | 26 +- "tl\033" | 1381 +++++++++++++++++ 7 files changed, 1735 insertions(+), 112 deletions(-) create mode 100644 "tl\033" diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 53d8f0d6cdeb..50388e08c30a 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -896,10 +896,7 @@ struct ScoredSignature { int paramIndexForArg(const CodeCompleteConsumer::OverloadCandidate &Candidate, int Arg) { int NumParams = Candidate.getNumParams(); - if (const auto *F = Candidate.getFunction()) { - if (F->isVariadic()) - ++NumParams; - } else if (auto *T = Candidate.getFunctionType()) { + if (auto *T = Candidate.getFunctionType()) { if (auto *Proto = T->getAs()) { if (Proto->isVariadic()) ++NumParams; @@ -996,8 +993,7 @@ class SignatureHelpCollector final : public CodeCompleteConsumer { const ScoredSignature &R) { // Ordering follows: // - Less number of parameters is better. - // - Function is better than FunctionType which is better than - // Function Template. + // - Aggregate > Function > FunctionType > FunctionTemplate // - High score is better. // - Shorter signature is better. // - Alphabetically smaller is better. 
@@ -1009,18 +1005,22 @@ class SignatureHelpCollector final : public CodeCompleteConsumer { R.Quality.NumberOfOptionalParameters; if (L.Quality.Kind != R.Quality.Kind) { using OC = CodeCompleteConsumer::OverloadCandidate; - switch (L.Quality.Kind) { - case OC::CK_Function: - return true; - case OC::CK_FunctionType: - return R.Quality.Kind != OC::CK_Function; - case OC::CK_FunctionTemplate: - return false; - case OC::CK_Template: - assert(false && "Never see templates and other overloads mixed"); - return false; - } - llvm_unreachable("Unknown overload candidate type."); + auto KindPriority = [&](OC::CandidateKind K) { + switch (K) { + case OC::CK_Aggregate: + return 1; + case OC::CK_Function: + return 2; + case OC::CK_FunctionType: + return 3; + case OC::CK_FunctionTemplate: + return 4; + case OC::CK_Template: + return 5; + } + llvm_unreachable("Unknown overload candidate type."); + }; + return KindPriority(L.Quality.Kind) < KindPriority(R.Quality.Kind); } if (L.Signature.label.size() != R.Signature.label.size()) return L.Signature.label.size() < R.Signature.label.size(); @@ -1171,24 +1171,9 @@ class ParamNameCollector final : public CodeCompleteConsumer { "too many arguments"); for (unsigned I = 0; I < NumCandidates; ++I) { - OverloadCandidate Candidate = Candidates[I]; - NamedDecl *Param = nullptr; - if (auto *Func = Candidate.getFunction()) { - if (CurrentArg < Func->getNumParams()) - Param = Func->getParamDecl(CurrentArg); - } else if (auto *Template = Candidate.getTemplate()) { - if (CurrentArg < Template->getTemplateParameters()->size()) - Param = Template->getTemplateParameters()->getParam(CurrentArg); - } - - if (!Param) - continue; - auto *Ident = Param->getIdentifier(); - if (!Ident) - continue; - auto Name = Ident->getName(); - if (!Name.empty()) - ParamNames.insert(Name.str()); + if (const NamedDecl *ND = Candidates[I].getParamDecl(CurrentArg)) + if (const auto *II = ND->getIdentifier()) + ParamNames.emplace(II->getName()); } } diff --git 
a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 9d5c57670be1..52dee0fdc0e2 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1294,6 +1294,25 @@ TEST(SignatureHelpTest, Constructors) { CheckBracedInit("int x(S); int i = x({^});"); } +TEST(SignatureHelpTest, Aggregates) { + std::string Top = R"cpp( + struct S { + int a, b, c, d; + }; + )cpp"; + auto AggregateSig = Sig("S{[[int a]], [[int b]], [[int c]], [[int d]]}"); + EXPECT_THAT(signatures(Top + "S s{^}").signatures, + UnorderedElementsAre(AggregateSig, Sig("S{}"), + Sig("S{[[const S &]]}"), + Sig("S{[[S &&]]}"))); + EXPECT_THAT(signatures(Top + "S s{1,^}").signatures, + ElementsAre(AggregateSig)); + EXPECT_EQ(signatures(Top + "S s{1,^}").activeParameter, 1); + EXPECT_THAT(signatures(Top + "S s{.c=3,^}").signatures, + ElementsAre(AggregateSig)); + EXPECT_EQ(signatures(Top + "S s{.c=3,^}").activeParameter, 3); +} + TEST(SignatureHelpTest, OverloadInitListRegression) { auto Results = signatures(R"cpp( struct A {int x;}; diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h index 70c34703f0a0..41c495882b27 100644 --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -1018,6 +1018,9 @@ class CodeCompleteConsumer { /// The candidate is a template, template arguments are being completed. CK_Template, + + /// The candidate is aggregate initialization of a record type. + CK_Aggregate, }; private: @@ -1040,17 +1043,32 @@ class CodeCompleteConsumer { /// The template overload candidate, available when /// Kind == CK_Template. 
const TemplateDecl *Template; + + /// The class being aggregate-initialized, + /// when Kind == CK_Aggregate + const RecordDecl *AggregateType; }; public: OverloadCandidate(FunctionDecl *Function) - : Kind(CK_Function), Function(Function) {} + : Kind(CK_Function), Function(Function) { + assert(Function != nullptr); + } OverloadCandidate(FunctionTemplateDecl *FunctionTemplateDecl) - : Kind(CK_FunctionTemplate), FunctionTemplate(FunctionTemplateDecl) {} + : Kind(CK_FunctionTemplate), FunctionTemplate(FunctionTemplateDecl) { + assert(FunctionTemplateDecl != nullptr); + } OverloadCandidate(const FunctionType *Type) - : Kind(CK_FunctionType), Type(Type) {} + : Kind(CK_FunctionType), Type(Type) { + assert(Type != nullptr); + } + + OverloadCandidate(const RecordDecl *Aggregate) + : Kind(CK_Aggregate), AggregateType(Aggregate) { + assert(Aggregate != nullptr); + } OverloadCandidate(const TemplateDecl *Template) : Kind(CK_Template), Template(Template) {} @@ -1077,8 +1095,23 @@ class CodeCompleteConsumer { return Template; } + /// Retrieve the aggregate type being initialized. + const RecordDecl *getAggregate() const { + assert(getKind() == CK_Aggregate); + return AggregateType; + } + + /// Get the number of parameters in this signature. unsigned getNumParams() const; + /// Get the type of the Nth parameter. + /// Returns null if the type is unknown or N is out of range. + QualType getParamType(unsigned N) const; + + /// Get the declaration of the Nth parameter. + /// Returns null if the decl is unknown or N is out of range. + const NamedDecl *getParamDecl(unsigned N) const; + /// Create a new code-completion string that describes the function /// signature of this overload candidate. 
CodeCompletionString * diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index bb088fd5fe97..fefe20941f17 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -508,6 +508,7 @@ CodeCompleteConsumer::OverloadCandidate::getFunctionType() const { return Type; case CK_Template: + case CK_Aggregate: return nullptr; } @@ -517,11 +518,80 @@ CodeCompleteConsumer::OverloadCandidate::getFunctionType() const { unsigned CodeCompleteConsumer::OverloadCandidate::getNumParams() const { if (Kind == CK_Template) return Template->getTemplateParameters()->size(); - if (const auto *FPT = dyn_cast_or_null(getFunctionType())) - return FPT->getNumParams(); + + if (Kind == CK_Aggregate) { + unsigned Count = + std::distance(AggregateType->field_begin(), AggregateType->field_end()); + if (const auto *CRD = dyn_cast(AggregateType)) + Count += CRD->getNumBases(); + return Count; + } + + if (const auto *FT = getFunctionType()) + if (const auto *FPT = dyn_cast(FT)) + return FPT->getNumParams(); + return 0; } +QualType +CodeCompleteConsumer::OverloadCandidate::getParamType(unsigned N) const { + if (Kind == CK_Aggregate) { + if (const auto *CRD = dyn_cast(AggregateType)) { + if (N < CRD->getNumBases()) + return std::next(CRD->bases_begin(), N)->getType(); + N -= CRD->getNumBases(); + } + for (const auto *Field : AggregateType->fields()) + if (N-- == 0) + return Field->getType(); + return QualType(); + } + + if (Kind == CK_Template) { + TemplateParameterList *TPL = getTemplate()->getTemplateParameters(); + if (N < TPL->size()) + if (const auto *D = dyn_cast(TPL->getParam(N))) + return D->getType(); + return QualType(); + } + + if (const auto *FT = getFunctionType()) + if (const auto *FPT = dyn_cast(FT)) + if (N < FPT->getNumParams()) + return FPT->getParamType(N); + return QualType(); +} + +const NamedDecl * +CodeCompleteConsumer::OverloadCandidate::getParamDecl(unsigned N) const { + if (Kind == 
CK_Aggregate) { + if (const auto *CRD = dyn_cast(AggregateType)) { + if (N < CRD->getNumBases()) + return std::next(CRD->bases_begin(), N)->getType()->getAsTagDecl(); + N -= CRD->getNumBases(); + } + for (const auto *Field : AggregateType->fields()) + if (N-- == 0) + return Field; + return nullptr; + } + + if (Kind == CK_Template) { + TemplateParameterList *TPL = getTemplate()->getTemplateParameters(); + if (N < TPL->size()) + return TPL->getParam(N); + return nullptr; + } + + // Note that if we only have a FunctionProtoType, we don't have param decls. + if (const auto *FD = getFunction()) { + if (N < FD->param_size()) + return FD->getParamDecl(N); + } + return nullptr; +} + //===----------------------------------------------------------------------===// // Code completion consumer implementation //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index e2bf5edc7b5e..e089f85420bc 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -2817,14 +2817,18 @@ formatBlockPlaceholder(const PrintingPolicy &Policy, const NamedDecl *BlockDecl, Optional> ObjCSubsts = None); static std::string -FormatFunctionParameter(const PrintingPolicy &Policy, const ParmVarDecl *Param, - bool SuppressName = false, bool SuppressBlock = false, +FormatFunctionParameter(const PrintingPolicy &Policy, + const DeclaratorDecl *Param, bool SuppressName = false, + bool SuppressBlock = false, Optional> ObjCSubsts = None) { // Params are unavailable in FunctionTypeLoc if the FunctionType is invalid. // It would be better to pass in the param Type, which is usually available. // But this case is rare, so just pretend we fell back to int as elsewhere. 
if (!Param) return "int"; + Decl::ObjCDeclQualifier ObjCQual = Decl::OBJC_TQ_None; + if (const auto *PVD = dyn_cast(Param)) + ObjCQual = PVD->getObjCDeclQualifier(); bool ObjCMethodParam = isa(Param->getDeclContext()); if (Param->getType()->isDependentType() || !Param->getType()->isBlockPointerType()) { @@ -2840,8 +2844,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy, const ParmVarDecl *Param, Type = Type.substObjCTypeArgs(Param->getASTContext(), *ObjCSubsts, ObjCSubstitutionContext::Parameter); if (ObjCMethodParam) { - Result = - "(" + formatObjCParamQualifiers(Param->getObjCDeclQualifier(), Type); + Result = "(" + formatObjCParamQualifiers(ObjCQual, Type); Result += Type.getAsString(Policy) + ")"; if (Param->getIdentifier() && !SuppressName) Result += Param->getIdentifier()->getName(); @@ -2878,8 +2881,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy, const ParmVarDecl *Param, if (ObjCMethodParam) { Result = Type.getAsString(Policy); - std::string Quals = - formatObjCParamQualifiers(Param->getObjCDeclQualifier(), Type); + std::string Quals = formatObjCParamQualifiers(ObjCQual, Type); if (!Quals.empty()) Result = "(" + Quals + " " + Result + ")"; if (Result.back() != ')') @@ -3689,6 +3691,31 @@ const RawComment *clang::getParameterComment( return nullptr; } +static void AddOverloadAggregateChunks(const RecordDecl *RD, + const PrintingPolicy &Policy, + CodeCompletionBuilder &Result, + unsigned CurrentArg) { + unsigned ChunkIndex = 0; + auto AddChunk = [&](llvm::StringRef Placeholder) { + if (ChunkIndex > 0) + Result.AddChunk(CodeCompletionString::CK_Comma); + const char *Copy = Result.getAllocator().CopyString(Placeholder); + if (ChunkIndex == CurrentArg) + Result.AddCurrentParameterChunk(Copy); + else + Result.AddPlaceholderChunk(Copy); + ++ChunkIndex; + }; + // Aggregate initialization has all bases followed by all fields. + // (Bases are not legal in C++11 but in that case we never get here). 
+ if (auto *CRD = llvm::dyn_cast(RD)) { + for (const auto &Base : CRD->bases()) + AddChunk(Base.getType().getAsString(Policy)); + } + for (const auto &Field : RD->fields()) + AddChunk(FormatFunctionParameter(Policy, Field)); +} + /// Add function overload parameter chunks to the given code completion /// string. static void AddOverloadParameterChunks(ASTContext &Context, @@ -3698,6 +3725,11 @@ static void AddOverloadParameterChunks(ASTContext &Context, CodeCompletionBuilder &Result, unsigned CurrentArg, unsigned Start = 0, bool InOptional = false) { + if (!Function && !Prototype) { + Result.AddChunk(CodeCompletionString::CK_CurrentParameter, "..."); + return; + } + bool FirstParameter = true; unsigned NumParams = Function ? Function->getNumParams() : Prototype->getNumParams(); @@ -3851,22 +3883,13 @@ CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( FunctionDecl *FDecl = getFunction(); const FunctionProtoType *Proto = - dyn_cast(getFunctionType()); - if (!FDecl && !Proto) { - // Function without a prototype. Just give the return type and a - // highlighted ellipsis. - const FunctionType *FT = getFunctionType(); - Result.AddResultTypeChunk(Result.getAllocator().CopyString( - FT->getReturnType().getAsString(Policy))); - Result.AddChunk(Braced ? CodeCompletionString::CK_LeftBrace - : CodeCompletionString::CK_LeftParen); - Result.AddChunk(CodeCompletionString::CK_CurrentParameter, "..."); - Result.AddChunk(Braced ? CodeCompletionString::CK_RightBrace - : CodeCompletionString::CK_RightParen); - return Result.TakeString(); - } + dyn_cast_or_null(getFunctionType()); - if (FDecl) { + // First, the name/type of the callee. 
+ if (getKind() == CK_Aggregate) { + Result.AddTextChunk( + Result.getAllocator().CopyString(getAggregate()->getName())); + } else if (FDecl) { if (IncludeBriefComments) { if (auto RC = getParameterComment(S.getASTContext(), *this, CurrentArg)) Result.addBriefComment(RC->getBriefText(S.getASTContext())); @@ -3878,14 +3901,19 @@ CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( FDecl->getDeclName().print(OS, Policy); Result.AddTextChunk(Result.getAllocator().CopyString(OS.str())); } else { + // Function without a declaration. Just give the return type. Result.AddResultTypeChunk(Result.getAllocator().CopyString( - Proto->getReturnType().getAsString(Policy))); + getFunctionType()->getReturnType().getAsString(Policy))); } + // Next, the brackets and parameters. Result.AddChunk(Braced ? CodeCompletionString::CK_LeftBrace : CodeCompletionString::CK_LeftParen); - AddOverloadParameterChunks(S.getASTContext(), Policy, FDecl, Proto, Result, - CurrentArg); + if (getKind() == CK_Aggregate) + AddOverloadAggregateChunks(getAggregate(), Policy, Result, CurrentArg); + else + AddOverloadParameterChunks(S.getASTContext(), Policy, FDecl, Proto, Result, + CurrentArg); Result.AddChunk(Braced ? CodeCompletionString::CK_RightBrace : CodeCompletionString::CK_RightParen); @@ -5926,18 +5954,18 @@ static QualType getParamType(Sema &SemaRef, // overload candidates. QualType ParamType; for (auto &Candidate : Candidates) { - // FIXME: handle non-type-template-parameters by merging with D116326 - if (const auto *FType = Candidate.getFunctionType()) - if (const auto *Proto = dyn_cast(FType)) - if (N < Proto->getNumParams()) { - if (ParamType.isNull()) - ParamType = Proto->getParamType(N); - else if (!SemaRef.Context.hasSameUnqualifiedType( - ParamType.getNonReferenceType(), - Proto->getParamType(N).getNonReferenceType())) - // Otherwise return a default-constructed QualType. 
- return QualType(); - } + QualType CandidateParamType = Candidate.getParamType(N); + if (CandidateParamType.isNull()) + continue; + if (ParamType.isNull()) { + ParamType = CandidateParamType; + continue; + } + if (!SemaRef.Context.hasSameUnqualifiedType( + ParamType.getNonReferenceType(), + CandidateParamType.getNonReferenceType())) + // Two conflicting types, give up. + return QualType(); } return ParamType; @@ -6058,6 +6086,73 @@ QualType Sema::ProduceCallSignatureHelp(Expr *Fn, ArrayRef Args, return !CandidateSet.empty() ? ParamType : QualType(); } +// Determine which param to continue aggregate initialization from after +// a designated initializer. +// +// Given struct S { int a,b,c,d,e; }: +// after `S{.b=1,` we want to suggest c to continue +// after `S{.b=1, 2,` we continue with d (this is legal C and ext in C++) +// after `S{.b=1, .a=2,` we continue with b (this is legal C and ext in C++) +// +// Possible outcomes: +// - we saw a designator for a field, and continue from the returned index. +// Only aggregate initialization is allowed. +// - we saw a designator, but it was complex or we couldn't find the field. +// Only aggregate initialization is possible, but we can't assist with it. +// Returns an out-of-range index. +// - we saw no designators, just positional arguments. +// Returns None. +static llvm::Optional +getNextAggregateIndexAfterDesignatedInit(const ResultCandidate &Aggregate, + ArrayRef Args) { + static constexpr unsigned Invalid = std::numeric_limits::max(); + assert(Aggregate.getKind() == ResultCandidate::CK_Aggregate); + + // Look for designated initializers. + // They're in their syntactic form, not yet resolved to fields. 
+ IdentifierInfo *DesignatedFieldName = nullptr; + unsigned ArgsAfterDesignator = 0; + for (const Expr *Arg : Args) { + if (const auto *DIE = dyn_cast(Arg)) { + if (DIE->size() == 1 && DIE->getDesignator(0)->isFieldDesignator()) { + DesignatedFieldName = DIE->getDesignator(0)->getFieldName(); + ArgsAfterDesignator = 0; + } else { + return Invalid; // Complicated designator. + } + } else if (isa(Arg)) { + return Invalid; // Unsupported. + } else { + ++ArgsAfterDesignator; + } + } + if (!DesignatedFieldName) + return llvm::None; + + // Find the index within the class's fields. + // (Probing getParamDecl() directly would be quadratic in number of fields). + unsigned DesignatedIndex = 0; + const FieldDecl *DesignatedField = nullptr; + for (const auto *Field : Aggregate.getAggregate()->fields()) { + if (Field->getIdentifier() == DesignatedFieldName) { + DesignatedField = Field; + break; + } + ++DesignatedIndex; + } + if (!DesignatedField) + return Invalid; // Designator referred to a missing field, give up. + + // Find the index within the aggregate (which may have leading bases). + unsigned AggregateSize = Aggregate.getNumParams(); + while (DesignatedIndex < AggregateSize && + Aggregate.getParamDecl(DesignatedIndex) != DesignatedField) + ++DesignatedIndex; + + // Continue from the index after the last named field. + return DesignatedIndex + ArgsAfterDesignator + 1; +} + QualType Sema::ProduceConstructorSignatureHelp(QualType Type, SourceLocation Loc, ArrayRef Args, @@ -6065,48 +6160,72 @@ QualType Sema::ProduceConstructorSignatureHelp(QualType Type, bool Braced) { if (!CodeCompleter) return QualType(); + SmallVector Results; // A complete type is needed to lookup for constructors. - CXXRecordDecl *RD = - isCompleteType(Loc, Type) ? Type->getAsCXXRecordDecl() : nullptr; + RecordDecl *RD = + isCompleteType(Loc, Type) ? 
Type->getAsRecordDecl() : nullptr; if (!RD) return Type; - // FIXME: we don't support signature help for aggregate initialization, so - // don't offer a confusing partial list (e.g. the copy constructor). - if (Braced && RD->isAggregate()) - return Type; + CXXRecordDecl *CRD = dyn_cast(RD); + + // Consider aggregate initialization. + // We don't check that types so far are correct. + // We also don't handle C99/C++17 brace-elision, we assume init-list elements + // are 1:1 with fields. + // FIXME: it would be nice to support "unwrapping" aggregates that contain + // a single subaggregate, like std::array -> T __elements[N]. + if (Braced && !RD->isUnion() && + (!LangOpts.CPlusPlus || (CRD && CRD->isAggregate()))) { + ResultCandidate AggregateSig(RD); + unsigned AggregateSize = AggregateSig.getNumParams(); + + if (auto NextIndex = + getNextAggregateIndexAfterDesignatedInit(AggregateSig, Args)) { + // A designator was used, only aggregate init is possible. + if (*NextIndex >= AggregateSize) + return Type; + Results.push_back(AggregateSig); + return ProduceSignatureHelp(*this, Results, *NextIndex, OpenParLoc, + Braced); + } + + // Describe aggregate initialization, but also constructors below. + if (Args.size() < AggregateSize) + Results.push_back(AggregateSig); + } // FIXME: Provide support for member initializers. // FIXME: Provide support for variadic template constructors. - OverloadCandidateSet CandidateSet(Loc, OverloadCandidateSet::CSK_Normal); - - for (NamedDecl *C : LookupConstructors(RD)) { - if (auto *FD = dyn_cast(C)) { - // FIXME: we can't yet provide correct signature help for initializer - // list constructors, so skip them entirely. 
- if (Braced && LangOpts.CPlusPlus && isInitListConstructor(FD)) - continue; - AddOverloadCandidate(FD, DeclAccessPair::make(FD, C->getAccess()), Args, - CandidateSet, - /*SuppressUserConversions=*/false, - /*PartialOverloading=*/true, - /*AllowExplicit*/ true); - } else if (auto *FTD = dyn_cast(C)) { - if (Braced && LangOpts.CPlusPlus && - isInitListConstructor(FTD->getTemplatedDecl())) - continue; + if (CRD) { + OverloadCandidateSet CandidateSet(Loc, OverloadCandidateSet::CSK_Normal); + for (NamedDecl *C : LookupConstructors(CRD)) { + if (auto *FD = dyn_cast(C)) { + // FIXME: we can't yet provide correct signature help for initializer + // list constructors, so skip them entirely. + if (Braced && LangOpts.CPlusPlus && isInitListConstructor(FD)) + continue; + AddOverloadCandidate(FD, DeclAccessPair::make(FD, C->getAccess()), Args, + CandidateSet, + /*SuppressUserConversions=*/false, + /*PartialOverloading=*/true, + /*AllowExplicit*/ true); + } else if (auto *FTD = dyn_cast(C)) { + if (Braced && LangOpts.CPlusPlus && + isInitListConstructor(FTD->getTemplatedDecl())) + continue; - AddTemplateOverloadCandidate( - FTD, DeclAccessPair::make(FTD, C->getAccess()), - /*ExplicitTemplateArgs=*/nullptr, Args, CandidateSet, - /*SuppressUserConversions=*/false, - /*PartialOverloading=*/true); + AddTemplateOverloadCandidate( + FTD, DeclAccessPair::make(FTD, C->getAccess()), + /*ExplicitTemplateArgs=*/nullptr, Args, CandidateSet, + /*SuppressUserConversions=*/false, + /*PartialOverloading=*/true); + } } + mergeCandidatesWithResults(*this, Results, CandidateSet, Loc, Args.size()); } - SmallVector Results; - mergeCandidatesWithResults(*this, Results, CandidateSet, Loc, Args.size()); return ProduceSignatureHelp(*this, Results, Args.size(), OpenParLoc, Braced); } diff --git a/clang/test/CodeCompletion/ctor-signature.cpp b/clang/test/CodeCompletion/ctor-signature.cpp index b02c8811bbcf..8f0cfacfc115 100644 --- a/clang/test/CodeCompletion/ctor-signature.cpp +++ 
b/clang/test/CodeCompletion/ctor-signature.cpp @@ -42,13 +42,29 @@ int b3 = consumeBar({}); // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:41:22 %s | FileCheck -check-prefix=CHECK-BRACED %s struct Aggregate { - // FIXME: no support for aggregates yet. - // CHECK-AGGREGATE-NOT: OVERLOAD: Aggregate{<#const Aggregate &#>} - // CHECK-AGGREGATE-NOT: OVERLOAD: {{.*}}first int first; int second; + int third; }; -Aggregate a{}; -// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:52:13 %s | FileCheck -check-prefix=CHECK-AGGREGATE %s +Aggregate a{1, 2, 3}; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:50:13 %s | FileCheck -check-prefix=CHECK-AGGREGATE-1 %s +// CHECK-AGGREGATE-1: OVERLOAD: Aggregate{<#int first#>, int second, int third} +// CHECK-AGGREGATE-1: OVERLOAD: Aggregate{<#const Aggregate &#>} +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:50:16 %s | FileCheck -check-prefix=CHECK-AGGREGATE-2 %s +// CHECK-AGGREGATE-2: OVERLOAD: Aggregate{int first, <#int second#>, int third} +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:50:18 %s | FileCheck -check-prefix=CHECK-AGGREGATE-3 %s +// CHECK-AGGREGATE-3: OVERLOAD: Aggregate{int first, int second, <#int third#>} +Aggregate d{.second=1, .first=2, 3, 4, }; +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:59:13 %s | FileCheck -check-prefix=CHECK-DESIG-1 %s +// CHECK-DESIG-1: OVERLOAD: Aggregate{<#int first#>, int second, int third} +// CHECK-DESIG-1: OVERLOAD: Aggregate{<#const Aggregate &#>} +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:59:24 %s | FileCheck -check-prefix=CHECK-DESIG-2 %s +// CHECK-DESIG-2: OVERLOAD: Aggregate{int first, int second, <#int third#>} +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:59:34 %s | FileCheck -check-prefix=CHECK-DESIG-3 %s +// CHECK-DESIG-3: OVERLOAD: Aggregate{int first, <#int second#>, int third} +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:59:37 %s | FileCheck -check-prefix=CHECK-DESIG-4 %s +// 
CHECK-DESIG-4: OVERLOAD: Aggregate{int first, int second, <#int third#>} +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:59:38 %s | FileCheck -check-prefix=CHECK-DESIG-5 %s --allow-empty +// CHECK-DESIG-5-NOT: OVERLOAD diff --git "a/tl\033" "b/tl\033" new file mode 100644 index 000000000000..1a124a4c5c13 --- /dev/null +++ "b/tl\033" @@ -0,0 +1,1381 @@ +2a92efd0a239 (HEAD -> main, origin/main) HEAD@{0}: rebase (finish): returning to refs/heads/main +2a92efd0a239 (HEAD -> main, origin/main) HEAD@{1}: rebase (pick): [CodeComplete] drop unused Scope param. NFC +1379eb577607 HEAD@{2}: rebase (start): checkout origin/main +1e3d96c67ff9 HEAD@{3}: commit: [CodeComplete] drop unused Scope param. NFC +6231ef262415 HEAD@{4}: rebase (finish): returning to refs/heads/main +6231ef262415 HEAD@{5}: rebase (start): checkout origin/main +f1f5a85af8be HEAD@{6}: checkout: moving from aggregates to main +c8b1ec7561fe (aggregates) HEAD@{7}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +fee43399f0af HEAD@{8}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +daf114e5c347 HEAD@{9}: rebase (continue) (finish): returning to refs/heads/aggregates +daf114e5c347 HEAD@{10}: rebase (continue): [CodeCompletion] Signature help for aggregate initialization. 
+f2b3e25f860e (configcompiler) HEAD@{11}: rebase (start): checkout origin/main +4f17932fb479 HEAD@{12}: checkout: moving from configcompiler to aggregates +f2b3e25f860e (configcompiler) HEAD@{13}: rebase (finish): returning to refs/heads/configcompiler +f2b3e25f860e (configcompiler) HEAD@{14}: rebase (pick): [clangd] Add CompileFlags.Compiler option to override argv0 +f4ef79306cee HEAD@{15}: rebase (start): checkout origin/main +6443bd3db307 HEAD@{16}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 +0fa6fc0238fe HEAD@{17}: reset: moving to HEAD +0fa6fc0238fe HEAD@{18}: checkout: moving from main to configcompiler +f1f5a85af8be HEAD@{19}: rebase (finish): returning to refs/heads/main +f1f5a85af8be HEAD@{20}: rebase (start): checkout origin/main +09f8315bba39 (arraytype) HEAD@{21}: checkout: moving from bracehelp to main +a61f34ea2502 (bracehelp) HEAD@{22}: commit: [clangd] Fix windows build after 478863ef58c7f7314e06 +92417eaf3329 HEAD@{23}: rebase (finish): returning to refs/heads/bracehelp +92417eaf3329 HEAD@{24}: rebase (pick): [CodeCompletion] Signature help for braced constructor calls +a390c9905d4d HEAD@{25}: rebase (start): checkout origin/main +8da663369977 HEAD@{26}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +9ee52e712414 HEAD@{27}: rebase (continue) (finish): returning to refs/heads/bracehelp +9ee52e712414 HEAD@{28}: rebase (continue): [CodeCompletion] Signature help for braced constructor calls +364eb371012b HEAD@{29}: rebase (start): checkout origin/main +b245d1eaec2d HEAD@{30}: checkout: moving from iwyustdlib to bracehelp +478863ef58c7 (iwyustdlib) HEAD@{31}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library +ee8a314f09c0 HEAD@{32}: rebase (finish): returning to refs/heads/iwyustdlib +ee8a314f09c0 HEAD@{33}: rebase (pick): [clangd] Basic IncludeCleaner support for c/c++ standard library +b9ed95afc4b1 HEAD@{34}: rebase (start): checkout origin/main 
+f038610fb5f3 HEAD@{35}: checkout: moving from insertion_point to iwyustdlib +fe68088d44f7 (insertion_point) HEAD@{36}: rebase (finish): returning to refs/heads/insertion_point +fe68088d44f7 (insertion_point) HEAD@{37}: rebase (pick): [clangd] Helper for determining member insertion point. +9e6f88b31a7f (tidydiags) HEAD@{38}: rebase (start): checkout origin/main +aacd98d5b867 HEAD@{39}: checkout: moving from tidydiags to insertion_point +9e6f88b31a7f (tidydiags) HEAD@{40}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +e9211c3dd6ba HEAD@{41}: rebase (finish): returning to refs/heads/tidydiags +e9211c3dd6ba HEAD@{42}: rebase (pick): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +7505aeefc4e6 HEAD@{43}: rebase (start): checkout origin/main +53abaad295f4 HEAD@{44}: checkout: moving from aggregates to tidydiags +4f17932fb479 HEAD@{45}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +9cf82ca7e4ee HEAD@{46}: checkout: moving from tmplargs to aggregates +cd45e8c7bc16 (tmplargs) HEAD@{47}: rebase (finish): returning to refs/heads/tmplargs +cd45e8c7bc16 (tmplargs) HEAD@{48}: rebase (pick): [CodeCompletion] Signature help for template argument lists +3a33c0b1ce0d HEAD@{49}: rebase (start): checkout origin/main +36da2251bd60 HEAD@{50}: commit (amend): [CodeCompletion] Signature help for template argument lists +ef7f8bce7503 HEAD@{51}: checkout: moving from arcpatch-D116218 to tmplargs +f2b2aae6843b (arcpatch-D116218) HEAD@{52}: commit (amend): [clangd] Fix selection on multi-dimensional array. +50f8215cc9be HEAD@{53}: commit (amend): [clangd] Fix selection on multi-dimensional array. (alternate version) +85244a21fd16 HEAD@{54}: commit (amend): [clangd] Fix selection on multi-dimensional array. 
(alternate version) +169e8e0af680 HEAD@{55}: rebase (finish): returning to refs/heads/arcpatch-D116218 +169e8e0af680 HEAD@{56}: rebase (pick): [clangd] Fix selection on multi-dimensional array. +ca271f4ef5a2 HEAD@{57}: rebase (start): checkout origin/main +70d0857a4dea HEAD@{58}: commit: [clangd] Fix selection on multi-dimensional array. +09f8315bba39 (arraytype) HEAD@{59}: checkout: moving from main to arcpatch-D116218 +09f8315bba39 (arraytype) HEAD@{60}: checkout: moving from tmplargs to main +ef7f8bce7503 HEAD@{61}: commit (amend): [CodeCompletion] Signature help for template argument lists +a7b31d694812 HEAD@{62}: checkout: moving from insertion_point to tmplargs +aacd98d5b867 HEAD@{63}: commit (amend): [clangd] Helper for determining member insertion point. +ac972fe4ff15 HEAD@{64}: checkout: moving from main to insertion_point +09f8315bba39 (arraytype) HEAD@{65}: reset: moving to HEAD +09f8315bba39 (arraytype) HEAD@{66}: checkout: moving from constructor to main +41fbc109a1ae (constructor) HEAD@{67}: commit (amend): [clangd] Add code action to generate a constructor for a C++ class +8e709f570606 HEAD@{68}: commit (amend): [clangd] Add code action to generate a constructor for a C++ class +456dc7755f32 HEAD@{69}: commit: [clangd] Add code action to generate a constructor for a C++ class +ac972fe4ff15 HEAD@{70}: checkout: moving from insertion_point to constructor +ac972fe4ff15 HEAD@{71}: checkout: moving from constructor to insertion_point +09f8315bba39 (arraytype) HEAD@{72}: reset: moving to HEAD +09f8315bba39 (arraytype) HEAD@{73}: checkout: moving from constructor to constructor +09f8315bba39 (arraytype) HEAD@{74}: reset: moving to HEAD~1 +aa6435e963ca HEAD@{75}: checkout: moving from insertion_point to constructor +ac972fe4ff15 HEAD@{76}: checkout: moving from constructor to insertion_point +aa6435e963ca HEAD@{77}: commit (amend): [clangd] Helper for determining member insertion point. 
+45d6b0cd4780 HEAD@{78}: commit (amend): [clangd] Helper for determining member insertion point. +ac972fe4ff15 HEAD@{79}: checkout: moving from insertion_point to constructor +ac972fe4ff15 HEAD@{80}: checkout: moving from specialmember to insertion_point +60a028a904d5 (specialmember) HEAD@{81}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +939996aed14e HEAD@{82}: checkout: moving from 939996aed14ec84df8cce3f4a5ec4988c4a1f564 to specialmember +939996aed14e HEAD@{83}: rebase (pick): [clangd] Code action to declare missing move/copy constructor/assignment +ac972fe4ff15 HEAD@{84}: rebase (start): checkout insertion_point +bbeef89ae1af HEAD@{85}: checkout: moving from specialmember to bbeef89ae1af +ac972fe4ff15 HEAD@{86}: rebase (finish): returning to refs/heads/specialmember +ac972fe4ff15 HEAD@{87}: rebase (start): checkout insertion_point +bbeef89ae1af HEAD@{88}: checkout: moving from insertion_point to specialmember +ac972fe4ff15 HEAD@{89}: commit (amend): [clangd] Helper for determining member insertion point. +0eac12f86ab3 HEAD@{90}: commit (amend): [clangd] Helper for determining member insertion point. +156bab8c3ab7 HEAD@{91}: commit (amend): [clangd] Helper for determining member insertion point. +da546cc68656 HEAD@{92}: commit (amend): [clangd] Helper for determining member insertion point. +407f5558b48c HEAD@{93}: commit: [clangd] Helper for determining member insertion point. 
+09f8315bba39 (arraytype) HEAD@{94}: checkout: moving from main to insertion_point +09f8315bba39 (arraytype) HEAD@{95}: checkout: moving from specialmember to main +bbeef89ae1af HEAD@{96}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +a66453e487e3 HEAD@{97}: reset: moving to HEAD +a66453e487e3 HEAD@{98}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +31c647f871a8 HEAD@{99}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +500372f1ac6d HEAD@{100}: reset: moving to HEAD +500372f1ac6d HEAD@{101}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +174dac9746f1 HEAD@{102}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +34bba952dadc HEAD@{103}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment +8b2288785c88 HEAD@{104}: commit: [clangd] Code action to declare missing move/copy constructor/assignment +09f8315bba39 (arraytype) HEAD@{105}: checkout: moving from main to specialmember +09f8315bba39 (arraytype) HEAD@{106}: checkout: moving from typeDefinition to main +6fbb2e3eca26 (typeDefinition) HEAD@{107}: commit (amend): [clangd] Implement textDocument/typeDefinition +1ea84876711e HEAD@{108}: commit (amend): [clangd] Implement textDocument/typeDefinition +2bf2e73c73d9 HEAD@{109}: commit (amend): [clangd] Implement textDocument/typeDefinition +d15e5a597103 HEAD@{110}: commit (amend): [clangd] Implement textDocument/typeDefinition +494458626828 HEAD@{111}: commit: [clangd] Implement textDocument/typeDefinition +09f8315bba39 (arraytype) HEAD@{112}: checkout: moving from main to typeDefinition +09f8315bba39 (arraytype) HEAD@{113}: rebase (finish): returning to refs/heads/main +09f8315bba39 (arraytype) HEAD@{114}: rebase (start): checkout origin/main +72ea6fbc150a HEAD@{115}: checkout: moving from arraytype to main +09f8315bba39 
(arraytype) HEAD@{116}: rebase (finish): returning to refs/heads/arraytype +09f8315bba39 (arraytype) HEAD@{117}: rebase (pick): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent +ed67d5a03aaf HEAD@{118}: rebase (start): checkout origin/main +5c3e13fb9825 HEAD@{119}: commit (amend): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent +991036e41b3b HEAD@{120}: commit (amend): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent +47ffbac82a3f HEAD@{121}: commit (amend): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent +9923e86a3a96 HEAD@{122}: rebase (continue) (finish): returning to refs/heads/arraytype +9923e86a3a96 HEAD@{123}: rebase (continue): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent +15787ccd4574 HEAD@{124}: rebase (start): checkout origin/main +1dc8f4774d34 HEAD@{125}: rebase (abort): updating HEAD +0651768d7a19 HEAD@{126}: rebase (pick): updated suggesting/coloring of call & return args & implicit operands. +f86d65195716 HEAD@{127}: rebase (pick): updated suggesting/coloring of call & return args & implicit operands +3002813063a8 HEAD@{128}: rebase (pick): --changed Sugesting colors for method calls/return values etc. +7bdf5ba01bb0 HEAD@{129}: rebase (pick): fixed a coalscing bug +7524a1746083 HEAD@{130}: rebase (pick): Add library +ba28b47cb919 HEAD@{131}: rebase (pick): Be const correct +1aa4098bafea HEAD@{132}: rebase (pick): Minor code cleanups +629281c4710d HEAD@{133}: rebase (pick): Add cast_or_null & dyn_cast_or_null +24c3a0a84fda HEAD@{134}: rebase (pick): Implement initializers for structs and pointers +2f93ba463315 HEAD@{135}: rebase (pick): Rename ConstPoolPointerReference to ConstPoolPointerRef - My fingers get tired typing that much +e58844e57ecb HEAD@{136}: rebase (pick): Improve error messages on assertion failure. 
+73eab57ce304 HEAD@{137}: rebase (pick): * getExitNode() doesn't exist in method anymore +171cd5f1d612 HEAD@{138}: rebase (pick): Added Instrumentation subdirectory. +2423a863e15b HEAD@{139}: rebase (pick): Implement global variables. Struct and Pointer initializers are not implemented yet though +3af979135686 HEAD@{140}: rebase (pick): Implement linking of global variable constant references +c1129719df3c HEAD@{141}: rebase (pick): Add some more interesting test cases for the linker +23ab0f2c31f9 HEAD@{142}: rebase (pick): Oops, didn't handle hex values correctly. :( +c39415b7c1cd HEAD@{143}: rebase (pick): * Fix the constpoolarray -> c"" printing routines to escape things properly +eb2b5e2b34dd HEAD@{144}: rebase (pick): *** empty log message *** +a28f8e125258 HEAD@{145}: rebase (pick): Minor cleanup +131d908673ef HEAD@{146}: rebase (pick): *** empty log message *** +c11c83a339c8 HEAD@{147}: rebase (pick): Implement linker. It's 95% working now. +30fa72c1feb8 HEAD@{148}: rebase (pick): More interesting testcase +5a055ed280fd HEAD@{149}: rebase (pick): Forward operands into implicit uses as well as explicit ones. +eab25baceb5b HEAD@{150}: rebase (pick): External methods shouldn't have argument lists +f2bd12a6d988 HEAD@{151}: rebase (pick): Update comment, remove misleading method +67bb9adc5a0f HEAD@{152}: rebase (pick): Initializers are not const even if the GV is. +c3d3c0630d9d HEAD@{153}: rebase (pick): Add a new -d argument to dump the internal rep as assembly. +a7b34ac799ce HEAD@{154}: rebase (pick): Cast NULL when requested. +731d883c3187 HEAD@{155}: rebase (pick): Added getEntryNode() and getExitNode() functions. +0003ef936aab HEAD@{156}: rebase (pick): Insert code to trace values at basic block and method exits. +a4927eee849c HEAD@{157}: rebase (pick): Insert code to trace values at basic block and method exits. +59a501e47f06 HEAD@{158}: rebase (pick): Added routine to create a char array for a string. 
+f3328d15f543 HEAD@{159}: rebase (pick): Added routine to create a char array for a string. +1b48aa670b0f HEAD@{160}: rebase (pick): Enable most tests. +b578289a8fa8 HEAD@{161}: rebase (pick): Added a string global variable. +86d5a822efcc HEAD@{162}: rebase (pick): Two bug fixes that were suppressing some "load-constant-into-register" instrs. +ae10fbb5bb27 HEAD@{163}: rebase (pick): Move the burg file to here. Add .in suffix to indicate that it gets +42fcb2d89630 HEAD@{164}: rebase (pick): Make the sparc.burg file be a little more flexible and rubust in the fact of +e5eb3fe6f018 HEAD@{165}: rebase (pick): Use the instruction.def file to remain up to date with future instruction +a95ca89e8976 HEAD@{166}: rebase (pick): New file to define instructions... +1a9806113e30 HEAD@{167}: rebase (pick): Burg files should come out of the Debug Directory for temporary files +a1012b17f9a7 HEAD@{168}: rebase (pick): New module linking functionality prototype +5726d1d2ecd4 HEAD@{169}: rebase (pick): Check in makefile +bfc372b3a5a3 HEAD@{170}: rebase (pick): Fixed tags target so it only happens at root level. +ad26264a523c HEAD@{171}: rebase (pick): Add C source for testmisc.ll +9c5a5f970837 HEAD@{172}: rebase (pick): Dang, I screwed up the merge. 
This should be better +db6e9ecc453f HEAD@{173}: rebase (pick): New testcase for testing constant pointers to globals +5eff5faafba2 HEAD@{174}: rebase (pick): Test files for linker +77a7c277d54d HEAD@{175}: rebase (pick): MethodTypes take an explicit isVarArg argument +19293514b699 HEAD@{176}: rebase (pick): Fix comment flyer +684125529570 HEAD@{177}: rebase (pick): Add new linker +cff52fd4a48a HEAD@{178}: rebase (pick): Build the new linker +cba92a5489f2 HEAD@{179}: rebase (pick): Use null keyword instead of kludge +42c3881f4c41 HEAD@{180}: rebase (pick): Add more function call and prototype specific tests +c82370afa049 HEAD@{181}: rebase (pick): Compile the transforms directory +6dad439c635a HEAD@{182}: rebase (pick): Start of a linker +71585a57f2b0 HEAD@{183}: rebase (pick): Implement the invoke instruction +4aac971feb66 HEAD@{184}: rebase (pick): * Fix a nefarious bugs: TypesEqual was wrong for varargs methods +81374f6531a5 HEAD@{185}: rebase (pick): Convert a runtime check into an assertion +fc856307fe9a HEAD@{186}: rebase (pick): * Add support for Module specific constants +5119ee94dd54 HEAD@{187}: rebase (pick): Add new TerminatorInst ctor for invoke +97aceab30ca0 HEAD@{188}: rebase (pick): * Fix TODO +fdd33fff63c6 HEAD@{189}: rebase (pick): Fix broken #endif +6eab48b3c68d HEAD@{190}: rebase (pick): * Add #include +d64929f66211 HEAD@{191}: rebase (pick): Add StringList support +d5f1339c1461 HEAD@{192}: rebase (pick): Support the invoke instruction +362b89b2697e HEAD@{193}: rebase (pick): Support indirect calls +250990a3ef85 HEAD@{194}: rebase (pick): not is a keyword in ansi C++, avoid it +cad98049b01e HEAD@{195}: rebase (pick): * Fix privacy issues on RegToRefVecMap +6d8a50fb7185 HEAD@{196}: rebase (pick): * Use new style casts more +d5ef68f42b47 HEAD@{197}: rebase (pick): * Add real support for global variable addresses initializing constants +5b89a0710636 HEAD@{198}: rebase (pick): * Support writing GlobalVariables with info comments by them +38600d48ce25 
HEAD@{199}: rebase (pick): * Add support for forward references of global variable addresses +30567de7ef54 HEAD@{200}: rebase (pick): Add operator< to ValID's so that they can be put in map's +c4253f651f13 HEAD@{201}: rebase (pick): Remove exception specification +e46a527bd890 HEAD@{202}: rebase (pick): Support the new Invoke instruction +dbf3974c7876 HEAD@{203}: rebase (pick): Support pointers to globals happily +f72067424d95 HEAD@{204}: rebase (pick): Fix code to make GCC 2.96 happy +ec668ae234aa HEAD@{205}: rebase (pick): * Add support for Invoke instructions +b92a0735743c HEAD@{206}: rebase (pick): Fix filename in comment +eac143eefddc HEAD@{207}: rebase (pick): Better linux support. This file still sucks +98503c7ebc77 HEAD@{208}: rebase (pick): Fix broken #endif +ca3d924e3846 HEAD@{209}: rebase (pick): not is a keyword in Ansi C++. Avoid it +c168bc53e09e HEAD@{210}: rebase (pick): Clean up initializers for GCC 2.96 +b54fa1a20171 HEAD@{211}: rebase (pick): Remove exception specification. Only slows code down. +ed95b6657e6b HEAD@{212}: rebase (pick): Changes to compile with GCC 2.96 +c22edf4bc5a2 HEAD@{213}: rebase (pick): Add comment indicating semantics of indirect calls +4dcafac17dcb HEAD@{214}: rebase (pick): New ctor for invoke inst +a8a651345904 HEAD@{215}: rebase (pick): Add support for indirect calls +af0d7630a30d HEAD@{216}: rebase (pick): Add some casts to make GCC 2.96 happy. +868db5e40c09 HEAD@{217}: rebase (pick): Add use_back() methods +08696c9b3a19 HEAD@{218}: rebase (pick): Add classof implementations for User +3776f284eb1a HEAD@{219}: rebase (pick): Expose typedefs +d5660029e7f9 HEAD@{220}: rebase (pick): Add support for module local constants +8f28f49eecf5 HEAD@{221}: rebase (pick): Add new opcode for Invoke instruction +c6c0d280af0b HEAD@{222}: rebase (pick): Minor changes, add new ctor for invoke instruction +f230dca276c8 HEAD@{223}: rebase (pick): Add assertions +c4ea40ffae4f HEAD@{224}: rebase (pick): * Minor Formatting changes. 
+e4f89d5176af HEAD@{225}: rebase (pick): * Add destroyConstant stuff to handle module local constants +0a73f5e2e880 HEAD@{226}: rebase (pick): Update todo's +b66fb116fe18 HEAD@{227}: rebase (pick): Each tools should not make tags +969240424993 HEAD@{228}: rebase (pick): --corrected coalescing test: coalsed only if two are of the same reg class +54622d353dc1 HEAD@{229}: rebase (pick): added support for implict operands in machine instruction +af225afe483a HEAD@{230}: rebase (pick): --added support for implicit operands in machine instructions +4c1eeb2f0207 HEAD@{231}: rebase (pick): Delete *.s on clean. +e0e2c0de0d59 HEAD@{232}: rebase (pick): Record implicitRefs for each machine instruction instead of +c7344856e2e2 HEAD@{233}: rebase (pick): Add graph edges due to implicit refs in each machine instruction. +da6e725984b0 HEAD@{234}: rebase (pick): Added a rule for building TAGS. +84249865be76 HEAD@{235}: rebase (pick): Repeat some libs due to circular dependences between Sparc and other +b41937df9bac HEAD@{236}: rebase (pick): Don't insert useful instructions in delay slot of a RETURN. +8ec3840fd358 HEAD@{237}: rebase (pick): Insert code to load constants used as Call or Return arguments. +dfb65425ee14 HEAD@{238}: rebase (pick): Machine-independent code generation routines used in instruction +e8a1ea03539a HEAD@{239}: rebase (pick): Moved code generation support routines to InstrSelectionSupport.{h,cpp}. +920028cc0b2f HEAD@{240}: rebase (pick): Moved code generation support routines to InstrSelectionSupport.cpp. +362badd47ffe HEAD@{241}: rebase (pick): Moved first function to "simpleadd.ll". +e3a87d5e89a0 HEAD@{242}: rebase (pick): testmemory and sumarray now work with instruction selection. 
+a08813e66ef9 HEAD@{243}: rebase (pick): --removed %g regs being allocated - fix later +576355e82463 HEAD@{244}: rebase (pick): Add hack to get rid of malloc & free instructions for code generation +5566f9c03615 HEAD@{245}: rebase (pick): Add comment +704887cc1858 HEAD@{246}: rebase (pick): Support multiple global's definitions +97e5c873483d HEAD@{247}: rebase (pick): Factor parentness out of Module & GlobalVariable into GlobalValue +370c4a28a876 HEAD@{248}: rebase (pick): Rename getNullPointer to getNull +ddfe3ae972ff HEAD@{249}: rebase (pick): Rename getNullPointer to getNull +7b0ee1e797ab HEAD@{250}: rebase (pick): Allow duplicate constant values as long as they are compatible. +a13bc1844828 HEAD@{251}: rebase (pick): Share ConstPoolPointer elements correctly +bd8752038e30 HEAD@{252}: rebase (pick): Fix broken testcase +398a1a5573f9 HEAD@{253}: rebase (pick): Add check to make sure that we dont reference MEthodType's directly +bdb349a55426 HEAD@{254}: rebase (pick): * Both Method & GlobalVariable now subclass GlobalValue +5c22b0d643af HEAD@{255}: rebase (pick): Adjust test cases to match the fact that methods are now explicit pointer values, not explicit +1f92e9fc5d90 HEAD@{256}: rebase (pick): First try at a horrible global value reference wrapper +ebf7f8fa07e7 HEAD@{257}: rebase (pick): Clean up parser, fix a bug that prevented this from working: +eedd6c7c8622 HEAD@{258}: rebase (pick): * Add support for null as a constant +550de7f1b919 HEAD@{259}: rebase (pick): Modify testcases for new LLVM const syntax +dfe6c7e0aff1 HEAD@{260}: rebase (pick): Commit more code over to new cast style +8d5994a86223 HEAD@{261}: rebase (pick): Convert more code to use new style casts +22c53dc308b0 HEAD@{262}: rebase (pick): Add more support for new style casts +e876f00ebb60 HEAD@{263}: rebase (pick): Add support for new style casts +0b735821091f HEAD@{264}: rebase (pick): Add support for newer cleaner isa, cast, dyn_cast +8f546a6b1eb9 HEAD@{265}: rebase (pick): Update comments 
+154b8c0b0bdb HEAD@{266}: rebase (pick): Pull predecessor and successor iterators out of the CFG*.h files, and plop them into +96bfa8db5614 HEAD@{267}: rebase (pick): Pull predecessor and successor iterators out of the CFG*.h files, and plop them into +0c5cd66015ba HEAD@{268}: rebase (pick): Comment out a paragraph that refers to a file that no longer exists +bf9adf15ad50 HEAD@{269}: rebase (pick): Fix emission of return instructions +af1ab310689d HEAD@{270}: rebase (pick): Add path to as so it doesn't find llvm as if that path is set. +554b4bc20205 HEAD@{271}: rebase (pick): Exclude a couple of tests that the regalloc stuff doesn't handle yet +2d6c6b32a60e HEAD@{272}: rebase (pick): Add different "cast constant value" for several possible types. +5a0bdbf41700 HEAD@{273}: rebase (pick): Add vector `implicitUses' to class MachineCodeForVMInstr to hold values +69e68114634e HEAD@{274}: rebase (pick): Several fixes: +ecfd19aa7a65 HEAD@{275}: rebase (pick): removing phy regaloc - incorrect file +c9899c19a917 HEAD@{276}: rebase (pick): Change latency of setuw and setsw to 2 cycles. +8e03b2d97f34 HEAD@{277}: rebase (pick): Change ! ( ...== ...) to !=. +aa06d6438043 HEAD@{278}: rebase (pick): Improved dump for disp type operand. +d09bbd3e62ee HEAD@{279}: rebase (pick): Bug fixes: +4542845ffac4 HEAD@{280}: rebase (pick): Minor changes for bug fixes in SchedGraph.cpp. +f2d34339b43a HEAD@{281}: rebase (pick): Two bug fixes: +dadedae23021 HEAD@{282}: rebase (pick): *** empty log message *** +e30f6b836af1 HEAD@{283}: rebase (pick): no major change. 
+17745bb05c7a HEAD@{284}: rebase (pick): added suggesting color support +0c5afc6b26f2 HEAD@{285}: rebase (pick): added suggesting color suppor +bdaab1203288 HEAD@{286}: rebase (pick): added support for suggesting colors +3061d7a1e42b HEAD@{287}: rebase (pick): --added suggesting colors; call/ret arg handling +f3d3eee7e06a HEAD@{288}: rebase (pick): Add a test for the new null keyword +8e9b70834fa4 HEAD@{289}: rebase (pick): Implement constant pointers, and null specifically in the parser, bytecode writer, and +d20cd6b4422b HEAD@{290}: rebase (pick): Implement a constant pointer value +91bf6d53e2e8 HEAD@{291}: rebase (pick): Pull iterators out of CFG.h and genericize them with GraphTraits +1f5ff53527ab HEAD@{292}: rebase (pick): File #include file +60f364cc5b13 HEAD@{293}: rebase (pick): Pull iterators out of CFG.h and CFGdecls and put them in Support directory +ab4adf7cba15 HEAD@{294}: rebase (pick): * Properly escape function names +b329ccfca12b HEAD@{295}: rebase (pick): Check in bug fix for vadve +3eaa426db4d4 HEAD@{296}: rebase (pick): Add commands to assemble and compile a .ll file +0fd9a3dcc702 HEAD@{297}: rebase (pick): Initial support for construction of a call graph +b3a3ecaf05f7 HEAD@{298}: rebase (pick): Add support to print a call graph, and also add support for module level interprocedural analyses +464bdb4b73aa HEAD@{299}: rebase (pick): Adding the tool to the path doesn't break anything anymore +f1f7f171a7a5 HEAD@{300}: rebase (pick): Make error report a little more useful +58d981ac2a15 HEAD@{301}: rebase (pick): ADCE is broken but at least we know why +dae33afb6ab1 HEAD@{302}: rebase (pick): print out value's by pointer +cb586b4aa067 HEAD@{303}: rebase (pick): Add capability to print out call graph +24c1bbab59ca HEAD@{304}: rebase (pick): Global variables/complex constants have been resolved! 
+4d13ee0a9344 HEAD@{305}: rebase (pick): -- fixed a ret val bug +19f2d28d3fb2 HEAD@{306}: rebase (pick): -- removed debugging messages +d23e458745cb HEAD@{307}: rebase (pick): -fixed return value bug. +b53ab66b2055 HEAD@{308}: rebase (pick): Add proper support to send output to the right place +1da35ac9ce16 HEAD@{309}: rebase (pick): Print .def files as well as other files +1a7c20d822d2 HEAD@{310}: rebase (pick): Change debug info from #define to command line option +bdd630363635 HEAD@{311}: rebase (pick): Change debug info from #define to command line option +d27bcdc4d564 HEAD@{312}: rebase (pick): * REMOVE extraneous debug info if DEBUG_RA is not set +b58b0442c078 HEAD@{313}: rebase (pick): Seperate instruction definitions into new SparcInstr.def file +84ba33c8b41a HEAD@{314}: rebase (pick): Okay, make the member function work. +c14992951e06 HEAD@{315}: rebase (pick): Remove global debug output fns that have been superceded by a member func +78a5c492e944 HEAD@{316}: rebase (pick): Remove debugging output stuff +3f14f79d64e6 HEAD@{317}: rebase (pick): Emit assembly language from the target... +5a780fe743b5 HEAD@{318}: rebase (pick): Add emitAssembly Method +6d1bd8d21e41 HEAD@{319}: rebase (pick): Add idea +f821fceb8d6a HEAD@{320}: rebase (pick): Add EmitAssembly to mf +d2ccd8e344fc HEAD@{321}: rebase (pick): First cut at assembly output +7cd873804115 HEAD@{322}: rebase (pick): Add emitAssemblyMethod to TargetMachine +8749075054d9 HEAD@{323}: rebase (pick): *** empty log message *** +ca4aeed4cda6 HEAD@{324}: rebase (pick): --added methods to operand class to set/get registers after register allocation +d3262f97ed7a HEAD@{325}: rebase (pick): -- ruchira +983537f3112b HEAD@{326}: rebase (pick): -- updated printing +df8fc0fcada5 HEAD@{327}: rebase (pick): Remove a copy of a bunch of code +5ff0c9da9f43 HEAD@{328}: rebase (pick): C++ gives us auto_ptr's, so we might as well use them. 
:) +0a6274f4f469 HEAD@{329}: rebase (pick): Fix up code a bit, remove operator<< to Assembly/Writer.h +8ebd15ef9e5b HEAD@{330}: rebase (pick): Remove extraneous #includes +992e6cf11454 HEAD@{331}: rebase (pick): Move operator << from Value.h to Assembly/Writer.h +05c03e0a4a43 HEAD@{332}: rebase (pick): Remove operator << to Assembly/Writer.h +32354c42e162 HEAD@{333}: rebase (pick): Don't check for null on delete +348cbcb3414c HEAD@{334}: rebase (pick): Un-neuter makefile +b9015643ae16 HEAD@{335}: rebase (pick): Minor changes. +31eddde1fbe7 HEAD@{336}: rebase (pick): Folded inssel*.ll into select.ll. +93a7445ced49 HEAD@{337}: rebase (pick): Renamed files to match the primary classes they provide. +73a5ca83c073 HEAD@{338}: rebase (pick): Renamed a header file. +116c6caa7247 HEAD@{339}: rebase (pick): Make class TargetMachine the common interface to all target-dependent +4fc2bc116a7f HEAD@{340}: rebase (pick): Allow pointer constants as well as integer and booleans. +4350d1b2f431 HEAD@{341}: rebase (pick): Make class TargetMachine the common interface to all target-dependent +c3645e342ca4 HEAD@{342}: rebase (pick): Renamed files to match the main classes they provide. +2221c6a54d56 HEAD@{343}: rebase (pick): Cast unsigned to int! It was causing a nice little bug. +3692872402ab HEAD@{344}: rebase (pick): Minor changes. +fdf7be61f2e0 HEAD@{345}: rebase (pick): Don't add instructions to subtree for Phi or Call. +c5ec3128e60a HEAD@{346}: rebase (pick): Format file header. +9bce80700742 HEAD@{347}: rebase (pick): Add new entry/exit edges when removing delay slot nodes from the graph. +0c5c4e8dfb45 HEAD@{348}: rebase (pick): Moved erase edge functions to class SchedGraph. +ad74a2f916dd HEAD@{349}: rebase (pick): Renamed some header files. +6f280562c6f1 HEAD@{350}: rebase (pick): Moved erase-edge functions from SchedGraphNode to SchedGraph. +c20d754ef692 HEAD@{351}: rebase (pick): Moved DebugValue to Value.cpp. 
+a18896cb69d9 HEAD@{352}: rebase (pick): Added debugging support. +b99a5873a966 HEAD@{353}: rebase (pick): Moved debugging interfaces for class Value to Value.h. +de14aceb2e19 HEAD@{354}: rebase (pick): Minor fixes: renamed target machine files; fold sched info into TargetMachine. +1fabb8f4d05b HEAD@{355}: rebase (pick): Make class TargetMachine the common interface to all target-dependent +004e1e8c9bd5 HEAD@{356}: rebase (pick): Added debugging support. +5308e6f9d6ca HEAD@{357}: rebase (pick): Fix testcases to handle new syntax for construction and initializeation +23bc63990bca HEAD@{358}: rebase (pick): Remove the unsized array constraint +23b021feb086 HEAD@{359}: rebase (pick): Add support for global constants, and for initializers for constants +1f2803d9c6b3 HEAD@{360}: rebase (pick): Add support for global constants, and for initializers for constants +e1fed6f079c9 HEAD@{361}: rebase (pick): added a method to get reg num after register allocation +ae7bbf4710cc HEAD@{362}: rebase (pick): modified machine code printing +13af7a7caac6 HEAD@{363}: rebase (pick): -modified machine operand class - took regNum out of union to set regNum after +6bddc120b229 HEAD@{364}: rebase (pick): modified printing of debug messages +313c2a193181 HEAD@{365}: rebase (pick): --added methods for printing +b8916ea9dfc9 HEAD@{366}: rebase (pick): added setRegForValue to MachineOperand class +072b09e468f8 HEAD@{367}: rebase (pick): fixed printing messages +357bf235defd HEAD@{368}: rebase (pick): -- debug messages dissabled +b3a9794066b2 HEAD@{369}: rebase (pick): added reg alloc support +4ac010f69361 HEAD@{370}: rebase (pick): --reg alloc code added +c7e1696e212a HEAD@{371}: rebase (pick): -reg alloc code +74fe0add218c HEAD@{372}: rebase (pick): added register allocation code +1c1d5b77ea72 HEAD@{373}: rebase (pick): Added regalloc +261723120208 HEAD@{374}: rebase (pick): Oops, accidentally checked my debugging makefile +ccba943ebd24 HEAD@{375}: rebase (pick): Fix a bug with not removing 
method level types after compilation +32436a343662 HEAD@{376}: rebase (pick): added RegAlloc Directory to DIRS +1c24930f9da4 HEAD@{377}: rebase (pick): *** empty log message *** +cac3722a15a8 HEAD@{378}: rebase (pick): *** empty log message *** +c6554b4537c1 HEAD@{379}: rebase (pick): Remove invalid testcase +847094903baa HEAD@{380}: rebase (pick): Remove invalid testcase. Unneccesary anyways +d71ff5c79c96 HEAD@{381}: rebase (pick): Add new test cases +7789d9c7f54d HEAD@{382}: rebase (pick): Add support for loading and storing pointers... +a3aa024f5831 HEAD@{383}: rebase (pick): Fix a bug that caused a crash if a setcc had zero uses. +c70348cb828c HEAD@{384}: rebase (pick): Add a forward decl, oops. +85c86566e9a5 HEAD@{385}: rebase (pick): Chris seems fond of #include . Fix these. Also convert use list in +3edb0d2e080e HEAD@{386}: rebase (pick): Add a comment +5c8a3647ccb6 HEAD@{387}: rebase (pick): Minor reformatting, & protection fixes +ec87fa4f8523 HEAD@{388}: rebase (pick): Break scheduling infrastructure out of TargetMachine.cpp into SchedInfo.cpp +d589bb98df47 HEAD@{389}: rebase (pick): Split Register specific stuff out from TargetMachine.h to RegInfo.h +ec018be202c8 HEAD@{390}: rebase (pick): Split Target/Machine.h into three files: +53bcc4463c09 HEAD@{391}: rebase (pick): Make a new llvm/Target #include directory. +aaca226978d7 HEAD@{392}: rebase (pick): Checkin changes to: +9cec2d47b443 HEAD@{393}: rebase (pick): Checkin changes to: +3b15eb471b31 HEAD@{394}: rebase (pick): Move files to new sparc directory +11954336afe2 HEAD@{395}: rebase (pick): Move the sparc target to a new lib/Target directory +a8d3715d2038 HEAD@{396}: rebase (pick): Move files. 
+82cb584aec3c HEAD@{397}: rebase (pick): Move the contents of the CodeGen/TargetMachine/Sparc directory to Target/Sparc +1799226a9df7 HEAD@{398}: rebase (pick): This checkin represents some cleanup of the backend, implementing the following things: +2153a7e280f6 HEAD@{399}: rebase (pick): This checkin represents some cleanup of the backend, implementing the following things: +9936a71b49ba HEAD@{400}: rebase (pick): Updates to use local header files. +ddef6185b427 HEAD@{401}: rebase (pick): Export the instruction forest support from the analysis library +44e4e80c2911 HEAD@{402}: rebase (pick): Initial instruction tree support for the analysis library +001ff12fbe1c HEAD@{403}: rebase (pick): Generic k-way tree support +015b075f7f69 HEAD@{404}: rebase (pick): More cleanups, preparing to revamp InstrForest to, among other things, +d6c5ea5c2392 HEAD@{405}: rebase (pick): * Clean up InstrForest +8f70795fa947 HEAD@{406}: rebase (pick): Eliminate 'BasicNode' from InstrForest. +02e210b78442 HEAD@{407}: rebase (pick): Eliminate MainTreeNode function +bacc3815ee3a HEAD@{408}: rebase (pick): Remove irrelevant gross K&R Cisms +99dec15bddc1 HEAD@{409}: rebase (pick): Handle subtract in expression classifier +9c9d9777ee76 HEAD@{410}: rebase (pick): Disable destructors on constants +58b30135c56a HEAD@{411}: rebase (pick): Use the correct style casts +6fb05a7fb6f1 HEAD@{412}: rebase (pick): Use correct style casts +f6d78c00b28d HEAD@{413}: rebase (pick): Use correct style casts +bd9287aa5602 HEAD@{414}: rebase (pick): Use type checking predicates +b760399acaf2 HEAD@{415}: rebase (pick): Use correct casts +86f6acb766bb HEAD@{416}: rebase (pick): Use predicate for Value type test +b1223a7dc00c HEAD@{417}: rebase (pick): Use predicate for Value type test +5dbd964b9fbc HEAD@{418}: rebase (pick): ModuleTyID doesn't exist anyymore +c583d68d95f8 HEAD@{419}: rebase (pick): getMethodType is now just getType +862b2212c267 HEAD@{420}: rebase (pick): Add support for printing globals 
+9815c0143466 HEAD@{421}: rebase (pick): Update to use correct type cast +4ebfeafd5ae2 HEAD@{422}: rebase (pick): Add support for global variables +7309e89eeead HEAD@{423}: rebase (pick): * Add capability of printing out a global variable +332d403bc73d HEAD@{424}: rebase (pick): * Method::getType should return type cast as MethodType, eliminate getMethodType +09b1c8b53b5b HEAD@{425}: rebase (pick): Update assertion to allow extra case +c9f650f82da6 HEAD@{426}: rebase (pick): Fix a bug I introduced (assertion failed: Unknown operand type), and convert to predicate style for type checks +ca665a4f7301 HEAD@{427}: rebase (pick): Implement global variable support +78c27fc8588b HEAD@{428}: rebase (pick): Add support for external methods +3b4968db64d9 HEAD@{429}: rebase (pick): Genericize support for calling functions a bit +f2292a6f5bef HEAD@{430}: rebase (pick): Add support for tool specified linker options +e7d26918d539 HEAD@{431}: rebase (pick): Remove the definitions of 3 global functions that don't belong in the core +3268cb00c3aa HEAD@{432}: rebase (pick): Implement the subset of the GetConstantValueAsSignedInt function that is needed, locally. Remove the two support functions to inline their contents. +5ce25378872d HEAD@{433}: rebase (pick): Implement the subset of the GetConstantValueAsSignedInt function that is needed, locally. 
+7f1dfe6c75ba HEAD@{434}: rebase (pick): Remove 3 gross global functions that don't belong here +bcfd7d3b4a2f HEAD@{435}: rebase (pick): Rename contype to subtype +925282156193 HEAD@{436}: rebase (pick): Make ADCE more robust, it still has problems, but it's getting closer +a8aa73f44e44 HEAD@{437}: rebase (pick): Fix problems with freeing memory twice +3e58e695c052 HEAD@{438}: rebase (pick): Rename file to be consistent with header name +45093beca645 HEAD@{439}: rebase (pick): Rerun backend tests if as or llc is changed +920978127ffb HEAD@{440}: rebase (pick): iFix dependence order +0dda5dffe9e1 HEAD@{441}: rebase (pick): Clean up Type class by removing mutable ConstRules member and use annotations insead +8926543a23ba HEAD@{442}: rebase (pick): Clean up ConstRules stuff to use annotations instead of a mutable member in Type +0554a9254254 HEAD@{443}: rebase (pick): Convert ConstRules to use annotations to clean it up. +ae70148c0e33 HEAD@{444}: rebase (pick): Fix automatic dependence on static libs +57a4461c8737 HEAD@{445}: rebase (pick): Handle cast float-to-float or cast double-to-double. +e26d17b941c6 HEAD@{446}: rebase (pick): Fix build breakage. :( +468369dd37c3 HEAD@{447}: rebase (pick): I really don't like it when people break the build. +093db3f2c28b HEAD@{448}: rebase (pick): Remove extraneous space +d7fa14961741 HEAD@{449}: rebase (pick): Remove extra #include +13c90b0c405c HEAD@{450}: rebase (pick): *** empty log message *** +ad0e744b8800 HEAD@{451}: rebase (pick): *** empty log message *** +479d6ea91cea HEAD@{452}: rebase (pick): Committed for compliation. Not yet final. 
+3e3b370cfca9 HEAD@{453}: rebase (pick): --Ruchira +215ca905feb5 HEAD@{454}: rebase (pick): New testcase to deal with lists +91c3618d9fba HEAD@{455}: rebase (pick): New file for supporting abstract types +d0201e668537 HEAD@{456}: rebase (pick): Make use of the new TOOLNAME/USEDLIBS options provided in Makefile.common +edadb7525ef9 HEAD@{457}: rebase (pick): Executables all live in a nice centralized location now +a461b8412da2 HEAD@{458}: rebase (pick): Executables have moved into centralized location +f02709b9d7a9 HEAD@{459}: rebase (pick): Support TOOLNAME and USEDLIBS options for easier tool building +e41581d43385 HEAD@{460}: rebase (pick): Remove old old file +cb93b76e7fdc HEAD@{461}: rebase (pick): Convert llc driver to standard tool format +cb8ea37f651a HEAD@{462}: rebase (pick): Provide a way to change the incoming value for a phi node +21daac648d0a HEAD@{463}: rebase (pick): Add llc path to setup +a3f8c0135396 HEAD@{464}: rebase (pick): Uhm... that was really bad +a428778af63a HEAD@{465}: rebase (pick): Clean up driver +545e4d0d6342 HEAD@{466}: rebase (pick): Make makefile not depend on where stuff is installed!!!! 
+63fb58422942 HEAD@{467}: rebase (pick): Updates to work with new lack of constant pool +d08a74d2397c HEAD@{468}: rebase (pick): Remove unneeded #includes +17ba4b1a7377 HEAD@{469}: rebase (pick): Remove unnecesary #include add dump calls pulled out of .h file +4baa9c258dc7 HEAD@{470}: rebase (pick): * Remove lots of #includes +5fbff64a9093 HEAD@{471}: rebase (pick): * Remove lots of unnecesary #includes +fa24fc193248 HEAD@{472}: rebase (pick): * Remove lots of annoying extra #includes +4a1115871ab1 HEAD@{473}: rebase (pick): * Add tag so emacs knows it's a c++ file +aa1f51a47db4 HEAD@{474}: rebase (pick): Add tags so emacs knows these are C++ files +66cdfde08ddd HEAD@{475}: rebase (pick): Remove extra space +df7b57cb2016 HEAD@{476}: rebase (pick): Remove ReversePostOrderTraversal declaration +e182a70686df HEAD@{477}: rebase (pick): * Don't predefine ReversePostOrderTraversal because it adds a dependence on vector +a7b751de9148 HEAD@{478}: rebase (pick): Check opaque, abstract, and recursive type handling +f65fc4c4b0ca HEAD@{479}: rebase (pick): NEw file +f5797eee291e HEAD@{480}: rebase (pick): Moved functionality into the other constant pool stuff +c317aff403de HEAD@{481}: rebase (pick): Follow the golden rule of the coding standards guide: Make the code look +228b2301a5b8 HEAD@{482}: rebase (pick): The header file for a translation unit should always be included first +ec28d6b33de6 HEAD@{483}: rebase (pick): A file should always include it's private header file *FIRST* see the +a4fd66e4bb44 HEAD@{484}: rebase (pick): Constant pool is eliminated +895e8966aaf7 HEAD@{485}: rebase (pick): Add support for iteration through type graphs +1bc1a1e55811 HEAD@{486}: rebase (pick): Remove support for const pool merging, which is obsolete now. +1f68aecd491b HEAD@{487}: rebase (pick): Annotations are now const +aa592d53a869 HEAD@{488}: rebase (pick): Build lli first +a7352c105c5a HEAD@{489}: rebase (pick): Symboltables are sorted in the bytecode, so no problems here! 
+5fdc17bb41c2 HEAD@{490}: rebase (pick): Cleanup +98cf8e526cfc HEAD@{491}: rebase (pick): Support abstract types +e64122141c47 HEAD@{492}: rebase (pick): Support a abstract, opaque, and recursive types +299db7ad37f6 HEAD@{493}: rebase (pick): Types and constnats are wierd objects in the symtabs +391ecb41103e HEAD@{494}: rebase (pick): Modules must have a valid, nonnull type. Make them void +186d4233d066 HEAD@{495}: rebase (pick): Support new setName interface +7339777dc091 HEAD@{496}: rebase (pick): * Support new setname interface +ba310ef38dcf HEAD@{497}: rebase (pick): * Cnstants are now global objects +05ef1117f8d2 HEAD@{498}: rebase (pick): Support new setName itf +3d922776af3d HEAD@{499}: rebase (pick): Annotations are const objects now +3ad5e85b0e7c HEAD@{500}: rebase (pick): Types and constants are wierd things in symbol tables now +e95eeb238191 HEAD@{501}: rebase (pick): * Eliminate reference to ConstantPool class +69013e51442c HEAD@{502}: rebase (pick): Constant pool is dead +6aabf9bb8d09 HEAD@{503}: rebase (pick): Constants are now global unique objects +5eccfe8f4744 HEAD@{504}: rebase (pick): * Eliminate constant pool dependancies: +116bd1f60c7d HEAD@{505}: rebase (pick): * Supoprt global constants +719ec15e3bca HEAD@{506}: rebase (pick): * Support global constants +3e22e6fbc35c HEAD@{507}: rebase (pick): annotations are now const +94469c594e8f HEAD@{508}: rebase (pick): * Emit bytecode using a deque instead of a vector to be faster +cd31dfffe14a HEAD@{509}: rebase (pick): * Remove support for internal constant pool +0ccb4914c583 HEAD@{510}: rebase (pick): * Assembly writer is not a module analyzer anymore +81be60efae5b HEAD@{511}: rebase (pick): * Add support for forward referencing types +92f9faa8cd41 HEAD@{512}: rebase (pick): Add support for forward referencing types +7cb39bcc9b11 HEAD@{513}: rebase (pick): Add support for an opaque type +0cc953a4eb36 HEAD@{514}: rebase (pick): Remove #include of nonexistant header file +e659434201e7 HEAD@{515}: 
rebase (pick): * Slot calc is now simpler and not based on module analyzer. +d9953427123b HEAD@{516}: rebase (pick): Module analyzer no longer has to iterate over constant pool +59b2b4978c66 HEAD@{517}: rebase (pick): Simplify code by eliminating need to hang onto constant pool references +ca915a915738 HEAD@{518}: rebase (pick): * Fixed mapped_iterator to actually work with functors +db2d5ad6fc13 HEAD@{519}: rebase (pick): Constant pools no longer exist +e6503b4355e7 HEAD@{520}: rebase (pick): Eliminate DoConstantPoolMerging. ConstantPools no longer exist +8e819e87f9aa HEAD@{521}: rebase (pick): You no longer have to delete constants! They are located in a global +9e1456843e33 HEAD@{522}: rebase (pick): Annotations are now passed around as const objects +37781e4265d3 HEAD@{523}: rebase (pick): Use a deque instead of a vector for greater efficiency writing bytecode +68b52d48b8d2 HEAD@{524}: rebase (pick): Clean stuff up. +ef8df94e3aba HEAD@{525}: rebase (pick): Simplify SlotCalculator. SlotCalculator is now not a ModuleAnalyzer +220b450fb4a8 HEAD@{526}: rebase (pick): Simplify analyzer +435cda780cfc HEAD@{527}: rebase (pick): * Fix long standing problems that would affect inlining. How could this have worked? +0cb567d4d189 HEAD@{528}: rebase (pick): Add assertion to check for +e0ab1c69297f HEAD@{529}: rebase (pick): * Values are AbstactTypeUsers to support abstract types +16c83b3c1356 HEAD@{530}: rebase (pick): Remove extra whitespace at EOL +a2e45cbc6285 HEAD@{531}: rebase (pick): * Add support for Opaque & Abstract types. +875576a6650b HEAD@{532}: rebase (pick): Support abstract types by keeping on the use list of the abstract type. +70bc7b10091b HEAD@{533}: rebase (pick): SymTabValues no longer hold constant pools +c3d4689a42cb HEAD@{534}: rebase (pick): SymTabValue no longer includes ValueHolder for Module. 
Include it ourself +aefcbb9a7f94 HEAD@{535}: rebase (pick): * Support new setName interface +22059fea78a8 HEAD@{536}: rebase (pick): Support new setName interface +30dd0bdb5f35 HEAD@{537}: rebase (pick): * Add new DerivedType base class that goes between Type and the derived types +fb9e4e1fcdc4 HEAD@{538}: rebase (pick): Implement support for globally unique constants. Constants no longer live +924247d31d99 HEAD@{539}: rebase (pick): Add support for walking type graphs +fa3aa419ab9f HEAD@{540}: rebase (pick): Changing setName semantics +38d0897ea620 HEAD@{541}: rebase (pick): Make annotations operations const with a mutable annotation list so that +59216be202de HEAD@{542}: rebase (pick): Fixed the "output constant pool even if he have no constants" issue +ab906331394b HEAD@{543}: rebase (pick): whoo hoo I did something! :) +628ad7914f58 HEAD@{544}: rebase (pick): Make fib be more real +e5ad7ea67698 HEAD@{545}: rebase (pick): *** empty log message *** +147dbdd611ae HEAD@{546}: rebase (pick): *** empty log message *** +07a717031897 HEAD@{547}: rebase (pick): Added directory LiveVar/ +2b9d47fba512 HEAD@{548}: rebase (pick): Makefile for tools/tests/ +ef1302a7da62 HEAD@{549}: rebase (pick): Driver to test IsPowerOf2. Could be extended for other library routines. +eb98e995c108 HEAD@{550}: rebase (pick): Add testcodegen target, and restrict which tests are run for it. +8e434f5bede3 HEAD@{551}: rebase (pick): Added nonterminals for arithmetic operations where one operand is constant. +8846488b12e7 HEAD@{552}: rebase (pick): Changed link line. +b9204403813b HEAD@{553}: rebase (pick): Add calls to NormalizeMethod() and to ScheduleInstructionsWithSSA(). +ad0b73970f13 HEAD@{554}: rebase (pick): Makefile for InstrSched/ +03d07894e506 HEAD@{555}: rebase (pick): Remove source list. +bf1f10e707bf HEAD@{556}: rebase (pick): Added directory InstrSched. 
+db25e211611a HEAD@{557}: rebase (pick): Major changes too hard to document :-) +6195f94883e7 HEAD@{558}: rebase (pick): Added function MachineInstr::operandIsDefined(i) and decl for +4aa6182a26f1 HEAD@{559}: rebase (pick): Extensive additions for supporting instruction scheduling. +21aba4339c60 HEAD@{560}: rebase (pick): Added class MachineSchedInfo and several supporting classes +d0513476dc87 HEAD@{561}: rebase (pick): Implementation of instruction scheduling for LLVM. +3222a43515d4 HEAD@{562}: rebase (pick): Class that encapsulates priority heuristics for instruction scheduling. +a3bb9d7ef0f4 HEAD@{563}: rebase (pick): Scheduling DAG for instruction scheduling. Currently for a single basic block. +f4be165ab676 HEAD@{564}: rebase (pick): Moved debug options declaration to header file, and moved +f914ba215bc2 HEAD@{565}: rebase (pick): Moved function PrintMachineInstructions here. +fb1a19d9a411 HEAD@{566}: rebase (pick): analyze() now checks to see that we don't analyze the same method twice. +9e8f74af6ec5 HEAD@{567}: rebase (pick): *** empty log message *** +3a2656af412d HEAD@{568}: rebase (pick): Simplification transformations to normalize the code for later passes. +bbb02c1d7c9b HEAD@{569}: rebase (pick): Use const int instead of #define. +d0b683357562 HEAD@{570}: rebase (pick): Add copy and assignment operators for POIterator, and +9f98fb5b9284 HEAD@{571}: rebase (pick): Added InstrSched library to link line. 
+dac45308ccd6 HEAD@{572}: rebase (pick): I suck +ff67dcc22be0 HEAD@{573}: rebase (pick): Initial checkin of TargetData code +8472822ff914 HEAD@{574}: rebase (pick): Remove target specific stuff from Type classes +13bd108c03e0 HEAD@{575}: rebase (pick): Remove target specific method from MemAccessInst class +2771054dbf3e HEAD@{576}: rebase (pick): Convert to use the new factored out TargetData class +ddadbddb187b HEAD@{577}: rebase (pick): Factor code out to the TargetData class +3e08de6cee86 HEAD@{578}: rebase (pick): Use the new TargetData class to factor out some of the shared code +729c3d47e91e HEAD@{579}: rebase (pick): Remove target specific method. +5ca1a2bcbc44 HEAD@{580}: rebase (pick): Remove target specific code, move to TargetData.cpp file +7cd798c969a4 HEAD@{581}: rebase (pick): Support passing a data pointer to annotation factory methods +3b3efaeeaf76 HEAD@{582}: rebase (pick): Demolish explicit source list +1afbb4027fae HEAD@{583}: rebase (pick): Extend annotations to pass data pointers around to the functions +74782cb4a340 HEAD@{584}: rebase (pick): Add another TODO: sigh +ab8c3000e11e HEAD@{585}: rebase (pick): Lots of new functionality +a9a8941bb775 HEAD@{586}: rebase (pick): Remove explicit source list +f011d42626b3 HEAD@{587}: rebase (pick): Add dependence to libvmcore. 
+37c91bae4bcd HEAD@{588}: rebase (pick): Make sure noone branches to the entry node of the method +ee0ddad61d01 HEAD@{589}: rebase (pick): Compile LLI +d34454f43919 HEAD@{590}: rebase (pick): Rename start methods to main so interpreter works easier +44dfadcd4a33 HEAD@{591}: rebase (pick): Add annotation support +03d42fd345d1 HEAD@{592}: rebase (pick): Handle case where there is no exit node from a flowgraph +6c329f4eaed8 HEAD@{593}: rebase (pick): Changed an assertion message +350d117dbdd5 HEAD@{594}: rebase (pick): Add annotation support to value +311767f056af HEAD@{595}: rebase (pick): * Add assertions +55c6be031f13 HEAD@{596}: rebase (pick): Initial checkin of interpreter +3fdb0df0b0b2 HEAD@{597}: rebase (pick): LV code on machine instructions +19e88d249e25 HEAD@{598}: rebase (pick): LV info on machine instructions +b1dfaf6145ab HEAD@{599}: rebase (pick): Corrected the compilation error by making the ValOperator class a friend of +3059c0b24b7c HEAD@{600}: rebase (pick): Always set isDef for operand in position resultPos. +081ab0fa9e0f HEAD@{601}: rebase (pick): Changed SetMachineOpernad calls in Set3OperandsFromInstr so that the +6be8772e0463 HEAD@{602}: rebase (pick): Changed case 64 to make the first arg of phi a defintion +abc698370478 HEAD@{603}: rebase (pick): Can't use ref to stack value! +56e7b4262d3e HEAD@{604}: rebase (pick): Needed old conditions as well as new in skipToNextVal()! +2b2d58164051 HEAD@{605}: rebase (pick): Bug fix in ValOpIterator: not moving past operand with NULL Value. +4a1a05bc1473 HEAD@{606}: rebase (pick): *** empty log message *** +32525540235d HEAD@{607}: rebase (pick): added a default isDef arg to SetMachineOperand method - Ruchira +ac7c6045f846 HEAD@{608}: rebase (pick): Added isDef field to MachineOperand class - Ruchira +f0942ac597e7 HEAD@{609}: rebase (pick): Add CC operand as 4th operand of SUBcc, and mark it as a def. 
+9568ebd1a049 HEAD@{610}: rebase (pick): Use extra operand for instructions that set a CC register that +17d5bdb8c5dc HEAD@{611}: rebase (pick): Also, move burg rule to Makefile.common. +5efe6ec39c6f HEAD@{612}: rebase (pick): And add rule to create a .cpp source file from burg input file! +5b8a3ae17209 HEAD@{613}: rebase (pick): Better still, lets move pathname for Burg to Makefile.common. +415c589a5b97 HEAD@{614}: rebase (pick): Add path and options for burg. +15a90d21c83f HEAD@{615}: rebase (pick): Use full pathname for burg. +044f893ad519 HEAD@{616}: rebase (pick): Allow numOperands of -1 for variable #operands. +6b7eebde250d HEAD@{617}: rebase (pick): Simplify command line options, and add option for printing +5ac12a3af462 HEAD@{618}: rebase (pick): Had used the wrong option. +27df4e0f0c54 HEAD@{619}: rebase (pick): Added tree nodes for Phi instructions. +3d470f658f50 HEAD@{620}: rebase (pick): Generate tree nodes for Phi instructions. +5745231c1ee0 HEAD@{621}: rebase (pick): Allow machine instructions with variable numbers of arguments. +3de046767b96 HEAD@{622}: rebase (pick): Added dummy Phi instruction. +7df9d89320cb HEAD@{623}: rebase (pick): Generate dummy Phi machine instruction, plus a bug fix for BrCond(boolreg). +371350759bd5 HEAD@{624}: rebase (pick): Added support for testing instruction selection on all but 2 tests. +09c28c22fde1 HEAD@{625}: rebase (pick): Added class MachineCodeForBasicBlock. +ee4ef4ffe10c HEAD@{626}: rebase (pick): Record machine instructions in the vector for each basic block. +75e6a0432e3b HEAD@{627}: rebase (pick): Added vector of machine instructions for the basic block. 
+6c523d7b3a45 HEAD@{628}: rebase (pick): New test cases +a991e5fcc19d HEAD@{629}: rebase (pick): Remove some gross stuff +33162a8d8802 HEAD@{630}: rebase (pick): Allow vararg method types with 0 fixed types +18a61fcb43a4 HEAD@{631}: rebase (pick): Make error msg nicer +c3e2fe5af54b HEAD@{632}: rebase (pick): Enable the elimination of method prototypes that are not referenced +8fb736efbcdd HEAD@{633}: rebase (pick): * Make sure that the size of the type field can also control the output +a9dab08596d3 HEAD@{634}: rebase (pick): * Add calls to failure template so that it is actually possible to debug +077a425d4516 HEAD@{635}: rebase (pick): * Fix bugs +03c4f8933762 HEAD@{636}: rebase (pick): * Enable the use of escaped literal strings +934c4b501a22 HEAD@{637}: rebase (pick): Modify var names to make it apparant that the code is really generic +0a587153f15f HEAD@{638}: rebase (pick): Changes to make test scripts more reliable +e67cf2e7e23d HEAD@{639}: rebase (pick): Add test of string constants +c20b0ebc51c4 HEAD@{640}: rebase (pick): Added function printIndent. +841fdaf6e2f7 HEAD@{641}: rebase (pick): Added a pointer hash function object for use in pointer maps. +65fb5153e342 HEAD@{642}: rebase (pick): Make a function const. +0762b37e7677 HEAD@{643}: rebase (pick): Remove lib/LLC library. +cf6a5702c91e HEAD@{644}: rebase (pick): Added several SPARC instructions including conditional move and SETHI. +1fc9217c15ee HEAD@{645}: rebase (pick): Remove redundant and unused functions. +76b1285bfdc7 HEAD@{646}: rebase (pick): Added UltraSparcInstrInfo class to specialize class MachineInstrInfo. +6e560c22a4f9 HEAD@{647}: rebase (pick): Eliminate unused function. +5384b204a5da HEAD@{648}: rebase (pick): Bug fixes: +898348afb52d HEAD@{649}: rebase (pick): Added MachineInstrInfo class and moved instruction-related members there. +0c2462a079ed HEAD@{650}: rebase (pick): Eliminate separate enum for operand register type. 
+eac34ac45c71 HEAD@{651}: rebase (pick): Work around a few 'sorting issues' with the bytecode output that causes the bytecode +94e2da805ed4 HEAD@{652}: rebase (pick): Don't write out constants that do not have a name, they will be inlined. +0fb64b07f943 HEAD@{653}: rebase (pick): Refactor some of the constant stuff so that we can return complex constant +ad7945a175d6 HEAD@{654}: rebase (pick): Add an arg to insertVal to allow us to prevent builtin types from being ignored +fe70c81141d7 HEAD@{655}: rebase (pick): Add an arg to insertVal to allow us to prevent builtin types from being ignored +f2a10b61e2a7 HEAD@{656}: rebase (pick): New test for varargs functions +d7f49ed443ab HEAD@{657}: rebase (pick): Add library dep +fd413193db44 HEAD@{658}: rebase (pick): Parenthesize output for expranalyze so that pointer stuff being multiplied isn't confusing +88bb8ebe01fd HEAD@{659}: rebase (pick): Build as before dis +392cb8a9804a HEAD@{660}: rebase (pick): Add support for extern varargs methods & varargs method calls +89cb2de0eeac HEAD@{661}: rebase (pick): Add support for extern varargs methods & varargs method calls +aad7190b6bea HEAD@{662}: rebase (pick): Fix a bug when compiling 'shl ubyte * %var, ubyte 2' +b5d668969e65 HEAD@{663}: rebase (pick): Filter out noncore stuff +36123a777b5e HEAD@{664}: rebase (pick): Fixed a bug exposed when doing something like this: -notanoption --help +a9622c681ad6 HEAD@{665}: rebase (pick): Changed printValue() to print constant value if the value is a constant. +bb2db6c88f8a HEAD@{666}: rebase (pick): *** empty log message *** +8d83c40582cc HEAD@{667}: rebase (pick): Doh! Wrong Optional flag. 
:( +a6c90bf6ee4c HEAD@{668}: rebase (pick): Add a comment indicating that there is documentation of the library +a98afe4b0579 HEAD@{669}: rebase (pick): Initial checking of some rough documentation for commandline library +48fca76a95a8 HEAD@{670}: rebase (pick): Change option name slightly +f62a2f2be6a9 HEAD@{671}: rebase (pick): Minor changes to implementation of CommandLine library to let users override +6c74f799d80b HEAD@{672}: rebase (pick): Add a missing tag +026bec7cf715 HEAD@{673}: rebase (pick): Use the new Alias command line option +d2ec898cfb0e HEAD@{674}: rebase (pick): CommandLine library cleanup. No longer use getValue/setValue, instead, just treat the commandline +f74319d29b56 HEAD@{675}: rebase (pick): Doh! Wrong accessor. Caused 'can not read bytecode' errors. :( +104c6f0c01f6 HEAD@{676}: rebase (pick): -help is verbose enough that we don't need this anymore +b0dcda34759b HEAD@{677}: rebase (pick): Eliminated the Unique class in favor of NonCopyable and NonCopyableV +ec8abea1c777 HEAD@{678}: rebase (pick): Moved inline/llvm/Tools/* to include/llvm/Support/* +f434f8970fdd HEAD@{679}: rebase (pick): Initial checkin +bd177131a770 HEAD@{680}: rebase (pick): Fix coding style issues to actually attempt to be somewhat uniform +f4c632fabc9b HEAD@{681}: rebase (pick): Nonpolymorphic class, doesn't need a virtual dtor! +7bc807e14176 HEAD@{682}: rebase (pick): Clean up hash table usage +efe8c7aa1cf0 HEAD@{683}: rebase (pick): Removal of the redundant CompileContext wrapper +b099c14cc8ba HEAD@{684}: rebase (pick): Verbosify descriptions +b72d002be10f HEAD@{685}: rebase (pick): Large scale changes to implement new command line argument facility +23381cd5b5a1 HEAD@{686}: rebase (pick): Remove dependence on command line library. Silly anyway. 
+4ee192c0ff7a HEAD@{687}: rebase (pick): Make it pickier +442f68038647 HEAD@{688}: rebase (pick): Add flag for emacs so it realizes it's C++ code +99c4af7c6b12 HEAD@{689}: rebase (pick): New test case +01ef66c762bb HEAD@{690}: rebase (pick): Privatize LLCOptions. It had no business being visible to the entire +b18d26deb43d HEAD@{691}: rebase (pick): Move private header into private directory +57cb798a4677 HEAD@{692}: rebase (pick): Convert from using C style char*'s to strings. +144db6c30c7a HEAD@{693}: rebase (pick): Remove String file some more +78fc43ff73cb HEAD@{694}: rebase (pick): Remove stringutils.h file +3b9829cf7645 HEAD@{695}: rebase (pick): Destroy the StringUtils.h file +a1f7c42bcb01 HEAD@{696}: rebase (pick): Eliminate lots of unnecessary #includes and forward decls +f5e75c7e705d HEAD@{697}: rebase (pick): Eliminate many unneccesary #includes +1aa17f3bb25c HEAD@{698}: rebase (pick): Make code fit in 80 columns more +b95c07e35c7f HEAD@{699}: rebase (pick): Remove unneccesary #includes +5381f682dd30 HEAD@{700}: rebase (pick): Exterminate nasty Cisms +11f554634433 HEAD@{701}: rebase (pick): Refer to include/llvm/CodeGen not Codegen +d2e18d70c558 HEAD@{702}: rebase (pick): Instructions for use +34e368c59b5b HEAD@{703}: rebase (pick): Make sure we build all of the code! 
+f0d858ed34f1 HEAD@{704}: rebase (pick): Renamed include/llvm/Codegen to include/llvm/CodeGen +6c8ccac2de98 HEAD@{705}: rebase (pick): Fix code to be in a consistent style +c267a7d71c7b HEAD@{706}: rebase (pick): More minor reorganizations +51777d7f1b52 HEAD@{707}: rebase (pick): Remove getTempValuesForMachineCode from the Instruction interface +21be61506817 HEAD@{708}: rebase (pick): Filter out the sparc.burm.c file +ef91903bfcbf HEAD@{709}: rebase (pick): Moved LLC subdir to the tools top level directory +0e90eb4b6eb7 HEAD@{710}: rebase (pick): Make the makefile work +345e38ed07ac HEAD@{711}: rebase (pick): Add new ctor for ConstPoolBool +9d9614205cf1 HEAD@{712}: rebase (pick): Add new constructor for const pool bool +363bdd9a0676 HEAD@{713}: rebase (pick): Add support for casts +7fd6dcb064ef HEAD@{714}: rebase (pick): Add support for casting operators +61f218f640e2 HEAD@{715}: rebase (pick): Support changed expression api +0d64b2ba0a9c HEAD@{716}: rebase (pick): More functionality, renamed API +df5dbc8e3949 HEAD@{717}: rebase (pick): Moved isIntegral to the Type system +a61ce81cc4d8 HEAD@{718}: rebase (pick): Autodep functionality broken. Remove so we get successful builds +148b96074cee HEAD@{719}: rebase (pick): Version of testmemory to test alloca, load and store. +5153d313b0ce HEAD@{720}: rebase (pick): Used a bigger constant in loopfunc.ll that doesn't fit in immed field. +36eb43e26456 HEAD@{721}: rebase (pick): Utility routines for simpler access to the value of an integer constant. +e31cf51c03a4 HEAD@{722}: rebase (pick): Program options class. +2339d0cf578d HEAD@{723}: rebase (pick): Driver and options for the llc compiler. +03cdc0b1bceb HEAD@{724}: rebase (pick): Description of the SPARC as a target architecture. +dd4b4355c99d HEAD@{725}: rebase (pick): Base clas for a description of a target architecture. +8a2e2fbd50e6 HEAD@{726}: rebase (pick): Instruction selection via pattern matching on instruction trees using BURG. 
+fea7ff57c801 HEAD@{727}: rebase (pick): *** empty log message *** +fae069f4e36b HEAD@{728}: rebase (pick): Added CodeGen, LLC, and Support. +24812650a87f HEAD@{729}: rebase (pick): General support utilities like a program options class and a StringMap +7c52e8197cf9 HEAD@{730}: rebase (pick): CompileContext and options class for the llc compiler. +aed61d90db66 HEAD@{731}: rebase (pick): Header files for the target architecture description and for instruction +82015f75875f HEAD@{732}: rebase (pick): Added support for getting the dependence of an executable on its libs, +70d2dc737e0b HEAD@{733}: rebase (pick): Add isIntegral() method to SignedIntType and UnsignedIntType. +c7371d8afb38 HEAD@{734}: rebase (pick): Provide simpler ways to extract the value of an integer constant. +15e79bcb6e4b HEAD@{735}: rebase (pick): Compute and cache information about the storage size and layout +6b94be0fa4af HEAD@{736}: rebase (pick): Provide uniform access to the pointer operand and to the index +2010845c92f1 HEAD@{737}: rebase (pick): Added a representation of the machine instructions generated +213160e0bb9f HEAD@{738}: rebase (pick): Start of expression analysis support +9250c349b550 HEAD@{739}: rebase (pick): Header to raise and lower representation +56dbc9359f2b HEAD@{740}: rebase (pick): Add support to call LevelRaise +6ffd08afd81c HEAD@{741}: rebase (pick): Update makefile for more accurate deps +f2df47febf1b HEAD@{742}: rebase (pick): Implement ensureTypeAvailable +e06171c5109f HEAD@{743}: rebase (pick): Add support for constant propogation of multiplies +ec9be9e818a5 HEAD@{744}: rebase (pick): Factor out WriteAsOperand. +4bef44e0adfc HEAD@{745}: rebase (pick): Add a comment. +f012589e78ed HEAD@{746}: rebase (pick): Add multiply as a supported constant propogation operation +643641cb450c HEAD@{747}: rebase (pick): New function: WriteAsOperand. 
+80470d72e903 HEAD@{748}: rebase (pick): Add new base class ConstPoolInt, useful for dealing with integral constants +7a5ca318dfe7 HEAD@{749}: rebase (pick): Add new method, ensureTypeAvailable +9abf2d95c339 HEAD@{750}: rebase (pick): Change is*Type to be a casting convertion operator +a7a79aafa026 HEAD@{751}: rebase (pick): Add an function to BinaryOperator to swap the two operands +f8bb46fb137c HEAD@{752}: rebase (pick): Add short forms of the get*Type methods. +9dbc6bb6b44d HEAD@{753}: rebase (pick): Fix nasty typo +d59d2aa4a97e HEAD@{754}: rebase (pick): Fix clean target +bf1c55b14525 HEAD@{755}: rebase (pick): Compile source files in alphabetical order +02990b116ef2 HEAD@{756}: rebase (pick): Fixed typo in comment +f5b88528d736 HEAD@{757}: rebase (pick): Support external methods +643d6d93c309 HEAD@{758}: rebase (pick): New test case for prototype support +12bb537e90da HEAD@{759}: rebase (pick): Reordered link line for correct static linking. +903f9efa3f84 HEAD@{760}: rebase (pick): Changed default to building library archives instead of shared objects. +aabc8315f101 HEAD@{761}: rebase (pick): Implement forward/external declarations for methods. +64e2c4726aa8 HEAD@{762}: rebase (pick): Implement forward/external declarations for methods. 
Also, emit an error if a method +a2be53991d96 HEAD@{763}: rebase (pick): Rename 'isMethodExternal' to 'isExternal' +af58b501dadf HEAD@{764}: rebase (pick): Add notes on instruction selection pass +3be6a7da5434 HEAD@{765}: rebase (pick): New testcase from GCC doing array operations +5702913064a2 HEAD@{766}: rebase (pick): Add support for assembly printing fp constants +05c8093e0529 HEAD@{767}: rebase (pick): Add support to the bytecode writer to recognize floating point constants +55f91192bf7c HEAD@{768}: rebase (pick): Add support to the bytecode reader to recognize floating point constants +828ae092b096 HEAD@{769}: rebase (pick): Add support to the parser to recognize floating point constants +9d78fb9b25fa HEAD@{770}: rebase (pick): Add a function to convert a double to a string +d25973d16cd9 HEAD@{771}: rebase (pick): Add support to write and read a fixed amount of raw data +c35d12757fd4 HEAD@{772}: rebase (pick): Add a note +94c6d03b6c82 HEAD@{773}: rebase (pick): * ValueHolder now takes 3 arguments +67b11a5bd739 HEAD@{774}: rebase (pick): Add knowledge about the struct form of the GetElementPtr instruction +06adb0b2ab08 HEAD@{775}: rebase (pick): Remove dependency on the structure of ValueHolder. +0559fb6b55e4 HEAD@{776}: rebase (pick): * The parent of a constant pool is a symtabvalue, not a value. +fb6d8d18898e HEAD@{777}: rebase (pick): The parent of a constant pool is a symtabvalue, not a value. +c20531f12219 HEAD@{778}: rebase (pick): Added some comments, preparing to add global variables and method prototypes +39fae71357a5 HEAD@{779}: rebase (pick): * The parent of a constant pool is a SymTabValue, not a value. +e3812fad3a2d HEAD@{780}: rebase (pick): Made the following changes: +c6df40cee22e HEAD@{781}: rebase (pick): Added more todo's. Don't I ever accomplish anything? +6ee823f32e3d HEAD@{782}: rebase (pick): Add DebugValue member. 
+c8281fb7bdfe HEAD@{783}: rebase (pick): Made it not inline +e0c85017da0a HEAD@{784}: rebase (pick): Add DebugValue global function +a93311112bbc HEAD@{785}: rebase (pick): Don't clean out the type plane of the constant pool... this is a hack. FIXME +97add160370e HEAD@{786}: rebase (pick): Make sure that types go in the constant pool if they are used. +3f0bab207223 HEAD@{787}: rebase (pick): hasSideEffects should be marked virtual +08fc7cf1be14 HEAD@{788}: rebase (pick): Modify notes +96f249a20298 HEAD@{789}: rebase (pick): Fix stupid typo +c874f2e554d5 HEAD@{790}: rebase (pick): Initial checkin of coding standards +c0abd659a4e3 HEAD@{791}: rebase (pick): Updated documentation for load, store & getelementptr +8ff1023c5729 HEAD@{792}: rebase (pick): add coverage of newly implemented instructions. +87ad59d49e91 HEAD@{793}: rebase (pick): Implementation of Store & GetElementPtr +2b2b55bdec44 HEAD@{794}: rebase (pick): Implement checking for new instructions +4fb6aa4a9e7a HEAD@{795}: rebase (pick): Add note +e9d048cd6792 HEAD@{796}: rebase (pick): Implemented shl, shl, & load instructions +5833b72ec4b1 HEAD@{797}: rebase (pick): Moved Cast from being a Unary instruction to being an "Other" instruction +6b062514ff40 HEAD@{798}: rebase (pick): Use the CDG to mark branches alive on demand. +3005d00fa8dd HEAD@{799}: rebase (pick): Add a new "addOperand" method to User. +4cb53fdaeffb HEAD@{800}: rebase (pick): Fixed post dominator frontiers! Yaay! +aa86a73a5bec HEAD@{801}: rebase (pick): Neg instruction removed. Cast instruction implemented. +842d6e099476 HEAD@{802}: rebase (pick): Neg instruction removed. TODO item fulfilled. 
+7550203543d2 HEAD@{803}: rebase (pick): Removing unnecesary file +6c99b25c3bd8 HEAD@{804}: rebase (pick): Convert BinaryOperand and UnaryOperator to only take instruction types of +eea771ae35ce HEAD@{805}: rebase (pick): Broad superficial changes: +8b0f42aa64c1 HEAD@{806}: rebase (pick): Devirtualize User::dropAllReferences +b6af5d386268 HEAD@{807}: rebase (pick): Remove dtor's that simply call dropAllReferences +70707b9adf64 HEAD@{808}: rebase (pick): Changed the fundemental architecture of Operands for Instructions. Now +bc4bfa70b8e0 HEAD@{809}: rebase (pick): Changed memory reference instructions to store the result as the implicit +dd91a3d2d9e7 HEAD@{810}: rebase (pick): Fixed some error messages to be nicer +7f723d15a495 HEAD@{811}: rebase (pick): Add note about nuking Instruction::neg +a75385aa3c2e HEAD@{812}: rebase (pick): Initial checkin +93634e0499a7 HEAD@{813}: rebase (pick): Add better support for post dominator information. +0528ba902343 HEAD@{814}: rebase (pick): Add method to unify all exit nodes of a method +947db1d96a4f HEAD@{815}: rebase (pick): Implement support for postdominators, except in dom frontiers +a534bd635e5e HEAD@{816}: rebase (pick): New file, includes method to merge exit nodes together +9996c4da5186 HEAD@{817}: rebase (pick): * Add a DominatorBase base class to maintain root of Dominator info +042a9c01050d HEAD@{818}: rebase (pick): * Added comments +b30075f1d1b2 HEAD@{819}: rebase (pick): Update to include right file +b655a756d6a9 HEAD@{820}: rebase (pick): Initial checkin of analyze tool. 
+2c1174ab6df2 HEAD@{821}: rebase (pick): Build new analyze tool +8350bbd20ae5 HEAD@{822}: rebase (pick): Added analyze to path for SetupOpt script +c705cdc36c17 HEAD@{823}: rebase (pick): Add analyze tool to path for Setup script +b18d4fae85b6 HEAD@{824}: rebase (pick): IntervalPartition was changed to inherit from vector instead of +743ecc7f0095 HEAD@{825}: rebase (pick): IntervalPartition was changed to inherit from vector instead of +f95290eba1c4 HEAD@{826}: rebase (pick): *** empty log message *** +39b38db21649 HEAD@{827}: rebase (pick): Checkin of new Analysis result printing header +c16998cb96e5 HEAD@{828}: rebase (pick): Code got moved from the lib/Assembly/Writer/IntervalWriter.cpp file to +49090bf698f5 HEAD@{829}: rebase (pick): Remove code for printing out Analysis data structures. It got moved +2149e63cb883 HEAD@{830}: rebase (pick): Update documentation a bit, correct #include guard +b048f8d4a4b3 HEAD@{831}: rebase (pick): Add note about tool idea. Change command line of note to be more specific +62e192f9ef8f HEAD@{832}: rebase (pick): Add printing code for dominator info +5630a5fac34d HEAD@{833}: rebase (pick): Checkin of new dominator calculation routines. These will be improved in +ee98cbc6c810 HEAD@{834}: rebase (pick): Enable printing of dominator related information. +889fa47ccf94 HEAD@{835}: rebase (pick): Add new anaysis routines for building dominator related information +98e49f4ca414 HEAD@{836}: rebase (pick): Addition of 'deleter' function. +bac6bb0ae065 HEAD@{837}: rebase (pick): Moved deleter to include/llvm/Tools/STLExtras.h +de19f162cc14 HEAD@{838}: rebase (pick): Initial checkin. 
Should print dead instructions, except it doesn't do +4fb09389a03b HEAD@{839}: rebase (pick): Include ADCE pass, rename include/Opt directory to llvm/Optimizations +d56c334ebb78 HEAD@{840}: rebase (pick): Rename DoSparseConditionalConstantProp -> DoSCCP +d8c1f57237a1 HEAD@{841}: rebase (pick): Add note +c355930e34c8 HEAD@{842}: rebase (pick): Add prototypes for ADCE pass +a9aaeed69342 HEAD@{843}: rebase (pick): Rename DoSparseConditionalConstantProp to DoSCCP +82abf7e9b6fa HEAD@{844}: rebase (pick): Optimizations got their own header files +c49280c35c8a HEAD@{845}: rebase (pick): Implement reduceApply method +512b32b42708 HEAD@{846}: rebase (pick): Add a new pop_back() method +c2d246ce3e77 HEAD@{847}: rebase (pick): The ConstRules class got moved to the opt namespace +6167d0001fe5 HEAD@{848}: rebase (pick): Add a reduceApply method +d78e27809d32 HEAD@{849}: rebase (pick): Split AllOpts.h into lots of little .h files. +0963fce4a854 HEAD@{850}: rebase (pick): Export ConstantFoldTerminator, allow it to fold conditional branches to +849231387470 HEAD@{851}: rebase (pick): Added documentation. Constant fold terminators. 
+6ff2d0ae85ce HEAD@{852}: rebase (pick): Added prototype for ConstantFoldTerminator +6fe27ce22949 HEAD@{853}: rebase (pick): Add a check to avoid allowing V->replaceAllUsesWith(V) +3f6c78a176e1 HEAD@{854}: rebase (pick): Add implementation of BasicBlock::removePredecessor code that was factored +ba8f2c1f6a6f HEAD@{855}: rebase (pick): * Factored RemovePredecessorFromBlock into BasicBlock::removePredecessor +b0e4bdf5d6d0 HEAD@{856}: rebase (pick): We need to make sure to remove PHI nodes in the successor that cannot be +ca8d6d3dd907 HEAD@{857}: rebase (pick): Added a note about a new verification the verifier should do +0686d990fbf0 HEAD@{858}: rebase (pick): Added new removePredecessor method prototype +8930ec2756c1 HEAD@{859}: rebase (pick): Added note, moved note +6109478c092c HEAD@{860}: rebase (pick): Fixed the obnoxious problem that caused an entire directory to rebuild +f2eab63950b5 HEAD@{861}: rebase (pick): Miscellaneous cleanups: +63286b72d223 HEAD@{862}: rebase (pick): Add a new Sparse Conditional Constant Propogation pass +324c5dcc9d82 HEAD@{863}: rebase (pick): Add command line arguments for Constant Pool Merging & Sparse Conditional Constant Prop +b412bc074f36 HEAD@{864}: rebase (pick): Put in test of SCCP. Watch out though, because we need to sort the +b4bb71fd0f0d HEAD@{865}: rebase (pick): Change to use the new GenericBinaryInst class. Support lots more operators. +b00b3f2b682d HEAD@{866}: rebase (pick): Misc cleanup +540c4ae24c8e HEAD@{867}: rebase (pick): * Expose DoConstantPoolMerging +955a4d740cb4 HEAD@{868}: rebase (pick): Convert ugly postincrement to efficient preincrement +1dacaa7057bf HEAD@{869}: rebase (pick): * Move stuff around a bit. +926791f1ca54 HEAD@{870}: rebase (pick): Add instructions to fold unary and binary instructions. 
+77090a9ae8e4 HEAD@{871}: rebase (pick): * Use the new reduce_apply_bool template +d564849afb96 HEAD@{872}: rebase (pick): getBasicBlocks() is not needed anymore for reading Method data +c9c6da3ca1f7 HEAD@{873}: rebase (pick): Added methods to make dealing with switches and branch instructions +5b8794782e8c HEAD@{874}: rebase (pick): Minor formating changes +259afbe701de HEAD@{875}: rebase (pick): Make a new GenericBinaryInst class, instead of providing lots of silly +f26fbe244a3c HEAD@{876}: rebase (pick): Convert postincrements to more efficient preincrements +a685acebd652 HEAD@{877}: rebase (pick): Add a new slew of functions to allow dynamic_cast<> like operation for +49e5848f7266 HEAD@{878}: rebase (pick): Add extra forwarding accessor methods so that getMethodList(), getBasicBlocks() +41a60e4b2e4f HEAD@{879}: rebase (pick): Add more notes +0f81680a2104 HEAD@{880}: rebase (pick): Filter out some more stuff +0bf0d89d693b HEAD@{881}: rebase (pick): Moved UnaryOperator::create to InstrTypes.cpp until there is an iUnaryOps.cpp +4d5a89c84cd7 HEAD@{882}: rebase (pick): Implement induction variable injection! +c463b997b86a HEAD@{883}: rebase (pick): Renamed get.*Operator to create seeing that it would have to be qualified +ef4669a3e67b HEAD@{884}: rebase (pick): * Rename get.*Operator to create seeing that it would have to be qualified +e51131d287ae HEAD@{885}: rebase (pick): A silly stupid test of the loop depth calculator was added. REMOVE in the +d06ca69a78ce HEAD@{886}: rebase (pick): IntervalPartition: recode to use IntervalIterator to do all the work +d2fd00e218cb HEAD@{887}: rebase (pick): Add a helper function bind_obj +7fc109749907 HEAD@{888}: rebase (pick): Big changes. Interval*.h is now more or less finalized. 
IntervalPartition +5848055f471b HEAD@{889}: rebase (pick): CFG.h: change the iterator tag +c1eafb1d07ab HEAD@{890}: rebase (pick): ValueHolder's aren't interseting to me anymore +b1abed97808e HEAD@{891}: rebase (pick): New file due to the Intervals.h splitup +0724b3e90091 HEAD@{892}: rebase (pick): New files due to the Intervals.h splitup +ba91baa363b7 HEAD@{893}: rebase (pick): Add a useless phi for testing with InductionVariables stuff +f916541417e3 HEAD@{894}: rebase (pick): #include a different header due to Intervals.h splitting up +5329f8f73c86 HEAD@{895}: rebase (pick): IntervalPartition & IntervalIterator classes have been split out into +fbea208252f0 HEAD@{896}: rebase (pick): IntervalPartition & IntervalIterator classes have been split out into +d828c7706db9 HEAD@{897}: rebase (pick): Prepare for split between Interval, IntervalIterator, and IntervalIPartition +3b3d00dffb19 HEAD@{898}: rebase (pick): Addition of IntervalIterator. Preparing for rename of Intervals.h to +f948fd9866ee HEAD@{899}: rebase (pick): Added notes +d41887123b2e HEAD@{900}: rebase (pick): Implement a lot more functionality. Now loop invariant and linear +d9b7e634b62b HEAD@{901}: rebase (pick): Interval::HeaderNode is now accessed thorugh an accessor function +32f9e9270229 HEAD@{902}: rebase (pick): Add comments +a3eb9281f099 HEAD@{903}: rebase (pick): Add accessor methods to binary/unary operators +b03e9bf9c388 HEAD@{904}: rebase (pick): Add a space to the PHI node output code to make it look nicer +605e752f0429 HEAD@{905}: rebase (pick): Moved printing code to the Assembly/Writer library. 
+8d6f5b857ebb HEAD@{906}: rebase (pick): Implement the new Interval::isLoop method +bc792603f8b8 HEAD@{907}: rebase (pick): New header file defined with neeto utilities put in one place +3b59035da7ea HEAD@{908}: rebase (pick): Modified to use the new reduce_apply algorithm +58967efc5647 HEAD@{909}: rebase (pick): * Added capability to print out an interval +0246e25d77a6 HEAD@{910}: rebase (pick): * Added comments +f88d93ea7fac HEAD@{911}: rebase (pick): Add a test case: an irreducible flow graph. +d9d7c6d1179e HEAD@{912}: rebase (pick): Get rid of a silly printout that isn't needed right now +667d489ab2c9 HEAD@{913}: rebase (pick): Add note +7d9fb3ab2fe8 HEAD@{914}: rebase (pick): New test case +6cdff3cad3c3 HEAD@{915}: rebase (pick): Add capability to print a derived interval graph +7ac17927b805 HEAD@{916}: rebase (pick): Add capability to build a derived interval graph +56e38307935f HEAD@{917}: rebase (pick): Factor the predeclarations of the CFG.h functionality into a seperate, new header +ea614fef124f HEAD@{918}: rebase (pick): Initial Checking of Interval handling code +644acdbfe034 HEAD@{919}: rebase (pick): Add stub for induction variable code +0a9fdd55c1ca HEAD@{920}: rebase (pick): Add a more complex test case +92e26fb33e21 HEAD@{921}: rebase (pick): Add a test case for interval code +5a4847e078a6 HEAD@{922}: rebase (pick): Add an optimization stub +423d0a2d3762 HEAD@{923}: rebase (pick): New file: Interval analysis support +f0e51696b0f1 HEAD@{924}: rebase (pick): Add a note +7b1bb4951dbd HEAD@{925}: rebase (pick): Filter out more stuff I don't want all the time +df4258024997 HEAD@{926}: rebase (pick): Removed silly test code +118b768314e8 HEAD@{927}: rebase (pick): Added options to print out basic blocks in a variety of different orderings +a925ae543736 HEAD@{928}: rebase (pick): Updates to work with new cfg namespace +0b9af59e24ef HEAD@{929}: rebase (pick): Implement support for writing VCG format output +388ad96269af HEAD@{930}: rebase (pick): Move 
contents to the cfg namespace. +91078f85b47e HEAD@{931}: rebase (pick): Updates to support +fba16076cade HEAD@{932}: rebase (pick): Updates to support +fc5655471d3e HEAD@{933}: rebase (pick): Updates to support +0a9d874b1031 HEAD@{934}: rebase (pick): Updates to support +bcb3231d6613 HEAD@{935}: rebase (pick): Update documentation to reflect: +db7f6e6d79ea HEAD@{936}: rebase (pick): Moved getBinaryOperator to the BinaryOperator class and the getUnaryOperator +2cf6ee5926fe HEAD@{937}: rebase (pick): I actually got something done +783091c4f5c6 HEAD@{938}: rebase (pick): Beautify the source a bit. +94ea386b0c0c HEAD@{939}: rebase (pick): Include support for reverse iteration. +059def0ab975 HEAD@{940}: rebase (pick): Added a stupid testcase for iterators. +be3f7a5b9a7a HEAD@{941}: rebase (pick): Added reverse depth first capability, fixed depth first capability +5b0379941c62 HEAD@{942}: rebase (pick): Updated to work with new CFG.h file. +015520037831 HEAD@{943}: rebase (pick): Moved iterators to the new CFG.h file. +b77ecaef3f49 HEAD@{944}: rebase (pick): New file +2208b443367c HEAD@{945}: rebase (pick): inlining can change methods a second time, so don't rerun inliner when testing for +d7ec7d53b0c1 HEAD@{946}: rebase (pick): Add extra method to PHI node class +94a07501c89d HEAD@{947}: rebase (pick): Significant rework. DCE is still not done (see #ifdef'd out parts) +110f15739545 HEAD@{948}: rebase (pick): Fixed to print slightly differently. Added use counts for labels +add1ace044cd HEAD@{949}: rebase (pick): Fixes for BB iterators, additional methods added for DCE pass +8a8e91d33707 HEAD@{950}: rebase (pick): Extra comments +3bdcc96804e9 HEAD@{951}: rebase (pick): Now does not include instruction files... +c6325331663d HEAD@{952}: rebase (pick): Initial revision +f3f54944c027 HEAD@{953}: rebase (pick): New repository initialized by cvs2svn. 
+8b754e2f7567 (origin/master, fork/master) HEAD@{954}: rebase (start): checkout origin/master +1dc8f4774d34 HEAD@{955}: checkout: moving from main to arraytype +72ea6fbc150a HEAD@{956}: checkout: moving from recoverreturn to main +549498c110fa (recoverreturn) HEAD@{957}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong +500ba6619cf3 HEAD@{958}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong +256e9d00f6a8 HEAD@{959}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong +056fc2e74960 HEAD@{960}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong +d537c309a9d4 HEAD@{961}: commit: [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong +72ea6fbc150a HEAD@{962}: checkout: moving from main to recoverreturn +72ea6fbc150a HEAD@{963}: rebase (finish): returning to refs/heads/main +72ea6fbc150a HEAD@{964}: rebase (start): checkout origin/main +9dc4af327b12 HEAD@{965}: checkout: moving from two to main +8062dae7812f (two) HEAD@{966}: commit (amend): [Parse] Use empty RecoveryExpr when if/while/do/switch conditions fail to parse +c5ce4cbfc3cd HEAD@{967}: rebase (finish): returning to refs/heads/two +c5ce4cbfc3cd HEAD@{968}: rebase (pick): [Parse] Use empty RecoveryExpr when if/while/do/switch conditions fail to parse +72ea6fbc150a HEAD@{969}: rebase (start): checkout origin/main +c56122daac76 HEAD@{970}: checkout: moving from iwyustdlib to two +f038610fb5f3 HEAD@{971}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library +e7f383b77f38 HEAD@{972}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library +23650256334a HEAD@{973}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library +eb1c9e6fabaa HEAD@{974}: rebase (continue) (finish): returning to refs/heads/iwyustdlib +eb1c9e6fabaa HEAD@{975}: rebase (continue): [clangd] Basic IncludeCleaner support for 
c/c++ standard library +128c6ed73b8f HEAD@{976}: rebase (start): checkout origin/main +2f3a9575f9ad HEAD@{977}: checkout: moving from stdlib to iwyustdlib +cdfb640fe9e8 (stdlib) HEAD@{978}: checkout: moving from prettify to stdlib +77cc7d2fd845 (prettify) HEAD@{979}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover +97d9713c55bc HEAD@{980}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover +0e1e531ca3ab HEAD@{981}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover +2dcf689f661e HEAD@{982}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover +22e53b9a3b3f HEAD@{983}: commit: [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover +9dc4af327b12 HEAD@{984}: checkout: moving from main to prettify +9dc4af327b12 HEAD@{985}: reset: moving to HEAD +9dc4af327b12 HEAD@{986}: reset: moving to HEAD +9dc4af327b12 HEAD@{987}: checkout: moving from 9dc4af327b12dfbcf90fde1641cd649c6814bf98 to main +9dc4af327b12 HEAD@{988}: checkout: moving from main to origin/main +2c644e2f71a5 HEAD@{989}: commit: FFix feature name in 9dc4af327b12dfbcf90fde1641cd649c6814bf98 +9dc4af327b12 HEAD@{990}: rebase (finish): returning to refs/heads/main +9dc4af327b12 HEAD@{991}: rebase (pick): Re-land "[clang] Add early exit when checking for const init of arrays." +4fedd4be385e HEAD@{992}: rebase (start): checkout origin/main +a3fd292fed18 HEAD@{993}: commit (amend): Re-land "[clang] Add early exit when checking for const init of arrays." +70b8662a502c HEAD@{994}: commit (amend): Re-land "[clang] Add early exit when checking for const init of arrays." +2ff827ad7f2d HEAD@{995}: commit (amend): Re-land "[clang] Add early exit when checking for const init of arrays." +9ad5cbdb06d8 HEAD@{996}: revert: Re-land "[clang] Add early exit when checking for const init of arrays." 
+6f1a501fddae HEAD@{997}: checkout: moving from tmplargs to main +a7b31d694812 HEAD@{998}: commit (amend): [CodeCompletion] Signature help for template argument lists +2142ae80cf59 HEAD@{999}: commit (amend): [CodeCompletion] Signature help for template argument lists +4669c22c0e70 HEAD@{1000}: commit (amend): [CodeCompletion] Signature help for template argument lists +99217d405b2b HEAD@{1001}: commit (amend): [CodeCompletion] Signature help for template argument lists +86fa6ad9fb2b HEAD@{1002}: commit (amend): [CodeCompletion] Signature help for template argument lists +0a7d62a75abf HEAD@{1003}: commit (amend): [CodeCompletion] Signature help for template argument lists +8b0170fa11c1 HEAD@{1004}: commit: [CodeCompletion] Signature help for template argument lists +6f1a501fddae HEAD@{1005}: checkout: moving from main to tmplargs +6f1a501fddae HEAD@{1006}: reset: moving to HEAD +6f1a501fddae HEAD@{1007}: checkout: moving from aggregates to main +9cf82ca7e4ee HEAD@{1008}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +c9f6b6b3f6a8 HEAD@{1009}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +7b37b2f933bd HEAD@{1010}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +e78f39a12189 HEAD@{1011}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +985a3b182774 HEAD@{1012}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +a7ab012a8ff1 HEAD@{1013}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +84c5ef8d6646 HEAD@{1014}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. +7e2d55fea796 HEAD@{1015}: rebase (finish): returning to refs/heads/aggregates +7e2d55fea796 HEAD@{1016}: rebase (pick): [CodeCompletion] Signature help for aggregate initialization. 
+b245d1eaec2d HEAD@{1017}: rebase (start): checkout bracehelp +6175a4ae0cfc HEAD@{1018}: checkout: moving from bracehelp to aggregates +b245d1eaec2d HEAD@{1019}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +f648b926a983 HEAD@{1020}: checkout: moving from aggregates to bracehelp +6175a4ae0cfc HEAD@{1021}: commit: [CodeCompletion] Signature help for aggregate initialization. +f648b926a983 HEAD@{1022}: checkout: moving from bracehelp to aggregates +f648b926a983 HEAD@{1023}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +d830368b01ba HEAD@{1024}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +3fe02e425768 HEAD@{1025}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +3158a41d01e1 HEAD@{1026}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +8e451de571e7 HEAD@{1027}: commit (amend): [CodeCompletion] Signature help for braced constructor calls +b35aa36a3e3f HEAD@{1028}: commit: [CodeCompletion] Signature help for braced constructor calls +6f1a501fddae HEAD@{1029}: checkout: moving from main to bracehelp +6f1a501fddae HEAD@{1030}: checkout: moving from completeinit to main +347a926ee355 (completeinit) HEAD@{1031}: commit (amend): [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. +9babb0590695 HEAD@{1032}: commit (amend): [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. +91e9b7b90b4f HEAD@{1033}: commit (amend): [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. +0e1023621e0f HEAD@{1034}: commit: [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. 
+6f1a501fddae HEAD@{1035}: checkout: moving from main to completeinit +6f1a501fddae HEAD@{1036}: checkout: moving from configcompiler to main +0fa6fc0238fe HEAD@{1037}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 +8205faff5871 HEAD@{1038}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 +27055788e902 HEAD@{1039}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 +f9bdd0229665 HEAD@{1040}: commit: [clangd] Add CompileFlags.Compiler option to override argv0 +6f1a501fddae HEAD@{1041}: checkout: moving from main to configcompiler +6f1a501fddae HEAD@{1042}: checkout: moving from manglefilename to main +b3f0e3eeccc0 (manglefilename) HEAD@{1043}: commit (amend): [clangd] Adjust compile flags so they work when applied to other file(type)s. +22ea16ea69e9 HEAD@{1044}: commit (amend): [clangd] Adjust compile flags so they work when applied to other file(type)s. +8325fd69d14a HEAD@{1045}: commit (amend): [clangd] Adjust compile flags so they work when applied to other file(type)s. +0b435ba816ae HEAD@{1046}: commit: [clangd] Adjust compile flags so they work when applied to other file(type)s. 
+6f1a501fddae HEAD@{1047}: checkout: moving from main to manglefilename +6f1a501fddae HEAD@{1048}: checkout: moving from tidydiags to main +53abaad295f4 HEAD@{1049}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +8daae4149924 HEAD@{1050}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +95f3d66f621b HEAD@{1051}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +6e4e13e32e9a HEAD@{1052}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +b80c98fe991c HEAD@{1053}: commit: [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics +6f1a501fddae HEAD@{1054}: checkout: moving from main to tidydiags +6f1a501fddae HEAD@{1055}: rebase (finish): returning to refs/heads/main +6f1a501fddae HEAD@{1056}: rebase (pick): [clangd] Fix typo in test. NFC +dfa2ad1ad858 HEAD@{1057}: rebase (start): checkout origin/main +e5cc3319d413 HEAD@{1058}: rebase (finish): returning to refs/heads/main +e5cc3319d413 HEAD@{1059}: rebase (pick): [clangd] Fix typo in test. NFC +e751d97863fb HEAD@{1060}: rebase (start): checkout origin/main +be44f91f4fca HEAD@{1061}: commit: [clangd] Fix typo in test. 
NFC +c2f2bb066b83 HEAD@{1062}: reset: moving to HEAD +c2f2bb066b83 HEAD@{1063}: rebase (finish): returning to refs/heads/main +c2f2bb066b83 HEAD@{1064}: rebase (start): checkout origin/main +62bcb75ce510 HEAD@{1065}: checkout: moving from usingtype to main +af27466c5039 (usingtype) HEAD@{1066}: commit (amend): Reland "[AST] Add UsingType: a sugar type for types found via UsingDecl" +bbc902a8436d HEAD@{1067}: revert: Reland "[AST] Add UsingType: a sugar type for types found via UsingDecl" +cc56c66f27e1 HEAD@{1068}: revert: Revert "[AST] Add UsingType: a sugar type for types found via UsingDecl" +565c17574dd0 HEAD@{1069}: rebase (finish): returning to refs/heads/usingtype +565c17574dd0 HEAD@{1070}: rebase (start): checkout origin/main +e1600db19d63 HEAD@{1071}: rebase (finish): returning to refs/heads/usingtype +e1600db19d63 HEAD@{1072}: rebase (pick): [AST] Add UsingType: a sugar type for types found via UsingDecl +eb66f0662ad9 HEAD@{1073}: rebase (start): checkout origin/main +e5706481005a HEAD@{1074}: commit (amend): [AST] Add UsingType: a sugar type for types found via UsingDecl +c11ab3c47b88 HEAD@{1075}: rebase (finish): returning to refs/heads/usingtype +c11ab3c47b88 HEAD@{1076}: rebase (pick): [AST] Add UsingType: a sugar type for types found via UsingDecl +9cd55c7c3463 HEAD@{1077}: rebase (start): checkout origin/main +77701d00dbf1 HEAD@{1078}: commit (amend): [AST] Add UsingType: a sugar type for types found via UsingDecl +484ad728d0b4 HEAD@{1079}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +38567f18b381 HEAD@{1080}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +73794c07c44c HEAD@{1081}: rebase (finish): returning to refs/heads/usingtype +73794c07c44c HEAD@{1082}: rebase (pick): [AST] Add a sugar type for types found via UsingDecl +02fc8d5c9eb0 HEAD@{1083}: rebase (start): checkout origin/main +528e4f3170f7 HEAD@{1084}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +06aa0ecaf0ad 
HEAD@{1085}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +eb52127d5587 HEAD@{1086}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +383df0a0d6e6 HEAD@{1087}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +c4f8be2c2d68 HEAD@{1088}: rebase (continue) (finish): returning to refs/heads/usingtype +c4f8be2c2d68 HEAD@{1089}: rebase (continue): [AST] Add a sugar type for types found via UsingDecl +a596a5fc128b HEAD@{1090}: rebase (start): checkout origin/main +25184d506c43 HEAD@{1091}: checkout: moving from main to usingtype +62bcb75ce510 HEAD@{1092}: commit: [AST] Add more testcases to QualTypeNamesTest. NFC +32dede65ae98 HEAD@{1093}: rebase (finish): returning to refs/heads/main +32dede65ae98 HEAD@{1094}: rebase (pick): [AST] Fix QualTypeNamesTest, which was spuriously passing +509153f1e7d1 HEAD@{1095}: rebase (start): checkout origin/main +8b9423dcec0a HEAD@{1096}: commit: [AST] Fix QualTypeNamesTest, which was spuriously passing +ebed0ca71561 HEAD@{1097}: rebase (finish): returning to refs/heads/main +ebed0ca71561 HEAD@{1098}: rebase (start): checkout origin/main +6fef0ffa14a3 HEAD@{1099}: checkout: moving from usingtype to main +25184d506c43 HEAD@{1100}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +63d52ad6d61f HEAD@{1101}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +c0adf4433852 HEAD@{1102}: checkout: moving from origin to usingtype +8491272d5f8b (origin) HEAD@{1103}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it +58f8efe72279 HEAD@{1104}: rebase (finish): returning to refs/heads/origin +58f8efe72279 HEAD@{1105}: rebase (pick): [clangd] Extend SymbolOrigin, stop serializing it +e7007b69d43b (fixx) HEAD@{1106}: rebase (start): checkout origin/main +ddcc1d2c88de HEAD@{1107}: checkout: moving from fixx to origin +e7007b69d43b (fixx) HEAD@{1108}: rebase (finish): returning to refs/heads/fixx +e7007b69d43b (fixx) HEAD@{1109}: rebase 
(pick): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const +54fc9eb9b313 HEAD@{1110}: rebase (start): checkout origin/main +563ef9895a46 HEAD@{1111}: commit (amend): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const +eb9db3287358 HEAD@{1112}: rebase (finish): returning to refs/heads/fixx +eb9db3287358 HEAD@{1113}: rebase (pick): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const +529833377ccd (block) HEAD@{1114}: rebase (start): checkout origin/main +9344dda72035 HEAD@{1115}: checkout: moving from block to fixx +529833377ccd (block) HEAD@{1116}: rebase (finish): returning to refs/heads/block +529833377ccd (block) HEAD@{1117}: rebase (pick): [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) +a908ca6603ab HEAD@{1118}: rebase (start): checkout origin/main +e65ea60537a7 HEAD@{1119}: checkout: moving from asyncindex to block +747908384732 (asyncindex) HEAD@{1120}: rebase (continue) (finish): returning to refs/heads/asyncindex +747908384732 (asyncindex) HEAD@{1121}: rebase (continue): [clangd] Proof of concept: indexing after the preamble is built +a5927737daeb HEAD@{1122}: rebase (start): checkout origin/main +3f8dfb604b16 HEAD@{1123}: checkout: moving from shared to asyncindex +6917f87b3c7c (shared) HEAD@{1124}: rebase (finish): returning to refs/heads/shared +6917f87b3c7c (shared) HEAD@{1125}: rebase (pick): [clangd] Cleanup unneeded use of shared_ptr. NFC +4299d8d0ce42 HEAD@{1126}: rebase (start): checkout origin/main +998c40e04bec HEAD@{1127}: commit: [clangd] Cleanup unneeded use of shared_ptr. 
NFC +6fef0ffa14a3 HEAD@{1128}: checkout: moving from main to shared +6fef0ffa14a3 HEAD@{1129}: checkout: moving from asyncindex to main +3f8dfb604b16 HEAD@{1130}: commit (amend): [clangd] Proof of concept: indexing after the preamble is built +69244a114c0c HEAD@{1131}: commit (amend): [clangd] Proof of concept: indexing after the preamble is built +e0ed01382993 HEAD@{1132}: commit: [clangd] Proof of concept: indexing after the preamble is built +6fef0ffa14a3 HEAD@{1133}: checkout: moving from main to asyncindex +6fef0ffa14a3 HEAD@{1134}: reset: moving to HEAD +6fef0ffa14a3 HEAD@{1135}: reset: moving to HEAD +6fef0ffa14a3 HEAD@{1136}: reset: moving to HEAD +6fef0ffa14a3 HEAD@{1137}: reset: moving to HEAD +6fef0ffa14a3 HEAD@{1138}: checkout: moving from main to main +6fef0ffa14a3 HEAD@{1139}: rebase (finish): returning to refs/heads/main +6fef0ffa14a3 HEAD@{1140}: rebase (start): checkout origin/main +26f6fbe2be1d HEAD@{1141}: checkout: moving from ccedit to main +782052f2decf (fork/ccedit, ccedit) HEAD@{1142}: commit: [clangd] Prototype: code action to edit compile commands +26f6fbe2be1d HEAD@{1143}: checkout: moving from main to ccedit +26f6fbe2be1d HEAD@{1144}: reset: moving to origin/main +ac431fc2cdf1 (incomplete) HEAD@{1145}: reset: moving to origin/main +c797aa934727 HEAD@{1146}: revert: Revert "Revert "[Symbolizer][Debuginfo] Add debuginfod client to llvm-symbolizer."" +afa3c14e2ff9 HEAD@{1147}: checkout: moving from block to main +e65ea60537a7 HEAD@{1148}: commit (amend): [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) +c416e5d69d7e HEAD@{1149}: commit (amend): [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) +2c1e87eae0e2 HEAD@{1150}: commit: [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) +afa3c14e2ff9 HEAD@{1151}: checkout: moving from main to block +afa3c14e2ff9 HEAD@{1152}: checkout: moving from fixx to main +9344dda72035 HEAD@{1153}: commit (amend): [Sema] Add FixIt when a C++ 
out-of-line method has extra/missing const +fb15c379c1f0 HEAD@{1154}: commit (amend): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const +be240d2b0505 HEAD@{1155}: commit: [Sema] Add FixIt when a C++ out-of-line method has extra/missing const +ac431fc2cdf1 (incomplete) HEAD@{1156}: checkout: moving from incomplete to fixx +ac431fc2cdf1 (incomplete) HEAD@{1157}: rebase (finish): returning to refs/heads/incomplete +ac431fc2cdf1 (incomplete) HEAD@{1158}: rebase (pick): [clangd] ... and mark a new test as -fno-ms-compatibility too +30fc88bf1dc1 HEAD@{1159}: rebase (start): checkout origin/main +d3aa8d688374 HEAD@{1160}: commit (amend): [clangd] ... and mark a new test as -fno-ms-compatibility too +03d0b9092b60 HEAD@{1161}: commit: [clangd] ... and mark a new test as -fno-ms-compatibility too +1a68c14b577f HEAD@{1162}: reset: moving to HEAD +1a68c14b577f HEAD@{1163}: rebase (finish): returning to refs/heads/incomplete +1a68c14b577f HEAD@{1164}: rebase (pick): [clangd] Restore -fno-ms-compatibility to tests +8d897ec91528 HEAD@{1165}: rebase (start): checkout origin/main +ac5910467704 HEAD@{1166}: commit: [clangd] Restore -fno-ms-compatibility to tests +c25ea488a39a HEAD@{1167}: reset: moving to HEAD +c25ea488a39a HEAD@{1168}: rebase (finish): returning to refs/heads/incomplete +c25ea488a39a HEAD@{1169}: rebase (pick): [clangd] Include-fixer: handle more "incomplete type" diags. +a55e51f9a64c HEAD@{1170}: rebase (start): checkout origin/main +11a2f06c37cc HEAD@{1171}: commit (amend): [clangd] Include-fixer: handle more "incomplete type" diags. +8182fffc0500 HEAD@{1172}: rebase (continue) (finish): returning to refs/heads/incomplete +8182fffc0500 HEAD@{1173}: rebase (continue): [clangd] Include-fixer: handle more "incomplete type" diags. 
+86caf517bf05 HEAD@{1174}: rebase (start): checkout origin/main +0958968acbe0 HEAD@{1175}: checkout: moving from incompletenfc to incomplete +a8bf389f4146 (incompletenfc) HEAD@{1176}: rebase (finish): returning to refs/heads/incompletenfc +a8bf389f4146 (incompletenfc) HEAD@{1177}: rebase (pick): [clangd] Clean up some include-fixer tests. NFC +3ed47bcc9618 HEAD@{1178}: rebase (start): checkout origin/main +76820d557062 HEAD@{1179}: commit (amend): [clangd] Clean up some include-fixer tests. NFC +c28420e6737b HEAD@{1180}: commit (amend): [clangd] Clean up some include-fixer tests. NFC +b48226a052b2 HEAD@{1181}: commit (amend): [clangd] Clean up some include-fixer tests. NFC +0958968acbe0 HEAD@{1182}: checkout: moving from incomplete to incompletenfc +0958968acbe0 HEAD@{1183}: checkout: moving from main to incomplete +afa3c14e2ff9 HEAD@{1184}: checkout: moving from indeximplicit to main +0d64c65efac9 (indeximplicit) HEAD@{1185}: cherry-pick: [clangd] Indexing of standard library +ee26e0ba082e (implicitc) HEAD@{1186}: checkout: moving from implicitc to indeximplicit +ee26e0ba082e (implicitc) HEAD@{1187}: commit (amend): [clangd] Include fixer for missing functions in C +9ac5d003594e HEAD@{1188}: commit (amend): [clangd] Include fixer for missing functions in C +3b4429acb859 HEAD@{1189}: commit (amend): [clangd] Include fixer for missing functions in C +1a75bc322127 HEAD@{1190}: commit (amend): [clangd] Include fixer for missing functions in C +94ab31f3c7a8 HEAD@{1191}: commit (amend): [clangd] Include fixer for missing functions in C +86494fa881eb HEAD@{1192}: commit: [clangd] Include fixer for missing functions in C +afa3c14e2ff9 HEAD@{1193}: checkout: moving from main to implicitc +afa3c14e2ff9 HEAD@{1194}: rebase (finish): returning to refs/heads/main +afa3c14e2ff9 HEAD@{1195}: rebase (start): checkout origin/main +d4865393b5da HEAD@{1196}: checkout: moving from incomplete to main +0958968acbe0 HEAD@{1197}: commit (amend): [clangd] Include-fixer: handle more 
"incomplete type" diags, clean up tests +aa89c6b2a300 HEAD@{1198}: commit (amend): [clangd] Include-fixer: handle more "incomplete type" diags, clean up tests +153236d44e9a HEAD@{1199}: commit (amend): [clangd] Include-fixer: handle more "incomplete type" diags, clean up tests +3f0f560caf3a HEAD@{1200}: commit: [clangd] Include-fixer: handle more "incomplete type" diags, clean up tests +d4865393b5da HEAD@{1201}: checkout: moving from main to incomplete +d4865393b5da HEAD@{1202}: reset: moving to HEAD +d4865393b5da HEAD@{1203}: rebase (finish): returning to refs/heads/main +d4865393b5da HEAD@{1204}: rebase (start): checkout origin/main +e7f53ec78fe8 HEAD@{1205}: checkout: moving from tblgen to main +7ef23188fe95 (tblgen) HEAD@{1206}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source +6bdf61f016e3 HEAD@{1207}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source +e249c35c3fb4 HEAD@{1208}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source +fcf5c9f5bf33 HEAD@{1209}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source +7b3888a32700 HEAD@{1210}: rebase (continue) (finish): returning to refs/heads/tblgen +7b3888a32700 HEAD@{1211}: rebase (continue): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source +4afae6f7c7f6 HEAD@{1212}: rebase (start): checkout origin/main +34b10022310a HEAD@{1213}: commit: [clangd] Generate ConfigFragment/YAML/docs from one tablegen source +e7f53ec78fe8 HEAD@{1214}: checkout: moving from main to tblgen +e7f53ec78fe8 HEAD@{1215}: checkout: moving from two to main +c56122daac76 HEAD@{1216}: reset: moving to HEAD +c56122daac76 HEAD@{1217}: commit (amend): [Parse] Use empty RecoveryExpr when if/while/do/switch conditions fail to parse +2409b3d46f6c HEAD@{1218}: rebase (finish): returning to refs/heads/two +2409b3d46f6c HEAD@{1219}: rebase (pick): [Parse] Use empty RecoveryExpr when if/while/do/switch 
conditions fail to parse +2676759bf22e (morefix) HEAD@{1220}: rebase (start): checkout origin/main +ad885f5a3eab (arcpatch-D112996) HEAD@{1221}: checkout: moving from morefix to two +2676759bf22e (morefix) HEAD@{1222}: rebase (finish): returning to refs/heads/morefix +2676759bf22e (morefix) HEAD@{1223}: rebase (pick): [clangd] Add fixes for clang "include " diagnostics +b73cf6207efa HEAD@{1224}: rebase (start): checkout origin/main +da7ff2db120f HEAD@{1225}: rebase (finish): returning to refs/heads/morefix +da7ff2db120f HEAD@{1226}: rebase (pick): [clangd] Add fixes for clang "include " diagnostics +77b2bb55671a HEAD@{1227}: rebase (start): checkout origin/main +8bf667957ed0 HEAD@{1228}: commit (amend): [clangd] Add fixes for clang "include " diagnostics +56f023ff10d2 HEAD@{1229}: commit (amend): [clangd] Add fixes for clang "include " diagnostics +805bac439319 HEAD@{1230}: checkout: moving from origin to morefix +ddcc1d2c88de HEAD@{1231}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it +e4568ef854df HEAD@{1232}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it +9099df1707fe HEAD@{1233}: checkout: moving from stdlib to origin +cdfb640fe9e8 (stdlib) HEAD@{1234}: commit (amend): [clangd] Indexing of standard library +5c14772f82eb HEAD@{1235}: commit (amend): [clangd] Indexing of standard library +9bcdbb99a75b HEAD@{1236}: commit (amend): [clangd] WIP various stdlib indexing stuff +3e38a40b3f17 HEAD@{1237}: commit (amend): [clangd] WIP various stdlib indexing stuff +4ac5a41a65fc HEAD@{1238}: rebase (finish): returning to refs/heads/stdlib +4ac5a41a65fc HEAD@{1239}: rebase (pick): [clangd] WIP various stdlib indexing stuff +e1b9d805325b HEAD@{1240}: rebase (start): checkout origin/main +5330f525f264 (arcpatch-D105177) HEAD@{1241}: checkout: moving from arcpatch-D105177 to stdlib +5330f525f264 (arcpatch-D105177) HEAD@{1242}: checkout: moving from reserved to arcpatch-D105177 +18cd067d0bfa (reserved) HEAD@{1243}: commit (amend): 
[clangd] Don't index __reserved_names in headers. +06dd586e7297 HEAD@{1244}: commit (amend): [clangd] Don't index __reserved_names in headers. +e58aab51c464 HEAD@{1245}: commit (amend): [clangd] Don't index __reserved_names in headers. +05a7bfb157fc HEAD@{1246}: commit: [clangd] Don't index __reserved_names in headers. +e7f53ec78fe8 HEAD@{1247}: checkout: moving from main to reserved +e7f53ec78fe8 HEAD@{1248}: checkout: moving from origin to main +9099df1707fe HEAD@{1249}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it +1557821a2bd2 HEAD@{1250}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it +8c3bd3cc7478 HEAD@{1251}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it +cb761c799928 HEAD@{1252}: commit: [clangd] Extend SymbolOrigin, stop serializing it +e7f53ec78fe8 HEAD@{1253}: checkout: moving from main to origin +e7f53ec78fe8 HEAD@{1254}: rebase (finish): returning to refs/heads/main +e7f53ec78fe8 HEAD@{1255}: rebase (start): checkout origin/main +afc9e7517ada HEAD@{1256}: checkout: moving from arcpatch-D105177 to main +5330f525f264 (arcpatch-D105177) HEAD@{1257}: commit (amend): [clangd] WIP various stdlib indexing stuff +4c58226488ee HEAD@{1258}: commit (amend): [clangd] WIP various stdlib indexing stuff +ffbc79cbcc54 HEAD@{1259}: commit (amend): [clangd] WIP various stdlib indexing stuff +5d5179621ede HEAD@{1260}: checkout: moving from main to arcpatch-D105177 +afc9e7517ada HEAD@{1261}: rebase (finish): returning to refs/heads/main +afc9e7517ada HEAD@{1262}: rebase (start): checkout origin/main +f764a1a5bd7c HEAD@{1263}: checkout: moving from arcpatch-D105177 to main +5d5179621ede HEAD@{1264}: reset: moving to HEAD +5d5179621ede HEAD@{1265}: rebase (finish): returning to refs/heads/arcpatch-D105177 +5d5179621ede HEAD@{1266}: rebase (pick): [clangd] Implemented indexing of standard library +25c7ec4fc622 HEAD@{1267}: rebase (start): checkout origin/main +7f2bbbd16a82 HEAD@{1268}: commit: [clangd] Implemented 
indexing of standard library +15acaad79d6e HEAD@{1269}: checkout: moving from main to arcpatch-D105177 +f764a1a5bd7c HEAD@{1270}: checkout: moving from morefix to main +805bac439319 HEAD@{1271}: commit (amend): [clangd] Add fixes for clang "include " diagnostics +c74d8a0e6f33 HEAD@{1272}: commit (amend): [clangd] Add fixes for clang "include " diagnostics +86d15e9770ca HEAD@{1273}: commit (amend): [clangd] Add fixes for clang "include " diagnostics +a46d34a114b3 HEAD@{1274}: commit: [clangd] Add fixes for clang "include " diagnostics +f764a1a5bd7c HEAD@{1275}: checkout: moving from main to morefix +f764a1a5bd7c HEAD@{1276}: checkout: moving from usingtype to main +c0adf4433852 HEAD@{1277}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +661fde2dfe7c HEAD@{1278}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +f38cd8c69f6d HEAD@{1279}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +4b8286a14790 HEAD@{1280}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +480e5803b30f HEAD@{1281}: commit (amend): [AST] Add a sugar type for types found via UsingDecl +06cc1d22bf04 HEAD@{1282}: rebase (finish): returning to refs/heads/usingtype +06cc1d22bf04 HEAD@{1283}: rebase (pick): [AST] Add a sugar type for types found via UsingDecl +c133fb321f7c HEAD@{1284}: rebase (start): checkout origin/main +8545d9204be1 HEAD@{1285}: rebase (abort): updating HEAD +8545d9204be1 HEAD@{1286}: rebase (abort): updating HEAD +8545d9204be1 HEAD@{1287}: checkout: moving from main to usingtype +f764a1a5bd7c HEAD@{1288}: rebase (finish): returning to refs/heads/main +f764a1a5bd7c HEAD@{1289}: rebase (pick): [clangd] Avoid possible crash: apply configuration after binding methods +a6f53afbcb4d HEAD@{1290}: rebase (finish): returning to refs/heads/main +a6f53afbcb4d HEAD@{1291}: rebase (start): checkout origin/main +5fedbd5b1815 HEAD@{1292}: checkout: moving from main to usingtype +5fedbd5b1815 HEAD@{1293}: checkout: 
moving from token to main +3878ad5e448c (token) HEAD@{1294}: commit: xxx token +5fedbd5b1815 HEAD@{1295}: checkout: moving from main to token +5fedbd5b1815 HEAD@{1296}: rebase (finish): returning to refs/heads/main +5fedbd5b1815 HEAD@{1297}: rebase (start): checkout origin/main +e56d680fe870 HEAD@{1298}: checkout: moving from iwyustdlib to main +e56d680fe870 HEAD@{1299}: checkout: moving from main to iwyustdlib +e56d680fe870 HEAD@{1300}: rebase (finish): returning to refs/heads/main +e56d680fe870 HEAD@{1301}: rebase (start): checkout origin/main +4fb62e138398 HEAD@{1302}: checkout: moving from placeholders to main +8ac9d2ae5839 (placeholders) HEAD@{1303}: rebase (finish): returning to refs/heads/placeholders +8ac9d2ae5839 (placeholders) HEAD@{1304}: rebase (pick): [clangd] Fix function-arg-placeholder suppression with macros. +ebda5e1e521f HEAD@{1305}: checkout: moving from main to placeholders +ebda5e1e521f HEAD@{1306}: rebase (finish): returning to refs/heads/main +ebda5e1e521f HEAD@{1307}: rebase (start): checkout origin/main +48b67dca2ccc HEAD@{1308}: checkout: moving from two to main +ad885f5a3eab (arcpatch-D112996) HEAD@{1309}: checkout: moving from arcpatch-D112996 to two +63667c1896e1 HEAD@{1310}: rebase (finish): returning to refs/heads/arcpatch-D112996 +63667c1896e1 HEAD@{1311}: rebase (pick): [clangd] Trace per-token time in clangd --check +f7500a4ef7bd HEAD@{1312}: rebase (pick): [CodeCompletion] Generally consider header files without extension +5fbcf677347e HEAD@{1313}: checkout: moving from main to arcpatch-D112996 +48b67dca2ccc HEAD@{1314}: rebase (finish): returning to refs/heads/main +48b67dca2ccc HEAD@{1315}: rebase (start): checkout origin/main +627fa0b9a897 HEAD@{1316}: reset: moving to HEAD +627fa0b9a897 HEAD@{1317}: checkout: moving from enum to main +5880c835bdbe (enum) HEAD@{1318}: reset: moving to HEAD +5880c835bdbe (enum) HEAD@{1319}: reset: moving to HEAD +5880c835bdbe (enum) HEAD@{1320}: rebase (finish): returning to refs/heads/enum 
+5880c835bdbe (enum) HEAD@{1321}: rebase (pick): [Sema] Avoid crash in CheckEnumConstant with contains-error expressions +6a5e08cc4a5c (redecl) HEAD@{1322}: rebase (finish): returning to refs/heads/redecl +6a5e08cc4a5c (redecl) HEAD@{1323}: rebase (pick): [AST] injected-class-name is not a redecl, even in template specializations +627fa0b9a897 HEAD@{1324}: checkout: moving from main to redecl +627fa0b9a897 HEAD@{1325}: rebase (finish): returning to refs/heads/main +627fa0b9a897 HEAD@{1326}: rebase (start): checkout origin/main +f06e33298266 HEAD@{1327}: rebase (abort): updating HEAD +f06e33298266 HEAD@{1328}: rebase (abort): updating HEAD +f06e33298266 HEAD@{1329}: checkout: moving from specialfiles to main +73453e7adecb (specialfiles) HEAD@{1330}: rebase (finish): returning to refs/heads/specialfiles +73453e7adecb (specialfiles) HEAD@{1331}: rebase (pick): [clangd] Avoid expensive checks of buffer names in IncludeCleaner +de7494a33a5c (constcrash) HEAD@{1332}: rebase (finish): returning to refs/heads/constcrash +de7494a33a5c (constcrash) HEAD@{1333}: rebase (pick): [AST] fail rather than crash when const evaluating invalid c++ foreach +f06e33298266 HEAD@{1334}: checkout: moving from main to specialfiles +f06e33298266 HEAD@{1335}: rebase (finish): returning to refs/heads/main +f06e33298266 HEAD@{1336}: rebase (start): checkout origin/main +9cc08cb02fdc (crashtest) HEAD@{1337}: checkout: moving from crashtest to constcrash +9cc08cb02fdc (crashtest) HEAD@{1338}: rebase (finish): returning to refs/heads/crashtest +9cc08cb02fdc (crashtest) HEAD@{1339}: rebase (pick): [clangd] Add integration test for crash handling +51be7061d025 HEAD@{1340}: reset: moving to HEAD +51be7061d025 HEAD@{1341}: checkout: moving from main to crashtest +51be7061d025 HEAD@{1342}: commit: [clangd] Remove tricky integration test that flakes/fails on some platforms. 
+4373f3595f8e HEAD@{1343}: rebase (finish): returning to refs/heads/main +4373f3595f8e HEAD@{1344}: rebase (start): checkout origin/main +045695f85cb8 (arcpatch-D109506_1) HEAD@{1345}: checkout: moving from timer to main +aa1ac2ae451e (flush) HEAD@{1346}: checkout: moving from flush to timer +aa1ac2ae451e (flush) HEAD@{1347}: rebase (finish): returning to refs/heads/flush +aa1ac2ae451e (flush) HEAD@{1348}: rebase (pick): [clangd] Flush stderr after signal handlers run, so we always get the full stack/crash info +045695f85cb8 (arcpatch-D109506_1) HEAD@{1349}: checkout: moving from main to flush +045695f85cb8 (arcpatch-D109506_1) HEAD@{1350}: rebase (finish): returning to refs/heads/main +045695f85cb8 (arcpatch-D109506_1) HEAD@{1351}: rebase (start): checkout origin/main +4e91035387fa HEAD@{1352}: checkout: moving from arcpatch-D109506_1 to main +045695f85cb8 (arcpatch-D109506_1) HEAD@{1353}: rebase (finish): returning to refs/heads/arcpatch-D109506_1 +045695f85cb8 (arcpatch-D109506_1) HEAD@{1354}: rebase (pick): [clangd] Print current request context along with the stack trace +980c7f32490b HEAD@{1355}: checkout: moving from arcpatch-D111318 to arcpatch-D109506_1 +a85b661d2ada (arcpatch-D111318) HEAD@{1356}: rebase (finish): returning to refs/heads/arcpatch-D111318 +a85b661d2ada (arcpatch-D111318) HEAD@{1357}: rebase (pick): [clang][clangd] Improve signature help for variadic functions. 
+3964c1db915b HEAD@{1358}: checkout: moving from main to arcpatch-D111318 +4e91035387fa HEAD@{1359}: rebase (finish): returning to refs/heads/main +4e91035387fa HEAD@{1360}: rebase (pick): [Support] Trim #include after b06df22 +93c1b3caf052 HEAD@{1361}: reset: moving to HEAD +93c1b3caf052 HEAD@{1362}: rebase (finish): returning to refs/heads/main +93c1b3caf052 HEAD@{1363}: rebase (start): checkout origin/main +c15bbdeafffb HEAD@{1364}: checkout: moving from arcpatch-D110825 to main +82fbd3412fec (arcpatch-D110825) HEAD@{1365}: commit: [clangd] Handle members of anon structs in SelectionTree +68e56bd320d7 HEAD@{1366}: checkout: moving from main to arcpatch-D110825 +c15bbdeafffb HEAD@{1367}: rebase (finish): returning to refs/heads/main +c15bbdeafffb HEAD@{1368}: rebase (start): checkout origin/main +bb9333c3504a HEAD@{1369}: checkout: moving from uid to main +22555bafe90d (uid) HEAD@{1370}: rebase (finish): returning to refs/heads/uid +22555bafe90d (uid) HEAD@{1371}: rebase (pick): [VFS] InMemoryFilesystem's UniqueIDs are a function of path and content. 
+722e705f72dd (arcpatch-D110324) HEAD@{1372}: checkout: moving from arcpatch-D110324 to uid +722e705f72dd (arcpatch-D110324) HEAD@{1373}: rebase (finish): returning to refs/heads/arcpatch-D110324 +722e705f72dd (arcpatch-D110324) HEAD@{1374}: rebase (start): checkout origin/main +eb209c13cce9 HEAD@{1375}: rebase (finish): returning to refs/heads/arcpatch-D110324 +eb209c13cce9 HEAD@{1376}: rebase (pick): clangd: Do not report inline overrides twice +5685eb950da7 HEAD@{1377}: checkout: moving from main to arcpatch-D110324 +bb9333c3504a HEAD@{1378}: rebase (finish): returning to refs/heads/main +bb9333c3504a HEAD@{1379}: rebase (start): checkout origin/main +61cc873a8ef1 HEAD@{1380}: checkout: moving from arcpatch-D109506 to main From 875dd75e5ab45fc48d12cdbad4fc0aa228abe6b5 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sun, 2 Jan 2022 01:26:43 +0100 Subject: [PATCH 528/992] [libc++][NFC] Use _LIBCPP_DEBUG_ASSERT in <__hash_table> Use `_LIBCPP_DEBUG_ASSERT` in `<__hash_table>` Reviewed By: Quuxplusone, ldionne, Mordante, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D116486 --- libcxx/include/__hash_table | 40 ++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 126e1884a664..ba5027992512 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -2029,11 +2029,9 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, - "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" - " referring to this unordered container"); -#endif + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + "unordered container::emplace_hint(const_iterator, args...) 
called with an iterator not" + " referring to this unordered container"); if (__p != end() && key_eq()(*__p, __cp->__value_)) { __next_pointer __np = __p.__node_; @@ -2158,11 +2156,9 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( const_iterator __p, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, - "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" - " referring to this unordered container"); -#endif + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" + " referring to this unordered container"); __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...); iterator __r = __node_insert_multi(__p, __h.get()); __h.release(); @@ -2484,12 +2480,12 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { __next_pointer __np = __p.__node_; + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + "unordered container erase(iterator) called with an iterator not" + " referring to this container"); + _LIBCPP_DEBUG_ASSERT(__p != end(), + "unordered container erase(iterator) called with a non-dereferenceable iterator"); #if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, - "unordered container erase(iterator) called with an iterator not" - " referring to this container"); - _LIBCPP_ASSERT(__p != end(), - "unordered container erase(iterator) called with a non-dereferenceable iterator"); iterator __r(__np, this); #else iterator __r(__np); @@ -2504,14 +2500,12 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { -#if _LIBCPP_DEBUG_LEVEL == 
2 - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, - "unordered container::erase(iterator, iterator) called with an iterator not" - " referring to this container"); - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this, - "unordered container::erase(iterator, iterator) called with an iterator not" - " referring to this container"); -#endif + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, + "unordered container::erase(iterator, iterator) called with an iterator not" + " referring to this container"); + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this, + "unordered container::erase(iterator, iterator) called with an iterator not" + " referring to this container"); for (const_iterator __p = __first; __first != __last; __p = __first) { ++__first; From 25448826dd4c1965be08fa38cc02de9551084262 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 16:00:24 +0100 Subject: [PATCH 529/992] [InstSimplify] Update test to make miscompile more obvious (NFC) This is now testing (null + g3) != g3 and still coming up with "true" as the answer. The original case was a less obvious miscompile with index overflow involved. 
--- .../Transforms/InstSimplify/ConstProp/icmp-global.ll | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index 99f1a16c5427..cdf6be0e5243 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -66,6 +66,7 @@ define i1 @ult_constexpr_constexpr_one(i8* %x) { @g = global [2 x i32] [i32 1, i32 2] @g2 = global i32 0 @g2_weak = extern_weak global i32 +@g3 = global i8 0 define i1 @global_ne_null() { ; CHECK-LABEL: @global_ne_null( @@ -169,8 +170,8 @@ define i1 @null_gep_ne_global() { ; CHECK-LABEL: @null_gep_ne_global( ; CHECK-NEXT: ret i1 true ; - %gep = getelementptr [2 x i32], [2 x i32]* null, i64 ptrtoint (i32* @g2 to i64) - %cmp = icmp ne [2 x i32]* %gep, @g + %gep = getelementptr i8, i8* null, i64 ptrtoint (i8* @g3 to i64) + %cmp = icmp ne i8* %gep, @g3 ret i1 %cmp } @@ -178,8 +179,8 @@ define i1 @null_gep_ult_global() { ; CHECK-LABEL: @null_gep_ult_global( ; CHECK-NEXT: ret i1 true ; - %gep = getelementptr [2 x i32], [2 x i32]* null, i64 ptrtoint (i32* @g2 to i64) - %cmp = icmp ult [2 x i32]* %gep, @g + %gep = getelementptr i8, i8* null, i64 ptrtoint (i8* @g3 to i64) + %cmp = icmp ult i8* %gep, @g3 ret i1 %cmp } From 6c031780aa2c08996d1351de5e7541c75d6645c5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 4 Jan 2022 16:04:25 +0100 Subject: [PATCH 530/992] [ConstantFold] Remove another incorrect icmp of gep fold This folded (null + X) == g to false, but of course this is incorrect if X == g. Possibly this got confused with the null == g case, which is already handled elsewhere. 
--- llvm/lib/IR/ConstantFold.cpp | 7 +------ llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 66e3a75833e2..0d76dd732d61 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1541,12 +1541,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, return ICmpInst::ICMP_UGT; } } else if (const GlobalValue *GV2 = dyn_cast(V2)) { - if (isa(CE1Op0)) { - // If its not weak linkage, the GVal must have a non-zero address - // so the result is less-than - if (!GV2->hasExternalWeakLinkage()) - return ICmpInst::ICMP_ULT; - } else if (const GlobalValue *GV = dyn_cast(CE1Op0)) { + if (const GlobalValue *GV = dyn_cast(CE1Op0)) { if (GV != GV2) { if (CE1GEP->hasAllZeroIndices()) return areGlobalsPotentiallyEqual(GV, GV2); diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index cdf6be0e5243..aeb6ab4e504f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -168,7 +168,7 @@ define i1 @null_gep_ugt_null_constant_int() { define i1 @null_gep_ne_global() { ; CHECK-LABEL: @null_gep_ne_global( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ne (i8* getelementptr (i8, i8* null, i64 ptrtoint (i8* @g3 to i64)), i8* @g3) ; %gep = getelementptr i8, i8* null, i64 ptrtoint (i8* @g3 to i64) %cmp = icmp ne i8* %gep, @g3 @@ -177,7 +177,7 @@ define i1 @null_gep_ne_global() { define i1 @null_gep_ult_global() { ; CHECK-LABEL: @null_gep_ult_global( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ult (i8* getelementptr (i8, i8* null, i64 ptrtoint (i8* @g3 to i64)), i8* @g3) ; %gep = getelementptr i8, i8* null, i64 ptrtoint (i8* @g3 to i64) %cmp = icmp ult i8* %gep, @g3 From 0683a1e588ade04bba2572e5ab6cf1361ed392d4 Mon Sep 17 
00:00:00 2001 From: Sam McCall Date: Wed, 22 Dec 2021 16:57:59 +0100 Subject: [PATCH 531/992] [clangd] Adjust compile flags so they work when applied to other file(type)s. It's reasonable to want to use the command from one file to compile another. In particular, the command from a translation unit to parse a related header: {"file": "foo.h", "command": "clang foo.cpp"} This is largely what InterpolatingCompilationDatabase tries to do. To do this correctly can require nontrivial changes to the argv, because the file extension affects semantics. e.g. here we must add "-x c++header". When external tools compile commands for different files, we should apply the same adjustments. This is better than telling people to "fix their tools": - simple e.g. python scripts shouldn't have to interpret clang argv - this is a good way to represent the intent "parse header X in the context of file Y", which can work even if X is not self-contained. clangd does not support this today, but some other tools do, and we may one day. This issue is discussed in https://github.com/clangd/clangd/issues/519 Differential Revision: https://reviews.llvm.org/D116167 --- clang-tools-extra/clangd/CompileCommands.cpp | 44 ++++++++++++++++++- .../clangd/unittests/CompileCommandsTests.cpp | 12 +++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index d707bf69eded..5c98e40a87fd 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -241,16 +241,38 @@ void CommandMangler::adjust(std::vector &Cmd, if (ArchOptCount < 2) IndicesToDrop.clear(); + // In some cases people may try to reuse the command from another file, e.g. + // { File: "foo.h", CommandLine: "clang foo.cpp" }. + // We assume the intent is to parse foo.h the same way as foo.cpp, or as if + // it were being included from foo.cpp. 
+ // + // We're going to rewrite the command to refer to foo.h, and this may change + // its semantics (e.g. by parsing the file as C). If we do this, we should + // use transferCompileCommand to adjust the argv. + // In practice only the extension of the file matters, so do this only when + // it differs. + llvm::StringRef FileExtension = llvm::sys::path::extension(File); + llvm::Optional TransferFrom; + auto SawInput = [&](llvm::StringRef Input) { + if (llvm::sys::path::extension(Input) != FileExtension) + TransferFrom.emplace(Input); + }; + // Strip all the inputs and `--`. We'll put the input for the requested file // explicitly at the end of the flags. This ensures modifications done in the // following steps apply in more cases (like setting -x, which only affects // inputs that come after it). - for (auto *Input : ArgList.filtered(driver::options::OPT_INPUT)) + for (auto *Input : ArgList.filtered(driver::options::OPT_INPUT)) { + SawInput(Input->getValue(0)); IndicesToDrop.push_back(Input->getIndex()); + } // Anything after `--` is also treated as input, drop them as well. if (auto *DashDash = ArgList.getLastArgNoClaim(driver::options::OPT__DASH_DASH)) { - Cmd.resize(DashDash->getIndex() + 1); // +1 to account for Cmd[0]. + auto DashDashIndex = DashDash->getIndex() + 1; // +1 accounts for Cmd[0] + for (unsigned I = DashDashIndex; I < Cmd.size(); ++I) + SawInput(Cmd[I]); + Cmd.resize(DashDashIndex); } llvm::sort(IndicesToDrop); llvm::for_each(llvm::reverse(IndicesToDrop), @@ -262,6 +284,24 @@ void CommandMangler::adjust(std::vector &Cmd, Cmd.push_back("--"); Cmd.push_back(File.str()); + if (TransferFrom) { + tooling::CompileCommand TransferCmd; + TransferCmd.Filename = std::move(*TransferFrom); + TransferCmd.CommandLine = std::move(Cmd); + TransferCmd = transferCompileCommand(std::move(TransferCmd), File); + Cmd = std::move(TransferCmd.CommandLine); + + // Restore the canonical "driver --opts -- filename" form we expect. + // FIXME: This is ugly and coupled. 
Make transferCompileCommand ensure it? + assert(!Cmd.empty() && Cmd.back() == File); + Cmd.pop_back(); + if (!Cmd.empty() && Cmd.back() == "--") + Cmd.pop_back(); + assert(!llvm::is_contained(Cmd, "--")); + Cmd.push_back("--"); + Cmd.push_back(File.str()); + } + for (auto &Edit : Config::current().CompileFlags.Edits) Edit(Cmd); diff --git a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp index 4cb6ef9a1661..3afcf59ac077 100644 --- a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp @@ -53,6 +53,18 @@ TEST(CommandMangler, Everything) { "foo.cc")); } +TEST(CommandMangler, FilenameMismatch) { + auto Mangler = CommandMangler::forTests(); + Mangler.ClangPath = testPath("clang"); + // Our compile flags refer to foo.cc... + std::vector Cmd = {"clang", "foo.cc"}; + // but we're applying it to foo.h... + Mangler.adjust(Cmd, "foo.h"); + // so transferCompileCommand should add -x c++-header to preserve semantics. + EXPECT_THAT( + Cmd, ElementsAre(testPath("clang"), "-x", "c++-header", "--", "foo.h")); +} + TEST(CommandMangler, ResourceDir) { auto Mangler = CommandMangler::forTests(); Mangler.ResourceDir = testPath("fake/resources"); From 10bb837feb22ad70dc4acc3d2cdb5be7f45d1c21 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 4 Jan 2022 16:17:24 +0100 Subject: [PATCH 532/992] Delete file erroneously added in 229c95ab661d89d This was committed by mistake. The unusual filename caused problems for us downstream, apologies if it causes problems for you too! 
--- "tl\033" | 1381 ------------------------------------------------------ 1 file changed, 1381 deletions(-) delete mode 100644 "tl\033" diff --git "a/tl\033" "b/tl\033" deleted file mode 100644 index 1a124a4c5c13..000000000000 --- "a/tl\033" +++ /dev/null @@ -1,1381 +0,0 @@ -2a92efd0a239 (HEAD -> main, origin/main) HEAD@{0}: rebase (finish): returning to refs/heads/main -2a92efd0a239 (HEAD -> main, origin/main) HEAD@{1}: rebase (pick): [CodeComplete] drop unused Scope param. NFC -1379eb577607 HEAD@{2}: rebase (start): checkout origin/main -1e3d96c67ff9 HEAD@{3}: commit: [CodeComplete] drop unused Scope param. NFC -6231ef262415 HEAD@{4}: rebase (finish): returning to refs/heads/main -6231ef262415 HEAD@{5}: rebase (start): checkout origin/main -f1f5a85af8be HEAD@{6}: checkout: moving from aggregates to main -c8b1ec7561fe (aggregates) HEAD@{7}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -fee43399f0af HEAD@{8}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -daf114e5c347 HEAD@{9}: rebase (continue) (finish): returning to refs/heads/aggregates -daf114e5c347 HEAD@{10}: rebase (continue): [CodeCompletion] Signature help for aggregate initialization. 
-f2b3e25f860e (configcompiler) HEAD@{11}: rebase (start): checkout origin/main -4f17932fb479 HEAD@{12}: checkout: moving from configcompiler to aggregates -f2b3e25f860e (configcompiler) HEAD@{13}: rebase (finish): returning to refs/heads/configcompiler -f2b3e25f860e (configcompiler) HEAD@{14}: rebase (pick): [clangd] Add CompileFlags.Compiler option to override argv0 -f4ef79306cee HEAD@{15}: rebase (start): checkout origin/main -6443bd3db307 HEAD@{16}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 -0fa6fc0238fe HEAD@{17}: reset: moving to HEAD -0fa6fc0238fe HEAD@{18}: checkout: moving from main to configcompiler -f1f5a85af8be HEAD@{19}: rebase (finish): returning to refs/heads/main -f1f5a85af8be HEAD@{20}: rebase (start): checkout origin/main -09f8315bba39 (arraytype) HEAD@{21}: checkout: moving from bracehelp to main -a61f34ea2502 (bracehelp) HEAD@{22}: commit: [clangd] Fix windows build after 478863ef58c7f7314e06 -92417eaf3329 HEAD@{23}: rebase (finish): returning to refs/heads/bracehelp -92417eaf3329 HEAD@{24}: rebase (pick): [CodeCompletion] Signature help for braced constructor calls -a390c9905d4d HEAD@{25}: rebase (start): checkout origin/main -8da663369977 HEAD@{26}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -9ee52e712414 HEAD@{27}: rebase (continue) (finish): returning to refs/heads/bracehelp -9ee52e712414 HEAD@{28}: rebase (continue): [CodeCompletion] Signature help for braced constructor calls -364eb371012b HEAD@{29}: rebase (start): checkout origin/main -b245d1eaec2d HEAD@{30}: checkout: moving from iwyustdlib to bracehelp -478863ef58c7 (iwyustdlib) HEAD@{31}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library -ee8a314f09c0 HEAD@{32}: rebase (finish): returning to refs/heads/iwyustdlib -ee8a314f09c0 HEAD@{33}: rebase (pick): [clangd] Basic IncludeCleaner support for c/c++ standard library -b9ed95afc4b1 HEAD@{34}: rebase (start): checkout origin/main 
-f038610fb5f3 HEAD@{35}: checkout: moving from insertion_point to iwyustdlib -fe68088d44f7 (insertion_point) HEAD@{36}: rebase (finish): returning to refs/heads/insertion_point -fe68088d44f7 (insertion_point) HEAD@{37}: rebase (pick): [clangd] Helper for determining member insertion point. -9e6f88b31a7f (tidydiags) HEAD@{38}: rebase (start): checkout origin/main -aacd98d5b867 HEAD@{39}: checkout: moving from tidydiags to insertion_point -9e6f88b31a7f (tidydiags) HEAD@{40}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -e9211c3dd6ba HEAD@{41}: rebase (finish): returning to refs/heads/tidydiags -e9211c3dd6ba HEAD@{42}: rebase (pick): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -7505aeefc4e6 HEAD@{43}: rebase (start): checkout origin/main -53abaad295f4 HEAD@{44}: checkout: moving from aggregates to tidydiags -4f17932fb479 HEAD@{45}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -9cf82ca7e4ee HEAD@{46}: checkout: moving from tmplargs to aggregates -cd45e8c7bc16 (tmplargs) HEAD@{47}: rebase (finish): returning to refs/heads/tmplargs -cd45e8c7bc16 (tmplargs) HEAD@{48}: rebase (pick): [CodeCompletion] Signature help for template argument lists -3a33c0b1ce0d HEAD@{49}: rebase (start): checkout origin/main -36da2251bd60 HEAD@{50}: commit (amend): [CodeCompletion] Signature help for template argument lists -ef7f8bce7503 HEAD@{51}: checkout: moving from arcpatch-D116218 to tmplargs -f2b2aae6843b (arcpatch-D116218) HEAD@{52}: commit (amend): [clangd] Fix selection on multi-dimensional array. -50f8215cc9be HEAD@{53}: commit (amend): [clangd] Fix selection on multi-dimensional array. (alternate version) -85244a21fd16 HEAD@{54}: commit (amend): [clangd] Fix selection on multi-dimensional array. 
(alternate version) -169e8e0af680 HEAD@{55}: rebase (finish): returning to refs/heads/arcpatch-D116218 -169e8e0af680 HEAD@{56}: rebase (pick): [clangd] Fix selection on multi-dimensional array. -ca271f4ef5a2 HEAD@{57}: rebase (start): checkout origin/main -70d0857a4dea HEAD@{58}: commit: [clangd] Fix selection on multi-dimensional array. -09f8315bba39 (arraytype) HEAD@{59}: checkout: moving from main to arcpatch-D116218 -09f8315bba39 (arraytype) HEAD@{60}: checkout: moving from tmplargs to main -ef7f8bce7503 HEAD@{61}: commit (amend): [CodeCompletion] Signature help for template argument lists -a7b31d694812 HEAD@{62}: checkout: moving from insertion_point to tmplargs -aacd98d5b867 HEAD@{63}: commit (amend): [clangd] Helper for determining member insertion point. -ac972fe4ff15 HEAD@{64}: checkout: moving from main to insertion_point -09f8315bba39 (arraytype) HEAD@{65}: reset: moving to HEAD -09f8315bba39 (arraytype) HEAD@{66}: checkout: moving from constructor to main -41fbc109a1ae (constructor) HEAD@{67}: commit (amend): [clangd] Add code action to generate a constructor for a C++ class -8e709f570606 HEAD@{68}: commit (amend): [clangd] Add code action to generate a constructor for a C++ class -456dc7755f32 HEAD@{69}: commit: [clangd] Add code action to generate a constructor for a C++ class -ac972fe4ff15 HEAD@{70}: checkout: moving from insertion_point to constructor -ac972fe4ff15 HEAD@{71}: checkout: moving from constructor to insertion_point -09f8315bba39 (arraytype) HEAD@{72}: reset: moving to HEAD -09f8315bba39 (arraytype) HEAD@{73}: checkout: moving from constructor to constructor -09f8315bba39 (arraytype) HEAD@{74}: reset: moving to HEAD~1 -aa6435e963ca HEAD@{75}: checkout: moving from insertion_point to constructor -ac972fe4ff15 HEAD@{76}: checkout: moving from constructor to insertion_point -aa6435e963ca HEAD@{77}: commit (amend): [clangd] Helper for determining member insertion point. 
-45d6b0cd4780 HEAD@{78}: commit (amend): [clangd] Helper for determining member insertion point. -ac972fe4ff15 HEAD@{79}: checkout: moving from insertion_point to constructor -ac972fe4ff15 HEAD@{80}: checkout: moving from specialmember to insertion_point -60a028a904d5 (specialmember) HEAD@{81}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -939996aed14e HEAD@{82}: checkout: moving from 939996aed14ec84df8cce3f4a5ec4988c4a1f564 to specialmember -939996aed14e HEAD@{83}: rebase (pick): [clangd] Code action to declare missing move/copy constructor/assignment -ac972fe4ff15 HEAD@{84}: rebase (start): checkout insertion_point -bbeef89ae1af HEAD@{85}: checkout: moving from specialmember to bbeef89ae1af -ac972fe4ff15 HEAD@{86}: rebase (finish): returning to refs/heads/specialmember -ac972fe4ff15 HEAD@{87}: rebase (start): checkout insertion_point -bbeef89ae1af HEAD@{88}: checkout: moving from insertion_point to specialmember -ac972fe4ff15 HEAD@{89}: commit (amend): [clangd] Helper for determining member insertion point. -0eac12f86ab3 HEAD@{90}: commit (amend): [clangd] Helper for determining member insertion point. -156bab8c3ab7 HEAD@{91}: commit (amend): [clangd] Helper for determining member insertion point. -da546cc68656 HEAD@{92}: commit (amend): [clangd] Helper for determining member insertion point. -407f5558b48c HEAD@{93}: commit: [clangd] Helper for determining member insertion point. 
-09f8315bba39 (arraytype) HEAD@{94}: checkout: moving from main to insertion_point -09f8315bba39 (arraytype) HEAD@{95}: checkout: moving from specialmember to main -bbeef89ae1af HEAD@{96}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -a66453e487e3 HEAD@{97}: reset: moving to HEAD -a66453e487e3 HEAD@{98}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -31c647f871a8 HEAD@{99}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -500372f1ac6d HEAD@{100}: reset: moving to HEAD -500372f1ac6d HEAD@{101}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -174dac9746f1 HEAD@{102}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -34bba952dadc HEAD@{103}: commit (amend): [clangd] Code action to declare missing move/copy constructor/assignment -8b2288785c88 HEAD@{104}: commit: [clangd] Code action to declare missing move/copy constructor/assignment -09f8315bba39 (arraytype) HEAD@{105}: checkout: moving from main to specialmember -09f8315bba39 (arraytype) HEAD@{106}: checkout: moving from typeDefinition to main -6fbb2e3eca26 (typeDefinition) HEAD@{107}: commit (amend): [clangd] Implement textDocument/typeDefinition -1ea84876711e HEAD@{108}: commit (amend): [clangd] Implement textDocument/typeDefinition -2bf2e73c73d9 HEAD@{109}: commit (amend): [clangd] Implement textDocument/typeDefinition -d15e5a597103 HEAD@{110}: commit (amend): [clangd] Implement textDocument/typeDefinition -494458626828 HEAD@{111}: commit: [clangd] Implement textDocument/typeDefinition -09f8315bba39 (arraytype) HEAD@{112}: checkout: moving from main to typeDefinition -09f8315bba39 (arraytype) HEAD@{113}: rebase (finish): returning to refs/heads/main -09f8315bba39 (arraytype) HEAD@{114}: rebase (start): checkout origin/main -72ea6fbc150a HEAD@{115}: checkout: moving from arraytype to main -09f8315bba39 
(arraytype) HEAD@{116}: rebase (finish): returning to refs/heads/arraytype -09f8315bba39 (arraytype) HEAD@{117}: rebase (pick): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent -ed67d5a03aaf HEAD@{118}: rebase (start): checkout origin/main -5c3e13fb9825 HEAD@{119}: commit (amend): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent -991036e41b3b HEAD@{120}: commit (amend): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent -47ffbac82a3f HEAD@{121}: commit (amend): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent -9923e86a3a96 HEAD@{122}: rebase (continue) (finish): returning to refs/heads/arraytype -9923e86a3a96 HEAD@{123}: rebase (continue): [Sema] a[x] has type T when a has type T* or T[], even when T is dependent -15787ccd4574 HEAD@{124}: rebase (start): checkout origin/main -1dc8f4774d34 HEAD@{125}: rebase (abort): updating HEAD -0651768d7a19 HEAD@{126}: rebase (pick): updated suggesting/coloring of call & return args & implicit operands. -f86d65195716 HEAD@{127}: rebase (pick): updated suggesting/coloring of call & return args & implicit operands -3002813063a8 HEAD@{128}: rebase (pick): --changed Sugesting colors for method calls/return values etc. -7bdf5ba01bb0 HEAD@{129}: rebase (pick): fixed a coalscing bug -7524a1746083 HEAD@{130}: rebase (pick): Add library -ba28b47cb919 HEAD@{131}: rebase (pick): Be const correct -1aa4098bafea HEAD@{132}: rebase (pick): Minor code cleanups -629281c4710d HEAD@{133}: rebase (pick): Add cast_or_null & dyn_cast_or_null -24c3a0a84fda HEAD@{134}: rebase (pick): Implement initializers for structs and pointers -2f93ba463315 HEAD@{135}: rebase (pick): Rename ConstPoolPointerReference to ConstPoolPointerRef - My fingers get tired typing that much -e58844e57ecb HEAD@{136}: rebase (pick): Improve error messages on assertion failure. 
-73eab57ce304 HEAD@{137}: rebase (pick): * getExitNode() doesn't exist in method anymore -171cd5f1d612 HEAD@{138}: rebase (pick): Added Instrumentation subdirectory. -2423a863e15b HEAD@{139}: rebase (pick): Implement global variables. Struct and Pointer initializers are not implemented yet though -3af979135686 HEAD@{140}: rebase (pick): Implement linking of global variable constant references -c1129719df3c HEAD@{141}: rebase (pick): Add some more interesting test cases for the linker -23ab0f2c31f9 HEAD@{142}: rebase (pick): Oops, didn't handle hex values correctly. :( -c39415b7c1cd HEAD@{143}: rebase (pick): * Fix the constpoolarray -> c"" printing routines to escape things properly -eb2b5e2b34dd HEAD@{144}: rebase (pick): *** empty log message *** -a28f8e125258 HEAD@{145}: rebase (pick): Minor cleanup -131d908673ef HEAD@{146}: rebase (pick): *** empty log message *** -c11c83a339c8 HEAD@{147}: rebase (pick): Implement linker. It's 95% working now. -30fa72c1feb8 HEAD@{148}: rebase (pick): More interesting testcase -5a055ed280fd HEAD@{149}: rebase (pick): Forward operands into implicit uses as well as explicit ones. -eab25baceb5b HEAD@{150}: rebase (pick): External methods shouldn't have argument lists -f2bd12a6d988 HEAD@{151}: rebase (pick): Update comment, remove misleading method -67bb9adc5a0f HEAD@{152}: rebase (pick): Initializers are not const even if the GV is. -c3d3c0630d9d HEAD@{153}: rebase (pick): Add a new -d argument to dump the internal rep as assembly. -a7b34ac799ce HEAD@{154}: rebase (pick): Cast NULL when requested. -731d883c3187 HEAD@{155}: rebase (pick): Added getEntryNode() and getExitNode() functions. -0003ef936aab HEAD@{156}: rebase (pick): Insert code to trace values at basic block and method exits. -a4927eee849c HEAD@{157}: rebase (pick): Insert code to trace values at basic block and method exits. -59a501e47f06 HEAD@{158}: rebase (pick): Added routine to create a char array for a string. 
-f3328d15f543 HEAD@{159}: rebase (pick): Added routine to create a char array for a string. -1b48aa670b0f HEAD@{160}: rebase (pick): Enable most tests. -b578289a8fa8 HEAD@{161}: rebase (pick): Added a string global variable. -86d5a822efcc HEAD@{162}: rebase (pick): Two bug fixes that were suppressing some "load-constant-into-register" instrs. -ae10fbb5bb27 HEAD@{163}: rebase (pick): Move the burg file to here. Add .in suffix to indicate that it gets -42fcb2d89630 HEAD@{164}: rebase (pick): Make the sparc.burg file be a little more flexible and rubust in the fact of -e5eb3fe6f018 HEAD@{165}: rebase (pick): Use the instruction.def file to remain up to date with future instruction -a95ca89e8976 HEAD@{166}: rebase (pick): New file to define instructions... -1a9806113e30 HEAD@{167}: rebase (pick): Burg files should come out of the Debug Directory for temporary files -a1012b17f9a7 HEAD@{168}: rebase (pick): New module linking functionality prototype -5726d1d2ecd4 HEAD@{169}: rebase (pick): Check in makefile -bfc372b3a5a3 HEAD@{170}: rebase (pick): Fixed tags target so it only happens at root level. -ad26264a523c HEAD@{171}: rebase (pick): Add C source for testmisc.ll -9c5a5f970837 HEAD@{172}: rebase (pick): Dang, I screwed up the merge. 
This should be better -db6e9ecc453f HEAD@{173}: rebase (pick): New testcase for testing constant pointers to globals -5eff5faafba2 HEAD@{174}: rebase (pick): Test files for linker -77a7c277d54d HEAD@{175}: rebase (pick): MethodTypes take an explicit isVarArg argument -19293514b699 HEAD@{176}: rebase (pick): Fix comment flyer -684125529570 HEAD@{177}: rebase (pick): Add new linker -cff52fd4a48a HEAD@{178}: rebase (pick): Build the new linker -cba92a5489f2 HEAD@{179}: rebase (pick): Use null keyword instead of kludge -42c3881f4c41 HEAD@{180}: rebase (pick): Add more function call and prototype specific tests -c82370afa049 HEAD@{181}: rebase (pick): Compile the transforms directory -6dad439c635a HEAD@{182}: rebase (pick): Start of a linker -71585a57f2b0 HEAD@{183}: rebase (pick): Implement the invoke instruction -4aac971feb66 HEAD@{184}: rebase (pick): * Fix a nefarious bugs: TypesEqual was wrong for varargs methods -81374f6531a5 HEAD@{185}: rebase (pick): Convert a runtime check into an assertion -fc856307fe9a HEAD@{186}: rebase (pick): * Add support for Module specific constants -5119ee94dd54 HEAD@{187}: rebase (pick): Add new TerminatorInst ctor for invoke -97aceab30ca0 HEAD@{188}: rebase (pick): * Fix TODO -fdd33fff63c6 HEAD@{189}: rebase (pick): Fix broken #endif -6eab48b3c68d HEAD@{190}: rebase (pick): * Add #include -d64929f66211 HEAD@{191}: rebase (pick): Add StringList support -d5f1339c1461 HEAD@{192}: rebase (pick): Support the invoke instruction -362b89b2697e HEAD@{193}: rebase (pick): Support indirect calls -250990a3ef85 HEAD@{194}: rebase (pick): not is a keyword in ansi C++, avoid it -cad98049b01e HEAD@{195}: rebase (pick): * Fix privacy issues on RegToRefVecMap -6d8a50fb7185 HEAD@{196}: rebase (pick): * Use new style casts more -d5ef68f42b47 HEAD@{197}: rebase (pick): * Add real support for global variable addresses initializing constants -5b89a0710636 HEAD@{198}: rebase (pick): * Support writing GlobalVariables with info comments by them -38600d48ce25 
HEAD@{199}: rebase (pick): * Add support for forward references of global variable addresses -30567de7ef54 HEAD@{200}: rebase (pick): Add operator< to ValID's so that they can be put in map's -c4253f651f13 HEAD@{201}: rebase (pick): Remove exception specification -e46a527bd890 HEAD@{202}: rebase (pick): Support the new Invoke instruction -dbf3974c7876 HEAD@{203}: rebase (pick): Support pointers to globals happily -f72067424d95 HEAD@{204}: rebase (pick): Fix code to make GCC 2.96 happy -ec668ae234aa HEAD@{205}: rebase (pick): * Add support for Invoke instructions -b92a0735743c HEAD@{206}: rebase (pick): Fix filename in comment -eac143eefddc HEAD@{207}: rebase (pick): Better linux support. This file still sucks -98503c7ebc77 HEAD@{208}: rebase (pick): Fix broken #endif -ca3d924e3846 HEAD@{209}: rebase (pick): not is a keyword in Ansi C++. Avoid it -c168bc53e09e HEAD@{210}: rebase (pick): Clean up initializers for GCC 2.96 -b54fa1a20171 HEAD@{211}: rebase (pick): Remove exception specification. Only slows code down. -ed95b6657e6b HEAD@{212}: rebase (pick): Changes to compile with GCC 2.96 -c22edf4bc5a2 HEAD@{213}: rebase (pick): Add comment indicating semantics of indirect calls -4dcafac17dcb HEAD@{214}: rebase (pick): New ctor for invoke inst -a8a651345904 HEAD@{215}: rebase (pick): Add support for indirect calls -af0d7630a30d HEAD@{216}: rebase (pick): Add some casts to make GCC 2.96 happy. -868db5e40c09 HEAD@{217}: rebase (pick): Add use_back() methods -08696c9b3a19 HEAD@{218}: rebase (pick): Add classof implementations for User -3776f284eb1a HEAD@{219}: rebase (pick): Expose typedefs -d5660029e7f9 HEAD@{220}: rebase (pick): Add support for module local constants -8f28f49eecf5 HEAD@{221}: rebase (pick): Add new opcode for Invoke instruction -c6c0d280af0b HEAD@{222}: rebase (pick): Minor changes, add new ctor for invoke instruction -f230dca276c8 HEAD@{223}: rebase (pick): Add assertions -c4ea40ffae4f HEAD@{224}: rebase (pick): * Minor Formatting changes. 
-e4f89d5176af HEAD@{225}: rebase (pick): * Add destroyConstant stuff to handle module local constants -0a73f5e2e880 HEAD@{226}: rebase (pick): Update todo's -b66fb116fe18 HEAD@{227}: rebase (pick): Each tools should not make tags -969240424993 HEAD@{228}: rebase (pick): --corrected coalescing test: coalsed only if two are of the same reg class -54622d353dc1 HEAD@{229}: rebase (pick): added support for implict operands in machine instruction -af225afe483a HEAD@{230}: rebase (pick): --added support for implicit operands in machine instructions -4c1eeb2f0207 HEAD@{231}: rebase (pick): Delete *.s on clean. -e0e2c0de0d59 HEAD@{232}: rebase (pick): Record implicitRefs for each machine instruction instead of -c7344856e2e2 HEAD@{233}: rebase (pick): Add graph edges due to implicit refs in each machine instruction. -da6e725984b0 HEAD@{234}: rebase (pick): Added a rule for building TAGS. -84249865be76 HEAD@{235}: rebase (pick): Repeat some libs due to circular dependences between Sparc and other -b41937df9bac HEAD@{236}: rebase (pick): Don't insert useful instructions in delay slot of a RETURN. -8ec3840fd358 HEAD@{237}: rebase (pick): Insert code to load constants used as Call or Return arguments. -dfb65425ee14 HEAD@{238}: rebase (pick): Machine-independent code generation routines used in instruction -e8a1ea03539a HEAD@{239}: rebase (pick): Moved code generation support routines to InstrSelectionSupport.{h,cpp}. -920028cc0b2f HEAD@{240}: rebase (pick): Moved code generation support routines to InstrSelectionSupport.cpp. -362badd47ffe HEAD@{241}: rebase (pick): Moved first function to "simpleadd.ll". -e3a87d5e89a0 HEAD@{242}: rebase (pick): testmemory and sumarray now work with instruction selection. 
-a08813e66ef9 HEAD@{243}: rebase (pick): --removed %g regs being allocated - fix later -576355e82463 HEAD@{244}: rebase (pick): Add hack to get rid of malloc & free instructions for code generation -5566f9c03615 HEAD@{245}: rebase (pick): Add comment -704887cc1858 HEAD@{246}: rebase (pick): Support multiple global's definitions -97e5c873483d HEAD@{247}: rebase (pick): Factor parentness out of Module & GlobalVariable into GlobalValue -370c4a28a876 HEAD@{248}: rebase (pick): Rename getNullPointer to getNull -ddfe3ae972ff HEAD@{249}: rebase (pick): Rename getNullPointer to getNull -7b0ee1e797ab HEAD@{250}: rebase (pick): Allow duplicate constant values as long as they are compatible. -a13bc1844828 HEAD@{251}: rebase (pick): Share ConstPoolPointer elements correctly -bd8752038e30 HEAD@{252}: rebase (pick): Fix broken testcase -398a1a5573f9 HEAD@{253}: rebase (pick): Add check to make sure that we dont reference MEthodType's directly -bdb349a55426 HEAD@{254}: rebase (pick): * Both Method & GlobalVariable now subclass GlobalValue -5c22b0d643af HEAD@{255}: rebase (pick): Adjust test cases to match the fact that methods are now explicit pointer values, not explicit -1f92e9fc5d90 HEAD@{256}: rebase (pick): First try at a horrible global value reference wrapper -ebf7f8fa07e7 HEAD@{257}: rebase (pick): Clean up parser, fix a bug that prevented this from working: -eedd6c7c8622 HEAD@{258}: rebase (pick): * Add support for null as a constant -550de7f1b919 HEAD@{259}: rebase (pick): Modify testcases for new LLVM const syntax -dfe6c7e0aff1 HEAD@{260}: rebase (pick): Commit more code over to new cast style -8d5994a86223 HEAD@{261}: rebase (pick): Convert more code to use new style casts -22c53dc308b0 HEAD@{262}: rebase (pick): Add more support for new style casts -e876f00ebb60 HEAD@{263}: rebase (pick): Add support for new style casts -0b735821091f HEAD@{264}: rebase (pick): Add support for newer cleaner isa, cast, dyn_cast -8f546a6b1eb9 HEAD@{265}: rebase (pick): Update comments 
-154b8c0b0bdb HEAD@{266}: rebase (pick): Pull predecessor and successor iterators out of the CFG*.h files, and plop them into -96bfa8db5614 HEAD@{267}: rebase (pick): Pull predecessor and successor iterators out of the CFG*.h files, and plop them into -0c5cd66015ba HEAD@{268}: rebase (pick): Comment out a paragraph that refers to a file that no longer exists -bf9adf15ad50 HEAD@{269}: rebase (pick): Fix emission of return instructions -af1ab310689d HEAD@{270}: rebase (pick): Add path to as so it doesn't find llvm as if that path is set. -554b4bc20205 HEAD@{271}: rebase (pick): Exclude a couple of tests that the regalloc stuff doesn't handle yet -2d6c6b32a60e HEAD@{272}: rebase (pick): Add different "cast constant value" for several possible types. -5a0bdbf41700 HEAD@{273}: rebase (pick): Add vector `implicitUses' to class MachineCodeForVMInstr to hold values -69e68114634e HEAD@{274}: rebase (pick): Several fixes: -ecfd19aa7a65 HEAD@{275}: rebase (pick): removing phy regaloc - incorrect file -c9899c19a917 HEAD@{276}: rebase (pick): Change latency of setuw and setsw to 2 cycles. -8e03b2d97f34 HEAD@{277}: rebase (pick): Change ! ( ...== ...) to !=. -aa06d6438043 HEAD@{278}: rebase (pick): Improved dump for disp type operand. -d09bbd3e62ee HEAD@{279}: rebase (pick): Bug fixes: -4542845ffac4 HEAD@{280}: rebase (pick): Minor changes for bug fixes in SchedGraph.cpp. -f2d34339b43a HEAD@{281}: rebase (pick): Two bug fixes: -dadedae23021 HEAD@{282}: rebase (pick): *** empty log message *** -e30f6b836af1 HEAD@{283}: rebase (pick): no major change. 
-17745bb05c7a HEAD@{284}: rebase (pick): added suggesting color support -0c5afc6b26f2 HEAD@{285}: rebase (pick): added suggesting color suppor -bdaab1203288 HEAD@{286}: rebase (pick): added support for suggesting colors -3061d7a1e42b HEAD@{287}: rebase (pick): --added suggesting colors; call/ret arg handling -f3d3eee7e06a HEAD@{288}: rebase (pick): Add a test for the new null keyword -8e9b70834fa4 HEAD@{289}: rebase (pick): Implement constant pointers, and null specifically in the parser, bytecode writer, and -d20cd6b4422b HEAD@{290}: rebase (pick): Implement a constant pointer value -91bf6d53e2e8 HEAD@{291}: rebase (pick): Pull iterators out of CFG.h and genericize them with GraphTraits -1f5ff53527ab HEAD@{292}: rebase (pick): File #include file -60f364cc5b13 HEAD@{293}: rebase (pick): Pull iterators out of CFG.h and CFGdecls and put them in Support directory -ab4adf7cba15 HEAD@{294}: rebase (pick): * Properly escape function names -b329ccfca12b HEAD@{295}: rebase (pick): Check in bug fix for vadve -3eaa426db4d4 HEAD@{296}: rebase (pick): Add commands to assemble and compile a .ll file -0fd9a3dcc702 HEAD@{297}: rebase (pick): Initial support for construction of a call graph -b3a3ecaf05f7 HEAD@{298}: rebase (pick): Add support to print a call graph, and also add support for module level interprocedural analyses -464bdb4b73aa HEAD@{299}: rebase (pick): Adding the tool to the path doesn't break anything anymore -f1f7f171a7a5 HEAD@{300}: rebase (pick): Make error report a little more useful -58d981ac2a15 HEAD@{301}: rebase (pick): ADCE is broken but at least we know why -dae33afb6ab1 HEAD@{302}: rebase (pick): print out value's by pointer -cb586b4aa067 HEAD@{303}: rebase (pick): Add capability to print out call graph -24c1bbab59ca HEAD@{304}: rebase (pick): Global variables/complex constants have been resolved! 
-4d13ee0a9344 HEAD@{305}: rebase (pick): -- fixed a ret val bug -19f2d28d3fb2 HEAD@{306}: rebase (pick): -- removed debugging messages -d23e458745cb HEAD@{307}: rebase (pick): -fixed return value bug. -b53ab66b2055 HEAD@{308}: rebase (pick): Add proper support to send output to the right place -1da35ac9ce16 HEAD@{309}: rebase (pick): Print .def files as well as other files -1a7c20d822d2 HEAD@{310}: rebase (pick): Change debug info from #define to command line option -bdd630363635 HEAD@{311}: rebase (pick): Change debug info from #define to command line option -d27bcdc4d564 HEAD@{312}: rebase (pick): * REMOVE extraneous debug info if DEBUG_RA is not set -b58b0442c078 HEAD@{313}: rebase (pick): Seperate instruction definitions into new SparcInstr.def file -84ba33c8b41a HEAD@{314}: rebase (pick): Okay, make the member function work. -c14992951e06 HEAD@{315}: rebase (pick): Remove global debug output fns that have been superceded by a member func -78a5c492e944 HEAD@{316}: rebase (pick): Remove debugging output stuff -3f14f79d64e6 HEAD@{317}: rebase (pick): Emit assembly language from the target... -5a780fe743b5 HEAD@{318}: rebase (pick): Add emitAssembly Method -6d1bd8d21e41 HEAD@{319}: rebase (pick): Add idea -f821fceb8d6a HEAD@{320}: rebase (pick): Add EmitAssembly to mf -d2ccd8e344fc HEAD@{321}: rebase (pick): First cut at assembly output -7cd873804115 HEAD@{322}: rebase (pick): Add emitAssemblyMethod to TargetMachine -8749075054d9 HEAD@{323}: rebase (pick): *** empty log message *** -ca4aeed4cda6 HEAD@{324}: rebase (pick): --added methods to operand class to set/get registers after register allocation -d3262f97ed7a HEAD@{325}: rebase (pick): -- ruchira -983537f3112b HEAD@{326}: rebase (pick): -- updated printing -df8fc0fcada5 HEAD@{327}: rebase (pick): Remove a copy of a bunch of code -5ff0c9da9f43 HEAD@{328}: rebase (pick): C++ gives us auto_ptr's, so we might as well use them. 
:) -0a6274f4f469 HEAD@{329}: rebase (pick): Fix up code a bit, remove operator<< to Assembly/Writer.h -8ebd15ef9e5b HEAD@{330}: rebase (pick): Remove extraneous #includes -992e6cf11454 HEAD@{331}: rebase (pick): Move operator << from Value.h to Assembly/Writer.h -05c03e0a4a43 HEAD@{332}: rebase (pick): Remove operator << to Assembly/Writer.h -32354c42e162 HEAD@{333}: rebase (pick): Don't check for null on delete -348cbcb3414c HEAD@{334}: rebase (pick): Un-neuter makefile -b9015643ae16 HEAD@{335}: rebase (pick): Minor changes. -31eddde1fbe7 HEAD@{336}: rebase (pick): Folded inssel*.ll into select.ll. -93a7445ced49 HEAD@{337}: rebase (pick): Renamed files to match the primary classes they provide. -73a5ca83c073 HEAD@{338}: rebase (pick): Renamed a header file. -116c6caa7247 HEAD@{339}: rebase (pick): Make class TargetMachine the common interface to all target-dependent -4fc2bc116a7f HEAD@{340}: rebase (pick): Allow pointer constants as well as integer and booleans. -4350d1b2f431 HEAD@{341}: rebase (pick): Make class TargetMachine the common interface to all target-dependent -c3645e342ca4 HEAD@{342}: rebase (pick): Renamed files to match the main classes they provide. -2221c6a54d56 HEAD@{343}: rebase (pick): Cast unsigned to int! It was causing a nice little bug. -3692872402ab HEAD@{344}: rebase (pick): Minor changes. -fdf7be61f2e0 HEAD@{345}: rebase (pick): Don't add instructions to subtree for Phi or Call. -c5ec3128e60a HEAD@{346}: rebase (pick): Format file header. -9bce80700742 HEAD@{347}: rebase (pick): Add new entry/exit edges when removing delay slot nodes from the graph. -0c5c4e8dfb45 HEAD@{348}: rebase (pick): Moved erase edge functions to class SchedGraph. -ad74a2f916dd HEAD@{349}: rebase (pick): Renamed some header files. -6f280562c6f1 HEAD@{350}: rebase (pick): Moved erase-edge functions from SchedGraphNode to SchedGraph. -c20d754ef692 HEAD@{351}: rebase (pick): Moved DebugValue to Value.cpp. 
-a18896cb69d9 HEAD@{352}: rebase (pick): Added debugging support. -b99a5873a966 HEAD@{353}: rebase (pick): Moved debugging interfaces for class Value to Value.h. -de14aceb2e19 HEAD@{354}: rebase (pick): Minor fixes: renamed target machine files; fold sched info into TargetMachine. -1fabb8f4d05b HEAD@{355}: rebase (pick): Make class TargetMachine the common interface to all target-dependent -004e1e8c9bd5 HEAD@{356}: rebase (pick): Added debugging support. -5308e6f9d6ca HEAD@{357}: rebase (pick): Fix testcases to handle new syntax for construction and initializeation -23bc63990bca HEAD@{358}: rebase (pick): Remove the unsized array constraint -23b021feb086 HEAD@{359}: rebase (pick): Add support for global constants, and for initializers for constants -1f2803d9c6b3 HEAD@{360}: rebase (pick): Add support for global constants, and for initializers for constants -e1fed6f079c9 HEAD@{361}: rebase (pick): added a method to get reg num after register allocation -ae7bbf4710cc HEAD@{362}: rebase (pick): modified machine code printing -13af7a7caac6 HEAD@{363}: rebase (pick): -modified machine operand class - took regNum out of union to set regNum after -6bddc120b229 HEAD@{364}: rebase (pick): modified printing of debug messages -313c2a193181 HEAD@{365}: rebase (pick): --added methods for printing -b8916ea9dfc9 HEAD@{366}: rebase (pick): added setRegForValue to MachineOperand class -072b09e468f8 HEAD@{367}: rebase (pick): fixed printing messages -357bf235defd HEAD@{368}: rebase (pick): -- debug messages dissabled -b3a9794066b2 HEAD@{369}: rebase (pick): added reg alloc support -4ac010f69361 HEAD@{370}: rebase (pick): --reg alloc code added -c7e1696e212a HEAD@{371}: rebase (pick): -reg alloc code -74fe0add218c HEAD@{372}: rebase (pick): added register allocation code -1c1d5b77ea72 HEAD@{373}: rebase (pick): Added regalloc -261723120208 HEAD@{374}: rebase (pick): Oops, accidentally checked my debugging makefile -ccba943ebd24 HEAD@{375}: rebase (pick): Fix a bug with not removing 
method level types after compilation -32436a343662 HEAD@{376}: rebase (pick): added RegAlloc Directory to DIRS -1c24930f9da4 HEAD@{377}: rebase (pick): *** empty log message *** -cac3722a15a8 HEAD@{378}: rebase (pick): *** empty log message *** -c6554b4537c1 HEAD@{379}: rebase (pick): Remove invalid testcase -847094903baa HEAD@{380}: rebase (pick): Remove invalid testcase. Unneccesary anyways -d71ff5c79c96 HEAD@{381}: rebase (pick): Add new test cases -7789d9c7f54d HEAD@{382}: rebase (pick): Add support for loading and storing pointers... -a3aa024f5831 HEAD@{383}: rebase (pick): Fix a bug that caused a crash if a setcc had zero uses. -c70348cb828c HEAD@{384}: rebase (pick): Add a forward decl, oops. -85c86566e9a5 HEAD@{385}: rebase (pick): Chris seems fond of #include . Fix these. Also convert use list in -3edb0d2e080e HEAD@{386}: rebase (pick): Add a comment -5c8a3647ccb6 HEAD@{387}: rebase (pick): Minor reformatting, & protection fixes -ec87fa4f8523 HEAD@{388}: rebase (pick): Break scheduling infrastructure out of TargetMachine.cpp into SchedInfo.cpp -d589bb98df47 HEAD@{389}: rebase (pick): Split Register specific stuff out from TargetMachine.h to RegInfo.h -ec018be202c8 HEAD@{390}: rebase (pick): Split Target/Machine.h into three files: -53bcc4463c09 HEAD@{391}: rebase (pick): Make a new llvm/Target #include directory. -aaca226978d7 HEAD@{392}: rebase (pick): Checkin changes to: -9cec2d47b443 HEAD@{393}: rebase (pick): Checkin changes to: -3b15eb471b31 HEAD@{394}: rebase (pick): Move files to new sparc directory -11954336afe2 HEAD@{395}: rebase (pick): Move the sparc target to a new lib/Target directory -a8d3715d2038 HEAD@{396}: rebase (pick): Move files. 
-82cb584aec3c HEAD@{397}: rebase (pick): Move the contents of the CodeGen/TargetMachine/Sparc directory to Target/Sparc -1799226a9df7 HEAD@{398}: rebase (pick): This checkin represents some cleanup of the backend, implementing the following things: -2153a7e280f6 HEAD@{399}: rebase (pick): This checkin represents some cleanup of the backend, implementing the following things: -9936a71b49ba HEAD@{400}: rebase (pick): Updates to use local header files. -ddef6185b427 HEAD@{401}: rebase (pick): Export the instruction forest support from the analysis library -44e4e80c2911 HEAD@{402}: rebase (pick): Initial instruction tree support for the analysis library -001ff12fbe1c HEAD@{403}: rebase (pick): Generic k-way tree support -015b075f7f69 HEAD@{404}: rebase (pick): More cleanups, preparing to revamp InstrForest to, among other things, -d6c5ea5c2392 HEAD@{405}: rebase (pick): * Clean up InstrForest -8f70795fa947 HEAD@{406}: rebase (pick): Eliminate 'BasicNode' from InstrForest. -02e210b78442 HEAD@{407}: rebase (pick): Eliminate MainTreeNode function -bacc3815ee3a HEAD@{408}: rebase (pick): Remove irrelevant gross K&R Cisms -99dec15bddc1 HEAD@{409}: rebase (pick): Handle subtract in expression classifier -9c9d9777ee76 HEAD@{410}: rebase (pick): Disable destructors on constants -58b30135c56a HEAD@{411}: rebase (pick): Use the correct style casts -6fb05a7fb6f1 HEAD@{412}: rebase (pick): Use correct style casts -f6d78c00b28d HEAD@{413}: rebase (pick): Use correct style casts -bd9287aa5602 HEAD@{414}: rebase (pick): Use type checking predicates -b760399acaf2 HEAD@{415}: rebase (pick): Use correct casts -86f6acb766bb HEAD@{416}: rebase (pick): Use predicate for Value type test -b1223a7dc00c HEAD@{417}: rebase (pick): Use predicate for Value type test -5dbd964b9fbc HEAD@{418}: rebase (pick): ModuleTyID doesn't exist anyymore -c583d68d95f8 HEAD@{419}: rebase (pick): getMethodType is now just getType -862b2212c267 HEAD@{420}: rebase (pick): Add support for printing globals 
-9815c0143466 HEAD@{421}: rebase (pick): Update to use correct type cast -4ebfeafd5ae2 HEAD@{422}: rebase (pick): Add support for global variables -7309e89eeead HEAD@{423}: rebase (pick): * Add capability of printing out a global variable -332d403bc73d HEAD@{424}: rebase (pick): * Method::getType should return type cast as MethodType, eliminate getMethodType -09b1c8b53b5b HEAD@{425}: rebase (pick): Update assertion to allow extra case -c9f650f82da6 HEAD@{426}: rebase (pick): Fix a bug I introduced (assertion failed: Unknown operand type), and convert to predicate style for type checks -ca665a4f7301 HEAD@{427}: rebase (pick): Implement global variable support -78c27fc8588b HEAD@{428}: rebase (pick): Add support for external methods -3b4968db64d9 HEAD@{429}: rebase (pick): Genericize support for calling functions a bit -f2292a6f5bef HEAD@{430}: rebase (pick): Add support for tool specified linker options -e7d26918d539 HEAD@{431}: rebase (pick): Remove the definitions of 3 global functions that don't belong in the core -3268cb00c3aa HEAD@{432}: rebase (pick): Implement the subset of the GetConstantValueAsSignedInt function that is needed, locally. Remove the two support functions to inline their contents. -5ce25378872d HEAD@{433}: rebase (pick): Implement the subset of the GetConstantValueAsSignedInt function that is needed, locally. 
-7f1dfe6c75ba HEAD@{434}: rebase (pick): Remove 3 gross global functions that don't belong here -bcfd7d3b4a2f HEAD@{435}: rebase (pick): Rename contype to subtype -925282156193 HEAD@{436}: rebase (pick): Make ADCE more robust, it still has problems, but it's getting closer -a8aa73f44e44 HEAD@{437}: rebase (pick): Fix problems with freeing memory twice -3e58e695c052 HEAD@{438}: rebase (pick): Rename file to be consistent with header name -45093beca645 HEAD@{439}: rebase (pick): Rerun backend tests if as or llc is changed -920978127ffb HEAD@{440}: rebase (pick): iFix dependence order -0dda5dffe9e1 HEAD@{441}: rebase (pick): Clean up Type class by removing mutable ConstRules member and use annotations insead -8926543a23ba HEAD@{442}: rebase (pick): Clean up ConstRules stuff to use annotations instead of a mutable member in Type -0554a9254254 HEAD@{443}: rebase (pick): Convert ConstRules to use annotations to clean it up. -ae70148c0e33 HEAD@{444}: rebase (pick): Fix automatic dependence on static libs -57a4461c8737 HEAD@{445}: rebase (pick): Handle cast float-to-float or cast double-to-double. -e26d17b941c6 HEAD@{446}: rebase (pick): Fix build breakage. :( -468369dd37c3 HEAD@{447}: rebase (pick): I really don't like it when people break the build. -093db3f2c28b HEAD@{448}: rebase (pick): Remove extraneous space -d7fa14961741 HEAD@{449}: rebase (pick): Remove extra #include -13c90b0c405c HEAD@{450}: rebase (pick): *** empty log message *** -ad0e744b8800 HEAD@{451}: rebase (pick): *** empty log message *** -479d6ea91cea HEAD@{452}: rebase (pick): Committed for compliation. Not yet final. 
-3e3b370cfca9 HEAD@{453}: rebase (pick): --Ruchira -215ca905feb5 HEAD@{454}: rebase (pick): New testcase to deal with lists -91c3618d9fba HEAD@{455}: rebase (pick): New file for supporting abstract types -d0201e668537 HEAD@{456}: rebase (pick): Make use of the new TOOLNAME/USEDLIBS options provided in Makefile.common -edadb7525ef9 HEAD@{457}: rebase (pick): Executables all live in a nice centralized location now -a461b8412da2 HEAD@{458}: rebase (pick): Executables have moved into centralized location -f02709b9d7a9 HEAD@{459}: rebase (pick): Support TOOLNAME and USEDLIBS options for easier tool building -e41581d43385 HEAD@{460}: rebase (pick): Remove old old file -cb93b76e7fdc HEAD@{461}: rebase (pick): Convert llc driver to standard tool format -cb8ea37f651a HEAD@{462}: rebase (pick): Provide a way to change the incoming value for a phi node -21daac648d0a HEAD@{463}: rebase (pick): Add llc path to setup -a3f8c0135396 HEAD@{464}: rebase (pick): Uhm... that was really bad -a428778af63a HEAD@{465}: rebase (pick): Clean up driver -545e4d0d6342 HEAD@{466}: rebase (pick): Make makefile not depend on where stuff is installed!!!! 
-63fb58422942 HEAD@{467}: rebase (pick): Updates to work with new lack of constant pool -d08a74d2397c HEAD@{468}: rebase (pick): Remove unneeded #includes -17ba4b1a7377 HEAD@{469}: rebase (pick): Remove unnecesary #include add dump calls pulled out of .h file -4baa9c258dc7 HEAD@{470}: rebase (pick): * Remove lots of #includes -5fbff64a9093 HEAD@{471}: rebase (pick): * Remove lots of unnecesary #includes -fa24fc193248 HEAD@{472}: rebase (pick): * Remove lots of annoying extra #includes -4a1115871ab1 HEAD@{473}: rebase (pick): * Add tag so emacs knows it's a c++ file -aa1f51a47db4 HEAD@{474}: rebase (pick): Add tags so emacs knows these are C++ files -66cdfde08ddd HEAD@{475}: rebase (pick): Remove extra space -df7b57cb2016 HEAD@{476}: rebase (pick): Remove ReversePostOrderTraversal declaration -e182a70686df HEAD@{477}: rebase (pick): * Don't predefine ReversePostOrderTraversal because it adds a dependence on vector -a7b751de9148 HEAD@{478}: rebase (pick): Check opaque, abstract, and recursive type handling -f65fc4c4b0ca HEAD@{479}: rebase (pick): NEw file -f5797eee291e HEAD@{480}: rebase (pick): Moved functionality into the other constant pool stuff -c317aff403de HEAD@{481}: rebase (pick): Follow the golden rule of the coding standards guide: Make the code look -228b2301a5b8 HEAD@{482}: rebase (pick): The header file for a translation unit should always be included first -ec28d6b33de6 HEAD@{483}: rebase (pick): A file should always include it's private header file *FIRST* see the -a4fd66e4bb44 HEAD@{484}: rebase (pick): Constant pool is eliminated -895e8966aaf7 HEAD@{485}: rebase (pick): Add support for iteration through type graphs -1bc1a1e55811 HEAD@{486}: rebase (pick): Remove support for const pool merging, which is obsolete now. -1f68aecd491b HEAD@{487}: rebase (pick): Annotations are now const -aa592d53a869 HEAD@{488}: rebase (pick): Build lli first -a7352c105c5a HEAD@{489}: rebase (pick): Symboltables are sorted in the bytecode, so no problems here! 
-5fdc17bb41c2 HEAD@{490}: rebase (pick): Cleanup -98cf8e526cfc HEAD@{491}: rebase (pick): Support abstract types -e64122141c47 HEAD@{492}: rebase (pick): Support a abstract, opaque, and recursive types -299db7ad37f6 HEAD@{493}: rebase (pick): Types and constnats are wierd objects in the symtabs -391ecb41103e HEAD@{494}: rebase (pick): Modules must have a valid, nonnull type. Make them void -186d4233d066 HEAD@{495}: rebase (pick): Support new setName interface -7339777dc091 HEAD@{496}: rebase (pick): * Support new setname interface -ba310ef38dcf HEAD@{497}: rebase (pick): * Cnstants are now global objects -05ef1117f8d2 HEAD@{498}: rebase (pick): Support new setName itf -3d922776af3d HEAD@{499}: rebase (pick): Annotations are const objects now -3ad5e85b0e7c HEAD@{500}: rebase (pick): Types and constants are wierd things in symbol tables now -e95eeb238191 HEAD@{501}: rebase (pick): * Eliminate reference to ConstantPool class -69013e51442c HEAD@{502}: rebase (pick): Constant pool is dead -6aabf9bb8d09 HEAD@{503}: rebase (pick): Constants are now global unique objects -5eccfe8f4744 HEAD@{504}: rebase (pick): * Eliminate constant pool dependancies: -116bd1f60c7d HEAD@{505}: rebase (pick): * Supoprt global constants -719ec15e3bca HEAD@{506}: rebase (pick): * Support global constants -3e22e6fbc35c HEAD@{507}: rebase (pick): annotations are now const -94469c594e8f HEAD@{508}: rebase (pick): * Emit bytecode using a deque instead of a vector to be faster -cd31dfffe14a HEAD@{509}: rebase (pick): * Remove support for internal constant pool -0ccb4914c583 HEAD@{510}: rebase (pick): * Assembly writer is not a module analyzer anymore -81be60efae5b HEAD@{511}: rebase (pick): * Add support for forward referencing types -92f9faa8cd41 HEAD@{512}: rebase (pick): Add support for forward referencing types -7cb39bcc9b11 HEAD@{513}: rebase (pick): Add support for an opaque type -0cc953a4eb36 HEAD@{514}: rebase (pick): Remove #include of nonexistant header file -e659434201e7 HEAD@{515}: 
rebase (pick): * Slot calc is now simpler and not based on module analyzer. -d9953427123b HEAD@{516}: rebase (pick): Module analyzer no longer has to iterate over constant pool -59b2b4978c66 HEAD@{517}: rebase (pick): Simplify code by eliminating need to hang onto constant pool references -ca915a915738 HEAD@{518}: rebase (pick): * Fixed mapped_iterator to actually work with functors -db2d5ad6fc13 HEAD@{519}: rebase (pick): Constant pools no longer exist -e6503b4355e7 HEAD@{520}: rebase (pick): Eliminate DoConstantPoolMerging. ConstantPools no longer exist -8e819e87f9aa HEAD@{521}: rebase (pick): You no longer have to delete constants! They are located in a global -9e1456843e33 HEAD@{522}: rebase (pick): Annotations are now passed around as const objects -37781e4265d3 HEAD@{523}: rebase (pick): Use a deque instead of a vector for greater efficiency writing bytecode -68b52d48b8d2 HEAD@{524}: rebase (pick): Clean stuff up. -ef8df94e3aba HEAD@{525}: rebase (pick): Simplify SlotCalculator. SlotCalculator is now not a ModuleAnalyzer -220b450fb4a8 HEAD@{526}: rebase (pick): Simplify analyzer -435cda780cfc HEAD@{527}: rebase (pick): * Fix long standing problems that would affect inlining. How could this have worked? -0cb567d4d189 HEAD@{528}: rebase (pick): Add assertion to check for -e0ab1c69297f HEAD@{529}: rebase (pick): * Values are AbstactTypeUsers to support abstract types -16c83b3c1356 HEAD@{530}: rebase (pick): Remove extra whitespace at EOL -a2e45cbc6285 HEAD@{531}: rebase (pick): * Add support for Opaque & Abstract types. -875576a6650b HEAD@{532}: rebase (pick): Support abstract types by keeping on the use list of the abstract type. -70bc7b10091b HEAD@{533}: rebase (pick): SymTabValues no longer hold constant pools -c3d4689a42cb HEAD@{534}: rebase (pick): SymTabValue no longer includes ValueHolder for Module. 
Include it ourself -aefcbb9a7f94 HEAD@{535}: rebase (pick): * Support new setName interface -22059fea78a8 HEAD@{536}: rebase (pick): Support new setName interface -30dd0bdb5f35 HEAD@{537}: rebase (pick): * Add new DerivedType base class that goes between Type and the derived types -fb9e4e1fcdc4 HEAD@{538}: rebase (pick): Implement support for globally unique constants. Constants no longer live -924247d31d99 HEAD@{539}: rebase (pick): Add support for walking type graphs -fa3aa419ab9f HEAD@{540}: rebase (pick): Changing setName semantics -38d0897ea620 HEAD@{541}: rebase (pick): Make annotations operations const with a mutable annotation list so that -59216be202de HEAD@{542}: rebase (pick): Fixed the "output constant pool even if he have no constants" issue -ab906331394b HEAD@{543}: rebase (pick): whoo hoo I did something! :) -628ad7914f58 HEAD@{544}: rebase (pick): Make fib be more real -e5ad7ea67698 HEAD@{545}: rebase (pick): *** empty log message *** -147dbdd611ae HEAD@{546}: rebase (pick): *** empty log message *** -07a717031897 HEAD@{547}: rebase (pick): Added directory LiveVar/ -2b9d47fba512 HEAD@{548}: rebase (pick): Makefile for tools/tests/ -ef1302a7da62 HEAD@{549}: rebase (pick): Driver to test IsPowerOf2. Could be extended for other library routines. -eb98e995c108 HEAD@{550}: rebase (pick): Add testcodegen target, and restrict which tests are run for it. -8e434f5bede3 HEAD@{551}: rebase (pick): Added nonterminals for arithmetic operations where one operand is constant. -8846488b12e7 HEAD@{552}: rebase (pick): Changed link line. -b9204403813b HEAD@{553}: rebase (pick): Add calls to NormalizeMethod() and to ScheduleInstructionsWithSSA(). -ad0b73970f13 HEAD@{554}: rebase (pick): Makefile for InstrSched/ -03d07894e506 HEAD@{555}: rebase (pick): Remove source list. -bf1f10e707bf HEAD@{556}: rebase (pick): Added directory InstrSched. 
-db25e211611a HEAD@{557}: rebase (pick): Major changes too hard to document :-) -6195f94883e7 HEAD@{558}: rebase (pick): Added function MachineInstr::operandIsDefined(i) and decl for -4aa6182a26f1 HEAD@{559}: rebase (pick): Extensive additions for supporting instruction scheduling. -21aba4339c60 HEAD@{560}: rebase (pick): Added class MachineSchedInfo and several supporting classes -d0513476dc87 HEAD@{561}: rebase (pick): Implementation of instruction scheduling for LLVM. -3222a43515d4 HEAD@{562}: rebase (pick): Class that encapsulates priority heuristics for instruction scheduling. -a3bb9d7ef0f4 HEAD@{563}: rebase (pick): Scheduling DAG for instruction scheduling. Currently for a single basic block. -f4be165ab676 HEAD@{564}: rebase (pick): Moved debug options declaration to header file, and moved -f914ba215bc2 HEAD@{565}: rebase (pick): Moved function PrintMachineInstructions here. -fb1a19d9a411 HEAD@{566}: rebase (pick): analyze() now checks to see that we don't analyze the same method twice. -9e8f74af6ec5 HEAD@{567}: rebase (pick): *** empty log message *** -3a2656af412d HEAD@{568}: rebase (pick): Simplification transformations to normalize the code for later passes. -bbb02c1d7c9b HEAD@{569}: rebase (pick): Use const int instead of #define. -d0b683357562 HEAD@{570}: rebase (pick): Add copy and assignment operators for POIterator, and -9f98fb5b9284 HEAD@{571}: rebase (pick): Added InstrSched library to link line. 
-dac45308ccd6 HEAD@{572}: rebase (pick): I suck -ff67dcc22be0 HEAD@{573}: rebase (pick): Initial checkin of TargetData code -8472822ff914 HEAD@{574}: rebase (pick): Remove target specific stuff from Type classes -13bd108c03e0 HEAD@{575}: rebase (pick): Remove target specific method from MemAccessInst class -2771054dbf3e HEAD@{576}: rebase (pick): Convert to use the new factored out TargetData class -ddadbddb187b HEAD@{577}: rebase (pick): Factor code out to the TargetData class -3e08de6cee86 HEAD@{578}: rebase (pick): Use the new TargetData class to factor out some of the shared code -729c3d47e91e HEAD@{579}: rebase (pick): Remove target specific method. -5ca1a2bcbc44 HEAD@{580}: rebase (pick): Remove target specific code, move to TargetData.cpp file -7cd798c969a4 HEAD@{581}: rebase (pick): Support passing a data pointer to annotation factory methods -3b3efaeeaf76 HEAD@{582}: rebase (pick): Demolish explicit source list -1afbb4027fae HEAD@{583}: rebase (pick): Extend annotations to pass data pointers around to the functions -74782cb4a340 HEAD@{584}: rebase (pick): Add another TODO: sigh -ab8c3000e11e HEAD@{585}: rebase (pick): Lots of new functionality -a9a8941bb775 HEAD@{586}: rebase (pick): Remove explicit source list -f011d42626b3 HEAD@{587}: rebase (pick): Add dependence to libvmcore. 
-37c91bae4bcd HEAD@{588}: rebase (pick): Make sure noone branches to the entry node of the method -ee0ddad61d01 HEAD@{589}: rebase (pick): Compile LLI -d34454f43919 HEAD@{590}: rebase (pick): Rename start methods to main so interpreter works easier -44dfadcd4a33 HEAD@{591}: rebase (pick): Add annotation support -03d42fd345d1 HEAD@{592}: rebase (pick): Handle case where there is no exit node from a flowgraph -6c329f4eaed8 HEAD@{593}: rebase (pick): Changed an assertion message -350d117dbdd5 HEAD@{594}: rebase (pick): Add annotation support to value -311767f056af HEAD@{595}: rebase (pick): * Add assertions -55c6be031f13 HEAD@{596}: rebase (pick): Initial checkin of interpreter -3fdb0df0b0b2 HEAD@{597}: rebase (pick): LV code on machine instructions -19e88d249e25 HEAD@{598}: rebase (pick): LV info on machine instructions -b1dfaf6145ab HEAD@{599}: rebase (pick): Corrected the compilation error by making the ValOperator class a friend of -3059c0b24b7c HEAD@{600}: rebase (pick): Always set isDef for operand in position resultPos. -081ab0fa9e0f HEAD@{601}: rebase (pick): Changed SetMachineOpernad calls in Set3OperandsFromInstr so that the -6be8772e0463 HEAD@{602}: rebase (pick): Changed case 64 to make the first arg of phi a defintion -abc698370478 HEAD@{603}: rebase (pick): Can't use ref to stack value! -56e7b4262d3e HEAD@{604}: rebase (pick): Needed old conditions as well as new in skipToNextVal()! -2b2d58164051 HEAD@{605}: rebase (pick): Bug fix in ValOpIterator: not moving past operand with NULL Value. -4a1a05bc1473 HEAD@{606}: rebase (pick): *** empty log message *** -32525540235d HEAD@{607}: rebase (pick): added a default isDef arg to SetMachineOperand method - Ruchira -ac7c6045f846 HEAD@{608}: rebase (pick): Added isDef field to MachineOperand class - Ruchira -f0942ac597e7 HEAD@{609}: rebase (pick): Add CC operand as 4th operand of SUBcc, and mark it as a def. 
-9568ebd1a049 HEAD@{610}: rebase (pick): Use extra operand for instructions that set a CC register that -17d5bdb8c5dc HEAD@{611}: rebase (pick): Also, move burg rule to Makefile.common. -5efe6ec39c6f HEAD@{612}: rebase (pick): And add rule to create a .cpp source file from burg input file! -5b8a3ae17209 HEAD@{613}: rebase (pick): Better still, lets move pathname for Burg to Makefile.common. -415c589a5b97 HEAD@{614}: rebase (pick): Add path and options for burg. -15a90d21c83f HEAD@{615}: rebase (pick): Use full pathname for burg. -044f893ad519 HEAD@{616}: rebase (pick): Allow numOperands of -1 for variable #operands. -6b7eebde250d HEAD@{617}: rebase (pick): Simplify command line options, and add option for printing -5ac12a3af462 HEAD@{618}: rebase (pick): Had used the wrong option. -27df4e0f0c54 HEAD@{619}: rebase (pick): Added tree nodes for Phi instructions. -3d470f658f50 HEAD@{620}: rebase (pick): Generate tree nodes for Phi instructions. -5745231c1ee0 HEAD@{621}: rebase (pick): Allow machine instructions with variable numbers of arguments. -3de046767b96 HEAD@{622}: rebase (pick): Added dummy Phi instruction. -7df9d89320cb HEAD@{623}: rebase (pick): Generate dummy Phi machine instruction, plus a bug fix for BrCond(boolreg). -371350759bd5 HEAD@{624}: rebase (pick): Added support for testing instruction selection on all but 2 tests. -09c28c22fde1 HEAD@{625}: rebase (pick): Added class MachineCodeForBasicBlock. -ee4ef4ffe10c HEAD@{626}: rebase (pick): Record machine instructions in the vector for each basic block. -75e6a0432e3b HEAD@{627}: rebase (pick): Added vector of machine instructions for the basic block. 
-6c523d7b3a45 HEAD@{628}: rebase (pick): New test cases -a991e5fcc19d HEAD@{629}: rebase (pick): Remove some gross stuff -33162a8d8802 HEAD@{630}: rebase (pick): Allow vararg method types with 0 fixed types -18a61fcb43a4 HEAD@{631}: rebase (pick): Make error msg nicer -c3e2fe5af54b HEAD@{632}: rebase (pick): Enable the elimination of method prototypes that are not referenced -8fb736efbcdd HEAD@{633}: rebase (pick): * Make sure that the size of the type field can also control the output -a9dab08596d3 HEAD@{634}: rebase (pick): * Add calls to failure template so that it is actually possible to debug -077a425d4516 HEAD@{635}: rebase (pick): * Fix bugs -03c4f8933762 HEAD@{636}: rebase (pick): * Enable the use of escaped literal strings -934c4b501a22 HEAD@{637}: rebase (pick): Modify var names to make it apparant that the code is really generic -0a587153f15f HEAD@{638}: rebase (pick): Changes to make test scripts more reliable -e67cf2e7e23d HEAD@{639}: rebase (pick): Add test of string constants -c20b0ebc51c4 HEAD@{640}: rebase (pick): Added function printIndent. -841fdaf6e2f7 HEAD@{641}: rebase (pick): Added a pointer hash function object for use in pointer maps. -65fb5153e342 HEAD@{642}: rebase (pick): Make a function const. -0762b37e7677 HEAD@{643}: rebase (pick): Remove lib/LLC library. -cf6a5702c91e HEAD@{644}: rebase (pick): Added several SPARC instructions including conditional move and SETHI. -1fc9217c15ee HEAD@{645}: rebase (pick): Remove redundant and unused functions. -76b1285bfdc7 HEAD@{646}: rebase (pick): Added UltraSparcInstrInfo class to specialize class MachineInstrInfo. -6e560c22a4f9 HEAD@{647}: rebase (pick): Eliminate unused function. -5384b204a5da HEAD@{648}: rebase (pick): Bug fixes: -898348afb52d HEAD@{649}: rebase (pick): Added MachineInstrInfo class and moved instruction-related members there. -0c2462a079ed HEAD@{650}: rebase (pick): Eliminate separate enum for operand register type. 
-eac34ac45c71 HEAD@{651}: rebase (pick): Work around a few 'sorting issues' with the bytecode output that causes the bytecode -94e2da805ed4 HEAD@{652}: rebase (pick): Don't write out constants that do not have a name, they will be inlined. -0fb64b07f943 HEAD@{653}: rebase (pick): Refactor some of the constant stuff so that we can return complex constant -ad7945a175d6 HEAD@{654}: rebase (pick): Add an arg to insertVal to allow us to prevent builtin types from being ignored -fe70c81141d7 HEAD@{655}: rebase (pick): Add an arg to insertVal to allow us to prevent builtin types from being ignored -f2a10b61e2a7 HEAD@{656}: rebase (pick): New test for varargs functions -d7f49ed443ab HEAD@{657}: rebase (pick): Add library dep -fd413193db44 HEAD@{658}: rebase (pick): Parenthesize output for expranalyze so that pointer stuff being multiplied isn't confusing -88bb8ebe01fd HEAD@{659}: rebase (pick): Build as before dis -392cb8a9804a HEAD@{660}: rebase (pick): Add support for extern varargs methods & varargs method calls -89cb2de0eeac HEAD@{661}: rebase (pick): Add support for extern varargs methods & varargs method calls -aad7190b6bea HEAD@{662}: rebase (pick): Fix a bug when compiling 'shl ubyte * %var, ubyte 2' -b5d668969e65 HEAD@{663}: rebase (pick): Filter out noncore stuff -36123a777b5e HEAD@{664}: rebase (pick): Fixed a bug exposed when doing something like this: -notanoption --help -a9622c681ad6 HEAD@{665}: rebase (pick): Changed printValue() to print constant value if the value is a constant. -bb2db6c88f8a HEAD@{666}: rebase (pick): *** empty log message *** -8d83c40582cc HEAD@{667}: rebase (pick): Doh! Wrong Optional flag. 
:( -a6c90bf6ee4c HEAD@{668}: rebase (pick): Add a comment indicating that there is documentation of the library -a98afe4b0579 HEAD@{669}: rebase (pick): Initial checking of some rough documentation for commandline library -48fca76a95a8 HEAD@{670}: rebase (pick): Change option name slightly -f62a2f2be6a9 HEAD@{671}: rebase (pick): Minor changes to implementation of CommandLine library to let users override -6c74f799d80b HEAD@{672}: rebase (pick): Add a missing tag -026bec7cf715 HEAD@{673}: rebase (pick): Use the new Alias command line option -d2ec898cfb0e HEAD@{674}: rebase (pick): CommandLine library cleanup. No longer use getValue/setValue, instead, just treat the commandline -f74319d29b56 HEAD@{675}: rebase (pick): Doh! Wrong accessor. Caused 'can not read bytecode' errors. :( -104c6f0c01f6 HEAD@{676}: rebase (pick): -help is verbose enough that we don't need this anymore -b0dcda34759b HEAD@{677}: rebase (pick): Eliminated the Unique class in favor of NonCopyable and NonCopyableV -ec8abea1c777 HEAD@{678}: rebase (pick): Moved inline/llvm/Tools/* to include/llvm/Support/* -f434f8970fdd HEAD@{679}: rebase (pick): Initial checkin -bd177131a770 HEAD@{680}: rebase (pick): Fix coding style issues to actually attempt to be somewhat uniform -f4c632fabc9b HEAD@{681}: rebase (pick): Nonpolymorphic class, doesn't need a virtual dtor! -7bc807e14176 HEAD@{682}: rebase (pick): Clean up hash table usage -efe8c7aa1cf0 HEAD@{683}: rebase (pick): Removal of the redundant CompileContext wrapper -b099c14cc8ba HEAD@{684}: rebase (pick): Verbosify descriptions -b72d002be10f HEAD@{685}: rebase (pick): Large scale changes to implement new command line argument facility -23381cd5b5a1 HEAD@{686}: rebase (pick): Remove dependence on command line library. Silly anyway. 
-4ee192c0ff7a HEAD@{687}: rebase (pick): Make it pickier -442f68038647 HEAD@{688}: rebase (pick): Add flag for emacs so it realizes it's C++ code -99c4af7c6b12 HEAD@{689}: rebase (pick): New test case -01ef66c762bb HEAD@{690}: rebase (pick): Privatize LLCOptions. It had no business being visible to the entire -b18d26deb43d HEAD@{691}: rebase (pick): Move private header into private directory -57cb798a4677 HEAD@{692}: rebase (pick): Convert from using C style char*'s to strings. -144db6c30c7a HEAD@{693}: rebase (pick): Remove String file some more -78fc43ff73cb HEAD@{694}: rebase (pick): Remove stringutils.h file -3b9829cf7645 HEAD@{695}: rebase (pick): Destroy the StringUtils.h file -a1f7c42bcb01 HEAD@{696}: rebase (pick): Eliminate lots of unnecessary #includes and forward decls -f5e75c7e705d HEAD@{697}: rebase (pick): Eliminate many unneccesary #includes -1aa17f3bb25c HEAD@{698}: rebase (pick): Make code fit in 80 columns more -b95c07e35c7f HEAD@{699}: rebase (pick): Remove unneccesary #includes -5381f682dd30 HEAD@{700}: rebase (pick): Exterminate nasty Cisms -11f554634433 HEAD@{701}: rebase (pick): Refer to include/llvm/CodeGen not Codegen -d2e18d70c558 HEAD@{702}: rebase (pick): Instructions for use -34e368c59b5b HEAD@{703}: rebase (pick): Make sure we build all of the code! 
-f0d858ed34f1 HEAD@{704}: rebase (pick): Renamed include/llvm/Codegen to include/llvm/CodeGen -6c8ccac2de98 HEAD@{705}: rebase (pick): Fix code to be in a consistent style -c267a7d71c7b HEAD@{706}: rebase (pick): More minor reorganizations -51777d7f1b52 HEAD@{707}: rebase (pick): Remove getTempValuesForMachineCode from the Instruction interface -21be61506817 HEAD@{708}: rebase (pick): Filter out the sparc.burm.c file -ef91903bfcbf HEAD@{709}: rebase (pick): Moved LLC subdir to the tools top level directory -0e90eb4b6eb7 HEAD@{710}: rebase (pick): Make the makefile work -345e38ed07ac HEAD@{711}: rebase (pick): Add new ctor for ConstPoolBool -9d9614205cf1 HEAD@{712}: rebase (pick): Add new constructor for const pool bool -363bdd9a0676 HEAD@{713}: rebase (pick): Add support for casts -7fd6dcb064ef HEAD@{714}: rebase (pick): Add support for casting operators -61f218f640e2 HEAD@{715}: rebase (pick): Support changed expression api -0d64b2ba0a9c HEAD@{716}: rebase (pick): More functionality, renamed API -df5dbc8e3949 HEAD@{717}: rebase (pick): Moved isIntegral to the Type system -a61ce81cc4d8 HEAD@{718}: rebase (pick): Autodep functionality broken. Remove so we get successful builds -148b96074cee HEAD@{719}: rebase (pick): Version of testmemory to test alloca, load and store. -5153d313b0ce HEAD@{720}: rebase (pick): Used a bigger constant in loopfunc.ll that doesn't fit in immed field. -36eb43e26456 HEAD@{721}: rebase (pick): Utility routines for simpler access to the value of an integer constant. -e31cf51c03a4 HEAD@{722}: rebase (pick): Program options class. -2339d0cf578d HEAD@{723}: rebase (pick): Driver and options for the llc compiler. -03cdc0b1bceb HEAD@{724}: rebase (pick): Description of the SPARC as a target architecture. -dd4b4355c99d HEAD@{725}: rebase (pick): Base clas for a description of a target architecture. -8a2e2fbd50e6 HEAD@{726}: rebase (pick): Instruction selection via pattern matching on instruction trees using BURG. 
-fea7ff57c801 HEAD@{727}: rebase (pick): *** empty log message *** -fae069f4e36b HEAD@{728}: rebase (pick): Added CodeGen, LLC, and Support. -24812650a87f HEAD@{729}: rebase (pick): General support utilities like a program options class and a StringMap -7c52e8197cf9 HEAD@{730}: rebase (pick): CompileContext and options class for the llc compiler. -aed61d90db66 HEAD@{731}: rebase (pick): Header files for the target architecture description and for instruction -82015f75875f HEAD@{732}: rebase (pick): Added support for getting the dependence of an executable on its libs, -70d2dc737e0b HEAD@{733}: rebase (pick): Add isIntegral() method to SignedIntType and UnsignedIntType. -c7371d8afb38 HEAD@{734}: rebase (pick): Provide simpler ways to extract the value of an integer constant. -15e79bcb6e4b HEAD@{735}: rebase (pick): Compute and cache information about the storage size and layout -6b94be0fa4af HEAD@{736}: rebase (pick): Provide uniform access to the pointer operand and to the index -2010845c92f1 HEAD@{737}: rebase (pick): Added a representation of the machine instructions generated -213160e0bb9f HEAD@{738}: rebase (pick): Start of expression analysis support -9250c349b550 HEAD@{739}: rebase (pick): Header to raise and lower representation -56dbc9359f2b HEAD@{740}: rebase (pick): Add support to call LevelRaise -6ffd08afd81c HEAD@{741}: rebase (pick): Update makefile for more accurate deps -f2df47febf1b HEAD@{742}: rebase (pick): Implement ensureTypeAvailable -e06171c5109f HEAD@{743}: rebase (pick): Add support for constant propogation of multiplies -ec9be9e818a5 HEAD@{744}: rebase (pick): Factor out WriteAsOperand. -4bef44e0adfc HEAD@{745}: rebase (pick): Add a comment. -f012589e78ed HEAD@{746}: rebase (pick): Add multiply as a supported constant propogation operation -643641cb450c HEAD@{747}: rebase (pick): New function: WriteAsOperand. 
-80470d72e903 HEAD@{748}: rebase (pick): Add new base class ConstPoolInt, useful for dealing with integral constants -7a5ca318dfe7 HEAD@{749}: rebase (pick): Add new method, ensureTypeAvailable -9abf2d95c339 HEAD@{750}: rebase (pick): Change is*Type to be a casting convertion operator -a7a79aafa026 HEAD@{751}: rebase (pick): Add an function to BinaryOperator to swap the two operands -f8bb46fb137c HEAD@{752}: rebase (pick): Add short forms of the get*Type methods. -9dbc6bb6b44d HEAD@{753}: rebase (pick): Fix nasty typo -d59d2aa4a97e HEAD@{754}: rebase (pick): Fix clean target -bf1c55b14525 HEAD@{755}: rebase (pick): Compile source files in alphabetical order -02990b116ef2 HEAD@{756}: rebase (pick): Fixed typo in comment -f5b88528d736 HEAD@{757}: rebase (pick): Support external methods -643d6d93c309 HEAD@{758}: rebase (pick): New test case for prototype support -12bb537e90da HEAD@{759}: rebase (pick): Reordered link line for correct static linking. -903f9efa3f84 HEAD@{760}: rebase (pick): Changed default to building library archives instead of shared objects. -aabc8315f101 HEAD@{761}: rebase (pick): Implement forward/external declarations for methods. -64e2c4726aa8 HEAD@{762}: rebase (pick): Implement forward/external declarations for methods. 
Also, emit an error if a method -a2be53991d96 HEAD@{763}: rebase (pick): Rename 'isMethodExternal' to 'isExternal' -af58b501dadf HEAD@{764}: rebase (pick): Add notes on instruction selection pass -3be6a7da5434 HEAD@{765}: rebase (pick): New testcase from GCC doing array operations -5702913064a2 HEAD@{766}: rebase (pick): Add support for assembly printing fp constants -05c8093e0529 HEAD@{767}: rebase (pick): Add support to the bytecode writer to recognize floating point constants -55f91192bf7c HEAD@{768}: rebase (pick): Add support to the bytecode reader to recognize floating point constants -828ae092b096 HEAD@{769}: rebase (pick): Add support to the parser to recognize floating point constants -9d78fb9b25fa HEAD@{770}: rebase (pick): Add a function to convert a double to a string -d25973d16cd9 HEAD@{771}: rebase (pick): Add support to write and read a fixed amount of raw data -c35d12757fd4 HEAD@{772}: rebase (pick): Add a note -94c6d03b6c82 HEAD@{773}: rebase (pick): * ValueHolder now takes 3 arguments -67b11a5bd739 HEAD@{774}: rebase (pick): Add knowledge about the struct form of the GetElementPtr instruction -06adb0b2ab08 HEAD@{775}: rebase (pick): Remove dependency on the structure of ValueHolder. -0559fb6b55e4 HEAD@{776}: rebase (pick): * The parent of a constant pool is a symtabvalue, not a value. -fb6d8d18898e HEAD@{777}: rebase (pick): The parent of a constant pool is a symtabvalue, not a value. -c20531f12219 HEAD@{778}: rebase (pick): Added some comments, preparing to add global variables and method prototypes -39fae71357a5 HEAD@{779}: rebase (pick): * The parent of a constant pool is a SymTabValue, not a value. -e3812fad3a2d HEAD@{780}: rebase (pick): Made the following changes: -c6df40cee22e HEAD@{781}: rebase (pick): Added more todo's. Don't I ever accomplish anything? -6ee823f32e3d HEAD@{782}: rebase (pick): Add DebugValue member. 
-c8281fb7bdfe HEAD@{783}: rebase (pick): Made it not inline -e0c85017da0a HEAD@{784}: rebase (pick): Add DebugValue global function -a93311112bbc HEAD@{785}: rebase (pick): Don't clean out the type plane of the constant pool... this is a hack. FIXME -97add160370e HEAD@{786}: rebase (pick): Make sure that types go in the constant pool if they are used. -3f0bab207223 HEAD@{787}: rebase (pick): hasSideEffects should be marked virtual -08fc7cf1be14 HEAD@{788}: rebase (pick): Modify notes -96f249a20298 HEAD@{789}: rebase (pick): Fix stupid typo -c874f2e554d5 HEAD@{790}: rebase (pick): Initial checkin of coding standards -c0abd659a4e3 HEAD@{791}: rebase (pick): Updated documentation for load, store & getelementptr -8ff1023c5729 HEAD@{792}: rebase (pick): add coverage of newly implemented instructions. -87ad59d49e91 HEAD@{793}: rebase (pick): Implementation of Store & GetElementPtr -2b2b55bdec44 HEAD@{794}: rebase (pick): Implement checking for new instructions -4fb6aa4a9e7a HEAD@{795}: rebase (pick): Add note -e9d048cd6792 HEAD@{796}: rebase (pick): Implemented shl, shl, & load instructions -5833b72ec4b1 HEAD@{797}: rebase (pick): Moved Cast from being a Unary instruction to being an "Other" instruction -6b062514ff40 HEAD@{798}: rebase (pick): Use the CDG to mark branches alive on demand. -3005d00fa8dd HEAD@{799}: rebase (pick): Add a new "addOperand" method to User. -4cb53fdaeffb HEAD@{800}: rebase (pick): Fixed post dominator frontiers! Yaay! -aa86a73a5bec HEAD@{801}: rebase (pick): Neg instruction removed. Cast instruction implemented. -842d6e099476 HEAD@{802}: rebase (pick): Neg instruction removed. TODO item fulfilled. 
-7550203543d2 HEAD@{803}: rebase (pick): Removing unnecesary file -6c99b25c3bd8 HEAD@{804}: rebase (pick): Convert BinaryOperand and UnaryOperator to only take instruction types of -eea771ae35ce HEAD@{805}: rebase (pick): Broad superficial changes: -8b0f42aa64c1 HEAD@{806}: rebase (pick): Devirtualize User::dropAllReferences -b6af5d386268 HEAD@{807}: rebase (pick): Remove dtor's that simply call dropAllReferences -70707b9adf64 HEAD@{808}: rebase (pick): Changed the fundemental architecture of Operands for Instructions. Now -bc4bfa70b8e0 HEAD@{809}: rebase (pick): Changed memory reference instructions to store the result as the implicit -dd91a3d2d9e7 HEAD@{810}: rebase (pick): Fixed some error messages to be nicer -7f723d15a495 HEAD@{811}: rebase (pick): Add note about nuking Instruction::neg -a75385aa3c2e HEAD@{812}: rebase (pick): Initial checkin -93634e0499a7 HEAD@{813}: rebase (pick): Add better support for post dominator information. -0528ba902343 HEAD@{814}: rebase (pick): Add method to unify all exit nodes of a method -947db1d96a4f HEAD@{815}: rebase (pick): Implement support for postdominators, except in dom frontiers -a534bd635e5e HEAD@{816}: rebase (pick): New file, includes method to merge exit nodes together -9996c4da5186 HEAD@{817}: rebase (pick): * Add a DominatorBase base class to maintain root of Dominator info -042a9c01050d HEAD@{818}: rebase (pick): * Added comments -b30075f1d1b2 HEAD@{819}: rebase (pick): Update to include right file -b655a756d6a9 HEAD@{820}: rebase (pick): Initial checkin of analyze tool. 
-2c1174ab6df2 HEAD@{821}: rebase (pick): Build new analyze tool -8350bbd20ae5 HEAD@{822}: rebase (pick): Added analyze to path for SetupOpt script -c705cdc36c17 HEAD@{823}: rebase (pick): Add analyze tool to path for Setup script -b18d4fae85b6 HEAD@{824}: rebase (pick): IntervalPartition was changed to inherit from vector instead of -743ecc7f0095 HEAD@{825}: rebase (pick): IntervalPartition was changed to inherit from vector instead of -f95290eba1c4 HEAD@{826}: rebase (pick): *** empty log message *** -39b38db21649 HEAD@{827}: rebase (pick): Checkin of new Analysis result printing header -c16998cb96e5 HEAD@{828}: rebase (pick): Code got moved from the lib/Assembly/Writer/IntervalWriter.cpp file to -49090bf698f5 HEAD@{829}: rebase (pick): Remove code for printing out Analysis data structures. It got moved -2149e63cb883 HEAD@{830}: rebase (pick): Update documentation a bit, correct #include guard -b048f8d4a4b3 HEAD@{831}: rebase (pick): Add note about tool idea. Change command line of note to be more specific -62e192f9ef8f HEAD@{832}: rebase (pick): Add printing code for dominator info -5630a5fac34d HEAD@{833}: rebase (pick): Checkin of new dominator calculation routines. These will be improved in -ee98cbc6c810 HEAD@{834}: rebase (pick): Enable printing of dominator related information. -889fa47ccf94 HEAD@{835}: rebase (pick): Add new anaysis routines for building dominator related information -98e49f4ca414 HEAD@{836}: rebase (pick): Addition of 'deleter' function. -bac6bb0ae065 HEAD@{837}: rebase (pick): Moved deleter to include/llvm/Tools/STLExtras.h -de19f162cc14 HEAD@{838}: rebase (pick): Initial checkin. 
Should print dead instructions, except it doesn't do -4fb09389a03b HEAD@{839}: rebase (pick): Include ADCE pass, rename include/Opt directory to llvm/Optimizations -d56c334ebb78 HEAD@{840}: rebase (pick): Rename DoSparseConditionalConstantProp -> DoSCCP -d8c1f57237a1 HEAD@{841}: rebase (pick): Add note -c355930e34c8 HEAD@{842}: rebase (pick): Add prototypes for ADCE pass -a9aaeed69342 HEAD@{843}: rebase (pick): Rename DoSparseConditionalConstantProp to DoSCCP -82abf7e9b6fa HEAD@{844}: rebase (pick): Optimizations got their own header files -c49280c35c8a HEAD@{845}: rebase (pick): Implement reduceApply method -512b32b42708 HEAD@{846}: rebase (pick): Add a new pop_back() method -c2d246ce3e77 HEAD@{847}: rebase (pick): The ConstRules class got moved to the opt namespace -6167d0001fe5 HEAD@{848}: rebase (pick): Add a reduceApply method -d78e27809d32 HEAD@{849}: rebase (pick): Split AllOpts.h into lots of little .h files. -0963fce4a854 HEAD@{850}: rebase (pick): Export ConstantFoldTerminator, allow it to fold conditional branches to -849231387470 HEAD@{851}: rebase (pick): Added documentation. Constant fold terminators. 
-6ff2d0ae85ce HEAD@{852}: rebase (pick): Added prototype for ConstantFoldTerminator -6fe27ce22949 HEAD@{853}: rebase (pick): Add a check to avoid allowing V->replaceAllUsesWith(V) -3f6c78a176e1 HEAD@{854}: rebase (pick): Add implementation of BasicBlock::removePredecessor code that was factored -ba8f2c1f6a6f HEAD@{855}: rebase (pick): * Factored RemovePredecessorFromBlock into BasicBlock::removePredecessor -b0e4bdf5d6d0 HEAD@{856}: rebase (pick): We need to make sure to remove PHI nodes in the successor that cannot be -ca8d6d3dd907 HEAD@{857}: rebase (pick): Added a note about a new verification the verifier should do -0686d990fbf0 HEAD@{858}: rebase (pick): Added new removePredecessor method prototype -8930ec2756c1 HEAD@{859}: rebase (pick): Added note, moved note -6109478c092c HEAD@{860}: rebase (pick): Fixed the obnoxious problem that caused an entire directory to rebuild -f2eab63950b5 HEAD@{861}: rebase (pick): Miscellaneous cleanups: -63286b72d223 HEAD@{862}: rebase (pick): Add a new Sparse Conditional Constant Propogation pass -324c5dcc9d82 HEAD@{863}: rebase (pick): Add command line arguments for Constant Pool Merging & Sparse Conditional Constant Prop -b412bc074f36 HEAD@{864}: rebase (pick): Put in test of SCCP. Watch out though, because we need to sort the -b4bb71fd0f0d HEAD@{865}: rebase (pick): Change to use the new GenericBinaryInst class. Support lots more operators. -b00b3f2b682d HEAD@{866}: rebase (pick): Misc cleanup -540c4ae24c8e HEAD@{867}: rebase (pick): * Expose DoConstantPoolMerging -955a4d740cb4 HEAD@{868}: rebase (pick): Convert ugly postincrement to efficient preincrement -1dacaa7057bf HEAD@{869}: rebase (pick): * Move stuff around a bit. -926791f1ca54 HEAD@{870}: rebase (pick): Add instructions to fold unary and binary instructions. 
-77090a9ae8e4 HEAD@{871}: rebase (pick): * Use the new reduce_apply_bool template -d564849afb96 HEAD@{872}: rebase (pick): getBasicBlocks() is not needed anymore for reading Method data -c9c6da3ca1f7 HEAD@{873}: rebase (pick): Added methods to make dealing with switches and branch instructions -5b8794782e8c HEAD@{874}: rebase (pick): Minor formating changes -259afbe701de HEAD@{875}: rebase (pick): Make a new GenericBinaryInst class, instead of providing lots of silly -f26fbe244a3c HEAD@{876}: rebase (pick): Convert postincrements to more efficient preincrements -a685acebd652 HEAD@{877}: rebase (pick): Add a new slew of functions to allow dynamic_cast<> like operation for -49e5848f7266 HEAD@{878}: rebase (pick): Add extra forwarding accessor methods so that getMethodList(), getBasicBlocks() -41a60e4b2e4f HEAD@{879}: rebase (pick): Add more notes -0f81680a2104 HEAD@{880}: rebase (pick): Filter out some more stuff -0bf0d89d693b HEAD@{881}: rebase (pick): Moved UnaryOperator::create to InstrTypes.cpp until there is an iUnaryOps.cpp -4d5a89c84cd7 HEAD@{882}: rebase (pick): Implement induction variable injection! -c463b997b86a HEAD@{883}: rebase (pick): Renamed get.*Operator to create seeing that it would have to be qualified -ef4669a3e67b HEAD@{884}: rebase (pick): * Rename get.*Operator to create seeing that it would have to be qualified -e51131d287ae HEAD@{885}: rebase (pick): A silly stupid test of the loop depth calculator was added. REMOVE in the -d06ca69a78ce HEAD@{886}: rebase (pick): IntervalPartition: recode to use IntervalIterator to do all the work -d2fd00e218cb HEAD@{887}: rebase (pick): Add a helper function bind_obj -7fc109749907 HEAD@{888}: rebase (pick): Big changes. Interval*.h is now more or less finalized. 
IntervalPartition -5848055f471b HEAD@{889}: rebase (pick): CFG.h: change the iterator tag -c1eafb1d07ab HEAD@{890}: rebase (pick): ValueHolder's aren't interseting to me anymore -b1abed97808e HEAD@{891}: rebase (pick): New file due to the Intervals.h splitup -0724b3e90091 HEAD@{892}: rebase (pick): New files due to the Intervals.h splitup -ba91baa363b7 HEAD@{893}: rebase (pick): Add a useless phi for testing with InductionVariables stuff -f916541417e3 HEAD@{894}: rebase (pick): #include a different header due to Intervals.h splitting up -5329f8f73c86 HEAD@{895}: rebase (pick): IntervalPartition & IntervalIterator classes have been split out into -fbea208252f0 HEAD@{896}: rebase (pick): IntervalPartition & IntervalIterator classes have been split out into -d828c7706db9 HEAD@{897}: rebase (pick): Prepare for split between Interval, IntervalIterator, and IntervalIPartition -3b3d00dffb19 HEAD@{898}: rebase (pick): Addition of IntervalIterator. Preparing for rename of Intervals.h to -f948fd9866ee HEAD@{899}: rebase (pick): Added notes -d41887123b2e HEAD@{900}: rebase (pick): Implement a lot more functionality. Now loop invariant and linear -d9b7e634b62b HEAD@{901}: rebase (pick): Interval::HeaderNode is now accessed thorugh an accessor function -32f9e9270229 HEAD@{902}: rebase (pick): Add comments -a3eb9281f099 HEAD@{903}: rebase (pick): Add accessor methods to binary/unary operators -b03e9bf9c388 HEAD@{904}: rebase (pick): Add a space to the PHI node output code to make it look nicer -605e752f0429 HEAD@{905}: rebase (pick): Moved printing code to the Assembly/Writer library. 
-8d6f5b857ebb HEAD@{906}: rebase (pick): Implement the new Interval::isLoop method -bc792603f8b8 HEAD@{907}: rebase (pick): New header file defined with neeto utilities put in one place -3b59035da7ea HEAD@{908}: rebase (pick): Modified to use the new reduce_apply algorithm -58967efc5647 HEAD@{909}: rebase (pick): * Added capability to print out an interval -0246e25d77a6 HEAD@{910}: rebase (pick): * Added comments -f88d93ea7fac HEAD@{911}: rebase (pick): Add a test case: an irreducible flow graph. -d9d7c6d1179e HEAD@{912}: rebase (pick): Get rid of a silly printout that isn't needed right now -667d489ab2c9 HEAD@{913}: rebase (pick): Add note -7d9fb3ab2fe8 HEAD@{914}: rebase (pick): New test case -6cdff3cad3c3 HEAD@{915}: rebase (pick): Add capability to print a derived interval graph -7ac17927b805 HEAD@{916}: rebase (pick): Add capability to build a derived interval graph -56e38307935f HEAD@{917}: rebase (pick): Factor the predeclarations of the CFG.h functionality into a seperate, new header -ea614fef124f HEAD@{918}: rebase (pick): Initial Checking of Interval handling code -644acdbfe034 HEAD@{919}: rebase (pick): Add stub for induction variable code -0a9fdd55c1ca HEAD@{920}: rebase (pick): Add a more complex test case -92e26fb33e21 HEAD@{921}: rebase (pick): Add a test case for interval code -5a4847e078a6 HEAD@{922}: rebase (pick): Add an optimization stub -423d0a2d3762 HEAD@{923}: rebase (pick): New file: Interval analysis support -f0e51696b0f1 HEAD@{924}: rebase (pick): Add a note -7b1bb4951dbd HEAD@{925}: rebase (pick): Filter out more stuff I don't want all the time -df4258024997 HEAD@{926}: rebase (pick): Removed silly test code -118b768314e8 HEAD@{927}: rebase (pick): Added options to print out basic blocks in a variety of different orderings -a925ae543736 HEAD@{928}: rebase (pick): Updates to work with new cfg namespace -0b9af59e24ef HEAD@{929}: rebase (pick): Implement support for writing VCG format output -388ad96269af HEAD@{930}: rebase (pick): Move 
contents to the cfg namespace. -91078f85b47e HEAD@{931}: rebase (pick): Updates to support -fba16076cade HEAD@{932}: rebase (pick): Updates to support -fc5655471d3e HEAD@{933}: rebase (pick): Updates to support -0a9d874b1031 HEAD@{934}: rebase (pick): Updates to support -bcb3231d6613 HEAD@{935}: rebase (pick): Update documentation to reflect: -db7f6e6d79ea HEAD@{936}: rebase (pick): Moved getBinaryOperator to the BinaryOperator class and the getUnaryOperator -2cf6ee5926fe HEAD@{937}: rebase (pick): I actually got something done -783091c4f5c6 HEAD@{938}: rebase (pick): Beautify the source a bit. -94ea386b0c0c HEAD@{939}: rebase (pick): Include support for reverse iteration. -059def0ab975 HEAD@{940}: rebase (pick): Added a stupid testcase for iterators. -be3f7a5b9a7a HEAD@{941}: rebase (pick): Added reverse depth first capability, fixed depth first capability -5b0379941c62 HEAD@{942}: rebase (pick): Updated to work with new CFG.h file. -015520037831 HEAD@{943}: rebase (pick): Moved iterators to the new CFG.h file. -b77ecaef3f49 HEAD@{944}: rebase (pick): New file -2208b443367c HEAD@{945}: rebase (pick): inlining can change methods a second time, so don't rerun inliner when testing for -d7ec7d53b0c1 HEAD@{946}: rebase (pick): Add extra method to PHI node class -94a07501c89d HEAD@{947}: rebase (pick): Significant rework. DCE is still not done (see #ifdef'd out parts) -110f15739545 HEAD@{948}: rebase (pick): Fixed to print slightly differently. Added use counts for labels -add1ace044cd HEAD@{949}: rebase (pick): Fixes for BB iterators, additional methods added for DCE pass -8a8e91d33707 HEAD@{950}: rebase (pick): Extra comments -3bdcc96804e9 HEAD@{951}: rebase (pick): Now does not include instruction files... -c6325331663d HEAD@{952}: rebase (pick): Initial revision -f3f54944c027 HEAD@{953}: rebase (pick): New repository initialized by cvs2svn. 
-8b754e2f7567 (origin/master, fork/master) HEAD@{954}: rebase (start): checkout origin/master -1dc8f4774d34 HEAD@{955}: checkout: moving from main to arraytype -72ea6fbc150a HEAD@{956}: checkout: moving from recoverreturn to main -549498c110fa (recoverreturn) HEAD@{957}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong -500ba6619cf3 HEAD@{958}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong -256e9d00f6a8 HEAD@{959}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong -056fc2e74960 HEAD@{960}: commit (amend): [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong -d537c309a9d4 HEAD@{961}: commit: [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong -72ea6fbc150a HEAD@{962}: checkout: moving from main to recoverreturn -72ea6fbc150a HEAD@{963}: rebase (finish): returning to refs/heads/main -72ea6fbc150a HEAD@{964}: rebase (start): checkout origin/main -9dc4af327b12 HEAD@{965}: checkout: moving from two to main -8062dae7812f (two) HEAD@{966}: commit (amend): [Parse] Use empty RecoveryExpr when if/while/do/switch conditions fail to parse -c5ce4cbfc3cd HEAD@{967}: rebase (finish): returning to refs/heads/two -c5ce4cbfc3cd HEAD@{968}: rebase (pick): [Parse] Use empty RecoveryExpr when if/while/do/switch conditions fail to parse -72ea6fbc150a HEAD@{969}: rebase (start): checkout origin/main -c56122daac76 HEAD@{970}: checkout: moving from iwyustdlib to two -f038610fb5f3 HEAD@{971}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library -e7f383b77f38 HEAD@{972}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library -23650256334a HEAD@{973}: commit (amend): [clangd] Basic IncludeCleaner support for c/c++ standard library -eb1c9e6fabaa HEAD@{974}: rebase (continue) (finish): returning to refs/heads/iwyustdlib -eb1c9e6fabaa HEAD@{975}: rebase (continue): [clangd] Basic IncludeCleaner support for 
c/c++ standard library -128c6ed73b8f HEAD@{976}: rebase (start): checkout origin/main -2f3a9575f9ad HEAD@{977}: checkout: moving from stdlib to iwyustdlib -cdfb640fe9e8 (stdlib) HEAD@{978}: checkout: moving from prettify to stdlib -77cc7d2fd845 (prettify) HEAD@{979}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover -97d9713c55bc HEAD@{980}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover -0e1e531ca3ab HEAD@{981}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover -2dcf689f661e HEAD@{982}: commit (amend): [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover -22e53b9a3b3f HEAD@{983}: commit: [CodeCompletion][clangd] Clean __uglified parameter names in completion & hover -9dc4af327b12 HEAD@{984}: checkout: moving from main to prettify -9dc4af327b12 HEAD@{985}: reset: moving to HEAD -9dc4af327b12 HEAD@{986}: reset: moving to HEAD -9dc4af327b12 HEAD@{987}: checkout: moving from 9dc4af327b12dfbcf90fde1641cd649c6814bf98 to main -9dc4af327b12 HEAD@{988}: checkout: moving from main to origin/main -2c644e2f71a5 HEAD@{989}: commit: FFix feature name in 9dc4af327b12dfbcf90fde1641cd649c6814bf98 -9dc4af327b12 HEAD@{990}: rebase (finish): returning to refs/heads/main -9dc4af327b12 HEAD@{991}: rebase (pick): Re-land "[clang] Add early exit when checking for const init of arrays." -4fedd4be385e HEAD@{992}: rebase (start): checkout origin/main -a3fd292fed18 HEAD@{993}: commit (amend): Re-land "[clang] Add early exit when checking for const init of arrays." -70b8662a502c HEAD@{994}: commit (amend): Re-land "[clang] Add early exit when checking for const init of arrays." -2ff827ad7f2d HEAD@{995}: commit (amend): Re-land "[clang] Add early exit when checking for const init of arrays." -9ad5cbdb06d8 HEAD@{996}: revert: Re-land "[clang] Add early exit when checking for const init of arrays." 
-6f1a501fddae HEAD@{997}: checkout: moving from tmplargs to main -a7b31d694812 HEAD@{998}: commit (amend): [CodeCompletion] Signature help for template argument lists -2142ae80cf59 HEAD@{999}: commit (amend): [CodeCompletion] Signature help for template argument lists -4669c22c0e70 HEAD@{1000}: commit (amend): [CodeCompletion] Signature help for template argument lists -99217d405b2b HEAD@{1001}: commit (amend): [CodeCompletion] Signature help for template argument lists -86fa6ad9fb2b HEAD@{1002}: commit (amend): [CodeCompletion] Signature help for template argument lists -0a7d62a75abf HEAD@{1003}: commit (amend): [CodeCompletion] Signature help for template argument lists -8b0170fa11c1 HEAD@{1004}: commit: [CodeCompletion] Signature help for template argument lists -6f1a501fddae HEAD@{1005}: checkout: moving from main to tmplargs -6f1a501fddae HEAD@{1006}: reset: moving to HEAD -6f1a501fddae HEAD@{1007}: checkout: moving from aggregates to main -9cf82ca7e4ee HEAD@{1008}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -c9f6b6b3f6a8 HEAD@{1009}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -7b37b2f933bd HEAD@{1010}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -e78f39a12189 HEAD@{1011}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -985a3b182774 HEAD@{1012}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -a7ab012a8ff1 HEAD@{1013}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -84c5ef8d6646 HEAD@{1014}: commit (amend): [CodeCompletion] Signature help for aggregate initialization. -7e2d55fea796 HEAD@{1015}: rebase (finish): returning to refs/heads/aggregates -7e2d55fea796 HEAD@{1016}: rebase (pick): [CodeCompletion] Signature help for aggregate initialization. 
-b245d1eaec2d HEAD@{1017}: rebase (start): checkout bracehelp -6175a4ae0cfc HEAD@{1018}: checkout: moving from bracehelp to aggregates -b245d1eaec2d HEAD@{1019}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -f648b926a983 HEAD@{1020}: checkout: moving from aggregates to bracehelp -6175a4ae0cfc HEAD@{1021}: commit: [CodeCompletion] Signature help for aggregate initialization. -f648b926a983 HEAD@{1022}: checkout: moving from bracehelp to aggregates -f648b926a983 HEAD@{1023}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -d830368b01ba HEAD@{1024}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -3fe02e425768 HEAD@{1025}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -3158a41d01e1 HEAD@{1026}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -8e451de571e7 HEAD@{1027}: commit (amend): [CodeCompletion] Signature help for braced constructor calls -b35aa36a3e3f HEAD@{1028}: commit: [CodeCompletion] Signature help for braced constructor calls -6f1a501fddae HEAD@{1029}: checkout: moving from main to bracehelp -6f1a501fddae HEAD@{1030}: checkout: moving from completeinit to main -347a926ee355 (completeinit) HEAD@{1031}: commit (amend): [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. -9babb0590695 HEAD@{1032}: commit (amend): [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. -91e9b7b90b4f HEAD@{1033}: commit (amend): [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. -0e1023621e0f HEAD@{1034}: commit: [CodeCompletion] (mostly) fix completion in incomplete C++ ctor initializers. 
-6f1a501fddae HEAD@{1035}: checkout: moving from main to completeinit -6f1a501fddae HEAD@{1036}: checkout: moving from configcompiler to main -0fa6fc0238fe HEAD@{1037}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 -8205faff5871 HEAD@{1038}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 -27055788e902 HEAD@{1039}: commit (amend): [clangd] Add CompileFlags.Compiler option to override argv0 -f9bdd0229665 HEAD@{1040}: commit: [clangd] Add CompileFlags.Compiler option to override argv0 -6f1a501fddae HEAD@{1041}: checkout: moving from main to configcompiler -6f1a501fddae HEAD@{1042}: checkout: moving from manglefilename to main -b3f0e3eeccc0 (manglefilename) HEAD@{1043}: commit (amend): [clangd] Adjust compile flags so they work when applied to other file(type)s. -22ea16ea69e9 HEAD@{1044}: commit (amend): [clangd] Adjust compile flags so they work when applied to other file(type)s. -8325fd69d14a HEAD@{1045}: commit (amend): [clangd] Adjust compile flags so they work when applied to other file(type)s. -0b435ba816ae HEAD@{1046}: commit: [clangd] Adjust compile flags so they work when applied to other file(type)s. 
-6f1a501fddae HEAD@{1047}: checkout: moving from main to manglefilename -6f1a501fddae HEAD@{1048}: checkout: moving from tidydiags to main -53abaad295f4 HEAD@{1049}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -8daae4149924 HEAD@{1050}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -95f3d66f621b HEAD@{1051}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -6e4e13e32e9a HEAD@{1052}: commit (amend): [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -b80c98fe991c HEAD@{1053}: commit: [clangd] Respect .clang-tidy ExtraArgs (-Wfoo only) when producing diagnostics -6f1a501fddae HEAD@{1054}: checkout: moving from main to tidydiags -6f1a501fddae HEAD@{1055}: rebase (finish): returning to refs/heads/main -6f1a501fddae HEAD@{1056}: rebase (pick): [clangd] Fix typo in test. NFC -dfa2ad1ad858 HEAD@{1057}: rebase (start): checkout origin/main -e5cc3319d413 HEAD@{1058}: rebase (finish): returning to refs/heads/main -e5cc3319d413 HEAD@{1059}: rebase (pick): [clangd] Fix typo in test. NFC -e751d97863fb HEAD@{1060}: rebase (start): checkout origin/main -be44f91f4fca HEAD@{1061}: commit: [clangd] Fix typo in test. 
NFC -c2f2bb066b83 HEAD@{1062}: reset: moving to HEAD -c2f2bb066b83 HEAD@{1063}: rebase (finish): returning to refs/heads/main -c2f2bb066b83 HEAD@{1064}: rebase (start): checkout origin/main -62bcb75ce510 HEAD@{1065}: checkout: moving from usingtype to main -af27466c5039 (usingtype) HEAD@{1066}: commit (amend): Reland "[AST] Add UsingType: a sugar type for types found via UsingDecl" -bbc902a8436d HEAD@{1067}: revert: Reland "[AST] Add UsingType: a sugar type for types found via UsingDecl" -cc56c66f27e1 HEAD@{1068}: revert: Revert "[AST] Add UsingType: a sugar type for types found via UsingDecl" -565c17574dd0 HEAD@{1069}: rebase (finish): returning to refs/heads/usingtype -565c17574dd0 HEAD@{1070}: rebase (start): checkout origin/main -e1600db19d63 HEAD@{1071}: rebase (finish): returning to refs/heads/usingtype -e1600db19d63 HEAD@{1072}: rebase (pick): [AST] Add UsingType: a sugar type for types found via UsingDecl -eb66f0662ad9 HEAD@{1073}: rebase (start): checkout origin/main -e5706481005a HEAD@{1074}: commit (amend): [AST] Add UsingType: a sugar type for types found via UsingDecl -c11ab3c47b88 HEAD@{1075}: rebase (finish): returning to refs/heads/usingtype -c11ab3c47b88 HEAD@{1076}: rebase (pick): [AST] Add UsingType: a sugar type for types found via UsingDecl -9cd55c7c3463 HEAD@{1077}: rebase (start): checkout origin/main -77701d00dbf1 HEAD@{1078}: commit (amend): [AST] Add UsingType: a sugar type for types found via UsingDecl -484ad728d0b4 HEAD@{1079}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -38567f18b381 HEAD@{1080}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -73794c07c44c HEAD@{1081}: rebase (finish): returning to refs/heads/usingtype -73794c07c44c HEAD@{1082}: rebase (pick): [AST] Add a sugar type for types found via UsingDecl -02fc8d5c9eb0 HEAD@{1083}: rebase (start): checkout origin/main -528e4f3170f7 HEAD@{1084}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -06aa0ecaf0ad 
HEAD@{1085}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -eb52127d5587 HEAD@{1086}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -383df0a0d6e6 HEAD@{1087}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -c4f8be2c2d68 HEAD@{1088}: rebase (continue) (finish): returning to refs/heads/usingtype -c4f8be2c2d68 HEAD@{1089}: rebase (continue): [AST] Add a sugar type for types found via UsingDecl -a596a5fc128b HEAD@{1090}: rebase (start): checkout origin/main -25184d506c43 HEAD@{1091}: checkout: moving from main to usingtype -62bcb75ce510 HEAD@{1092}: commit: [AST] Add more testcases to QualTypeNamesTest. NFC -32dede65ae98 HEAD@{1093}: rebase (finish): returning to refs/heads/main -32dede65ae98 HEAD@{1094}: rebase (pick): [AST] Fix QualTypeNamesTest, which was spuriously passing -509153f1e7d1 HEAD@{1095}: rebase (start): checkout origin/main -8b9423dcec0a HEAD@{1096}: commit: [AST] Fix QualTypeNamesTest, which was spuriously passing -ebed0ca71561 HEAD@{1097}: rebase (finish): returning to refs/heads/main -ebed0ca71561 HEAD@{1098}: rebase (start): checkout origin/main -6fef0ffa14a3 HEAD@{1099}: checkout: moving from usingtype to main -25184d506c43 HEAD@{1100}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -63d52ad6d61f HEAD@{1101}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -c0adf4433852 HEAD@{1102}: checkout: moving from origin to usingtype -8491272d5f8b (origin) HEAD@{1103}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it -58f8efe72279 HEAD@{1104}: rebase (finish): returning to refs/heads/origin -58f8efe72279 HEAD@{1105}: rebase (pick): [clangd] Extend SymbolOrigin, stop serializing it -e7007b69d43b (fixx) HEAD@{1106}: rebase (start): checkout origin/main -ddcc1d2c88de HEAD@{1107}: checkout: moving from fixx to origin -e7007b69d43b (fixx) HEAD@{1108}: rebase (finish): returning to refs/heads/fixx -e7007b69d43b (fixx) HEAD@{1109}: rebase 
(pick): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const -54fc9eb9b313 HEAD@{1110}: rebase (start): checkout origin/main -563ef9895a46 HEAD@{1111}: commit (amend): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const -eb9db3287358 HEAD@{1112}: rebase (finish): returning to refs/heads/fixx -eb9db3287358 HEAD@{1113}: rebase (pick): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const -529833377ccd (block) HEAD@{1114}: rebase (start): checkout origin/main -9344dda72035 HEAD@{1115}: checkout: moving from block to fixx -529833377ccd (block) HEAD@{1116}: rebase (finish): returning to refs/heads/block -529833377ccd (block) HEAD@{1117}: rebase (pick): [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) -a908ca6603ab HEAD@{1118}: rebase (start): checkout origin/main -e65ea60537a7 HEAD@{1119}: checkout: moving from asyncindex to block -747908384732 (asyncindex) HEAD@{1120}: rebase (continue) (finish): returning to refs/heads/asyncindex -747908384732 (asyncindex) HEAD@{1121}: rebase (continue): [clangd] Proof of concept: indexing after the preamble is built -a5927737daeb HEAD@{1122}: rebase (start): checkout origin/main -3f8dfb604b16 HEAD@{1123}: checkout: moving from shared to asyncindex -6917f87b3c7c (shared) HEAD@{1124}: rebase (finish): returning to refs/heads/shared -6917f87b3c7c (shared) HEAD@{1125}: rebase (pick): [clangd] Cleanup unneeded use of shared_ptr. NFC -4299d8d0ce42 HEAD@{1126}: rebase (start): checkout origin/main -998c40e04bec HEAD@{1127}: commit: [clangd] Cleanup unneeded use of shared_ptr. 
NFC -6fef0ffa14a3 HEAD@{1128}: checkout: moving from main to shared -6fef0ffa14a3 HEAD@{1129}: checkout: moving from asyncindex to main -3f8dfb604b16 HEAD@{1130}: commit (amend): [clangd] Proof of concept: indexing after the preamble is built -69244a114c0c HEAD@{1131}: commit (amend): [clangd] Proof of concept: indexing after the preamble is built -e0ed01382993 HEAD@{1132}: commit: [clangd] Proof of concept: indexing after the preamble is built -6fef0ffa14a3 HEAD@{1133}: checkout: moving from main to asyncindex -6fef0ffa14a3 HEAD@{1134}: reset: moving to HEAD -6fef0ffa14a3 HEAD@{1135}: reset: moving to HEAD -6fef0ffa14a3 HEAD@{1136}: reset: moving to HEAD -6fef0ffa14a3 HEAD@{1137}: reset: moving to HEAD -6fef0ffa14a3 HEAD@{1138}: checkout: moving from main to main -6fef0ffa14a3 HEAD@{1139}: rebase (finish): returning to refs/heads/main -6fef0ffa14a3 HEAD@{1140}: rebase (start): checkout origin/main -26f6fbe2be1d HEAD@{1141}: checkout: moving from ccedit to main -782052f2decf (fork/ccedit, ccedit) HEAD@{1142}: commit: [clangd] Prototype: code action to edit compile commands -26f6fbe2be1d HEAD@{1143}: checkout: moving from main to ccedit -26f6fbe2be1d HEAD@{1144}: reset: moving to origin/main -ac431fc2cdf1 (incomplete) HEAD@{1145}: reset: moving to origin/main -c797aa934727 HEAD@{1146}: revert: Revert "Revert "[Symbolizer][Debuginfo] Add debuginfod client to llvm-symbolizer."" -afa3c14e2ff9 HEAD@{1147}: checkout: moving from block to main -e65ea60537a7 HEAD@{1148}: commit (amend): [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) -c416e5d69d7e HEAD@{1149}: commit (amend): [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) -2c1e87eae0e2 HEAD@{1150}: commit: [clangd] Disable support for clang-tidy suppression blocks (NOLINTBEGIN) -afa3c14e2ff9 HEAD@{1151}: checkout: moving from main to block -afa3c14e2ff9 HEAD@{1152}: checkout: moving from fixx to main -9344dda72035 HEAD@{1153}: commit (amend): [Sema] Add FixIt when a C++ 
out-of-line method has extra/missing const -fb15c379c1f0 HEAD@{1154}: commit (amend): [Sema] Add FixIt when a C++ out-of-line method has extra/missing const -be240d2b0505 HEAD@{1155}: commit: [Sema] Add FixIt when a C++ out-of-line method has extra/missing const -ac431fc2cdf1 (incomplete) HEAD@{1156}: checkout: moving from incomplete to fixx -ac431fc2cdf1 (incomplete) HEAD@{1157}: rebase (finish): returning to refs/heads/incomplete -ac431fc2cdf1 (incomplete) HEAD@{1158}: rebase (pick): [clangd] ... and mark a new test as -fno-ms-compatibility too -30fc88bf1dc1 HEAD@{1159}: rebase (start): checkout origin/main -d3aa8d688374 HEAD@{1160}: commit (amend): [clangd] ... and mark a new test as -fno-ms-compatibility too -03d0b9092b60 HEAD@{1161}: commit: [clangd] ... and mark a new test as -fno-ms-compatibility too -1a68c14b577f HEAD@{1162}: reset: moving to HEAD -1a68c14b577f HEAD@{1163}: rebase (finish): returning to refs/heads/incomplete -1a68c14b577f HEAD@{1164}: rebase (pick): [clangd] Restore -fno-ms-compatibility to tests -8d897ec91528 HEAD@{1165}: rebase (start): checkout origin/main -ac5910467704 HEAD@{1166}: commit: [clangd] Restore -fno-ms-compatibility to tests -c25ea488a39a HEAD@{1167}: reset: moving to HEAD -c25ea488a39a HEAD@{1168}: rebase (finish): returning to refs/heads/incomplete -c25ea488a39a HEAD@{1169}: rebase (pick): [clangd] Include-fixer: handle more "incomplete type" diags. -a55e51f9a64c HEAD@{1170}: rebase (start): checkout origin/main -11a2f06c37cc HEAD@{1171}: commit (amend): [clangd] Include-fixer: handle more "incomplete type" diags. -8182fffc0500 HEAD@{1172}: rebase (continue) (finish): returning to refs/heads/incomplete -8182fffc0500 HEAD@{1173}: rebase (continue): [clangd] Include-fixer: handle more "incomplete type" diags. 
-86caf517bf05 HEAD@{1174}: rebase (start): checkout origin/main -0958968acbe0 HEAD@{1175}: checkout: moving from incompletenfc to incomplete -a8bf389f4146 (incompletenfc) HEAD@{1176}: rebase (finish): returning to refs/heads/incompletenfc -a8bf389f4146 (incompletenfc) HEAD@{1177}: rebase (pick): [clangd] Clean up some include-fixer tests. NFC -3ed47bcc9618 HEAD@{1178}: rebase (start): checkout origin/main -76820d557062 HEAD@{1179}: commit (amend): [clangd] Clean up some include-fixer tests. NFC -c28420e6737b HEAD@{1180}: commit (amend): [clangd] Clean up some include-fixer tests. NFC -b48226a052b2 HEAD@{1181}: commit (amend): [clangd] Clean up some include-fixer tests. NFC -0958968acbe0 HEAD@{1182}: checkout: moving from incomplete to incompletenfc -0958968acbe0 HEAD@{1183}: checkout: moving from main to incomplete -afa3c14e2ff9 HEAD@{1184}: checkout: moving from indeximplicit to main -0d64c65efac9 (indeximplicit) HEAD@{1185}: cherry-pick: [clangd] Indexing of standard library -ee26e0ba082e (implicitc) HEAD@{1186}: checkout: moving from implicitc to indeximplicit -ee26e0ba082e (implicitc) HEAD@{1187}: commit (amend): [clangd] Include fixer for missing functions in C -9ac5d003594e HEAD@{1188}: commit (amend): [clangd] Include fixer for missing functions in C -3b4429acb859 HEAD@{1189}: commit (amend): [clangd] Include fixer for missing functions in C -1a75bc322127 HEAD@{1190}: commit (amend): [clangd] Include fixer for missing functions in C -94ab31f3c7a8 HEAD@{1191}: commit (amend): [clangd] Include fixer for missing functions in C -86494fa881eb HEAD@{1192}: commit: [clangd] Include fixer for missing functions in C -afa3c14e2ff9 HEAD@{1193}: checkout: moving from main to implicitc -afa3c14e2ff9 HEAD@{1194}: rebase (finish): returning to refs/heads/main -afa3c14e2ff9 HEAD@{1195}: rebase (start): checkout origin/main -d4865393b5da HEAD@{1196}: checkout: moving from incomplete to main -0958968acbe0 HEAD@{1197}: commit (amend): [clangd] Include-fixer: handle more 
"incomplete type" diags, clean up tests -aa89c6b2a300 HEAD@{1198}: commit (amend): [clangd] Include-fixer: handle more "incomplete type" diags, clean up tests -153236d44e9a HEAD@{1199}: commit (amend): [clangd] Include-fixer: handle more "incomplete type" diags, clean up tests -3f0f560caf3a HEAD@{1200}: commit: [clangd] Include-fixer: handle more "incomplete type" diags, clean up tests -d4865393b5da HEAD@{1201}: checkout: moving from main to incomplete -d4865393b5da HEAD@{1202}: reset: moving to HEAD -d4865393b5da HEAD@{1203}: rebase (finish): returning to refs/heads/main -d4865393b5da HEAD@{1204}: rebase (start): checkout origin/main -e7f53ec78fe8 HEAD@{1205}: checkout: moving from tblgen to main -7ef23188fe95 (tblgen) HEAD@{1206}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source -6bdf61f016e3 HEAD@{1207}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source -e249c35c3fb4 HEAD@{1208}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source -fcf5c9f5bf33 HEAD@{1209}: commit (amend): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source -7b3888a32700 HEAD@{1210}: rebase (continue) (finish): returning to refs/heads/tblgen -7b3888a32700 HEAD@{1211}: rebase (continue): [clangd] Generate ConfigFragment/YAML/docs from one tablegen source -4afae6f7c7f6 HEAD@{1212}: rebase (start): checkout origin/main -34b10022310a HEAD@{1213}: commit: [clangd] Generate ConfigFragment/YAML/docs from one tablegen source -e7f53ec78fe8 HEAD@{1214}: checkout: moving from main to tblgen -e7f53ec78fe8 HEAD@{1215}: checkout: moving from two to main -c56122daac76 HEAD@{1216}: reset: moving to HEAD -c56122daac76 HEAD@{1217}: commit (amend): [Parse] Use empty RecoveryExpr when if/while/do/switch conditions fail to parse -2409b3d46f6c HEAD@{1218}: rebase (finish): returning to refs/heads/two -2409b3d46f6c HEAD@{1219}: rebase (pick): [Parse] Use empty RecoveryExpr when if/while/do/switch 
conditions fail to parse -2676759bf22e (morefix) HEAD@{1220}: rebase (start): checkout origin/main -ad885f5a3eab (arcpatch-D112996) HEAD@{1221}: checkout: moving from morefix to two -2676759bf22e (morefix) HEAD@{1222}: rebase (finish): returning to refs/heads/morefix -2676759bf22e (morefix) HEAD@{1223}: rebase (pick): [clangd] Add fixes for clang "include " diagnostics -b73cf6207efa HEAD@{1224}: rebase (start): checkout origin/main -da7ff2db120f HEAD@{1225}: rebase (finish): returning to refs/heads/morefix -da7ff2db120f HEAD@{1226}: rebase (pick): [clangd] Add fixes for clang "include " diagnostics -77b2bb55671a HEAD@{1227}: rebase (start): checkout origin/main -8bf667957ed0 HEAD@{1228}: commit (amend): [clangd] Add fixes for clang "include " diagnostics -56f023ff10d2 HEAD@{1229}: commit (amend): [clangd] Add fixes for clang "include " diagnostics -805bac439319 HEAD@{1230}: checkout: moving from origin to morefix -ddcc1d2c88de HEAD@{1231}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it -e4568ef854df HEAD@{1232}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it -9099df1707fe HEAD@{1233}: checkout: moving from stdlib to origin -cdfb640fe9e8 (stdlib) HEAD@{1234}: commit (amend): [clangd] Indexing of standard library -5c14772f82eb HEAD@{1235}: commit (amend): [clangd] Indexing of standard library -9bcdbb99a75b HEAD@{1236}: commit (amend): [clangd] WIP various stdlib indexing stuff -3e38a40b3f17 HEAD@{1237}: commit (amend): [clangd] WIP various stdlib indexing stuff -4ac5a41a65fc HEAD@{1238}: rebase (finish): returning to refs/heads/stdlib -4ac5a41a65fc HEAD@{1239}: rebase (pick): [clangd] WIP various stdlib indexing stuff -e1b9d805325b HEAD@{1240}: rebase (start): checkout origin/main -5330f525f264 (arcpatch-D105177) HEAD@{1241}: checkout: moving from arcpatch-D105177 to stdlib -5330f525f264 (arcpatch-D105177) HEAD@{1242}: checkout: moving from reserved to arcpatch-D105177 -18cd067d0bfa (reserved) HEAD@{1243}: commit (amend): 
[clangd] Don't index __reserved_names in headers. -06dd586e7297 HEAD@{1244}: commit (amend): [clangd] Don't index __reserved_names in headers. -e58aab51c464 HEAD@{1245}: commit (amend): [clangd] Don't index __reserved_names in headers. -05a7bfb157fc HEAD@{1246}: commit: [clangd] Don't index __reserved_names in headers. -e7f53ec78fe8 HEAD@{1247}: checkout: moving from main to reserved -e7f53ec78fe8 HEAD@{1248}: checkout: moving from origin to main -9099df1707fe HEAD@{1249}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it -1557821a2bd2 HEAD@{1250}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it -8c3bd3cc7478 HEAD@{1251}: commit (amend): [clangd] Extend SymbolOrigin, stop serializing it -cb761c799928 HEAD@{1252}: commit: [clangd] Extend SymbolOrigin, stop serializing it -e7f53ec78fe8 HEAD@{1253}: checkout: moving from main to origin -e7f53ec78fe8 HEAD@{1254}: rebase (finish): returning to refs/heads/main -e7f53ec78fe8 HEAD@{1255}: rebase (start): checkout origin/main -afc9e7517ada HEAD@{1256}: checkout: moving from arcpatch-D105177 to main -5330f525f264 (arcpatch-D105177) HEAD@{1257}: commit (amend): [clangd] WIP various stdlib indexing stuff -4c58226488ee HEAD@{1258}: commit (amend): [clangd] WIP various stdlib indexing stuff -ffbc79cbcc54 HEAD@{1259}: commit (amend): [clangd] WIP various stdlib indexing stuff -5d5179621ede HEAD@{1260}: checkout: moving from main to arcpatch-D105177 -afc9e7517ada HEAD@{1261}: rebase (finish): returning to refs/heads/main -afc9e7517ada HEAD@{1262}: rebase (start): checkout origin/main -f764a1a5bd7c HEAD@{1263}: checkout: moving from arcpatch-D105177 to main -5d5179621ede HEAD@{1264}: reset: moving to HEAD -5d5179621ede HEAD@{1265}: rebase (finish): returning to refs/heads/arcpatch-D105177 -5d5179621ede HEAD@{1266}: rebase (pick): [clangd] Implemented indexing of standard library -25c7ec4fc622 HEAD@{1267}: rebase (start): checkout origin/main -7f2bbbd16a82 HEAD@{1268}: commit: [clangd] Implemented 
indexing of standard library -15acaad79d6e HEAD@{1269}: checkout: moving from main to arcpatch-D105177 -f764a1a5bd7c HEAD@{1270}: checkout: moving from morefix to main -805bac439319 HEAD@{1271}: commit (amend): [clangd] Add fixes for clang "include " diagnostics -c74d8a0e6f33 HEAD@{1272}: commit (amend): [clangd] Add fixes for clang "include " diagnostics -86d15e9770ca HEAD@{1273}: commit (amend): [clangd] Add fixes for clang "include " diagnostics -a46d34a114b3 HEAD@{1274}: commit: [clangd] Add fixes for clang "include " diagnostics -f764a1a5bd7c HEAD@{1275}: checkout: moving from main to morefix -f764a1a5bd7c HEAD@{1276}: checkout: moving from usingtype to main -c0adf4433852 HEAD@{1277}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -661fde2dfe7c HEAD@{1278}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -f38cd8c69f6d HEAD@{1279}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -4b8286a14790 HEAD@{1280}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -480e5803b30f HEAD@{1281}: commit (amend): [AST] Add a sugar type for types found via UsingDecl -06cc1d22bf04 HEAD@{1282}: rebase (finish): returning to refs/heads/usingtype -06cc1d22bf04 HEAD@{1283}: rebase (pick): [AST] Add a sugar type for types found via UsingDecl -c133fb321f7c HEAD@{1284}: rebase (start): checkout origin/main -8545d9204be1 HEAD@{1285}: rebase (abort): updating HEAD -8545d9204be1 HEAD@{1286}: rebase (abort): updating HEAD -8545d9204be1 HEAD@{1287}: checkout: moving from main to usingtype -f764a1a5bd7c HEAD@{1288}: rebase (finish): returning to refs/heads/main -f764a1a5bd7c HEAD@{1289}: rebase (pick): [clangd] Avoid possible crash: apply configuration after binding methods -a6f53afbcb4d HEAD@{1290}: rebase (finish): returning to refs/heads/main -a6f53afbcb4d HEAD@{1291}: rebase (start): checkout origin/main -5fedbd5b1815 HEAD@{1292}: checkout: moving from main to usingtype -5fedbd5b1815 HEAD@{1293}: checkout: 
moving from token to main -3878ad5e448c (token) HEAD@{1294}: commit: xxx token -5fedbd5b1815 HEAD@{1295}: checkout: moving from main to token -5fedbd5b1815 HEAD@{1296}: rebase (finish): returning to refs/heads/main -5fedbd5b1815 HEAD@{1297}: rebase (start): checkout origin/main -e56d680fe870 HEAD@{1298}: checkout: moving from iwyustdlib to main -e56d680fe870 HEAD@{1299}: checkout: moving from main to iwyustdlib -e56d680fe870 HEAD@{1300}: rebase (finish): returning to refs/heads/main -e56d680fe870 HEAD@{1301}: rebase (start): checkout origin/main -4fb62e138398 HEAD@{1302}: checkout: moving from placeholders to main -8ac9d2ae5839 (placeholders) HEAD@{1303}: rebase (finish): returning to refs/heads/placeholders -8ac9d2ae5839 (placeholders) HEAD@{1304}: rebase (pick): [clangd] Fix function-arg-placeholder suppression with macros. -ebda5e1e521f HEAD@{1305}: checkout: moving from main to placeholders -ebda5e1e521f HEAD@{1306}: rebase (finish): returning to refs/heads/main -ebda5e1e521f HEAD@{1307}: rebase (start): checkout origin/main -48b67dca2ccc HEAD@{1308}: checkout: moving from two to main -ad885f5a3eab (arcpatch-D112996) HEAD@{1309}: checkout: moving from arcpatch-D112996 to two -63667c1896e1 HEAD@{1310}: rebase (finish): returning to refs/heads/arcpatch-D112996 -63667c1896e1 HEAD@{1311}: rebase (pick): [clangd] Trace per-token time in clangd --check -f7500a4ef7bd HEAD@{1312}: rebase (pick): [CodeCompletion] Generally consider header files without extension -5fbcf677347e HEAD@{1313}: checkout: moving from main to arcpatch-D112996 -48b67dca2ccc HEAD@{1314}: rebase (finish): returning to refs/heads/main -48b67dca2ccc HEAD@{1315}: rebase (start): checkout origin/main -627fa0b9a897 HEAD@{1316}: reset: moving to HEAD -627fa0b9a897 HEAD@{1317}: checkout: moving from enum to main -5880c835bdbe (enum) HEAD@{1318}: reset: moving to HEAD -5880c835bdbe (enum) HEAD@{1319}: reset: moving to HEAD -5880c835bdbe (enum) HEAD@{1320}: rebase (finish): returning to refs/heads/enum 
-5880c835bdbe (enum) HEAD@{1321}: rebase (pick): [Sema] Avoid crash in CheckEnumConstant with contains-error expressions -6a5e08cc4a5c (redecl) HEAD@{1322}: rebase (finish): returning to refs/heads/redecl -6a5e08cc4a5c (redecl) HEAD@{1323}: rebase (pick): [AST] injected-class-name is not a redecl, even in template specializations -627fa0b9a897 HEAD@{1324}: checkout: moving from main to redecl -627fa0b9a897 HEAD@{1325}: rebase (finish): returning to refs/heads/main -627fa0b9a897 HEAD@{1326}: rebase (start): checkout origin/main -f06e33298266 HEAD@{1327}: rebase (abort): updating HEAD -f06e33298266 HEAD@{1328}: rebase (abort): updating HEAD -f06e33298266 HEAD@{1329}: checkout: moving from specialfiles to main -73453e7adecb (specialfiles) HEAD@{1330}: rebase (finish): returning to refs/heads/specialfiles -73453e7adecb (specialfiles) HEAD@{1331}: rebase (pick): [clangd] Avoid expensive checks of buffer names in IncludeCleaner -de7494a33a5c (constcrash) HEAD@{1332}: rebase (finish): returning to refs/heads/constcrash -de7494a33a5c (constcrash) HEAD@{1333}: rebase (pick): [AST] fail rather than crash when const evaluating invalid c++ foreach -f06e33298266 HEAD@{1334}: checkout: moving from main to specialfiles -f06e33298266 HEAD@{1335}: rebase (finish): returning to refs/heads/main -f06e33298266 HEAD@{1336}: rebase (start): checkout origin/main -9cc08cb02fdc (crashtest) HEAD@{1337}: checkout: moving from crashtest to constcrash -9cc08cb02fdc (crashtest) HEAD@{1338}: rebase (finish): returning to refs/heads/crashtest -9cc08cb02fdc (crashtest) HEAD@{1339}: rebase (pick): [clangd] Add integration test for crash handling -51be7061d025 HEAD@{1340}: reset: moving to HEAD -51be7061d025 HEAD@{1341}: checkout: moving from main to crashtest -51be7061d025 HEAD@{1342}: commit: [clangd] Remove tricky integration test that flakes/fails on some platforms. 
-4373f3595f8e HEAD@{1343}: rebase (finish): returning to refs/heads/main -4373f3595f8e HEAD@{1344}: rebase (start): checkout origin/main -045695f85cb8 (arcpatch-D109506_1) HEAD@{1345}: checkout: moving from timer to main -aa1ac2ae451e (flush) HEAD@{1346}: checkout: moving from flush to timer -aa1ac2ae451e (flush) HEAD@{1347}: rebase (finish): returning to refs/heads/flush -aa1ac2ae451e (flush) HEAD@{1348}: rebase (pick): [clangd] Flush stderr after signal handlers run, so we always get the full stack/crash info -045695f85cb8 (arcpatch-D109506_1) HEAD@{1349}: checkout: moving from main to flush -045695f85cb8 (arcpatch-D109506_1) HEAD@{1350}: rebase (finish): returning to refs/heads/main -045695f85cb8 (arcpatch-D109506_1) HEAD@{1351}: rebase (start): checkout origin/main -4e91035387fa HEAD@{1352}: checkout: moving from arcpatch-D109506_1 to main -045695f85cb8 (arcpatch-D109506_1) HEAD@{1353}: rebase (finish): returning to refs/heads/arcpatch-D109506_1 -045695f85cb8 (arcpatch-D109506_1) HEAD@{1354}: rebase (pick): [clangd] Print current request context along with the stack trace -980c7f32490b HEAD@{1355}: checkout: moving from arcpatch-D111318 to arcpatch-D109506_1 -a85b661d2ada (arcpatch-D111318) HEAD@{1356}: rebase (finish): returning to refs/heads/arcpatch-D111318 -a85b661d2ada (arcpatch-D111318) HEAD@{1357}: rebase (pick): [clang][clangd] Improve signature help for variadic functions. 
-3964c1db915b HEAD@{1358}: checkout: moving from main to arcpatch-D111318 -4e91035387fa HEAD@{1359}: rebase (finish): returning to refs/heads/main -4e91035387fa HEAD@{1360}: rebase (pick): [Support] Trim #include after b06df22 -93c1b3caf052 HEAD@{1361}: reset: moving to HEAD -93c1b3caf052 HEAD@{1362}: rebase (finish): returning to refs/heads/main -93c1b3caf052 HEAD@{1363}: rebase (start): checkout origin/main -c15bbdeafffb HEAD@{1364}: checkout: moving from arcpatch-D110825 to main -82fbd3412fec (arcpatch-D110825) HEAD@{1365}: commit: [clangd] Handle members of anon structs in SelectionTree -68e56bd320d7 HEAD@{1366}: checkout: moving from main to arcpatch-D110825 -c15bbdeafffb HEAD@{1367}: rebase (finish): returning to refs/heads/main -c15bbdeafffb HEAD@{1368}: rebase (start): checkout origin/main -bb9333c3504a HEAD@{1369}: checkout: moving from uid to main -22555bafe90d (uid) HEAD@{1370}: rebase (finish): returning to refs/heads/uid -22555bafe90d (uid) HEAD@{1371}: rebase (pick): [VFS] InMemoryFilesystem's UniqueIDs are a function of path and content. 
-722e705f72dd (arcpatch-D110324) HEAD@{1372}: checkout: moving from arcpatch-D110324 to uid -722e705f72dd (arcpatch-D110324) HEAD@{1373}: rebase (finish): returning to refs/heads/arcpatch-D110324 -722e705f72dd (arcpatch-D110324) HEAD@{1374}: rebase (start): checkout origin/main -eb209c13cce9 HEAD@{1375}: rebase (finish): returning to refs/heads/arcpatch-D110324 -eb209c13cce9 HEAD@{1376}: rebase (pick): clangd: Do not report inline overrides twice -5685eb950da7 HEAD@{1377}: checkout: moving from main to arcpatch-D110324 -bb9333c3504a HEAD@{1378}: rebase (finish): returning to refs/heads/main -bb9333c3504a HEAD@{1379}: rebase (start): checkout origin/main -61cc873a8ef1 HEAD@{1380}: checkout: moving from arcpatch-D109506 to main From d8276208be763ba5b70e9b422034e77764a8649f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 Jan 2022 14:34:38 +0000 Subject: [PATCH 533/992] [LAA] Remove overeager assertion for aggregate types. 0a00d64 turned an early exit here into an assertion, but the assertion can be triggered, as PR52920 shows. The later code is agnostic to the accessed type, so just drop the assert. The patch also adds tests for LAA directly and loop-load-elimination to show the behavior is sane. 
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 1 - .../LoopAccessAnalysis/symbolic-stride.ll | 141 ++++++++++++++++++ .../LoopLoadElim/symbolic-stride.ll | 40 +++++ 3 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6444518dc70c..b8b1b5ad53c9 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1055,7 +1055,6 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - assert(!AccessTy->isAggregateType() && "Bad stride - Not a pointer to a scalar type"); if (isa(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll new file mode 100644 index 000000000000..547372b83bc9 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -0,0 +1,141 @@ +; RUN: opt -S -disable-output -passes='require,require,loop(print-access-info)' %s 2>&1 | FileCheck %s + +; +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; A forwarding in the presence of symbolic strides. +define void @single_stride(i32* noalias %A, i32* noalias %B, i64 %N, i64 %stride) { +; CHECK-LABEL: Loop access info in function 'single_stride': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> +; CHECK-NEXT: store i32 %add, i32* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop> +; CHECK-NEXT: --> {%A,+,4}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride + %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %gep.A, align 4 + %gep.B = getelementptr inbounds i32, i32* %B, i64 %iv + %load_1 = load i32, i32* %gep.B, align 4 + %add = add i32 %load_1, %load + %iv.next = add nuw nsw i64 %iv, 1 + %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %iv.next + store i32 %add, i32* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +; Similar to @single_stride, but with struct types. +define void @single_stride_struct({ i32, i8 }* noalias %A, { i32, i8 }* noalias %B, i64 %N, i64 %stride) { +; CHECK-LABEL: Loop access info in function 'single_stride_struct': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 -> +; CHECK-NEXT: store { i32, i8 } %ins, { i32, i8 }* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(8 * %stride)}<%loop> +; CHECK-NEXT: --> {%A,+,8}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride + %gep.A = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul + %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 + %gep.B = getelementptr inbounds { i32, i8 }, { i32, i8 }* %B, i64 %iv + %load_1 = load { i32, i8 }, { i32, i8 }* %gep.B, align 4 + %v1 = extractvalue { i32, i8 } %load, 0 + %v2 = extractvalue { i32, i8} %load_1, 0 + %add = add i32 %v1, %v2 + %ins = insertvalue { i32, i8 } undef, i32 %add, 0 + %iv.next = add nuw nsw i64 %iv, 1 + %gep.A.next = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %iv.next + store { i32, i8 } %ins, { i32, i8 }* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; A loop with two symbolic strides. +define void @two_strides(i32* noalias %A, i32* noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) { +; CHECK-LABEL: Loop access info in function 'two_strides': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> +; CHECK-NEXT: store i32 %add, i32* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride.2 == 1 +; CHECK-NEXT: Equal predicate: %stride.1 == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(4 * %stride.1)}<%loop> +; CHECK-NEXT: --> {%A,+,4}<%loop> +; CHECK-NEXT: [PSE] %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %mul.2: +; CHECK-NEXT: {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop> +; CHECK-NEXT: --> {(4 + %A),+,4}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride.1 + %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %gep.A, align 4 + %gep.B = getelementptr inbounds i32, i32* %B, i64 %iv + %load_1 = load i32, i32* %gep.B, align 4 + %add = add i32 %load_1, %load + %iv.next = add nuw nsw i64 %iv, 1 + %mul.2 = mul i64 %iv.next, %stride.2 + %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %mul.2 + store i32 %add, i32* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll index 7a2d1b6c7e3c..dce61157aae1 100644 --- a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll +++ b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll @@ -50,6 +50,46 @@ for.end: ; preds = %for.body ret void } +; Similar to @f(), but with a struct type. 
+; ALL-LABEL: @f_struct( +define void @f_struct({ i32, i8 } * noalias nocapture %A, { i32, i8 }* noalias nocapture readonly %B, i64 %N, + i64 %stride) { + +; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1 + +entry: +; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load { i32, i8 }, { i32, i8 }* %A +; ONE_STRIDE_SPEC: %load_initial = load { i32, i8 }, { i32, i8 }* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi { i32, i8 } [ %load_initial, {{.*}} ], [ %ins, %for.body ] +; ONE_STRIDE_SPEC: %store_forwarded = phi { i32, i8 } [ %load_initial, {{.*}} ], [ %ins, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %mul = mul i64 %indvars.iv, %stride + %arrayidx = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul + %load = load { i32, i8 }, { i32, i8 }* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds { i32, i8 }, { i32, i8 }* %B, i64 %indvars.iv + %load_1 = load { i32, i8 }, { i32, i8 }* %arrayidx2, align 4 + +; NO_ONE_STRIDE_SPEC-NOT: %v1 = extractvalue { i32, i8 } %store_forwarded +; ONE_STRIDE_SPEC: %v1 = extractvalue { i32, i8 } %store_forwarded +; ONE_STRIDE_SPEC: %add = add i32 %v1, %v2 + + %v1 = extractvalue { i32, i8 } %load, 0 + %v2 = extractvalue { i32, i8} %load_1, 0 + %add = add i32 %v1, %v2 + %ins = insertvalue { i32, i8 } undef, i32 %add, 0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %indvars.iv.next + store { i32, i8 } %ins, { i32, i8 }* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + ; With two symbolic strides: ; ; for (unsigned i = 0; i < 100; i++) From 8aea5d5951dc7d2130ad671423830a45e215c540 Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Thu, 30 Dec 2021 10:36:36 +0000 Subject: [PATCH 534/992] [Arm] Remove duplicate CPU tests 
There are some duplicate test lines in clang/test/Driver/arm-cortex-cpus.c. Looks like these were duplicated from the corresponding v8.0a tests, which test both "-target armv8" and "-target armv8a". "-target armv8.X" without the "a" doesn't work for later versions though. Several tests also specify the -mlittle-endian twice, which looks unintentional. Remove duplicate test: -target arm -march=armv5 -mthumb Differential Revision: https://reviews.llvm.org/D116415 --- clang/test/Driver/arm-cortex-cpus.c | 41 ++++++++++------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index 7b7a4b355dcc..f1d2c569ab69 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c @@ -28,7 +28,6 @@ // RUN: %clang -target armv5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V5-THUMB %s // RUN: %clang -target arm -march=armv5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V5-THUMB %s // RUN: %clang -target armv5t -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V5-THUMB %s -// RUN: %clang -target arm -march=armv5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V5-THUMB %s // CHECK-V5-THUMB: "-cc1"{{.*}} "-triple" "thumbv5-{{.*}} "-target-cpu" "arm10tdmi" // RUN: %clang -target armv5e -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V5E %s @@ -143,7 +142,7 @@ // RUN: %clang -target arm -march=armv8 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A %s // RUN: %clang -target armv8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A %s // RUN: %clang -target arm -march=armv8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A %s +// RUN: %clang -target arm -march=armv8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A %s // 
CHECK-V8A: "-cc1"{{.*}} "-triple" "armv8-{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armv8r-linux-gnueabi -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8R %s @@ -176,7 +175,7 @@ // RUN: %clang -mcpu=generic -target arm -march=armv8 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A-GENERIC %s // RUN: %clang -mcpu=generic -target armv8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A-GENERIC %s // RUN: %clang -mcpu=generic -target arm -march=armv8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A-GENERIC %s -// RUN: %clang -mcpu=generic -target arm -mlittle-endian -march=armv8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A-GENERIC %s +// RUN: %clang -mcpu=generic -target arm -march=armv8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V8A-GENERIC %s // CHECK-V8A-GENERIC: "-cc1"{{.*}} "-triple" "armv8-{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V8A %s @@ -214,17 +213,15 @@ // RUN: %clang -target arm -march=armv8.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -target armv8.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -target arm -march=armv8.1-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s -// RUN: %clang -target arm -march=armv8.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -target armv8.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -target arm -march=armv8.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.1-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s +// RUN: %clang -target arm -march=armv8.1-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -mcpu=generic -target arm -march=armv8.1a -### 
-c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -mcpu=generic -target armv8.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -mcpu=generic -target arm -march=armv8.1-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s -// RUN: %clang -mcpu=generic -target arm -march=armv8.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -mcpu=generic -target armv8.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // RUN: %clang -mcpu=generic -target arm -march=armv8.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s -// RUN: %clang -mcpu=generic -target arm -mlittle-endian -march=armv8.1-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s +// RUN: %clang -mcpu=generic -target arm -march=armv8.1-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V81A %s // CHECK-V81A: "-cc1"{{.*}} "-triple" "armv8.1a-{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V81A %s @@ -254,10 +251,9 @@ // RUN: %clang -target armv8.2a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s // RUN: %clang -target arm -march=armv8.2a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s // RUN: %clang -target arm -march=armv8.2-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s -// RUN: %clang -target arm -march=armv8.2a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s // RUN: %clang -target armv8.2a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s // RUN: %clang -target arm -march=armv8.2a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.2-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s +// RUN: %clang -target arm -march=armv8.2-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V82A %s // 
CHECK-V82A: "-cc1"{{.*}} "-triple" "armv8.2{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.2a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V82A %s @@ -287,10 +283,9 @@ // RUN: %clang -target armv8.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s // RUN: %clang -target arm -march=armv8.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s // RUN: %clang -target arm -march=armv8.3-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s -// RUN: %clang -target arm -march=armv8.3a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s // RUN: %clang -target armv8.3a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s // RUN: %clang -target arm -march=armv8.3a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.3-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s +// RUN: %clang -target arm -march=armv8.3-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V83A %s // CHECK-V83A: "-cc1"{{.*}} "-triple" "armv8.3{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.3a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V83A %s @@ -304,10 +299,9 @@ // RUN: %clang -target armv8.4a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s // RUN: %clang -target arm -march=armv8.4a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s // RUN: %clang -target arm -march=armv8.4-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s -// RUN: %clang -target arm -march=armv8.4a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s // RUN: %clang -target armv8.4a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s // RUN: %clang -target arm -march=armv8.4a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.4-a -mlittle-endian -### -c %s 2>&1 | FileCheck 
-check-prefix=CHECK-V84A %s +// RUN: %clang -target arm -march=armv8.4-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V84A %s // CHECK-V84A: "-cc1"{{.*}} "-triple" "armv8.4{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.4a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V84A %s @@ -321,10 +315,9 @@ // RUN: %clang -target armv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s // RUN: %clang -target arm -march=armv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s // RUN: %clang -target arm -march=armv8.5-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s -// RUN: %clang -target arm -march=armv8.5a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s // RUN: %clang -target armv8.5a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s // RUN: %clang -target arm -march=armv8.5a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.5-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s +// RUN: %clang -target arm -march=armv8.5-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V85A %s // CHECK-V85A: "-cc1"{{.*}} "-triple" "armv8.5{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V85A %s @@ -338,10 +331,9 @@ // RUN: %clang -target armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s // RUN: %clang -target arm -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s // RUN: %clang -target arm -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s -// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s // RUN: %clang -target armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s // RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck 
-check-prefix=CHECK-V86A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.6-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target arm -march=armv8.6-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s // CHECK-V86A: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s @@ -355,10 +347,9 @@ // RUN: %clang -target armv8.7a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s // RUN: %clang -target arm -march=armv8.7a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s // RUN: %clang -target arm -march=armv8.7-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s -// RUN: %clang -target arm -march=armv8.7a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s // RUN: %clang -target armv8.7a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s // RUN: %clang -target arm -march=armv8.7a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.7-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s +// RUN: %clang -target arm -march=armv8.7-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V87A %s // CHECK-V87A: "-cc1"{{.*}} "-triple" "armv8.7{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.7a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V87A %s @@ -372,10 +363,9 @@ // RUN: %clang -target armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s // RUN: %clang -target arm -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s // RUN: %clang -target arm -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s -// RUN: %clang -target arm -march=armv8.8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s // RUN: %clang -target armv8.8a -mlittle-endian -### -c %s 
2>&1 | FileCheck -check-prefix=CHECK-V88A %s // RUN: %clang -target arm -march=armv8.8a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s -// RUN: %clang -target arm -mlittle-endian -march=armv8.8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s +// RUN: %clang -target arm -march=armv8.8-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V88A %s // CHECK-V88A: "-cc1"{{.*}} "-triple" "armv8.8{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V88A %s @@ -389,10 +379,9 @@ // RUN: %clang -target armv9a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // RUN: %clang -target arm -march=armv9a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // RUN: %clang -target arm -march=armv9-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s -// RUN: %clang -target arm -march=armv9a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // RUN: %clang -target armv9a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // RUN: %clang -target arm -march=armv9a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s -// RUN: %clang -target arm -mlittle-endian -march=armv9-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s +// RUN: %clang -target arm -march=armv9-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V9A %s // CHECK-V9A: "-cc1"{{.*}} "-triple" "armv9{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv9a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V9A %s @@ -406,10 +395,9 @@ // RUN: %clang -target armv9.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s // RUN: %clang -target arm -march=armv9.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s // RUN: %clang -target arm -march=armv9.1-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s -// RUN: %clang -target arm -march=armv9.1a -mlittle-endian -### -c %s 2>&1 
| FileCheck -check-prefix=CHECK-V91A %s // RUN: %clang -target armv9.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s // RUN: %clang -target arm -march=armv9.1a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s -// RUN: %clang -target arm -mlittle-endian -march=armv9.1-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s +// RUN: %clang -target arm -march=armv9.1-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V91A %s // CHECK-V91A: "-cc1"{{.*}} "-triple" "armv9.1{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv9.1a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V91A %s @@ -423,10 +411,9 @@ // RUN: %clang -target armv9.2a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s // RUN: %clang -target arm -march=armv9.2a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s // RUN: %clang -target arm -march=armv9.2-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s -// RUN: %clang -target arm -march=armv9.2a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s // RUN: %clang -target armv9.2a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s // RUN: %clang -target arm -march=armv9.2a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s -// RUN: %clang -target arm -mlittle-endian -march=armv9.2-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s +// RUN: %clang -target arm -march=armv9.2-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V92A %s // CHECK-V92A: "-cc1"{{.*}} "-triple" "armv9.2{{.*}}" "-target-cpu" "generic" // RUN: %clang -target armebv9.2a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V92A %s From ea75be3d9df448b6abafaf752a8141764d93ca33 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 30 Dec 2021 23:57:21 +0100 Subject: [PATCH 535/992] [InferAttrs] Add writeonly to all the math functions All of these functions would be `readnone`, 
but can't be on platforms where they can set `errno`. A `writeonly` function with no pointer arguments can only write (but never read) global state. Writeonly theoretically allows these calls to be CSE'd (a writeonly call with the same arguments will always result in the same global stores) or hoisted out of loops, but that's not implemented currently. There are a few functions in this list that could be `readnone` instead of `writeonly`, if someone is interested. Differential Revision: https://reviews.llvm.org/D116426 --- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 10 + .../Transforms/InferFunctionAttrs/annotate.ll | 231 +++++++++--------- 2 files changed, 126 insertions(+), 115 deletions(-) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index bec23a20ac49..167705235d51 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -34,6 +34,7 @@ STATISTIC(NumReadNone, "Number of functions inferred as readnone"); STATISTIC(NumInaccessibleMemOnly, "Number of functions inferred as inaccessiblememonly"); STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumWriteOnly, "Number of functions inferred as writeonly"); STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); STATISTIC(NumInaccessibleMemOrArgMemOnly, "Number of functions inferred as inaccessiblemem_or_argmemonly"); @@ -71,6 +72,14 @@ static bool setOnlyReadsMemory(Function &F) { return true; } +static bool setDoesNotReadMemory(Function &F) { + if (F.doesNotReadMemory()) // writeonly or readnone + return false; + F.setDoesNotReadMemory(); + ++NumWriteOnly; + return true; +} + static bool setOnlyAccessesArgMemory(Function &F) { if (F.onlyAccessesArgMemory()) return false; @@ -1171,6 +1180,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_truncl: Changed |= setDoesNotThrow(F); Changed |= setDoesNotFreeMemory(F); + 
Changed |= setDoesNotReadMemory(F); Changed |= setWillReturn(F); return Changed; default: diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 4fdf32b60923..8869477231d5 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -195,76 +195,76 @@ declare double @__sinpi(double) ; CHECK: declare float @__sinpif(float) declare float @__sinpif(float) -; CHECK: declare i32 @abs(i32) [[NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +; CHECK: declare i32 @abs(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]] declare i32 @abs(i32) ; CHECK: declare noundef i32 @access(i8* nocapture noundef readonly, i32 noundef) [[NOFREE_NOUNWIND:#[0-9]+]] declare i32 @access(i8*, i32) -; CHECK: declare double @acos(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @acos(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @acos(double) -; CHECK: declare float @acosf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @acosf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @acosf(float) -; CHECK: declare double @acosh(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @acosh(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @acosh(double) -; CHECK: declare float @acoshf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @acoshf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @acoshf(float) -; CHECK: declare x86_fp80 @acoshl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @acoshl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @acoshl(x86_fp80) -; CHECK: declare x86_fp80 @acosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @acosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @acosl(x86_fp80) ; CHECK: declare noalias noundef i8* @aligned_alloc(i64 noundef, i64 noundef) 
[[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND:#[0-9]+]] declare i8* @aligned_alloc(i64, i64) -; CHECK: declare double @asin(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @asin(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @asin(double) -; CHECK: declare float @asinf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @asinf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @asinf(float) -; CHECK: declare double @asinh(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @asinh(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @asinh(double) -; CHECK: declare float @asinhf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @asinhf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @asinhf(float) -; CHECK: declare x86_fp80 @asinhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @asinhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @asinhl(x86_fp80) -; CHECK: declare x86_fp80 @asinl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @asinl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @asinl(x86_fp80) -; CHECK: declare double @atan(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @atan(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @atan(double) -; CHECK: declare double @atan2(double, double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @atan2(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @atan2(double, double) -; CHECK: declare float @atan2f(float, float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @atan2f(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @atan2f(float, float) -; CHECK: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @atan2l(x86_fp80, 
x86_fp80) -; CHECK: declare float @atanf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @atanf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @atanf(float) -; CHECK: declare double @atanh(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @atanh(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @atanh(double) -; CHECK: declare float @atanhf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @atanhf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @atanhf(float) -; CHECK: declare x86_fp80 @atanhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @atanhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @atanhl(x86_fp80) -; CHECK: declare x86_fp80 @atanl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @atanl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @atanl(x86_fp80) ; CHECK: declare double @atof(i8* nocapture) [[NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] @@ -289,25 +289,25 @@ declare void @bcopy(i8*, i8*, i64) ; CHECK: declare void @bzero(i8* nocapture writeonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] declare void @bzero(i8*, i64) -; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] declare i8* @calloc(i64, i64) -; CHECK: declare double @cbrt(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @cbrt(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @cbrt(double) -; CHECK: declare float @cbrtf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @cbrtf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @cbrtf(float) -; CHECK: declare x86_fp80 @cbrtl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @cbrtl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 
@cbrtl(x86_fp80) -; CHECK: declare double @ceil(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @ceil(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @ceil(double) -; CHECK: declare float @ceilf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @ceilf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @ceilf(float) -; CHECK: declare x86_fp80 @ceill(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @ceill(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @ceill(x86_fp80) ; CHECK: declare noundef i32 @chmod(i8* nocapture noundef readonly, i16 noundef zeroext) [[NOFREE_NOUNWIND]] @@ -322,70 +322,70 @@ declare void @clearerr(%opaque*) ; CHECK: declare noundef i32 @closedir(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @closedir(%opaque*) -; CHECK: declare double @copysign(double, double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @copysign(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @copysign(double, double) -; CHECK: declare float @copysignf(float, float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @copysignf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @copysignf(float, float) -; CHECK: declare x86_fp80 @copysignl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @copysignl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @copysignl(x86_fp80, x86_fp80) -; CHECK: declare double @cos(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @cos(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @cos(double) -; CHECK: declare float @cosf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @cosf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @cosf(float) -; CHECK: declare double @cosh(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @cosh(double) 
[[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @cosh(double) -; CHECK: declare float @coshf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @coshf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @coshf(float) -; CHECK: declare x86_fp80 @coshl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @coshl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @coshl(x86_fp80) -; CHECK: declare x86_fp80 @cosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @cosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @cosl(x86_fp80) ; CHECK: declare noundef i8* @ctermid(i8* nocapture noundef) [[NOFREE_NOUNWIND]] declare i8* @ctermid(i8*) -; CHECK: declare double @exp(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @exp(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @exp(double) -; CHECK: declare double @exp2(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @exp2(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @exp2(double) -; CHECK: declare float @exp2f(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @exp2f(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @exp2f(float) -; CHECK: declare x86_fp80 @exp2l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @exp2l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @exp2l(x86_fp80) -; CHECK: declare float @expf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @expf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @expf(float) -; CHECK: declare x86_fp80 @expl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @expl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @expl(x86_fp80) -; CHECK: declare double @expm1(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @expm1(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double 
@expm1(double) -; CHECK: declare float @expm1f(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @expm1f(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @expm1f(float) -; CHECK: declare x86_fp80 @expm1l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @expm1l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @expm1l(x86_fp80) -; CHECK: declare double @fabs(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @fabs(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @fabs(double) -; CHECK: declare float @fabsf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @fabsf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @fabsf(float) -; CHECK: declare x86_fp80 @fabsl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @fabsl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @fabsl(x86_fp80) ; CHECK: declare noundef i32 @fclose(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] @@ -403,14 +403,14 @@ declare i32 @ferror(%opaque*) ; CHECK: declare noundef i32 @fflush(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @fflush(%opaque*) -; CHECK: declare i32 @ffs(i32) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i32 @ffs(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i32 @ffs(i32) -; CHECK-KNOWN: declare i32 @ffsl(i64) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK-KNOWN: declare i32 @ffsl(i64) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] ; CHECK-UNKNOWN: declare i32 @ffsl(i64){{$}} declare i32 @ffsl(i64) -; CHECK-KNOWN: declare i32 @ffsll(i64) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK-KNOWN: declare i32 @ffsll(i64) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] ; CHECK-UNKNOWN: declare i32 @ffsll(i64){{$}} declare i32 @ffsll(i64) @@ -429,13 +429,13 @@ declare i32 @fileno(%opaque*) ; CHECK: declare void @flockfile(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare void @flockfile(%opaque*) -; CHECK: declare double @floor(double) 
[[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @floor(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @floor(double) -; CHECK: declare float @floorf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @floorf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @floorf(float) -; CHECK: declare x86_fp80 @floorl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @floorl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @floorl(x86_fp80) ; CHECK: declare i32 @fls(i32) @@ -447,31 +447,31 @@ declare i32 @flsl(i64) ; CHECK: declare i32 @flsll(i64) declare i32 @flsll(i64) -; CHECK: declare double @fmax(double, double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @fmax(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @fmax(double, double) -; CHECK: declare float @fmaxf(float, float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @fmaxf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @fmaxf(float, float) -; CHECK: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) -; CHECK: declare double @fmin(double, double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @fmin(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @fmin(double, double) -; CHECK: declare float @fminf(float, float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @fminf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @fminf(float, float) -; CHECK: declare x86_fp80 @fminl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @fminl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @fminl(x86_fp80, x86_fp80) -; CHECK: declare double @fmod(double, double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare 
double @fmod(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @fmod(double, double) -; CHECK: declare float @fmodf(float, float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @fmodf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @fmodf(float, float) -; CHECK: declare x86_fp80 @fmodl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @fmodl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @fmodl(x86_fp80, x86_fp80) ; CHECK: declare noalias noundef %opaque* @fopen(i8* nocapture noundef readonly, i8* nocapture noundef readonly) [[NOFREE_NOUNWIND]] @@ -578,13 +578,13 @@ declare i8* @gets(i8*) ; CHECK: declare noundef i32 @gettimeofday(%opaque* nocapture noundef, i8* nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @gettimeofday(%opaque*, i8*) -; CHECK: declare i32 @isascii(i32) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i32 @isascii(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i32 @isascii(i32) -; CHECK: declare i32 @isdigit(i32) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i32 @isdigit(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i32 @isdigit(i32) -; CHECK: declare i64 @labs(i64) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i64 @labs(i64) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i64 @labs(i64) ; CHECK: declare noundef i32 @lchown(i8* nocapture noundef readonly, i32 noundef, i32 noundef) [[NOFREE_NOUNWIND]] @@ -599,52 +599,52 @@ declare float @ldexpf(float, i32) ; CHECK: declare x86_fp80 @ldexpl(x86_fp80, i32 signext) [[NOFREE_WILLRETURN]] declare x86_fp80 @ldexpl(x86_fp80, i32) -; CHECK: declare i64 @llabs(i64) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i64 @llabs(i64) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i64 @llabs(i64) -; CHECK: declare double @log(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @log(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @log(double) -; 
CHECK: declare double @log10(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @log10(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @log10(double) -; CHECK: declare float @log10f(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @log10f(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @log10f(float) -; CHECK: declare x86_fp80 @log10l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @log10l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @log10l(x86_fp80) -; CHECK: declare double @log1p(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @log1p(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @log1p(double) -; CHECK: declare float @log1pf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @log1pf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @log1pf(float) -; CHECK: declare x86_fp80 @log1pl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @log1pl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @log1pl(x86_fp80) -; CHECK: declare double @log2(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @log2(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @log2(double) -; CHECK: declare float @log2f(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @log2f(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @log2f(float) -; CHECK: declare x86_fp80 @log2l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @log2l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @log2l(x86_fp80) -; CHECK: declare double @logb(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @logb(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @logb(double) -; CHECK: declare float @logbf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @logbf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare 
float @logbf(float) -; CHECK: declare x86_fp80 @logbl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @logbl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @logbl(x86_fp80) -; CHECK: declare float @logf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @logf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @logf(float) -; CHECK: declare x86_fp80 @logl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @logl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @logl(x86_fp80) ; CHECK: declare noundef i32 @lstat(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]] @@ -703,13 +703,13 @@ declare float @modff(float, float*) ; CHECK: declare x86_fp80 @modfl(x86_fp80, x86_fp80* nocapture) [[NOFREE_NOUNWIND_WILLRETURN]] declare x86_fp80 @modfl(x86_fp80, x86_fp80*) -; CHECK: declare double @nearbyint(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @nearbyint(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @nearbyint(double) -; CHECK: declare float @nearbyintf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @nearbyintf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @nearbyintf(float) -; CHECK: declare x86_fp80 @nearbyintl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @nearbyintl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @nearbyintl(x86_fp80) ; CHECK-LINUX: declare noundef i32 @open(i8* nocapture noundef readonly, i32 noundef, ...) 
[[NOFREE]] @@ -734,13 +734,13 @@ declare %opaque* @popen(i8*, i8*) ; CHECK: declare i32 @posix_memalign(i8**, i64, i64) [[NOFREE]] declare i32 @posix_memalign(i8**, i64, i64) -; CHECK: declare double @pow(double, double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @pow(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @pow(double, double) -; CHECK: declare float @powf(float, float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @powf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @powf(float, float) -; CHECK: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @powl(x86_fp80, x86_fp80) ; CHECK: declare noundef i64 @pread(i32 noundef, i8* nocapture noundef, i64 noundef, i64 noundef) [[NOFREE]] @@ -792,25 +792,25 @@ declare i32 @rename(i8*, i8*) ; CHECK: declare void @rewind(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare void @rewind(%opaque*) -; CHECK: declare double @rint(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @rint(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @rint(double) -; CHECK: declare float @rintf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @rintf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @rintf(float) -; CHECK: declare x86_fp80 @rintl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @rintl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @rintl(x86_fp80) ; CHECK: declare noundef i32 @rmdir(i8* nocapture noundef readonly) [[NOFREE_NOUNWIND]] declare i32 @rmdir(i8*) -; CHECK: declare double @round(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @round(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @round(double) -; CHECK: declare float @roundf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float 
@roundf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @roundf(float) -; CHECK: declare x86_fp80 @roundl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @roundl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @roundl(x86_fp80) ; CHECK: declare noundef i32 @scanf(i8* nocapture noundef readonly, ...) [[NOFREE_NOUNWIND]] @@ -825,22 +825,22 @@ declare i32 @setitimer(i32, %opaque*, %opaque*) ; CHECK: declare noundef i32 @setvbuf(%opaque* nocapture noundef, i8* noundef, i32 noundef, i64 noundef) [[NOFREE_NOUNWIND]] declare i32 @setvbuf(%opaque*, i8*, i32, i64) -; CHECK: declare double @sin(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @sin(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @sin(double) -; CHECK: declare float @sinf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @sinf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @sinf(float) -; CHECK: declare double @sinh(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @sinh(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @sinh(double) -; CHECK: declare float @sinhf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @sinhf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @sinhf(float) -; CHECK: declare x86_fp80 @sinhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @sinhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @sinhl(x86_fp80) -; CHECK: declare x86_fp80 @sinl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @sinl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @sinl(x86_fp80) ; CHECK: declare noundef i32 @snprintf(i8* noalias nocapture noundef writeonly, i64 noundef, i8* nocapture noundef readonly, ...) [[NOFREE_NOUNWIND]] @@ -849,13 +849,13 @@ declare i32 @snprintf(i8*, i64, i8*, ...) 
; CHECK: declare noundef i32 @sprintf(i8* noalias nocapture noundef writeonly, i8* nocapture noundef readonly, ...) [[NOFREE_NOUNWIND]] declare i32 @sprintf(i8*, i8*, ...) -; CHECK: declare double @sqrt(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @sqrt(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @sqrt(double) -; CHECK: declare float @sqrtf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @sqrtf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @sqrtf(float) -; CHECK: declare x86_fp80 @sqrtl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @sqrtl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @sqrtl(x86_fp80) ; CHECK: declare noundef i32 @sscanf(i8* nocapture noundef readonly, i8* nocapture noundef readonly, ...) [[NOFREE_NOUNWIND]] @@ -969,22 +969,22 @@ declare i64 @strxfrm(i8*, i8*, i64) ; CHECK: declare noundef i32 @system(i8* nocapture noundef readonly) [[NOFREE]] declare i32 @system(i8*) -; CHECK: declare double @tan(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @tan(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @tan(double) -; CHECK: declare float @tanf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @tanf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @tanf(float) -; CHECK: declare double @tanh(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @tanh(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @tanh(double) -; CHECK: declare float @tanhf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @tanhf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @tanhf(float) -; CHECK: declare x86_fp80 @tanhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @tanhl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @tanhl(x86_fp80) -; CHECK: declare x86_fp80 @tanl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare 
x86_fp80 @tanl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @tanl(x86_fp80) ; CHECK: declare noundef i64 @times(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] @@ -996,16 +996,16 @@ declare %opaque* @tmpfile() ; CHECK-LINUX: declare noalias noundef %opaque* @tmpfile64() [[NOFREE_NOUNWIND]] declare %opaque* @tmpfile64() -; CHECK: declare i32 @toascii(i32) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare i32 @toascii(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i32 @toascii(i32) -; CHECK: declare double @trunc(double) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare double @trunc(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @trunc(double) -; CHECK: declare float @truncf(float) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare float @truncf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare float @truncf(float) -; CHECK: declare x86_fp80 @truncl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare x86_fp80 @truncl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @truncl(x86_fp80) ; CHECK: declare noundef i32 @uname(%opaque* nocapture noundef) [[NOFREE_NOUNWIND]] @@ -1064,6 +1064,7 @@ declare void @memset_pattern16(i8*, i8*, i64) ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn writeonly } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind } ; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { inaccessiblememonly mustprogress nofree nounwind willreturn } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind readonly willreturn } From 43c5fffcef5c1022c189a13c41dbdd2d653d59dd Mon Sep 17 00:00:00 2001 From: luxufan <932494295@qq.com> Date: Wed, 5 Jan 2022 00:04:09 +0800 Subject: [PATCH 536/992] Revert "[JITLink] Add fixup value range check" This reverts 
commit 17af06ba8005d6d14b0ac79ece01ecb028de9f90. --- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 25 ++----------------- .../JITLink/RISCV/ELF_pc_indirect.s | 4 +-- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 94b659c02092..26ec79ea50cf 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -161,15 +161,6 @@ static uint32_t extractBits(uint32_t Num, unsigned Low, unsigned Size) { return (Num & (((1ULL << (Size + 1)) - 1) << Low)) >> Low; } -static inline bool isInRangeForImmS32(int64_t Value) { - return (Value >= std::numeric_limits::min() && - Value <= std::numeric_limits::max()); -} - -static inline bool isInRangeForImmU32(uint64_t Value) { - return Value <= std::numeric_limits::max(); -} - class ELFJITLinker_riscv : public JITLinker { friend class JITLinker; @@ -198,18 +189,14 @@ class ELFJITLinker_riscv : public JITLinker { break; } case R_RISCV_HI20: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); - if (LLVM_UNLIKELY(!isInRangeForImmU32(Value))) - return makeTargetOutOfRangeError(G, B, E); + int64_t Value = E.getTarget().getAddress() + E.getAddend(); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); break; } case R_RISCV_LO12_I: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); - if (LLVM_UNLIKELY(!isInRangeForImmU32(Value))) - return makeTargetOutOfRangeError(G, B, E); + int64_t Value = E.getTarget().getAddress() + E.getAddend(); int32_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -218,8 +205,6 @@ class ELFJITLinker_riscv : public JITLinker { } case R_RISCV_CALL: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; - if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) - return 
makeTargetOutOfRangeError(G, B, E); int32_t Hi = (Value + 0x800) & 0xFFFFF000; int32_t Lo = Value & 0xFFF; uint32_t RawInstrAuipc = *(little32_t *)FixupPtr; @@ -231,8 +216,6 @@ class ELFJITLinker_riscv : public JITLinker { } case R_RISCV_PCREL_HI20: { int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; - if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) - return makeTargetOutOfRangeError(G, B, E); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); @@ -244,8 +227,6 @@ class ELFJITLinker_riscv : public JITLinker { return RelHI20.takeError(); int64_t Value = RelHI20->getTarget().getAddress() + RelHI20->getAddend() - E.getTarget().getAddress(); - if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) - return makeTargetOutOfRangeError(G, B, E); int64_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -256,8 +237,6 @@ class ELFJITLinker_riscv : public JITLinker { auto RelHI20 = getRISCVPCRelHi20(E); int64_t Value = RelHI20->getTarget().getAddress() + RelHI20->getAddend() - E.getTarget().getAddress(); - if (LLVM_UNLIKELY(!isInRangeForImmS32(Value))) - return makeTargetOutOfRangeError(G, B, E); int64_t Lo = Value & 0xFFF; uint32_t Imm31_25 = extractBits(Lo, 5, 7) << 25; uint32_t Imm11_7 = extractBits(Lo, 0, 5) << 7; diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s index 32897e32bc9f..539da2b1e81d 100644 --- a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_indirect.s @@ -4,11 +4,11 @@ # RUN: llvm-mc -triple=riscv32 -position-independent -filetype=obj \ # RUN: -o %t/elf_riscv32_sm_pic_reloc.o %s # RUN: llvm-jitlink -noexec \ -# RUN: -slab-allocate 100Kb -slab-address 0x1ff00000 -slab-page-size 4096 \ +# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ 
# RUN: -define-abs external_func=0x1 -define-abs external_data=0x2 \ # RUN: -check %s %t/elf_riscv64_sm_pic_reloc.o # RUN: llvm-jitlink -noexec \ -# RUN: -slab-allocate 100Kb -slab-address 0x1ff00000 -slab-page-size 4096 \ +# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ # RUN: -define-abs external_func=0x1 -define-abs external_data=0x2 \ # RUN: -check %s %t/elf_riscv32_sm_pic_reloc.o # From c41610778bc7e0f57239264381c64b8503b21e75 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 21 Dec 2021 08:44:01 -0800 Subject: [PATCH 537/992] [NFC][regalloc] Introduce RegAllocGreedy.h This was suggested in D114831. It should simplify the relation between eviction advisor and the allocator, and simplify ingesting more features tied to the internals of the allocator, in the future. This change simply pulls out RAGreedy, places it in the llvm namespace, and cleans up a bit the includes in the new header file. Differential Revision: https://reviews.llvm.org/D116114 --- llvm/lib/CodeGen/RegAllocGreedy.cpp | 357 +----------------------- llvm/lib/CodeGen/RegAllocGreedy.h | 419 ++++++++++++++++++++++++++++ 2 files changed, 420 insertions(+), 356 deletions(-) create mode 100644 llvm/lib/CodeGen/RegAllocGreedy.h diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index ce3cf31dbd6b..7088c944f5b1 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "RegAllocGreedy.h" #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" @@ -135,362 +136,6 @@ static cl::opt ConsiderLocalIntervalCost( static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); -namespace { - -class RAGreedy : public MachineFunctionPass, - public RegAllocBase, - private LiveRangeEdit::Delegate { - // Convenient shortcuts. 
- using PQueue = std::priority_queue>; - using SmallLISet = SmallPtrSet; - - // context - MachineFunction *MF; - - // Shortcuts to some useful interface. - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RCI; - - // analyses - SlotIndexes *Indexes; - MachineBlockFrequencyInfo *MBFI; - MachineDominatorTree *DomTree; - MachineLoopInfo *Loops; - MachineOptimizationRemarkEmitter *ORE; - EdgeBundles *Bundles; - SpillPlacement *SpillPlacer; - LiveDebugVariables *DebugVars; - AliasAnalysis *AA; - - // state - std::unique_ptr SpillerInstance; - PQueue Queue; - std::unique_ptr VRAI; - Optional ExtraInfo; - std::unique_ptr EvictAdvisor; - - // Enum CutOffStage to keep a track whether the register allocation failed - // because of the cutoffs encountered in last chance recoloring. - // Note: This is used as bitmask. New value should be next power of 2. - enum CutOffStage { - // No cutoffs encountered - CO_None = 0, - - // lcr-max-depth cutoff encountered - CO_Depth = 1, - - // lcr-max-interf cutoff encountered - CO_Interf = 2 - }; - - uint8_t CutOffInfo; - -#ifndef NDEBUG - static const char *const StageName[]; -#endif - - /// EvictionTrack - Keeps track of past evictions in order to optimize region - /// split decision. - class EvictionTrack { - - public: - using EvictorInfo = - std::pair; - using EvicteeInfo = llvm::DenseMap; - - private: - /// Each Vreg that has been evicted in the last stage of selectOrSplit will - /// be mapped to the evictor Vreg and the PhysReg it was evicted from. - EvicteeInfo Evictees; - - public: - /// Clear all eviction information. - void clear() { Evictees.clear(); } - - /// Clear eviction information for the given evictee Vreg. - /// E.g. when Vreg get's a new allocation, the old eviction info is no - /// longer relevant. - /// \param Evictee The evictee Vreg for whom we want to clear collected - /// eviction info. - void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } - - /// Track new eviction. 
- /// The Evictor vreg has evicted the Evictee vreg from Physreg. - /// \param PhysReg The physical register Evictee was evicted from. - /// \param Evictor The evictor Vreg that evicted Evictee. - /// \param Evictee The evictee Vreg. - void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { - Evictees[Evictee].first = Evictor; - Evictees[Evictee].second = PhysReg; - } - - /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. - /// \param Evictee The evictee vreg. - /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if - /// nobody has evicted Evictee from PhysReg. - EvictorInfo getEvictor(Register Evictee) { - if (Evictees.count(Evictee)) { - return Evictees[Evictee]; - } - - return EvictorInfo(0, 0); - } - }; - - // Keeps track of past evictions in order to optimize region split decision. - EvictionTrack LastEvicted; - - // splitting state. - std::unique_ptr SA; - std::unique_ptr SE; - - /// Cached per-block interference maps - InterferenceCache IntfCache; - - /// All basic blocks where the current register has uses. - SmallVector SplitConstraints; - - /// Global live range splitting candidate info. - struct GlobalSplitCandidate { - // Register intended for assignment, or 0. - MCRegister PhysReg; - - // SplitKit interval index for this candidate. - unsigned IntvIdx; - - // Interference for PhysReg. - InterferenceCache::Cursor Intf; - - // Bundles where this candidate should be live. - BitVector LiveBundles; - SmallVector ActiveBlocks; - - void reset(InterferenceCache &Cache, MCRegister Reg) { - PhysReg = Reg; - IntvIdx = 0; - Intf.setPhysReg(Cache, Reg); - LiveBundles.clear(); - ActiveBlocks.clear(); - } - - // Set B[I] = C for every live bundle where B[I] was NoCand. 
- unsigned getBundles(SmallVectorImpl &B, unsigned C) { - unsigned Count = 0; - for (unsigned I : LiveBundles.set_bits()) - if (B[I] == NoCand) { - B[I] = C; - Count++; - } - return Count; - } - }; - - /// Candidate info for each PhysReg in AllocationOrder. - /// This vector never shrinks, but grows to the size of the largest register - /// class. - SmallVector GlobalCand; - - enum : unsigned { NoCand = ~0u }; - - /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to - /// NoCand which indicates the stack interval. - SmallVector BundleCand; - - /// Callee-save register cost, calculated once per machine function. - BlockFrequency CSRCost; - - /// Enable or not the consideration of the cost of local intervals created - /// by a split candidate when choosing the best split candidate. - bool EnableAdvancedRASplitCost; - - /// Set of broken hints that may be reconciled later because of eviction. - SmallSetVector SetOfBrokenHints; - - /// The register cost values. This list will be recreated for each Machine - /// Function - ArrayRef RegCosts; - -public: - RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); - - /// Return the pass name. - StringRef getPassName() const override { return "Greedy Register Allocator"; } - - /// RAGreedy analysis usage. - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - Spiller &spiller() override { return *SpillerInstance; } - void enqueueImpl(LiveInterval *LI) override; - LiveInterval *dequeue() override; - MCRegister selectOrSplit(LiveInterval &, - SmallVectorImpl &) override; - void aboutToRemoveInterval(LiveInterval &) override; - - /// Perform register allocation. 
- bool runOnMachineFunction(MachineFunction &mf) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoPHIs); - } - - MachineFunctionProperties getClearedProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::IsSSA); - } - - static char ID; - -private: - MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned = 0); - - bool LRE_CanEraseVirtReg(Register) override; - void LRE_WillShrinkVirtReg(Register) override; - void LRE_DidCloneVirtReg(Register, Register) override; - void enqueue(PQueue &CurQueue, LiveInterval *LI); - LiveInterval *dequeue(PQueue &CurQueue); - - BlockFrequency calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); - bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef); - bool growRegion(GlobalSplitCandidate &Cand); - bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order); - bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, - GlobalSplitCandidate &Cand, unsigned BBNumber, - const AllocationOrder &Order); - BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, - const AllocationOrder &Order, - bool *CanCauseEvictionChain); - bool calcCompactRegion(GlobalSplitCandidate&); - void splitAroundRegion(LiveRangeEdit&, ArrayRef); - void calcGapWeights(MCRegister, SmallVectorImpl &); - bool canEvictInterferenceInRange(const LiveInterval &VirtReg, - MCRegister PhysReg, SlotIndex Start, - SlotIndex End, EvictionCost &MaxCost) const; - MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, - const LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictWeight) const; - void evictInterference(LiveInterval &, MCRegister, - SmallVectorImpl &); - bool mayRecolorAllInterferences(MCRegister PhysReg, 
LiveInterval &VirtReg, - SmallLISet &RecoloringCandidates, - const SmallVirtRegSet &FixedRegisters); - - MCRegister tryAssign(LiveInterval&, AllocationOrder&, - SmallVectorImpl&, - const SmallVirtRegSet&); - MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, - uint8_t, const SmallVirtRegSet &) const; - MCRegister tryEvict(LiveInterval &, AllocationOrder &, - SmallVectorImpl &, uint8_t, - const SmallVirtRegSet &); - MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, - SmallVectorImpl &); - /// Calculate cost of region splitting. - unsigned calculateRegionSplitCost(LiveInterval &VirtReg, - AllocationOrder &Order, - BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR, - bool *CanCauseEvictionChain = nullptr); - /// Perform region splitting. - unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, - bool HasCompact, - SmallVectorImpl &NewVRegs); - /// Check other options before using a callee-saved register for the first - /// time. - MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, - AllocationOrder &Order, MCRegister PhysReg, - uint8_t &CostPerUseLimit, - SmallVectorImpl &NewVRegs); - void initializeCSRCost(); - unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); - unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); - unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); - unsigned trySplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&, - const SmallVirtRegSet&); - unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, - SmallVectorImpl &, - SmallVirtRegSet &, unsigned); - bool tryRecoloringCandidates(PQueue &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned); - void tryHintRecoloring(LiveInterval &); - void tryHintsRecoloring(); - - /// Model the information carried by one end of a copy. - struct HintInfo { - /// The frequency of the copy. 
- BlockFrequency Freq; - /// The virtual register or physical register. - Register Reg; - /// Its currently assigned register. - /// In case of a physical register Reg == PhysReg. - MCRegister PhysReg; - - HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg) - : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} - }; - using HintsInfo = SmallVector; - - BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); - void collectHintInfo(Register, HintsInfo &); - - /// Greedy RA statistic to remark. - struct RAGreedyStats { - unsigned Reloads = 0; - unsigned FoldedReloads = 0; - unsigned ZeroCostFoldedReloads = 0; - unsigned Spills = 0; - unsigned FoldedSpills = 0; - unsigned Copies = 0; - float ReloadsCost = 0.0f; - float FoldedReloadsCost = 0.0f; - float SpillsCost = 0.0f; - float FoldedSpillsCost = 0.0f; - float CopiesCost = 0.0f; - - bool isEmpty() { - return !(Reloads || FoldedReloads || Spills || FoldedSpills || - ZeroCostFoldedReloads || Copies); - } - - void add(RAGreedyStats other) { - Reloads += other.Reloads; - FoldedReloads += other.FoldedReloads; - ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; - Spills += other.Spills; - FoldedSpills += other.FoldedSpills; - Copies += other.Copies; - ReloadsCost += other.ReloadsCost; - FoldedReloadsCost += other.FoldedReloadsCost; - SpillsCost += other.SpillsCost; - FoldedSpillsCost += other.FoldedSpillsCost; - CopiesCost += other.CopiesCost; - } - - void report(MachineOptimizationRemarkMissed &R); - }; - - /// Compute statistic for a basic block. - RAGreedyStats computeStats(MachineBasicBlock &MBB); - - /// Compute and report statistic through a remark. - RAGreedyStats reportStats(MachineLoop *L); - - /// Report the statistic for each loop. 
- void reportStats(); -}; - -} // end anonymous namespace - char RAGreedy::ID = 0; char &llvm::RAGreedyID = RAGreedy::ID; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h new file mode 100644 index 000000000000..c414cf73dc5f --- /dev/null +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -0,0 +1,419 @@ +//==- RegAllocGreedy.h ------- greedy register allocator ----------*-C++-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_ +#define LLVM_CODEGEN_REGALLOCGREEDY_H_ + +#include "AllocationOrder.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" +#include "RegAllocBase.h" +#include "RegAllocEvictionAdvisor.h" +#include "SpillPlacement.h" +#include "SplitKit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/LiveStacks.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include 
"llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Spiller.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +class RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + // Convenient shortcuts. + using PQueue = std::priority_queue>; + using SmallLISet = SmallPtrSet; + + // context + MachineFunction *MF; + + // Shortcuts to some useful interface. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + + // analyses + SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineOptimizationRemarkEmitter *ORE; + EdgeBundles *Bundles; + SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; + AliasAnalysis *AA; + + // state + std::unique_ptr SpillerInstance; + PQueue Queue; + std::unique_ptr VRAI; + Optional ExtraInfo; + std::unique_ptr EvictAdvisor; + + // Enum CutOffStage to keep a track whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as bitmask. New value should be next power of 2. 
+ enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + +#ifndef NDEBUG + static const char *const StageName[]; +#endif + + /// EvictionTrack - Keeps track of past evictions in order to optimize region + /// split decision. + class EvictionTrack { + + public: + using EvictorInfo = + std::pair; + using EvicteeInfo = llvm::DenseMap; + + private: + /// Each Vreg that has been evicted in the last stage of selectOrSplit will + /// be mapped to the evictor Vreg and the PhysReg it was evicted from. + EvicteeInfo Evictees; + + public: + /// Clear all eviction information. + void clear() { Evictees.clear(); } + + /// Clear eviction information for the given evictee Vreg. + /// E.g. when Vreg get's a new allocation, the old eviction info is no + /// longer relevant. + /// \param Evictee The evictee Vreg for whom we want to clear collected + /// eviction info. + void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } + + /// Track new eviction. + /// The Evictor vreg has evicted the Evictee vreg from Physreg. + /// \param PhysReg The physical register Evictee was evicted from. + /// \param Evictor The evictor Vreg that evicted Evictee. + /// \param Evictee The evictee Vreg. + void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { + Evictees[Evictee].first = Evictor; + Evictees[Evictee].second = PhysReg; + } + + /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. + /// \param Evictee The evictee vreg. + /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if + /// nobody has evicted Evictee from PhysReg. + EvictorInfo getEvictor(Register Evictee) { + if (Evictees.count(Evictee)) { + return Evictees[Evictee]; + } + + return EvictorInfo(0, 0); + } + }; + + // Keeps track of past evictions in order to optimize region split decision. 
+ EvictionTrack LastEvicted; + + // splitting state. + std::unique_ptr SA; + std::unique_ptr SE; + + /// Cached per-block interference maps + InterferenceCache IntfCache; + + /// All basic blocks where the current register has uses. + SmallVector SplitConstraints; + + /// Global live range splitting candidate info. + struct GlobalSplitCandidate { + // Register intended for assignment, or 0. + MCRegister PhysReg; + + // SplitKit interval index for this candidate. + unsigned IntvIdx; + + // Interference for PhysReg. + InterferenceCache::Cursor Intf; + + // Bundles where this candidate should be live. + BitVector LiveBundles; + SmallVector ActiveBlocks; + + void reset(InterferenceCache &Cache, MCRegister Reg) { + PhysReg = Reg; + IntvIdx = 0; + Intf.setPhysReg(Cache, Reg); + LiveBundles.clear(); + ActiveBlocks.clear(); + } + + // Set B[I] = C for every live bundle where B[I] was NoCand. + unsigned getBundles(SmallVectorImpl &B, unsigned C) { + unsigned Count = 0; + for (unsigned I : LiveBundles.set_bits()) + if (B[I] == NoCand) { + B[I] = C; + Count++; + } + return Count; + } + }; + + /// Candidate info for each PhysReg in AllocationOrder. + /// This vector never shrinks, but grows to the size of the largest register + /// class. + SmallVector GlobalCand; + + enum : unsigned { NoCand = ~0u }; + + /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to + /// NoCand which indicates the stack interval. + SmallVector BundleCand; + + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + + /// Enable or not the consideration of the cost of local intervals created + /// by a split candidate when choosing the best split candidate. + bool EnableAdvancedRASplitCost; + + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector SetOfBrokenHints; + + /// The register cost values. 
This list will be recreated for each Machine + /// Function + ArrayRef RegCosts; + +public: + RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); + + /// Return the pass name. + StringRef getPassName() const override { return "Greedy Register Allocator"; } + + /// RAGreedy analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueueImpl(LiveInterval *LI) override; + LiveInterval *dequeue() override; + MCRegister selectOrSplit(LiveInterval &, + SmallVectorImpl &) override; + void aboutToRemoveInterval(LiveInterval &) override; + + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + static char ID; + +private: + MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, + SmallVirtRegSet &, unsigned = 0); + + bool LRE_CanEraseVirtReg(Register) override; + void LRE_WillShrinkVirtReg(Register) override; + void LRE_DidCloneVirtReg(Register, Register) override; + void enqueue(PQueue &CurQueue, LiveInterval *LI); + LiveInterval *dequeue(PQueue &CurQueue); + + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &); + bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef); + bool growRegion(GlobalSplitCandidate &Cand); + bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order); + bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, + GlobalSplitCandidate &Cand, unsigned BBNumber, + const AllocationOrder &Order); + BlockFrequency 
calcGlobalSplitCost(GlobalSplitCandidate &, + const AllocationOrder &Order, + bool *CanCauseEvictionChain); + bool calcCompactRegion(GlobalSplitCandidate &); + void splitAroundRegion(LiveRangeEdit &, ArrayRef); + void calcGapWeights(MCRegister, SmallVectorImpl &); + bool canEvictInterferenceInRange(const LiveInterval &VirtReg, + MCRegister PhysReg, SlotIndex Start, + SlotIndex End, EvictionCost &MaxCost) const; + MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, + const LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictWeight) const; + void evictInterference(LiveInterval &, MCRegister, + SmallVectorImpl &); + bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters); + + MCRegister tryAssign(LiveInterval &, AllocationOrder &, + SmallVectorImpl &, const SmallVirtRegSet &); + MCRegister tryEvict(LiveInterval &, AllocationOrder &, + SmallVectorImpl &, uint8_t, + const SmallVirtRegSet &); + MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl &); + /// Calculate cost of region splitting. + unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain = nullptr); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, SmallVectorImpl &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. 
+ MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, MCRegister PhysReg, + uint8_t &CostPerUseLimit, + SmallVectorImpl &NewVRegs); + void initializeCSRCost(); + unsigned tryBlockSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl &); + unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl &); + unsigned tryLocalSplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl &); + unsigned trySplit(LiveInterval &, AllocationOrder &, + SmallVectorImpl &, const SmallVirtRegSet &); + unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + SmallVectorImpl &, + SmallVirtRegSet &, unsigned); + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl &, + SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. + Register Reg; + /// Its currently assigned register. + /// In case of a physical register Reg == PhysReg. + MCRegister PhysReg; + + HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + using HintsInfo = SmallVector; + + BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); + void collectHintInfo(Register, HintsInfo &); + + /// Greedy RA statistic to remark. 
+ struct RAGreedyStats { + unsigned Reloads = 0; + unsigned FoldedReloads = 0; + unsigned ZeroCostFoldedReloads = 0; + unsigned Spills = 0; + unsigned FoldedSpills = 0; + unsigned Copies = 0; + float ReloadsCost = 0.0f; + float FoldedReloadsCost = 0.0f; + float SpillsCost = 0.0f; + float FoldedSpillsCost = 0.0f; + float CopiesCost = 0.0f; + + bool isEmpty() { + return !(Reloads || FoldedReloads || Spills || FoldedSpills || + ZeroCostFoldedReloads || Copies); + } + + void add(RAGreedyStats other) { + Reloads += other.Reloads; + FoldedReloads += other.FoldedReloads; + ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; + Spills += other.Spills; + FoldedSpills += other.FoldedSpills; + Copies += other.Copies; + ReloadsCost += other.ReloadsCost; + FoldedReloadsCost += other.FoldedReloadsCost; + SpillsCost += other.SpillsCost; + FoldedSpillsCost += other.FoldedSpillsCost; + CopiesCost += other.CopiesCost; + } + + void report(MachineOptimizationRemarkMissed &R); + }; + + /// Compute statistic for a basic block. + RAGreedyStats computeStats(MachineBasicBlock &MBB); + + /// Compute and report statistic through a remark. + RAGreedyStats reportStats(MachineLoop *L); + + /// Report the statistic for each loop. + void reportStats(); +}; +} // namespace llvm +#endif // #ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_ From 71059f26d31398d109be057e35bb8c5960d8aaf6 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 30 Dec 2021 16:30:55 +0100 Subject: [PATCH 538/992] [AST] Produce ReturnStmt containing RecoveryExpr when type is wrong Previously we just drop the ReturnStmt and its argument from the AST, which blocks analysis of broken code. 
Fixes https://github.com/llvm/llvm-project/issues/39944 Differential Revision: https://reviews.llvm.org/D116414 --- clang/include/clang/Sema/Sema.h | 3 +- clang/lib/Sema/SemaStmt.cpp | 41 +++++++++++++++---- clang/test/AST/ast-dump-recovery.cpp | 40 ++++++++++++++++++ .../SemaCXX/constant-expression-cxx11.cpp | 12 ++---- .../SemaCXX/constant-expression-cxx14.cpp | 6 +-- .../constexpr-function-recovery-crash.cpp | 3 ++ 6 files changed, 83 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 1b3944b35cb4..9521b24e44a7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4861,7 +4861,8 @@ class Sema final { StmtResult ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, Scope *CurScope); - StmtResult BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp); + StmtResult BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, + bool AllowRecovery = false); StmtResult ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, NamedReturnInfo &NRInfo, bool SupressSimplerImplicitMoves); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 1d90759f2406..d18f89d60d78 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3878,7 +3878,8 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, RetValExp, nullptr, /*RecoverUncorrectedTypos=*/true); if (RetVal.isInvalid()) return StmtError(); - StmtResult R = BuildReturnStmt(ReturnLoc, RetVal.get()); + StmtResult R = + BuildReturnStmt(ReturnLoc, RetVal.get(), /*AllowRecovery=*/true); if (R.isInvalid() || ExprEvalContexts.back().isDiscardedStatementContext()) return R; @@ -3908,7 +3909,8 @@ static bool CheckSimplerImplicitMovesMSVCWorkaround(const Sema &S, return false; } -StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { +StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, + bool AllowRecovery) { // 
Check for unexpanded parameter packs. if (RetValExp && DiagnoseUnexpandedParameterPack(RetValExp)) return StmtError(); @@ -3985,11 +3987,25 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // If we've already decided this function is invalid, e.g. because // we saw a `return` whose expression had an error, don't keep // trying to deduce its return type. - if (FD->isInvalidDecl()) - return StmtError(); - if (DeduceFunctionTypeFromReturnExpr(FD, ReturnLoc, RetValExp, AT)) { + // (Some return values may be needlessly wrapped in RecoveryExpr). + if (FD->isInvalidDecl() || + DeduceFunctionTypeFromReturnExpr(FD, ReturnLoc, RetValExp, AT)) { FD->setInvalidDecl(); - return StmtError(); + if (!AllowRecovery) + return StmtError(); + // The deduction failure is diagnosed and marked, try to recover. + if (RetValExp) { + // Wrap return value with a recovery expression of the previous type. + // If no deduction yet, use DependentTy. + auto Recovery = CreateRecoveryExpr( + RetValExp->getBeginLoc(), RetValExp->getEndLoc(), RetValExp, + AT->isDeduced() ? FnRetType : QualType()); + if (Recovery.isInvalid()) + return StmtError(); + RetValExp = Recovery.get(); + } else { + // Nothing to do: a ReturnStmt with no value is fine recovery. + } } else { FnRetType = FD->getReturnType(); } @@ -4002,7 +4018,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { ReturnStmt *Result = nullptr; if (FnRetType->isVoidType()) { if (RetValExp) { - if (isa(RetValExp)) { + if (auto *ILE = dyn_cast(RetValExp)) { // We simply never allow init lists as the return value of void // functions. This is compatible because this was never allowed before, // so there's no legacy code to deal with. @@ -4018,8 +4034,12 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { Diag(ReturnLoc, diag::err_return_init_list) << CurDecl << FunctionKind << RetValExp->getSourceRange(); - // Drop the expression. 
- RetValExp = nullptr; + // Preserve the initializers in the AST. + RetValExp = AllowRecovery + ? CreateRecoveryExpr(ILE->getLBraceLoc(), + ILE->getRBraceLoc(), ILE->inits()) + .get() + : nullptr; } else if (!RetValExp->isTypeDependent()) { // C99 6.8.6.4p1 (ext_ since GCC warns) unsigned D = diag::ext_return_has_expr; @@ -4116,6 +4136,9 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { InitializedEntity::InitializeResult(ReturnLoc, RetType); ExprResult Res = PerformMoveOrCopyInitialization( Entity, NRInfo, RetValExp, SupressSimplerImplicitMoves); + if (Res.isInvalid() && AllowRecovery) + Res = CreateRecoveryExpr(RetValExp->getBeginLoc(), + RetValExp->getEndLoc(), RetValExp, RetType); if (Res.isInvalid()) { // FIXME: Clean up temporaries here anyway? return StmtError(); diff --git a/clang/test/AST/ast-dump-recovery.cpp b/clang/test/AST/ast-dump-recovery.cpp index 8c6563961bd6..c196f629bad9 100644 --- a/clang/test/AST/ast-dump-recovery.cpp +++ b/clang/test/AST/ast-dump-recovery.cpp @@ -351,3 +351,43 @@ void CtorInitializer() { // CHECK-NEXT: | `-RecoveryExpr {{.*}} '' }; } + +float *brokenReturn() { + // CHECK: FunctionDecl {{.*}} brokenReturn + return 42; + // CHECK: ReturnStmt + // CHECK-NEXT: `-RecoveryExpr {{.*}} 'float *' + // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 42 +} + +// Return deduction treats the first, second *and* third differently! 
+auto *brokenDeducedReturn(int *x, float *y, double *z) { + // CHECK: FunctionDecl {{.*}} invalid brokenDeducedReturn + if (x) return x; + // CHECK: ReturnStmt + // CHECK-NEXT: `-ImplicitCastExpr {{.*}} + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int *' + if (y) return y; + // CHECK: ReturnStmt + // CHECK-NEXT: `-RecoveryExpr {{.*}} 'int *' + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'y' 'float *' + if (z) return z; + // CHECK: ReturnStmt + // CHECK-NEXT: `-RecoveryExpr {{.*}} 'int *' + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'z' 'double *' + return x; + // Unfortunate: we wrap a valid return in RecoveryExpr. + // This is to avoid running deduction again after it failed once. + // CHECK: ReturnStmt + // CHECK-NEXT: `-RecoveryExpr {{.*}} 'int *' + // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int *' +} + +void returnInitListFromVoid() { + // CHECK: FunctionDecl {{.*}} returnInitListFromVoid + return {7,8}; + // CHECK: ReturnStmt + // CHECK-NEXT: `-RecoveryExpr {{.*}} '' + // CHECK-NEXT: |-IntegerLiteral {{.*}} 'int' 7 + // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 8 +} diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp index c338214b8231..680b2d5307be 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -1939,20 +1939,16 @@ namespace Lifetime { constexpr int &get(int &&n) { return n; } // cxx2b-error@-1 {{non-const lvalue reference to type 'int' cannot bind to a temporary of type 'int'}} - // cxx2b-error@-2 {{no return statement in constexpr function}} See PR40598 constexpr int &&get_rv(int &&n) { return static_cast(n); } struct S { int &&r; int &s; int t; - constexpr S() : r(get_rv(0)), s(get(0)), t(r) {} // expected-note {{read of object outside its lifetime}} - constexpr S(int) : r(get_rv(0)), s(get(0)), t(s) {} - // cxx2b-warning@-1 {{reference 's' is not yet bound to a value when used here}} - // cxx2b-note@-2 {{read of uninitialized object is not 
allowed in a constant expression}} - // cxx11_20-note@-3 {{read of object outside its lifetime}} + constexpr S() : r(get_rv(0)), s(get(0)), t(r) {} // cxx11_20-note {{read of object outside its lifetime}} + constexpr S(int) : r(get_rv(0)), s(get(0)), t(s) {} // cxx11_20-note {{read of object outside its lifetime}} }; - constexpr int k1 = S().t; // expected-error {{constant expression}} expected-note {{in call}} - constexpr int k2 = S(0).t; // expected-error {{constant expression}} expected-note {{in call}} + constexpr int k1 = S().t; // expected-error {{constant expression}} cxx11_20-note {{in call}} + constexpr int k2 = S(0).t; // expected-error {{constant expression}} cxx11_20-note {{in call}} struct Q { int n = 0; diff --git a/clang/test/SemaCXX/constant-expression-cxx14.cpp b/clang/test/SemaCXX/constant-expression-cxx14.cpp index ee6d796cdcc4..84ffad370789 100644 --- a/clang/test/SemaCXX/constant-expression-cxx14.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx14.cpp @@ -876,14 +876,12 @@ namespace VirtualFromBase { namespace Lifetime { constexpr int &get(int &&r) { return r; } // cxx2b-error@-1 {{non-const lvalue reference to type 'int' cannot bind to a temporary of type 'int'}} - // cxx2b-error@-2 {{no return statement in constexpr function}} See PR40598 constexpr int f() { int &r = get(123); return r; - // cxx2b-note@-1 {{use of reference outside its lifetime is not allowed in a constant expression}} - // cxx14_20-note@-2 {{read of object outside its lifetime}} + // cxx14_20-note@-1 {{read of object outside its lifetime}} } - static_assert(f() == 123, ""); // expected-error {{constant expression}} expected-note {{in call}} + static_assert(f() == 123, ""); // expected-error {{constant expression}} cxx14_20-note {{in call}} constexpr int g() { int *p = 0; diff --git a/clang/test/SemaCXX/constexpr-function-recovery-crash.cpp b/clang/test/SemaCXX/constexpr-function-recovery-crash.cpp index 4b73cff13838..6f0844f1e0d0 100644 --- 
a/clang/test/SemaCXX/constexpr-function-recovery-crash.cpp +++ b/clang/test/SemaCXX/constexpr-function-recovery-crash.cpp @@ -74,3 +74,6 @@ struct X {} array[] = {undef()}; // expected-error {{use of undeclared identifie constexpr void test11() { for (X& e : array) {} } + +constexpr int test12() { return "wrong"; } // expected-error {{cannot initialize return object of type 'int'}} +constexpr int force12 = test12(); // expected-error {{must be initialized by a constant}} From 64e56f8356416d213bd271c18b766e748a00e095 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 20 Dec 2021 19:42:38 -0800 Subject: [PATCH 539/992] [NFC] Expose isRematerializable and copyHint from CalcSpillWeights We need to reuse them for the ML regalloc eviction advisor, as we 'explode' the weight calculation into sub-features. Differential Revision: https://reviews.llvm.org/D116074 --- llvm/include/llvm/CodeGen/CalcSpillWeights.h | 12 ++++++++++++ llvm/lib/CodeGen/CalcSpillWeights.cpp | 13 +++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index 0b6ed079b38e..bfd5bab3d1c0 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -80,6 +80,18 @@ class VirtRegMap; /// live intervals. void calculateSpillWeightsAndHints(); + /// Return the preferred allocation register for reg, given a COPY + /// instruction. + static Register copyHint(const MachineInstr *MI, unsigned Reg, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI); + + /// Determine if all values in LI are rematerializable. + static bool isRematerializable(const LiveInterval &LI, + const LiveIntervals &LIS, + const VirtRegMap &VRM, + const TargetInstrInfo &TII); + protected: /// Helper function for weight calculations. 
/// (Re)compute LI's spill weight and allocation hint, or, for non null diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 5f9982cd155d..84a0e4142bb6 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -43,9 +43,9 @@ void VirtRegAuxInfo::calculateSpillWeightsAndHints() { } // Return the preferred allocation register for reg, given a COPY instruction. -static Register copyHint(const MachineInstr *MI, unsigned Reg, - const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI) { +Register VirtRegAuxInfo::copyHint(const MachineInstr *MI, unsigned Reg, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { unsigned Sub, HSub; Register HReg; if (MI->getOperand(0).getReg() == Reg) { @@ -77,9 +77,10 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg, } // Check if all values in LI are rematerializable -static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, - const VirtRegMap &VRM, - const TargetInstrInfo &TII) { +bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI, + const LiveIntervals &LIS, + const VirtRegMap &VRM, + const TargetInstrInfo &TII) { Register Reg = LI.reg(); Register Original = VRM.getOriginal(Reg); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); From da6b0d0b768e3ecb1af2fd9df2d98510f7aff45c Mon Sep 17 00:00:00 2001 From: Rajat Bajpai Date: Tue, 4 Jan 2022 17:19:24 +0100 Subject: [PATCH 540/992] [clang-format] Add an option to add a space between operator overloading and opening parentheses This change adds an option AfterOverloadedOperator in SpaceBeforeParensOptions to add a space between overloaded operator and opening parentheses in clang-format. 
Reviewed By: MyDeveloperDay, curdeius, HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D116283 --- clang/docs/ClangFormatStyleOptions.rst | 9 +++++++++ clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Format/Format.h | 11 ++++++++++- clang/lib/Format/Format.cpp | 1 + clang/lib/Format/TokenAnnotator.cpp | 12 +++++++++--- clang/unittests/Format/FormatTest.cpp | 27 ++++++++++++++++++++++++++ 6 files changed, 59 insertions(+), 4 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 4f3a9eb9f4a6..5a52916acc55 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3818,6 +3818,15 @@ the configuration (without a prefix: ``Auto``). IF (...) vs. IF(...) + * ``bool AfterOverloadedOperator`` If ``true``, put a space between operator overloading and opening + parentheses. + + .. code-block:: c++ + + true: false: + void operator++ (int a); vs. void operator++(int a); + object.operator++ (10); object.operator++(10); + * ``bool BeforeNonEmptyParentheses`` If ``true``, put a space before opening parentheses only if the parentheses are not empty. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2f48b1424d09..7e24d06567fc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -311,6 +311,9 @@ clang-format - Improved C++20 Modules and Coroutines support. +- Option ``AfterOverloadedOperator`` has been added in ``SpaceBeforeParensOptions`` + to allow space between overloaded operator and opening parentheses. + libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 24c245642e6a..d3113a5fdba4 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3429,6 +3429,14 @@ struct FormatStyle { /// /// \endcode bool AfterIfMacros; + /// If ``true``, put a space between operator overloading and opening + /// parentheses. 
+ /// \code + /// true: false: + /// void operator++ (int a); vs. void operator++(int a); + /// object.operator++ (10); object.operator++(10); + /// \endcode + bool AfterOverloadedOperator; /// If ``true``, put a space before opening parentheses only if the /// parentheses are not empty. /// \code @@ -3442,7 +3450,7 @@ struct FormatStyle { : AfterControlStatements(false), AfterForeachMacros(false), AfterFunctionDeclarationName(false), AfterFunctionDefinitionName(false), AfterIfMacros(false), - BeforeNonEmptyParentheses(false) {} + AfterOverloadedOperator(false), BeforeNonEmptyParentheses(false) {} bool operator==(const SpaceBeforeParensCustom &Other) const { return AfterControlStatements == Other.AfterControlStatements && @@ -3451,6 +3459,7 @@ struct FormatStyle { Other.AfterFunctionDeclarationName && AfterFunctionDefinitionName == Other.AfterFunctionDefinitionName && AfterIfMacros == Other.AfterIfMacros && + AfterOverloadedOperator == Other.AfterOverloadedOperator && BeforeNonEmptyParentheses == Other.BeforeNonEmptyParentheses; } }; diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 11c190ebfba7..a4ce8a20a940 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -868,6 +868,7 @@ template <> struct MappingTraits { IO.mapOptional("AfterFunctionDeclarationName", Spacing.AfterFunctionDeclarationName); IO.mapOptional("AfterIfMacros", Spacing.AfterIfMacros); + IO.mapOptional("AfterOverloadedOperator", Spacing.AfterOverloadedOperator); IO.mapOptional("BeforeNonEmptyParentheses", Spacing.BeforeNonEmptyParentheses); } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a161ee87e6b5..5b3a450e31f2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2923,9 +2923,15 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, } bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { - return 
Style.SpaceBeforeParens == FormatStyle::SBPO_Always || - (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses && - Right.ParameterCount > 0); + if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always) + return true; + if (Right.is(TT_OverloadedOperatorLParen) && + Style.SpaceBeforeParensOptions.AfterOverloadedOperator) + return true; + if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses && + Right.ParameterCount > 0) + return true; + return false; } bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 71f07412a3b6..1d622e80ed12 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -14536,6 +14536,24 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { // verifyFormat("X A::operator++ (T);", SomeSpace2); verifyFormat("int x = int (y);", SomeSpace2); verifyFormat("auto lambda = []() { return 0; };", SomeSpace2); + + FormatStyle SpaceAfterOverloadedOperator = getLLVMStyle(); + SpaceAfterOverloadedOperator.SpaceBeforeParens = FormatStyle::SBPO_Custom; + SpaceAfterOverloadedOperator.SpaceBeforeParensOptions + .AfterOverloadedOperator = true; + + verifyFormat("auto operator++ () -> int;", SpaceAfterOverloadedOperator); + verifyFormat("X A::operator++ ();", SpaceAfterOverloadedOperator); + verifyFormat("some_object.operator++ ();", SpaceAfterOverloadedOperator); + verifyFormat("auto func() -> int;", SpaceAfterOverloadedOperator); + + SpaceAfterOverloadedOperator.SpaceBeforeParensOptions + .AfterOverloadedOperator = false; + + verifyFormat("auto operator++() -> int;", SpaceAfterOverloadedOperator); + verifyFormat("X A::operator++();", SpaceAfterOverloadedOperator); + verifyFormat("some_object.operator++();", SpaceAfterOverloadedOperator); + verifyFormat("auto func() -> int;", SpaceAfterOverloadedOperator); } TEST_F(FormatTest, SpaceAfterLogicalNot) { @@ -18771,6 +18789,15 @@ TEST_F(FormatTest, 
ParsesConfigurationBools) { CHECK_PARSE_NESTED_BOOL(BraceWrapping, SplitEmptyFunction); CHECK_PARSE_NESTED_BOOL(BraceWrapping, SplitEmptyRecord); CHECK_PARSE_NESTED_BOOL(BraceWrapping, SplitEmptyNamespace); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterControlStatements); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterForeachMacros); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, + AfterFunctionDeclarationName); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, + AfterFunctionDefinitionName); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterIfMacros); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterOverloadedOperator); + CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, BeforeNonEmptyParentheses); } #undef CHECK_PARSE_BOOL From 05594de2d77b6f4735b8d8d417039b60987b3a79 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 4 Jan 2022 08:28:59 -0800 Subject: [PATCH 541/992] [mlir][ods] Handle DeclareOpInterfaceMethods in formatgen Previously it would not consider ops with DeclareOpInterfaceMethods as having the InferTypeOpInterface interfaces added. The OpInterface nested inside DeclareOpInterfaceMethods is not retained so that one could query it, so check for the the C++ class directly (a bit raw/low level - will be addressed in follow up). 
Differential Revision: https://reviews.llvm.org/D116572 --- mlir/test/lib/Dialect/Test/TestDialect.cpp | 9 +++++++++ mlir/test/lib/Dialect/Test/TestOps.td | 6 ++++++ mlir/test/mlir-tblgen/op-format.mlir | 5 ++++- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 13 ++++++++++--- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index aee0bdb13970..441817803ef0 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -264,6 +264,15 @@ Operation *TestDialect::materializeConstant(OpBuilder &builder, Attribute value, return builder.create(loc, type, value); } +::mlir::LogicalResult FormatInferType2Op::inferReturnTypes( + ::mlir::MLIRContext *context, ::llvm::Optional<::mlir::Location> location, + ::mlir::ValueRange operands, ::mlir::DictionaryAttr attributes, + ::mlir::RegionRange regions, + ::llvm::SmallVectorImpl<::mlir::Type> &inferredReturnTypes) { + inferredReturnTypes.assign({::mlir::IntegerType::get(context, 16)}); + return ::mlir::success(); +} + void *TestDialect::getRegisteredInterfaceForOp(TypeID typeID, OperationName opName) { if (opName.getIdentifier() == "test.unregistered_side_effect_op" && diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 39f0b0b7da56..6fad11b85ad8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2139,6 +2139,12 @@ def FormatInferTypeOp : TEST_Op<"format_infer_type", [InferTypeOpInterface]> { }]; } +// Check that formatget supports DeclareOpInterfaceMethods. +def FormatInferType2Op : TEST_Op<"format_infer_type2", [DeclareOpInterfaceMethods]> { + let results = (outs AnyType); + let assemblyFormat = "attr-dict"; +} + // Base class for testing mixing allOperandTypes, allOperands, and // inferResultTypes. 
class FormatInferAllTypesBaseOp traits = []> diff --git a/mlir/test/mlir-tblgen/op-format.mlir b/mlir/test/mlir-tblgen/op-format.mlir index 152cd0a554f1..77afc41f6541 100644 --- a/mlir/test/mlir-tblgen/op-format.mlir +++ b/mlir/test/mlir-tblgen/op-format.mlir @@ -409,7 +409,10 @@ test.format_infer_variadic_type_from_non_variadic %i64, %i64 : i64 //===----------------------------------------------------------------------===// // CHECK: test.format_infer_type -%ignored_res7 = test.format_infer_type +%ignored_res7a = test.format_infer_type + +// CHECK: test.format_infer_type2 +%ignored_res7b = test.format_infer_type2 // CHECK: test.format_infer_type_all_operands_and_types(%[[I64]], %[[I32]]) : i64, i32 %ignored_res8:2 = test.format_infer_type_all_operands_and_types(%i64, %i32) : i64, i32 diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 02d0e81b6860..b5218030b64d 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -2345,9 +2345,16 @@ LogicalResult FormatParser::parse() { handleSameTypesConstraint(variableTyResolver, /*includeResults=*/true); } else if (def.isSubClassOf("TypesMatchWith")) { handleTypesMatchConstraint(variableTyResolver, def); - } else if (def.getName() == "InferTypeOpInterface" && - !op.allResultTypesKnown()) { - canInferResultTypes = true; + } else if (!op.allResultTypesKnown()) { + // This doesn't check the name directly to handle + // DeclareOpInterfaceMethods + // and the like. + // TODO: Add hasCppInterface check. + if (auto name = def.getValueAsOptionalString("cppClassName")) { + if (*name == "InferTypeOpInterface" && + def.getValueAsString("cppNamespace") == "::mlir") + canInferResultTypes = true; + } } } From 95f9eddbbcffa77685d0fd2e781521acb5d21ae4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Jan 2022 16:56:33 +0000 Subject: [PATCH 542/992] [X86] combineSetCCMOVMSK - use APInt::getLowBitsSet to create bitmask. NFC. 
SelectionDAG::getConstant creates an APInt internally anyway, and getLowBitsSet helps assert for legal bitwidths. Plus it silences static analyzer out-of-bounds shift warnings. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7d14ed79e1a9..ab61a0a51598 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44117,7 +44117,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, BCNumEltBits > NumEltBits && DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) { SDLoc DL(EFLAGS); - unsigned CmpMask = IsAnyOf ? 0 : ((1 << BCNumElts) - 1); + APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : BCNumElts); return DAG.getNode(X86ISD::CMP, DL, MVT::i32, DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC), DAG.getConstant(CmpMask, DL, MVT::i32)); From 56ec762a76cbdf23c64707c45bfc9dd57f2e7abb Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 4 Jan 2022 12:01:21 -0500 Subject: [PATCH 543/992] [regalloc] Fix GCC warning `-Wattributes`. NFC. - Mark it with LLVM_LIBRARY_VISIBILITY to preserve the legacy visibility. --- llvm/lib/CodeGen/RegAllocGreedy.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index c414cf73dc5f..3d8f541bc5e7 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -69,9 +69,9 @@ #include namespace llvm { -class RAGreedy : public MachineFunctionPass, - public RegAllocBase, - private LiveRangeEdit::Delegate { +class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { // Convenient shortcuts. 
using PQueue = std::priority_queue>; using SmallLISet = SmallPtrSet; From 0b09313cd53316eacbdc5e98d4ef00bef2c41d02 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 4 Jan 2022 08:39:51 -0800 Subject: [PATCH 544/992] [funcattrs] Infer writeonly argument attribute [part 2] This builds on the code from D114963, and extends it to handle calls both direct and indirect. With the revised code structure (from series of previously landed NFCs), this is pretty straight forward. One thing to note is that we can not infer writeonly for arguments which might be captured. If the pointer can be read back by the caller, and then read through, we have no way to track that. This is the same restriction we have for readonly, except that we get no mileage out of the "callee can be readonly" exception since a writeonly param on a readonly function is either a) readnone or b) UB. This means we can't actually infer much unless nocapture has already been inferred. Differential Revision: https://reviews.llvm.org/D115003 --- clang/test/CodeGen/arm-vfp16-arguments.c | 2 +- clang/test/CodeGenCXX/wasm-args-returns.cpp | 2 +- .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 6 +++--- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 12 ++++++++--- .../TypeBasedAliasAnalysis/functionattrs.ll | 2 +- llvm/test/Other/cgscc-devirt-iteration.ll | 2 +- .../Transforms/FunctionAttrs/norecurse.ll | 2 +- .../Transforms/FunctionAttrs/writeonly.ll | 20 +++++++++++++------ 8 files changed, 31 insertions(+), 17 deletions(-) diff --git a/clang/test/CodeGen/arm-vfp16-arguments.c b/clang/test/CodeGen/arm-vfp16-arguments.c index e11ec1508bbf..0ad099092a9a 100644 --- a/clang/test/CodeGen/arm-vfp16-arguments.c +++ b/clang/test/CodeGen/arm-vfp16-arguments.c @@ -71,6 +71,6 @@ void test_hfa(hfa_t a) {} hfa_t ghfa; hfa_t test_ret_hfa(void) { return ghfa; } -// CHECK-SOFT: define{{.*}} void @test_ret_hfa(%struct.hfa_t* noalias nocapture sret(%struct.hfa_t) align 8 %agg.result) +// CHECK-SOFT: define{{.*}} void 
@test_ret_hfa(%struct.hfa_t* noalias nocapture writeonly sret(%struct.hfa_t) align 8 %agg.result) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @test_ret_hfa() // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.hfa_t @test_ret_hfa() diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp index c05bb44c05a3..d71bb28eabcc 100644 --- a/clang/test/CodeGenCXX/wasm-args-returns.cpp +++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp @@ -30,7 +30,7 @@ struct two_fields { double d, e; }; test(two_fields); -// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret(%struct.two_fields) align 8 %{{.*}}, %struct.two_fields* nocapture readonly byval(%struct.two_fields) align 8 %{{.*}}) +// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture writeonly sret(%struct.two_fields) align 8 %{{.*}}, %struct.two_fields* nocapture readonly byval(%struct.two_fields) align 8 %{{.*}}) // // CHECK: define void @_Z15test_two_fieldsv() // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8 diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 17333cc80e14..350bb3c69366 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -403,14 +403,14 @@ struct_arr16 func_ret_struct_arr16() return s; } -// CHECK: define{{.*}} void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret(%struct.struct_arr32) align 4 %agg.result) +// CHECK: define{{.*}} void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture writeonly sret(%struct.struct_arr32) align 4 %agg.result) struct_arr32 func_ret_struct_arr32() { struct_arr32 s = { 0 }; return s; } -// CHECK: define{{.*}} void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret(%struct.struct_arr33) align 4 %agg.result) +// 
CHECK: define{{.*}} void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture writeonly sret(%struct.struct_arr33) align 4 %agg.result) struct_arr33 func_ret_struct_arr33() { struct_arr33 s = { 0 }; @@ -468,7 +468,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) { // CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14) void func_large_struct_padding_arg_direct(large_struct_padding arg) { } -// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval(%struct.large_struct_padding) align 8 %arg) +// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture writeonly %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval(%struct.large_struct_padding) align 8 %arg) void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) { *out = arg; } diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 8fb0c2dc7613..bc3c3da44729 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -720,10 +720,16 @@ determinePointerAccessAttrs(Argument *A, // The accessors used on call site here do the right thing for calls and // invokes with operand bundles. 
- if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex)) - return Attribute::None; - if (!CB.doesNotAccessMemory(UseIndex)) + if (CB.doesNotAccessMemory(UseIndex)) { + /* nop */ + } else if (CB.onlyReadsMemory() || CB.onlyReadsMemory(UseIndex)) { IsRead = true; + } else if (CB.hasFnAttr(Attribute::WriteOnly) || + CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) { + IsWrite = true; + } else { + return Attribute::None; + } break; } diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index 65c0e27d5f46..e0c9e45101f3 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -49,7 +49,7 @@ define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind { ret void } -; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture readonly %q, i64 %n) #5 { +; CHECK: define void @test2_no(i8* nocapture writeonly %p, i8* nocapture readonly %q, i64 %n) #5 { define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !2 ret void diff --git a/llvm/test/Other/cgscc-devirt-iteration.ll b/llvm/test/Other/cgscc-devirt-iteration.ll index 27892e85cec7..70f6c1f508de 100644 --- a/llvm/test/Other/cgscc-devirt-iteration.ll +++ b/llvm/test/Other/cgscc-devirt-iteration.ll @@ -112,7 +112,7 @@ define void @test3(i8* %src, i8* %dest, i64 %size) noinline { ; CHECK-NOT: read ; CHECK-SAME: noinline ; BEFORE-LABEL: define void @test3(i8* %src, i8* %dest, i64 %size) -; AFTER-LABEL: define void @test3(i8* nocapture readonly %src, i8* nocapture %dest, i64 %size) +; AFTER-LABEL: define void @test3(i8* nocapture readonly %src, i8* nocapture writeonly %dest, i64 %size) %fptr = alloca i8* (i8*, i8*, i64)* store i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)** %fptr %f = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** %fptr diff --git 
a/llvm/test/Transforms/FunctionAttrs/norecurse.ll b/llvm/test/Transforms/FunctionAttrs/norecurse.ll index f5af6406e2a4..af63da176d5c 100644 --- a/llvm/test/Transforms/FunctionAttrs/norecurse.ll +++ b/llvm/test/Transforms/FunctionAttrs/norecurse.ll @@ -50,7 +50,7 @@ declare i32 @k() readnone ; CHECK: Function Attrs ; CHECK-SAME: nounwind ; CHECK-NOT: norecurse -; CHECK-NEXT: define void @intrinsic(i8* nocapture %dest, i8* nocapture readonly %src, i32 %len) +; CHECK-NEXT: define void @intrinsic(i8* nocapture writeonly %dest, i8* nocapture readonly %src, i32 %len) define void @intrinsic(i8* %dest, i8* %src, i32 %len) { call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false) ret void diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index 54d00d355f7a..5094d6914929 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -78,15 +78,23 @@ define void @direct1(i8* %p) { declare void @direct2_callee(i8* %p) writeonly +; writeonly w/o nocapture is not enough ; CHECK: define void @direct2(i8* %p) define void @direct2(i8* %p) { call void @direct2_callee(i8* %p) + ; read back from global, read through pointer... 
ret void } -declare void @direct3_callee(i8* writeonly %p) +; CHECK: define void @direct2b(i8* nocapture writeonly %p) +define void @direct2b(i8* %p) { + call void @direct2_callee(i8* nocapture %p) + ret void +} + +declare void @direct3_callee(i8* nocapture writeonly %p) -; CHECK: define void @direct3(i8* %p) +; CHECK: define void @direct3(i8* nocapture writeonly %p) define void @direct3(i8* %p) { call void @direct3_callee(i8* %p) ret void @@ -98,15 +106,15 @@ define void @fptr_test1(i8* %p, void (i8*)* %f) { ret void } -; CHECK: define void @fptr_test2(i8* %p, void (i8*)* nocapture readonly %f) +; CHECK: define void @fptr_test2(i8* nocapture writeonly %p, void (i8*)* nocapture readonly %f) define void @fptr_test2(i8* %p, void (i8*)* %f) { - call void %f(i8* writeonly %p) + call void %f(i8* nocapture writeonly %p) ret void } -; CHECK: define void @fptr_test3(i8* %p, void (i8*)* nocapture readonly %f) +; CHECK: define void @fptr_test3(i8* nocapture writeonly %p, void (i8*)* nocapture readonly %f) define void @fptr_test3(i8* %p, void (i8*)* %f) { - call void %f(i8* %p) writeonly + call void %f(i8* nocapture %p) writeonly ret void } From e24ddb6027b6495a81c95133cfb6812d6090cd32 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 29 Dec 2021 12:24:45 -0500 Subject: [PATCH 545/992] [libc++] Use std::addressof in std::function::target This guards against hostile overloads of operator&. Thanks to Peter Dimov for the report in https://github.com/boostorg/lambda/issues/24. 
Differential Revision: https://reviews.llvm.org/D116380 --- libcxx/include/__functional/function.h | 5 +-- .../func.wrap.func/addressof.pass.cpp | 32 +++++++++++++++++++ .../robust_against_adl.pass.cpp | 10 ++++-- 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/addressof.pass.cpp diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index 8336d85adf2e..b6d383ce8459 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -16,6 +16,7 @@ #include <__functional/invoke.h> #include <__functional/unary_function.h> #include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/shared_ptr.h> @@ -360,7 +361,7 @@ const void* __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT { if (__ti == typeid(_Fp)) - return &__f_.__target(); + return _VSTD::addressof(__f_.__target()); return nullptr; } @@ -1392,7 +1393,7 @@ const void* __func<_Fp, _Alloc, _Rp()>::target(const type_info& __ti) const { if (__ti == typeid(_Fp)) - return &__f_.first(); + return _VSTD::addressof(__f_.first()); return (const void*)0; } diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/addressof.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/addressof.pass.cpp new file mode 100644 index 000000000000..d18f9c45f71f --- /dev/null +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/addressof.pass.cpp @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// class function + +// This test runs in C++03, but we have deprecated using std::function in C++03. +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + +// Make sure we can use std::function with a type that has a hostile overload +// of operator&(). + +#include +#include + +#include "operator_hijacker.h" + +struct TrapAddressof : operator_hijacker { + int operator()() const { return 1; } +}; + +int main(int, char**) { + std::function f = TrapAddressof(); + assert(f() == 1); + return 0; +} diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/robust_against_adl.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/robust_against_adl.pass.cpp index 23e477619037..3c0851e4eebe 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/robust_against_adl.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/robust_against_adl.pass.cpp @@ -27,8 +27,14 @@ struct Incomplete; template struct Holder { T t; }; + typedef Holder *Ptr; +template +struct Callable { + void operator()() const { } +}; + Ptr no_args() { return nullptr; } Ptr one_arg(Ptr p) { return p; } Ptr two_args(Ptr p, Ptr) { return p; } @@ -37,11 +43,11 @@ Ptr four_args(Ptr p, Ptr, Ptr, Ptr) { return p; } void one_arg_void(Ptr) { } -int main(int, char**) -{ +int main(int, char**) { Ptr x = nullptr; std::function f(no_args); f(); std::function g(one_arg); g(x); std::function h(one_arg_void); h(x); + std::function i(Callable>{}); return 0; } From 587bdb3772333763dd739021cd08bc44bcd8485d Mon Sep 17 00:00:00 2001 From: sternenseemann Date: Tue, 4 Jan 2022 17:33:29 +0000 Subject: [PATCH 546/992] [llvm][cmake] never link llvm-config against llvm dylib When cross-compiling, in order to make the output of the native and cross-compiled 
llvm-config match, one needs to re-pass all cmake flags relevant to BuildVariables.inc via `CROSS_TOOLCHAIN_FLAGS_NATIVE`. If `LLVM_LINK_LLVM_DYLIB=ON` is among those, building a full `libLLVM` shared object is required for the native llvm-config, otherwise `--shared-mode` will be incorrect and `--link-shared` broken. To avoid this, we can make llvm-config link statically against the needed components for simplicity's sake in both the native and cross case. Reviewed By: beanz Differential Revision: https://reviews.llvm.org/D116537 --- llvm/tools/llvm-config/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/tools/llvm-config/CMakeLists.txt b/llvm/tools/llvm-config/CMakeLists.txt index 1b74355a4209..aa5aa20257f9 100644 --- a/llvm/tools/llvm-config/CMakeLists.txt +++ b/llvm/tools/llvm-config/CMakeLists.txt @@ -6,6 +6,12 @@ set(BUILDVARIABLES_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.inc) # Add the llvm-config tool. add_llvm_tool(llvm-config llvm-config.cpp + # This utility doesn't use much of LLVM, so linking a shared library for the + # entire thing is overkill. Avoiding that especially saves on build time when cross + # compiling LLVM and building both cross and native `llvm-config`s. We don't + # want to build an entire native libLLVM.so in addition to the cross one just + # for the native `llvm-config`! + DISABLE_LLVM_LINK_LLVM_DYLIB ) # Compute the substitution values for various items. 
From bc1df1fabb578c7efb33fb59e8aec6b85720f863 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 4 Jan 2022 18:38:30 +0100 Subject: [PATCH 547/992] [mlir] Fix incorrect top-level comment in DialectSparseTensor.cpp --- mlir/lib/Bindings/Python/DialectSparseTensor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp index 7de0b8156f44..c9e3cb6394bb 100644 --- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp +++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp @@ -1,4 +1,4 @@ -//===- DialectLinalg.cpp - 'sparse_tensor' dialect submodule --------------===// +//===- DialectSparseTensor.cpp - 'sparse_tensor' dialect submodule --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From e18157c26b8e2a442bced5aeea6b4d99f54a6adb Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 4 Jan 2022 09:10:02 -0800 Subject: [PATCH 548/992] Add extra test for D116499 requested in review --- .../overflow-intrinsics-trip-count.ll | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll index de05551ab7df..942f312599da 100644 --- a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll @@ -237,6 +237,30 @@ for.end: ; preds = %for.body, %entry ret void } +define void @sadd_symbolic_swapped(i16 %start) { +; CHECK-LABEL: 'sadd_symbolic_swapped' +; CHECK-NEXT: Determining loop execution counts for: @sadd_symbolic_swapped +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; +entry: + br i1 undef, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i16 [ %math, %for.body ], [ %start, %for.body.preheader ] + %0 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %indvars.iv, i16 1) + %math = extractvalue { i16, i1 } %0, 0 + %ov = extractvalue { i16, i1 } %0, 1 + br i1 %ov, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + define void @usub_symbolic_start(i16 %start) { ; CHECK-LABEL: 'usub_symbolic_start' ; CHECK-NEXT: Determining loop execution counts for: @usub_symbolic_start From b061d86c6930acef1b246874adf2f11e9120894c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 4 Jan 2022 09:43:29 -0800 Subject: [PATCH 549/992] [SCEV] Compute exit count from overflow check expressed w/ x.with.overflow intrinsics This ports the logic we generate in instcombine for a single use x.with.overflow check for use in SCEV's analysis. The result is that we can prove trip counts for many checks, and (through existing logic) often discharge them. Motivation comes from compiling a simple example with -ftrapv. 
Differential Revision: https://reviews.llvm.org/D116499 --- llvm/lib/Analysis/ScalarEvolution.cpp | 23 +++++++++++ .../overflow-intrinsics-trip-count.ll | 40 ++++++++++++------- llvm/test/CodeGen/PowerPC/negctr.ll | 10 ++--- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index d48e81f28af9..513b2c0e5da1 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8093,6 +8093,29 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( return getZero(CI->getType()); } + // If we're exiting based on the overflow flag of an x.with.overflow intrinsic + // with a constant step, we can form an equivalent icmp predicate and figure + // out how many iterations will be taken before we exit. + const WithOverflowInst *WO; + const APInt *C; + if (match(ExitCond, m_ExtractValue<1>(m_WithOverflowInst(WO))) && + match(WO->getRHS(), m_APInt(C))) { + ConstantRange NWR = + ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C, + WO->getNoWrapKind()); + CmpInst::Predicate Pred; + APInt NewRHSC, Offset; + NWR.getEquivalentICmp(Pred, NewRHSC, Offset); + if (!ExitIfTrue) + Pred = ICmpInst::getInversePredicate(Pred); + auto *LHS = getSCEV(WO->getLHS()); + if (Offset != 0) + LHS = getAddExpr(LHS, getConstant(Offset)); + auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC), + ControlsExit, AllowPredicates); + if (EL.hasAnyInfo()) return EL; + } + // If it's not an integer or pointer comparison then compute it the hard way. 
return computeExitCountExhaustively(L, ExitCond, ExitIfTrue); } diff --git a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll index 942f312599da..38372c94e3ea 100644 --- a/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/overflow-intrinsics-trip-count.ll @@ -167,9 +167,11 @@ for.end: ; preds = %for.body, %entry define void @uadd_symbolic_start(i16 %start) { ; CHECK-LABEL: 'uadd_symbolic_start' ; CHECK-NEXT: Determining loop execution counts for: @uadd_symbolic_start -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (-1 * %start)) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (-1 * %start)) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -191,9 +193,11 @@ for.end: ; preds = %for.body, %entry define void @sadd_symbolic_start(i16 %start) { ; CHECK-LABEL: 'sadd_symbolic_start' ; CHECK-NEXT: Determining loop execution counts for: @sadd_symbolic_start -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (32767 + (-1 * %start)) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (32767 + (-1 * %start)) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -264,9 +268,11 @@ for.end: ; preds = %for.body, %entry define void @usub_symbolic_start(i16 %start) { ; CHECK-LABEL: 'usub_symbolic_start' ; CHECK-NEXT: Determining loop execution counts for: @usub_symbolic_start -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is %start +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is %start +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -288,9 +294,11 @@ for.end: ; preds = %for.body, %entry define void @ssub_symbolic_start(i16 %start) { ; CHECK-LABEL: 'ssub_symbolic_start' ; CHECK-NEXT: Determining loop execution counts for: @ssub_symbolic_start -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (-32768 + %start) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-32768 + %start) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: br i1 undef, label %for.end, label %for.body.preheader @@ -360,11 +368,13 @@ for.end: ; preds = %for.body, %entry define void @sadd_symbolic_non_latch(i16 %start) { ; CHECK-LABEL: 'sadd_symbolic_non_latch' ; CHECK-NEXT: Determining loop execution counts for: @sadd_symbolic_non_latch -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: exit count for for.body: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((230 + (-1 * %start)) umin (32767 + (-1 * %start))) +; CHECK-NEXT: exit count for for.body: (32767 + (-1 * %start)) ; CHECK-NEXT: exit count for for.latch: (230 + (-1 * %start)) ; CHECK-NEXT: Loop %for.body: max backedge-taken count is -1 -; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((230 + (-1 * %start)) umin (32767 + (-1 * %start))) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: br i1 undef, label %for.end, label %for.body.preheader diff --git a/llvm/test/CodeGen/PowerPC/negctr.ll b/llvm/test/CodeGen/PowerPC/negctr.ll index 93c7daed64f6..38664b058b87 100644 --- a/llvm/test/CodeGen/PowerPC/negctr.ll +++ b/llvm/test/CodeGen/PowerPC/negctr.ll @@ -34,14 +34,10 @@ for.body: ; preds = %for.body, %entry %exitcond = icmp eq i64 %indvars.iv.next, 0 br i1 %exitcond, label %for.end, label %for.body -; FIXME: This should be a hardware loop. -; cmp is optimized to uadd intrinsic in CGP pass which can not be recognized in -; later HardwareLoops Pass. 
; CHECK: @main1 -; CHECK: li [[REG:[0-9]+]], 1 -; CHECK: addi [[REG2:[0-9]+]], [[REG]], 1 -; CHECK: cmpld -; CHECK: bge +; CHECK: li [[REG:[0-9]+]], -1 +; CHECK: mtctr [[REG]] +; CHECK: bdnz for.end: ; preds = %for.body, %entry ret void From 2edc21e8566be8fa9b20e0bb71a83af90ec9aa97 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 4 Jan 2022 09:28:22 -0800 Subject: [PATCH 550/992] Fix altivec regression caused by D115670 in Vec Const Eval The Vector Constant Evaluator assumes that all the types of its sub-expressions are going to be Vector APValues, which holds for most situations. However, in the 1 examples of Altivec C compilation of operator ++ (not allowed for other vector types), the result is an LValue. Since the operator isn't supported for constant evaluation anyway, this patch just fails-out of constant eval if we are in a situation where the operand to the unary operator causes an LValue. --- clang/lib/AST/ExprConstant.cpp | 9 +++++++++ clang/test/Sema/altivec-init.c | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3bf205d8cb06..9412aba42dfb 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10434,6 +10434,15 @@ bool VectorExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { if (!Evaluate(SubExprValue, Info, SubExpr)) return false; + // FIXME: This vector evaluator someday needs to be changed to be LValue + // aware/keep LValue information around, rather than dealing with just vector + // types directly. Until then, we cannot handle cases where the operand to + // these unary operators is an LValue. The only case I've been able to see + // cause this is operator++ assigning to a member expression (only valid in + // altivec compilations) in C mode, so this shouldn't limit us too much. 
+ if (SubExprValue.isLValue()) + return false; + assert(SubExprValue.getVectorLength() == VD->getNumElements() && "Vector length doesn't match type?"); diff --git a/clang/test/Sema/altivec-init.c b/clang/test/Sema/altivec-init.c index 1c20450a6d01..ee38e7070671 100644 --- a/clang/test/Sema/altivec-init.c +++ b/clang/test/Sema/altivec-init.c @@ -45,3 +45,16 @@ void test() int res = vGCC > vAltiVec; vAltiVec = 0 ? vGCC : vGCC; } + +typedef struct VecMem { + vector signed vec; +} VecMem; + +// The following should not assert. See qiongsiwu1's comment here: +// https://reviews.llvm.org/D115670 +void test2() { + vector signed local_vec = {1, 2, 3, 4}; + VecMem VM; + VM.vec = ++local_vec; +} + From e1e74f6cd6ce41ce8303a5a91f29736808fccc36 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 4 Jan 2022 10:07:50 -0800 Subject: [PATCH 551/992] -Wmissing-prototypes: Don't warn in named namespaces nested in anonymous namespaces --- clang/lib/AST/Decl.cpp | 1 - clang/test/SemaCXX/warn-missing-prototypes.cpp | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index e63560f1b6fe..3ef08cab9675 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3251,7 +3251,6 @@ bool FunctionDecl::isGlobal() const { if (const auto *Namespace = cast(DC)) { if (!Namespace->getDeclName()) return false; - break; } } diff --git a/clang/test/SemaCXX/warn-missing-prototypes.cpp b/clang/test/SemaCXX/warn-missing-prototypes.cpp index bb71aa8b142d..e8637e5a90ea 100644 --- a/clang/test/SemaCXX/warn-missing-prototypes.cpp +++ b/clang/test/SemaCXX/warn-missing-prototypes.cpp @@ -13,6 +13,10 @@ namespace NS { namespace { // Don't warn about functions in anonymous namespaces. void f() { } + // Even if they're in nested namespaces within an anonymous namespace. 
+ namespace NS { + void f() { } + } } struct A { From df2e728b77510da33cf3822eae4d66531eeed518 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 4 Jan 2022 10:08:03 -0800 Subject: [PATCH 552/992] [RISCV] Teach RISCVGatherScatterLowering to handle more complex recurrence start values. Previously we only recognized strided loads/store when the initial value for the phi was a strided constant vector. This patch extends the support to a strided_constant added to a splatted value. The rewritten loop will add the splat value to the first element of the strided constant vector to use as the scalar start value. The stride is unaffected. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D115958 --- .../RISCV/RISCVGatherScatterLowering.cpp | 42 ++++- .../rvv/fixed-vector-strided-load-store.ll | 170 ++++++++++++++++++ 2 files changed, 206 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp index d47bd739235f..ba91b16661a4 100644 --- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp @@ -127,6 +127,41 @@ static std::pair matchStridedConstant(Constant *StartC) { return std::make_pair(StartVal, Stride); } +static std::pair matchStridedStart(Value *Start, + IRBuilder<> &Builder) { + // Base case, start is a strided constant. + auto *StartC = dyn_cast(Start); + if (StartC) + return matchStridedConstant(StartC); + + // Not a constant, maybe it's a strided constant with a splat added to it. + auto *BO = dyn_cast(Start); + if (!BO || BO->getOpcode() != Instruction::Add) + return std::make_pair(nullptr, nullptr); + + // Look for an operand that is splatted. 
+ unsigned OtherIndex = 1; + Value *Splat = getSplatValue(BO->getOperand(0)); + if (!Splat) { + Splat = getSplatValue(BO->getOperand(1)); + OtherIndex = 0; + } + if (!Splat) + return std::make_pair(nullptr, nullptr); + + Value *Stride; + std::tie(Start, Stride) = matchStridedStart(BO->getOperand(OtherIndex), + Builder); + if (!Start) + return std::make_pair(nullptr, nullptr); + + // Add the splat value to the start. + Builder.SetInsertPoint(BO); + Builder.SetCurrentDebugLocation(DebugLoc()); + Start = Builder.CreateAdd(Start, Splat); + return std::make_pair(Start, Stride); +} + // Recursively, walk about the use-def chain until we find a Phi with a strided // start value. Build and update a scalar recurrence as we unwind the recursion. // We also update the Stride as we unwind. Our goal is to move all of the @@ -161,12 +196,7 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L, if (!Step) return false; - // Start should be a strided constant. - auto *StartC = dyn_cast(Start); - if (!StartC) - return false; - - std::tie(Start, Stride) = matchStridedConstant(StartC); + std::tie(Start, Stride) = matchStridedStart(Start, Builder); if (!Start) return false; assert(Stride != nullptr); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll index 026e149c1a46..e563b0834d60 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -978,3 +978,173 @@ define void @scatter_of_pointers(i32** noalias nocapture %0, i32** noalias nocap } declare void @llvm.masked.scatter.v2p0i32.v2p0p0i32(<2 x i32*>, <2 x i32**>, i32 immarg, <2 x i1>) + +define void @strided_load_startval_add_with_splat(i8* noalias nocapture %0, i8* noalias nocapture readonly %1, i32 signext %2) { +; CHECK-LABEL: @strided_load_startval_add_with_splat( +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2:%.*]], 1024 
+; CHECK-NEXT: br i1 [[TMP4]], label [[TMP31:%.*]], label [[TMP5:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 1023, [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP7]], 31 +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP29:%.*]], label [[TMP11:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP9]], 8589934560 +; CHECK-NEXT: [[TMP13:%.*]] = add nsw i64 [[TMP12]], [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = add i64 0, [[TMP6]] +; CHECK-NEXT: [[START:%.*]] = mul i64 [[TMP14]], 5 +; CHECK-NEXT: br label [[TMP15:%.*]] +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ 0, [[TMP11]] ], [ [[TMP25:%.*]], [[TMP15]] ] +; CHECK-NEXT: [[DOTSCALAR:%.*]] = phi i64 [ [[START]], [[TMP11]] ], [ [[DOTSCALAR1:%.*]], [[TMP15]] ] +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP6]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP1:%.*]], i64 [[DOTSCALAR]] +; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0i8.i64(<32 x i8> undef, i8* [[TMP18]], i64 5, <32 x i1> ) +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP0:%.*]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <32 x i8>* +; CHECK-NEXT: [[TMP22:%.*]] = load <32 x i8>, <32 x i8>* [[TMP21]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = add <32 x i8> [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP20]] to <32 x i8>* +; CHECK-NEXT: store <32 x i8> [[TMP23]], <32 x i8>* [[TMP24]], align 1 +; CHECK-NEXT: [[TMP25]] = add nuw i64 [[TMP16]], 32 +; CHECK-NEXT: [[DOTSCALAR1]] = add i64 [[DOTSCALAR]], 160 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], [[TMP12]] +; CHECK-NEXT: br i1 [[TMP26]], label [[TMP27:%.*]], label [[TMP15]] +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP9]], [[TMP12]] +; CHECK-NEXT: br i1 [[TMP28]], 
label [[TMP31]], label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = phi i64 [ [[TMP6]], [[TMP5]] ], [ [[TMP13]], [[TMP27]] ] +; CHECK-NEXT: br label [[TMP32:%.*]] +; CHECK: 31: +; CHECK-NEXT: ret void +; CHECK: 32: +; CHECK-NEXT: [[TMP33:%.*]] = phi i64 [ [[TMP40:%.*]], [[TMP32]] ], [ [[TMP30]], [[TMP29]] ] +; CHECK-NEXT: [[TMP34:%.*]] = mul nsw i64 [[TMP33]], 5 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* [[TMP35]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, i8* [[TMP37]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = add i8 [[TMP38]], [[TMP36]] +; CHECK-NEXT: store i8 [[TMP39]], i8* [[TMP37]], align 1 +; CHECK-NEXT: [[TMP40]] = add nsw i64 [[TMP33]], 1 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i32 +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1024 +; CHECK-NEXT: br i1 [[TMP42]], label [[TMP31]], label [[TMP32]] +; +; CHECK-ASM-LABEL: strided_load_startval_add_with_splat: +; CHECK-ASM: # %bb.0: +; CHECK-ASM-NEXT: li a3, 1024 +; CHECK-ASM-NEXT: beq a2, a3, .LBB12_7 +; CHECK-ASM-NEXT: # %bb.1: +; CHECK-ASM-NEXT: li a3, 1023 +; CHECK-ASM-NEXT: subw a4, a3, a2 +; CHECK-ASM-NEXT: li a5, 31 +; CHECK-ASM-NEXT: mv a3, a2 +; CHECK-ASM-NEXT: bltu a4, a5, .LBB12_5 +; CHECK-ASM-NEXT: # %bb.2: +; CHECK-ASM-NEXT: slli a3, a4, 32 +; CHECK-ASM-NEXT: srli a3, a3, 32 +; CHECK-ASM-NEXT: addi a6, a3, 1 +; CHECK-ASM-NEXT: andi a7, a6, -32 +; CHECK-ASM-NEXT: add a3, a7, a2 +; CHECK-ASM-NEXT: slli a4, a2, 2 +; CHECK-ASM-NEXT: add a4, a4, a2 +; CHECK-ASM-NEXT: add a2, a0, a2 +; CHECK-ASM-NEXT: add a4, a1, a4 +; CHECK-ASM-NEXT: li t0, 32 +; CHECK-ASM-NEXT: li t1, 5 +; CHECK-ASM-NEXT: mv a5, a7 +; CHECK-ASM-NEXT: .LBB12_3: # =>This Inner Loop Header: Depth=1 +; CHECK-ASM-NEXT: vsetvli zero, t0, e8, m1, ta, mu +; CHECK-ASM-NEXT: vlse8.v v8, (a4), t1 +; CHECK-ASM-NEXT: vle8.v v9, (a2) +; 
CHECK-ASM-NEXT: vadd.vv v8, v9, v8 +; CHECK-ASM-NEXT: vse8.v v8, (a2) +; CHECK-ASM-NEXT: addi a5, a5, -32 +; CHECK-ASM-NEXT: addi a2, a2, 32 +; CHECK-ASM-NEXT: addi a4, a4, 160 +; CHECK-ASM-NEXT: bnez a5, .LBB12_3 +; CHECK-ASM-NEXT: # %bb.4: +; CHECK-ASM-NEXT: beq a6, a7, .LBB12_7 +; CHECK-ASM-NEXT: .LBB12_5: +; CHECK-ASM-NEXT: slli a2, a3, 2 +; CHECK-ASM-NEXT: add a2, a2, a3 +; CHECK-ASM-NEXT: add a1, a1, a2 +; CHECK-ASM-NEXT: li a6, 1024 +; CHECK-ASM-NEXT: .LBB12_6: # =>This Inner Loop Header: Depth=1 +; CHECK-ASM-NEXT: lb a4, 0(a1) +; CHECK-ASM-NEXT: add a5, a0, a3 +; CHECK-ASM-NEXT: lb a2, 0(a5) +; CHECK-ASM-NEXT: addw a2, a2, a4 +; CHECK-ASM-NEXT: sb a2, 0(a5) +; CHECK-ASM-NEXT: addiw a2, a3, 1 +; CHECK-ASM-NEXT: addi a3, a3, 1 +; CHECK-ASM-NEXT: addi a1, a1, 5 +; CHECK-ASM-NEXT: bne a2, a6, .LBB12_6 +; CHECK-ASM-NEXT: .LBB12_7: +; CHECK-ASM-NEXT: ret + %4 = icmp eq i32 %2, 1024 + br i1 %4, label %36, label %5 + +5: ; preds = %3 + %6 = sext i32 %2 to i64 + %7 = sub i32 1023, %2 + %8 = zext i32 %7 to i64 + %9 = add nuw nsw i64 %8, 1 + %10 = icmp ult i32 %7, 31 + br i1 %10, label %34, label %11 + +11: ; preds = %5 + %12 = and i64 %9, 8589934560 + %13 = add nsw i64 %12, %6 + %14 = insertelement <32 x i64> poison, i64 %6, i64 0 + %15 = shufflevector <32 x i64> %14, <32 x i64> poison, <32 x i32> zeroinitializer + %16 = add <32 x i64> %15, + br label %17 + +17: ; preds = %17, %11 + %18 = phi i64 [ 0, %11 ], [ %29, %17 ] + %19 = phi <32 x i64> [ %16, %11 ], [ %30, %17 ] + %20 = add i64 %18, %6 + %21 = mul nsw <32 x i64> %19, + %22 = getelementptr inbounds i8, i8* %1, <32 x i64> %21 + %23 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %22, i32 1, <32 x i1> , <32 x i8> undef) + %24 = getelementptr inbounds i8, i8* %0, i64 %20 + %25 = bitcast i8* %24 to <32 x i8>* + %26 = load <32 x i8>, <32 x i8>* %25, align 1 + %27 = add <32 x i8> %26, %23 + %28 = bitcast i8* %24 to <32 x i8>* + store <32 x i8> %27, <32 x i8>* %28, align 1 + %29 = add nuw i64 %18, 32 + 
%30 = add <32 x i64> %19, + %31 = icmp eq i64 %29, %12 + br i1 %31, label %32, label %17 + +32: ; preds = %17 + %33 = icmp eq i64 %9, %12 + br i1 %33, label %36, label %34 + +34: ; preds = %5, %32 + %35 = phi i64 [ %6, %5 ], [ %13, %32 ] + br label %37 + +36: ; preds = %37, %32, %3 + ret void + +37: ; preds = %34, %37 + %38 = phi i64 [ %45, %37 ], [ %35, %34 ] + %39 = mul nsw i64 %38, 5 + %40 = getelementptr inbounds i8, i8* %1, i64 %39 + %41 = load i8, i8* %40, align 1 + %42 = getelementptr inbounds i8, i8* %0, i64 %38 + %43 = load i8, i8* %42, align 1 + %44 = add i8 %43, %41 + store i8 %44, i8* %42, align 1 + %45 = add nsw i64 %38, 1 + %46 = trunc i64 %45 to i32 + %47 = icmp eq i32 %46, 1024 + br i1 %47, label %36, label %37 +} From 1e50d064666f7e807ac96655405c5678251475f5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 4 Jan 2022 13:11:24 -0500 Subject: [PATCH 553/992] [Analysis] fix swapped operands to computeConstantRange This was noted in post-commit review for D116322 / 0edf99950e6 . I am not seeing how to expose the bug in a test though because we don't pass an assumption cache into this analysis from there. --- llvm/lib/Analysis/ValueTracking.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 16bd0adff983..758980187180 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7146,8 +7146,8 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, continue; // TODO: Set "ForSigned" parameter via Cmp->isSigned()? 
ConstantRange RHS = - computeConstantRange(Cmp->getOperand(1), UseInstrInfo, - /* ForSigned */ false, AC, I, DT, Depth + 1); + computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false, + UseInstrInfo, AC, I, DT, Depth + 1); CR = CR.intersectWith( ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS)); } From 3064dd8ccffc561e0f01cfa930b9a481d90e7f4f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 2 Jan 2022 11:53:53 -0800 Subject: [PATCH 554/992] [libcxx] Use Fuchsia-native CPRNG for std::random_device Use the zx_cprng_draw system call directly rather than going through the libc getentropy function. The libc function is a trivial wrapper around the system call, and is not a standard C function. Avoiding it reduces the Fuchsia libc ABI surface that libc++ depends on. Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D116498 --- libcxx/include/__config | 10 +++++++++- libcxx/src/random.cpp | 25 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 720e12eac0dd..98f011e7c6ad 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -354,6 +354,12 @@ // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // +// _LIBCPP_USING_FUCHSIA_CPRNG +// Use Fuchsia's zx_cprng_draw() system call, which is specified to +// deliver high-quality entropy and cannot fail. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// // _LIBCPP_USING_NACL_RANDOM // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, // including accesses to the special files under `/dev`. This implementation @@ -367,8 +373,10 @@ // constructor *must* be "/dev/urandom" -- anything else is an error. 
#if defined(__OpenBSD__) # define _LIBCPP_USING_ARC4_RANDOM -#elif defined(__Fuchsia__) || defined(__wasi__) +#elif defined(__wasi__) # define _LIBCPP_USING_GETENTROPY +#elif defined(__Fuchsia__) +# define _LIBCPP_USING_FUCHSIA_CPRNG #elif defined(__native_client__) # define _LIBCPP_USING_NACL_RANDOM #elif defined(_LIBCPP_WIN32API) diff --git a/libcxx/src/random.cpp b/libcxx/src/random.cpp index 286a45785154..5590db85e48a 100644 --- a/libcxx/src/random.cpp +++ b/libcxx/src/random.cpp @@ -36,6 +36,8 @@ # endif #elif defined(_LIBCPP_USING_NACL_RANDOM) # include +#elif defined(_LIBCPP_USING_FUCHSIA_CPRNG) +# include #endif @@ -170,6 +172,27 @@ random_device::operator()() return r; } +#elif defined(_LIBCPP_USING_FUCHSIA_CPRNG) + +random_device::random_device(const string& __token) { + if (__token != "/dev/urandom") + __throw_system_error(ENOENT, ("random device not supported " + __token).c_str()); +} + +random_device::~random_device() {} + +unsigned random_device::operator()() { + // Implicitly link against the vDSO system call ABI without + // requiring the final link to specify -lzircon explicitly when + // statically linking libc++. +# pragma comment(lib, "zircon") + + // The system call cannot fail. It returns only when the bits are ready. + unsigned r; + _zx_cprng_draw(&r, sizeof(r)); + return r; +} + #else #error "Random device not implemented for this architecture" #endif @@ -189,7 +212,7 @@ random_device::entropy() const noexcept return std::numeric_limits::digits; return ent; -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) || defined(_LIBCPP_USING_FUCHSIA_CPRNG) return std::numeric_limits::digits; #else return 0; From bd6e6846e7f431e9d84b1f257d4a0c8ddeac4fe2 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 22 Dec 2021 18:14:14 +0100 Subject: [PATCH 555/992] [libc++] Add the version header to all headers. Some headers which require the version header depend on other headers to provide it. 
Include the version header in all top-level headers to make sure a header cleanup can't remove the version header. Note this doesn't add the version header to the c headers. Reviewed By: #libc, Quuxplusone, ldionne Differential Revision: https://reviews.llvm.org/D116172 --- libcxx/include/bitset | 1 + libcxx/include/codecvt | 1 + libcxx/include/compare | 1 + libcxx/include/condition_variable | 1 + libcxx/include/execution | 1 + libcxx/include/fstream | 1 + libcxx/include/future | 1 + libcxx/include/ios | 1 + libcxx/include/iosfwd | 1 + libcxx/include/iostream | 1 + libcxx/include/latch | 1 + libcxx/include/queue | 1 + libcxx/include/random | 1 + libcxx/include/ratio | 1 + libcxx/include/semaphore | 1 + libcxx/include/sstream | 1 + libcxx/include/stack | 1 + libcxx/include/streambuf | 1 + libcxx/include/strstream | 1 + libcxx/include/system_error | 1 + libcxx/include/thread | 1 + libcxx/include/typeindex | 1 + libcxx/include/valarray | 1 + libcxx/utils/graph_header_deps.py | 2 +- 24 files changed, 24 insertions(+), 1 deletion(-) diff --git a/libcxx/include/bitset b/libcxx/include/bitset index 8f538e92e7ff..809fadf6d228 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -120,6 +120,7 @@ template struct hash>; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/codecvt b/libcxx/include/codecvt index 60d3db882c03..74839d199686 100644 --- a/libcxx/include/codecvt +++ b/libcxx/include/codecvt @@ -56,6 +56,7 @@ class codecvt_utf8_utf16 #include <__config> #include <__locale> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/compare b/libcxx/include/compare index 5d07ebaf2fbd..d686b5a369f2 100644 --- a/libcxx/include/compare +++ b/libcxx/include/compare @@ -145,6 +145,7 @@ namespace std { #include <__compare/three_way_comparable.h> #include <__compare/weak_order.h> #include <__config> +#include #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index 0569e2254d1d..ecec3ea8c017 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -109,6 +109,7 @@ public: #include <__config> #include <__mutex_base> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/execution b/libcxx/include/execution index c1debcb72ff1..417b11b103a2 100644 --- a/libcxx/include/execution +++ b/libcxx/include/execution @@ -11,6 +11,7 @@ #define _LIBCPP_EXECUTION #include <__config> +#include #if defined(_LIBCPP_HAS_PARALLEL_ALGORITHMS) && _LIBCPP_STD_VER >= 17 # include <__pstl_execution> diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 3d64adcb23d1..fc0a9204ed60 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -187,6 +187,7 @@ typedef basic_fstream wfstream; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) # include diff --git a/libcxx/include/future b/libcxx/include/future index 6b666a70f48e..e35eedf35641 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -374,6 +374,7 @@ template struct uses_allocator, Alloc>; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/ios b/libcxx/include/ios index 237d146dfb85..74c3c63bd347 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -214,6 +214,7 @@ storage-class-specifier const error_category& iostream_category() noexcept; #include <__locale> #include #include +#include #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) #include // for __xindex_ diff --git a/libcxx/include/iosfwd b/libcxx/include/iosfwd index 938d712cf36b..c2ba8ee9e652 100644 --- a/libcxx/include/iosfwd +++ b/libcxx/include/iosfwd @@ -96,6 +96,7 @@ using u32streampos = fpos::state_type>; #include 
<__config> #include <__mbstate_t.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/iostream b/libcxx/include/iostream index 7397acfc678b..793f08ab1330 100644 --- a/libcxx/include/iostream +++ b/libcxx/include/iostream @@ -38,6 +38,7 @@ extern wostream wclog; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/latch b/libcxx/include/latch index e65825991b59..2cc9222baadc 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -43,6 +43,7 @@ namespace std #include <__availability> #include <__config> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/queue b/libcxx/include/queue index 03081eb844ba..9fad80253c50 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -213,6 +213,7 @@ template #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/random b/libcxx/include/random index c88bfce03b19..2e271cec46ad 100644 --- a/libcxx/include/random +++ b/libcxx/include/random @@ -1715,6 +1715,7 @@ class piecewise_linear_distribution #include <__random/uniform_real_distribution.h> #include <__random/weibull_distribution.h> #include +#include #include // for backward compatibility; TODO remove it #include // for backward compatibility; TODO remove it diff --git a/libcxx/include/ratio b/libcxx/include/ratio index 16b45a28ed8b..8859261208d0 100644 --- a/libcxx/include/ratio +++ b/libcxx/include/ratio @@ -81,6 +81,7 @@ typedef ratio<1000000000000000000000000, 1> yotta; // not supported #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index 2c2518bce46a..f83f7b455111 100644 --- a/libcxx/include/semaphore +++ 
b/libcxx/include/semaphore @@ -49,6 +49,7 @@ using binary_semaphore = counting_semaphore<1>; #include <__config> #include <__threading_support> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/sstream b/libcxx/include/sstream index e63d1434ac76..6ad624a93a65 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -184,6 +184,7 @@ typedef basic_stringstream wstringstream; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/stack b/libcxx/include/stack index 5d959c33c742..3cf6cd233282 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -91,6 +91,7 @@ template #include <__memory/uses_allocator.h> #include <__utility/forward.h> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf index db3078d809a5..aa66370f1b68 100644 --- a/libcxx/include/streambuf +++ b/libcxx/include/streambuf @@ -110,6 +110,7 @@ protected: #include <__config> #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/strstream b/libcxx/include/strstream index a5f17a9dc319..c34a5628b634 100644 --- a/libcxx/include/strstream +++ b/libcxx/include/strstream @@ -132,6 +132,7 @@ private: #include <__config> #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/system_error b/libcxx/include/system_error index 059fa0e2d511..6d3a6ca65038 100644 --- a/libcxx/include/system_error +++ b/libcxx/include/system_error @@ -150,6 +150,7 @@ template <> struct hash; #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/thread b/libcxx/include/thread index a4632f6fe524..00c4ae35eccb 100644 --- 
a/libcxx/include/thread +++ b/libcxx/include/thread @@ -97,6 +97,7 @@ void sleep_for(const chrono::duration& rel_time); #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/typeindex b/libcxx/include/typeindex index 790aea4d4763..ede0c7fb25c2 100644 --- a/libcxx/include/typeindex +++ b/libcxx/include/typeindex @@ -49,6 +49,7 @@ struct hash #include <__functional_base> #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/include/valarray b/libcxx/include/valarray index 909e0422c476..a55d921872ba 100644 --- a/libcxx/include/valarray +++ b/libcxx/include/valarray @@ -348,6 +348,7 @@ template unspecified2 end(const valarray& v); #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/libcxx/utils/graph_header_deps.py b/libcxx/utils/graph_header_deps.py index 95b0a482ae09..8c19165ab493 100755 --- a/libcxx/utils/graph_header_deps.py +++ b/libcxx/utils/graph_header_deps.py @@ -14,7 +14,7 @@ def is_config_header(h): - return os.path.basename(h) in ['__config', '__libcpp_version', '__undef_macros'] + return os.path.basename(h) in ['__config', '__libcpp_version', '__undef_macros', 'version'] def is_experimental_header(h): From 78f5014fea9d1d19bcf175d2e57f530c223794e0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Jan 2022 10:15:48 -0800 Subject: [PATCH 556/992] [Hexagon] Conversions to/from FP types, HVX and scalar Co-authored-by: Anirudh Sundar Subramaniam Co-authored-by: Sumanth Gundapaneni --- .../Target/Hexagon/HexagonISelLowering.cpp | 17 +++ llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 + .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 112 ++++++++++++++++++ llvm/lib/Target/Hexagon/HexagonPatterns.td | 6 + llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 27 ++++- .../CodeGen/Hexagon/autohvx/conv-fp-fp.ll | 87 ++++++++++++++ 
.../Hexagon/autohvx/conv-fp-int-ieee.ll | 100 ++++++++++++++++ llvm/test/CodeGen/Hexagon/fp16.ll | 76 ++++++++++++ 8 files changed, 422 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/conv-fp-fp.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/conv-fp-int-ieee.ll create mode 100644 llvm/test/CodeGen/Hexagon/fp16.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 01fcbba7f8fb..699a818c887b 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1772,6 +1772,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + // Special handling for half-precision floating point conversions. + // Lower half float conversions into library calls. + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + // Handling of indexed loads/stores: default is "expand". // for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64, @@ -1852,6 +1864,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, else setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf"); + // Routines to handle fp16 storage type. + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); + setLibcallName(RTLIB::FPROUND_F64_F16, "__truncdfhf2"); + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + // These cause problems when the shift amount is non-constant. 
setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index a31a697b7317..f9ce7a9407aa 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -476,6 +476,8 @@ class HexagonTargetLowering : public TargetLowering { SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG) const; SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index a65ceccb60a1..1f2e5dc43439 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -139,6 +139,14 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom); setOperationAction(ISD::VSELECT, MVT::v64f32, Custom); + if (Subtarget.useHVXQFloatOps()) { + setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); + } else if (Subtarget.useHVXIEEEFPOps()) { + setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); + } + setOperationAction(ISD::MLOAD, MVT::v32f32, Custom); setOperationAction(ISD::MSTORE, MVT::v32f32, Custom); setOperationAction(ISD::MLOAD, MVT::v64f16, Custom); @@ -201,6 +209,18 @@ HexagonTargetLowering::initializeHVXLowering() { setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); } + if (Subtarget.useHVXQFloatOps()) { + setOperationAction(ISD::SINT_TO_FP, T, Expand); + 
setOperationAction(ISD::UINT_TO_FP, T, Expand); + setOperationAction(ISD::FP_TO_SINT, T, Expand); + setOperationAction(ISD::FP_TO_UINT, T, Expand); + } else if (Subtarget.useHVXIEEEFPOps()) { + setOperationAction(ISD::SINT_TO_FP, T, Custom); + setOperationAction(ISD::UINT_TO_FP, T, Custom); + setOperationAction(ISD::FP_TO_SINT, T, Custom); + setOperationAction(ISD::FP_TO_UINT, T, Custom); + } + setCondCodeAction(ISD::SETNE, T, Expand); setCondCodeAction(ISD::SETLE, T, Expand); setCondCodeAction(ISD::SETGE, T, Expand); @@ -262,6 +282,11 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::UMIN, T, Custom); setOperationAction(ISD::UMAX, T, Custom); } + + setOperationAction(ISD::SINT_TO_FP, T, Custom); + setOperationAction(ISD::UINT_TO_FP, T, Custom); + setOperationAction(ISD::FP_TO_SINT, T, Custom); + setOperationAction(ISD::FP_TO_UINT, T, Custom); } setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand); @@ -1992,6 +2017,81 @@ HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi}); } +SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op, + SelectionDAG &DAG) const { + // This conversion only applies to QFloat. 
+ assert(Subtarget.useHVXQFloatOps()); + + unsigned Opc = Op->getOpcode(); + assert(Opc == ISD::FP_EXTEND); + + MVT VecTy = ty(Op); + MVT ArgTy = ty(Op.getOperand(0)); + const SDLoc &dl(Op); + assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16); + + SDValue F16Vec = Op.getOperand(0); + + APFloat FloatVal = APFloat(1.0f); + bool Ignored; + FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); + SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy); + SDValue VmpyVec = + getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG); + + MVT HalfTy = typeSplit(VecTy).first; + VectorPair Pair = opSplit(VmpyVec, dl, DAG); + SDValue LoVec = + getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG); + SDValue HiVec = + getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG); + + SDValue ShuffVec = + getInstr(Hexagon::V6_vshuffvdd, dl, VecTy, + {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG); + + return ShuffVec; +} + +SDValue +HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG) + const { + // This conversion only applies to IEEE. + assert(Subtarget.useHVXIEEEFPOps()); + + unsigned Opc = Op.getOpcode(); + // Catch invalid conversion ops (just in case). + assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT || + Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); + MVT ResTy = ty(Op); + + if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) { + MVT FpTy = ty(Op.getOperand(0)).getVectorElementType(); + // There are only conversions of f16. + if (FpTy != MVT::f16) + return SDValue(); + + MVT IntTy = ResTy.getVectorElementType(); + // Other int types aren't legal in HVX, so we shouldn't see them here. + assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); + // Conversions to i8 and i16 are legal. + if (IntTy == MVT::i8 || IntTy == MVT::i16) + return Op; + } else { + // Converting int -> fp. 
+ if (ResTy.getVectorElementType() != MVT::f16) + return SDValue(); + MVT IntTy = ty(Op.getOperand(0)).getVectorElementType(); + // Other int types aren't legal in HVX, so we shouldn't see them here. + assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); + // i8, i16 -> f16 is legal. + if (IntTy == MVT::i8 || IntTy == MVT::i16) + return Op; + } + + return SDValue(); +} + SDValue HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const { assert(!Op.isMachineOpcode()); @@ -2296,6 +2396,13 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MLOAD: case ISD::MSTORE: return SplitHvxMemOp(Op, DAG); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits()) + return SplitHvxPairOp(Op, DAG); + break; case ISD::CTPOP: case ISD::CTLZ: case ISD::CTTZ: @@ -2356,6 +2463,11 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG); // Unaligned loads will be handled by the default lowering. 
case ISD::LOAD: return SDValue(); + case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerHvxConvertFpInt(Op, DAG); } #ifndef NDEBUG Op.dumpr(&DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 9b21eb98e2c8..cab59626a600 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -369,6 +369,12 @@ def Umin: pf2; def Umax: pf2; def Rol: pf2; +def Fptosi: pf1; +def Fptoui: pf1; +def Sitofp: pf1; +def Uitofp: pf1; + + // --(1) Immediate ------------------------------------------------------- // diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 33bf8ed71a9c..2e739d6e06f8 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -476,11 +476,11 @@ let Predicates = [UseHVXV68, UseHVXQFloat] in { // since the qfloat arithmetic instructions above always generate the // accompanying conversions as part of their pattern def: Pat<(VecF16 (pf1 HWF32:$Vuu)), - (V6_vdealh (V6_vconv_hf_qf32 - (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)), - (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0)) - ))))>; - + (V6_vdealh (V6_vconv_hf_qf32 + (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)), + (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0)) + ))))>; + // fpextend for QFloat is handled manually in HexagonISelLoweringHVX.cpp. 
} // HVX IEEE arithmetic Instructions @@ -497,6 +497,23 @@ let Predicates = [UseHVXV68, UseHVXIEEEFP] in { (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>; def: Pat<(fmul HVF32:$Rs, HVF32:$Rt), (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>; + + def: Pat<(VecF16 (pf1 HWF32:$Vuu)), + (V6_vdealh (V6_vcvt_hf_sf (HiVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)))>; + def: Pat<(VecPF32 (pf1 HVF16:$Vu)), + (V6_vcvt_sf_hf (V6_vshuffh HvxVR:$Vu))>; + + def: OpR_R_pat; + def: OpR_R_pat; + def: OpR_R_pat; + def: OpR_R_pat; + + def: Pat<(VecI8 (Fptosi HWF16:$Vu)), + (V6_vcvt_b_hf (HiVec $Vu), (LoVec $Vu))>; + def: Pat<(VecI8 (Fptoui HWF16:$Vu)), + (V6_vcvt_ub_hf (HiVec $Vu), (LoVec $Vu))>; + def: Pat<(VecPF16 (Sitofp HVI8:$Vu)), (V6_vcvt_hf_b HvxVR:$Vu)>; + def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>; } let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { diff --git a/llvm/test/CodeGen/Hexagon/autohvx/conv-fp-fp.ll b/llvm/test/CodeGen/Hexagon/autohvx/conv-fp-fp.ll new file mode 100644 index 000000000000..f5096ea9128f --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/conv-fp-fp.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define <64 x half> @f0(<64 x float> %a0) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vxor(v2,v2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v2.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1.qf32 = vadd(v1.sf,v2.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = v1:0.qf32 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.h = vdeal(v0.h) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fptrunc <64 x float> %a0 to <64 x half> + ret <64 x half> %v0 +} + +define <64 x float> @f1(<64 x half> %a0) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #15360 +; CHECK-NEXT: r7 = #-4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1.h = 
vsplat(r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.qf32 = vmpy(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = v0.qf32 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1.sf = v1.qf32 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0 = vshuff(v1,v0,r7) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fpext <64 x half> %a0 to <64 x float> + ret <64 x float> %v0 +} + +define <64 x half> @f2(<64 x float> %a0) #1 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vcvt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.h = vdeal(v0.h) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fptrunc <64 x float> %a0 to <64 x half> + ret <64 x half> %v0 +} + +define <64 x float> @f3(<64 x half> %a0) #1 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.h = vshuff(v0.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.sf = vcvt(v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fpext <64 x half> %a0 to <64 x float> + ret <64 x float> %v0 +} + +attributes #0 = { nounwind "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } +attributes #1 = { nounwind "target-features"="+hvxv69,+hvx-length128b,+hvx-ieee-fp,-hvx-qfloat" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/conv-fp-int-ieee.ll b/llvm/test/CodeGen/Hexagon/autohvx/conv-fp-int-ieee.ll new file mode 100644 index 000000000000..889b4b3fbabf --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/conv-fp-int-ieee.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define <64 x i16> @f0(<64 x half> %a0) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.h = vcvt(v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fptosi <64 x half> %a0 to <64 x i16> + ret <64 x i16> %v0 +} + +define <64 x i16> @f1(<64 x half> %a0) #0 { +; CHECK-LABEL: 
f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.uh = vcvt(v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fptoui <64 x half> %a0 to <64 x i16> + ret <64 x i16> %v0 +} + +define <128 x i8> @f2(<128 x half> %a0) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.b = vcvt(v1.hf,v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fptosi <128 x half> %a0 to <128 x i8> + ret <128 x i8> %v0 +} + +define <128 x i8> @f3(<128 x half> %a0) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.ub = vcvt(v1.hf,v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = fptoui <128 x half> %a0 to <128 x i8> + ret <128 x i8> %v0 +} + +define <64 x half> @f4(<64 x i16> %a0) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vcvt(v0.h) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = sitofp <64 x i16> %a0 to <64 x half> + ret <64 x half> %v0 +} + +define <64 x half> @f5(<64 x i16> %a0) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vcvt(v0.uh) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = uitofp <64 x i16> %a0 to <64 x half> + ret <64 x half> %v0 +} + +define <128 x half> @f6(<128 x i8> %a0) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.hf = vcvt(v0.b) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = sitofp <128 x i8> %a0 to <128 x half> + ret <128 x half> %v0 +} + +define <128 x half> @f7(<128 x i8> %a0) #0 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.hf = vcvt(v0.ub) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = uitofp <128 x i8> %a0 to <128 x half> + ret <128 x half> %v0 +} + +attributes #0 = { nounwind "target-features"="+hvxv69,+hvx-length128b,+hvx-ieee-fp,-hvx-qfloat" } diff --git a/llvm/test/CodeGen/Hexagon/fp16.ll b/llvm/test/CodeGen/Hexagon/fp16.ll new file mode 
100644 index 000000000000..c1fd501fd51a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/fp16.ll @@ -0,0 +1,76 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; This test validates the following facts for half-precision floating point +; conversions. +; Generate correct libcall names for conversion from fp16 to fp32. +; (__extendhfsf2). +; The extension from fp16 to fp64 is implicitly handled by __extendhfsf2 and convert_sf2d. +; (fp16->fp32->fp64). +; Generate correct libcall names for conversion from fp32/fp64 to fp16 +; (__truncsfhf2 and __truncdfhf2) +; Verify that we generate loads and stores of halfword. + +; Validate that we generate correct lib calls to convert fp16 + +;CHECK-LABEL: @test1 +;CHECK: call __extendhfsf2 +;CHECK: r0 = memuh +define dso_local float @test1(i16* nocapture readonly %a) local_unnamed_addr #0 { +entry: + %0 = load i16, i16* %a, align 2 + %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) + ret float %1 +} + +;CHECK-LABEL: @test2 +;CHECK: call __extendhfsf2 +;CHECK: r0 = memuh +;CHECK: convert_sf2d +define dso_local double @test2(i16* nocapture readonly %a) local_unnamed_addr #0 { +entry: + %0 = load i16, i16* %a, align 2 + %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0) + ret double %1 +} + +;CHECK-LABEL: @test3 +;CHECK: call __truncsfhf2 +;CHECK: memh{{.*}}= r0 +define dso_local void @test3(float %src, i16* nocapture %dst) local_unnamed_addr #0 { +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f32(float %src) + store i16 %0, i16* %dst, align 2 + ret void +} + +;CHECK-LABEL: @test4 +;CHECK: call __truncdfhf2 +;CHECK: memh{{.*}}= r0 +define dso_local void @test4(double %src, i16* nocapture %dst) local_unnamed_addr #0 { +entry: + %0 = tail call i16 @llvm.convert.to.fp16.f64(double %src) + store i16 %0, i16* %dst, align 2 + ret void +} + +;CHECK-LABEL: @test5 +;CHECK: call __extendhfsf2 +;CHECK: call __extendhfsf2 +;CHECK: sfadd +define dso_local float @test5(i16* nocapture readonly %a, i16* nocapture readonly %b) 
local_unnamed_addr #0 { +entry: + %0 = load i16, i16* %a, align 2 + %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0) + %2 = load i16, i16* %b, align 2 + %3 = tail call float @llvm.convert.from.fp16.f32(i16 %2) + %add = fadd float %1, %3 + ret float %add +} + +declare float @llvm.convert.from.fp16.f32(i16) #1 +declare double @llvm.convert.from.fp16.f64(i16) #1 +declare i16 @llvm.convert.to.fp16.f32(float) #1 +declare i16 @llvm.convert.to.fp16.f64(double) #1 + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind readnone } From 7ee25bc56f92495eb6d289b5ec18a07f27f1f44b Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Mon, 3 Jan 2022 16:39:58 -0800 Subject: [PATCH 557/992] [mlir][python] Add bindings for diagnostic handler. I considered multiple approaches for this but settled on this one because I could make the lifetime management work in a reasonably easy way (others had issues with not being able to cast to a Python reference from a C++ constructor). We could stand to have more formatting helpers, but best to get the core mechanism in first. 
Differential Revision: https://reviews.llvm.org/D116568 --- mlir/lib/Bindings/Python/IRCore.cpp | 165 ++++++++++++++++++++- mlir/lib/Bindings/Python/IRModule.h | 76 ++++++++++ mlir/python/mlir/_mlir_libs/_mlir/ir.pyi | 33 ++++- mlir/test/python/ir/diagnostic_handler.py | 172 ++++++++++++++++++++++ 4 files changed, 443 insertions(+), 3 deletions(-) create mode 100644 mlir/test/python/ir/diagnostic_handler.py diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index b39a1ea844e4..1a7eb46f7529 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -511,6 +511,57 @@ void PyMlirContext::contextExit(const pybind11::object &excType, PyThreadContextEntry::popContext(*this); } +py::object PyMlirContext::attachDiagnosticHandler(py::object callback) { + // Note that ownership is transferred to the delete callback below by way of + // an explicit inc_ref (borrow). + PyDiagnosticHandler *pyHandler = + new PyDiagnosticHandler(get(), std::move(callback)); + py::object pyHandlerObject = + py::cast(pyHandler, py::return_value_policy::take_ownership); + pyHandlerObject.inc_ref(); + + // In these C callbacks, the userData is a PyDiagnosticHandler* that is + // guaranteed to be known to pybind. + auto handlerCallback = + +[](MlirDiagnostic diagnostic, void *userData) -> MlirLogicalResult { + PyDiagnostic *pyDiagnostic = new PyDiagnostic(diagnostic); + py::object pyDiagnosticObject = + py::cast(pyDiagnostic, py::return_value_policy::take_ownership); + + auto *pyHandler = static_cast(userData); + bool result = false; + { + // Since this can be called from arbitrary C++ contexts, always get the + // gil. + py::gil_scoped_acquire gil; + try { + result = py::cast(pyHandler->callback(pyDiagnostic)); + } catch (std::exception &e) { + fprintf(stderr, "MLIR Python Diagnostic handler raised exception: %s\n", + e.what()); + pyHandler->hadError = true; + } + } + + pyDiagnostic->invalidate(); + return result ? 
mlirLogicalResultSuccess() : mlirLogicalResultFailure(); + }; + auto deleteCallback = +[](void *userData) { + auto *pyHandler = static_cast(userData); + assert(pyHandler->registeredID && "handler is not registered"); + pyHandler->registeredID.reset(); + + // Decrement reference, balancing the inc_ref() above. + py::object pyHandlerObject = + py::cast(pyHandler, py::return_value_policy::reference); + pyHandlerObject.dec_ref(); + }; + + pyHandler->registeredID = mlirContextAttachDiagnosticHandler( + get(), handlerCallback, static_cast(pyHandler), deleteCallback); + return pyHandlerObject; +} + PyMlirContext &DefaultingPyMlirContext::resolve() { PyMlirContext *context = PyThreadContextEntry::getDefaultContext(); if (!context) { @@ -656,6 +707,78 @@ void PyThreadContextEntry::popLocation(PyLocation &location) { stack.pop_back(); } +//------------------------------------------------------------------------------ +// PyDiagnostic* +//------------------------------------------------------------------------------ + +void PyDiagnostic::invalidate() { + valid = false; + if (materializedNotes) { + for (auto ¬eObject : *materializedNotes) { + PyDiagnostic *note = py::cast(noteObject); + note->invalidate(); + } + } +} + +PyDiagnosticHandler::PyDiagnosticHandler(MlirContext context, + py::object callback) + : context(context), callback(std::move(callback)) {} + +PyDiagnosticHandler::~PyDiagnosticHandler() {} + +void PyDiagnosticHandler::detach() { + if (!registeredID) + return; + MlirDiagnosticHandlerID localID = *registeredID; + mlirContextDetachDiagnosticHandler(context, localID); + assert(!registeredID && "should have unregistered"); + // Not strictly necessary but keeps stale pointers from being around to cause + // issues. 
+ context = {nullptr}; +} + +void PyDiagnostic::checkValid() { + if (!valid) { + throw std::invalid_argument( + "Diagnostic is invalid (used outside of callback)"); + } +} + +MlirDiagnosticSeverity PyDiagnostic::getSeverity() { + checkValid(); + return mlirDiagnosticGetSeverity(diagnostic); +} + +PyLocation PyDiagnostic::getLocation() { + checkValid(); + MlirLocation loc = mlirDiagnosticGetLocation(diagnostic); + MlirContext context = mlirLocationGetContext(loc); + return PyLocation(PyMlirContext::forContext(context), loc); +} + +py::str PyDiagnostic::getMessage() { + checkValid(); + py::object fileObject = py::module::import("io").attr("StringIO")(); + PyFileAccumulator accum(fileObject, /*binary=*/false); + mlirDiagnosticPrint(diagnostic, accum.getCallback(), accum.getUserData()); + return fileObject.attr("getvalue")(); +} + +py::tuple PyDiagnostic::getNotes() { + checkValid(); + if (materializedNotes) + return *materializedNotes; + intptr_t numNotes = mlirDiagnosticGetNumNotes(diagnostic); + materializedNotes = py::tuple(numNotes); + for (intptr_t i = 0; i < numNotes; ++i) { + MlirDiagnostic noteDiag = mlirDiagnosticGetNote(diagnostic, i); + py::object pyNoteDiag = py::cast(PyDiagnostic(noteDiag)); + PyTuple_SET_ITEM(materializedNotes->ptr(), i, pyNoteDiag.ptr()); + } + return *materializedNotes; +} + //------------------------------------------------------------------------------ // PyDialect, PyDialectDescriptor, PyDialects //------------------------------------------------------------------------------ @@ -2024,6 +2147,36 @@ class PyOpAttributeMap { //------------------------------------------------------------------------------ void mlir::python::populateIRCore(py::module &m) { + //---------------------------------------------------------------------------- + // Enums. 
+ //---------------------------------------------------------------------------- + py::enum_(m, "DiagnosticSeverity", py::module_local()) + .value("ERROR", MlirDiagnosticError) + .value("WARNING", MlirDiagnosticWarning) + .value("NOTE", MlirDiagnosticNote) + .value("REMARK", MlirDiagnosticRemark); + + //---------------------------------------------------------------------------- + // Mapping of Diagnostics. + //---------------------------------------------------------------------------- + py::class_(m, "Diagnostic", py::module_local()) + .def_property_readonly("severity", &PyDiagnostic::getSeverity) + .def_property_readonly("location", &PyDiagnostic::getLocation) + .def_property_readonly("message", &PyDiagnostic::getMessage) + .def_property_readonly("notes", &PyDiagnostic::getNotes) + .def("__str__", [](PyDiagnostic &self) -> py::str { + if (!self.isValid()) + return ""; + return self.getMessage(); + }); + + py::class_(m, "DiagnosticHandler", py::module_local()) + .def("detach", &PyDiagnosticHandler::detach) + .def_property_readonly("attached", &PyDiagnosticHandler::isAttached) + .def_property_readonly("had_error", &PyDiagnosticHandler::getHadError) + .def("__enter__", &PyDiagnosticHandler::contextEnter) + .def("__exit__", &PyDiagnosticHandler::contextExit); + //---------------------------------------------------------------------------- // Mapping of MlirContext. 
//---------------------------------------------------------------------------- @@ -2079,6 +2232,9 @@ void mlir::python::populateIRCore(py::module &m) { [](PyMlirContext &self, bool value) { mlirContextSetAllowUnregisteredDialects(self.get(), value); }) + .def("attach_diagnostic_handler", &PyMlirContext::attachDiagnosticHandler, + py::arg("callback"), + "Attaches a diagnostic handler that will receive callbacks") .def( "enable_multithreading", [](PyMlirContext &self, bool enable) { @@ -2204,7 +2360,8 @@ void mlir::python::populateIRCore(py::module &m) { py::arg("context") = py::none(), kContextGetFileLocationDocstring) .def_static( "fused", - [](const std::vector &pyLocations, llvm::Optional metadata, + [](const std::vector &pyLocations, + llvm::Optional metadata, DefaultingPyMlirContext context) { if (pyLocations.empty()) throw py::value_error("No locations provided"); @@ -2236,6 +2393,12 @@ void mlir::python::populateIRCore(py::module &m) { "context", [](PyLocation &self) { return self.getContext().getObject(); }, "Context that owns the Location") + .def( + "emit_error", + [](PyLocation &self, std::string message) { + mlirEmitError(self, message.c_str()); + }, + py::arg("message"), "Emits an error at this location") .def("__repr__", [](PyLocation &self) { PyPrintAccumulator printAccum; mlirLocationPrint(self, printAccum.getCallback(), diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index 117435d633b1..2f354d6d1262 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -15,6 +15,7 @@ #include "mlir-c/AffineExpr.h" #include "mlir-c/AffineMap.h" +#include "mlir-c/Diagnostics.h" #include "mlir-c/IR.h" #include "mlir-c/IntegerSet.h" #include "llvm/ADT/DenseMap.h" @@ -24,6 +25,8 @@ namespace mlir { namespace python { class PyBlock; +class PyDiagnostic; +class PyDiagnosticHandler; class PyInsertionPoint; class PyLocation; class DefaultingPyLocation; @@ -207,6 +210,10 @@ class PyMlirContext { 
const pybind11::object &excVal, const pybind11::object &excTb); + /// Attaches a Python callback as a diagnostic handler, returning a + /// registration object (internally a PyDiagnosticHandler). + pybind11::object attachDiagnosticHandler(pybind11::object callback); + private: PyMlirContext(MlirContext context); // Interns the mapping of live MlirContext::ptr to PyMlirContext instances, @@ -267,6 +274,75 @@ class BaseContextObject { PyMlirContextRef contextRef; }; +/// Python class mirroring the C MlirDiagnostic struct. Note that these structs +/// are only valid for the duration of a diagnostic callback and attempting +/// to access them outside of that will raise an exception. This applies to +/// nested diagnostics (in the notes) as well. +class PyDiagnostic { +public: + PyDiagnostic(MlirDiagnostic diagnostic) : diagnostic(diagnostic) {} + void invalidate(); + bool isValid() { return valid; } + MlirDiagnosticSeverity getSeverity(); + PyLocation getLocation(); + pybind11::str getMessage(); + pybind11::tuple getNotes(); + +private: + MlirDiagnostic diagnostic; + + void checkValid(); + /// If notes have been materialized from the diagnostic, then this will + /// be populated with the corresponding objects (all castable to + /// PyDiagnostic). + llvm::Optional materializedNotes; + bool valid = true; +}; + +/// Represents a diagnostic handler attached to the context. The handler's +/// callback will be invoked with PyDiagnostic instances until the detach() +/// method is called or the context is destroyed. A diagnostic handler can be +/// the subject of a `with` block, which will detach it when the block exits. +/// +/// Since diagnostic handlers can call back into Python code which can do +/// unsafe things (i.e. recursively emitting diagnostics, raising exceptions, +/// etc), this is generally not deemed to be a great user-level API. Users +/// should generally use some form of DiagnosticCollector. 
If the handler raises +/// any exceptions, they will just be emitted to stderr and dropped. +/// +/// The unique usage of this class means that its lifetime management is +/// different from most other parts of the API. Instances are always created +/// in an attached state and can transition to a detached state by either: +/// a) The context being destroyed and unregistering all handlers. +/// b) An explicit call to detach(). +/// The object may remain live from a Python perspective for an arbitrary time +/// after detachment, but there is nothing the user can do with it (since there +/// is no way to attach an existing handler object). +class PyDiagnosticHandler { +public: + PyDiagnosticHandler(MlirContext context, pybind11::object callback); + ~PyDiagnosticHandler(); + + bool isAttached() { return registeredID.hasValue(); } + bool getHadError() { return hadError; } + + /// Detaches the handler. Does nothing if not attached. + void detach(); + + pybind11::object contextEnter() { return pybind11::cast(this); } + void contextExit(pybind11::object excType, pybind11::object excVal, + pybind11::object excTb) { + detach(); + } + +private: + MlirContext context; + pybind11::object callback; + llvm::Optional registeredID; + bool hadError = false; + friend class PyMlirContext; +}; + /// Wrapper around an MlirDialect. This is exported as `DialectDescriptor` in /// order to differentiate it from the `Dialect` base class which is extended by /// plugins which extend dialect functionality through extension python code. diff --git a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi index e61e34a176b0..affe54c3e11b 100644 --- a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi +++ b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi @@ -7,7 +7,7 @@ # * Local edits to signatures and types that MyPy did not auto detect (or # detected incorrectly). 
-from typing import Any, Callable, ClassVar, Dict, List, Optional, Sequence +from typing import Any, Callable, ClassVar, Dict, List, Optional, Sequence, Tuple from typing import overload @@ -43,6 +43,9 @@ __all__ = [ "Dialect", "DialectDescriptor", "Dialects", + "Diagnostic", + "DiagnosticHandler", + "DiagnosticSeverity", "DictAttr", "F16Type", "F32Type", @@ -425,8 +428,9 @@ class Context: def _get_live_count() -> int: ... def _get_live_module_count(self) -> int: ... def _get_live_operation_count(self) -> int: ... + def attach_diagnostic_handler(self, callback: Callable[["Diagnostic"], bool]) -> "DiagnosticHandler": ... def enable_multithreading(self, enable: bool) -> None: ... - def get_dialect_descriptor(name: dialect_name: str) -> "DialectDescriptor": ... + def get_dialect_descriptor(dialect_name: str) -> "DialectDescriptor": ... def is_registered_operation(self, operation_name: str) -> bool: ... def __enter__(self) -> "Context": ... def __exit__(self, arg0: object, arg1: object, arg2: object) -> None: ... @@ -479,6 +483,31 @@ class Dialects: def __getattr__(self, arg0: str) -> "Dialect": ... def __getitem__(self, arg0: str) -> "Dialect": ... +class Diagnostic: + @property + def severity(self) -> "DiagnosticSeverity": ... + @property + def location(self) -> "Location": ... + @property + def message(self) -> str: ... + @property + def notes(self) -> Tuple["Diagnostic"]: ... + +class DiagnosticHandler: + def detach(self) -> None: ... + @property + def attached(self) -> bool: ... + @property + def had_error(self) -> bool: ... + def __enter__(self) -> "DiagnosticHandler": ... + def __exit__(self, arg0: object, arg1: object, arg2: object) -> None: ... + +class DiagnosticSeverity: + ERROR: "DiagnosticSeverity" + WARNING: "DiagnosticSeverity" + NOTE: "DiagnosticSeverity" + REMARK: "DiagnosticSeverity" + # TODO: Auto-generated. Audit and fix. class DictAttr(Attribute): def __init__(self, cast_from_attr: Attribute) -> None: ... 
diff --git a/mlir/test/python/ir/diagnostic_handler.py b/mlir/test/python/ir/diagnostic_handler.py new file mode 100644 index 000000000000..f38187a6f3be --- /dev/null +++ b/mlir/test/python/ir/diagnostic_handler.py @@ -0,0 +1,172 @@ +# RUN: %PYTHON %s | FileCheck %s + +import gc +from mlir.ir import * + +def run(f): + print("\nTEST:", f.__name__) + f() + gc.collect() + assert Context._get_live_count() == 0 + return f + + +@run +def testLifecycleContextDestroy(): + ctx = Context() + def callback(foo): ... + handler = ctx.attach_diagnostic_handler(callback) + assert handler.attached + # If context is destroyed before the handler, it should auto-detach. + ctx = None + gc.collect() + assert not handler.attached + + # And finally collecting the handler should be fine. + handler = None + gc.collect() + + +@run +def testLifecycleExplicitDetach(): + ctx = Context() + def callback(foo): ... + handler = ctx.attach_diagnostic_handler(callback) + assert handler.attached + handler.detach() + assert not handler.attached + + +@run +def testLifecycleWith(): + ctx = Context() + def callback(foo): ... + with ctx.attach_diagnostic_handler(callback) as handler: + assert handler.attached + assert not handler.attached + + +@run +def testLifecycleWithAndExplicitDetach(): + ctx = Context() + def callback(foo): ... 
+ with ctx.attach_diagnostic_handler(callback) as handler: + assert handler.attached + handler.detach() + assert not handler.attached + + +# CHECK-LABEL: TEST: testDiagnosticCallback +@run +def testDiagnosticCallback(): + ctx = Context() + def callback(d): + # CHECK: DIAGNOSTIC: message='foobar', severity=DiagnosticSeverity.ERROR, loc=loc(unknown) + print(f"DIAGNOSTIC: message='{d.message}', severity={d.severity}, loc={d.location}") + return True + handler = ctx.attach_diagnostic_handler(callback) + loc = Location.unknown(ctx) + loc.emit_error("foobar") + assert not handler.had_error + + +# CHECK-LABEL: TEST: testDiagnosticEmptyNotes +# TODO: Come up with a way to inject a diagnostic with notes from this API. +@run +def testDiagnosticEmptyNotes(): + ctx = Context() + def callback(d): + # CHECK: DIAGNOSTIC: notes=() + print(f"DIAGNOSTIC: notes={d.notes}") + return True + handler = ctx.attach_diagnostic_handler(callback) + loc = Location.unknown(ctx) + loc.emit_error("foobar") + assert not handler.had_error + + +# CHECK-LABEL: TEST: testDiagnosticCallbackException +@run +def testDiagnosticCallbackException(): + ctx = Context() + def callback(d): + raise ValueError("Error in handler") + handler = ctx.attach_diagnostic_handler(callback) + loc = Location.unknown(ctx) + loc.emit_error("foobar") + assert handler.had_error + + +# CHECK-LABEL: TEST: testEscapingDiagnostic +@run +def testEscapingDiagnostic(): + ctx = Context() + diags = [] + def callback(d): + diags.append(d) + return True + handler = ctx.attach_diagnostic_handler(callback) + loc = Location.unknown(ctx) + loc.emit_error("foobar") + assert not handler.had_error + + # CHECK: DIAGNOSTIC: + print(f"DIAGNOSTIC: {str(diags[0])}") + try: + diags[0].severity + raise RuntimeError("expected exception") + except ValueError: + pass + try: + diags[0].location + raise RuntimeError("expected exception") + except ValueError: + pass + try: + diags[0].message + raise RuntimeError("expected exception") + except ValueError: + 
pass + try: + diags[0].notes + raise RuntimeError("expected exception") + except ValueError: + pass + + + +# CHECK-LABEL: TEST: testDiagnosticReturnTrueHandles +@run +def testDiagnosticReturnTrueHandles(): + ctx = Context() + def callback1(d): + print(f"CALLBACK1: {d}") + return True + def callback2(d): + print(f"CALLBACK2: {d}") + return True + ctx.attach_diagnostic_handler(callback1) + ctx.attach_diagnostic_handler(callback2) + loc = Location.unknown(ctx) + # CHECK-NOT: CALLBACK1 + # CHECK: CALLBACK2: foobar + # CHECK-NOT: CALLBACK1 + loc.emit_error("foobar") + + +# CHECK-LABEL: TEST: testDiagnosticReturnFalseDoesNotHandle +@run +def testDiagnosticReturnFalseDoesNotHandle(): + ctx = Context() + def callback1(d): + print(f"CALLBACK1: {d}") + return True + def callback2(d): + print(f"CALLBACK2: {d}") + return False + ctx.attach_diagnostic_handler(callback1) + ctx.attach_diagnostic_handler(callback2) + loc = Location.unknown(ctx) + # CHECK: CALLBACK2: foobar + # CHECK: CALLBACK1: foobar + loc.emit_error("foobar") From 5a3c27636569df560d4ba9019f796d4442853d5b Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sun, 26 Dec 2021 21:36:33 -0500 Subject: [PATCH 558/992] [libc++] [test] Remove IFNDR uses of std::invocable. libstdc++'s implementation diagnoses these with hard errors. Fixes #50059. 
Differential Revision: https://reviews.llvm.org/D116293 --- .../concept.invocable/invocable.compile.pass.cpp | 4 +--- .../regular_invocable.compile.pass.cpp | 4 +--- libcxx/test/std/ranges/range.access/data.pass.cpp | 4 ++-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp index dfd21a4f3133..a6ffac199ee1 100644 --- a/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp @@ -237,11 +237,10 @@ static_assert(!std::invocable); } // namespace function_objects namespace pointer_to_member_functions { -// clang-format off template constexpr bool check_member_is_invocable() { - constexpr bool result = std::invocable; + constexpr bool result = std::invocable; using uncv_t = std::remove_cvref_t; static_assert(std::invocable == result); static_assert(std::invocable, Args...> == result); @@ -254,7 +253,6 @@ namespace pointer_to_member_functions { static_assert(!std::invocable); return result; } -// clang-format on static_assert(check_member_is_invocable()); static_assert(std::invocable); diff --git a/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp b/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp index 35973a793f4d..fb152b99b5cd 100644 --- a/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp +++ b/libcxx/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp @@ -263,11 +263,10 @@ static_assert(!std::regular_invocable } // namespace function_objects namespace pointer_to_member_functions { -// clang-format off template constexpr bool check_member_is_invocable() { - 
constexpr bool result = std::regular_invocable; + constexpr bool result = std::regular_invocable; using uncv_t = std::remove_cvref_t; static_assert(std::regular_invocable == result); static_assert(std::regular_invocable, Args...> == result); @@ -280,7 +279,6 @@ namespace pointer_to_member_functions { static_assert(!std::regular_invocable); return result; } -// clang-format on static_assert(check_member_is_invocable()); static_assert(std::regular_invocable); diff --git a/libcxx/test/std/ranges/range.access/data.pass.cpp b/libcxx/test/std/ranges/range.access/data.pass.cpp index b02c82306bcb..4e03dc4d8bd7 100644 --- a/libcxx/test/std/ranges/range.access/data.pass.cpp +++ b/libcxx/test/std/ranges/range.access/data.pass.cpp @@ -25,8 +25,8 @@ static int globalBuff[2]; struct Incomplete; static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); From e88eb6443fe08e36f64bc5c795f80d4fe204ab83 Mon Sep 17 00:00:00 2001 From: Tasmia Rahman Date: Tue, 4 Jan 2022 11:18:13 -0800 Subject: [PATCH 559/992] [Hexagon] Fix buildVector32 for v4i8 constants The code for constructing a 32-bit constant from 4 8-bit constants has a typo and uses one of the constants twice --- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 4 ++-- .../CodeGen/Hexagon/generate-const-buildvector32.ll | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/generate-const-buildvector32.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 699a818c887b..9aac770a4380 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2440,8 +2440,8 @@ HexagonTargetLowering::buildVector32(ArrayRef Elem, 
const SDLoc &dl, if (AllConst) { int32_t V = (Consts[0]->getZExtValue() & 0xFF) | (Consts[1]->getZExtValue() & 0xFF) << 8 | - (Consts[1]->getZExtValue() & 0xFF) << 16 | - Consts[2]->getZExtValue() << 24; + (Consts[2]->getZExtValue() & 0xFF) << 16 | + Consts[3]->getZExtValue() << 24; return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32)); } diff --git a/llvm/test/CodeGen/Hexagon/generate-const-buildvector32.ll b/llvm/test/CodeGen/Hexagon/generate-const-buildvector32.ll new file mode 100644 index 000000000000..645efc2932ca --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/generate-const-buildvector32.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=hexagon -mtriple=hexagon < %s | FileCheck %s + +; CHECK: r{{[0-9]+}} = ##673059850 + +define dso_local i32 @main() #0 { +entry: + %a = alloca <4 x i8>, align 4 + store <4 x i8> , <4 x i8>* %a, align 4 + ret i32 0 +} + From 6a6a80e88eff1fde4e9c6c38c1efd45eff5c9f9f Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sun, 3 Oct 2021 00:54:18 -0400 Subject: [PATCH 560/992] [libc++] [test] Add tests for std::span construction from initializer lists. 
Differential Revision: https://reviews.llvm.org/D116481 --- .../containers/views/span.cons/array.pass.cpp | 64 +++++++++---------- .../views/span.cons/initializer_list.pass.cpp | 43 +++++++++++++ .../views/span.cons/iterator_len.verify.cpp | 5 +- 3 files changed, 78 insertions(+), 34 deletions(-) create mode 100644 libcxx/test/std/containers/views/span.cons/initializer_list.pass.cpp diff --git a/libcxx/test/std/containers/views/span.cons/array.pass.cpp b/libcxx/test/std/containers/views/span.cons/array.pass.cpp index cbb60853385d..fdee1f4ec8a7 100644 --- a/libcxx/test/std/containers/views/span.cons/array.pass.cpp +++ b/libcxx/test/std/containers/views/span.cons/array.pass.cpp @@ -74,50 +74,48 @@ void checkCV() } } - -template -constexpr bool testConstexprSpan() +template +constexpr bool testSpan() { - constexpr T val[2] = {}; + T val[2] = {}; - ASSERT_NOEXCEPT(std::span {val}); + ASSERT_NOEXCEPT(std::span{val}); + ASSERT_NOEXCEPT(std::span{val}); + ASSERT_NOEXCEPT(std::span{val}); ASSERT_NOEXCEPT(std::span{val}); - std::span s1{val}; - std::span s2{val}; - return - s1.data() == &val[0] && s1.size() == 2 - && s2.data() == &val[0] && s2.size() == 2; -} - -template -void testRuntimeSpan() -{ - T val[2] = {}; - ASSERT_NOEXCEPT(std::span {val}); - ASSERT_NOEXCEPT(std::span{val}); - std::span s1{val}; - std::span s2{val}; - assert(s1.data() == &val[0] && s1.size() == 2); - assert(s2.data() == &val[0] && s2.size() == 2); + std::span s1 = val; + std::span s2 = val; + std::span s3 = val; + std::span s4 = val; + assert(s1.data() == val && s1.size() == 2); + assert(s2.data() == val && s2.size() == 2); + assert(s3.data() == val && s3.size() == 2); + assert(s4.data() == val && s4.size() == 2); + + std::span s5 = {{1,2}}; + std::span s6 = {{1,2}}; + assert(s5.size() == 2); // and it dangles + assert(s6.size() == 2); // and it dangles + + return true; } -struct A{}; + +struct A {}; int main(int, char**) { - static_assert(testConstexprSpan(), ""); - 
static_assert(testConstexprSpan(), ""); - static_assert(testConstexprSpan(), ""); - static_assert(testConstexprSpan(), ""); + testSpan(); + testSpan(); + testSpan(); + testSpan(); - testRuntimeSpan(); - testRuntimeSpan(); - testRuntimeSpan(); - testRuntimeSpan(); - testRuntimeSpan(); + static_assert(testSpan()); + static_assert(testSpan()); + static_assert(testSpan()); checkCV(); - return 0; + return 0; } diff --git a/libcxx/test/std/containers/views/span.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/views/span.cons/initializer_list.pass.cpp new file mode 100644 index 000000000000..5b5c226e329a --- /dev/null +++ b/libcxx/test/std/containers/views/span.cons/initializer_list.pass.cpp @@ -0,0 +1,43 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// + +#include +#include + +struct Sink { + constexpr Sink() = default; + constexpr Sink(Sink*) {} +}; + +constexpr int count(std::span sp) { + return sp.size(); +} + +template +constexpr int countn(std::span sp) { + return sp.size(); +} + +constexpr bool test() { + Sink a[10]; + assert(count({a}) == 10); + assert(count({a, a+10}) == 10); + assert(countn<10>({a}) == 10); + return true; +} + +int main(int, char**) +{ + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/containers/views/span.cons/iterator_len.verify.cpp b/libcxx/test/std/containers/views/span.cons/iterator_len.verify.cpp index b2472a96ac25..1ee4cc3d494d 100644 --- a/libcxx/test/std/containers/views/span.cons/iterator_len.verify.cpp +++ b/libcxx/test/std/containers/views/span.cons/iterator_len.verify.cpp @@ -31,7 +31,10 @@ int main(int, char**) { int 
arr[] = {1, 2, 3}; createImplicitSpan(arr, 3); - std::span sp = {0, 0}; // expected-error {{no matching constructor for initialization of 'std::span'}} + std::span sp = {0, 0}; // expected-error {{no matching constructor for initialization of 'std::span'}} + std::span sp2 = {0, 0}; // expected-error {{no matching constructor for initialization of 'std::span'}} + std::span csp = {0, 0}; // expected-error {{no matching constructor for initialization of 'std::span'}} + std::span csp2 = {0, 0}; // expected-error {{no matching constructor for initialization of 'std::span'}} return 0; } From 4a47ac7d514c151883a0b9a98f7c3071229c13a5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 15 Dec 2021 11:26:47 -0500 Subject: [PATCH 561/992] [libc++] Remove incorrect default constructor in cpp17_input_iterator AFAICT, Cpp17InputIterators are not required to be default constructible, since that requirement is added in Cpp17ForwardIterator. Hence, our archetype for Cpp17InputIterator should not be default constructible. Removing that constructor has a ripple effect on a couple of tests that were making incorrect assumptions. Notably: - Some tests were using cpp17_input_iterator as a sentinel for itself. That is not valid, because a cpp17_input_iterator is not semiregular anymore after the change (and hence it doesn't satisfy sentinel_for). - Some tests were using a stride-counted cpp17_input_iterator as the sentinel for a range. This doesn't work anymore because of the problem above, so these tests were changed not to check stride counts for input iterators. - Some tests were default constructing cpp17_input_iterator when a simple alternative was available -- those have been changed to use that alternative. 
Differential Revision: https://reviews.llvm.org/D115806 --- ..._iter_iter_iter.addressof.compile.pass.cpp | 3 +- .../insert_iter_iter_iter.pass.cpp | 2 +- .../iterator_sentinel.pass.cpp | 129 ++++++++++++------ .../iterator_count_sentinel.pass.cpp | 11 +- .../iterator_sentinel.pass.cpp | 98 +++++++------ .../counted.iterator/ctor.default.pass.cpp | 8 +- .../move.iter.op.const/default.pass.cpp | 2 +- .../move.iter.op=/move_iterator.pass.cpp | 2 +- .../iterator/ctor.default.pass.cpp | 8 +- .../iterator/member_types.compile.pass.cpp | 12 +- .../range.range/sentinel_t.compile.pass.cpp | 6 +- .../common_range.compile.pass.cpp | 58 ++++---- .../input_range.compile.pass.cpp | 8 +- .../range.subrange/advance.pass.cpp | 2 +- .../string_append/iterator.pass.cpp | 6 +- .../string_assign/iterator.pass.cpp | 6 +- .../string_insert/iter_iter_iter.pass.cpp | 6 +- .../iter_iter_iter_iter.pass.cpp | 6 +- libcxx/test/support/test_iterators.h | 1 - 19 files changed, 218 insertions(+), 156 deletions(-) diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp index a8eb860e6487..f8311090b37e 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.addressof.compile.pass.cpp @@ -19,10 +19,9 @@ #include "operator_hijacker.h" #include "test_iterators.h" -void test() { +void test(cpp17_input_iterator i) { { std::vector v; - cpp17_input_iterator::iterator> i; v.insert(v.end(), i, i); } { diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp index 19475c3d07e8..ca8dcb8474c6 100644 --- 
a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp @@ -179,7 +179,7 @@ int main(int, char**) { std::vector s; - s.insert(s.end(), cpp17_input_iterator(), cpp17_input_iterator()); + s.insert(s.end(), cpp17_input_iterator(nullptr), cpp17_input_iterator(nullptr)); } return 0; diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp index 165b0b063e98..90f9aba1b7ff 100644 --- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp +++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_sentinel.pass.cpp @@ -15,6 +15,7 @@ #include #include +#include #include "test_iterators.h" @@ -44,61 +45,103 @@ class distance_apriori_sentinel { std::ptrdiff_t count_ = 0; }; -template Sent = It> -constexpr void check_assignable_case(std::ptrdiff_t const n) { +template +constexpr void check_assignable_case() { auto range = range_t{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - auto first = stride_counting_iterator(It(range.begin())); - std::ranges::advance(first, stride_counting_iterator(Sent(It(range.begin() + n)))); - assert(first.base().base() == range.begin() + n); - assert(first.stride_count() == 0); // because we got here by assigning from last, not by incrementing + + for (std::ptrdiff_t n = 0; n != 9; ++n) { + { + It first(range.begin()); + Sent last(It(range.begin() + n)); + std::ranges::advance(first, last); + assert(base(first) == range.begin() + n); + } + + // Count operations + if constexpr (std::is_same_v) { + stride_counting_iterator first(It(range.begin())); + stride_counting_iterator last(It(range.begin() + n)); + std::ranges::advance(first, last); + 
assert(first.base().base() == range.begin() + n); + assert(first.stride_count() == 0); // because we got here by assigning from last, not by incrementing + } + } } -template -constexpr void check_sized_sentinel_case(std::ptrdiff_t const n) { +template +constexpr void check_sized_sentinel_case() { auto range = range_t{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - auto first = stride_counting_iterator(It(range.begin())); - std::ranges::advance(first, distance_apriori_sentinel(n)); - - assert(first.base().base() == range.begin() + n); - if constexpr (std::random_access_iterator) { - assert(first.stride_count() == 1); - assert(first.stride_displacement() == 1); - } else { - assert(first.stride_count() == n); - assert(first.stride_displacement() == n); + + for (std::ptrdiff_t n = 0; n != 9; ++n) { + { + It first(range.begin()); + distance_apriori_sentinel last(n); + std::ranges::advance(first, last); + assert(base(first) == range.begin() + n); + } + + // Count operations + { + stride_counting_iterator first(It(range.begin())); + distance_apriori_sentinel last(n); + std::ranges::advance(first, last); + + assert(first.base().base() == range.begin() + n); + if constexpr (std::random_access_iterator) { + assert(first.stride_count() == 1); + assert(first.stride_displacement() == 1); + } else { + assert(first.stride_count() == n); + assert(first.stride_displacement() == n); + } + } } } -template -constexpr void check_sentinel_case(std::ptrdiff_t const n) { +template +constexpr void check_sentinel_case() { auto range = range_t{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - auto first = stride_counting_iterator(It(range.begin())); - auto const last = It(range.begin() + n); - std::ranges::advance(first, sentinel_wrapper(last)); - assert(first.base() == last); - assert(first.stride_count() == n); + + for (std::ptrdiff_t n = 0; n != 9; ++n) { + { + It first(range.begin()); + sentinel_wrapper last(It(range.begin() + n)); + std::ranges::advance(first, last); + assert(base(first) == range.begin() + n); + } + + 
// Count operations + { + stride_counting_iterator first(It(range.begin())); + sentinel_wrapper last(It(range.begin() + n)); + std::ranges::advance(first, last); + assert(first.base() == last); + assert(first.stride_count() == n); + } + } } constexpr bool test() { - check_assignable_case >(1); - check_assignable_case >(3); - check_assignable_case >(4); - check_assignable_case >(5); - check_assignable_case >(6); - - check_sized_sentinel_case >(7); - check_sized_sentinel_case >(6); - check_sized_sentinel_case >(5); - check_sized_sentinel_case >(4); - check_sized_sentinel_case >(3); - check_sized_sentinel_case >(2); - - check_sentinel_case >(1); + using It = range_t::const_iterator; + check_assignable_case, sentinel_wrapper>>(); + check_assignable_case>(); + check_assignable_case>(); + check_assignable_case>(); + check_assignable_case>(); + + check_sized_sentinel_case>(); + check_sized_sentinel_case>(); + check_sized_sentinel_case>(); + check_sized_sentinel_case>(); + check_sized_sentinel_case>(); + check_sized_sentinel_case>(); + + check_sentinel_case>(); // cpp20_input_iterator not copyable, so is omitted - check_sentinel_case >(3); - check_sentinel_case >(4); - check_sentinel_case >(5); - check_sentinel_case >(6); + check_sentinel_case>(); + check_sentinel_case>(); + check_sentinel_case>(); + check_sentinel_case>(); return true; } diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_count_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_count_sentinel.pass.cpp index 3554a34011d2..3c0e9d89ea64 100644 --- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_count_sentinel.pass.cpp +++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_count_sentinel.pass.cpp @@ -13,18 +13,19 @@ #include #include +#include #include "test_iterators.h" -template -constexpr void check(It it, 
std::ptrdiff_t n, It last) { +template +constexpr void check(It it, std::ptrdiff_t n, Sent last) { { It result = std::ranges::next(it, n, last); assert(result == last); } // Count the number of operations - { + if constexpr (std::is_same_v) { stride_counting_iterator strided_it(it); stride_counting_iterator strided_last(last); stride_counting_iterator result = std::ranges::next(strided_it, n, strided_last); @@ -46,13 +47,13 @@ constexpr void check(It it, std::ptrdiff_t n, It last) { constexpr bool test() { int range[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - check(cpp17_input_iterator(&range[0]), 1, cpp17_input_iterator(&range[1])); + check(cpp17_input_iterator(&range[0]), 1, sentinel_wrapper(cpp17_input_iterator(&range[1]))); check(forward_iterator(&range[0]), 2, forward_iterator(&range[2])); check(bidirectional_iterator(&range[2]), 6, bidirectional_iterator(&range[8])); check(random_access_iterator(&range[3]), 2, random_access_iterator(&range[5])); check(contiguous_iterator(&range[0]), 5, contiguous_iterator(&range[5])); - check(cpp17_input_iterator(&range[0]), 0, cpp17_input_iterator(&range[0])); + check(cpp17_input_iterator(&range[0]), 0, sentinel_wrapper(cpp17_input_iterator(&range[0]))); check(forward_iterator(&range[0]), 0, forward_iterator(&range[0])); check(bidirectional_iterator(&range[2]), 0, bidirectional_iterator(&range[2])); check(random_access_iterator(&range[3]), 0, random_access_iterator(&range[3])); diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_sentinel.pass.cpp index 354cdffeae5e..545e01b547b6 100644 --- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_sentinel.pass.cpp +++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/iterator_sentinel.pass.cpp @@ -21,6 +21,20 @@ using range_t = std::array; +// Sentinel type 
that can be assigned to an iterator. This is to test the case where +// std::ranges::next uses assignment instead of successive increments below. +template +class assignable_sentinel { +public: + explicit assignable_sentinel() = default; + constexpr explicit assignable_sentinel(const It& it) : base_(base(it)) {} + constexpr operator It() const { return It(base_); } + constexpr bool operator==(const It& other) const { return base_ == base(other); } + friend constexpr It base(const assignable_sentinel& s) { return It(s.base_); } +private: + decltype(base(std::declval())) base_; +}; + class distance_apriori_sentinel { public: distance_apriori_sentinel() = default; @@ -45,39 +59,39 @@ class distance_apriori_sentinel { std::ptrdiff_t count_ = 0; }; -template -constexpr void check_assignable(It it, It last, int const* expected) { +template +constexpr void check_assignable(int* it, int* last, int const* expected) { { - It result = std::ranges::next(std::move(it), std::move(last)); - assert(&*result == expected); + It result = std::ranges::next(It(it), assignable_sentinel(It(last))); + assert(base(result) == expected); } // Count operations - { - auto strided_it = stride_counting_iterator(std::move(it)); - auto strided_last = stride_counting_iterator(std::move(last)); - auto result = std::ranges::next(std::move(strided_it), std::move(strided_last)); - assert(&*result == expected); + if constexpr (Count) { + auto strided_it = stride_counting_iterator(It(it)); + auto strided_last = assignable_sentinel(stride_counting_iterator(It(last))); + stride_counting_iterator result = std::ranges::next(std::move(strided_it), std::move(strided_last)); + assert(base(result.base()) == expected); assert(result.stride_count() == 0); // because we got here by assigning from last, not by incrementing } } -template -constexpr void check_sized_sentinel(It it, It last, int const* expected) { - auto n = (last.base() - it.base()); +template +constexpr void check_sized_sentinel(int* it, int* last, 
int const* expected) { + auto n = (last - it); { auto sent = distance_apriori_sentinel(n); - auto result = std::ranges::next(std::move(it), sent); - assert(&*result == expected); + auto result = std::ranges::next(It(it), sent); + assert(base(result) == expected); } // Count operations { - auto strided_it = stride_counting_iterator(std::move(it)); + auto strided_it = stride_counting_iterator(It(it)); auto sent = distance_apriori_sentinel(n); auto result = std::ranges::next(std::move(strided_it), sent); - assert(&*result == expected); + assert(base(result.base()) == expected); if constexpr (std::random_access_iterator) { assert(result.stride_count() == 1); // should have used exactly one += @@ -89,22 +103,22 @@ constexpr void check_sized_sentinel(It it, It last, int const* expected) { } } -template -constexpr void check_sentinel(It it, It last, int const* expected) { - auto n = (last.base() - it.base()); +template +constexpr void check_sentinel(int* it, int* last, int const* expected) { + auto n = (last - it); { - auto sent = sentinel_wrapper(last); - It result = std::ranges::next(std::move(it), sent); - assert(&*result == expected); + auto sent = sentinel_wrapper(It(last)); + It result = std::ranges::next(It(it), sent); + assert(base(result) == expected); } // Count operations - { - auto strided_it = stride_counting_iterator(it); - auto sent = sentinel_wrapper(stride_counting_iterator(last)); + if constexpr (Count) { + auto strided_it = stride_counting_iterator(It(it)); + auto sent = sentinel_wrapper(stride_counting_iterator(It(last))); stride_counting_iterator result = std::ranges::next(std::move(strided_it), sent); - assert(&*result == expected); + assert(base(result.base()) == expected); assert(result.stride_count() == n); // must have used ++ until it hit the sentinel } } @@ -112,25 +126,25 @@ constexpr void check_sentinel(It it, It last, int const* expected) { constexpr bool test() { int range[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - 
check_assignable(cpp17_input_iterator(&range[0]), cpp17_input_iterator(&range[2]), &range[2]); - check_assignable(forward_iterator(&range[0]), forward_iterator(&range[3]), &range[3]); - check_assignable(bidirectional_iterator(&range[0]), bidirectional_iterator(&range[4]), &range[4]); - check_assignable(random_access_iterator(&range[0]), random_access_iterator(&range[5]), &range[5]); - check_assignable(contiguous_iterator(&range[0]), contiguous_iterator(&range[6]), &range[6]); + check_assignable>( &range[0], &range[2], &range[2]); + check_assignable>( &range[0], &range[3], &range[3]); + check_assignable>(&range[0], &range[4], &range[4]); + check_assignable>(&range[0], &range[5], &range[5]); + check_assignable>( &range[0], &range[6], &range[6]); - check_sized_sentinel(cpp17_input_iterator(&range[0]), cpp17_input_iterator(&range[7]), &range[7]); - check_sized_sentinel(cpp20_input_iterator(&range[0]), cpp20_input_iterator(&range[6]), &range[6]); - check_sized_sentinel(forward_iterator(&range[0]), forward_iterator(&range[5]), &range[5]); - check_sized_sentinel(bidirectional_iterator(&range[0]), bidirectional_iterator(&range[4]), &range[4]); - check_sized_sentinel(random_access_iterator(&range[0]), random_access_iterator(&range[3]), &range[3]); - check_sized_sentinel(contiguous_iterator(&range[0]), contiguous_iterator(&range[2]), &range[2]); + check_sized_sentinel>( &range[0], &range[7], &range[7]); + check_sized_sentinel>( &range[0], &range[6], &range[6]); + check_sized_sentinel>( &range[0], &range[5], &range[5]); + check_sized_sentinel>(&range[0], &range[4], &range[4]); + check_sized_sentinel>(&range[0], &range[3], &range[3]); + check_sized_sentinel>( &range[0], &range[2], &range[2]); - check_sentinel(cpp17_input_iterator(&range[0]), cpp17_input_iterator(&range[1]), &range[1]); + check_sentinel>( &range[0], &range[1], &range[1]); // cpp20_input_iterator not copyable, so is omitted - check_sentinel(forward_iterator(&range[0]), forward_iterator(&range[3]), &range[3]); - 
check_sentinel(bidirectional_iterator(&range[0]), bidirectional_iterator(&range[4]), &range[4]); - check_sentinel(random_access_iterator(&range[0]), random_access_iterator(&range[5]), &range[5]); - check_sentinel(contiguous_iterator(&range[0]), contiguous_iterator(&range[6]), &range[6]); + check_sentinel>( &range[0], &range[3], &range[3]); + check_sentinel>(&range[0], &range[4], &range[4]); + check_sentinel>(&range[0], &range[5], &range[5]); + check_sentinel>( &range[0], &range[6], &range[6]); return true; } diff --git a/libcxx/test/std/iterators/predef.iterators/counted.iterator/ctor.default.pass.cpp b/libcxx/test/std/iterators/predef.iterators/counted.iterator/ctor.default.pass.cpp index 70c1d422be75..b5b00ad4a9ff 100644 --- a/libcxx/test/std/iterators/predef.iterators/counted.iterator/ctor.default.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/counted.iterator/ctor.default.pass.cpp @@ -17,11 +17,11 @@ #include "test_iterators.h" constexpr bool test() { - static_assert( std::default_initializable>>); - static_assert(!std::default_initializable>>); + static_assert(!std::default_initializable>>); + static_assert( std::default_initializable>>); - std::counted_iterator> iter; - assert(iter.base() == cpp17_input_iterator()); + std::counted_iterator> iter; + assert(iter.base() == forward_iterator()); assert(iter.count() == 0); return true; diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp index 40a9e7f1ae89..04d3d6e1866a 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op.const/default.pass.cpp @@ -29,7 +29,7 @@ test() int main(int, char**) { - test >(); + // we don't have a test iterator that is both input and 
default-constructible, so not testing that case test >(); test >(); test >(); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp index c6dcef9a0d19..a55d7a345074 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp @@ -28,7 +28,7 @@ void test(U u) { const std::move_iterator r2(u); - std::move_iterator r1; + std::move_iterator r1(It(nullptr)); std::move_iterator& rr = (r1 = r2); assert(r1.base() == u); assert(&rr == &r1); diff --git a/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/ctor.default.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/ctor.default.pass.cpp index 52bae5bb752f..709e2f925a67 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/ctor.default.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/ctor.default.pass.cpp @@ -23,16 +23,16 @@ struct DefaultCtorParent : std::ranges::view_base { T *ptr_; constexpr DefaultCtorParent(T *ptr) : ptr_(ptr) {} - constexpr cpp17_input_iterator begin() { return cpp17_input_iterator(ptr_); } - constexpr cpp17_input_iterator begin() const { return cpp17_input_iterator(ptr_); } + constexpr forward_iterator begin() { return forward_iterator(ptr_); } + constexpr forward_iterator begin() const { return forward_iterator(ptr_); } constexpr T *end() { return ptr_ + 4; } constexpr const T *end() const { return ptr_ + 4; } }; template -constexpr bool operator==(const cpp17_input_iterator &lhs, const T *rhs) { return lhs.base() == rhs; } +constexpr bool operator==(const forward_iterator &lhs, const T *rhs) { return lhs.base() == rhs; } template -constexpr bool operator==(const T 
*lhs, const cpp17_input_iterator &rhs) { return rhs.base() == lhs; } +constexpr bool operator==(const T *lhs, const forward_iterator &rhs) { return rhs.base() == lhs; } constexpr bool test() { using Base = DefaultCtorParent; diff --git a/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/member_types.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/member_types.compile.pass.cpp index acf7ca17cd69..ba960d4968df 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/member_types.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.join.view/iterator/member_types.compile.pass.cpp @@ -20,18 +20,14 @@ template struct ForwardView : std::ranges::view_base { - friend forward_iterator begin(ForwardView&) { return forward_iterator(nullptr); } - friend forward_iterator begin(ForwardView const&) { return forward_iterator(nullptr); } - friend forward_iterator end(ForwardView&) { return forward_iterator(nullptr); } - friend forward_iterator end(ForwardView const&) { return forward_iterator(nullptr); } + forward_iterator begin() const; + sentinel_wrapper> end() const; }; template struct InputView : std::ranges::view_base { - friend cpp17_input_iterator begin(InputView&) { return cpp17_input_iterator(nullptr); } - friend cpp17_input_iterator begin(InputView const&) { return cpp17_input_iterator(nullptr); } - friend cpp17_input_iterator end(InputView&) { return cpp17_input_iterator(nullptr); } - friend cpp17_input_iterator end(InputView const&) { return cpp17_input_iterator(nullptr); } + cpp17_input_iterator begin() const; + sentinel_wrapper> end() const; }; template diff --git a/libcxx/test/std/ranges/range.req/range.range/sentinel_t.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.range/sentinel_t.compile.pass.cpp index 5288ed684407..267a4c9c3c1a 100644 --- a/libcxx/test/std/ranges/range.req/range.range/sentinel_t.compile.pass.cpp +++ 
b/libcxx/test/std/ranges/range.req/range.range/sentinel_t.compile.pass.cpp @@ -25,6 +25,6 @@ static_assert(std::same_as >, sentinel>); static_assert(std::same_as const>, sentinel>); static_assert(std::same_as >, sentinel>); -static_assert(std::same_as >, cpp17_input_iterator >); -static_assert(std::same_as const>, cpp17_input_iterator >); -static_assert(std::same_as >, cpp17_input_iterator >); +static_assert(std::same_as >, forward_iterator >); +static_assert(std::same_as const>, forward_iterator >); +static_assert(std::same_as >, forward_iterator >); diff --git a/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp index dd606be1cd5d..0ee8eed08a0b 100644 --- a/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.req/range.refinements/common_range.compile.pass.cpp @@ -16,41 +16,51 @@ #include #include "test_iterators.h" -#include "test_range.h" +template struct Common { It begin() const; It end() const; }; +template struct NonCommon { It begin() const; sentinel_wrapper end() const; }; +template struct Range { It begin() const; Sent end() const; }; +static_assert(!std::ranges::common_range>>); // not a sentinel for itself +static_assert(!std::ranges::common_range>>); // not a sentinel for itself +static_assert( std::ranges::common_range>>); +static_assert( std::ranges::common_range>>); +static_assert( std::ranges::common_range>>); +static_assert( std::ranges::common_range>>); +static_assert( std::ranges::common_range>); -static_assert(!std::ranges::common_range >); -static_assert(!std::ranges::common_range const>); +static_assert(!std::ranges::common_range>>); +static_assert(!std::ranges::common_range>>); +static_assert(!std::ranges::common_range>>); +static_assert(!std::ranges::common_range>>); +static_assert(!std::ranges::common_range>>); +static_assert(!std::ranges::common_range>>); 
+static_assert(!std::ranges::common_range>); -static_assert(!std::ranges::common_range >); -static_assert(!std::ranges::common_range const>); +// Test when begin() and end() only differ by their constness. +static_assert(!std::ranges::common_range>); -static_assert(std::ranges::common_range >); -static_assert(std::ranges::common_range const>); +// Simple test with a sized_sentinel. +static_assert(!std::ranges::common_range>>); -static_assert(std::ranges::common_range >); -static_assert(!std::ranges::common_range const>); +// Make sure cv-qualification doesn't impact the concept when begin() and end() have matching qualifiers. +static_assert( std::ranges::common_range> const>); +static_assert(!std::ranges::common_range> const>); -struct subtly_not_common { - int* begin() const; +// Test with a range that's a common_range only when const-qualified. +struct Range1 { + int* begin(); + int const* begin() const; int const* end() const; }; -static_assert(std::ranges::range && !std::ranges::common_range); -static_assert(std::ranges::range && !std::ranges::common_range); +static_assert(!std::ranges::common_range); +static_assert( std::ranges::common_range); -struct common_range_non_const_only { +// Test with a range that's a common_range only when not const-qualified. 
+struct Range2 { int* begin() const; int* end(); int const* end() const; }; -static_assert(std::ranges::range&& std::ranges::common_range); -static_assert(std::ranges::range && !std::ranges::common_range); - -struct common_range_const_only { - int* begin(); - int const* begin() const; - int const* end() const; -}; -static_assert(std::ranges::range && !std::ranges::common_range); -static_assert(std::ranges::range&& std::ranges::common_range); +static_assert( std::ranges::common_range); +static_assert(!std::ranges::common_range); diff --git a/libcxx/test/std/ranges/range.req/range.refinements/input_range.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.refinements/input_range.compile.pass.cpp index 1a2b66cb3837..c27c972294e5 100644 --- a/libcxx/test/std/ranges/range.req/range.refinements/input_range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.req/range.refinements/input_range.compile.pass.cpp @@ -32,14 +32,14 @@ static_assert(std::ranges::input_range const>); static_assert(!std::ranges::input_range const>); -static_assert(std::ranges::input_range >); +static_assert(std::ranges::input_range >); static_assert(!std::ranges::input_range >); -static_assert(std::ranges::input_range const>); +static_assert(std::ranges::input_range const>); static_assert(!std::ranges::input_range const>); -static_assert(std::ranges::input_range >); +static_assert(std::ranges::input_range >); static_assert(!std::ranges::input_range >); -static_assert(!std::ranges::input_range const>); +static_assert(!std::ranges::input_range const>); static_assert(!std::ranges::input_range const>); diff --git a/libcxx/test/std/ranges/range.utility/range.subrange/advance.pass.cpp b/libcxx/test/std/ranges/range.utility/range.subrange/advance.pass.cpp index 336af63e4a3e..419a47f092f7 100644 --- a/libcxx/test/std/ranges/range.utility/range.subrange/advance.pass.cpp +++ b/libcxx/test/std/ranges/range.utility/range.subrange/advance.pass.cpp @@ -31,7 +31,7 @@ constexpr bool test() { 
assert(a4.begin() == globalBuff + 4); assert(a4.size() == 4); - std::ranges::subrange b(InputIter(globalBuff), InputIter(globalBuff + 8)); + std::ranges::subrange> b(InputIter(globalBuff), sentinel_wrapper(InputIter(globalBuff + 8))); auto b1 = std::move(b).next(); assert(b1.begin().base() == globalBuff + 1); diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp index 34b5d4139b39..07c5586419b5 100644 --- a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp @@ -178,9 +178,9 @@ int main(int, char**) typedef ThrowingIterator TIter; typedef cpp17_input_iterator IIter; const char* s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - test_exceptions(S(), IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter()); - test_exceptions(S(), IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter()); - test_exceptions(S(), IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter()); + test_exceptions(S(), IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter(TIter())); + test_exceptions(S(), IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter(TIter())); + test_exceptions(S(), IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter(TIter())); test_exceptions(S(), TIter(s, s+10, 4, TIter::TAIncrement), TIter()); test_exceptions(S(), TIter(s, s+10, 5, TIter::TADereference), TIter()); diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_assign/iterator.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_assign/iterator.pass.cpp index f8b10c6e056c..87f3885713da 100644 --- a/libcxx/test/std/strings/basic.string/string.modifiers/string_assign/iterator.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_assign/iterator.pass.cpp @@ -178,9 +178,9 @@ int main(int, char**) 
typedef ThrowingIterator TIter; typedef cpp17_input_iterator IIter; const char* s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - test_exceptions(S(), IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter()); - test_exceptions(S(), IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter()); - test_exceptions(S(), IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter()); + test_exceptions(S(), IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter(TIter())); + test_exceptions(S(), IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter(TIter())); + test_exceptions(S(), IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter(TIter())); test_exceptions(S(), TIter(s, s+10, 4, TIter::TAIncrement), TIter()); test_exceptions(S(), TIter(s, s+10, 5, TIter::TADereference), TIter()); diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp index 1f0b12ff7be9..e0e7ff906634 100644 --- a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp @@ -156,9 +156,9 @@ int main(int, char**) typedef ThrowingIterator TIter; typedef cpp17_input_iterator IIter; const char* s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - test_exceptions(S(), 0, IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter()); - test_exceptions(S(), 0, IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter()); - test_exceptions(S(), 0, IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter()); + test_exceptions(S(), 0, IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter(TIter())); + test_exceptions(S(), 0, IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter(TIter())); + test_exceptions(S(), 0, IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter(TIter())); test_exceptions(S(), 0, TIter(s, s+10, 4, TIter::TAIncrement), TIter()); 
test_exceptions(S(), 0, TIter(s, s+10, 5, TIter::TADereference), TIter()); diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp index 9ce4a375aab8..ea6f234f1a00 100644 --- a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp @@ -1008,9 +1008,9 @@ int main(int, char**) typedef ThrowingIterator TIter; typedef cpp17_input_iterator IIter; const char* s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter()); - test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter()); - test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter()); + test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, IIter(TIter(s, s+10, 4, TIter::TAIncrement)), IIter(TIter())); + test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, IIter(TIter(s, s+10, 5, TIter::TADereference)), IIter(TIter())); + test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, IIter(TIter(s, s+10, 6, TIter::TAComparison)), IIter(TIter())); test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, TIter(s, s+10, 4, TIter::TAIncrement), TIter()); test_exceptions(S("abcdefghijklmnopqrst"), 10, 5, TIter(s, s+10, 5, TIter::TADereference), TIter()); diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h index 4bf082b3634f..36f7caeb5b84 100644 --- a/libcxx/test/support/test_iterators.h +++ b/libcxx/test/support/test_iterators.h @@ -66,7 +66,6 @@ class cpp17_input_iterator TEST_CONSTEXPR_CXX14 It base() const {return it_;} - TEST_CONSTEXPR_CXX14 cpp17_input_iterator() : it_() {} explicit TEST_CONSTEXPR_CXX14 
cpp17_input_iterator(It it) : it_(it) {} template TEST_CONSTEXPR_CXX14 cpp17_input_iterator(const cpp17_input_iterator& u) :it_(u.it_) {} From db5b7915959a45951c3e6a0d5cc61f0f4ddfe291 Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Tue, 4 Jan 2022 11:40:30 -0800 Subject: [PATCH 562/992] [Hexagon] Fix an instruction move in HexagonVectorCombine The HexagonVectorCombine pass was moving an instruction incorrectly, which caused a use in a GEP that was not yet defined. HexagonVectorCombine removes a load from a group due to its dependences, but in realignGroup, the load is processed anyways. In realignGroup, when determining the maximum alignment, only those instructions still in the group should be considered. --- .../Target/Hexagon/HexagonVectorCombine.cpp | 2 +- .../Hexagon/autohvx/vector-align-bad-move.ll | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move.ll diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 755ad96e58c4..bc64d9d30a4c 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -718,7 +718,7 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { // Maximum alignment present in the whole address group. const AddrInfo &WithMaxAlign = - getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; }); + getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; }); Align MaxGiven = WithMaxAlign.HaveAlign; // Minimum alignment present in the move address group. 
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move.ll new file mode 100644 index 000000000000..8da468a332f0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; REQUIRES: asserts + +; Test that the HexagonVectorCombine pass does not move an instruction +; incorrectly, which causes a GEP to have a base that is not defined. +; If the pass runs correctly, the unaligned loads are converted to +; aligned loads instead of crashing. + +; CHECK-NOT: vmemu + +define dllexport void @test() local_unnamed_addr #0 { +entry: + br label %for_begin77 + +for_begin77: + %0 = load i8*, i8** undef, align 4 + %1 = getelementptr i8, i8* %0, i32 1794 + %2 = bitcast i8* %1 to <64 x half>* + %3 = call <64 x half> @llvm.masked.load.v64f16.p0v64f16(<64 x half>* %2, i32 1, <64 x i1> , <64 x half> undef) + %4 = getelementptr i8, i8* %0, i32 1922 + %5 = bitcast i8* %4 to <64 x half>* + %6 = call <64 x half> @llvm.masked.load.v64f16.p0v64f16(<64 x half>* %5, i32 1, <64 x i1> , <64 x half> undef) + %7 = shufflevector <64 x half> %3, <64 x half> %6, <64 x i32> + call void @llvm.assume(i1 true) [ "align"(i8* null, i32 128) ] + %8 = getelementptr i8, i8* null, i32 128 + %9 = bitcast i8* %8 to <64 x half>* + %10 = fadd <64 x half> zeroinitializer, %7 + %11 = shufflevector <64 x half> %10, <64 x half> undef, <64 x i32> + %12 = getelementptr i8, i8* %0, i32 1920 + %13 = bitcast i8* %12 to <64 x half>* + %unmaskedload243 = load <64 x half>, <64 x half>* %13, align 128 + %14 = fadd <64 x half> %11, %unmaskedload243 + store <64 x half> %14, <64 x half>* %9, align 128 + br label %for_begin77 +} + +; Function Attrs: nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #1 + +; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +declare <64 x half> @llvm.masked.load.v64f16.p0v64f16(<64 x half>*, i32 
immarg, <64 x i1>, <64 x half>) #2 + +attributes #0 = { "target-features"="+hvxv68,+hvx-length128b,+hvx-qfloat" } +attributes #1 = { nofree nosync nounwind willreturn } +attributes #2 = { argmemonly nofree nosync nounwind readonly willreturn } From 1be54bc7642b1378b7fe374920e06ccfab3a94a4 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 4 Jan 2022 11:44:56 -0800 Subject: [PATCH 563/992] precommit additional tests for D116200 --- .../sink_sideeffecting_instruction.ll | 132 +++++++++++++++++- 1 file changed, 129 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index 791d4d9b2da8..3a38650ba5c3 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -111,12 +111,36 @@ bb14: ; preds = %bb12, %bb } declare i32 @unknown(i32* %dest) +declare i32 @unknown.as2(i32 addrspace(2)* %dest) -define i32 @sink_to_use(i1 %c) { -; CHECK-LABEL: @sink_to_use( +define i32 @sink_write_to_use(i1 %c) { +; CHECK-LABEL: @sink_write_to_use( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* writeonly %var) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @sink_readwrite_to_use(i1 %c) { +; CHECK-LABEL: @sink_readwrite_to_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; 
CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 @@ -135,6 +159,108 @@ use_block: ret i32 %var3 } +define i32 @sink_bitcast(i1 %c) { +; CHECK-LABEL: @sink_bitcast( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i8, align 8 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i8* [[VAR]] to i32* +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[BITCAST]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i8, align 8 + %bitcast = bitcast i8* %var to i32* + %var3 = call i32 @unknown(i32* %bitcast) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + + +define i32 @sink_gep1(i1 %c) { +; CHECK-LABEL: @sink_gep1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR1:%.*]] = alloca [2 x i32], align 8 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 1 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[GEP]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i64, align 8 + %bitcast = bitcast i64* %var to i32* + %gep = getelementptr i32, i32* %bitcast, i32 1 + %var3 = call i32 @unknown(i32* %gep) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @sink_gep2(i1 %c) { +; CHECK-LABEL: @sink_gep2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR1:%.*]] = alloca [2 x i32], align 8 +; CHECK-NEXT: [[VAR1_SUB:%.*]] = getelementptr 
inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 0 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR1_SUB]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i64, align 8 + %bitcast = bitcast i64* %var to i32* + %var3 = call i32 @unknown(i32* %bitcast) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @sink_addrspacecast(i1 %c) { +; CHECK-LABEL: @sink_addrspacecast( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast i32* [[VAR]] to i32 addrspace(2)* +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown.as2(i32 addrspace(2)* [[CAST]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 8 + %cast = addrspacecast i32* %var to i32 addrspace(2)* + %var3 = call i32 @unknown.as2(i32 addrspace(2)* %cast) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + define i32 @neg_infinite_loop(i1 %c) { ; CHECK-LABEL: @neg_infinite_loop( ; CHECK-NEXT: entry: From fa17c0e27ac5e874e0d0ce3be9146ee3f5a99016 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Tue, 4 Jan 2022 11:56:42 -0800 Subject: [PATCH 564/992] [PS4] Verify the default DWARF version is 4. Follow-up to b8e03be. Even if Clang's generic default DWARF version bumps up, PS4 will stay on v4. 
--- clang/test/Driver/debug-options.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c index 45a577dc7e7a..22e05140f261 100644 --- a/clang/test/Driver/debug-options.c +++ b/clang/test/Driver/debug-options.c @@ -279,7 +279,8 @@ // NOG_PS4-NOT: "-dwarf-version= // // G_PS4: "-cc1" -// G_PS4: "-dwarf-version= +/// PS4 will stay on v4 even if the generic default version changes. +// G_PS4: "-dwarf-version=4" // G_PS4: "-generate-arange-section" // // G_ERR: error: unknown argument: From 11a46b174923a2509ac620bc8ff621ecdf6135b6 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 4 Jan 2022 12:02:07 -0800 Subject: [PATCH 565/992] precommit tests for a planned followon to D116200 --- .../sink_sideeffecting_instruction.ll | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index 3a38650ba5c3..49f9e5ec2cb4 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -330,6 +330,123 @@ use_block: ret i32 %var3 } +define i32 @sink_lifetime1(i1 %c) { +; CHECK-LABEL: @sink_lifetime1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[VAR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %bitcast = bitcast i32* %var to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + ret i32 %var3 +} + +define i32 @sink_lifetime2(i1 %c) { +; CHECK-LABEL: @sink_lifetime2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[VAR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[MERGE:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: merge: +; CHECK-NEXT: [[RET:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAR3]], [[USE_BLOCK]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: ret i32 [[RET]] +; CHECK: use_block: +; CHECK-NEXT: br label [[MERGE]] +; +entry: + %var = alloca i32, align 4 + %bitcast = bitcast i32* %var to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind willreturn + br i1 %c, label %merge, label %use_block + +merge: + %ret = phi i32 [0, %entry], [%var3, %use_block] + call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + ret i32 %ret + +use_block: + br label %merge +} + +define i32 @sink_lifetime3(i1 %c) { +; CHECK-LABEL: @sink_lifetime3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %bitcast = bitcast i32* %var to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + call void 
@llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + ; If unknown accesses %var, that's UB + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @sink_lifetime4(i1 %c) { +; CHECK-LABEL: @sink_lifetime4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[VAR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %bitcast = bitcast i32* %var to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind willreturn + call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + declare i32 @bar() declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + From 41760a6b40c1f14e0622aea4a2ee4b4a93c40ec1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 3 Jan 2022 16:45:08 +0100 Subject: [PATCH 566/992] [mlir] Make Value's constructor constexpr. NFCI. This allows clang to flag unused Values in more cases, so remove them. 
--- mlir/include/mlir/IR/Value.h | 4 +--- mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp | 1 - mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp | 1 - mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp | 1 - 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/mlir/include/mlir/IR/Value.h b/mlir/include/mlir/IR/Value.h index ed1f8ea96d01..65f5b8cb1eab 100644 --- a/mlir/include/mlir/IR/Value.h +++ b/mlir/include/mlir/IR/Value.h @@ -83,9 +83,7 @@ class alignas(8) ValueImpl : public IRObjectWithUseList { /// an Operation(in the case of an OpResult). class Value { public: - Value(detail::ValueImpl *impl = nullptr) : impl(impl) {} - Value(const Value &) = default; - Value &operator=(const Value &) = default; + constexpr Value(detail::ValueImpl *impl = nullptr) : impl(impl) {} template bool isa() const { diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp index b9c4dda9d649..ff8e1bbaf04f 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp @@ -830,7 +830,6 @@ void PatternLowering::generateOperationResultTypeRewriter( // Look for an operation that was replaced by `op`. The result types will be // inferred from the results that were replaced. Block *rewriterBlock = op->getBlock(); - Value replacedOp; for (OpOperand &use : op.op().getUses()) { // Check that the use corresponds to a ReplaceOp and that it is the // replacement value, not the operation being replaced. 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 6568633e477d..1bdad563f4c8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -1624,7 +1624,6 @@ static void genResult(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op) { OpOperand *lhs = op.getOutputOperand(0); Type resType = lhs->get().getType(); - Value result; if (getSparseTensorEncoding(resType)) { // The sparse tensor rematerializes from the original sparse tensor's // underlying sparse storage format. diff --git a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp index 92daab5e8b8f..74777b53a8cc 100644 --- a/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp @@ -237,7 +237,6 @@ struct TwoDimMultiReductionToElementWise if (!elementType.isIntOrIndexOrFloat()) return failure(); - Value condition; Value result = rewriter.create(loc, multiReductionOp.source(), 0) .getResult(); From 5b1337184bfb12dd0ec9141765d57677438005b7 Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Tue, 4 Jan 2022 15:16:36 -0500 Subject: [PATCH 567/992] [DebugInfo] Avoid triggering global location assert for 2-byte pointer sizes. D111404 moved a 4/8 byte check assert into a block taken by 2-byte platforms. Since these platforms do not take the branches where the pointer size is used, sink the assert accordingly. 
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D116480 --- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 34 ++++++++------ llvm/test/DebugInfo/MSP430/global-var.ll | 47 +++++++++++++++++++ 2 files changed, 67 insertions(+), 14 deletions(-) create mode 100644 llvm/test/DebugInfo/MSP430/global-var.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9b73f0ab2f05..3ab73d128aed 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -260,9 +260,20 @@ void DwarfCompileUnit::addLocationAttribute( if (Global) { const MCSymbol *Sym = Asm->getSymbol(Global); - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); + // 16-bit platforms like MSP430 and AVR take this path, so sink this + // assert to platforms that use it. + auto GetPointerSizedFormAndOp = [this]() { + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + struct FormAndOp { + dwarf::Form Form; + dwarf::LocationAtom Op; + }; + return PointerSize == 4 + ? FormAndOp{dwarf::DW_FORM_data4, dwarf::DW_OP_const4u} + : FormAndOp{dwarf::DW_FORM_data8, dwarf::DW_OP_const8u}; + }; if (Global->isThreadLocal()) { if (Asm->TM.useEmulatedTLS()) { // TODO: add debug info for emulated thread local mode. @@ -270,15 +281,12 @@ void DwarfCompileUnit::addLocationAttribute( // FIXME: Make this work with -gsplit-dwarf. // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { + auto FormAndOp = GetPointerSizedFormAndOp(); // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? 
dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); + addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(*Loc, - PointerSize == 4 ? dwarf::DW_FORM_data4 - : dwarf::DW_FORM_data8, + addExpr(*Loc, FormAndOp.Form, Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); @@ -292,13 +300,11 @@ void DwarfCompileUnit::addLocationAttribute( } } else if (Asm->TM.getRelocationModel() == Reloc::RWPI || Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) { + auto FormAndOp = GetPointerSizedFormAndOp(); // Constant - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); + addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op); // Relocation offset - addExpr(*Loc, PointerSize == 4 ? dwarf::DW_FORM_data4 - : dwarf::DW_FORM_data8, + addExpr(*Loc, FormAndOp.Form, Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym)); // Base register Register BaseReg = Asm->getObjFileLowering().getStaticBase(); diff --git a/llvm/test/DebugInfo/MSP430/global-var.ll b/llvm/test/DebugInfo/MSP430/global-var.ll new file mode 100644 index 000000000000..1941b33846dc --- /dev/null +++ b/llvm/test/DebugInfo/MSP430/global-var.ll @@ -0,0 +1,47 @@ +; RUN: llc --filetype=obj -o %t < %s +; RUN: llvm-dwarfdump --debug-info %t | FileCheck %s +; RUN: llvm-dwarfdump --verify %t + +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name ("global_var") +; CHECK-NEXT: DW_AT_type ({{0x[0-9]+}} "char") +; CHECK-NEXT: DW_AT_external (true) +; CHECK-NEXT: DW_AT_decl_file ("/tmp{{[/\\]}}global-var.c") +; CHECK-NEXT: DW_AT_decl_line (1) +; CHECK-NEXT: DW_AT_location (DW_OP_addr 0x0) + +; ModuleID = 'global-var.c' +source_filename = "global-var.c" +target datalayout = "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16" +target triple = "msp430" + +@global_var = dso_local global i8 42, align 1, !dbg !0 + +; 
Function Attrs: noinline nounwind optnone +define dso_local i16 @main() #0 !dbg !10 { +entry: + ret i16 0, !dbg !15 +} + +attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!6, !7, !8} +!llvm.ident = !{!9} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "global_var", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project ...)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "global-var.c", directory: "/tmp") +!4 = !{!0} +!5 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!6 = !{i32 7, !"Dwarf Version", i32 4} +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !{i32 1, !"wchar_size", i32 2} +!9 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project ...)"} +!10 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 2, type: !11, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !14) +!11 = !DISubroutineType(types: !12) +!12 = !{!13} +!13 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed) +!14 = !{} +!15 = !DILocation(line: 2, column: 13, scope: !10) From 9b63fff3db88ffa09e968a2875a812c85fa62a33 Mon Sep 17 00:00:00 2001 From: Sumanth Gundapaneni Date: Tue, 4 Jan 2022 12:26:34 -0800 Subject: [PATCH 568/992] [Hexagon] Update latencies on REG_SEQUENCE/COPY based on successors. If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency only if the latencies on all the uses are equal, otherwise set it to default. 
--- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 34 ++++++++++++++------ 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 21bb1633fa79..047b2176c684 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -477,19 +477,35 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine // the correct latency. - if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) { + // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency + // only if the latencies on all the uses are equal, otherwise set it to + // default. + if ((DstInst->isRegSequence() || DstInst->isCopy())) { Register DReg = DstInst->getOperand(0).getReg(); - MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr(); - unsigned UseIdx = -1; - for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) { - const MachineOperand &MO = DDst->getOperand(OpNum); - if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) { - UseIdx = OpNum; + int DLatency = -1; + for (const auto &DDep : Dst->Succs) { + MachineInstr *DDst = DDep.getSUnit()->getInstr(); + unsigned UseIdx = -1; + for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) { + const MachineOperand &MO = DDst->getOperand(OpNum); + if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) { + UseIdx = OpNum; + break; + } + } + int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, + *DDst, UseIdx)); + // Set DLatency for the first time. + DLatency = (DLatency == -1) ? Latency : DLatency; + + // For multiple uses, if the Latency is different across uses, reset + // DLatency. 
+ if (DLatency != Latency) { + DLatency = -1; break; } } - int DLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, - 0, *DDst, UseIdx)); + DLatency = std::max(DLatency, 0); Dep.setLatency((unsigned)DLatency); } From a04b5325051c906a8a0ddc58fffa1b095e024314 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 4 Jan 2022 12:23:17 -0800 Subject: [PATCH 569/992] [LegalizeIntegerTypes][RISCV] Teach PromoteSetCCOperands to check sign bits of unsigned compares. Unsigned compares work with either zero extended or sign extended inputs just like equality comparisons. I didn't allow this when I refactored the code in D116421 due to lack of tests. But I've since found a simple C test case that demonstrates when this can be useful. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D116617 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 22 +++++++++---------- llvm/test/CodeGen/RISCV/alu16.ll | 18 +++++++++++++++ llvm/test/CodeGen/RISCV/alu8.ll | 18 +++++++++++++++ 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 32086a79acdf..4a1e9d89df68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1747,18 +1747,16 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, // Prefer to promote the comparison operand with zero extension. - // If this is an equality comparison and the width of OpL/OpR excluding the - // duplicated sign bits is no greater than the width of LHS/RHS, we can avoid - // inserting a zext_inreg operation that we might not be able to remove. 
- if (ISD::isIntEqualitySetCC(CCCode)) { - unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(OpL); - unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(OpR); - if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && - OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { - LHS = OpL; - RHS = OpR; - return; - } + // If the width of OpL/OpR excluding the duplicated sign bits is no greater + // than the width of LHS/RHS, we can avoid inserting a zext_inreg operation + // that we might not be able to remove. + unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(OpR); + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; + return; + } // Otherwise, use zext_inreg. diff --git a/llvm/test/CodeGen/RISCV/alu16.ll b/llvm/test/CodeGen/RISCV/alu16.ll index 23dc433baf4e..b1f5e4a9aa85 100644 --- a/llvm/test/CodeGen/RISCV/alu16.ll +++ b/llvm/test/CodeGen/RISCV/alu16.ll @@ -62,6 +62,24 @@ define i16 @sltiu(i16 %a) nounwind { ret i16 %2 } +; Make sure we avoid an AND, if the input of an unsigned compare is known +; to be sign extended. This can occur due to InstCombine canonicalizing +; x s>= 0 && x s< 10 to x u< 10. 
+define i16 @sltiu_signext(i16 signext %a) nounwind { +; RV32I-LABEL: sltiu_signext: +; RV32I: # %bb.0: +; RV32I-NEXT: sltiu a0, a0, 10 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sltiu_signext: +; RV64I: # %bb.0: +; RV64I-NEXT: sltiu a0, a0, 10 +; RV64I-NEXT: ret + %1 = icmp ult i16 %a, 10 + %2 = zext i1 %1 to i16 + ret i16 %2 +} + define i16 @xori(i16 %a) nounwind { ; RV32I-LABEL: xori: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll index ed09174745b8..8611e752028d 100644 --- a/llvm/test/CodeGen/RISCV/alu8.ll +++ b/llvm/test/CodeGen/RISCV/alu8.ll @@ -58,6 +58,24 @@ define i8 @sltiu(i8 %a) nounwind { ret i8 %2 } +; Make sure we avoid an AND, if the input of an unsigned compare is known +; to be sign extended. This can occur due to InstCombine canonicalizing +; x s>= 0 && x s< 10 to x u< 10. +define i8 @sltiu_signext(i8 signext %a) nounwind { +; RV32I-LABEL: sltiu_signext: +; RV32I: # %bb.0: +; RV32I-NEXT: sltiu a0, a0, 10 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sltiu_signext: +; RV64I: # %bb.0: +; RV64I-NEXT: sltiu a0, a0, 10 +; RV64I-NEXT: ret + %1 = icmp ult i8 %a, 10 + %2 = zext i1 %1 to i8 + ret i8 %2 +} + define i8 @xori(i8 %a) nounwind { ; RV32I-LABEL: xori: ; RV32I: # %bb.0: From 32c92087502697663d781942a462ea478d4209c7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 4 Jan 2022 21:47:28 +0100 Subject: [PATCH 570/992] [flang][openacc] Remove unused variable. NFC. 
--- flang/lib/Lower/OpenACC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 49bccc1d46c6..f64978d7e826 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -926,7 +926,7 @@ static void genACC(Fortran::lower::AbstractConverter &converter, const auto &accClauseList = std::get(waitConstruct.t); - mlir::Value ifCond, asyncOperand, waitDevnum, async; + mlir::Value ifCond, waitDevnum, async; SmallVector waitOperands; // Async clause have optional values but can be present with From dd72ae3dcc6895f95e1203b40aabcb069c76a0ab Mon Sep 17 00:00:00 2001 From: David Goldman Date: Thu, 30 Dec 2021 12:31:59 -0500 Subject: [PATCH 571/992] [clang][ObjC] Add fix it for missing methods in impl We suggest inserting the method with an empty body at the end of the implementation decl. Differential Revision: https://reviews.llvm.org/D116417 --- clang/lib/Sema/SemaDeclObjC.cpp | 54 ++++++++++--------- .../FixIt/fixit-objc-missing-method-impl.m | 15 ++++++ 2 files changed, 43 insertions(+), 26 deletions(-) create mode 100644 clang/test/FixIt/fixit-objc-missing-method-impl.m diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp index d6e659e17069..d4fefc3d18d8 100644 --- a/clang/lib/Sema/SemaDeclObjC.cpp +++ b/clang/lib/Sema/SemaDeclObjC.cpp @@ -2212,9 +2212,8 @@ void Sema::CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, Diag(IVI->getLocation(), diag::err_inconsistent_ivar_count); } -static void WarnUndefinedMethod(Sema &S, SourceLocation ImpLoc, - ObjCMethodDecl *method, - bool &IncompleteImpl, +static void WarnUndefinedMethod(Sema &S, ObjCImplDecl *Impl, + ObjCMethodDecl *method, bool &IncompleteImpl, unsigned DiagID, NamedDecl *NeededFor = nullptr) { // No point warning no definition of method which is 'unavailable'. @@ -2227,10 +2226,19 @@ static void WarnUndefinedMethod(Sema &S, SourceLocation ImpLoc, // separate warnings. 
We will give that approach a try, as that // matches what we do with protocols. { - const Sema::SemaDiagnosticBuilder &B = S.Diag(ImpLoc, DiagID); + const Sema::SemaDiagnosticBuilder &B = S.Diag(Impl->getLocation(), DiagID); B << method; if (NeededFor) B << NeededFor; + + // Add an empty definition at the end of the @implementation. + std::string FixItStr; + llvm::raw_string_ostream Out(FixItStr); + method->print(Out, Impl->getASTContext().getPrintingPolicy()); + Out << " {\n}\n\n"; + + SourceLocation Loc = Impl->getAtEndRange().getBegin(); + B << FixItHint::CreateInsertion(Loc, FixItStr); } // Issue a note to the original declaration. @@ -2679,14 +2687,10 @@ static void findProtocolsWithExplicitImpls(const ObjCInterfaceDecl *Super, /// CheckProtocolMethodDefs - This routine checks unimplemented methods /// Declared in protocol, and those referenced by it. -static void CheckProtocolMethodDefs(Sema &S, - SourceLocation ImpLoc, - ObjCProtocolDecl *PDecl, - bool& IncompleteImpl, - const Sema::SelectorSet &InsMap, - const Sema::SelectorSet &ClsMap, - ObjCContainerDecl *CDecl, - LazyProtocolNameSet &ProtocolsExplictImpl) { +static void CheckProtocolMethodDefs( + Sema &S, ObjCImplDecl *Impl, ObjCProtocolDecl *PDecl, bool &IncompleteImpl, + const Sema::SelectorSet &InsMap, const Sema::SelectorSet &ClsMap, + ObjCContainerDecl *CDecl, LazyProtocolNameSet &ProtocolsExplictImpl) { ObjCCategoryDecl *C = dyn_cast(CDecl); ObjCInterfaceDecl *IDecl = C ? 
C->getClassInterface() : dyn_cast(CDecl); @@ -2773,9 +2777,8 @@ static void CheckProtocolMethodDefs(Sema &S, if (C || MethodInClass->isPropertyAccessor()) continue; unsigned DIAG = diag::warn_unimplemented_protocol_method; - if (!S.Diags.isIgnored(DIAG, ImpLoc)) { - WarnUndefinedMethod(S, ImpLoc, method, IncompleteImpl, DIAG, - PDecl); + if (!S.Diags.isIgnored(DIAG, Impl->getLocation())) { + WarnUndefinedMethod(S, Impl, method, IncompleteImpl, DIAG, PDecl); } } } @@ -2796,15 +2799,15 @@ static void CheckProtocolMethodDefs(Sema &S, continue; unsigned DIAG = diag::warn_unimplemented_protocol_method; - if (!S.Diags.isIgnored(DIAG, ImpLoc)) { - WarnUndefinedMethod(S, ImpLoc, method, IncompleteImpl, DIAG, PDecl); + if (!S.Diags.isIgnored(DIAG, Impl->getLocation())) { + WarnUndefinedMethod(S, Impl, method, IncompleteImpl, DIAG, PDecl); } } } // Check on this protocols's referenced protocols, recursively. for (auto *PI : PDecl->protocols()) - CheckProtocolMethodDefs(S, ImpLoc, PI, IncompleteImpl, InsMap, ClsMap, - CDecl, ProtocolsExplictImpl); + CheckProtocolMethodDefs(S, Impl, PI, IncompleteImpl, InsMap, ClsMap, CDecl, + ProtocolsExplictImpl); } /// MatchAllMethodDeclarations - Check methods declared in interface @@ -2827,7 +2830,7 @@ void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap, if (!I->isPropertyAccessor() && !InsMap.count(I->getSelector())) { if (ImmediateClass) - WarnUndefinedMethod(*this, IMPDecl->getLocation(), I, IncompleteImpl, + WarnUndefinedMethod(*this, IMPDecl, I, IncompleteImpl, diag::warn_undef_method_impl); continue; } else { @@ -2857,7 +2860,7 @@ void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap, if (!I->isPropertyAccessor() && !ClsMap.count(I->getSelector())) { if (ImmediateClass) - WarnUndefinedMethod(*this, IMPDecl->getLocation(), I, IncompleteImpl, + WarnUndefinedMethod(*this, IMPDecl, I, IncompleteImpl, diag::warn_undef_method_impl); } else { ObjCMethodDecl *ImpMethodDecl = @@ -3024,16 +3027,15 @@ void 
Sema::ImplMethodsVsClassMethods(Scope *S, ObjCImplDecl* IMPDecl, if (ObjCInterfaceDecl *I = dyn_cast (CDecl)) { for (auto *PI : I->all_referenced_protocols()) - CheckProtocolMethodDefs(*this, IMPDecl->getLocation(), PI, IncompleteImpl, - InsMap, ClsMap, I, ExplicitImplProtocols); + CheckProtocolMethodDefs(*this, IMPDecl, PI, IncompleteImpl, InsMap, + ClsMap, I, ExplicitImplProtocols); } else if (ObjCCategoryDecl *C = dyn_cast(CDecl)) { // For extended class, unimplemented methods in its protocols will // be reported in the primary class. if (!C->IsClassExtension()) { for (auto *P : C->protocols()) - CheckProtocolMethodDefs(*this, IMPDecl->getLocation(), P, - IncompleteImpl, InsMap, ClsMap, CDecl, - ExplicitImplProtocols); + CheckProtocolMethodDefs(*this, IMPDecl, P, IncompleteImpl, InsMap, + ClsMap, CDecl, ExplicitImplProtocols); DiagnoseUnimplementedProperties(S, IMPDecl, CDecl, /*SynthesizeProperties=*/false); } diff --git a/clang/test/FixIt/fixit-objc-missing-method-impl.m b/clang/test/FixIt/fixit-objc-missing-method-impl.m new file mode 100644 index 000000000000..acc089614a6e --- /dev/null +++ b/clang/test/FixIt/fixit-objc-missing-method-impl.m @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: cp %s %t +// RUN: not %clang_cc1 -pedantic -Werror -fixit -x objective-c %t +// RUN: %clang_cc1 -pedantic -Werror -x objective-c %t + +__attribute__((objc_root_class)) +@interface NSObject +@end + +@interface Foo : NSObject +- (void)fooey; // expected-note{{method 'fooey' declared here}} +@end + +@implementation Foo // expected-warning{{method definition for 'fooey' not found}} +@end From e902ffe6d7560f708b76edaa53d75edcb5d49a3f Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Tue, 4 Jan 2022 12:11:33 -0800 Subject: [PATCH 572/992] [Sema] Fix the assertion in Sema::ActOnDependentMemberExpr 617007240cbfb97c introduced the use of ActOnDependentMemberExpr with variable template specialization. 
The assertion inside ActOnDependentMemberExpr should be adjusted accordingly. Fixes https://bugs.llvm.org/show_bug.cgi?id=47211 Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D113146 --- clang/lib/Sema/SemaExprMember.cpp | 9 ++++++--- .../SemaCXX/cxx1y-variable-templates_in_class.cpp | 12 ++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 83006f9d804a..f67ef030feb7 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -504,9 +504,12 @@ Sema::ActOnDependentMemberExpr(Expr *BaseExpr, QualType BaseType, } } - assert(BaseType->isDependentType() || - NameInfo.getName().isDependentName() || - isDependentScopeSpecifier(SS)); + assert(BaseType->isDependentType() || NameInfo.getName().isDependentName() || + isDependentScopeSpecifier(SS) || + (TemplateArgs && llvm::any_of(TemplateArgs->arguments(), + [](const TemplateArgumentLoc &Arg) { + return Arg.getArgument().isDependent(); + }))); // Get the type being accessed in BaseType. If this is an arrow, the BaseExpr // must have pointer type, and the accessed type is the pointee. 
diff --git a/clang/test/SemaCXX/cxx1y-variable-templates_in_class.cpp b/clang/test/SemaCXX/cxx1y-variable-templates_in_class.cpp index 1a24c6680569..af121a8b75d5 100644 --- a/clang/test/SemaCXX/cxx1y-variable-templates_in_class.cpp +++ b/clang/test/SemaCXX/cxx1y-variable-templates_in_class.cpp @@ -394,6 +394,18 @@ namespace dependent_static_var_template { template static int n; // expected-note {{here}} } int &t = B::template n; // expected-error {{use of variable template 'n' requires template arguments}} + + struct C { + template static T G; + }; + template T C::G = T(6); + + template T F() { + C c; + return c.G; + } + + int cf() { return F(); } } #ifndef PRECXX11 From f6fb7bf636e3257a32a629460d447eea76616384 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sun, 26 Dec 2021 22:57:46 -0500 Subject: [PATCH 573/992] [libc++] Add an early return for __partial_sort of an empty range. If `__first == __middle`, then `partial_sort` is a no-op; don't bother to iterate all the way from `__middle` to `__end`. Fixes #49431. 
Differential Revision: https://reviews.llvm.org/D116296 --- libcxx/include/__algorithm/partial_sort.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index 017ac90b6714..40e39e2be974 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -33,6 +33,8 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 void __partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { + if (__first == __middle) + return; _VSTD::__make_heap<_Compare>(__first, __middle, __comp); typename iterator_traits<_RandomAccessIterator>::difference_type __len = __middle - __first; for (_RandomAccessIterator __i = __middle; __i != __last; ++__i) From e80ef6bd279efa10ae6dc3769787ca11d12de7a9 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Tue, 4 Jan 2022 16:15:02 -0500 Subject: [PATCH 574/992] [libc++] Fix whitespace in __partial_sort. NFC. 
--- libcxx/include/__algorithm/partial_sort.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index 40e39e2be974..39d1a29dd97b 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_CONSTEXPR_AFTER_CXX17 void __partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, - _Compare __comp) + _Compare __comp) { if (__first == __middle) return; @@ -66,7 +66,7 @@ void partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) { _VSTD::partial_sort(__first, __middle, __last, - __less::value_type>()); + __less::value_type>()); } _LIBCPP_END_NAMESPACE_STD From d6a68d08f3845ca321d38243bb6595599ae9f93b Mon Sep 17 00:00:00 2001 From: Ikhlas Ajbar Date: Tue, 4 Jan 2022 12:58:04 -0800 Subject: [PATCH 575/992] [Hexagon] Refactor updateLatency() function Co-authored-by: Sumanth Gundapaneni --- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 42 ++++++++++---------- llvm/lib/Target/Hexagon/HexagonSubtarget.h | 4 +- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 047b2176c684..bdd2a2cfc5fa 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -468,10 +468,8 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, return; } - if (!hasV60Ops()) - return; - - // Set the latency for a copy to zero since we hope that is will get removed. + // Set the latency for a copy to zero since we hope that it will get + // removed. 
if (DstInst->isCopy()) Dep.setLatency(0); @@ -485,7 +483,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, int DLatency = -1; for (const auto &DDep : Dst->Succs) { MachineInstr *DDst = DDep.getSUnit()->getInstr(); - unsigned UseIdx = -1; + int UseIdx = -1; for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) { const MachineOperand &MO = DDst->getOperand(OpNum); if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) { @@ -493,6 +491,10 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, break; } } + + if (UseIdx == -1) + continue; + int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx)); // Set DLatency for the first time. @@ -518,8 +520,10 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, Dep.setLatency(0); return; } - - updateLatency(*SrcInst, *DstInst, Dep); + int Latency = Dep.getLatency(); + bool IsArtificial = Dep.isArtificial(); + Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency); + Dep.setLatency(Latency); } void HexagonSubtarget::getPostRAMutations( @@ -548,21 +552,19 @@ bool HexagonSubtarget::usePredicatedCalls() const { return EnablePredicatedCalls; } -void HexagonSubtarget::updateLatency(MachineInstr &SrcInst, - MachineInstr &DstInst, SDep &Dep) const { - if (Dep.isArtificial()) { - Dep.setLatency(1); - return; - } - +int HexagonSubtarget::updateLatency(MachineInstr &SrcInst, + MachineInstr &DstInst, bool IsArtificial, + int Latency) const { + if (IsArtificial) + return 1; if (!hasV60Ops()) - return; - - auto &QII = static_cast(*getInstrInfo()); + return Latency; + auto &QII = static_cast(*getInstrInfo()); // BSB scheduling. 
if (QII.isHVXVec(SrcInst) || useBSBScheduling()) - Dep.setLatency((Dep.getLatency() + 1) >> 1); + Latency = (Latency + 1) >> 1; + return Latency; } void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { @@ -598,9 +600,9 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { // For some instructions (ex: COPY), we might end up with < 0 latency // as they don't have any Itinerary class associated with them. Latency = std::max(Latency, 0); - + bool IsArtificial = I.isArtificial(); + Latency = updateLatency(*SrcI, *DstI, IsArtificial, Latency); I.setLatency(Latency); - updateLatency(*SrcI, *DstI, I); } } diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index e4f375440be1..db682676cf12 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -325,8 +325,8 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { private: // Helper function responsible for increasing the latency only. - void updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep) - const; + int updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, + bool IsArtificial, int Latency) const; void restoreLatency(SUnit *Src, SUnit *Dst) const; void changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat) const; bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII, From 502e5df0e08e0ea625b528e45fc92257273b6d89 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 22 Dec 2021 22:36:42 -0500 Subject: [PATCH 576/992] [libc++] Implement `ranges::{cbegin,cend}` per the spec. The big change here is that they now work as intended for rvalues, e.g. `ranges::cbegin(std::string_view("hello"))`. Also, add tests verifying their return types. 
Differential Revision: https://reviews.llvm.org/D116199 --- libcxx/include/__ranges/access.h | 46 ++++++++-------- .../std/ranges/range.access/begin.pass.cpp | 54 ++++++++++++++----- .../test/std/ranges/range.access/end.pass.cpp | 44 ++++++++++++++- 3 files changed, 106 insertions(+), 38 deletions(-) diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index 91dc3055c86d..4a1242130ac0 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -160,20 +160,19 @@ namespace ranges { namespace __cbegin { struct __fn { template - requires invocable - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const - noexcept(noexcept(ranges::begin(_VSTD::as_const(__t)))) - { - return ranges::begin(_VSTD::as_const(__t)); - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI + constexpr auto operator()(_Tp& __t) const + noexcept(noexcept(ranges::begin(static_cast(__t)))) + -> decltype( ranges::begin(static_cast(__t))) + { return ranges::begin(static_cast(__t)); } template - requires is_rvalue_reference_v<_Tp> && invocable - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::begin(static_cast<_Tp const&&>(__t)))) - { - return ranges::begin(static_cast<_Tp const&&>(__t)); - } + requires is_rvalue_reference_v<_Tp&&> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI + constexpr auto operator()(_Tp&& __t) const + noexcept(noexcept(ranges::begin(static_cast(__t)))) + -> decltype( ranges::begin(static_cast(__t))) + { return ranges::begin(static_cast(__t)); } }; } @@ -188,20 +187,19 @@ namespace ranges { namespace __cend { struct __fn { template - requires invocable - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const - noexcept(noexcept(ranges::end(_VSTD::as_const(__t)))) - { - return ranges::end(_VSTD::as_const(__t)); - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI + constexpr auto operator()(_Tp& __t) const + 
noexcept(noexcept(ranges::end(static_cast(__t)))) + -> decltype( ranges::end(static_cast(__t))) + { return ranges::end(static_cast(__t)); } template - requires is_rvalue_reference_v<_Tp> && invocable - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::end(static_cast<_Tp const&&>(__t)))) - { - return ranges::end(static_cast<_Tp const&&>(__t)); - } + requires is_rvalue_reference_v<_Tp&&> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI + constexpr auto operator()(_Tp&& __t) const + noexcept(noexcept(ranges::end(static_cast(__t)))) + -> decltype( ranges::end(static_cast(__t))) + { return ranges::end(static_cast(__t)); } }; } diff --git a/libcxx/test/std/ranges/range.access/begin.pass.cpp b/libcxx/test/std/ranges/range.access/begin.pass.cpp index 95d2196803b2..1a6951967f88 100644 --- a/libcxx/test/std/ranges/range.access/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.access/begin.pass.cpp @@ -11,6 +11,7 @@ // UNSUPPORTED: libcpp-has-no-incomplete-ranges // std::ranges::begin +// std::ranges::cbegin #include @@ -18,8 +19,8 @@ #include "test_macros.h" #include "test_iterators.h" -using RangeBeginT = decltype(std::ranges::begin)&; -using RangeCBeginT = decltype(std::ranges::cbegin)&; +using RangeBeginT = decltype(std::ranges::begin); +using RangeCBeginT = decltype(std::ranges::cbegin); static int globalBuff[8]; @@ -49,6 +50,28 @@ static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); +constexpr bool testReturnTypes() { + { + int *x[2]; + ASSERT_SAME_TYPE(decltype(std::ranges::begin(x)), int**); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(x)), int* const*); + } + { + int x[2][2]; + ASSERT_SAME_TYPE(decltype(std::ranges::begin(x)), int(*)[2]); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(x)), const int(*)[2]); + } + { + struct Different { + char*& begin(); + short*& begin() const; + } x; + ASSERT_SAME_TYPE(decltype(std::ranges::begin(x)), char*); + 
ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(x)), short*); + } + return true; +} + constexpr bool testArray() { int a[2]; assert(std::ranges::begin(a) == a); @@ -118,12 +141,18 @@ constexpr bool testBeginMember() { BeginMember a; assert(std::ranges::begin(a) == &a.x); assert(std::ranges::cbegin(a) == &a.x); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); NonConstBeginMember b; assert(std::ranges::begin(b) == &b.x); + static_assert(!std::is_invocable_v); EnabledBorrowingBeginMember c; + assert(std::ranges::begin(c) == &globalBuff[0]); + assert(std::ranges::cbegin(c) == &globalBuff[0]); assert(std::ranges::begin(std::move(c)) == &globalBuff[0]); + assert(std::ranges::cbegin(std::move(c)) == &globalBuff[0]); BeginMemberFunction d; assert(std::ranges::begin(d) == &d.x); @@ -202,44 +231,44 @@ static_assert(!std::is_invocable_v); - assert(std::ranges::begin(aa) == &aa.x); + static_assert(!std::invocable); assert(std::ranges::cbegin(a) == &a.x); + assert(std::ranges::begin(aa) == &aa.x); assert(std::ranges::cbegin(aa) == &aa.x); BeginFunctionByValue b{}; const BeginFunctionByValue bb{}; assert(std::ranges::begin(b) == &globalBuff[1]); - assert(std::ranges::begin(bb) == &globalBuff[1]); assert(std::ranges::cbegin(b) == &globalBuff[1]); + assert(std::ranges::begin(bb) == &globalBuff[1]); assert(std::ranges::cbegin(bb) == &globalBuff[1]); BeginFunctionEnabledBorrowing c{}; const BeginFunctionEnabledBorrowing cc{}; assert(std::ranges::begin(std::move(c)) == &globalBuff[2]); - static_assert(!std::invocable); + assert(std::ranges::cbegin(std::move(c)) == &globalBuff[2]); assert(std::ranges::begin(std::move(cc)) == &globalBuff[2]); assert(std::ranges::cbegin(std::move(cc)) == &globalBuff[2]); BeginFunctionReturnsEmptyPtr d{}; const BeginFunctionReturnsEmptyPtr dd{}; - static_assert(!std::invocable); - assert(std::ranges::begin(dd) == &dd.x); + static_assert(!std::invocable); assert(std::ranges::cbegin(d) == &d.x); + assert(std::ranges::begin(dd) == 
&dd.x); assert(std::ranges::cbegin(dd) == &dd.x); BeginFunctionWithDataMember e{}; const BeginFunctionWithDataMember ee{}; - static_assert(!std::invocable); + static_assert(!std::invocable); assert(std::ranges::begin(ee) == &ee.x); assert(std::ranges::cbegin(e) == &e.x); assert(std::ranges::cbegin(ee) == &ee.x); BeginFunctionWithPrivateBeginMember f{}; const BeginFunctionWithPrivateBeginMember ff{}; - static_assert(!std::invocable); - assert(std::ranges::begin(ff) == &ff.y); + static_assert(!std::invocable); assert(std::ranges::cbegin(f) == &f.y); + assert(std::ranges::begin(ff) == &ff.y); assert(std::ranges::cbegin(ff) == &ff.y); return true; @@ -274,8 +303,9 @@ struct BeginReturnsArrayRef { static_assert(noexcept(std::ranges::begin(brar))); static_assert(noexcept(std::ranges::cbegin(brar))); - int main(int, char**) { + static_assert(testReturnTypes()); + testArray(); static_assert(testArray()); diff --git a/libcxx/test/std/ranges/range.access/end.pass.cpp b/libcxx/test/std/ranges/range.access/end.pass.cpp index 84b4904d8f96..27eaf741a113 100644 --- a/libcxx/test/std/ranges/range.access/end.pass.cpp +++ b/libcxx/test/std/ranges/range.access/end.pass.cpp @@ -11,6 +11,7 @@ // UNSUPPORTED: libcpp-has-no-incomplete-ranges // std::ranges::end +// std::ranges::cend #include @@ -18,8 +19,8 @@ #include "test_macros.h" #include "test_iterators.h" -using RangeEndT = decltype(std::ranges::end)&; -using RangeCEndT = decltype(std::ranges::cend)&; +using RangeEndT = decltype(std::ranges::end); +using RangeCEndT = decltype(std::ranges::cend); static int globalBuff[8]; @@ -47,6 +48,30 @@ static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); +constexpr bool testReturnTypes() { + { + int *x[2]; + ASSERT_SAME_TYPE(decltype(std::ranges::end(x)), int**); + ASSERT_SAME_TYPE(decltype(std::ranges::cend(x)), int* const*); + } + { + int x[2][2]; + ASSERT_SAME_TYPE(decltype(std::ranges::end(x)), int(*)[2]); + 
ASSERT_SAME_TYPE(decltype(std::ranges::cend(x)), const int(*)[2]); + } + { + struct Different { + char *begin(); + sentinel_wrapper& end(); + short *begin() const; + sentinel_wrapper& end() const; + } x; + ASSERT_SAME_TYPE(decltype(std::ranges::end(x)), sentinel_wrapper); + ASSERT_SAME_TYPE(decltype(std::ranges::cend(x)), sentinel_wrapper); + } + return true; +} + constexpr bool testArray() { int a[2]; assert(std::ranges::end(a) == a + 2); @@ -139,9 +164,11 @@ constexpr bool testEndMember() { NonConstEndMember b; assert(std::ranges::end(b) == &b.x); + static_assert(!std::is_invocable_v); EnabledBorrowingEndMember c; assert(std::ranges::end(std::move(c)) == &globalBuff[0]); + assert(std::ranges::cend(std::move(c)) == &globalBuff[0]); EndMemberFunction d; assert(std::ranges::end(d) == &d.x); @@ -246,7 +273,9 @@ struct BeginMemberEndFunction { constexpr bool testEndFunction() { const EndFunction a{}; assert(std::ranges::end(a) == &a.x); + assert(std::ranges::cend(a) == &a.x); EndFunction aa{}; + static_assert(!std::is_invocable_v); assert(std::ranges::cend(aa) == &aa.x); EndFunctionByValue b; @@ -255,25 +284,34 @@ constexpr bool testEndFunction() { EndFunctionEnabledBorrowing c; assert(std::ranges::end(std::move(c)) == &globalBuff[2]); + assert(std::ranges::cend(std::move(c)) == &globalBuff[2]); const EndFunctionReturnsEmptyPtr d{}; assert(std::ranges::end(d) == &d.x); + assert(std::ranges::cend(d) == &d.x); EndFunctionReturnsEmptyPtr dd{}; + static_assert(!std::is_invocable_v); assert(std::ranges::cend(dd) == &dd.x); const EndFunctionWithDataMember e{}; assert(std::ranges::end(e) == &e.x); + assert(std::ranges::cend(e) == &e.x); EndFunctionWithDataMember ee{}; + static_assert(!std::is_invocable_v); assert(std::ranges::cend(ee) == &ee.x); const EndFunctionWithPrivateEndMember f{}; assert(std::ranges::end(f) == &f.y); + assert(std::ranges::cend(f) == &f.y); EndFunctionWithPrivateEndMember ff{}; + static_assert(!std::is_invocable_v); assert(std::ranges::cend(ff) == 
&ff.y); const BeginMemberEndFunction g{}; assert(std::ranges::end(g) == &g.x); + assert(std::ranges::cend(g) == &g.x); BeginMemberEndFunction gg{}; + static_assert(!std::is_invocable_v); assert(std::ranges::cend(gg) == &gg.x); return true; @@ -313,6 +351,8 @@ static_assert(noexcept(std::ranges::end(erar))); static_assert(noexcept(std::ranges::cend(erar))); int main(int, char**) { + static_assert(testReturnTypes()); + testArray(); static_assert(testArray()); From cff1a2ed5159bc16e36c1515518239e18d05f2ff Mon Sep 17 00:00:00 2001 From: SANTANU DAS Date: Tue, 3 Aug 2021 21:27:56 +0530 Subject: [PATCH 577/992] [Hexagon] HVX .new store uses different resources When checking resources in the post RA scheduler, see if a .new vector store should be used instead of a regular vector store. It may not be possible to schedule a regular vector store, but it may be possible to schedule a .new version. If the correct one isn't used, then the post RA scheduler may not generate the best schedule. --- .../Hexagon/HexagonHazardRecognizer.cpp | 27 +++++++++++-------- .../Target/Hexagon/HexagonHazardRecognizer.h | 4 +++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp index 44679d429de5..e2215c9900d0 100644 --- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp @@ -44,12 +44,7 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) { if (!Resources->canReserveResources(*MI)) { LLVM_DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI); HazardType RetVal = Hazard; - if (TII->mayBeNewStore(*MI)) { - // Make sure the register to be stored is defined by an instruction in the - // packet. 
- MachineOperand &MO = MI->getOperand(MI->getNumOperands() - 1); - if (!MO.isReg() || RegDefs.count(MO.getReg()) == 0) - return Hazard; + if (isNewStore(*MI)) { // The .new store version uses different resources so check if it // causes a hazard. MachineFunction *MF = MI->getParent()->getParent(); @@ -105,6 +100,15 @@ bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) { return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum)); } +/// Return true if the instruction would be converted to a new value store when +/// packetized. +bool HexagonHazardRecognizer::isNewStore(MachineInstr &MI) { + if (!TII->mayBeNewStore(MI)) + return false; + MachineOperand &MO = MI.getOperand(MI.getNumOperands() - 1); + return (MO.isReg() && RegDefs.count(MO.getReg()) != 0); +} + void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) { MachineInstr *MI = SU->getInstr(); if (!MI) @@ -119,7 +123,7 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) { if (TII->isZeroCost(MI->getOpcode())) return; - if (!Resources->canReserveResources(*MI)) { + if (!Resources->canReserveResources(*MI) || isNewStore(*MI)) { // It must be a .new store since other instructions must be able to be // reserved at this point. 
assert(TII->mayBeNewStore(*MI) && "Expecting .new store"); @@ -127,11 +131,12 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) { MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)), MI->getDebugLoc()); - assert(Resources->canReserveResources(*NewMI)); - Resources->reserveResources(*NewMI); + if (Resources->canReserveResources(*NewMI)) + Resources->reserveResources(*NewMI); + else + Resources->reserveResources(*MI); MF->deleteMachineInstr(NewMI); - } - else + } else Resources->reserveResources(*MI); LLVM_DEBUG(dbgs() << " Add instruction " << *MI); diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h index 53b9cb43b4b6..0528cbd1f15f 100644 --- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h +++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h @@ -40,6 +40,10 @@ class HexagonHazardRecognizer : public ScheduleHazardRecognizer { // The set of registers defined by instructions in the current packet. SmallSet RegDefs; + // Return true if the instruction is a store that is converted to a new value + // store because its value is defined in the same packet. + bool isNewStore(MachineInstr &MI); + public: HexagonHazardRecognizer(const InstrItineraryData *II, const HexagonInstrInfo *HII, From d5b6e30ed3acad794dd0aec400e617daffc6cc3d Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Thu, 2 Dec 2021 14:58:23 -0800 Subject: [PATCH 578/992] [LLDB][Clang] add AccessSpecDecl for methods and fields in RecordType This allows access type be printed when running `lldb-test -dump-ast` and `lldb-test -dump-clang-ast`. 
Differential Revision: https://reviews.llvm.org/D115062 --- .../TypeSystem/Clang/TypeSystemClang.cpp | 69 +++++++++++++++++-- .../TypeSystem/Clang/TypeSystemClang.h | 11 +++ .../Shell/SymbolFile/NativePDB/tag-types.cpp | 29 +++++++- 3 files changed, 100 insertions(+), 9 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 88c3aedb4c6b..f8f0689ee2ac 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -1345,7 +1345,30 @@ namespace { bool IsValueParam(const clang::TemplateArgument &argument) { return argument.getKind() == TemplateArgument::Integral; } + +void AddAccessSpecifierDecl(clang::CXXRecordDecl *cxx_record_decl, + ASTContext &ct, + clang::AccessSpecifier previous_access, + clang::AccessSpecifier access_specifier) { + if (!cxx_record_decl->isClass() && !cxx_record_decl->isStruct()) + return; + if (previous_access != access_specifier) { + // For struct, don't add AS_public if it's the first AccessSpecDecl. + // For class, don't add AS_private if it's the first AccessSpecDecl. 
+ if ((cxx_record_decl->isStruct() && + previous_access == clang::AccessSpecifier::AS_none && + access_specifier == clang::AccessSpecifier::AS_public) || + (cxx_record_decl->isClass() && + previous_access == clang::AccessSpecifier::AS_none && + access_specifier == clang::AccessSpecifier::AS_private)) { + return; + } + cxx_record_decl->addDecl( + AccessSpecDecl::Create(ct, access_specifier, cxx_record_decl, + SourceLocation(), SourceLocation())); + } } +} // namespace static TemplateParameterList *CreateTemplateParameterList( ASTContext &ast, @@ -2552,6 +2575,22 @@ ClangASTMetadata *TypeSystemClang::GetMetadata(const clang::Type *object) { return nullptr; } +void TypeSystemClang::SetCXXRecordDeclAccess(const clang::CXXRecordDecl *object, + clang::AccessSpecifier access) { + if (access == clang::AccessSpecifier::AS_none) + m_cxx_record_decl_access.erase(object); + else + m_cxx_record_decl_access[object] = access; +} + +clang::AccessSpecifier +TypeSystemClang::GetCXXRecordDeclAccess(const clang::CXXRecordDecl *object) { + auto It = m_cxx_record_decl_access.find(object); + if (It != m_cxx_record_decl_access.end()) + return It->second; + return clang::AccessSpecifier::AS_none; +} + clang::DeclContext * TypeSystemClang::GetDeclContextForType(const CompilerType &type) { return GetDeclContextForType(ClangUtil::GetQualType(type)); @@ -7276,9 +7315,17 @@ clang::FieldDecl *TypeSystemClang::AddFieldToRecordType( } if (field) { - field->setAccess( - TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access)); - + clang::AccessSpecifier access_specifier = + TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access); + field->setAccess(access_specifier); + + if (clang::CXXRecordDecl *cxx_record_decl = + llvm::dyn_cast(record_decl)) { + AddAccessSpecifierDecl(cxx_record_decl, ast->getASTContext(), + ast->GetCXXRecordDeclAccess(cxx_record_decl), + access_specifier); + ast->SetCXXRecordDeclAccess(cxx_record_decl, access_specifier); + } record_decl->addDecl(field); 
VerifyDecl(field); @@ -7657,6 +7704,11 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType( cxx_method_decl->setParams(llvm::ArrayRef(params)); + AddAccessSpecifierDecl(cxx_record_decl, getASTContext(), + GetCXXRecordDeclAccess(cxx_record_decl), + access_specifier); + SetCXXRecordDeclAccess(cxx_record_decl, access_specifier); + cxx_record_decl->addDecl(cxx_method_decl); // Sometimes the debug info will mention a constructor (default/copy/move), @@ -8190,6 +8242,11 @@ bool TypeSystemClang::CompleteTagDeclarationDefinition( if (qual_type.isNull()) return false; + TypeSystemClang *lldb_ast = + llvm::dyn_cast(type.GetTypeSystem()); + if (lldb_ast == nullptr) + return false; + // Make sure we use the same methodology as // TypeSystemClang::StartTagDeclarationDefinition() as to how we start/end // the definition. @@ -8220,6 +8277,8 @@ bool TypeSystemClang::CompleteTagDeclarationDefinition( cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true); cxx_record_decl->setHasExternalLexicalStorage(false); cxx_record_decl->setHasExternalVisibleStorage(false); + lldb_ast->SetCXXRecordDeclAccess(cxx_record_decl, + clang::AccessSpecifier::AS_none); return true; } } @@ -8233,10 +8292,6 @@ bool TypeSystemClang::CompleteTagDeclarationDefinition( if (enum_decl->isCompleteDefinition()) return true; - TypeSystemClang *lldb_ast = - llvm::dyn_cast(type.GetTypeSystem()); - if (lldb_ast == nullptr) - return false; clang::ASTContext &ast = lldb_ast->getASTContext(); /// TODO This really needs to be fixed. 
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index f3a07397ec44..e0f5906778a1 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -196,6 +196,11 @@ class TypeSystemClang : public TypeSystem { ClangASTMetadata *GetMetadata(const clang::Decl *object); ClangASTMetadata *GetMetadata(const clang::Type *object); + void SetCXXRecordDeclAccess(const clang::CXXRecordDecl *object, + clang::AccessSpecifier access); + clang::AccessSpecifier + GetCXXRecordDeclAccess(const clang::CXXRecordDecl *object); + // Basic Types CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, size_t bit_size) override; @@ -1080,6 +1085,12 @@ class TypeSystemClang : public TypeSystem { /// Maps Types to their associated ClangASTMetadata. TypeMetadataMap m_type_metadata; + typedef llvm::DenseMap + CXXRecordDeclAccessMap; + /// Maps CXXRecordDecl to their most recent added method/field's + /// AccessSpecifier. + CXXRecordDeclAccessMap m_cxx_record_decl_access; + /// The sema associated that is currently used to build this ASTContext. /// May be null if we are already done parsing this ASTContext or the /// ASTContext wasn't created by parsing source code. diff --git a/lldb/test/Shell/SymbolFile/NativePDB/tag-types.cpp b/lldb/test/Shell/SymbolFile/NativePDB/tag-types.cpp index df00f15c1a03..2073cecfc670 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/tag-types.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/tag-types.cpp @@ -11,16 +11,23 @@ struct Struct { // Test builtin types, which are represented by special CodeView type indices. 
bool B; +private: char C; +public: signed char SC; +protected: unsigned char UC; char16_t C16; char32_t C32; +protected: wchar_t WC; short S; unsigned short US; +public: int I; +private: unsigned int UI; +public: long L; unsigned long UL; long long LL; @@ -32,15 +39,20 @@ struct Struct { // Test class class Class { -public: // Test pointers to builtin types, which are represented by different special // CodeView type indices. bool *PB; +public: char *PC; +private: signed char *PSC; +protected: unsigned char *PUC; +private: char16_t *PC16; +public: char32_t *PC32; +private: wchar_t *PWC; short *PS; unsigned short *PUS; @@ -155,16 +167,22 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) type lookup -- Struct // CHECK-NEXT: struct Struct { // CHECK-NEXT: bool B; +// CHECK-NEXT: private: // CHECK-NEXT: char C; +// CHECK-NEXT: public: // CHECK-NEXT: signed char SC; +// CHECK-NEXT: protected: // CHECK-NEXT: unsigned char UC; // CHECK-NEXT: char16_t C16; // CHECK-NEXT: char32_t C32; // CHECK-NEXT: wchar_t WC; // CHECK-NEXT: short S; // CHECK-NEXT: unsigned short US; +// CHECK-NEXT: public: // CHECK-NEXT: int I; +// CHECK-NEXT: private: // CHECK-NEXT: unsigned int UI; +// CHECK-NEXT: public: // CHECK-NEXT: long L; // CHECK-NEXT: unsigned long UL; // CHECK-NEXT: long long LL; @@ -176,11 +194,17 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) type lookup -- Class // CHECK-NEXT: class Class { // CHECK-NEXT: bool *PB; +// CHECK-NEXT: public: // CHECK-NEXT: char *PC; +// CHECK-NEXT: private: // CHECK-NEXT: signed char *PSC; +// CHECK-NEXT: protected: // CHECK-NEXT: unsigned char *PUC; +// CHECK-NEXT: private: // CHECK-NEXT: char16_t *PC16; +// CHECK-NEXT: public: // CHECK-NEXT: char32_t *PC32; +// CHECK-NEXT: private: // CHECK-NEXT: wchar_t *PWC; // CHECK-NEXT: short *PS; // CHECK-NEXT: unsigned short *PUS; @@ -217,7 +241,8 @@ int main(int argc, char **argv) { // CHECK-NEXT: } // CHECK-NEXT: (lldb) type lookup -- Derived // CHECK-NEXT: class Derived : 
public Class { -// CHECK: Derived &Reference; +// CHECK-NEXT: public: +// CHECK-NEXT: Derived &Reference; // CHECK-NEXT: OneMember Member; // CHECK-NEXT: const OneMember ConstMember; // CHECK-NEXT: volatile OneMember VolatileMember; From 2b1c6df5a60ab7846974676586b0e3801e919772 Mon Sep 17 00:00:00 2001 From: Harsha Jagasia Date: Fri, 2 Oct 2020 11:26:45 -0500 Subject: [PATCH 579/992] [Hexagon] Performance regression with b2b For code below: { r7 = addasl(r3,r0,#2) r8 = addasl(r3,r2,#2) r5 = memw(r3+r0<<#2) r6 = memw(r3+r2<<#2) } { p1 = cmp.gtu(r6,r5) if (p1.new) memw(r8+#0) = r5 if (p1.new) memw(r7+#0) = r6 } { r0 = mux(p1,r2,r4) } In packetizer, a new packet is created for the cmp instruction since there aren't enough resources in previous packet. Also it is determined that the cmp stalls by 2 cycles since it depends on the prior load of r5. In current packetizer implementation, the predicated store is evaluated for whether it can go in the same packet as compare, and since the compare stalls, the stall of the predicated store does not matter and it can go in the same packet as the cmp. However the predicated store will stall for more cycles because of its dependence on the addasl instruction and to avoid that stall we can put it in a new packet. Improve the packetizer to check if an instruction being added to packet will stall longer than instruction already in packet and if so create a new packet. 
--- .../Target/Hexagon/HexagonVLIWPacketizer.cpp | 32 ++++++---- .../Target/Hexagon/HexagonVLIWPacketizer.h | 4 ++ llvm/test/CodeGen/Hexagon/nbench1.ll | 64 +++++++++++++++++++ 3 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/nbench1.ll diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 85ec0cdcd8f0..0f736a189245 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -1696,9 +1696,12 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { MachineBasicBlock::iterator MII = MI.getIterator(); MachineBasicBlock *MBB = MI.getParent(); - if (CurrentPacketMIs.empty()) + if (CurrentPacketMIs.empty()) { PacketStalls = false; + PacketStallCycles = 0; + } PacketStalls |= producesStall(MI); + PacketStallCycles = std::max(PacketStallCycles, calcStall(MI)); if (MI.isImplicitDef()) { // Add to the packet to allow subsequent instructions to be checked @@ -1878,12 +1881,7 @@ bool HexagonPacketizerList::isPureSlot0InsnWithNoSlot1Store( } // V60 forward scheduling. -bool HexagonPacketizerList::producesStall(const MachineInstr &I) { - // If the packet already stalls, then ignore the stall from a subsequent - // instruction in the same packet. - if (PacketStalls) - return false; - +unsigned int HexagonPacketizerList::calcStall(const MachineInstr &I) { // Check whether the previous packet is in a different loop. If this is the // case, there is little point in trying to avoid a stall because that would // favor the rare case (loop entry) over the common case (loop iteration). 
@@ -1895,10 +1893,12 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { auto *OldBB = OldPacketMIs.front()->getParent(); auto *ThisBB = I.getParent(); if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) - return false; + return 0; } SUnit *SUI = MIToSUnit[const_cast(&I)]; + if (!SUI) + return 0; // If the latency is 0 and there is a data dependence between this // instruction and any instruction in the current packet, we disregard any @@ -1927,7 +1927,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { if (Pred.getSUnit() == SUJ) if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) || HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I)) - return false; + return 0; } // Check if the latency is greater than one between this instruction and any @@ -1936,10 +1936,20 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { SUnit *SUJ = MIToSUnit[J]; for (auto &Pred : SUI->Preds) if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) - return true; + return Pred.getLatency(); } - return false; + return 0; +} + +bool HexagonPacketizerList::producesStall(const MachineInstr &I) { + unsigned int Latency = calcStall(I); + if (Latency == 0) + return false; + // Ignore stall unless it stalls more than previous instruction in packet + if (PacketStalls) + return Latency > PacketStallCycles; + return true; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 27a47220570a..5d1b6d6faa12 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -56,6 +56,9 @@ class HexagonPacketizerList : public VLIWPacketizerList { // Set to true if the packet contains an instruction that stalls with an // instruction from the previous packet. 
bool PacketStalls = false; + // Set to the number of cycles of stall a given instruction will incur + // because of dependence on instruction in previous packet. + unsigned int PacketStallCycles = 0; // Set to true if the packet has a duplex pair of sub-instructions. bool PacketHasDuplex = false; @@ -157,6 +160,7 @@ class HexagonPacketizerList : public VLIWPacketizerList { bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J); bool producesStall(const MachineInstr &MI); bool isPureSlot0InsnWithNoSlot1Store(const MachineInstr &MI); + unsigned int calcStall(const MachineInstr &MI); }; } // end namespace llvm diff --git a/llvm/test/CodeGen/Hexagon/nbench1.ll b/llvm/test/CodeGen/Hexagon/nbench1.ll new file mode 100644 index 000000000000..8300a9ab89ca --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/nbench1.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=hexagon -O3 < %s | FileCheck %s + +; if instruction being considered for addition to packet has higher latency, +; end existing packet and start a new one. 
+ +; CHECK: .LBB0_4: +; CHECK: p{{[0-3]+}} = cmp.gtu(r{{[0-9]+}},r{{[0-9]+}}) +; CHECK-NEXT: } + +@array = external dso_local local_unnamed_addr global i32*, align 4 + +; Function Attrs: nofree norecurse nounwind +define dso_local void @NumSift(i32 %i, i32 %j) local_unnamed_addr #0 { +entry: + %add36 = shl i32 %i, 1 + %cmp.not37 = icmp ugt i32 %add36, %j + br i1 %cmp.not37, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + %0 = load i32*, i32** @array, align 4 + %add16 = add i32 %j, 1 + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %if.end17 + %add39 = phi i32 [ %add36, %while.body.lr.ph ], [ %add, %if.end17 ] + %i.addr.038 = phi i32 [ %i, %while.body.lr.ph ], [ %i.addr.1, %if.end17 ] + %cmp2 = icmp ult i32 %add39, %j + br i1 %cmp2, label %if.then, label %if.end7 + +if.then: ; preds = %while.body + %arrayidx = getelementptr inbounds i32, i32* %0, i32 %add39 + %1 = load i32, i32* %arrayidx, align 4 + %add3 = or i32 %add39, 1 + %arrayidx4 = getelementptr inbounds i32, i32* %0, i32 %add3 + %2 = load i32, i32* %arrayidx4, align 4 + %cmp5 = icmp ult i32 %1, %2 + %spec.select = select i1 %cmp5, i32 %add3, i32 %add39 + br label %if.end7 + +if.end7: ; preds = %if.then, %while.body + %k.0 = phi i32 [ %add39, %while.body ], [ %spec.select, %if.then ] + %arrayidx8 = getelementptr inbounds i32, i32* %0, i32 %i.addr.038 + %3 = load i32, i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32, i32* %0, i32 %k.0 + %4 = load i32, i32* %arrayidx9, align 4 + %cmp10 = icmp ult i32 %3, %4 + br i1 %cmp10, label %if.then11, label %if.end17 + +if.then11: ; preds = %if.end7 + store i32 %3, i32* %arrayidx9, align 4 + store i32 %4, i32* %arrayidx8, align 4 + br label %if.end17 + +if.end17: ; preds = %if.end7, %if.then11 + %i.addr.1 = phi i32 [ %k.0, %if.then11 ], [ %add16, %if.end7 ] + %add = shl i32 %i.addr.1, 1 + %cmp.not = icmp ugt i32 %add, %j + br i1 %cmp.not, label %while.end, label %while.body + +while.end: ; preds = 
%if.end17, %entry + ret void +} + +attributes #0 = { "target-cpu"="hexagonv65" } From 1716c36d84030358a025558d16e6d484b40c14cc Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Thu, 23 Dec 2021 16:39:22 -0500 Subject: [PATCH 580/992] [libc++] [test] More test coverage for ranges::{data,size}. Reviewed as part of D116239. --- .../std/ranges/range.access/data.pass.cpp | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/libcxx/test/std/ranges/range.access/data.pass.cpp b/libcxx/test/std/ranges/range.access/data.pass.cpp index 4e03dc4d8bd7..40d2d3ab8eca 100644 --- a/libcxx/test/std/ranges/range.access/data.pass.cpp +++ b/libcxx/test/std/ranges/range.access/data.pass.cpp @@ -15,6 +15,7 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" @@ -116,9 +117,10 @@ struct BeginMemberRandomAccess { random_access_iterator begin() const; }; -static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); struct BeginFriendContiguousIterator { int buff[8]; @@ -135,9 +137,10 @@ static_assert(!std::is_invocable_v begin(const BeginFriendRandomAccess iter); }; -static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); -static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); struct BeginMemberRvalue { int buff[8]; @@ -173,6 +176,12 @@ constexpr bool testViaRangesBegin() { return true; } +struct RandomButNotContiguous { + random_access_iterator begin() const; + random_access_iterator end() const; +}; +static_assert(!std::is_invocable_v); + int main(int, char**) { testDataMember(); static_assert(testDataMember()); From 8507383631f2ce2254e35bb81e03319ede056ed1 Mon Sep 17 
00:00:00 2001 From: Arthur O'Dwyer Date: Thu, 23 Dec 2021 16:53:48 -0500 Subject: [PATCH 581/992] [libc++] [ranges] ADL-proof the [range.access] CPOs. For example, `std::ranges::range*>` should be well-formed false, not a hard error at compile time. Differential Revision: https://reviews.llvm.org/D116239 --- libcxx/include/__concepts/class_or_enum.h | 4 ++++ libcxx/include/__ranges/access.h | 3 +++ libcxx/include/__ranges/empty.h | 9 ++++++--- libcxx/include/__ranges/size.h | 10 +++++++--- libcxx/test/std/ranges/range.access/begin.pass.cpp | 6 ++++++ libcxx/test/std/ranges/range.access/data.pass.cpp | 5 +++++ libcxx/test/std/ranges/range.access/empty.pass.cpp | 5 +++++ libcxx/test/std/ranges/range.access/end.pass.cpp | 6 ++++++ libcxx/test/std/ranges/range.access/size.pass.cpp | 5 +++++ libcxx/test/std/ranges/range.access/ssize.pass.cpp | 5 +++++ .../range.req/range.range/range.compile.pass.cpp | 5 +++++ 11 files changed, 57 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__concepts/class_or_enum.h b/libcxx/include/__concepts/class_or_enum.h index 43c7636d9c81..aa8606a21929 100644 --- a/libcxx/include/__concepts/class_or_enum.h +++ b/libcxx/include/__concepts/class_or_enum.h @@ -25,6 +25,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template concept __class_or_enum = is_class_v<_Tp> || is_union_v<_Tp> || is_enum_v<_Tp>; +// Work around Clang bug https://llvm.org/PR52970 +template +concept __workaround_52970 = is_class_v<__uncvref_t<_Tp>> || is_union_v<__uncvref_t<_Tp>>; + #endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index 4a1242130ac0..246f8b20caf4 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___RANGES_ACCESS_H #define _LIBCPP___RANGES_ACCESS_H +#include <__concepts/class_or_enum.h> #include <__config> #include <__iterator/concepts.h> #include 
<__iterator/readable_traits.h> @@ -39,6 +40,7 @@ namespace __begin { template concept __member_begin = __can_borrow<_Tp> && + __workaround_52970<_Tp> && requires(_Tp&& __t) { { _LIBCPP_AUTO_CAST(__t.begin()) } -> input_or_output_iterator; }; @@ -102,6 +104,7 @@ namespace __end { template concept __member_end = __can_borrow<_Tp> && + __workaround_52970<_Tp> && requires(_Tp&& __t) { typename iterator_t<_Tp>; { _LIBCPP_AUTO_CAST(__t.end()) } -> sentinel_for>; diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h index e8a8aabf4aed..8da0b120f182 100644 --- a/libcxx/include/__ranges/empty.h +++ b/libcxx/include/__ranges/empty.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___RANGES_EMPTY_H #define _LIBCPP___RANGES_EMPTY_H +#include <__concepts/class_or_enum.h> #include <__config> #include <__iterator/concepts.h> #include <__ranges/access.h> @@ -28,9 +29,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { namespace __empty { template - concept __member_empty = requires(_Tp&& __t) { - bool(__t.empty()); - }; + concept __member_empty = + __workaround_52970<_Tp> && + requires(_Tp&& __t) { + bool(__t.empty()); + }; template concept __can_invoke_size = diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index fc6641cf4887..f3de5a8b8410 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___RANGES_SIZE_H #define _LIBCPP___RANGES_SIZE_H +#include <__concepts/class_or_enum.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> @@ -41,9 +42,12 @@ namespace __size { concept __size_enabled = !disable_sized_range>; template - concept __member_size = __size_enabled<_Tp> && requires(_Tp&& __t) { - { _LIBCPP_AUTO_CAST(__t.size()) } -> __integer_like; - }; + concept __member_size = + __size_enabled<_Tp> && + __workaround_52970<_Tp> && + requires(_Tp&& __t) { + { _LIBCPP_AUTO_CAST(__t.size()) } -> __integer_like; + }; template concept 
__unqualified_size = diff --git a/libcxx/test/std/ranges/range.access/begin.pass.cpp b/libcxx/test/std/ranges/range.access/begin.pass.cpp index 1a6951967f88..11170fa4f994 100644 --- a/libcxx/test/std/ranges/range.access/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.access/begin.pass.cpp @@ -303,6 +303,12 @@ struct BeginReturnsArrayRef { static_assert(noexcept(std::ranges::begin(brar))); static_assert(noexcept(std::ranges::cbegin(brar))); +// Test ADL-proofing. +struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::is_invocable_v*>); +static_assert(!std::is_invocable_v*>); + int main(int, char**) { static_assert(testReturnTypes()); diff --git a/libcxx/test/std/ranges/range.access/data.pass.cpp b/libcxx/test/std/ranges/range.access/data.pass.cpp index 40d2d3ab8eca..6d0b718f6b04 100644 --- a/libcxx/test/std/ranges/range.access/data.pass.cpp +++ b/libcxx/test/std/ranges/range.access/data.pass.cpp @@ -176,6 +176,11 @@ constexpr bool testViaRangesBegin() { return true; } +// Test ADL-proofing. +struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::is_invocable_v*>); + struct RandomButNotContiguous { random_access_iterator begin() const; random_access_iterator end() const; diff --git a/libcxx/test/std/ranges/range.access/empty.pass.cpp b/libcxx/test/std/ranges/range.access/empty.pass.cpp index 18cdce02b573..5724acc67dee 100644 --- a/libcxx/test/std/ranges/range.access/empty.pass.cpp +++ b/libcxx/test/std/ranges/range.access/empty.pass.cpp @@ -168,6 +168,11 @@ constexpr bool testBeginEqualsEnd() { return true; } +// Test ADL-proofing. 
+struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::is_invocable_v*>); + int main(int, char**) { testEmptyMember(); static_assert(testEmptyMember()); diff --git a/libcxx/test/std/ranges/range.access/end.pass.cpp b/libcxx/test/std/ranges/range.access/end.pass.cpp index 27eaf741a113..4b1d4e3f488d 100644 --- a/libcxx/test/std/ranges/range.access/end.pass.cpp +++ b/libcxx/test/std/ranges/range.access/end.pass.cpp @@ -350,6 +350,12 @@ struct EndReturnsArrayRef { static_assert(noexcept(std::ranges::end(erar))); static_assert(noexcept(std::ranges::cend(erar))); +// Test ADL-proofing. +struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::is_invocable_v*>); +static_assert(!std::is_invocable_v*>); + int main(int, char**) { static_assert(testReturnTypes()); diff --git a/libcxx/test/std/ranges/range.access/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp index 0a45a2d7c498..915e67e19475 100644 --- a/libcxx/test/std/ranges/range.access/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/size.pass.cpp @@ -314,6 +314,11 @@ constexpr bool testRanges() { return true; } +// Test ADL-proofing. +struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::is_invocable_v*>); + int main(int, char**) { testArrayType(); static_assert(testArrayType()); diff --git a/libcxx/test/std/ranges/range.access/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp index 39e7b80e2163..c351928c8fe6 100644 --- a/libcxx/test/std/ranges/range.access/ssize.pass.cpp +++ b/libcxx/test/std/ranges/range.access/ssize.pass.cpp @@ -78,6 +78,11 @@ constexpr bool test() { return true; } +// Test ADL-proofing. 
+struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::is_invocable_v*>); + int main(int, char**) { test(); static_assert(test()); diff --git a/libcxx/test/std/ranges/range.req/range.range/range.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.range/range.compile.pass.cpp index ecc8048a9586..adf1caa200e6 100644 --- a/libcxx/test/std/ranges/range.req/range.range/range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.req/range.range/range.compile.pass.cpp @@ -46,3 +46,8 @@ struct int_begin_iterator_end { int* end(); }; static_assert(!std::ranges::range); + +// Test ADL-proofing. +struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::ranges::range*>); From 855d7bedb71376fc5bdfc17644715b2fa3b10f46 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Thu, 30 Dec 2021 23:28:12 -0500 Subject: [PATCH 582/992] [libc++] [P0887] Add newest feature-test macros; mark `type_identity` as implemented. `__cpp_lib_type_identity` was implemented way back in cf49ccd0 (Clang 8), probably before the feature-test macro had been settled on. `__cpp_lib_string_resize_and_overwrite` will be added by D113013 so I didn't add it here. Fixes #46605. 
Differential Revision: https://reviews.llvm.org/D116433 --- libcxx/docs/FeatureTestMacroTable.rst | 20 ++ libcxx/include/version | 21 ++ .../algorithm.version.pass.cpp | 30 ++ .../functional.version.pass.cpp | 60 ++++ .../map.version.pass.cpp | 44 ++- .../memory.version.pass.cpp | 60 ++++ .../ranges.version.pass.cpp | 34 +- .../set.version.pass.cpp | 42 ++- .../tuple.version.pass.cpp | 30 ++ .../type_traits.version.pass.cpp | 27 ++ .../typeinfo.version.pass.cpp | 66 ++++ .../unordered_map.version.pass.cpp | 44 ++- .../unordered_set.version.pass.cpp | 42 ++- .../utility.version.pass.cpp | 30 ++ .../version.version.pass.cpp | 297 ++++++++++++++++++ .../generate_feature_test_macro_components.py | 49 +++ 16 files changed, 868 insertions(+), 28 deletions(-) create mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/typeinfo.version.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 0578ff829883..714ed803ba69 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -290,16 +290,36 @@ Status ------------------------------------------------- ----------------- ``__cpp_lib_to_array`` ``201907L`` ------------------------------------------------- ----------------- + ``__cpp_lib_type_identity`` ``201806L`` + ------------------------------------------------- ----------------- ``__cpp_lib_unwrap_ref`` ``201811L`` ------------------------------------------------- ----------------- **C++ 2b** ------------------------------------------------------------------- + ``__cpp_lib_allocate_at_least`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_associative_heterogeneous_erasure`` *unimplemented* + ------------------------------------------------- ----------------- ``__cpp_lib_byteswap`` ``202110L`` ------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_typeinfo`` 
*unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_invoke_r`` *unimplemented* + ------------------------------------------------- ----------------- ``__cpp_lib_is_scoped_enum`` ``202011L`` ------------------------------------------------- ----------------- ``__cpp_lib_monadic_optional`` ``202110L`` ------------------------------------------------- ----------------- + ``__cpp_lib_move_only_function`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_out_ptr`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_ranges_starts_ends_with`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_ranges_zip`` *unimplemented* + ------------------------------------------------- ----------------- + ``__cpp_lib_spanstream`` *unimplemented* + ------------------------------------------------- ----------------- ``__cpp_lib_stacktrace`` *unimplemented* ------------------------------------------------- ----------------- ``__cpp_lib_stdatomic_h`` *unimplemented* diff --git a/libcxx/include/version b/libcxx/include/version index 574dfe47b58f..db67b2e65167 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -15,6 +15,7 @@ Macro name Value Headers __cpp_lib_addressof_constexpr 201603L +__cpp_lib_allocate_at_least 202106L __cpp_lib_allocator_traits_is_always_equal 201411L @@ -24,6 +25,8 @@ __cpp_lib_apply 201603L __cpp_lib_array_constexpr 201811L 201603L // C++17 __cpp_lib_as_const 201510L +__cpp_lib_associative_heterogeneous_erasure 202110L + __cpp_lib_assume_aligned 201811L __cpp_lib_atomic_flag_test 201907L __cpp_lib_atomic_float 201711L @@ -60,6 +63,7 @@ __cpp_lib_constexpr_numeric 201911L __cpp_lib_constexpr_string 201811L __cpp_lib_constexpr_string_view 201811L __cpp_lib_constexpr_tuple 201811L +__cpp_lib_constexpr_typeinfo 202106L __cpp_lib_constexpr_utility 201811L 
__cpp_lib_constexpr_vector 201907L __cpp_lib_coroutine 201902L @@ -87,6 +91,7 @@ __cpp_lib_integer_sequence 201304L __cpp_lib_integral_constant_callable 201304L __cpp_lib_interpolate 201902L __cpp_lib_invoke 201411L +__cpp_lib_invoke_r 202106L __cpp_lib_is_aggregate 201703L __cpp_lib_is_constant_evaluated 201811L __cpp_lib_is_final 201402L @@ -110,6 +115,7 @@ __cpp_lib_math_constants 201907L __cpp_lib_math_special_functions 201603L __cpp_lib_memory_resource 201603L __cpp_lib_monadic_optional 202110L +__cpp_lib_move_only_function 202110L __cpp_lib_node_extract 201606L __cpp_lib_nonmember_container_access 201411L @@ -119,11 +125,14 @@ __cpp_lib_nonmember_container_access 201411L __cpp_lib_not_fn 201603L __cpp_lib_null_iterators 201304L __cpp_lib_optional 201606L +__cpp_lib_out_ptr 202106L __cpp_lib_parallel_algorithm 201603L __cpp_lib_polymorphic_allocator 201902L __cpp_lib_quoted_string_io 201304L __cpp_lib_ranges 201811L +__cpp_lib_ranges_starts_ends_with 202106L +__cpp_lib_ranges_zip 202110L __cpp_lib_raw_memory_algorithms 201606L __cpp_lib_remove_cvref 201711L __cpp_lib_result_of_sfinae 201210L @@ -139,6 +148,7 @@ __cpp_lib_shift 201806L __cpp_lib_smart_ptr_for_overwrite 202002L __cpp_lib_source_location 201907L __cpp_lib_span 202002L +__cpp_lib_spanstream 202106L __cpp_lib_ssize 201902L __cpp_lib_stacktrace 202011L __cpp_lib_starts_ends_with 201711L @@ -158,6 +168,7 @@ __cpp_lib_transparent_operators 201510L __cpp_lib_tuples_by_type 201304L +__cpp_lib_type_identity 201806L __cpp_lib_type_trait_variable_templates 201510L __cpp_lib_uncaught_exceptions 201411L __cpp_lib_unordered_map_try_emplace 201411L @@ -342,13 +353,23 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_three_way_comparison 201907L # define __cpp_lib_to_address 201711L # define __cpp_lib_to_array 201907L +# define __cpp_lib_type_identity 201806L # define __cpp_lib_unwrap_ref 201811L #endif #if _LIBCPP_STD_VER > 20 +// # define __cpp_lib_allocate_at_least 202106L +// # define 
__cpp_lib_associative_heterogeneous_erasure 202110L # define __cpp_lib_byteswap 202110L +// # define __cpp_lib_constexpr_typeinfo 202106L +// # define __cpp_lib_invoke_r 202106L # define __cpp_lib_is_scoped_enum 202011L # define __cpp_lib_monadic_optional 202110L +// # define __cpp_lib_move_only_function 202110L +// # define __cpp_lib_out_ptr 202106L +// # define __cpp_lib_ranges_starts_ends_with 202106L +// # define __cpp_lib_ranges_zip 202110L +// # define __cpp_lib_spanstream 202106L // # define __cpp_lib_stacktrace 202011L // # define __cpp_lib_stdatomic_h 202011L # define __cpp_lib_string_contains 202011L diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.pass.cpp index eb7591540eac..b96055d348b8 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.pass.cpp @@ -20,6 +20,7 @@ __cpp_lib_constexpr_algorithms 201806L [C++20] __cpp_lib_parallel_algorithm 201603L [C++17] __cpp_lib_ranges 201811L [C++20] + __cpp_lib_ranges_starts_ends_with 202106L [C++2b] __cpp_lib_robust_nonmodifying_seq_ops 201304L [C++14] __cpp_lib_sample 201603L [C++17] __cpp_lib_shift 201806L [C++20] @@ -46,6 +47,10 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + # ifdef __cpp_lib_robust_nonmodifying_seq_ops # error "__cpp_lib_robust_nonmodifying_seq_ops should not be defined before c++14" # endif @@ -76,6 +81,10 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + # ifndef 
__cpp_lib_robust_nonmodifying_seq_ops # error "__cpp_lib_robust_nonmodifying_seq_ops should be defined in c++14" # endif @@ -121,6 +130,10 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + # ifndef __cpp_lib_robust_nonmodifying_seq_ops # error "__cpp_lib_robust_nonmodifying_seq_ops should be defined in c++17" # endif @@ -181,6 +194,10 @@ # endif # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + # ifndef __cpp_lib_robust_nonmodifying_seq_ops # error "__cpp_lib_robust_nonmodifying_seq_ops should be defined in c++20" # endif @@ -244,6 +261,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should be defined in c++2b" +# endif +# if __cpp_lib_ranges_starts_ends_with != 202106L +# error "__cpp_lib_ranges_starts_ends_with should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_robust_nonmodifying_seq_ops # error "__cpp_lib_robust_nonmodifying_seq_ops should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.pass.cpp index 77ceb9f31457..f8975101ee05 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.pass.cpp @@ -20,6 +20,8 @@ __cpp_lib_boyer_moore_searcher 201603L [C++17] __cpp_lib_constexpr_functional 201907L [C++20] __cpp_lib_invoke 201411L [C++17] + __cpp_lib_invoke_r 202106L [C++2b] + __cpp_lib_move_only_function 202110L [C++2b] __cpp_lib_not_fn 201603L [C++17] __cpp_lib_ranges 201811L [C++20] __cpp_lib_result_of_sfinae 201210L [C++14] @@ -49,6 +51,14 @@ # error "__cpp_lib_invoke should not be defined before c++17" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifdef __cpp_lib_not_fn # error "__cpp_lib_not_fn should not be defined before c++17" # endif @@ -87,6 +97,14 @@ # error "__cpp_lib_invoke should not be defined before c++17" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifdef __cpp_lib_not_fn # error "__cpp_lib_not_fn should not be defined before c++17" # endif @@ -143,6 +161,14 @@ # error "__cpp_lib_invoke should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + +# ifdef 
__cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifndef __cpp_lib_not_fn # error "__cpp_lib_not_fn should be defined in c++17" # endif @@ -208,6 +234,14 @@ # error "__cpp_lib_invoke should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifndef __cpp_lib_not_fn # error "__cpp_lib_not_fn should be defined in c++20" # endif @@ -285,6 +319,32 @@ # error "__cpp_lib_invoke should have the value 201411L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should be defined in c++2b" +# endif +# if __cpp_lib_invoke_r != 202106L +# error "__cpp_lib_invoke_r should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should be defined in c++2b" +# endif +# if __cpp_lib_move_only_function != 202110L +# error "__cpp_lib_move_only_function should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_not_fn # error "__cpp_lib_not_fn should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp index 85c5b46d7e3f..a68273a9e7f2 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp @@ -15,13 +15,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_generic_associative_lookup 201304L [C++14] - __cpp_lib_map_try_emplace 201411L [C++17] - __cpp_lib_node_extract 201606L [C++17] - __cpp_lib_nonmember_container_access 201411L [C++17] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_associative_heterogeneous_erasure 202110L [C++2b] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_generic_associative_lookup 201304L [C++14] + __cpp_lib_map_try_emplace 201411L [C++17] + __cpp_lib_node_extract 201606L [C++17] + __cpp_lib_nonmember_container_access 201411L [C++17] */ #include @@ -33,6 +34,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -59,6 +64,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if 
should not be defined before c++20" # endif @@ -91,6 +100,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -132,6 +145,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -176,6 +193,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should be defined in c++2b" +# endif +# if __cpp_lib_associative_heterogeneous_erasure != 202110L +# error "__cpp_lib_associative_heterogeneous_erasure should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp index dfa5b569081d..415eaa385767 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp @@ -17,6 +17,7 @@ /* Constant Value __cpp_lib_addressof_constexpr 201603L [C++17] + __cpp_lib_allocate_at_least 202106L [C++2b] __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] __cpp_lib_assume_aligned 201811L [C++20] __cpp_lib_atomic_value_initialization 201911L [C++20] @@ -24,6 +25,7 @@ __cpp_lib_constexpr_memory 201811L [C++20] __cpp_lib_enable_shared_from_this 201603L [C++17] __cpp_lib_make_unique 201304L [C++14] + __cpp_lib_out_ptr 202106L [C++2b] __cpp_lib_polymorphic_allocator 201902L [C++20] __cpp_lib_ranges 201811L [C++20] __cpp_lib_raw_memory_algorithms 201606L [C++17] @@ -44,6 +46,10 @@ # error "__cpp_lib_addressof_constexpr should not be defined before c++17" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifdef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif @@ -72,6 +78,10 @@ # error "__cpp_lib_make_unique should not be defined before c++14" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # ifdef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should not be defined before c++20" # endif @@ -110,6 +120,10 @@ # error "__cpp_lib_addressof_constexpr should not be defined before c++17" # endif +# ifdef __cpp_lib_allocate_at_least +# error 
"__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifdef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif @@ -141,6 +155,10 @@ # error "__cpp_lib_make_unique should have the value 201304L in c++14" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # ifdef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should not be defined before c++20" # endif @@ -185,6 +203,10 @@ # error "__cpp_lib_addressof_constexpr should have the value 201603L in c++17" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifndef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should be defined in c++17" # endif @@ -222,6 +244,10 @@ # error "__cpp_lib_make_unique should have the value 201304L in c++17" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # ifdef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should not be defined before c++20" # endif @@ -275,6 +301,10 @@ # error "__cpp_lib_addressof_constexpr should have the value 201603L in c++20" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifndef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should be defined in c++20" # endif @@ -330,6 +360,10 @@ # error "__cpp_lib_make_unique should have the value 201304L in c++20" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should be defined in c++20" @@ -413,6 +447,19 @@ # error 
"__cpp_lib_addressof_constexpr should have the value 201603L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should be defined in c++2b" +# endif +# if __cpp_lib_allocate_at_least != 202106L +# error "__cpp_lib_allocate_at_least should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should be defined in c++2b" # endif @@ -468,6 +515,19 @@ # error "__cpp_lib_make_unique should have the value 201304L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should be defined in c++2b" +# endif +# if __cpp_lib_out_ptr != 202106L +# error "__cpp_lib_out_ptr should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should be defined in c++2b" diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.pass.cpp index b1da06fa52fa..177d89f35ca2 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.pass.cpp @@ -17,8 +17,9 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_ranges 201811L [C++20] +/* Constant Value + __cpp_lib_ranges 201811L [C++20] + __cpp_lib_ranges_zip 202110L [C++2b] */ #include @@ -30,18 +31,30 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + #elif TEST_STD_VER == 17 # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + #elif TEST_STD_VER == 20 # if !defined(_LIBCPP_VERSION) @@ -57,6 +70,10 @@ # endif # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + #elif TEST_STD_VER > 20 # if !defined(_LIBCPP_VERSION) @@ -72,6 +89,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should be defined in c++2b" +# endif +# if __cpp_lib_ranges_zip != 202110L +# error "__cpp_lib_ranges_zip should have the value 
202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined because it is unimplemented in libc++!" +# endif +# endif + #endif // TEST_STD_VER > 20 int main(int, char**) { return 0; } diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp index 57fdd5b21f40..53825881afb9 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp @@ -15,12 +15,13 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_generic_associative_lookup 201304L [C++14] - __cpp_lib_node_extract 201606L [C++17] - __cpp_lib_nonmember_container_access 201411L [C++17] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_associative_heterogeneous_erasure 202110L [C++2b] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_generic_associative_lookup 201304L [C++14] + __cpp_lib_node_extract 201606L [C++17] + __cpp_lib_nonmember_container_access 201411L [C++17] */ #include @@ -32,6 +33,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -54,6 +59,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# 
endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -82,6 +91,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -116,6 +129,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -153,6 +170,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should be defined in c++2b" +# endif +# if __cpp_lib_associative_heterogeneous_erasure != 202110L +# error "__cpp_lib_associative_heterogeneous_erasure should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.pass.cpp index 5d870a8cd0c1..3b312ee5bd4d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.pass.cpp @@ -19,6 +19,7 @@ __cpp_lib_apply 201603L [C++17] __cpp_lib_constexpr_tuple 201811L [C++20] __cpp_lib_make_from_tuple 201606L [C++17] + __cpp_lib_ranges_zip 202110L [C++2b] __cpp_lib_tuple_element_t 201402L [C++14] __cpp_lib_tuples_by_type 201304L [C++14] */ @@ -40,6 +41,10 @@ # error "__cpp_lib_make_from_tuple should not be defined before c++17" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifdef __cpp_lib_tuple_element_t # error "__cpp_lib_tuple_element_t should not be defined before c++14" # endif @@ -62,6 +67,10 @@ # error "__cpp_lib_make_from_tuple should not be defined before c++17" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifndef __cpp_lib_tuple_element_t # error "__cpp_lib_tuple_element_t should be defined in c++14" # endif @@ -96,6 +105,10 @@ # error "__cpp_lib_make_from_tuple should have the value 201606L in c++17" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifndef __cpp_lib_tuple_element_t # error "__cpp_lib_tuple_element_t should be defined in c++17" # endif @@ -133,6 +146,10 @@ # error "__cpp_lib_make_from_tuple should have the value 201606L in c++20" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifndef 
__cpp_lib_tuple_element_t # error "__cpp_lib_tuple_element_t should be defined in c++20" # endif @@ -170,6 +187,19 @@ # error "__cpp_lib_make_from_tuple should have the value 201606L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should be defined in c++2b" +# endif +# if __cpp_lib_ranges_zip != 202110L +# error "__cpp_lib_ranges_zip should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_tuple_element_t # error "__cpp_lib_tuple_element_t should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp index aefb00b849dd..aeff3ad31a98 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.pass.cpp @@ -34,6 +34,7 @@ __cpp_lib_remove_cvref 201711L [C++20] __cpp_lib_result_of_sfinae 201210L [C++14] __cpp_lib_transformation_trait_aliases 201304L [C++14] + __cpp_lib_type_identity 201806L [C++20] __cpp_lib_type_trait_variable_templates 201510L [C++17] __cpp_lib_void_t 201411L [C++17] */ @@ -115,6 +116,10 @@ # error "__cpp_lib_transformation_trait_aliases should not be defined before c++14" # endif +# ifdef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should not be defined before c++20" +# endif + # ifdef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should not be defined before c++17" # endif @@ -212,6 +217,10 @@ # error "__cpp_lib_transformation_trait_aliases should have the value 201304L in c++14" # endif +# ifdef __cpp_lib_type_identity 
+# error "__cpp_lib_type_identity should not be defined before c++20" +# endif + # ifdef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should not be defined before c++17" # endif @@ -327,6 +336,10 @@ # error "__cpp_lib_transformation_trait_aliases should have the value 201304L in c++17" # endif +# ifdef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should not be defined before c++20" +# endif + # ifndef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should be defined in c++17" # endif @@ -478,6 +491,13 @@ # error "__cpp_lib_transformation_trait_aliases should have the value 201304L in c++20" # endif +# ifndef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should be defined in c++20" +# endif +# if __cpp_lib_type_identity != 201806L +# error "__cpp_lib_type_identity should have the value 201806L in c++20" +# endif + # ifndef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should be defined in c++20" # endif @@ -632,6 +652,13 @@ # error "__cpp_lib_transformation_trait_aliases should have the value 201304L in c++2b" # endif +# ifndef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should be defined in c++2b" +# endif +# if __cpp_lib_type_identity != 201806L +# error "__cpp_lib_type_identity should have the value 201806L in c++2b" +# endif + # ifndef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/typeinfo.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/typeinfo.version.pass.cpp new file mode 100644 index 000000000000..122effde1b8a --- /dev/null +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/typeinfo.version.pass.cpp @@ -0,0 +1,66 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// WARNING: This test was generated by generate_feature_test_macro_components.py +// and should not be edited manually. +// +// clang-format off + +// + +// Test the feature test macros defined by + +/* Constant Value + __cpp_lib_constexpr_typeinfo 202106L [C++2b] +*/ + +#include +#include "test_macros.h" + +#if TEST_STD_VER < 14 + +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + +#elif TEST_STD_VER == 14 + +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + +#elif TEST_STD_VER == 17 + +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + +#elif TEST_STD_VER == 20 + +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + +#elif TEST_STD_VER > 20 + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should be defined in c++2b" +# endif +# if __cpp_lib_constexpr_typeinfo != 202106L +# error "__cpp_lib_constexpr_typeinfo should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +#endif // TEST_STD_VER > 20 + +int main(int, char**) { return 0; } diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp index b8e20ff848ba..e639dffdd5dd 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp @@ -15,13 +15,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_generic_unordered_lookup 201811L [C++20] - __cpp_lib_node_extract 201606L [C++17] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_unordered_map_try_emplace 201411L [C++17] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_associative_heterogeneous_erasure 202110L [C++2b] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_generic_unordered_lookup 201811L [C++20] + __cpp_lib_node_extract 201606L [C++17] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_unordered_map_try_emplace 201411L [C++17] */ #include @@ -33,6 +34,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -59,6 +64,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef 
__cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -88,6 +97,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -126,6 +139,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -170,6 +187,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should be defined in c++2b" +# endif +# if __cpp_lib_associative_heterogeneous_erasure != 202110L +# error "__cpp_lib_associative_heterogeneous_erasure should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp index 89692c766ab7..aed7c73d9e66 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp @@ -15,12 +15,13 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_generic_unordered_lookup 201811L [C++20] - __cpp_lib_node_extract 201606L [C++17] - __cpp_lib_nonmember_container_access 201411L [C++17] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_associative_heterogeneous_erasure 202110L [C++2b] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_generic_unordered_lookup 201811L [C++20] + __cpp_lib_node_extract 201606L [C++17] + __cpp_lib_nonmember_container_access 201411L [C++17] */ #include @@ -32,6 +33,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -54,6 +59,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ 
-79,6 +88,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -110,6 +123,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -147,6 +164,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should be defined in c++2b" +# endif +# if __cpp_lib_associative_heterogeneous_erasure != 202110L +# error "__cpp_lib_associative_heterogeneous_erasure should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp index 0159371ab250..5d4f2c252a3f 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp @@ -21,6 +21,7 @@ __cpp_lib_exchange_function 201304L [C++14] __cpp_lib_integer_comparison_functions 202002L [C++20] __cpp_lib_integer_sequence 201304L [C++14] + __cpp_lib_ranges_zip 202110L [C++2b] __cpp_lib_to_underlying 202102L [C++2b] __cpp_lib_tuples_by_type 201304L [C++14] */ @@ -50,6 +51,10 @@ # error "__cpp_lib_integer_sequence should not be defined before c++14" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifdef __cpp_lib_to_underlying # error "__cpp_lib_to_underlying should not be defined before c++2b" # endif @@ -86,6 +91,10 @@ # error "__cpp_lib_integer_sequence should have the value 201304L in c++14" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifdef __cpp_lib_to_underlying # error "__cpp_lib_to_underlying should not be defined before c++2b" # endif @@ -128,6 +137,10 @@ # error "__cpp_lib_integer_sequence should have the value 201304L in c++17" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifdef __cpp_lib_to_underlying # error "__cpp_lib_to_underlying should not be defined before c++2b" # endif @@ -182,6 +195,10 @@ # error "__cpp_lib_integer_sequence should have the value 201304L in c++20" # endif +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before 
c++2b" +# endif + # ifdef __cpp_lib_to_underlying # error "__cpp_lib_to_underlying should not be defined before c++2b" # endif @@ -236,6 +253,19 @@ # error "__cpp_lib_integer_sequence should have the value 201304L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should be defined in c++2b" +# endif +# if __cpp_lib_ranges_zip != 202110L +# error "__cpp_lib_ranges_zip should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_to_underlying # error "__cpp_lib_to_underlying should be defined in c++2b" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index 9637d3fc2a11..c22dda85a247 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -17,12 +17,14 @@ /* Constant Value __cpp_lib_addressof_constexpr 201603L [C++17] + __cpp_lib_allocate_at_least 202106L [C++2b] __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] __cpp_lib_any 201606L [C++17] __cpp_lib_apply 201603L [C++17] __cpp_lib_array_constexpr 201603L [C++17] 201811L [C++20] __cpp_lib_as_const 201510L [C++17] + __cpp_lib_associative_heterogeneous_erasure 202110L [C++2b] __cpp_lib_assume_aligned 201811L [C++20] __cpp_lib_atomic_flag_test 201907L [C++20] __cpp_lib_atomic_float 201711L [C++20] @@ -57,6 +59,7 @@ __cpp_lib_constexpr_string 201811L [C++20] __cpp_lib_constexpr_string_view 201811L [C++20] __cpp_lib_constexpr_tuple 201811L [C++20] + __cpp_lib_constexpr_typeinfo 202106L [C++2b] __cpp_lib_constexpr_utility 201811L [C++20] 
__cpp_lib_constexpr_vector 201907L [C++20] __cpp_lib_coroutine 201902L [C++20] @@ -82,6 +85,7 @@ __cpp_lib_integral_constant_callable 201304L [C++14] __cpp_lib_interpolate 201902L [C++20] __cpp_lib_invoke 201411L [C++17] + __cpp_lib_invoke_r 202106L [C++2b] __cpp_lib_is_aggregate 201703L [C++17] __cpp_lib_is_constant_evaluated 201811L [C++20] __cpp_lib_is_final 201402L [C++14] @@ -105,15 +109,19 @@ __cpp_lib_math_special_functions 201603L [C++17] __cpp_lib_memory_resource 201603L [C++17] __cpp_lib_monadic_optional 202110L [C++2b] + __cpp_lib_move_only_function 202110L [C++2b] __cpp_lib_node_extract 201606L [C++17] __cpp_lib_nonmember_container_access 201411L [C++17] __cpp_lib_not_fn 201603L [C++17] __cpp_lib_null_iterators 201304L [C++14] __cpp_lib_optional 201606L [C++17] + __cpp_lib_out_ptr 202106L [C++2b] __cpp_lib_parallel_algorithm 201603L [C++17] __cpp_lib_polymorphic_allocator 201902L [C++20] __cpp_lib_quoted_string_io 201304L [C++14] __cpp_lib_ranges 201811L [C++20] + __cpp_lib_ranges_starts_ends_with 202106L [C++2b] + __cpp_lib_ranges_zip 202110L [C++2b] __cpp_lib_raw_memory_algorithms 201606L [C++17] __cpp_lib_remove_cvref 201711L [C++20] __cpp_lib_result_of_sfinae 201210L [C++14] @@ -129,6 +137,7 @@ __cpp_lib_smart_ptr_for_overwrite 202002L [C++20] __cpp_lib_source_location 201907L [C++20] __cpp_lib_span 202002L [C++20] + __cpp_lib_spanstream 202106L [C++2b] __cpp_lib_ssize 201902L [C++20] __cpp_lib_stacktrace 202011L [C++2b] __cpp_lib_starts_ends_with 201711L [C++20] @@ -148,6 +157,7 @@ 201510L [C++17] __cpp_lib_tuple_element_t 201402L [C++14] __cpp_lib_tuples_by_type 201304L [C++14] + __cpp_lib_type_identity 201806L [C++20] __cpp_lib_type_trait_variable_templates 201510L [C++17] __cpp_lib_uncaught_exceptions 201411L [C++17] __cpp_lib_unordered_map_try_emplace 201411L [C++17] @@ -165,6 +175,10 @@ # error "__cpp_lib_addressof_constexpr should not be defined before c++17" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least 
should not be defined before c++2b" +# endif + # ifdef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif @@ -185,6 +199,10 @@ # error "__cpp_lib_as_const should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_assume_aligned # error "__cpp_lib_assume_aligned should not be defined before c++20" # endif @@ -321,6 +339,10 @@ # error "__cpp_lib_constexpr_tuple should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + # ifdef __cpp_lib_constexpr_utility # error "__cpp_lib_constexpr_utility should not be defined before c++20" # endif @@ -417,6 +439,10 @@ # error "__cpp_lib_invoke should not be defined before c++17" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + # ifdef __cpp_lib_is_aggregate # error "__cpp_lib_is_aggregate should not be defined before c++17" # endif @@ -509,6 +535,10 @@ # error "__cpp_lib_monadic_optional should not be defined before c++2b" # endif +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifdef __cpp_lib_node_extract # error "__cpp_lib_node_extract should not be defined before c++17" # endif @@ -529,6 +559,10 @@ # error "__cpp_lib_optional should not be defined before c++17" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # ifdef __cpp_lib_parallel_algorithm # error "__cpp_lib_parallel_algorithm should not be defined before c++17" # endif @@ -545,6 +579,14 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error 
"__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifdef __cpp_lib_raw_memory_algorithms # error "__cpp_lib_raw_memory_algorithms should not be defined before c++17" # endif @@ -605,6 +647,10 @@ # error "__cpp_lib_span should not be defined before c++20" # endif +# ifdef __cpp_lib_spanstream +# error "__cpp_lib_spanstream should not be defined before c++2b" +# endif + # ifdef __cpp_lib_ssize # error "__cpp_lib_ssize should not be defined before c++20" # endif @@ -673,6 +719,10 @@ # error "__cpp_lib_tuples_by_type should not be defined before c++14" # endif +# ifdef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should not be defined before c++20" +# endif + # ifdef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should not be defined before c++17" # endif @@ -703,6 +753,10 @@ # error "__cpp_lib_addressof_constexpr should not be defined before c++17" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifdef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif @@ -723,6 +777,10 @@ # error "__cpp_lib_as_const should not be defined before c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_assume_aligned # error "__cpp_lib_assume_aligned should not be defined before c++20" # endif @@ -865,6 +923,10 @@ # error "__cpp_lib_constexpr_tuple should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + # ifdef __cpp_lib_constexpr_utility # error "__cpp_lib_constexpr_utility should not 
be defined before c++20" # endif @@ -973,6 +1035,10 @@ # error "__cpp_lib_invoke should not be defined before c++17" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + # ifdef __cpp_lib_is_aggregate # error "__cpp_lib_is_aggregate should not be defined before c++17" # endif @@ -1077,6 +1143,10 @@ # error "__cpp_lib_monadic_optional should not be defined before c++2b" # endif +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifdef __cpp_lib_node_extract # error "__cpp_lib_node_extract should not be defined before c++17" # endif @@ -1100,6 +1170,10 @@ # error "__cpp_lib_optional should not be defined before c++17" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # ifdef __cpp_lib_parallel_algorithm # error "__cpp_lib_parallel_algorithm should not be defined before c++17" # endif @@ -1119,6 +1193,14 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifdef __cpp_lib_raw_memory_algorithms # error "__cpp_lib_raw_memory_algorithms should not be defined before c++17" # endif @@ -1194,6 +1276,10 @@ # error "__cpp_lib_span should not be defined before c++20" # endif +# ifdef __cpp_lib_spanstream +# error "__cpp_lib_spanstream should not be defined before c++2b" +# endif + # ifdef __cpp_lib_ssize # error "__cpp_lib_ssize should not be defined before c++20" # endif @@ -1277,6 +1363,10 @@ # error "__cpp_lib_tuples_by_type should have the value 201304L in c++14" # endif +# ifdef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should not be defined before c++20" +# endif + # ifdef 
__cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should not be defined before c++17" # endif @@ -1310,6 +1400,10 @@ # error "__cpp_lib_addressof_constexpr should have the value 201603L in c++17" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifndef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should be defined in c++17" # endif @@ -1345,6 +1439,10 @@ # error "__cpp_lib_as_const should have the value 201510L in c++17" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # ifdef __cpp_lib_assume_aligned # error "__cpp_lib_assume_aligned should not be defined before c++20" # endif @@ -1511,6 +1609,10 @@ # error "__cpp_lib_constexpr_tuple should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + # ifdef __cpp_lib_constexpr_utility # error "__cpp_lib_constexpr_utility should not be defined before c++20" # endif @@ -1664,6 +1766,10 @@ # error "__cpp_lib_invoke should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + # ifndef __cpp_lib_is_aggregate # error "__cpp_lib_is_aggregate should be defined in c++17" # endif @@ -1807,6 +1913,10 @@ # error "__cpp_lib_monadic_optional should not be defined before c++2b" # endif +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifndef __cpp_lib_node_extract # error "__cpp_lib_node_extract should be defined in c++17" # endif @@ -1842,6 +1952,10 @@ # error "__cpp_lib_optional should have the value 201606L in c++17" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr 
should not be defined before c++2b" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_parallel_algorithm # error "__cpp_lib_parallel_algorithm should be defined in c++17" @@ -1870,6 +1984,14 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifndef __cpp_lib_raw_memory_algorithms # error "__cpp_lib_raw_memory_algorithms should be defined in c++17" # endif @@ -1969,6 +2091,10 @@ # error "__cpp_lib_span should not be defined before c++20" # endif +# ifdef __cpp_lib_spanstream +# error "__cpp_lib_spanstream should not be defined before c++2b" +# endif + # ifdef __cpp_lib_ssize # error "__cpp_lib_ssize should not be defined before c++20" # endif @@ -2064,6 +2190,10 @@ # error "__cpp_lib_tuples_by_type should have the value 201304L in c++17" # endif +# ifdef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should not be defined before c++20" +# endif + # ifndef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should be defined in c++17" # endif @@ -2112,6 +2242,10 @@ # error "__cpp_lib_addressof_constexpr should have the value 201603L in c++20" # endif +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined before c++2b" +# endif + # ifndef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should be defined in c++20" # endif @@ -2147,6 +2281,10 @@ # error "__cpp_lib_as_const should have the value 201510L in c++20" # endif +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined before c++2b" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_assume_aligned # error 
"__cpp_lib_assume_aligned should be defined in c++20" @@ -2442,6 +2580,10 @@ # error "__cpp_lib_constexpr_tuple should have the value 201811L in c++20" # endif +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined before c++2b" +# endif + # ifndef __cpp_lib_constexpr_utility # error "__cpp_lib_constexpr_utility should be defined in c++20" # endif @@ -2652,6 +2794,10 @@ # error "__cpp_lib_invoke should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined before c++2b" +# endif + # ifndef __cpp_lib_is_aggregate # error "__cpp_lib_is_aggregate should be defined in c++20" # endif @@ -2849,6 +2995,10 @@ # error "__cpp_lib_monadic_optional should not be defined before c++2b" # endif +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined before c++2b" +# endif + # ifndef __cpp_lib_node_extract # error "__cpp_lib_node_extract should be defined in c++20" # endif @@ -2884,6 +3034,10 @@ # error "__cpp_lib_optional should have the value 201606L in c++20" # endif +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined before c++2b" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_parallel_algorithm # error "__cpp_lib_parallel_algorithm should be defined in c++20" @@ -2930,6 +3084,14 @@ # endif # endif +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined before c++2b" +# endif + +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined before c++2b" +# endif + # ifndef __cpp_lib_raw_memory_algorithms # error "__cpp_lib_raw_memory_algorithms should be defined in c++20" # endif @@ -3065,6 +3227,10 @@ # error "__cpp_lib_span should have the value 202002L in c++20" # endif +# ifdef __cpp_lib_spanstream +# error "__cpp_lib_spanstream should not be defined before c++2b" +# endif + # ifndef __cpp_lib_ssize # error 
"__cpp_lib_ssize should be defined in c++20" # endif @@ -3190,6 +3356,13 @@ # error "__cpp_lib_tuples_by_type should have the value 201304L in c++20" # endif +# ifndef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should be defined in c++20" +# endif +# if __cpp_lib_type_identity != 201806L +# error "__cpp_lib_type_identity should have the value 201806L in c++20" +# endif + # ifndef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should be defined in c++20" # endif @@ -3241,6 +3414,19 @@ # error "__cpp_lib_addressof_constexpr should have the value 201603L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should be defined in c++2b" +# endif +# if __cpp_lib_allocate_at_least != 202106L +# error "__cpp_lib_allocate_at_least should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_allocate_at_least +# error "__cpp_lib_allocate_at_least should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_allocator_traits_is_always_equal # error "__cpp_lib_allocator_traits_is_always_equal should be defined in c++2b" # endif @@ -3276,6 +3462,19 @@ # error "__cpp_lib_as_const should have the value 201510L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should be defined in c++2b" +# endif +# if __cpp_lib_associative_heterogeneous_erasure != 202110L +# error "__cpp_lib_associative_heterogeneous_erasure should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_associative_heterogeneous_erasure +# error "__cpp_lib_associative_heterogeneous_erasure should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_assume_aligned # error "__cpp_lib_assume_aligned should be defined in c++2b" @@ -3574,6 +3773,19 @@ # error "__cpp_lib_constexpr_tuple should have the value 201811L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should be defined in c++2b" +# endif +# if __cpp_lib_constexpr_typeinfo != 202106L +# error "__cpp_lib_constexpr_typeinfo should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constexpr_typeinfo +# error "__cpp_lib_constexpr_typeinfo should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_constexpr_utility # error "__cpp_lib_constexpr_utility should be defined in c++2b" # endif @@ -3784,6 +3996,19 @@ # error "__cpp_lib_invoke should have the value 201411L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should be defined in c++2b" +# endif +# if __cpp_lib_invoke_r != 202106L +# error "__cpp_lib_invoke_r should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_invoke_r +# error "__cpp_lib_invoke_r should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_is_aggregate # error "__cpp_lib_is_aggregate should be defined in c++2b" # endif @@ -3987,6 +4212,19 @@ # error "__cpp_lib_monadic_optional should have the value 202110L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should be defined in c++2b" +# endif +# if __cpp_lib_move_only_function != 202110L +# error "__cpp_lib_move_only_function should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_move_only_function +# error "__cpp_lib_move_only_function should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_node_extract # error "__cpp_lib_node_extract should be defined in c++2b" # endif @@ -4022,6 +4260,19 @@ # error "__cpp_lib_optional should have the value 201606L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should be defined in c++2b" +# endif +# if __cpp_lib_out_ptr != 202106L +# error "__cpp_lib_out_ptr should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_out_ptr +# error "__cpp_lib_out_ptr should not be defined because it is unimplemented in libc++!" +# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_parallel_algorithm # error "__cpp_lib_parallel_algorithm should be defined in c++2b" @@ -4068,6 +4319,32 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should be defined in c++2b" +# endif +# if __cpp_lib_ranges_starts_ends_with != 202106L +# error "__cpp_lib_ranges_starts_ends_with should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_starts_ends_with +# error "__cpp_lib_ranges_starts_ends_with should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should be defined in c++2b" +# endif +# if __cpp_lib_ranges_zip != 202110L +# error "__cpp_lib_ranges_zip should have the value 202110L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_zip +# error "__cpp_lib_ranges_zip should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_raw_memory_algorithms # error "__cpp_lib_raw_memory_algorithms should be defined in c++2b" # endif @@ -4203,6 +4480,19 @@ # error "__cpp_lib_span should have the value 202002L in c++2b" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_spanstream +# error "__cpp_lib_spanstream should be defined in c++2b" +# endif +# if __cpp_lib_spanstream != 202106L +# error "__cpp_lib_spanstream should have the value 202106L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_spanstream +# error "__cpp_lib_spanstream should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_ssize # error "__cpp_lib_ssize should be defined in c++2b" # endif @@ -4352,6 +4642,13 @@ # error "__cpp_lib_tuples_by_type should have the value 201304L in c++2b" # endif +# ifndef __cpp_lib_type_identity +# error "__cpp_lib_type_identity should be defined in c++2b" +# endif +# if __cpp_lib_type_identity != 201806L +# error "__cpp_lib_type_identity should have the value 201806L in c++2b" +# endif + # ifndef __cpp_lib_type_trait_variable_templates # error "__cpp_lib_type_trait_variable_templates should be defined in c++2b" # endif diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index a6e9023e30be..92de7b3b1284 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -66,6 +66,11 @@ def add_version_header(tc): "name": "__cpp_lib_addressof_constexpr", "values": { "c++17": 201603 }, "headers": ["memory"], + }, { + "name": "__cpp_lib_allocate_at_least", + "values": { "c++2b": 202106 }, + "headers": ["memory"], + "unimplemented": True, }, { "name": "__cpp_lib_allocator_traits_is_always_equal", "values": { "c++17": 201411 }, @@ -86,6 +91,11 @@ def add_version_header(tc): "name": "__cpp_lib_as_const", "values": { "c++17": 201510 }, "headers": ["utility"], + }, { + 
"name": "__cpp_lib_associative_heterogeneous_erasure", + "values": { "c++2b": 202110 }, + "headers": ["map", "set", "unordered_map", "unordered_set"], + "unimplemented": True, }, { "name": "__cpp_lib_assume_aligned", "values": { "c++20": 201811 }, @@ -235,6 +245,11 @@ def add_version_header(tc): "name": "__cpp_lib_constexpr_tuple", "values": { "c++20": 201811 }, "headers": ["tuple"], + }, { + "name": "__cpp_lib_constexpr_typeinfo", + "values": { "c++2b": 202106 }, + "headers": ["typeinfo"], + "unimplemented": True, }, { "name": "__cpp_lib_constexpr_utility", "values": { "c++20": 201811 }, @@ -343,6 +358,11 @@ def add_version_header(tc): "name": "__cpp_lib_invoke", "values": { "c++17": 201411 }, "headers": ["functional"], + }, { + "name": "__cpp_lib_invoke_r", + "values": { "c++2b": 202106 }, + "headers": ["functional"], + "unimplemented": True, }, { "name": "__cpp_lib_is_aggregate", "values": { "c++17": 201703 }, @@ -446,6 +466,11 @@ def add_version_header(tc): "name": "__cpp_lib_monadic_optional", "values": { "c++2b": 202110 }, "headers": ["optional"], + }, { + "name": "__cpp_lib_move_only_function", + "values": { "c++2b": 202110 }, + "headers": ["functional"], + "unimplemented": True, }, { "name": "__cpp_lib_node_extract", "values": { "c++17": 201606 }, @@ -466,6 +491,11 @@ def add_version_header(tc): "name": "__cpp_lib_optional", "values": { "c++17": 201606 }, "headers": ["optional"], + }, { + "name": "__cpp_lib_out_ptr", + "values": { "c++2b": 202106 }, + "headers": ["memory"], + "unimplemented": True, }, { "name": "__cpp_lib_parallel_algorithm", "values": { "c++17": 201603 }, @@ -485,6 +515,16 @@ def add_version_header(tc): "values": { "c++20": 201811 }, "headers": ["algorithm", "functional", "iterator", "memory", "ranges"], "unimplemented": True, + }, { + "name": "__cpp_lib_ranges_starts_ends_with", + "values": { "c++2b": 202106 }, + "headers": ["algorithm"], + "unimplemented": True, + }, { + "name": "__cpp_lib_ranges_zip", + "values": { "c++2b": 202110 }, + 
"headers": ["ranges", "tuple", "utility"], + "unimplemented": True, }, { "name": "__cpp_lib_raw_memory_algorithms", "values": { "c++17": 201606 }, @@ -553,6 +593,11 @@ def add_version_header(tc): "name": "__cpp_lib_span", "values": { "c++20": 202002 }, "headers": ["span"], + }, { + "name": "__cpp_lib_spanstream", + "values": { "c++2b": 202106 }, + "headers": ["spanstream"], + "unimplemented": True, }, { "name": "__cpp_lib_ssize", "values": { "c++20": 201902 }, @@ -626,6 +671,10 @@ def add_version_header(tc): "name": "__cpp_lib_tuples_by_type", "values": { "c++14": 201304 }, "headers": ["tuple", "utility"], + }, { + "name": "__cpp_lib_type_identity", + "values": { "c++20": 201806 }, + "headers": ["type_traits"], }, { "name": "__cpp_lib_type_trait_variable_templates", "values": { "c++17": 201510 }, From dd7d5bc5bd46808802aa586a7280a5441acbe923 Mon Sep 17 00:00:00 2001 From: V Donaldson Date: Tue, 4 Jan 2022 12:54:45 -0800 Subject: [PATCH 583/992] [flang] Modify an IO format error message F18 constraint C1308 is: For the G edit descriptor, e shall not be specified if w is zero. 
For an edit descriptor such as 'G0.2E4', change the error message from: error: Unexpected 'e' in 'G0' edit descriptor To: error: A 'G0' edit descriptor must not have an 'e' value --- flang/include/flang/Common/format.h | 4 ++-- flang/test/Semantics/io07.f90 | 4 ++-- flang/test/Semantics/io08.f90 | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h index e38ea6b0dfed..7ca3faa79f29 100644 --- a/flang/include/flang/Common/format.h +++ b/flang/include/flang/Common/format.h @@ -606,8 +606,8 @@ template bool FormatValidator::Check() { check_e(); } } else if (token_.kind() == TokenKind::Point && check_d() && - token_.kind() == TokenKind::E) { - ReportError("Unexpected 'e' in 'G0' edit descriptor"); // C1308 + token_.kind() == TokenKind::E) { // C1308 + ReportError("A 'G0' edit descriptor must not have an 'e' value"); NextToken(); if (token_.kind() == TokenKind::UnsignedInteger) { NextToken(); diff --git a/flang/test/Semantics/io07.f90 b/flang/test/Semantics/io07.f90 index 5c4c2b419d97..9b1f048e5b17 100644 --- a/flang/test/Semantics/io07.f90 +++ b/flang/test/Semantics/io07.f90 @@ -74,9 +74,9 @@ 8001 format(9G0.5) - !ERROR: Unexpected 'e' in 'G0' edit descriptor + !ERROR: A 'G0' edit descriptor must not have an 'e' value 8101 format(9(G0.5e1)) - !ERROR: Unexpected 'e' in 'G0' edit descriptor + !ERROR: A 'G0' edit descriptor must not have an 'e' value 8102 format(9(G0.5 E 1)) end diff --git a/flang/test/Semantics/io08.f90 b/flang/test/Semantics/io08.f90 index c074e1562e22..843028acfd5b 100644 --- a/flang/test/Semantics/io08.f90 +++ b/flang/test/Semantics/io08.f90 @@ -189,10 +189,10 @@ !ERROR: Expected 'G' edit descriptor '.d' value write(*,'(G4)') - !ERROR: Unexpected 'e' in 'G0' edit descriptor + !ERROR: A 'G0' edit descriptor must not have an 'e' value write(*,'(G0.8e)') - !ERROR: Unexpected 'e' in 'G0' edit descriptor + !ERROR: A 'G0' edit descriptor must not have an 'e' value 
write(*,'(G0.8e2)') !ERROR: Kind parameter '_' character in format expression From 090f8ec8a8dc31ac4402f345ed79462de9b3dc01 Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Tue, 4 Jan 2022 14:34:15 -0800 Subject: [PATCH 584/992] [Hexagon] Fix some issues with packetizing slot0-only instructions --- .../Target/Hexagon/HexagonVLIWPacketizer.cpp | 35 ++++++++----------- .../Target/Hexagon/HexagonVLIWPacketizer.h | 1 - 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 0f736a189245..e9b658d18175 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -886,7 +886,8 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, // Create a dot new machine instruction to see if resources can be // allocated. If not, bail out now. - int NewOpcode = HII->getDotNewOp(MI); + int NewOpcode = (RC != &Hexagon::PredRegsRegClass) ? HII->getDotNewOp(MI) : + HII->getDotNewPredOp(MI, MBPI); const MCInstrDesc &D = HII->get(NewOpcode); MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI); @@ -1107,6 +1108,11 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, HII.isHVXMemWithAIndirect(MI, MJ)) return true; + // Don't allow a store and an instruction that must be in slot0 and + // doesn't allow a slot1 instruction. + if (MI.mayStore() && HII.isRestrictNoSlot1Store(MJ) && HII.isPureSlot0(MJ)) + return true; + // An inline asm cannot be together with a branch, because we may not be // able to remove the asm out after packetizing (i.e. if the asm must be // moved past the bundle). 
Similarly, two asms cannot be together to avoid @@ -1526,6 +1532,13 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { bool IsVecJ = HII->isHVXVec(J); bool IsVecI = HII->isHVXVec(I); + // Don't reorder the loads if there is an order dependence. This would + // occur if the first instruction must go in slot0. + if (LoadJ && LoadI && HII->isPureSlot0(J)) { + FoundSequentialDependence = true; + break; + } + if (Slot1Store && MF.getSubtarget().hasV65Ops() && ((LoadJ && StoreI && !NVStoreI) || (StoreJ && LoadI && !NVStoreJ)) && @@ -1821,14 +1834,6 @@ bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { if (Minimal) return false; - // Constrainst for not packetizing this MI with existing instructions in a - // packet. - // MI is a store instruction. - // CurrentPacketMIs has a SLOT0 only instruction with constraint - // A_RESTRICT_NOSLOT1_STORE/isRestrictNoSlot1Store. - if (MI.mayStore() && isPureSlot0InsnWithNoSlot1Store(MI)) - return false; - if (producesStall(MI)) return false; @@ -1868,18 +1873,6 @@ bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { return true; } -bool HexagonPacketizerList::isPureSlot0InsnWithNoSlot1Store( - const MachineInstr &MI) { - bool noSlot1Store = false; - bool isSlot0Only = false; - for (auto J : CurrentPacketMIs) { - noSlot1Store |= HII->isRestrictNoSlot1Store(*J); - isSlot0Only |= HII->isPureSlot0(*J); - } - - return (noSlot1Store && isSlot0Only); -} - // V60 forward scheduling. unsigned int HexagonPacketizerList::calcStall(const MachineInstr &I) { // Check whether the previous packet is in a different loop. 
If this is the diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 5d1b6d6faa12..6a709e566f86 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -159,7 +159,6 @@ class HexagonPacketizerList : public VLIWPacketizerList { bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J); bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J); bool producesStall(const MachineInstr &MI); - bool isPureSlot0InsnWithNoSlot1Store(const MachineInstr &MI); unsigned int calcStall(const MachineInstr &MI); }; From 6d722801d1a2cd1af8e139c8052443feb62c0eae Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 4 Jan 2022 19:55:37 +0100 Subject: [PATCH 585/992] [libc++][ranges] Add indirectly_comparable concept Add `indirectly_comparable` concept Reviewed By: Quuxplusone, Mordante, #libc Spies: mgorny, libcxx-commits Differential Revision: https://reviews.llvm.org/D116268 --- libcxx/docs/Status/RangesPaper.csv | 2 +- libcxx/include/CMakeLists.txt | 1 + .../__iterator/indirectly_comparable.h | 30 +++++++++++ libcxx/include/iterator | 6 +++ libcxx/include/module.modulemap | 1 + .../indirectly_comparable.module.verify.cpp | 15 ++++++ .../indirectly_comparable.compile.pass.cpp | 51 +++++++++++++++++++ 7 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 libcxx/include/__iterator/indirectly_comparable.h create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/iterator/indirectly_comparable.module.verify.cpp create mode 100644 libcxx/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp diff --git a/libcxx/docs/Status/RangesPaper.csv b/libcxx/docs/Status/RangesPaper.csv index ed1900bd1244..2c972f27e004 100644 --- a/libcxx/docs/Status/RangesPaper.csv +++ b/libcxx/docs/Status/RangesPaper.csv @@ -63,7 +63,7 @@ 
Section,Description,Dependencies,Assignee,Complete | indirectly_copyable_storable",[iterator.concepts],Zoe Carver,In progress [common.alg.req]: pt. 2,indirectly_swappable,"| [iterator.concepts] | [iterator.cust.swap]",Zoe Carver,✅ -[common.alg.req]: pt. 3,indirectly_comparable,[projected],Louis Dionne,Not started +[common.alg.req]: pt. 3,indirectly_comparable,[projected],Nikolas Klauser,✅ [common.alg.req]: pt. 4,"| permutable | mergeable | sortable",[iterator.concepts],Unassigned,Not started diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 180f7d4259cd..2354244a5f5e 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -215,6 +215,7 @@ set(files __iterator/erase_if_container.h __iterator/front_insert_iterator.h __iterator/incrementable_traits.h + __iterator/indirectly_comparable.h __iterator/insert_iterator.h __iterator/istream_iterator.h __iterator/istreambuf_iterator.h diff --git a/libcxx/include/__iterator/indirectly_comparable.h b/libcxx/include/__iterator/indirectly_comparable.h new file mode 100644 index 000000000000..3129b2dcf65e --- /dev/null +++ b/libcxx/include/__iterator/indirectly_comparable.h @@ -0,0 +1,30 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_INDIRECTLY_COMPARABLE_H +#define _LIBCPP___ITERATOR_INDIRECTLY_COMPARABLE_H + +#include <__config> +#include <__functional/identity.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#ifndef _LIBCPP_HAS_NO_RANGES + +template +concept indirectly_comparable = + indirect_binary_predicate<_Rp, projected<_I1, _P1>, projected<_I2, _P2>>; + +#endif // _LIBCPP_HAS_NO_RANGES + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ITERATOR_INDIRECTLY_COMPARABLE_H diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 4dd9902d79a2..be223192de2d 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -140,6 +140,11 @@ template template concept indirectly_swappable = see below; // since C++20 +template + concept indirectly_comparable = + indirect_binary_predicate, projected>; // since C++20 + template S> requires (!same_as && copyable) class common_iterator; // since C++20 @@ -593,6 +598,7 @@ template constexpr const E* data(initializer_list il) noexcept; #include <__iterator/erase_if_container.h> #include <__iterator/front_insert_iterator.h> #include <__iterator/incrementable_traits.h> +#include <__iterator/indirectly_comparable.h> #include <__iterator/insert_iterator.h> #include <__iterator/istreambuf_iterator.h> #include <__iterator/istream_iterator.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 1dc6db406a74..8eba28b0efb3 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -594,6 +594,7 @@ module std [system] { module erase_if_container { private header "__iterator/erase_if_container.h" } module front_insert_iterator { private header "__iterator/front_insert_iterator.h" } module incrementable_traits { private header "__iterator/incrementable_traits.h" 
} + module indirectly_comparable { private header "__iterator/indirectly_comparable.h" } module insert_iterator { private header "__iterator/insert_iterator.h" } module istream_iterator { private header "__iterator/istream_iterator.h" } module istreambuf_iterator { private header "__iterator/istreambuf_iterator.h" } diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/iterator/indirectly_comparable.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/iterator/indirectly_comparable.module.verify.cpp new file mode 100644 index 000000000000..7115f7241e89 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/iterator/indirectly_comparable.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__iterator/indirectly_comparable.h'}} +#include <__iterator/indirectly_comparable.h> diff --git a/libcxx/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp new file mode 100644 index 000000000000..ff415cc4ea32 --- /dev/null +++ b/libcxx/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts + +// template +// concept indirectly_­comparable; + +#include +#include +#include + +struct Deref { + int operator()(int*) const; +}; + +static_assert(!std::indirectly_comparable>); // not dereferenceable +static_assert(!std::indirectly_comparable); // not a predicate +static_assert( std::indirectly_comparable>); +static_assert(!std::indirectly_comparable>); +static_assert( std::indirectly_comparable, Deref>); +static_assert(!std::indirectly_comparable, Deref, Deref>); +static_assert(!std::indirectly_comparable, std::identity, Deref>); +static_assert( std::indirectly_comparable, std::identity, Deref>); + +template + requires std::indirectly_comparable + && true // This true is an additional atomic constraint as a tie breaker +constexpr bool subsumes(F) { return true; } + +template + requires std::indirect_binary_predicate, std::projected> +void subsumes(F); + +template + requires 
std::indirect_binary_predicate, std::projected> + && true // This true is an additional atomic constraint as a tie breaker +constexpr bool is_subsumed(F) { return true; } + +template + requires std::indirectly_comparable +void is_subsumed(F); + +static_assert(subsumes(std::less())); +static_assert(is_subsumed(std::less())); From f61b658d7d1d39dcb82ec6c2f993f61240025486 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 4 Jan 2022 22:44:20 +0000 Subject: [PATCH 586/992] [gn build] Port 6d722801d1a2 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 210041831574..31f243616dfa 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -275,6 +275,7 @@ if (current_toolchain == default_toolchain) { "__iterator/erase_if_container.h", "__iterator/front_insert_iterator.h", "__iterator/incrementable_traits.h", + "__iterator/indirectly_comparable.h", "__iterator/insert_iterator.h", "__iterator/istream_iterator.h", "__iterator/istreambuf_iterator.h", From 1e1e97a3267325487ffae4a16fdaaec58931ca86 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 4 Jan 2022 23:28:21 +0100 Subject: [PATCH 587/992] [libc++][test] Allow multiple tries in some thread tests --- .../thread/thread.threads/thread.thread.this/sleep_for.pass.cpp | 2 ++ .../thread.thread.this/sleep_for.signals.pass.cpp | 2 ++ .../std/thread/futures/futures.unique_future/wait_for.pass.cpp | 2 ++ .../thread.sharedtimedmutex.class/lock.pass.cpp | 2 ++ .../thread.sharedtimedmutex.class/lock_shared.pass.cpp | 2 ++ 5 files changed, 10 insertions(+) diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp index 474520c33929..fed874da3224 100644 --- 
a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp @@ -13,6 +13,8 @@ // corresponding system libraries. // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} +// ALLOW_RETRIES: 3 + // // template diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp index 3cdb6735218b..7024c9d8e686 100644 --- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp @@ -16,6 +16,8 @@ // libraries. // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} +// ALLOW_RETRIES: 3 + // // template diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp index 2a04ade2592e..ff5d979e8c14 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp @@ -9,6 +9,8 @@ // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: c++03 +// ALLOW_RETRIES: 3 + // // class future diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp index f0bdf0dfa75e..def36ca5e35f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp @@ -12,6 +12,8 @@ // shared_timed_mutex was introduced in macosx10.12 // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} +// ALLOW_RETRIES: 3 + // // class shared_timed_mutex; diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp index d8a02a6bc4c0..aad2c43473be 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp @@ -12,6 +12,8 @@ // shared_timed_mutex was introduced in macosx10.12 // UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} +// ALLOW_RETRIES: 3 + // // class shared_timed_mutex; From d496abbe2a03721d5d22a697a3a1c5961a55f7f2 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Jan 2022 15:11:44 -0800 Subject: [PATCH 588/992] [lld-link] Replace LazyObjFile with lazy ObjFile/BitcodeFile Similar to ELF 3a5fb57393c3bc77be9e7afc2ec9d4ec3c9bbf70. 
* previously when a LazyObjFile was extracted, a new ObjFile/BitcodeFile was created; now the file is reused, just with `lazy` cleared * avoid the confusing transfer of `symbols` from LazyObjFile to the new file * simpler code, smaller executable (5200+ bytes smaller on x86-64) * make eager parsing feasible (for parallel section/symbol table initialization) Reviewed By: aganea, rnk Differential Revision: https://reviews.llvm.org/D116434 --- lld/COFF/Driver.cpp | 13 ++++--------- lld/COFF/InputFiles.cpp | 41 ++++++++++------------------------------ lld/COFF/InputFiles.h | 40 ++++++++++++--------------------------- lld/COFF/SymbolTable.cpp | 36 ++++++++++++++++++++++------------- lld/COFF/SymbolTable.h | 2 +- lld/COFF/Symbols.h | 5 ++--- 6 files changed, 52 insertions(+), 85 deletions(-) diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 07b60673577e..6178d328e3f5 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -208,17 +208,11 @@ void LinkerDriver::addBuffer(std::unique_ptr mb, ctx.symtab.addFile(make(ctx, mbref)); break; case file_magic::bitcode: - if (lazy) - ctx.symtab.addFile(make(ctx, mbref)); - else - ctx.symtab.addFile(make(ctx, mbref, "", 0)); + ctx.symtab.addFile(make(ctx, mbref, "", 0, lazy)); break; case file_magic::coff_object: case file_magic::coff_import_library: - if (lazy) - ctx.symtab.addFile(make(ctx, mbref)); - else - ctx.symtab.addFile(make(ctx, mbref)); + ctx.symtab.addFile(make(ctx, mbref, lazy)); break; case file_magic::pdb: ctx.symtab.addFile(make(ctx, mbref)); @@ -282,7 +276,8 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName, if (magic == file_magic::coff_object) { obj = make(ctx, mb); } else if (magic == file_magic::bitcode) { - obj = make(ctx, mb, parentName, offsetInArchive); + obj = + make(ctx, mb, parentName, offsetInArchive, /*lazy=*/false); } else { error("unknown file type: " + mb.getBufferIdentifier()); return; diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 
4b38e3d1a99b..06eec40d3a22 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -135,31 +135,7 @@ std::vector lld::coff::getArchiveMembers(Archive *file) { return v; } -void LazyObjFile::fetch() { - if (mb.getBuffer().empty()) - return; - - InputFile *file; - if (isBitcode(mb)) - file = make(ctx, mb, "", 0, std::move(symbols)); - else - file = make(ctx, mb, std::move(symbols)); - mb = {}; - ctx.symtab.addFile(file); -} - -void LazyObjFile::parse() { - if (isBitcode(this->mb)) { - // Bitcode file. - std::unique_ptr obj = - CHECK(lto::InputFile::create(this->mb), this); - for (const lto::InputFile::Symbol &sym : obj->symbols()) { - if (!sym.isUndefined()) - ctx.symtab.addLazyObject(this, sym.getName()); - } - return; - } - +void ObjFile::parseLazy() { // Native object file. std::unique_ptr coffObjPtr = CHECK(createBinary(mb), this); COFFObjectFile *coffObj = cast(coffObjPtr.get()); @@ -1005,14 +981,10 @@ void ImportFile::parse() { name, cast_or_null(impSym), hdr->Machine); } -BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, - StringRef archiveName, uint64_t offsetInArchive) - : BitcodeFile(ctx, mb, archiveName, offsetInArchive, {}) {} - BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive, - std::vector &&symbols) - : InputFile(ctx, BitcodeKind, mb), symbols(std::move(symbols)) { + bool lazy) + : InputFile(ctx, BitcodeKind, mb, lazy) { std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) path = replaceThinLTOSuffix(mb.getBufferIdentifier()); @@ -1107,6 +1079,13 @@ void BitcodeFile::parse() { directives = obj->getCOFFLinkerOpts(); } +void BitcodeFile::parseLazy() { + std::unique_ptr obj = CHECK(lto::InputFile::create(mb), this); + for (const lto::InputFile::Symbol &sym : obj->symbols()) + if (!sym.isUndefined()) + ctx.symtab.addLazyObject(this, sym.getName()); +} + MachineTypes BitcodeFile::getMachineType() { switch 
(Triple(obj->getTargetTriple()).getArch()) { case Triple::x86_64: diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 801c668d3ae4..2cabb54cb386 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -95,13 +95,17 @@ class InputFile { COFFLinkerContext &ctx; protected: - InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m) - : mb(m), ctx(c), fileKind(k) {} + InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false) + : mb(m), ctx(c), fileKind(k), lazy(lazy) {} StringRef directives; private: const Kind fileKind; + +public: + // True if this is a lazy ObjFile or BitcodeFile. + bool lazy = false; }; // .lib or .a file. @@ -121,33 +125,14 @@ class ArchiveFile : public InputFile { llvm::DenseSet seen; }; -// .obj or .o file between -start-lib and -end-lib. -class LazyObjFile : public InputFile { -public: - explicit LazyObjFile(COFFLinkerContext &ctx, MemoryBufferRef m) - : InputFile(ctx, LazyObjectKind, m) {} - static bool classof(const InputFile *f) { - return f->kind() == LazyObjectKind; - } - // Makes this object file part of the link. - void fetch(); - // Adds the symbols in this file to the symbol table as LazyObject symbols. - void parse() override; - -private: - std::vector symbols; -}; - // .obj or .o file. This may be a member of an archive file. 
class ObjFile : public InputFile { public: - explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m) - : InputFile(ctx, ObjectKind, m) {} - explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, - std::vector &&symbols) - : InputFile(ctx, ObjectKind, m), symbols(std::move(symbols)) {} + explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false) + : InputFile(ctx, ObjectKind, m, lazy) {} static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } void parse() override; + void parseLazy(); MachineTypes getMachineType() override; ArrayRef getChunks() { return chunks; } ArrayRef getDebugChunks() { return debugChunks; } @@ -380,15 +365,14 @@ class ImportFile : public InputFile { // Used for LTO. class BitcodeFile : public InputFile { public: - BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive); - explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef m, + explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive, - std::vector &&symbols); + bool lazy); ~BitcodeFile(); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } ArrayRef getSymbols() { return symbols; } MachineTypes getMachineType() override; + void parseLazy(); std::unique_ptr obj; private: diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 679c91ad06e6..9ceac7af7f91 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -37,7 +37,21 @@ StringRef ltrim1(StringRef s, const char *chars) { void SymbolTable::addFile(InputFile *file) { log("Reading " + toString(file)); - file->parse(); + if (file->lazy) { + if (auto *f = dyn_cast(file)) + f->parseLazy(); + else + cast(file)->parseLazy(); + } else { + file->parse(); + if (auto *f = dyn_cast(file)) { + ctx.objFileInstances.push_back(f); + } else if (auto *f = dyn_cast(file)) { + ctx.bitcodeFileInstances.push_back(f); + } else if (auto *f = 
dyn_cast(file)) { + ctx.importFileInstances.push_back(f); + } + } MachineTypes mt = file->getMachineType(); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { @@ -48,14 +62,6 @@ void SymbolTable::addFile(InputFile *file) { return; } - if (auto *f = dyn_cast(file)) { - ctx.objFileInstances.push_back(f); - } else if (auto *f = dyn_cast(file)) { - ctx.bitcodeFileInstances.push_back(f); - } else if (auto *f = dyn_cast(file)) { - ctx.importFileInstances.push_back(f); - } - driver->parseDirectives(file); } @@ -75,9 +81,11 @@ static void forceLazy(Symbol *s) { l->file->addMember(l->sym); break; } - case Symbol::Kind::LazyObjectKind: - cast(s)->file->fetch(); + case Symbol::Kind::LazyObjectKind: { + InputFile *file = cast(s)->file; + file->ctx.symtab.addFile(file); break; + } case Symbol::Kind::LazyDLLSymbolKind: { auto *l = cast(s); l->file->makeImport(l->sym); @@ -562,7 +570,8 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) { f->addMember(sym); } -void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) { +void SymbolTable::addLazyObject(InputFile *f, StringRef n) { + assert(f->lazy); Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); @@ -574,7 +583,8 @@ void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) { if (!u || u->weakAlias || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; - f->fetch(); + f->lazy = false; + addFile(f); } void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 3e76b416d1a0..47f3238fd75b 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -91,7 +91,7 @@ class SymbolTable { Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias); void addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym); - void addLazyObject(LazyObjFile *f, StringRef n); + void addLazyObject(InputFile *f, StringRef n); void addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, StringRef n); 
Symbol *addAbsolute(StringRef n, COFFSymbolRef s); Symbol *addRegular(InputFile *f, StringRef n, diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index bb911171b1f5..c8865d128fb8 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -305,10 +305,9 @@ class LazyArchive : public Symbol { class LazyObject : public Symbol { public: - LazyObject(LazyObjFile *f, StringRef n) - : Symbol(LazyObjectKind, n), file(f) {} + LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {} static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } - LazyObjFile *file; + InputFile *file; }; // MinGW only. From 9e365fe326d694a05775dd166e21352e9529bd1d Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 4 Jan 2022 15:37:33 -0800 Subject: [PATCH 589/992] [mlir] Retain metadata for single loc fusedloc Previously, if a fusedloc was created with a single location, no fusedloc was created and the single location was returned instead. In the case where there is metadata associated with the location, this results in discarding the metadata. Instead, only canonicalize where there is no loss of information.
Differential Revision: https://reviews.llvm.org/D115605 --- mlir/lib/Bindings/Python/IRCore.cpp | 2 -- mlir/lib/IR/Location.cpp | 16 ++++++++++++---- mlir/test/IR/locations.mlir | 4 ++++ mlir/test/python/ir/location.py | 20 ++++++++++++++++++++ 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 1a7eb46f7529..1a9604882fe0 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -2363,8 +2363,6 @@ void mlir::python::populateIRCore(py::module &m) { [](const std::vector &pyLocations, llvm::Optional metadata, DefaultingPyMlirContext context) { - if (pyLocations.empty()) - throw py::value_error("No locations provided"); llvm::SmallVector locations; locations.reserve(pyLocations.size()); for (auto &pyLocation : pyLocations) diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp index 1de4d73bbe4f..ce88b244a90d 100644 --- a/mlir/lib/IR/Location.cpp +++ b/mlir/lib/IR/Location.cpp @@ -106,10 +106,18 @@ Location FusedLoc::get(ArrayRef locs, Attribute metadata, } locs = decomposedLocs.getArrayRef(); - // Handle the simple cases of less than two locations. - if (locs.empty()) - return UnknownLoc::get(context); - if (locs.size() == 1) + // Handle the simple cases of less than two locations. Ensure the metadata (if + // provided) is not dropped. + if (locs.empty()) { + if (!metadata) + return UnknownLoc::get(context); + // TODO: Investigate ASAN failure when using implicit conversion from + // Location to ArrayRef below. 
+ return Base::get(context, ArrayRef{UnknownLoc::get(context)}, + metadata); + } + if (locs.size() == 1 && !metadata) return locs.front(); + return Base::get(context, locs, metadata); } diff --git a/mlir/test/IR/locations.mlir b/mlir/test/IR/locations.mlir index 0016c3ec6611..f6c4f21cfd7c 100644 --- a/mlir/test/IR/locations.mlir +++ b/mlir/test/IR/locations.mlir @@ -21,6 +21,10 @@ func @inline_notation() -> i32 { affine.if #set0(%2) { } loc(fused<"myPass">["foo", "foo2"]) + // CHECK: } loc(fused<"myPass">["foo"]) + affine.if #set0(%2) { + } loc(fused<"myPass">["foo"]) + // CHECK: return %0 : i32 loc(unknown) return %1 : i32 loc(unknown) } diff --git a/mlir/test/python/ir/location.py b/mlir/test/python/ir/location.py index 1c13c4870cbe..ecdd02efb0ae 100644 --- a/mlir/test/python/ir/location.py +++ b/mlir/test/python/ir/location.py @@ -78,12 +78,20 @@ def testCallSite(): # CHECK-LABEL: TEST: testFused def testFused(): with Context() as ctx: + loc_single = Location.fused([Location.name("apple")]) loc = Location.fused( [Location.name("apple"), Location.name("banana")]) attr = Attribute.parse('"sauteed"') loc_attr = Location.fused([Location.name("carrot"), Location.name("potatoes")], attr) + loc_empty = Location.fused([]) + loc_empty_attr = Location.fused([], attr) + loc_single_attr = Location.fused([Location.name("apple")], attr) ctx = None + # CHECK: file str: loc("apple") + print("file str:", str(loc_single)) + # CHECK: file repr: loc("apple") + print("file repr:", repr(loc_single)) # CHECK: file str: loc(fused["apple", "banana"]) print("file str:", str(loc)) # CHECK: file repr: loc(fused["apple", "banana"]) @@ -92,6 +100,18 @@ def testFused(): print("file str:", str(loc_attr)) # CHECK: file repr: loc(fused<"sauteed">["carrot", "potatoes"]) print("file repr:", repr(loc_attr)) + # CHECK: file str: loc(unknown) + print("file str:", str(loc_empty)) + # CHECK: file repr: loc(unknown) + print("file repr:", repr(loc_empty)) + # CHECK: file str: 
loc(fused<"sauteed">[unknown]) + print("file str:", str(loc_empty_attr)) + # CHECK: file repr: loc(fused<"sauteed">[unknown]) + print("file repr:", repr(loc_empty_attr)) + # CHECK: file str: loc(fused<"sauteed">["apple"]) + print("file str:", str(loc_single_attr)) + # CHECK: file repr: loc(fused<"sauteed">["apple"]) + print("file repr:", repr(loc_single_attr)) run(testFused) From 60944d132fe35b774017b7ad05edb55642509642 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Jan 2022 15:37:05 -0800 Subject: [PATCH 590/992] [Hexagon] Convert codegen testcase from .ll to .mir --- .../Target/Hexagon/HexagonCopyToCombine.cpp | 4 +-- .../Target/Hexagon/HexagonTargetMachine.cpp | 2 ++ llvm/test/CodeGen/Hexagon/const64.ll | 18 ------------ .../Hexagon/copy-to-combine-const64.mir | 29 +++++++++++++++++++ 4 files changed, 32 insertions(+), 21 deletions(-) delete mode 100644 llvm/test/CodeGen/Hexagon/const64.ll create mode 100644 llvm/test/CodeGen/Hexagon/copy-to-combine-const64.mir diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 03b0f75b2dc1..2ee7f1325df9 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -70,9 +70,7 @@ class HexagonCopyToCombine : public MachineFunctionPass { public: static char ID; - HexagonCopyToCombine() : MachineFunctionPass(ID) { - initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); - } + HexagonCopyToCombine() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index fcf829b522cc..c6703bb8a62a 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -139,6 +139,7 @@ namespace llvm { void initializeHexagonBitSimplifyPass(PassRegistry&); 
void initializeHexagonConstExtendersPass(PassRegistry&); void initializeHexagonConstPropagationPass(PassRegistry&); + void initializeHexagonCopyToCombinePass(PassRegistry&); void initializeHexagonEarlyIfConversionPass(PassRegistry&); void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); @@ -199,6 +200,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() { initializeHexagonBitSimplifyPass(PR); initializeHexagonConstExtendersPass(PR); initializeHexagonConstPropagationPass(PR); + initializeHexagonCopyToCombinePass(PR); initializeHexagonEarlyIfConversionPass(PR); initializeHexagonGenMuxPass(PR); initializeHexagonHardwareLoopsPass(PR); diff --git a/llvm/test/CodeGen/Hexagon/const64.ll b/llvm/test/CodeGen/Hexagon/const64.ll deleted file mode 100644 index 018157d97024..000000000000 --- a/llvm/test/CodeGen/Hexagon/const64.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: llc -march=hexagon -disable-const64=0 < %s | FileCheck %s -; RUN: llc -march=hexagon -disable-const64=1 < %s | FileCheck %s --check-prefix=CHECKOLD - -; CHECK: CONST64 -; CHECKOLD-NOT: CONST64 - -target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32" -target triple = "hexagon" - -; Function Attrs: nounwind -define void @foo() optsize { -entry: - call void @bar(i32 32768, i32 32768, i8 zeroext 1) - ret void -} - -declare void @bar(i32, i32, i8 zeroext) - diff --git a/llvm/test/CodeGen/Hexagon/copy-to-combine-const64.mir b/llvm/test/CodeGen/Hexagon/copy-to-combine-const64.mir new file mode 100644 index 000000000000..d20d7692e861 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/copy-to-combine-const64.mir @@ -0,0 +1,29 @@ +# RUN: llc -march=hexagon -run-pass hexagon-copy-combine -o - %s -disable-const64=0 | FileCheck --check-prefix CHECK64 %s +# RUN: llc -march=hexagon -run-pass hexagon-copy-combine -o - %s -disable-const64=1 | FileCheck --check-prefix CHECKNO64 %s + +# CHECK64: CONST64 +# CHECKNO64-NOT: CONST64 + +--- | + define void 
@f0() optsize { + entry: + call void @f1(i32 32768, i32 32768, i8 zeroext 1) + ret void + } + + declare void @f1(i32, i32, i8 zeroext) +... + +name: f0 +tracksRegLiveness: true +stack: + - { id: 0, offset: 0, size: 4, alignment: 8 } +body: | + bb.0: + $r29 = S2_allocframe $r29, 0, implicit-def $r30, implicit $framekey, implicit $framelimit, implicit $r30, implicit $r31 :: (store (s32) into stack) + $r0 = A2_tfrsi 32768 + $r1 = A2_tfrsi 32768 + $r2 = A2_tfrsi 1 + J2_call @f1, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit killed $r0, implicit killed $r1, implicit killed $r2, implicit-def $r29 + $d15 = L4_return $r30, implicit-def $pc, implicit-def $r29, implicit $framekey, implicit-def dead $pc +... From 52f347010a8c43118293feab05e09a4df52ba04f Mon Sep 17 00:00:00 2001 From: SANTANU DAS Date: Tue, 3 Nov 2020 00:45:40 +0530 Subject: [PATCH 591/992] [Hexagon] Make A2_tfrsi not cheap for operands exceeding 16 bits This patch helps to reduce code size since it removes generation of back-to-back A2_tfrsi instructions. It is enabled only at -Os/-Oz. --- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 25 +++++++++++ llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 1 + llvm/test/CodeGen/Hexagon/notcheap.ll | 45 ++++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/notcheap.ll diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index fdf480722a2f..1aedff9a2cc3 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -146,6 +146,31 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, return Count; } +// Check if the A2_tfrsi instruction is cheap or not. If the operand has +// to be constant-extended it is not cheap since it occupies two slots +// in a packet. 
+bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { + // Enable the following steps only at Os/Oz + if (!(MI.getMF()->getFunction().hasOptSize())) + return MI.isAsCheapAsAMove(); + + if (MI.getOpcode() == Hexagon::A2_tfrsi) { + auto Op = MI.getOperand(1); + // If the instruction has a global address as operand, it is not cheap + // since the operand will be constant extended. + if (Op.getType() == MachineOperand::MO_GlobalAddress) + return false; + // If the instruction has an operand of size > 16 bits, it will be + // const-extended and hence, it is not cheap. + if (Op.isImm()) { + int64_t Imm = Op.getImm(); + if (!isInt<16>(Imm)) + return false; + } + } + return MI.isAsCheapAsAMove(); +} + /// Find the hardware loop instruction used to set-up the specified loop. /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 830f04d9eac3..05cdf6c98643 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -335,6 +335,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { getSerializableBitmaskMachineOperandTargetFlags() const override; bool isTailCall(const MachineInstr &MI) const override; + bool isAsCheapAsAMove(const MachineInstr &MI) const override; /// HexagonInstrInfo specifics. diff --git a/llvm/test/CodeGen/Hexagon/notcheap.ll b/llvm/test/CodeGen/Hexagon/notcheap.ll new file mode 100644 index 000000000000..1731666dfdd6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/notcheap.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; Check if only one transfer immediate instruction is generated for init.end block. +; Since the transfer immediate of address operand is declared as not cheap, it +; should generate only one transfer immediate, rather than two of them. 
+ +; CHECK-LABEL: .LBB{{.*}} +; CHECK: r0 = ##_ZZ3foovE1x +; CHECK-NOT: r{{[1-9]*}} = ##_ZZ3foovE1x +; CHECK: memw(r0+#0) += #1 +; CHECK: r{{.*}} = dealloc_return + +%struct.FooBaz = type { i32 } +@_ZZ3foovE1x = internal global %struct.FooBaz zeroinitializer, align 4 +@_ZGVZ3foovE1x = internal global i64 0, section ".bss._ZGVZ3foovE1x", align 8 +@__dso_handle = external dso_local global i8 + +define dso_local i32* @_Z3foov() local_unnamed_addr optsize { +entry: + %0 = load atomic i8, i8* bitcast (i64* @_ZGVZ3foovE1x to i8*) acquire, align 8 + %guard.uninitialized = icmp eq i8 %0, 0 + br i1 %guard.uninitialized, label %init.check, label %init.end + +init.check: ; preds = %entry + %1 = tail call i32 @__cxa_guard_acquire(i64* nonnull @_ZGVZ3foovE1x) + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %init.end, label %init + +init: ; preds = %init.check + tail call void @_ZN6FooBazC1Ev(%struct.FooBaz* nonnull @_ZZ3foovE1x) + %2 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.FooBaz*)* @_ZN6FooBazD1Ev to void (i8*)*), i8* bitcast (%struct.FooBaz* @_ZZ3foovE1x to i8*), i8* nonnull @__dso_handle) + tail call void @__cxa_guard_release(i64* nonnull @_ZGVZ3foovE1x) + br label %init.end + +init.end: ; preds = %init, %init.check, %entry + %3 = load i32, i32* getelementptr inbounds (%struct.FooBaz, %struct.FooBaz* @_ZZ3foovE1x, i32 0, i32 0), align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* getelementptr inbounds (%struct.FooBaz, %struct.FooBaz* @_ZZ3foovE1x, i32 0, i32 0), align 4 + ret i32* getelementptr inbounds (%struct.FooBaz, %struct.FooBaz* @_ZZ3foovE1x, i32 0, i32 0) +} + +declare dso_local i32 @__cxa_guard_acquire(i64*) local_unnamed_addr +declare dso_local void @_ZN6FooBazC1Ev(%struct.FooBaz*) unnamed_addr +declare dso_local void @_ZN6FooBazD1Ev(%struct.FooBaz*) unnamed_addr +declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) local_unnamed_addr +declare dso_local void @__cxa_guard_release(i64*) local_unnamed_addr From 
822448635edc95cdf742e7b86630dc24c239032c Mon Sep 17 00:00:00 2001 From: Sumanth Gundapaneni Date: Tue, 31 Aug 2021 22:46:21 -0500 Subject: [PATCH 592/992] [Hexagon] Fix MachineSink not to hoist FP instructions that update USR. Ideally we should mark USR as a Def for these floating point instructions. However, it violates some assembler MCChecker rules. This patch fixes the issue by marking these FP instructions as non-sinkable. --- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 17 + llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 6 + .../Hexagon/machine-sink-float-usr.mir | 325 ++++++++++++++++++ 3 files changed, 348 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 1aedff9a2cc3..a38e43709132 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -171,6 +171,23 @@ bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { return MI.isAsCheapAsAMove(); } +// Do not sink floating point instructions that update the USR register. +// Example: +// feclearexcept +// F2_conv_w2sf +// fetestexcept +// MachineSink sinks F2_conv_w2sf and we are not able to catch exceptions. +// TODO: On some of these floating point instructions, USR is marked as Use. +// In reality, these instructions also Def the USR. If USR is marked as Def, +// some of the assumptions in assembler packetization are broken. +bool HexagonInstrInfo::shouldSink(const MachineInstr &MI) const { + // Assumption: A floating point instruction that reads the USR will write + // the USR as well. + if (isFloat(MI) && MI.hasRegisterImplicitUseOperand(Hexagon::USR)) + return false; + return true; +} + /// Find the hardware loop instruction used to set-up the specified loop. 
/// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 05cdf6c98643..2af09c857d86 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -337,6 +337,12 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool isTailCall(const MachineInstr &MI) const override; bool isAsCheapAsAMove(const MachineInstr &MI) const override; + // Return true if the instruction should be sunk by MachineSink. + // MachineSink determines on its own whether the instruction is safe to sink; + // this gives the target a hook to override the default behavior with regards + // to which instructions should be sunk. + bool shouldSink(const MachineInstr &MI) const override; + /// HexagonInstrInfo specifics. unsigned createVR(MachineFunction *MF, MVT VT) const; diff --git a/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir b/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir new file mode 100644 index 000000000000..ba023bde9251 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir @@ -0,0 +1,325 @@ +# RUN: llc -march=hexagon -run-pass machine-sink -o - %s | FileCheck %s + +# Test that MachineSink does not sink F2_conv_w2sf. 
+# CHECK: name:{{.*}} main +# CHECK: J2_call @feclearexcept +# CHECK: F2_conv_w2sf +# CHECK: J2_call @fetestexcept +--- | + target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + target triple = "hexagon" + + @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + + ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn + define dso_local i32 @syst_int32_to_float32(i32 %a) local_unnamed_addr #0 { + entry: + %conv = sitofp i32 %a to float + %0 = bitcast float %conv to i32 + ret i32 %0 + } + + ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn + declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + + ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn + declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + + ; Function Attrs: nounwind + define dso_local i32 @main() local_unnamed_addr #2 { + entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %a.0.a.0.a.0.a.0..sroa_cast = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %a.0.a.0.a.0.a.0..sroa_cast) + store volatile i32 -16777235, i32* %a, align 4, !tbaa !3 + %b.0.b.0.b.0.b.0..sroa_cast = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %b.0.b.0.b.0.b.0..sroa_cast) + store volatile i32 34, i32* %b, align 4, !tbaa !3 + %c.0.c.0.c.0.c.0..sroa_cast = bitcast i32* %c to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %c.0.c.0.c.0.c.0..sroa_cast) + store volatile i32 34, i32* %c, align 4, !tbaa !3 + %b.0.b.0.b.0.b.0.29 = load volatile i32, i32* %b, align 4, !tbaa !3 + %cmp30 = icmp sgt i32 %b.0.b.0.b.0.b.0.29, 0 + br i1 %cmp30, label %for.body, label %if.end + + for.cond.for.cond.cleanup_crit_edge: ; preds = %for.body + %conv.i.le = sitofp i32 %a.0.a.0.a.0.a.0. 
to float + %0 = bitcast float %conv.i.le to i32 + %phi.cmp = icmp ugt i32 %0, 100 + br i1 %phi.cmp, label %if.then, label %if.end + + for.body: ; preds = %entry, %for.body + %i.031 = phi i32 [ %inc4, %for.body ], [ 0, %entry ] + %c.0.c.0.c.0.c.0. = load volatile i32, i32* %c, align 4, !tbaa !3 + %inc = add nsw i32 %c.0.c.0.c.0.c.0., 1 + store volatile i32 %inc, i32* %c, align 4, !tbaa !3 + %call = tail call i32 @feclearexcept(i32 31) #5 + %a.0.a.0.a.0.a.0. = load volatile i32, i32* %a, align 4, !tbaa !3 + %call2 = tail call i32 @fetestexcept(i32 31) #5 + %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %call2) #5 + %inc4 = add nuw nsw i32 %i.031, 1 + %b.0.b.0.b.0.b.0. = load volatile i32, i32* %b, align 4, !tbaa !3 + %cmp = icmp slt i32 %inc4, %b.0.b.0.b.0.b.0. + br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge, !llvm.loop !7 + + if.then: ; preds = %for.cond.for.cond.cleanup_crit_edge + %a.0.a.0.a.0.a.0.23 = load volatile i32, i32* %a, align 4, !tbaa !3 + %b.0.b.0.b.0.b.0.20 = load volatile i32, i32* %b, align 4, !tbaa !3 + %add = add nsw i32 %b.0.b.0.b.0.b.0.20, %a.0.a.0.a.0.a.0.23 + %c.0.c.0.c.0.c.0.17 = load volatile i32, i32* %c, align 4, !tbaa !3 + %add7 = add nsw i32 %add, %c.0.c.0.c.0.c.0.17 + br label %cleanup + + if.end: ; preds = %entry, %for.cond.for.cond.cleanup_crit_edge + %a.0.a.0.a.0.a.0.24 = load volatile i32, i32* %a, align 4, !tbaa !3 + %b.0.b.0.b.0.b.0.21 = load volatile i32, i32* %b, align 4, !tbaa !3 + %mul.neg = mul i32 %b.0.b.0.b.0.b.0.21, -6 + %sub = add i32 %mul.neg, %a.0.a.0.a.0.a.0.24 + %c.0.c.0.c.0.c.0.18 = load volatile i32, i32* %c, align 4, !tbaa !3 + %mul8 = mul nsw i32 %c.0.c.0.c.0.c.0.18, 3 + %add9 = add nsw i32 %sub, %mul8 + br label %cleanup + + cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %add7, %if.then ], [ %add9, %if.end ] + %1 = bitcast i32* %c to i8* + %2 = bitcast i32* %b to i8* + %3 = 
bitcast i32* %a to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3) + ret i32 %retval.0 + } + + declare dso_local i32 @feclearexcept(i32) local_unnamed_addr #3 + + declare dso_local i32 @fetestexcept(i32) local_unnamed_addr #3 + + ; Function Attrs: nofree nounwind + declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #4 + + attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" } + attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } + attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" } + attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" } + attributes #4 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" } + attributes #5 = { nounwind } + + !llvm.module.flags = !{!0, !1} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 7, !"frame-pointer", i32 2} + !3 = !{!4, !4, i64 0} + !4 = !{!"int", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + !7 = distinct !{!7, !8} + !8 = !{!"llvm.loop.mustprogress"} + +... 
+--- +name: syst_int32_to_float32 +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: intregs, preferred-register: '' } + - { id: 1, class: intregs, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r0 + + %0:intregs = COPY $r0 + %1:intregs = F2_conv_w2sf %0, implicit $usr + $r0 = COPY %1 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... 
+--- +name: main +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: intregs, preferred-register: '' } + - { id: 1, class: intregs, preferred-register: '' } + - { id: 2, class: intregs, preferred-register: '' } + - { id: 3, class: intregs, preferred-register: '' } + - { id: 4, class: intregs, preferred-register: '' } + - { id: 5, class: intregs, preferred-register: '' } + - { id: 6, class: intregs, preferred-register: '' } + - { id: 7, class: intregs, preferred-register: '' } + - { id: 8, class: predregs, preferred-register: '' } + - { id: 9, class: intregs, preferred-register: '' } + - { id: 10, class: intregs, preferred-register: '' } + - { id: 11, class: intregs, preferred-register: '' } + - { id: 12, class: intregs, preferred-register: '' } + - { id: 13, class: intregs, preferred-register: '' } + - { id: 14, class: intregs, preferred-register: '' } + - { id: 15, class: intregs, preferred-register: '' } + - { id: 16, class: predregs, preferred-register: '' } + - { id: 17, class: intregs, preferred-register: '' } + - { id: 18, class: predregs, preferred-register: '' } + - { id: 19, class: intregs, preferred-register: '' } + - { id: 20, class: intregs, preferred-register: '' } + - { id: 21, class: intregs, preferred-register: '' } + - { id: 22, class: intregs, preferred-register: '' } + - { id: 23, class: intregs, preferred-register: '' } + - { id: 24, class: intregs, preferred-register: '' } + - { id: 25, class: intregs, preferred-register: '' } + - { id: 26, class: intregs, preferred-register: '' } + - { id: 27, class: intregs, preferred-register: '' } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: true + stackProtector: '' + maxCallFrameSize: 4294967295 
+ cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: a, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: b, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: c, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.6(0x50000000), %bb.4(0x30000000) + + S4_storeiri_io %stack.0.a, 0, -16777235 :: (volatile store (s32) into %ir.a, !tbaa !3) + S4_storeiri_io %stack.1.b, 0, 34 :: (volatile store (s32) into %ir.b, !tbaa !3) + S4_storeiri_io %stack.2.c, 0, 34 :: (volatile store (s32) into %ir.c, !tbaa !3) + %7:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3) + %8:predregs = C2_cmpgti %7, 0 + %6:intregs = A2_tfrsi 0 + J2_jumpf %8, %bb.4, implicit-def $pc + + bb.6: + successors: %bb.2(0x80000000) + + %9:intregs = A2_tfrsi 31 + %13:intregs = A2_tfrsi @.str + J2_jump %bb.2, implicit-def $pc + + bb.1.for.cond.for.cond.cleanup_crit_edge: + successors: %bb.4(0x40000000) + + J2_jump %bb.4, implicit-def dead $pc + + bb.2.for.body: + successors: %bb.2(0x7c000000), %bb.1(0x04000000) + + %0:intregs = PHI %6, %bb.6, %2, %bb.2 + L4_iadd_memopw_io %stack.2.c, 0, 1 :: (volatile store (s32) into %ir.c, !tbaa !3), (volatile dereferenceable load 
(s32) from %ir.c, !tbaa !3) + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 + $r0 = COPY %9 + J2_call @feclearexcept, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 + %1:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3) + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 + %17:intregs = F2_conv_w2sf %1, implicit $usr + $r0 = COPY %9 + J2_call @fetestexcept, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 + %11:intregs = COPY $r0 + %12:intregs = COPY $r29 + S2_storeri_io %12, 0, %11 :: (store (s32) into stack) + ADJCALLSTACKDOWN 4, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 + $r0 = COPY %13 + J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0 + ADJCALLSTACKUP 4, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 + %2:intregs = nuw nsw A2_addi %0, 1 + %15:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3) + %16:predregs = C2_cmpgt %15, %2 + J2_jumpt %16, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.3.if.then: + successors: %bb.5(0x80000000) + + %18:predregs = C2_cmpgtui %17, 100 + %24:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3) + %25:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3) 
+ %26:intregs = L2_loadri_io %stack.2.c, 0 :: (volatile dereferenceable load (s32) from %ir.c, !tbaa !3) + %3:intregs = nsw M2_acci %26, %25, %24 + J2_jumpf %18, %bb.5, implicit-def dead $pc + J2_jump %bb.5, implicit-def dead $pc + + bb.4.if.end: + successors: %bb.5(0x80000000) + + %19:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3) + %20:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3) + %27:intregs = M2_macsin %19, %20, 6 + %23:intregs = L2_loadri_io %stack.2.c, 0 :: (volatile dereferenceable load (s32) from %ir.c, !tbaa !3) + %4:intregs = nsw M2_macsip %27, %23, 3 + + bb.5.cleanup: + %5:intregs = PHI %4, %bb.4, %3, %bb.3 + $r0 = COPY %5 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... From c99b2c63169d5aa6499143078790cb3eb87dee45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henry=20Linjam=C3=A4ki?= Date: Tue, 4 Jan 2022 16:00:36 -0800 Subject: [PATCH 593/992] CUDA/HIP: Allow __int128 on the host side Consider case where `__int128` type is supported by the host target but not by a device target (e.g. spirv*). Clang emits an error message for unsupported type even if the device code does not use it. This patch fixes this issue by emitting the error message when the device code attempts to use the unsupported type. 
Reviewed By: tra Differential Revision: https://reviews.llvm.org/D111047 --- clang/lib/Sema/Sema.cpp | 3 ++- clang/lib/Sema/SemaType.cpp | 4 ++-- clang/test/SemaCUDA/allow-int128.cu | 16 ++++++++++++++++ clang/test/SemaCUDA/spirv-int128.cu | 16 ++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 clang/test/SemaCUDA/allow-int128.cu create mode 100644 clang/test/SemaCUDA/spirv-int128.cu diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index ba69400fdbbf..60f37c17c3f1 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1941,7 +1941,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { }; auto CheckType = [&](QualType Ty, bool IsRetTy = false) { - if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) + if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice) || + LangOpts.CUDAIsDevice) CheckDeviceType(Ty); QualType UnqualTy = Ty.getCanonicalType().getUnqualifiedType(); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0b3154e6bcb6..57825fe3d79b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1495,8 +1495,8 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { } case DeclSpec::TST_int128: if (!S.Context.getTargetInfo().hasInt128Type() && - !S.getLangOpts().SYCLIsDevice && - !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice)) + !(S.getLangOpts().SYCLIsDevice || S.getLangOpts().CUDAIsDevice || + (S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))) S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__int128"; if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned) diff --git a/clang/test/SemaCUDA/allow-int128.cu b/clang/test/SemaCUDA/allow-int128.cu new file mode 100644 index 000000000000..eb7b7e7f5286 --- /dev/null +++ b/clang/test/SemaCUDA/allow-int128.cu @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: 
-aux-triple x86_64-unknown-linux-gnu \ +// RUN: -fcuda-is-device -verify -fsyntax-only %s +// RUN: %clang_cc1 -triple nvptx \ +// RUN: -aux-triple x86_64-unknown-linux-gnu \ +// RUN: -fcuda-is-device -verify -fsyntax-only %s + +// expected-no-diagnostics +#define __device__ __attribute__((device)) + +__int128 h_glb; +__device__ __int128 d_unused; +__device__ __int128 d_glb; +__device__ __int128 bar() { + return d_glb; +} diff --git a/clang/test/SemaCUDA/spirv-int128.cu b/clang/test/SemaCUDA/spirv-int128.cu new file mode 100644 index 000000000000..b2ff5ae5f692 --- /dev/null +++ b/clang/test/SemaCUDA/spirv-int128.cu @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple spirv64 -aux-triple x86_64-unknown-linux-gnu \ +// RUN: -fcuda-is-device -verify -fsyntax-only %s + +#define __device__ __attribute__((device)) + +__int128 h_glb; + +__device__ __int128 d_unused; + +// expected-note@+1 {{'d_glb' defined here}} +__device__ __int128 d_glb; + +__device__ __int128 bar() { + // expected-error@+1 {{'d_glb' requires 128 bit size '__int128' type support, but target 'spirv64' does not support it}} + return d_glb; +} From 85b8d03e12bbf33daeb38811be8b749b4131dc89 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Tue, 4 Jan 2022 15:06:28 -0800 Subject: [PATCH 594/992] [mlir][sparse] Factoring out Transforms/CodegenUtils.{cpp,h} This moves a bunch of helper functions from `Transforms/SparseTensorConversion.cpp` into `Transforms/CodegenUtils.{cpp,h}` so that they can be reused by `Transforms/Sparsification.cpp`, etc. See also the dependent D115010 which cleans up some corner cases in this change. 
Reviewed By: aartbik, rriddle Differential Revision: https://reviews.llvm.org/D115008 --- .../SparseTensor/Transforms/CMakeLists.txt | 1 + .../SparseTensor/Transforms/CodegenUtils.cpp | 125 +++++++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 166 ++++++++++++++++++ .../Transforms/SparseTensorConversion.cpp | 124 +------------ .../Transforms/Sparsification.cpp | 96 ++++------ .../llvm-project-overlay/mlir/BUILD.bazel | 11 +- 6 files changed, 335 insertions(+), 188 deletions(-) create mode 100644 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp create mode 100644 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt index 6b72cb1b3fce..5c4ce30042d6 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms + CodegenUtils.cpp Sparsification.cpp SparseTensorConversion.cpp SparseTensorPasses.cpp diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp new file mode 100644 index 000000000000..602e1f748443 --- /dev/null +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -0,0 +1,125 @@ +//===- CodegenUtils.cpp - Utilities for generating MLIR -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CodegenUtils.h" + +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" + +using namespace mlir; +using namespace mlir::sparse_tensor; + +//===----------------------------------------------------------------------===// +// ExecutionEngine/SparseTensorUtils helper functions. +//===----------------------------------------------------------------------===// + +OverheadType mlir::sparse_tensor::overheadTypeEncoding(unsigned width) { + switch (width) { + default: + return OverheadType::kU64; + case 32: + return OverheadType::kU32; + case 16: + return OverheadType::kU16; + case 8: + return OverheadType::kU8; + } +} + +Type mlir::sparse_tensor::getOverheadType(Builder &builder, OverheadType ot) { + switch (ot) { + case OverheadType::kU64: + return builder.getIntegerType(64); + case OverheadType::kU32: + return builder.getIntegerType(32); + case OverheadType::kU16: + return builder.getIntegerType(16); + case OverheadType::kU8: + return builder.getIntegerType(8); + } + llvm_unreachable("Unknown OverheadType"); +} + +Type mlir::sparse_tensor::getPointerOverheadType( + Builder &builder, const SparseTensorEncodingAttr &enc) { + // NOTE(wrengr): This workaround will be fixed in D115010. + unsigned width = enc.getPointerBitWidth(); + if (width == 0) + return builder.getIndexType(); + return getOverheadType(builder, overheadTypeEncoding(width)); +} + +Type mlir::sparse_tensor::getIndexOverheadType( + Builder &builder, const SparseTensorEncodingAttr &enc) { + // NOTE(wrengr): This workaround will be fixed in D115010. 
+ unsigned width = enc.getIndexBitWidth(); + if (width == 0) + return builder.getIndexType(); + return getOverheadType(builder, overheadTypeEncoding(width)); +} + +PrimaryType mlir::sparse_tensor::primaryTypeEncoding(Type elemTp) { + if (elemTp.isF64()) + return PrimaryType::kF64; + if (elemTp.isF32()) + return PrimaryType::kF32; + if (elemTp.isInteger(64)) + return PrimaryType::kI64; + if (elemTp.isInteger(32)) + return PrimaryType::kI32; + if (elemTp.isInteger(16)) + return PrimaryType::kI16; + if (elemTp.isInteger(8)) + return PrimaryType::kI8; + llvm_unreachable("Unknown primary type"); +} + +DimLevelType mlir::sparse_tensor::dimLevelTypeEncoding( + SparseTensorEncodingAttr::DimLevelType dlt) { + switch (dlt) { + case SparseTensorEncodingAttr::DimLevelType::Dense: + return DimLevelType::kDense; + case SparseTensorEncodingAttr::DimLevelType::Compressed: + return DimLevelType::kCompressed; + case SparseTensorEncodingAttr::DimLevelType::Singleton: + return DimLevelType::kSingleton; + } + llvm_unreachable("Unknown SparseTensorEncodingAttr::DimLevelType"); +} + +//===----------------------------------------------------------------------===// +// Misc code generators. 
+//===----------------------------------------------------------------------===// + +mlir::Attribute mlir::sparse_tensor::getOneAttr(Builder &builder, Type tp) { + if (tp.isa()) + return builder.getFloatAttr(tp, 1.0); + if (tp.isa()) + return builder.getIndexAttr(1); + if (auto intTp = tp.dyn_cast()) + return builder.getIntegerAttr(tp, APInt(intTp.getWidth(), 1)); + if (tp.isa()) { + auto shapedTp = tp.cast(); + if (auto one = getOneAttr(builder, shapedTp.getElementType())) + return DenseElementsAttr::get(shapedTp, one); + } + llvm_unreachable("Unsupported attribute type"); +} + +Value mlir::sparse_tensor::genIsNonzero(OpBuilder &builder, mlir::Location loc, + Value v) { + Type tp = v.getType(); + Value zero = constantZero(builder, loc, tp); + if (tp.isa()) + return builder.create(loc, arith::CmpFPredicate::UNE, v, + zero); + if (tp.isIntOrIndex()) + return builder.create(loc, arith::CmpIPredicate::ne, v, + zero); + llvm_unreachable("Non-numeric type"); +} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h new file mode 100644 index 000000000000..fd539fe997cf --- /dev/null +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -0,0 +1,166 @@ +//===- CodegenUtils.h - Utilities for generating MLIR -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines utilities for generating MLIR. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENUTILS_H_ +#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENUTILS_H_ + +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "mlir/ExecutionEngine/SparseTensorUtils.h" +#include "mlir/IR/Builders.h" + +namespace mlir { +class Location; +class Type; +class Value; + +namespace sparse_tensor { + +//===----------------------------------------------------------------------===// +// ExecutionEngine/SparseTensorUtils helper functions. +//===----------------------------------------------------------------------===// + +/// Converts an overhead storage bitwidth to its internal type-encoding. +OverheadType overheadTypeEncoding(unsigned width); + +/// Converts the internal type-encoding for overhead storage to an mlir::Type. +Type getOverheadType(Builder &builder, OverheadType ot); + +/// Returns the mlir::Type for pointer overhead storage. +Type getPointerOverheadType(Builder &builder, + const SparseTensorEncodingAttr &enc); + +/// Returns the mlir::Type for index overhead storage. +Type getIndexOverheadType(Builder &builder, + const SparseTensorEncodingAttr &enc); + +/// Converts a primary storage type to its internal type-encoding. +PrimaryType primaryTypeEncoding(Type elemTp); + +/// Converts the IR's dimension level type to its internal type-encoding. +DimLevelType dimLevelTypeEncoding(SparseTensorEncodingAttr::DimLevelType dlt); + +//===----------------------------------------------------------------------===// +// Misc code generators. +// +// TODO: both of these should move upstream to their respective classes. +// Once RFCs have been created for those changes, list them here. +//===----------------------------------------------------------------------===// + +/// Generates a 1-valued attribute of the given type. 
This supports +/// all the same types as `getZeroAttr`; however, unlike `getZeroAttr`, +/// for unsupported types we raise `llvm_unreachable` rather than +/// returning a null attribute. +Attribute getOneAttr(Builder &builder, Type tp); + +/// Generates the comparison `v != 0` where `v` is of numeric type. +/// For floating types, we use the "unordered" comparator (i.e., returns +/// true if `v` is NaN). +Value genIsNonzero(OpBuilder &builder, Location loc, Value v); + +//===----------------------------------------------------------------------===// +// Constant generators. +// +// All these functions are just wrappers to improve code legibility; +// therefore, we mark them as `inline` to avoid introducing any additional +// overhead due to the legibility. +// +// TODO: Ideally these should move upstream, so that we don't +// develop a design island. However, doing so will involve +// substantial design work. For related prior discussion, see +// +//===----------------------------------------------------------------------===// + +/// Generates a 0-valued constant of the given type. In addition to +/// the scalar types (`FloatType`, `IndexType`, `IntegerType`), this also +/// works for `RankedTensorType` and `VectorType` (for which it generates +/// a constant `DenseElementsAttr` of zeros). +inline Value constantZero(OpBuilder &builder, Location loc, Type tp) { + return builder.create(loc, tp, builder.getZeroAttr(tp)); +} + +/// Generates a 1-valued constant of the given type. This supports all +/// the same types as `constantZero`. +inline Value constantOne(OpBuilder &builder, Location loc, Type tp) { + return builder.create(loc, tp, getOneAttr(builder, tp)); +} + +/// Generates a constant of `index` type. +inline Value constantIndex(OpBuilder &builder, Location loc, int64_t i) { + return builder.create(loc, i); +} + +/// Generates a constant of `i32` type. 
+inline Value constantI32(OpBuilder &builder, Location loc, int32_t i) { + return builder.create(loc, i, 32); +} + +/// Generates a constant of `i16` type. +inline Value constantI16(OpBuilder &builder, Location loc, int16_t i) { + return builder.create(loc, i, 16); +} + +/// Generates a constant of `i8` type. +inline Value constantI8(OpBuilder &builder, Location loc, int8_t i) { + return builder.create(loc, i, 8); +} + +/// Generates a constant of `i1` type. +inline Value constantI1(OpBuilder &builder, Location loc, bool b) { + return builder.create(loc, b, 1); +} + +/// Generates a constant of the given `Action`. +inline Value constantAction(OpBuilder &builder, Location loc, Action action) { + return constantI32(builder, loc, static_cast(action)); +} + +/// Generates a constant of the internal type-encoding for overhead storage. +inline Value constantOverheadTypeEncoding(OpBuilder &builder, Location loc, + unsigned width) { + return constantI32(builder, loc, + static_cast(overheadTypeEncoding(width))); +} + +/// Generates a constant of the internal type-encoding for pointer +/// overhead storage. +inline Value constantPointerTypeEncoding(OpBuilder &builder, Location loc, + const SparseTensorEncodingAttr &enc) { + return constantOverheadTypeEncoding(builder, loc, enc.getPointerBitWidth()); +} + +/// Generates a constant of the internal type-encoding for index overhead +/// storage. +inline Value constantIndexTypeEncoding(OpBuilder &builder, Location loc, + const SparseTensorEncodingAttr &enc) { + return constantOverheadTypeEncoding(builder, loc, enc.getIndexBitWidth()); +} + +/// Generates a constant of the internal type-encoding for primary storage. +inline Value constantPrimaryTypeEncoding(OpBuilder &builder, Location loc, + Type elemTp) { + return constantI32(builder, loc, + static_cast(primaryTypeEncoding(elemTp))); +} + +/// Generates a constant of the internal dimension level type encoding. 
+inline Value +constantDimLevelTypeEncoding(OpBuilder &builder, Location loc, + SparseTensorEncodingAttr::DimLevelType dlt) { + return constantI8(builder, loc, + static_cast(dimLevelTypeEncoding(dlt))); +} + +} // namespace sparse_tensor +} // namespace mlir + +#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENUTILS_H_ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index 047142b06320..3c6817274b83 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -14,6 +14,7 @@ // //===----------------------------------------------------------------------===// +#include "CodegenUtils.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" @@ -39,113 +40,6 @@ enum class EmitCInterface : bool { Off = false, On = true }; // Helper methods. //===----------------------------------------------------------------------===// -/// Generates a constant zero of the given type. -inline static Value constantZero(ConversionPatternRewriter &rewriter, - Location loc, Type t) { - return rewriter.create(loc, t, rewriter.getZeroAttr(t)); -} - -/// Generates a constant of `index` type. -inline static Value constantIndex(ConversionPatternRewriter &rewriter, - Location loc, int64_t i) { - return rewriter.create(loc, i); -} - -/// Generates a constant of `i32` type. -inline static Value constantI32(ConversionPatternRewriter &rewriter, - Location loc, int32_t i) { - return rewriter.create(loc, i, 32); -} - -/// Generates a constant of `i8` type. -inline static Value constantI8(ConversionPatternRewriter &rewriter, - Location loc, int8_t i) { - return rewriter.create(loc, i, 8); -} - -/// Generates a constant of the given `Action`. 
-static Value constantAction(ConversionPatternRewriter &rewriter, Location loc, - Action action) { - return constantI32(rewriter, loc, static_cast(action)); -} - -/// Generates a constant of the internal type encoding for overhead storage. -static Value constantOverheadTypeEncoding(ConversionPatternRewriter &rewriter, - Location loc, unsigned width) { - OverheadType sec; - switch (width) { - default: - sec = OverheadType::kU64; - break; - case 32: - sec = OverheadType::kU32; - break; - case 16: - sec = OverheadType::kU16; - break; - case 8: - sec = OverheadType::kU8; - break; - } - return constantI32(rewriter, loc, static_cast(sec)); -} - -/// Generates a constant of the internal type encoding for pointer -/// overhead storage. -static Value constantPointerTypeEncoding(ConversionPatternRewriter &rewriter, - Location loc, - SparseTensorEncodingAttr &enc) { - return constantOverheadTypeEncoding(rewriter, loc, enc.getPointerBitWidth()); -} - -/// Generates a constant of the internal type encoding for index overhead -/// storage. -static Value constantIndexTypeEncoding(ConversionPatternRewriter &rewriter, - Location loc, - SparseTensorEncodingAttr &enc) { - return constantOverheadTypeEncoding(rewriter, loc, enc.getIndexBitWidth()); -} - -/// Generates a constant of the internal type encoding for primary storage. -static Value constantPrimaryTypeEncoding(ConversionPatternRewriter &rewriter, - Location loc, Type tp) { - PrimaryType primary; - if (tp.isF64()) - primary = PrimaryType::kF64; - else if (tp.isF32()) - primary = PrimaryType::kF32; - else if (tp.isInteger(64)) - primary = PrimaryType::kI64; - else if (tp.isInteger(32)) - primary = PrimaryType::kI32; - else if (tp.isInteger(16)) - primary = PrimaryType::kI16; - else if (tp.isInteger(8)) - primary = PrimaryType::kI8; - else - llvm_unreachable("Unknown element type"); - return constantI32(rewriter, loc, static_cast(primary)); -} - -/// Generates a constant of the internal dimension level type encoding. 
-static Value -constantDimLevelTypeEncoding(ConversionPatternRewriter &rewriter, Location loc, - SparseTensorEncodingAttr::DimLevelType dlt) { - DimLevelType dlt2; - switch (dlt) { - case SparseTensorEncodingAttr::DimLevelType::Dense: - dlt2 = DimLevelType::kDense; - break; - case SparseTensorEncodingAttr::DimLevelType::Compressed: - dlt2 = DimLevelType::kCompressed; - break; - case SparseTensorEncodingAttr::DimLevelType::Singleton: - dlt2 = DimLevelType::kSingleton; - break; - } - return constantI8(rewriter, loc, static_cast(dlt2)); -} - /// Returns the equivalent of `void*` for opaque arguments to the /// execution engine. static Type getOpaquePointerType(PatternRewriter &rewriter) { @@ -336,22 +230,6 @@ static void newParams(ConversionPatternRewriter &rewriter, params.push_back(ptr); } -/// Generates the comparison `v != 0` where `v` is of numeric type `t`. -/// For floating types, we use the "unordered" comparator (i.e., returns -/// true if `v` is NaN). -static Value genIsNonzero(ConversionPatternRewriter &rewriter, Location loc, - Value v) { - Type t = v.getType(); - Value zero = constantZero(rewriter, loc, t); - if (t.isa()) - return rewriter.create(loc, arith::CmpFPredicate::UNE, v, - zero); - if (t.isIntOrIndex()) - return rewriter.create(loc, arith::CmpIPredicate::ne, v, - zero); - llvm_unreachable("Unknown element type"); -} - /// Generates the code to read the value from tensor[ivs], and conditionally /// stores the indices ivs to the memory in ind. 
The generated code looks like /// the following and the insertion point after this routine is inside the diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 1bdad563f4c8..ca542a1c8f85 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "CodegenUtils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" @@ -406,26 +407,16 @@ static Value genVectorReducInit(CodeGen &codegen, PatternRewriter &rewriter, case kNoReduc: break; case kSum: - case kXor: { + case kXor: // Initialize reduction vector to: | 0 | .. | 0 | r | - Attribute zero = rewriter.getZeroAttr(vtp); - Value vec = rewriter.create(loc, vtp, zero); return rewriter.create( - loc, r, vec, rewriter.create(loc, 0)); - } - case kProduct: { + loc, r, constantZero(rewriter, loc, vtp), + constantIndex(rewriter, loc, 0)); + case kProduct: // Initialize reduction vector to: | 1 | .. | 1 | r | - Type etp = vtp.getElementType(); - Attribute one; - if (etp.isa()) - one = rewriter.getFloatAttr(etp, 1.0); - else - one = rewriter.getIntegerAttr(etp, 1); - Value vec = rewriter.create( - loc, vtp, DenseElementsAttr::get(vtp, one)); return rewriter.create( - loc, r, vec, rewriter.create(loc, 0)); - } + loc, r, constantOne(rewriter, loc, vtp), + constantIndex(rewriter, loc, 0)); case kAnd: case kOr: // Initialize reduction vector to: | r | .. | r | r | @@ -453,13 +444,6 @@ static void updateReduc(Merger &merger, CodeGen &codegen, Value reduc) { // Sparse compiler synthesis methods (statements and expressions). 
//===----------------------------------------------------------------------===// -/// Maps sparse integer option to actual integral storage type. -static Type genIntType(PatternRewriter &rewriter, unsigned width) { - if (width == 0) - return rewriter.getIndexType(); - return rewriter.getIntegerType(width); -} - /// Generates buffer for the output tensor. Note that all sparse kernels /// assume that when all elements are written to (viz. x(i) = y(i) * z(i)), /// the output buffer is already initialized to all zeroes and only nonzeroes @@ -484,10 +468,8 @@ static Value genOutputBuffer(CodeGen &codegen, PatternRewriter &rewriter, // materializes into the computation, we need to preserve the zero // initialization assumption of all sparse output buffers. if (isMaterializing(tensor)) { - Type tp = denseTp.getElementType(); Value alloc = rewriter.create(loc, denseTp, args); - Value zero = - rewriter.create(loc, tp, rewriter.getZeroAttr(tp)); + Value zero = constantZero(rewriter, loc, denseTp.getElementType()); rewriter.create(loc, zero, alloc); return alloc; } @@ -522,11 +504,11 @@ static void genBuffers(Merger &merger, CodeGen &codegen, // Handle sparse storage schemes. if (merger.isDim(tensor, idx, Dim::kSparse)) { auto dynShape = {ShapedType::kDynamicSize}; - auto ptrTp = MemRefType::get( - dynShape, genIntType(rewriter, enc.getPointerBitWidth())); - auto indTp = MemRefType::get( - dynShape, genIntType(rewriter, enc.getIndexBitWidth())); - Value dim = rewriter.create(loc, d); + auto ptrTp = + MemRefType::get(dynShape, getPointerOverheadType(rewriter, enc)); + auto indTp = + MemRefType::get(dynShape, getIndexOverheadType(rewriter, enc)); + Value dim = constantIndex(rewriter, loc, d); // Generate sparse primitives to obtains pointer and indices. 
codegen.pointers[tensor][idx] = rewriter.create(loc, ptrTp, t->get(), dim); @@ -557,7 +539,7 @@ static void genBuffers(Merger &merger, CodeGen &codegen, genOutputBuffer(codegen, rewriter, op, denseTp, args); } else if (t == codegen.sparseOut) { // True sparse output needs a lexIdx array. - Value rank = rewriter.create(loc, op.getRank(t)); + Value rank = constantIndex(rewriter, loc, op.getRank(t)); auto dynShape = {ShapedType::kDynamicSize}; auto memTp = MemRefType::get(dynShape, rewriter.getIndexType()); codegen.lexIdx = rewriter.create(loc, memTp, rank); @@ -585,7 +567,7 @@ static VectorType vectorType(CodeGen &codegen, Value ptr) { static Value genVectorMask(CodeGen &codegen, PatternRewriter &rewriter, Value iv, Value lo, Value hi, Value step) { Location loc = iv.getLoc(); - VectorType mtp = vectorType(codegen, genIntType(rewriter, 1)); + VectorType mtp = vectorType(codegen, rewriter.getI1Type()); // Special case if the vector length evenly divides the trip count (for // example, "for i = 0, 128, 16"). A constant all-true mask is generated // so that all subsequent masked memory operations are immediately folded @@ -596,7 +578,7 @@ static Value genVectorMask(CodeGen &codegen, PatternRewriter &rewriter, matchPattern(step, m_Constant(&stepInt))) { if (((hiInt.getInt() - loInt.getInt()) % stepInt.getInt()) == 0) return rewriter.create( - loc, mtp, rewriter.create(loc, 1, 1)); + loc, mtp, constantI1(rewriter, loc, true)); } // Otherwise, generate a vector mask that avoids overrunning the upperbound // during vector execution. 
Here we rely on subsequent loop optimizations to @@ -617,12 +599,11 @@ static Value genVectorLoad(CodeGen &codegen, PatternRewriter &rewriter, Value ptr, ArrayRef args) { Location loc = ptr.getLoc(); VectorType vtp = vectorType(codegen, ptr); - Value pass = - rewriter.create(loc, vtp, rewriter.getZeroAttr(vtp)); + Value pass = constantZero(rewriter, loc, vtp); if (args.back().getType().isa()) { SmallVector scalarArgs(args.begin(), args.end()); Value indexVec = args.back(); - scalarArgs.back() = rewriter.create(loc, 0); + scalarArgs.back() = constantIndex(rewriter, loc, 0); return rewriter.create( loc, vtp, ptr, scalarArgs, indexVec, codegen.curVecMask, pass); } @@ -637,7 +618,7 @@ static void genVectorStore(CodeGen &codegen, PatternRewriter &rewriter, if (args.back().getType().isa()) { SmallVector scalarArgs(args.begin(), args.end()); Value indexVec = args.back(); - scalarArgs.back() = rewriter.create(loc, 0); + scalarArgs.back() = constantIndex(rewriter, loc, 0); rewriter.create(loc, ptr, scalarArgs, indexVec, codegen.curVecMask, rhs); return; @@ -679,7 +660,7 @@ static Value genAffine(CodeGen &codegen, PatternRewriter &rewriter, } case AffineExprKind::Constant: { int64_t c = a.cast().getValue(); - return rewriter.create(loc, c); + return constantIndex(rewriter, loc, c); } default: llvm_unreachable("unexpected affine subscript"); @@ -728,8 +709,7 @@ static Value genInsertionLoad(CodeGen &codegen, PatternRewriter &rewriter, // Direct lexicographic index order, tensor loads as zero. if (!codegen.expValues) { Type tp = getElementTypeOrSelf(t->get().getType()); - return rewriter.create(loc, tp, - rewriter.getZeroAttr(tp)); + return constantZero(rewriter, loc, tp); } // Load from expanded access pattern. 
Value index = genIndex(codegen, op, t); @@ -752,8 +732,8 @@ static void genInsertionStore(CodeGen &codegen, PatternRewriter &rewriter, // endif // values[i] = rhs Value index = genIndex(codegen, op, t); - Value fval = rewriter.create(loc, 0, 1); // false - Value tval = rewriter.create(loc, 1, 1); // true + Value fval = constantI1(rewriter, loc, false); + Value tval = constantI1(rewriter, loc, true); // If statement. Value filled = rewriter.create(loc, codegen.expFilled, index); Value cond = rewriter.create(loc, arith::CmpIPredicate::eq, @@ -765,7 +745,7 @@ static void genInsertionStore(CodeGen &codegen, PatternRewriter &rewriter, rewriter.create(loc, tval, codegen.expFilled, index); rewriter.create(loc, index, codegen.expAdded, codegen.expCount); - Value one = rewriter.create(loc, 1); + Value one = constantIndex(rewriter, loc, 1); Value add = rewriter.create(loc, codegen.expCount, one); rewriter.create(loc, add); // False branch. @@ -852,11 +832,11 @@ static Value genLoad(CodeGen &codegen, PatternRewriter &rewriter, Location loc, if (!etp.isa()) { if (etp.getIntOrFloatBitWidth() < 32) vload = rewriter.create( - loc, vload, vectorType(codegen, genIntType(rewriter, 32))); + loc, vload, vectorType(codegen, rewriter.getI32Type())); else if (etp.getIntOrFloatBitWidth() < 64 && !codegen.options.enableSIMDIndex32) vload = rewriter.create( - loc, vload, vectorType(codegen, genIntType(rewriter, 64))); + loc, vload, vectorType(codegen, rewriter.getI64Type())); } return vload; } @@ -867,8 +847,7 @@ static Value genLoad(CodeGen &codegen, PatternRewriter &rewriter, Location loc, Value load = rewriter.create(loc, ptr, s); if (!load.getType().isa()) { if (load.getType().getIntOrFloatBitWidth() < 64) - load = - rewriter.create(loc, load, genIntType(rewriter, 64)); + load = rewriter.create(loc, load, rewriter.getI64Type()); load = rewriter.create(loc, load, rewriter.getIndexType()); } @@ -1000,8 +979,8 @@ static void genExpansion(Merger &merger, CodeGen &codegen, auto dynShape = 
{ShapedType::kDynamicSize}; Type etp = tensor.getType().cast().getElementType(); Type t1 = MemRefType::get(dynShape, etp); - Type t2 = MemRefType::get(dynShape, genIntType(rewriter, 1)); - Type t3 = MemRefType::get(dynShape, genIntType(rewriter, 0)); + Type t2 = MemRefType::get(dynShape, rewriter.getI1Type()); + Type t3 = MemRefType::get(dynShape, rewriter.getIndexType()); Type t4 = rewriter.getIndexType(); auto res = rewriter.create(loc, TypeRange({t1, t2, t3, t4}), tensor); @@ -1044,8 +1023,8 @@ static bool genInit(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, break; } Value ptr = codegen.pointers[tensor][idx]; - Value one = rewriter.create(loc, 1); - Value p0 = (pat == 0) ? rewriter.create(loc, 0) + Value one = constantIndex(rewriter, loc, 1); + Value p0 = (pat == 0) ? constantIndex(rewriter, loc, 0) : codegen.pidxs[tensor][topSort[pat - 1]]; codegen.pidxs[tensor][idx] = genLoad(codegen, rewriter, loc, ptr, p0); Value p1 = rewriter.create(loc, p0, one); @@ -1058,7 +1037,7 @@ static bool genInit(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, } // Initialize the universal dense index. - codegen.loops[idx] = rewriter.create(loc, 0); + codegen.loops[idx] = constantIndex(rewriter, loc, 0); return needsUniv; } @@ -1148,8 +1127,7 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, Location loc = op.getLoc(); Value lo = isSparse ? codegen.pidxs[tensor][idx] : codegen.loops[idx]; Value hi = isSparse ? codegen.highs[tensor][idx] : codegen.sizes[idx]; - Value step = - rewriter.create(loc, codegen.curVecLength); + Value step = constantIndex(rewriter, loc, codegen.curVecLength); // Emit a parallel loop. if (isParallel) { @@ -1323,7 +1301,7 @@ static void genLocals(Merger &merger, CodeGen &codegen, for (; pat != 0; pat--) if (codegen.pidxs[tensor][topSort[pat - 1]]) break; - Value p = (pat == 0) ? rewriter.create(loc, 0) + Value p = (pat == 0) ? 
constantIndex(rewriter, loc, 0) : codegen.pidxs[tensor][topSort[pat - 1]]; codegen.pidxs[tensor][idx] = genAddress( codegen, rewriter, loc, codegen.sizes[idx], p, codegen.loops[idx]); @@ -1333,7 +1311,7 @@ static void genLocals(Merger &merger, CodeGen &codegen, // Move the insertion indices in lexicographic index order. During access // pattern expansion, we can skip setting the innermost dimension. if (codegen.sparseOut && !codegen.expValues) { - Value pos = rewriter.create(loc, at); + Value pos = constantIndex(rewriter, loc, at); rewriter.create(loc, codegen.loops[idx], codegen.lexIdx, pos); } @@ -1373,7 +1351,7 @@ static void genWhileInduction(Merger &merger, CodeGen &codegen, // after the if-statements more closely resembles code generated by TACO. unsigned o = 0; SmallVector operands; - Value one = rewriter.create(loc, 1); + Value one = constantIndex(rewriter, loc, 1); for (unsigned b = 0, be = induction.size(); b < be; b++) { if (induction[b] && merger.isDim(b, Dim::kSparse)) { unsigned tensor = merger.tensor(b); @@ -1445,7 +1423,7 @@ static scf::IfOp genIf(Merger &merger, CodeGen &codegen, clause = rewriter.create(loc, arith::CmpIPredicate::eq, op1, op2); } else { - clause = rewriter.create(loc, 1, 1); // true + clause = constantI1(rewriter, loc, true); } cond = cond ? 
rewriter.create(loc, cond, clause) : clause; } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 00b339e1fa5c..ae806faa2b4c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1812,7 +1812,6 @@ cc_library( ":SideEffectInterfaces", ":SparseTensorAttrDefsIncGen", ":SparseTensorOpsIncGen", - ":SparseTensorUtils", ":StandardOps", "//llvm:Support", ], @@ -1827,17 +1826,17 @@ cc_library( ":ArithmeticDialect", ":IR", ":LinalgOps", - ":SideEffectInterfaces", - ":SparseTensorAttrDefsIncGen", - ":SparseTensorOpsIncGen", - ":StandardOps", + ":SparseTensor", "//llvm:Support", ], ) cc_library( name = "SparseTensorTransforms", - srcs = glob(["lib/Dialect/SparseTensor/Transforms/*.cpp"]), + srcs = glob([ + "lib/Dialect/SparseTensor/Transforms/*.cpp", + "lib/Dialect/SparseTensor/Transforms/*.h", + ]), hdrs = [ "include/mlir/Dialect/SparseTensor/Transforms/Passes.h", "include/mlir/ExecutionEngine/SparseTensorUtils.h", From bc04a4703824c005490e7ae79f64e873e1bd6c92 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Tue, 4 Jan 2022 15:10:54 -0800 Subject: [PATCH 595/992] [mlir][sparse] adding OverheadType::kIndex Depends On D115008 This change opens the way for D115012, and removes some corner cases in `CodegenUtils.cpp`. The `SparseTensorAttrDefs.td` already specifies that we allow `0` bitwidth for the two overhead types and that it is interpreted to mean the architecture's native width. 
Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D115010 --- .../mlir/ExecutionEngine/SparseTensorUtils.h | 18 +++++++++++++++-- .../SparseTensor/Transforms/CodegenUtils.cpp | 20 +++++++++---------- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 20 ++++++++++++------- .../SparseTensor/conversion_sparse2dense.mlir | 14 ++++++------- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h index 4361fc7d43e7..a1f1dd6ae32d 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h @@ -18,8 +18,22 @@ extern "C" { -/// Encoding of the elemental type, for "overloading" @newSparseTensor. -enum class OverheadType : uint32_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 }; +/// This type is used in the public API at all places where MLIR expects +/// values with the built-in type "index". For now, we simply assume that +/// type is 64-bit, but targets with different "index" bit widths should link +/// with an alternatively built runtime support library. +// TODO: support such targets? +using index_t = uint64_t; + +/// Encoding of overhead types (both pointer overhead and indices +/// overhead), for "overloading" @newSparseTensor. +enum class OverheadType : uint32_t { + kIndex = 0, + kU64 = 1, + kU32 = 2, + kU16 = 3, + kU8 = 4 +}; /// Encoding of the elemental type, for "overloading" @newSparseTensor. 
enum class PrimaryType : uint32_t { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 602e1f748443..0d45ff15e899 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -20,7 +20,7 @@ using namespace mlir::sparse_tensor; OverheadType mlir::sparse_tensor::overheadTypeEncoding(unsigned width) { switch (width) { - default: + case 64: return OverheadType::kU64; case 32: return OverheadType::kU32; @@ -28,11 +28,16 @@ OverheadType mlir::sparse_tensor::overheadTypeEncoding(unsigned width) { return OverheadType::kU16; case 8: return OverheadType::kU8; + case 0: + return OverheadType::kIndex; } + llvm_unreachable("Unsupported overhead bitwidth"); } Type mlir::sparse_tensor::getOverheadType(Builder &builder, OverheadType ot) { switch (ot) { + case OverheadType::kIndex: + return builder.getIndexType(); case OverheadType::kU64: return builder.getIntegerType(64); case OverheadType::kU32: @@ -47,20 +52,13 @@ Type mlir::sparse_tensor::getOverheadType(Builder &builder, OverheadType ot) { Type mlir::sparse_tensor::getPointerOverheadType( Builder &builder, const SparseTensorEncodingAttr &enc) { - // NOTE(wrengr): This workaround will be fixed in D115010. - unsigned width = enc.getPointerBitWidth(); - if (width == 0) - return builder.getIndexType(); - return getOverheadType(builder, overheadTypeEncoding(width)); + return getOverheadType(builder, + overheadTypeEncoding(enc.getPointerBitWidth())); } Type mlir::sparse_tensor::getIndexOverheadType( Builder &builder, const SparseTensorEncodingAttr &enc) { - // NOTE(wrengr): This workaround will be fixed in D115010. 
- unsigned width = enc.getIndexBitWidth(); - if (width == 0) - return builder.getIndexType(); - return getOverheadType(builder, overheadTypeEncoding(width)); + return getOverheadType(builder, overheadTypeEncoding(enc.getIndexBitWidth())); } PrimaryType mlir::sparse_tensor::primaryTypeEncoding(Type elemTp) { diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 927284ec13f4..3681ca17674b 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -686,13 +686,6 @@ static SparseTensorCOO *openSparseTensorCOO(char *filename, uint64_t rank, extern "C" { -/// This type is used in the public API at all places where MLIR expects -/// values with the built-in type "index". For now, we simply assume that -/// type is 64-bit, but targets with different "index" bit widths should link -/// with an alternatively built runtime support library. -// TODO: support such targets? -using index_t = uint64_t; - //===----------------------------------------------------------------------===// // // Public API with methods that operate on MLIR buffers (memrefs) to interact @@ -821,6 +814,12 @@ using index_t = uint64_t; cursor, values, filled, added, count); \ } +// Assume index_t is in fact uint64_t, so that _mlir_ciface_newSparseTensor +// can safely rewrite kIndex to kU64. We make this assertion to guarantee +// that this file cannot get out of sync with its header. +static_assert(std::is_same::value, + "Expected index_t == uint64_t"); + /// Constructs a new sparse tensor. This is the "swiss army knife" /// method for materializing sparse tensors into the computation. /// @@ -846,6 +845,13 @@ _mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT const index_t *perm = pref->data + pref->offset; uint64_t rank = aref->sizes[0]; + // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases. + // This is safe because of the static_assert above. 
+ if (ptrTp == OverheadType::kIndex) + ptrTp = OverheadType::kU64; + if (indTp == OverheadType::kIndex) + indTp = OverheadType::kU64; + // Double matrices with all combinations of overhead storage. CASE(OverheadType::kU64, OverheadType::kU64, PrimaryType::kF64, uint64_t, uint64_t, double); diff --git a/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir index 0b7c20392d34..2917685064af 100644 --- a/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir @@ -27,16 +27,15 @@ // CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref // CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[SecTp:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[zeroI32:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[ElemTp:.*]] = arith.constant 4 : i32 // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 5 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[SecTp]], %[[SecTp]], %[[ElemTp]], %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[zeroI32]], %[[zeroI32]], %[[ElemTp]], %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32> -// CHECK-DAG: %[[E0:.*]] = arith.constant 0 : i32 -// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : i32, memref<13xi32> +// CHECK-DAG: linalg.fill(%[[zeroI32]], %[[M]]) : i32, memref<13xi32> // CHECK: scf.while : () -> () { 
// CHECK: %[[Cond:.*]] = call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -67,16 +66,15 @@ func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> // CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref // CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex> -// CHECK-DAG: %[[SecTp:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[zeroI32:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[ElemTp:.*]] = arith.constant 4 : i32 // CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 5 : i32 -// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[SecTp]], %[[SecTp]], %[[ElemTp]], %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK-DAG: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[zeroI32]], %[[zeroI32]], %[[ElemTp]], %[[ActionToIter]], %[[Arg]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: %[[E0:.*]] = arith.constant 0 : i32 -// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : i32, memref +// CHECK-DAG: linalg.fill(%[[zeroI32]], %[[M]]) : i32, memref // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) From c9489225678106c21cb8584c08f6003ba3987a5d Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Tue, 4 Jan 2022 15:15:36 -0800 Subject: [PATCH 596/992] [mlir][sparse] Factoring out 
type-based function-name suffixes Depends On D115010 This changes a couple of places that used to `return failure();` to now use `llvm_unreachable()` instead. However, `Transforms/Sparsification.cpp` should be doing the necessary type checks to ensure that those cases are in fact unreachable. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D115012 --- .../SparseTensor/Transforms/CodegenUtils.cpp | 50 ++++++++ .../SparseTensor/Transforms/CodegenUtils.h | 15 +++ .../Transforms/SparseTensorConversion.cpp | 118 ++---------------- 3 files changed, 76 insertions(+), 107 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 0d45ff15e899..ea9be3bddb54 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -34,6 +34,14 @@ OverheadType mlir::sparse_tensor::overheadTypeEncoding(unsigned width) { llvm_unreachable("Unsupported overhead bitwidth"); } +OverheadType mlir::sparse_tensor::overheadTypeEncoding(Type tp) { + if (tp.isIndex()) + return OverheadType::kIndex; + if (auto intTp = tp.dyn_cast()) + return overheadTypeEncoding(intTp.getWidth()); + llvm_unreachable("Unknown overhead type"); +} + Type mlir::sparse_tensor::getOverheadType(Builder &builder, OverheadType ot) { switch (ot) { case OverheadType::kIndex: @@ -61,6 +69,26 @@ Type mlir::sparse_tensor::getIndexOverheadType( return getOverheadType(builder, overheadTypeEncoding(enc.getIndexBitWidth())); } +StringRef mlir::sparse_tensor::overheadTypeFunctionSuffix(OverheadType ot) { + switch (ot) { + case OverheadType::kIndex: + return ""; + case OverheadType::kU64: + return "64"; + case OverheadType::kU32: + return "32"; + case OverheadType::kU16: + return "16"; + case OverheadType::kU8: + return "8"; + } + llvm_unreachable("Unknown OverheadType"); +} + +StringRef mlir::sparse_tensor::overheadTypeFunctionSuffix(Type tp) { + 
return overheadTypeFunctionSuffix(overheadTypeEncoding(tp)); +} + PrimaryType mlir::sparse_tensor::primaryTypeEncoding(Type elemTp) { if (elemTp.isF64()) return PrimaryType::kF64; @@ -77,6 +105,28 @@ PrimaryType mlir::sparse_tensor::primaryTypeEncoding(Type elemTp) { llvm_unreachable("Unknown primary type"); } +StringRef mlir::sparse_tensor::primaryTypeFunctionSuffix(PrimaryType pt) { + switch (pt) { + case PrimaryType::kF64: + return "F64"; + case PrimaryType::kF32: + return "F32"; + case PrimaryType::kI64: + return "I64"; + case PrimaryType::kI32: + return "I32"; + case PrimaryType::kI16: + return "I16"; + case PrimaryType::kI8: + return "I8"; + } + llvm_unreachable("Unknown PrimaryType"); +} + +StringRef mlir::sparse_tensor::primaryTypeFunctionSuffix(Type elemTp) { + return primaryTypeFunctionSuffix(primaryTypeEncoding(elemTp)); +} + DimLevelType mlir::sparse_tensor::dimLevelTypeEncoding( SparseTensorEncodingAttr::DimLevelType dlt) { switch (dlt) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index fd539fe997cf..9286cca808aa 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -32,6 +32,9 @@ namespace sparse_tensor { /// Converts an overhead storage bitwidth to its internal type-encoding. OverheadType overheadTypeEncoding(unsigned width); +/// Converts an overhead storage type to its internal type-encoding. +OverheadType overheadTypeEncoding(Type tp); + /// Converts the internal type-encoding for overhead storage to an mlir::Type. Type getOverheadType(Builder &builder, OverheadType ot); @@ -43,9 +46,21 @@ Type getPointerOverheadType(Builder &builder, Type getIndexOverheadType(Builder &builder, const SparseTensorEncodingAttr &enc); +/// Convert OverheadType to its function-name suffix. 
+StringRef overheadTypeFunctionSuffix(OverheadType ot); + +/// Converts an overhead storage type to its function-name suffix. +StringRef overheadTypeFunctionSuffix(Type overheadTp); + /// Converts a primary storage type to its internal type-encoding. PrimaryType primaryTypeEncoding(Type elemTp); +/// Convert PrimaryType to its function-name suffix. +StringRef primaryTypeFunctionSuffix(PrimaryType pt); + +/// Converts a primary storage type to its function-name suffix. +StringRef primaryTypeFunctionSuffix(Type elemTp); + /// Converts the IR's dimension level type to its internal type-encoding. DimLevelType dimLevelTypeEncoding(SparseTensorEncodingAttr::DimLevelType dlt); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index 3c6817274b83..a28f9ac70b31 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -260,21 +260,7 @@ static Value genIndexAndValueForDense(ConversionPatternRewriter &rewriter, static void genAddEltCall(ConversionPatternRewriter &rewriter, Operation *op, Type eltType, Value ptr, Value val, Value ind, Value perm) { - StringRef name; - if (eltType.isF64()) - name = "addEltF64"; - else if (eltType.isF32()) - name = "addEltF32"; - else if (eltType.isInteger(64)) - name = "addEltI64"; - else if (eltType.isInteger(32)) - name = "addEltI32"; - else if (eltType.isInteger(16)) - name = "addEltI16"; - else if (eltType.isInteger(8)) - name = "addEltI8"; - else - llvm_unreachable("Unknown element type"); + SmallString<9> name{"addElt", primaryTypeFunctionSuffix(eltType)}; SmallVector params{ptr, val, ind, perm}; Type pTp = getOpaquePointerType(rewriter); createFuncCall(rewriter, op, name, pTp, params, EmitCInterface::On); @@ -287,21 +273,7 @@ static void genAddEltCall(ConversionPatternRewriter &rewriter, Operation *op, static Value 
genGetNextCall(ConversionPatternRewriter &rewriter, Operation *op, Value iter, Value ind, Value elemPtr) { Type elemTp = elemPtr.getType().cast().getElementType(); - StringRef name; - if (elemTp.isF64()) - name = "getNextF64"; - else if (elemTp.isF32()) - name = "getNextF32"; - else if (elemTp.isInteger(64)) - name = "getNextI64"; - else if (elemTp.isInteger(32)) - name = "getNextI32"; - else if (elemTp.isInteger(16)) - name = "getNextI16"; - else if (elemTp.isInteger(8)) - name = "getNextI8"; - else - llvm_unreachable("Unknown element type"); + SmallString<10> name{"getNext", primaryTypeFunctionSuffix(elemTp)}; SmallVector params{iter, ind, elemPtr}; Type i1 = rewriter.getI1Type(); return createFuncCall(rewriter, op, name, i1, params, EmitCInterface::On) @@ -668,20 +640,8 @@ class SparseTensorToPointersConverter matchAndRewrite(ToPointersOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { Type resType = op.getType(); - Type eltType = resType.cast().getElementType(); - StringRef name; - if (eltType.isIndex()) - name = "sparsePointers"; - else if (eltType.isInteger(64)) - name = "sparsePointers64"; - else if (eltType.isInteger(32)) - name = "sparsePointers32"; - else if (eltType.isInteger(16)) - name = "sparsePointers16"; - else if (eltType.isInteger(8)) - name = "sparsePointers8"; - else - return failure(); + Type ptrType = resType.cast().getElementType(); + SmallString<16> name{"sparsePointers", overheadTypeFunctionSuffix(ptrType)}; replaceOpWithFuncCall(rewriter, op, name, resType, adaptor.getOperands(), EmitCInterface::On); return success(); @@ -696,20 +656,8 @@ class SparseTensorToIndicesConverter : public OpConversionPattern { matchAndRewrite(ToIndicesOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { Type resType = op.getType(); - Type eltType = resType.cast().getElementType(); - StringRef name; - if (eltType.isIndex()) - name = "sparseIndices"; - else if (eltType.isInteger(64)) - name = 
"sparseIndices64"; - else if (eltType.isInteger(32)) - name = "sparseIndices32"; - else if (eltType.isInteger(16)) - name = "sparseIndices16"; - else if (eltType.isInteger(8)) - name = "sparseIndices8"; - else - return failure(); + Type indType = resType.cast().getElementType(); + SmallString<15> name{"sparseIndices", overheadTypeFunctionSuffix(indType)}; replaceOpWithFuncCall(rewriter, op, name, resType, adaptor.getOperands(), EmitCInterface::On); return success(); @@ -725,21 +673,7 @@ class SparseTensorToValuesConverter : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { Type resType = op.getType(); Type eltType = resType.cast().getElementType(); - StringRef name; - if (eltType.isF64()) - name = "sparseValuesF64"; - else if (eltType.isF32()) - name = "sparseValuesF32"; - else if (eltType.isInteger(64)) - name = "sparseValuesI64"; - else if (eltType.isInteger(32)) - name = "sparseValuesI32"; - else if (eltType.isInteger(16)) - name = "sparseValuesI16"; - else if (eltType.isInteger(8)) - name = "sparseValuesI8"; - else - return failure(); + SmallString<15> name{"sparseValues", primaryTypeFunctionSuffix(eltType)}; replaceOpWithFuncCall(rewriter, op, name, resType, adaptor.getOperands(), EmitCInterface::On); return success(); @@ -772,23 +706,8 @@ class SparseTensorLexInsertConverter : public OpConversionPattern { LogicalResult matchAndRewrite(LexInsertOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - Type srcType = op.tensor().getType(); - Type eltType = srcType.cast().getElementType(); - StringRef name; - if (eltType.isF64()) - name = "lexInsertF64"; - else if (eltType.isF32()) - name = "lexInsertF32"; - else if (eltType.isInteger(64)) - name = "lexInsertI64"; - else if (eltType.isInteger(32)) - name = "lexInsertI32"; - else if (eltType.isInteger(16)) - name = "lexInsertI16"; - else if (eltType.isInteger(8)) - name = "lexInsertI8"; - else - llvm_unreachable("Unknown element type"); + Type elemTp = 
op.tensor().getType().cast().getElementType(); + SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)}; TypeRange noTp; replaceOpWithFuncCall(rewriter, op, name, noTp, adaptor.getOperands(), EmitCInterface::On); @@ -843,23 +762,8 @@ class SparseTensorCompressConverter : public OpConversionPattern { // all-zero/false by only iterating over the set elements, so the // complexity remains proportional to the sparsity of the expanded // access pattern. - Type srcType = op.tensor().getType(); - Type eltType = srcType.cast().getElementType(); - StringRef name; - if (eltType.isF64()) - name = "expInsertF64"; - else if (eltType.isF32()) - name = "expInsertF32"; - else if (eltType.isInteger(64)) - name = "expInsertI64"; - else if (eltType.isInteger(32)) - name = "expInsertI32"; - else if (eltType.isInteger(16)) - name = "expInsertI16"; - else if (eltType.isInteger(8)) - name = "expInsertI8"; - else - return failure(); + Type elemTp = op.tensor().getType().cast().getElementType(); + SmallString<12> name{"expInsert", primaryTypeFunctionSuffix(elemTp)}; TypeRange noTp; replaceOpWithFuncCall(rewriter, op, name, noTp, adaptor.getOperands(), EmitCInterface::On); From 37be74885946f18dbeb70343ad659924c61d2549 Mon Sep 17 00:00:00 2001 From: Rumeet Dhindsa Date: Tue, 4 Jan 2022 16:23:20 -0800 Subject: [PATCH 597/992] Revert "[clang][ObjC] Add fix it for missing methods in impl" This reverts commit dd72ae3dcc6895f95e1203b40aabcb069c76a0ab. Notified the author of the internal failure and author suggested to revert it for now. 
--- clang/lib/Sema/SemaDeclObjC.cpp | 54 +++++++++---------- .../FixIt/fixit-objc-missing-method-impl.m | 15 ------ 2 files changed, 26 insertions(+), 43 deletions(-) delete mode 100644 clang/test/FixIt/fixit-objc-missing-method-impl.m diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp index d4fefc3d18d8..d6e659e17069 100644 --- a/clang/lib/Sema/SemaDeclObjC.cpp +++ b/clang/lib/Sema/SemaDeclObjC.cpp @@ -2212,8 +2212,9 @@ void Sema::CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, Diag(IVI->getLocation(), diag::err_inconsistent_ivar_count); } -static void WarnUndefinedMethod(Sema &S, ObjCImplDecl *Impl, - ObjCMethodDecl *method, bool &IncompleteImpl, +static void WarnUndefinedMethod(Sema &S, SourceLocation ImpLoc, + ObjCMethodDecl *method, + bool &IncompleteImpl, unsigned DiagID, NamedDecl *NeededFor = nullptr) { // No point warning no definition of method which is 'unavailable'. @@ -2226,19 +2227,10 @@ static void WarnUndefinedMethod(Sema &S, ObjCImplDecl *Impl, // separate warnings. We will give that approach a try, as that // matches what we do with protocols. { - const Sema::SemaDiagnosticBuilder &B = S.Diag(Impl->getLocation(), DiagID); + const Sema::SemaDiagnosticBuilder &B = S.Diag(ImpLoc, DiagID); B << method; if (NeededFor) B << NeededFor; - - // Add an empty definition at the end of the @implementation. - std::string FixItStr; - llvm::raw_string_ostream Out(FixItStr); - method->print(Out, Impl->getASTContext().getPrintingPolicy()); - Out << " {\n}\n\n"; - - SourceLocation Loc = Impl->getAtEndRange().getBegin(); - B << FixItHint::CreateInsertion(Loc, FixItStr); } // Issue a note to the original declaration. @@ -2687,10 +2679,14 @@ static void findProtocolsWithExplicitImpls(const ObjCInterfaceDecl *Super, /// CheckProtocolMethodDefs - This routine checks unimplemented methods /// Declared in protocol, and those referenced by it. 
-static void CheckProtocolMethodDefs( - Sema &S, ObjCImplDecl *Impl, ObjCProtocolDecl *PDecl, bool &IncompleteImpl, - const Sema::SelectorSet &InsMap, const Sema::SelectorSet &ClsMap, - ObjCContainerDecl *CDecl, LazyProtocolNameSet &ProtocolsExplictImpl) { +static void CheckProtocolMethodDefs(Sema &S, + SourceLocation ImpLoc, + ObjCProtocolDecl *PDecl, + bool& IncompleteImpl, + const Sema::SelectorSet &InsMap, + const Sema::SelectorSet &ClsMap, + ObjCContainerDecl *CDecl, + LazyProtocolNameSet &ProtocolsExplictImpl) { ObjCCategoryDecl *C = dyn_cast(CDecl); ObjCInterfaceDecl *IDecl = C ? C->getClassInterface() : dyn_cast(CDecl); @@ -2777,8 +2773,9 @@ static void CheckProtocolMethodDefs( if (C || MethodInClass->isPropertyAccessor()) continue; unsigned DIAG = diag::warn_unimplemented_protocol_method; - if (!S.Diags.isIgnored(DIAG, Impl->getLocation())) { - WarnUndefinedMethod(S, Impl, method, IncompleteImpl, DIAG, PDecl); + if (!S.Diags.isIgnored(DIAG, ImpLoc)) { + WarnUndefinedMethod(S, ImpLoc, method, IncompleteImpl, DIAG, + PDecl); } } } @@ -2799,15 +2796,15 @@ static void CheckProtocolMethodDefs( continue; unsigned DIAG = diag::warn_unimplemented_protocol_method; - if (!S.Diags.isIgnored(DIAG, Impl->getLocation())) { - WarnUndefinedMethod(S, Impl, method, IncompleteImpl, DIAG, PDecl); + if (!S.Diags.isIgnored(DIAG, ImpLoc)) { + WarnUndefinedMethod(S, ImpLoc, method, IncompleteImpl, DIAG, PDecl); } } } // Check on this protocols's referenced protocols, recursively. 
for (auto *PI : PDecl->protocols()) - CheckProtocolMethodDefs(S, Impl, PI, IncompleteImpl, InsMap, ClsMap, CDecl, - ProtocolsExplictImpl); + CheckProtocolMethodDefs(S, ImpLoc, PI, IncompleteImpl, InsMap, ClsMap, + CDecl, ProtocolsExplictImpl); } /// MatchAllMethodDeclarations - Check methods declared in interface @@ -2830,7 +2827,7 @@ void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap, if (!I->isPropertyAccessor() && !InsMap.count(I->getSelector())) { if (ImmediateClass) - WarnUndefinedMethod(*this, IMPDecl, I, IncompleteImpl, + WarnUndefinedMethod(*this, IMPDecl->getLocation(), I, IncompleteImpl, diag::warn_undef_method_impl); continue; } else { @@ -2860,7 +2857,7 @@ void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap, if (!I->isPropertyAccessor() && !ClsMap.count(I->getSelector())) { if (ImmediateClass) - WarnUndefinedMethod(*this, IMPDecl, I, IncompleteImpl, + WarnUndefinedMethod(*this, IMPDecl->getLocation(), I, IncompleteImpl, diag::warn_undef_method_impl); } else { ObjCMethodDecl *ImpMethodDecl = @@ -3027,15 +3024,16 @@ void Sema::ImplMethodsVsClassMethods(Scope *S, ObjCImplDecl* IMPDecl, if (ObjCInterfaceDecl *I = dyn_cast (CDecl)) { for (auto *PI : I->all_referenced_protocols()) - CheckProtocolMethodDefs(*this, IMPDecl, PI, IncompleteImpl, InsMap, - ClsMap, I, ExplicitImplProtocols); + CheckProtocolMethodDefs(*this, IMPDecl->getLocation(), PI, IncompleteImpl, + InsMap, ClsMap, I, ExplicitImplProtocols); } else if (ObjCCategoryDecl *C = dyn_cast(CDecl)) { // For extended class, unimplemented methods in its protocols will // be reported in the primary class. 
if (!C->IsClassExtension()) { for (auto *P : C->protocols()) - CheckProtocolMethodDefs(*this, IMPDecl, P, IncompleteImpl, InsMap, - ClsMap, CDecl, ExplicitImplProtocols); + CheckProtocolMethodDefs(*this, IMPDecl->getLocation(), P, + IncompleteImpl, InsMap, ClsMap, CDecl, + ExplicitImplProtocols); DiagnoseUnimplementedProperties(S, IMPDecl, CDecl, /*SynthesizeProperties=*/false); } diff --git a/clang/test/FixIt/fixit-objc-missing-method-impl.m b/clang/test/FixIt/fixit-objc-missing-method-impl.m deleted file mode 100644 index acc089614a6e..000000000000 --- a/clang/test/FixIt/fixit-objc-missing-method-impl.m +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -// RUN: cp %s %t -// RUN: not %clang_cc1 -pedantic -Werror -fixit -x objective-c %t -// RUN: %clang_cc1 -pedantic -Werror -x objective-c %t - -__attribute__((objc_root_class)) -@interface NSObject -@end - -@interface Foo : NSObject -- (void)fooey; // expected-note{{method 'fooey' declared here}} -@end - -@implementation Foo // expected-warning{{method definition for 'fooey' not found}} -@end From d007e66cb6f58042e043645fda3463de44eb4756 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Jan 2022 16:52:24 -0800 Subject: [PATCH 598/992] [docs] Re-generate ClangCommandLineReference.rst --- clang/docs/ClangCommandLineReference.rst | 200 +++++++++++++++-------- 1 file changed, 132 insertions(+), 68 deletions(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 97807009fd91..72d571dd10ee 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -216,6 +216,8 @@ Trivial automatic variable initialization to zero is only here for benchmarks, i .. option:: -faligned-new= +.. option:: -fautomatic + .. option:: -ffixed-r19 Reserve register r19 (Hexagon only) @@ -242,6 +244,10 @@ Specify comma-separated list of triples OpenMP offloading targets to be supporte .. option:: -force\_load .. 
program:: clang +.. option:: -fplugin-arg-- + +Pass to plugin + .. option:: -framework .. option:: -frtlib-add-rpath, -fno-rtlib-add-rpath @@ -265,7 +271,7 @@ Build this module as a system module. Only used with -emit-module Method to generate ID's for compilation units for single source offloading languages CUDA and HIP: 'hash' (ID's generated by hashing file path and command line options) \| 'random' (ID's generated as random numbers) \| 'none' (disabled). Default is 'hash'. This option will be overridden by option '-cuid=\[ID\]' if it is specified. -.. option:: --gcc-toolchain=, -gcc-toolchain +.. option:: --gcc-toolchain= Search for GCC installation in the specified directory on targets which commonly use GCC. The directory usually contains 'lib{,32,64}/gcc{,-cross}/$triple' and 'include'. If specified, sysroot is skipped for GCC detection. Note: executables (e.g. ld) used by the compiler are not overridden by the selected GCC installation @@ -395,6 +401,10 @@ Do not add include paths for CUDA/HIP and do not include the default CUDA/HIP wr Do not link device library for CUDA/HIP device compilation +.. option:: -nohipwrapperinc + +Do not include the default HIP wrapper headers and include paths + .. option:: -nolibc .. option:: -nomultidefs @@ -423,6 +433,10 @@ Disable standard #include directories for the C++ standard library Write output to +.. option:: -objcmt-allowlist-dir-path=, -objcmt-white-list-dir-path=, -objcmt-whitelist-dir-path= + +Only modify files with a filename contained in the provided directory path + .. option:: -objcmt-atomic-property Make migration to 'atomic' properties @@ -483,16 +497,20 @@ Enable migration to use NS\_NONATOMIC\_IOSONLY macro for setting property's 'ato Enable migration to annotate property with NS\_RETURNS\_INNER\_POINTER -.. option:: -objcmpt-allowlist-dir-path=, -objcmt-whitelist-dir-path=, -objcmt-white-list-dir-path= +.. option:: -object -Only modify files with a filename contained in the provided directory path +.. 
option:: -object-file-name=, -object-file-name -.. option:: -object +Set the output for debug infos .. option:: --offload-arch=, --cuda-gpu-arch=, --no-offload-arch= CUDA offloading device architecture (e.g. sm\_35), or HIP offloading target ID in the form of a device architecture followed by target ID features delimited by a colon. Each target ID feature is a pre-defined string followed by a plus or minus sign (e.g. gfx908:xnack+:sramecc-). May be specified more than once. +.. option:: --offload=,... + +Specify comma-separated list of offloading target triples (HIP only) + .. option:: -p, --profile .. option:: -pagezero\_size @@ -901,13 +919,17 @@ Level of field padding for AddressSanitizer Enable linker dead stripping of globals in AddressSanitizer +.. option:: -fsanitize-address-outline-instrumentation, -fno-sanitize-address-outline-instrumentation + +Always generate function calls for address sanitizer instrumentation + .. option:: -fsanitize-address-poison-custom-array-cookie, -fno-sanitize-address-poison-custom-array-cookie Enable poisoning array cookies when using custom operator new\[\] in AddressSanitizer .. option:: -fsanitize-address-use-after-return= -Select the mode of detecting stack use-after-return in AddressSanitizer +Select the mode of detecting stack use-after-return in AddressSanitizer: never \| runtime (default) \| always .. option:: -fsanitize-address-use-after-scope, -fno-sanitize-address-use-after-scope @@ -1060,10 +1082,6 @@ Pass the comma separated arguments in to the preprocessor Pass to the preprocessor -.. option:: -fmacro-prefix-map= - -remap file source paths in predefined preprocessor macros - Include path management ----------------------- @@ -1335,12 +1353,12 @@ Enable the specified warning Enable warnings for deprecated constructs and define \_\_DEPRECATED +.. option:: -Wframe-larger-than=, -Wframe-larger-than + .. 
option:: -Wnonportable-cfstrings, -Wno-nonportable-cfstrings Target-independent compilation options ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. option:: -Wframe-larger-than= - .. option:: -fPIC, -fno-PIC .. option:: -fPIE, -fno-PIE @@ -1357,6 +1375,10 @@ Emit an address-significance table .. option:: -falign-functions= .. program:: clang +.. option:: -falign-loops= + +N must be a power of two. Align loops to the boundary + .. program:: clang1 .. option:: -faligned-allocation, -faligned-new, -fno-aligned-allocation .. program:: clang @@ -1371,6 +1393,10 @@ Treat editor placeholders as valid source code .. option:: -faltivec, -fno-altivec +.. option:: -faltivec-src-compat= + +Source-level compatibility for Altivec vectors (for PowerPC targets). This includes results of vector comparison (scalar for 'xl', vector for 'gcc') as well as behavior when initializing with a scalar (splatting for 'xl', element zero only for 'gcc'). For 'mixed', the compatibility is as 'gcc' for 'vector bool/vector pixel' and as 'xl' for other types. Current default is 'mixed'. + .. option:: -fansi-escape-codes Use ANSI escape codes for diagnostics @@ -1391,6 +1417,10 @@ Enable Apple gcc-compatible #pragma pack handling Restrict code to those available for App Extensions +.. option:: -fapprox-func, -fno-approx-func + +Allow certain math function calls to be replaced with an approximately equivalent calculation + .. option:: -fasm, -fno-asm .. option:: -fasm-blocks, -fno-asm-blocks @@ -1523,6 +1553,8 @@ Enable C++ exceptions .. option:: -fcxx-modules, -fno-cxx-modules +Enable modules for C++ + .. option:: -fdata-sections, -fno-data-sections Place each data in its own section @@ -1677,7 +1709,7 @@ The compilation directory to embed in the debug info and coverage mapping. .. option:: -ffile-prefix-map= -remap file source paths in debug info and predefined preprocessor macros +remap file source paths in debug info, predefined preprocessor macros and \_\_builtin\_FILE() .. 
option:: -ffinite-loops, -fno-finite-loops @@ -1705,7 +1737,7 @@ Enable support for int128\_t type .. option:: -ffp-contract= -Form fused FP ops (e.g. FMAs): fast (fuses across statements disregarding pragmas) \| on (only fuses in the same statement unless dictated by pragmas) \| off (never fuses) \| fast-honor-pragmas (fuses across statements unless dictated by pragmas). Default is 'fast' for CUDA, 'fast-honor-pragmas' for HIP, and 'on' otherwise. +Form fused FP ops (e.g. FMAs): fast (fuses across statements disregarding pragmas) \| on (only fuses in the same statement unless dictated by pragmas) \| off (never fuses) \| fast-honor-pragmas (fuses across statements unless diectated by pragmas). Default is 'fast' for CUDA, 'fast-honor-pragmas' for HIP, and 'on' otherwise. .. option:: -ffp-exception-behavior= @@ -1819,29 +1851,19 @@ Enable implicit vector bit-casts .. option:: -flimited-precision= -.. option:: -flto, -fno-lto - -Enable LTO in 'full' mode - .. option:: -flto-jobs= Controls the backend parallelism of -flto=thin (default of 0 means the number of threads will be derived from the number of CPUs detected) -.. program:: clang1 -.. option:: -flto= -.. program:: clang +.. option:: -flto=, -flto (equivalent to -flto=full), -flto=auto (equivalent to -flto=full), -flto=jobserver (equivalent to -flto=full) Set LTO mode to either 'full' or 'thin' -.. program:: clang2 -.. option:: -flto=auto -.. program:: clang +.. option:: -fmacro-backtrace-limit= -.. program:: clang3 -.. option:: -flto=jobserver -.. program:: clang +.. option:: -fmacro-prefix-map= -.. option:: -fmacro-backtrace-limit= +remap file source paths in predefined preprocessor macros and \_\_builtin\_FILE() .. option:: -fmath-errno, -fno-math-errno @@ -1873,6 +1895,10 @@ Allow merging of constants Format message diagnostics so that they fit within N columns +.. option:: -fminimize-whitespace, -fno-minimize-whitespace + +Minimize whitespace when emitting preprocessor output + .. 
option:: -fmodule-file-deps, -fno-module-file-deps .. option:: -fmodule-map-file= @@ -2017,13 +2043,7 @@ Specify the target Objective-C runtime kind and version Enable ARC-style weak references in Objective-C -.. option:: -foffload-lto, -fno-offload-lto - -Enable LTO in 'full' mode for offload compilation - -.. program:: clang1 -.. option:: -foffload-lto= -.. program:: clang +.. option:: -foffload-lto=, -foffload-lto (equivalent to -foffload-lto=full) Set LTO mode to either 'full' or 'thin' for offload compilation @@ -2033,16 +2053,25 @@ Set LTO mode to either 'full' or 'thin' for offload compilation Parse OpenMP pragmas and generate parallel code. +.. option:: -fopenmp-extensions, -fno-openmp-extensions + +Enable all Clang extensions for OpenMP directives and clauses + .. option:: -fopenmp-simd, -fno-openmp-simd Emit OpenMP code only for SIMD-based constructs. -.. option:: -fopenmp-version= +.. option:: -fopenmp-target-debug, -fno-openmp-target-debug -.. option:: -fopenmp-extensions, -fno-openmp-extensions +Enable debugging in the OpenMP offloading device RTL + +.. option:: -fopenmp-target-new-runtime, -fno-openmp-target-new-runtime + +Use the new bitcode library for OpenMP offloading + +.. option:: -fopenmp-version= -Enable or disable all Clang extensions for OpenMP directives and clauses. By -default, they are enabled. +Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50. .. program:: clang1 .. option:: -fopenmp= @@ -2209,6 +2238,10 @@ Set update method of profile counters (atomic,prefer-atomic,single) Use instrumentation data for profile-guided optimization. If pathname is a directory, it reads from /default.profdata. Otherwise, it reads from file . +.. option:: -fprotect-parens, -fno-protect-parens + +Determines whether the optimizer honors parentheses when floating-point expressions are evaluated + .. 
option:: -fpseudo-probe-for-profiling, -fno-pseudo-probe-for-profiling Emit pseudo probes for sample profiling @@ -2377,6 +2410,10 @@ Enable optimizations based on the strict rules for overwriting polymorphic C++ o .. option:: -fstruct-path-tbaa, -fno-struct-path-tbaa +.. option:: -fswift-async-fp= + +### Output Formats + +The library supports multiple output formats. Use the +`--benchmark_format=` flag (or set the +`BENCHMARK_FORMAT=` environment variable) to set +the format type. `console` is the default format. + +The Console format is intended to be a human readable format. By default +the format generates color output. Context is output on stderr and the +tabular data on stdout. Example tabular output looks like: + +``` +Benchmark Time(ns) CPU(ns) Iterations +---------------------------------------------------------------------- +BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s +BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s +BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s +``` + +The JSON format outputs human readable json split into two top level attributes. +The `context` attribute contains information about the run in general, including +information about the CPU and the date. +The `benchmarks` attribute contains a list of every benchmark run. 
Example json +output looks like: + +```json +{ + "context": { + "date": "2015/03/17-18:40:25", + "num_cpus": 40, + "mhz_per_cpu": 2801, + "cpu_scaling_enabled": false, + "build_type": "debug" + }, + "benchmarks": [ + { + "name": "BM_SetInsert/1024/1", + "iterations": 94877, + "real_time": 29275, + "cpu_time": 29836, + "bytes_per_second": 134066, + "items_per_second": 33516 + }, + { + "name": "BM_SetInsert/1024/8", + "iterations": 21609, + "real_time": 32317, + "cpu_time": 32429, + "bytes_per_second": 986770, + "items_per_second": 246693 + }, + { + "name": "BM_SetInsert/1024/10", + "iterations": 21393, + "real_time": 32724, + "cpu_time": 33355, + "bytes_per_second": 1199226, + "items_per_second": 299807 + } + ] +} +``` + +The CSV format outputs comma-separated values. The `context` is output on stderr +and the CSV itself on stdout. Example CSV output looks like: + +``` +name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label +"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, +"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, +"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, +``` + + + +### Output Files + +Write benchmark results to a file with the `--benchmark_out=` option +(or set `BENCHMARK_OUT`). Specify the output format with +`--benchmark_out_format={json|console|csv}` (or set +`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv +parsers. + +Specifying `--benchmark_out` does not suppress the console output. + + + +### Running Benchmarks + +Benchmarks are executed by running the produced binaries. Benchmarks binaries, +by default, accept options that may be specified either through their command +line interface or by setting environment variables before execution. 
For every +`--option_flag=` CLI switch, a corresponding environment variable +`OPTION_FLAG=` exist and is used as default if set (CLI switches always + prevails). A complete list of CLI options is available running benchmarks + with the `--help` switch. + + + +### Running a Subset of Benchmarks + +The `--benchmark_filter=` option (or `BENCHMARK_FILTER=` +environment variable) can be used to only run the benchmarks that match +the specified ``. For example: + +```bash +$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 +Run on (1 X 2300 MHz CPU ) +2016-06-25 19:34:24 +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +BM_memcpy/32 12 ns 12 ns 54687500 +BM_memcpy/32k 1834 ns 1837 ns 357143 +``` + + + +### Result comparison + +It is possible to compare the benchmarking results. +See [Additional Tooling Documentation](docs/tools.md) + + + +### Extra Context + +Sometimes it's useful to add extra context to the content printed before the +results. By default this section includes information about the CPU on which +the benchmarks are running. If you do want to add more context, you can use +the `benchmark_context` command line flag: + +```bash +$ ./run_benchmarks --benchmark_context=pwd=`pwd` +Run on (1 x 2300 MHz CPU) +pwd: /home/user/benchmark/ +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +``` + +You can get the same effect with the API: + +```c++ + benchmark::AddCustomContext("foo", "bar"); +``` + +Note that attempts to add a second value with the same key will fail with an +error message. + + + +### Runtime and Reporting Considerations + +When the benchmark binary is executed, each benchmark function is run serially. 
+The number of iterations to run is determined dynamically by running the +benchmark a few times and measuring the time taken and ensuring that the +ultimate result will be statistically stable. As such, faster benchmark +functions will be run for more iterations than slower benchmark functions, and +the number of iterations is thus reported. + +In all cases, the number of iterations for which the benchmark is run is +governed by the amount of time the benchmark takes. Concretely, the number of +iterations is at least one, not more than 1e9, until CPU time is greater than +the minimum time, or the wallclock time is 5x minimum time. The minimum time is +set per benchmark by calling `MinTime` on the registered benchmark object. + +Average timings are then reported over the iterations run. If multiple +repetitions are requested using the `--benchmark_repetitions` command-line +option, or at registration time, the benchmark function will be run several +times and statistical results across these repetitions will also be reported. + +As well as the per-benchmark entries, a preamble in the report will include +information about the machine on which the benchmarks are run. + + + +### Passing Arguments + +Sometimes a family of benchmarks can be implemented with just one routine that +takes an extra argument to specify which one of the family of benchmarks to +run. 
For example, the following code defines a family of benchmarks for +measuring the speed of `memcpy()` calls of different lengths: + +```c++ +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range(0)]; + char* dst = new char[state.range(0)]; + memset(src, 'x', state.range(0)); + for (auto _ : state) + memcpy(dst, src, state.range(0)); + state.SetBytesProcessed(int64_t(state.iterations()) * + int64_t(state.range(0))); + delete[] src; + delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following invocation will pick a few appropriate arguments in +the specified range and will generate a benchmark for each such argument. + +```c++ +BENCHMARK(BM_memcpy)->Range(8, 8<<10); +``` + +By default the arguments in the range are generated in multiples of eight and +the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the +range multiplier is changed to multiples of two. + +```c++ +BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); +``` + +Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. + +The preceding code shows a method of defining a sparse range. The following +example shows a method of defining a dense range. It is then used to benchmark +the performance of `std::vector` initialization for uniformly increasing sizes. + +```c++ +static void BM_DenseRange(benchmark::State& state) { + for(auto _ : state) { + std::vector v(state.range(0), state.range(0)); + benchmark::DoNotOptimize(v.data()); + benchmark::ClobberMemory(); + } +} +BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); +``` + +Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. + +You might have a benchmark that depends on two or more inputs. For example, the +following code defines a family of benchmarks for measuring the speed of set +insertion. 
+ +```c++ +static void BM_SetInsert(benchmark::State& state) { + std::set data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) + ->Args({1<<10, 512}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) + ->Args({8<<10, 512}); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following macro will pick a few appropriate arguments in the +product of the two specified ranges and will generate a benchmark for each such +pair. + +```c++ +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` + +Some benchmarks may require specific argument values that cannot be expressed +with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a +benchmark input for each combination in the product of the supplied vectors. + +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 20}) + ->Args({3<<10, 20}) + ->Args({8<<10, 20}) + ->Args({3<<10, 40}) + ->Args({8<<10, 40}) + ->Args({1<<10, 40}) + ->Args({1<<10, 60}) + ->Args({3<<10, 60}) + ->Args({8<<10, 60}) + ->Args({1<<10, 80}) + ->Args({3<<10, 80}) + ->Args({8<<10, 80}); +``` + +For more complex patterns of inputs, passing a custom function to `Apply` allows +programmatic specification of an arbitrary set of arguments on which to run the +benchmark. The following example enumerates a dense range on one parameter, +and a sparse range on the second. 
+ +```c++ +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->Args({i, j}); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); +``` + +#### Passing Arbitrary Arguments to a Benchmark + +In C++11 it is possible to define a benchmark that takes an arbitrary number +of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` +macro creates a benchmark that invokes `func` with the `benchmark::State` as +the first argument followed by the specified `args...`. +The `test_case_name` is appended to the name of the benchmark and +should describe the values passed. + +```c++ +template +void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { + [...] +} +// Registers a benchmark named "BM_takes_args/int_string_test" that passes +// the specified values to `extra_args`. +BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +``` + +Note that elements of `...args` may refer to global variables. Users should +avoid modifying global state inside of a benchmark. + + + +### Calculating Asymptotic Complexity (Big O) + +Asymptotic complexity might be calculated for a family of benchmarks. The +following code will calculate the coefficient for the high-order term in the +running time and the normalized root-mean square error of string comparison. + +```c++ +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range(0), '-'); + std::string s2(state.range(0), '-'); + for (auto _ : state) { + benchmark::DoNotOptimize(s1.compare(s2)); + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); +``` + +As shown in the following invocation, asymptotic complexity might also be +calculated automatically. 
+ +```c++ +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); +``` + +The following code will specify asymptotic complexity with a lambda function, +that might be used to customize high-order term calculation. + +```c++ +BENCHMARK(BM_StringCompare)->RangeMultiplier(2) + ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); +``` + + + +### Custom Benchmark Name + +You can change the benchmark's name as follows: + +```c++ +BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); +``` + +The invocation will execute the benchmark as before using `BM_memcpy` but changes +the prefix in the report to `memcpy`. + + + +### Templated Benchmarks + +This example produces and consumes messages of size `sizeof(v)` `range_x` +times. It also outputs throughput in the absence of multiprogramming. + +```c++ +template void BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + for (auto _ : state) { + for (int i = state.range(0); i--; ) + q.push(v); + for (int e = state.range(0); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast(state.iterations())*state.range(0)); +} +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); +``` + +Three macros are provided for adding benchmark templates. + +```c++ +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. 
+#else // C++ < C++11 +#define BENCHMARK_TEMPLATE(func, arg1) +#endif +#define BENCHMARK_TEMPLATE1(func, arg1) +#define BENCHMARK_TEMPLATE2(func, arg1, arg2) +``` + + + +### Fixtures + +Fixture tests are created by first defining a type that derives from +`::benchmark::Fixture` and then creating/registering the tests using the +following macros: + +* `BENCHMARK_F(ClassName, Method)` +* `BENCHMARK_DEFINE_F(ClassName, Method)` +* `BENCHMARK_REGISTER_F(ClassName, Method)` + +For Example: + +```c++ +class MyFixture : public benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& state) { + } + + void TearDown(const ::benchmark::State& state) { + } +}; + +BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} +/* BarTest is NOT registered */ +BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); +/* BarTest is now registered */ +``` + +#### Templated Fixtures + +Also you can create templated fixture by using the following macros: + +* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` +* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` + +For example: + +```c++ +template +class MyFixture : public benchmark::Fixture {}; + +BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); +``` + + + +### Custom Counters + +You can add your own counters with user-defined names. The example below +will add columns "Foo", "Bar" and "Baz" in its output: + +```c++ +static void UserCountersExample1(benchmark::State& state) { + double numFoos = 0, numBars = 0, numBazs = 0; + for (auto _ : state) { + // ... 
count Foo,Bar,Baz events + } + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +} +``` + +The `state.counters` object is a `std::map` with `std::string` keys +and `Counter` values. The latter is a `double`-like class, via an implicit +conversion to `double&`. Thus you can use all of the standard arithmetic +assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. + +In multithreaded benchmarks, each counter is set on the calling thread only. +When the benchmark finishes, the counters from each thread will be summed; +the resulting sum is the value which will be shown for the benchmark. + +The `Counter` constructor accepts three parameters: the value as a `double` +; a bit flag which allows you to show counters as rates, and/or as per-thread +iteration, and/or as per-thread averages, and/or iteration invariants, +and/or finally inverting the result; and a flag specifying the 'unit' - i.e. +is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 +(`benchmark::Counter::OneK::kIs1024`)? + +```c++ + // sets a simple counter + state.counters["Foo"] = numFoos; + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark. + // Meaning: per one second, how many 'foo's are processed? + state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark, and the result inverted. + // Meaning: how many seconds it takes to process one 'foo'? + state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + + // Set the counter as a thread-average quantity. It will + // be presented divided by the number of threads. 
+ state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); + + // There's also a combined flag: + state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); + + // This says that we process with the rate of state.range(0) bytes every iteration: + state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); +``` + +When you're compiling in C++11 mode or later you can use `insert()` with +`std::initializer_list`: + +```c++ + // With C++11, this can be done: + state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); + // ... instead of: + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +``` + +#### Counter Reporting + +When using the console reporter, by default, user counters are printed at +the end after the table, the same way as ``bytes_processed`` and +``items_processed``. This is best for cases in which there are few counters, +or where there are only a couple of lines per benchmark. Here's an example of +the default output: + +``` +------------------------------------------------------------------------------ +Benchmark Time CPU Iterations UserCounters... 
+------------------------------------------------------------------------------ +BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m +BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 +BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 +BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 +BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 +BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 +BM_Factorial 26 ns 26 ns 26608979 40320 +BM_Factorial/real_time 26 ns 26 ns 26587936 40320 +BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 +BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 +BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 +BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 +``` + +If this doesn't suit you, you can print each counter as a table column by +passing the flag `--benchmark_counters_tabular=true` to the benchmark +application. This is best for cases in which there are a lot of counters, or +a lot of lines per individual benchmark. Note that this will trigger a +reprinting of the table header any time the counter set changes between +individual benchmarks. 
Here's an example of corresponding output when +`--benchmark_counters_tabular=true` is passed: + +``` +--------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations Bar Bat Baz Foo +--------------------------------------------------------------------------------------- +BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 +BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 +BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 +BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 +BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 +BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 +BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 +BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 +-------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------- +BM_Factorial 26 ns 26 ns 26392245 40320 +BM_Factorial/real_time 26 ns 26 ns 26494107 40320 +BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 +BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 +BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 +BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 +BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 +BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 +BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 +BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 +BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 +``` + +Note above the additional header printed when the benchmark changes from +``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does +not have the same counter set as ``BM_UserCounter``. 
+ + + +### Multithreaded Benchmarks + +In a multithreaded test (benchmark invoked by multiple threads simultaneously), +it is guaranteed that none of the threads will start until all have reached +the start of the benchmark loop, and all will have finished before any thread +exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` +API) As such, any global setup or teardown can be wrapped in a check against the thread +index: + +```c++ +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index == 0) { + // Setup code here. + } + for (auto _ : state) { + // Run the test as normal. + } + if (state.thread_index == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(2); +``` + +If the benchmarked code itself uses threads and you want to compare it to +single-threaded code, you may want to use real-time ("wallclock") measurements +for latency comparisons: + +```c++ +BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); +``` + +Without `UseRealTime`, CPU time is used by default. + + + +### CPU Timers + +By default, the CPU timer only measures the time spent by the main thread. +If the benchmark itself uses threads internally, this measurement may not +be what you are looking for. Instead, there is a way to measure the total +CPU usage of the process, by all the threads. + +```c++ +void callee(int i); + +static void MyMain(int size) { +#pragma omp parallel for + for(int i = 0; i < size; i++) + callee(i); +} + +static void BM_OpenMP(benchmark::State& state) { + for (auto _ : state) + MyMain(state.range(0)); +} + +// Measure the time spent by the main thread, use it to decide for how long to +// run the benchmark loop. Depending on the internal implementation detail may +// measure to anywhere from near-zero (the overhead spent before/after work +// handoff to worker thread[s]) to the whole single-thread time. 
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10);
+
+// Measure the user-visible time, the wall clock (literally, the time that
+// has passed on the clock on the wall), use it to decide for how long to
+// run the benchmark loop. This will always be meaningful, and will match the
+// time spent by the main thread in the single-threaded case, in general decreasing
+// with the number of internal threads doing the work.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime();
+
+// Measure the total CPU consumption, use it to decide for how long to
+// run the benchmark loop. This will always measure to no less than the
+// time spent by the main thread in the single-threaded case.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime();
+
+// A mixture of the last two. Measure the total CPU consumption, but use the
+// wall clock to decide for how long to run the benchmark loop.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime();
+```
+
+#### Controlling Timers
+
+Normally, the entire duration of the work loop (`for (auto _ : state) {}`)
+is measured. But sometimes, it is necessary to do some work inside of
+that loop, every iteration, but without counting that time to the benchmark time.
+That is possible, although it is not recommended, since it has high overhead.
+
+```c++
+static void BM_SetInsert_With_Timer_Control(benchmark::State& state) {
+  std::set<int> data;
+  for (auto _ : state) {
+    state.PauseTiming(); // Stop timers. They will not count until they are resumed.
+    data = ConstructRandomSet(state.range(0)); // Do something that should not be measured
+    state.ResumeTiming(); // And resume timers. They are now counting again.
+    // The rest will be measured.
+ for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` + + + +### Manual Timing + +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. + +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the benchmark loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. + +```c++ +static void BM_ManualTiming(benchmark::State& state) { + int microseconds = state.range(0); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + for (auto _ : state) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed_seconds = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed_seconds.count()); + } +} +BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); +``` + + + +### Setting the Time Unit + +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. 
In +order to manually set the time unit, you can specify it manually: + +```c++ +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); +``` + + + +### Preventing Optimization + +To prevent a value or expression from being optimized away by the compiler +the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` +functions can be used. + +```c++ +static void BM_test(benchmark::State& state) { + for (auto _ : state) { + int x = 0; + for (int i=0; i < 64; ++i) { + benchmark::DoNotOptimize(x += i); + } + } +} +``` + +`DoNotOptimize()` forces the *result* of `` to be stored in either +memory or a register. For GNU based compilers it acts as read/write barrier +for global memory. More specifically it forces the compiler to flush pending +writes to memory and reload any other values as necessary. + +Note that `DoNotOptimize()` does not prevent optimizations on `` +in any way. `` may even be removed entirely when the result is already +known. For example: + +```c++ + /* Example 1: `` is removed entirely. */ + int foo(int x) { return x + 42; } + while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); + + /* Example 2: Result of '' is only reused */ + int bar(int) __attribute__((const)); + while (...) DoNotOptimize(bar(0)); // Optimized to: + // int __result__ = bar(0); + // while (...) DoNotOptimize(__result__); +``` + +The second tool for preventing optimizations is `ClobberMemory()`. In essence +`ClobberMemory()` forces the compiler to perform all pending writes to global +memory. Memory managed by block scope objects must be "escaped" using +`DoNotOptimize(...)` before it can be clobbered. In the below example +`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized +away. + +```c++ +static void BM_vector_push_back(benchmark::State& state) { + for (auto _ : state) { + std::vector v; + v.reserve(1); + benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. 
+ v.push_back(42); + benchmark::ClobberMemory(); // Force 42 to be written to memory. + } +} +``` + +Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. + + + +### Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks + +By default each benchmark is run once and that single result is reported. +However benchmarks are often noisy and a single result may not be representative +of the overall behavior. For this reason it's possible to repeatedly rerun the +benchmark. + +The number of runs of each benchmark is specified globally by the +`--benchmark_repetitions` flag or on a per benchmark basis by calling +`Repetitions` on the registered benchmark object. When a benchmark is run more +than once the mean, median and standard deviation of the runs will be reported. + +Additionally the `--benchmark_report_aggregates_only={true|false}`, +`--benchmark_display_aggregates_only={true|false}` flags or +`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be +used to change how repeated tests are reported. By default the result of each +repeated run is reported. When `report aggregates only` option is `true`, +only the aggregates (i.e. mean, median and standard deviation, maybe complexity +measurements if they were requested) of the runs is reported, to both the +reporters - standard output (console), and the file. +However when only the `display aggregates only` option is `true`, +only the aggregates are displayed in the standard output, while the file +output still contains everything. +Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a +registered benchmark object overrides the value of the appropriate flag for that +benchmark. + + + +### Custom Statistics + +While having mean, median and standard deviation is nice, this may not be +enough for everyone. For example you may want to know what the largest +observation is, e.g. because you have some real-time constraints. 
This is easy. +The following code will specify a custom statistic to be calculated, defined +by a lambda function. + +```c++ +void BM_spin_empty(benchmark::State& state) { + for (auto _ : state) { + for (int x = 0; x < state.range(0); ++x) { + benchmark::DoNotOptimize(x); + } + } +} + +BENCHMARK(BM_spin_empty) + ->ComputeStatistics("max", [](const std::vector& v) -> double { + return *(std::max_element(std::begin(v), std::end(v))); + }) + ->Arg(512); +``` + + + +### Using RegisterBenchmark(name, fn, args...) + +The `RegisterBenchmark(name, func, args...)` function provides an alternative +way to create and register benchmarks. +`RegisterBenchmark(name, func, args...)` creates, registers, and returns a +pointer to a new benchmark with the specified `name` that invokes +`func(st, args...)` where `st` is a `benchmark::State` object. + +Unlike the `BENCHMARK` registration macros, which can only be used at the global +scope, the `RegisterBenchmark` can be called anywhere. This allows for +benchmark tests to be registered programmatically. + +Additionally `RegisterBenchmark` allows any callable object to be registered +as a benchmark. Including capturing lambdas and function objects. + +For Example: +```c++ +auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; + +int main(int argc, char** argv) { + for (auto& test_input : { /* ... */ }) + benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} +``` + + + +### Exiting with an Error + +When errors caused by external influences, such as file I/O and network +communication, occur within a benchmark the +`State::SkipWithError(const char* msg)` function can be used to skip that run +of benchmark and report the error. Note that only future iterations of the +`KeepRunning()` are skipped. 
For the ranged-for version of the benchmark loop,
+users must explicitly exit the loop, otherwise all iterations will be performed.
+Users may explicitly return to exit the benchmark immediately.
+
+The `SkipWithError(...)` function may be used at any point within the benchmark,
+including before and after the benchmark loop. Moreover, if `SkipWithError(...)`
+has been used, it is not required to reach the benchmark loop and one may return
+from the benchmark function early.
+
+For example:
+
+```c++
+static void BM_test(benchmark::State& state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    // KeepRunning() loop will not be entered.
+  }
+  while (state.KeepRunning()) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // Needed to skip the rest of the iteration.
+    }
+    do_stuff(data);
+  }
+}
+
+static void BM_test_ranged_fo(benchmark::State & state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    return; // Early return is allowed when SkipWithError() has been used.
+  }
+  for (auto _ : state) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // REQUIRED to prevent all further iterations.
+    }
+    do_stuff(data);
+  }
+}
+```
+
+
+### A Faster KeepRunning Loop
+
+In C++11 mode, a range-based for loop should be used in preference to
+the `KeepRunning` loop for running the benchmarks. For example:
+
+```c++
+static void BM_Fast(benchmark::State &state) {
+  for (auto _ : state) {
+    FastOperation();
+  }
+}
+BENCHMARK(BM_Fast);
+```
+
+The reason the ranged-for loop is faster than using `KeepRunning` is
+because `KeepRunning` requires a memory load and store of the iteration count
+every iteration, whereas the ranged-for variant is able to keep the iteration count
+in a register.
+ +For example, an empty inner loop of using the ranged-based for method looks like: + +```asm +# Loop Init + mov rbx, qword ptr [r14 + 104] + call benchmark::State::StartKeepRunning() + test rbx, rbx + je .LoopEnd +.LoopHeader: # =>This Inner Loop Header: Depth=1 + add rbx, -1 + jne .LoopHeader +.LoopEnd: +``` + +Compared to an empty `KeepRunning` loop, which looks like: + +```asm +.LoopHeader: # in Loop: Header=BB0_3 Depth=1 + cmp byte ptr [rbx], 1 + jne .LoopInit +.LoopBody: # =>This Inner Loop Header: Depth=1 + mov rax, qword ptr [rbx + 8] + lea rcx, [rax + 1] + mov qword ptr [rbx + 8], rcx + cmp rax, qword ptr [rbx + 104] + jb .LoopHeader + jmp .LoopEnd +.LoopInit: + mov rdi, rbx + call benchmark::State::StartKeepRunning() + jmp .LoopBody +.LoopEnd: +``` + +Unless C++03 compatibility is required, the ranged-for variant of writing +the benchmark loop should be preferred. + + + +### Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
+``` + +you might want to disable the CPU frequency scaling while running the benchmark: + +```bash +sudo cpupower frequency-set --governor performance +./mybench +sudo cpupower frequency-set --governor powersave +``` diff --git a/libcxx/utils/google-benchmark/WORKSPACE b/libcxx/utils/google-benchmark/WORKSPACE new file mode 100644 index 000000000000..631f3ba05de5 --- /dev/null +++ b/libcxx/utils/google-benchmark/WORKSPACE @@ -0,0 +1,51 @@ +workspace(name = "com_github_google_benchmark") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_cc", + strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912", + urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"], + sha256 = "d7dc12c1d5bc1a87474de8e3d17b7731a4dcebcfb8aa3990fe8ac7734ef12f2f", +) + +http_archive( + name = "com_google_absl", + sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", + strip_prefix = "abseil-cpp-20200225.2", + urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], +) + +http_archive( + name = "com_google_googletest", + strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e", + urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"], + sha256 = "8f827dd550db8b4fdf73904690df0be9fccc161017c9038a724bc9a0617a1bc8", +) + +http_archive( + name = "pybind11", + build_file = "@//bindings/python:pybind11.BUILD", + sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d", + strip_prefix = "pybind11-2.4.3", + urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"], +) + +new_local_repository( + name = "python_headers", + build_file = "@//bindings/python:python_headers.BUILD", + path = "/usr/include/python3.6", # May be overwritten by setup.py. 
+) + +http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", + sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", +) + +load("@rules_python//python:pip.bzl", pip3_install="pip_install") + +pip3_install( + name = "py_deps", + requirements = "//:requirements.txt", +) diff --git a/libcxx/utils/google-benchmark/_config.yml b/libcxx/utils/google-benchmark/_config.yml new file mode 100644 index 000000000000..1fa5ff852bda --- /dev/null +++ b/libcxx/utils/google-benchmark/_config.yml @@ -0,0 +1,2 @@ +theme: jekyll-theme-midnight +markdown: GFM diff --git a/libcxx/utils/google-benchmark/appveyor.yml b/libcxx/utils/google-benchmark/appveyor.yml new file mode 100644 index 000000000000..81da955f0281 --- /dev/null +++ b/libcxx/utils/google-benchmark/appveyor.yml @@ -0,0 +1,50 @@ +version: '{build}' + +image: Visual Studio 2017 + +configuration: + - Debug + - Release + +environment: + matrix: + - compiler: msvc-15-seh + generator: "Visual Studio 15 2017" + + - compiler: msvc-15-seh + generator: "Visual Studio 15 2017 Win64" + + - compiler: msvc-14-seh + generator: "Visual Studio 14 2015" + + - compiler: msvc-14-seh + generator: "Visual Studio 14 2015 Win64" + + - compiler: gcc-5.3.0-posix + generator: "MinGW Makefiles" + cxx_path: 'C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin' + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + +matrix: + fast_finish: true + +install: + # git bash conflicts with MinGW makefiles + - if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files\Git\usr\bin;=%") + - if not "%cxx_path%"=="" (set "PATH=%PATH%;%cxx_path%") + +build_script: + - md _build -Force + - cd _build + - echo %configuration% + - cmake -G "%generator%" "-DCMAKE_BUILD_TYPE=%configuration%" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON .. + - cmake --build . 
--config %configuration% + +test_script: + - ctest --build-config %configuration% --timeout 300 --output-on-failure + +artifacts: + - path: '_build/CMakeFiles/*.log' + name: logs + - path: '_build/Testing/**/*.xml' + name: test_results diff --git a/libcxx/utils/google-benchmark/bindings/python/BUILD b/libcxx/utils/google-benchmark/bindings/python/BUILD new file mode 100644 index 000000000000..9559a76b30a9 --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/BUILD @@ -0,0 +1,3 @@ +exports_files(glob(["*.BUILD"])) +exports_files(["build_defs.bzl"]) + diff --git a/libcxx/utils/google-benchmark/bindings/python/build_defs.bzl b/libcxx/utils/google-benchmark/bindings/python/build_defs.bzl new file mode 100644 index 000000000000..45907aaa5e2d --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/build_defs.bzl @@ -0,0 +1,25 @@ +_SHARED_LIB_SUFFIX = { + "//conditions:default": ".so", + "//:windows": ".dll", +} + +def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []): + for shared_lib_suffix in _SHARED_LIB_SUFFIX.values(): + shared_lib_name = name + shared_lib_suffix + native.cc_binary( + name = shared_lib_name, + linkshared = 1, + linkstatic = 1, + srcs = srcs + hdrs, + copts = copts, + features = features, + deps = deps, + ) + + return native.py_library( + name = name, + data = select({ + platform: [name + shared_lib_suffix] + for platform, shared_lib_suffix in _SHARED_LIB_SUFFIX.items() + }), + ) diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD new file mode 100644 index 000000000000..3c1561f48eee --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD @@ -0,0 +1,38 @@ +load("//bindings/python:build_defs.bzl", "py_extension") + +py_library( + name = "google_benchmark", + srcs = ["__init__.py"], + visibility = ["//visibility:public"], + deps = [ + ":_benchmark", + # pip; absl:app + ], +) + 
+py_extension( + name = "_benchmark", + srcs = ["benchmark.cc"], + copts = [ + "-fexceptions", + "-fno-strict-aliasing", + ], + features = ["-use_header_modules"], + deps = [ + "//:benchmark", + "@pybind11", + "@python_headers", + ], +) + +py_test( + name = "example", + srcs = ["example.py"], + python_version = "PY3", + srcs_version = "PY3", + visibility = ["//visibility:public"], + deps = [ + ":google_benchmark", + ], +) + diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py new file mode 100644 index 000000000000..1055bf241856 --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py @@ -0,0 +1,158 @@ +# Copyright 2020 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Python benchmarking utilities. + +Example usage: + import google_benchmark as benchmark + + @benchmark.register + def my_benchmark(state): + ... # Code executed outside `while` loop is not timed. + + while state: + ... # Code executed within `while` loop is timed. 
+ + if __name__ == '__main__': + benchmark.main() +""" + +from absl import app +from google_benchmark import _benchmark +from google_benchmark._benchmark import ( + Counter, + kNanosecond, + kMicrosecond, + kMillisecond, + kSecond, + oNone, + o1, + oN, + oNSquared, + oNCubed, + oLogN, + oNLogN, + oAuto, + oLambda, +) + + +__all__ = [ + "register", + "main", + "Counter", + "kNanosecond", + "kMicrosecond", + "kMillisecond", + "kSecond", + "oNone", + "o1", + "oN", + "oNSquared", + "oNCubed", + "oLogN", + "oNLogN", + "oAuto", + "oLambda", +] + +__version__ = "0.2.0" + + +class __OptionMaker: + """A stateless class to collect benchmark options. + + Collect all decorator calls like @option.range(start=0, limit=1<<5). + """ + + class Options: + """Pure data class to store options calls, along with the benchmarked function.""" + + def __init__(self, func): + self.func = func + self.builder_calls = [] + + @classmethod + def make(cls, func_or_options): + """Make Options from Options or the benchmarked function.""" + if isinstance(func_or_options, cls.Options): + return func_or_options + return cls.Options(func_or_options) + + def __getattr__(self, builder_name): + """Append option call in the Options.""" + + # The function that gets returned on @option.range(start=0, limit=1<<5). + def __builder_method(*args, **kwargs): + + # The decorator that gets called, either with the benchmarked function + # or the previous Options + def __decorator(func_or_options): + options = self.make(func_or_options) + options.builder_calls.append((builder_name, args, kwargs)) + # The decorator returns Options so it is not technically a decorator + # and needs a final call to @register + return options + + return __decorator + + return __builder_method + + +# Alias for nicer API. 
+# We have to instantiate an object, even if stateless, to be able to use __getattr__ +# on option.range +option = __OptionMaker() + + +def register(undefined=None, *, name=None): + """Register function for benchmarking.""" + if undefined is None: + # Decorator is called without parenthesis so we return a decorator + return lambda f: register(f, name=name) + + # We have either the function to benchmark (simple case) or an instance of Options + # (@option._ case). + options = __OptionMaker.make(undefined) + + if name is None: + name = options.func.__name__ + + # We register the benchmark and reproduce all the @option._ calls onto the + # benchmark builder pattern + benchmark = _benchmark.RegisterBenchmark(name, options.func) + for name, args, kwargs in options.builder_calls[::-1]: + getattr(benchmark, name)(*args, **kwargs) + + # return the benchmarked function because the decorator does not modify it + return options.func + + +def _flags_parser(argv): + argv = _benchmark.Initialize(argv) + return app.parse_flags_with_usage(argv) + + +def _run_benchmarks(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + return _benchmark.RunSpecifiedBenchmarks() + + +def main(argv=None): + return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser) + + +# Methods for use with custom main function. +initialize = _benchmark.Initialize +run_benchmarks = _benchmark.RunSpecifiedBenchmarks diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc new file mode 100644 index 000000000000..1b01fe7f7f0f --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc @@ -0,0 +1,181 @@ +// Benchmark for Python. 
+ +#include +#include +#include + +#include "pybind11/operators.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "pybind11/stl_bind.h" + +#include "benchmark/benchmark.h" + +PYBIND11_MAKE_OPAQUE(benchmark::UserCounters); + +namespace { +namespace py = ::pybind11; + +std::vector Initialize(const std::vector& argv) { + // The `argv` pointers here become invalid when this function returns, but + // benchmark holds the pointer to `argv[0]`. We create a static copy of it + // so it persists, and replace the pointer below. + static std::string executable_name(argv[0]); + std::vector ptrs; + ptrs.reserve(argv.size()); + for (auto& arg : argv) { + ptrs.push_back(const_cast(arg.c_str())); + } + ptrs[0] = const_cast(executable_name.c_str()); + int argc = static_cast(argv.size()); + benchmark::Initialize(&argc, ptrs.data()); + std::vector remaining_argv; + remaining_argv.reserve(argc); + for (int i = 0; i < argc; ++i) { + remaining_argv.emplace_back(ptrs[i]); + } + return remaining_argv; +} + +benchmark::internal::Benchmark* RegisterBenchmark(const char* name, + py::function f) { + return benchmark::RegisterBenchmark( + name, [f](benchmark::State& state) { f(&state); }); +} + +PYBIND11_MODULE(_benchmark, m) { + using benchmark::TimeUnit; + py::enum_(m, "TimeUnit") + .value("kNanosecond", TimeUnit::kNanosecond) + .value("kMicrosecond", TimeUnit::kMicrosecond) + .value("kMillisecond", TimeUnit::kMillisecond) + .value("kSecond", TimeUnit::kSecond) + .export_values(); + + using benchmark::BigO; + py::enum_(m, "BigO") + .value("oNone", BigO::oNone) + .value("o1", BigO::o1) + .value("oN", BigO::oN) + .value("oNSquared", BigO::oNSquared) + .value("oNCubed", BigO::oNCubed) + .value("oLogN", BigO::oLogN) + .value("oNLogN", BigO::oLogN) + .value("oAuto", BigO::oAuto) + .value("oLambda", BigO::oLambda) + .export_values(); + + using benchmark::internal::Benchmark; + py::class_(m, "Benchmark") + // For methods returning a pointer tor the current object, reference + 
// return policy is used to ask pybind not to take ownership oof the + // returned object and avoid calling delete on it. + // https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies + // + // For methods taking a const std::vector<...>&, a copy is created + // because a it is bound to a Python list. + // https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html + .def("unit", &Benchmark::Unit, py::return_value_policy::reference) + .def("arg", &Benchmark::Arg, py::return_value_policy::reference) + .def("args", &Benchmark::Args, py::return_value_policy::reference) + .def("range", &Benchmark::Range, py::return_value_policy::reference, + py::arg("start"), py::arg("limit")) + .def("dense_range", &Benchmark::DenseRange, + py::return_value_policy::reference, py::arg("start"), + py::arg("limit"), py::arg("step") = 1) + .def("ranges", &Benchmark::Ranges, py::return_value_policy::reference) + .def("args_product", &Benchmark::ArgsProduct, + py::return_value_policy::reference) + .def("arg_name", &Benchmark::ArgName, py::return_value_policy::reference) + .def("arg_names", &Benchmark::ArgNames, + py::return_value_policy::reference) + .def("range_pair", &Benchmark::RangePair, + py::return_value_policy::reference, py::arg("lo1"), py::arg("hi1"), + py::arg("lo2"), py::arg("hi2")) + .def("range_multiplier", &Benchmark::RangeMultiplier, + py::return_value_policy::reference) + .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference) + .def("iterations", &Benchmark::Iterations, + py::return_value_policy::reference) + .def("repetitions", &Benchmark::Repetitions, + py::return_value_policy::reference) + .def("report_aggregates_only", &Benchmark::ReportAggregatesOnly, + py::return_value_policy::reference, py::arg("value") = true) + .def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly, + py::return_value_policy::reference, py::arg("value") = true) + .def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime, + 
py::return_value_policy::reference) + .def("use_real_time", &Benchmark::UseRealTime, + py::return_value_policy::reference) + .def("use_manual_time", &Benchmark::UseManualTime, + py::return_value_policy::reference) + .def( + "complexity", + (Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity, + py::return_value_policy::reference, + py::arg("complexity") = benchmark::oAuto); + + using benchmark::Counter; + py::class_ py_counter(m, "Counter"); + + py::enum_(py_counter, "Flags") + .value("kDefaults", Counter::Flags::kDefaults) + .value("kIsRate", Counter::Flags::kIsRate) + .value("kAvgThreads", Counter::Flags::kAvgThreads) + .value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate) + .value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant) + .value("kIsIterationInvariantRate", + Counter::Flags::kIsIterationInvariantRate) + .value("kAvgIterations", Counter::Flags::kAvgIterations) + .value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate) + .value("kInvert", Counter::Flags::kInvert) + .export_values() + .def(py::self | py::self); + + py::enum_(py_counter, "OneK") + .value("kIs1000", Counter::OneK::kIs1000) + .value("kIs1024", Counter::OneK::kIs1024) + .export_values(); + + py_counter + .def(py::init(), + py::arg("value") = 0., py::arg("flags") = Counter::kDefaults, + py::arg("k") = Counter::kIs1000) + .def(py::init([](double value) { return Counter(value); })) + .def_readwrite("value", &Counter::value) + .def_readwrite("flags", &Counter::flags) + .def_readwrite("oneK", &Counter::oneK); + py::implicitly_convertible(); + py::implicitly_convertible(); + + py::bind_map(m, "UserCounters"); + + using benchmark::State; + py::class_(m, "State") + .def("__bool__", &State::KeepRunning) + .def_property_readonly("keep_running", &State::KeepRunning) + .def("pause_timing", &State::PauseTiming) + .def("resume_timing", &State::ResumeTiming) + .def("skip_with_error", &State::SkipWithError) + .def_property_readonly("error_occurred", 
&State::error_occurred) + .def("set_iteration_time", &State::SetIterationTime) + .def_property("bytes_processed", &State::bytes_processed, + &State::SetBytesProcessed) + .def_property("complexity_n", &State::complexity_length_n, + &State::SetComplexityN) + .def_property("items_processed", &State::items_processed, + &State::SetItemsProcessed) + .def("set_label", (void (State::*)(const char*)) & State::SetLabel) + .def("range", &State::range, py::arg("pos") = 0) + .def_property_readonly("iterations", &State::iterations) + .def_readwrite("counters", &State::counters) + .def_readonly("thread_index", &State::thread_index) + .def_readonly("threads", &State::threads); + + m.def("Initialize", Initialize); + m.def("RegisterBenchmark", RegisterBenchmark, + py::return_value_policy::reference); + m.def("RunSpecifiedBenchmarks", + []() { benchmark::RunSpecifiedBenchmarks(); }); +}; +} // namespace diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py new file mode 100644 index 000000000000..9134e8cffeaf --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py @@ -0,0 +1,136 @@ +# Copyright 2020 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Example of Python using C++ benchmark framework. + +To run this example, you must first install the `google_benchmark` Python package. 
+ +To install using `setup.py`, download and extract the `google_benchmark` source. +In the extracted directory, execute: + python setup.py install +""" + +import random +import time + +import google_benchmark as benchmark +from google_benchmark import Counter + + +@benchmark.register +def empty(state): + while state: + pass + + +@benchmark.register +def sum_million(state): + while state: + sum(range(1_000_000)) + +@benchmark.register +def pause_timing(state): + """Pause timing every iteration.""" + while state: + # Construct a list of random ints every iteration without timing it + state.pause_timing() + random_list = [random.randint(0, 100) for _ in range(100)] + state.resume_timing() + # Time the in place sorting algorithm + random_list.sort() + + +@benchmark.register +def skipped(state): + if True: # Test some predicate here. + state.skip_with_error("some error") + return # NOTE: You must explicitly return, or benchmark will continue. + + ... # Benchmark code would be here. + + +@benchmark.register +def manual_timing(state): + while state: + # Manually count Python CPU time + start = time.perf_counter() # perf_counter_ns() in Python 3.7+ + # Something to benchmark + time.sleep(0.01) + end = time.perf_counter() + state.set_iteration_time(end - start) + + +@benchmark.register +def custom_counters(state): + """Collect custom metric using benchmark.Counter.""" + num_foo = 0.0 + while state: + # Benchmark some code here + pass + # Collect some custom metric named foo + num_foo += 0.13 + + # Automatic Counter from numbers. + state.counters["foo"] = num_foo + # Set a counter as a rate. + state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate) + # Set a counter as an inverse of rate. + state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert) + # Set a counter as a thread-average quantity. 
+ state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads) + # There's also a combined flag: + state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate) + + +@benchmark.register +@benchmark.option.measure_process_cpu_time() +@benchmark.option.use_real_time() +def with_options(state): + while state: + sum(range(1_000_000)) + + +@benchmark.register(name="sum_million_microseconds") +@benchmark.option.unit(benchmark.kMicrosecond) +def with_options(state): + while state: + sum(range(1_000_000)) + + +@benchmark.register +@benchmark.option.arg(100) +@benchmark.option.arg(1000) +def passing_argument(state): + while state: + sum(range(state.range(0))) + + +@benchmark.register +@benchmark.option.range(8, limit=8 << 10) +def using_range(state): + while state: + sum(range(state.range(0))) + + +@benchmark.register +@benchmark.option.range_multiplier(2) +@benchmark.option.range(1 << 10, 1 << 18) +@benchmark.option.complexity(benchmark.oN) +def computing_complexity(state): + while state: + sum(range(state.range(0))) + state.complexity_n = state.range(0) + + +if __name__ == "__main__": + benchmark.main() diff --git a/libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD b/libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD new file mode 100644 index 000000000000..bc833500383a --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD @@ -0,0 +1,20 @@ +cc_library( + name = "pybind11", + hdrs = glob( + include = [ + "include/pybind11/*.h", + "include/pybind11/detail/*.h", + ], + exclude = [ + "include/pybind11/common.h", + "include/pybind11/eigen.h", + ], + ), + copts = [ + "-fexceptions", + "-Wno-undefined-inline", + "-Wno-pragma-once-outside-header", + ], + includes = ["include"], + visibility = ["//visibility:public"], +) diff --git a/libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD b/libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD new file mode 100644 index 
000000000000..9c34cf6ca4bd --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD @@ -0,0 +1,6 @@ +cc_library( + name = "python_headers", + hdrs = glob(["**/*.h"]), + includes = ["."], + visibility = ["//visibility:public"], +) diff --git a/libcxx/utils/google-benchmark/bindings/python/requirements.txt b/libcxx/utils/google-benchmark/bindings/python/requirements.txt new file mode 100644 index 000000000000..f5bbe7eca5ce --- /dev/null +++ b/libcxx/utils/google-benchmark/bindings/python/requirements.txt @@ -0,0 +1,2 @@ +absl-py>=0.7.1 + diff --git a/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake b/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake new file mode 100644 index 000000000000..858589e9775c --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake @@ -0,0 +1,78 @@ +# - Adds a compiler flag if it is supported by the compiler +# +# This function checks that the supplied compiler flag is supported and then +# adds it to the corresponding compiler flags +# +# add_cxx_compiler_flag( []) +# +# - Example +# +# include(AddCXXCompilerFlag) +# add_cxx_compiler_flag(-Wall) +# add_cxx_compiler_flag(-no-strict-aliasing RELEASE) +# Requires CMake 2.6+ + +if(__add_cxx_compiler_flag) + return() +endif() +set(__add_cxx_compiler_flag INCLUDED) + +include(CheckCXXCompilerFlag) + +function(mangle_compiler_flag FLAG OUTPUT) + string(TOUPPER "HAVE_CXX_FLAG_${FLAG}" SANITIZED_FLAG) + string(REPLACE "+" "X" SANITIZED_FLAG ${SANITIZED_FLAG}) + string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) + string(REGEX REPLACE "_+" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) + set(${OUTPUT} "${SANITIZED_FLAG}" PARENT_SCOPE) +endfunction(mangle_compiler_flag) + +function(add_cxx_compiler_flag FLAG) + mangle_compiler_flag("${FLAG}" MANGLED_FLAG) + set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}") + check_cxx_compiler_flag("${FLAG}" 
${MANGLED_FLAG}) + set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") + if(${MANGLED_FLAG}) + if(ARGC GREATER 1) + set(VARIANT ${ARGV1}) + string(TOUPPER "_${VARIANT}" VARIANT) + else() + set(VARIANT "") + endif() + set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) + endif() +endfunction() + +function(add_required_cxx_compiler_flag FLAG) + mangle_compiler_flag("${FLAG}" MANGLED_FLAG) + set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}") + check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) + set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") + if(${MANGLED_FLAG}) + if(ARGC GREATER 1) + set(VARIANT ${ARGV1}) + string(TOUPPER "_${VARIANT}" VARIANT) + else() + set(VARIANT "") + endif() + set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}" PARENT_SCOPE) + else() + message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler") + endif() +endfunction() + +function(check_cxx_warning_flag FLAG) + mangle_compiler_flag("${FLAG}" MANGLED_FLAG) + set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + # Add -Werror to ensure the compiler generates an error if the warning flag + # doesn't exist. 
+ set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror ${FLAG}") + check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) + set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") +endfunction() diff --git a/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake b/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake new file mode 100644 index 000000000000..62e6741fe3de --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake @@ -0,0 +1,69 @@ +# - Compile and run code to check for C++ features +# +# This functions compiles a source file under the `cmake` folder +# and adds the corresponding `HAVE_[FILENAME]` flag to the CMake +# environment +# +# cxx_feature_check( []) +# +# - Example +# +# include(CXXFeatureCheck) +# cxx_feature_check(STD_REGEX) +# Requires CMake 2.8.12+ + +if(__cxx_feature_check) + return() +endif() +set(__cxx_feature_check INCLUDED) + +function(cxx_feature_check FILE) + string(TOLOWER ${FILE} FILE) + string(TOUPPER ${FILE} VAR) + string(TOUPPER "HAVE_${VAR}" FEATURE) + if (DEFINED HAVE_${VAR}) + set(HAVE_${VAR} 1 PARENT_SCOPE) + add_definitions(-DHAVE_${VAR}) + return() + endif() + + if (ARGC GREATER 1) + message(STATUS "Enabling additional flags: ${ARGV1}") + list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1}) + endif() + + if (NOT DEFINED COMPILE_${FEATURE}) + message(STATUS "Performing Test ${FEATURE}") + if(CMAKE_CROSSCOMPILING) + try_compile(COMPILE_${FEATURE} + ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp + CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + if(COMPILE_${FEATURE}) + message(WARNING + "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0") + set(RUN_${FEATURE} 0 CACHE INTERNAL "") + else() + set(RUN_${FEATURE} 1 CACHE INTERNAL "") + endif() + else() + message(STATUS "Performing Test ${FEATURE}") + try_run(RUN_${FEATURE} COMPILE_${FEATURE} + ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp + 
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + endif() + endif() + + if(RUN_${FEATURE} EQUAL 0) + message(STATUS "Performing Test ${FEATURE} -- success") + set(HAVE_${VAR} 1 PARENT_SCOPE) + add_definitions(-DHAVE_${VAR}) + else() + if(NOT COMPILE_${FEATURE}) + message(STATUS "Performing Test ${FEATURE} -- failed to compile") + else() + message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run") + endif() + endif() +endfunction() diff --git a/libcxx/utils/google-benchmark/cmake/Config.cmake.in b/libcxx/utils/google-benchmark/cmake/Config.cmake.in new file mode 100644 index 000000000000..6e9256eea8a2 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/Config.cmake.in @@ -0,0 +1 @@ +include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") diff --git a/libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake b/libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake new file mode 100644 index 000000000000..04a1f9b70d68 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake @@ -0,0 +1,58 @@ +# - Returns a version string from Git tags +# +# This function inspects the annotated git tags for the project and returns a string +# into a CMake variable +# +# get_git_version() +# +# - Example +# +# include(GetGitVersion) +# get_git_version(GIT_VERSION) +# +# Requires CMake 2.8.11+ +find_package(Git) + +if(__get_git_version) + return() +endif() +set(__get_git_version INCLUDED) + +function(get_git_version var) + if(GIT_EXECUTABLE) + execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + RESULT_VARIABLE status + OUTPUT_VARIABLE GIT_DESCRIBE_VERSION + ERROR_QUIET) + if(status) + set(GIT_DESCRIBE_VERSION "v0.0.0") + endif() + + string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION) + if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-) + string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION 
${GIT_DESCRIBE_VERSION}) + else() + string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION}) + endif() + + # Work out if the repository is dirty + execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_QUIET + ERROR_QUIET) + execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD -- + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DIFF_INDEX + ERROR_QUIET) + string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) + if (${GIT_DIRTY}) + set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty") + endif() + message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}") + else() + set(GIT_VERSION "0.0.0") + endif() + + set(${var} ${GIT_VERSION} PARENT_SCOPE) +endfunction() diff --git a/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake b/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake new file mode 100644 index 000000000000..dd611fc875f1 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake @@ -0,0 +1,41 @@ +# Download and unpack googletest at configure time +set(GOOGLETEST_PREFIX "${benchmark_BINARY_DIR}/third_party/googletest") +configure_file(${benchmark_SOURCE_DIR}/cmake/GoogleTest.cmake.in ${GOOGLETEST_PREFIX}/CMakeLists.txt @ONLY) + +set(GOOGLETEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/googletest" CACHE PATH "") # Mind the quotes +execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" + -DALLOW_DOWNLOADING_GOOGLETEST=${BENCHMARK_DOWNLOAD_DEPENDENCIES} -DGOOGLETEST_PATH:PATH=${GOOGLETEST_PATH} . + RESULT_VARIABLE result + WORKING_DIRECTORY ${GOOGLETEST_PREFIX} +) + +if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") +endif() + +execute_process( + COMMAND ${CMAKE_COMMAND} --build . 
+ RESULT_VARIABLE result + WORKING_DIRECTORY ${GOOGLETEST_PREFIX} +) + +if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") +endif() + +# Prevent overriding the parent project's compiler/linker +# settings on Windows +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + +include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) + +# Add googletest directly to our build. This defines +# the gtest and gtest_main targets. +add_subdirectory(${GOOGLETEST_SOURCE_DIR} + ${GOOGLETEST_BINARY_DIR} + EXCLUDE_FROM_ALL) + +set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) +set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) +set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) +set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) diff --git a/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in b/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in new file mode 100644 index 000000000000..fd957ff56409 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in @@ -0,0 +1,58 @@ +cmake_minimum_required(VERSION 2.8.12) + +project(googletest-download NONE) + +# Enable ExternalProject CMake module +include(ExternalProject) + +option(ALLOW_DOWNLOADING_GOOGLETEST "If googletest src tree is not found in location specified by GOOGLETEST_PATH, do fetch the archive from internet" OFF) +set(GOOGLETEST_PATH "/usr/src/googletest" CACHE PATH + "Path to the googletest root tree. Should contain googletest and googlemock subdirs. 
And CMakeLists.txt in root, and in both of these subdirs") + +# Download and install GoogleTest + +message(STATUS "Looking for Google Test sources") +message(STATUS "Looking for Google Test sources in ${GOOGLETEST_PATH}") +if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" AND EXISTS "${GOOGLETEST_PATH}/CMakeLists.txt" AND + EXISTS "${GOOGLETEST_PATH}/googletest" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googletest" AND EXISTS "${GOOGLETEST_PATH}/googletest/CMakeLists.txt" AND + EXISTS "${GOOGLETEST_PATH}/googlemock" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googlemock" AND EXISTS "${GOOGLETEST_PATH}/googlemock/CMakeLists.txt") + message(STATUS "Found Google Test in ${GOOGLETEST_PATH}") + + ExternalProject_Add( + googletest + PREFIX "${CMAKE_BINARY_DIR}" + DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" + SOURCE_DIR "${GOOGLETEST_PATH}" # use existing src dir. + BINARY_DIR "${CMAKE_BINARY_DIR}/build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) +else() + if(NOT ALLOW_DOWNLOADING_GOOGLETEST) + message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + else() + message(WARNING "Did not find Google Test sources! 
Fetching from web...") + ExternalProject_Add( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG master + PREFIX "${CMAKE_BINARY_DIR}" + STAMP_DIR "${CMAKE_BINARY_DIR}/stamp" + DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" + SOURCE_DIR "${CMAKE_BINARY_DIR}/src" + BINARY_DIR "${CMAKE_BINARY_DIR}/build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) + endif() +endif() + +ExternalProject_Get_Property(googletest SOURCE_DIR BINARY_DIR) +file(WRITE googletest-paths.cmake +"set(GOOGLETEST_SOURCE_DIR \"${SOURCE_DIR}\") +set(GOOGLETEST_BINARY_DIR \"${BINARY_DIR}\") +") diff --git a/libcxx/utils/google-benchmark/cmake/benchmark.pc.in b/libcxx/utils/google-benchmark/cmake/benchmark.pc.in new file mode 100644 index 000000000000..34beb012eef1 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/benchmark.pc.in @@ -0,0 +1,12 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: @PROJECT_NAME@ +Description: Google microbenchmark framework +Version: @VERSION@ + +Libs: -L${libdir} -lbenchmark +Libs.private: -lpthread +Cflags: -I${includedir} diff --git a/libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp b/libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp new file mode 100644 index 000000000000..b5b91cdab7c2 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp @@ -0,0 +1,12 @@ +#include +#include +int main() { + std::string str = "test0159"; + regex_t re; + int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + return ec; + } + return regexec(&re, str.c_str(), 0, nullptr, 0) ? 
-1 : 0; +} + diff --git a/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake b/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake new file mode 100644 index 000000000000..fc119e52fd26 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake @@ -0,0 +1,8 @@ +find_package(LLVMAr REQUIRED) +set(CMAKE_AR "${LLVMAR_EXECUTABLE}" CACHE FILEPATH "" FORCE) + +find_package(LLVMNm REQUIRED) +set(CMAKE_NM "${LLVMNM_EXECUTABLE}" CACHE FILEPATH "" FORCE) + +find_package(LLVMRanLib REQUIRED) +set(CMAKE_RANLIB "${LLVMRANLIB_EXECUTABLE}" CACHE FILEPATH "" FORCE) diff --git a/libcxx/utils/google-benchmark/cmake/posix_regex.cpp b/libcxx/utils/google-benchmark/cmake/posix_regex.cpp new file mode 100644 index 000000000000..466dc62560a2 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/posix_regex.cpp @@ -0,0 +1,14 @@ +#include +#include +int main() { + std::string str = "test0159"; + regex_t re; + int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + return ec; + } + int ret = regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; + regfree(&re); + return ret; +} + diff --git a/libcxx/utils/google-benchmark/cmake/split_list.cmake b/libcxx/utils/google-benchmark/cmake/split_list.cmake new file mode 100644 index 000000000000..67aed3fdc857 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/split_list.cmake @@ -0,0 +1,3 @@ +macro(split_list listname) + string(REPLACE ";" " " ${listname} "${${listname}}") +endmacro() diff --git a/libcxx/utils/google-benchmark/cmake/std_regex.cpp b/libcxx/utils/google-benchmark/cmake/std_regex.cpp new file mode 100644 index 000000000000..696f2a26bce0 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/std_regex.cpp @@ -0,0 +1,10 @@ +#include +#include +int main() { + const std::string str = "test0159"; + std::regex re; + re = std::regex("^[a-z]+[0-9]+$", + std::regex_constants::extended | std::regex_constants::nosubs); + return std::regex_search(str, re) ? 
0 : -1; +} + diff --git a/libcxx/utils/google-benchmark/cmake/steady_clock.cpp b/libcxx/utils/google-benchmark/cmake/steady_clock.cpp new file mode 100644 index 000000000000..66d50d17e9e6 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/steady_clock.cpp @@ -0,0 +1,7 @@ +#include + +int main() { + typedef std::chrono::steady_clock Clock; + Clock::time_point tp = Clock::now(); + ((void)tp); +} diff --git a/libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp b/libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp new file mode 100644 index 000000000000..46161babdb10 --- /dev/null +++ b/libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp @@ -0,0 +1,4 @@ +#define HAVE_THREAD_SAFETY_ATTRIBUTES +#include "../src/mutex.h" + +int main() {} diff --git a/libcxx/utils/google-benchmark/dependencies.md b/libcxx/utils/google-benchmark/dependencies.md new file mode 100644 index 000000000000..6289b4e3548b --- /dev/null +++ b/libcxx/utils/google-benchmark/dependencies.md @@ -0,0 +1,18 @@ +# Build tool dependency policy + +To ensure the broadest compatibility when building the benchmark library, but +still allow forward progress, we require any build tooling to be available for: + +* Debian stable AND +* The last two Ubuntu LTS releases AND + +Currently, this means using build tool versions that are available for Ubuntu +16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch. + +_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._ + +## cmake +The current supported version is cmake 3.5.1 as of 2018-06-06. 
+ +_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS +release, as `cmake3`._ diff --git a/libcxx/utils/google-benchmark/docs/AssemblyTests.md b/libcxx/utils/google-benchmark/docs/AssemblyTests.md new file mode 100644 index 000000000000..1fbdc269b53d --- /dev/null +++ b/libcxx/utils/google-benchmark/docs/AssemblyTests.md @@ -0,0 +1,147 @@ +# Assembly Tests + +The Benchmark library provides a number of functions whose primary +purpose in to affect assembly generation, including `DoNotOptimize` +and `ClobberMemory`. In addition there are other functions, +such as `KeepRunning`, for which generating good assembly is paramount. + +For these functions it's important to have tests that verify the +correctness and quality of the implementation. This requires testing +the code generated by the compiler. + +This document describes how the Benchmark library tests compiler output, +as well as how to properly write new tests. + + +## Anatomy of a Test + +Writing a test has two steps: + +* Write the code you want to generate assembly for. +* Add `// CHECK` lines to match against the verified assembly. + +Example: +```c++ + +// CHECK-LABEL: test_add: +extern "C" int test_add() { + extern int ExternInt; + return ExternInt + 1; + + // CHECK: movl ExternInt(%rip), %eax + // CHECK: addl %eax + // CHECK: ret +} + +``` + +#### LLVM Filecheck + +[LLVM's Filecheck](https://llvm.org/docs/CommandGuide/FileCheck.html) +is used to test the generated assembly against the `// CHECK` lines +specified in the tests source file. Please see the documentation +linked above for information on how to write `CHECK` directives. + +#### Tips and Tricks: + +* Tests should match the minimal amount of output required to establish +correctness. `CHECK` directives don't have to match on the exact next line +after the previous match, so tests should omit checks for unimportant +bits of assembly. 
([`CHECK-NEXT`](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive) +can be used to ensure a match occurs exactly after the previous match). + +* The tests are compiled with `-O3 -g0`. So we're only testing the +optimized output. + +* The assembly output is further cleaned up using `tools/strip_asm.py`. +This removes comments, assembler directives, and unused labels before +the test is run. + +* The generated and stripped assembly file for a test is output under +`/test/.s` + +* Filecheck supports using [`CHECK` prefixes](https://llvm.org/docs/CommandGuide/FileCheck.html#cmdoption-check-prefixes) +to specify lines that should only match in certain situations. +The Benchmark tests use `CHECK-CLANG` and `CHECK-GNU` for lines that +are only expected to match Clang or GCC's output respectively. Normal +`CHECK` lines match against all compilers. (Note: `CHECK-NOT` and +`CHECK-LABEL` are NOT prefixes. They are versions of non-prefixed +`CHECK` lines) + +* Use `extern "C"` to disable name mangling for specific functions. This +makes them easier to name in the `CHECK` lines. + + +## Problems Writing Portable Tests + +Writing tests which check the code generated by a compiler are +inherently non-portable. Different compilers and even different compiler +versions may generate entirely different code. The Benchmark tests +must tolerate this. + +LLVM Filecheck provides a number of mechanisms to help write +"more portable" tests; including [matching using regular expressions](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-pattern-matching-syntax), +allowing the creation of [named variables](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-variables) +for later matching, and [checking non-sequential matches](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-dag-directive). + +#### Capturing Variables + +For example, say GCC stores a variable in a register but Clang stores +it in memory. 
To write a test that tolerates both cases we "capture" +the destination of the store, and then use the captured expression +to write the remainder of the test. + +```c++ +// CHECK-LABEL: test_div_no_op_into_shr: +extern "C" void test_div_no_op_into_shr(int value) { + int divisor = 2; + benchmark::DoNotOptimize(divisor); // hide the value from the optimizer + return value / divisor; + + // CHECK: movl $2, [[DEST:.*]] + // CHECK: idivl [[DEST]] + // CHECK: ret +} +``` + +#### Using Regular Expressions to Match Differing Output + +Often tests require testing assembly lines which may subtly differ +between compilers or compiler versions. A common example of this +is matching stack frame addresses. In this case regular expressions +can be used to match the differing bits of output. For example: + +```c++ +int ExternInt; +struct Point { int x, y, z; }; + +// CHECK-LABEL: test_store_point: +extern "C" void test_store_point() { + Point p{ExternInt, ExternInt, ExternInt}; + benchmark::DoNotOptimize(p); + + // CHECK: movl ExternInt(%rip), %eax + // CHECK: movl %eax, -{{[0-9]+}}(%rsp) + // CHECK: movl %eax, -{{[0-9]+}}(%rsp) + // CHECK: movl %eax, -{{[0-9]+}}(%rsp) + // CHECK: ret +} +``` + +## Current Requirements and Limitations + +The tests require Filecheck to be installed along the `PATH` of the +build machine. Otherwise the tests will be disabled. + +Additionally, as mentioned in the previous section, codegen tests are +inherently non-portable. Currently the tests are limited to: + +* x86_64 targets. +* Compiled with GCC or Clang + +Further work could be done, at least on a limited basis, to extend the +tests to other architectures and compilers (using `CHECK` prefixes). + +Furthermore, the tests fail for builds which specify additional flags +that modify code generation, including `--coverage` or `-fsanitize=`. 
+ diff --git a/libcxx/utils/google-benchmark/docs/_config.yml b/libcxx/utils/google-benchmark/docs/_config.yml new file mode 100644 index 000000000000..fc24e7a62dc2 --- /dev/null +++ b/libcxx/utils/google-benchmark/docs/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-hacker \ No newline at end of file diff --git a/libcxx/utils/google-benchmark/docs/perf_counters.md b/libcxx/utils/google-benchmark/docs/perf_counters.md new file mode 100644 index 000000000000..74560e966971 --- /dev/null +++ b/libcxx/utils/google-benchmark/docs/perf_counters.md @@ -0,0 +1,34 @@ + + +# User-Requested Performance Counters + +When running benchmarks, the user may choose to request collection of +performance counters. This may be useful in investigation scenarios - narrowing +down the cause of a regression; or verifying that the underlying cause of a +performance improvement matches expectations. + +This feature is available if: + +* The benchmark is run on an architecture featuring a Performance Monitoring + Unit (PMU), +* The benchmark is compiled with support for collecting counters. Currently, + this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build + time + +The feature does not require modifying benchmark code. Counter collection is +handled at the boundaries where timer collection is also handled. + +To opt-in: + +* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. +* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM. + +To use, pass a comma-separated list of counter names through the +`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, +they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are +mapped by libpfm to platform-specifics - see libpfm +[documentation](http://perfmon2.sourceforge.net/docs.html) for more details. + +The counter values are reported back through the [User Counters](../README.md#custom-counters) +mechanism, meaning, they are available in all the formats (e.g. 
JSON) supported +by User Counters. \ No newline at end of file diff --git a/libcxx/utils/google-benchmark/docs/random_interleaving.md b/libcxx/utils/google-benchmark/docs/random_interleaving.md new file mode 100644 index 000000000000..c08303684148 --- /dev/null +++ b/libcxx/utils/google-benchmark/docs/random_interleaving.md @@ -0,0 +1,13 @@ + + +# Random Interleaving + +[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a +technique to lower run-to-run variance. It randomly interleaves repetitions of a +microbenchmark with repetitions from other microbenchmarks in the same benchmark +test. Data shows it is able to lower run-to-run variance by +[40%](https://github.com/google/benchmark/issues/1051) on average. + +To use, you mainly need to set `--benchmark_enable_random_interleaving=true`, +and optionally specify non-zero repetition count `--benchmark_repetitions=9` +and optionally decrease the per-repetition time `--benchmark_min_time=0.1`. diff --git a/libcxx/utils/google-benchmark/docs/releasing.md b/libcxx/utils/google-benchmark/docs/releasing.md new file mode 100644 index 000000000000..7a6dfc4017b2 --- /dev/null +++ b/libcxx/utils/google-benchmark/docs/releasing.md @@ -0,0 +1,22 @@ +# How to release + +* Make sure you're on main and synced to HEAD +* Ensure the project builds and tests run (sanity check only, obviously) + * `parallel -j0 exec ::: test/*_test` can help ensure everything at least + passes +* Prepare release notes + * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of + commits between the last annotated tag and HEAD + * Pick the most interesting. +* Create one last commit that updates the version saved in `CMakeLists.txt` to the release version you're creating. (This version will be used if benchmark is installed from the archive you'll be creating in the next step.) 
+ +``` +project (benchmark VERSION 1.5.3 LANGUAGES CXX) +``` + +* Create a release through github's interface + * Note this will create a lightweight tag. + * Update this to an annotated tag: + * `git pull --tags` + * `git tag -a -f ` + * `git push --force origin` diff --git a/libcxx/utils/google-benchmark/docs/tools.md b/libcxx/utils/google-benchmark/docs/tools.md new file mode 100644 index 000000000000..f2d0c497f3fc --- /dev/null +++ b/libcxx/utils/google-benchmark/docs/tools.md @@ -0,0 +1,203 @@ +# Benchmark Tools + +## compare.py + +The `compare.py` can be used to compare the result of benchmarks. + +### Dependencies +The utility relies on the [scipy](https://www.scipy.org) package which can be installed using pip: +```bash +pip3 install -r requirements.txt +``` + +### Displaying aggregates only + +The switch `-a` / `--display_aggregates_only` can be used to control the +displayment of the normal iterations vs the aggregates. When passed, it will +be passthrough to the benchmark binaries to be run, and will be accounted for +in the tool itself; only the aggregates will be displayed, but not normal runs. +It only affects the display, the separate runs will still be used to calculate +the U test. + +### Modes of operation + +There are three modes of operation: + +1. Just compare two benchmarks +The program is invoked like: + +``` bash +$ compare.py benchmarks [benchmark options]... +``` +Where `` and `` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. + +`[benchmark options]` will be passed to the benchmarks invocations. They can be anything that binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. 
+ +Example output: +``` +$ ./compare.py benchmarks ./a.out ./a.out +RUNNING: ./a.out --benchmark_out=/tmp/tmprBT5nW +Run on (8 X 4000 MHz CPU s) +2017-11-07 21:16:44 +------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------ +BM_memcpy/8 36 ns 36 ns 19101577 211.669MB/s +BM_memcpy/64 76 ns 76 ns 9412571 800.199MB/s +BM_memcpy/512 84 ns 84 ns 8249070 5.64771GB/s +BM_memcpy/1024 116 ns 116 ns 6181763 8.19505GB/s +BM_memcpy/8192 643 ns 643 ns 1062855 11.8636GB/s +BM_copy/8 222 ns 222 ns 3137987 34.3772MB/s +BM_copy/64 1608 ns 1608 ns 432758 37.9501MB/s +BM_copy/512 12589 ns 12589 ns 54806 38.7867MB/s +BM_copy/1024 25169 ns 25169 ns 27713 38.8003MB/s +BM_copy/8192 201165 ns 201112 ns 3486 38.8466MB/s +RUNNING: ./a.out --benchmark_out=/tmp/tmpt1wwG_ +Run on (8 X 4000 MHz CPU s) +2017-11-07 21:16:53 +------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------ +BM_memcpy/8 36 ns 36 ns 19397903 211.255MB/s +BM_memcpy/64 73 ns 73 ns 9691174 839.635MB/s +BM_memcpy/512 85 ns 85 ns 8312329 5.60101GB/s +BM_memcpy/1024 118 ns 118 ns 6438774 8.11608GB/s +BM_memcpy/8192 656 ns 656 ns 1068644 11.6277GB/s +BM_copy/8 223 ns 223 ns 3146977 34.2338MB/s +BM_copy/64 1611 ns 1611 ns 435340 37.8751MB/s +BM_copy/512 12622 ns 12622 ns 54818 38.6844MB/s +BM_copy/1024 25257 ns 25239 ns 27779 38.6927MB/s +BM_copy/8192 205013 ns 205010 ns 3479 38.108MB/s +Comparing ./a.out to ./a.out +Benchmark Time CPU Time Old Time New CPU Old CPU New +------------------------------------------------------------------------------------------------------ +BM_memcpy/8 +0.0020 +0.0020 36 36 36 36 +BM_memcpy/64 -0.0468 -0.0470 76 73 76 73 +BM_memcpy/512 +0.0081 +0.0083 84 85 84 85 +BM_memcpy/1024 +0.0098 +0.0097 116 118 116 118 +BM_memcpy/8192 +0.0200 +0.0203 643 656 643 656 +BM_copy/8 +0.0046 +0.0042 222 223 222 223 +BM_copy/64 +0.0020 +0.0020 1608 
1611 1608 1611 +BM_copy/512 +0.0027 +0.0026 12589 12622 12589 12622 +BM_copy/1024 +0.0035 +0.0028 25169 25257 25169 25239 +BM_copy/8192 +0.0191 +0.0194 201165 205013 201112 205010 +``` + +What it does is for the every benchmark from the first run it looks for the benchmark with exactly the same name in the second run, and then compares the results. If the names differ, the benchmark is omitted from the diff. +As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. + +2. Compare two different filters of one benchmark +The program is invoked like: + +``` bash +$ compare.py filters [benchmark options]... +``` +Where `` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. + +Where `` and `` are the same regex filters that you would pass to the `[--benchmark_filter=]` parameter of the benchmark binary. + +`[benchmark options]` will be passed to the benchmarks invocations. They can be anything that binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. 
+ +Example output: +``` +$ ./compare.py filters ./a.out BM_memcpy BM_copy +RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmpBWKk0k +Run on (8 X 4000 MHz CPU s) +2017-11-07 21:37:28 +------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------ +BM_memcpy/8 36 ns 36 ns 17891491 211.215MB/s +BM_memcpy/64 74 ns 74 ns 9400999 825.646MB/s +BM_memcpy/512 87 ns 87 ns 8027453 5.46126GB/s +BM_memcpy/1024 111 ns 111 ns 6116853 8.5648GB/s +BM_memcpy/8192 657 ns 656 ns 1064679 11.6247GB/s +RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpAvWcOM +Run on (8 X 4000 MHz CPU s) +2017-11-07 21:37:33 +---------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_copy/8 227 ns 227 ns 3038700 33.6264MB/s +BM_copy/64 1640 ns 1640 ns 426893 37.2154MB/s +BM_copy/512 12804 ns 12801 ns 55417 38.1444MB/s +BM_copy/1024 25409 ns 25407 ns 27516 38.4365MB/s +BM_copy/8192 202986 ns 202990 ns 3454 38.4871MB/s +Comparing BM_memcpy to BM_copy (from ./a.out) +Benchmark Time CPU Time Old Time New CPU Old CPU New +-------------------------------------------------------------------------------------------------------------------- +[BM_memcpy vs. BM_copy]/8 +5.2829 +5.2812 36 227 36 227 +[BM_memcpy vs. BM_copy]/64 +21.1719 +21.1856 74 1640 74 1640 +[BM_memcpy vs. BM_copy]/512 +145.6487 +145.6097 87 12804 87 12801 +[BM_memcpy vs. BM_copy]/1024 +227.1860 +227.1776 111 25409 111 25407 +[BM_memcpy vs. BM_copy]/8192 +308.1664 +308.2898 657 202986 656 202990 +``` + +As you can see, it applies filter to the benchmarks, both when running the benchmark, and before doing the diff. And to make the diff work, the matches are replaced with some common string. Thus, you can compare two different benchmark families within one benchmark binary. 
+As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. + +3. Compare filter one from benchmark one to filter two from benchmark two: +The program is invoked like: + +``` bash +$ compare.py filters [benchmark options]... +``` + +Where `` and `` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. + +Where `` and `` are the same regex filters that you would pass to the `[--benchmark_filter=]` parameter of the benchmark binary. + +`[benchmark options]` will be passed to the benchmarks invocations. They can be anything that binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. + +Example output: +``` +$ ./compare.py benchmarksfiltered ./a.out BM_memcpy ./a.out BM_copy +RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmp_FvbYg +Run on (8 X 4000 MHz CPU s) +2017-11-07 21:38:27 +------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------ +BM_memcpy/8 37 ns 37 ns 18953482 204.118MB/s +BM_memcpy/64 74 ns 74 ns 9206578 828.245MB/s +BM_memcpy/512 91 ns 91 ns 8086195 5.25476GB/s +BM_memcpy/1024 120 ns 120 ns 5804513 7.95662GB/s +BM_memcpy/8192 664 ns 664 ns 1028363 11.4948GB/s +RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpDfL5iE +Run on (8 X 4000 MHz CPU s) +2017-11-07 21:38:32 +---------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_copy/8 230 ns 230 ns 2985909 33.1161MB/s +BM_copy/64 1654 ns 1653 ns 419408 36.9137MB/s +BM_copy/512 13122 ns 13120 ns 53403 37.2156MB/s +BM_copy/1024 26679 ns 26666 ns 26575 36.6218MB/s +BM_copy/8192 215068 ns 215053 ns 3221 
36.3283MB/s +Comparing BM_memcpy (from ./a.out) to BM_copy (from ./a.out) +Benchmark Time CPU Time Old Time New CPU Old CPU New +-------------------------------------------------------------------------------------------------------------------- +[BM_memcpy vs. BM_copy]/8 +5.1649 +5.1637 37 230 37 230 +[BM_memcpy vs. BM_copy]/64 +21.4352 +21.4374 74 1654 74 1653 +[BM_memcpy vs. BM_copy]/512 +143.6022 +143.5865 91 13122 91 13120 +[BM_memcpy vs. BM_copy]/1024 +221.5903 +221.4790 120 26679 120 26666 +[BM_memcpy vs. BM_copy]/8192 +322.9059 +323.0096 664 215068 664 215053 +``` +This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one. +As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. + +### U test + +If there is a sufficient repetition count of the benchmarks, the tool can do +a [U Test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), of the +null hypothesis that it is equally likely that a randomly selected value from +one sample will be less than or greater than a randomly selected value from a +second sample. + +If the calculated p-value is below this value is lower than the significance +level alpha, then the result is said to be statistically significant and the +null hypothesis is rejected. Which in other words means that the two benchmarks +aren't identical. + +**WARNING**: requires **LARGE** (no less than 9) number of repetitions to be +meaningful! diff --git a/libcxx/utils/google-benchmark/include/benchmark/benchmark.h b/libcxx/utils/google-benchmark/include/benchmark/benchmark.h new file mode 100644 index 000000000000..9b5480244d6f --- /dev/null +++ b/libcxx/utils/google-benchmark/include/benchmark/benchmark.h @@ -0,0 +1,1654 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Support for registering benchmarks for functions. + +/* Example usage: +// Define a function that executes the code to be measured a +// specified number of times: +static void BM_StringCreation(benchmark::State& state) { + for (auto _ : state) + std::string empty_string; +} + +// Register the function as a benchmark +BENCHMARK(BM_StringCreation); + +// Define another benchmark +static void BM_StringCopy(benchmark::State& state) { + std::string x = "hello"; + for (auto _ : state) + std::string copy(x); +} +BENCHMARK(BM_StringCopy); + +// Augment the main() program to invoke benchmarks if specified +// via the --benchmarks command line flag. E.g., +// my_unittest --benchmark_filter=all +// my_unittest --benchmark_filter=BM_StringCreation +// my_unittest --benchmark_filter=String +// my_unittest --benchmark_filter='Copy|Creation' +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); + return 0; +} + +// Sometimes a family of microbenchmarks can be implemented with +// just one routine that takes an extra argument to specify which +// one of the family of benchmarks to run. 
For example, the following +// code defines a family of microbenchmarks for measuring the speed +// of memcpy() calls of different lengths: + +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range(0)]; char* dst = new char[state.range(0)]; + memset(src, 'x', state.range(0)); + for (auto _ : state) + memcpy(dst, src, state.range(0)); + state.SetBytesProcessed(state.iterations() * state.range(0)); + delete[] src; delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); + +// The preceding code is quite repetitive, and can be replaced with the +// following short-hand. The following invocation will pick a few +// appropriate arguments in the specified range and will generate a +// microbenchmark for each such argument. +BENCHMARK(BM_memcpy)->Range(8, 8<<10); + +// You might have a microbenchmark that depends on two inputs. For +// example, the following code defines a family of microbenchmarks for +// measuring the speed of set insertion. +static void BM_SetInsert(benchmark::State& state) { + set data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) + ->Args({1<<10, 512}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) + ->Args({8<<10, 512}); + +// The preceding code is quite repetitive, and can be replaced with +// the following short-hand. The following macro will pick a few +// appropriate arguments in the product of the two specified ranges +// and will generate a microbenchmark for each such pair. +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); + +// For more complex patterns of inputs, passing a custom function +// to Apply allows programmatic specification of an +// arbitrary set of arguments to run the microbenchmark on. 
+// The following example enumerates a dense range on +// one parameter, and a sparse range on the second. +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->Args({i, j}); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); + +// Templated microbenchmarks work the same way: +// Produce then consume 'size' messages 'iters' times +// Measures throughput in the absence of multiprogramming. +template int BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + for (auto _ : state) { + for (int i = state.range(0); i--; ) + q.push(v); + for (int e = state.range(0); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed(state.iterations() * state.range(0)); +} +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); + +Use `Benchmark::MinTime(double t)` to set the minimum time used to run the +benchmark. This option overrides the `benchmark_min_time` flag. + +void BM_test(benchmark::State& state) { + ... body ... +} +BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds. + +In a multithreaded test, it is guaranteed that none of the threads will start +until all have reached the loop start, and all will have finished before any +thread exits the loop body. As such, any global setup or teardown you want to +do can be wrapped in a check against the thread index: + +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index == 0) { + // Setup code here. + } + for (auto _ : state) { + // Run the test as normal. + } + if (state.thread_index == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(4); + + +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. 
In +order to manually set the time unit, you can specify it manually: + +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); +*/ + +#ifndef BENCHMARK_BENCHMARK_H_ +#define BENCHMARK_BENCHMARK_H_ + +// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#define BENCHMARK_HAS_CXX11 +#endif + +// This _MSC_VER check should detect VS 2017 v15.3 and newer. +#if __cplusplus >= 201703L || \ + (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L) +#define BENCHMARK_HAS_CXX17 +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(BENCHMARK_HAS_CXX11) +#include +#include +#include +#endif + +#if defined(_MSC_VER) +#include // for _ReadWriteBarrier +#endif + +#ifndef BENCHMARK_HAS_CXX11 +#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + TypeName& operator=(const TypeName&) +#else +#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + TypeName& operator=(const TypeName&) = delete +#endif + +#ifdef BENCHMARK_HAS_CXX17 +#define BENCHMARK_UNUSED [[maybe_unused]] +#elif defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_UNUSED __attribute__((unused)) +#else +#define BENCHMARK_UNUSED +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#elif defined(_MSC_VER) && !defined(__clang__) +#define BENCHMARK_ALWAYS_INLINE __forceinline +#if _MSC_VER >= 1900 +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif +#define __func__ __FUNCTION__ +#else +#define BENCHMARK_ALWAYS_INLINE +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif + +#define 
BENCHMARK_INTERNAL_TOSTRING2(x) #x +#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) + +#if defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#else +#define BENCHMARK_BUILTIN_EXPECT(x, y) x +#define BENCHMARK_DEPRECATED_MSG(msg) +#define BENCHMARK_WARNING_MSG(msg) \ + __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \ + __LINE__) ") : warning note: " msg)) +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(__GNUC__) || __has_builtin(__builtin_unreachable) +#define BENCHMARK_UNREACHABLE() __builtin_unreachable() +#elif defined(_MSC_VER) +#define BENCHMARK_UNREACHABLE() __assume(false) +#else +#define BENCHMARK_UNREACHABLE() ((void)0) +#endif + +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_OVERRIDE override +#else +#define BENCHMARK_OVERRIDE +#endif + +namespace benchmark { +class BenchmarkReporter; +class MemoryManager; + +void Initialize(int* argc, char** argv); +void Shutdown(); + +// Report to stdout all arguments in 'argv' as unrecognized except the first. +// Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). +bool ReportUnrecognizedArguments(int argc, char** argv); + +// Generate a list of benchmarks matching the specified --benchmark_filter flag +// and if --benchmark_list_tests is specified return after printing the name +// of each matching benchmark. Otherwise run each matching benchmark and +// report the results. +// +// The second and third overload use the specified 'display_reporter' and +// 'file_reporter' respectively. 'file_reporter' will write to the file +// specified +// by '--benchmark_output'. If '--benchmark_output' is not given the +// 'file_reporter' is ignored. 
+// +// RETURNS: The number of matching benchmarks. +size_t RunSpecifiedBenchmarks(); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter); + +// Register a MemoryManager instance that will be used to collect and report +// allocation measurements for benchmark runs. +void RegisterMemoryManager(MemoryManager* memory_manager); + +// Add a key-value pair to output as part of the context stanza in the report. +void AddCustomContext(const std::string& key, const std::string& value); + +namespace internal { +class Benchmark; +class BenchmarkImp; +class BenchmarkFamilies; + +void UseCharPointer(char const volatile*); + +// Take ownership of the pointer and register the benchmark. Return the +// registered benchmark. +Benchmark* RegisterBenchmarkInternal(Benchmark*); + +// Ensure that the standard streams are properly initialized in every TU. +int InitializeStreams(); +BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); + +} // namespace internal + +#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \ + defined(__EMSCRIPTEN__) +#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY +#endif + +// The DoNotOptimize(...) function can be used to prevent a value or +// expression from being optimized away by the compiler. This function is +// intended to add little to no overhead. +// See: https://youtu.be/nXaxk27zwlk?t=2441 +#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : : "r,m"(value) : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { +#if defined(__clang__) + asm volatile("" : "+r,m"(value) : : "memory"); +#else + asm volatile("" : "+m,r"(value) : : "memory"); +#endif +} + +// Force the compiler to flush pending writes to global memory. 
Acts as an +// effective read/write barrier +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { + asm volatile("" : : : "memory"); +} +#elif defined(_MSC_VER) +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + internal::UseCharPointer(&reinterpret_cast(value)); + _ReadWriteBarrier(); +} + +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } +#else +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + internal::UseCharPointer(&reinterpret_cast(value)); +} +// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers +#endif + +// This class is used for user-defined counters. +class Counter { + public: + enum Flags { + kDefaults = 0, + // Mark the counter as a rate. It will be presented divided + // by the duration of the benchmark. + kIsRate = 1U << 0U, + // Mark the counter as a thread-average quantity. It will be + // presented divided by the number of threads. + kAvgThreads = 1U << 1U, + // Mark the counter as a thread-average rate. See above. + kAvgThreadsRate = kIsRate | kAvgThreads, + // Mark the counter as a constant value, valid/same for *every* iteration. + // When reporting, it will be *multiplied* by the iteration count. + kIsIterationInvariant = 1U << 2U, + // Mark the counter as a constant rate. + // When reporting, it will be *multiplied* by the iteration count + // and then divided by the duration of the benchmark. + kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, + // Mark the counter as a iteration-average quantity. + // It will be presented divided by the number of iterations. + kAvgIterations = 1U << 3U, + // Mark the counter as a iteration-average rate. See above. + kAvgIterationsRate = kIsRate | kAvgIterations, + + // In the end, invert the result. This is always done last! 
+ kInvert = 1U << 31U + }; + + enum OneK { + // 1'000 items per 1k + kIs1000 = 1000, + // 1'024 items per 1k + kIs1024 = 1024 + }; + + double value; + Flags flags; + OneK oneK; + + BENCHMARK_ALWAYS_INLINE + Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) + : value(v), flags(f), oneK(k) {} + + BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } + BENCHMARK_ALWAYS_INLINE operator double&() { return value; } +}; + +// A helper for user code to create unforeseen combinations of Flags, without +// having to do this cast manually each time, or providing this operator. +Counter::Flags inline operator|(const Counter::Flags& LHS, + const Counter::Flags& RHS) { + return static_cast(static_cast(LHS) | + static_cast(RHS)); +} + +// This is the container for the user-defined counters. +typedef std::map UserCounters; + +// TimeUnit is passed to a benchmark in order to specify the order of magnitude +// for the measured time. +enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; + +// BigO is passed to a benchmark in order to specify the asymptotic +// computational +// complexity for the benchmark. In case oAuto is selected, complexity will be +// calculated automatically to the best fit. +enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; + +typedef uint64_t IterationCount; + +// BigOFunc is passed to a benchmark in order to specify the asymptotic +// computational complexity for the benchmark. 
+typedef double(BigOFunc)(IterationCount); + +// StatisticsFunc is passed to a benchmark in order to compute some descriptive +// statistics over all the measurements of some type +typedef double(StatisticsFunc)(const std::vector&); + +namespace internal { +struct Statistics { + std::string name_; + StatisticsFunc* compute_; + + Statistics(const std::string& name, StatisticsFunc* compute) + : name_(name), compute_(compute) {} +}; + +class BenchmarkInstance; +class ThreadTimer; +class ThreadManager; +class PerfCountersMeasurement; + +enum AggregationReportMode +#if defined(BENCHMARK_HAS_CXX11) + : unsigned +#else +#endif +{ + // The mode has not been manually specified + ARM_Unspecified = 0, + // The mode is user-specified. + // This may or may not be set when the following bit-flags are set. + ARM_Default = 1U << 0U, + // File reporter should only output aggregates. + ARM_FileReportAggregatesOnly = 1U << 1U, + // Display reporter should only output aggregates + ARM_DisplayReportAggregatesOnly = 1U << 2U, + // Both reporters should only display aggregates. + ARM_ReportAggregatesOnly = + ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly +}; + +} // namespace internal + +// State is passed to a running Benchmark and contains state for the +// benchmark to use. +class State { + public: + struct StateIterator; + friend struct StateIterator; + + // Returns iterators used to run each iteration of a benchmark using a + // C++11 ranged-based for loop. These functions should not be called directly. + // + // REQUIRES: The benchmark has not started running yet. Neither begin nor end + // have been called previously. + // + // NOTE: KeepRunning may not be used after calling either of these functions. + BENCHMARK_ALWAYS_INLINE StateIterator begin(); + BENCHMARK_ALWAYS_INLINE StateIterator end(); + + // Returns true if the benchmark should continue through another iteration. 
+ // NOTE: A benchmark may not return from the test until KeepRunning() has + // returned false. + bool KeepRunning(); + + // Returns true iff the benchmark should run n more iterations. + // REQUIRES: 'n' > 0. + // NOTE: A benchmark must not return from the test until KeepRunningBatch() + // has returned false. + // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations. + // + // Intended usage: + // while (state.KeepRunningBatch(1000)) { + // // process 1000 elements + // } + bool KeepRunningBatch(IterationCount n); + + // REQUIRES: timer is running and 'SkipWithError(...)' has not been called + // by the current thread. + // Stop the benchmark timer. If not called, the timer will be + // automatically stopped after the last iteration of the benchmark loop. + // + // For threaded benchmarks the PauseTiming() function only pauses the timing + // for the current thread. + // + // NOTE: The "real time" measurement is per-thread. If different threads + // report different measurements the largest one is reported. + // + // NOTE: PauseTiming()/ResumeTiming() are relatively + // heavyweight, and so their use should generally be avoided + // within each benchmark iteration, if possible. + void PauseTiming(); + + // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called + // by the current thread. + // Start the benchmark timer. The timer is NOT running on entrance to the + // benchmark function. It begins running after control flow enters the + // benchmark loop. + // + // NOTE: PauseTiming()/ResumeTiming() are relatively + // heavyweight, and so their use should generally be avoided + // within each benchmark iteration, if possible. + void ResumeTiming(); + + // REQUIRES: 'SkipWithError(...)' has not been called previously by the + // current thread. + // Report the benchmark as resulting in an error with the specified 'msg'. + // After this call the user may explicitly 'return' from the benchmark. 
+ // + // If the ranged-for style of benchmark loop is used, the user must explicitly + // break from the loop, otherwise all future iterations will be run. + // If the 'KeepRunning()' loop is used the current thread will automatically + // exit the loop at the end of the current iteration. + // + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report an error only the + // first error message is used. + // + // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit + // the current scope immediately. If the function is called from within + // the 'KeepRunning()' loop the current iteration will finish. It is the users + // responsibility to exit the scope as needed. + void SkipWithError(const char* msg); + + // Returns true if an error has been reported with 'SkipWithError(...)'. + bool error_occurred() const { return error_occurred_; } + + // REQUIRES: called exactly once per iteration of the benchmarking loop. + // Set the manually measured time for this benchmark iteration, which + // is used instead of automatically measured time if UseManualTime() was + // specified. + // + // For threaded benchmarks the final value will be set to the largest + // reported values. + void SetIterationTime(double seconds); + + // Set the number of bytes processed by the current benchmark + // execution. This routine is typically called once at the end of a + // throughput oriented benchmark. + // + // REQUIRES: a benchmark has exited its benchmarking loop. 
+ BENCHMARK_ALWAYS_INLINE + void SetBytesProcessed(int64_t bytes) { + counters["bytes_per_second"] = + Counter(static_cast(bytes), Counter::kIsRate, Counter::kIs1024); + } + + BENCHMARK_ALWAYS_INLINE + int64_t bytes_processed() const { + if (counters.find("bytes_per_second") != counters.end()) + return static_cast(counters.at("bytes_per_second")); + return 0; + } + + // If this routine is called with complexity_n > 0 and complexity report is + // requested for the + // family benchmark, then current benchmark will be part of the computation + // and complexity_n will + // represent the length of N. + BENCHMARK_ALWAYS_INLINE + void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; } + + BENCHMARK_ALWAYS_INLINE + int64_t complexity_length_n() const { return complexity_n_; } + + // If this routine is called with items > 0, then an items/s + // label is printed on the benchmark report line for the currently + // executing benchmark. It is typically called at the end of a processing + // benchmark where a processing items/second output is desired. + // + // REQUIRES: a benchmark has exited its benchmarking loop. + BENCHMARK_ALWAYS_INLINE + void SetItemsProcessed(int64_t items) { + counters["items_per_second"] = + Counter(static_cast(items), benchmark::Counter::kIsRate); + } + + BENCHMARK_ALWAYS_INLINE + int64_t items_processed() const { + if (counters.find("items_per_second") != counters.end()) + return static_cast(counters.at("items_per_second")); + return 0; + } + + // If this routine is called, the specified label is printed at the + // end of the benchmark report line for the currently executing + // benchmark. Example: + // static void BM_Compress(benchmark::State& state) { + // ... 
+ // double compress = input_size / output_size; + // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression)); + // } + // Produces output that looks like: + // BM_Compress 50 50 14115038 compress:27.3% + // + // REQUIRES: a benchmark has exited its benchmarking loop. + void SetLabel(const char* label); + + void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) { + this->SetLabel(str.c_str()); + } + + // Range arguments for this run. CHECKs if the argument has been set. + BENCHMARK_ALWAYS_INLINE + int64_t range(std::size_t pos = 0) const { + assert(range_.size() > pos); + return range_[pos]; + } + + BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead") + int64_t range_x() const { return range(0); } + + BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") + int64_t range_y() const { return range(1); } + + BENCHMARK_ALWAYS_INLINE + IterationCount iterations() const { + if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { + return 0; + } + return max_iterations - total_iterations_ + batch_leftover_; + } + + private + : // items we expect on the first cache line (ie 64 bytes of the struct) + // When total_iterations_ is 0, KeepRunning() and friends will return false. + // May be larger than max_iterations. + IterationCount total_iterations_; + + // When using KeepRunningBatch(), batch_leftover_ holds the number of + // iterations beyond max_iters that were run. Used to track + // completed_iterations_ accurately. + IterationCount batch_leftover_; + + public: + const IterationCount max_iterations; + + private: + bool started_; + bool finished_; + bool error_occurred_; + + private: // items we don't need on the first cache line + std::vector range_; + + int64_t complexity_n_; + + public: + // Container for user-defined counters. + UserCounters counters; + // Index of the executing thread. Values from [0, threads). + const int thread_index; + // Number of threads concurrently executing the benchmark. 
+ const int threads; + + private: + State(IterationCount max_iters, const std::vector& ranges, + int thread_i, int n_threads, internal::ThreadTimer* timer, + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement); + + void StartKeepRunning(); + // Implementation of KeepRunning() and KeepRunningBatch(). + // is_batch must be true unless n is 1. + bool KeepRunningInternal(IterationCount n, bool is_batch); + void FinishKeepRunning(); + internal::ThreadTimer* const timer_; + internal::ThreadManager* const manager_; + internal::PerfCountersMeasurement* const perf_counters_measurement_; + + friend class internal::BenchmarkInstance; +}; + +inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { + return KeepRunningInternal(1, /*is_batch=*/false); +} + +inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) { + return KeepRunningInternal(n, /*is_batch=*/true); +} + +inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n, + bool is_batch) { + // total_iterations_ is set to 0 by the constructor, and always set to a + // nonzero value by StartKepRunning(). + assert(n > 0); + // n must be 1 unless is_batch is true. + assert(is_batch || n == 1); + if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) { + total_iterations_ -= n; + return true; + } + if (!started_) { + StartKeepRunning(); + if (!error_occurred_ && total_iterations_ >= n) { + total_iterations_ -= n; + return true; + } + } + // For non-batch runs, total_iterations_ must be 0 by now. 
+ if (is_batch && total_iterations_ != 0) { + batch_leftover_ = n - total_iterations_; + total_iterations_ = 0; + return true; + } + FinishKeepRunning(); + return false; +} + +struct State::StateIterator { + struct BENCHMARK_UNUSED Value {}; + typedef std::forward_iterator_tag iterator_category; + typedef Value value_type; + typedef Value reference; + typedef Value pointer; + typedef std::ptrdiff_t difference_type; + + private: + friend class State; + BENCHMARK_ALWAYS_INLINE + StateIterator() : cached_(0), parent_() {} + + BENCHMARK_ALWAYS_INLINE + explicit StateIterator(State* st) + : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {} + + public: + BENCHMARK_ALWAYS_INLINE + Value operator*() const { return Value(); } + + BENCHMARK_ALWAYS_INLINE + StateIterator& operator++() { + assert(cached_ > 0); + --cached_; + return *this; + } + + BENCHMARK_ALWAYS_INLINE + bool operator!=(StateIterator const&) const { + if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true; + parent_->FinishKeepRunning(); + return false; + } + + private: + IterationCount cached_; + State* const parent_; +}; + +inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() { + return StateIterator(this); +} +inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() { + StartKeepRunning(); + return StateIterator(); +} + +namespace internal { + +typedef void(Function)(State&); + +// ------------------------------------------------------ +// Benchmark registration object. The BENCHMARK() macro expands +// into an internal::Benchmark* object. Various methods can +// be called on this object to change the properties of the benchmark. +// Each method returns "this" so that multiple method calls can +// chained into one expression. +class Benchmark { + public: + virtual ~Benchmark(); + + // Note: the following methods all return "this" so that multiple + // method calls can be chained together in one expression. 
+ + // Specify the name of the benchmark + Benchmark* Name(const std::string& name); + + // Run this benchmark once with "x" as the extra argument passed + // to the function. + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* Arg(int64_t x); + + // Run this benchmark with the given time unit for the generated output report + Benchmark* Unit(TimeUnit unit); + + // Run this benchmark once for a number of values picked from the + // range [start..limit]. (start and limit are always picked.) + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* Range(int64_t start, int64_t limit); + + // Run this benchmark once for all values in the range [start..limit] with + // specific step + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1); + + // Run this benchmark once with "args" as the extra arguments passed + // to the function. + // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... + Benchmark* Args(const std::vector& args); + + // Equivalent to Args({x, y}) + // NOTE: This is a legacy C++03 interface provided for compatibility only. + // New code should use 'Args'. + Benchmark* ArgPair(int64_t x, int64_t y) { + std::vector args; + args.push_back(x); + args.push_back(y); + return Args(args); + } + + // Run this benchmark once for a number of values picked from the + // ranges [start..limit]. (starts and limits are always picked.) + // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... + Benchmark* Ranges(const std::vector >& ranges); + + // Run this benchmark once for each combination of values in the (cartesian) + // product of the supplied argument lists. + // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... 
+ Benchmark* ArgsProduct(const std::vector >& arglists); + + // Equivalent to ArgNames({name}) + Benchmark* ArgName(const std::string& name); + + // Set the argument names to display in the benchmark name. If not called, + // only argument values will be shown. + Benchmark* ArgNames(const std::vector& names); + + // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}). + // NOTE: This is a legacy C++03 interface provided for compatibility only. + // New code should use 'Ranges'. + Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) { + std::vector > ranges; + ranges.push_back(std::make_pair(lo1, hi1)); + ranges.push_back(std::make_pair(lo2, hi2)); + return Ranges(ranges); + } + + // Pass this benchmark object to *func, which can customize + // the benchmark by calling various methods like Arg, Args, + // Threads, etc. + Benchmark* Apply(void (*func)(Benchmark* benchmark)); + + // Set the range multiplier for non-dense range. If not called, the range + // multiplier kRangeMultiplier will be used. + Benchmark* RangeMultiplier(int multiplier); + + // Set the minimum amount of time to use when running this benchmark. This + // option overrides the `benchmark_min_time` flag. + // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. + Benchmark* MinTime(double t); + + // Specify the amount of iterations that should be run by this benchmark. + // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. + // + // NOTE: This function should only be used when *exact* iteration control is + // needed and never to control or limit how long a benchmark runs, where + // `--benchmark_min_time=N` or `MinTime(...)` should be used instead. + Benchmark* Iterations(IterationCount n); + + // Specify the amount of times to repeat this benchmark. This option overrides + // the `benchmark_repetitions` flag. 
+ // REQUIRES: `n > 0` + Benchmark* Repetitions(int n); + + // Specify if each repetition of the benchmark should be reported separately + // or if only the final statistics should be reported. If the benchmark + // is not repeated then the single result is always reported. + // Applies to *ALL* reporters (display and file). + Benchmark* ReportAggregatesOnly(bool value = true); + + // Same as ReportAggregatesOnly(), but applies to display reporter only. + Benchmark* DisplayAggregatesOnly(bool value = true); + + // By default, the CPU time is measured only for the main thread, which may + // be unrepresentative if the benchmark uses threads internally. If called, + // the total CPU time spent by all the threads will be measured instead. + // By default, the only the main thread CPU time will be measured. + Benchmark* MeasureProcessCPUTime(); + + // If a particular benchmark should use the Wall clock instead of the CPU time + // (be it either the CPU time of the main thread only (default), or the + // total CPU usage of the benchmark), call this method. If called, the elapsed + // (wall) time will be used to control how many iterations are run, and in the + // printing of items/second or MB/seconds values. + // If not called, the CPU time used by the benchmark will be used. + Benchmark* UseRealTime(); + + // If a benchmark must measure time manually (e.g. if GPU execution time is + // being + // measured), call this method. If called, each benchmark iteration should + // call + // SetIterationTime(seconds) to report the measured time, which will be used + // to control how many iterations are run, and in the printing of items/second + // or MB/second values. + Benchmark* UseManualTime(); + + // Set the asymptotic computational complexity for the benchmark. If called + // the asymptotic computational complexity will be shown on the output. + Benchmark* Complexity(BigO complexity = benchmark::oAuto); + + // Set the asymptotic computational complexity for the benchmark. 
If called + // the asymptotic computational complexity will be shown on the output. + Benchmark* Complexity(BigOFunc* complexity); + + // Add this statistics to be computed over all the values of benchmark run + Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics); + + // Support for running multiple copies of the same benchmark concurrently + // in multiple threads. This may be useful when measuring the scaling + // of some piece of code. + + // Run one instance of this benchmark concurrently in t threads. + Benchmark* Threads(int t); + + // Pick a set of values T from [min_threads,max_threads]. + // min_threads and max_threads are always included in T. Run this + // benchmark once for each value in T. The benchmark run for a + // particular value t consists of t threads running the benchmark + // function concurrently. For example, consider: + // BENCHMARK(Foo)->ThreadRange(1,16); + // This will run the following benchmarks: + // Foo in 1 thread + // Foo in 2 threads + // Foo in 4 threads + // Foo in 8 threads + // Foo in 16 threads + Benchmark* ThreadRange(int min_threads, int max_threads); + + // For each value n in the range, run this benchmark once using n threads. + // min_threads and max_threads are always included in the range. + // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts + // a benchmark with 1, 4, 7 and 8 threads. 
+ Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1); + + // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) + Benchmark* ThreadPerCpu(); + + virtual void Run(State& state) = 0; + + protected: + explicit Benchmark(const char* name); + Benchmark(Benchmark const&); + void SetName(const char* name); + + int ArgsCnt() const; + + private: + friend class BenchmarkFamilies; + friend class BenchmarkInstance; + + std::string name_; + AggregationReportMode aggregation_report_mode_; + std::vector arg_names_; // Args for all benchmark runs + std::vector > args_; // Args for all benchmark runs + TimeUnit time_unit_; + int range_multiplier_; + double min_time_; + IterationCount iterations_; + int repetitions_; + bool measure_process_cpu_time_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + std::vector statistics_; + std::vector thread_counts_; + + Benchmark& operator=(Benchmark const&); +}; + +} // namespace internal + +// Create and register a benchmark with the specified 'name' that invokes +// the specified functor 'fn'. +// +// RETURNS: A pointer to the registered benchmark. +internal::Benchmark* RegisterBenchmark(const char* name, + internal::Function* fn); + +#if defined(BENCHMARK_HAS_CXX11) +template +internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn); +#endif + +// Remove all registered benchmarks. All pointers to previously registered +// benchmarks are invalidated. +void ClearRegisteredBenchmarks(); + +namespace internal { +// The class used to hold all Benchmarks created from static function. +// (ie those created using the BENCHMARK(...) macros. 
+class FunctionBenchmark : public Benchmark { + public: + FunctionBenchmark(const char* name, Function* func) + : Benchmark(name), func_(func) {} + + virtual void Run(State& st) BENCHMARK_OVERRIDE; + + private: + Function* func_; +}; + +#ifdef BENCHMARK_HAS_CXX11 +template +class LambdaBenchmark : public Benchmark { + public: + virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } + + private: + template + LambdaBenchmark(const char* name, OLambda&& lam) + : Benchmark(name), lambda_(std::forward(lam)) {} + + LambdaBenchmark(LambdaBenchmark const&) = delete; + + private: + template + friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); + + Lambda lambda_; +}; +#endif + +} // namespace internal + +inline internal::Benchmark* RegisterBenchmark(const char* name, + internal::Function* fn) { + return internal::RegisterBenchmarkInternal( + ::new internal::FunctionBenchmark(name, fn)); +} + +#ifdef BENCHMARK_HAS_CXX11 +template +internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { + using BenchType = + internal::LambdaBenchmark::type>; + return internal::RegisterBenchmarkInternal( + ::new BenchType(name, std::forward(fn))); +} +#endif + +#if defined(BENCHMARK_HAS_CXX11) && \ + (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) +template +internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn, + Args&&... args) { + return benchmark::RegisterBenchmark( + name, [=](benchmark::State& st) { fn(st, args...); }); +} +#else +#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK +#endif + +// The base class for all fixture tests. +class Fixture : public internal::Benchmark { + public: + Fixture() : internal::Benchmark("") {} + + virtual void Run(State& st) BENCHMARK_OVERRIDE { + this->SetUp(st); + this->BenchmarkCase(st); + this->TearDown(st); + } + + // These will be deprecated ... + virtual void SetUp(const State&) {} + virtual void TearDown(const State&) {} + // ... In favor of these. 
+ virtual void SetUp(State& st) { SetUp(const_cast(st)); } + virtual void TearDown(State& st) { TearDown(const_cast(st)); } + + protected: + virtual void BenchmarkCase(State&) = 0; +}; + +} // namespace benchmark + +// ------------------------------------------------------ +// Macro to register benchmarks + +// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1 +// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be +// empty. If X is empty the expression becomes (+1 == +0). +#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) +#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ +#else +#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ +#endif + +// Helpers for generating unique variable names +#define BENCHMARK_PRIVATE_NAME(n) \ + BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n) +#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) +#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c +// Helper for concatenation with macro name expansion +#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ + BaseClass##_##Method##_Benchmark + +#define BENCHMARK_PRIVATE_DECLARE(n) \ + static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ + BENCHMARK_UNUSED + +#define BENCHMARK(n) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n, n))) + +// Old-style macros +#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) +#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)}) +#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t)) +#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) +#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ + BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}}) + +#ifdef BENCHMARK_HAS_CXX11 + +// Register a benchmark which invokes the function specified by `func` +// with the additional arguments specified by `...`. 
+// +// For example: +// +// template ` +// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { +// [...] +//} +// /* Registers a benchmark named "BM_takes_args/int_string_test` */ +// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +#define BENCHMARK_CAPTURE(func, test_case_name, ...) \ + BENCHMARK_PRIVATE_DECLARE(func) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #func "/" #test_case_name, \ + [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) + +#endif // BENCHMARK_HAS_CXX11 + +// This will register a benchmark for a templatized function. For example: +// +// template +// void BM_Foo(int iters); +// +// BENCHMARK_TEMPLATE(BM_Foo, 1); +// +// will register BM_Foo<1> as a benchmark. +#define BENCHMARK_TEMPLATE1(n, a) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n))) + +#define BENCHMARK_TEMPLATE2(n, a, b) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \ + n))) + +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_TEMPLATE(n, ...) 
\ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) +#else +#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) +#endif + +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() : BaseClass() { \ + this->SetName(#BaseClass "/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ + }; + +#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() : BaseClass() { \ + this->SetName(#BaseClass "<" #a ">/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ + }; + +#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() : BaseClass() { \ + this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ + }; + +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) 
\ + class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ + public: \ + BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \ + this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ + }; +#else +#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ + BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a) +#endif + +#define BENCHMARK_DEFINE_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase + +#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \ + BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase + +#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \ + BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase + +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \ + BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase +#else +#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \ + BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) +#endif + +#define BENCHMARK_REGISTER_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)) + +#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ + BENCHMARK_PRIVATE_DECLARE(TestName) = \ + (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) + +// This macro will define and register a benchmark within a fixture class. 
+#define BENCHMARK_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase + +#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \ + BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase + +#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \ + BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase + +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \ + BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase +#else +#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \ + BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) +#endif + +// Helper macro to create a main routine in a test that runs the benchmarks +#define BENCHMARK_MAIN() \ + int main(int argc, char** argv) { \ + ::benchmark::Initialize(&argc, argv); \ + if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ + ::benchmark::RunSpecifiedBenchmarks(); \ + ::benchmark::Shutdown(); \ + return 0; \ + } \ + int main(int, char**) + +// ------------------------------------------------------ +// Benchmark Reporters + +namespace benchmark { + +struct CPUInfo { + struct CacheInfo { + std::string type; + int level; + int size; + int num_sharing; + }; + + enum Scaling { + UNKNOWN, + ENABLED, + DISABLED + }; + + int num_cpus; + Scaling scaling; + double cycles_per_second; + std::vector caches; + std::vector load_avg; + + static const CPUInfo& Get(); + + private: + CPUInfo(); + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo); +}; + +// Adding Struct for System Information 
+struct SystemInfo { + std::string name; + static const SystemInfo& Get(); + + private: + SystemInfo(); + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo); +}; + +// BenchmarkName contains the components of the Benchmark's name +// which allows individual fields to be modified or cleared before +// building the final name using 'str()'. +struct BenchmarkName { + std::string function_name; + std::string args; + std::string min_time; + std::string iterations; + std::string repetitions; + std::string time_type; + std::string threads; + + // Return the full name of the benchmark with each non-empty + // field separated by a '/' + std::string str() const; +}; + +// Interface for custom benchmark result printers. +// By default, benchmark reports are printed to stdout. However an application +// can control the destination of the reports by calling +// RunSpecifiedBenchmarks and passing it a custom reporter object. +// The reporter object must implement the following interface. +class BenchmarkReporter { + public: + struct Context { + CPUInfo const& cpu_info; + SystemInfo const& sys_info; + // The number of chars in the longest benchmark name. + size_t name_field_width; + static const char* executable_name; + Context(); + }; + + struct Run { + static const int64_t no_repetition_index = -1; + enum RunType { RT_Iteration, RT_Aggregate }; + + Run() + : run_type(RT_Iteration), + error_occurred(false), + iterations(1), + threads(1), + time_unit(kNanosecond), + real_accumulated_time(0), + cpu_accumulated_time(0), + max_heapbytes_used(0), + complexity(oNone), + complexity_lambda(), + complexity_n(0), + report_big_o(false), + report_rms(false), + counters(), + has_memory_result(false), + allocs_per_iter(0.0), + max_bytes_used(0) {} + + std::string benchmark_name() const; + BenchmarkName run_name; + int64_t family_index; + int64_t per_family_instance_index; + RunType run_type; + std::string aggregate_name; + std::string report_label; // Empty if not set by benchmark. 
+ bool error_occurred; + std::string error_message; + + IterationCount iterations; + int64_t threads; + int64_t repetition_index; + int64_t repetitions; + TimeUnit time_unit; + double real_accumulated_time; + double cpu_accumulated_time; + + // Return a value representing the real time per iteration in the unit + // specified by 'time_unit'. + // NOTE: If 'iterations' is zero the returned value represents the + // accumulated time. + double GetAdjustedRealTime() const; + + // Return a value representing the cpu time per iteration in the unit + // specified by 'time_unit'. + // NOTE: If 'iterations' is zero the returned value represents the + // accumulated time. + double GetAdjustedCPUTime() const; + + // This is set to 0.0 if memory tracing is not enabled. + double max_heapbytes_used; + + // Keep track of arguments to compute asymptotic complexity + BigO complexity; + BigOFunc* complexity_lambda; + int64_t complexity_n; + + // what statistics to compute from the measurements + const std::vector* statistics; + + // Inform print function whether the current run is a complexity report + bool report_big_o; + bool report_rms; + + UserCounters counters; + + // Memory metrics. + bool has_memory_result; + double allocs_per_iter; + int64_t max_bytes_used; + }; + + struct PerFamilyRunReports { + PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {} + + // How many runs will all instances of this benchmark perform? + int num_runs_total; + + // How many runs have happened already? + int num_runs_done; + + // The reports about (non-errneous!) runs of this family. + std::vector Runs; + }; + + // Construct a BenchmarkReporter with the output stream set to 'std::cout' + // and the error stream set to 'std::cerr' + BenchmarkReporter(); + + // Called once for every suite of benchmarks run. + // The parameter "context" contains information that the + // reporter may wish to use when generating its report, for example the + // platform under which the benchmarks are running. 
The benchmark run is + // never started if this function returns false, allowing the reporter + // to skip runs based on the context information. + virtual bool ReportContext(const Context& context) = 0; + + // Called once for each group of benchmark runs, gives information about + // cpu-time and heap memory usage during the benchmark run. If the group + // of runs contained more than two entries then 'report' contains additional + // elements representing the mean and standard deviation of those runs. + // Additionally if this group of runs was the last in a family of benchmarks + // 'reports' contains additional entries representing the asymptotic + // complexity and RMS of that benchmark family. + virtual void ReportRuns(const std::vector& report) = 0; + + // Called once and only once after ever group of benchmarks is run and + // reported. + virtual void Finalize() {} + + // REQUIRES: The object referenced by 'out' is valid for the lifetime + // of the reporter. + void SetOutputStream(std::ostream* out) { + assert(out); + output_stream_ = out; + } + + // REQUIRES: The object referenced by 'err' is valid for the lifetime + // of the reporter. + void SetErrorStream(std::ostream* err) { + assert(err); + error_stream_ = err; + } + + std::ostream& GetOutputStream() const { return *output_stream_; } + + std::ostream& GetErrorStream() const { return *error_stream_; } + + virtual ~BenchmarkReporter(); + + // Write a human readable string to 'out' representing the specified + // 'context'. + // REQUIRES: 'out' is non-null. + static void PrintBasicContext(std::ostream* out, Context const& context); + + private: + std::ostream* output_stream_; + std::ostream* error_stream_; +}; + +// Simple reporter that outputs benchmark data to the console. This is the +// default reporter used by RunSpecifiedBenchmarks(). 
+class ConsoleReporter : public BenchmarkReporter { + public: + enum OutputOptions { + OO_None = 0, + OO_Color = 1, + OO_Tabular = 2, + OO_ColorTabular = OO_Color | OO_Tabular, + OO_Defaults = OO_ColorTabular + }; + explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) + : output_options_(opts_), + name_field_width_(0), + prev_counters_(), + printed_header_(false) {} + + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + + protected: + virtual void PrintRunData(const Run& report); + virtual void PrintHeader(const Run& report); + + OutputOptions output_options_; + size_t name_field_width_; + UserCounters prev_counters_; + bool printed_header_; +}; + +class JSONReporter : public BenchmarkReporter { + public: + JSONReporter() : first_report_(true) {} + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + virtual void Finalize() BENCHMARK_OVERRIDE; + + private: + void PrintRunData(const Run& report); + + bool first_report_; +}; + +class BENCHMARK_DEPRECATED_MSG( + "The CSV Reporter will be removed in a future release") CSVReporter + : public BenchmarkReporter { + public: + CSVReporter() : printed_header_(false) {} + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; + + private: + void PrintRunData(const Run& report); + + bool printed_header_; + std::set user_counter_names_; +}; + +// If a MemoryManager is registered, it can be used to collect and report +// allocation metrics for a run of the benchmark. +class MemoryManager { + public: + struct Result { + Result() : num_allocs(0), max_bytes_used(0) {} + + // The number of allocations made in total between Start and Stop. + int64_t num_allocs; + + // The peak memory use between Start and Stop. 
+ int64_t max_bytes_used; + }; + + virtual ~MemoryManager() {} + + // Implement this to start recording allocation information. + virtual void Start() = 0; + + // Implement this to stop recording and fill out the given Result structure. + virtual void Stop(Result* result) = 0; +}; + +inline const char* GetTimeUnitString(TimeUnit unit) { + switch (unit) { + case kSecond: + return "s"; + case kMillisecond: + return "ms"; + case kMicrosecond: + return "us"; + case kNanosecond: + return "ns"; + } + BENCHMARK_UNREACHABLE(); +} + +inline double GetTimeUnitMultiplier(TimeUnit unit) { + switch (unit) { + case kSecond: + return 1; + case kMillisecond: + return 1e3; + case kMicrosecond: + return 1e6; + case kNanosecond: + return 1e9; + } + BENCHMARK_UNREACHABLE(); +} + +} // namespace benchmark + +#endif // BENCHMARK_BENCHMARK_H_ diff --git a/libcxx/utils/google-benchmark/requirements.txt b/libcxx/utils/google-benchmark/requirements.txt new file mode 100644 index 000000000000..85e898604068 --- /dev/null +++ b/libcxx/utils/google-benchmark/requirements.txt @@ -0,0 +1,2 @@ +numpy == 1.19.4 +scipy == 1.5.4 diff --git a/libcxx/utils/google-benchmark/setup.py b/libcxx/utils/google-benchmark/setup.py new file mode 100644 index 000000000000..5cdab10cf77c --- /dev/null +++ b/libcxx/utils/google-benchmark/setup.py @@ -0,0 +1,140 @@ +import os +import posixpath +import re +import shutil +import sys + +from distutils import sysconfig +import setuptools +from setuptools.command import build_ext + + +HERE = os.path.dirname(os.path.abspath(__file__)) + + +IS_WINDOWS = sys.platform.startswith("win") + + +def _get_version(): + """Parse the version string from __init__.py.""" + with open( + os.path.join(HERE, "bindings", "python", "google_benchmark", "__init__.py") + ) as init_file: + try: + version_line = next( + line for line in init_file if line.startswith("__version__") + ) + except StopIteration: + raise ValueError("__version__ not defined in __init__.py") + else: + namespace = {} + 
exec(version_line, namespace) # pylint: disable=exec-used + return namespace["__version__"] + + +def _parse_requirements(path): + with open(os.path.join(HERE, path)) as requirements: + return [ + line.rstrip() + for line in requirements + if not (line.isspace() or line.startswith("#")) + ] + + +class BazelExtension(setuptools.Extension): + """A C/C++ extension that is defined as a Bazel BUILD target.""" + + def __init__(self, name, bazel_target): + self.bazel_target = bazel_target + self.relpath, self.target_name = posixpath.relpath(bazel_target, "//").split( + ":" + ) + setuptools.Extension.__init__(self, name, sources=[]) + + +class BuildBazelExtension(build_ext.build_ext): + """A command that runs Bazel to build a C/C++ extension.""" + + def run(self): + for ext in self.extensions: + self.bazel_build(ext) + build_ext.build_ext.run(self) + + def bazel_build(self, ext): + """Runs the bazel build to create the package.""" + with open("WORKSPACE", "r") as workspace: + workspace_contents = workspace.read() + + with open("WORKSPACE", "w") as workspace: + workspace.write( + re.sub( + r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)', + sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep), + workspace_contents, + ) + ) + + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + + bazel_argv = [ + "bazel", + "build", + ext.bazel_target, + "--symlink_prefix=" + os.path.join(self.build_temp, "bazel-"), + "--compilation_mode=" + ("dbg" if self.debug else "opt"), + ] + + if IS_WINDOWS: + # Link with python*.lib. 
+ for library_dir in self.library_dirs: + bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + + self.spawn(bazel_argv) + + shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' + ext_bazel_bin_path = os.path.join( + self.build_temp, 'bazel-bin', + ext.relpath, ext.target_name + shared_lib_suffix) + + ext_dest_path = self.get_ext_fullpath(ext.name) + ext_dest_dir = os.path.dirname(ext_dest_path) + if not os.path.exists(ext_dest_dir): + os.makedirs(ext_dest_dir) + shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + + +setuptools.setup( + name="google_benchmark", + version=_get_version(), + url="https://github.com/google/benchmark", + description="A library to benchmark code snippets.", + author="Google", + author_email="benchmark-py@google.com", + # Contained modules and scripts. + package_dir={"": "bindings/python"}, + packages=setuptools.find_packages("bindings/python"), + install_requires=_parse_requirements("bindings/python/requirements.txt"), + cmdclass=dict(build_ext=BuildBazelExtension), + ext_modules=[ + BazelExtension( + "google_benchmark._benchmark", + "//bindings/python/google_benchmark:_benchmark", + ) + ], + zip_safe=False, + # PyPI package information. 
+ classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Software Development :: Testing", + "Topic :: System :: Benchmark", + ], + license="Apache 2.0", + keywords="benchmark", +) diff --git a/libcxx/utils/google-benchmark/src/CMakeLists.txt b/libcxx/utils/google-benchmark/src/CMakeLists.txt new file mode 100644 index 000000000000..a6c8e9a7a0b7 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/CMakeLists.txt @@ -0,0 +1,120 @@ +# Allow the source files to find headers in src/ +include(GNUInstallDirs) +include_directories(${PROJECT_SOURCE_DIR}/src) + +if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) + list(APPEND CMAKE_SHARED_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) + list(APPEND CMAKE_MODULE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) +endif() + +file(GLOB + SOURCE_FILES + *.cc + ${PROJECT_SOURCE_DIR}/include/benchmark/*.h + ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +file(GLOB BENCHMARK_MAIN "benchmark_main.cc") +foreach(item ${BENCHMARK_MAIN}) + list(REMOVE_ITEM SOURCE_FILES "${item}") +endforeach() + +add_library(benchmark ${SOURCE_FILES}) +add_library(benchmark::benchmark ALIAS benchmark) +set_target_properties(benchmark PROPERTIES + OUTPUT_NAME "benchmark" + VERSION ${GENERIC_LIB_VERSION} + SOVERSION ${GENERIC_LIB_SOVERSION} +) +target_include_directories(benchmark PUBLIC + $ + ) + +# libpfm, if available +if (HAVE_LIBPFM) + target_link_libraries(benchmark libpfm.a) + add_definitions(-DHAVE_LIBPFM) +endif() + +# Link threads. 
+target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) +find_library(LIBRT rt) +if(LIBRT) + target_link_libraries(benchmark ${LIBRT}) +endif() + +if(CMAKE_BUILD_TYPE) + string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) +endif() +if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*") + message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.") + target_link_libraries(benchmark -pthread) +endif() + +# We need extra libraries on Windows +if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(benchmark shlwapi) +endif() + +# We need extra libraries on Solaris +if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") + target_link_libraries(benchmark kstat) +endif() + +# Benchmark main library +add_library(benchmark_main "benchmark_main.cc") +add_library(benchmark::benchmark_main ALIAS benchmark_main) +set_target_properties(benchmark_main PROPERTIES + OUTPUT_NAME "benchmark_main" + VERSION ${GENERIC_LIB_VERSION} + SOVERSION ${GENERIC_LIB_SOVERSION} +) +target_include_directories(benchmark PUBLIC + $ + ) +target_link_libraries(benchmark_main benchmark::benchmark) + + +set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") + +set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") +set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") +set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") +set(targets_export_name "${PROJECT_NAME}Targets") + +set(namespace "${PROJECT_NAME}::") + +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + "${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion +) + +configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY) 
+configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) + +if (BENCHMARK_ENABLE_INSTALL) + # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) + install( + TARGETS benchmark benchmark_main + EXPORT ${targets_export_name} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + + install( + DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING PATTERN "*.*h") + + install( + FILES "${project_config}" "${version_config}" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") + + install( + FILES "${pkg_config}" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") + + install( + EXPORT "${targets_export_name}" + NAMESPACE "${namespace}" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") +endif() diff --git a/libcxx/utils/google-benchmark/src/arraysize.h b/libcxx/utils/google-benchmark/src/arraysize.h new file mode 100644 index 000000000000..51a50f2dff27 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/arraysize.h @@ -0,0 +1,33 @@ +#ifndef BENCHMARK_ARRAYSIZE_H_ +#define BENCHMARK_ARRAYSIZE_H_ + +#include "internal_macros.h" + +namespace benchmark { +namespace internal { +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. +// + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. +template +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// That gcc wants both of these prototypes seems mysterious. VC, for +// its part, can't decide which to use (another mystery). 
Matching of +// template overloads: the final frontier. +#ifndef COMPILER_MSVC +template +char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array))) + +} // end namespace internal +} // end namespace benchmark + +#endif // BENCHMARK_ARRAYSIZE_H_ diff --git a/libcxx/utils/google-benchmark/src/benchmark.cc b/libcxx/utils/google-benchmark/src/benchmark.cc new file mode 100644 index 000000000000..89f64967bf18 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark.cc @@ -0,0 +1,617 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/benchmark.h" + +#include "benchmark_api_internal.h" +#include "benchmark_runner.h" +#include "internal_macros.h" + +#ifndef BENCHMARK_OS_WINDOWS +#ifndef BENCHMARK_OS_FUCHSIA +#include +#endif +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "colorprint.h" +#include "commandlineflags.h" +#include "complexity.h" +#include "counter.h" +#include "internal_macros.h" +#include "log.h" +#include "mutex.h" +#include "perf_counters.h" +#include "re.h" +#include "statistics.h" +#include "string_util.h" +#include "thread_manager.h" +#include "thread_timer.h" + +// Print a list of benchmarks. This option overrides all other options. 
+DEFINE_bool(benchmark_list_tests, false); + +// A regular expression that specifies the set of benchmarks to execute. If +// this flag is empty, or if this flag is the string \"all\", all benchmarks +// linked into the binary are run. +DEFINE_string(benchmark_filter, "."); + +// Minimum number of seconds we should run benchmark before results are +// considered significant. For cpu-time based tests, this is the lower bound +// on the total cpu time used by all threads that make up the test. For +// real-time based tests, this is the lower bound on the elapsed time of the +// benchmark execution, regardless of number of threads. +DEFINE_double(benchmark_min_time, 0.5); + +// The number of runs of each benchmark. If greater than 1, the mean and +// standard deviation of the runs will be reported. +DEFINE_int32(benchmark_repetitions, 1); + +// If set, enable random interleaving of repetitions of all benchmarks. +// See http://github.com/google/benchmark/issues/1051 for details. +DEFINE_bool(benchmark_enable_random_interleaving, false); + +// Report the result of each benchmark repetitions. When 'true' is specified +// only the mean, standard deviation, and other statistics are reported for +// repeated benchmarks. Affects all reporters. +DEFINE_bool(benchmark_report_aggregates_only, false); + +// Display the result of each benchmark repetitions. When 'true' is specified +// only the mean, standard deviation, and other statistics are displayed for +// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects +// the display reporter, but *NOT* file reporter, which will still contain +// all the output. +DEFINE_bool(benchmark_display_aggregates_only, false); + +// The format to use for console output. +// Valid values are 'console', 'json', or 'csv'. +DEFINE_string(benchmark_format, "console"); + +// The format to use for file output. +// Valid values are 'console', 'json', or 'csv'. 
+DEFINE_string(benchmark_out_format, "json"); + +// The file to write additional output to. +DEFINE_string(benchmark_out, ""); + +// Whether to use colors in the output. Valid values: +// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if +// the output is being sent to a terminal and the TERM environment variable is +// set to a terminal type that supports colors. +DEFINE_string(benchmark_color, "auto"); + +// Whether to use tabular format when printing user counters to the console. +// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. +DEFINE_bool(benchmark_counters_tabular, false); + +// The level of verbose logging to output +DEFINE_int32(v, 0); + +// List of additional perf counters to collect, in libpfm format. For more +// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html +DEFINE_string(benchmark_perf_counters, ""); + +namespace benchmark { +namespace internal { + +// Extra context to include in the output formatted as comma-separated key-value +// pairs. Kept internal as it's only used for parsing from env/command line. +DEFINE_kvpairs(benchmark_context, {}); + +std::map* global_context = nullptr; + +// FIXME: wouldn't LTO mess this up? 
+void UseCharPointer(char const volatile*) {} + +} // namespace internal + +State::State(IterationCount max_iters, const std::vector& ranges, + int thread_i, int n_threads, internal::ThreadTimer* timer, + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) + : total_iterations_(0), + batch_leftover_(0), + max_iterations(max_iters), + started_(false), + finished_(false), + error_occurred_(false), + range_(ranges), + complexity_n_(0), + counters(), + thread_index(thread_i), + threads(n_threads), + timer_(timer), + manager_(manager), + perf_counters_measurement_(perf_counters_measurement) { + CHECK(max_iterations != 0) << "At least one iteration must be run"; + CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + + // Note: The use of offsetof below is technically undefined until C++17 + // because State is not a standard layout type. However, all compilers + // currently provide well-defined behavior as an extension (which is + // demonstrated since constexpr evaluation must diagnose all undefined + // behavior). However, GCC and Clang also warn about this use of offsetof, + // which must be suppressed. +#if defined(__INTEL_COMPILER) +#pragma warning push +#pragma warning(disable : 1875) +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Winvalid-offsetof" +#endif + // Offset tests to ensure commonly accessed data is on the first cache line. 
+ const int cache_line_size = 64; + static_assert(offsetof(State, error_occurred_) <= + (cache_line_size - sizeof(error_occurred_)), + ""); +#if defined(__INTEL_COMPILER) +#pragma warning pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +} + +void State::PauseTiming() { + // Add in time accumulated so far + CHECK(started_ && !finished_ && !error_occurred_); + timer_->StopTimer(); + if (perf_counters_measurement_) { + auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); + for (const auto& name_and_measurement : measurements) { + auto name = name_and_measurement.first; + auto measurement = name_and_measurement.second; + CHECK_EQ(counters[name], 0.0); + counters[name] = Counter(measurement, Counter::kAvgIterations); + } + } +} + +void State::ResumeTiming() { + CHECK(started_ && !finished_ && !error_occurred_); + timer_->StartTimer(); + if (perf_counters_measurement_) { + perf_counters_measurement_->Start(); + } +} + +void State::SkipWithError(const char* msg) { + CHECK(msg); + error_occurred_ = true; + { + MutexLock l(manager_->GetBenchmarkMutex()); + if (manager_->results.has_error_ == false) { + manager_->results.error_message_ = msg; + manager_->results.has_error_ = true; + } + } + total_iterations_ = 0; + if (timer_->running()) timer_->StopTimer(); +} + +void State::SetIterationTime(double seconds) { + timer_->SetIterationTime(seconds); +} + +void State::SetLabel(const char* label) { + MutexLock l(manager_->GetBenchmarkMutex()); + manager_->results.report_label_ = label; +} + +void State::StartKeepRunning() { + CHECK(!started_ && !finished_); + started_ = true; + total_iterations_ = error_occurred_ ? 0 : max_iterations; + manager_->StartStopBarrier(); + if (!error_occurred_) ResumeTiming(); +} + +void State::FinishKeepRunning() { + CHECK(started_ && (!finished_ || error_occurred_)); + if (!error_occurred_) { + PauseTiming(); + } + // Total iterations has now wrapped around past 0. Fix this. 
+ total_iterations_ = 0; + finished_ = true; + manager_->StartStopBarrier(); +} + +namespace internal { +namespace { + +// Flushes streams after invoking reporter methods that write to them. This +// ensures users get timely updates even when streams are not line-buffered. +void FlushStreams(BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); +} + +// Reports in both display and file reporters. +void Report(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + FlushStreams(display_reporter); + FlushStreams(file_reporter); +} + +void RunBenchmarks(const std::vector& benchmarks, + BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter) { + // Note the file_reporter can be null. + CHECK(display_reporter != nullptr); + + // Determine the width of the name field using a minimum width of 10. 
+ bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; + size_t name_field_width = 10; + size_t stat_field_width = 0; + for (const BenchmarkInstance& benchmark : benchmarks) { + name_field_width = + std::max(name_field_width, benchmark.name().str().size()); + might_have_aggregates |= benchmark.repetitions() > 1; + + for (const auto& Stat : benchmark.statistics()) + stat_field_width = std::max(stat_field_width, Stat.name_.size()); + } + if (might_have_aggregates) name_field_width += 1 + stat_field_width; + + // Print header here + BenchmarkReporter::Context context; + context.name_field_width = name_field_width; + + // Keep track of running times of all instances of each benchmark family. + std::map + per_family_reports; + + if (display_reporter->ReportContext(context) && + (!file_reporter || file_reporter->ReportContext(context))) { + FlushStreams(display_reporter); + FlushStreams(file_reporter); + + size_t num_repetitions_total = 0; + + std::vector runners; + runners.reserve(benchmarks.size()); + for (const BenchmarkInstance& benchmark : benchmarks) { + BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; + if (benchmark.complexity() != oNone) + reports_for_family = &per_family_reports[benchmark.family_index()]; + + runners.emplace_back(benchmark, reports_for_family); + int num_repeats_of_this_instance = runners.back().GetNumRepeats(); + num_repetitions_total += num_repeats_of_this_instance; + if (reports_for_family) + reports_for_family->num_runs_total += num_repeats_of_this_instance; + } + assert(runners.size() == benchmarks.size() && "Unexpected runner count."); + + std::vector repetition_indices; + repetition_indices.reserve(num_repetitions_total); + for (size_t runner_index = 0, num_runners = runners.size(); + runner_index != num_runners; ++runner_index) { + const internal::BenchmarkRunner& runner = runners[runner_index]; + std::fill_n(std::back_inserter(repetition_indices), + runner.GetNumRepeats(), runner_index); + } + 
assert(repetition_indices.size() == num_repetitions_total && + "Unexpected number of repetition indexes."); + + if (FLAGS_benchmark_enable_random_interleaving) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(repetition_indices.begin(), repetition_indices.end(), g); + } + + for (size_t repetition_index : repetition_indices) { + internal::BenchmarkRunner& runner = runners[repetition_index]; + runner.DoOneRepetition(); + if (runner.HasRepeatsRemaining()) continue; + // FIXME: report each repetition separately, not all of them in bulk. + + RunResults run_results = runner.GetResults(); + + // Maybe calculate complexity report + if (const auto* reports_for_family = runner.GetReportsForFamily()) { + if (reports_for_family->num_runs_done == + reports_for_family->num_runs_total) { + auto additional_run_stats = ComputeBigO(reports_for_family->Runs); + run_results.aggregates_only.insert(run_results.aggregates_only.end(), + additional_run_stats.begin(), + additional_run_stats.end()); + per_family_reports.erase( + (int)reports_for_family->Runs.front().family_index); + } + } + + Report(display_reporter, file_reporter, run_results); + } + } + display_reporter->Finalize(); + if (file_reporter) file_reporter->Finalize(); + FlushStreams(display_reporter); + FlushStreams(file_reporter); +} + +// Disable deprecated warnings temporarily because we need to reference +// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + +std::unique_ptr CreateReporter( + std::string const& name, ConsoleReporter::OutputOptions output_opts) { + typedef std::unique_ptr PtrType; + if (name == "console") { + return PtrType(new ConsoleReporter(output_opts)); + } else if (name == "json") { + return PtrType(new JSONReporter); + } else if (name == "csv") { + return PtrType(new CSVReporter); + } else { + std::cerr << "Unexpected format: '" << name << "'\n"; + 
std::exit(1); + } +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +} // end namespace + +bool IsZero(double n) { + return std::abs(n) < std::numeric_limits::epsilon(); +} + +ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { + int output_opts = ConsoleReporter::OO_Defaults; + auto is_benchmark_color = [force_no_color]() -> bool { + if (force_no_color) { + return false; + } + if (FLAGS_benchmark_color == "auto") { + return IsColorTerminal(); + } + return IsTruthyFlagValue(FLAGS_benchmark_color); + }; + if (is_benchmark_color()) { + output_opts |= ConsoleReporter::OO_Color; + } else { + output_opts &= ~ConsoleReporter::OO_Color; + } + if (FLAGS_benchmark_counters_tabular) { + output_opts |= ConsoleReporter::OO_Tabular; + } else { + output_opts &= ~ConsoleReporter::OO_Tabular; + } + return static_cast(output_opts); +} + +} // end namespace internal + +size_t RunSpecifiedBenchmarks() { + return RunSpecifiedBenchmarks(nullptr, nullptr); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) { + return RunSpecifiedBenchmarks(display_reporter, nullptr); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter) { + std::string spec = FLAGS_benchmark_filter; + if (spec.empty() || spec == "all") + spec = "."; // Regexp that matches all benchmarks + + // Setup the reporters + std::ofstream output_file; + std::unique_ptr default_display_reporter; + std::unique_ptr default_file_reporter; + if (!display_reporter) { + default_display_reporter = internal::CreateReporter( + FLAGS_benchmark_format, internal::GetOutputOptions()); + display_reporter = default_display_reporter.get(); + } + auto& Out = display_reporter->GetOutputStream(); + auto& Err = display_reporter->GetErrorStream(); + + std::string const& fname = FLAGS_benchmark_out; + if (fname.empty() && file_reporter) { + Err << "A custom file reporter was provided but " + "--benchmark_out= was not specified." 
+ << std::endl; + std::exit(1); + } + if (!fname.empty()) { + output_file.open(fname); + if (!output_file.is_open()) { + Err << "invalid file name: '" << fname << "'" << std::endl; + std::exit(1); + } + if (!file_reporter) { + default_file_reporter = internal::CreateReporter( + FLAGS_benchmark_out_format, ConsoleReporter::OO_None); + file_reporter = default_file_reporter.get(); + } + file_reporter->SetOutputStream(&output_file); + file_reporter->SetErrorStream(&output_file); + } + + std::vector benchmarks; + if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0; + + if (benchmarks.empty()) { + Err << "Failed to match any benchmarks against regex: " << spec << "\n"; + return 0; + } + + if (FLAGS_benchmark_list_tests) { + for (auto const& benchmark : benchmarks) + Out << benchmark.name().str() << "\n"; + } else { + internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); + } + + return benchmarks.size(); +} + +void RegisterMemoryManager(MemoryManager* manager) { + internal::memory_manager = manager; +} + +void AddCustomContext(const std::string& key, const std::string& value) { + if (internal::global_context == nullptr) { + internal::global_context = new std::map(); + } + if (!internal::global_context->emplace(key, value).second) { + std::cerr << "Failed to add custom context \"" << key << "\" as it already " + << "exists with value \"" << value << "\"\n"; + } +} + +namespace internal { + +void PrintUsageAndExit() { + fprintf(stdout, + "benchmark" + " [--benchmark_list_tests={true|false}]\n" + " [--benchmark_filter=]\n" + " [--benchmark_min_time=]\n" + " [--benchmark_repetitions=]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" + " [--benchmark_report_aggregates_only={true|false}]\n" + " [--benchmark_display_aggregates_only={true|false}]\n" + " [--benchmark_format=]\n" + " [--benchmark_out=]\n" + " [--benchmark_out_format=]\n" + " [--benchmark_color={auto|true|false}]\n" + " [--benchmark_counters_tabular={true|false}]\n" + " 
[--benchmark_context==,...]\n" + " [--v=]\n"); + exit(0); +} + +void ParseCommandLineFlags(int* argc, char** argv) { + using namespace benchmark; + BenchmarkReporter::Context::executable_name = + (argc && *argc > 0) ? argv[0] : "unknown"; + for (int i = 1; argc && i < *argc; ++i) { + if (ParseBoolFlag(argv[i], "benchmark_list_tests", + &FLAGS_benchmark_list_tests) || + ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || + ParseDoubleFlag(argv[i], "benchmark_min_time", + &FLAGS_benchmark_min_time) || + ParseInt32Flag(argv[i], "benchmark_repetitions", + &FLAGS_benchmark_repetitions) || + ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", + &FLAGS_benchmark_enable_random_interleaving) || + ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", + &FLAGS_benchmark_report_aggregates_only) || + ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", + &FLAGS_benchmark_display_aggregates_only) || + ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) || + ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || + ParseStringFlag(argv[i], "benchmark_out_format", + &FLAGS_benchmark_out_format) || + ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || + // "color_print" is the deprecated name for "benchmark_color". + // TODO: Remove this. 
+ ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || + ParseBoolFlag(argv[i], "benchmark_counters_tabular", + &FLAGS_benchmark_counters_tabular) || + ParseStringFlag(argv[i], "benchmark_perf_counters", + &FLAGS_benchmark_perf_counters) || + ParseKeyValueFlag(argv[i], "benchmark_context", + &FLAGS_benchmark_context) || + ParseInt32Flag(argv[i], "v", &FLAGS_v)) { + for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; + + --(*argc); + --i; + } else if (IsFlag(argv[i], "help")) { + PrintUsageAndExit(); + } + } + for (auto const* flag : + {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) { + if (*flag != "console" && *flag != "json" && *flag != "csv") { + PrintUsageAndExit(); + } + } + if (FLAGS_benchmark_color.empty()) { + PrintUsageAndExit(); + } + for (const auto& kv : FLAGS_benchmark_context) { + AddCustomContext(kv.first, kv.second); + } +} + +int InitializeStreams() { + static std::ios_base::Init init; + return 0; +} + +} // end namespace internal + +void Initialize(int* argc, char** argv) { + internal::ParseCommandLineFlags(argc, argv); + internal::LogLevel() = FLAGS_v; +} + +void Shutdown() { + delete internal::global_context; +} + +bool ReportUnrecognizedArguments(int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], + argv[i]); + } + return argc > 1; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc b/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc new file mode 100644 index 000000000000..89da519afc8c --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc @@ -0,0 +1,94 @@ +#include "benchmark_api_internal.h" + +#include + +#include "string_util.h" + +namespace benchmark { +namespace internal { + +BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, + int per_family_instance_idx, + const std::vector& args, + int thread_count) + : 
benchmark_(*benchmark), + family_index_(family_idx), + per_family_instance_index_(per_family_instance_idx), + aggregation_report_mode_(benchmark_.aggregation_report_mode_), + args_(args), + time_unit_(benchmark_.time_unit_), + measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), + use_real_time_(benchmark_.use_real_time_), + use_manual_time_(benchmark_.use_manual_time_), + complexity_(benchmark_.complexity_), + complexity_lambda_(benchmark_.complexity_lambda_), + statistics_(benchmark_.statistics_), + repetitions_(benchmark_.repetitions_), + min_time_(benchmark_.min_time_), + iterations_(benchmark_.iterations_), + threads_(thread_count) { + name_.function_name = benchmark_.name_; + + size_t arg_i = 0; + for (const auto& arg : args) { + if (!name_.args.empty()) { + name_.args += '/'; + } + + if (arg_i < benchmark->arg_names_.size()) { + const auto& arg_name = benchmark_.arg_names_[arg_i]; + if (!arg_name.empty()) { + name_.args += StrFormat("%s:", arg_name.c_str()); + } + } + + name_.args += StrFormat("%" PRId64, arg); + ++arg_i; + } + + if (!IsZero(benchmark->min_time_)) { + name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); + } + + if (benchmark_.iterations_ != 0) { + name_.iterations = StrFormat( + "iterations:%lu", static_cast(benchmark_.iterations_)); + } + + if (benchmark_.repetitions_ != 0) { + name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_); + } + + if (benchmark_.measure_process_cpu_time_) { + name_.time_type = "process_time"; + } + + if (benchmark_.use_manual_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "manual_time"; + } else if (benchmark_.use_real_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "real_time"; + } + + if (!benchmark_.thread_counts_.empty()) { + name_.threads = StrFormat("threads:%d", threads_); + } +} + +State BenchmarkInstance::Run( + IterationCount iters, int thread_id, internal::ThreadTimer* 
timer, + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) const { + State st(iters, args_, thread_id, threads_, timer, manager, + perf_counters_measurement); + benchmark_.Run(st); + return st; +} + +} // namespace internal +} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h new file mode 100644 index 000000000000..9296b7d2c816 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h @@ -0,0 +1,78 @@ +#ifndef BENCHMARK_API_INTERNAL_H +#define BENCHMARK_API_INTERNAL_H + +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "commandlineflags.h" + +namespace benchmark { +namespace internal { + +// Information kept per benchmark we may want to run +class BenchmarkInstance { + public: + BenchmarkInstance(Benchmark* benchmark, int family_index, + int per_family_instance_index, + const std::vector& args, int threads); + + const BenchmarkName& name() const { return name_; } + int family_index() const { return family_index_; } + int per_family_instance_index() const { return per_family_instance_index_; } + AggregationReportMode aggregation_report_mode() const { + return aggregation_report_mode_; + } + TimeUnit time_unit() const { return time_unit_; } + bool measure_process_cpu_time() const { return measure_process_cpu_time_; } + bool use_real_time() const { return use_real_time_; } + bool use_manual_time() const { return use_manual_time_; } + BigO complexity() const { return complexity_; } + BigOFunc& complexity_lambda() const { return *complexity_lambda_; } + const std::vector& statistics() const { return statistics_; } + int repetitions() const { return repetitions_; } + double min_time() const { return min_time_; } + IterationCount iterations() const { return iterations_; } + int threads() const { return threads_; } + + State Run(IterationCount 
iters, int thread_id, internal::ThreadTimer* timer, + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) const; + + private: + BenchmarkName name_; + Benchmark& benchmark_; + const int family_index_; + const int per_family_instance_index_; + AggregationReportMode aggregation_report_mode_; + const std::vector& args_; + TimeUnit time_unit_; + bool measure_process_cpu_time_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + UserCounters counters_; + const std::vector& statistics_; + int repetitions_; + double min_time_; + IterationCount iterations_; + int threads_; // Number of concurrent threads to us +}; + +bool FindBenchmarksInternal(const std::string& re, + std::vector* benchmarks, + std::ostream* Err); + +bool IsZero(double n); + +ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); + +} // end namespace internal +} // end namespace benchmark + +#endif // BENCHMARK_API_INTERNAL_H diff --git a/libcxx/utils/google-benchmark/src/benchmark_main.cc b/libcxx/utils/google-benchmark/src/benchmark_main.cc new file mode 100644 index 000000000000..b3b247831496 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_main.cc @@ -0,0 +1,17 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "benchmark/benchmark.h" + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/src/benchmark_name.cc b/libcxx/utils/google-benchmark/src/benchmark_name.cc new file mode 100644 index 000000000000..2a17ebce277f --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_name.cc @@ -0,0 +1,58 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace benchmark { + +namespace { + +// Compute the total size of a pack of std::strings +size_t size_impl() { return 0; } + +template +size_t size_impl(const Head& head, const Tail&... tail) { + return head.size() + size_impl(tail...); +} + +// Join a pack of std::strings using a delimiter +// TODO: use absl::StrJoin +void join_impl(std::string&, char) {} + +template +void join_impl(std::string& s, const char delimiter, const Head& head, + const Tail&... tail) { + if (!s.empty() && !head.empty()) { + s += delimiter; + } + + s += head; + + join_impl(s, delimiter, tail...); +} + +template +std::string join(char delimiter, const Ts&... 
ts) { + std::string s; + s.reserve(sizeof...(Ts) + size_impl(ts...)); + join_impl(s, delimiter, ts...); + return s; +} +} // namespace + +std::string BenchmarkName::str() const { + return join('/', function_name, args, min_time, iterations, repetitions, + time_type, threads); +} +} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.cc b/libcxx/utils/google-benchmark/src/benchmark_register.cc new file mode 100644 index 000000000000..574462220e7c --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_register.cc @@ -0,0 +1,461 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark_register.h" + +#ifndef BENCHMARK_OS_WINDOWS +#ifndef BENCHMARK_OS_FUCHSIA +#include +#endif +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "benchmark_api_internal.h" +#include "check.h" +#include "commandlineflags.h" +#include "complexity.h" +#include "internal_macros.h" +#include "log.h" +#include "mutex.h" +#include "re.h" +#include "statistics.h" +#include "string_util.h" +#include "timers.h" + +namespace benchmark { + +namespace { +// For non-dense Range, intermediate values are powers of kRangeMultiplier. 
+static const int kRangeMultiplier = 8; +// The size of a benchmark family determines is the number of inputs to repeat +// the benchmark on. If this is "large" then warn the user during configuration. +static const size_t kMaxFamilySize = 100; +} // end namespace + +namespace internal { + +//=============================================================================// +// BenchmarkFamilies +//=============================================================================// + +// Class for managing registered benchmarks. Note that each registered +// benchmark identifies a family of related benchmarks to run. +class BenchmarkFamilies { + public: + static BenchmarkFamilies* GetInstance(); + + // Registers a benchmark family and returns the index assigned to it. + size_t AddBenchmark(std::unique_ptr family); + + // Clear all registered benchmark families. + void ClearBenchmarks(); + + // Extract the list of benchmark instances that match the specified + // regular expression. + bool FindBenchmarks(std::string re, + std::vector* benchmarks, + std::ostream* Err); + + private: + BenchmarkFamilies() {} + + std::vector> families_; + Mutex mutex_; +}; + +BenchmarkFamilies* BenchmarkFamilies::GetInstance() { + static BenchmarkFamilies instance; + return &instance; +} + +size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr family) { + MutexLock l(mutex_); + size_t index = families_.size(); + families_.push_back(std::move(family)); + return index; +} + +void BenchmarkFamilies::ClearBenchmarks() { + MutexLock l(mutex_); + families_.clear(); + families_.shrink_to_fit(); +} + +bool BenchmarkFamilies::FindBenchmarks( + std::string spec, std::vector* benchmarks, + std::ostream* ErrStream) { + CHECK(ErrStream); + auto& Err = *ErrStream; + // Make regular expression out of command-line flag + std::string error_msg; + Regex re; + bool isNegativeFilter = false; + if (spec[0] == '-') { + spec.replace(0, 1, ""); + isNegativeFilter = true; + } + if (!re.Init(spec, &error_msg)) { + Err 
<< "Could not compile benchmark re: " << error_msg << std::endl; + return false; + } + + // Special list of thread counts to use when none are specified + const std::vector one_thread = {1}; + + int next_family_index = 0; + + MutexLock l(mutex_); + for (std::unique_ptr& family : families_) { + int family_index = next_family_index; + int per_family_instance_index = 0; + + // Family was deleted or benchmark doesn't match + if (!family) continue; + + if (family->ArgsCnt() == -1) { + family->Args({}); + } + const std::vector* thread_counts = + (family->thread_counts_.empty() + ? &one_thread + : &static_cast&>(family->thread_counts_)); + const size_t family_size = family->args_.size() * thread_counts->size(); + // The benchmark will be run at least 'family_size' different inputs. + // If 'family_size' is very large warn the user. + if (family_size > kMaxFamilySize) { + Err << "The number of inputs is very large. " << family->name_ + << " will be repeated at least " << family_size << " times.\n"; + } + // reserve in the special case the regex ".", since we know the final + // family size. + if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); + + for (auto const& args : family->args_) { + for (int num_threads : *thread_counts) { + BenchmarkInstance instance(family.get(), family_index, + per_family_instance_index, args, + num_threads); + + const auto full_name = instance.name().str(); + if ((re.Match(full_name) && !isNegativeFilter) || + (!re.Match(full_name) && isNegativeFilter)) { + benchmarks->push_back(std::move(instance)); + + ++per_family_instance_index; + + // Only bump the next family index once we've estabilished that + // at least one instance of this family will be run. 
+ if (next_family_index == family_index) ++next_family_index; + } + } + } + } + return true; +} + +Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { + std::unique_ptr bench_ptr(bench); + BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); + families->AddBenchmark(std::move(bench_ptr)); + return bench; +} + +// FIXME: This function is a hack so that benchmark.cc can access +// `BenchmarkFamilies` +bool FindBenchmarksInternal(const std::string& re, + std::vector* benchmarks, + std::ostream* Err) { + return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); +} + +//=============================================================================// +// Benchmark +//=============================================================================// + +Benchmark::Benchmark(const char* name) + : name_(name), + aggregation_report_mode_(ARM_Unspecified), + time_unit_(kNanosecond), + range_multiplier_(kRangeMultiplier), + min_time_(0), + iterations_(0), + repetitions_(0), + measure_process_cpu_time_(false), + use_real_time_(false), + use_manual_time_(false), + complexity_(oNone), + complexity_lambda_(nullptr) { + ComputeStatistics("mean", StatisticsMean); + ComputeStatistics("median", StatisticsMedian); + ComputeStatistics("stddev", StatisticsStdDev); +} + +Benchmark::~Benchmark() {} + +Benchmark* Benchmark::Name(const std::string& name) { + SetName(name.c_str()); + return this; +} + +Benchmark* Benchmark::Arg(int64_t x) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + args_.push_back({x}); + return this; +} + +Benchmark* Benchmark::Unit(TimeUnit unit) { + time_unit_ = unit; + return this; +} + +Benchmark* Benchmark::Range(int64_t start, int64_t limit) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + std::vector arglist; + AddRange(&arglist, start, limit, range_multiplier_); + + for (int64_t i : arglist) { + args_.push_back({i}); + } + return this; +} + +Benchmark* Benchmark::Ranges( + const std::vector>& ranges) { + CHECK(ArgsCnt() == -1 || 
ArgsCnt() == static_cast(ranges.size())); + std::vector> arglists(ranges.size()); + for (std::size_t i = 0; i < ranges.size(); i++) { + AddRange(&arglists[i], ranges[i].first, ranges[i].second, + range_multiplier_); + } + + ArgsProduct(arglists); + + return this; +} + +Benchmark* Benchmark::ArgsProduct( + const std::vector>& arglists) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); + + std::vector indices(arglists.size()); + const std::size_t total = std::accumulate( + std::begin(arglists), std::end(arglists), std::size_t{1}, + [](const std::size_t res, const std::vector& arglist) { + return res * arglist.size(); + }); + std::vector args; + args.reserve(arglists.size()); + for (std::size_t i = 0; i < total; i++) { + for (std::size_t arg = 0; arg < arglists.size(); arg++) { + args.push_back(arglists[arg][indices[arg]]); + } + args_.push_back(args); + args.clear(); + + std::size_t arg = 0; + do { + indices[arg] = (indices[arg] + 1) % arglists[arg].size(); + } while (indices[arg++] == 0 && arg < arglists.size()); + } + + return this; +} + +Benchmark* Benchmark::ArgName(const std::string& name) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + arg_names_ = {name}; + return this; +} + +Benchmark* Benchmark::ArgNames(const std::vector& names) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); + arg_names_ = names; + return this; +} + +Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + CHECK_LE(start, limit); + for (int64_t arg = start; arg <= limit; arg += step) { + args_.push_back({arg}); + } + return this; +} + +Benchmark* Benchmark::Args(const std::vector& args) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); + args_.push_back(args); + return this; +} + +Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { + custom_arguments(this); + return this; +} + +Benchmark* Benchmark::RangeMultiplier(int multiplier) { + 
CHECK(multiplier > 1); + range_multiplier_ = multiplier; + return this; +} + +Benchmark* Benchmark::MinTime(double t) { + CHECK(t > 0.0); + CHECK(iterations_ == 0); + min_time_ = t; + return this; +} + +Benchmark* Benchmark::Iterations(IterationCount n) { + CHECK(n > 0); + CHECK(IsZero(min_time_)); + iterations_ = n; + return this; +} + +Benchmark* Benchmark::Repetitions(int n) { + CHECK(n > 0); + repetitions_ = n; + return this; +} + +Benchmark* Benchmark::ReportAggregatesOnly(bool value) { + aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default; + return this; +} + +Benchmark* Benchmark::DisplayAggregatesOnly(bool value) { + // If we were called, the report mode is no longer 'unspecified', in any case. + aggregation_report_mode_ = static_cast( + aggregation_report_mode_ | ARM_Default); + + if (value) { + aggregation_report_mode_ = static_cast( + aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly); + } else { + aggregation_report_mode_ = static_cast( + aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly); + } + + return this; +} + +Benchmark* Benchmark::MeasureProcessCPUTime() { + // Can be used together with UseRealTime() / UseManualTime(). 
+ measure_process_cpu_time_ = true; + return this; +} + +Benchmark* Benchmark::UseRealTime() { + CHECK(!use_manual_time_) + << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_real_time_ = true; + return this; +} + +Benchmark* Benchmark::UseManualTime() { + CHECK(!use_real_time_) + << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_manual_time_ = true; + return this; +} + +Benchmark* Benchmark::Complexity(BigO complexity) { + complexity_ = complexity; + return this; +} + +Benchmark* Benchmark::Complexity(BigOFunc* complexity) { + complexity_lambda_ = complexity; + complexity_ = oLambda; + return this; +} + +Benchmark* Benchmark::ComputeStatistics(std::string name, + StatisticsFunc* statistics) { + statistics_.emplace_back(name, statistics); + return this; +} + +Benchmark* Benchmark::Threads(int t) { + CHECK_GT(t, 0); + thread_counts_.push_back(t); + return this; +} + +Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { + CHECK_GT(min_threads, 0); + CHECK_GE(max_threads, min_threads); + + AddRange(&thread_counts_, min_threads, max_threads, 2); + return this; +} + +Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, + int stride) { + CHECK_GT(min_threads, 0); + CHECK_GE(max_threads, min_threads); + CHECK_GE(stride, 1); + + for (auto i = min_threads; i < max_threads; i += stride) { + thread_counts_.push_back(i); + } + thread_counts_.push_back(max_threads); + return this; +} + +Benchmark* Benchmark::ThreadPerCpu() { + thread_counts_.push_back(CPUInfo::Get().num_cpus); + return this; +} + +void Benchmark::SetName(const char* name) { name_ = name; } + +int Benchmark::ArgsCnt() const { + if (args_.empty()) { + if (arg_names_.empty()) return -1; + return static_cast(arg_names_.size()); + } + return static_cast(args_.front().size()); +} + +//=============================================================================// +// FunctionBenchmark 
+//=============================================================================// + +void FunctionBenchmark::Run(State& st) { func_(st); } + +} // end namespace internal + +void ClearRegisteredBenchmarks() { + internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.h b/libcxx/utils/google-benchmark/src/benchmark_register.h new file mode 100644 index 000000000000..09496607f224 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_register.h @@ -0,0 +1,108 @@ +#ifndef BENCHMARK_REGISTER_H +#define BENCHMARK_REGISTER_H + +#include +#include + +#include "check.h" + +namespace benchmark { +namespace internal { + +// Append the powers of 'mult' in the closed interval [lo, hi]. +// Returns iterator to the start of the inserted range. +template +typename std::vector::iterator +AddPowers(std::vector* dst, T lo, T hi, int mult) { + CHECK_GE(lo, 0); + CHECK_GE(hi, lo); + CHECK_GE(mult, 2); + + const size_t start_offset = dst->size(); + + static const T kmax = std::numeric_limits::max(); + + // Space out the values in multiples of "mult" + for (T i = static_cast(1); i <= hi; i *= mult) { + if (i >= lo) { + dst->push_back(i); + } + // Break the loop here since multiplying by + // 'mult' would move outside of the range of T + if (i > kmax / mult) break; + } + + return dst->begin() + start_offset; +} + +template +void AddNegatedPowers(std::vector* dst, T lo, T hi, int mult) { + // We negate lo and hi so we require that they cannot be equal to 'min'. + CHECK_GT(lo, std::numeric_limits::min()); + CHECK_GT(hi, std::numeric_limits::min()); + CHECK_GE(hi, lo); + CHECK_LE(hi, 0); + + // Add positive powers, then negate and reverse. + // Casts necessary since small integers get promoted + // to 'int' when negating. 
+ const auto lo_complement = static_cast(-lo); + const auto hi_complement = static_cast(-hi); + + const auto it = AddPowers(dst, hi_complement, lo_complement, mult); + + std::for_each(it, dst->end(), [](T& t) { t *= -1; }); + std::reverse(it, dst->end()); +} + +template +void AddRange(std::vector* dst, T lo, T hi, int mult) { + static_assert(std::is_integral::value && std::is_signed::value, + "Args type must be a signed integer"); + + CHECK_GE(hi, lo); + CHECK_GE(mult, 2); + + // Add "lo" + dst->push_back(lo); + + // Handle lo == hi as a special case, so we then know + // lo < hi and so it is safe to add 1 to lo and subtract 1 + // from hi without falling outside of the range of T. + if (lo == hi) return; + + // Ensure that lo_inner <= hi_inner below. + if (lo + 1 == hi) { + dst->push_back(hi); + return; + } + + // Add all powers of 'mult' in the range [lo+1, hi-1] (inclusive). + const auto lo_inner = static_cast(lo + 1); + const auto hi_inner = static_cast(hi - 1); + + // Insert negative values + if (lo_inner < 0) { + AddNegatedPowers(dst, lo_inner, std::min(hi_inner, T{-1}), mult); + } + + // Treat 0 as a special case (see discussion on #762). + if (lo < 0 && hi >= 0) { + dst->push_back(0); + } + + // Insert positive values + if (hi_inner > 0) { + AddPowers(dst, std::max(lo_inner, T{1}), hi_inner, mult); + } + + // Add "hi" (if different from last value). + if (hi != dst->back()) { + dst->push_back(hi); + } +} + +} // namespace internal +} // namespace benchmark + +#endif // BENCHMARK_REGISTER_H diff --git a/libcxx/utils/google-benchmark/src/benchmark_runner.cc b/libcxx/utils/google-benchmark/src/benchmark_runner.cc new file mode 100644 index 000000000000..6742d42dbecd --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_runner.cc @@ -0,0 +1,349 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark_runner.h" + +#include "benchmark/benchmark.h" +#include "benchmark_api_internal.h" +#include "internal_macros.h" + +#ifndef BENCHMARK_OS_WINDOWS +#ifndef BENCHMARK_OS_FUCHSIA +#include +#endif +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "colorprint.h" +#include "commandlineflags.h" +#include "complexity.h" +#include "counter.h" +#include "internal_macros.h" +#include "log.h" +#include "mutex.h" +#include "perf_counters.h" +#include "re.h" +#include "statistics.h" +#include "string_util.h" +#include "thread_manager.h" +#include "thread_timer.h" + +namespace benchmark { + +namespace internal { + +MemoryManager* memory_manager = nullptr; + +namespace { + +static constexpr IterationCount kMaxIterations = 1000000000; + +BenchmarkReporter::Run CreateRunReport( + const benchmark::internal::BenchmarkInstance& b, + const internal::ThreadManager::Result& results, + IterationCount memory_iterations, + const MemoryManager::Result& memory_result, double seconds, + int64_t repetition_index, int64_t repeats) { + // Create report about this benchmark run. 
+ BenchmarkReporter::Run report; + + report.run_name = b.name(); + report.family_index = b.family_index(); + report.per_family_instance_index = b.per_family_instance_index(); + report.error_occurred = results.has_error_; + report.error_message = results.error_message_; + report.report_label = results.report_label_; + // This is the total iterations across all threads. + report.iterations = results.iterations; + report.time_unit = b.time_unit(); + report.threads = b.threads(); + report.repetition_index = repetition_index; + report.repetitions = repeats; + + if (!report.error_occurred) { + if (b.use_manual_time()) { + report.real_accumulated_time = results.manual_time_used; + } else { + report.real_accumulated_time = results.real_time_used; + } + report.cpu_accumulated_time = results.cpu_time_used; + report.complexity_n = results.complexity_n; + report.complexity = b.complexity(); + report.complexity_lambda = b.complexity_lambda(); + report.statistics = &b.statistics(); + report.counters = results.counters; + + if (memory_iterations > 0) { + report.has_memory_result = true; + report.allocs_per_iter = + memory_iterations ? static_cast(memory_result.num_allocs) / + memory_iterations + : 0; + report.max_bytes_used = memory_result.max_bytes_used; + } + + internal::Finish(&report.counters, results.iterations, seconds, + b.threads()); + } + return report; +} + +// Execute one thread of benchmark b for the specified number of iterations. +// Adds the stats collected for the thread into manager->results. +void RunInThread(const BenchmarkInstance* b, IterationCount iters, + int thread_id, ThreadManager* manager, + PerfCountersMeasurement* perf_counters_measurement) { + internal::ThreadTimer timer( + b->measure_process_cpu_time() + ? 
internal::ThreadTimer::CreateProcessCpuTime() + : internal::ThreadTimer::Create()); + State st = + b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); + CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) + << "Benchmark returned before State::KeepRunning() returned false!"; + { + MutexLock l(manager->GetBenchmarkMutex()); + internal::ThreadManager::Result& results = manager->results; + results.iterations += st.iterations(); + results.cpu_time_used += timer.cpu_time_used(); + results.real_time_used += timer.real_time_used(); + results.manual_time_used += timer.manual_time_used(); + results.complexity_n += st.complexity_length_n(); + internal::Increment(&results.counters, st.counters); + } + manager->NotifyThreadComplete(); +} + +} // end namespace + +BenchmarkRunner::BenchmarkRunner( + const benchmark::internal::BenchmarkInstance& b_, + BenchmarkReporter::PerFamilyRunReports* reports_for_family_) + : b(b_), + reports_for_family(reports_for_family_), + min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), + repeats(b.repetitions() != 0 ? b.repetitions() + : FLAGS_benchmark_repetitions), + has_explicit_iteration_count(b.iterations() != 0), + pool(b.threads() - 1), + iters(has_explicit_iteration_count ? b.iterations() : 1), + perf_counters_measurement( + PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), + perf_counters_measurement_ptr(perf_counters_measurement.IsValid() + ? 
&perf_counters_measurement + : nullptr) { + run_results.display_report_aggregates_only = + (FLAGS_benchmark_report_aggregates_only || + FLAGS_benchmark_display_aggregates_only); + run_results.file_report_aggregates_only = + FLAGS_benchmark_report_aggregates_only; + if (b.aggregation_report_mode() != internal::ARM_Unspecified) { + run_results.display_report_aggregates_only = + (b.aggregation_report_mode() & + internal::ARM_DisplayReportAggregatesOnly); + run_results.file_report_aggregates_only = + (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); + CHECK(FLAGS_benchmark_perf_counters.empty() || + perf_counters_measurement.IsValid()) + << "Perf counters were requested but could not be set up."; + } +} + +BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { + VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; + + std::unique_ptr manager; + manager.reset(new internal::ThreadManager(b.threads())); + + // Run all but one thread in separate threads + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti + 1), + manager.get(), perf_counters_measurement_ptr); + } + // And run one thread here directly. + // (If we were asked to run just one thread, we don't create new threads.) + // Yes, we need to do this here *after* we start the separate threads. + RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr); + + // The main thread has finished. Now let's wait for the other threads. + manager->WaitForAllThreads(); + for (std::thread& thread : pool) thread.join(); + + IterationResults i; + // Acquire the measurements/counters from the manager, UNDER THE LOCK! + { + MutexLock l(manager->GetBenchmarkMutex()); + i.results = manager->results; + } + + // And get rid of the manager. + manager.reset(); + + // Adjust real/manual time stats since they were reported per thread. 
+ i.results.real_time_used /= b.threads(); + i.results.manual_time_used /= b.threads(); + // If we were measuring whole-process CPU usage, adjust the CPU time too. + if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); + + VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" + << i.results.real_time_used << "\n"; + + // By using KeepRunningBatch a benchmark can iterate more times than + // requested, so take the iteration count from i.results. + i.iters = i.results.iterations / b.threads(); + + // Base decisions off of real time if requested by this benchmark. + i.seconds = i.results.cpu_time_used; + if (b.use_manual_time()) { + i.seconds = i.results.manual_time_used; + } else if (b.use_real_time()) { + i.seconds = i.results.real_time_used; + } + + return i; +} + +IterationCount BenchmarkRunner::PredictNumItersNeeded( + const IterationResults& i) const { + // See how much iterations should be increased by. + // Note: Avoid division by zero with max(seconds, 1ns). + double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); + // If our last run was at least 10% of FLAGS_benchmark_min_time then we + // use the multiplier directly. + // Otherwise we use at most 10 times expansion. + // NOTE: When the last run was at least 10% of the min time the max + // expansion should be 14x. + bool is_significant = (i.seconds / min_time) > 0.1; + multiplier = is_significant ? multiplier : std::min(10.0, multiplier); + if (multiplier <= 1.0) multiplier = 2.0; + + // So what seems to be the sufficiently-large iteration count? Round up. + const IterationCount max_next_iters = static_cast( + std::lround(std::max(multiplier * static_cast(i.iters), + static_cast(i.iters) + 1.0))); + // But we do have *some* sanity limits though.. + const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); + + VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + return next_iters; // round up before conversion to integer. 
+} + +bool BenchmarkRunner::ShouldReportIterationResults( + const IterationResults& i) const { + // Determine if this run should be reported; + // Either it has run for a sufficient amount of time + // or because an error was reported. + return i.results.has_error_ || + i.iters >= kMaxIterations || // Too many iterations already. + i.seconds >= min_time || // The elapsed time is large enough. + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user provided timers are except from this sanity check. + ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); +} + +void BenchmarkRunner::DoOneRepetition() { + assert(HasRepeatsRemaining() && "Already done all repetitions?"); + + const bool is_the_first_repetition = num_repetitions_done == 0; + IterationResults i; + + // We *may* be gradually increasing the length (iteration count) + // of the benchmark until we decide the results are significant. + // And once we do, we report those last results and exit. + // Please do note that the if there are repetitions, the iteration count + // is *only* calculated for the *first* repetition, and other repetitions + // simply use that precomputed iteration count. + for (;;) { + i = DoNIterations(); + + // Do we consider the results to be significant? + // If we are doing repetitions, and the first repetition was already done, + // it has calculated the correct iteration time, so we have run that very + // iteration count just now. No need to calculate anything. Just report. + // Else, the normal rules apply. + const bool results_are_significant = !is_the_first_repetition || + has_explicit_iteration_count || + ShouldReportIterationResults(i); + + if (results_are_significant) break; // Good, let's report them! + + // Nope, bad iteration. Let's re-estimate the hopefully-sufficient + // iteration count, and run the benchmark again... 
+ + iters = PredictNumItersNeeded(i); + assert(iters > i.iters && + "if we did more iterations than we want to do the next time, " + "then we should have accepted the current iteration run."); + } + + // Oh, one last thing, we need to also produce the 'memory measurements'.. + MemoryManager::Result memory_result; + IterationCount memory_iterations = 0; + if (memory_manager != nullptr) { + // Only run a few iterations to reduce the impact of one-time + // allocations in benchmarks that are not properly managed. + memory_iterations = std::min(16, iters); + memory_manager->Start(); + std::unique_ptr manager; + manager.reset(new internal::ThreadManager(1)); + RunInThread(&b, memory_iterations, 0, manager.get(), + perf_counters_measurement_ptr); + manager->WaitForAllThreads(); + manager.reset(); + + memory_manager->Stop(&memory_result); + } + + // Ok, now actualy report. + BenchmarkReporter::Run report = + CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, + num_repetitions_done, repeats); + + if (reports_for_family) { + ++reports_for_family->num_runs_done; + if (!report.error_occurred) reports_for_family->Runs.push_back(report); + } + + run_results.non_aggregates.push_back(report); + + ++num_repetitions_done; +} + +RunResults&& BenchmarkRunner::GetResults() { + assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?"); + + // Calculate additional statistics over the repetitions of this instance. + run_results.aggregates_only = ComputeStats(run_results.non_aggregates); + + return std::move(run_results); +} + +} // end namespace internal + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_runner.h b/libcxx/utils/google-benchmark/src/benchmark_runner.h new file mode 100644 index 000000000000..8a855236b227 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/benchmark_runner.h @@ -0,0 +1,106 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_RUNNER_H_ +#define BENCHMARK_RUNNER_H_ + +#include +#include + +#include "benchmark_api_internal.h" +#include "internal_macros.h" +#include "perf_counters.h" +#include "thread_manager.h" + +DECLARE_double(benchmark_min_time); + +DECLARE_int32(benchmark_repetitions); + +DECLARE_bool(benchmark_report_aggregates_only); + +DECLARE_bool(benchmark_display_aggregates_only); + +DECLARE_string(benchmark_perf_counters); + +namespace benchmark { + +namespace internal { + +extern MemoryManager* memory_manager; + +struct RunResults { + std::vector non_aggregates; + std::vector aggregates_only; + + bool display_report_aggregates_only = false; + bool file_report_aggregates_only = false; +}; + +class BenchmarkRunner { + public: + BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, + BenchmarkReporter::PerFamilyRunReports* reports_for_family); + + int GetNumRepeats() const { return repeats; } + + bool HasRepeatsRemaining() const { + return GetNumRepeats() != num_repetitions_done; + } + + void DoOneRepetition(); + + RunResults&& GetResults(); + + BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const { + return reports_for_family; + }; + + private: + RunResults run_results; + + const benchmark::internal::BenchmarkInstance& b; + BenchmarkReporter::PerFamilyRunReports* reports_for_family; + + const double min_time; + const int repeats; + const bool has_explicit_iteration_count; + + 
int num_repetitions_done = 0; + + std::vector pool; + + IterationCount iters; // preserved between repetitions! + // So only the first repetition has to find/calculate it, + // the other repetitions will just use that precomputed iteration count. + + PerfCountersMeasurement perf_counters_measurement; + PerfCountersMeasurement* const perf_counters_measurement_ptr; + + struct IterationResults { + internal::ThreadManager::Result results; + IterationCount iters; + double seconds; + }; + IterationResults DoNIterations(); + + IterationCount PredictNumItersNeeded(const IterationResults& i) const; + + bool ShouldReportIterationResults(const IterationResults& i) const; +}; + +} // namespace internal + +} // end namespace benchmark + +#endif // BENCHMARK_RUNNER_H_ diff --git a/libcxx/utils/google-benchmark/src/check.h b/libcxx/utils/google-benchmark/src/check.h new file mode 100644 index 000000000000..f5f8253f8040 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/check.h @@ -0,0 +1,82 @@ +#ifndef CHECK_H_ +#define CHECK_H_ + +#include +#include +#include + +#include "internal_macros.h" +#include "log.h" + +namespace benchmark { +namespace internal { + +typedef void(AbortHandlerT)(); + +inline AbortHandlerT*& GetAbortHandler() { + static AbortHandlerT* handler = &std::abort; + return handler; +} + +BENCHMARK_NORETURN inline void CallAbortHandler() { + GetAbortHandler()(); + std::abort(); // fallback to enforce noreturn +} + +// CheckHandler is the class constructed by failing CHECK macros. CheckHandler +// will log information about the failures and abort when it is destructed. +class CheckHandler { + public: + CheckHandler(const char* check, const char* file, const char* func, int line) + : log_(GetErrorLogInstance()) { + log_ << file << ":" << line << ": " << func << ": Check `" << check + << "' failed. 
"; + } + + LogType& GetLog() { return log_; } + + BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { + log_ << std::endl; + CallAbortHandler(); + } + + CheckHandler& operator=(const CheckHandler&) = delete; + CheckHandler(const CheckHandler&) = delete; + CheckHandler() = delete; + + private: + LogType& log_; +}; + +} // end namespace internal +} // end namespace benchmark + +// The CHECK macro returns a std::ostream object that can have extra information +// written to it. +#ifndef NDEBUG +#define CHECK(b) \ + (b ? ::benchmark::internal::GetNullLogInstance() \ + : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ + .GetLog()) +#else +#define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#endif + +// clang-format off +// preserve whitespacing between operators for alignment +#define CHECK_EQ(a, b) CHECK((a) == (b)) +#define CHECK_NE(a, b) CHECK((a) != (b)) +#define CHECK_GE(a, b) CHECK((a) >= (b)) +#define CHECK_LE(a, b) CHECK((a) <= (b)) +#define CHECK_GT(a, b) CHECK((a) > (b)) +#define CHECK_LT(a, b) CHECK((a) < (b)) + +#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps)) +#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps)) +#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps)) +#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps)) +#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps)) +#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps)) +//clang-format on + +#endif // CHECK_H_ diff --git a/libcxx/utils/google-benchmark/src/colorprint.cc b/libcxx/utils/google-benchmark/src/colorprint.cc new file mode 100644 index 000000000000..fff6a98818b8 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/colorprint.cc @@ -0,0 +1,188 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "colorprint.h" + +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#include +#else +#include +#endif // BENCHMARK_OS_WINDOWS + +namespace benchmark { +namespace { +#ifdef BENCHMARK_OS_WINDOWS +typedef WORD PlatformColorCode; +#else +typedef const char* PlatformColorCode; +#endif + +PlatformColorCode GetPlatformColorCode(LogColor color) { +#ifdef BENCHMARK_OS_WINDOWS + switch (color) { + case COLOR_RED: + return FOREGROUND_RED; + case COLOR_GREEN: + return FOREGROUND_GREEN; + case COLOR_YELLOW: + return FOREGROUND_RED | FOREGROUND_GREEN; + case COLOR_BLUE: + return FOREGROUND_BLUE; + case COLOR_MAGENTA: + return FOREGROUND_BLUE | FOREGROUND_RED; + case COLOR_CYAN: + return FOREGROUND_BLUE | FOREGROUND_GREEN; + case COLOR_WHITE: // fall through to default + default: + return 0; + } +#else + switch (color) { + case COLOR_RED: + return "1"; + case COLOR_GREEN: + return "2"; + case COLOR_YELLOW: + return "3"; + case COLOR_BLUE: + return "4"; + case COLOR_MAGENTA: + return "5"; + case COLOR_CYAN: + return "6"; + case COLOR_WHITE: + return "7"; + default: + return nullptr; + }; +#endif +} + +} // end namespace + +std::string FormatString(const char* msg, va_list args) { + // we might need a second shot at this, so pre-emptivly make a copy + va_list args_cp; + va_copy(args_cp, args); + + std::size_t size = 256; + char local_buff[256]; + auto ret = vsnprintf(local_buff, size, msg, args_cp); + + va_end(args_cp); + + // currently there is no 
error handling for failure, so this is hack. + CHECK(ret >= 0); + + if (ret == 0) // handle empty expansion + return {}; + else if (static_cast(ret) < size) + return local_buff; + else { + // we did not provide a long enough buffer on our first attempt. + size = (size_t)ret + 1; // + 1 for the null byte + std::unique_ptr buff(new char[size]); + ret = vsnprintf(buff.get(), size, msg, args); + CHECK(ret > 0 && ((size_t)ret) < size); + return buff.get(); + } +} + +std::string FormatString(const char* msg, ...) { + va_list args; + va_start(args, msg); + auto tmp = FormatString(msg, args); + va_end(args); + return tmp; +} + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + ColorPrintf(out, color, fmt, args); + va_end(args); +} + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, + va_list args) { +#ifdef BENCHMARK_OS_WINDOWS + ((void)out); // suppress unused warning + + const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + + // Gets the current text color. + CONSOLE_SCREEN_BUFFER_INFO buffer_info; + GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); + const WORD old_color_attrs = buffer_info.wAttributes; + + // We need to flush the stream buffers into the console before each + // SetConsoleTextAttribute call lest it affect the text that is already + // printed but has not yet reached the console. + fflush(stdout); + SetConsoleTextAttribute(stdout_handle, + GetPlatformColorCode(color) | FOREGROUND_INTENSITY); + vprintf(fmt, args); + + fflush(stdout); + // Restores the text color. 
+ SetConsoleTextAttribute(stdout_handle, old_color_attrs); +#else + const char* color_code = GetPlatformColorCode(color); + if (color_code) out << FormatString("\033[0;3%sm", color_code); + out << FormatString(fmt, args) << "\033[m"; +#endif +} + +bool IsColorTerminal() { +#if BENCHMARK_OS_WINDOWS + // On Windows the TERM variable is usually not set, but the + // console there does support colors. + return 0 != _isatty(_fileno(stdout)); +#else + // On non-Windows platforms, we rely on the TERM variable. This list of + // supported TERM values is copied from Google Test: + // . + const char* const SUPPORTED_TERM_VALUES[] = { + "xterm", "xterm-color", "xterm-256color", + "screen", "screen-256color", "tmux", + "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color", + "linux", "cygwin", + }; + + const char* const term = getenv("TERM"); + + bool term_supports_color = false; + for (const char* candidate : SUPPORTED_TERM_VALUES) { + if (term && 0 == strcmp(term, candidate)) { + term_supports_color = true; + break; + } + } + + return 0 != isatty(fileno(stdout)) && term_supports_color; +#endif // BENCHMARK_OS_WINDOWS +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/colorprint.h b/libcxx/utils/google-benchmark/src/colorprint.h new file mode 100644 index 000000000000..9f6fab9b3422 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/colorprint.h @@ -0,0 +1,33 @@ +#ifndef BENCHMARK_COLORPRINT_H_ +#define BENCHMARK_COLORPRINT_H_ + +#include +#include +#include + +namespace benchmark { +enum LogColor { + COLOR_DEFAULT, + COLOR_RED, + COLOR_GREEN, + COLOR_YELLOW, + COLOR_BLUE, + COLOR_MAGENTA, + COLOR_CYAN, + COLOR_WHITE +}; + +std::string FormatString(const char* msg, va_list args); +std::string FormatString(const char* msg, ...); + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, + va_list args); +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...); + +// Returns true if stdout appears to be a 
terminal that supports colored +// output, false otherwise. +bool IsColorTerminal(); + +} // end namespace benchmark + +#endif // BENCHMARK_COLORPRINT_H_ diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.cc b/libcxx/utils/google-benchmark/src/commandlineflags.cc new file mode 100644 index 000000000000..5724aaa29402 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/commandlineflags.cc @@ -0,0 +1,286 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "commandlineflags.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../src/string_util.h" + +namespace benchmark { +namespace { + +// Parses 'str' for a 32-bit signed integer. If successful, writes +// the result to *value and returns true; otherwise leaves *value +// unchanged and returns false. +bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) { + // Parses the environment variable as a decimal integer. + char* end = nullptr; + const long long_value = strtol(str, &end, 10); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + std::cerr << src_text << " is expected to be a 32-bit integer, " + << "but actually has value \"" << str << "\".\n"; + return false; + } + + // Is the parsed value in the range of an Int32? 
+ const int32_t result = static_cast(long_value); + if (long_value == std::numeric_limits::max() || + long_value == std::numeric_limits::min() || + // The parsed value overflows as a long. (strtol() returns + // LONG_MAX or LONG_MIN when the input overflows.) + result != long_value + // The parsed value overflows as an Int32. + ) { + std::cerr << src_text << " is expected to be a 32-bit integer, " + << "but actually has value \"" << str << "\", " + << "which overflows.\n"; + return false; + } + + *value = result; + return true; +} + +// Parses 'str' for a double. If successful, writes the result to *value and +// returns true; otherwise leaves *value unchanged and returns false. +bool ParseDouble(const std::string& src_text, const char* str, double* value) { + // Parses the environment variable as a decimal integer. + char* end = nullptr; + const double double_value = strtod(str, &end); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + std::cerr << src_text << " is expected to be a double, " + << "but actually has value \"" << str << "\".\n"; + return false; + } + + *value = double_value; + return true; +} + +// Parses 'str' into KV pairs. If successful, writes the result to *value and +// returns true; otherwise leaves *value unchanged and returns false. 
+bool ParseKvPairs(const std::string& src_text, const char* str, + std::map* value) { + std::map kvs; + for (const auto& kvpair : StrSplit(str, ',')) { + const auto kv = StrSplit(kvpair, '='); + if (kv.size() != 2) { + std::cerr << src_text << " is expected to be a comma-separated list of " + << "= strings, but actually has value \"" << str + << "\".\n"; + return false; + } + if (!kvs.emplace(kv[0], kv[1]).second) { + std::cerr << src_text << " is expected to contain unique keys but key \"" + << kv[0] << "\" was repeated.\n"; + return false; + } + } + + *value = kvs; + return true; +} + +// Returns the name of the environment variable corresponding to the +// given flag. For example, FlagToEnvVar("foo") will return +// "BENCHMARK_FOO" in the open-source version. +static std::string FlagToEnvVar(const char* flag) { + const std::string flag_str(flag); + + std::string env_var; + for (size_t i = 0; i != flag_str.length(); ++i) + env_var += static_cast(::toupper(flag_str.c_str()[i])); + + return env_var; +} + +} // namespace + +bool BoolFromEnv(const char* flag, bool default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + return value_str == nullptr ? 
default_val : IsTruthyFlagValue(value_str); +} + +int32_t Int32FromEnv(const char* flag, int32_t default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + int32_t value = default_val; + if (value_str == nullptr || + !ParseInt32(std::string("Environment variable ") + env_var, value_str, + &value)) { + return default_val; + } + return value; +} + +double DoubleFromEnv(const char* flag, double default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + double value = default_val; + if (value_str == nullptr || + !ParseDouble(std::string("Environment variable ") + env_var, value_str, + &value)) { + return default_val; + } + return value; +} + +const char* StringFromEnv(const char* flag, const char* default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value = getenv(env_var.c_str()); + return value == nullptr ? default_val : value; +} + +std::map KvPairsFromEnv( + const char* flag, std::map default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + + if (value_str == nullptr) return default_val; + + std::map value; + if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) { + return default_val; + } + return value; +} + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or nullptr if the parsing failed. +const char* ParseFlagValue(const char* str, const char* flag, + bool def_optional) { + // str and flag must not be nullptr. + if (str == nullptr || flag == nullptr) return nullptr; + + // The flag must start with "--". 
+ const std::string flag_str = std::string("--") + std::string(flag); + const size_t flag_len = flag_str.length(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr; + + // Skips the flag name. + const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) return flag_end; + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return nullptr; + + // Returns the string after "=". + return flag_end + 1; +} + +bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Converts the string value to a bool. + *value = IsTruthyFlagValue(value_str); + return true; +} + +bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Sets *value to the value of the flag. + return ParseInt32(std::string("The value of flag --") + flag, value_str, + value); +} + +bool ParseDoubleFlag(const char* str, const char* flag, double* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Sets *value to the value of the flag. + return ParseDouble(std::string("The value of flag --") + flag, value_str, + value); +} + +bool ParseStringFlag(const char* str, const char* flag, std::string* value) { + // Gets the value of the flag as a string. 
+ const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + *value = value_str; + return true; +} + +bool ParseKeyValueFlag( + const char* str, const char* flag, + std::map* value) { + const char* const value_str = ParseFlagValue(str, flag, false); + + if (value_str == nullptr) return false; + + for (const auto& kvpair : StrSplit(value_str, ',')) { + const auto kv = StrSplit(kvpair, '='); + if (kv.size() != 2) return false; + value->emplace(kv[0], kv[1]); + } + + return true; +} + +bool IsFlag(const char* str, const char* flag) { + return (ParseFlagValue(str, flag, true) != nullptr); +} + +bool IsTruthyFlagValue(const std::string& value) { + if (value.size() == 1) { + char v = value[0]; + return isalnum(v) && + !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); + } else if (!value.empty()) { + std::string value_lower(value); + std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), + [](char c) { return static_cast(::tolower(c)); }); + return !(value_lower == "false" || value_lower == "no" || + value_lower == "off"); + } else + return true; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.h b/libcxx/utils/google-benchmark/src/commandlineflags.h new file mode 100644 index 000000000000..0c988cccb3ae --- /dev/null +++ b/libcxx/utils/google-benchmark/src/commandlineflags.h @@ -0,0 +1,116 @@ +#ifndef BENCHMARK_COMMANDLINEFLAGS_H_ +#define BENCHMARK_COMMANDLINEFLAGS_H_ + +#include +#include +#include + +// Macro for referencing flags. +#define FLAG(name) FLAGS_##name + +// Macros for declaring flags. 
+#define DECLARE_bool(name) extern bool FLAG(name) +#define DECLARE_int32(name) extern int32_t FLAG(name) +#define DECLARE_double(name) extern double FLAG(name) +#define DECLARE_string(name) extern std::string FLAG(name) +#define DECLARE_kvpairs(name) \ + extern std::map FLAG(name) + +// Macros for defining flags. +#define DEFINE_bool(name, default_val) \ + bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) +#define DEFINE_int32(name, default_val) \ + int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) +#define DEFINE_double(name, default_val) \ + double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) +#define DEFINE_string(name, default_val) \ + std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) +#define DEFINE_kvpairs(name, default_val) \ + std::map FLAG(name) = \ + benchmark::KvPairsFromEnv(#name, default_val) + +namespace benchmark { + +// Parses a bool from the environment variable corresponding to the given flag. +// +// If the variable exists, returns IsTruthyFlagValue() value; if not, +// returns the given default value. +bool BoolFromEnv(const char* flag, bool default_val); + +// Parses an Int32 from the environment variable corresponding to the given +// flag. +// +// If the variable exists, returns ParseInt32() value; if not, returns +// the given default value. +int32_t Int32FromEnv(const char* flag, int32_t default_val); + +// Parses an Double from the environment variable corresponding to the given +// flag. +// +// If the variable exists, returns ParseDouble(); if not, returns +// the given default value. +double DoubleFromEnv(const char* flag, double default_val); + +// Parses a string from the environment variable corresponding to the given +// flag. +// +// If variable exists, returns its value; if not, returns +// the given default value. 
+const char* StringFromEnv(const char* flag, const char* default_val); + +// Parses a set of kvpairs from the environment variable corresponding to the +// given flag. +// +// If variable exists, returns its value; if not, returns +// the given default value. +std::map KvPairsFromEnv( + const char* flag, std::map default_val); + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true if it passes IsTruthyValue(). +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseBoolFlag(const char* str, const char* flag, bool* value); + +// Parses a string for an Int32 flag, in the form of "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); + +// Parses a string for a Double flag, in the form of "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseDoubleFlag(const char* str, const char* flag, double* value); + +// Parses a string for a string flag, in the form of "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, std::string* value); + +// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" +// +// On success, stores the value of the flag in *value and returns true. On +// failure returns false, though *value may have been mutated. +bool ParseKeyValueFlag(const char* str, const char* flag, + std::map* value); + +// Returns true if the string matches the flag. 
+bool IsFlag(const char* str, const char* flag); + +// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or +// some non-alphanumeric character. Also returns false if the value matches +// one of 'no', 'false', 'off' (case-insensitive). As a special case, also +// returns true if value is the empty string. +bool IsTruthyFlagValue(const std::string& value); + +} // end namespace benchmark + +#endif // BENCHMARK_COMMANDLINEFLAGS_H_ diff --git a/libcxx/utils/google-benchmark/src/complexity.cc b/libcxx/utils/google-benchmark/src/complexity.cc new file mode 100644 index 000000000000..29f7c3b03155 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/complexity.cc @@ -0,0 +1,240 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Source project : https://github.com/ismaelJimenez/cpp.leastsq +// Adapted to be used with google benchmark + +#include "benchmark/benchmark.h" + +#include +#include +#include "check.h" +#include "complexity.h" + +namespace benchmark { + +// Internal function to calculate the different scalability forms +BigOFunc* FittingCurve(BigO complexity) { + static const double kLog2E = 1.44269504088896340736; + switch (complexity) { + case oN: + return [](IterationCount n) -> double { return static_cast(n); }; + case oNSquared: + return [](IterationCount n) -> double { return std::pow(n, 2); }; + case oNCubed: + return [](IterationCount n) -> double { return std::pow(n, 3); }; + case oLogN: + /* Note: can't use log2 because Android's GNU STL lacks it */ + return + [](IterationCount n) { return kLog2E * log(static_cast(n)); }; + case oNLogN: + /* Note: can't use log2 because Android's GNU STL lacks it */ + return [](IterationCount n) { + return kLog2E * n * log(static_cast(n)); + }; + case o1: + default: + return [](IterationCount) { return 1.0; }; + } +} + +// Function to return an string for the calculated complexity +std::string GetBigOString(BigO complexity) { + switch (complexity) { + case oN: + return "N"; + case oNSquared: + return "N^2"; + case oNCubed: + return "N^3"; + case oLogN: + return "lgN"; + case oNLogN: + return "NlgN"; + case o1: + return "(1)"; + default: + return "f(N)"; + } +} + +// Find the coefficient for the high-order term in the running time, by +// minimizing the sum of squares of relative error, for the fitting curve +// given by the lambda expression. +// - n : Vector containing the size of the benchmark tests. +// - time : Vector containing the times for the benchmark tests. +// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };). 
+ +// For a deeper explanation on the algorithm logic, please refer to +// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics + +LeastSq MinimalLeastSq(const std::vector& n, + const std::vector& time, + BigOFunc* fitting_curve) { + double sigma_gn_squared = 0.0; + double sigma_time = 0.0; + double sigma_time_gn = 0.0; + + // Calculate least square fitting parameter + for (size_t i = 0; i < n.size(); ++i) { + double gn_i = fitting_curve(n[i]); + sigma_gn_squared += gn_i * gn_i; + sigma_time += time[i]; + sigma_time_gn += time[i] * gn_i; + } + + LeastSq result; + result.complexity = oLambda; + + // Calculate complexity. + result.coef = sigma_time_gn / sigma_gn_squared; + + // Calculate RMS + double rms = 0.0; + for (size_t i = 0; i < n.size(); ++i) { + double fit = result.coef * fitting_curve(n[i]); + rms += pow((time[i] - fit), 2); + } + + // Normalized RMS by the mean of the observed values + double mean = sigma_time / n.size(); + result.rms = sqrt(rms / n.size()) / mean; + + return result; +} + +// Find the coefficient for the high-order term in the running time, by +// minimizing the sum of squares of relative error. +// - n : Vector containing the size of the benchmark tests. +// - time : Vector containing the times for the benchmark tests. +// - complexity : If different than oAuto, the fitting curve will stick to +// this one. If it is oAuto, it will be calculated the best +// fitting curve. 
+LeastSq MinimalLeastSq(const std::vector& n, + const std::vector& time, const BigO complexity) { + CHECK_EQ(n.size(), time.size()); + CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two + // benchmark runs are given + CHECK_NE(complexity, oNone); + + LeastSq best_fit; + + if (complexity == oAuto) { + std::vector fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed}; + + // Take o1 as default best fitting curve + best_fit = MinimalLeastSq(n, time, FittingCurve(o1)); + best_fit.complexity = o1; + + // Compute all possible fitting curves and stick to the best one + for (const auto& fit : fit_curves) { + LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit)); + if (current_fit.rms < best_fit.rms) { + best_fit = current_fit; + best_fit.complexity = fit; + } + } + } else { + best_fit = MinimalLeastSq(n, time, FittingCurve(complexity)); + best_fit.complexity = complexity; + } + + return best_fit; +} + +std::vector ComputeBigO( + const std::vector& reports) { + typedef BenchmarkReporter::Run Run; + std::vector results; + + if (reports.size() < 2) return results; + + // Accumulators. + std::vector n; + std::vector real_time; + std::vector cpu_time; + + // Populate the accumulators. + for (const Run& run : reports) { + CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; + n.push_back(run.complexity_n); + real_time.push_back(run.real_accumulated_time / run.iterations); + cpu_time.push_back(run.cpu_accumulated_time / run.iterations); + } + + LeastSq result_cpu; + LeastSq result_real; + + if (reports[0].complexity == oLambda) { + result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); + result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); + } else { + result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity); + result_real = MinimalLeastSq(n, real_time, result_cpu.complexity); + } + + // Drop the 'args' when reporting complexity. 
+ auto run_name = reports[0].run_name; + run_name.args.clear(); + + // Get the data from the accumulator to BenchmarkReporter::Run's. + Run big_o; + big_o.run_name = run_name; + big_o.family_index = reports[0].family_index; + big_o.per_family_instance_index = reports[0].per_family_instance_index; + big_o.run_type = BenchmarkReporter::Run::RT_Aggregate; + big_o.repetitions = reports[0].repetitions; + big_o.repetition_index = Run::no_repetition_index; + big_o.threads = reports[0].threads; + big_o.aggregate_name = "BigO"; + big_o.report_label = reports[0].report_label; + big_o.iterations = 0; + big_o.real_accumulated_time = result_real.coef; + big_o.cpu_accumulated_time = result_cpu.coef; + big_o.report_big_o = true; + big_o.complexity = result_cpu.complexity; + + // All the time results are reported after being multiplied by the + // time unit multiplier. But since RMS is a relative quantity it + // should not be multiplied at all. So, here, we _divide_ it by the + // multiplier so that when it is multiplied later the result is the + // correct one. + double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); + + // Only add label to mean/stddev if it is same for all runs + Run rms; + rms.run_name = run_name; + rms.family_index = reports[0].family_index; + rms.per_family_instance_index = reports[0].per_family_instance_index; + rms.run_type = BenchmarkReporter::Run::RT_Aggregate; + rms.aggregate_name = "RMS"; + rms.report_label = big_o.report_label; + rms.iterations = 0; + rms.repetition_index = Run::no_repetition_index; + rms.repetitions = reports[0].repetitions; + rms.threads = reports[0].threads; + rms.real_accumulated_time = result_real.rms / multiplier; + rms.cpu_accumulated_time = result_cpu.rms / multiplier; + rms.report_rms = true; + rms.complexity = result_cpu.complexity; + // don't forget to keep the time unit, or we won't be able to + // recover the correct value. 
+ rms.time_unit = reports[0].time_unit; + + results.push_back(big_o); + results.push_back(rms); + return results; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/complexity.h b/libcxx/utils/google-benchmark/src/complexity.h new file mode 100644 index 000000000000..df29b48d29b4 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/complexity.h @@ -0,0 +1,55 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Source project : https://github.com/ismaelJimenez/cpp.leastsq +// Adapted to be used with google benchmark + +#ifndef COMPLEXITY_H_ +#define COMPLEXITY_H_ + +#include +#include + +#include "benchmark/benchmark.h" + +namespace benchmark { + +// Return a vector containing the bigO and RMS information for the specified +// list of reports. If 'reports.size() < 2' an empty vector is returned. +std::vector ComputeBigO( + const std::vector& reports); + +// This data structure will contain the result returned by MinimalLeastSq +// - coef : Estimated coeficient for the high-order term as +// interpolated from data. +// - rms : Normalized Root Mean Squared Error. +// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability +// form has been provided to MinimalLeastSq this will return +// the same value. In case BigO::oAuto has been selected, this +// parameter will return the best fitting curve detected. 
+ +struct LeastSq { + LeastSq() : coef(0.0), rms(0.0), complexity(oNone) {} + + double coef; + double rms; + BigO complexity; +}; + +// Function to return an string for the calculated complexity +std::string GetBigOString(BigO complexity); + +} // end namespace benchmark + +#endif // COMPLEXITY_H_ diff --git a/libcxx/utils/google-benchmark/src/console_reporter.cc b/libcxx/utils/google-benchmark/src/console_reporter.cc new file mode 100644 index 000000000000..6fd764525e81 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/console_reporter.cc @@ -0,0 +1,177 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "check.h" +#include "colorprint.h" +#include "commandlineflags.h" +#include "complexity.h" +#include "counter.h" +#include "internal_macros.h" +#include "string_util.h" +#include "timers.h" + +namespace benchmark { + +bool ConsoleReporter::ReportContext(const Context& context) { + name_field_width_ = context.name_field_width; + printed_header_ = false; + prev_counters_.clear(); + + PrintBasicContext(&GetErrorStream(), context); + +#ifdef BENCHMARK_OS_WINDOWS + if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) { + GetErrorStream() + << "Color printing is only supported for stdout on windows." 
+ " Disabling color printing\n"; + output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color); + } +#endif + + return true; +} + +void ConsoleReporter::PrintHeader(const Run& run) { + std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), + "Benchmark", "Time", "CPU", "Iterations"); + if(!run.counters.empty()) { + if(output_options_ & OO_Tabular) { + for(auto const& c : run.counters) { + str += FormatString(" %10s", c.first.c_str()); + } + } else { + str += " UserCounters..."; + } + } + std::string line = std::string(str.length(), '-'); + GetOutputStream() << line << "\n" << str << "\n" << line << "\n"; +} + +void ConsoleReporter::ReportRuns(const std::vector& reports) { + for (const auto& run : reports) { + // print the header: + // --- if none was printed yet + bool print_header = !printed_header_; + // --- or if the format is tabular and this run + // has different fields from the prev header + print_header |= (output_options_ & OO_Tabular) && + (!internal::SameNames(run.counters, prev_counters_)); + if (print_header) { + printed_header_ = true; + prev_counters_ = run.counters; + PrintHeader(run); + } + // As an alternative to printing the headers like this, we could sort + // the benchmarks by header and then print. But this would require + // waiting for the full results before printing, or printing twice. + PrintRunData(run); + } +} + +static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, + ...) { + va_list args; + va_start(args, fmt); + out << FormatString(fmt, args); + va_end(args); +} + + +static std::string FormatTime(double time) { + // Align decimal places... 
+ if (time < 1.0) { + return FormatString("%10.3f", time); + } + if (time < 10.0) { + return FormatString("%10.2f", time); + } + if (time < 100.0) { + return FormatString("%10.1f", time); + } + return FormatString("%10.0f", time); +} + +void ConsoleReporter::PrintRunData(const Run& result) { + typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); + auto& Out = GetOutputStream(); + PrinterFn* printer = (output_options_ & OO_Color) ? + (PrinterFn*)ColorPrintf : IgnoreColorPrint; + auto name_color = + (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN; + printer(Out, name_color, "%-*s ", name_field_width_, + result.benchmark_name().c_str()); + + if (result.error_occurred) { + printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", + result.error_message.c_str()); + printer(Out, COLOR_DEFAULT, "\n"); + return; + } + + const double real_time = result.GetAdjustedRealTime(); + const double cpu_time = result.GetAdjustedCPUTime(); + const std::string real_time_str = FormatTime(real_time); + const std::string cpu_time_str = FormatTime(cpu_time); + + + if (result.report_big_o) { + std::string big_o = GetBigOString(result.complexity); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(), + cpu_time, big_o.c_str()); + } else if (result.report_rms) { + printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", + cpu_time * 100, "%"); + } else { + const char* timeLabel = GetTimeUnitString(result.time_unit); + printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, + cpu_time_str.c_str(), timeLabel); + } + + if (!result.report_big_o && !result.report_rms) { + printer(Out, COLOR_CYAN, "%10lld", result.iterations); + } + + for (auto& c : result.counters) { + const std::size_t cNameLen = std::max(std::string::size_type(10), + c.first.length()); + auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); + const char* unit = ""; + if (c.second.flags & Counter::kIsRate) + unit = 
(c.second.flags & Counter::kInvert) ? "s" : "/s"; + if (output_options_ & OO_Tabular) { + printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), + unit); + } else { + printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit); + } + } + + if (!result.report_label.empty()) { + printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str()); + } + + printer(Out, COLOR_DEFAULT, "\n"); +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/counter.cc b/libcxx/utils/google-benchmark/src/counter.cc new file mode 100644 index 000000000000..cf5b78ee3ac6 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/counter.cc @@ -0,0 +1,80 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "counter.h" + +namespace benchmark { +namespace internal { + +double Finish(Counter const& c, IterationCount iterations, double cpu_time, + double num_threads) { + double v = c.value; + if (c.flags & Counter::kIsRate) { + v /= cpu_time; + } + if (c.flags & Counter::kAvgThreads) { + v /= num_threads; + } + if (c.flags & Counter::kIsIterationInvariant) { + v *= iterations; + } + if (c.flags & Counter::kAvgIterations) { + v /= iterations; + } + + if (c.flags & Counter::kInvert) { // Invert is *always* last. 
+ v = 1.0 / v; + } + return v; +} + +void Finish(UserCounters* l, IterationCount iterations, double cpu_time, + double num_threads) { + for (auto& c : *l) { + c.second.value = Finish(c.second, iterations, cpu_time, num_threads); + } +} + +void Increment(UserCounters* l, UserCounters const& r) { + // add counters present in both or just in *l + for (auto& c : *l) { + auto it = r.find(c.first); + if (it != r.end()) { + c.second.value = c.second + it->second; + } + } + // add counters present in r, but not in *l + for (auto const& tc : r) { + auto it = l->find(tc.first); + if (it == l->end()) { + (*l)[tc.first] = tc.second; + } + } +} + +bool SameNames(UserCounters const& l, UserCounters const& r) { + if (&l == &r) return true; + if (l.size() != r.size()) { + return false; + } + for (auto const& c : l) { + if (r.find(c.first) == r.end()) { + return false; + } + } + return true; +} + +} // end namespace internal +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/counter.h b/libcxx/utils/google-benchmark/src/counter.h new file mode 100644 index 000000000000..1f5a58e31f0c --- /dev/null +++ b/libcxx/utils/google-benchmark/src/counter.h @@ -0,0 +1,32 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef BENCHMARK_COUNTER_H_ +#define BENCHMARK_COUNTER_H_ + +#include "benchmark/benchmark.h" + +namespace benchmark { + +// these counter-related functions are hidden to reduce API surface. +namespace internal { +void Finish(UserCounters* l, IterationCount iterations, double time, + double num_threads); +void Increment(UserCounters* l, UserCounters const& r); +bool SameNames(UserCounters const& l, UserCounters const& r); +} // end namespace internal + +} // end namespace benchmark + +#endif // BENCHMARK_COUNTER_H_ diff --git a/libcxx/utils/google-benchmark/src/csv_reporter.cc b/libcxx/utils/google-benchmark/src/csv_reporter.cc new file mode 100644 index 000000000000..af2c18fc8a6e --- /dev/null +++ b/libcxx/utils/google-benchmark/src/csv_reporter.cc @@ -0,0 +1,154 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/benchmark.h" +#include "complexity.h" + +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "string_util.h" +#include "timers.h" + +// File format reference: http://edoceo.com/utilitas/csv-file-format. 
+ +namespace benchmark { + +namespace { +std::vector elements = { + "name", "iterations", "real_time", "cpu_time", + "time_unit", "bytes_per_second", "items_per_second", "label", + "error_occurred", "error_message"}; +} // namespace + +std::string CsvEscape(const std::string & s) { + std::string tmp; + tmp.reserve(s.size() + 2); + for (char c : s) { + switch (c) { + case '"' : tmp += "\"\""; break; + default : tmp += c; break; + } + } + return '"' + tmp + '"'; +} + +bool CSVReporter::ReportContext(const Context& context) { + PrintBasicContext(&GetErrorStream(), context); + return true; +} + +void CSVReporter::ReportRuns(const std::vector& reports) { + std::ostream& Out = GetOutputStream(); + + if (!printed_header_) { + // save the names of all the user counters + for (const auto& run : reports) { + for (const auto& cnt : run.counters) { + if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") + continue; + user_counter_names_.insert(cnt.first); + } + } + + // print the header + for (auto B = elements.begin(); B != elements.end();) { + Out << *B++; + if (B != elements.end()) Out << ","; + } + for (auto B = user_counter_names_.begin(); + B != user_counter_names_.end();) { + Out << ",\"" << *B++ << "\""; + } + Out << "\n"; + + printed_header_ = true; + } else { + // check that all the current counters are saved in the name set + for (const auto& run : reports) { + for (const auto& cnt : run.counters) { + if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") + continue; + CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) + << "All counters must be present in each run. 
" + << "Counter named \"" << cnt.first + << "\" was not in a run after being added to the header"; + } + } + } + + // print results for each run + for (const auto& run : reports) { + PrintRunData(run); + } +} + +void CSVReporter::PrintRunData(const Run& run) { + std::ostream& Out = GetOutputStream(); + Out << CsvEscape(run.benchmark_name()) << ","; + if (run.error_occurred) { + Out << std::string(elements.size() - 3, ','); + Out << "true,"; + Out << CsvEscape(run.error_message) << "\n"; + return; + } + + // Do not print iteration on bigO and RMS report + if (!run.report_big_o && !run.report_rms) { + Out << run.iterations; + } + Out << ","; + + Out << run.GetAdjustedRealTime() << ","; + Out << run.GetAdjustedCPUTime() << ","; + + // Do not print timeLabel on bigO and RMS report + if (run.report_big_o) { + Out << GetBigOString(run.complexity); + } else if (!run.report_rms) { + Out << GetTimeUnitString(run.time_unit); + } + Out << ","; + + if (run.counters.find("bytes_per_second") != run.counters.end()) { + Out << run.counters.at("bytes_per_second"); + } + Out << ","; + if (run.counters.find("items_per_second") != run.counters.end()) { + Out << run.counters.at("items_per_second"); + } + Out << ","; + if (!run.report_label.empty()) { + Out << CsvEscape(run.report_label); + } + Out << ",,"; // for error_occurred and error_message + + // Print user counters + for (const auto& ucn : user_counter_names_) { + auto it = run.counters.find(ucn); + if (it == run.counters.end()) { + Out << ","; + } else { + Out << "," << it->second; + } + } + Out << '\n'; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h new file mode 100644 index 000000000000..f22ca9f7d299 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/cycleclock.h @@ -0,0 +1,225 @@ +// ---------------------------------------------------------------------- +// CycleClock +// A CycleClock tells you the current time in Cycles. 
The "time" +// is actually time since power-on. This is like time() but doesn't +// involve a system call and is much more precise. +// +// NOTE: Not all cpu/platform/kernel combinations guarantee that this +// clock increments at a constant rate or is synchronized across all logical +// cpus in a system. +// +// If you need the above guarantees, please consider using a different +// API. There are efforts to provide an interface which provides a millisecond +// granularity and implemented as a memory read. A memory read is generally +// cheaper than the CycleClock for many architectures. +// +// Also, in some out of order CPU implementations, the CycleClock is not +// serializing. So if you're trying to count at cycles granularity, your +// data might be inaccurate due to out of order instruction execution. +// ---------------------------------------------------------------------- + +#ifndef BENCHMARK_CYCLECLOCK_H_ +#define BENCHMARK_CYCLECLOCK_H_ + +#include + +#include "benchmark/benchmark.h" +#include "internal_macros.h" + +#if defined(BENCHMARK_OS_MACOSX) +#include +#endif +// For MSVC, we want to use '_asm rdtsc' when possible (since it works +// with even ancient MSVC compilers), and when not possible the +// __rdtsc intrinsic, declared in . Unfortunately, in some +// environments, and have conflicting +// declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) +extern "C" uint64_t __rdtsc(); +#pragma intrinsic(__rdtsc) +#endif + +#if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW) +#include +#include +#endif + +#ifdef BENCHMARK_OS_EMSCRIPTEN +#include +#endif + +namespace benchmark { +// NOTE: only i386 and x86_64 have been well tested. 
+// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. See also +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h +namespace cycleclock { +// This should return the number of cycles since power-on. Thread-safe. +inline BENCHMARK_ALWAYS_INLINE int64_t Now() { +#if defined(BENCHMARK_OS_MACOSX) + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. + return mach_absolute_time(); +#elif defined(BENCHMARK_OS_EMSCRIPTEN) + // this goes above x86-specific code because old versions of Emscripten + // define __x86_64__, although they have nothing to do with it. + return static_cast(emscripten_get_now() * 1e+6); +#elif defined(__i386__) + int64_t ret; + __asm__ volatile("rdtsc" : "=A"(ret)); + return ret; +#elif defined(__x86_64__) || defined(__amd64__) + uint64_t low, high; + __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + return (high << 32) | low; +#elif defined(__powerpc__) || defined(__ppc__) + // This returns a time-base, which is not always precisely a cycle-count. 
+#if defined(__powerpc64__) || defined(__ppc64__) + int64_t tb; + asm volatile("mfspr %0, 268" : "=r"(tb)); + return tb; +#else + uint32_t tbl, tbu0, tbu1; + asm volatile( + "mftbu %0\n" + "mftb %1\n" + "mftbu %2" + : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed) + return (static_cast(tbu1) << 32) | tbl; +#endif +#elif defined(__sparc__) + int64_t tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r"(tick)); + return tick; +#elif defined(__ia64__) + int64_t itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; +#elif defined(COMPILER_MSVC) && defined(_M_IX86) + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc +#elif defined(COMPILER_MSVC) && defined(_M_ARM64) + // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019 + // and https://reviews.llvm.org/D53115 + int64_t virtual_timer_value; + virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); + return virtual_timer_value; +#elif defined(COMPILER_MSVC) + return __rdtsc(); +#elif defined(BENCHMARK_OS_NACL) + // Native Client validator on x86/x86-64 allows RDTSC instructions, + // and this case is handled above. Native Client validator on ARM + // rejects MRC instructions (used in the ARM-specific sequence below), + // so we handle it here. Portable Native Client compiles to + // architecture-agnostic bytecode, which doesn't provide any + // cycle counter access mnemonics. + + // Native Client does not provide any API to access cycle counter. + // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday + // because is provides nanosecond resolution (which is noticable at + // least for PNaCl modules running on x86 Mac & Linux). 
+ // Initialize to always return 0 if clock_gettime fails. + struct timespec ts = {0, 0}; + clock_gettime(CLOCK_MONOTONIC, &ts); + return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; +#elif defined(__aarch64__) + // System timer of ARMv8 runs at a different frequency than the CPU's. + // The frequency is fixed, typically in the range 1-50MHz. It can be + // read at CNTFRQ special register. We assume the OS has set up + // the virtual timer properly. + int64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; +#elif defined(__ARM_ARCH) + // V6 is the earliest arch that has a standard cyclecount + // Native Client validator doesn't allow MRC instructions. +#if (__ARM_ARCH >= 6) + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__mips__) || defined(__m68k__) + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__loongarch__) + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__s390__) // Covers both s390 and s390x. + // Return the CPU clock. 
+ uint64_t tsc; +#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) + // z/OS XL compiler HLASM syntax. + asm(" stck %0" : "=m"(tsc) : : "cc"); +#else + asm("stck %0" : "=Q"(tsc) : : "cc"); +#endif + return tsc; +#elif defined(__riscv) // RISC-V + // Use RDCYCLE (and RDCYCLEH on riscv32) +#if __riscv_xlen == 32 + uint32_t cycles_lo, cycles_hi0, cycles_hi1; + // This asm also includes the PowerPC overflow handling strategy, as above. + // Implemented in assembly because Clang insisted on branching. + asm volatile( + "rdcycleh %0\n" + "rdcycle %1\n" + "rdcycleh %2\n" + "sub %0, %0, %2\n" + "seqz %0, %0\n" + "sub %0, zero, %0\n" + "and %1, %1, %0\n" + : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); + return (static_cast(cycles_hi1) << 32) | cycles_lo; +#else + uint64_t cycles; + asm volatile("rdcycle %0" : "=r"(cycles)); + return cycles; +#endif +#elif defined(__e2k__) || defined(__elbrus__) + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#else +// The soft failover to a generic implementation is automatic only for ARM. +// For other platforms the developer is expected to make an attempt to create +// a fast implementation and use generic version if nothing better is available. 
+#error You need to define CycleTimer for your OS and CPU +#endif +} +} // end namespace cycleclock +} // end namespace benchmark + +#endif // BENCHMARK_CYCLECLOCK_H_ diff --git a/libcxx/utils/google-benchmark/src/internal_macros.h b/libcxx/utils/google-benchmark/src/internal_macros.h new file mode 100644 index 000000000000..91f367b894bc --- /dev/null +++ b/libcxx/utils/google-benchmark/src/internal_macros.h @@ -0,0 +1,102 @@ +#ifndef BENCHMARK_INTERNAL_MACROS_H_ +#define BENCHMARK_INTERNAL_MACROS_H_ + +#include "benchmark/benchmark.h" + +/* Needed to detect STL */ +#include + +// clang-format off + +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#if defined(__clang__) + #if defined(__ibmxl__) + #if !defined(COMPILER_IBMXL) + #define COMPILER_IBMXL + #endif + #elif !defined(COMPILER_CLANG) + #define COMPILER_CLANG + #endif +#elif defined(_MSC_VER) + #if !defined(COMPILER_MSVC) + #define COMPILER_MSVC + #endif +#elif defined(__GNUC__) + #if !defined(COMPILER_GCC) + #define COMPILER_GCC + #endif +#endif + +#if __has_feature(cxx_attributes) + #define BENCHMARK_NORETURN [[noreturn]] +#elif defined(__GNUC__) + #define BENCHMARK_NORETURN __attribute__((noreturn)) +#elif defined(COMPILER_MSVC) + #define BENCHMARK_NORETURN __declspec(noreturn) +#else + #define BENCHMARK_NORETURN +#endif + +#if defined(__CYGWIN__) + #define BENCHMARK_OS_CYGWIN 1 +#elif defined(_WIN32) + #define BENCHMARK_OS_WINDOWS 1 + #if defined(__MINGW32__) + #define BENCHMARK_OS_MINGW 1 + #endif +#elif defined(__APPLE__) + #define BENCHMARK_OS_APPLE 1 + #include "TargetConditionals.h" + #if defined(TARGET_OS_MAC) + #define BENCHMARK_OS_MACOSX 1 + #if defined(TARGET_OS_IPHONE) + #define BENCHMARK_OS_IOS 1 + #endif + #endif +#elif defined(__FreeBSD__) + #define BENCHMARK_OS_FREEBSD 1 +#elif defined(__NetBSD__) + #define BENCHMARK_OS_NETBSD 1 +#elif defined(__OpenBSD__) + #define BENCHMARK_OS_OPENBSD 1 +#elif defined(__DragonFly__) + #define BENCHMARK_OS_DRAGONFLY 1 +#elif defined(__linux__) 
+ #define BENCHMARK_OS_LINUX 1 +#elif defined(__native_client__) + #define BENCHMARK_OS_NACL 1 +#elif defined(__EMSCRIPTEN__) + #define BENCHMARK_OS_EMSCRIPTEN 1 +#elif defined(__rtems__) + #define BENCHMARK_OS_RTEMS 1 +#elif defined(__Fuchsia__) +#define BENCHMARK_OS_FUCHSIA 1 +#elif defined (__SVR4) && defined (__sun) +#define BENCHMARK_OS_SOLARIS 1 +#elif defined(__QNX__) +#define BENCHMARK_OS_QNX 1 +#elif defined(__MVS__) +#define BENCHMARK_OS_ZOS 1 +#endif + +#if defined(__ANDROID__) && defined(__GLIBCXX__) +#define BENCHMARK_STL_ANDROID_GNUSTL 1 +#endif + +#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \ + && !defined(__EXCEPTIONS) + #define BENCHMARK_HAS_NO_EXCEPTIONS +#endif + +#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) + #define BENCHMARK_MAYBE_UNUSED __attribute__((unused)) +#else + #define BENCHMARK_MAYBE_UNUSED +#endif + +// clang-format on + +#endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/libcxx/utils/google-benchmark/src/json_reporter.cc b/libcxx/utils/google-benchmark/src/json_reporter.cc new file mode 100644 index 000000000000..26898456f854 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/json_reporter.cc @@ -0,0 +1,269 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "benchmark/benchmark.h" +#include "complexity.h" + +#include +#include +#include +#include // for setprecision +#include +#include +#include +#include +#include + +#include "string_util.h" +#include "timers.h" + +namespace benchmark { +namespace internal { +extern std::map* global_context; +} + +namespace { + +std::string StrEscape(const std::string & s) { + std::string tmp; + tmp.reserve(s.size()); + for (char c : s) { + switch (c) { + case '\b': tmp += "\\b"; break; + case '\f': tmp += "\\f"; break; + case '\n': tmp += "\\n"; break; + case '\r': tmp += "\\r"; break; + case '\t': tmp += "\\t"; break; + case '\\': tmp += "\\\\"; break; + case '"' : tmp += "\\\""; break; + default : tmp += c; break; + } + } + return tmp; +} + +std::string FormatKV(std::string const& key, std::string const& value) { + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); +} + +std::string FormatKV(std::string const& key, const char* value) { + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); +} + +std::string FormatKV(std::string const& key, bool value) { + return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); +} + +std::string FormatKV(std::string const& key, int64_t value) { + std::stringstream ss; + ss << '"' << StrEscape(key) << "\": " << value; + return ss.str(); +} + +std::string FormatKV(std::string const& key, IterationCount value) { + std::stringstream ss; + ss << '"' << StrEscape(key) << "\": " << value; + return ss.str(); +} + +std::string FormatKV(std::string const& key, double value) { + std::stringstream ss; + ss << '"' << StrEscape(key) << "\": "; + + if (std::isnan(value)) + ss << (value < 0 ? "-" : "") << "NaN"; + else if (std::isinf(value)) + ss << (value < 0 ? 
"-" : "") << "Infinity"; + else { + const auto max_digits10 = + std::numeric_limits::max_digits10; + const auto max_fractional_digits10 = max_digits10 - 1; + ss << std::scientific << std::setprecision(max_fractional_digits10) + << value; + } + return ss.str(); +} + +int64_t RoundDouble(double v) { return std::lround(v); } + +} // end namespace + +bool JSONReporter::ReportContext(const Context& context) { + std::ostream& out = GetOutputStream(); + + out << "{\n"; + std::string inner_indent(2, ' '); + + // Open context block and print context information. + out << inner_indent << "\"context\": {\n"; + std::string indent(4, ' '); + + std::string walltime_value = LocalDateTimeString(); + out << indent << FormatKV("date", walltime_value) << ",\n"; + + out << indent << FormatKV("host_name", context.sys_info.name) << ",\n"; + + if (Context::executable_name) { + out << indent << FormatKV("executable", Context::executable_name) << ",\n"; + } + + CPUInfo const& info = context.cpu_info; + out << indent << FormatKV("num_cpus", static_cast(info.num_cpus)) + << ",\n"; + out << indent + << FormatKV("mhz_per_cpu", + RoundDouble(info.cycles_per_second / 1000000.0)) + << ",\n"; + if (CPUInfo::Scaling::UNKNOWN != info.scaling) { + out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? 
true : false) + << ",\n"; + } + + out << indent << "\"caches\": [\n"; + indent = std::string(6, ' '); + std::string cache_indent(8, ' '); + for (size_t i = 0; i < info.caches.size(); ++i) { + auto& CI = info.caches[i]; + out << indent << "{\n"; + out << cache_indent << FormatKV("type", CI.type) << ",\n"; + out << cache_indent << FormatKV("level", static_cast(CI.level)) + << ",\n"; + out << cache_indent + << FormatKV("size", static_cast(CI.size)) << ",\n"; + out << cache_indent + << FormatKV("num_sharing", static_cast(CI.num_sharing)) + << "\n"; + out << indent << "}"; + if (i != info.caches.size() - 1) out << ","; + out << "\n"; + } + indent = std::string(4, ' '); + out << indent << "],\n"; + out << indent << "\"load_avg\": ["; + for (auto it = info.load_avg.begin(); it != info.load_avg.end();) { + out << *it++; + if (it != info.load_avg.end()) out << ","; + } + out << "],\n"; + +#if defined(NDEBUG) + const char build_type[] = "release"; +#else + const char build_type[] = "debug"; +#endif + out << indent << FormatKV("library_build_type", build_type) << "\n"; + + if (internal::global_context != nullptr) { + for (const auto& kv: *internal::global_context) { + out << indent << FormatKV(kv.first, kv.second) << "\n"; + } + } + + // Close context block and open the list of benchmarks. + out << inner_indent << "},\n"; + out << inner_indent << "\"benchmarks\": [\n"; + return true; +} + +void JSONReporter::ReportRuns(std::vector const& reports) { + if (reports.empty()) { + return; + } + std::string indent(4, ' '); + std::ostream& out = GetOutputStream(); + if (!first_report_) { + out << ",\n"; + } + first_report_ = false; + + for (auto it = reports.begin(); it != reports.end(); ++it) { + out << indent << "{\n"; + PrintRunData(*it); + out << indent << '}'; + auto it_cp = it; + if (++it_cp != reports.end()) { + out << ",\n"; + } + } +} + +void JSONReporter::Finalize() { + // Close the list of benchmarks and the top level object. 
+ GetOutputStream() << "\n ]\n}\n"; +} + +void JSONReporter::PrintRunData(Run const& run) { + std::string indent(6, ' '); + std::ostream& out = GetOutputStream(); + out << indent << FormatKV("name", run.benchmark_name()) << ",\n"; + out << indent << FormatKV("family_index", run.family_index) << ",\n"; + out << indent + << FormatKV("per_family_instance_index", run.per_family_instance_index) + << ",\n"; + out << indent << FormatKV("run_name", run.run_name.str()) << ",\n"; + out << indent << FormatKV("run_type", [&run]() -> const char* { + switch (run.run_type) { + case BenchmarkReporter::Run::RT_Iteration: + return "iteration"; + case BenchmarkReporter::Run::RT_Aggregate: + return "aggregate"; + } + BENCHMARK_UNREACHABLE(); + }()) << ",\n"; + out << indent << FormatKV("repetitions", run.repetitions) << ",\n"; + if (run.run_type != BenchmarkReporter::Run::RT_Aggregate) { + out << indent << FormatKV("repetition_index", run.repetition_index) + << ",\n"; + } + out << indent << FormatKV("threads", run.threads) << ",\n"; + if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { + out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; + } + if (run.error_occurred) { + out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; + out << indent << FormatKV("error_message", run.error_message) << ",\n"; + } + if (!run.report_big_o && !run.report_rms) { + out << indent << FormatKV("iterations", run.iterations) << ",\n"; + out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; + out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + out << ",\n" + << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + } else if (run.report_big_o) { + out << indent << FormatKV("cpu_coefficient", run.GetAdjustedCPUTime()) + << ",\n"; + out << indent << FormatKV("real_coefficient", run.GetAdjustedRealTime()) + << ",\n"; + out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n"; + out << indent 
<< FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + } else if (run.report_rms) { + out << indent << FormatKV("rms", run.GetAdjustedCPUTime()); + } + + for (auto& c : run.counters) { + out << ",\n" << indent << FormatKV(c.first, c.second); + } + + if (run.has_memory_result) { + out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter); + out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used); + } + + if (!run.report_label.empty()) { + out << ",\n" << indent << FormatKV("label", run.report_label); + } + out << '\n'; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/log.h b/libcxx/utils/google-benchmark/src/log.h new file mode 100644 index 000000000000..47d0c35c0182 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/log.h @@ -0,0 +1,74 @@ +#ifndef BENCHMARK_LOG_H_ +#define BENCHMARK_LOG_H_ + +#include +#include + +#include "benchmark/benchmark.h" + +namespace benchmark { +namespace internal { + +typedef std::basic_ostream&(EndLType)(std::basic_ostream&); + +class LogType { + friend LogType& GetNullLogInstance(); + friend LogType& GetErrorLogInstance(); + + // FIXME: Add locking to output. 
+ template + friend LogType& operator<<(LogType&, Tp const&); + friend LogType& operator<<(LogType&, EndLType*); + + private: + LogType(std::ostream* out) : out_(out) {} + std::ostream* out_; + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); +}; + +template +LogType& operator<<(LogType& log, Tp const& value) { + if (log.out_) { + *log.out_ << value; + } + return log; +} + +inline LogType& operator<<(LogType& log, EndLType* m) { + if (log.out_) { + *log.out_ << m; + } + return log; +} + +inline int& LogLevel() { + static int log_level = 0; + return log_level; +} + +inline LogType& GetNullLogInstance() { + static LogType log(nullptr); + return log; +} + +inline LogType& GetErrorLogInstance() { + static LogType log(&std::clog); + return log; +} + +inline LogType& GetLogInstanceForLevel(int level) { + if (level <= LogLevel()) { + return GetErrorLogInstance(); + } + return GetNullLogInstance(); +} + +} // end namespace internal +} // end namespace benchmark + +// clang-format off +#define VLOG(x) \ + (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ + " ") +// clang-format on +#endif diff --git a/libcxx/utils/google-benchmark/src/mutex.h b/libcxx/utils/google-benchmark/src/mutex.h new file mode 100644 index 000000000000..9cc414ec467e --- /dev/null +++ b/libcxx/utils/google-benchmark/src/mutex.h @@ -0,0 +1,155 @@ +#ifndef BENCHMARK_MUTEX_H_ +#define BENCHMARK_MUTEX_H_ + +#include +#include + +#include "check.h" + +// Enable thread safety attributes only with clang. +// The attributes can be safely erased when compiling with other compilers. 
+#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) +#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op +#endif + +#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) + +#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) + +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) + +#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) + +#define ACQUIRED_BEFORE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) + +#define ACQUIRED_AFTER(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) + +#define REQUIRES(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) + +#define REQUIRES_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) + +#define ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) + +#define ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) + +#define RELEASE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) + +#define RELEASE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) + +#define EXCLUDES(...) 
THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) + +#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) + +#define ASSERT_SHARED_CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) + +#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) + +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) + +namespace benchmark { + +typedef std::condition_variable Condition; + +// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that +// we can annotate them with thread safety attributes and use the +// -Wthread-safety warning with clang. The standard library types cannot be +// used directly because they do not provide the required annotations. +class CAPABILITY("mutex") Mutex { + public: + Mutex() {} + + void lock() ACQUIRE() { mut_.lock(); } + void unlock() RELEASE() { mut_.unlock(); } + std::mutex& native_handle() { return mut_; } + + private: + std::mutex mut_; +}; + +class SCOPED_CAPABILITY MutexLock { + typedef std::unique_lock MutexLockImp; + + public: + MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) {} + ~MutexLock() RELEASE() {} + MutexLockImp& native_handle() { return ml_; } + + private: + MutexLockImp ml_; +}; + +class Barrier { + public: + Barrier(int num_threads) : running_threads_(num_threads) {} + + // Called by each thread + bool wait() EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = createBarrier(ml); + } + if (last_thread) phase_condition_.notify_all(); + return last_thread; + } + + void removeThread() EXCLUDES(lock_) { + MutexLock ml(lock_); + --running_threads_; + if (entered_ != 0) phase_condition_.notify_all(); + } + + private: + Mutex lock_; + Condition phase_condition_; + int running_threads_; + + // State for barrier management + int phase_number_ = 0; + int entered_ = 0; // Number of threads that have entered this barrier + + // Enter the barrier and wait until 
all other threads have also + // entered the barrier. Returns iff this is the last thread to + // enter the barrier. + bool createBarrier(MutexLock& ml) REQUIRES(lock_) { + CHECK_LT(entered_, running_threads_); + entered_++; + if (entered_ < running_threads_) { + // Wait for all threads to enter + int phase_number_cp = phase_number_; + auto cb = [this, phase_number_cp]() { + return this->phase_number_ > phase_number_cp || + entered_ == running_threads_; // A thread has aborted in error + }; + phase_condition_.wait(ml.native_handle(), cb); + if (phase_number_ > phase_number_cp) return false; + // else (running_threads_ == entered_) and we are the last thread. + } + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; + } +}; + +} // end namespace benchmark + +#endif // BENCHMARK_MUTEX_H_ diff --git a/libcxx/utils/google-benchmark/src/perf_counters.cc b/libcxx/utils/google-benchmark/src/perf_counters.cc new file mode 100644 index 000000000000..4ddf0de2502c --- /dev/null +++ b/libcxx/utils/google-benchmark/src/perf_counters.cc @@ -0,0 +1,132 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "perf_counters.h" + +#include +#include + +#if defined HAVE_LIBPFM +#include "perfmon/pfmlib.h" +#include "perfmon/pfmlib_perf_event.h" +#endif + +namespace benchmark { +namespace internal { + +constexpr size_t PerfCounterValues::kMaxCounters; + +#if defined HAVE_LIBPFM +const bool PerfCounters::kSupported = true; + +bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; } + +PerfCounters PerfCounters::Create( + const std::vector& counter_names) { + if (counter_names.empty()) { + return NoCounters(); + } + if (counter_names.size() > PerfCounterValues::kMaxCounters) { + GetErrorLogInstance() + << counter_names.size() + << " counters were requested. The minimum is 1, the maximum is " + << PerfCounterValues::kMaxCounters << "\n"; + return NoCounters(); + } + std::vector counter_ids(counter_names.size()); + + const int mode = PFM_PLM3; // user mode only + for (size_t i = 0; i < counter_names.size(); ++i) { + const bool is_first = i == 0; + struct perf_event_attr attr{}; + attr.size = sizeof(attr); + const int group_id = !is_first ? counter_ids[0] : -1; + const auto& name = counter_names[i]; + if (name.empty()) { + GetErrorLogInstance() << "A counter name was the empty string\n"; + return NoCounters(); + } + pfm_perf_encode_arg_t arg{}; + arg.attr = &attr; + + const int pfm_get = + pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg); + if (pfm_get != PFM_SUCCESS) { + GetErrorLogInstance() << "Unknown counter name: " << name << "\n"; + return NoCounters(); + } + attr.disabled = is_first; + // Note: the man page for perf_event_open suggests inherit = true and + // read_format = PERF_FORMAT_GROUP don't work together, but that's not the + // case. + attr.inherit = true; + attr.pinned = is_first; + attr.exclude_kernel = true; + attr.exclude_user = false; + attr.exclude_hv = true; + // Read all counters in one read.
+ attr.read_format = PERF_FORMAT_GROUP; + + int id = -1; + static constexpr size_t kNrOfSyscallRetries = 5; + // Retry syscall as it was interrupted often (b/64774091). + for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; + ++num_retries) { + id = perf_event_open(&attr, 0, -1, group_id, 0); + if (id >= 0 || errno != EINTR) { + break; + } + } + if (id < 0) { + GetErrorLogInstance() + << "Failed to get a file descriptor for " << name << "\n"; + return NoCounters(); + } + + counter_ids[i] = id; + } + if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) { + GetErrorLogInstance() << "Failed to start counters\n"; + return NoCounters(); + } + + return PerfCounters(counter_names, std::move(counter_ids)); +} + +PerfCounters::~PerfCounters() { + if (counter_ids_.empty()) { + return; + } + ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE); + for (int fd : counter_ids_) { + close(fd); + } +} +#else // defined HAVE_LIBPFM +const bool PerfCounters::kSupported = false; + +bool PerfCounters::Initialize() { return false; } + +PerfCounters PerfCounters::Create( + const std::vector& counter_names) { + if (!counter_names.empty()) { + GetErrorLogInstance() << "Performance counters not supported."; + } + return NoCounters(); +} + +PerfCounters::~PerfCounters() = default; +#endif // defined HAVE_LIBPFM +} // namespace internal +} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/perf_counters.h b/libcxx/utils/google-benchmark/src/perf_counters.h new file mode 100644 index 000000000000..b6629b99070b --- /dev/null +++ b/libcxx/utils/google-benchmark/src/perf_counters.h @@ -0,0 +1,172 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_PERF_COUNTERS_H +#define BENCHMARK_PERF_COUNTERS_H + +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "check.h" +#include "log.h" + +#ifndef BENCHMARK_OS_WINDOWS +#include +#endif + +namespace benchmark { +namespace internal { + +// Typically, we can only read a small number of counters. There is also a +// padding preceding counter values, when reading multiple counters with one +// syscall (which is desirable). PerfCounterValues abstracts these details. +// The implementation ensures the storage is inlined, and allows 0-based +// indexing into the counter values. +// The object is used in conjunction with a PerfCounters object, by passing it +// to Snapshot(). The values are populated such that +// perfCounters->names()[i]'s value is obtained at position i (as given by +// operator[]) of this object. +class PerfCounterValues { + public: + explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { + CHECK_LE(nr_counters_, kMaxCounters); + } + + uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; } + + static constexpr size_t kMaxCounters = 3; + + private: + friend class PerfCounters; + // Get the byte buffer in which perf counters can be captured. + // This is used by PerfCounters::Read + std::pair get_data_buffer() { + return {reinterpret_cast(values_.data()), + sizeof(uint64_t) * (kPadding + nr_counters_)}; + } + + static constexpr size_t kPadding = 1; + std::array values_; + const size_t nr_counters_; +}; + +// Collect PMU counters. 
The object, once constructed, is ready to be used by +// calling read(). PMU counter collection is enabled from the time create() is +// called, to obtain the object, until the object's destructor is called. +class PerfCounters final { + public: + // True iff this platform supports performance counters. + static const bool kSupported; + + bool IsValid() const { return is_valid_; } + static PerfCounters NoCounters() { return PerfCounters(); } + + ~PerfCounters(); + PerfCounters(PerfCounters&&) = default; + PerfCounters(const PerfCounters&) = delete; + + // Platform-specific implementations may choose to do some library + // initialization here. + static bool Initialize(); + + // Return a PerfCounters object ready to read the counters with the names + // specified. The values are user-mode only. The counter name format is + // implementation and OS specific. + // TODO: once we move to C++-17, this should be a std::optional, and then the + // IsValid() boolean can be dropped. + static PerfCounters Create(const std::vector& counter_names); + + // Take a snapshot of the current value of the counters into the provided + // valid PerfCounterValues storage. 
The values are populated such that: + // names()[i]'s value is (*values)[i] + BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { +#ifndef BENCHMARK_OS_WINDOWS + assert(values != nullptr); + assert(IsValid()); + auto buffer = values->get_data_buffer(); + auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); + return static_cast(read_bytes) == buffer.second; +#else + (void)values; + return false; +#endif + } + + const std::vector& names() const { return counter_names_; } + size_t num_counters() const { return counter_names_.size(); } + + private: + PerfCounters(const std::vector& counter_names, + std::vector&& counter_ids) + : counter_ids_(std::move(counter_ids)), + counter_names_(counter_names), + is_valid_(true) {} + PerfCounters() : is_valid_(false) {} + + std::vector counter_ids_; + const std::vector counter_names_; + const bool is_valid_; +}; + +// Typical usage of the above primitives. +class PerfCountersMeasurement final { + public: + PerfCountersMeasurement(PerfCounters&& c) + : counters_(std::move(c)), + start_values_(counters_.IsValid() ? counters_.names().size() : 0), + end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} + + bool IsValid() const { return counters_.IsValid(); } + + BENCHMARK_ALWAYS_INLINE void Start() { + assert(IsValid()); + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + counters_.Snapshot(&start_values_); + ClobberMemory(); + } + + BENCHMARK_ALWAYS_INLINE std::vector> + StopAndGetMeasurements() { + assert(IsValid()); + // Tell the compiler to not move instructions above/below where we take + // the snapshot. 
+ ClobberMemory(); + counters_.Snapshot(&end_values_); + ClobberMemory(); + + std::vector> ret; + for (size_t i = 0; i < counters_.names().size(); ++i) { + double measurement = static_cast(end_values_[i]) - + static_cast(start_values_[i]); + ret.push_back({counters_.names()[i], measurement}); + } + return ret; + } + + private: + PerfCounters counters_; + PerfCounterValues start_values_; + PerfCounterValues end_values_; +}; + +BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize(); + +} // namespace internal +} // namespace benchmark + +#endif // BENCHMARK_PERF_COUNTERS_H diff --git a/libcxx/utils/google-benchmark/src/re.h b/libcxx/utils/google-benchmark/src/re.h new file mode 100644 index 000000000000..fbe25037b463 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/re.h @@ -0,0 +1,158 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_RE_H_ +#define BENCHMARK_RE_H_ + +#include "internal_macros.h" + +// clang-format off + +#if !defined(HAVE_STD_REGEX) && \ + !defined(HAVE_GNU_POSIX_REGEX) && \ + !defined(HAVE_POSIX_REGEX) + // No explicit regex selection; detect based on builtin hints. + #if defined(BENCHMARK_OS_LINUX) || defined(BENCHMARK_OS_APPLE) + #define HAVE_POSIX_REGEX 1 + #elif __cplusplus >= 199711L + #define HAVE_STD_REGEX 1 + #endif +#endif + +// Prefer C regex libraries when compiling w/o exceptions so that we can +// correctly report errors. 
+#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \ + defined(BENCHMARK_HAVE_STD_REGEX) && \ + (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX)) + #undef HAVE_STD_REGEX +#endif + +#if defined(HAVE_STD_REGEX) + #include +#elif defined(HAVE_GNU_POSIX_REGEX) + #include +#elif defined(HAVE_POSIX_REGEX) + #include +#else +#error No regular expression backend was found! +#endif + +// clang-format on + +#include + +#include "check.h" + +namespace benchmark { + +// A wrapper around the POSIX regular expression API that provides automatic +// cleanup +class Regex { + public: + Regex() : init_(false) {} + + ~Regex(); + + // Compile a regular expression matcher from spec. Returns true on success. + // + // On failure (and if error is not nullptr), error is populated with a human + // readable error message if an error occurs. + bool Init(const std::string& spec, std::string* error); + + // Returns whether str matches the compiled regular expression. + bool Match(const std::string& str); + + private: + bool init_; +// Underlying regular expression object +#if defined(HAVE_STD_REGEX) + std::regex re_; +#elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX) + regex_t re_; +#else +#error No regular expression backend implementation available +#endif +}; + +#if defined(HAVE_STD_REGEX) + +inline bool Regex::Init(const std::string& spec, std::string* error) { +#ifdef BENCHMARK_HAS_NO_EXCEPTIONS + ((void)error); // suppress unused warning +#else + try { +#endif + re_ = std::regex(spec, std::regex_constants::extended); + init_ = true; +#ifndef BENCHMARK_HAS_NO_EXCEPTIONS +} +catch (const std::regex_error& e) { + if (error) { + *error = e.what(); + } +} +#endif +return init_; +} + +inline Regex::~Regex() {} + +inline bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + return std::regex_search(str, re_); +} + +#else +inline bool Regex::Init(const std::string& spec, std::string* error) { + int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | 
REG_NOSUB); + if (ec != 0) { + if (error) { + size_t needed = regerror(ec, &re_, nullptr, 0); + char* errbuf = new char[needed]; + regerror(ec, &re_, errbuf, needed); + + // regerror returns the number of bytes necessary to null terminate + // the string, so we move that when assigning to error. + CHECK_NE(needed, 0); + error->assign(errbuf, needed - 1); + + delete[] errbuf; + } + + return false; + } + + init_ = true; + return true; +} + +inline Regex::~Regex() { + if (init_) { + regfree(&re_); + } +} + +inline bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; +} +#endif + +} // end namespace benchmark + +#endif // BENCHMARK_RE_H_ diff --git a/libcxx/utils/google-benchmark/src/reporter.cc b/libcxx/utils/google-benchmark/src/reporter.cc new file mode 100644 index 000000000000..14dd40dc72f4 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/reporter.cc @@ -0,0 +1,116 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "benchmark/benchmark.h" +#include "timers.h" + +#include + +#include +#include +#include +#include +#include + +#include "check.h" +#include "string_util.h" + +namespace benchmark { +namespace internal { +extern std::map* global_context; +} + +BenchmarkReporter::BenchmarkReporter() + : output_stream_(&std::cout), error_stream_(&std::cerr) {} + +BenchmarkReporter::~BenchmarkReporter() {} + +void BenchmarkReporter::PrintBasicContext(std::ostream *out, + Context const &context) { + CHECK(out) << "cannot be null"; + auto &Out = *out; + + Out << LocalDateTimeString() << "\n"; + + if (context.executable_name) + Out << "Running " << context.executable_name << "\n"; + + const CPUInfo &info = context.cpu_info; + Out << "Run on (" << info.num_cpus << " X " + << (info.cycles_per_second / 1000000.0) << " MHz CPU " + << ((info.num_cpus > 1) ? "s" : "") << ")\n"; + if (info.caches.size() != 0) { + Out << "CPU Caches:\n"; + for (auto &CInfo : info.caches) { + Out << " L" << CInfo.level << " " << CInfo.type << " " + << (CInfo.size / 1024) << " KiB"; + if (CInfo.num_sharing != 0) + Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")"; + Out << "\n"; + } + } + if (!info.load_avg.empty()) { + Out << "Load Average: "; + for (auto It = info.load_avg.begin(); It != info.load_avg.end();) { + Out << StrFormat("%.2f", *It++); + if (It != info.load_avg.end()) Out << ", "; + } + Out << "\n"; + } + + if (internal::global_context != nullptr) { + for (const auto& kv: *internal::global_context) { + Out << kv.first << ": " << kv.second << "\n"; + } + } + + if (CPUInfo::Scaling::ENABLED == info.scaling) { + Out << "***WARNING*** CPU scaling is enabled, the benchmark " + "real time measurements may be noisy and will incur extra " + "overhead.\n"; + } + +#ifndef NDEBUG + Out << "***WARNING*** Library was built as DEBUG. Timings may be " + "affected.\n"; +#endif +} + +// No initializer because it's already initialized to NULL. 
+const char *BenchmarkReporter::Context::executable_name; + +BenchmarkReporter::Context::Context() + : cpu_info(CPUInfo::Get()), sys_info(SystemInfo::Get()) {} + +std::string BenchmarkReporter::Run::benchmark_name() const { + std::string name = run_name.str(); + if (run_type == RT_Aggregate) { + name += "_" + aggregate_name; + } + return name; +} + +double BenchmarkReporter::Run::GetAdjustedRealTime() const { + double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); + if (iterations != 0) new_time /= static_cast(iterations); + return new_time; +} + +double BenchmarkReporter::Run::GetAdjustedCPUTime() const { + double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit); + if (iterations != 0) new_time /= static_cast(iterations); + return new_time; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/sleep.cc b/libcxx/utils/google-benchmark/src/sleep.cc new file mode 100644 index 000000000000..4609d540eade --- /dev/null +++ b/libcxx/utils/google-benchmark/src/sleep.cc @@ -0,0 +1,67 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sleep.h" + +#include +#include +#include + +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#endif + +#ifdef BENCHMARK_OS_ZOS +#include +#endif + +namespace benchmark { +#ifdef BENCHMARK_OS_WINDOWS +// Window's Sleep takes milliseconds argument. 
+void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } +void SleepForSeconds(double seconds) { + SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); +} +#else // BENCHMARK_OS_WINDOWS +void SleepForMicroseconds(int microseconds) { +#ifdef BENCHMARK_OS_ZOS + // z/OS does not support nanosleep. Instead call sleep() and then usleep() to + // sleep for the remaining microseconds because usleep() will fail if its + // argument is greater than 1000000. + div_t sleepTime = div(microseconds, kNumMicrosPerSecond); + int seconds = sleepTime.quot; + while (seconds != 0) + seconds = sleep(seconds); + while (usleep(sleepTime.rem) == -1 && errno == EINTR) + ; +#else + struct timespec sleep_time; + sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; + sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +#endif +} + +void SleepForMilliseconds(int milliseconds) { + SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); +} + +void SleepForSeconds(double seconds) { + SleepForMicroseconds(static_cast(seconds * kNumMicrosPerSecond)); +} +#endif // BENCHMARK_OS_WINDOWS +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/sleep.h b/libcxx/utils/google-benchmark/src/sleep.h new file mode 100644 index 000000000000..f98551afe284 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/sleep.h @@ -0,0 +1,15 @@ +#ifndef BENCHMARK_SLEEP_H_ +#define BENCHMARK_SLEEP_H_ + +namespace benchmark { +const int kNumMillisPerSecond = 1000; +const int kNumMicrosPerMilli = 1000; +const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000; +const int kNumNanosPerMicro = 1000; +const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; + +void SleepForMilliseconds(int milliseconds); +void SleepForSeconds(double seconds); +} // end namespace benchmark + +#endif // BENCHMARK_SLEEP_H_ 
diff --git a/libcxx/utils/google-benchmark/src/statistics.cc b/libcxx/utils/google-benchmark/src/statistics.cc new file mode 100644 index 000000000000..57472b9ff99b --- /dev/null +++ b/libcxx/utils/google-benchmark/src/statistics.cc @@ -0,0 +1,195 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// Copyright 2017 Roman Lebedev. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/benchmark.h" + +#include +#include +#include +#include +#include +#include "check.h" +#include "statistics.h" + +namespace benchmark { + +auto StatisticsSum = [](const std::vector& v) { + return std::accumulate(v.begin(), v.end(), 0.0); +}; + +double StatisticsMean(const std::vector& v) { + if (v.empty()) return 0.0; + return StatisticsSum(v) * (1.0 / v.size()); +} + +double StatisticsMedian(const std::vector& v) { + if (v.size() < 3) return StatisticsMean(v); + std::vector copy(v); + + auto center = copy.begin() + v.size() / 2; + std::nth_element(copy.begin(), center, copy.end()); + + // did we have an odd number of samples? 
+ // if yes, then center is the median + // if no, then we are looking for the average between center and the value + // before + if (v.size() % 2 == 1) return *center; + auto center2 = copy.begin() + v.size() / 2 - 1; + std::nth_element(copy.begin(), center2, copy.end()); + return (*center + *center2) / 2.0; +} + +// Return the sum of the squares of this sample set +auto SumSquares = [](const std::vector& v) { + return std::inner_product(v.begin(), v.end(), v.begin(), 0.0); +}; + +auto Sqr = [](const double dat) { return dat * dat; }; +auto Sqrt = [](const double dat) { + // Avoid NaN due to imprecision in the calculations + if (dat < 0.0) return 0.0; + return std::sqrt(dat); +}; + +double StatisticsStdDev(const std::vector& v) { + const auto mean = StatisticsMean(v); + if (v.empty()) return mean; + + // Sample standard deviation is undefined for n = 1 + if (v.size() == 1) return 0.0; + + const double avg_squares = SumSquares(v) * (1.0 / v.size()); + return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); +} + +std::vector ComputeStats( + const std::vector& reports) { + typedef BenchmarkReporter::Run Run; + std::vector results; + + auto error_count = + std::count_if(reports.begin(), reports.end(), + [](Run const& run) { return run.error_occurred; }); + + if (reports.size() - error_count < 2) { + // We don't report aggregated data if there was a single run. + return results; + } + + // Accumulators. + std::vector real_accumulated_time_stat; + std::vector cpu_accumulated_time_stat; + + real_accumulated_time_stat.reserve(reports.size()); + cpu_accumulated_time_stat.reserve(reports.size()); + + // All repetitions should be run with the same number of iterations so we + // can take this information from the first benchmark.
+ const IterationCount run_iterations = reports.front().iterations; + // create stats for user counters + struct CounterStat { + Counter c; + std::vector s; + }; + std::map counter_stats; + for (Run const& r : reports) { + for (auto const& cnt : r.counters) { + auto it = counter_stats.find(cnt.first); + if (it == counter_stats.end()) { + counter_stats.insert({cnt.first, {cnt.second, std::vector{}}}); + it = counter_stats.find(cnt.first); + it->second.s.reserve(reports.size()); + } else { + CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + } + } + } + + // Populate the accumulators. + for (Run const& run : reports) { + CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); + CHECK_EQ(run_iterations, run.iterations); + if (run.error_occurred) continue; + real_accumulated_time_stat.emplace_back(run.real_accumulated_time); + cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); + // user counters + for (auto const& cnt : run.counters) { + auto it = counter_stats.find(cnt.first); + CHECK_NE(it, counter_stats.end()); + it->second.s.emplace_back(cnt.second); + } + } + + // Only add label if it is same for all runs + std::string report_label = reports[0].report_label; + for (std::size_t i = 1; i < reports.size(); i++) { + if (reports[i].report_label != report_label) { + report_label = ""; + break; + } + } + + const double iteration_rescale_factor = + double(reports.size()) / double(run_iterations); + + for (const auto& Stat : *reports[0].statistics) { + // Get the data from the accumulator to BenchmarkReporter::Run's. 
+ Run data; + data.run_name = reports[0].run_name; + data.family_index = reports[0].family_index; + data.per_family_instance_index = reports[0].per_family_instance_index; + data.run_type = BenchmarkReporter::Run::RT_Aggregate; + data.threads = reports[0].threads; + data.repetitions = reports[0].repetitions; + data.repetition_index = Run::no_repetition_index; + data.aggregate_name = Stat.name_; + data.report_label = report_label; + + // It is incorrect to say that an aggregate is computed over + // run's iterations, because those iterations already got averaged. + // Similarly, if there are N repetitions with 1 iterations each, + // an aggregate will be computed over N measurements, not 1. + // Thus it is best to simply use the count of separate reports. + data.iterations = reports.size(); + + data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); + data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); + + // We will divide these times by data.iterations when reporting, but the + // data.iterations is not necessarily the scale of these measurements, + // because in each repetition, these timers are sum over all the iterations. + // And if we want to say that the stats are over N repetitions and not + // M iterations, we need to multiply these by (N/M). + data.real_accumulated_time *= iteration_rescale_factor; + data.cpu_accumulated_time *= iteration_rescale_factor; + + data.time_unit = reports[0].time_unit; + + // user counters + for (auto const& kv : counter_stats) { + // Do *NOT* rescale the custom counters. They are already properly scaled.
+ const auto uc_stat = Stat.compute_(kv.second.s); + auto c = Counter(uc_stat, counter_stats[kv.first].c.flags, + counter_stats[kv.first].c.oneK); + data.counters[kv.first] = c; + } + + results.push_back(data); + } + + return results; +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/statistics.h b/libcxx/utils/google-benchmark/src/statistics.h new file mode 100644 index 000000000000..7eccc85536a5 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/statistics.h @@ -0,0 +1,37 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// Copyright 2017 Roman Lebedev. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STATISTICS_H_ +#define STATISTICS_H_ + +#include + +#include "benchmark/benchmark.h" + +namespace benchmark { + +// Return a vector containing the mean, median and standard devation information +// (and any user-specified info) for the specified list of reports. 
If 'reports' +// contains less than two non-errored runs an empty vector is returned +std::vector ComputeStats( + const std::vector& reports); + +double StatisticsMean(const std::vector& v); +double StatisticsMedian(const std::vector& v); +double StatisticsStdDev(const std::vector& v); + +} // end namespace benchmark + +#endif // STATISTICS_H_ diff --git a/libcxx/utils/google-benchmark/src/string_util.cc b/libcxx/utils/google-benchmark/src/string_util.cc new file mode 100644 index 000000000000..3551418174fd --- /dev/null +++ b/libcxx/utils/google-benchmark/src/string_util.cc @@ -0,0 +1,268 @@ +#include "string_util.h" + +#include +#ifdef BENCHMARK_STL_ANDROID_GNUSTL +#include +#endif +#include +#include +#include +#include +#include + +#include "arraysize.h" + +namespace benchmark { +namespace { + +// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. +const char kBigSIUnits[] = "kMGTPEZY"; +// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. +const char kBigIECUnits[] = "KMGTPEZY"; +// milli, micro, nano, pico, femto, atto, zepto, yocto. +const char kSmallSIUnits[] = "munpfazy"; + +// We require that all three arrays have the same size. +static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), + "SI and IEC unit arrays must be the same size"); +static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), + "Small SI and Big SI unit arrays must be the same size"); + +static const int64_t kUnitsSize = arraysize(kBigSIUnits); + +void ToExponentAndMantissa(double val, double thresh, int precision, + double one_k, std::string* mantissa, + int64_t* exponent) { + std::stringstream mantissa_stream; + + if (val < 0) { + mantissa_stream << "-"; + val = -val; + } + + // Adjust threshold so that it never excludes things which can't be rendered + // in 'precision' digits. 
+ const double adjusted_threshold = + std::max(thresh, 1.0 / std::pow(10.0, precision)); + const double big_threshold = adjusted_threshold * one_k; + const double small_threshold = adjusted_threshold; + // Values in ]simple_threshold,small_threshold[ will be printed as-is + const double simple_threshold = 0.01; + + if (val > big_threshold) { + // Positive powers + double scaled = val; + for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) { + scaled /= one_k; + if (scaled <= big_threshold) { + mantissa_stream << scaled; + *exponent = i + 1; + *mantissa = mantissa_stream.str(); + return; + } + } + mantissa_stream << val; + *exponent = 0; + } else if (val < small_threshold) { + // Negative powers + if (val < simple_threshold) { + double scaled = val; + for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) { + scaled *= one_k; + if (scaled >= small_threshold) { + mantissa_stream << scaled; + *exponent = -static_cast(i + 1); + *mantissa = mantissa_stream.str(); + return; + } + } + } + mantissa_stream << val; + *exponent = 0; + } else { + mantissa_stream << val; + *exponent = 0; + } + *mantissa = mantissa_stream.str(); +} + +std::string ExponentToPrefix(int64_t exponent, bool iec) { + if (exponent == 0) return ""; + + const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); + if (index >= kUnitsSize) return ""; + + const char* array = + (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); + if (iec) + return array[index] + std::string("i"); + else + return std::string(1, array[index]); +} + +std::string ToBinaryStringFullySpecified(double value, double threshold, + int precision, double one_k = 1024.0) { + std::string mantissa; + int64_t exponent; + ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa, + &exponent); + return mantissa + ExponentToPrefix(exponent, false); +} + +} // end namespace + +void AppendHumanReadable(int n, std::string* str) { + std::stringstream ss; + // Round down to the nearest SI prefix. 
+ ss << ToBinaryStringFullySpecified(n, 1.0, 0); + *str += ss.str(); +} + +std::string HumanReadableNumber(double n, double one_k) { + // 1.1 means that figures up to 1.1k should be shown with the next unit down; + // this softens edge effects. + // 1 means that we should show one decimal place of precision. + return ToBinaryStringFullySpecified(n, 1.1, 1, one_k); +} + +std::string StrFormatImp(const char* msg, va_list args) { + // we might need a second shot at this, so pre-emptively make a copy + va_list args_cp; + va_copy(args_cp, args); + + // TODO(ericwf): use std::array for first attempt to avoid one memory + // allocation; guess what the size might be + std::array local_buff; + std::size_t size = local_buff.size(); + // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation + // in the android-ndk + auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); + + va_end(args_cp); + + // handle empty expansion + if (ret == 0) return std::string{}; + if (static_cast(ret) < size) + return std::string(local_buff.data()); + + // we did not provide a long enough buffer on our first attempt. + // add 1 to size to account for null-byte in size cast to prevent overflow + size = static_cast(ret) + 1; + auto buff_ptr = std::unique_ptr(new char[size]); + // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation + // in the android-ndk + ret = vsnprintf(buff_ptr.get(), size, msg, args); + return std::string(buff_ptr.get()); +} + +std::string StrFormat(const char* format, ...)
{ + va_list args; + va_start(args, format); + std::string tmp = StrFormatImp(format, args); + va_end(args); + return tmp; +} + +std::vector StrSplit(const std::string& str, char delim) { + if (str.empty()) return {}; + std::vector ret; + size_t first = 0; + size_t next = str.find(delim); + for (; next != std::string::npos; + first = next + 1, next = str.find(delim, first)) { + ret.push_back(str.substr(first, next - first)); + } + ret.push_back(str.substr(first)); + return ret; +} + +#ifdef BENCHMARK_STL_ANDROID_GNUSTL +/* + * GNU STL in Android NDK lacks support for some C++11 functions, including + * stoul, stoi, stod. We reimplement them here using C functions strtoul, + * strtol, strtod. Note that reimplemented functions are in benchmark:: + * namespace, not std:: namespace. + */ +unsigned long stoul(const std::string& str, size_t* pos, int base) { + /* Record previous errno */ + const int oldErrno = errno; + errno = 0; + + const char* strStart = str.c_str(); + char* strEnd = const_cast(strStart); + const unsigned long result = strtoul(strStart, &strEnd, base); + + const int strtoulErrno = errno; + /* Restore previous errno */ + errno = oldErrno; + + /* Check for errors and return */ + if (strtoulErrno == ERANGE) { + throw std::out_of_range( + "stoul failed: " + str + " is outside of range of unsigned long"); + } else if (strEnd == strStart || strtoulErrno != 0) { + throw std::invalid_argument( + "stoul failed: " + str + " is not an integer"); + } + if (pos != nullptr) { + *pos = static_cast(strEnd - strStart); + } + return result; +} + +int stoi(const std::string& str, size_t* pos, int base) { + /* Record previous errno */ + const int oldErrno = errno; + errno = 0; + + const char* strStart = str.c_str(); + char* strEnd = const_cast(strStart); + const long result = strtol(strStart, &strEnd, base); + + const int strtolErrno = errno; + /* Restore previous errno */ + errno = oldErrno; + + /* Check for errors and return */ + if (strtolErrno == ERANGE || 
long(int(result)) != result) { + throw std::out_of_range( + "stoul failed: " + str + " is outside of range of int"); + } else if (strEnd == strStart || strtolErrno != 0) { + throw std::invalid_argument( + "stoul failed: " + str + " is not an integer"); + } + if (pos != nullptr) { + *pos = static_cast(strEnd - strStart); + } + return int(result); +} + +double stod(const std::string& str, size_t* pos) { + /* Record previous errno */ + const int oldErrno = errno; + errno = 0; + + const char* strStart = str.c_str(); + char* strEnd = const_cast(strStart); + const double result = strtod(strStart, &strEnd); + + /* Restore previous errno */ + const int strtodErrno = errno; + errno = oldErrno; + + /* Check for errors and return */ + if (strtodErrno == ERANGE) { + throw std::out_of_range( + "stoul failed: " + str + " is outside of range of int"); + } else if (strEnd == strStart || strtodErrno != 0) { + throw std::invalid_argument( + "stoul failed: " + str + " is not an integer"); + } + if (pos != nullptr) { + *pos = static_cast(strEnd - strStart); + } + return result; +} +#endif + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/string_util.h b/libcxx/utils/google-benchmark/src/string_util.h new file mode 100644 index 000000000000..6bc28b6912a8 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/string_util.h @@ -0,0 +1,61 @@ +#ifndef BENCHMARK_STRING_UTIL_H_ +#define BENCHMARK_STRING_UTIL_H_ + +#include +#include +#include +#include "internal_macros.h" + +namespace benchmark { + +void AppendHumanReadable(int n, std::string* str); + +std::string HumanReadableNumber(double n, double one_k = 1024.0); + +#if defined(__MINGW32__) +__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2))) +#elif defined(__GNUC__) +__attribute__((format(printf, 1, 2))) +#endif +std::string +StrFormat(const char* format, ...); + +inline std::ostream& StrCatImp(std::ostream& out) BENCHMARK_NOEXCEPT { + return out; +} + +template +inline std::ostream& StrCatImp(std::ostream& 
out, First&& f, Rest&&... rest) { + out << std::forward(f); + return StrCatImp(out, std::forward(rest)...); +} + +template +inline std::string StrCat(Args&&... args) { + std::ostringstream ss; + StrCatImp(ss, std::forward(args)...); + return ss.str(); +} + +std::vector StrSplit(const std::string& str, char delim); + +#ifdef BENCHMARK_STL_ANDROID_GNUSTL +/* + * GNU STL in Android NDK lacks support for some C++11 functions, including + * stoul, stoi, stod. We reimplement them here using C functions strtoul, + * strtol, strtod. Note that reimplemented functions are in benchmark:: + * namespace, not std:: namespace. + */ +unsigned long stoul(const std::string& str, size_t* pos = nullptr, + int base = 10); +int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); +double stod(const std::string& str, size_t* pos = nullptr); +#else +using std::stoul; +using std::stoi; +using std::stod; +#endif + +} // end namespace benchmark + +#endif // BENCHMARK_STRING_UTIL_H_ diff --git a/libcxx/utils/google-benchmark/src/sysinfo.cc b/libcxx/utils/google-benchmark/src/sysinfo.cc new file mode 100644 index 000000000000..c1969ea2d3fe --- /dev/null +++ b/libcxx/utils/google-benchmark/src/sysinfo.cc @@ -0,0 +1,726 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA +#include +#include +#include +#else +#include +#ifndef BENCHMARK_OS_FUCHSIA +#include +#endif +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ + defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \ + defined BENCHMARK_OS_DRAGONFLY +#define BENCHMARK_HAS_SYSCTL +#include +#endif +#endif +#if defined(BENCHMARK_OS_SOLARIS) +#include +#endif +#if defined(BENCHMARK_OS_QNX) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "cycleclock.h" +#include "internal_macros.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { +namespace { + +void PrintImp(std::ostream& out) { out << std::endl; } + +template +void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { + out << std::forward(f); + PrintImp(out, std::forward(rest)...); +} + +template +BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { + PrintImp(std::cerr, std::forward(args)...); + std::exit(EXIT_FAILURE); +} + +#ifdef BENCHMARK_HAS_SYSCTL + +/// ValueUnion - A type used to correctly alias the byte-for-byte output of +/// `sysctl` with the result type it's to be interpreted as. +struct ValueUnion { + union DataT { + uint32_t uint32_value; + uint64_t uint64_value; + // For correct aliasing of union members from bytes. + char bytes[8]; + }; + using DataPtr = std::unique_ptr; + + // The size of the data union member + its trailing array size. 
+ size_t Size; + DataPtr Buff; + + public: + ValueUnion() : Size(0), Buff(nullptr, &std::free) {} + + explicit ValueUnion(size_t BuffSize) + : Size(sizeof(DataT) + BuffSize), + Buff(::new (std::malloc(Size)) DataT(), &std::free) {} + + ValueUnion(ValueUnion&& other) = default; + + explicit operator bool() const { return bool(Buff); } + + char* data() const { return Buff->bytes; } + + std::string GetAsString() const { return std::string(data()); } + + int64_t GetAsInteger() const { + if (Size == sizeof(Buff->uint32_value)) + return static_cast(Buff->uint32_value); + else if (Size == sizeof(Buff->uint64_value)) + return static_cast(Buff->uint64_value); + BENCHMARK_UNREACHABLE(); + } + + uint64_t GetAsUnsigned() const { + if (Size == sizeof(Buff->uint32_value)) + return Buff->uint32_value; + else if (Size == sizeof(Buff->uint64_value)) + return Buff->uint64_value; + BENCHMARK_UNREACHABLE(); + } + + template + std::array GetAsArray() { + const int ArrSize = sizeof(T) * N; + CHECK_LE(ArrSize, Size); + std::array Arr; + std::memcpy(Arr.data(), data(), ArrSize); + return Arr; + } +}; + +ValueUnion GetSysctlImp(std::string const& Name) { +#if defined BENCHMARK_OS_OPENBSD + int mib[2]; + + mib[0] = CTL_HW; + if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){ + ValueUnion buff(sizeof(int)); + + if (Name == "hw.ncpu") { + mib[1] = HW_NCPU; + } else { + mib[1] = HW_CPUSPEED; + } + + if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) { + return ValueUnion(); + } + return buff; + } + return ValueUnion(); +#else + size_t CurBuffSize = 0; + if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) + return ValueUnion(); + + ValueUnion buff(CurBuffSize); + if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) + return buff; + return ValueUnion(); +#endif +} + +BENCHMARK_MAYBE_UNUSED +bool GetSysctl(std::string const& Name, std::string* Out) { + Out->clear(); + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + 
Out->assign(Buff.data()); + return true; +} + +template ::value>::type> +bool GetSysctl(std::string const& Name, Tp* Out) { + *Out = 0; + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + *Out = static_cast(Buff.GetAsUnsigned()); + return true; +} + +template +bool GetSysctl(std::string const& Name, std::array* Out) { + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + *Out = Buff.GetAsArray(); + return true; +} +#endif + +template +bool ReadFromFile(std::string const& fname, ArgT* arg) { + *arg = ArgT(); + std::ifstream f(fname.c_str()); + if (!f.is_open()) return false; + f >> *arg; + return f.good(); +} + +CPUInfo::Scaling CpuScaling(int num_cpus) { + // We don't have a valid CPU count, so don't even bother. + if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; +#ifdef BENCHMARK_OS_QNX + return CPUInfo::Scaling::UNKNOWN; +#endif +#ifndef BENCHMARK_OS_WINDOWS + // On Linux, the CPUfreq subsystem exposes CPU information as files on the + // local file system. If reading the exported files fails, then we may not be + // running on Linux, so we silently ignore all the read errors. 
+ std::string res; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + std::string governor_file = + StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); + if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; + } + return CPUInfo::Scaling::DISABLED; +#endif + return CPUInfo::Scaling::UNKNOWN; +} + +int CountSetBitsInCPUMap(std::string Val) { + auto CountBits = [](std::string Part) { + using CPUMask = std::bitset; + Part = "0x" + Part; + CPUMask Mask(benchmark::stoul(Part, nullptr, 16)); + return static_cast(Mask.count()); + }; + size_t Pos; + int total = 0; + while ((Pos = Val.find(',')) != std::string::npos) { + total += CountBits(Val.substr(0, Pos)); + Val = Val.substr(Pos + 1); + } + if (!Val.empty()) { + total += CountBits(Val); + } + return total; +} + +BENCHMARK_MAYBE_UNUSED +std::vector GetCacheSizesFromKVFS() { + std::vector res; + std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; + int Idx = 0; + while (true) { + CPUInfo::CacheInfo info; + std::string FPath = StrCat(dir, "index", Idx++, "/"); + std::ifstream f(StrCat(FPath, "size").c_str()); + if (!f.is_open()) break; + std::string suffix; + f >> info.size; + if (f.fail()) + PrintErrorAndDie("Failed while reading file '", FPath, "size'"); + if (f.good()) { + f >> suffix; + if (f.bad()) + PrintErrorAndDie( + "Invalid cache size format: failed to read size suffix"); + else if (f && suffix != "K") + PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); + else if (suffix == "K") + info.size *= 1024; + } + if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) + PrintErrorAndDie("Failed to read from file ", FPath, "type"); + if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) + PrintErrorAndDie("Failed to read from file ", FPath, "level"); + std::string map_str; + if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) + PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); + info.num_sharing = 
CountSetBitsInCPUMap(map_str); + res.push_back(info); + } + + return res; +} + +#ifdef BENCHMARK_OS_MACOSX +std::vector GetCacheSizesMacOSX() { + std::vector res; + std::array CacheCounts{{0, 0, 0, 0}}; + GetSysctl("hw.cacheconfig", &CacheCounts); + + struct { + std::string name; + std::string type; + int level; + uint64_t num_sharing; + } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, + {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, + {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, + {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; + for (auto& C : Cases) { + int val; + if (!GetSysctl(C.name, &val)) continue; + CPUInfo::CacheInfo info; + info.type = C.type; + info.level = C.level; + info.size = val; + info.num_sharing = static_cast(C.num_sharing); + res.push_back(std::move(info)); + } + return res; +} +#elif defined(BENCHMARK_OS_WINDOWS) +std::vector GetCacheSizesWindows() { + std::vector res; + DWORD buffer_size = 0; + using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; + using CInfo = CACHE_DESCRIPTOR; + + using UPtr = std::unique_ptr; + GetLogicalProcessorInformation(nullptr, &buffer_size); + UPtr buff((PInfo*)malloc(buffer_size), &std::free); + if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) + PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", + GetLastError()); + + PInfo* it = buff.get(); + PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); + + for (; it != end; ++it) { + if (it->Relationship != RelationCache) continue; + using BitSet = std::bitset; + BitSet B(it->ProcessorMask); + // To prevent duplicates, only consider caches where CPU 0 is specified + if (!B.test(0)) continue; + CInfo* Cache = &it->Cache; + CPUInfo::CacheInfo C; + C.num_sharing = static_cast(B.count()); + C.level = Cache->Level; + C.size = Cache->Size; + switch (Cache->Type) { + case CacheUnified: + C.type = "Unified"; + break; + case CacheInstruction: + C.type = "Instruction"; + break; + case CacheData: + C.type = 
"Data"; + break; + case CacheTrace: + C.type = "Trace"; + break; + default: + C.type = "Unknown"; + break; + } + res.push_back(C); + } + return res; +} +#elif BENCHMARK_OS_QNX +std::vector GetCacheSizesQNX() { + std::vector res; + struct cacheattr_entry *cache = SYSPAGE_ENTRY(cacheattr); + uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); + int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize ; + for(int i = 0; i < num; ++i ) { + CPUInfo::CacheInfo info; + switch (cache->flags){ + case CACHE_FLAG_INSTR : + info.type = "Instruction"; + info.level = 1; + break; + case CACHE_FLAG_DATA : + info.type = "Data"; + info.level = 1; + break; + case CACHE_FLAG_UNIFIED : + info.type = "Unified"; + info.level = 2; + break; + case CACHE_FLAG_SHARED : + info.type = "Shared"; + info.level = 3; + break; + default : + continue; + break; + } + info.size = cache->line_size * cache->num_lines; + info.num_sharing = 0; + res.push_back(std::move(info)); + cache = SYSPAGE_ARRAY_ADJ_OFFSET(cacheattr, cache, elsize); + } + return res; +} +#endif + +std::vector GetCacheSizes() { +#ifdef BENCHMARK_OS_MACOSX + return GetCacheSizesMacOSX(); +#elif defined(BENCHMARK_OS_WINDOWS) + return GetCacheSizesWindows(); +#elif defined(BENCHMARK_OS_QNX) + return GetCacheSizesQNX(); +#else + return GetCacheSizesFromKVFS(); +#endif +} + +std::string GetSystemName() { +#if defined(BENCHMARK_OS_WINDOWS) + std::string str; + const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1; + TCHAR hostname[COUNT] = {'\0'}; + DWORD DWCOUNT = COUNT; + if (!GetComputerName(hostname, &DWCOUNT)) + return std::string(""); +#ifndef UNICODE + str = std::string(hostname, DWCOUNT); +#else + //Using wstring_convert, Is deprecated in C++17 + using convert_type = std::codecvt_utf8; + std::wstring_convert converter; + std::wstring wStr(hostname, DWCOUNT); + str = converter.to_bytes(wStr); +#endif + return str; +#else // defined(BENCHMARK_OS_WINDOWS) +#ifndef HOST_NAME_MAX +#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX 
defined +#define HOST_NAME_MAX 64 +#elif defined(BENCHMARK_OS_NACL) +#define HOST_NAME_MAX 64 +#elif defined(BENCHMARK_OS_QNX) +#define HOST_NAME_MAX 154 +#elif defined(BENCHMARK_OS_RTEMS) +#define HOST_NAME_MAX 256 +#else +#warning "HOST_NAME_MAX not defined. using 64" +#define HOST_NAME_MAX 64 +#endif +#endif // def HOST_NAME_MAX + char hostname[HOST_NAME_MAX]; + int retVal = gethostname(hostname, HOST_NAME_MAX); + if (retVal != 0) return std::string(""); + return std::string(hostname); +#endif // Catch-all POSIX block. +} + +int GetNumCPUs() { +#ifdef BENCHMARK_HAS_SYSCTL + int NumCPU = -1; + if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; + fprintf(stderr, "Err: %s\n", strerror(errno)); + std::exit(EXIT_FAILURE); +#elif defined(BENCHMARK_OS_WINDOWS) + SYSTEM_INFO sysinfo; + // Use memset as opposed to = {} to avoid GCC missing initializer false + // positives. + std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); + GetSystemInfo(&sysinfo); + return sysinfo.dwNumberOfProcessors; // number of logical + // processors in the current + // group +#elif defined(BENCHMARK_OS_SOLARIS) + // Returns -1 in case of a failure. 
+ int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); + if (NumCPU < 0) { + fprintf(stderr, + "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", + strerror(errno)); + } + return NumCPU; +#elif defined(BENCHMARK_OS_QNX) + return static_cast(_syspage_ptr->num_cpu); +#else + int NumCPUs = 0; + int MaxID = -1; + std::ifstream f("/proc/cpuinfo"); + if (!f.is_open()) { + std::cerr << "failed to open /proc/cpuinfo\n"; + return -1; + } + const std::string Key = "processor"; + std::string ln; + while (std::getline(f, ln)) { + if (ln.empty()) continue; + size_t SplitIdx = ln.find(':'); + std::string value; +#if defined(__s390__) + // s390 has another format in /proc/cpuinfo + // it needs to be parsed differently + if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1); +#else + if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); +#endif + if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { + NumCPUs++; + if (!value.empty()) { + int CurID = benchmark::stoi(value); + MaxID = std::max(CurID, MaxID); + } + } + } + if (f.bad()) { + std::cerr << "Failure reading /proc/cpuinfo\n"; + return -1; + } + if (!f.eof()) { + std::cerr << "Failed to read to end of /proc/cpuinfo\n"; + return -1; + } + f.close(); + + if ((MaxID + 1) != NumCPUs) { + fprintf(stderr, + "CPU ID assignments in /proc/cpuinfo seem messed up." + " This is usually caused by a bad BIOS.\n"); + } + return NumCPUs; +#endif + BENCHMARK_UNREACHABLE(); +} + +double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { + // Currently, scaling is only used on linux path here, + // suppress diagnostics about it being unused on other paths. + (void)scaling; + +#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN + long freq; + + // If the kernel is exporting the tsc frequency use that. 
There are issues + // where cpuinfo_max_freq cannot be relied on because the BIOS may be + // exporintg an invalid p-state (on x86) or p-states may be used to put the + // processor in a new mode (turbo mode). Essentially, those frequencies + // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as + // well. + if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) + // If CPU scaling is disabled, use the the *current* frequency. + // Note that we specifically don't want to read cpuinfo_cur_freq, + // because it is only readable by root. + || (scaling == CPUInfo::Scaling::DISABLED && + ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", + &freq)) + // Otherwise, if CPU scaling may be in effect, we want to use + // the *maximum* frequency, not whatever CPU speed some random processor + // happens to be using now. + || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { + // The value is in kHz (as the file name suggests). For example, on a + // 2GHz warpstation, the file contains the value "2000000". + return freq * 1000.0; + } + + const double error_value = -1; + double bogo_clock = error_value; + + std::ifstream f("/proc/cpuinfo"); + if (!f.is_open()) { + std::cerr << "failed to open /proc/cpuinfo\n"; + return error_value; + } + + auto startsWithKey = [](std::string const& Value, std::string const& Key) { + if (Key.size() > Value.size()) return false; + auto Cmp = [&](char X, char Y) { + return std::tolower(X) == std::tolower(Y); + }; + return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); + }; + + std::string ln; + while (std::getline(f, ln)) { + if (ln.empty()) continue; + size_t SplitIdx = ln.find(':'); + std::string value; + if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept positive values. 
Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. + if (startsWithKey(ln, "cpu MHz")) { + if (!value.empty()) { + double cycles_per_second = benchmark::stod(value) * 1000000.0; + if (cycles_per_second > 0) return cycles_per_second; + } + } else if (startsWithKey(ln, "bogomips")) { + if (!value.empty()) { + bogo_clock = benchmark::stod(value) * 1000000.0; + if (bogo_clock < 0.0) bogo_clock = error_value; + } + } + } + if (f.bad()) { + std::cerr << "Failure reading /proc/cpuinfo\n"; + return error_value; + } + if (!f.eof()) { + std::cerr << "Failed to read to end of /proc/cpuinfo\n"; + return error_value; + } + f.close(); + // If we found the bogomips clock, but nothing better, we'll use it (but + // we're not happy about it); otherwise, fallback to the rough estimation + // below. + if (bogo_clock >= 0.0) return bogo_clock; + +#elif defined BENCHMARK_HAS_SYSCTL + constexpr auto* FreqStr = +#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) + "machdep.tsc_freq"; +#elif defined BENCHMARK_OS_OPENBSD + "hw.cpuspeed"; +#elif defined BENCHMARK_OS_DRAGONFLY + "hw.tsc_frequency"; +#else + "hw.cpufrequency"; +#endif + unsigned long long hz = 0; +#if defined BENCHMARK_OS_OPENBSD + if (GetSysctl(FreqStr, &hz)) return hz * 1000000; +#else + if (GetSysctl(FreqStr, &hz)) return hz; +#endif + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + FreqStr, strerror(errno)); + +#elif defined BENCHMARK_OS_WINDOWS + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. 
+ DWORD data, data_size = sizeof(data); + if (IsWindowsXPOrGreater() && + SUCCEEDED( + SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", nullptr, &data, &data_size))) + return static_cast((int64_t)data * + (int64_t)(1000 * 1000)); // was mhz +#elif defined (BENCHMARK_OS_SOLARIS) + kstat_ctl_t *kc = kstat_open(); + if (!kc) { + std::cerr << "failed to open /dev/kstat\n"; + return -1; + } + kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); + if (!ksp) { + std::cerr << "failed to lookup in /dev/kstat\n"; + return -1; + } + if (kstat_read(kc, ksp, NULL) < 0) { + std::cerr << "failed to read from /dev/kstat\n"; + return -1; + } + kstat_named_t *knp = + (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); + if (!knp) { + std::cerr << "failed to lookup data in /dev/kstat\n"; + return -1; + } + if (knp->data_type != KSTAT_DATA_UINT64) { + std::cerr << "current_clock_Hz is of unexpected data type: " + << knp->data_type << "\n"; + return -1; + } + double clock_hz = knp->value.ui64; + kstat_close(kc); + return clock_hz; +#elif defined (BENCHMARK_OS_QNX) + return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * + (int64_t)(1000 * 1000)); +#endif + // If we've fallen through, attempt to roughly estimate the CPU clock rate. 
+ const int estimate_time_ms = 1000; + const auto start_ticks = cycleclock::Now(); + SleepForMilliseconds(estimate_time_ms); + return static_cast(cycleclock::Now() - start_ticks); +} + +std::vector GetLoadAvg() { +#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ + defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ + defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ + !defined(__ANDROID__) + constexpr int kMaxSamples = 3; + std::vector res(kMaxSamples, 0.0); + const int nelem = getloadavg(res.data(), kMaxSamples); + if (nelem < 1) { + res.clear(); + } else { + res.resize(nelem); + } + return res; +#else + return {}; +#endif +} + +} // end namespace + +const CPUInfo& CPUInfo::Get() { + static const CPUInfo* info = new CPUInfo(); + return *info; +} + +CPUInfo::CPUInfo() + : num_cpus(GetNumCPUs()), + scaling(CpuScaling(num_cpus)), + cycles_per_second(GetCPUCyclesPerSecond(scaling)), + caches(GetCacheSizes()), + load_avg(GetLoadAvg()) {} + +const SystemInfo& SystemInfo::Get() { + static const SystemInfo* info = new SystemInfo(); + return *info; +} + +SystemInfo::SystemInfo() : name(GetSystemName()) {} +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/thread_manager.h b/libcxx/utils/google-benchmark/src/thread_manager.h new file mode 100644 index 000000000000..28e2dd53aff2 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/thread_manager.h @@ -0,0 +1,64 @@ +#ifndef BENCHMARK_THREAD_MANAGER_H +#define BENCHMARK_THREAD_MANAGER_H + +#include + +#include "benchmark/benchmark.h" +#include "mutex.h" + +namespace benchmark { +namespace internal { + +class ThreadManager { + public: + explicit ThreadManager(int num_threads) + : alive_threads_(num_threads), start_stop_barrier_(num_threads) {} + + Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) { + return benchmark_mutex_; + } + + bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { + return start_stop_barrier_.wait(); + } + + 
void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { + start_stop_barrier_.removeThread(); + if (--alive_threads_ == 0) { + MutexLock lock(end_cond_mutex_); + end_condition_.notify_all(); + } + } + + void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { + MutexLock lock(end_cond_mutex_); + end_condition_.wait(lock.native_handle(), + [this]() { return alive_threads_ == 0; }); + } + + public: + struct Result { + IterationCount iterations = 0; + double real_time_used = 0; + double cpu_time_used = 0; + double manual_time_used = 0; + int64_t complexity_n = 0; + std::string report_label_; + std::string error_message_; + bool has_error_ = false; + UserCounters counters; + }; + GUARDED_BY(GetBenchmarkMutex()) Result results; + + private: + mutable Mutex benchmark_mutex_; + std::atomic alive_threads_; + Barrier start_stop_barrier_; + Mutex end_cond_mutex_; + Condition end_condition_; +}; + +} // namespace internal +} // namespace benchmark + +#endif // BENCHMARK_THREAD_MANAGER_H diff --git a/libcxx/utils/google-benchmark/src/thread_timer.h b/libcxx/utils/google-benchmark/src/thread_timer.h new file mode 100644 index 000000000000..1703ca0d6f87 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/thread_timer.h @@ -0,0 +1,86 @@ +#ifndef BENCHMARK_THREAD_TIMER_H +#define BENCHMARK_THREAD_TIMER_H + +#include "check.h" +#include "timers.h" + +namespace benchmark { +namespace internal { + +class ThreadTimer { + explicit ThreadTimer(bool measure_process_cpu_time_) + : measure_process_cpu_time(measure_process_cpu_time_) {} + + public: + static ThreadTimer Create() { + return ThreadTimer(/*measure_process_cpu_time_=*/false); + } + static ThreadTimer CreateProcessCpuTime() { + return ThreadTimer(/*measure_process_cpu_time_=*/true); + } + + // Called by each thread + void StartTimer() { + running_ = true; + start_real_time_ = ChronoClockNow(); + start_cpu_time_ = ReadCpuTimerOfChoice(); + } + + // Called by each thread + void StopTimer() { + CHECK(running_); + running_ = false; + 
real_time_used_ += ChronoClockNow() - start_real_time_; + // Floating point error can result in the subtraction producing a negative + // time. Guard against that. + cpu_time_used_ += + std::max(ReadCpuTimerOfChoice() - start_cpu_time_, 0); + } + + // Called by each thread + void SetIterationTime(double seconds) { manual_time_used_ += seconds; } + + bool running() const { return running_; } + + // REQUIRES: timer is not running + double real_time_used() const { + CHECK(!running_); + return real_time_used_; + } + + // REQUIRES: timer is not running + double cpu_time_used() const { + CHECK(!running_); + return cpu_time_used_; + } + + // REQUIRES: timer is not running + double manual_time_used() const { + CHECK(!running_); + return manual_time_used_; + } + + private: + double ReadCpuTimerOfChoice() const { + if (measure_process_cpu_time) return ProcessCPUUsage(); + return ThreadCPUUsage(); + } + + // should the thread, or the process, time be measured? + const bool measure_process_cpu_time; + + bool running_ = false; // Is the timer running + double start_real_time_ = 0; // If running_ + double start_cpu_time_ = 0; // If running_ + + // Accumulated time so far (does not contain current slice if running_) + double real_time_used_ = 0; + double cpu_time_used_ = 0; + // Manually set iteration time. User sets this with SetIterationTime(seconds). + double manual_time_used_ = 0; +}; + +} // namespace internal +} // namespace benchmark + +#endif // BENCHMARK_THREAD_TIMER_H diff --git a/libcxx/utils/google-benchmark/src/timers.cc b/libcxx/utils/google-benchmark/src/timers.cc new file mode 100644 index 000000000000..af4767dff944 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/timers.cc @@ -0,0 +1,253 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "timers.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA +#include +#include +#else +#include +#ifndef BENCHMARK_OS_FUCHSIA +#include +#endif +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_DRAGONFLY || \ + defined BENCHMARK_OS_MACOSX +#include +#endif +#if defined(BENCHMARK_OS_MACOSX) +#include +#include +#include +#endif +#endif + +#ifdef BENCHMARK_OS_EMSCRIPTEN +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { + +// Suppress unused warnings on helper functions. 
+#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +namespace { +#if defined(BENCHMARK_OS_WINDOWS) +double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { + ULARGE_INTEGER kernel; + ULARGE_INTEGER user; + kernel.HighPart = kernel_time.dwHighDateTime; + kernel.LowPart = kernel_time.dwLowDateTime; + user.HighPart = user_time.dwHighDateTime; + user.LowPart = user_time.dwLowDateTime; + return (static_cast(kernel.QuadPart) + + static_cast(user.QuadPart)) * + 1e-7; +} +#elif !defined(BENCHMARK_OS_FUCHSIA) +double MakeTime(struct rusage const& ru) { + return (static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); +} +#endif +#if defined(BENCHMARK_OS_MACOSX) +double MakeTime(thread_basic_info_data_t const& info) { + return (static_cast(info.user_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6 + + static_cast(info.system_time.seconds) + + static_cast(info.system_time.microseconds) * 1e-6); +} +#endif +#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID) +double MakeTime(struct timespec const& ts) { + return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); +} +#endif + +BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) { + std::cerr << "ERROR: " << msg << std::endl; + std::exit(EXIT_FAILURE); +} + +} // end namespace + +double ProcessCPUUsage() { +#if defined(BENCHMARK_OS_WINDOWS) + HANDLE proc = GetCurrentProcess(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, + &user_time)) + return MakeTime(kernel_time, user_time); + DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(BENCHMARK_OS_EMSCRIPTEN) + // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. + // Use Emscripten-specific API. 
Reported CPU time would be exactly the + // same as total time, but this is ok because there aren't long-latency + // syncronous system calls in Emscripten. + return emscripten_get_now() * 1e-3; +#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See + // https://github.com/google/benchmark/pull/292 + struct timespec spec; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) + return MakeTime(spec); + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); +#else + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru); + DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed"); +#endif +} + +double ThreadCPUUsage() { +#if defined(BENCHMARK_OS_WINDOWS) + HANDLE this_thread = GetCurrentThread(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, + &user_time); + return MakeTime(kernel_time, user_time); +#elif defined(BENCHMARK_OS_MACOSX) + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See + // https://github.com/google/benchmark/pull/292 + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t info; + mach_port_t thread = pthread_mach_thread_np(pthread_self()); + if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) == + KERN_SUCCESS) { + return MakeTime(info); + } + DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); +#elif defined(BENCHMARK_OS_EMSCRIPTEN) + // Emscripten doesn't support traditional threads + return ProcessCPUUsage(); +#elif defined(BENCHMARK_OS_RTEMS) + // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. 
See + // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c + return ProcessCPUUsage(); +#elif defined(BENCHMARK_OS_SOLARIS) + struct rusage ru; + if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru); + DiagnoseAndExit("getrusage(RUSAGE_LWP, ...) failed"); +#elif defined(CLOCK_THREAD_CPUTIME_ID) + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts); + DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); +#else +#error Per-thread timing is not available on your system. +#endif +} + +std::string LocalDateTimeString() { + // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM. + typedef std::chrono::system_clock Clock; + std::time_t now = Clock::to_time_t(Clock::now()); + const std::size_t kTzOffsetLen = 6; + const std::size_t kTimestampLen = 19; + + std::size_t tz_len; + std::size_t timestamp_len; + long int offset_minutes; + char tz_offset_sign = '+'; + // tz_offset is set in one of three ways: + // * strftime with %z - This either returns empty or the ISO 8601 time. The maximum length an + // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). + // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to 19 for %02li, + // one for :, up to 19 %02li, plus trailing zero). + // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus trailing zero). + // + // Thus, the maximum size this needs to be is 41. + char tz_offset[41]; + // Long enough buffer to avoid format-overflow warnings + char storage[128]; + +#if defined(BENCHMARK_OS_WINDOWS) + std::tm *timeinfo_p = ::localtime(&now); +#else + std::tm timeinfo; + std::tm *timeinfo_p = &timeinfo; + ::localtime_r(&now, &timeinfo); +#endif + + tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p); + + if (tz_len < kTzOffsetLen && tz_len > 1) { + // Timezone offset was written. 
strftime writes offset as +HHMM or -HHMM, + // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse + // the offset as an integer, then reprint it to a string. + + offset_minutes = ::strtol(tz_offset, NULL, 10); + if (offset_minutes < 0) { + offset_minutes *= -1; + tz_offset_sign = '-'; + } + + tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", + tz_offset_sign, offset_minutes / 100, offset_minutes % 100); + CHECK(tz_len == kTzOffsetLen); + ((void)tz_len); // Prevent unused variable warning in optimized build. + } else { + // Unknown offset. RFC3339 specifies that unknown local offsets should be + // written as UTC time with -00:00 timezone. +#if defined(BENCHMARK_OS_WINDOWS) + // Potential race condition if another thread calls localtime or gmtime. + timeinfo_p = ::gmtime(&now); +#else + ::gmtime_r(&now, &timeinfo); +#endif + + strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); + } + + timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", + timeinfo_p); + CHECK(timestamp_len == kTimestampLen); + // Prevent unused variable warning in optimized build. 
+ ((void)kTimestampLen); + + std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1); + return std::string(storage); +} + +} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/timers.h b/libcxx/utils/google-benchmark/src/timers.h new file mode 100644 index 000000000000..65606ccd93d1 --- /dev/null +++ b/libcxx/utils/google-benchmark/src/timers.h @@ -0,0 +1,48 @@ +#ifndef BENCHMARK_TIMERS_H +#define BENCHMARK_TIMERS_H + +#include +#include + +namespace benchmark { + +// Return the CPU usage of the current process +double ProcessCPUUsage(); + +// Return the CPU usage of the children of the current process +double ChildrenCPUUsage(); + +// Return the CPU usage of the current thread +double ThreadCPUUsage(); + +#if defined(HAVE_STEADY_CLOCK) +template +struct ChooseSteadyClock { + typedef std::chrono::high_resolution_clock type; +}; + +template <> +struct ChooseSteadyClock { + typedef std::chrono::steady_clock type; +}; +#endif + +struct ChooseClockType { +#if defined(HAVE_STEADY_CLOCK) + typedef ChooseSteadyClock<>::type type; +#else + typedef std::chrono::high_resolution_clock type; +#endif +}; + +inline double ChronoClockNow() { + typedef ChooseClockType::type ClockType; + using FpSeconds = std::chrono::duration; + return FpSeconds(ClockType::now().time_since_epoch()).count(); +} + +std::string LocalDateTimeString(); + +} // end namespace benchmark + +#endif // BENCHMARK_TIMERS_H diff --git a/libcxx/utils/google-benchmark/test/AssemblyTests.cmake b/libcxx/utils/google-benchmark/test/AssemblyTests.cmake new file mode 100644 index 000000000000..3d078586f1de --- /dev/null +++ b/libcxx/utils/google-benchmark/test/AssemblyTests.cmake @@ -0,0 +1,46 @@ + +include(split_list) + +set(ASM_TEST_FLAGS "") +check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) +if (BENCHMARK_HAS_O3_FLAG) + list(APPEND ASM_TEST_FLAGS -O3) +endif() + +check_cxx_compiler_flag(-g0 BENCHMARK_HAS_G0_FLAG) +if (BENCHMARK_HAS_G0_FLAG) + list(APPEND ASM_TEST_FLAGS -g0) 
+endif() + +check_cxx_compiler_flag(-fno-stack-protector BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG) +if (BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG) + list(APPEND ASM_TEST_FLAGS -fno-stack-protector) +endif() + +split_list(ASM_TEST_FLAGS) +string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER) + +macro(add_filecheck_test name) + cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV}) + add_library(${name} OBJECT ${name}.cc) + set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}") + set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s") + add_custom_target(copy_${name} ALL + COMMAND ${PROJECT_SOURCE_DIR}/tools/strip_asm.py + $ + ${ASM_OUTPUT_FILE} + BYPRODUCTS ${ASM_OUTPUT_FILE}) + add_dependencies(copy_${name} ${name}) + if (NOT ARG_CHECK_PREFIXES) + set(ARG_CHECK_PREFIXES "CHECK") + endif() + foreach(prefix ${ARG_CHECK_PREFIXES}) + add_test(NAME run_${name}_${prefix} + COMMAND + ${LLVM_FILECHECK_EXE} ${name}.cc + --input-file=${ASM_OUTPUT_FILE} + --check-prefixes=CHECK,CHECK-${ASM_TEST_COMPILER} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + endforeach() +endmacro() + diff --git a/libcxx/utils/google-benchmark/test/BUILD b/libcxx/utils/google-benchmark/test/BUILD new file mode 100644 index 000000000000..1f27f99ede9f --- /dev/null +++ b/libcxx/utils/google-benchmark/test/BUILD @@ -0,0 +1,74 @@ +TEST_COPTS = [ + "-pedantic", + "-pedantic-errors", + "-std=c++11", + "-Wall", + "-Wextra", + "-Wshadow", + # "-Wshorten-64-to-32", + "-Wfloat-equal", + "-fstrict-aliasing", +] + +PER_SRC_COPTS = ({ + "cxx03_test.cc": ["-std=c++03"], + # Some of the issues with DoNotOptimize only occur when optimization is enabled + "donotoptimize_test.cc": ["-O3"], +}) + +TEST_ARGS = ["--benchmark_min_time=0.01"] + +PER_SRC_TEST_ARGS = ({ + "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], + "repetitions_test.cc": [" --benchmark_repetitions=3"], +}) + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + +cc_library( + name = 
"output_test_helper", + testonly = 1, + srcs = ["output_test_helper.cc"], + hdrs = ["output_test.h"], + copts = TEST_COPTS, + deps = [ + "//:benchmark", + "//:benchmark_internal_headers", + ], +) + +[ + cc_test( + name = test_src[:-len(".cc")], + size = "small", + srcs = [test_src], + args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []), + copts = TEST_COPTS + PER_SRC_COPTS.get(test_src, []), + deps = [ + ":output_test_helper", + "//:benchmark", + "//:benchmark_internal_headers", + "@com_google_googletest//:gtest", + ] + ( + ["@com_google_googletest//:gtest_main"] if (test_src[-len("gtest.cc"):] == "gtest.cc") else [] + ), + # FIXME: Add support for assembly tests to bazel. + # See Issue #556 + # https://github.com/google/benchmark/issues/556 + ) + for test_src in glob( + ["*test.cc"], + exclude = [ + "*_assembly_test.cc", + "link_main_test.cc", + ], + ) +] + +cc_test( + name = "link_main_test", + size = "small", + srcs = ["link_main_test.cc"], + copts = TEST_COPTS, + deps = ["//:benchmark_main"], +) diff --git a/libcxx/utils/google-benchmark/test/CMakeLists.txt b/libcxx/utils/google-benchmark/test/CMakeLists.txt new file mode 100644 index 000000000000..79cdf53b402c --- /dev/null +++ b/libcxx/utils/google-benchmark/test/CMakeLists.txt @@ -0,0 +1,271 @@ +# Enable the tests + +find_package(Threads REQUIRED) +include(CheckCXXCompilerFlag) + +# NOTE: Some tests use `` to perform the test. Therefore we must +# strip -DNDEBUG from the default CMake flags in DEBUG mode. +string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) +if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) + add_definitions( -UNDEBUG ) + add_definitions(-DTEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) + # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines. 
+ foreach (flags_var_to_scrub + CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS_MINSIZEREL) + string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " " + "${flags_var_to_scrub}" "${${flags_var_to_scrub}}") + endforeach() +endif() + +check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) +set(BENCHMARK_O3_FLAG "") +if (BENCHMARK_HAS_O3_FLAG) + set(BENCHMARK_O3_FLAG "-O3") +endif() + +# NOTE: These flags must be added after find_package(Threads REQUIRED) otherwise +# they will break the configuration check. +if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) + list(APPEND CMAKE_EXE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) +endif() + +add_library(output_test_helper STATIC output_test_helper.cc output_test.h) + +macro(compile_benchmark_test name) + add_executable(${name} "${name}.cc") + target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) +endmacro(compile_benchmark_test) + +macro(compile_benchmark_test_with_main name) + add_executable(${name} "${name}.cc") + target_link_libraries(${name} benchmark::benchmark_main) +endmacro(compile_benchmark_test_with_main) + +macro(compile_output_test name) + add_executable(${name} "${name}.cc" output_test.h) + target_link_libraries(${name} output_test_helper benchmark::benchmark + ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) +endmacro(compile_output_test) + +# Demonstration executable +compile_benchmark_test(benchmark_test) +add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01) + +compile_benchmark_test(filter_test) +macro(add_filter_test name filter expect) + add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) + add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) +endmacro(add_filter_test) + +add_filter_test(filter_simple "Foo" 3) +add_filter_test(filter_simple_negative 
"-Foo" 2) +add_filter_test(filter_suffix "BM_.*" 4) +add_filter_test(filter_suffix_negative "-BM_.*" 1) +add_filter_test(filter_regex_all ".*" 5) +add_filter_test(filter_regex_all_negative "-.*" 0) +add_filter_test(filter_regex_blank "" 5) +add_filter_test(filter_regex_blank_negative "-" 0) +add_filter_test(filter_regex_none "monkey" 0) +add_filter_test(filter_regex_none_negative "-monkey" 5) +add_filter_test(filter_regex_wildcard ".*Foo.*" 3) +add_filter_test(filter_regex_wildcard_negative "-.*Foo.*" 2) +add_filter_test(filter_regex_begin "^BM_.*" 4) +add_filter_test(filter_regex_begin_negative "-^BM_.*" 1) +add_filter_test(filter_regex_begin2 "^N" 1) +add_filter_test(filter_regex_begin2_negative "-^N" 4) +add_filter_test(filter_regex_end ".*Ba$" 1) +add_filter_test(filter_regex_end_negative "-.*Ba$" 4) + +compile_benchmark_test(options_test) +add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01) + +compile_benchmark_test(basic_test) +add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01) + +compile_output_test(repetitions_test) +add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01 --benchmark_repetitions=3) + +compile_benchmark_test(diagnostics_test) +add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01) + +compile_benchmark_test(skip_with_error_test) +add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01) + +compile_benchmark_test(donotoptimize_test) +# Some of the issues with DoNotOptimize only occur when optimization is enabled +check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) +if (BENCHMARK_HAS_O3_FLAG) + set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3") +endif() +add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01) + +compile_benchmark_test(fixture_test) +add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01) + 
+compile_benchmark_test(register_benchmark_test) +add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01) + +compile_benchmark_test(map_test) +add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01) + +compile_benchmark_test(multiple_ranges_test) +add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01) + +compile_benchmark_test(args_product_test) +add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01) + +compile_benchmark_test_with_main(link_main_test) +add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01) + +compile_output_test(reporter_output_test) +add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01) + +compile_output_test(templated_fixture_test) +add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01) + +compile_output_test(user_counters_test) +add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01) + +compile_output_test(perf_counters_test) +add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES) + +compile_output_test(internal_threading_test) +add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01) + +compile_output_test(report_aggregates_only_test) +add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01) + +compile_output_test(display_aggregates_only_test) +add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01) + +compile_output_test(user_counters_tabular_test) +add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01) + +compile_output_test(user_counters_thousands_test) +add_test(NAME user_counters_thousands_test COMMAND 
user_counters_thousands_test --benchmark_min_time=0.01) + +compile_output_test(memory_manager_test) +add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01) + +check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) +if (BENCHMARK_HAS_CXX03_FLAG) + compile_benchmark_test(cxx03_test) + set_target_properties(cxx03_test + PROPERTIES + CXX_STANDARD 98 + CXX_STANDARD_REQUIRED YES) + # libstdc++ provides different definitions within between dialects. When + # LTO is enabled and -Werror is specified GCC diagnoses this ODR violation + # causing the test to fail to compile. To prevent this we explicitly disable + # the warning. + check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR) + if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR) + set_target_properties(cxx03_test + PROPERTIES + LINK_FLAGS "-Wno-odr") + endif() + add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01) +endif() + +# Attempt to work around flaky test failures when running on Appveyor servers. 
+if (DEFINED ENV{APPVEYOR}) + set(COMPLEXITY_MIN_TIME "0.5") +else() + set(COMPLEXITY_MIN_TIME "0.01") +endif() +compile_output_test(complexity_test) +add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME}) + +############################################################################### +# GoogleTest Unit Tests +############################################################################### + +if (BENCHMARK_ENABLE_GTEST_TESTS) + macro(compile_gtest name) + add_executable(${name} "${name}.cc") + target_link_libraries(${name} benchmark::benchmark + gmock_main ${CMAKE_THREAD_LIBS_INIT}) + endmacro(compile_gtest) + + macro(add_gtest name) + compile_gtest(${name}) + add_test(NAME ${name} COMMAND ${name}) + endmacro() + + add_gtest(benchmark_gtest) + add_gtest(benchmark_name_gtest) + add_gtest(benchmark_random_interleaving_gtest) + add_gtest(commandlineflags_gtest) + add_gtest(statistics_gtest) + add_gtest(string_util_gtest) + add_gtest(perf_counters_gtest) +endif(BENCHMARK_ENABLE_GTEST_TESTS) + +############################################################################### +# Assembly Unit Tests +############################################################################### + +if (BENCHMARK_ENABLE_ASSEMBLY_TESTS) + if (NOT LLVM_FILECHECK_EXE) + message(FATAL_ERROR "LLVM FileCheck is required when including this file") + endif() + include(AssemblyTests.cmake) + add_filecheck_test(donotoptimize_assembly_test) + add_filecheck_test(state_assembly_test) + add_filecheck_test(clobber_memory_assembly_test) +endif() + + + +############################################################################### +# Code Coverage Configuration +############################################################################### + +# Add the coverage command(s) +if(CMAKE_BUILD_TYPE) + string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) +endif() +if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") + find_program(GCOV gcov) + 
find_program(LCOV lcov) + find_program(GENHTML genhtml) + find_program(CTEST ctest) + if (GCOV AND LCOV AND GENHTML AND CTEST AND HAVE_CXX_FLAG_COVERAGE) + add_custom_command( + OUTPUT ${CMAKE_BINARY_DIR}/lcov/index.html + COMMAND ${LCOV} -q -z -d . + COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o before.lcov -i + COMMAND ${CTEST} --force-new-ctest-process + COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o after.lcov + COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov + COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov + COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark + DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Running LCOV" + ) + add_custom_target(coverage + DEPENDS ${CMAKE_BINARY_DIR}/lcov/index.html + COMMENT "LCOV report at lcov/index.html" + ) + message(STATUS "Coverage command added") + else() + if (HAVE_CXX_FLAG_COVERAGE) + set(CXX_FLAG_COVERAGE_MESSAGE supported) + else() + set(CXX_FLAG_COVERAGE_MESSAGE unavailable) + endif() + message(WARNING + "Coverage not available:\n" + " gcov: ${GCOV}\n" + " lcov: ${LCOV}\n" + " genhtml: ${GENHTML}\n" + " ctest: ${CTEST}\n" + " --coverage flag: ${CXX_FLAG_COVERAGE_MESSAGE}") + endif() +endif() diff --git a/libcxx/utils/google-benchmark/test/args_product_test.cc b/libcxx/utils/google-benchmark/test/args_product_test.cc new file mode 100644 index 000000000000..32a75d50dd9e --- /dev/null +++ b/libcxx/utils/google-benchmark/test/args_product_test.cc @@ -0,0 +1,77 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include +#include + +class ArgsProductFixture : public ::benchmark::Fixture { + public: + ArgsProductFixture() + : expectedValues({{0, 100, 2000, 30000}, + {1, 15, 3, 8}, + {1, 15, 3, 9}, + {1, 15, 7, 8}, + {1, 15, 7, 9}, + {1, 15, 10, 8}, + {1, 15, 10, 9}, 
+ {2, 15, 3, 8}, + {2, 15, 3, 9}, + {2, 15, 7, 8}, + {2, 15, 7, 9}, + {2, 15, 10, 8}, + {2, 15, 10, 9}, + {4, 5, 6, 11}}) {} + + void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + std::vector ranges = {state.range(0), state.range(1), + state.range(2), state.range(3)}; + + assert(expectedValues.find(ranges) != expectedValues.end()); + + actualValues.insert(ranges); + } + + // NOTE: This is not TearDown as we want to check after _all_ runs are + // complete. + virtual ~ArgsProductFixture() { + if (actualValues != expectedValues) { + std::cout << "EXPECTED\n"; + for (auto v : expectedValues) { + std::cout << "{"; + for (int64_t iv : v) { + std::cout << iv << ", "; + } + std::cout << "}\n"; + } + std::cout << "ACTUAL\n"; + for (auto v : actualValues) { + std::cout << "{"; + for (int64_t iv : v) { + std::cout << iv << ", "; + } + std::cout << "}\n"; + } + } + } + + std::set> expectedValues; + std::set> actualValues; +}; + +BENCHMARK_DEFINE_F(ArgsProductFixture, Empty)(benchmark::State& state) { + for (auto _ : state) { + int64_t product = + state.range(0) * state.range(1) * state.range(2) * state.range(3); + for (int64_t x = 0; x < product; x++) { + benchmark::DoNotOptimize(x); + } + } +} + +BENCHMARK_REGISTER_F(ArgsProductFixture, Empty) + ->Args({0, 100, 2000, 30000}) + ->ArgsProduct({{1, 2}, {15}, {3, 7, 10}, {8, 9}}) + ->Args({4, 5, 6, 11}); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/basic_test.cc b/libcxx/utils/google-benchmark/test/basic_test.cc new file mode 100644 index 000000000000..33642211e205 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/basic_test.cc @@ -0,0 +1,151 @@ + +#include "benchmark/benchmark.h" + +#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) + +void BM_empty(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_empty); +BENCHMARK(BM_empty)->ThreadPerCpu(); + +void BM_spin_empty(benchmark::State& state) { 
+ for (auto _ : state) { + for (int x = 0; x < state.range(0); ++x) { + benchmark::DoNotOptimize(x); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_empty); +BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); + +void BM_spin_pause_before(benchmark::State& state) { + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + for (auto _ : state) { + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_before); +BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); + +void BM_spin_pause_during(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + state.ResumeTiming(); + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_during); +BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu(); + +void BM_pause_during(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + state.ResumeTiming(); + } +} +BENCHMARK(BM_pause_during); +BENCHMARK(BM_pause_during)->ThreadPerCpu(); +BENCHMARK(BM_pause_during)->UseRealTime(); +BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); + +void BM_spin_pause_after(benchmark::State& state) { + for (auto _ : state) { + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + } + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_after); +BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); + +void BM_spin_pause_before_and_after(benchmark::State& state) { + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + for (auto _ : state) { + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } + } + for (int i = 0; i < state.range(0); ++i) { + benchmark::DoNotOptimize(i); + } +} 
+BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after); +BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu(); + +void BM_empty_stop_start(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_empty_stop_start); +BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); + + +void BM_KeepRunning(benchmark::State& state) { + benchmark::IterationCount iter_count = 0; + assert(iter_count == state.iterations()); + while (state.KeepRunning()) { + ++iter_count; + } + assert(iter_count == state.iterations()); +} +BENCHMARK(BM_KeepRunning); + +void BM_KeepRunningBatch(benchmark::State& state) { + // Choose a batch size >1000 to skip the typical runs with iteration + // targets of 10, 100 and 1000. If these are not actually skipped the + // bug would be detectable as consecutive runs with the same iteration + // count. Below we assert that this does not happen. + const benchmark::IterationCount batch_size = 1009; + + static benchmark::IterationCount prior_iter_count = 0; + benchmark::IterationCount iter_count = 0; + while (state.KeepRunningBatch(batch_size)) { + iter_count += batch_size; + } + assert(state.iterations() == iter_count); + + // Verify that the iteration count always increases across runs (see + // comment above). + assert(iter_count == batch_size // max_iterations == 1 + || iter_count > prior_iter_count); // max_iterations > batch_size + prior_iter_count = iter_count; +} +// Register with a fixed repetition count to establish the invariant that +// the iteration count should always change across runs. This overrides +// the --benchmark_repetitions command line flag, which would otherwise +// cause this test to fail if set > 1. 
+BENCHMARK(BM_KeepRunningBatch)->Repetitions(1); + +void BM_RangedFor(benchmark::State& state) { + benchmark::IterationCount iter_count = 0; + for (auto _ : state) { + ++iter_count; + } + assert(iter_count == state.max_iterations); +} +BENCHMARK(BM_RangedFor); + +// Ensure that StateIterator provides all the necessary typedefs required to +// instantiate std::iterator_traits. +static_assert(std::is_same< + typename std::iterator_traits::value_type, + typename benchmark::State::StateIterator::value_type>::value, ""); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/benchmark_gtest.cc b/libcxx/utils/google-benchmark/test/benchmark_gtest.cc new file mode 100644 index 000000000000..14a885ba46da --- /dev/null +++ b/libcxx/utils/google-benchmark/test/benchmark_gtest.cc @@ -0,0 +1,165 @@ +#include +#include +#include + +#include "../src/benchmark_register.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace benchmark { +namespace internal { +extern std::map* global_context; + +namespace { + +TEST(AddRangeTest, Simple) { + std::vector dst; + AddRange(&dst, 1, 2, 2); + EXPECT_THAT(dst, testing::ElementsAre(1, 2)); +} + +TEST(AddRangeTest, Simple64) { + std::vector dst; + AddRange(&dst, static_cast(1), static_cast(2), 2); + EXPECT_THAT(dst, testing::ElementsAre(1, 2)); +} + +TEST(AddRangeTest, Advanced) { + std::vector dst; + AddRange(&dst, 5, 15, 2); + EXPECT_THAT(dst, testing::ElementsAre(5, 8, 15)); +} + +TEST(AddRangeTest, Advanced64) { + std::vector dst; + AddRange(&dst, static_cast(5), static_cast(15), 2); + EXPECT_THAT(dst, testing::ElementsAre(5, 8, 15)); +} + +TEST(AddRangeTest, FullRange8) { + std::vector dst; + AddRange(&dst, int8_t{1}, std::numeric_limits::max(), 8); + EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127)); +} + +TEST(AddRangeTest, FullRange64) { + std::vector dst; + AddRange(&dst, int64_t{1}, std::numeric_limits::max(), 1024); + EXPECT_THAT( + dst, testing::ElementsAre(1LL, 1024LL, 1048576LL, 1073741824LL, + 
1099511627776LL, 1125899906842624LL, + 1152921504606846976LL, 9223372036854775807LL)); +} + +TEST(AddRangeTest, NegativeRanges) { + std::vector dst; + AddRange(&dst, -8, 0, 2); + EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1, 0)); +} + +TEST(AddRangeTest, StrictlyNegative) { + std::vector dst; + AddRange(&dst, -8, -1, 2); + EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1)); +} + +TEST(AddRangeTest, SymmetricNegativeRanges) { + std::vector dst; + AddRange(&dst, -8, 8, 2); + EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1, 0, 1, 2, 4, 8)); +} + +TEST(AddRangeTest, SymmetricNegativeRangesOddMult) { + std::vector dst; + AddRange(&dst, -30, 32, 5); + EXPECT_THAT(dst, testing::ElementsAre(-30, -25, -5, -1, 0, 1, 5, 25, 32)); +} + +TEST(AddRangeTest, NegativeRangesAsymmetric) { + std::vector dst; + AddRange(&dst, -3, 5, 2); + EXPECT_THAT(dst, testing::ElementsAre(-3, -2, -1, 0, 1, 2, 4, 5)); +} + +TEST(AddRangeTest, NegativeRangesLargeStep) { + // Always include -1, 0, 1 when crossing zero. + std::vector dst; + AddRange(&dst, -8, 8, 10); + EXPECT_THAT(dst, testing::ElementsAre(-8, -1, 0, 1, 8)); +} + +TEST(AddRangeTest, ZeroOnlyRange) { + std::vector dst; + AddRange(&dst, 0, 0, 2); + EXPECT_THAT(dst, testing::ElementsAre(0)); +} + +TEST(AddRangeTest, ZeroStartingRange) { + std::vector dst; + AddRange(&dst, 0, 2, 2); + EXPECT_THAT(dst, testing::ElementsAre(0, 1, 2)); +} + +TEST(AddRangeTest, NegativeRange64) { + std::vector dst; + AddRange(&dst, -4, 4, 2); + EXPECT_THAT(dst, testing::ElementsAre(-4, -2, -1, 0, 1, 2, 4)); +} + +TEST(AddRangeTest, NegativeRangePreservesExistingOrder) { + // If elements already exist in the range, ensure we don't change + // their ordering by adding negative values. 
+ std::vector dst = {1, 2, 3}; + AddRange(&dst, -2, 2, 2); + EXPECT_THAT(dst, testing::ElementsAre(1, 2, 3, -2, -1, 0, 1, 2)); +} + +TEST(AddRangeTest, FullNegativeRange64) { + std::vector dst; + const auto min = std::numeric_limits::min(); + const auto max = std::numeric_limits::max(); + AddRange(&dst, min, max, 1024); + EXPECT_THAT( + dst, testing::ElementsAreArray(std::vector{ + min, -1152921504606846976LL, -1125899906842624LL, + -1099511627776LL, -1073741824LL, -1048576LL, -1024LL, -1LL, 0LL, + 1LL, 1024LL, 1048576LL, 1073741824LL, 1099511627776LL, + 1125899906842624LL, 1152921504606846976LL, max})); +} + +TEST(AddRangeTest, Simple8) { + std::vector dst; + AddRange(&dst, 1, 8, 2); + EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8)); +} + +TEST(AddCustomContext, Simple) { + EXPECT_THAT(global_context, nullptr); + + AddCustomContext("foo", "bar"); + AddCustomContext("baz", "qux"); + + EXPECT_THAT(*global_context, + testing::UnorderedElementsAre(testing::Pair("foo", "bar"), + testing::Pair("baz", "qux"))); + + delete global_context; + global_context = nullptr; +} + +TEST(AddCustomContext, DuplicateKey) { + EXPECT_THAT(global_context, nullptr); + + AddCustomContext("foo", "bar"); + AddCustomContext("foo", "qux"); + + EXPECT_THAT(*global_context, + testing::UnorderedElementsAre(testing::Pair("foo", "bar"))); + + delete global_context; + global_context = nullptr; +} + +} // namespace +} // namespace internal +} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc b/libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc new file mode 100644 index 000000000000..afb401c1f532 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc @@ -0,0 +1,74 @@ +#include "benchmark/benchmark.h" +#include "gtest/gtest.h" + +namespace { + +using namespace benchmark; +using namespace benchmark::internal; + +TEST(BenchmarkNameTest, Empty) { + const auto name = BenchmarkName(); + EXPECT_EQ(name.str(), std::string()); +} 
+ +TEST(BenchmarkNameTest, FunctionName) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + EXPECT_EQ(name.str(), "function_name"); +} + +TEST(BenchmarkNameTest, FunctionNameAndArgs) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.args = "some_args:3/4/5"; + EXPECT_EQ(name.str(), "function_name/some_args:3/4/5"); +} + +TEST(BenchmarkNameTest, MinTime) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.args = "some_args:3/4"; + name.min_time = "min_time:3.4s"; + EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s"); +} + +TEST(BenchmarkNameTest, Iterations) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.min_time = "min_time:3.4s"; + name.iterations = "iterations:42"; + EXPECT_EQ(name.str(), "function_name/min_time:3.4s/iterations:42"); +} + +TEST(BenchmarkNameTest, Repetitions) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.min_time = "min_time:3.4s"; + name.repetitions = "repetitions:24"; + EXPECT_EQ(name.str(), "function_name/min_time:3.4s/repetitions:24"); +} + +TEST(BenchmarkNameTest, TimeType) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.min_time = "min_time:3.4s"; + name.time_type = "hammer_time"; + EXPECT_EQ(name.str(), "function_name/min_time:3.4s/hammer_time"); +} + +TEST(BenchmarkNameTest, Threads) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.min_time = "min_time:3.4s"; + name.threads = "threads:256"; + EXPECT_EQ(name.str(), "function_name/min_time:3.4s/threads:256"); +} + +TEST(BenchmarkNameTest, TestEmptyFunctionName) { + auto name = BenchmarkName(); + name.args = "first:3/second:4"; + name.threads = "threads:22"; + EXPECT_EQ(name.str(), "first:3/second:4/threads:22"); +} + +} // end namespace diff --git a/libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc 
b/libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc new file mode 100644 index 000000000000..8e28dab3f41d --- /dev/null +++ b/libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc @@ -0,0 +1,126 @@ +#include +#include +#include + +#include "../src/commandlineflags.h" +#include "../src/string_util.h" +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +DECLARE_bool(benchmark_enable_random_interleaving); +DECLARE_string(benchmark_filter); +DECLARE_int32(benchmark_repetitions); + +namespace benchmark { +namespace internal { +namespace { + +class EventQueue : public std::queue { + public: + void Put(const std::string& event) { push(event); } + + void Clear() { + while (!empty()) { + pop(); + } + } + + std::string Get() { + std::string event = front(); + pop(); + return event; + } +}; + +static EventQueue* queue = new EventQueue; + +class NullReporter : public BenchmarkReporter { + public: + bool ReportContext(const Context& /*context*/) override { return true; } + void ReportRuns(const std::vector& /* report */) override {} +}; + +class BenchmarkTest : public testing::Test { + public: + static void SetupHook(int /* num_threads */) { queue->push("Setup"); } + + static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } + + void Execute(const std::string& pattern) { + queue->Clear(); + + BenchmarkReporter* reporter = new NullReporter; + FLAGS_benchmark_filter = pattern; + RunSpecifiedBenchmarks(reporter); + delete reporter; + + queue->Put("DONE"); // End marker + } +}; + +static void BM_Match1(benchmark::State& state) { + const int64_t arg = state.range(0); + + for (auto _ : state) { + } + queue->Put(StrFormat("BM_Match1/%d", static_cast(arg))); +} +BENCHMARK(BM_Match1) + ->Iterations(100) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Range(10, 80) + ->Args({90}) + ->Args({100}); + +TEST_F(BenchmarkTest, Match1) { + Execute("BM_Match1"); + ASSERT_EQ("BM_Match1/1", queue->Get()); + 
ASSERT_EQ("BM_Match1/2", queue->Get()); + ASSERT_EQ("BM_Match1/3", queue->Get()); + ASSERT_EQ("BM_Match1/10", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/90", queue->Get()); + ASSERT_EQ("BM_Match1/100", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRepetition) { + FLAGS_benchmark_repetitions = 2; + + Execute("BM_Match1/(64|80)"); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { + FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + + std::map element_count; + std::map interleaving_count; + Execute("BM_Match1/(64|80)"); + for (int i = 0; i < 100; ++i) { + std::vector interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + element_count[interleaving[0].c_str()]++; + element_count[interleaving[1].c_str()]++; + interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), + interleaving[1].c_str())]++; + } + EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions."; + EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions."; + EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; + ASSERT_EQ("DONE", queue->Get()); +} + +} // namespace +} // namespace internal +} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/test/benchmark_test.cc b/libcxx/utils/google-benchmark/test/benchmark_test.cc new file mode 100644 index 000000000000..3cd4f5565fa1 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/benchmark_test.cc @@ -0,0 +1,245 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +#if defined(__GNUC__) +#define BENCHMARK_NOINLINE __attribute__((noinline)) +#else +#define BENCHMARK_NOINLINE +#endif + +namespace { + +int BENCHMARK_NOINLINE Factorial(uint32_t n) { + return (n == 1) ? 1 : n * Factorial(n - 1); +} + +double CalculatePi(int depth) { + double pi = 0.0; + for (int i = 0; i < depth; ++i) { + double numerator = static_cast(((i % 2) * 2) - 1); + double denominator = static_cast((2 * i) - 1); + pi += numerator / denominator; + } + return (pi - 1.0) * 4; +} + +std::set ConstructRandomSet(int64_t size) { + std::set s; + for (int i = 0; i < size; ++i) s.insert(s.end(), i); + return s; +} + +std::mutex test_vector_mu; +std::vector* test_vector = nullptr; + +} // end namespace + +static void BM_Factorial(benchmark::State& state) { + int fac_42 = 0; + for (auto _ : state) fac_42 = Factorial(8); + // Prevent compiler optimizations + std::stringstream ss; + ss << fac_42; + state.SetLabel(ss.str()); +} +BENCHMARK(BM_Factorial); +BENCHMARK(BM_Factorial)->UseRealTime(); + +static void BM_CalculatePiRange(benchmark::State& state) { + double pi = 0.0; + for (auto _ : state) pi = CalculatePi(static_cast(state.range(0))); + std::stringstream ss; + ss << pi; + state.SetLabel(ss.str()); +} +BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); + +static void BM_CalculatePi(benchmark::State& state) { + static const int depth = 1024; + for (auto _ : state) { + benchmark::DoNotOptimize(CalculatePi(static_cast(depth))); + } +} +BENCHMARK(BM_CalculatePi)->Threads(8); +BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); +BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); + +static void BM_SetInsert(benchmark::State& state) { + std::set data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) data.insert(rand()); + } + state.SetItemsProcessed(state.iterations() * state.range(1)); + state.SetBytesProcessed(state.iterations() 
* state.range(1) * sizeof(int)); +} + +// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower, +// non-timed part of each iteration will make the benchmark take forever. +BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); + +template +static void BM_Sequential(benchmark::State& state) { + ValueType v = 42; + for (auto _ : state) { + Container c; + for (int64_t i = state.range(0); --i;) c.push_back(v); + } + const int64_t items_processed = state.iterations() * state.range(0); + state.SetItemsProcessed(items_processed); + state.SetBytesProcessed(items_processed * sizeof(v)); +} +BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int) + ->Range(1 << 0, 1 << 10); +BENCHMARK_TEMPLATE(BM_Sequential, std::list)->Range(1 << 0, 1 << 10); +// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. +#ifdef BENCHMARK_HAS_CXX11 +BENCHMARK_TEMPLATE(BM_Sequential, std::vector, int)->Arg(512); +#endif + +static void BM_StringCompare(benchmark::State& state) { + size_t len = static_cast(state.range(0)); + std::string s1(len, '-'); + std::string s2(len, '-'); + for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2)); +} +BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); + +static void BM_SetupTeardown(benchmark::State& state) { + if (state.thread_index == 0) { + // No need to lock test_vector_mu here as this is running single-threaded. 
+ test_vector = new std::vector(); + } + int i = 0; + for (auto _ : state) { + std::lock_guard l(test_vector_mu); + if (i % 2 == 0) + test_vector->push_back(i); + else + test_vector->pop_back(); + ++i; + } + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); + +static void BM_LongTest(benchmark::State& state) { + double tracker = 0.0; + for (auto _ : state) { + for (int i = 0; i < state.range(0); ++i) + benchmark::DoNotOptimize(tracker += i); + } +} +BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); + +static void BM_ParallelMemset(benchmark::State& state) { + int64_t size = state.range(0) / static_cast(sizeof(int)); + int thread_size = static_cast(size) / state.threads; + int from = thread_size * state.thread_index; + int to = from + thread_size; + + if (state.thread_index == 0) { + test_vector = new std::vector(static_cast(size)); + } + + for (auto _ : state) { + for (int i = from; i < to; i++) { + // No need to lock test_vector_mu as ranges + // do not overlap between threads. 
+ benchmark::DoNotOptimize(test_vector->at(i) = 1); + } + } + + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); + +static void BM_ManualTiming(benchmark::State& state) { + int64_t slept_for = 0; + int64_t microseconds = state.range(0); + std::chrono::duration sleep_duration{ + static_cast(microseconds)}; + + for (auto _ : state) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for( + std::chrono::duration_cast(sleep_duration)); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed = + std::chrono::duration_cast>(end - start); + + state.SetIterationTime(elapsed.count()); + slept_for += microseconds; + } + state.SetItemsProcessed(slept_for); +} +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); + +#ifdef BENCHMARK_HAS_CXX11 + +template +void BM_with_args(benchmark::State& state, Args&&...) 
{ + for (auto _ : state) { + } +} +BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); +BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), + std::pair(42, 3.8)); + +void BM_non_template_args(benchmark::State& state, int, double) { + while(state.KeepRunning()) {} +} +BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); + +#endif // BENCHMARK_HAS_CXX11 + +static void BM_DenseThreadRanges(benchmark::State& st) { + switch (st.range(0)) { + case 1: + assert(st.threads == 1 || st.threads == 2 || st.threads == 3); + break; + case 2: + assert(st.threads == 1 || st.threads == 3 || st.threads == 4); + break; + case 3: + assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || + st.threads == 14); + break; + default: + assert(false && "Invalid test case number"); + } + while (st.KeepRunning()) { + } +} +BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3); +BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); +BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc b/libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc new file mode 100644 index 000000000000..f41911a39ce7 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc @@ -0,0 +1,64 @@ +#include + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wreturn-type" +#endif + +extern "C" { + +extern int ExternInt; +extern int ExternInt2; +extern int ExternInt3; + +} + +// CHECK-LABEL: test_basic: +extern "C" void test_basic() { + int x; + benchmark::DoNotOptimize(&x); + x = 101; + benchmark::ClobberMemory(); + // CHECK: leaq [[DEST:[^,]+]], %rax + // CHECK: movl $101, [[DEST]] + // CHECK: ret +} + +// CHECK-LABEL: test_redundant_store: +extern "C" void test_redundant_store() { + ExternInt = 3; + benchmark::ClobberMemory(); + ExternInt = 51; + // CHECK-DAG: ExternInt + // CHECK-DAG: movl $3 + // 
CHECK: movl $51 +} + +// CHECK-LABEL: test_redundant_read: +extern "C" void test_redundant_read() { + int x; + benchmark::DoNotOptimize(&x); + x = ExternInt; + benchmark::ClobberMemory(); + x = ExternInt2; + // CHECK: leaq [[DEST:[^,]+]], %rax + // CHECK: ExternInt(%rip) + // CHECK: movl %eax, [[DEST]] + // CHECK-NOT: ExternInt2 + // CHECK: ret +} + +// CHECK-LABEL: test_redundant_read2: +extern "C" void test_redundant_read2() { + int x; + benchmark::DoNotOptimize(&x); + x = ExternInt; + benchmark::ClobberMemory(); + x = ExternInt2; + benchmark::ClobberMemory(); + // CHECK: leaq [[DEST:[^,]+]], %rax + // CHECK: ExternInt(%rip) + // CHECK: movl %eax, [[DEST]] + // CHECK: ExternInt2(%rip) + // CHECK: movl %eax, [[DEST]] + // CHECK: ret +} diff --git a/libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc b/libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc new file mode 100644 index 000000000000..8412008ffe35 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc @@ -0,0 +1,228 @@ +#include + +#include "../src/commandlineflags.h" +#include "../src/internal_macros.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace benchmark { +namespace { + +#if defined(BENCHMARK_OS_WINDOWS) +int setenv(const char* name, const char* value, int overwrite) { + if (!overwrite) { + // NOTE: getenv_s is far superior but not available under mingw. 
+ char* env_value = getenv(name); + if (env_value == nullptr) { + return -1; + } + } + return _putenv_s(name, value); +} + +int unsetenv(const char* name) { return _putenv_s(name, ""); } + +#endif // BENCHMARK_OS_WINDOWS + +TEST(BoolFromEnv, Default) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_EQ(BoolFromEnv("not_in_env", true), true); +} + +TEST(BoolFromEnv, False) { + ASSERT_EQ(setenv("IN_ENV", "0", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "N", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "n", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "NO", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "No", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "no", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "F", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "f", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "FALSE", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "False", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "false", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "OFF", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "Off", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "off", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", true), false); + 
unsetenv("IN_ENV"); +} + +TEST(BoolFromEnv, True) { + ASSERT_EQ(setenv("IN_ENV", "1", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "Y", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "y", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "YES", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "Yes", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "yes", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "T", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "t", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "TRUE", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "True", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "true", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "ON", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "On", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + + ASSERT_EQ(setenv("IN_ENV", "on", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); + +#ifndef BENCHMARK_OS_WINDOWS + ASSERT_EQ(setenv("IN_ENV", "", 1), 0); + EXPECT_EQ(BoolFromEnv("in_env", false), true); + unsetenv("IN_ENV"); +#endif +} + +TEST(Int32FromEnv, NotInEnv) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_EQ(Int32FromEnv("not_in_env", 42), 42); +} + +TEST(Int32FromEnv, InvalidInteger) { 
+ ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); + EXPECT_EQ(Int32FromEnv("in_env", 42), 42); + unsetenv("IN_ENV"); +} + +TEST(Int32FromEnv, ValidInteger) { + ASSERT_EQ(setenv("IN_ENV", "42", 1), 0); + EXPECT_EQ(Int32FromEnv("in_env", 64), 42); + unsetenv("IN_ENV"); +} + +TEST(DoubleFromEnv, NotInEnv) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_EQ(DoubleFromEnv("not_in_env", 0.51), 0.51); +} + +TEST(DoubleFromEnv, InvalidReal) { + ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); + EXPECT_EQ(DoubleFromEnv("in_env", 0.51), 0.51); + unsetenv("IN_ENV"); +} + +TEST(DoubleFromEnv, ValidReal) { + ASSERT_EQ(setenv("IN_ENV", "0.51", 1), 0); + EXPECT_EQ(DoubleFromEnv("in_env", 0.71), 0.51); + unsetenv("IN_ENV"); +} + +TEST(StringFromEnv, Default) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_STREQ(StringFromEnv("not_in_env", "foo"), "foo"); +} + +TEST(StringFromEnv, Valid) { + ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); + EXPECT_STREQ(StringFromEnv("in_env", "bar"), "foo"); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Default) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_THAT(KvPairsFromEnv("not_in_env", {{"foo", "bar"}}), + testing::ElementsAre(testing::Pair("foo", "bar"))); +} + +TEST(KvPairsFromEnv, MalformedReturnsDefault) { + ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {{"foo", "bar"}}), + testing::ElementsAre(testing::Pair("foo", "bar"))); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Single) { + ASSERT_EQ(setenv("IN_ENV", "foo=bar", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {}), + testing::ElementsAre(testing::Pair("foo", "bar"))); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Multiple) { + ASSERT_EQ(setenv("IN_ENV", "foo=bar,baz=qux", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {}), + testing::UnorderedElementsAre(testing::Pair("foo", "bar"), + testing::Pair("baz", "qux"))); + unsetenv("IN_ENV"); +} + +} // namespace +} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/test/complexity_test.cc 
b/libcxx/utils/google-benchmark/test/complexity_test.cc new file mode 100644 index 000000000000..0de73c5722b5 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/complexity_test.cc @@ -0,0 +1,222 @@ +#undef NDEBUG +#include +#include +#include +#include +#include +#include "benchmark/benchmark.h" +#include "output_test.h" + +namespace { + +#define ADD_COMPLEXITY_CASES(...) \ + int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) + +int AddComplexityTest(std::string test_name, std::string big_o_test_name, + std::string rms_test_name, std::string big_o, + int family_index) { + SetSubstitutions({{"%name", test_name}, + {"%bigo_name", big_o_test_name}, + {"%rms_name", rms_test_name}, + {"%bigo_str", "[ ]* %float " + big_o}, + {"%bigo", big_o}, + {"%rms", "[ ]*[0-9]+ %"}}); + AddCases( + TC_ConsoleOut, + {{"^%bigo_name %bigo_str %bigo_str[ ]*$"}, + {"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name. + {"^%rms_name %rms %rms[ ]*$", MR_Next}}); + AddCases( + TC_JSONOut, + {{"\"name\": \"%bigo_name\",$"}, + {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"%name\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": %int,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"BigO\",$", MR_Next}, + {"\"cpu_coefficient\": %float,$", MR_Next}, + {"\"real_coefficient\": %float,$", MR_Next}, + {"\"big_o\": \"%bigo\",$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}, + {"\"name\": \"%rms_name\",$"}, + {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"%name\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": %int,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"RMS\",$", MR_Next}, + {"\"rms\": %float$", MR_Next}, + {"}", MR_Next}}); + 
AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, + {"^\"%bigo_name\"", MR_Not}, + {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}}); + return 0; +} + +} // end namespace + +// ========================================================================= // +// --------------------------- Testing BigO O(1) --------------------------- // +// ========================================================================= // + +void BM_Complexity_O1(benchmark::State& state) { + for (auto _ : state) { + for (int i = 0; i < 1024; ++i) { + benchmark::DoNotOptimize(&i); + } + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(); +BENCHMARK(BM_Complexity_O1) + ->Range(1, 1 << 18) + ->Complexity([](benchmark::IterationCount) { return 1.0; }); + +const char *one_test_name = "BM_Complexity_O1"; +const char *big_o_1_test_name = "BM_Complexity_O1_BigO"; +const char *rms_o_1_test_name = "BM_Complexity_O1_RMS"; +const char *enum_big_o_1 = "\\([0-9]+\\)"; +// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto +// deduced. 
+// See https://github.com/google/benchmark/issues/272 +const char *auto_big_o_1 = "(\\([0-9]+\\))|(lgN)"; +const char *lambda_big_o_1 = "f\\(N\\)"; + +// Add enum tests +ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, + enum_big_o_1, /*family_index=*/0); + +// Add auto enum tests +ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, + auto_big_o_1, /*family_index=*/1); + +// Add lambda tests +ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, + lambda_big_o_1, /*family_index=*/2); + +// ========================================================================= // +// --------------------------- Testing BigO O(N) --------------------------- // +// ========================================================================= // + +std::vector ConstructRandomVector(int64_t size) { + std::vector v; + v.reserve(static_cast(size)); + for (int i = 0; i < size; ++i) { + v.push_back(static_cast(std::rand() % size)); + } + return v; +} + +void BM_Complexity_O_N(benchmark::State& state) { + auto v = ConstructRandomVector(state.range(0)); + // Test worst case scenario (item not in vector) + const int64_t item_not_in_vector = state.range(0) * 2; + for (auto _ : state) { + benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(benchmark::oN); +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity([](benchmark::IterationCount n) -> double { + return static_cast(n); + }); +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(); + +const char *n_test_name = "BM_Complexity_O_N"; +const char *big_o_n_test_name = "BM_Complexity_O_N_BigO"; +const char *rms_o_n_test_name = "BM_Complexity_O_N_RMS"; +const char *enum_auto_big_o_n = "N"; +const char *lambda_big_o_n = 
"f\\(N\\)"; + +// Add enum tests +ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, + enum_auto_big_o_n, /*family_index=*/3); + +// Add lambda tests +ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, + lambda_big_o_n, /*family_index=*/4); + +// ========================================================================= // +// ------------------------- Testing BigO O(N*lgN) ------------------------- // +// ========================================================================= // + +static void BM_Complexity_O_N_log_N(benchmark::State& state) { + auto v = ConstructRandomVector(state.range(0)); + for (auto _ : state) { + std::sort(v.begin(), v.end()); + } + state.SetComplexityN(state.range(0)); +} +static const double kLog2E = 1.44269504088896340736; +BENCHMARK(BM_Complexity_O_N_log_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(benchmark::oNLogN); +BENCHMARK(BM_Complexity_O_N_log_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity([](benchmark::IterationCount n) { + return kLog2E * n * log(static_cast(n)); + }); +BENCHMARK(BM_Complexity_O_N_log_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(); + +const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N"; +const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO"; +const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS"; +const char *enum_auto_big_o_n_lg_n = "NlgN"; +const char *lambda_big_o_n_lg_n = "f\\(N\\)"; + +// Add enum tests +ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, + rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n, + /*family_index=*/6); + +// Add lambda tests +ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, + rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n, + /*family_index=*/7); + +// ========================================================================= // +// -------- Testing formatting of Complexity with captured args ------------ // +// 
========================================================================= // + +void BM_ComplexityCaptureArgs(benchmark::State& state, int n) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + state.SetComplexityN(n); +} + +BENCHMARK_CAPTURE(BM_ComplexityCaptureArgs, capture_test, 100) + ->Complexity(benchmark::oN) + ->Ranges({{1, 2}, {3, 4}}); + +const std::string complexity_capture_name = + "BM_ComplexityCaptureArgs/capture_test"; + +ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO", + complexity_capture_name + "_RMS", "N", /*family_index=*/9); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char *argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/cxx03_test.cc b/libcxx/utils/google-benchmark/test/cxx03_test.cc new file mode 100644 index 000000000000..c4c9a52273e3 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/cxx03_test.cc @@ -0,0 +1,63 @@ +#undef NDEBUG +#include +#include + +#include "benchmark/benchmark.h" + +#if __cplusplus >= 201103L +#error C++11 or greater detected. Should be C++03. +#endif + +#ifdef BENCHMARK_HAS_CXX11 +#error C++11 or greater detected by the library. BENCHMARK_HAS_CXX11 is defined. +#endif + +void BM_empty(benchmark::State& state) { + while (state.KeepRunning()) { + volatile benchmark::IterationCount x = state.iterations(); + ((void)x); + } +} +BENCHMARK(BM_empty); + +// The new C++11 interface for args/ranges requires initializer list support. +// Therefore we provide the old interface to support C++03. 
+void BM_old_arg_range_interface(benchmark::State& state) { + assert((state.range(0) == 1 && state.range(1) == 2) || + (state.range(0) == 5 && state.range(1) == 6)); + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_old_arg_range_interface)->ArgPair(1, 2)->RangePair(5, 5, 6, 6); + +template +void BM_template2(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE2(BM_template2, int, long); + +template +void BM_template1(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE(BM_template1, long); +BENCHMARK_TEMPLATE1(BM_template1, int); + +template +struct BM_Fixture : public ::benchmark::Fixture { +}; + +BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) { + BM_empty(state); +} + +void BM_counters(benchmark::State& state) { + BM_empty(state); + state.counters["Foo"] = 2; +} +BENCHMARK(BM_counters); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/diagnostics_test.cc b/libcxx/utils/google-benchmark/test/diagnostics_test.cc new file mode 100644 index 000000000000..dd64a3365531 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/diagnostics_test.cc @@ -0,0 +1,80 @@ +// Testing: +// State::PauseTiming() +// State::ResumeTiming() +// Test that CHECK's within these function diagnose when they are called +// outside of the KeepRunning() loop. +// +// NOTE: Users should NOT include or use src/check.h. This is only done in +// order to test library internals. 
+ +#include +#include + +#include "../src/check.h" +#include "benchmark/benchmark.h" + +#if defined(__GNUC__) && !defined(__EXCEPTIONS) +#define TEST_HAS_NO_EXCEPTIONS +#endif + +void TestHandler() { +#ifndef TEST_HAS_NO_EXCEPTIONS + throw std::logic_error(""); +#else + std::abort(); +#endif +} + +void try_invalid_pause_resume(benchmark::State& state) { +#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS) + try { + state.PauseTiming(); + std::abort(); + } catch (std::logic_error const&) { + } + try { + state.ResumeTiming(); + std::abort(); + } catch (std::logic_error const&) { + } +#else + (void)state; // avoid unused warning +#endif +} + +void BM_diagnostic_test(benchmark::State& state) { + static bool called_once = false; + + if (called_once == false) try_invalid_pause_resume(state); + + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } + + if (called_once == false) try_invalid_pause_resume(state); + + called_once = true; +} +BENCHMARK(BM_diagnostic_test); + + +void BM_diagnostic_test_keep_running(benchmark::State& state) { + static bool called_once = false; + + if (called_once == false) try_invalid_pause_resume(state); + + while(state.KeepRunning()) { + benchmark::DoNotOptimize(state.iterations()); + } + + if (called_once == false) try_invalid_pause_resume(state); + + called_once = true; +} +BENCHMARK(BM_diagnostic_test_keep_running); + +int main(int argc, char* argv[]) { + benchmark::internal::GetAbortHandler() = &TestHandler; + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc b/libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc new file mode 100644 index 000000000000..3c36d3f03c11 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc @@ -0,0 +1,43 @@ + +#undef NDEBUG +#include +#include + +#include "benchmark/benchmark.h" +#include 
"output_test.h" + +// Ok this test is super ugly. We want to check what happens with the file +// reporter in the presence of DisplayAggregatesOnly(). +// We do not care about console output, the normal tests check that already. + +void BM_SummaryRepeat(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); + +int main(int argc, char* argv[]) { + const std::string output = GetFileReporterOutput(argc, argv); + + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != + 1) { + std::cout << "Precondition mismatch. Expected to only find 6 " + "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" + "\"name\": \"BM_SummaryRepeat/repeats:3\", " + "\"name\": \"BM_SummaryRepeat/repeats:3\", " + "\"name\": \"BM_SummaryRepeat/repeats:3\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "output:\n"; + std::cout << output; + return 1; + } + + return 0; +} diff --git a/libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc b/libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc new file mode 100644 index 000000000000..d4b0bab70e77 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc @@ -0,0 +1,163 @@ +#include + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wreturn-type" +#endif + +extern "C" { + +extern int ExternInt; +extern int ExternInt2; +extern int ExternInt3; + +inline int Add42(int x) { return x + 42; } + +struct NotTriviallyCopyable { + NotTriviallyCopyable(); + explicit 
NotTriviallyCopyable(int x) : value(x) {} + NotTriviallyCopyable(NotTriviallyCopyable const&); + int value; +}; + +struct Large { + int value; + int data[2]; +}; + +} +// CHECK-LABEL: test_with_rvalue: +extern "C" void test_with_rvalue() { + benchmark::DoNotOptimize(Add42(0)); + // CHECK: movl $42, %eax + // CHECK: ret +} + +// CHECK-LABEL: test_with_large_rvalue: +extern "C" void test_with_large_rvalue() { + benchmark::DoNotOptimize(Large{ExternInt, {ExternInt, ExternInt}}); + // CHECK: ExternInt(%rip) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]] + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) + // CHECK: ret +} + +// CHECK-LABEL: test_with_non_trivial_rvalue: +extern "C" void test_with_non_trivial_rvalue() { + benchmark::DoNotOptimize(NotTriviallyCopyable(ExternInt)); + // CHECK: mov{{l|q}} ExternInt(%rip) + // CHECK: ret +} + +// CHECK-LABEL: test_with_lvalue: +extern "C" void test_with_lvalue() { + int x = 101; + benchmark::DoNotOptimize(x); + // CHECK-GNU: movl $101, %eax + // CHECK-CLANG: movl $101, -{{[0-9]+}}(%[[REG:[a-z]+]]) + // CHECK: ret +} + +// CHECK-LABEL: test_with_large_lvalue: +extern "C" void test_with_large_lvalue() { + Large L{ExternInt, {ExternInt, ExternInt}}; + benchmark::DoNotOptimize(L); + // CHECK: ExternInt(%rip) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) + // CHECK: ret +} + +// CHECK-LABEL: test_with_non_trivial_lvalue: +extern "C" void test_with_non_trivial_lvalue() { + NotTriviallyCopyable NTC(ExternInt); + benchmark::DoNotOptimize(NTC); + // CHECK: ExternInt(%rip) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) + // CHECK: ret +} + +// CHECK-LABEL: test_with_const_lvalue: +extern "C" void test_with_const_lvalue() { + const int x = 123; + benchmark::DoNotOptimize(x); + // CHECK: movl $123, %eax + // CHECK: ret +} + +// CHECK-LABEL: test_with_large_const_lvalue: +extern "C" void 
test_with_large_const_lvalue() { + const Large L{ExternInt, {ExternInt, ExternInt}}; + benchmark::DoNotOptimize(L); + // CHECK: ExternInt(%rip) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) + // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) + // CHECK: ret +} + +// CHECK-LABEL: test_with_non_trivial_const_lvalue: +extern "C" void test_with_non_trivial_const_lvalue() { + const NotTriviallyCopyable Obj(ExternInt); + benchmark::DoNotOptimize(Obj); + // CHECK: mov{{q|l}} ExternInt(%rip) + // CHECK: ret +} + +// CHECK-LABEL: test_div_by_two: +extern "C" int test_div_by_two(int input) { + int divisor = 2; + benchmark::DoNotOptimize(divisor); + return input / divisor; + // CHECK: movl $2, [[DEST:.*]] + // CHECK: idivl [[DEST]] + // CHECK: ret +} + +// CHECK-LABEL: test_inc_integer: +extern "C" int test_inc_integer() { + int x = 0; + for (int i=0; i < 5; ++i) + benchmark::DoNotOptimize(++x); + // CHECK: movl $1, [[DEST:.*]] + // CHECK: {{(addl \$1,|incl)}} [[DEST]] + // CHECK: {{(addl \$1,|incl)}} [[DEST]] + // CHECK: {{(addl \$1,|incl)}} [[DEST]] + // CHECK: {{(addl \$1,|incl)}} [[DEST]] + // CHECK-CLANG: movl [[DEST]], %eax + // CHECK: ret + return x; +} + +// CHECK-LABEL: test_pointer_rvalue +extern "C" void test_pointer_rvalue() { + // CHECK: movl $42, [[DEST:.*]] + // CHECK: leaq [[DEST]], %rax + // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) + // CHECK: ret + int x = 42; + benchmark::DoNotOptimize(&x); +} + +// CHECK-LABEL: test_pointer_const_lvalue: +extern "C" void test_pointer_const_lvalue() { + // CHECK: movl $42, [[DEST:.*]] + // CHECK: leaq [[DEST]], %rax + // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) + // CHECK: ret + int x = 42; + int * const xp = &x; + benchmark::DoNotOptimize(xp); +} + +// CHECK-LABEL: test_pointer_lvalue: +extern "C" void test_pointer_lvalue() { + // CHECK: movl $42, [[DEST:.*]] + // CHECK: leaq [[DEST]], %rax + // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z+]+]]) + // 
CHECK: ret + int x = 42; + int *xp = &x; + benchmark::DoNotOptimize(xp); +} diff --git a/libcxx/utils/google-benchmark/test/donotoptimize_test.cc b/libcxx/utils/google-benchmark/test/donotoptimize_test.cc new file mode 100644 index 000000000000..2ce92d1c72be --- /dev/null +++ b/libcxx/utils/google-benchmark/test/donotoptimize_test.cc @@ -0,0 +1,52 @@ +#include "benchmark/benchmark.h" + +#include + +namespace { +#if defined(__GNUC__) +std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); +#endif +std::uint64_t double_up(const std::uint64_t x) { return x * 2; } +} + +// Using DoNotOptimize on types like BitRef seem to cause a lot of problems +// with the inline assembly on both GCC and Clang. +struct BitRef { + int index; + unsigned char &byte; + +public: + static BitRef Make() { + static unsigned char arr[2] = {}; + BitRef b(1, arr[0]); + return b; + } +private: + BitRef(int i, unsigned char& b) : index(i), byte(b) {} +}; + +int main(int, char*[]) { + // this test verifies compilation of DoNotOptimize() for some types + + char buffer8[8] = ""; + benchmark::DoNotOptimize(buffer8); + + char buffer20[20] = ""; + benchmark::DoNotOptimize(buffer20); + + char buffer1024[1024] = ""; + benchmark::DoNotOptimize(buffer1024); + benchmark::DoNotOptimize(&buffer1024[0]); + + int x = 123; + benchmark::DoNotOptimize(x); + benchmark::DoNotOptimize(&x); + benchmark::DoNotOptimize(x += 42); + + benchmark::DoNotOptimize(double_up(x)); + + // These tests are to e + benchmark::DoNotOptimize(BitRef::Make()); + BitRef lval = BitRef::Make(); + benchmark::DoNotOptimize(lval); +} diff --git a/libcxx/utils/google-benchmark/test/filter_test.cc b/libcxx/utils/google-benchmark/test/filter_test.cc new file mode 100644 index 000000000000..1c198913b36a --- /dev/null +++ b/libcxx/utils/google-benchmark/test/filter_test.cc @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +namespace { + 
+class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + ++count_; + max_family_index_ = + std::max(max_family_index_, report[0].family_index); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() : count_(0), max_family_index_(0) {} + + virtual ~TestReporter() {} + + size_t GetCount() const { return count_; } + + size_t GetMaxFamilyIndex() const { return max_family_index_; } + + private: + mutable size_t count_; + mutable size_t max_family_index_; +}; + +} // end namespace + +static void NoPrefix(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(NoPrefix); + +static void BM_Foo(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Foo); + +static void BM_Bar(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Bar); + +static void BM_FooBar(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_FooBar); + +static void BM_FooBa(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_FooBa); + +int main(int argc, char **argv) { + bool list_only = false; + for (int i = 0; i < argc; ++i) + list_only |= std::string(argv[i]).find("--benchmark_list_tests") != + std::string::npos; + + benchmark::Initialize(&argc, argv); + + TestReporter test_reporter; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter); + + if (argc == 2) { + // Make sure we ran all of the tests + std::stringstream ss(argv[1]); + size_t expected_return; + ss >> expected_return; + + if (returned_count != expected_return) { + std::cerr << "ERROR: Expected " << expected_return + << " tests to match the filter but returned_count = " + << returned_count << std::endl; + return -1; + } + + const size_t expected_reports = list_only ? 
0 : expected_return; + const size_t reports_count = test_reporter.GetCount(); + if (reports_count != expected_reports) { + std::cerr << "ERROR: Expected " << expected_reports + << " tests to be run but reported_count = " << reports_count + << std::endl; + return -1; + } + + const size_t max_family_index = test_reporter.GetMaxFamilyIndex(); + const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index; + if (num_families != expected_reports) { + std::cerr << "ERROR: Expected " << expected_reports + << " test families to be run but num_families = " + << num_families << std::endl; + return -1; + } + } + + return 0; +} diff --git a/libcxx/utils/google-benchmark/test/fixture_test.cc b/libcxx/utils/google-benchmark/test/fixture_test.cc new file mode 100644 index 000000000000..eba0a42d9cb0 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/fixture_test.cc @@ -0,0 +1,51 @@ + +#include "benchmark/benchmark.h" + +#include +#include + +#define FIXTURE_BECHMARK_NAME MyFixture + +class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + if (state.thread_index == 0) { + assert(data.get() == nullptr); + data.reset(new int(42)); + } + } + + void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + if (state.thread_index == 0) { + assert(data.get() != nullptr); + data.reset(); + } + } + + ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); } + + std::unique_ptr data; +}; + +BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) { + assert(data.get() != nullptr); + assert(*data == 42); + for (auto _ : st) { + } +} + +BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) { + if (st.thread_index == 0) { + assert(data.get() != nullptr); + assert(*data == 42); + } + for (auto _ : st) { + assert(data.get() != nullptr); + assert(*data == 42); + } + st.SetItemsProcessed(st.range(0)); +} +BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42); 
+BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42)->ThreadPerCpu(); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/internal_threading_test.cc b/libcxx/utils/google-benchmark/test/internal_threading_test.cc new file mode 100644 index 000000000000..039d7c14a8c4 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/internal_threading_test.cc @@ -0,0 +1,184 @@ + +#undef NDEBUG + +#include +#include +#include "../src/timers.h" +#include "benchmark/benchmark.h" +#include "output_test.h" + +static const std::chrono::duration time_frame(50); +static const double time_frame_in_sec( + std::chrono::duration_cast>>( + time_frame) + .count()); + +void MyBusySpinwait() { + const auto start = benchmark::ChronoClockNow(); + + while (true) { + const auto now = benchmark::ChronoClockNow(); + const auto elapsed = now - start; + + if (std::chrono::duration(elapsed) >= + time_frame) + return; + } +} + +// ========================================================================= // +// --------------------------- TEST CASES BEGIN ---------------------------- // +// ========================================================================= // + +// ========================================================================= // +// BM_MainThread + +void BM_MainThread(benchmark::State& state) { + for (auto _ : state) { + MyBusySpinwait(); + state.SetIterationTime(time_frame_in_sec); + } + state.counters["invtime"] = + benchmark::Counter{1, benchmark::Counter::kIsRate}; +} + +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1); +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseRealTime(); +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseManualTime(); +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime(); +BENCHMARK(BM_MainThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime() + ->UseRealTime(); +BENCHMARK(BM_MainThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime() + ->UseManualTime(); + 
+BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2); +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseRealTime(); +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseManualTime(); +BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime(); +BENCHMARK(BM_MainThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime() + ->UseRealTime(); +BENCHMARK(BM_MainThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime() + ->UseManualTime(); + +// ========================================================================= // +// BM_WorkerThread + +void BM_WorkerThread(benchmark::State& state) { + for (auto _ : state) { + std::thread Worker(&MyBusySpinwait); + Worker.join(); + state.SetIterationTime(time_frame_in_sec); + } + state.counters["invtime"] = + benchmark::Counter{1, benchmark::Counter::kIsRate}; +} + +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1); +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseRealTime(); +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseManualTime(); +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime(); +BENCHMARK(BM_WorkerThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime() + ->UseRealTime(); +BENCHMARK(BM_WorkerThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime() + ->UseManualTime(); + +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2); +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseRealTime(); +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseManualTime(); +BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime(); +BENCHMARK(BM_WorkerThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime() + ->UseRealTime(); +BENCHMARK(BM_WorkerThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime() + ->UseManualTime(); + +// ========================================================================= // +// BM_MainThreadAndWorkerThread + +void 
BM_MainThreadAndWorkerThread(benchmark::State& state) { + for (auto _ : state) { + std::thread Worker(&MyBusySpinwait); + MyBusySpinwait(); + Worker.join(); + state.SetIterationTime(time_frame_in_sec); + } + state.counters["invtime"] = + benchmark::Counter{1, benchmark::Counter::kIsRate}; +} + +BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(1); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(1) + ->UseRealTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(1) + ->UseManualTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime() + ->UseRealTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(1) + ->MeasureProcessCPUTime() + ->UseManualTime(); + +BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(2); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(2) + ->UseRealTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(2) + ->UseManualTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime() + ->UseRealTime(); +BENCHMARK(BM_MainThreadAndWorkerThread) + ->Iterations(1) + ->Threads(2) + ->MeasureProcessCPUTime() + ->UseManualTime(); + +// ========================================================================= // +// ---------------------------- TEST CASES END ----------------------------- // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/link_main_test.cc b/libcxx/utils/google-benchmark/test/link_main_test.cc new file mode 100644 index 000000000000..241ad5c3905e --- 
/dev/null +++ b/libcxx/utils/google-benchmark/test/link_main_test.cc @@ -0,0 +1,8 @@ +#include "benchmark/benchmark.h" + +void BM_empty(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_empty); diff --git a/libcxx/utils/google-benchmark/test/map_test.cc b/libcxx/utils/google-benchmark/test/map_test.cc new file mode 100644 index 000000000000..86391b36016f --- /dev/null +++ b/libcxx/utils/google-benchmark/test/map_test.cc @@ -0,0 +1,57 @@ +#include "benchmark/benchmark.h" + +#include +#include + +namespace { + +std::map ConstructRandomMap(int size) { + std::map m; + for (int i = 0; i < size; ++i) { + m.insert(std::make_pair(std::rand() % size, std::rand() % size)); + } + return m; +} + +} // namespace + +// Basic version. +static void BM_MapLookup(benchmark::State& state) { + const int size = static_cast(state.range(0)); + std::map m; + for (auto _ : state) { + state.PauseTiming(); + m = ConstructRandomMap(size); + state.ResumeTiming(); + for (int i = 0; i < size; ++i) { + benchmark::DoNotOptimize(m.find(std::rand() % size)); + } + } + state.SetItemsProcessed(state.iterations() * size); +} +BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12); + +// Using fixtures. 
+class MapFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& st) BENCHMARK_OVERRIDE { + m = ConstructRandomMap(static_cast(st.range(0))); + } + + void TearDown(const ::benchmark::State&) BENCHMARK_OVERRIDE { m.clear(); } + + std::map m; +}; + +BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) { + const int size = static_cast(state.range(0)); + for (auto _ : state) { + for (int i = 0; i < size; ++i) { + benchmark::DoNotOptimize(m.find(std::rand() % size)); + } + } + state.SetItemsProcessed(state.iterations() * size); +} +BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1 << 3, 1 << 12); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/memory_manager_test.cc b/libcxx/utils/google-benchmark/test/memory_manager_test.cc new file mode 100644 index 000000000000..f0c192fcbd00 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/memory_manager_test.cc @@ -0,0 +1,46 @@ +#include + +#include "../src/check.h" +#include "benchmark/benchmark.h" +#include "output_test.h" + +class TestMemoryManager : public benchmark::MemoryManager { + void Start() BENCHMARK_OVERRIDE {} + void Stop(Result* result) BENCHMARK_OVERRIDE { + result->num_allocs = 42; + result->max_bytes_used = 42000; + } +}; + +void BM_empty(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_empty); + +ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_empty\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"allocs_per_iter\": 
%float,$", MR_Next}, + {"\"max_bytes_used\": 42000$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}}); + +int main(int argc, char* argv[]) { + std::unique_ptr mm(new TestMemoryManager()); + + benchmark::RegisterMemoryManager(mm.get()); + RunOutputTests(argc, argv); + benchmark::RegisterMemoryManager(nullptr); +} diff --git a/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc b/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc new file mode 100644 index 000000000000..6b61f3af47bb --- /dev/null +++ b/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc @@ -0,0 +1,96 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include +#include + +class MultipleRangesFixture : public ::benchmark::Fixture { + public: + MultipleRangesFixture() + : expectedValues({{1, 3, 5}, + {1, 3, 8}, + {1, 3, 15}, + {2, 3, 5}, + {2, 3, 8}, + {2, 3, 15}, + {1, 4, 5}, + {1, 4, 8}, + {1, 4, 15}, + {2, 4, 5}, + {2, 4, 8}, + {2, 4, 15}, + {1, 7, 5}, + {1, 7, 8}, + {1, 7, 15}, + {2, 7, 5}, + {2, 7, 8}, + {2, 7, 15}, + {7, 6, 3}}) {} + + void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { + std::vector ranges = {state.range(0), state.range(1), + state.range(2)}; + + assert(expectedValues.find(ranges) != expectedValues.end()); + + actualValues.insert(ranges); + } + + // NOTE: This is not TearDown as we want to check after _all_ runs are + // complete. 
+ virtual ~MultipleRangesFixture() { + if (actualValues != expectedValues) { + std::cout << "EXPECTED\n"; + for (auto v : expectedValues) { + std::cout << "{"; + for (int64_t iv : v) { + std::cout << iv << ", "; + } + std::cout << "}\n"; + } + std::cout << "ACTUAL\n"; + for (auto v : actualValues) { + std::cout << "{"; + for (int64_t iv : v) { + std::cout << iv << ", "; + } + std::cout << "}\n"; + } + } + } + + std::set> expectedValues; + std::set> actualValues; +}; + +BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) { + for (auto _ : state) { + int64_t product = state.range(0) * state.range(1) * state.range(2); + for (int64_t x = 0; x < product; x++) { + benchmark::DoNotOptimize(x); + } + } +} + +BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty) + ->RangeMultiplier(2) + ->Ranges({{1, 2}, {3, 7}, {5, 15}}) + ->Args({7, 6, 3}); + +void BM_CheckDefaultArgument(benchmark::State& state) { + // Test that the 'range()' without an argument is the same as 'range(0)'. + assert(state.range() == state.range(0)); + assert(state.range() != state.range(1)); + for (auto _ : state) { + } +} +BENCHMARK(BM_CheckDefaultArgument)->Ranges({{1, 5}, {6, 10}}); + +static void BM_MultipleRanges(benchmark::State& st) { + for (auto _ : st) { + } +} +BENCHMARK(BM_MultipleRanges)->Ranges({{5, 5}, {6, 6}}); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/options_test.cc b/libcxx/utils/google-benchmark/test/options_test.cc new file mode 100644 index 000000000000..9f9a78667c9e --- /dev/null +++ b/libcxx/utils/google-benchmark/test/options_test.cc @@ -0,0 +1,76 @@ +#include "benchmark/benchmark.h" +#include +#include + +#if defined(NDEBUG) +#undef NDEBUG +#endif +#include + +void BM_basic(benchmark::State& state) { + for (auto _ : state) { + } +} + +void BM_basic_slow(benchmark::State& state) { + std::chrono::milliseconds sleep_duration(state.range(0)); + for (auto _ : state) { + std::this_thread::sleep_for( + 
std::chrono::duration_cast(sleep_duration)); + } +} + +BENCHMARK(BM_basic); +BENCHMARK(BM_basic)->Arg(42); +BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond); +BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kSecond); +BENCHMARK(BM_basic)->Range(1, 8); +BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8); +BENCHMARK(BM_basic)->DenseRange(10, 15); +BENCHMARK(BM_basic)->Args({42, 42}); +BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}}); +BENCHMARK(BM_basic)->MinTime(0.7); +BENCHMARK(BM_basic)->UseRealTime(); +BENCHMARK(BM_basic)->ThreadRange(2, 4); +BENCHMARK(BM_basic)->ThreadPerCpu(); +BENCHMARK(BM_basic)->Repetitions(3); +BENCHMARK(BM_basic) + ->RangeMultiplier(std::numeric_limits::max()) + ->Range(std::numeric_limits::min(), + std::numeric_limits::max()); + +// Negative ranges +BENCHMARK(BM_basic)->Range(-64, -1); +BENCHMARK(BM_basic)->RangeMultiplier(4)->Range(-8, 8); +BENCHMARK(BM_basic)->DenseRange(-2, 2, 1); +BENCHMARK(BM_basic)->Ranges({{-64, 1}, {-8, -1}}); + +void CustomArgs(benchmark::internal::Benchmark* b) { + for (int i = 0; i < 10; ++i) { + b->Arg(i); + } +} + +BENCHMARK(BM_basic)->Apply(CustomArgs); + +void BM_explicit_iteration_count(benchmark::State& state) { + // Test that benchmarks specified with an explicit iteration count are + // only run once. + static bool invoked_before = false; + assert(!invoked_before); + invoked_before = true; + + // Test that the requested iteration count is respected. 
+ assert(state.max_iterations == 42); + size_t actual_iterations = 0; + for (auto _ : state) + ++actual_iterations; + assert(state.iterations() == state.max_iterations); + assert(state.iterations() == 42); + +} +BENCHMARK(BM_explicit_iteration_count)->Iterations(42); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/output_test.h b/libcxx/utils/google-benchmark/test/output_test.h new file mode 100644 index 000000000000..15368f9b6830 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/output_test.h @@ -0,0 +1,213 @@ +#ifndef TEST_OUTPUT_TEST_H +#define TEST_OUTPUT_TEST_H + +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include + +#include "../src/re.h" +#include "benchmark/benchmark.h" + +#define CONCAT2(x, y) x##y +#define CONCAT(x, y) CONCAT2(x, y) + +#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__) + +#define SET_SUBSTITUTIONS(...) \ + int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__) + +enum MatchRules { + MR_Default, // Skip non-matching lines until a match is found. + MR_Next, // Match must occur on the next line. + MR_Not // No line between the current position and the next match matches + // the regex +}; + +struct TestCase { + TestCase(std::string re, int rule = MR_Default); + + std::string regex_str; + int match_rule; + std::string substituted_regex; + std::shared_ptr regex; +}; + +enum TestCaseID { + TC_ConsoleOut, + TC_ConsoleErr, + TC_JSONOut, + TC_JSONErr, + TC_CSVOut, + TC_CSVErr, + + TC_NumID // PRIVATE +}; + +// Add a list of test cases to be run against the output specified by +// 'ID' +int AddCases(TestCaseID ID, std::initializer_list il); + +// Add or set a list of substitutions to be performed on constructed regex's +// See 'output_test_helper.cc' for a list of default substitutions. +int SetSubstitutions( + std::initializer_list> il); + +// Run all output tests. 
+void RunOutputTests(int argc, char* argv[]); + +// Count the number of 'pat' substrings in the 'haystack' string. +int SubstrCnt(const std::string& haystack, const std::string& pat); + +// Run registered benchmarks with file reporter enabled, and return the content +// outputted by the file reporter. +std::string GetFileReporterOutput(int argc, char* argv[]); + +// ========================================================================= // +// ------------------------- Results checking ------------------------------ // +// ========================================================================= // + +// Call this macro to register a benchmark for checking its results. This +// should be all that's needed. It subscribes a function to check the (CSV) +// results of a benchmark. This is done only after verifying that the output +// strings are really as expected. +// bm_name_pattern: a name or a regex pattern which will be matched against +// all the benchmark names. Matching benchmarks +// will be the subject of a call to checker_function +// checker_function: should be of type ResultsCheckFn (see below) +#define CHECK_BENCHMARK_RESULTS(bm_name_pattern, checker_function) \ + size_t CONCAT(dummy, __LINE__) = AddChecker(bm_name_pattern, checker_function) + +struct Results; +typedef std::function ResultsCheckFn; + +size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn); + +// Class holding the results of a benchmark. +// It is passed in calls to checker functions. +struct Results { + // the benchmark name + std::string name; + // the benchmark fields + std::map values; + + Results(const std::string& n) : name(n) {} + + int NumThreads() const; + + double NumIterations() const; + + typedef enum { kCpuTime, kRealTime } BenchmarkTime; + + // get cpu_time or real_time in seconds + double GetTime(BenchmarkTime which) const; + + // get the real_time duration of the benchmark in seconds. 
+ // it is better to use fuzzy float checks for this, as the float + // ASCII formatting is lossy. + double DurationRealTime() const { + return NumIterations() * GetTime(kRealTime); + } + // get the cpu_time duration of the benchmark in seconds + double DurationCPUTime() const { + return NumIterations() * GetTime(kCpuTime); + } + + // get the string for a result by name, or nullptr if the name + // is not found + const std::string* Get(const char* entry_name) const { + auto it = values.find(entry_name); + if (it == values.end()) return nullptr; + return &it->second; + } + + // get a result by name, parsed as a specific type. + // NOTE: for counters, use GetCounterAs instead. + template + T GetAs(const char* entry_name) const; + + // counters are written as doubles, so they have to be read first + // as a double, and only then converted to the asked type. + template + T GetCounterAs(const char* entry_name) const { + double dval = GetAs(entry_name); + T tval = static_cast(dval); + return tval; + } +}; + +template +T Results::GetAs(const char* entry_name) const { + auto* sv = Get(entry_name); + CHECK(sv != nullptr && !sv->empty()); + std::stringstream ss; + ss << *sv; + T out; + ss >> out; + CHECK(!ss.fail()); + return out; +} + +//---------------------------------- +// Macros to help in result checking. Do not use them with arguments causing +// side-effects. + +// clang-format off + +#define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \ + CONCAT(CHECK_, relationship) \ + (entry.getfn< var_type >(var_name), (value)) << "\n" \ + << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ + << __FILE__ << ":" << __LINE__ << ": " \ + << "expected (" << #var_type << ")" << (var_name) \ + << "=" << (entry).getfn< var_type >(var_name) \ + << " to be " #relationship " to " << (value) << "\n" + +// check with tolerance. eps_factor is the tolerance window, which is +// interpreted relative to value (eg, 0.1 means 10% of value). 
+#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ + CONCAT(CHECK_FLOAT_, relationship) \ + (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \ + << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ + << __FILE__ << ":" << __LINE__ << ": " \ + << "expected (" << #var_type << ")" << (var_name) \ + << "=" << (entry).getfn< var_type >(var_name) \ + << " to be " #relationship " to " << (value) << "\n" \ + << __FILE__ << ":" << __LINE__ << ": " \ + << "with tolerance of " << (eps_factor) * (value) \ + << " (" << (eps_factor)*100. << "%), " \ + << "but delta was " << ((entry).getfn< var_type >(var_name) - (value)) \ + << " (" << (((entry).getfn< var_type >(var_name) - (value)) \ + / \ + ((value) > 1.e-5 || value < -1.e-5 ? value : 1.e-5)*100.) \ + << "%)" + +#define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \ + CHECK_RESULT_VALUE_IMPL(entry, GetAs, var_type, var_name, relationship, value) + +#define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \ + CHECK_RESULT_VALUE_IMPL(entry, GetCounterAs, var_type, var_name, relationship, value) + +#define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \ + CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetAs, double, var_name, relationship, value, eps_factor) + +#define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \ + CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) + +// clang-format on + +// ========================================================================= // +// --------------------------- Misc Utilities ------------------------------ // +// ========================================================================= // + +namespace { + +const char* const dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; + +} // end namespace + +#endif // TEST_OUTPUT_TEST_H diff --git 
a/libcxx/utils/google-benchmark/test/output_test_helper.cc b/libcxx/utils/google-benchmark/test/output_test_helper.cc new file mode 100644 index 000000000000..b8ef1205744a --- /dev/null +++ b/libcxx/utils/google-benchmark/test/output_test_helper.cc @@ -0,0 +1,520 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../src/benchmark_api_internal.h" +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include "../src/re.h" // NOTE: re.h is for internal use only +#include "output_test.h" + +// ========================================================================= // +// ------------------------------ Internals -------------------------------- // +// ========================================================================= // +namespace internal { +namespace { + +using TestCaseList = std::vector; + +// Use a vector because the order elements are added matters during iteration. +// std::map/unordered_map don't guarantee that. +// For example: +// SetSubstitutions({{"%HelloWorld", "Hello"}, {"%Hello", "Hi"}}); +// Substitute("%HelloWorld") // Always expands to Hello. +using SubMap = std::vector>; + +TestCaseList& GetTestCaseList(TestCaseID ID) { + // Uses function-local statics to ensure initialization occurs + // before first use. + static TestCaseList lists[TC_NumID]; + return lists[ID]; +} + +SubMap& GetSubstitutions() { + // Don't use 'dec_re' from header because it may not yet be initialized. 
+ // clang-format off + static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; + static std::string time_re = "([0-9]+[.])?[0-9]+"; + static SubMap map = { + {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, + // human-readable float + {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"}, + {"%int", "[ ]*[0-9]+"}, + {" %s ", "[ ]+"}, + {"%time", "[ ]*" + time_re + "[ ]+ns"}, + {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, + {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, + {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, + {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, + {"%console_time_only_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns"}, + {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, + {"%console_us_time_only_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us"}, + {"%csv_header", + "name,iterations,real_time,cpu_time,time_unit,bytes_per_second," + "items_per_second,label,error_occurred,error_message"}, + {"%csv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,,,"}, + {"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"}, + {"%csv_ms_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ms,,,,,"}, + {"%csv_s_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",s,,,,,"}, + {"%csv_bytes_report", + "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"}, + {"%csv_items_report", + "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,," + safe_dec_re + ",,,"}, + {"%csv_bytes_items_report", + "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + + "," + safe_dec_re + ",,,"}, + {"%csv_label_report_begin", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,"}, + {"%csv_label_report_end", ",,"}}; + // clang-format on + return map; +} 
+ +std::string PerformSubstitutions(std::string source) { + SubMap const& subs = GetSubstitutions(); + using SizeT = std::string::size_type; + for (auto const& KV : subs) { + SizeT pos; + SizeT next_start = 0; + while ((pos = source.find(KV.first, next_start)) != std::string::npos) { + next_start = pos + KV.second.size(); + source.replace(pos, KV.first.size(), KV.second); + } + } + return source; +} + +void CheckCase(std::stringstream& remaining_output, TestCase const& TC, + TestCaseList const& not_checks) { + std::string first_line; + bool on_first = true; + std::string line; + while (remaining_output.eof() == false) { + CHECK(remaining_output.good()); + std::getline(remaining_output, line); + if (on_first) { + first_line = line; + on_first = false; + } + for (const auto& NC : not_checks) { + CHECK(!NC.regex->Match(line)) + << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" + << NC.regex_str << "\"" + << "\n actual regex string \"" << TC.substituted_regex << "\"" + << "\n started matching near: " << first_line; + } + if (TC.regex->Match(line)) return; + CHECK(TC.match_rule != MR_Next) + << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str + << "\"" + << "\n actual regex string \"" << TC.substituted_regex << "\"" + << "\n started matching near: " << first_line; + } + CHECK(remaining_output.eof() == false) + << "End of output reached before match for regex \"" << TC.regex_str + << "\" was found" + << "\n actual regex string \"" << TC.substituted_regex << "\"" + << "\n started matching near: " << first_line; +} + +void CheckCases(TestCaseList const& checks, std::stringstream& output) { + std::vector not_checks; + for (size_t i = 0; i < checks.size(); ++i) { + const auto& TC = checks[i]; + if (TC.match_rule == MR_Not) { + not_checks.push_back(TC); + continue; + } + CheckCase(output, TC, not_checks); + not_checks.clear(); + } +} + +class TestReporter : public benchmark::BenchmarkReporter { + public: + TestReporter(std::vector reps) 
+ : reporters_(reps) {} + + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + bool last_ret = false; + bool first = true; + for (auto rep : reporters_) { + bool new_ret = rep->ReportContext(context); + CHECK(first || new_ret == last_ret) + << "Reports return different values for ReportContext"; + first = false; + last_ret = new_ret; + } + (void)first; + return last_ret; + } + + void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + for (auto rep : reporters_) rep->ReportRuns(report); + } + void Finalize() BENCHMARK_OVERRIDE { + for (auto rep : reporters_) rep->Finalize(); + } + + private: + std::vector reporters_; +}; +} // namespace + +} // end namespace internal + +// ========================================================================= // +// -------------------------- Results checking ----------------------------- // +// ========================================================================= // + +namespace internal { + +// Utility class to manage subscribers for checking benchmark results. +// It works by parsing the CSV output to read the results. 
+class ResultsChecker { + public: + struct PatternAndFn : public TestCase { // reusing TestCase for its regexes + PatternAndFn(const std::string& rx, ResultsCheckFn fn_) + : TestCase(rx), fn(fn_) {} + ResultsCheckFn fn; + }; + + std::vector check_patterns; + std::vector results; + std::vector field_names; + + void Add(const std::string& entry_pattern, ResultsCheckFn fn); + + void CheckResults(std::stringstream& output); + + private: + void SetHeader_(const std::string& csv_header); + void SetValues_(const std::string& entry_csv_line); + + std::vector SplitCsv_(const std::string& line); +}; + +// store the static ResultsChecker in a function to prevent initialization +// order problems +ResultsChecker& GetResultsChecker() { + static ResultsChecker rc; + return rc; +} + +// add a results checker for a benchmark +void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) { + check_patterns.emplace_back(entry_pattern, fn); +} + +// check the results of all subscribed benchmarks +void ResultsChecker::CheckResults(std::stringstream& output) { + // first reset the stream to the start + { + auto start = std::stringstream::pos_type(0); + // clear before calling tellg() + output.clear(); + // seek to zero only when needed + if (output.tellg() > start) output.seekg(start); + // and just in case + output.clear(); + } + // now go over every line and publish it to the ResultsChecker + std::string line; + bool on_first = true; + while (output.eof() == false) { + CHECK(output.good()); + std::getline(output, line); + if (on_first) { + SetHeader_(line); // this is important + on_first = false; + continue; + } + SetValues_(line); + } + // finally we can call the subscribed check functions + for (const auto& p : check_patterns) { + VLOG(2) << "--------------------------------\n"; + VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; + for (const auto& r : results) { + if (!p.regex->Match(r.name)) { + VLOG(2) << p.regex_str << " is not matched 
by " << r.name << "\n"; + continue; + } else { + VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; + } + VLOG(1) << "Checking results of " << r.name << ": ... \n"; + p.fn(r); + VLOG(1) << "Checking results of " << r.name << ": OK.\n"; + } + } +} + +// prepare for the names in this header +void ResultsChecker::SetHeader_(const std::string& csv_header) { + field_names = SplitCsv_(csv_header); +} + +// set the values for a benchmark +void ResultsChecker::SetValues_(const std::string& entry_csv_line) { + if (entry_csv_line.empty()) return; // some lines are empty + CHECK(!field_names.empty()); + auto vals = SplitCsv_(entry_csv_line); + CHECK_EQ(vals.size(), field_names.size()); + results.emplace_back(vals[0]); // vals[0] is the benchmark name + auto& entry = results.back(); + for (size_t i = 1, e = vals.size(); i < e; ++i) { + entry.values[field_names[i]] = vals[i]; + } +} + +// a quick'n'dirty csv splitter (eliminating quotes) +std::vector ResultsChecker::SplitCsv_(const std::string& line) { + std::vector out; + if (line.empty()) return out; + if (!field_names.empty()) out.reserve(field_names.size()); + size_t prev = 0, pos = line.find_first_of(','), curr = pos; + while (pos != line.npos) { + CHECK(curr > 0); + if (line[prev] == '"') ++prev; + if (line[curr - 1] == '"') --curr; + out.push_back(line.substr(prev, curr - prev)); + prev = pos + 1; + pos = line.find_first_of(',', pos + 1); + curr = pos; + } + curr = line.size(); + if (line[prev] == '"') ++prev; + if (line[curr - 1] == '"') --curr; + out.push_back(line.substr(prev, curr - prev)); + return out; +} + +} // end namespace internal + +size_t AddChecker(const char* bm_name, ResultsCheckFn fn) { + auto& rc = internal::GetResultsChecker(); + rc.Add(bm_name, fn); + return rc.results.size(); +} + +int Results::NumThreads() const { + auto pos = name.find("/threads:"); + if (pos == name.npos) return 1; + auto end = name.find('/', pos + 9); + std::stringstream ss; + ss << name.substr(pos + 9, end); + int 
num = 1; + ss >> num; + CHECK(!ss.fail()); + return num; +} + +double Results::NumIterations() const { + return GetAs("iterations"); +} + +double Results::GetTime(BenchmarkTime which) const { + CHECK(which == kCpuTime || which == kRealTime); + const char* which_str = which == kCpuTime ? "cpu_time" : "real_time"; + double val = GetAs(which_str); + auto unit = Get("time_unit"); + CHECK(unit); + if (*unit == "ns") { + return val * 1.e-9; + } else if (*unit == "us") { + return val * 1.e-6; + } else if (*unit == "ms") { + return val * 1.e-3; + } else if (*unit == "s") { + return val; + } else { + CHECK(1 == 0) << "unknown time unit: " << *unit; + return 0; + } +} + +// ========================================================================= // +// -------------------------- Public API Definitions------------------------ // +// ========================================================================= // + +TestCase::TestCase(std::string re, int rule) + : regex_str(std::move(re)), + match_rule(rule), + substituted_regex(internal::PerformSubstitutions(regex_str)), + regex(std::make_shared()) { + std::string err_str; + regex->Init(substituted_regex, &err_str); + CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex + << "\"" + << "\n originally \"" << regex_str << "\"" + << "\n got error: " << err_str; +} + +int AddCases(TestCaseID ID, std::initializer_list il) { + auto& L = internal::GetTestCaseList(ID); + L.insert(L.end(), il); + return 0; +} + +int SetSubstitutions( + std::initializer_list> il) { + auto& subs = internal::GetSubstitutions(); + for (auto KV : il) { + bool exists = false; + KV.second = internal::PerformSubstitutions(KV.second); + for (auto& EKV : subs) { + if (EKV.first == KV.first) { + EKV.second = std::move(KV.second); + exists = true; + break; + } + } + if (!exists) subs.push_back(std::move(KV)); + } + return 0; +} + +// Disable deprecated warnings temporarily because we need to reference +// CSVReporter but don't want to trigger 
-Werror=-Wdeprecated-declarations +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif +void RunOutputTests(int argc, char* argv[]) { + using internal::GetTestCaseList; + benchmark::Initialize(&argc, argv); + auto options = benchmark::internal::GetOutputOptions(/*force_no_color*/ true); + benchmark::ConsoleReporter CR(options); + benchmark::JSONReporter JR; + benchmark::CSVReporter CSVR; + struct ReporterTest { + const char* name; + std::vector& output_cases; + std::vector& error_cases; + benchmark::BenchmarkReporter& reporter; + std::stringstream out_stream; + std::stringstream err_stream; + + ReporterTest(const char* n, std::vector& out_tc, + std::vector& err_tc, + benchmark::BenchmarkReporter& br) + : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { + reporter.SetOutputStream(&out_stream); + reporter.SetErrorStream(&err_stream); + } + } TestCases[] = { + {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut), + GetTestCaseList(TC_ConsoleErr), CR}, + {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr), + JR}, + {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr), + CSVR}, + }; + + // Create the test reporter and run the benchmarks. + std::cout << "Running benchmarks...\n"; + internal::TestReporter test_rep({&CR, &JR, &CSVR}); + benchmark::RunSpecifiedBenchmarks(&test_rep); + + for (auto& rep_test : TestCases) { + std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; + std::string banner(msg.size() - 1, '-'); + std::cout << banner << msg << banner << "\n"; + + std::cerr << rep_test.err_stream.str(); + std::cout << rep_test.out_stream.str(); + + internal::CheckCases(rep_test.error_cases, rep_test.err_stream); + internal::CheckCases(rep_test.output_cases, rep_test.out_stream); + + std::cout << "\n"; + } + + // now that we know the output is as expected, we can dispatch + // the checks to subscribees. 
+ auto& csv = TestCases[2]; + // would use == but gcc spits a warning + CHECK(std::strcmp(csv.name, "CSVReporter") == 0); + internal::GetResultsChecker().CheckResults(csv.out_stream); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +int SubstrCnt(const std::string& haystack, const std::string& pat) { + if (pat.length() == 0) return 0; + int count = 0; + for (size_t offset = haystack.find(pat); offset != std::string::npos; + offset = haystack.find(pat, offset + pat.length())) + ++count; + return count; +} + +static char ToHex(int ch) { + return ch < 10 ? static_cast('0' + ch) + : static_cast('a' + (ch - 10)); +} + +static char RandomHexChar() { + static std::mt19937 rd{std::random_device{}()}; + static std::uniform_int_distribution mrand{0, 15}; + return ToHex(mrand(rd)); +} + +static std::string GetRandomFileName() { + std::string model = "test.%%%%%%"; + for (auto & ch : model) { + if (ch == '%') + ch = RandomHexChar(); + } + return model; +} + +static bool FileExists(std::string const& name) { + std::ifstream in(name.c_str()); + return in.good(); +} + +static std::string GetTempFileName() { + // This function attempts to avoid race conditions where two tests + // create the same file at the same time. However, it still introduces races + // similar to tmpnam. 
+ int retries = 3; + while (--retries) { + std::string name = GetRandomFileName(); + if (!FileExists(name)) + return name; + } + std::cerr << "Failed to create unique temporary file name" << std::endl; + std::abort(); +} + +std::string GetFileReporterOutput(int argc, char* argv[]) { + std::vector new_argv(argv, argv + argc); + assert(static_cast(argc) == new_argv.size()); + + std::string tmp_file_name = GetTempFileName(); + std::cout << "Will be using this as the tmp file: " << tmp_file_name << '\n'; + + std::string tmp = "--benchmark_out="; + tmp += tmp_file_name; + new_argv.emplace_back(const_cast(tmp.c_str())); + + argc = int(new_argv.size()); + + benchmark::Initialize(&argc, new_argv.data()); + benchmark::RunSpecifiedBenchmarks(); + + // Read the output back from the file, and delete the file. + std::ifstream tmp_stream(tmp_file_name); + std::string output = std::string((std::istreambuf_iterator(tmp_stream)), + std::istreambuf_iterator()); + std::remove(tmp_file_name.c_str()); + + return output; +} diff --git a/libcxx/utils/google-benchmark/test/perf_counters_gtest.cc b/libcxx/utils/google-benchmark/test/perf_counters_gtest.cc new file mode 100644 index 000000000000..2a2868a71536 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/perf_counters_gtest.cc @@ -0,0 +1,145 @@ +#include + +#include "../src/perf_counters.h" +#include "gtest/gtest.h" + +#ifndef GTEST_SKIP +struct MsgHandler { + void operator=(std::ostream&){} +}; +#define GTEST_SKIP() return MsgHandler() = std::cout +#endif + +using benchmark::internal::PerfCounters; +using benchmark::internal::PerfCounterValues; + +namespace { +const char kGenericPerfEvent1[] = "CYCLES"; +const char kGenericPerfEvent2[] = "BRANCHES"; +const char kGenericPerfEvent3[] = "INSTRUCTIONS"; + +TEST(PerfCountersTest, Init) { + EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported); +} + +TEST(PerfCountersTest, OneCounter) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Performance counters not 
supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid()); +} + +TEST(PerfCountersTest, NegativeTest) { + if (!PerfCounters::kSupported) { + EXPECT_FALSE(PerfCounters::Initialize()); + return; + } + EXPECT_TRUE(PerfCounters::Initialize()); + EXPECT_FALSE(PerfCounters::Create({}).IsValid()); + EXPECT_FALSE(PerfCounters::Create({""}).IsValid()); + EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid()); + { + EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3}) + .IsValid()); + } + EXPECT_FALSE( + PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}) + .IsValid()); + EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name", + kGenericPerfEvent1}) + .IsValid()); + { + EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3}) + .IsValid()); + } + EXPECT_FALSE( + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, + kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"}) + .IsValid()); +} + +TEST(PerfCountersTest, Read1Counter) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + auto counters = PerfCounters::Create({kGenericPerfEvent1}); + EXPECT_TRUE(counters.IsValid()); + PerfCounterValues values1(1); + EXPECT_TRUE(counters.Snapshot(&values1)); + EXPECT_GT(values1[0], 0); + PerfCounterValues values2(1); + EXPECT_TRUE(counters.Snapshot(&values2)); + EXPECT_GT(values2[0], 0); + EXPECT_GT(values2[0], values1[0]); +} + +TEST(PerfCountersTest, Read2Counters) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + auto counters = + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); + EXPECT_TRUE(counters.IsValid()); + PerfCounterValues values1(2); + 
EXPECT_TRUE(counters.Snapshot(&values1)); + EXPECT_GT(values1[0], 0); + EXPECT_GT(values1[1], 0); + PerfCounterValues values2(2); + EXPECT_TRUE(counters.Snapshot(&values2)); + EXPECT_GT(values2[0], 0); + EXPECT_GT(values2[1], 0); +} + +size_t do_work() { + size_t res = 0; + for (size_t i = 0; i < 100000000; ++i) res += i * i; + return res; +} + +void measure(size_t threadcount, PerfCounterValues* values1, + PerfCounterValues* values2) { + CHECK_NE(values1, nullptr); + CHECK_NE(values2, nullptr); + std::vector threads(threadcount); + auto work = [&]() { CHECK(do_work() > 1000); }; + + // We need to first set up the counters, then start the threads, so the + // threads would inherit the counters. But later, we need to first destroy the + // thread pool (so all the work finishes), then measure the counters. So the + // scopes overlap, and we need to explicitly control the scope of the + // threadpool. + auto counters = + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3}); + for (auto& t : threads) t = std::thread(work); + counters.Snapshot(values1); + for (auto& t : threads) t.join(); + counters.Snapshot(values2); +} + +TEST(PerfCountersTest, MultiThreaded) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported."; + } + EXPECT_TRUE(PerfCounters::Initialize()); + PerfCounterValues values1(2); + PerfCounterValues values2(2); + + measure(2, &values1, &values2); + std::vector D1{static_cast(values2[0] - values1[0]), + static_cast(values2[1] - values1[1])}; + + measure(4, &values1, &values2); + std::vector D2{static_cast(values2[0] - values1[0]), + static_cast(values2[1] - values1[1])}; + + // Some extra work will happen on the main thread - like joining the threads + // - so the ratio won't be quite 2.0, but very close. 
+ EXPECT_GE(D2[0], 1.9 * D1[0]); + EXPECT_GE(D2[1], 1.9 * D1[1]); +} +} // namespace diff --git a/libcxx/utils/google-benchmark/test/perf_counters_test.cc b/libcxx/utils/google-benchmark/test/perf_counters_test.cc new file mode 100644 index 000000000000..d6e0284d4d4b --- /dev/null +++ b/libcxx/utils/google-benchmark/test/perf_counters_test.cc @@ -0,0 +1,27 @@ +#undef NDEBUG + +#include "../src/perf_counters.h" + +#include "benchmark/benchmark.h" +#include "output_test.h" + +void BM_Simple(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_Simple); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); + +void CheckSimple(Results const& e) { + CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); + CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0); +} +CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); + +int main(int argc, char* argv[]) { + if (!benchmark::internal::PerfCounters::kSupported) { + return 0; + } + RunOutputTests(argc, argv); +} diff --git a/libcxx/utils/google-benchmark/test/register_benchmark_test.cc b/libcxx/utils/google-benchmark/test/register_benchmark_test.cc new file mode 100644 index 000000000000..c027eabacae0 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/register_benchmark_test.cc @@ -0,0 +1,184 @@ + +#undef NDEBUG +#include +#include + +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include "benchmark/benchmark.h" + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + all_runs_.insert(all_runs_.end(), begin(report), end(report)); + ConsoleReporter::ReportRuns(report); + } + + std::vector all_runs_; +}; + +struct TestCase { + std::string name; + const char* label; + // Note: not explicit as we rely on it being converted through ADD_CASES. 
+ TestCase(const char* xname) : TestCase(xname, nullptr) {} + TestCase(const char* xname, const char* xlabel) + : name(xname), label(xlabel) {} + + typedef benchmark::BenchmarkReporter::Run Run; + + void CheckRun(Run const& run) const { + // clang-format off + CHECK(name == run.benchmark_name()) << "expected " << name << " got " + << run.benchmark_name(); + if (label) { + CHECK(run.report_label == label) << "expected " << label << " got " + << run.report_label; + } else { + CHECK(run.report_label == ""); + } + // clang-format on + } +}; + +std::vector ExpectedResults; + +int AddCases(std::initializer_list const& v) { + for (auto N : v) { + ExpectedResults.push_back(N); + } + return 0; +} + +#define CONCAT(x, y) CONCAT2(x, y) +#define CONCAT2(x, y) x##y +#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__}) + +} // end namespace + +typedef benchmark::internal::Benchmark* ReturnVal; + +//----------------------------------------------------------------------------// +// Test RegisterBenchmark with no additional arguments +//----------------------------------------------------------------------------// +void BM_function(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_function); +ReturnVal dummy = benchmark::RegisterBenchmark( + "BM_function_manual_registration", BM_function); +ADD_CASES({"BM_function"}, {"BM_function_manual_registration"}); + +//----------------------------------------------------------------------------// +// Test RegisterBenchmark with additional arguments +// Note: GCC <= 4.8 do not support this form of RegisterBenchmark because they +// reject the variadic pack expansion of lambda captures. 
+//----------------------------------------------------------------------------// +#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK + +void BM_extra_args(benchmark::State& st, const char* label) { + for (auto _ : st) { + } + st.SetLabel(label); +} +int RegisterFromFunction() { + std::pair cases[] = { + {"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}}; + for (auto const& c : cases) + benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second); + return 0; +} +int dummy2 = RegisterFromFunction(); +ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}); + +#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK + +//----------------------------------------------------------------------------// +// Test RegisterBenchmark with different callable types +//----------------------------------------------------------------------------// + +struct CustomFixture { + void operator()(benchmark::State& st) { + for (auto _ : st) { + } + } +}; + +void TestRegistrationAtRuntime() { +#ifdef BENCHMARK_HAS_CXX11 + { + CustomFixture fx; + benchmark::RegisterBenchmark("custom_fixture", fx); + AddCases({"custom_fixture"}); + } +#endif +#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK + { + const char* x = "42"; + auto capturing_lam = [=](benchmark::State& st) { + for (auto _ : st) { + } + st.SetLabel(x); + }; + benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam); + AddCases({{"lambda_benchmark", x}}); + } +#endif +} + +// Test that all benchmarks, registered at either during static init or runtime, +// are run and the results are passed to the reported. 
+void RunTestOne() { + TestRegistrationAtRuntime(); + + TestReporter test_reporter; + benchmark::RunSpecifiedBenchmarks(&test_reporter); + + typedef benchmark::BenchmarkReporter::Run Run; + auto EB = ExpectedResults.begin(); + + for (Run const& run : test_reporter.all_runs_) { + assert(EB != ExpectedResults.end()); + EB->CheckRun(run); + ++EB; + } + assert(EB == ExpectedResults.end()); +} + +// Test that ClearRegisteredBenchmarks() clears all previously registered +// benchmarks. +// Also test that new benchmarks can be registered and ran afterwards. +void RunTestTwo() { + assert(ExpectedResults.size() != 0 && + "must have at least one registered benchmark"); + ExpectedResults.clear(); + benchmark::ClearRegisteredBenchmarks(); + + TestReporter test_reporter; + size_t num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter); + assert(num_ran == 0); + assert(test_reporter.all_runs_.begin() == test_reporter.all_runs_.end()); + + TestRegistrationAtRuntime(); + num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter); + assert(num_ran == ExpectedResults.size()); + + typedef benchmark::BenchmarkReporter::Run Run; + auto EB = ExpectedResults.begin(); + + for (Run const& run : test_reporter.all_runs_) { + assert(EB != ExpectedResults.end()); + EB->CheckRun(run); + ++EB; + } + assert(EB == ExpectedResults.end()); +} + +int main(int argc, char* argv[]) { + benchmark::Initialize(&argc, argv); + + RunTestOne(); + RunTestTwo(); +} diff --git a/libcxx/utils/google-benchmark/test/repetitions_test.cc b/libcxx/utils/google-benchmark/test/repetitions_test.cc new file mode 100644 index 000000000000..f93de502a35a --- /dev/null +++ b/libcxx/utils/google-benchmark/test/repetitions_test.cc @@ -0,0 +1,208 @@ + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// 
========================================================================= // + +void BM_ExplicitRepetitions(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_ExplicitRepetitions)->Repetitions(2); + +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_mean %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_median %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + 
{"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_mean\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_median\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + 
{{"^\"BM_ExplicitRepetitions/repeats:2_stddev\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +void BM_ImplicitRepetitions(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_ImplicitRepetitions); + +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_mean %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_median %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", 
MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + 
{"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_mean\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_median\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_stddev\",%csv_report$"}}); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc b/libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc new file mode 100644 index 000000000000..9646b9be534d --- /dev/null +++ b/libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc @@ -0,0 +1,39 @@ + +#undef NDEBUG +#include +#include + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// Ok this test is super ugly. We want to check what happens with the file +// reporter in the presence of ReportAggregatesOnly(). +// We do not care about console output, the normal tests check that already. 
+ +void BM_SummaryRepeat(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); + +int main(int argc, char* argv[]) { + const std::string output = GetFileReporterOutput(argc, argv); + + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != + 1) { + std::cout << "Precondition mismatch. Expected to only find three " + "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" + "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "output:\n"; + std::cout << output; + return 1; + } + + return 0; +} diff --git a/libcxx/utils/google-benchmark/test/reporter_output_test.cc b/libcxx/utils/google-benchmark/test/reporter_output_test.cc new file mode 100644 index 000000000000..989eb48ecc81 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/reporter_output_test.cc @@ -0,0 +1,956 @@ + +#undef NDEBUG +#include + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// ========================================================================= // +// ---------------------- Testing Prologue Output -------------------------- // +// ========================================================================= // + +ADD_CASES(TC_ConsoleOut, {{"^[-]+$", MR_Next}, + {"^Benchmark %s Time %s CPU %s Iterations$", MR_Next}, + {"^[-]+$", MR_Next}}); +static int AddContextCases() { + AddCases(TC_ConsoleErr, + { + {"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default}, + {"Running .*/reporter_output_test(\\.exe)?$", MR_Next}, + {"Run on \\(%int X %float MHz CPU s?\\)", MR_Next}, + }); + AddCases(TC_JSONOut, + {{"^\\{", MR_Default}, + 
{"\"context\":", MR_Next}, + {"\"date\": \"", MR_Next}, + {"\"host_name\":", MR_Next}, + {"\"executable\": \".*(/|\\\\)reporter_output_test(\\.exe)?\",", + MR_Next}, + {"\"num_cpus\": %int,$", MR_Next}, + {"\"mhz_per_cpu\": %float,$", MR_Next}, + {"\"caches\": \\[$", MR_Default}}); + auto const& Info = benchmark::CPUInfo::Get(); + auto const& Caches = Info.caches; + if (!Caches.empty()) { + AddCases(TC_ConsoleErr, {{"CPU Caches:$", MR_Next}}); + } + for (size_t I = 0; I < Caches.size(); ++I) { + std::string num_caches_str = + Caches[I].num_sharing != 0 ? " \\(x%int\\)$" : "$"; + AddCases(TC_ConsoleErr, + {{"L%int (Data|Instruction|Unified) %int KiB" + num_caches_str, + MR_Next}}); + AddCases(TC_JSONOut, {{"\\{$", MR_Next}, + {"\"type\": \"", MR_Next}, + {"\"level\": %int,$", MR_Next}, + {"\"size\": %int,$", MR_Next}, + {"\"num_sharing\": %int$", MR_Next}, + {"}[,]{0,1}$", MR_Next}}); + } + AddCases(TC_JSONOut, {{"],$"}}); + auto const& LoadAvg = Info.load_avg; + if (!LoadAvg.empty()) { + AddCases(TC_ConsoleErr, + {{"Load Average: (%float, ){0,2}%float$", MR_Next}}); + } + AddCases(TC_JSONOut, {{"\"load_avg\": \\[(%float,?){0,3}],$", MR_Next}}); + return 0; +} +int dummy_register = AddContextCases(); +ADD_CASES(TC_CSVOut, {{"%csv_header"}}); + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +void BM_basic(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_basic); + +ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_basic\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 
1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Bytes per Second Output ---------------- // +// ========================================================================= // + +void BM_bytes_per_second(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + state.SetBytesProcessed(1); +} +BENCHMARK(BM_bytes_per_second); + +ADD_CASES(TC_ConsoleOut, {{"^BM_bytes_per_second %console_report " + "bytes_per_second=%float[kM]{0,1}/s$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_bytes_per_second\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bytes_per_second\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Items per Second Output ---------------- // +// ========================================================================= // + +void BM_items_per_second(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + 
state.SetItemsProcessed(1); +} +BENCHMARK(BM_items_per_second); + +ADD_CASES(TC_ConsoleOut, {{"^BM_items_per_second %console_report " + "items_per_second=%float[kM]{0,1}/s$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_items_per_second\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"items_per_second\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_items_per_second\",%csv_items_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Label Output --------------------------- // +// ========================================================================= // + +void BM_label(benchmark::State& state) { + for (auto _ : state) { + } + state.SetLabel("some label"); +} +BENCHMARK(BM_label); + +ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_label\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"label\": \"some label\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_label\",%csv_label_report_begin\"some " + "label\"%csv_label_report_end$"}}); + +// 
========================================================================= // +// ------------------------ Testing Time Label Output ---------------------- // +// ========================================================================= // + +void BM_time_label_nanosecond(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_nanosecond)->Unit(benchmark::kNanosecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_nanosecond %console_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_time_label_nanosecond\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_nanosecond\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_nanosecond\",%csv_report$"}}); + +void BM_time_label_microsecond(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_microsecond)->Unit(benchmark::kMicrosecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_microsecond %console_us_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_time_label_microsecond\",$"}, + {"\"family_index\": 5,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_microsecond\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"us\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_microsecond\",%csv_us_report$"}}); + +void 
BM_time_label_millisecond(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_millisecond)->Unit(benchmark::kMillisecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_millisecond %console_ms_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_time_label_millisecond\",$"}, + {"\"family_index\": 6,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_millisecond\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ms\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_millisecond\",%csv_ms_report$"}}); + +void BM_time_label_second(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_time_label_second)->Unit(benchmark::kSecond); + +ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_second %console_s_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_second\",$"}, + {"\"family_index\": 7,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_time_label_second\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"s\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_second\",%csv_s_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Error Output --------------------------- // +// ========================================================================= // + +void BM_error(benchmark::State& 
state) { + state.SkipWithError("message"); + for (auto _ : state) { + } +} +BENCHMARK(BM_error); +ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"}, + {"\"family_index\": 8,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_error\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"error_occurred\": true,$", MR_Next}, + {"\"error_message\": \"message\",$", MR_Next}}); + +ADD_CASES(TC_CSVOut, {{"^\"BM_error\",,,,,,,,true,\"message\"$"}}); + +// ========================================================================= // +// ------------------------ Testing No Arg Name Output ----------------------- +// // +// ========================================================================= // + +void BM_no_arg_name(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_no_arg_name)->Arg(3); +ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"}, + {"\"family_index\": 9,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_no_arg_name/3\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Arg Name Output ----------------------- // +// ========================================================================= // + +void BM_arg_name(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3); +ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}}); 
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"}, + {"\"family_index\": 10,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_arg_name/first:3\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Arg Names Output ----------------------- // +// ========================================================================= // + +void BM_arg_names(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_arg_names)->Args({2, 5, 4})->ArgNames({"first", "", "third"}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_arg_names/first:2/5/third:4 %console_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"}, + {"\"family_index\": 11,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_arg_names/first:2/5/third:4\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Name Output ---------------------------- // +// ========================================================================= // + +void BM_name(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_name)->Name("BM_custom_name"); + +ADD_CASES(TC_ConsoleOut, {{"^BM_custom_name %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_custom_name\",$"}, + {"\"family_index\": 12,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + 
{"\"run_name\": \"BM_custom_name\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_custom_name\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Big Args Output ------------------------ // +// ========================================================================= // + +void BM_BigArgs(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_BigArgs)->RangeMultiplier(2)->Range(1U << 30U, 1U << 31U); +ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"}, + {"^BM_BigArgs/2147483648 %console_report$"}}); + +// ========================================================================= // +// ----------------------- Testing Complexity Output ----------------------- // +// ========================================================================= // + +void BM_Complexity_O1(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); +SET_SUBSTITUTIONS({{"%bigOStr", "[ ]* %float \\([0-9]+\\)"}, + {"%RMS", "[ ]*[0-9]+ %"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"}, + {"^BM_Complexity_O1_RMS %RMS %RMS[ ]*$"}}); + +// ========================================================================= // +// ----------------------- Testing Aggregate Output ------------------------ // +// ========================================================================= // + +// Test 
that non-aggregate data is printed by default +void BM_Repeat(benchmark::State& state) { + for (auto _ : state) { + } +} +// need two repetitions min to be able to output any aggregate output +BENCHMARK(BM_Repeat)->Repetitions(2); +ADD_CASES(TC_ConsoleOut, + {{"^BM_Repeat/repeats:2 %console_report$"}, + {"^BM_Repeat/repeats:2 %console_report$"}, + {"^BM_Repeat/repeats:2_mean %console_time_only_report [ ]*2$"}, + {"^BM_Repeat/repeats:2_median %console_time_only_report [ ]*2$"}, + {"^BM_Repeat/repeats:2_stddev %console_time_only_report [ ]*2$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:2\"", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:2\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:2_mean\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:2_median\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": 
\"median\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"}, + {"^\"BM_Repeat/repeats:2\",%csv_report$"}, + {"^\"BM_Repeat/repeats:2_mean\",%csv_report$"}, + {"^\"BM_Repeat/repeats:2_median\",%csv_report$"}, + {"^\"BM_Repeat/repeats:2_stddev\",%csv_report$"}}); +// but for two repetitions, mean and median is the same, so let's repeat.. +BENCHMARK(BM_Repeat)->Repetitions(3); +ADD_CASES(TC_ConsoleOut, + {{"^BM_Repeat/repeats:3 %console_report$"}, + {"^BM_Repeat/repeats:3 %console_report$"}, + {"^BM_Repeat/repeats:3 %console_report$"}, + {"^BM_Repeat/repeats:3_mean %console_time_only_report [ ]*3$"}, + {"^BM_Repeat/repeats:3_median %console_time_only_report [ ]*3$"}, + {"^BM_Repeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", 
MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:3_median\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3_mean\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3_median\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"}}); +// median differs between even/odd number of repetitions, so just to be sure +BENCHMARK(BM_Repeat)->Repetitions(4); +ADD_CASES(TC_ConsoleOut, + {{"^BM_Repeat/repeats:4 %console_report$"}, + {"^BM_Repeat/repeats:4 
%console_report$"}, + {"^BM_Repeat/repeats:4 %console_report$"}, + {"^BM_Repeat/repeats:4 %console_report$"}, + {"^BM_Repeat/repeats:4_mean %console_time_only_report [ ]*4$"}, + {"^BM_Repeat/repeats:4_median %console_time_only_report [ ]*4$"}, + {"^BM_Repeat/repeats:4_stddev %console_time_only_report [ ]*4$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 4,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 4,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 4,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 4,$", MR_Next}, + {"\"repetition_index\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:4_mean\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 4,$", 
MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 4,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:4_median\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 4,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 4,$", MR_Next}, + {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 4,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 4,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"}, + {"^\"BM_Repeat/repeats:4\",%csv_report$"}, + {"^\"BM_Repeat/repeats:4\",%csv_report$"}, + {"^\"BM_Repeat/repeats:4\",%csv_report$"}, + {"^\"BM_Repeat/repeats:4_mean\",%csv_report$"}, + {"^\"BM_Repeat/repeats:4_median\",%csv_report$"}, + {"^\"BM_Repeat/repeats:4_stddev\",%csv_report$"}}); + +// Test that a non-repeated test still prints non-aggregate results even when +// only-aggregate reports have been requested +void BM_RepeatOnce(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly(); +ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}, + {"\"family_index\": 18,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_RepeatOnce/repeats:1\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", 
MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_RepeatOnce/repeats:1\",%csv_report$"}}); + +// Test that non-aggregate data is not reported +void BM_SummaryRepeat(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); +ADD_CASES( + TC_ConsoleOut, + {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, + {"^BM_SummaryRepeat/repeats:3_mean %console_time_only_report [ ]*3$"}, + {"^BM_SummaryRepeat/repeats:3_median %console_time_only_report [ ]*3$"}, + {"^BM_SummaryRepeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); +ADD_CASES(TC_JSONOut, + {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, + {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, + 
{"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, + {"^\"BM_SummaryRepeat/repeats:3_median\",%csv_report$"}, + {"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}}); + +// Test that non-aggregate data is not displayed. +// NOTE: this test is kinda bad. we are only testing the display output. +// But we don't check that the file output still contains everything... +void BM_SummaryDisplay(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_SummaryDisplay)->Repetitions(2)->DisplayAggregatesOnly(); +ADD_CASES( + TC_ConsoleOut, + {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, + {"^BM_SummaryDisplay/repeats:2_mean %console_time_only_report [ ]*2$"}, + {"^BM_SummaryDisplay/repeats:2_median %console_time_only_report [ ]*2$"}, + {"^BM_SummaryDisplay/repeats:2_stddev %console_time_only_report [ ]*2$"}}); +ADD_CASES(TC_JSONOut, + {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, + {"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + 
{"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, + {"^\"BM_SummaryDisplay/repeats:2_mean\",%csv_report$"}, + {"^\"BM_SummaryDisplay/repeats:2_median\",%csv_report$"}, + {"^\"BM_SummaryDisplay/repeats:2_stddev\",%csv_report$"}}); + +// Test repeats with custom time unit. +void BM_RepeatTimeUnit(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_RepeatTimeUnit) + ->Repetitions(3) + ->ReportAggregatesOnly() + ->Unit(benchmark::kMicrosecond); +ADD_CASES( + TC_ConsoleOut, + {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, + {"^BM_RepeatTimeUnit/repeats:3_mean %console_us_time_only_report [ ]*3$"}, + {"^BM_RepeatTimeUnit/repeats:3_median %console_us_time_only_report [ " + "]*3$"}, + {"^BM_RepeatTimeUnit/repeats:3_stddev %console_us_time_only_report [ " + "]*3$"}}); +ADD_CASES(TC_JSONOut, + {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, + {"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"time_unit\": \"us\",?$"}, + {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"time_unit\": \"us\",?$"}, + {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, + {"\"family_index\": 
21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"time_unit\": \"us\",?$"}}); +ADD_CASES(TC_CSVOut, + {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, + {"^\"BM_RepeatTimeUnit/repeats:3_mean\",%csv_us_report$"}, + {"^\"BM_RepeatTimeUnit/repeats:3_median\",%csv_us_report$"}, + {"^\"BM_RepeatTimeUnit/repeats:3_stddev\",%csv_us_report$"}}); + +// ========================================================================= // +// -------------------- Testing user-provided statistics ------------------- // +// ========================================================================= // + +const auto UserStatistics = [](const std::vector& v) { + return v.back(); +}; +void BM_UserStats(benchmark::State& state) { + for (auto _ : state) { + state.SetIterationTime(150 / 10e8); + } +} +// clang-format off +BENCHMARK(BM_UserStats) + ->Repetitions(3) + ->Iterations(5) + ->UseManualTime() + ->ComputeStatistics("", UserStatistics); +// clang-format on + +// check that user-provided stats is calculated, and is after the default-ones +// empty string as name is intentional, it would sort before anything else +ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserStats/iterations:5/repeats:3/" + "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserStats/iterations:5/repeats:3/" + "manual_time_median [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserStats/iterations:5/repeats:3/" + "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, + 
{"^BM_UserStats/iterations:5/repeats:3/manual_time_ " + "[ ]* 150 ns %time [ ]*3$"}}); +ADD_CASES( + TC_JSONOut, + {{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", 
MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}}); +ADD_CASES( + TC_CSVOut, + {{"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, + {"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, + {"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, + {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",%csv_report$"}, + {"^\"BM_UserStats/iterations:5/repeats:3/" + 
"manual_time_median\",%csv_report$"}, + {"^\"BM_UserStats/iterations:5/repeats:3/" + "manual_time_stddev\",%csv_report$"}, + {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------- Testing StrEscape JSON ------------------------ // +// ========================================================================= // +#if 0 // enable when csv testing code correctly handles multi-line fields +void BM_JSON_Format(benchmark::State& state) { + state.SkipWithError("val\b\f\n\r\t\\\"with\"es,capes"); + for (auto _ : state) { + } +} +BENCHMARK(BM_JSON_Format); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_JSON_Format\",$"}, + {"\"family_index\": 23,$", MR_Next}, +{"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_JSON_Format\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"error_occurred\": true,$", MR_Next}, + {R"("error_message": "val\\b\\f\\n\\r\\t\\\\\\"with\\"es,capes",$)", MR_Next}}); +#endif +// ========================================================================= // +// -------------------------- Testing CsvEscape ---------------------------- // +// ========================================================================= // + +void BM_CSV_Format(benchmark::State& state) { + state.SkipWithError("\"freedom\""); + for (auto _ : state) { + } +} +BENCHMARK(BM_CSV_Format); +ADD_CASES(TC_CSVOut, {{"^\"BM_CSV_Format\",,,,,,,,true,\"\"\"freedom\"\"\"$"}}); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git 
a/libcxx/utils/google-benchmark/test/skip_with_error_test.cc b/libcxx/utils/google-benchmark/test/skip_with_error_test.cc new file mode 100644 index 000000000000..827966e9dfe3 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/skip_with_error_test.cc @@ -0,0 +1,195 @@ + +#undef NDEBUG +#include +#include + +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include "benchmark/benchmark.h" + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + all_runs_.insert(all_runs_.end(), begin(report), end(report)); + ConsoleReporter::ReportRuns(report); + } + + TestReporter() {} + virtual ~TestReporter() {} + + mutable std::vector all_runs_; +}; + +struct TestCase { + std::string name; + bool error_occurred; + std::string error_message; + + typedef benchmark::BenchmarkReporter::Run Run; + + void CheckRun(Run const& run) const { + CHECK(name == run.benchmark_name()) + << "expected " << name << " got " << run.benchmark_name(); + CHECK(error_occurred == run.error_occurred); + CHECK(error_message == run.error_message); + if (error_occurred) { + // CHECK(run.iterations == 0); + } else { + CHECK(run.iterations != 0); + } + } +}; + +std::vector ExpectedResults; + +int AddCases(const char* base_name, std::initializer_list const& v) { + for (auto TC : v) { + TC.name = base_name + TC.name; + ExpectedResults.push_back(std::move(TC)); + } + return 0; +} + +#define CONCAT(x, y) CONCAT2(x, y) +#define CONCAT2(x, y) x##y +#define ADD_CASES(...) 
int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) + +} // end namespace + +void BM_error_no_running(benchmark::State& state) { + state.SkipWithError("error message"); +} +BENCHMARK(BM_error_no_running); +ADD_CASES("BM_error_no_running", {{"", true, "error message"}}); + +void BM_error_before_running(benchmark::State& state) { + state.SkipWithError("error message"); + while (state.KeepRunning()) { + assert(false); + } +} +BENCHMARK(BM_error_before_running); +ADD_CASES("BM_error_before_running", {{"", true, "error message"}}); + +void BM_error_before_running_batch(benchmark::State& state) { + state.SkipWithError("error message"); + while (state.KeepRunningBatch(17)) { + assert(false); + } +} +BENCHMARK(BM_error_before_running_batch); +ADD_CASES("BM_error_before_running_batch", {{"", true, "error message"}}); + +void BM_error_before_running_range_for(benchmark::State& state) { + state.SkipWithError("error message"); + for (auto _ : state) { + assert(false); + } +} +BENCHMARK(BM_error_before_running_range_for); +ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}}); + +void BM_error_during_running(benchmark::State& state) { + int first_iter = true; + while (state.KeepRunning()) { + if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + assert(first_iter); + first_iter = false; + state.SkipWithError("error message"); + } else { + state.PauseTiming(); + state.ResumeTiming(); + } + } +} +BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8); +ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"}, + {"/1/threads:2", true, "error message"}, + {"/1/threads:4", true, "error message"}, + {"/1/threads:8", true, "error message"}, + {"/2/threads:1", false, ""}, + {"/2/threads:2", false, ""}, + {"/2/threads:4", false, ""}, + {"/2/threads:8", false, ""}}); + +void BM_error_during_running_ranged_for(benchmark::State& state) { + assert(state.max_iterations > 3 && "test requires at least a few 
iterations"); + int first_iter = true; + // NOTE: Users should not write the for loop explicitly. + for (auto It = state.begin(), End = state.end(); It != End; ++It) { + if (state.range(0) == 1) { + assert(first_iter); + first_iter = false; + state.SkipWithError("error message"); + // Test the unfortunate but documented behavior that the ranged-for loop + // doesn't automatically terminate when SkipWithError is set. + assert(++It != End); + break; // Required behavior + } + } +} +BENCHMARK(BM_error_during_running_ranged_for)->Arg(1)->Arg(2)->Iterations(5); +ADD_CASES("BM_error_during_running_ranged_for", + {{"/1/iterations:5", true, "error message"}, + {"/2/iterations:5", false, ""}}); + +void BM_error_after_running(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(state.iterations()); + } + if (state.thread_index <= (state.threads / 2)) + state.SkipWithError("error message"); +} +BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); +ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"}, + {"/threads:2", true, "error message"}, + {"/threads:4", true, "error message"}, + {"/threads:8", true, "error message"}}); + +void BM_error_while_paused(benchmark::State& state) { + bool first_iter = true; + while (state.KeepRunning()) { + if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + assert(first_iter); + first_iter = false; + state.PauseTiming(); + state.SkipWithError("error message"); + } else { + state.PauseTiming(); + state.ResumeTiming(); + } + } +} +BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8); +ADD_CASES("BM_error_while_paused", {{"/1/threads:1", true, "error message"}, + {"/1/threads:2", true, "error message"}, + {"/1/threads:4", true, "error message"}, + {"/1/threads:8", true, "error message"}, + {"/2/threads:1", false, ""}, + {"/2/threads:2", false, ""}, + {"/2/threads:4", false, ""}, + {"/2/threads:8", false, ""}}); + +int main(int argc, char* argv[]) { + 
benchmark::Initialize(&argc, argv); + + TestReporter test_reporter; + benchmark::RunSpecifiedBenchmarks(&test_reporter); + + typedef benchmark::BenchmarkReporter::Run Run; + auto EB = ExpectedResults.begin(); + + for (Run const& run : test_reporter.all_runs_) { + assert(EB != ExpectedResults.end()); + EB->CheckRun(run); + ++EB; + } + assert(EB == ExpectedResults.end()); + + return 0; +} diff --git a/libcxx/utils/google-benchmark/test/state_assembly_test.cc b/libcxx/utils/google-benchmark/test/state_assembly_test.cc new file mode 100644 index 000000000000..7ddbb3b2a92c --- /dev/null +++ b/libcxx/utils/google-benchmark/test/state_assembly_test.cc @@ -0,0 +1,68 @@ +#include + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wreturn-type" +#endif + +// clang-format off +extern "C" { + extern int ExternInt; + benchmark::State& GetState(); + void Fn(); +} +// clang-format on + +using benchmark::State; + +// CHECK-LABEL: test_for_auto_loop: +extern "C" int test_for_auto_loop() { + State& S = GetState(); + int x = 42; + // CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv + // CHECK-NEXT: testq %rbx, %rbx + // CHECK-NEXT: je [[LOOP_END:.*]] + + for (auto _ : S) { + // CHECK: .L[[LOOP_HEAD:[a-zA-Z0-9_]+]]: + // CHECK-GNU-NEXT: subq $1, %rbx + // CHECK-CLANG-NEXT: {{(addq \$1, %rax|incq %rax|addq \$-1, %rbx)}} + // CHECK-NEXT: jne .L[[LOOP_HEAD]] + benchmark::DoNotOptimize(x); + } + // CHECK: [[LOOP_END]]: + // CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv + + // CHECK: movl $101, %eax + // CHECK: ret + return 101; +} + +// CHECK-LABEL: test_while_loop: +extern "C" int test_while_loop() { + State& S = GetState(); + int x = 42; + + // CHECK: j{{(e|mp)}} .L[[LOOP_HEADER:[a-zA-Z0-9_]+]] + // CHECK-NEXT: .L[[LOOP_BODY:[a-zA-Z0-9_]+]]: + while (S.KeepRunning()) { + // CHECK-GNU-NEXT: subq $1, %[[IREG:[a-z]+]] + // CHECK-CLANG-NEXT: {{(addq \$-1,|decq)}} %[[IREG:[a-z]+]] + // CHECK: movq %[[IREG]], [[DEST:.*]] + benchmark::DoNotOptimize(x); + } + // 
CHECK-DAG: movq [[DEST]], %[[IREG]] + // CHECK-DAG: testq %[[IREG]], %[[IREG]] + // CHECK-DAG: jne .L[[LOOP_BODY]] + // CHECK-DAG: .L[[LOOP_HEADER]]: + + // CHECK: cmpb $0 + // CHECK-NEXT: jne .L[[LOOP_END:[a-zA-Z0-9_]+]] + // CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv + + // CHECK: .L[[LOOP_END]]: + // CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv + + // CHECK: movl $101, %eax + // CHECK: ret + return 101; +} diff --git a/libcxx/utils/google-benchmark/test/statistics_gtest.cc b/libcxx/utils/google-benchmark/test/statistics_gtest.cc new file mode 100644 index 000000000000..3ddc72dd7ac6 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/statistics_gtest.cc @@ -0,0 +1,28 @@ +//===---------------------------------------------------------------------===// +// statistics_test - Unit tests for src/statistics.cc +//===---------------------------------------------------------------------===// + +#include "../src/statistics.h" +#include "gtest/gtest.h" + +namespace { +TEST(StatisticsTest, Mean) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({42, 42, 42, 42}), 42.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({1, 2, 3, 4}), 2.5); + EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({1, 2, 5, 10, 10, 14}), 7.0); +} + +TEST(StatisticsTest, Median) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({42, 42, 42, 42}), 42.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({1, 2, 3, 4}), 2.5); + EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({1, 2, 5, 10, 10}), 5.0); +} + +TEST(StatisticsTest, StdDev) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({101, 101, 101, 101}), 0.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({1, 2, 3}), 1.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({2.5, 2.4, 3.3, 4.2, 5.1}), + 1.151086443322134); +} + +} // end namespace diff --git a/libcxx/utils/google-benchmark/test/string_util_gtest.cc b/libcxx/utils/google-benchmark/test/string_util_gtest.cc new file mode 100644 index 000000000000..c7061b409e91 --- /dev/null 
+++ b/libcxx/utils/google-benchmark/test/string_util_gtest.cc @@ -0,0 +1,161 @@ +//===---------------------------------------------------------------------===// +// statistics_test - Unit tests for src/statistics.cc +//===---------------------------------------------------------------------===// + +#include "../src/string_util.h" +#include "../src/internal_macros.h" +#include "gtest/gtest.h" + +namespace { +TEST(StringUtilTest, stoul) { + { + size_t pos = 0; + EXPECT_EQ(0ul, benchmark::stoul("0", &pos)); + EXPECT_EQ(1ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(7ul, benchmark::stoul("7", &pos)); + EXPECT_EQ(1ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(135ul, benchmark::stoul("135", &pos)); + EXPECT_EQ(3ul, pos); + } +#if ULONG_MAX == 0xFFFFFFFFul + { + size_t pos = 0; + EXPECT_EQ(0xFFFFFFFFul, benchmark::stoul("4294967295", &pos)); + EXPECT_EQ(10ul, pos); + } +#elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul + { + size_t pos = 0; + EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, benchmark::stoul("18446744073709551615", &pos)); + EXPECT_EQ(20ul, pos); + } +#endif + { + size_t pos = 0; + EXPECT_EQ(10ul, benchmark::stoul("1010", &pos, 2)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(520ul, benchmark::stoul("1010", &pos, 8)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(1010ul, benchmark::stoul("1010", &pos, 10)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(4112ul, benchmark::stoul("1010", &pos, 16)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(0xBEEFul, benchmark::stoul("BEEF", &pos, 16)); + EXPECT_EQ(4ul, pos); + } +#ifndef BENCHMARK_HAS_NO_EXCEPTIONS + { + ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); + } +#endif +} + +TEST(StringUtilTest, stoi) { + { + size_t pos = 0; + EXPECT_EQ(0, benchmark::stoi("0", &pos)); + EXPECT_EQ(1ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); + EXPECT_EQ(3ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(1357, 
benchmark::stoi("1357", &pos)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); + EXPECT_EQ(4ul, pos); + } +#ifndef BENCHMARK_HAS_NO_EXCEPTIONS + { + ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); + } +#endif +} + +TEST(StringUtilTest, stod) { + { + size_t pos = 0; + EXPECT_EQ(0.0, benchmark::stod("0", &pos)); + EXPECT_EQ(1ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); + EXPECT_EQ(3ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); + EXPECT_EQ(4ul, pos); + } + { + size_t pos = 0; + EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); + EXPECT_EQ(3ul, pos); + } + { + size_t pos = 0; + /* Note: exactly representable as double */ + EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); + EXPECT_EQ(8ul, pos); + } +#ifndef BENCHMARK_HAS_NO_EXCEPTIONS + { + ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); + } +#endif +} + +TEST(StringUtilTest, StrSplit) { + EXPECT_EQ(benchmark::StrSplit("", ','), std::vector{}); + EXPECT_EQ(benchmark::StrSplit("hello", ','), + std::vector({"hello"})); + EXPECT_EQ(benchmark::StrSplit("hello,there,is,more", ','), + std::vector({"hello", "there", "is", "more"})); +} + +} // end namespace diff --git a/libcxx/utils/google-benchmark/test/templated_fixture_test.cc b/libcxx/utils/google-benchmark/test/templated_fixture_test.cc new file mode 100644 index 000000000000..fe9865cc776f --- /dev/null +++ b/libcxx/utils/google-benchmark/test/templated_fixture_test.cc @@ -0,0 +1,28 
@@ + +#include "benchmark/benchmark.h" + +#include <cassert> +#include <memory> + +template <typename T> +class MyFixture : public ::benchmark::Fixture { + public: + MyFixture() : data(0) {} + + T data; +}; + +BENCHMARK_TEMPLATE_F(MyFixture, Foo, int)(benchmark::State& st) { + for (auto _ : st) { + data += 1; + } +} + +BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, Bar, double)(benchmark::State& st) { + for (auto _ : st) { + data += 1.0; + } +} +BENCHMARK_REGISTER_F(MyFixture, Bar); + +BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc b/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc new file mode 100644 index 000000000000..421f27b5cb8b --- /dev/null +++ b/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc @@ -0,0 +1,500 @@ + +#undef NDEBUG + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// @todo: this checks the full output at once; the rule for +// CounterSet1 was failing because it was not matching "^[-]+$". +// @todo: check that the counters are vertically aligned. 
+ADD_CASES(TC_ConsoleOut, + { + // keeping these lines long improves readability, so: + // clang-format off + {"^[-]+$", MR_Next}, + {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next}, + {"^[-]+$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, + 
{"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, + {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, + {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, + {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, + {"^[-]+$", MR_Next}, + {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Baz %s Foo$", MR_Next}, + {"^[-]+$", MR_Next}, + {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^[-]+$", MR_Next}, + {"^Benchmark %s Time %s CPU %s Iterations %s Bat %s Baz %s Foo$", MR_Next}, + {"^[-]+$", MR_Next}, + 
{"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"}, + // clang-format on + }); +ADD_CASES(TC_CSVOut, {{"%csv_header," + "\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}}); + +// ========================================================================= // +// ------------------------- Tabular Counters Output ----------------------- // +// ========================================================================= // + +void BM_Counters_Tabular(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters.insert({ + {"Foo", {1, bm::Counter::kAvgThreads}}, + {"Bar", {2, bm::Counter::kAvgThreads}}, + {"Baz", {4, bm::Counter::kAvgThreads}}, + {"Bat", {8, bm::Counter::kAvgThreads}}, + {"Frob", {16, bm::Counter::kAvgThreads}}, + {"Lob", {32, bm::Counter::kAvgThreads}}, + }); +} +BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 2)->Repetitions(2); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", 
MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": 
\"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); + +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": 
%float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + 
{"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_mean\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_median\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_mean\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_median\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," + 
"%float,%float,%float,%float,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckTabular(Results const& e) { + CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 1); + CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 2); + CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 4); + CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 8); + CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16); + CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:1$", + &CheckTabular); +CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$", + &CheckTabular); + +// ========================================================================= // +// -------------------- Tabular+Rate Counters Output ----------------------- // +// ========================================================================= // + +void BM_CounterRates_Tabular(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters.insert({ + {"Foo", {1, bm::Counter::kAvgThreadsRate}}, + {"Bar", {2, bm::Counter::kAvgThreadsRate}}, + {"Baz", {4, bm::Counter::kAvgThreadsRate}}, + {"Bat", {8, bm::Counter::kAvgThreadsRate}}, + {"Frob", {16, bm::Counter::kAvgThreadsRate}}, + {"Lob", {32, bm::Counter::kAvgThreadsRate}}, + }); +} +BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_CounterRates_Tabular/threads:%int\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": 
%float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_CounterRates_Tabular/threads:%int\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckTabularRate(Results const& e) { + double t = e.DurationCPUTime(); + CHECK_FLOAT_COUNTER_VALUE(e, "Foo", EQ, 1. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "Bar", EQ, 2. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "Baz", EQ, 4. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "Bat", EQ, 8. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "Frob", EQ, 16. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "Lob", EQ, 32. / t, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_CounterRates_Tabular/threads:%int", + &CheckTabularRate); + +// ========================================================================= // +// ------------------------- Tabular Counters Output ----------------------- // +// ========================================================================= // + +// set only some of the counters +void BM_CounterSet0_Tabular(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters.insert({ + {"Foo", {10, bm::Counter::kAvgThreads}}, + {"Bar", {20, bm::Counter::kAvgThreads}}, + {"Baz", {40, bm::Counter::kAvgThreads}}, + }); +} +BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_CounterSet0_Tabular/threads:%int\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + 
{"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet0_Tabular/threads:%int\",%csv_report," + "%float,,%float,%float,,"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckSet0(Results const& e) { + CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10); + CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 20); + CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40); +} +CHECK_BENCHMARK_RESULTS("BM_CounterSet0_Tabular", &CheckSet0); + +// again. +void BM_CounterSet1_Tabular(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters.insert({ + {"Foo", {15, bm::Counter::kAvgThreads}}, + {"Bar", {25, bm::Counter::kAvgThreads}}, + {"Baz", {45, bm::Counter::kAvgThreads}}, + }); +} +BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_CounterSet1_Tabular/threads:%int\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet1_Tabular/threads:%int\",%csv_report," + "%float,,%float,%float,,"}}); +// VS2013 does not allow this function to be 
passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckSet1(Results const& e) { + CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 15); + CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 25); + CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 45); +} +CHECK_BENCHMARK_RESULTS("BM_CounterSet1_Tabular/threads:%int", &CheckSet1); + +// ========================================================================= // +// ------------------------- Tabular Counters Output ----------------------- // +// ========================================================================= // + +// set only some of the counters, different set now. +void BM_CounterSet2_Tabular(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters.insert({ + {"Foo", {10, bm::Counter::kAvgThreads}}, + {"Bat", {30, bm::Counter::kAvgThreads}}, + {"Baz", {40, bm::Counter::kAvgThreads}}, + }); +} +BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_CounterSet2_Tabular/threads:%int\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet2_Tabular/threads:%int\",%csv_report," + ",%float,%float,%float,,"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckSet2(Results const& e) { + CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10); + CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 30); + 
CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40); +} +CHECK_BENCHMARK_RESULTS("BM_CounterSet2_Tabular", &CheckSet2); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/user_counters_test.cc b/libcxx/utils/google-benchmark/test/user_counters_test.cc new file mode 100644 index 000000000000..377bb32ca948 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/user_counters_test.cc @@ -0,0 +1,555 @@ + +#undef NDEBUG + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// ========================================================================= // +// ---------------------- Testing Prologue Output -------------------------- // +// ========================================================================= // + +// clang-format off + +ADD_CASES(TC_ConsoleOut, + {{"^[-]+$", MR_Next}, + {"^Benchmark %s Time %s CPU %s Iterations UserCounters...$", MR_Next}, + {"^[-]+$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"%csv_header,\"bar\",\"foo\""}}); + +// clang-format on + +// ========================================================================= // +// ------------------------- Simple Counters Output ------------------------ // +// ========================================================================= // + +void BM_Counters_Simple(benchmark::State& state) { + for (auto _ : state) { + } + state.counters["foo"] = 1; + state.counters["bar"] = 2 * (double)state.iterations(); +} +BENCHMARK(BM_Counters_Simple); +ADD_CASES(TC_ConsoleOut, + {{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": 
\"BM_Counters_Simple\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Simple\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckSimple(Results const& e) { + double its = e.NumIterations(); + CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); + // check that the value of bar is within 0.1% of the expected value + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. * its, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_Simple", &CheckSimple); + +// ========================================================================= // +// --------------------- Counters+Items+Bytes/s Output --------------------- // +// ========================================================================= // + +namespace { +int num_calls1 = 0; +} +void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + state.counters["foo"] = 1; + state.counters["bar"] = ++num_calls1; + state.SetBytesProcessed(364); + state.SetItemsProcessed(150); +} +BENCHMARK(BM_Counters_WithBytesAndItemsPSec); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec %console_report " + "bar=%hrfloat bytes_per_second=%hrfloat/s " + "foo=%hrfloat items_per_second=%hrfloat/s$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": 
\"BM_Counters_WithBytesAndItemsPSec\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"bytes_per_second\": %float,$", MR_Next}, + {"\"foo\": %float,$", MR_Next}, + {"\"items_per_second\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_WithBytesAndItemsPSec\"," + "%csv_bytes_items_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckBytesAndItemsPSec(Results const& e) { + double t = e.DurationCPUTime(); // this (and not real time) is the time used + CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); + CHECK_COUNTER_VALUE(e, int, "bar", EQ, num_calls1); + // check that the values are within 0.1% of the expected values + CHECK_FLOAT_RESULT_VALUE(e, "bytes_per_second", EQ, 364. / t, 0.001); + CHECK_FLOAT_RESULT_VALUE(e, "items_per_second", EQ, 150. 
/ t, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec", + &CheckBytesAndItemsPSec); + +// ========================================================================= // +// ------------------------- Rate Counters Output -------------------------- // +// ========================================================================= // + +void BM_Counters_Rate(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate}; + state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate}; +} +BENCHMARK(BM_Counters_Rate); +ADD_CASES( + TC_ConsoleOut, + {{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Rate\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Rate\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckRate(Results const& e) { + double t = e.DurationCPUTime(); // this (and not real time) is the time used + // check that the values are within 0.1% of the expected values + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
/ t, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate); + +// ========================================================================= // +// ----------------------- Inverted Counters Output ------------------------ // +// ========================================================================= // + +void BM_Invert(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert}; + state.counters["bar"] = bm::Counter{10000, bm::Counter::kInvert}; +} +BENCHMARK(BM_Invert); +ADD_CASES(TC_ConsoleOut, + {{"^BM_Invert %console_report bar=%hrfloatu foo=%hrfloatk$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Invert\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Invert\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Invert\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckInvert(Results const& e) { + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 10000, 0.0001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 0.0001, 0.0001); +} +CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert); + +// ========================================================================= // +// ------------------------- InvertedRate Counters Output +// -------------------------- // +// 
========================================================================= // + +void BM_Counters_InvertedRate(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters["foo"] = + bm::Counter{1, bm::Counter::kIsRate | bm::Counter::kInvert}; + state.counters["bar"] = + bm::Counter{8192, bm::Counter::kIsRate | bm::Counter::kInvert}; +} +BENCHMARK(BM_Counters_InvertedRate); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate %console_report " + "bar=%hrfloats foo=%hrfloats$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_InvertedRate\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_InvertedRate\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_InvertedRate\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckInvertedRate(Results const& e) { + double t = e.DurationCPUTime(); // this (and not real time) is the time used + // check that the values are within 0.1% of the expected values + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, t / 8192.0, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_InvertedRate", &CheckInvertedRate); + +// ========================================================================= // +// ------------------------- Thread Counters Output 
------------------------ // +// ========================================================================= // + +void BM_Counters_Threads(benchmark::State& state) { + for (auto _ : state) { + } + state.counters["foo"] = 1; + state.counters["bar"] = 2; +} +BENCHMARK(BM_Counters_Threads)->ThreadRange(1, 8); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report " + "bar=%hrfloat foo=%hrfloat$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"}, + {"\"family_index\": 5,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES( + TC_CSVOut, + {{"^\"BM_Counters_Threads/threads:%int\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckThreads(Results const& e) { + CHECK_COUNTER_VALUE(e, int, "foo", EQ, e.NumThreads()); + CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2 * e.NumThreads()); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_Threads/threads:%int", &CheckThreads); + +// ========================================================================= // +// ---------------------- ThreadAvg Counters Output ------------------------ // +// ========================================================================= // + +void BM_Counters_AvgThreads(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreads}; + state.counters["bar"] = bm::Counter{2, 
bm::Counter::kAvgThreads}; +} +BENCHMARK(BM_Counters_AvgThreads)->ThreadRange(1, 8); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int " + "%console_report bar=%hrfloat foo=%hrfloat$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"}, + {"\"family_index\": 6,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES( + TC_CSVOut, + {{"^\"BM_Counters_AvgThreads/threads:%int\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckAvgThreads(Results const& e) { + CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); + CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int", + &CheckAvgThreads); + +// ========================================================================= // +// ---------------------- ThreadAvg Counters Output ------------------------ // +// ========================================================================= // + +void BM_Counters_AvgThreadsRate(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate}; + state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreadsRate}; +} +BENCHMARK(BM_Counters_AvgThreadsRate)->ThreadRange(1, 8); +ADD_CASES(TC_ConsoleOut, 
{{"^BM_Counters_AvgThreadsRate/threads:%int " + "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"}, + {"\"family_index\": 7,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgThreadsRate/" + "threads:%int\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckAvgThreadsRate(Results const& e) { + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / e.DurationCPUTime(), 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
/ e.DurationCPUTime(), 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreadsRate/threads:%int", + &CheckAvgThreadsRate); + +// ========================================================================= // +// ------------------- IterationInvariant Counters Output ------------------ // +// ========================================================================= // + +void BM_Counters_IterationInvariant(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{1, bm::Counter::kIsIterationInvariant}; + state.counters["bar"] = bm::Counter{2, bm::Counter::kIsIterationInvariant}; +} +BENCHMARK(BM_Counters_IterationInvariant); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant %console_report " + "bar=%hrfloat foo=%hrfloat$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_IterationInvariant\",$"}, + {"\"family_index\": 8,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_IterationInvariant\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_IterationInvariant\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckIterationInvariant(Results const& e) { + double its = e.NumIterations(); + // check that the values are within 0.1% of the expected value + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
* its, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant", + &CheckIterationInvariant); + +// ========================================================================= // +// ----------------- IterationInvariantRate Counters Output ---------------- // +// ========================================================================= // + +void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters["foo"] = + bm::Counter{1, bm::Counter::kIsIterationInvariantRate}; + state.counters["bar"] = + bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kIsIterationInvariant}; +} +BENCHMARK(BM_Counters_kIsIterationInvariantRate); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate " + "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_kIsIterationInvariantRate\",$"}, + {"\"family_index\": 9,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_kIsIterationInvariantRate\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_kIsIterationInvariantRate\",%csv_report," + "%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckIsIterationInvariantRate(Results const& e) { + double its = e.NumIterations(); + double t = e.DurationCPUTime(); // this (and not real time) is the time used + 
// check that the values are within 0.1% of the expected values + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its * 1. / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, its * 2. / t, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_kIsIterationInvariantRate", + &CheckIsIterationInvariantRate); + +// ========================================================================= // +// ------------------- AvgIterations Counters Output ------------------ // +// ========================================================================= // + +void BM_Counters_AvgIterations(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterations}; + state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgIterations}; +} +BENCHMARK(BM_Counters_AvgIterations); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations %console_report " + "bar=%hrfloat foo=%hrfloat$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_AvgIterations\",$"}, + {"\"family_index\": 10,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_AvgIterations\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_AvgIterations\",%csv_report,%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckAvgIterations(Results const& e) { + double its = e.NumIterations(); + // check that the values are within 0.1% of the expected value + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. 
/ its, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / its, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations); + +// ========================================================================= // +// ----------------- AvgIterationsRate Counters Output ---------------- // +// ========================================================================= // + +void BM_Counters_kAvgIterationsRate(benchmark::State& state) { + for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + } + namespace bm = benchmark; + state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate}; + state.counters["bar"] = + bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kAvgIterations}; +} +BENCHMARK(BM_Counters_kAvgIterationsRate); +ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate " + "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_kAvgIterationsRate\",$"}, + {"\"family_index\": 11,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_kAvgIterationsRate\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bar\": %float,$", MR_Next}, + {"\"foo\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_kAvgIterationsRate\",%csv_report," + "%float,%float$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckAvgIterationsRate(Results const& e) { + double its = e.NumIterations(); + double t = e.DurationCPUTime(); // this (and not real time) is the time used + // check that the 
values are within 0.1% of the expected values + CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / its / t, 0.001); + CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / its / t, 0.001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_kAvgIterationsRate", + &CheckAvgIterationsRate); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc b/libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc new file mode 100644 index 000000000000..bbe194264ed4 --- /dev/null +++ b/libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc @@ -0,0 +1,183 @@ + +#undef NDEBUG + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// ========================================================================= // +// ------------------------ Thousands Customisation ------------------------ // +// ========================================================================= // + +void BM_Counters_Thousands(benchmark::State& state) { + for (auto _ : state) { + } + namespace bm = benchmark; + state.counters.insert({ + {"t0_1000000DefaultBase", + bm::Counter(1000 * 1000, bm::Counter::kDefaults)}, + {"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults, + benchmark::Counter::OneK::kIs1000)}, + {"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults, + benchmark::Counter::OneK::kIs1024)}, + {"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults, + benchmark::Counter::OneK::kIs1000)}, + {"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults, + benchmark::Counter::OneK::kIs1024)}, + }); +} +BENCHMARK(BM_Counters_Thousands)->Repetitions(2); +ADD_CASES( + TC_ConsoleOut, + { + 
{"^BM_Counters_Thousands/repeats:2 %console_report " + "t0_1000000DefaultBase=1000k " + "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " + "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, + {"^BM_Counters_Thousands/repeats:2 %console_report " + "t0_1000000DefaultBase=1000k " + "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " + "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, + {"^BM_Counters_Thousands/repeats:2_mean %console_report " + "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " + "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " + "t4_1048576Base1024=1024k$"}, + {"^BM_Counters_Thousands/repeats:2_median %console_report " + "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " + "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " + "t4_1048576Base1024=1024k$"}, + {"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ " + "]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 " + "t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"}, + }); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, + {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": 
\"BM_Counters_Thousands/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, + {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Thousands/repeats:2_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, + {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Thousands/repeats:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, + 
{"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, + {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, + {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Thousands/repeats:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"iterations\": 2,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"t0_1000000DefaultBase\": 0\\.(0)*e\\+(0)*,$", MR_Next}, + {"\"t1_1000000Base1000\": 0\\.(0)*e\\+(0)*,$", MR_Next}, + {"\"t2_1000000Base1024\": 0\\.(0)*e\\+(0)*,$", MR_Next}, + {"\"t3_1048576Base1000\": 0\\.(0)*e\\+(0)*,$", MR_Next}, + {"\"t4_1048576Base1024\": 0\\.(0)*e\\+(0)*$", MR_Next}, + {"}", MR_Next}}); + +ADD_CASES( + TC_CSVOut, + {{"^\"BM_Counters_Thousands/" + "repeats:2\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\.04858e\\+(" + "0)*6,1\\.04858e\\+(0)*6$"}, + {"^\"BM_Counters_Thousands/" + "repeats:2\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\.04858e\\+(" + "0)*6,1\\.04858e\\+(0)*6$"}, + {"^\"BM_Counters_Thousands/" + "repeats:2_mean\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\." 
+ "04858e\\+(0)*6,1\\.04858e\\+(0)*6$"}, + {"^\"BM_Counters_Thousands/" + "repeats:2_median\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\." + "04858e\\+(0)*6,1\\.04858e\\+(0)*6$"}, + {"^\"BM_Counters_Thousands/repeats:2_stddev\",%csv_report,0,0,0,0,0$"}}); +// VS2013 does not allow this function to be passed as a lambda argument +// to CHECK_BENCHMARK_RESULTS() +void CheckThousands(Results const& e) { + if (e.name != "BM_Counters_Thousands/repeats:2") + return; // Do not check the aggregates! + + // check that the values are within 0.01% of the expected values + CHECK_FLOAT_COUNTER_VALUE(e, "t0_1000000DefaultBase", EQ, 1000 * 1000, + 0.0001); + CHECK_FLOAT_COUNTER_VALUE(e, "t1_1000000Base1000", EQ, 1000 * 1000, 0.0001); + CHECK_FLOAT_COUNTER_VALUE(e, "t2_1000000Base1024", EQ, 1000 * 1000, 0.0001); + CHECK_FLOAT_COUNTER_VALUE(e, "t3_1048576Base1000", EQ, 1024 * 1024, 0.0001); + CHECK_FLOAT_COUNTER_VALUE(e, "t4_1048576Base1024", EQ, 1024 * 1024, 0.0001); +} +CHECK_BENCHMARK_RESULTS("BM_Counters_Thousands", &CheckThousands); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/tools/BUILD.bazel b/libcxx/utils/google-benchmark/tools/BUILD.bazel new file mode 100644 index 000000000000..5895883a2eb3 --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/BUILD.bazel @@ -0,0 +1,19 @@ +load("@py_deps//:requirements.bzl", "requirement") + +py_library( + name = "gbench", + srcs = glob(["gbench/*.py"]), + deps = [ + requirement("numpy"), + requirement("scipy"), + ], +) + +py_binary( + name = "compare", + srcs = ["compare.py"], + python_version = "PY2", + deps = [ + ":gbench", + ], +) diff --git a/libcxx/utils/google-benchmark/tools/compare.py 
b/libcxx/utils/google-benchmark/tools/compare.py new file mode 100755 index 000000000000..01d2c89f50fb --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/compare.py @@ -0,0 +1,429 @@ +#!/usr/bin/env python + +import unittest +""" +compare.py - versatile benchmark output compare tool +""" + +import argparse +from argparse import ArgumentParser +import json +import sys +import gbench +from gbench import util, report +from gbench.util import * + + +def check_inputs(in1, in2, flags): + """ + Perform checking on the user provided inputs and diagnose any abnormalities + """ + in1_kind, in1_err = classify_input_file(in1) + in2_kind, in2_err = classify_input_file(in2) + output_file = find_benchmark_flag('--benchmark_out=', flags) + output_type = find_benchmark_flag('--benchmark_out_format=', flags) + if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file: + print(("WARNING: '--benchmark_out=%s' will be passed to both " + "benchmarks causing it to be overwritten") % output_file) + if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0: + print("WARNING: passing optional flags has no effect since both " + "inputs are JSON") + if output_type is not None and output_type != 'json': + print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" + " is not supported.") % output_type) + sys.exit(1) + + +def create_parser(): + parser = ArgumentParser( + description='versatile benchmark output compare tool') + + parser.add_argument( + '-a', + '--display_aggregates_only', + dest='display_aggregates_only', + action="store_true", + help="If there are repetitions, by default, we display everything - the" + " actual runs, and the aggregates computed. Sometimes, it is " + "desirable to only view the aggregates. E.g. when there are a lot " + "of repetitions. Do note that only the display is affected. " + "Internally, all the actual runs are still used, e.g. 
for U test.") + + parser.add_argument( + '--no-color', + dest='color', + default=True, + action="store_false", + help="Do not use colors in the terminal output" + ) + + parser.add_argument( + '-d', + '--dump_to_json', + dest='dump_to_json', + help="Additionally, dump benchmark comparison output to this file in JSON format.") + + utest = parser.add_argument_group() + utest.add_argument( + '--no-utest', + dest='utest', + default=True, + action="store_false", + help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS)) + alpha_default = 0.05 + utest.add_argument( + "--alpha", + dest='utest_alpha', + default=alpha_default, + type=float, + help=("significance level alpha. 
if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") % + alpha_default) + + subparsers = parser.add_subparsers( + help='This tool has multiple modes of operation:', + dest='mode') + + parser_a = subparsers.add_parser( + 'benchmarks', + help='The most simple use-case, compare all the output of these two benchmarks') + baseline = parser_a.add_argument_group( + 'baseline', 'The benchmark baseline') + baseline.add_argument( + 'test_baseline', + metavar='test_baseline', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + contender = parser_a.add_argument_group( + 'contender', 'The benchmark that will be compared against the baseline') + contender.add_argument( + 'test_contender', + metavar='test_contender', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + parser_a.add_argument( + 'benchmark_options', + metavar='benchmark_options', + nargs=argparse.REMAINDER, + help='Arguments to pass when running benchmark executables') + + parser_b = subparsers.add_parser( + 'filters', help='Compare filter one with the filter two of benchmark') + baseline = parser_b.add_argument_group( + 'baseline', 'The benchmark baseline') + baseline.add_argument( + 'test', + metavar='test', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + baseline.add_argument( + 'filter_baseline', + metavar='filter_baseline', + type=str, + nargs=1, + help='The first filter, that will be used as baseline') + contender = parser_b.add_argument_group( + 'contender', 'The benchmark that will be compared against the baseline') + contender.add_argument( + 'filter_contender', + metavar='filter_contender', + type=str, + nargs=1, + help='The second filter, that will be compared against the baseline') + parser_b.add_argument( + 'benchmark_options', + metavar='benchmark_options', + 
nargs=argparse.REMAINDER, + help='Arguments to pass when running benchmark executables') + + parser_c = subparsers.add_parser( + 'benchmarksfiltered', + help='Compare filter one of first benchmark with filter two of the second benchmark') + baseline = parser_c.add_argument_group( + 'baseline', 'The benchmark baseline') + baseline.add_argument( + 'test_baseline', + metavar='test_baseline', + type=argparse.FileType('r'), + nargs=1, + help='A benchmark executable or JSON output file') + baseline.add_argument( + 'filter_baseline', + metavar='filter_baseline', + type=str, + nargs=1, + help='The first filter, that will be used as baseline') + contender = parser_c.add_argument_group( + 'contender', 'The benchmark that will be compared against the baseline') + contender.add_argument( + 'test_contender', + metavar='test_contender', + type=argparse.FileType('r'), + nargs=1, + help='The second benchmark executable or JSON output file, that will be compared against the baseline') + contender.add_argument( + 'filter_contender', + metavar='filter_contender', + type=str, + nargs=1, + help='The second filter, that will be compared against the baseline') + parser_c.add_argument( + 'benchmark_options', + metavar='benchmark_options', + nargs=argparse.REMAINDER, + help='Arguments to pass when running benchmark executables') + + return parser + + +def main(): + # Parse the command line flags + parser = create_parser() + args, unknown_args = parser.parse_known_args() + if args.mode is None: + parser.print_help() + exit(1) + assert not unknown_args + benchmark_options = args.benchmark_options + + if args.mode == 'benchmarks': + test_baseline = args.test_baseline[0].name + test_contender = args.test_contender[0].name + filter_baseline = '' + filter_contender = '' + + # NOTE: if test_baseline == test_contender, you are analyzing the stdev + + description = 'Comparing %s to %s' % (test_baseline, test_contender) + elif args.mode == 'filters': + test_baseline = args.test[0].name + 
test_contender = args.test[0].name + filter_baseline = args.filter_baseline[0] + filter_contender = args.filter_contender[0] + + # NOTE: if filter_baseline == filter_contender, you are analyzing the + # stdev + + description = 'Comparing %s to %s (from %s)' % ( + filter_baseline, filter_contender, args.test[0].name) + elif args.mode == 'benchmarksfiltered': + test_baseline = args.test_baseline[0].name + test_contender = args.test_contender[0].name + filter_baseline = args.filter_baseline[0] + filter_contender = args.filter_contender[0] + + # NOTE: if test_baseline == test_contender and + # filter_baseline == filter_contender, you are analyzing the stdev + + description = 'Comparing %s (from %s) to %s (from %s)' % ( + filter_baseline, test_baseline, filter_contender, test_contender) + else: + # should never happen + print("Unrecognized mode of operation: '%s'" % args.mode) + parser.print_help() + exit(1) + + check_inputs(test_baseline, test_contender, benchmark_options) + + if args.display_aggregates_only: + benchmark_options += ['--benchmark_display_aggregates_only=true'] + + options_baseline = [] + options_contender = [] + + if filter_baseline and filter_contender: + options_baseline = ['--benchmark_filter=%s' % filter_baseline] + options_contender = ['--benchmark_filter=%s' % filter_contender] + + # Run the benchmarks and report the results + json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline)) + json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender)) + + # Now, filter the benchmarks so that the difference report can work + if filter_baseline and filter_contender: + replacement = '[%s vs. 
%s]' % (filter_baseline, filter_contender) + json1 = gbench.report.filter_benchmark( + json1_orig, filter_baseline, replacement) + json2 = gbench.report.filter_benchmark( + json2_orig, filter_contender, replacement) + + diff_report = gbench.report.get_difference_report( + json1, json2, args.utest) + output_lines = gbench.report.print_difference_report( + diff_report, + args.display_aggregates_only, + args.utest, args.utest_alpha, args.color) + print(description) + for ln in output_lines: + print(ln) + + # Optionally, diff and output to JSON + if args.dump_to_json is not None: + with open(args.dump_to_json, 'w') as f_json: + json.dump(diff_report, f_json) + +class TestParser(unittest.TestCase): + def setUp(self): + self.parser = create_parser() + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'gbench', + 'Inputs') + self.testInput0 = os.path.join(testInputs, 'test1_run1.json') + self.testInput1 = os.path.join(testInputs, 'test1_run2.json') + + def test_benchmarks_basic(self): + parsed = self.parser.parse_args( + ['benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_without_utest(self): + parsed = self.parser.parse_args( + ['--no-utest', 'benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertFalse(parsed.utest) + self.assertEqual(parsed.utest_alpha, 0.05) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_display_aggregates_only(self): + parsed = 
self.parser.parse_args( + ['-a', 'benchmarks', self.testInput0, self.testInput1]) + self.assertTrue(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_with_utest_alpha(self): + parsed = self.parser.parse_args( + ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.utest_alpha, 0.314) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_basic_without_utest_with_utest_alpha(self): + parsed = self.parser.parse_args( + ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + self.assertFalse(parsed.display_aggregates_only) + self.assertFalse(parsed.utest) + self.assertEqual(parsed.utest_alpha, 0.314) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertFalse(parsed.benchmark_options) + + def test_benchmarks_with_remainder(self): + parsed = self.parser.parse_args( + ['benchmarks', self.testInput0, self.testInput1, 'd']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.benchmark_options, ['d']) + + def test_benchmarks_with_remainder_after_doubleminus(self): + parsed = self.parser.parse_args( + 
['benchmarks', self.testInput0, self.testInput1, '--', 'e']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.benchmark_options, ['e']) + + def test_filters_basic(self): + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.test[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertFalse(parsed.benchmark_options) + + def test_filters_with_remainder(self): + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd', 'e']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.test[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.benchmark_options, ['e']) + + def test_filters_with_remainder_after_doubleminus(self): + parsed = self.parser.parse_args( + ['filters', self.testInput0, 'c', 'd', '--', 'f']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.test[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.benchmark_options, ['f']) + + def test_benchmarksfiltered_basic(self): + parsed = self.parser.parse_args( + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) 
+ self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertFalse(parsed.benchmark_options) + + def test_benchmarksfiltered_with_remainder(self): + parsed = self.parser.parse_args( + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.benchmark_options[0], 'f') + + def test_benchmarksfiltered_with_remainder_after_doubleminus(self): + parsed = self.parser.parse_args( + ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g']) + self.assertFalse(parsed.display_aggregates_only) + self.assertTrue(parsed.utest) + self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.test_baseline[0].name, self.testInput0) + self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.test_contender[0].name, self.testInput1) + self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.benchmark_options[0], 'g') + + +if __name__ == '__main__': + # unittest.main() + main() + +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; +# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json new file mode 100644 index 000000000000..601e327aefb5 --- 
/dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json @@ -0,0 +1,119 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_SameTimes", + "iterations": 1000, + "real_time": 10, + "cpu_time": 10, + "time_unit": "ns" + }, + { + "name": "BM_2xFaster", + "iterations": 1000, + "real_time": 50, + "cpu_time": 50, + "time_unit": "ns" + }, + { + "name": "BM_2xSlower", + "iterations": 1000, + "real_time": 50, + "cpu_time": 50, + "time_unit": "ns" + }, + { + "name": "BM_1PercentFaster", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_1PercentSlower", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_10PercentFaster", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_10PercentSlower", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_100xSlower", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_100xFaster", + "iterations": 1000, + "real_time": 10000, + "cpu_time": 10000, + "time_unit": "ns" + }, + { + "name": "BM_10PercentCPUToTime", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_ThirdFaster", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "MyComplexityTest_BigO", + "run_name": "MyComplexityTest", + "run_type": "aggregate", + "aggregate_name": "BigO", + "cpu_coefficient": 4.2749856294592886e+00, + "real_coefficient": 6.4789275289789780e+00, + "big_o": "N", + "time_unit": "ns" + }, + { + "name": "MyComplexityTest_RMS", + "run_name": "MyComplexityTest", + "run_type": "aggregate", + "aggregate_name": "RMS", + "rms": 
4.5097802512472874e-03 + }, + { + "name": "BM_NotBadTimeUnit", + "iterations": 1000, + "real_time": 0.4, + "cpu_time": 0.5, + "time_unit": "s" + }, + { + "name": "BM_DifferentTimeUnit", + "iterations": 1, + "real_time": 1, + "cpu_time": 1, + "time_unit": "s" + } + ] +} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json new file mode 100644 index 000000000000..3cbcf39b0c93 --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json @@ -0,0 +1,119 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_SameTimes", + "iterations": 1000, + "real_time": 10, + "cpu_time": 10, + "time_unit": "ns" + }, + { + "name": "BM_2xFaster", + "iterations": 1000, + "real_time": 25, + "cpu_time": 25, + "time_unit": "ns" + }, + { + "name": "BM_2xSlower", + "iterations": 20833333, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_1PercentFaster", + "iterations": 1000, + "real_time": 98.9999999, + "cpu_time": 98.9999999, + "time_unit": "ns" + }, + { + "name": "BM_1PercentSlower", + "iterations": 1000, + "real_time": 100.9999999, + "cpu_time": 100.9999999, + "time_unit": "ns" + }, + { + "name": "BM_10PercentFaster", + "iterations": 1000, + "real_time": 90, + "cpu_time": 90, + "time_unit": "ns" + }, + { + "name": "BM_10PercentSlower", + "iterations": 1000, + "real_time": 110, + "cpu_time": 110, + "time_unit": "ns" + }, + { + "name": "BM_100xSlower", + "iterations": 1000, + "real_time": 1.0000e+04, + "cpu_time": 1.0000e+04, + "time_unit": "ns" + }, + { + "name": "BM_100xFaster", + "iterations": 1000, + "real_time": 100, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_10PercentCPUToTime", + "iterations": 1000, + "real_time": 110, + "cpu_time": 90, + "time_unit": "ns" + }, + { + 
"name": "BM_ThirdFaster", + "iterations": 1000, + "real_time": 66.665, + "cpu_time": 66.664, + "time_unit": "ns" + }, + { + "name": "MyComplexityTest_BigO", + "run_name": "MyComplexityTest", + "run_type": "aggregate", + "aggregate_name": "BigO", + "cpu_coefficient": 5.6215779594361486e+00, + "real_coefficient": 5.6288314793554610e+00, + "big_o": "N", + "time_unit": "ns" + }, + { + "name": "MyComplexityTest_RMS", + "run_name": "MyComplexityTest", + "run_type": "aggregate", + "aggregate_name": "RMS", + "rms": 3.3128901852342174e-03 + }, + { + "name": "BM_NotBadTimeUnit", + "iterations": 1000, + "real_time": 0.04, + "cpu_time": 0.6, + "time_unit": "s" + }, + { + "name": "BM_DifferentTimeUnit", + "iterations": 1, + "real_time": 1, + "cpu_time": 1, + "time_unit": "ns" + } + ] +} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json new file mode 100644 index 000000000000..15bc69803049 --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json @@ -0,0 +1,81 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_Hi", + "iterations": 1234, + "real_time": 42, + "cpu_time": 24, + "time_unit": "ms" + }, + { + "name": "BM_Zero", + "iterations": 1000, + "real_time": 10, + "cpu_time": 10, + "time_unit": "ns" + }, + { + "name": "BM_Zero/4", + "iterations": 4000, + "real_time": 40, + "cpu_time": 40, + "time_unit": "ns" + }, + { + "name": "Prefix/BM_Zero", + "iterations": 2000, + "real_time": 20, + "cpu_time": 20, + "time_unit": "ns" + }, + { + "name": "Prefix/BM_Zero/3", + "iterations": 3000, + "real_time": 30, + "cpu_time": 30, + "time_unit": "ns" + }, + { + "name": "BM_One", + "iterations": 5000, + "real_time": 5, + "cpu_time": 5, + "time_unit": "ns" + }, + { + "name": "BM_One/4", + "iterations": 2000, + "real_time": 20, 
+ "cpu_time": 20, + "time_unit": "ns" + }, + { + "name": "Prefix/BM_One", + "iterations": 1000, + "real_time": 10, + "cpu_time": 10, + "time_unit": "ns" + }, + { + "name": "Prefix/BM_One/3", + "iterations": 1500, + "real_time": 15, + "cpu_time": 15, + "time_unit": "ns" + }, + { + "name": "BM_Bye", + "iterations": 5321, + "real_time": 11, + "cpu_time": 63, + "time_unit": "ns" + } + ] +} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json new file mode 100644 index 000000000000..49f8b061437f --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json @@ -0,0 +1,65 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_One", + "run_type": "aggregate", + "iterations": 1000, + "real_time": 10, + "cpu_time": 100, + "time_unit": "ns" + }, + { + "name": "BM_Two", + "iterations": 1000, + "real_time": 9, + "cpu_time": 90, + "time_unit": "ns" + }, + { + "name": "BM_Two", + "iterations": 1000, + "real_time": 8, + "cpu_time": 86, + "time_unit": "ns" + }, + { + "name": "short", + "run_type": "aggregate", + "iterations": 1000, + "real_time": 8, + "cpu_time": 80, + "time_unit": "ns" + }, + { + "name": "short", + "run_type": "aggregate", + "iterations": 1000, + "real_time": 8, + "cpu_time": 77, + "time_unit": "ns" + }, + { + "name": "medium", + "run_type": "iteration", + "iterations": 1000, + "real_time": 8, + "cpu_time": 80, + "time_unit": "ns" + }, + { + "name": "medium", + "run_type": "iteration", + "iterations": 1000, + "real_time": 9, + "cpu_time": 82, + "time_unit": "ns" + } + ] +} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json new file mode 100644 index 000000000000..acc5ba17aed1 --- /dev/null +++ 
b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json @@ -0,0 +1,65 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_One", + "iterations": 1000, + "real_time": 9, + "cpu_time": 110, + "time_unit": "ns" + }, + { + "name": "BM_Two", + "run_type": "aggregate", + "iterations": 1000, + "real_time": 10, + "cpu_time": 89, + "time_unit": "ns" + }, + { + "name": "BM_Two", + "iterations": 1000, + "real_time": 7, + "cpu_time": 72, + "time_unit": "ns" + }, + { + "name": "short", + "run_type": "aggregate", + "iterations": 1000, + "real_time": 7, + "cpu_time": 75, + "time_unit": "ns" + }, + { + "name": "short", + "run_type": "aggregate", + "iterations": 762, + "real_time": 4.54, + "cpu_time": 66.6, + "time_unit": "ns" + }, + { + "name": "short", + "run_type": "iteration", + "iterations": 1000, + "real_time": 800, + "cpu_time": 1, + "time_unit": "ns" + }, + { + "name": "medium", + "run_type": "iteration", + "iterations": 1200, + "real_time": 5, + "cpu_time": 53, + "time_unit": "ns" + } + ] +} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json new file mode 100644 index 000000000000..eaa005f3a9f4 --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json @@ -0,0 +1,96 @@ +{ + "benchmarks": [ + { + "name": "99 family 0 instance 0 repetition 0", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 0, + "repetition_index": 0 + }, + { + "name": "98 family 0 instance 0 repetition 1", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 0, + "repetition_index": 1 + }, + { + "name": "97 family 0 instance 0 aggregate", + "run_type": "aggregate", + "family_index": 0, + "per_family_instance_index": 0, + "aggregate_name": "9 aggregate" + }, + + + { + "name": 
"96 family 0 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": "95 family 0 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "94 family 0 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 0, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + }, + + + + + { + "name": "93 family 1 instance 0 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 0, + "repetition_index": 0 + }, + { + "name": "92 family 1 instance 0 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 0, + "repetition_index": 1 + }, + { + "name": "91 family 1 instance 0 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 0, + "aggregate_name": "9 aggregate" + }, + + + { + "name": "90 family 1 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": "89 family 1 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "88 family 1 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + } + ] +} diff --git a/libcxx/utils/google-benchmark/tools/gbench/__init__.py b/libcxx/utils/google-benchmark/tools/gbench/__init__.py new file mode 100644 index 000000000000..fce1a1acfbb3 --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/__init__.py @@ -0,0 +1,8 @@ +"""Google Benchmark tooling""" + +__author__ = 'Eric Fiselier' +__email__ = 'eric@efcs.ca' +__versioninfo__ = (0, 5, 0) +__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' + +__all__ = [] diff --git 
a/libcxx/utils/google-benchmark/tools/gbench/report.py b/libcxx/utils/google-benchmark/tools/gbench/report.py new file mode 100644 index 000000000000..6bea82f6bf7b --- /dev/null +++ b/libcxx/utils/google-benchmark/tools/gbench/report.py @@ -0,0 +1,991 @@ +"""report.py - Utilities for reporting statistics about benchmark results +""" + +import unittest +import os +import re +import copy +import random + +from scipy.stats import mannwhitneyu + + +class BenchmarkColor(object): + def __init__(self, name, code): + self.name = name + self.code = code + + def __repr__(self): + return '%s%r' % (self.__class__.__name__, + (self.name, self.code)) + + def __format__(self, format): + return self.code + + +# Benchmark Colors Enumeration +BC_NONE = BenchmarkColor('NONE', '') +BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') +BC_CYAN = BenchmarkColor('CYAN', '\033[96m') +BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') +BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') +BC_HEADER = BenchmarkColor('HEADER', '\033[92m') +BC_WARNING = BenchmarkColor('WARNING', '\033[93m') +BC_WHITE = BenchmarkColor('WHITE', '\033[97m') +BC_FAIL = BenchmarkColor('FAIL', '\033[91m') +BC_ENDC = BenchmarkColor('ENDC', '\033[0m') +BC_BOLD = BenchmarkColor('BOLD', '\033[1m') +BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') + +UTEST_MIN_REPETITIONS = 2 +UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. +UTEST_COL_NAME = "_pvalue" + + +def color_format(use_color, fmt_str, *args, **kwargs): + """ + Return the result of 'fmt_str.format(*args, **kwargs)' after transforming + 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' + is False then all color codes in 'args' and 'kwargs' are replaced with + the empty string. 
+ """ + assert use_color is True or use_color is False + if not use_color: + args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for arg in args] + kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for key, arg in kwargs.items()} + return fmt_str.format(*args, **kwargs) + + +def find_longest_name(benchmark_list): + """ + Return the length of the longest benchmark name in a given list of + benchmark JSON objects + """ + longest_name = 1 + for bc in benchmark_list: + if len(bc['name']) > longest_name: + longest_name = len(bc['name']) + return longest_name + + +def calculate_change(old_val, new_val): + """ + Return a float representing the decimal change between old_val and new_val. + """ + if old_val == 0 and new_val == 0: + return 0.0 + if old_val == 0: + return float(new_val - old_val) / (float(old_val + new_val) / 2) + return float(new_val - old_val) / abs(old_val) + + +def filter_benchmark(json_orig, family, replacement=""): + """ + Apply a filter to the json, and only leave the 'family' of benchmarks. + """ + regex = re.compile(family) + filtered = {} + filtered['benchmarks'] = [] + for be in json_orig['benchmarks']: + if not regex.search(be['name']): + continue + filteredbench = copy.deepcopy(be) # Do NOT modify the old name! + filteredbench['name'] = regex.sub(replacement, filteredbench['name']) + filtered['benchmarks'].append(filteredbench) + return filtered + + +def get_unique_benchmark_names(json): + """ + While *keeping* the order, give all the unique 'names' used for benchmarks. + """ + seen = set() + uniqued = [x['name'] for x in json['benchmarks'] + if x['name'] not in seen and + (seen.add(x['name']) or True)] + return uniqued + + +def intersect(list1, list2): + """ + Given two lists, get a new list consisting of the elements only contained + in *both of the input lists*, while preserving the ordering. 
+ """ + return [x for x in list1 if x in list2] + + +def is_potentially_comparable_benchmark(x): + return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) + + +def partition_benchmarks(json1, json2): + """ + While preserving the ordering, find benchmarks with the same names in + both of the inputs, and group them. + (i.e. partition/filter into groups with common name) + """ + json1_unique_names = get_unique_benchmark_names(json1) + json2_unique_names = get_unique_benchmark_names(json2) + names = intersect(json1_unique_names, json2_unique_names) + partitions = [] + for name in names: + time_unit = None + # Pick the time unit from the first entry of the lhs benchmark. + # We should be careful not to crash with unexpected input. + for x in json1['benchmarks']: + if (x['name'] == name and is_potentially_comparable_benchmark(x)): + time_unit = x['time_unit'] + break + if time_unit is None: + continue + # Filter by name and time unit. + # All the repetitions are assumed to be comparable. + lhs = [x for x in json1['benchmarks'] if x['name'] == name and + x['time_unit'] == time_unit] + rhs = [x for x in json2['benchmarks'] if x['name'] == name and + x['time_unit'] == time_unit] + partitions.append([lhs, rhs]) + return partitions + + +def extract_field(partition, field_name): + # The count of elements may be different. We want *all* of them. + lhs = [x[field_name] for x in partition[0]] + rhs = [x[field_name] for x in partition[1]] + return [lhs, rhs] + + +def calc_utest(timings_cpu, timings_time): + min_rep_cnt = min(len(timings_time[0]), + len(timings_time[1]), + len(timings_cpu[0]), + len(timings_cpu[1])) + + # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 
+ if min_rep_cnt < UTEST_MIN_REPETITIONS: + return False, None, None + + time_pvalue = mannwhitneyu( + timings_time[0], timings_time[1], alternative='two-sided').pvalue + cpu_pvalue = mannwhitneyu( + timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue + + return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue + +def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): + def get_utest_color(pval): + return BC_FAIL if pval >= utest_alpha else BC_OKGREEN + + # Check if we failed miserably with minimum required repetitions for utest + if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: + return [] + + dsc = "U Test, Repetitions: {} vs {}".format( + utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) + dsc_color = BC_OKGREEN + + # We still got some results to show but issue a warning about it. + if not utest['have_optimal_repetitions']: + dsc_color = BC_WARNING + dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( + UTEST_OPTIMAL_REPETITIONS) + + special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" + + return [color_format(use_color, + special_str, + BC_HEADER, + "{}{}".format(bc_name, UTEST_COL_NAME), + first_col_width, + get_utest_color( + utest['time_pvalue']), utest['time_pvalue'], + get_utest_color( + utest['cpu_pvalue']), utest['cpu_pvalue'], + dsc_color, dsc, + endc=BC_ENDC)] + + +def get_difference_report( + json1, + json2, + utest=False): + """ + Calculate and report the difference between each test of two benchmarks + runs specified as 'json1' and 'json2'. Output is another json containing + relevant details for each test run. 
+ """ + assert utest is True or utest is False + + diff_report = [] + partitions = partition_benchmarks(json1, json2) + for partition in partitions: + benchmark_name = partition[0][0]['name'] + time_unit = partition[0][0]['time_unit'] + measurements = [] + utest_results = {} + # Careful, we may have different repetition count. + for i in range(min(len(partition[0]), len(partition[1]))): + bn = partition[0][i] + other_bench = partition[1][i] + measurements.append({ + 'real_time': bn['real_time'], + 'cpu_time': bn['cpu_time'], + 'real_time_other': other_bench['real_time'], + 'cpu_time_other': other_bench['cpu_time'], + 'time': calculate_change(bn['real_time'], other_bench['real_time']), + 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) + }) + + # After processing the whole partition, if requested, do the U test. + if utest: + timings_cpu = extract_field(partition, 'cpu_time') + timings_time = extract_field(partition, 'real_time') + have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) + if cpu_pvalue and time_pvalue: + utest_results = { + 'have_optimal_repetitions': have_optimal_repetitions, + 'cpu_pvalue': cpu_pvalue, + 'time_pvalue': time_pvalue, + 'nr_of_repetitions': len(timings_cpu[0]), + 'nr_of_repetitions_other': len(timings_cpu[1]) + } + + # Store only if we had any measurements for given benchmark. + # E.g. partition_benchmarks will filter out the benchmarks having + # time units which are not compatible with other time units in the + # benchmark suite. 
+ if measurements: + run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' + aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' + diff_report.append({ + 'name': benchmark_name, + 'measurements': measurements, + 'time_unit': time_unit, + 'run_type': run_type, + 'aggregate_name': aggregate_name, + 'utest': utest_results + }) + + return diff_report + + +def print_difference_report( + json_diff_report, + include_aggregates_only=False, + utest=False, + utest_alpha=0.05, + use_color=True): + """ + Calculate and report the difference between each test of two benchmarks + runs specified as 'json1' and 'json2'. + """ + assert utest is True or utest is False + + def get_color(res): + if res > 0.05: + return BC_FAIL + elif res > -0.07: + return BC_WHITE + else: + return BC_CYAN + + first_col_width = find_longest_name(json_diff_report) + first_col_width = max( + first_col_width, + len('Benchmark')) + first_col_width += len(UTEST_COL_NAME) + first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( + 'Benchmark', 12 + first_col_width) + output_strs = [first_line, '-' * len(first_line)] + + fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" + for benchmark in json_diff_report: + # *If* we were asked to only include aggregates, + # and if it is non-aggregate, then don't print it. 
+ if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': + for measurement in benchmark['measurements']: + output_strs += [color_format(use_color, + fmt_str, + BC_HEADER, + benchmark['name'], + first_col_width, + get_color(measurement['time']), + measurement['time'], + get_color(measurement['cpu']), + measurement['cpu'], + measurement['real_time'], + measurement['real_time_other'], + measurement['cpu_time'], + measurement['cpu_time_other'], + endc=BC_ENDC)] + + # After processing the measurements, if requested and + # if applicable (e.g. u-test exists for given benchmark), + # print the U test. + if utest and benchmark['utest']: + output_strs += print_utest(benchmark['name'], + benchmark['utest'], + utest_alpha=utest_alpha, + first_col_width=first_col_width, + use_color=use_color) + + return output_strs + + +############################################################################### +# Unit tests + + +class TestGetUniqueBenchmarkNames(unittest.TestCase): + def load_results(self): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test3_run0.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + def test_basic(self): + expect_lines = [ + 'BM_One', + 'BM_Two', + 'short', # These two are not sorted + 'medium', # These two are not sorted + ] + json = self.load_results() + output_lines = get_unique_benchmark_names(json) + print("\n") + print("\n".join(output_lines)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + self.assertEqual(expect_lines[i], output_lines[i]) + + +class TestReportDifference(unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test1_run1.json') + testOutput2 
= os.path.join(testInputs, 'test1_run2.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report(json1, json2) + + def test_json_diff_report_pretty_printing(self): + expect_lines = [ + ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], + ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], + ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], + ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], + ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], + ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], + ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], + ['BM_100xSlower', '+99.0000', '+99.0000', + '100', '10000', '100', '10000'], + ['BM_100xFaster', '-0.9900', '-0.9900', + '10000', '100', '10000', '100'], + ['BM_10PercentCPUToTime', '+0.1000', + '-0.1000', '100', '110', '100', '90'], + ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], + ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(len(parts), 7) + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report_output(self): + expected_output = [ + { + 'name': 'BM_SameTimes', + 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_2xFaster', + 'measurements': [{'time': -0.5000, 'cpu': 
-0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_2xSlower', + 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_1PercentFaster', + 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_1PercentSlower', + 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_10PercentFaster', + 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_10PercentSlower', + 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_100xSlower', + 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_100xFaster', + 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_10PercentCPUToTime', + 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': 'BM_ThirdFaster', + 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}], + 
class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    """Diff-report tests where both inputs come from a single result file,
    filtered into two benchmark families ("BM_Z.ro" vs "BM_O.e")."""

    @classmethod
    def setUpClass(cls):
        def load_result():
            # Load the shared benchmark JSON containing both families.
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                # Return directly: the original rebound the name 'json',
                # shadowing the module object it had just called.
                return json.load(f)

        full_results = load_result()
        json1 = filter_benchmark(full_results, "BM_Z.ro", ".")
        json2 = filter_benchmark(full_results, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        # Each row: name, time delta, cpu delta, time1, time2, cpu1, cpu2.
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        # Skip the two header lines; only data rows are compared.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 10, 'real_time_other': 5,
                                  'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 40, 'real_time_other': 20,
                                  'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 20, 'real_time_other': 10,
                                  'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 30, 'real_time_other': 15,
                                  'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.1489', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report_pretty_printing_aggregates_only(self): + expect_lines = [ + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two_pvalue', + '0.6985', + '0.6985', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.1489', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def 
test_json_diff_report(self): + expected_output = [ + { + 'name': u'BM_One', + 'measurements': [ + {'time': -0.1, + 'cpu': 0.1, + 'real_time': 10, + 'real_time_other': 9, + 'cpu_time': 100, + 'cpu_time_other': 110} + ], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': u'BM_Two', + 'measurements': [ + {'time': 0.1111111111111111, + 'cpu': -0.011111111111111112, + 'real_time': 9, + 'real_time_other': 10, + 'cpu_time': 90, + 'cpu_time_other': 89}, + {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, + 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 + } + }, + { + 'name': u'short', + 'measurements': [ + {'time': -0.125, + 'cpu': -0.0625, + 'real_time': 8, + 'real_time_other': 7, + 'cpu_time': 80, + 'cpu_time_other': 75}, + {'time': -0.4325, + 'cpu': -0.13506493506493514, + 'real_time': 8, + 'real_time_other': 4.54, + 'cpu_time': 77, + 'cpu_time_other': 66.6} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 + } + }, + { + 'name': u'medium', + 'measurements': [ + {'time': -0.375, + 'cpu': -0.3375, + 'real_time': 8, + 'real_time_other': 5, + 'cpu_time': 80, + 'cpu_time_other': 53} + ], + 'time_unit': 'ns', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( + unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + 
testOutput1 = os.path.join(testInputs, 'test3_run0.json') + testOutput2 = os.path.join(testInputs, 'test3_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): + expect_lines = [ + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], + ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], + ['BM_Two_pvalue', + '0.6985', + '0.6985', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.1489', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'] + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, + utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'BM_One', + 'measurements': [ + {'time': -0.1, + 'cpu': 0.1, + 'real_time': 10, + 'real_time_other': 9, + 'cpu_time': 100, + 'cpu_time_other': 110} + ], + 'time_unit': 'ns', + 'utest': {} + }, + { + 'name': u'BM_Two', + 'measurements': [ + {'time': 0.1111111111111111, + 'cpu': -0.011111111111111112, + 'real_time': 9, + 
'real_time_other': 10, + 'cpu_time': 90, + 'cpu_time_other': 89}, + {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, + 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 + } + }, + { + 'name': u'short', + 'measurements': [ + {'time': -0.125, + 'cpu': -0.0625, + 'real_time': 8, + 'real_time_other': 7, + 'cpu_time': 80, + 'cpu_time_other': 75}, + {'time': -0.4325, + 'cpu': -0.13506493506493514, + 'real_time': 8, + 'real_time_other': 4.54, + 'cpu_time': 77, + 'cpu_time_other': 66.6} + ], + 'time_unit': 'ns', + 'utest': { + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 + } + }, + { + 'name': u'medium', + 'measurements': [ + {'real_time_other': 5, + 'cpu_time': 80, + 'time': -0.375, + 'real_time': 8, + 'cpu_time_other': 53, + 'cpu': -0.3375 + } + ], + 'utest': {}, + 'time_unit': u'ns', + 'aggregate_name': '' + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportSorting(unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_result(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test4_run.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + cls.json = load_result() + + def test_json_diff_report_pretty_printing(self): + import util + + expected_names = [ + "99 family 0 instance 0 repetition 0", + "98 family 0 instance 0 repetition 1", + "97 family 0 instance 0 aggregate", + "96 family 0 instance 1 repetition 0", + 
"95 family 0 instance 1 repetition 1", + "94 family 0 instance 1 aggregate", + "93 family 1 instance 0 repetition 0", + "92 family 1 instance 0 repetition 1", + "91 family 1 instance 0 aggregate", + "90 family 1 instance 1 repetition 0", + "89 family 1 instance 1 repetition 1", + "88 family 1 instance 1 aggregate" + ] + + for n in range(len(self.json['benchmarks']) ** 2): + random.shuffle(self.json['benchmarks']) + sorted_benchmarks = util.sort_benchmark_results(self.json)[ + 'benchmarks'] + self.assertEqual(len(expected_names), len(sorted_benchmarks)) + for out, expected in zip(sorted_benchmarks, expected_names): + self.assertEqual(out['name'], expected) + + +def assert_utest(unittest_instance, lhs, rhs): + if lhs['utest']: + unittest_instance.assertAlmostEqual( + lhs['utest']['cpu_pvalue'], + rhs['utest']['cpu_pvalue']) + unittest_instance.assertAlmostEqual( + lhs['utest']['time_pvalue'], + rhs['utest']['time_pvalue']) + unittest_instance.assertEqual( + lhs['utest']['have_optimal_repetitions'], + rhs['utest']['have_optimal_repetitions']) + else: + # lhs is empty. assert if rhs is not. + unittest_instance.assertEqual(lhs['utest'], rhs['utest']) + + +def assert_measurements(unittest_instance, lhs, rhs): + for m1, m2 in zip(lhs['measurements'], rhs['measurements']): + unittest_instance.assertEqual(m1['real_time'], m2['real_time']) + unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) + # m1['time'] and m1['cpu'] hold values which are being calculated, + # and therefore we must use almost-equal pattern. 
# Number of leading bytes needed to classify a binary on this platform:
# 'MZ' (2 bytes) on Windows, a 4-byte magic elsewhere.
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4


def is_executable_file(filename):
    """
    Return 'True' if 'filename' names a valid file which is likely
    an executable. A file is considered an executable if it starts with the
    magic bytes for a EXE, Mach O, or ELF file.
    """
    if not os.path.isfile(filename):
        return False
    with open(filename, mode='rb') as f:
        magic_bytes = f.read(_num_magic_bytes)
    if sys.platform == 'darwin':
        # Any of the Mach-O / fat-binary magics counts.
        return magic_bytes in (
            b'\xfe\xed\xfa\xce',  # MH_MAGIC
            b'\xce\xfa\xed\xfe',  # MH_CIGAM
            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
            b'\xbe\xba\xfe\xca',  # FAT_CIGAM
        )
    if sys.platform.startswith('win'):
        return magic_bytes == b'MZ'
    return magic_bytes == b'\x7FELF'
+ """ + try: + with open(filename, 'r') as f: + json.load(f) + return True + except BaseException: + pass + return False + + +def classify_input_file(filename): + """ + Return a tuple (type, msg) where 'type' specifies the classified type + of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable + string represeting the error. + """ + ftype = IT_Invalid + err_msg = None + if not os.path.exists(filename): + err_msg = "'%s' does not exist" % filename + elif not os.path.isfile(filename): + err_msg = "'%s' does not name a file" % filename + elif is_executable_file(filename): + ftype = IT_Executable + elif is_json_file(filename): + ftype = IT_JSON + else: + err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename + return ftype, err_msg + + +def check_input_file(filename): + """ + Classify the file named by 'filename' and return the classification. + If the file is classified as 'IT_Invalid' print an error message and exit + the program. + """ + ftype, msg = classify_input_file(filename) + if ftype == IT_Invalid: + print("Invalid input file: %s" % msg) + sys.exit(1) + return ftype + + +def find_benchmark_flag(prefix, benchmark_flags): + """ + Search the specified list of flags for a flag matching `` and + if it is found return the arg it specifies. If specified more than once the + last value is returned. If the flag is not found None is returned. + """ + assert prefix.startswith('--') and prefix.endswith('=') + result = None + for f in benchmark_flags: + if f.startswith(prefix): + result = f[len(prefix):] + return result + + +def remove_benchmark_flags(prefix, benchmark_flags): + """ + Return a new list containing the specified benchmark_flags except those + with the specified prefix. + """ + assert prefix.startswith('--') and prefix.endswith('=') + return [f for f in benchmark_flags if not f.startswith(prefix)] + + +def load_benchmark_results(fname): + """ + Read benchmark output from a file and return the JSON object. 
def sort_benchmark_results(result):
    """Order the 'benchmarks' entries by family index, then per-family
    instance index, then aggregates after repetitions, then repetition
    index. Missing keys sort first (as -1 / non-aggregate)."""
    def sort_key(benchmark):
        return (
            benchmark.get('family_index', -1),
            benchmark.get('per_family_instance_index', -1),
            # Aggregates (1) sort after plain repetitions (0).
            1 if benchmark.get('run_type') == 'aggregate' else 0,
            benchmark.get('repetition_index', -1),
        )

    result['benchmarks'] = sorted(result['benchmarks'], key=sort_key)
    return result
def find_used_labels(asm):
    """
    Return the set of '.L' label names that appear as the target of a
    jump instruction (any 'j*' mnemonic) somewhere in 'asm'.
    """
    found = set()
    # Raw string: the original plain string relied on invalid escape
    # sequences ('\s', '\.'), which Python deprecates.
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    for l in asm.splitlines():
        m = label_re.match(l)
        if m:
            found.add('.L%s' % m.group(1))
    return found
def is_identifier(tk):
    """Return True if 'tk' is a non-empty C-style identifier:
    a letter or underscore followed by letters, digits, or underscores."""
    if not tk:
        return False
    head = tk[0]
    if not (head.isalpha() or head == '_'):
        return False
    # Every remaining character must be alphanumeric or '_'.
    return all(c.isalnum() or c == '_' for c in tk[1:])
def main():
    """Parse arguments, strip the input assembly file, and write the
    cleaned-up result to the output path."""
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    args, unknown_args = parser.parse_known_args()
    # Renamed locals: the original bound 'input', shadowing the builtin.
    in_path = args.input[0]
    out_path = args.out[0]
    if not os.path.isfile(in_path):
        print("ERROR: input file '%s' does not exist" % in_path)
        sys.exit(1)
    with open(in_path, 'r') as f:
        contents = f.read()
    stripped = process_asm(contents)
    with open(out_path, 'w') as f:
        f.write(stripped)
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H +#define LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using llvm::MachO::HeaderFileType; + +namespace lld { + +namespace mach_o { +class ArchHandler; +class MachODylibFile; +class MachOFile; +class SectCreateFile; +} + +class MachOLinkingContext : public LinkingContext { +public: + MachOLinkingContext(); + ~MachOLinkingContext() override; + + enum Arch { + arch_unknown, + arch_ppc, + arch_x86, + arch_x86_64, + arch_armv6, + arch_armv7, + arch_armv7s, + arch_arm64, + }; + + enum class OS { + unknown, + macOSX, + iOS, + iOS_simulator + }; + + enum class ExportMode { + globals, // Default, all global symbols exported. + exported, // -exported_symbol[s_list], only listed symbols exported. + unexported // -unexported_symbol[s_list], no listed symbol exported. + }; + + enum class DebugInfoMode { + addDebugMap, // Default + noDebugMap // -S option + }; + + enum class UndefinedMode { + error, + warning, + suppress, + dynamicLookup + }; + + enum ObjCConstraint { + objc_unknown = 0, + objc_supports_gc = 2, + objc_gc_only = 4, + // Image optimized by dyld = 8 + // GC compaction = 16 + objc_retainReleaseForSimulator = 32, + objc_retainRelease + }; + + /// Initializes the context to sane default values given the specified output + /// file type, arch, os, and minimum os version. This should be called before + /// other setXXX() methods. 
+ void configure(HeaderFileType type, Arch arch, OS os, uint32_t minOSVersion, + bool exportDynamicSymbols); + + void addPasses(PassManager &pm) override; + bool validateImpl() override; + std::string demangle(StringRef symbolName) const override; + + void createImplicitFiles(std::vector> &) override; + + /// Creates a new file which is owned by the context. Returns a pointer to + /// the new file. + template + typename std::enable_if::value, T *>::type + make_file(Args &&... args) const { + auto file = std::unique_ptr(new T(std::forward(args)...)); + auto *filePtr = file.get(); + auto *ctx = const_cast(this); + ctx->getNodes().push_back(std::make_unique(std::move(file))); + return filePtr; + } + + uint32_t getCPUType() const; + uint32_t getCPUSubType() const; + + bool addEntryPointLoadCommand() const; + bool addUnixThreadLoadCommand() const; + bool outputTypeHasEntry() const; + bool is64Bit() const; + + virtual uint64_t pageZeroSize() const { return _pageZeroSize; } + virtual uint64_t pageSize() const { return _pageSize; } + + mach_o::ArchHandler &archHandler() const; + + HeaderFileType outputMachOType() const { return _outputMachOType; } + + Arch arch() const { return _arch; } + StringRef archName() const { return nameFromArch(_arch); } + OS os() const { return _os; } + + ExportMode exportMode() const { return _exportMode; } + void setExportMode(ExportMode mode) { _exportMode = mode; } + void addExportSymbol(StringRef sym); + bool exportRestrictMode() const { return _exportMode != ExportMode::globals; } + bool exportSymbolNamed(StringRef sym) const; + + DebugInfoMode debugInfoMode() const { return _debugInfoMode; } + void setDebugInfoMode(DebugInfoMode mode) { + _debugInfoMode = mode; + } + + void appendOrderedSymbol(StringRef symbol, StringRef filename); + + bool keepPrivateExterns() const { return _keepPrivateExterns; } + void setKeepPrivateExterns(bool v) { _keepPrivateExterns = v; } + bool demangleSymbols() const { return _demangle; } + void 
setDemangleSymbols(bool d) { _demangle = d; } + bool mergeObjCCategories() const { return _mergeObjCCategories; } + void setMergeObjCCategories(bool v) { _mergeObjCCategories = v; } + /// Create file at specified path which will contain a binary encoding + /// of all input and output file paths. + std::error_code createDependencyFile(StringRef path); + void addInputFileDependency(StringRef path) const; + void addInputFileNotFound(StringRef path) const; + void addOutputFileDependency(StringRef path) const; + + bool minOS(StringRef mac, StringRef iOS) const; + void setDoNothing(bool value) { _doNothing = value; } + bool doNothing() const { return _doNothing; } + bool printAtoms() const { return _printAtoms; } + bool testingFileUsage() const { return _testingFileUsage; } + const StringRefVector &searchDirs() const { return _searchDirs; } + const StringRefVector &frameworkDirs() const { return _frameworkDirs; } + void setSysLibRoots(const StringRefVector &paths); + const StringRefVector &sysLibRoots() const { return _syslibRoots; } + bool PIE() const { return _pie; } + void setPIE(bool pie) { _pie = pie; } + bool generateVersionLoadCommand() const { + return _generateVersionLoadCommand; + } + void setGenerateVersionLoadCommand(bool v) { + _generateVersionLoadCommand = v; + } + + bool generateFunctionStartsLoadCommand() const { + return _generateFunctionStartsLoadCommand; + } + void setGenerateFunctionStartsLoadCommand(bool v) { + _generateFunctionStartsLoadCommand = v; + } + + bool generateDataInCodeLoadCommand() const { + return _generateDataInCodeLoadCommand; + } + void setGenerateDataInCodeLoadCommand(bool v) { + _generateDataInCodeLoadCommand = v; + } + + uint64_t stackSize() const { return _stackSize; } + void setStackSize(uint64_t stackSize) { _stackSize = stackSize; } + + uint64_t baseAddress() const { return _baseAddress; } + void setBaseAddress(uint64_t baseAddress) { _baseAddress = baseAddress; } + + ObjCConstraint objcConstraint() const { return 
_objcConstraint; } + + uint32_t osMinVersion() const { return _osMinVersion; } + + uint32_t sdkVersion() const { return _sdkVersion; } + void setSdkVersion(uint64_t v) { _sdkVersion = v; } + + uint64_t sourceVersion() const { return _sourceVersion; } + void setSourceVersion(uint64_t v) { _sourceVersion = v; } + + uint32_t swiftVersion() const { return _swiftVersion; } + + /// Checks whether a given path on the filesystem exists. + /// + /// When running in -test_file_usage mode, this method consults an + /// internally maintained list of files that exist (provided by -path_exists) + /// instead of the actual filesystem. + bool pathExists(StringRef path) const; + + /// Like pathExists() but only used on files - not directories. + bool fileExists(StringRef path) const; + + /// Adds any library search paths derived from the given base, possibly + /// modified by -syslibroots. + /// + /// The set of paths added consists of approximately all syslibroot-prepended + /// versions of libPath that exist, or the original libPath if there are none + /// for whatever reason. With various edge-cases for compatibility. + void addModifiedSearchDir(StringRef libPath, bool isSystemPath = false); + + /// Determine whether -lFoo can be resolve within the given path, and + /// return the filename if so. + /// + /// The -lFoo option is documented to search for libFoo.dylib and libFoo.a in + /// that order, unless Foo ends in ".o", in which case only the exact file + /// matches (e.g. -lfoo.o would only find foo.o). + llvm::Optional searchDirForLibrary(StringRef path, + StringRef libName) const; + + /// Iterates through all search path entries looking for libName (as + /// specified by -lFoo). + llvm::Optional searchLibrary(StringRef libName) const; + + /// Add a framework search path. Internally, this method may be prepended + /// the path with syslibroot. 
+ void addFrameworkSearchDir(StringRef fwPath, bool isSystemPath = false); + + /// Iterates through all framework directories looking for + /// Foo.framework/Foo (when fwName = "Foo"). + llvm::Optional findPathForFramework(StringRef fwName) const; + + /// The dylib's binary compatibility version, in the raw uint32 format. + /// + /// When building a dynamic library, this is the compatibility version that + /// gets embedded into the result. Other Mach-O binaries that link against + /// this library will store the compatibility version in its load command. At + /// runtime, the loader will verify that the binary is compatible with the + /// installed dynamic library. + uint32_t compatibilityVersion() const { return _compatibilityVersion; } + + /// The dylib's current version, in the raw uint32 format. + /// + /// When building a dynamic library, this is the current version that gets + /// embedded into the result. Other Mach-O binaries that link against + /// this library will store the compatibility version in its load command. + uint32_t currentVersion() const { return _currentVersion; } + + /// The dylib's install name. + /// + /// Binaries that link against the dylib will embed this path into the dylib + /// load command. When loading the binaries at runtime, this is the location + /// on disk that the loader will look for the dylib. + StringRef installName() const { return _installName; } + + /// Whether or not the dylib has side effects during initialization. + /// + /// Dylibs marked as being dead strippable provide the guarantee that loading + /// the dylib has no side effects, allowing the linker to strip out the dylib + /// when linking a binary that does not use any of its symbols. + bool deadStrippableDylib() const { return _deadStrippableDylib; } + + /// Whether or not to use flat namespace. 
+ /// + /// MachO usually uses a two-level namespace, where each external symbol + /// referenced by the target is associated with the dylib that will provide + /// the symbol's definition at runtime. Using flat namespace overrides this + /// behavior: the linker searches all dylibs on the command line and all + /// dylibs those original dylibs depend on, but does not record which dylib + /// an external symbol came from. At runtime dyld again searches all images + /// and uses the first definition it finds. In addition, any undefines in + /// loaded flat_namespace dylibs must be resolvable at build time. + bool useFlatNamespace() const { return _flatNamespace; } + + /// How to handle undefined symbols. + /// + /// Options are: + /// * error: Report an error and terminate linking. + /// * warning: Report a warning, but continue linking. + /// * suppress: Ignore and continue linking. + /// * dynamic_lookup: For use with -twolevel namespace: Records source dylibs + /// for symbols that are defined in a linked dylib at static link time. + /// Undefined symbols are handled by searching all loaded images at + /// runtime. + UndefinedMode undefinedMode() const { return _undefinedMode; } + + /// The path to the executable that will load the bundle at runtime. + /// + /// When building a Mach-O bundle, this executable will be examined if there + /// are undefined symbols after the main link phase. It is expected that this + /// binary will be loading the bundle at runtime and will provide the symbols + /// at that point. 
+ StringRef bundleLoader() const { return _bundleLoader; } + + void setCompatibilityVersion(uint32_t vers) { _compatibilityVersion = vers; } + void setCurrentVersion(uint32_t vers) { _currentVersion = vers; } + void setInstallName(StringRef name) { _installName = name; } + void setDeadStrippableDylib(bool deadStrippable) { + _deadStrippableDylib = deadStrippable; + } + void setUseFlatNamespace(bool flatNamespace) { + _flatNamespace = flatNamespace; + } + + void setUndefinedMode(UndefinedMode undefinedMode) { + _undefinedMode = undefinedMode; + } + + void setBundleLoader(StringRef loader) { _bundleLoader = loader; } + void setPrintAtoms(bool value=true) { _printAtoms = value; } + void setTestingFileUsage(bool value = true) { + _testingFileUsage = value; + } + void addExistingPathForDebug(StringRef path) { + _existingPaths.insert(path); + } + + void addRpath(StringRef rpath); + const StringRefVector &rpaths() const { return _rpaths; } + + /// Add section alignment constraint on final layout. + void addSectionAlignment(StringRef seg, StringRef sect, uint16_t align); + + /// Add a section based on a command-line sectcreate option. + void addSectCreateSection(StringRef seg, StringRef sect, + std::unique_ptr content); + + /// Returns true if specified section had alignment constraints. + bool sectionAligned(StringRef seg, StringRef sect, uint16_t &align) const; + + StringRef dyldPath() const { return "/usr/lib/dyld"; } + + /// Stub creation Pass should be run. + bool needsStubsPass() const; + + // GOT creation Pass should be run. + bool needsGOTPass() const; + + /// Pass to add TLV sections. + bool needsTLVPass() const; + + /// Pass to transform __compact_unwind into __unwind_info should be run. + bool needsCompactUnwindPass() const; + + /// Pass to add shims switching between thumb and arm mode. + bool needsShimPass() const; + + /// Pass to add objc image info and optimized objc data. 
+ bool needsObjCPass() const; + + /// Magic symbol name stubs will need to help lazy bind. + StringRef binderSymbolName() const; + + /// Used to keep track of direct and indirect dylibs. + void registerDylib(mach_o::MachODylibFile *dylib, bool upward) const; + + // Reads a file from disk to memory. Returns only a needed chunk + // if a fat binary. + ErrorOr> getMemoryBuffer(StringRef path); + + /// Used to find indirect dylibs. Instantiates a MachODylibFile if one + /// has not already been made for the requested dylib. Uses -L and -F + /// search paths to allow indirect dylibs to be overridden. + mach_o::MachODylibFile* findIndirectDylib(StringRef path); + + uint32_t dylibCurrentVersion(StringRef installName) const; + + uint32_t dylibCompatVersion(StringRef installName) const; + + ArrayRef allDylibs() const { + return _allDylibs; + } + + /// Creates a copy (owned by this MachOLinkingContext) of a string. + StringRef copy(StringRef str) { return str.copy(_allocator); } + + /// If the memoryBuffer is a fat file with a slice for the current arch, + /// this method will return the offset and size of that slice. + bool sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, uint32_t &size); + + /// Returns if a command line option specified dylib is an upward link. + bool isUpwardDylib(StringRef installName) const; + + static bool isThinObjectFile(StringRef path, Arch &arch); + static Arch archFromCpuType(uint32_t cputype, uint32_t cpusubtype); + static Arch archFromName(StringRef archName); + static StringRef nameFromArch(Arch arch); + static uint32_t cpuTypeFromArch(Arch arch); + static uint32_t cpuSubtypeFromArch(Arch arch); + static bool is64Bit(Arch arch); + static bool isHostEndian(Arch arch); + static bool isBigEndian(Arch arch); + + /// Construct 32-bit value from string "X.Y.Z" where + /// bits are xxxx.yy.zz. 
Largest number is 65535.255.255 + static bool parsePackedVersion(StringRef str, uint32_t &result); + + /// Construct 64-bit value from string "A.B.C.D.E" where + /// bits are aaaa.bb.cc.dd.ee. Largest number is 16777215.1023.1023.1023.1023 + static bool parsePackedVersion(StringRef str, uint64_t &result); + + void finalizeInputFiles() override; + + llvm::Error handleLoadedFile(File &file) override; + + bool customAtomOrderer(const DefinedAtom *left, const DefinedAtom *right, + bool &leftBeforeRight) const; + + /// Return the 'flat namespace' file. This is the file that supplies + /// atoms for otherwise undefined symbols when the -flat_namespace or + /// -undefined dynamic_lookup options are used. + File* flatNamespaceFile() const { return _flatNamespaceFile; } + +private: + Writer &writer() const override; + mach_o::MachODylibFile* loadIndirectDylib(StringRef path); + struct ArchInfo { + StringRef archName; + MachOLinkingContext::Arch arch; + bool littleEndian; + uint32_t cputype; + uint32_t cpusubtype; + }; + + struct SectionAlign { + StringRef segmentName; + StringRef sectionName; + uint16_t align; + }; + + struct OrderFileNode { + StringRef fileFilter; + unsigned order; + }; + + static bool findOrderOrdinal(const std::vector &nodes, + const DefinedAtom *atom, unsigned &ordinal); + + static ArchInfo _s_archInfos[]; + + std::set _existingPaths; // For testing only. 
+ StringRefVector _searchDirs; + StringRefVector _syslibRoots; + StringRefVector _frameworkDirs; + HeaderFileType _outputMachOType = llvm::MachO::MH_EXECUTE; + bool _outputMachOTypeStatic = false; // Disambiguate static vs dynamic prog + bool _doNothing = false; // for -help and -v which just print info + bool _pie = false; + Arch _arch = arch_unknown; + OS _os = OS::macOSX; + uint32_t _osMinVersion = 0; + uint32_t _sdkVersion = 0; + uint64_t _sourceVersion = 0; + uint64_t _pageZeroSize = 0; + uint64_t _pageSize = 4096; + uint64_t _baseAddress = 0; + uint64_t _stackSize = 0; + uint32_t _compatibilityVersion = 0; + uint32_t _currentVersion = 0; + ObjCConstraint _objcConstraint = objc_unknown; + uint32_t _swiftVersion = 0; + StringRef _installName; + StringRefVector _rpaths; + bool _flatNamespace = false; + UndefinedMode _undefinedMode = UndefinedMode::error; + bool _deadStrippableDylib = false; + bool _printAtoms = false; + bool _testingFileUsage = false; + bool _keepPrivateExterns = false; + bool _demangle = false; + bool _mergeObjCCategories = true; + bool _generateVersionLoadCommand = false; + bool _generateFunctionStartsLoadCommand = false; + bool _generateDataInCodeLoadCommand = false; + StringRef _bundleLoader; + mutable std::unique_ptr _archHandler; + mutable std::unique_ptr _writer; + std::vector _sectAligns; + mutable llvm::StringMap _pathToDylibMap; + mutable std::vector _allDylibs; + mutable std::set _upwardDylibs; + mutable std::vector> _indirectDylibs; + mutable std::mutex _dylibsMutex; + ExportMode _exportMode = ExportMode::globals; + llvm::StringSet<> _exportedSymbols; + DebugInfoMode _debugInfoMode = DebugInfoMode::addDebugMap; + std::unique_ptr _dependencyInfo; + llvm::StringMap> _orderFiles; + unsigned _orderFileEntries = 0; + File *_flatNamespaceFile = nullptr; + mach_o::SectCreateFile *_sectCreateFile = nullptr; +}; + +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H diff --git 
a/lld/include/lld/ReaderWriter/YamlContext.h b/lld/include/lld/ReaderWriter/YamlContext.h new file mode 100644 index 000000000000..dc133e3627de --- /dev/null +++ b/lld/include/lld/ReaderWriter/YamlContext.h @@ -0,0 +1,42 @@ +//===- lld/ReaderWriter/YamlContext.h - object used in YAML I/O context ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_YAML_CONTEXT_H +#define LLD_READER_WRITER_YAML_CONTEXT_H + +#include "lld/Common/LLVM.h" +#include +#include +#include + +namespace lld { +class File; +class LinkingContext; +class Registry; +namespace mach_o { +namespace normalized { +struct NormalizedFile; +} +} + +using lld::mach_o::normalized::NormalizedFile; + +/// When YAML I/O is used in lld, the yaml context always holds a YamlContext +/// object. We need to support hetergenous yaml documents which each require +/// different context info. This struct supports all clients. 
+struct YamlContext { + const LinkingContext *_ctx = nullptr; + const Registry *_registry = nullptr; + File *_file = nullptr; + NormalizedFile *_normalizeMachOFile = nullptr; + StringRef _path; +}; + +} // end namespace lld + +#endif // LLD_READER_WRITER_YAML_CONTEXT_H diff --git a/lld/lib/CMakeLists.txt b/lld/lib/CMakeLists.txt new file mode 100644 index 000000000000..8884efcfe9ba --- /dev/null +++ b/lld/lib/CMakeLists.txt @@ -0,0 +1,3 @@ +add_subdirectory(Core) +add_subdirectory(Driver) +add_subdirectory(ReaderWriter) diff --git a/lld/lib/Core/CMakeLists.txt b/lld/lib/Core/CMakeLists.txt new file mode 100644 index 000000000000..d5e507536b72 --- /dev/null +++ b/lld/lib/Core/CMakeLists.txt @@ -0,0 +1,24 @@ +add_lld_library(lldCore + DefinedAtom.cpp + Error.cpp + File.cpp + LinkingContext.cpp + Reader.cpp + Resolver.cpp + SymbolTable.cpp + Writer.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Core + + LINK_COMPONENTS + BinaryFormat + MC + Support + + LINK_LIBS + ${LLVM_PTHREAD_LIB} + + DEPENDS + intrinsics_gen + ) diff --git a/lld/lib/Core/DefinedAtom.cpp b/lld/lib/Core/DefinedAtom.cpp new file mode 100644 index 000000000000..3c1eece16841 --- /dev/null +++ b/lld/lib/Core/DefinedAtom.cpp @@ -0,0 +1,81 @@ +//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ErrorHandling.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" + +namespace lld { + +DefinedAtom::ContentPermissions DefinedAtom::permissions() const { + // By default base permissions on content type. 
+ return permissions(this->contentType()); +} + +// Utility function for deriving permissions from content type +DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) { + switch (type) { + case typeCode: + case typeResolver: + case typeBranchIsland: + case typeBranchShim: + case typeStub: + case typeStubHelper: + case typeMachHeader: + return permR_X; + + case typeConstant: + case typeCString: + case typeUTF16String: + case typeCFI: + case typeLSDA: + case typeLiteral4: + case typeLiteral8: + case typeLiteral16: + case typeDTraceDOF: + case typeCompactUnwindInfo: + case typeProcessedUnwindInfo: + case typeObjCImageInfo: + case typeObjCMethodList: + return permR__; + + case typeData: + case typeDataFast: + case typeZeroFill: + case typeZeroFillFast: + case typeObjC1Class: + case typeLazyPointer: + case typeLazyDylibPointer: + case typeNonLazyPointer: + case typeThunkTLV: + return permRW_; + + case typeGOT: + case typeConstData: + case typeCFString: + case typeInitializerPtr: + case typeTerminatorPtr: + case typeCStringPtr: + case typeObjCClassPtr: + case typeObjC2CategoryList: + case typeInterposingTuples: + case typeTLVInitialData: + case typeTLVInitialZeroFill: + case typeTLVInitializerPtr: + return permRW_L; + + case typeUnknown: + case typeTempLTO: + case typeSectCreate: + case typeDSOHandle: + return permUnknown; + } + llvm_unreachable("unknown content type"); +} + +} // namespace diff --git a/lld/lib/Core/Error.cpp b/lld/lib/Core/Error.cpp new file mode 100644 index 000000000000..a4f4b1b8af48 --- /dev/null +++ b/lld/lib/Core/Error.cpp @@ -0,0 +1,93 @@ +//===- Error.cpp - system_error extensions for lld --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include + +using namespace lld; + +namespace { +class _YamlReaderErrorCategory : public std::error_category { +public: + const char* name() const noexcept override { + return "lld.yaml.reader"; + } + + std::string message(int ev) const override { + switch (static_cast(ev)) { + case YamlReaderError::unknown_keyword: + return "Unknown keyword found in yaml file"; + case YamlReaderError::illegal_value: + return "Bad value found in yaml file"; + } + llvm_unreachable("An enumerator of YamlReaderError does not have a " + "message defined."); + } +}; +} // end anonymous namespace + +const std::error_category &lld::YamlReaderCategory() { + static _YamlReaderErrorCategory o; + return o; +} + +namespace lld { + +/// Temporary class to enable make_dynamic_error_code() until +/// llvm::ErrorOr<> is updated to work with error encapsulations +/// other than error_code. +class dynamic_error_category : public std::error_category { +public: + ~dynamic_error_category() override = default; + + const char *name() const noexcept override { + return "lld.dynamic_error"; + } + + std::string message(int ev) const override { + assert(ev >= 0); + assert(ev < (int)_messages.size()); + // The value is an index into the string vector. + return _messages[ev]; + } + + int add(std::string msg) { + std::lock_guard lock(_mutex); + // Value zero is always the success value. + if (_messages.empty()) + _messages.push_back("Success"); + _messages.push_back(msg); + // Return the index of the string just appended. 
+ return _messages.size() - 1; + } + +private: + std::vector _messages; + std::recursive_mutex _mutex; +}; + +static dynamic_error_category categorySingleton; + +std::error_code make_dynamic_error_code(StringRef msg) { + return std::error_code(categorySingleton.add(std::string(msg)), + categorySingleton); +} + +char GenericError::ID = 0; + +GenericError::GenericError(Twine Msg) : Msg(Msg.str()) { } + +void GenericError::log(raw_ostream &OS) const { + OS << Msg; +} + +} // namespace lld diff --git a/lld/lib/Core/File.cpp b/lld/lib/Core/File.cpp new file mode 100644 index 000000000000..ce33923c136e --- /dev/null +++ b/lld/lib/Core/File.cpp @@ -0,0 +1,28 @@ +//===- Core/File.cpp - A Container of Atoms -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include + +namespace lld { + +File::~File() = default; + +File::AtomVector File::_noDefinedAtoms; +File::AtomVector File::_noUndefinedAtoms; +File::AtomVector File::_noSharedLibraryAtoms; +File::AtomVector File::_noAbsoluteAtoms; + +std::error_code File::parse() { + std::lock_guard lock(_parseMutex); + if (!_lastError.hasValue()) + _lastError = doParse(); + return _lastError.getValue(); +} + +} // end namespace lld diff --git a/lld/lib/Core/LinkingContext.cpp b/lld/lib/Core/LinkingContext.cpp new file mode 100644 index 000000000000..911ae606678d --- /dev/null +++ b/lld/lib/Core/LinkingContext.cpp @@ -0,0 +1,69 @@ +//===- lib/Core/LinkingContext.cpp - Linker Context Object Interface ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/File.h" +#include "lld/Core/Node.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include + +namespace lld { + +LinkingContext::LinkingContext() = default; + +LinkingContext::~LinkingContext() = default; + +bool LinkingContext::validate() { + return validateImpl(); +} + +llvm::Error LinkingContext::writeFile(const File &linkedFile) const { + return this->writer().writeFile(linkedFile, _outputPath); +} + +std::unique_ptr LinkingContext::createEntrySymbolFile() const { + return createEntrySymbolFile(""); +} + +std::unique_ptr +LinkingContext::createEntrySymbolFile(StringRef filename) const { + if (entrySymbolName().empty()) + return nullptr; + std::unique_ptr entryFile(new SimpleFile(filename, + File::kindEntryObject)); + entryFile->addAtom( + *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName()))); + return std::move(entryFile); +} + +std::unique_ptr LinkingContext::createUndefinedSymbolFile() const { + return createUndefinedSymbolFile(""); +} + +std::unique_ptr +LinkingContext::createUndefinedSymbolFile(StringRef filename) const { + if (_initialUndefinedSymbols.empty()) + return nullptr; + std::unique_ptr undefinedSymFile( + new SimpleFile(filename, File::kindUndefinedSymsObject)); + for (StringRef undefSym : _initialUndefinedSymbols) + undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom( + *undefinedSymFile, undefSym))); + return std::move(undefinedSymFile); +} + +void LinkingContext::createInternalFiles( + std::vector> &result) const { + if (std::unique_ptr file = createEntrySymbolFile()) + result.push_back(std::move(file)); + if (std::unique_ptr file = createUndefinedSymbolFile()) + result.push_back(std::move(file)); +} + +} // end namespace lld diff --git a/lld/lib/Core/Reader.cpp b/lld/lib/Core/Reader.cpp new file 
mode 100644 index 000000000000..3592d87ce627 --- /dev/null +++ b/lld/lib/Core/Reader.cpp @@ -0,0 +1,113 @@ +//===- lib/Core/Reader.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Reader.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include + +using llvm::file_magic; +using llvm::identify_magic; + +namespace lld { + +YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() = default; + +void Registry::add(std::unique_ptr reader) { + _readers.push_back(std::move(reader)); +} + +void Registry::add(std::unique_ptr handler) { + _yamlHandlers.push_back(std::move(handler)); +} + +ErrorOr> +Registry::loadFile(std::unique_ptr mb) const { + // Get file magic. + StringRef content(mb->getBufferStart(), mb->getBufferSize()); + file_magic fileType = identify_magic(content); + + // Ask each registered reader if it can handle this file type or extension. + for (const std::unique_ptr &reader : _readers) { + if (!reader->canParse(fileType, mb->getMemBufferRef())) + continue; + return reader->loadFile(std::move(mb), *this); + } + + // No Reader could parse this file. 
+ return make_error_code(llvm::errc::executable_format_error); +} + +static const Registry::KindStrings kindStrings[] = { + {Reference::kindLayoutAfter, "layout-after"}, + {Reference::kindAssociate, "associate"}, + LLD_KIND_STRING_END}; + +Registry::Registry() { + addKindTable(Reference::KindNamespace::all, Reference::KindArch::all, + kindStrings); +} + +bool Registry::handleTaggedDoc(llvm::yaml::IO &io, + const lld::File *&file) const { + for (const std::unique_ptr &h : _yamlHandlers) + if (h->handledDocTag(io, file)) + return true; + return false; +} + +void Registry::addKindTable(Reference::KindNamespace ns, + Reference::KindArch arch, + const KindStrings array[]) { + KindEntry entry = { ns, arch, array }; + _kindEntries.push_back(entry); +} + +bool Registry::referenceKindFromString(StringRef inputStr, + Reference::KindNamespace &ns, + Reference::KindArch &arch, + Reference::KindValue &value) const { + for (const KindEntry &entry : _kindEntries) { + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (!inputStr.equals(pair->name)) + continue; + ns = entry.ns; + arch = entry.arch; + value = pair->value; + return true; + } + } + return false; +} + +bool Registry::referenceKindToString(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue value, + StringRef &str) const { + for (const KindEntry &entry : _kindEntries) { + if (entry.ns != ns) + continue; + if (entry.arch != arch) + continue; + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (pair->value != value) + continue; + str = pair->name; + return true; + } + } + return false; +} + +} // end namespace lld diff --git a/lld/lib/Core/Resolver.cpp b/lld/lib/Core/Resolver.cpp new file mode 100644 index 000000000000..1ed0b1c6e618 --- /dev/null +++ b/lld/lib/Core/Resolver.cpp @@ -0,0 +1,496 @@ +//===- Core/Resolver.cpp - Resolves Atom References -----------------------===// +// +// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Resolver.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Atom.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/SymbolTable.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +namespace lld { + +llvm::Expected Resolver::handleFile(File &file) { + if (auto ec = _ctx.handleLoadedFile(file)) + return std::move(ec); + bool undefAdded = false; + for (auto &atom : file.defined().owning_ptrs()) + doDefinedAtom(std::move(atom)); + for (auto &atom : file.undefined().owning_ptrs()) { + if (doUndefinedAtom(std::move(atom))) + undefAdded = true; + } + for (auto &atom : file.sharedLibrary().owning_ptrs()) + doSharedLibraryAtom(std::move(atom)); + for (auto &atom : file.absolute().owning_ptrs()) + doAbsoluteAtom(std::move(atom)); + return undefAdded; +} + +llvm::Expected Resolver::forEachUndefines(File &file, + UndefCallback callback) { + size_t i = _undefineIndex[&file]; + bool undefAdded = false; + do { + for (; i < _undefines.size(); ++i) { + StringRef undefName = _undefines[i]; + if (undefName.empty()) + continue; + const Atom *atom = _symbolTable.findByName(undefName); + if (!isa(atom) || _symbolTable.isCoalescedAway(atom)) { + // The symbol was resolved by some other file. Cache the result. 
+ _undefines[i] = ""; + continue; + } + auto undefAddedOrError = callback(undefName); + if (auto ec = undefAddedOrError.takeError()) + return std::move(ec); + undefAdded |= undefAddedOrError.get(); + } + } while (i < _undefines.size()); + _undefineIndex[&file] = i; + return undefAdded; +} + +llvm::Expected Resolver::handleArchiveFile(File &file) { + ArchiveLibraryFile *archiveFile = cast(&file); + return forEachUndefines(file, + [&](StringRef undefName) -> llvm::Expected { + if (File *member = archiveFile->find(undefName)) { + member->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + return handleFile(*member); + } + return false; + }); +} + +llvm::Error Resolver::handleSharedLibrary(File &file) { + // Add all the atoms from the shared library + SharedLibraryFile *sharedLibrary = cast(&file); + auto undefAddedOrError = handleFile(*sharedLibrary); + if (auto ec = undefAddedOrError.takeError()) + return ec; + undefAddedOrError = + forEachUndefines(file, [&](StringRef undefName) -> llvm::Expected { + auto atom = sharedLibrary->exports(undefName); + if (atom.get()) + doSharedLibraryAtom(std::move(atom)); + return false; + }); + + if (auto ec = undefAddedOrError.takeError()) + return ec; + return llvm::Error::success(); +} + +bool Resolver::doUndefinedAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " UndefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" << atom.get()->name() << "\n"); + + // tell symbol table + bool newUndefAdded = _symbolTable.add(*atom.get()); + if (newUndefAdded) + _undefines.push_back(atom.get()->name()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr(atom.release())); + + return newUndefAdded; +} + +// Called on each atom when a file is added. Returns true if a given +// atom is added to the symbol table. 
+void Resolver::doDefinedAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " DefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", file=#" + << atom.get()->file().ordinal() + << ", atom=#" + << atom.get()->ordinal() + << ", name=" + << atom.get()->name() + << ", type=" + << atom.get()->contentType() + << "\n"); + + // An atom that should never be dead-stripped is a dead-strip root. + if (_ctx.deadStrip() && + atom.get()->deadStrip() == DefinedAtom::deadStripNever) { + _deadStripRoots.insert(atom.get()); + } + + // add to list of known atoms + _symbolTable.add(*atom.get()); + _atoms.push_back(OwningAtomPtr(atom.release())); +} + +void Resolver::doSharedLibraryAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " SharedLibraryAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + + // tell symbol table + _symbolTable.add(*atom.get()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr(atom.release())); +} + +void Resolver::doAbsoluteAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " AbsoluteAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + + // tell symbol table + if (atom.get()->scope() != Atom::scopeTranslationUnit) + _symbolTable.add(*atom.get()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr(atom.release())); +} + +// Returns true if at least one of N previous files has created an +// undefined symbol. 
+bool Resolver::undefinesAdded(int begin, int end) { + std::vector> &inputs = _ctx.getNodes(); + for (int i = begin; i < end; ++i) + if (FileNode *node = dyn_cast(inputs[i].get())) + if (_newUndefinesAdded[node->getFile()]) + return true; + return false; +} + +File *Resolver::getFile(int &index) { + std::vector> &inputs = _ctx.getNodes(); + if ((size_t)index >= inputs.size()) + return nullptr; + if (GroupEnd *group = dyn_cast(inputs[index].get())) { + // We are at the end of the current group. If one or more new + // undefined atom has been added in the last groupSize files, we + // reiterate over the files. + int size = group->getSize(); + if (undefinesAdded(index - size, index)) { + index -= size; + return getFile(index); + } + ++index; + return getFile(index); + } + return cast(inputs[index++].get())->getFile(); +} + +// Keep adding atoms until _ctx.getNextFile() returns an error. This +// function is where undefined atoms are resolved. +bool Resolver::resolveUndefines() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Resolving undefines:\n"); + ScopedTask task(getDefaultDomain(), "resolveUndefines"); + int index = 0; + std::set seen; + for (;;) { + bool undefAdded = false; + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "Loading file #" << index << "\n"); + File *file = getFile(index); + if (!file) + return true; + if (std::error_code ec = file->parse()) { + llvm::errs() << "Cannot open " + file->path() << ": " << ec.message() + << "\n"; + return false; + } + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "Loaded file: " << file->path() << "\n"); + switch (file->kind()) { + case File::kindErrorObject: + case File::kindNormalizedObject: + case File::kindMachObject: + case File::kindCEntryObject: + case File::kindHeaderObject: + case File::kindEntryObject: + case File::kindUndefinedSymsObject: + case File::kindStubHelperObject: + case File::kindResolverMergedObject: + case File::kindSectCreateObject: { + // The same file may be visited more than once 
if the file is + // in --start-group and --end-group. Only library files should + // be processed more than once. + if (seen.count(file)) + break; + seen.insert(file); + assert(!file->hasOrdinal()); + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + auto undefAddedOrError = handleFile(*file); + if (auto EC = undefAddedOrError.takeError()) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + undefAdded = undefAddedOrError.get(); + break; + } + case File::kindArchiveLibrary: { + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + auto undefAddedOrError = handleArchiveFile(*file); + if (auto EC = undefAddedOrError.takeError()) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + undefAdded = undefAddedOrError.get(); + break; + } + case File::kindSharedLibrary: + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + if (auto EC = handleSharedLibrary(*file)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. 
+ llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + break; + } + _newUndefinesAdded[file] = undefAdded; + } +} + +// switch all references to undefined or coalesced away atoms +// to the new defined atom +void Resolver::updateReferences() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Updating references:\n"); + ScopedTask task(getDefaultDomain(), "updateReferences"); + for (const OwningAtomPtr &atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast(atom.get())) { + for (const Reference *ref : *defAtom) { + // A reference of type kindAssociate shouldn't be updated. + // Instead, an atom having such reference will be removed + // if the target atom is coalesced away, so that they will + // go away as a group. + if (ref->kindNamespace() == lld::Reference::KindNamespace::all && + ref->kindValue() == lld::Reference::kindAssociate) { + if (_symbolTable.isCoalescedAway(atom.get())) + _deadAtoms.insert(ref->target()); + continue; + } + const Atom *newTarget = _symbolTable.replacement(ref->target()); + const_cast(ref)->setTarget(newTarget); + } + } + } +} + +// For dead code stripping, recursively mark atoms "live" +void Resolver::markLive(const Atom *atom) { + // Mark the atom is live. If it's already marked live, then stop recursion. 
+ auto exists = _liveAtoms.insert(atom); + if (!exists.second) + return; + + // Mark all atoms it references as live + if (const DefinedAtom *defAtom = dyn_cast(atom)) { + for (const Reference *ref : *defAtom) + markLive(ref->target()); + for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) { + const Atom *target = p.second; + markLive(target); + } + } +} + +static bool isBackref(const Reference *ref) { + if (ref->kindNamespace() != lld::Reference::KindNamespace::all) + return false; + return (ref->kindValue() == lld::Reference::kindLayoutAfter); +} + +// remove all atoms not actually used +void Resolver::deadStripOptimize() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Dead stripping unused atoms:\n"); + ScopedTask task(getDefaultDomain(), "deadStripOptimize"); + // only do this optimization with -dead_strip + if (!_ctx.deadStrip()) + return; + + // Some type of references prevent referring atoms to be dead-striped. + // Make a reverse map of such references before traversing the graph. + // While traversing the list of atoms, mark AbsoluteAtoms as live + // in order to avoid reclaim. + for (const OwningAtomPtr &atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast(atom.get())) + for (const Reference *ref : *defAtom) + if (isBackref(ref)) + _reverseRef.insert(std::make_pair(ref->target(), atom.get())); + if (const AbsoluteAtom *absAtom = dyn_cast(atom.get())) + markLive(absAtom); + } + + // By default, shared libraries are built with all globals as dead strip roots + if (_ctx.globalsAreDeadStripRoots()) + for (const OwningAtomPtr &atom : _atoms) + if (const DefinedAtom *defAtom = dyn_cast(atom.get())) + if (defAtom->scope() == DefinedAtom::scopeGlobal) + _deadStripRoots.insert(defAtom); + + // Or, use list of names that are dead strip roots. 
+ for (const StringRef &name : _ctx.deadStripRoots()) { + const Atom *symAtom = _symbolTable.findByName(name); + assert(symAtom); + _deadStripRoots.insert(symAtom); + } + + // mark all roots as live, and recursively all atoms they reference + for (const Atom *dsrAtom : _deadStripRoots) + markLive(dsrAtom); + + // now remove all non-live atoms from _atoms + llvm::erase_if(_atoms, [&](OwningAtomPtr &a) { + return _liveAtoms.count(a.get()) == 0; + }); +} + +// error out if some undefines remain +bool Resolver::checkUndefines() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Checking for undefines:\n"); + + // build vector of remaining undefined symbols + std::vector undefinedAtoms = _symbolTable.undefines(); + if (_ctx.deadStrip()) { + // When dead code stripping, we don't care if dead atoms are undefined. + llvm::erase_if(undefinedAtoms, + [&](const Atom *a) { return _liveAtoms.count(a) == 0; }); + } + + if (undefinedAtoms.empty()) + return false; + + // Warn about unresolved symbols. + bool foundUndefines = false; + for (const UndefinedAtom *undef : undefinedAtoms) { + // Skip over a weak symbol. + if (undef->canBeNull() != UndefinedAtom::canBeNullNever) + continue; + + // If this is a library and undefined symbols are allowed on the + // target platform, skip over it. + if (isa(undef->file()) && _ctx.allowShlibUndefines()) + continue; + + // If the undefine is coalesced away, skip over it. + if (_symbolTable.isCoalescedAway(undef)) + continue; + + // Seems like this symbol is undefined. Warn that. + foundUndefines = true; + if (_ctx.printRemainingUndefines()) { + llvm::errs() << "Undefined symbol: " << undef->file().path() << ": " + << _ctx.demangle(undef->name()) << "\n"; + } + } + if (!foundUndefines) + return false; + if (_ctx.printRemainingUndefines()) + llvm::errs() << "symbol(s) not found\n"; + return true; +} + +// Remove from _atoms all coalesced away atoms. 
+void Resolver::removeCoalescedAwayAtoms() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Removing coalesced away atoms:\n"); + ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms"); + llvm::erase_if(_atoms, [&](OwningAtomPtr &a) { + return _symbolTable.isCoalescedAway(a.get()) || _deadAtoms.count(a.get()); + }); +} + +bool Resolver::resolve() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Resolving atom references:\n"); + if (!resolveUndefines()) + return false; + updateReferences(); + deadStripOptimize(); + if (checkUndefines()) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Found undefines... "); + if (!_ctx.allowRemainingUndefines()) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we don't allow\n"); + return false; + } + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we are ok with\n"); + } + removeCoalescedAwayAtoms(); + _result->addAtoms(_atoms); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "******** Finished resolver\n"); + return true; +} + +void Resolver::MergedFile::addAtoms( + llvm::MutableArrayRef> all) { + ScopedTask task(getDefaultDomain(), "addAtoms"); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n"); + + for (OwningAtomPtr &atom : all) { +#ifndef NDEBUG + if (auto *definedAtom = dyn_cast(atom.get())) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", definedAtom) + << ", file=#" + << definedAtom->file().ordinal() + << ", atom=#" + << definedAtom->ordinal() + << ", name=" + << definedAtom->name() + << ", type=" + << definedAtom->contentType() + << "\n"); + } else { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + } +#endif + addAtom(*atom.release()); + } +} + +} // namespace lld diff --git a/lld/lib/Core/SymbolTable.cpp b/lld/lib/Core/SymbolTable.cpp new file mode 100644 index 000000000000..3ce9555aa494 --- /dev/null +++ b/lld/lib/Core/SymbolTable.cpp @@ -0,0 
+1,284 @@ +//===- Core/SymbolTable.cpp - Main Symbol Table ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/SymbolTable.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +namespace lld { +bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const DefinedAtom &atom) { + if (!atom.name().empty() && + atom.scope() != DefinedAtom::scopeTranslationUnit) { + // Named atoms cannot be merged by content. + assert(atom.merge() != DefinedAtom::mergeByContent); + // Track named atoms that are not scoped to file (static). + return addByName(atom); + } + if (atom.merge() == DefinedAtom::mergeByContent) { + // Named atoms cannot be merged by content. + assert(atom.name().empty()); + // Currently only read-only constants can be merged. + if (atom.permissions() == DefinedAtom::permR__) + return addByContent(atom); + // TODO: support mergeByContent of data atoms by comparing content & fixups. 
+ } + return false; +} + +enum NameCollisionResolution { + NCR_First, + NCR_Second, + NCR_DupDef, + NCR_DupUndef, + NCR_DupShLib, + NCR_Error +}; + +static NameCollisionResolution cases[4][4] = { + //regular absolute undef sharedLib + { + // first is regular + NCR_DupDef, NCR_Error, NCR_First, NCR_First + }, + { + // first is absolute + NCR_Error, NCR_Error, NCR_First, NCR_First + }, + { + // first is undef + NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second + }, + { + // first is sharedLib + NCR_Second, NCR_Second, NCR_First, NCR_DupShLib + } +}; + +static NameCollisionResolution collide(Atom::Definition first, + Atom::Definition second) { + return cases[first][second]; +} + +enum MergeResolution { + MCR_First, + MCR_Second, + MCR_Largest, + MCR_SameSize, + MCR_Error +}; + +static MergeResolution mergeCases[][6] = { + // no tentative weak weakAddress sameNameAndSize largest + {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no + {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative + {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak + {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress + {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize + {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest +}; + +static MergeResolution mergeSelect(DefinedAtom::Merge first, + DefinedAtom::Merge second) { + assert(first != DefinedAtom::mergeByContent); + assert(second != DefinedAtom::mergeByContent); + return mergeCases[first][second]; +} + +bool SymbolTable::addByName(const Atom &newAtom) { + StringRef name = newAtom.name(); + assert(!name.empty()); + const Atom *existing = findByName(name); + if (existing == nullptr) { + // Name is not in symbol table yet, add it associate with this atom. 
+ _nameTable[name] = &newAtom; + return true; + } + + // Do nothing if the same object is added more than once. + if (existing == &newAtom) + return false; + + // Name is already in symbol table and associated with another atom. + bool useNew = true; + switch (collide(existing->definition(), newAtom.definition())) { + case NCR_First: + useNew = false; + break; + case NCR_Second: + useNew = true; + break; + case NCR_DupDef: { + const auto *existingDef = cast(existing); + const auto *newDef = cast(&newAtom); + switch (mergeSelect(existingDef->merge(), newDef->merge())) { + case MCR_First: + useNew = false; + break; + case MCR_Second: + useNew = true; + break; + case MCR_Largest: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + useNew = (newSize >= existingSize); + break; + } + case MCR_SameSize: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + if (existingSize == newSize) { + useNew = true; + break; + } + llvm::errs() << "Size mismatch: " << existing->name() << " (" + << existingSize << ") " << newAtom.name() << " (" << newSize + << ")\n"; + LLVM_FALLTHROUGH; + } + case MCR_Error: + llvm::errs() << "Duplicate symbols: " << existing->name() << ":" + << existing->file().path() << " and " << newAtom.name() + << ":" << newAtom.file().path() << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + } + break; + } + case NCR_DupUndef: { + const UndefinedAtom* existingUndef = cast(existing); + const UndefinedAtom* newUndef = cast(&newAtom); + + bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull()); + if (sameCanBeNull) + useNew = false; + else + useNew = (newUndef->canBeNull() < existingUndef->canBeNull()); + break; + } + case NCR_DupShLib: { + useNew = false; + break; + } + case NCR_Error: + llvm::errs() << "SymbolTable: error while merging " << name << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + } + + if 
(useNew) { + // Update name table to use new atom. + _nameTable[name] = &newAtom; + // Add existing atom to replacement table. + _replacedAtoms[existing] = &newAtom; + } else { + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + } + return false; +} + +unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) { + auto content = atom->rawContent(); + return llvm::hash_combine(atom->size(), + atom->contentType(), + llvm::hash_combine_range(content.begin(), + content.end())); +} + +bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, + const DefinedAtom * const r) { + if (l == r) + return true; + if (l == getEmptyKey() || r == getEmptyKey()) + return false; + if (l == getTombstoneKey() || r == getTombstoneKey()) + return false; + if (l->contentType() != r->contentType()) + return false; + if (l->size() != r->size()) + return false; + if (l->sectionChoice() != r->sectionChoice()) + return false; + if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) { + if (!l->customSectionName().equals(r->customSectionName())) + return false; + } + ArrayRef lc = l->rawContent(); + ArrayRef rc = r->rawContent(); + return memcmp(lc.data(), rc.data(), lc.size()) == 0; +} + +bool SymbolTable::addByContent(const DefinedAtom &newAtom) { + AtomContentSet::iterator pos = _contentTable.find(&newAtom); + if (pos == _contentTable.end()) { + _contentTable.insert(&newAtom); + return true; + } + const Atom* existing = *pos; + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + return false; +} + +const Atom *SymbolTable::findByName(StringRef sym) { + NameToAtom::iterator pos = _nameTable.find(sym); + if (pos == _nameTable.end()) + return nullptr; + return pos->second; +} + +const Atom *SymbolTable::replacement(const Atom *atom) { + // Find the replacement for a given atom. Atoms in _replacedAtoms + // may be chained, so find the last one. 
+ for (;;) { + AtomToAtom::iterator pos = _replacedAtoms.find(atom); + if (pos == _replacedAtoms.end()) + return atom; + atom = pos->second; + } +} + +bool SymbolTable::isCoalescedAway(const Atom *atom) { + return _replacedAtoms.count(atom) > 0; +} + +std::vector SymbolTable::undefines() { + std::vector ret; + for (auto it : _nameTable) { + const Atom *atom = it.second; + assert(atom != nullptr); + if (const auto *undef = dyn_cast(atom)) + if (_replacedAtoms.count(undef) == 0) + ret.push_back(undef); + } + return ret; +} + +} // namespace lld diff --git a/lld/lib/Core/Writer.cpp b/lld/lib/Core/Writer.cpp new file mode 100644 index 000000000000..12788b187e11 --- /dev/null +++ b/lld/lib/Core/Writer.cpp @@ -0,0 +1,17 @@ +//===- lib/Core/Writer.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Writer.h" + +namespace lld { + +Writer::Writer() = default; + +Writer::~Writer() = default; + +} // end namespace lld diff --git a/lld/lib/Driver/CMakeLists.txt b/lld/lib/Driver/CMakeLists.txt new file mode 100644 index 000000000000..afc0bd1187f8 --- /dev/null +++ b/lld/lib/Driver/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td) +tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(DriverOptionsTableGen) + +add_lld_library(lldDriver + DarwinLdDriver.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Driver + + LINK_COMPONENTS + Option + Support + + LINK_LIBS + lldCommon + lldCore + lldMachOOld + lldReaderWriter + lldYAML + ) + +add_dependencies(lldDriver DriverOptionsTableGen) diff --git a/lld/lib/Driver/DarwinLdDriver.cpp b/lld/lib/Driver/DarwinLdDriver.cpp new file mode 100644 index 
000000000000..21d125726192 --- /dev/null +++ b/lld/lib/Driver/DarwinLdDriver.cpp @@ -0,0 +1,1229 @@ +//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for darwin's ld. +/// +//===----------------------------------------------------------------------===// + +#include "lld/Common/Args.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Node.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx 
values for each option in DarwinLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META, VALUES) \ + OPT_##ID, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in DarwinLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "DarwinLdOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in DarwinLdOptions.td +static const llvm::opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + {PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class DarwinLdOptTable : public llvm::opt::OptTable { +public: + DarwinLdOptTable() : OptTable(InfoTable) {} +}; + +static std::vector> +makeErrorFile(StringRef path, std::error_code ec) { + std::vector> result; + result.push_back(std::make_unique(path, ec)); + return result; +} + +static std::vector> +parseMemberFiles(std::unique_ptr file) { + std::vector> members; + if (auto *archive = dyn_cast(file.get())) { + if (std::error_code ec = archive->parseAllMembers(members)) + return makeErrorFile(file->path(), ec); + } else { + members.push_back(std::move(file)); + } + return members; +} + +std::vector> loadFile(MachOLinkingContext &ctx, + StringRef path, bool wholeArchive, + bool upwardDylib) { + if (ctx.logInputFiles()) + message(path); + + ErrorOr> mbOrErr = ctx.getMemoryBuffer(path); + if (std::error_code ec = mbOrErr.getError()) + return makeErrorFile(path, ec); + ErrorOr> fileOrErr = + ctx.registry().loadFile(std::move(mbOrErr.get())); + if (std::error_code ec = fileOrErr.getError()) + return makeErrorFile(path, ec); + std::unique_ptr &file = 
fileOrErr.get(); + + // If file is a dylib, inform LinkingContext about it. + if (SharedLibraryFile *shl = dyn_cast(file.get())) { + if (std::error_code ec = shl->parse()) + return makeErrorFile(path, ec); + ctx.registerDylib(reinterpret_cast(shl), + upwardDylib); + } + if (wholeArchive) + return parseMemberFiles(std::move(file)); + std::vector> files; + files.push_back(std::move(file)); + return files; +} + +} // end anonymous namespace + +// Test may be running on Windows. Canonicalize the path +// separator to '/' to get consistent outputs for tests. +static std::string canonicalizePath(StringRef path) { + char sep = llvm::sys::path::get_separator().front(); + if (sep != '/') { + std::string fixedPath = std::string(path); + std::replace(fixedPath.begin(), fixedPath.end(), sep, '/'); + return fixedPath; + } else { + return std::string(path); + } +} + +static void addFile(StringRef path, MachOLinkingContext &ctx, + bool loadWholeArchive, bool upwardDylib) { + std::vector> files = + loadFile(ctx, path, loadWholeArchive, upwardDylib); + for (std::unique_ptr &file : files) + ctx.getNodes().push_back(std::make_unique(std::move(file))); +} + +// Export lists are one symbol per line. Blank lines are ignored. +// Trailing comments start with #. +static std::error_code parseExportsList(StringRef exportFilePath, + MachOLinkingContext &ctx) { + // Map in export list file. + ErrorOr> mb = + MemoryBuffer::getFileOrSTDIN(exportFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(exportFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + // Ignore trailing # comments. 
+ std::pair symAndComment = line.split('#'); + StringRef sym = symAndComment.first.trim(); + if (!sym.empty()) + ctx.addExportSymbol(sym); + buffer = lineAndRest.second; + } + return std::error_code(); +} + +/// Order files are one symbol per line. Blank lines are ignored. +/// Trailing comments start with #. Symbol names can be prefixed with an +/// architecture name and/or .o leaf name. Examples: +/// _foo +/// bar.o:_bar +/// libfrob.a(bar.o):_bar +/// x86_64:_foo64 +static std::error_code parseOrderFile(StringRef orderFilePath, + MachOLinkingContext &ctx) { + // Map in order file. + ErrorOr> mb = + MemoryBuffer::getFileOrSTDIN(orderFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(orderFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + buffer = lineAndRest.second; + // Ignore trailing # comments. + std::pair symAndComment = line.split('#'); + if (symAndComment.first.empty()) + continue; + StringRef sym = symAndComment.first.trim(); + if (sym.empty()) + continue; + // Check for prefix. + StringRef prefix; + std::pair prefixAndSym = sym.split(':'); + if (!prefixAndSym.second.empty()) { + sym = prefixAndSym.second; + prefix = prefixAndSym.first; + if (!prefix.endswith(".o") && !prefix.endswith(".o)")) { + // If arch name prefix does not match arch being linked, ignore symbol. + if (!ctx.archName().equals(prefix)) + continue; + prefix = ""; + } + } else + sym = prefixAndSym.first; + if (!sym.empty()) { + ctx.appendOrderedSymbol(sym, prefix); + // llvm::errs() << sym << ", prefix=" << prefix << "\n"; + } + } + return std::error_code(); +} + +// +// There are two variants of the -filelist option: +// +// -filelist +// In this variant, the path is to a text file which contains one file path +// per line. There are no comments or trimming of whitespace. 
+// +// -fileList , +// In this variant, the path is to a text file which contains a partial path +// per line. The prefix is prepended to each partial path. +// +static llvm::Error loadFileList(StringRef fileListPath, + MachOLinkingContext &ctx, bool forceLoad) { + // If there is a comma, split off . + std::pair opt = fileListPath.split(','); + StringRef filePath = opt.first; + StringRef dirName = opt.second; + ctx.addInputFileDependency(filePath); + // Map in file list file. + ErrorOr> mb = + MemoryBuffer::getFileOrSTDIN(filePath); + if (std::error_code ec = mb.getError()) + return llvm::errorCodeToError(ec); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + StringRef path; + if (!dirName.empty()) { + // If there is a then prepend dir to each line. + SmallString<256> fullPath; + fullPath.assign(dirName); + llvm::sys::path::append(fullPath, Twine(line)); + path = ctx.copy(fullPath.str()); + } else { + // No use whole line as input file path. + path = ctx.copy(line); + } + if (!ctx.pathExists(path)) { + return llvm::make_error(Twine("File not found '") + + path + + "'"); + } + if (ctx.testingFileUsage()) { + message("Found filelist entry " + canonicalizePath(path)); + } + addFile(path, ctx, forceLoad, false); + buffer = lineAndRest.second; + } + return llvm::Error::success(); +} + +/// Parse number assuming it is base 16, but allow 0x prefix. 
+static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) { + if (numStr.startswith_insensitive("0x")) + numStr = numStr.drop_front(2); + return numStr.getAsInteger(16, baseAddress); +} + +static void parseLLVMOptions(const LinkingContext &ctx) { + // Honor -mllvm + if (!ctx.llvmOptions().empty()) { + unsigned numArgs = ctx.llvmOptions().size(); + auto **args = new const char *[numArgs + 2]; + args[0] = "lld (LLVM option parsing)"; + for (unsigned i = 0; i != numArgs; ++i) + args[i + 1] = ctx.llvmOptions()[i]; + args[numArgs + 1] = nullptr; + llvm::cl::ResetAllOptionOccurrences(); + llvm::cl::ParseCommandLineOptions(numArgs + 1, args); + } +} + +namespace lld { +namespace mach_o { + +bool parse(llvm::ArrayRef args, MachOLinkingContext &ctx) { + // Parse command line options using DarwinLdOptions.td + DarwinLdOptTable table; + unsigned missingIndex; + unsigned missingCount; + llvm::opt::InputArgList parsedArgs = + table.ParseArgs(args.slice(1), missingIndex, missingCount); + if (missingCount) { + error("missing arg value for '" + + Twine(parsedArgs.getArgString(missingIndex)) + "' expected " + + Twine(missingCount) + " argument(s)."); + return false; + } + + for (auto unknownArg : parsedArgs.filtered(OPT_UNKNOWN)) { + warn("ignoring unknown argument: " + + Twine(unknownArg->getAsString(parsedArgs))); + } + + errorHandler().verbose = parsedArgs.hasArg(OPT_v); + errorHandler().errorLimit = args::getInteger(parsedArgs, OPT_error_limit, 20); + + // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static ) + llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE; + bool isStaticExecutable = false; + if (llvm::opt::Arg *kind = parsedArgs.getLastArg( + OPT_dylib, OPT_relocatable, OPT_bundle, OPT_static, OPT_preload)) { + switch (kind->getOption().getID()) { + case OPT_dylib: + fileType = llvm::MachO::MH_DYLIB; + break; + case OPT_relocatable: + fileType = llvm::MachO::MH_OBJECT; + break; + case OPT_bundle: + fileType = 
llvm::MachO::MH_BUNDLE; + break; + case OPT_static: + fileType = llvm::MachO::MH_EXECUTE; + isStaticExecutable = true; + break; + case OPT_preload: + fileType = llvm::MachO::MH_PRELOAD; + break; + } + } + + // Handle -arch xxx + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + if (llvm::opt::Arg *archStr = parsedArgs.getLastArg(OPT_arch)) { + arch = MachOLinkingContext::archFromName(archStr->getValue()); + if (arch == MachOLinkingContext::arch_unknown) { + error("unknown arch named '" + Twine(archStr->getValue()) + "'"); + return false; + } + } + // If no -arch specified, scan input files to find first non-fat .o file. + if (arch == MachOLinkingContext::arch_unknown) { + for (auto &inFile : parsedArgs.filtered(OPT_INPUT)) { + // This is expensive because it opens and maps the file. But that is + // ok because no -arch is rare. + if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch)) + break; + } + if (arch == MachOLinkingContext::arch_unknown && + !parsedArgs.getLastArg(OPT_test_file_usage)) { + // If no -arch and no options at all, print usage message. 
+ if (parsedArgs.size() == 0) { + table.printHelp(llvm::outs(), + (std::string(args[0]) + " [options] file...").c_str(), + "LLVM Linker", false); + } else { + error("-arch not specified and could not be inferred"); + } + return false; + } + } + + // Handle -macosx_version_min or -ios_version_min + MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; + uint32_t minOSVersion = 0; + if (llvm::opt::Arg *minOS = + parsedArgs.getLastArg(OPT_macosx_version_min, OPT_ios_version_min, + OPT_ios_simulator_version_min)) { + switch (minOS->getOption().getID()) { + case OPT_macosx_version_min: + os = MachOLinkingContext::OS::macOSX; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + error("malformed macosx_version_min value"); + return false; + } + break; + case OPT_ios_version_min: + os = MachOLinkingContext::OS::iOS; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + error("malformed ios_version_min value"); + return false; + } + break; + case OPT_ios_simulator_version_min: + os = MachOLinkingContext::OS::iOS_simulator; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + error("malformed ios_simulator_version_min value"); + return false; + } + break; + } + } else { + // No min-os version on command line, check environment variables + } + + // Handle export_dynamic + // FIXME: Should we warn when this applies to something other than a static + // executable or dylib? Those are the only cases where this has an effect. + // Note, this has to come before ctx.configure() so that we get the correct + // value for _globalsAreDeadStripRoots. + bool exportDynamicSymbols = parsedArgs.hasArg(OPT_export_dynamic); + + // Now that there's enough information parsed in, let the linking context + // set up default values. 
+ ctx.configure(fileType, arch, os, minOSVersion, exportDynamicSymbols); + + // Handle -e xxx + if (llvm::opt::Arg *entry = parsedArgs.getLastArg(OPT_entry)) + ctx.setEntrySymbolName(entry->getValue()); + + // Handle -o xxx + if (llvm::opt::Arg *outpath = parsedArgs.getLastArg(OPT_output)) + ctx.setOutputPath(outpath->getValue()); + else + ctx.setOutputPath("a.out"); + + // Handle -image_base XXX and -seg1addr XXXX + if (llvm::opt::Arg *imageBase = parsedArgs.getLastArg(OPT_image_base)) { + uint64_t baseAddress; + if (parseNumberBase16(imageBase->getValue(), baseAddress)) { + error("image_base expects a hex number"); + return false; + } else if (baseAddress < ctx.pageZeroSize()) { + error("image_base overlaps with __PAGEZERO"); + return false; + } else if (baseAddress % ctx.pageSize()) { + error("image_base must be a multiple of page size (0x" + + llvm::utohexstr(ctx.pageSize()) + ")"); + return false; + } + + ctx.setBaseAddress(baseAddress); + } + + // Handle -dead_strip + if (parsedArgs.getLastArg(OPT_dead_strip)) + ctx.setDeadStripping(true); + + bool globalWholeArchive = false; + // Handle -all_load + if (parsedArgs.getLastArg(OPT_all_load)) + globalWholeArchive = true; + + // Handle -install_name + if (llvm::opt::Arg *installName = parsedArgs.getLastArg(OPT_install_name)) + ctx.setInstallName(installName->getValue()); + else + ctx.setInstallName(ctx.outputPath()); + + // Handle -mark_dead_strippable_dylib + if (parsedArgs.getLastArg(OPT_mark_dead_strippable_dylib)) + ctx.setDeadStrippableDylib(true); + + // Handle -compatibility_version and -current_version + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_compatibility_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + error("-compatibility_version can only be used with -dylib"); + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + error("-compatibility_version value is malformed"); + return false; + } + 
ctx.setCompatibilityVersion(parsedVers); + } + + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_current_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + error("-current_version can only be used with -dylib"); + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + error("-current_version value is malformed"); + return false; + } + ctx.setCurrentVersion(parsedVers); + } + + // Handle -bundle_loader + if (llvm::opt::Arg *loader = parsedArgs.getLastArg(OPT_bundle_loader)) + ctx.setBundleLoader(loader->getValue()); + + // Handle -sectalign segname sectname align + for (auto &alignArg : parsedArgs.filtered(OPT_sectalign)) { + const char* segName = alignArg->getValue(0); + const char* sectName = alignArg->getValue(1); + const char* alignStr = alignArg->getValue(2); + if ((alignStr[0] == '0') && (alignStr[1] == 'x')) + alignStr += 2; + unsigned long long alignValue; + if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) { + error("-sectalign alignment value '" + Twine(alignStr) + + "' not a valid number"); + return false; + } + uint16_t align = 1 << llvm::countTrailingZeros(alignValue); + if (!llvm::isPowerOf2_64(alignValue)) { + std::string Msg; + llvm::raw_string_ostream OS(Msg); + OS << "alignment for '-sectalign " << segName << " " << sectName + << llvm::format(" 0x%llX", alignValue) + << "' is not a power of two, using " << llvm::format("0x%08X", align); + OS.flush(); + warn(Msg); + } + ctx.addSectionAlignment(segName, sectName, align); + } + + // Handle -mllvm + for (auto &llvmArg : parsedArgs.filtered(OPT_mllvm)) { + ctx.appendLLVMOption(llvmArg->getValue()); + } + + // Handle -print_atoms + if (parsedArgs.getLastArg(OPT_print_atoms)) + ctx.setPrintAtoms(); + + // Handle -t (trace) option. + if (parsedArgs.getLastArg(OPT_t)) + ctx.setLogInputFiles(true); + + // Handle -demangle option. 
+ if (parsedArgs.getLastArg(OPT_demangle)) + ctx.setDemangleSymbols(true); + + // Handle -keep_private_externs + if (parsedArgs.getLastArg(OPT_keep_private_externs)) { + ctx.setKeepPrivateExterns(true); + if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + warn("-keep_private_externs only used in -r mode"); + } + + // Handle -dependency_info used by Xcode. + if (llvm::opt::Arg *depInfo = parsedArgs.getLastArg(OPT_dependency_info)) + if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue())) + warn(ec.message() + ", processing '-dependency_info " + + depInfo->getValue()); + + // In -test_file_usage mode, we'll be given an explicit list of paths that + // exist. We'll also be expected to print out information about how we located + // libraries and so on that the user specified, but not to actually do any + // linking. + if (parsedArgs.getLastArg(OPT_test_file_usage)) { + ctx.setTestingFileUsage(); + + // With paths existing by fiat, linking is not going to end well. + ctx.setDoNothing(true); + + // Only bother looking for an existence override if we're going to use it. + for (auto existingPath : parsedArgs.filtered(OPT_path_exists)) { + ctx.addExistingPathForDebug(existingPath->getValue()); + } + } + + // Register possible input file parsers. + if (!ctx.doNothing()) { + ctx.registry().addSupportMachOObjects(ctx); + ctx.registry().addSupportArchives(ctx.logInputFiles()); + ctx.registry().addSupportYamlFiles(); + } + + // Now construct the set of library search directories, following ld64's + // baroque set of accumulated hacks. Mostly, the algorithm constructs + // { syslibroots } x { libpaths } + // + // Unfortunately, there are numerous exceptions: + // 1. Only absolute paths get modified by syslibroot options. + // 2. If there is just 1 -syslibroot, system paths not found in it are + // skipped. + // 3. If the last -syslibroot is "/", all of them are ignored entirely. + // 4. If { syslibroots } x path == {}, the original path is kept. 
+ std::vector sysLibRoots; + for (auto syslibRoot : parsedArgs.filtered(OPT_syslibroot)) { + sysLibRoots.push_back(syslibRoot->getValue()); + } + if (!sysLibRoots.empty()) { + // Ignore all if last -syslibroot is "/". + if (sysLibRoots.back() != "/") + ctx.setSysLibRoots(sysLibRoots); + } + + // Paths specified with -L come first, and are not considered system paths for + // the case where there is precisely 1 -syslibroot. + for (auto libPath : parsedArgs.filtered(OPT_L)) { + ctx.addModifiedSearchDir(libPath->getValue()); + } + + // Process -F directories (where to look for frameworks). + for (auto fwPath : parsedArgs.filtered(OPT_F)) { + ctx.addFrameworkSearchDir(fwPath->getValue()); + } + + // -Z suppresses the standard search paths. + if (!parsedArgs.hasArg(OPT_Z)) { + ctx.addModifiedSearchDir("/usr/lib", true); + ctx.addModifiedSearchDir("/usr/local/lib", true); + ctx.addFrameworkSearchDir("/Library/Frameworks", true); + ctx.addFrameworkSearchDir("/System/Library/Frameworks", true); + } + + // Now that we've constructed the final set of search paths, print out those + // search paths in verbose mode. 
+ if (errorHandler().verbose) { + message("Library search paths:"); + for (auto path : ctx.searchDirs()) { + message(" " + path); + } + message("Framework search paths:"); + for (auto path : ctx.frameworkDirs()) { + message(" " + path); + } + } + + // Handle -exported_symbols_list + for (auto expFile : parsedArgs.filtered(OPT_exported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::unexported) { + error("-exported_symbols_list cannot be combined with " + "-unexported_symbol[s_list]"); + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::exported); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx)) { + error(ec.message() + ", processing '-exported_symbols_list " + + expFile->getValue()); + return false; + } + } + + // Handle -exported_symbol + for (auto symbol : parsedArgs.filtered(OPT_exported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::unexported) { + error("-exported_symbol cannot be combined with " + "-unexported_symbol[s_list]"); + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::exported); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle -unexported_symbols_list + for (auto expFile : parsedArgs.filtered(OPT_unexported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::exported) { + error("-unexported_symbols_list cannot be combined with " + "-exported_symbol[s_list]"); + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::unexported); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx)) { + error(ec.message() + ", processing '-unexported_symbols_list " + + expFile->getValue()); + return false; + } + } + + // Handle -unexported_symbol + for (auto symbol : parsedArgs.filtered(OPT_unexported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::exported) { + error("-unexported_symbol cannot be combined with " + "-exported_symbol[s_list]"); + return false; + } + 
ctx.setExportMode(MachOLinkingContext::ExportMode::unexported);
+ ctx.addExportSymbol(symbol->getValue());
+ }
+
+ // Handle obsolete -multi_module and -single_module
+ if (llvm::opt::Arg *mod =
+ parsedArgs.getLastArg(OPT_multi_module, OPT_single_module)) {
+ if (mod->getOption().getID() == OPT_multi_module)
+ warn("-multi_module is obsolete and being ignored");
+ else if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB)
+ warn("-single_module being ignored. It is only for use when producing a "
+ "dylib");
+ }
+
+ // Handle obsolete ObjC options: -objc_gc_compaction, -objc_gc, -objc_gc_only
+ if (parsedArgs.getLastArg(OPT_objc_gc_compaction)) {
+ error("-objc_gc_compaction is not supported");
+ return false;
+ }
+
+ if (parsedArgs.getLastArg(OPT_objc_gc)) {
+ error("-objc_gc is not supported");
+ return false;
+ }
+
+ if (parsedArgs.getLastArg(OPT_objc_gc_only)) {
+ error("-objc_gc_only is not supported");
+ return false;
+ }
+
+ // Handle -pie or -no_pie
+ if (llvm::opt::Arg *pie = parsedArgs.getLastArg(OPT_pie, OPT_no_pie)) {
+ switch (ctx.outputMachOType()) {
+ case llvm::MachO::MH_EXECUTE:
+ switch (ctx.os()) {
+ case MachOLinkingContext::OS::macOSX:
+ if ((minOSVersion < 0x000A0500) &&
+ (pie->getOption().getID() == OPT_pie)) {
+ error("-pie can only be used when targeting Mac OS X 10.5 or later");
+ return false;
+ }
+ break;
+ case MachOLinkingContext::OS::iOS:
+ if ((minOSVersion < 0x00040200) &&
+ (pie->getOption().getID() == OPT_pie)) {
+ error("-pie can only be used when targeting iOS 4.2 or later");
+ return false;
+ }
+ break;
+ case MachOLinkingContext::OS::iOS_simulator:
+ if (pie->getOption().getID() == OPT_no_pie) {
+ error("iOS simulator programs must be built PIE");
+ return false;
+ }
+ break;
+ case MachOLinkingContext::OS::unknown:
+ break;
+ }
+ ctx.setPIE(pie->getOption().getID() == OPT_pie);
+ break;
+ case llvm::MachO::MH_PRELOAD:
+ break;
+ case llvm::MachO::MH_DYLIB:
+ case llvm::MachO::MH_BUNDLE:
+ warn(pie->getSpelling() +
+ " being 
ignored. It is only used when linking main executables"); + break; + default: + error(pie->getSpelling() + + " can only used when linking main executables"); + return false; + } + } + + // Handle -version_load_command or -no_version_load_command + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_version_load_command, + OPT_no_version_load_command)) { + flagOn = arg->getOption().getID() == OPT_version_load_command; + flagOff = arg->getOption().getID() == OPT_no_version_load_command; + } + + // default to adding version load command for dynamic code, + // static code must opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateVersionLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static executables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. 
+ break; + } + } + + // Handle -function_starts or -no_function_starts + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_function_starts, + OPT_no_function_starts)) { + flagOn = arg->getOption().getID() == OPT_function_starts; + flagOff = arg->getOption().getID() == OPT_no_function_starts; + } + + // default to adding functions start for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateFunctionStartsLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static executables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. 
+ break; + } + } + + // Handle -data_in_code_info or -no_data_in_code_info + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_data_in_code_info, + OPT_no_data_in_code_info)) { + flagOn = arg->getOption().getID() == OPT_data_in_code_info; + flagOff = arg->getOption().getID() == OPT_no_data_in_code_info; + } + + // default to adding data in code for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static executables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. + break; + } + } + + // Handle sdk_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) { + uint32_t sdkVersion = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + sdkVersion)) { + error("malformed sdkVersion value"); + return false; + } + ctx.setSdkVersion(sdkVersion); + } else if (ctx.generateVersionLoadCommand()) { + // If we don't have an sdk version, but were going to emit a load command + // with min_version, then we need to give a warning as we have no sdk + // version to put in that command. 
+ // FIXME: We need to decide whether to make this an error. + warn("-sdk_version is required when emitting min version load command. " + "Setting sdk version to match provided min version"); + ctx.setSdkVersion(ctx.osMinVersion()); + } + + // Handle source_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_source_version)) { + uint64_t version = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + version)) { + error("malformed source_version value"); + return false; + } + ctx.setSourceVersion(version); + } + + // Handle stack_size + if (llvm::opt::Arg *stackSize = parsedArgs.getLastArg(OPT_stack_size)) { + uint64_t stackSizeVal; + if (parseNumberBase16(stackSize->getValue(), stackSizeVal)) { + error("stack_size expects a hex number"); + return false; + } + if ((stackSizeVal % ctx.pageSize()) != 0) { + error("stack_size must be a multiple of page size (0x" + + llvm::utohexstr(ctx.pageSize()) + ")"); + return false; + } + + ctx.setStackSize(stackSizeVal); + } + + // Handle debug info handling options: -S + if (parsedArgs.hasArg(OPT_S)) + ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap); + + // Handle -order_file + for (auto orderFile : parsedArgs.filtered(OPT_order_file)) { + if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx)) { + error(ec.message() + ", processing '-order_file " + orderFile->getValue() + + "'"); + return false; + } + } + + // Handle -flat_namespace. 
+ if (llvm::opt::Arg *ns = + parsedArgs.getLastArg(OPT_flat_namespace, OPT_twolevel_namespace)) { + if (ns->getOption().getID() == OPT_flat_namespace) + ctx.setUseFlatNamespace(true); + } + + // Handle -undefined + if (llvm::opt::Arg *undef = parsedArgs.getLastArg(OPT_undefined)) { + MachOLinkingContext::UndefinedMode UndefMode; + if (StringRef(undef->getValue()).equals("error")) + UndefMode = MachOLinkingContext::UndefinedMode::error; + else if (StringRef(undef->getValue()).equals("warning")) + UndefMode = MachOLinkingContext::UndefinedMode::warning; + else if (StringRef(undef->getValue()).equals("suppress")) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + else if (StringRef(undef->getValue()).equals("dynamic_lookup")) + UndefMode = MachOLinkingContext::UndefinedMode::dynamicLookup; + else { + error("invalid option to -undefined [ warning | error | suppress | " + "dynamic_lookup ]"); + return false; + } + + if (ctx.useFlatNamespace()) { + // If we're using -flat_namespace then 'warning', 'suppress' and + // 'dynamic_lookup' are all equivalent, so map them to 'suppress'. + if (UndefMode != MachOLinkingContext::UndefinedMode::error) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + } else { + // If we're using -twolevel_namespace then 'warning' and 'suppress' are + // illegal. Emit a diagnostic if they've been (mis)used. + if (UndefMode == MachOLinkingContext::UndefinedMode::warning || + UndefMode == MachOLinkingContext::UndefinedMode::suppress) { + error("can't use -undefined warning or suppress with " + "-twolevel_namespace"); + return false; + } + } + + ctx.setUndefinedMode(UndefMode); + } + + // Handle -no_objc_category_merging. 
+ if (parsedArgs.getLastArg(OPT_no_objc_category_merging)) + ctx.setMergeObjCCategories(false); + + // Handle -rpath + if (parsedArgs.hasArg(OPT_rpath)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!ctx.minOS("10.5", "2.0")) { + if (ctx.os() == MachOLinkingContext::OS::macOSX) + error("-rpath can only be used when targeting OS X 10.5 or later"); + else + error("-rpath can only be used when targeting iOS 2.0 or later"); + return false; + } + break; + default: + error("-rpath can only be used when creating a dynamic final linked " + "image"); + return false; + } + + for (auto rPath : parsedArgs.filtered(OPT_rpath)) { + ctx.addRpath(rPath->getValue()); + } + } + + // Parse the LLVM options before we process files in case the file handling + // makes use of things like LLVM_DEBUG(). + parseLLVMOptions(ctx); + + // Handle input files and sectcreate. + for (auto &arg : parsedArgs) { + bool upward; + llvm::Optional resolvedPath; + switch (arg->getOption().getID()) { + default: + continue; + case OPT_INPUT: + addFile(arg->getValue(), ctx, globalWholeArchive, false); + break; + case OPT_upward_library: + addFile(arg->getValue(), ctx, false, true); + break; + case OPT_force_load: + addFile(arg->getValue(), ctx, true, false); + break; + case OPT_l: + case OPT_upward_l: + upward = (arg->getOption().getID() == OPT_upward_l); + resolvedPath = ctx.searchLibrary(arg->getValue()); + if (!resolvedPath) { + error("Unable to find library for " + arg->getSpelling() + + arg->getValue()); + return false; + } else if (ctx.testingFileUsage()) { + message(Twine("Found ") + (upward ? 
"upward " : " ") + "library " + + canonicalizePath(resolvedPath.getValue())); + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, upward); + break; + case OPT_framework: + case OPT_upward_framework: + upward = (arg->getOption().getID() == OPT_upward_framework); + resolvedPath = ctx.findPathForFramework(arg->getValue()); + if (!resolvedPath) { + error("Unable to find framework for " + arg->getSpelling() + " " + + arg->getValue()); + return false; + } else if (ctx.testingFileUsage()) { + message(Twine("Found ") + (upward ? "upward " : " ") + "framework " + + canonicalizePath(resolvedPath.getValue())); + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, upward); + break; + case OPT_filelist: + if (auto ec = loadFileList(arg->getValue(), ctx, globalWholeArchive)) { + handleAllErrors(std::move(ec), [&](const llvm::ErrorInfoBase &EI) { + error(EI.message() + ", processing '-filelist " + arg->getValue()); + }); + return false; + } + break; + case OPT_sectcreate: { + const char* seg = arg->getValue(0); + const char* sect = arg->getValue(1); + const char* fileName = arg->getValue(2); + + ErrorOr> contentOrErr = + MemoryBuffer::getFile(fileName); + + if (!contentOrErr) { + error("can't open -sectcreate file " + Twine(fileName)); + return false; + } + + ctx.addSectCreateSection(seg, sect, std::move(*contentOrErr)); + } + break; + } + } + + if (ctx.getNodes().empty()) { + error("No input files"); + return false; + } + + // Validate the combination of options used. + return ctx.validate(); +} + +static void createFiles(MachOLinkingContext &ctx, bool Implicit) { + std::vector> Files; + if (Implicit) + ctx.createImplicitFiles(Files); + else + ctx.createInternalFiles(Files); + for (auto i = Files.rbegin(), e = Files.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), std::make_unique(std::move(*i))); + } +} + +/// This is where the link is actually performed. 
+bool link(llvm::ArrayRef args, bool CanExitEarly, + raw_ostream &StdoutOS, raw_ostream &StderrOS) { + lld::stdoutOS = &StdoutOS; + lld::stderrOS = &StderrOS; + + errorHandler().logName = args::getFilenameWithoutExe(args[0]); + errorHandler().errorLimitExceededMsg = + "too many errors emitted, stopping now (use " + "'-error-limit 0' to see all errors)"; + errorHandler().exitEarly = CanExitEarly; + StderrOS.enable_colors(StderrOS.has_colors()); + + MachOLinkingContext ctx; + if (!parse(args, ctx)) + return false; + if (ctx.doNothing()) + return true; + if (ctx.getNodes().empty()) + return false; + + for (std::unique_ptr &ie : ctx.getNodes()) + if (FileNode *node = dyn_cast(ie.get())) + node->getFile()->parse(); + + createFiles(ctx, false /* Implicit */); + + // Give target a chance to add files + createFiles(ctx, true /* Implicit */); + + // Give target a chance to postprocess input files. + // Mach-O uses this chance to move all object files before library files. + ctx.finalizeInputFiles(); + + // Do core linking. + ScopedTask resolveTask(getDefaultDomain(), "Resolve"); + Resolver resolver(ctx); + if (!resolver.resolve()) + return false; + SimpleFile *merged = nullptr; + { + std::unique_ptr mergedFile = resolver.resultFile(); + merged = mergedFile.get(); + auto &members = ctx.getNodes(); + members.insert(members.begin(), + std::make_unique(std::move(mergedFile))); + } + resolveTask.end(); + + // Run passes on linked atoms. + ScopedTask passTask(getDefaultDomain(), "Passes"); + PassManager pm; + ctx.addPasses(pm); + if (auto ec = pm.runOnFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + lld::errs() << "Failed to run passes on file '" << ctx.outputPath() + << "': "; + logAllUnhandledErrors(std::move(ec), lld::errs(), std::string()); + return false; + } + + passTask.end(); + + // Give linked atoms to Writer to generate output file. 
+ ScopedTask writeTask(getDefaultDomain(), "Write"); + if (auto ec = ctx.writeFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + lld::errs() << "Failed to write file '" << ctx.outputPath() << "': "; + logAllUnhandledErrors(std::move(ec), lld::errs(), std::string()); + return false; + } + + // Call exit() if we can to avoid calling destructors. + if (CanExitEarly) + exitLld(errorCount() ? 1 : 0); + + + return true; +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/Driver/DarwinLdOptions.td b/lld/lib/Driver/DarwinLdOptions.td new file mode 100644 index 000000000000..3bbde8bf1c1c --- /dev/null +++ b/lld/lib/Driver/DarwinLdOptions.td @@ -0,0 +1,250 @@ +include "llvm/Option/OptParser.td" + + +// output kinds +def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">; +def relocatable : Flag<["-"], "r">, + HelpText<"Create relocatable object file">, Group; +def static : Flag<["-"], "static">, + HelpText<"Create static executable">, Group; +def dynamic : Flag<["-"], "dynamic">, + HelpText<"Create dynamic executable (default)">,Group; +def dylib : Flag<["-"], "dylib">, + HelpText<"Create dynamic library">, Group; +def bundle : Flag<["-"], "bundle">, + HelpText<"Create dynamic bundle">, Group; +def execute : Flag<["-"], "execute">, + HelpText<"Create main executable (default)">, Group; +def preload : Flag<["-"], "preload">, + HelpText<"Create binary for use with embedded systems">, Group; + +// optimizations +def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; +def dead_strip : Flag<["-"], "dead_strip">, + HelpText<"Remove unreference code and data">, Group; +def macosx_version_min : Separate<["-"], "macosx_version_min">, + MetaVarName<"">, + HelpText<"Minimum Mac OS X version">, Group; +def ios_version_min : Separate<["-"], "ios_version_min">, + MetaVarName<"">, + HelpText<"Minimum iOS version">, Group; +def iphoneos_version_min : Separate<["-"], 
"iphoneos_version_min">, + Alias; +def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, + MetaVarName<"">, + HelpText<"Minimum iOS simulator version">, Group; +def sdk_version : Separate<["-"], "sdk_version">, + MetaVarName<"">, + HelpText<"SDK version">, Group; +def source_version : Separate<["-"], "source_version">, + MetaVarName<"">, + HelpText<"Source version">, Group; +def version_load_command : Flag<["-"], "version_load_command">, + HelpText<"Force generation of a version load command">, Group; +def no_version_load_command : Flag<["-"], "no_version_load_command">, + HelpText<"Disable generation of a version load command">, Group; +def function_starts : Flag<["-"], "function_starts">, + HelpText<"Force generation of a function starts load command">, + Group; +def no_function_starts : Flag<["-"], "no_function_starts">, + HelpText<"Disable generation of a function starts load command">, + Group; +def data_in_code_info : Flag<["-"], "data_in_code_info">, + HelpText<"Force generation of a data in code load command">, + Group; +def no_data_in_code_info : Flag<["-"], "no_data_in_code_info">, + HelpText<"Disable generation of a data in code load command">, + Group; +def mllvm : Separate<["-"], "mllvm">, + MetaVarName<"">, + HelpText<"Add directory to library search path">, Group; +def F : JoinedOrSeparate<["-"], "F">, + MetaVarName<"">, + HelpText<"Add directory to framework search path">, Group; +def Z : Flag<["-"], "Z">, + HelpText<"Do not search standard directories for libraries or frameworks">; +def all_load : Flag<["-"], "all_load">, + HelpText<"Forces all members of all static libraries to be loaded">, + Group; +def force_load : Separate<["-"], "force_load">, + MetaVarName<"">, + HelpText<"Forces all members of specified static libraries to be loaded">, + Group; +def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"">, + HelpText<"Add path to SDK to all absolute library search paths">, + Group; + +// Input options +def l : 
Joined<["-"], "l">, + MetaVarName<"">, + HelpText<"Base name of library searched for in -L directories">; +def upward_l : Joined<["-"], "upward-l">, + MetaVarName<"">, + HelpText<"Base name of upward library searched for in -L directories">; +def framework : Separate<["-"], "framework">, + MetaVarName<"">, + HelpText<"Base name of framework searched for in -F directories">; +def upward_framework : Separate<["-"], "upward_framework">, + MetaVarName<"">, + HelpText<"Base name of upward framework searched for in -F directories">; +def upward_library : Separate<["-"], "upward_library">, + MetaVarName<"">, + HelpText<"path to upward dylib to link with">; +def filelist : Separate<["-"], "filelist">, + MetaVarName<"">, + HelpText<"file containing paths to input files">; + + +// test case options +def print_atoms : Flag<["-"], "print_atoms">, + HelpText<"Emit output as yaml atoms">; +def test_file_usage : Flag<["-"], "test_file_usage">, + HelpText<"Only files specified by -file_exists are considered to exist. 
" + "Print which files would be used">; +def path_exists : Separate<["-"], "path_exists">, + MetaVarName<"">, + HelpText<"Used with -test_file_usage to declare a path">; + + +// general options +def output : Separate<["-"], "o">, + MetaVarName<"">, + HelpText<"Output file path">; +def arch : Separate<["-"], "arch">, + MetaVarName<"">, + HelpText<"Architecture to link">; +def sectalign : MultiArg<["-"], "sectalign", 3>, + MetaVarName<" ">, + HelpText<"Alignment for segment/section">; +def sectcreate : MultiArg<["-"], "sectcreate", 3>, + MetaVarName<" ">, + HelpText<"Create section / from contents of ">; +def image_base : Separate<["-"], "image_base">; +def seg1addr : Separate<["-"], "seg1addr">, Alias; +def demangle : Flag<["-"], "demangle">, + HelpText<"Demangles symbol names in errors and warnings">; +def dependency_info : Separate<["-"], "dependency_info">, + MetaVarName<"">, + HelpText<"Write binary list of files used during link">; +def S : Flag<["-"], "S">, + HelpText<"Remove debug information (STABS or DWARF) from the output file">; +def rpath : Separate<["-"], "rpath">, + MetaVarName<"">, + HelpText<"Add path to the runpath search path list for image being created">; + +def t : Flag<["-"], "t">, + HelpText<"Print the names of the input files as ld processes them">; +def v : Flag<["-"], "v">, + HelpText<"Print linker information">; +def error_limit : Separate<["-", "--"], "error-limit">, + MetaVarName<"">, + HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; + +// Ignored options +def lto_library : Separate<["-"], "lto_library">, + MetaVarName<"">, + HelpText<"Ignored for compatibility with other linkers">; + +// Obsolete options +def grp_obsolete : OptionGroup<"obsolete">, HelpText<"OBSOLETE OPTIONS">; +def single_module : Flag<["-"], "single_module">, + HelpText<"Default for dylibs">, Group; +def multi_module : Flag<["-"], "multi_module">, + HelpText<"Unsupported way to build dylibs">, Group; +def objc_gc_compaction : Flag<["-"], 
"objc_gc_compaction">, + HelpText<"Unsupported ObjC GC option">, Group; +def objc_gc : Flag<["-"], "objc_gc">, + HelpText<"Unsupported ObjC GC option">, Group; +def objc_gc_only : Flag<["-"], "objc_gc_only">, + HelpText<"Unsupported ObjC GC option">, Group; diff --git a/lld/lib/ReaderWriter/CMakeLists.txt b/lld/lib/ReaderWriter/CMakeLists.txt new file mode 100644 index 000000000000..bedb836d2c1e --- /dev/null +++ b/lld/lib/ReaderWriter/CMakeLists.txt @@ -0,0 +1,20 @@ +add_subdirectory(MachO) +add_subdirectory(YAML) + +if (MSVC) + add_definitions(-wd4062) # Suppress 'warning C4062: Enumerator has no associated handler in a switch statement.' +endif() + +add_lld_library(lldReaderWriter + FileArchive.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/ReaderWriter + + LINK_COMPONENTS + Object + Support + + LINK_LIBS + lldCore + ) diff --git a/lld/lib/ReaderWriter/FileArchive.cpp b/lld/lib/ReaderWriter/FileArchive.cpp new file mode 100644 index 000000000000..98f4d06ee210 --- /dev/null +++ b/lld/lib/ReaderWriter/FileArchive.cpp @@ -0,0 +1,227 @@ +//===- lib/ReaderWriter/FileArchive.cpp -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/LLVM.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +using llvm::object::Archive; +using llvm::file_magic; +using llvm::identify_magic; + +namespace lld { + +namespace { + +/// The FileArchive class represents an Archive Library file +class FileArchive : public lld::ArchiveLibraryFile { +public: + FileArchive(std::unique_ptr mb, const Registry ®, + StringRef path, bool logLoading) + : ArchiveLibraryFile(path), _mb(std::shared_ptr(mb.release())), + _registry(reg), _logLoading(logLoading) {} + + /// Check if any member of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + File *find(StringRef name) override { + auto member = _symbolMemberMap.find(name); + if (member == _symbolMemberMap.end()) + return nullptr; + Archive::Child c = member->second; + + // Don't return a member already returned + Expected buf = c.getBuffer(); + if (!buf) { + // TODO: Actually report errors helpfully. 
+ consumeError(buf.takeError()); + return nullptr; + } + const char *memberStart = buf->data(); + if (_membersInstantiated.count(memberStart)) + return nullptr; + _membersInstantiated.insert(memberStart); + + std::unique_ptr result; + if (instantiateMember(c, result)) + return nullptr; + + File *file = result.get(); + _filesReturned.push_back(std::move(result)); + + // Give up the file pointer. It was stored and will be destroyed with destruction of FileArchive + return file; + } + + /// parse each member + std::error_code + parseAllMembers(std::vector> &result) override { + if (std::error_code ec = parse()) + return ec; + llvm::Error err = llvm::Error::success(); + for (auto mf = _archive->child_begin(err), me = _archive->child_end(); + mf != me; ++mf) { + std::unique_ptr file; + if (std::error_code ec = instantiateMember(*mf, file)) { + // err is Success (or we wouldn't be in the loop body) but we can't + // return without testing or consuming it. + consumeError(std::move(err)); + return ec; + } + result.push_back(std::move(file)); + } + if (err) + return errorToErrorCode(std::move(err)); + return std::error_code(); + } + + const AtomRange defined() const override { + return _noDefinedAtoms; + } + + const AtomRange undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + +protected: + std::error_code doParse() override { + // Make Archive object which will be owned by FileArchive object. 
+ llvm::Error Err = llvm::Error::success(); + _archive.reset(new Archive(_mb->getMemBufferRef(), Err)); + if (Err) + return errorToErrorCode(std::move(Err)); + std::error_code ec; + if ((ec = buildTableOfContents())) + return ec; + return std::error_code(); + } + +private: + std::error_code instantiateMember(Archive::Child member, + std::unique_ptr &result) const { + Expected mbOrErr = member.getMemoryBufferRef(); + if (!mbOrErr) + return errorToErrorCode(mbOrErr.takeError()); + llvm::MemoryBufferRef mb = mbOrErr.get(); + std::string memberPath = (_archive->getFileName() + "(" + + mb.getBufferIdentifier() + ")").str(); + + if (_logLoading) + llvm::errs() << memberPath << "\n"; + + std::unique_ptr memberMB(MemoryBuffer::getMemBuffer( + mb.getBuffer(), mb.getBufferIdentifier(), false)); + + ErrorOr> fileOrErr = + _registry.loadFile(std::move(memberMB)); + if (std::error_code ec = fileOrErr.getError()) + return ec; + result = std::move(fileOrErr.get()); + if (std::error_code ec = result->parse()) + return ec; + result->setArchivePath(_archive->getFileName()); + + // The memory buffer is co-owned by the archive file and the children, + // so that the bufffer is deallocated when all the members are destructed. 
+ result->setSharedMemoryBuffer(_mb); + return std::error_code(); + } + + std::error_code buildTableOfContents() { + DEBUG_WITH_TYPE("FileArchive", llvm::dbgs() + << "Table of contents for archive '" + << _archive->getFileName() << "':\n"); + for (const Archive::Symbol &sym : _archive->symbols()) { + StringRef name = sym.getName(); + Expected memberOrErr = sym.getMember(); + if (!memberOrErr) + return errorToErrorCode(memberOrErr.takeError()); + Archive::Child member = memberOrErr.get(); + DEBUG_WITH_TYPE("FileArchive", + llvm::dbgs() + << llvm::format("0x%08llX ", + member.getBuffer()->data()) + << "'" << name << "'\n"); + _symbolMemberMap.insert(std::make_pair(name, member)); + } + return std::error_code(); + } + + typedef std::unordered_map MemberMap; + typedef std::set InstantiatedSet; + + std::shared_ptr _mb; + const Registry &_registry; + std::unique_ptr _archive; + MemberMap _symbolMemberMap; + InstantiatedSet _membersInstantiated; + bool _logLoading; + std::vector> _memberBuffers; + std::vector> _filesReturned; +}; + +class ArchiveReader : public Reader { +public: + ArchiveReader(bool logLoading) : _logLoading(logLoading) {} + + bool canParse(file_magic magic, MemoryBufferRef) const override { + return magic == file_magic::archive; + } + + ErrorOr> loadFile(std::unique_ptr mb, + const Registry ®) const override { + StringRef path = mb->getBufferIdentifier(); + std::unique_ptr ret = + std::make_unique(std::move(mb), reg, path, _logLoading); + return std::move(ret); + } + +private: + bool _logLoading; +}; + +} // anonymous namespace + +void Registry::addSupportArchives(bool logLoading) { + add(std::unique_ptr(new ArchiveReader(logLoading))); +} + +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler.cpp new file mode 100644 index 000000000000..c101f3b157bb --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ArchHandler.cpp @@ -0,0 +1,171 @@ +//===- lib/FileFormat/MachO/ArchHandler.cpp 
-------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + + +ArchHandler::ArchHandler() { +} + +ArchHandler::~ArchHandler() { +} + +std::unique_ptr ArchHandler::create( + MachOLinkingContext::Arch arch) { + switch (arch) { + case MachOLinkingContext::arch_x86_64: + return create_x86_64(); + case MachOLinkingContext::arch_x86: + return create_x86(); + case MachOLinkingContext::arch_armv6: + case MachOLinkingContext::arch_armv7: + case MachOLinkingContext::arch_armv7s: + return create_arm(); + case MachOLinkingContext::arch_arm64: + return create_arm64(); + default: + llvm_unreachable("Unknown arch"); + } +} + + +bool ArchHandler::isLazyPointer(const Reference &ref) { + // A lazy bind entry is needed for a lazy pointer. 
+ const StubInfo &info = stubInfo(); + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + if (ref.kindArch() != info.lazyPointerReferenceToFinal.arch) + return false; + return (ref.kindValue() == info.lazyPointerReferenceToFinal.kind); +} + + +ArchHandler::RelocPattern ArchHandler::relocPattern(const Relocation &reloc) { + assert((reloc.type & 0xFFF0) == 0); + uint16_t result = reloc.type; + if (reloc.scattered) + result |= rScattered; + if (reloc.pcRel) + result |= rPcRel; + if (reloc.isExtern) + result |= rExtern; + switch(reloc.length) { + case 0: + break; + case 1: + result |= rLength2; + break; + case 2: + result |= rLength4; + break; + case 3: + result |= rLength8; + break; + default: + llvm_unreachable("bad r_length"); + } + return result; +} + +normalized::Relocation +ArchHandler::relocFromPattern(ArchHandler::RelocPattern pattern) { + normalized::Relocation result; + result.offset = 0; + result.scattered = (pattern & rScattered); + result.type = (RelocationInfoType)(pattern & 0xF); + result.pcRel = (pattern & rPcRel); + result.isExtern = (pattern & rExtern); + result.value = 0; + result.symbol = 0; + switch (pattern & 0x300) { + case rLength1: + result.length = 0; + break; + case rLength2: + result.length = 1; + break; + case rLength4: + result.length = 2; + break; + case rLength8: + result.length = 3; + break; + } + return result; +} + +void ArchHandler::appendReloc(normalized::Relocations &relocs, uint32_t offset, + uint32_t symbol, uint32_t value, + RelocPattern pattern) { + normalized::Relocation reloc = relocFromPattern(pattern); + reloc.offset = offset; + reloc.symbol = symbol; + reloc.value = value; + relocs.push_back(reloc); +} + + +int16_t ArchHandler::readS16(const uint8_t *addr, bool isBig) { + return read16(addr, isBig); +} + +int32_t ArchHandler::readS32(const uint8_t *addr, bool isBig) { + return read32(addr, isBig); +} + +uint32_t ArchHandler::readU32(const uint8_t *addr, bool isBig) { + return read32(addr, 
isBig); +} + + int64_t ArchHandler::readS64(const uint8_t *addr, bool isBig) { + return read64(addr, isBig); +} + +bool ArchHandler::isDwarfCIE(bool isBig, const DefinedAtom *atom) { + assert(atom->contentType() == DefinedAtom::typeCFI); + if (atom->rawContent().size() < sizeof(uint32_t)) + return false; + uint32_t size = read32(atom->rawContent().data(), isBig); + + uint32_t idOffset = sizeof(uint32_t); + if (size == 0xffffffffU) + idOffset += sizeof(uint64_t); + + return read32(atom->rawContent().data() + idOffset, isBig) == 0; +} + +const Atom *ArchHandler::fdeTargetFunction(const DefinedAtom *fde) { + for (auto ref : *fde) { + if (ref->kindNamespace() == Reference::KindNamespace::mach_o && + ref->kindValue() == unwindRefToFunctionKind()) { + assert(ref->kindArch() == kindArch() && "unexpected Reference arch"); + return ref->target(); + } + } + + return nullptr; +} + +} // namespace mach_o +} // namespace lld + + + diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler.h b/lld/lib/ReaderWriter/MachO/ArchHandler.h new file mode 100644 index 000000000000..83646c09b1a8 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ArchHandler.h @@ -0,0 +1,322 @@ +//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_ARCH_HANDLER_H +#define LLD_READER_WRITER_MACHO_ARCH_HANDLER_H + +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFile.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/Triple.h" + +namespace lld { +namespace mach_o { + +/// +/// The ArchHandler class handles all architecture specific aspects of +/// mach-o linking. +/// +class ArchHandler { +public: + virtual ~ArchHandler(); + + /// There is no public interface to subclasses of ArchHandler, so this + /// is the only way to instantiate an ArchHandler. + static std::unique_ptr create(MachOLinkingContext::Arch arch); + + /// Get (arch specific) kind strings used by Registry. + virtual const Registry::KindStrings *kindStrings() = 0; + + /// Convert mach-o Arch to Reference::KindArch. + virtual Reference::KindArch kindArch() = 0; + + /// Used by StubPass to update References to shared library functions + /// to be references to a stub. + virtual bool isCallSite(const Reference &) = 0; + + /// Used by GOTPass to locate GOT References + virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) { + return false; + } + + /// Used by TLVPass to locate TLV References. + virtual bool isTLVAccess(const Reference &) const { return false; } + + /// Used by the TLVPass to update TLV References. + virtual void updateReferenceToTLV(const Reference *) {} + + /// Used by ShimPass to insert shims in branches that switch mode. 
+ virtual bool isNonCallBranch(const Reference &) = 0; + + /// Used by GOTPass to update GOT References + virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {} + + /// Does this architecture make use of __unwind_info sections for exception + /// handling? If so, it will need a separate pass to create them. + virtual bool needsCompactUnwind() = 0; + + /// Returns the kind of reference to use to synthesize a 32-bit image-offset + /// value, used in the __unwind_info section. + virtual Reference::KindValue imageOffsetKind() = 0; + + /// Returns the kind of reference to use to synthesize a 32-bit image-offset + /// indirect value. Used for personality functions in the __unwind_info + /// section. + virtual Reference::KindValue imageOffsetKindIndirect() = 0; + + /// Architecture specific compact unwind type that signals __eh_frame should + /// actually be used. + virtual uint32_t dwarfCompactUnwindType() = 0; + + /// Reference from an __eh_frame CIE atom to its personality function it's + /// describing. Usually pointer-sized and PC-relative, but differs in whether + /// it needs to be in relocatable objects. + virtual Reference::KindValue unwindRefToPersonalityFunctionKind() = 0; + + /// Reference from an __eh_frame FDE to the CIE it's based on. + virtual Reference::KindValue unwindRefToCIEKind() = 0; + + /// Reference from an __eh_frame FDE atom to the function it's + /// describing. Usually pointer-sized and PC-relative, but differs in whether + /// it needs to be in relocatable objects. + virtual Reference::KindValue unwindRefToFunctionKind() = 0; + + /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the + /// required __eh_frame entry. On current architectures, the low 24 bits + /// represent the offset of the function's FDE entry from the start of + /// __eh_frame. + virtual Reference::KindValue unwindRefToEhFrameKind() = 0; + + /// Returns a pointer sized reference kind. 
On 64-bit targets this will + /// likely be something like pointer64, and pointer32 on 32-bit targets. + virtual Reference::KindValue pointerKind() = 0; + + virtual const Atom *fdeTargetFunction(const DefinedAtom *fde); + + /// Used by normalizedFromAtoms() to know where to generated rebasing and + /// binding info in final executables. + virtual bool isPointer(const Reference &) = 0; + + /// Used by normalizedFromAtoms() to know where to generated lazy binding + /// info in final executables. + virtual bool isLazyPointer(const Reference &); + + /// Reference from an __stub_helper entry to the required offset of the + /// lazy bind commands. + virtual Reference::KindValue lazyImmediateLocationKind() = 0; + + /// Returns true if the specified relocation is paired to the next relocation. + virtual bool isPairedReloc(const normalized::Relocation &) = 0; + + /// Prototype for a helper function. Given a sectionIndex and address, + /// finds the atom and offset with that atom of that address. + typedef std::function + FindAtomBySectionAndAddress; + + /// Prototype for a helper function. Given a symbolIndex, finds the atom + /// representing that symbol. + typedef std::function FindAtomBySymbolIndex; + + /// Analyzes a relocation from a .o file and returns the info + /// (kind, target, addend) needed to instantiate a Reference. + /// Two helper functions are passed as parameters to find the target atom + /// given a symbol index or address. + virtual llvm::Error + getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBigEndian, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) = 0; + + /// Analyzes a pair of relocations from a .o file and returns the info + /// (kind, target, addend) needed to instantiate a Reference. 
+ /// Two helper functions are passed as parameters to find the target atom + /// given a symbol index or address. + virtual llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) = 0; + + /// Prototype for a helper function. Given an atom, finds the symbol table + /// index for it in the output file. + typedef std::function FindSymbolIndexForAtom; + + /// Prototype for a helper function. Given an atom, finds the index + /// of the section that will contain the atom. + typedef std::function FindSectionIndexForAtom; + + /// Prototype for a helper function. Given an atom, finds the address + /// assigned to it in the output file. + typedef std::function FindAddressForAtom; + + /// Some architectures require local symbols on anonymous atoms. + virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) { + return false; + } + + /// Copy raw content then apply all fixup References on an Atom. + virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) = 0; + + /// Used in -r mode to convert a Reference to a mach-o relocation. + virtual void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + FindAddressForAtom, + normalized::Relocations&) = 0; + + /// Add arch-specific References. + virtual void addAdditionalReferences(MachODefinedAtom &atom) { } + + // Add Reference for data-in-code marker. 
+ virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff, + uint16_t length, uint16_t kind) { } + + /// Returns true if the specificed Reference value marks the start or end + /// of a data-in-code range in an atom. + virtual bool isDataInCodeTransition(Reference::KindValue refKind) { + return false; + } + + /// Returns the Reference value for a Reference that marks that start of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) { + return 0; + } + + /// Returns the Reference value for a Reference that marks that end of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) { + return 0; + } + + /// Only relevant for 32-bit arm archs. + virtual bool isThumbFunction(const DefinedAtom &atom) { return false; } + + /// Only relevant for 32-bit arm archs. + virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) { + llvm_unreachable("shims only support on arm"); + } + + /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE). + static bool isDwarfCIE(bool isBig, const DefinedAtom *atom); + + struct ReferenceInfo { + Reference::KindArch arch; + uint16_t kind; + uint32_t offset; + int32_t addend; + }; + + struct OptionalRefInfo { + bool used; + uint16_t kind; + uint32_t offset; + int32_t addend; + }; + + /// Table of architecture specific information for creating stubs. 
+ struct StubInfo { + const char* binderSymbolName; + ReferenceInfo lazyPointerReferenceToHelper; + ReferenceInfo lazyPointerReferenceToFinal; + ReferenceInfo nonLazyPointerReferenceToBinder; + uint8_t codeAlignment; + + uint32_t stubSize; + uint8_t stubBytes[16]; + ReferenceInfo stubReferenceToLP; + OptionalRefInfo optStubReferenceToLP; + + uint32_t stubHelperSize; + uint8_t stubHelperBytes[16]; + ReferenceInfo stubHelperReferenceToImm; + ReferenceInfo stubHelperReferenceToHelperCommon; + + DefinedAtom::ContentType stubHelperImageCacheContentType; + + uint32_t stubHelperCommonSize; + uint8_t stubHelperCommonAlignment; + uint8_t stubHelperCommonBytes[36]; + ReferenceInfo stubHelperCommonReferenceToCache; + OptionalRefInfo optStubHelperCommonReferenceToCache; + ReferenceInfo stubHelperCommonReferenceToBinder; + OptionalRefInfo optStubHelperCommonReferenceToBinder; + }; + + virtual const StubInfo &stubInfo() = 0; + +protected: + ArchHandler(); + + static std::unique_ptr create_x86_64(); + static std::unique_ptr create_x86(); + static std::unique_ptr create_arm(); + static std::unique_ptr create_arm64(); + + // Handy way to pack mach-o r_type and other bit fields into one 16-bit value. + typedef uint16_t RelocPattern; + enum { + rScattered = 0x8000, + rPcRel = 0x4000, + rExtern = 0x2000, + rLength1 = 0x0000, + rLength2 = 0x0100, + rLength4 = 0x0200, + rLength8 = 0x0300, + rLenArmLo = rLength1, + rLenArmHi = rLength2, + rLenThmbLo = rLength4, + rLenThmbHi = rLength8 + }; + /// Extract RelocPattern from normalized mach-o relocation. + static RelocPattern relocPattern(const normalized::Relocation &reloc); + /// Create normalized Relocation initialized from pattern. + static normalized::Relocation relocFromPattern(RelocPattern pattern); + /// One liner to add a relocation. 
+ static void appendReloc(normalized::Relocations &relocs, uint32_t offset, + uint32_t symbol, uint32_t value, + RelocPattern pattern); + + + static int16_t readS16(const uint8_t *addr, bool isBig); + static int32_t readS32(const uint8_t *addr, bool isBig); + static uint32_t readU32(const uint8_t *addr, bool isBig); + static int64_t readS64(const uint8_t *addr, bool isBig); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_ARCH_HANDLER_H diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp new file mode 100644 index 000000000000..06c98ac06fd1 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp @@ -0,0 +1,1522 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::little32_t; + + +class ArchHandler_arm : public ArchHandler { +public: + ArchHandler_arm() = default; + ~ArchHandler_arm() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::ARM; } + + const ArchHandler::StubInfo &stubInfo() override; + bool isCallSite(const Reference &) override; + bool isPointer(const Reference &) override; + 
bool isPairedReloc(const normalized::Relocation &) override; + bool isNonCallBranch(const Reference &) override; + + bool needsCompactUnwind() override { + return false; + } + Reference::KindValue imageOffsetKind() override { + return invalid; + } + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return invalid; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + Reference::KindValue lazyImmediateLocationKind() override { + return lazyImmediateLocation; + } + + Reference::KindValue pointerKind() override { + return invalid; + } + + uint32_t dwarfCompactUnwindType() override { + // FIXME + return -1; + } + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, 
+ const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + FindAddressForAtom, + normalized::Relocations &) override; + + void addAdditionalReferences(MachODefinedAtom &atom) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeThumbCode: + case modeArmCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return atom.isThumb() ? modeThumbCode : modeArmCode; + } + + bool isThumbFunction(const DefinedAtom &atom) override; + const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) override; + +private: + friend class Thumb2ToArmShimAtom; + friend class ArmToThumbShimAtom; + + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfoArmPIC; + + enum ArmKind : Reference::KindValue { + invalid, /// for error condition + + modeThumbCode, /// Content starting at this offset is thumb. + modeArmCode, /// Content starting at this offset is arm. + modeData, /// Content starting at this offset is data. + + // Kinds found in mach-o .o files: + thumb_bl22, /// ex: bl _foo + thumb_b22, /// ex: b _foo + thumb_movw, /// ex: movw r1, :lower16:_foo + thumb_movt, /// ex: movt r1, :lower16:_foo + thumb_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + thumb_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + arm_bl24, /// ex: bl _foo + arm_b24, /// ex: b _foo + arm_movw, /// ex: movw r1, :lower16:_foo + arm_movt, /// ex: movt r1, :lower16:_foo + arm_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + arm_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . 
+ + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + // Utility functions for inspecting/updating instructions. + static bool isThumbMovw(uint32_t instruction); + static bool isThumbMovt(uint32_t instruction); + static bool isArmMovw(uint32_t instruction); + static bool isArmMovt(uint32_t instruction); + static int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t); + static int32_t getDisplacementFromArmBranch(uint32_t instruction); + static uint16_t getWordFromThumbMov(uint32_t instruction); + static uint16_t getWordFromArmMov(uint32_t instruction); + static uint32_t clearThumbBit(uint32_t value, const Atom *target); + static uint32_t setDisplacementInArmBranch(uint32_t instr, int32_t disp, + bool targetIsThumb); + static uint32_t setDisplacementInThumbBranch(uint32_t instr, uint32_t ia, + int32_t disp, bool targetThumb); + static uint32_t setWordFromThumbMov(uint32_t instruction, uint16_t word); + static uint32_t setWordFromArmMov(uint32_t instruction, uint16_t word); + + StringRef stubName(const DefinedAtom &); + bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_arm +//===----------------------------------------------------------------------===// + +const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeThumbCode), + LLD_KIND_STRING_ENTRY(modeArmCode), + LLD_KIND_STRING_ENTRY(modeData), + 
LLD_KIND_STRING_ENTRY(thumb_bl22), + LLD_KIND_STRING_ENTRY(thumb_b22), + LLD_KIND_STRING_ENTRY(thumb_movw), + LLD_KIND_STRING_ENTRY(thumb_movt), + LLD_KIND_STRING_ENTRY(thumb_movw_funcRel), + LLD_KIND_STRING_ENTRY(thumb_movt_funcRel), + LLD_KIND_STRING_ENTRY(arm_bl24), + LLD_KIND_STRING_ENTRY(arm_b24), + LLD_KIND_STRING_ENTRY(arm_movw), + LLD_KIND_STRING_ENTRY(arm_movt), + LLD_KIND_STRING_ENTRY(arm_movw_funcRel), + LLD_KIND_STRING_ENTRY(arm_movt_funcRel), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm::_sStubInfoArmPIC = { + "dyld_stub_binder", + + // References in lazy pointer + { Reference::KindArch::ARM, pointer32, 0, 0 }, + { Reference::KindArch::ARM, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::ARM, pointer32, 0, 0 }, + + // arm code alignment 2^2 + 2, + + // Stub size and code + 16, + { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 12 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }, // .long L_foo$lazy_ptr - (L1$scv + 8) + { Reference::KindArch::ARM, delta32, 12, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 12, + { 0x00, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #0] + 0x00, 0x00, 0x00, 0xEA, // b _helperhelper + 0x00, 0x00, 0x00, 0x00 }, // .long lazy-info-offset + { Reference::KindArch::ARM, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::ARM, arm_b24, 4, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeGOT, + + // Stub Helper-Common size and code + 36, + // Stub helper alignment + 2, + { // push lazy-info-offset + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! + // push address of dyld_mageLoaderCache + 0x10, 0xC0, 0x9F, 0xE5, // ldr ip, L1 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! 
+ // jump through dyld_stub_binder + 0x08, 0xC0, 0x9F, 0xE5, // ldr ip, L2 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00, // L1: .long fFastStubGOTAtom - (helper+16) + 0x00, 0x00, 0x00, 0x00 }, // L2: .long dyld_stub_binder - (helper+28) + { Reference::KindArch::ARM, delta32, 28, 0xC }, + { false, 0, 0, 0 }, + { Reference::KindArch::ARM, delta32, 32, 0x04 }, + { false, 0, 0, 0 } +}; + +const ArchHandler::StubInfo &ArchHandler_arm::stubInfo() { + // If multiple kinds of stubs are supported, select which StubInfo here. + return _sStubInfoArmPIC; +} + +bool ArchHandler_arm::isCallSite(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case thumb_bl22: + case arm_b24: + case arm_bl24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_arm::isNonCallBranch(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case arm_b24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPairedReloc(const Relocation &reloc) { + switch (reloc.type) { + case ARM_RELOC_SECTDIFF: + case ARM_RELOC_LOCAL_SECTDIFF: + case ARM_RELOC_HALF_SECTDIFF: + case ARM_RELOC_HALF: + return true; + default: + return false; + } +} + +/// Trace references from stub atom to lazy pointer to target and get its name. +StringRef ArchHandler_arm::stubName(const DefinedAtom &stubAtom) { + assert(stubAtom.contentType() == DefinedAtom::typeStub); + for (const Reference *ref : stubAtom) { + if (const DefinedAtom* lp = dyn_cast(ref->target())) { + if (lp->contentType() != DefinedAtom::typeLazyPointer) + continue; + for (const Reference *ref2 : *lp) { + if (ref2->kindValue() != lazyPointer) + continue; + return ref2->target()->name(); + } + } + } + return "stub"; +} + +/// Extract displacement from an ARM b/bl/blx instruction. 
+int32_t ArchHandler_arm::getDisplacementFromArmBranch(uint32_t instruction) { + // Sign-extend imm24 + int32_t displacement = (instruction & 0x00FFFFFF) << 2; + if ((displacement & 0x02000000) != 0) + displacement |= 0xFC000000; + // If this is BLX and H bit set, add 2. + if ((instruction & 0xFF000000) == 0xFB000000) + displacement += 2; + return displacement; +} + +/// Update an ARM b/bl/blx instruction, switching bl <-> blx as needed. +uint32_t ArchHandler_arm::setDisplacementInArmBranch(uint32_t instruction, + int32_t displacement, + bool targetIsThumb) { + assert((displacement <= 33554428) && (displacement > (-33554432)) + && "arm branch out of range"); + bool is_blx = ((instruction & 0xF0000000) == 0xF0000000); + uint32_t newInstruction = (instruction & 0xFF000000); + uint32_t h = 0; + if (targetIsThumb) { + // Force use of BLX. + newInstruction = 0xFA000000; + if (!is_blx) { + assert(((instruction & 0xF0000000) == 0xE0000000) + && "no conditional arm blx"); + assert(((instruction & 0xFF000000) == 0xEB000000) + && "no arm pc-rel BX instruction"); + } + if (displacement & 2) + h = 1; + } + else { + // Force use of B/BL. + if (is_blx) + newInstruction = 0xEB000000; + } + newInstruction |= (h << 24) | ((displacement >> 2) & 0x00FFFFFF); + return newInstruction; +} + +/// Extract displacement from a thumb b/bl/blx instruction. +int32_t ArchHandler_arm::getDisplacementFromThumbBranch(uint32_t instruction, + uint32_t instrAddr) { + bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); + uint32_t s = (instruction >> 10) & 0x1; + uint32_t j1 = (instruction >> 29) & 0x1; + uint32_t j2 = (instruction >> 27) & 0x1; + uint32_t imm10 = instruction & 0x3FF; + uint32_t imm11 = (instruction >> 16) & 0x7FF; + uint32_t i1 = (j1 == s); + uint32_t i2 = (j2 == s); + uint32_t dis = + (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1); + int32_t sdis = dis; + int32_t result = s ? 
(sdis | 0xFE000000) : sdis; + if (is_blx && (instrAddr & 0x2)) { + // The thumb blx instruction always has low bit of imm11 as zero. The way + // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that + // the blx instruction always 4-byte aligns the pc before adding the + // displacement from the blx. We must emulate that when decoding this. + result -= 2; + } + return result; +} + +/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed. +uint32_t ArchHandler_arm::setDisplacementInThumbBranch(uint32_t instruction, + uint32_t instrAddr, + int32_t displacement, + bool targetIsThumb) { + assert((displacement <= 16777214) && (displacement > (-16777216)) + && "thumb branch out of range"); + bool is_bl = ((instruction & 0xD000F800) == 0xD000F000); + bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); + bool is_b = ((instruction & 0xD000F800) == 0x9000F000); + uint32_t newInstruction = (instruction & 0xD000F800); + if (is_bl || is_blx) { + if (targetIsThumb) { + newInstruction = 0xD000F000; // Use bl + } else { + newInstruction = 0xC000F000; // Use blx + // See note in getDisplacementFromThumbBranch() about blx. 
+ if (instrAddr & 0x2) + displacement += 2; + } + } else if (is_b) { + assert(targetIsThumb && "no pc-rel thumb branch instruction that " + "switches to arm mode"); + } + else { + llvm_unreachable("thumb branch22 reloc on a non-branch instruction"); + } + uint32_t s = (uint32_t)(displacement >> 24) & 0x1; + uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1; + uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1; + uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF; + uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF; + uint32_t j1 = (i1 == s); + uint32_t j2 = (i2 == s); + uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11; + uint32_t firstDisp = (s << 10) | imm10; + newInstruction |= (nextDisp << 16) | firstDisp; + return newInstruction; +} + +bool ArchHandler_arm::isThumbMovw(uint32_t instruction) { + return (instruction & 0x8000FBF0) == 0x0000F240; +} + +bool ArchHandler_arm::isThumbMovt(uint32_t instruction) { + return (instruction & 0x8000FBF0) == 0x0000F2C0; +} + +bool ArchHandler_arm::isArmMovw(uint32_t instruction) { + return (instruction & 0x0FF00000) == 0x03000000; +} + +bool ArchHandler_arm::isArmMovt(uint32_t instruction) { + return (instruction & 0x0FF00000) == 0x03400000; +} + +uint16_t ArchHandler_arm::getWordFromThumbMov(uint32_t instruction) { + assert(isThumbMovw(instruction) || isThumbMovt(instruction)); + uint32_t i = ((instruction & 0x00000400) >> 10); + uint32_t imm4 = (instruction & 0x0000000F); + uint32_t imm3 = ((instruction & 0x70000000) >> 28); + uint32_t imm8 = ((instruction & 0x00FF0000) >> 16); + return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; +} + +uint16_t ArchHandler_arm::getWordFromArmMov(uint32_t instruction) { + assert(isArmMovw(instruction) || isArmMovt(instruction)); + uint32_t imm4 = ((instruction & 0x000F0000) >> 16); + uint32_t imm12 = (instruction & 0x00000FFF); + return (imm4 << 12) | imm12; +} + +uint32_t ArchHandler_arm::setWordFromThumbMov(uint32_t instr, uint16_t word) { + assert(isThumbMovw(instr) || 
isThumbMovt(instr)); + uint32_t imm4 = (word & 0xF000) >> 12; + uint32_t i = (word & 0x0800) >> 11; + uint32_t imm3 = (word & 0x0700) >> 8; + uint32_t imm8 = word & 0x00FF; + return (instr & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16); +} + +uint32_t ArchHandler_arm::setWordFromArmMov(uint32_t instr, uint16_t word) { + assert(isArmMovw(instr) || isArmMovt(instr)); + uint32_t imm4 = (word & 0xF000) >> 12; + uint32_t imm12 = word & 0x0FFF; + return (instr & 0xFFF0F000) | (imm4 << 16) | imm12; +} + +uint32_t ArchHandler_arm::clearThumbBit(uint32_t value, const Atom *target) { + // The assembler often adds one to the address of a thumb function. + // We need to undo that so it does not look like an addend. + if (value & 1) { + if (isa(target)) { + const MachODefinedAtom *machoTarget = + reinterpret_cast(target); + if (machoTarget->isThumb()) + value &= -2; // mask off thumb-bit + } + } + return value; +} + +llvm::Error ArchHandler_arm::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + uint32_t instruction = *(const ulittle32_t *)fixupContent; + int32_t displacement; + switch (relocPattern(reloc)) { + case ARM_THUMB_RELOC_BR22 | rPcRel | rExtern | rLength4: + // ex: bl _foo (and _foo is undefined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // Instruction contains branch to addend. 
+ displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + *addend = fixupAddress + 4 + displacement; + return llvm::Error::success(); + case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4: + // ex: bl _foo (and _foo is defined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + targetAddress = fixupAddress + 4 + displacement; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4: + // ex: bl _foo+4 (and _foo is defined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + targetAddress = fixupAddress + 4 + displacement; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + // reloc.value is target atom's address. Instruction contains branch + // to atom+addend. + *addend += (targetAddress - reloc.value); + return llvm::Error::success(); + case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4: + // ex: bl _foo (and _foo is undefined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // Instruction contains branch to addend. 
+ displacement = getDisplacementFromArmBranch(instruction); + *addend = fixupAddress + 8 + displacement; + return llvm::Error::success(); + case ARM_RELOC_BR24 | rPcRel | rLength4: + // ex: bl _foo (and _foo is defined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + displacement = getDisplacementFromArmBranch(instruction); + targetAddress = fixupAddress + 8 + displacement; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ARM_RELOC_BR24 | rScattered | rPcRel | rLength4: + // ex: bl _foo+4 (and _foo is defined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + displacement = getDisplacementFromArmBranch(instruction); + targetAddress = fixupAddress + 8 + displacement; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + // reloc.value is target atom's address. Instruction contains branch + // to atom+addend. 
+ *addend += (targetAddress - reloc.value); + return llvm::Error::success(); + case ARM_RELOC_VANILLA | rExtern | rLength4: + // ex: .long _foo (and _foo is undefined) + *kind = pointer32; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = instruction; + return llvm::Error::success(); + case ARM_RELOC_VANILLA | rLength4: + // ex: .long _foo (and _foo is defined) + *kind = pointer32; + if (auto ec = atomFromAddress(reloc.symbol, instruction, target, addend)) + return ec; + *addend = clearThumbBit((uint32_t) * addend, *target); + return llvm::Error::success(); + case ARM_RELOC_VANILLA | rScattered | rLength4: + // ex: .long _foo+a (and _foo is defined) + *kind = pointer32; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend += (clearThumbBit(instruction, *target) - reloc.value); + return llvm::Error::success(); + default: + return llvm::make_error("unsupported arm relocation type"); + } + return llvm::Error::success(); +} + +llvm::Error +ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddr, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + bool pointerDiff = false; + bool funcRel; + bool top; + bool thumbReloc; + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rScattered | rLenThmbLo): + // ex: movw r1, :lower16:(_x-L1) [thumb mode] + *kind = thumb_movw_funcRel; + funcRel = true; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rScattered | rLenThmbHi): + // ex: movt r1, :upper16:(_x-L1) [thumb mode] + *kind = 
thumb_movt_funcRel; + funcRel = true; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rScattered | rLenArmLo): + // ex: movw r1, :lower16:(_x-L1) [arm mode] + *kind = arm_movw_funcRel; + funcRel = true; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rScattered | rLenArmHi): + // ex: movt r1, :upper16:(_x-L1) [arm mode] + *kind = arm_movt_funcRel; + funcRel = true; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_x [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_x [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_x [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_x [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rScattered | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_x+a [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rScattered | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_x+a [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rScattered | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_x+a [arm mode] + *kind = arm_movw; + funcRel 
= false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rScattered | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_x+a [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rExtern | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_undef [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rExtern | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_undef [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rExtern | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_undef [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rExtern | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_undef [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_SECTDIFF | rScattered | rLength4) << 16 | + ARM_RELOC_PAIR | rScattered | rLength4): + case ((ARM_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | + ARM_RELOC_PAIR | rScattered | rLength4): + // ex: .long _foo - . 
+ pointerDiff = true; + break; + default: + return llvm::make_error("unsupported arm relocation pair"); + } + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint32_t instruction = *(const ulittle32_t *)fixupContent; + uint32_t value; + uint32_t fromAddress; + uint32_t toAddress; + uint16_t instruction16; + uint16_t other16; + const lld::Atom *fromTarget; + Reference::Addend offsetInTo; + Reference::Addend offsetInFrom; + if (pointerDiff) { + toAddress = reloc1.value; + fromAddress = reloc2.value; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (scatterable && (fromTarget != inAtom)) + return llvm::make_error( + "SECTDIFF relocation where subtrahend label is not in atom"); + *kind = delta32; + value = clearThumbBit(instruction, *target); + *addend = (int32_t)(value - (toAddress - fixupAddress)); + } else if (funcRel) { + toAddress = reloc1.value; + fromAddress = reloc2.value; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (fromTarget != inAtom) + return llvm::make_error("ARM_RELOC_HALF_SECTDIFF relocation" + " where subtrahend label is not in atom"); + other16 = (reloc2.offset & 0xFFFF); + if (thumbReloc) { + if (top) { + if (!isThumbMovt(instruction)) + return llvm::make_error("expected movt instruction"); + } + else { + if (!isThumbMovw(instruction)) + return llvm::make_error("expected movw instruction"); + } + instruction16 = getWordFromThumbMov(instruction); + } + else { + if (top) { + if (!isArmMovt(instruction)) + return llvm::make_error("expected movt instruction"); + } + else { + if (!isArmMovw(instruction)) + return llvm::make_error("expected movw instruction"); + } + instruction16 = getWordFromArmMov(instruction); + } + if (top) + value = (instruction16 << 16) | other16; + else + value = 
(other16 << 16) | instruction16; + value = clearThumbBit(value, *target); + int64_t ta = (int64_t) value - (toAddress - fromAddress); + *addend = ta - offsetInFrom; + return llvm::Error::success(); + } else { + uint32_t sectIndex; + if (thumbReloc) { + if (top) { + if (!isThumbMovt(instruction)) + return llvm::make_error("expected movt instruction"); + } + else { + if (!isThumbMovw(instruction)) + return llvm::make_error("expected movw instruction"); + } + instruction16 = getWordFromThumbMov(instruction); + } + else { + if (top) { + if (!isArmMovt(instruction)) + return llvm::make_error("expected movt instruction"); + } + else { + if (!isArmMovw(instruction)) + return llvm::make_error("expected movw instruction"); + } + instruction16 = getWordFromArmMov(instruction); + } + other16 = (reloc2.offset & 0xFFFF); + if (top) + value = (instruction16 << 16) | other16; + else + value = (other16 << 16) | instruction16; + if (reloc1.isExtern) { + if (auto ec = atomFromSymbolIndex(reloc1.symbol, target)) + return ec; + *addend = value; + } else { + if (reloc1.scattered) { + toAddress = reloc1.value; + sectIndex = 0; + } else { + toAddress = value; + sectIndex = reloc1.symbol; + } + if (auto ec = atomFromAddr(sectIndex, toAddress, target, &offsetInTo)) + return ec; + *addend = value - toAddress; + } + } + + return llvm::Error::success(); +} + +void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool &thumbMode, bool targetIsThumb) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + ulittle32_t *loc32 = reinterpret_cast(loc); + int32_t displacement; + uint16_t value16; + uint32_t value32; + switch (static_cast(ref.kindValue())) { + case modeThumbCode: + thumbMode = true; + break; + case modeArmCode: + thumbMode = false; + break; + case modeData: + break; + case thumb_b22: + case thumb_bl22: + 
assert(thumbMode); + displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); + value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, + displacement, targetIsThumb); + *loc32 = value32; + break; + case thumb_movw: + assert(thumbMode); + value16 = (targetAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt: + assert(thumbMode); + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movw_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case arm_b24: + case arm_bl24: + assert(!thumbMode); + displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); + value32 = setDisplacementInArmBranch(*loc32, displacement, targetIsThumb); + *loc32 = value32; + break; + case arm_movw: + assert(!thumbMode); + value16 = (targetAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt: + assert(!thumbMode); + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movw_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case pointer32: + if (targetIsThumb) + *loc32 = targetAddress + ref.addend() + 1; + else + *loc32 = targetAddress + 
ref.addend(); + break; + case delta32: + if (targetIsThumb) + *loc32 = targetAddress - fixupAddress + ref.addend() + 1; + else + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case lazyPointer: + // do nothing + break; + case lazyImmediateLocation: + *loc32 = ref.addend(); + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::generateAtomContent(const DefinedAtom &atom, + bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. + bool thumbMode = false; + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + bool targetIsThumb = false; + if (const DefinedAtom *defTarg = dyn_cast(target)) { + targetAddress = findAddress(*target); + targetIsThumb = isThumbFunction(*defTarg); + } + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, thumbMode, + targetIsThumb); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, thumbMode, targetIsThumb); + } + } +} + +bool ArchHandler_arm::useExternalRelocationTo(const Atom &target) { + // Undefined symbols are referenced via external relocations. + if (isa(&target)) + return true; + if (const DefinedAtom *defAtom = dyn_cast(&target)) { + switch (defAtom->merge()) { + case DefinedAtom::mergeAsTentative: + // Tentative definitions are referenced via external relocations. 
+ return true; + case DefinedAtom::mergeAsWeak: + case DefinedAtom::mergeAsWeakAndAddressUsed: + // Global weak-defs are referenced via external relocations. + return (defAtom->scope() == DefinedAtom::scopeGlobal); + default: + break; + } + } + // Everything else is reference via an internal relocation. + return false; +} + +void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool &thumbMode, + bool targetIsThumb) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + ulittle32_t *loc32 = reinterpret_cast(loc); + int32_t displacement; + uint16_t value16; + uint32_t value32; + bool targetIsUndef = isa(ref.target()); + switch (static_cast(ref.kindValue())) { + case modeThumbCode: + thumbMode = true; + break; + case modeArmCode: + thumbMode = false; + break; + case modeData: + break; + case thumb_b22: + case thumb_bl22: + assert(thumbMode); + if (useExternalReloc) + displacement = (ref.addend() - (fixupAddress + 4)); + else + displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); + value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, + displacement, + targetIsUndef || targetIsThumb); + *loc32 = value32; + break; + case thumb_movw: + assert(thumbMode); + if (useExternalReloc) + value16 = ref.addend() & 0xFFFF; + else + value16 = (targetAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt: + assert(thumbMode); + if (useExternalReloc) + value16 = ref.addend() >> 16; + else + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movw_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; 
+ case thumb_movt_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case arm_b24: + case arm_bl24: + assert(!thumbMode); + if (useExternalReloc) + displacement = (ref.addend() - (fixupAddress + 8)); + else + displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); + value32 = setDisplacementInArmBranch(*loc32, displacement, + targetIsThumb); + *loc32 = value32; + break; + case arm_movw: + assert(!thumbMode); + if (useExternalReloc) + value16 = ref.addend() & 0xFFFF; + else + value16 = (targetAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt: + assert(!thumbMode); + if (useExternalReloc) + value16 = ref.addend() >> 16; + else + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movw_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case pointer32: + *loc32 = targetAddress + ref.addend(); + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + uint32_t 
sectionOffset = atomSectionOffset + ref.offsetInAtom(); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + uint32_t targetAtomAddress; + uint32_t fromAtomAddress; + uint16_t other16; + switch (static_cast(ref.kindValue())) { + case modeThumbCode: + case modeArmCode: + case modeData: + // Do nothing. + break; + case thumb_b22: + case thumb_bl22: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_THUMB_RELOC_BR22 | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_THUMB_RELOC_BR22 | rPcRel | rLength4); + } + break; + case thumb_movw: + if (useExternalReloc) { + other16 = ref.addend() >> 16; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } else { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } + } + break; + case thumb_movt: + if (useExternalReloc) { + other16 = ref.addend() & 0xFFFF; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if 
(ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } else { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } + } + break; + case thumb_movw_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenThmbLo); + break; + case thumb_movt_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenThmbHi); + break; + case arm_b24: + case arm_bl24: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_BR24 | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_BR24 | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_BR24 | rPcRel | rLength4); + } + break; + case arm_movw: + if (useExternalReloc) { + other16 = ref.addend() >> 16; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | 
rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } else { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } + } + break; + case arm_movt: + if (useExternalReloc) { + other16 = ref.addend() & 0xFFFF; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } else { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } + } + break; + case arm_movw_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenArmLo); + break; + case arm_movt_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = 
addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenArmHi); + break; + case pointer32: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_VANILLA | rExtern | rLength4); + } + else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_VANILLA | rScattered | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_VANILLA | rLength4); + } + break; + case delta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + ARM_RELOC_PAIR | rScattered | rLength4); + break; + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::addAdditionalReferences(MachODefinedAtom &atom) { + if (atom.isThumb()) { + atom.addReference(Reference::KindNamespace::mach_o, + Reference::KindArch::ARM, modeThumbCode, 0, &atom, 0); + } +} + +bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) { + for (const Reference *ref : atom) { + if (ref->offsetInAtom() != 0) + return false; + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + assert(ref->kindArch() == Reference::KindArch::ARM); + if (ref->kindValue() == modeThumbCode) + return true; + } + return false; +} + +class Thumb2ToArmShimAtom : public SimpleDefinedAtom { +public: + Thumb2ToArmShimAtom(MachOFile &file, StringRef targetName, + const DefinedAtom &target) + : SimpleDefinedAtom(file) { + 
addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::modeThumbCode, 0, this, 0); + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::delta32, 8, &target, 0); + std::string name = std::string(targetName) + "$shim"; + StringRef tmp(name); + _name = tmp.copy(file.allocator()); + } + + ~Thumb2ToArmShimAtom() override = default; + + StringRef name() const override { + return _name; + } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + Alignment alignment() const override { return 4; } + + uint64_t size() const override { + return 12; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef rawContent() const override { + static const uint8_t bytes[] = + { 0xDF, 0xF8, 0x04, 0xC0, // ldr ip, pc + 4 + 0xFF, 0x44, // add ip, pc, ip + 0x60, 0x47, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }; // .long target - this + assert(sizeof(bytes) == size()); + return llvm::makeArrayRef(bytes, sizeof(bytes)); + } +private: + StringRef _name; +}; + +class ArmToThumbShimAtom : public SimpleDefinedAtom { +public: + ArmToThumbShimAtom(MachOFile &file, StringRef targetName, + const DefinedAtom &target) + : SimpleDefinedAtom(file) { + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::delta32, 12, &target, 0); + std::string name = std::string(targetName) + "$shim"; + StringRef tmp(name); + _name = tmp.copy(file.allocator()); + } + + ~ArmToThumbShimAtom() override = default; + + StringRef name() const override { + return _name; + } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + Alignment alignment() const override { return 4; } + + uint64_t size() const override { + return 16; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef rawContent() const override { + static const uint8_t bytes[] = + { 
0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 4 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x1C, 0xFF, 0x2F, 0xE1, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }; // .long target - this + assert(sizeof(bytes) == size()); + return llvm::makeArrayRef(bytes, sizeof(bytes)); + } +private: + StringRef _name; +}; + +const DefinedAtom *ArchHandler_arm::createShim(MachOFile &file, + bool thumbToArm, + const DefinedAtom &target) { + bool isStub = (target.contentType() == DefinedAtom::typeStub); + StringRef targetName = isStub ? stubName(target) : target.name(); + if (thumbToArm) + return new (file.allocator()) Thumb2ToArmShimAtom(file, targetName, target); + else + return new (file.allocator()) ArmToThumbShimAtom(file, targetName, target); +} + +std::unique_ptr ArchHandler::create_arm() { + return std::unique_ptr(new ArchHandler_arm()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp new file mode 100644 index 000000000000..bee081aec067 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp @@ -0,0 +1,897 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm64.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +using llvm::support::little32_t; +using llvm::support::little64_t; + +class ArchHandler_arm64 : public ArchHandler { +public: + ArchHandler_arm64() = default; + ~ArchHandler_arm64() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { + return Reference::KindArch::AArch64; + } + + /// Used by GOTPass to locate GOT References + bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + switch (ref.kindValue()) { + case gotPage21: + case gotOffset12: + canBypassGOT = true; + return true; + case delta32ToGOT: + case unwindCIEToPersonalityFunction: + case imageOffsetGot: + canBypassGOT = false; + return true; + default: + return false; + } + } + + /// Used by GOTPass to update GOT References. 
+ void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { + // If GOT slot was instantiated, transform: + // gotPage21/gotOffset12 -> page21/offset12scale8 + // If GOT slot optimized away, transform: + // gotPage21/gotOffset12 -> page21/addOffset12 + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::AArch64); + switch (ref->kindValue()) { + case gotPage21: + const_cast(ref)->setKindValue(page21); + break; + case gotOffset12: + const_cast(ref)->setKindValue(targetNowGOT ? + offset12scale8 : addOffset12); + break; + case delta32ToGOT: + const_cast(ref)->setKindValue(delta32); + break; + case imageOffsetGot: + const_cast(ref)->setKindValue(imageOffset); + break; + default: + llvm_unreachable("Not a GOT reference"); + } + } + + const StubInfo &stubInfo() override { return _sStubInfo; } + + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return true; + } + Reference::KindValue imageOffsetKind() override { + return imageOffset; + } + Reference::KindValue imageOffsetKindIndirect() override { + return imageOffsetGot; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return unwindCIEToPersonalityFunction; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return unwindFDEToFunction; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return unwindInfoToEhFrame; + } + + Reference::KindValue pointerKind() override { + return pointer64; + } + + Reference::KindValue lazyImmediateLocationKind() override { + return lazyImmediateLocation; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x03000000; + } + + llvm::Error 
getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { + return (atom->contentType() == DefinedAtom::typeCString); + } + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum Arm64Kind : Reference::KindValue { + invalid, /// for error condition + + // Kinds found in mach-o .o files: + branch26, /// ex: bl _foo + page21, /// ex: adrp x1, _foo@PAGE + offset12, /// ex: ldrb w0, [x1, _foo@PAGEOFF] + offset12scale2, /// ex: ldrs w0, [x1, _foo@PAGEOFF] + offset12scale4, /// ex: ldr w0, [x1, _foo@PAGEOFF] + offset12scale8, /// ex: ldr x0, [x1, _foo@PAGEOFF] + offset12scale16, /// ex: ldr q0, [x1, _foo@PAGEOFF] + gotPage21, /// ex: adrp x1, _foo@GOTPAGE + 
gotOffset12, /// ex: ldr w0, [x1, _foo@GOTPAGEOFF] + tlvPage21, /// ex: adrp x1, _foo@TLVPAGE + tlvOffset12, /// ex: ldr w0, [x1, _foo@TLVPAGEOFF] + + pointer64, /// ex: .quad _foo + delta64, /// ex: .quad _foo - . + delta32, /// ex: .long _foo - . + negDelta32, /// ex: .long . - _foo + pointer64ToGOT, /// ex: .quad _foo@GOT + delta32ToGOT, /// ex: .long _foo@GOT - . + + // Kinds introduced by Passes: + addOffset12, /// Location contains LDR to change into ADD. + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + imageOffset, /// Location contains offset of atom in final image + imageOffsetGot, /// Location contains offset of GOT entry for atom in + /// final image (typically personality function). + unwindCIEToPersonalityFunction, /// Nearly delta32ToGOT, but cannot be + /// rematerialized in relocatable object + /// (yay for implicit contracts!). + unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in + /// relocatable object (yay for implicit contracts!). + unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to + /// refer to __eh_frame entry. + }; + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, bool targetUnnamed); + + // Utility functions for inspecting/updating instructions. 
+ static uint32_t setDisplacementInBranch26(uint32_t instr, int32_t disp); + static uint32_t setDisplacementInADRP(uint32_t instr, int64_t disp); + static Arm64Kind offset12KindFromInstruction(uint32_t instr); + static uint32_t setImm12(uint32_t instr, uint32_t offset); +}; + +const Registry::KindStrings ArchHandler_arm64::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(branch26), + LLD_KIND_STRING_ENTRY(page21), + LLD_KIND_STRING_ENTRY(offset12), + LLD_KIND_STRING_ENTRY(offset12scale2), + LLD_KIND_STRING_ENTRY(offset12scale4), + LLD_KIND_STRING_ENTRY(offset12scale8), + LLD_KIND_STRING_ENTRY(offset12scale16), + LLD_KIND_STRING_ENTRY(gotPage21), + LLD_KIND_STRING_ENTRY(gotOffset12), + LLD_KIND_STRING_ENTRY(tlvPage21), + LLD_KIND_STRING_ENTRY(tlvOffset12), + LLD_KIND_STRING_ENTRY(pointer64), + LLD_KIND_STRING_ENTRY(delta64), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(pointer64ToGOT), + LLD_KIND_STRING_ENTRY(delta32ToGOT), + + LLD_KIND_STRING_ENTRY(addOffset12), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_ENTRY(imageOffset), + LLD_KIND_STRING_ENTRY(imageOffsetGot), + LLD_KIND_STRING_ENTRY(unwindCIEToPersonalityFunction), + LLD_KIND_STRING_ENTRY(unwindFDEToFunction), + LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), + + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm64::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::AArch64, pointer64, 0, 0 }, + { Reference::KindArch::AArch64, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::AArch64, pointer64, 0, 0 }, + + // arm64 code alignment 2^1 + 1, + + // Stub size and code + 12, + { 0x10, 0x00, 0x00, 0x90, // ADRP X16, lazy_pointer@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16, lazy_pointer@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + 
{ true, offset12scale8, 4, 0 }, + + // Stub Helper size and code + 12, + { 0x50, 0x00, 0x00, 0x18, // LDR W16, L0 + 0x00, 0x00, 0x00, 0x14, // LDR B helperhelper + 0x00, 0x00, 0x00, 0x00 }, // L0: .long 0 + { Reference::KindArch::AArch64, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::AArch64, branch26, 4, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeGOT, + + // Stub Helper-Common size and code + 24, + // Stub helper alignment + 2, + { 0x11, 0x00, 0x00, 0x90, // ADRP X17, dyld_ImageLoaderCache@page + 0x31, 0x02, 0x00, 0x91, // ADD X17, X17, dyld_ImageLoaderCache@pageoff + 0xF0, 0x47, 0xBF, 0xA9, // STP X16/X17, [SP, #-16]! + 0x10, 0x00, 0x00, 0x90, // ADRP X16, _fast_lazy_bind@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16,_fast_lazy_bind@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + { true, offset12, 4, 0 }, + { Reference::KindArch::AArch64, page21, 12, 0 }, + { true, offset12scale8, 16, 0 } +}; + +bool ArchHandler_arm64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + return (ref.kindValue() == branch26); +} + +bool ArchHandler_arm64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64); +} + +bool ArchHandler_arm64::isPairedReloc(const Relocation &r) { + return ((r.type == ARM64_RELOC_ADDEND) || (r.type == ARM64_RELOC_SUBTRACTOR)); +} + +uint32_t ArchHandler_arm64::setDisplacementInBranch26(uint32_t instr, + int32_t displacement) { + assert((displacement <= 134217727) && (displacement > (-134217728)) && + "arm64 branch out of range"); + return (instr & 0xFC000000) | ((uint32_t)(displacement >> 2) & 0x03FFFFFF); +} + +uint32_t 
ArchHandler_arm64::setDisplacementInADRP(uint32_t instruction, + int64_t displacement) { + assert((displacement <= 0x100000000LL) && (displacement > (-0x100000000LL)) && + "arm64 ADRP out of range"); + assert(((instruction & 0x9F000000) == 0x90000000) && + "reloc not on ADRP instruction"); + uint32_t immhi = (displacement >> 9) & (0x00FFFFE0); + uint32_t immlo = (displacement << 17) & (0x60000000); + return (instruction & 0x9F00001F) | immlo | immhi; +} + +ArchHandler_arm64::Arm64Kind +ArchHandler_arm64::offset12KindFromInstruction(uint32_t instruction) { + if (instruction & 0x08000000) { + switch ((instruction >> 30) & 0x3) { + case 0: + if ((instruction & 0x04800000) == 0x04800000) + return offset12scale16; + return offset12; + case 1: + return offset12scale2; + case 2: + return offset12scale4; + case 3: + return offset12scale8; + } + } + return offset12; +} + +uint32_t ArchHandler_arm64::setImm12(uint32_t instruction, uint32_t offset) { + assert(((offset & 0xFFFFF000) == 0) && "imm12 offset out of range"); + uint32_t imm12 = offset << 10; + return (instruction & 0xFFC003FF) | imm12; +} + +llvm::Error ArchHandler_arm64::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc)) { + case ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4: + // ex: bl _foo + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case 
ARM64_RELOC_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@PAGEOFF] + *kind = offset12KindFromInstruction(*(const little32_t *)fixupContent); + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@GOTPAGE + *kind = gotPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@GOTPAGEOFF] + *kind = gotOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@TLVPAGE + *kind = tlvPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@TLVPAGEOFF] + *kind = tlvOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_UNSIGNED | rExtern | rLength8: + // ex: .quad _foo + N + *kind = pointer64; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little64_t *)fixupContent; + return llvm::Error::success(); + case ARM64_RELOC_UNSIGNED | rLength8: + // ex: .quad Lfoo + N + *kind = pointer64; + return atomFromAddress(reloc.symbol, *(const little64_t *)fixupContent, + target, addend); + case ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8: + // ex: .quad _foo@GOT + *kind = pointer64ToGOT; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4: + // ex: .long _foo@GOT - . 
+ + // If we are in an .eh_frame section, then the kind of the relocation should + // not be delta32ToGOT. It may instead be unwindCIEToPersonalityFunction. + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindCIEToPersonalityFunction; + else + *kind = delta32ToGOT; + + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error::success(); + default: + return llvm::make_error("unsupported arm64 relocation type"); + } +} + +llvm::Error ArchHandler_arm64::getPairReferenceInfo( + const normalized::Relocation &reloc1, const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, uint32_t offsetInAtom, uint64_t fixupAddress, + bool swap, bool scatterable, FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4): + // ex: bl _foo+8 + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error::success(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4): + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error::success(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): { + // ex: ldr w0, [x1, _foo@PAGEOFF] + uint32_t cont32 = (int32_t)*(const little32_t *)fixupContent; + *kind = offset12KindFromInstruction(cont32); + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error::success(); + } + case 
((ARM64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength8): + // ex: .quad _foo - . + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + + // If we are in an .eh_frame section, then the kind of the relocation should + // not be delta64. It may instead be unwindFDEToFunction. + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindFDEToFunction; + else + *kind = delta64; + + // The offsets of the 2 relocations must match + if (reloc1.offset != reloc2.offset) + return llvm::make_error( + "paired relocs must have the same offset"); + *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom; + return llvm::Error::success(); + case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength4): + // ex: .quad _foo - . + *kind = delta32; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom; + return llvm::Error::success(); + default: + return llvm::make_error("unsupported arm64 relocation pair"); + } +} + +void ArchHandler_arm64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+#ifndef NDEBUG + if (atom.begin() != atom.end()) { + DEBUG_WITH_TYPE("atom-content", llvm::dbgs() + << "Applying fixups to atom:\n" + << " address=" + << llvm::format(" 0x%09lX", &atom) + << ", file=#" + << atom.file().ordinal() + << ", atom=#" + << atom.ordinal() + << ", name=" + << atom.name() + << ", type=" + << atom.contentType() + << "\n"); + } +#endif + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + bool targetUnnamed = target->name().empty(); + uint64_t targetAddress = 0; + if (isa(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, targetUnnamed); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, imageBaseAddress, + findSectionAddress); + } + } +} + +void ArchHandler_arm64::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast(loc); + ulittle64_t *loc64 = reinterpret_cast(loc); + int32_t displacement; + uint32_t instruction; + uint32_t value32; + uint32_t value64; + switch (static_cast(ref.kindValue())) { + case branch26: + displacement = (targetAddress - fixupAddress) + ref.addend(); + *loc32 = setDisplacementInBranch26(*loc32, displacement); + return; + case page21: + case gotPage21: + case tlvPage21: + displacement = + ((targetAddress + ref.addend()) & (-4096)) - (fixupAddress & (-4096)); + *loc32 = setDisplacementInADRP(*loc32, displacement); + return; + case offset12: + case gotOffset12: + case tlvOffset12: 
+ displacement = (targetAddress + ref.addend()) & 0x00000FFF; + *loc32 = setImm12(*loc32, displacement); + return; + case offset12scale2: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x1) == 0) && + "scaled imm12 not accessing 2-byte aligneds"); + *loc32 = setImm12(*loc32, displacement >> 1); + return; + case offset12scale4: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x3) == 0) && + "scaled imm12 not accessing 4-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 2); + return; + case offset12scale8: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x7) == 0) && + "scaled imm12 not accessing 8-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 3); + return; + case offset12scale16: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0xF) == 0) && + "scaled imm12 not accessing 16-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 4); + return; + case addOffset12: + instruction = *loc32; + assert(((instruction & 0xFFC00000) == 0xF9400000) && + "GOT reloc is not an LDR instruction"); + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + value32 = 0x91000000 | (instruction & 0x000003FF); + instruction = setImm12(value32, displacement); + *loc32 = instruction; + return; + case pointer64: + case pointer64ToGOT: + *loc64 = targetAddress + ref.addend(); + return; + case delta64: + case unwindFDEToFunction: + *loc64 = (targetAddress - fixupAddress) + ref.addend(); + return; + case delta32: + case delta32ToGOT: + case unwindCIEToPersonalityFunction: + *loc32 = (targetAddress - fixupAddress) + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: + *loc32 = (targetAddress - imageBaseAddress) + 
ref.addend(); + return; + case imageOffsetGot: + llvm_unreachable("imageOffsetGot should have been changed to imageOffset"); + break; + case unwindInfoToEhFrame: + value64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(value64 < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | value64; + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("invalid arm64 Reference Kind"); +} + +void ArchHandler_arm64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool targetUnnamed) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast(loc); + ulittle64_t *loc64 = reinterpret_cast(loc); + switch (static_cast(ref.kindValue())) { + case branch26: + *loc32 = setDisplacementInBranch26(*loc32, 0); + return; + case page21: + case gotPage21: + case tlvPage21: + *loc32 = setDisplacementInADRP(*loc32, 0); + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + case gotOffset12: + case tlvOffset12: + *loc32 = setImm12(*loc32, 0); + return; + case pointer64: + if (targetUnnamed) + *loc64 = targetAddress + ref.addend(); + else + *loc64 = ref.addend(); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case unwindFDEToFunction: + // We don't emit unwindFDEToFunction in -r mode as they are implicitly + // generated from the data in the __eh_frame section. So here we need + // to use the targetAddress so that we can generate the full relocation + // when we parse again later. 
+ *loc64 = targetAddress - fixupAddress; + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case negDelta32: + // We don't emit negDelta32 in -r mode as they are implicitly + // generated from the data in the __eh_frame section. So here we need + // to use the targetAddress so that we can generate the full relocation + // when we parse again later. + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case pointer64ToGOT: + *loc64 = 0; + return; + case delta32ToGOT: + *loc32 = inAtomAddress - fixupAddress; + return; + case unwindCIEToPersonalityFunction: + // We don't emit unwindCIEToPersonalityFunction in -r mode as they are + // implicitly generated from the data in the __eh_frame section. So here we + // need to use the targetAddress so that we can generate the full relocation + // when we parse again later. + *loc32 = targetAddress - fixupAddress; + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +void ArchHandler_arm64::appendSectionRelocations( + const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast(ref.kindValue())) { + case branch26: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } + return; + case page21: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } + return; + case gotPage21: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, 
symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case gotOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case tlvPage21: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case tlvOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case pointer64: + if (ref.target()->name().empty()) + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rLength8); + else + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength8); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case pointer64ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8); + return; + case delta32ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4); + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was run"); + case 
lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + llvm_unreachable("deltas from mach_header can only be in final images"); + case unwindCIEToPersonalityFunction: + case unwindFDEToFunction: + case unwindInfoToEhFrame: + case negDelta32: + // Do nothing. + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +std::unique_ptr ArchHandler::create_arm64() { + return std::unique_ptr(new ArchHandler_arm64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp new file mode 100644 index 000000000000..6ea8e8c42e80 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp @@ -0,0 +1,643 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle16_t; +using llvm::support::ulittle32_t; + +using llvm::support::little16_t; +using llvm::support::little32_t; + +class ArchHandler_x86 : public ArchHandler { +public: + ArchHandler_x86() = default; + ~ArchHandler_x86() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::x86; } + + const StubInfo &stubInfo() override { return _sStubInfo; } + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return false; + } + + Reference::KindValue imageOffsetKind() override { + return invalid; + } + + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override{ + return delta32; + } + + Reference::KindValue lazyImmediateLocationKind() override { + return lazyImmediateLocation; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + Reference::KindValue pointerKind() override { + return invalid; + } + + 
uint32_t dwarfCompactUnwindType() override { + return 0x04000000U; + } + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + return refKind == modeCode || refKind == modeData; + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return modeCode; + } + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum X86Kind : Reference::KindValue { + invalid, /// for error condition + + modeCode, /// Content starting at this offset is code. 
+ modeData, /// Content starting at this offset is data. + + // Kinds found in mach-o .o files: + branch32, /// ex: call _foo + branch16, /// ex: callw _foo + abs32, /// ex: movl _foo, %eax + funcRel32, /// ex: movl _foo-L1(%eax), %eax + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . + negDelta32, /// ex: .long . - _foo + + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + static bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_x86 +//===----------------------------------------------------------------------===// + +const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(branch32), + LLD_KIND_STRING_ENTRY(branch16), + LLD_KIND_STRING_ENTRY(abs32), + LLD_KIND_STRING_ENTRY(funcRel32), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_x86::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::x86, pointer32, 0, 0 }, + { Reference::KindArch::x86, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::x86, pointer32, 0, 0 }, + + // x86 code alignment + 1, + + // Stub size and code + 6, + { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp 
*lazyPointer + { Reference::KindArch::x86, abs32, 2, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 10, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $lazy-info-offset + 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper + { Reference::KindArch::x86, lazyImmediateLocation, 1, 0 }, + { Reference::KindArch::x86, branch32, 6, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeNonLazyPointer, + + // Stub Helper-Common size and code + 12, + // Stub helper alignment + 2, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $dyld_ImageLoaderCache + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *_fast_lazy_bind + 0x90 }, // nop + { Reference::KindArch::x86, abs32, 1, 0 }, + { false, 0, 0, 0 }, + { Reference::KindArch::x86, abs32, 7, 0 }, + { false, 0, 0, 0 } +}; + +bool ArchHandler_x86::isCallSite(const Reference &ref) { + return (ref.kindValue() == branch32); +} + +bool ArchHandler_x86::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_x86::isPairedReloc(const Relocation &reloc) { + if (!reloc.scattered) + return false; + return (reloc.type == GENERIC_RELOC_LOCAL_SECTDIFF) || + (reloc.type == GENERIC_RELOC_SECTDIFF); +} + +llvm::Error +ArchHandler_x86::getReferenceInfo(const Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + DefinedAtom::ContentPermissions perms; + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + switch (relocPattern(reloc)) { + case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4: + // ex: call _foo (and _foo undefined) + *kind = branch32; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent; + break; + case 
GENERIC_RELOC_VANILLA | rPcRel | rLength4: + // ex: call _foo (and _foo defined) + *kind = branch32; + targetAddress = + fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4: + // ex: call _foo+n (and _foo defined) + *kind = branch32; + targetAddress = + fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = targetAddress - reloc.value; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2: + // ex: callw _foo (and _foo undefined) + *kind = branch16; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rLength2: + // ex: callw _foo (and _foo defined) + *kind = branch16; + targetAddress = + fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2: + // ex: callw _foo+n (and _foo defined) + *kind = branch16; + targetAddress = + fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = targetAddress - reloc.value; + break; + case GENERIC_RELOC_VANILLA | rExtern | rLength4: + // ex: movl _foo, %eax (and _foo undefined) + // ex: .long _foo (and _foo undefined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? 
abs32 + : pointer32; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const ulittle32_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rLength4: + // ex: movl _foo, %eax (and _foo defined) + // ex: .long _foo (and _foo defined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 + : pointer32; + targetAddress = *(const ulittle32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rLength4: + // ex: .long _foo+n (and _foo defined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 + : pointer32; + if (auto ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = *(const ulittle32_t *)fixupContent - reloc.value; + break; + default: + return llvm::make_error("unsupported i386 relocation type"); + } + return llvm::Error::success(); +} + +llvm::Error +ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddr, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + DefinedAtom::ContentPermissions perms = inAtom->permissions(); + uint32_t fromAddress; + uint32_t toAddress; + uint32_t value; + const lld::Atom *fromTarget; + Reference::Addend offsetInTo; + Reference::Addend offsetInFrom; + switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((GENERIC_RELOC_SECTDIFF | rScattered | rLength4) << 16 | + GENERIC_RELOC_PAIR | rScattered | rLength4): + case ((GENERIC_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | + GENERIC_RELOC_PAIR | 
rScattered | rLength4): + toAddress = reloc1.value; + fromAddress = reloc2.value; + value = *(const little32_t *)fixupContent; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (fromTarget != inAtom) { + if (*target != inAtom) + return llvm::make_error( + "SECTDIFF relocation where neither target is in atom"); + *kind = negDelta32; + *addend = toAddress - value - fromAddress; + *target = fromTarget; + } else { + if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) { + // SECTDIFF relocations are used in i386 codegen where the function + // prolog does a CALL to the next instruction which POPs the return + // address into EBX which becomes the pic-base register. The POP + // instruction is label the used for the subtrahend in expressions. + // The funcRel32 kind represents the 32-bit delta to some symbol from + // the start of the function (atom) containing the funcRel32. + *kind = funcRel32; + uint32_t ta = fromAddress + value - toAddress; + *addend = ta - offsetInFrom; + } else { + *kind = delta32; + *addend = fromAddress + value - toAddress; + } + } + return llvm::Error::success(); + break; + default: + return llvm::make_error("unsupported i386 relocation type"); + } +} + +void ArchHandler_x86::generateAtomContent(const DefinedAtom &atom, + bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+ for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + if (isa(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } + } +} + +void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + ulittle32_t *loc32 = reinterpret_cast(loc); + switch (static_cast(ref.kindValue())) { + case branch32: + *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); + break; + case branch16: + *loc32 = (targetAddress - (fixupAddress + 2)) + ref.addend(); + break; + case pointer32: + case abs32: + *loc32 = targetAddress + ref.addend(); + break; + case funcRel32: + *loc32 = targetAddress - inAtomAddress + ref.addend(); + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + break; + case modeCode: + case modeData: + case lazyPointer: + // do nothing + break; + case lazyImmediateLocation: + *loc32 = ref.addend(); + break; + case invalid: + llvm_unreachable("invalid x86 Reference Kind"); + break; + } +} + +void ArchHandler_x86::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + bool useExternalReloc = 
useExternalRelocationTo(*ref.target()); + ulittle16_t *loc16 = reinterpret_cast(loc); + ulittle32_t *loc32 = reinterpret_cast(loc); + switch (static_cast(ref.kindValue())) { + case branch32: + if (useExternalReloc) + *loc32 = ref.addend() - (fixupAddress + 4); + else + *loc32 =(targetAddress - (fixupAddress+4)) + ref.addend(); + break; + case branch16: + if (useExternalReloc) + *loc16 = ref.addend() - (fixupAddress + 2); + else + *loc16 = (targetAddress - (fixupAddress+2)) + ref.addend(); + break; + case pointer32: + case abs32: + *loc32 = targetAddress + ref.addend(); + break; + case funcRel32: + *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + break; + case modeCode: + case modeData: + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid x86 Reference Kind"); + break; + } +} + +bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) { + // Undefined symbols are referenced via external relocations. + if (isa(&target)) + return true; + if (const DefinedAtom *defAtom = dyn_cast(&target)) { + switch (defAtom->merge()) { + case DefinedAtom::mergeAsTentative: + // Tentative definitions are referenced via external relocations. + return true; + case DefinedAtom::mergeAsWeak: + case DefinedAtom::mergeAsWeakAndAddressUsed: + // Global weak-defs are referenced via external relocations. + return (defAtom->scope() == DefinedAtom::scopeGlobal); + default: + break; + } + } + // Everything else is reference via an internal relocation. 
+ return false; +} + +void ArchHandler_x86::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + switch (static_cast(ref.kindValue())) { + case modeCode: + case modeData: + break; + case branch32: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + GENERIC_RELOC_VANILLA | rPcRel | rLength4); + } + break; + case branch16: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength2); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + GENERIC_RELOC_VANILLA | rPcRel | rLength2); + } + break; + case pointer32: + case abs32: + if (useExternalReloc) + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rLength4); + else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rLength4); + else + appendReloc(relocs, 
sectionOffset, sectionIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rLength4); + } + break; + case funcRel32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case delta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case negDelta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + break; + case invalid: + llvm_unreachable("unknown x86 Reference Kind"); + break; + } +} + +std::unique_ptr ArchHandler::create_x86() { + return std::unique_ptr(new ArchHandler_x86()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp new file mode 100644 index 000000000000..687407049d4b --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp @@ -0,0 +1,899 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86_64.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +using llvm::support::little32_t; +using llvm::support::little64_t; + +class ArchHandler_x86_64 : public ArchHandler { +public: + ArchHandler_x86_64() = default; + ~ArchHandler_x86_64() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { + return Reference::KindArch::x86_64; + } + + /// Used by GOTPass to locate GOT References + bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + switch (ref.kindValue()) { + case ripRel32GotLoad: + canBypassGOT = true; + return true; + case ripRel32Got: + canBypassGOT = false; + return true; + case imageOffsetGot: + canBypassGOT = false; + return true; + default: + return false; + } + } + + bool isTLVAccess(const Reference &ref) const override { + assert(ref.kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref.kindArch() == Reference::KindArch::x86_64); + return ref.kindValue() == ripRel32Tlv; + } + + void updateReferenceToTLV(const Reference *ref) override { + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::x86_64); + assert(ref->kindValue() == ripRel32Tlv); + const_cast(ref)->setKindValue(ripRel32); + } + + /// 
Used by GOTPass to update GOT References + void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::x86_64); + + switch (ref->kindValue()) { + case ripRel32Got: + assert(targetNowGOT && "target must be GOT"); + LLVM_FALLTHROUGH; + case ripRel32GotLoad: + const_cast(ref) + ->setKindValue(targetNowGOT ? ripRel32 : ripRel32GotLoadNowLea); + break; + case imageOffsetGot: + const_cast(ref)->setKindValue(imageOffset); + break; + default: + llvm_unreachable("unknown GOT reference kind"); + } + } + + bool needsCompactUnwind() override { + return true; + } + + Reference::KindValue imageOffsetKind() override { + return imageOffset; + } + + Reference::KindValue imageOffsetKindIndirect() override { + return imageOffsetGot; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return ripRel32Got; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override{ + return unwindFDEToFunction; + } + + Reference::KindValue lazyImmediateLocationKind() override { + return lazyImmediateLocation; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return unwindInfoToEhFrame; + } + + Reference::KindValue pointerKind() override { + return pointer64; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x04000000U; + } + + const StubInfo &stubInfo() override { return _sStubInfo; } + + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isCallSite(const Reference &) override; + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + 
FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { + return (atom->contentType() == DefinedAtom::typeCString); + } + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBase, + llvm::MutableArrayRef atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + return refKind == modeCode || refKind == modeData; + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return modeCode; + } + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum X86_64Kind: Reference::KindValue { + invalid, /// for error condition + + modeCode, /// Content starting at this offset is code. + modeData, /// Content starting at this offset is data. 
+ + // Kinds found in mach-o .o files: + branch32, /// ex: call _foo + ripRel32, /// ex: movq _foo(%rip), %rax + ripRel32Minus1, /// ex: movb $0x12, _foo(%rip) + ripRel32Minus2, /// ex: movw $0x1234, _foo(%rip) + ripRel32Minus4, /// ex: movl $0x12345678, _foo(%rip) + ripRel32Anon, /// ex: movq L1(%rip), %rax + ripRel32Minus1Anon, /// ex: movb $0x12, L1(%rip) + ripRel32Minus2Anon, /// ex: movw $0x1234, L1(%rip) + ripRel32Minus4Anon, /// ex: movw $0x12345678, L1(%rip) + ripRel32GotLoad, /// ex: movq _foo@GOTPCREL(%rip), %rax + ripRel32Got, /// ex: pushq _foo@GOTPCREL(%rip) + ripRel32Tlv, /// ex: movq _foo@TLVP(%rip), %rdi + pointer64, /// ex: .quad _foo + pointer64Anon, /// ex: .quad L1 + delta64, /// ex: .quad _foo - . + delta32, /// ex: .long _foo - . + delta64Anon, /// ex: .quad L1 - . + delta32Anon, /// ex: .long L1 - . + negDelta64, /// ex: .quad . - _foo + negDelta32, /// ex: .long . - _foo + + // Kinds introduced by Passes: + ripRel32GotLoadNowLea, /// Target of GOT load is in linkage unit so + /// "movq _foo@GOTPCREL(%rip), %rax" can be changed + /// to "leaq _foo(%rip), %rax + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + + imageOffset, /// Location contains offset of atom in final image + imageOffsetGot, /// Location contains offset of GOT entry for atom in + /// final image (typically personality function). + unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in + /// relocatable object (yay for implicit contracts!). + unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to + /// refer to __eh_frame entry. + tlvInitSectionOffset /// Location contains offset tlv init-value atom + /// within the __thread_data section. 
+ }; + + Reference::KindValue kindFromReloc(const normalized::Relocation &reloc); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress); +}; + +const Registry::KindStrings ArchHandler_x86_64::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(branch32), + LLD_KIND_STRING_ENTRY(ripRel32), + LLD_KIND_STRING_ENTRY(ripRel32Minus1), + LLD_KIND_STRING_ENTRY(ripRel32Minus2), + LLD_KIND_STRING_ENTRY(ripRel32Minus4), + LLD_KIND_STRING_ENTRY(ripRel32Anon), + LLD_KIND_STRING_ENTRY(ripRel32Minus1Anon), + LLD_KIND_STRING_ENTRY(ripRel32Minus2Anon), + LLD_KIND_STRING_ENTRY(ripRel32Minus4Anon), + LLD_KIND_STRING_ENTRY(ripRel32GotLoad), + LLD_KIND_STRING_ENTRY(ripRel32GotLoadNowLea), + LLD_KIND_STRING_ENTRY(ripRel32Got), + LLD_KIND_STRING_ENTRY(ripRel32Tlv), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_ENTRY(pointer64), + LLD_KIND_STRING_ENTRY(pointer64Anon), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(delta64), + LLD_KIND_STRING_ENTRY(delta32Anon), + LLD_KIND_STRING_ENTRY(delta64Anon), + LLD_KIND_STRING_ENTRY(negDelta64), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(imageOffset), + LLD_KIND_STRING_ENTRY(imageOffsetGot), + LLD_KIND_STRING_ENTRY(unwindFDEToFunction), + LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), + LLD_KIND_STRING_ENTRY(tlvInitSectionOffset), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_x86_64::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::x86_64, pointer64, 0, 0 }, + { Reference::KindArch::x86_64, lazyPointer, 0, 0 
}, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::x86_64, pointer64, 0, 0 }, + + // x86_64 code alignment 2^1 + 1, + + // Stub size and code + 6, + { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer + { Reference::KindArch::x86_64, ripRel32, 2, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 10, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushq $lazy-info-offset + 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper + { Reference::KindArch::x86_64, lazyImmediateLocation, 1, 0 }, + { Reference::KindArch::x86_64, branch32, 6, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeNonLazyPointer, + + // Stub Helper-Common size and code + 16, + // Stub helper alignment + 2, + { 0x4C, 0x8D, 0x1D, 0x00, 0x00, 0x00, 0x00, // leaq cache(%rip),%r11 + 0x41, 0x53, // push %r11 + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *binder(%rip) + 0x90 }, // nop + { Reference::KindArch::x86_64, ripRel32, 3, 0 }, + { false, 0, 0, 0 }, + { Reference::KindArch::x86_64, ripRel32, 11, 0 }, + { false, 0, 0, 0 } + +}; + +bool ArchHandler_x86_64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + return (ref.kindValue() == branch32); +} + +bool ArchHandler_x86_64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64 || kind == pointer64Anon); +} + +bool ArchHandler_x86_64::isPairedReloc(const Relocation &reloc) { + return (reloc.type == X86_64_RELOC_SUBTRACTOR); +} + +Reference::KindValue +ArchHandler_x86_64::kindFromReloc(const Relocation &reloc) { + switch(relocPattern(reloc)) { + case X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4: + return branch32; + case X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4: + return ripRel32; + case 
X86_64_RELOC_SIGNED | rPcRel | rLength4: + return ripRel32Anon; + case X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4: + return ripRel32Minus1; + case X86_64_RELOC_SIGNED_1 | rPcRel | rLength4: + return ripRel32Minus1Anon; + case X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4: + return ripRel32Minus2; + case X86_64_RELOC_SIGNED_2 | rPcRel | rLength4: + return ripRel32Minus2Anon; + case X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4: + return ripRel32Minus4; + case X86_64_RELOC_SIGNED_4 | rPcRel | rLength4: + return ripRel32Minus4Anon; + case X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4: + return ripRel32GotLoad; + case X86_64_RELOC_GOT | rPcRel | rExtern | rLength4: + return ripRel32Got; + case X86_64_RELOC_TLV | rPcRel | rExtern | rLength4: + return ripRel32Tlv; + case X86_64_RELOC_UNSIGNED | rExtern | rLength8: + return pointer64; + case X86_64_RELOC_UNSIGNED | rLength8: + return pointer64Anon; + default: + return invalid; + } +} + +llvm::Error +ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + *kind = kindFromReloc(reloc); + if (*kind == invalid) + return llvm::make_error("unknown type"); + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + switch (*kind) { + case branch32: + case ripRel32: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return llvm::Error::success(); + case ripRel32Minus1: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 1; + return llvm::Error::success(); + case ripRel32Minus2: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; 
+ *addend = (int32_t)*(const little32_t *)fixupContent + 2; + return llvm::Error::success(); + case ripRel32Minus4: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 4; + return llvm::Error::success(); + case ripRel32Anon: + targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus1Anon: + targetAddress = fixupAddress + 5 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus2Anon: + targetAddress = fixupAddress + 6 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus4Anon: + targetAddress = fixupAddress + 8 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32GotLoad: + case ripRel32Got: + case ripRel32Tlv: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return llvm::Error::success(); + case tlvInitSectionOffset: + case pointer64: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // If this is the 3rd pointer of a tlv-thunk (i.e. the pointer to the TLV's + // initial value) we need to handle it specially. 
+ if (inAtom->contentType() == DefinedAtom::typeThunkTLV && + offsetInAtom == 16) { + *kind = tlvInitSectionOffset; + assert(*addend == 0 && "TLV-init has non-zero addend?"); + } else + *addend = *(const little64_t *)fixupContent; + return llvm::Error::success(); + case pointer64Anon: + targetAddress = *(const little64_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + default: + llvm_unreachable("bad reloc kind"); + } +} + +llvm::Error +ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + const lld::Atom *fromTarget; + if (auto ec = atomFromSymbolIndex(reloc1.symbol, &fromTarget)) + return ec; + + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength8): { + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + uint64_t encodedAddend = (int64_t)*(const little64_t *)fixupContent; + if (inAtom == fromTarget) { + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindFDEToFunction; + else + *kind = delta64; + *addend = encodedAddend + offsetInAtom; + } else if (inAtom == *target) { + *kind = negDelta64; + *addend = encodedAddend - offsetInAtom; + *target = fromTarget; + } else + return llvm::make_error("Invalid pointer diff"); + return llvm::Error::success(); + } + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength4): { + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return 
ec; + uint32_t encodedAddend = (int32_t)*(const little32_t *)fixupContent; + if (inAtom == fromTarget) { + *kind = delta32; + *addend = encodedAddend + offsetInAtom; + } else if (inAtom == *target) { + *kind = negDelta32; + *addend = encodedAddend - offsetInAtom; + *target = fromTarget; + } else + return llvm::make_error("Invalid pointer diff"); + return llvm::Error::success(); + } + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rLength8): + if (fromTarget != inAtom) + return llvm::make_error("pointer diff not in base atom"); + *kind = delta64Anon; + targetAddress = offsetInAtom + (int64_t)*(const little64_t *)fixupContent; + return atomFromAddress(reloc2.symbol, targetAddress, target, addend); + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rLength4): + if (fromTarget != inAtom) + return llvm::make_error("pointer diff not in base atom"); + *kind = delta32Anon; + targetAddress = offsetInAtom + (int32_t)*(const little32_t *)fixupContent; + return atomFromAddress(reloc2.symbol, targetAddress, target, addend); + default: + return llvm::make_error("unknown pair"); + } +} + +void ArchHandler_x86_64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + llvm::MutableArrayRef atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+ for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + if (isa(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress, imageBaseAddress, findSectionAddress); + } + } +} + +void ArchHandler_x86_64::applyFixupFinal( + const Reference &ref, uint8_t *loc, uint64_t fixupAddress, + uint64_t targetAddress, uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + ulittle32_t *loc32 = reinterpret_cast(loc); + ulittle64_t *loc64 = reinterpret_cast(loc); + switch (static_cast(ref.kindValue())) { + case branch32: + case ripRel32: + case ripRel32Anon: + case ripRel32Got: + case ripRel32GotLoad: + case ripRel32Tlv: + *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); + return; + case pointer64: + case pointer64Anon: + *loc64 = targetAddress + ref.addend(); + return; + case tlvInitSectionOffset: + *loc64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + return; + case ripRel32Minus1: + case ripRel32Minus1Anon: + *loc32 = targetAddress - (fixupAddress + 5) + ref.addend(); + return; + case ripRel32Minus2: + case ripRel32Minus2Anon: + *loc32 = targetAddress - (fixupAddress + 6) + ref.addend(); + return; + case ripRel32Minus4: + case ripRel32Minus4Anon: + *loc32 = targetAddress - (fixupAddress + 8) + ref.addend(); + return; + case delta32: + case delta32Anon: + *loc32 = targetAddress - fixupAddress + ref.addend(); + return; + case delta64: + case delta64Anon: + case 
unwindFDEToFunction: + *loc64 = targetAddress - fixupAddress + ref.addend(); + return; + case ripRel32GotLoadNowLea: + // Change MOVQ to LEA + assert(loc[-2] == 0x8B); + loc[-2] = 0x8D; + *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); + return; + case negDelta64: + *loc64 = fixupAddress - targetAddress + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case modeCode: + case modeData: + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: + case imageOffsetGot: + *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); + return; + case unwindInfoToEhFrame: { + uint64_t val = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(val < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | val; + return; + } + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("invalid x86_64 Reference Kind"); +} + +void ArchHandler_x86_64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + ulittle32_t *loc32 = reinterpret_cast(loc); + ulittle64_t *loc64 = reinterpret_cast(loc); + switch (static_cast(ref.kindValue())) { + case branch32: + case ripRel32: + case ripRel32Got: + case ripRel32GotLoad: + case ripRel32Tlv: + *loc32 = ref.addend(); + return; + case ripRel32Anon: + *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); + return; + case tlvInitSectionOffset: + case pointer64: + *loc64 = ref.addend(); + return; + case pointer64Anon: + *loc64 = targetAddress + ref.addend(); + return; + case ripRel32Minus1: + *loc32 = ref.addend() - 1; + return; + case ripRel32Minus1Anon: + *loc32 = (targetAddress - (fixupAddress + 5)) + 
ref.addend(); + return; + case ripRel32Minus2: + *loc32 = ref.addend() - 2; + return; + case ripRel32Minus2Anon: + *loc32 = (targetAddress - (fixupAddress + 6)) + ref.addend(); + return; + case ripRel32Minus4: + *loc32 = ref.addend() - 4; + return; + case ripRel32Minus4Anon: + *loc32 = (targetAddress - (fixupAddress + 8)) + ref.addend(); + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta32Anon: + // The value we write here should be the delta to the target + // after taking in to account the difference from the fixup back to the + // last defined label + // ie, if we have: + // _base: ... + // Lfixup: .quad Ltarget - . + // ... + // Ltarget: + // + // Then we want to encode the value (Ltarget + addend) - (LFixup - _base) + *loc32 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta64Anon: + // The value we write here should be the delta to the target + // after taking in to account the difference from the fixup back to the + // last defined label + // ie, if we have: + // _base: ... + // Lfixup: .quad Ltarget - . + // ... 
+ // Ltarget: + // + // Then we want to encode the value (Ltarget + addend) - (LFixup - _base) + *loc64 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); + return; + case negDelta64: + *loc64 = ref.addend() + fixupAddress - inAtomAddress; + return; + case negDelta32: + *loc32 = ref.addend() + fixupAddress - inAtomAddress; + return; + case ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case modeCode: + case modeData: + case unwindFDEToFunction: + // Do nothing for now + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + +void ArchHandler_x86_64::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast(ref.kindValue())) { + case modeCode: + case modeData: + return; + case branch32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4); + return; + case ripRel32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED | rPcRel | rLength4 ); + return; + case 
ripRel32Got: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_GOT | rPcRel | rExtern | rLength4 ); + return; + case ripRel32GotLoad: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Tlv: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_TLV | rPcRel | rExtern | rLength4 ); + return; + case tlvInitSectionOffset: + case pointer64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case pointer64Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8); + return; + case ripRel32Minus1: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus1Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rLength4 ); + return; + case ripRel32Minus2: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus2Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rLength4 ); + return; + case ripRel32Minus4: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus4Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rLength4 ); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + 
X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case delta32Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength4 ); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case delta64Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8 ); + return; + case unwindFDEToFunction: + case unwindInfoToEhFrame: + return; + case negDelta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case negDelta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + llvm_unreachable("__unwind_info references should have been resolved"); + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + +std::unique_ptr ArchHandler::create_x86_64() { + return std::unique_ptr(new ArchHandler_x86_64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/Atoms.h b/lld/lib/ReaderWriter/MachO/Atoms.h new file mode 100644 index 000000000000..c61aaa88e8df --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/Atoms.h @@ -0,0 +1,180 @@ +//===- lib/ReaderWriter/MachO/Atoms.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_ATOMS_H +#define LLD_READER_WRITER_MACHO_ATOMS_H + +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace lld { + +class File; + +namespace mach_o { + +class MachODefinedAtom : public SimpleDefinedAtom { +public: + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, Merge merge, bool thumb, bool noDeadStrip, + const ArrayRef content, Alignment align) + : SimpleDefinedAtom(f), _name(name), _content(content), + _align(align), _contentType(type), _scope(scope), _merge(merge), + _thumb(thumb), _noDeadStrip(noDeadStrip) {} + + // Constructor for zero-fill content + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, uint64_t size, bool noDeadStrip, + Alignment align) + : SimpleDefinedAtom(f), _name(name), + _content(ArrayRef(nullptr, size)), _align(align), + _contentType(type), _scope(scope), _merge(mergeNo), _thumb(false), + _noDeadStrip(noDeadStrip) {} + + ~MachODefinedAtom() override = default; + + 
uint64_t size() const override { return _content.size(); } + + ContentType contentType() const override { return _contentType; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + Merge merge() const override { return _merge; } + + DeadStripKind deadStrip() const override { + if (_contentType == DefinedAtom::typeInitializerPtr) + return deadStripNever; + if (_contentType == DefinedAtom::typeTerminatorPtr) + return deadStripNever; + if (_noDeadStrip) + return deadStripNever; + return deadStripNormal; + } + + ArrayRef rawContent() const override { + // Note: Zerofill atoms have a content pointer which is null. + return _content; + } + + bool isThumb() const { return _thumb; } + +private: + const StringRef _name; + const ArrayRef _content; + const DefinedAtom::Alignment _align; + const ContentType _contentType; + const Scope _scope; + const Merge _merge; + const bool _thumb; + const bool _noDeadStrip; +}; + +class MachODefinedCustomSectionAtom : public MachODefinedAtom { +public: + MachODefinedCustomSectionAtom(const File &f, const StringRef name, + Scope scope, ContentType type, Merge merge, + bool thumb, bool noDeadStrip, + const ArrayRef content, + StringRef sectionName, Alignment align) + : MachODefinedAtom(f, name, scope, type, merge, thumb, noDeadStrip, + content, align), + _sectionName(sectionName) {} + + ~MachODefinedCustomSectionAtom() override = default; + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionCustomRequired; + } + + StringRef customSectionName() const override { + return _sectionName; + } +private: + StringRef _sectionName; +}; + +class MachOTentativeDefAtom : public SimpleDefinedAtom { +public: + MachOTentativeDefAtom(const File &f, const StringRef name, Scope scope, + uint64_t size, DefinedAtom::Alignment align) + : SimpleDefinedAtom(f), _name(std::string(name)), _scope(scope), + _size(size), 
_align(align) {} + + ~MachOTentativeDefAtom() override = default; + + uint64_t size() const override { return _size; } + + Merge merge() const override { return DefinedAtom::mergeAsTentative; } + + ContentType contentType() const override { return DefinedAtom::typeZeroFill; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + ArrayRef rawContent() const override { return ArrayRef(); } + +private: + const std::string _name; + const Scope _scope; + const uint64_t _size; + const DefinedAtom::Alignment _align; +}; + +class MachOSharedLibraryAtom : public SharedLibraryAtom { +public: + MachOSharedLibraryAtom(const File &file, StringRef name, + StringRef dylibInstallName, bool weakDef) + : SharedLibraryAtom(), _file(file), _name(name), + _dylibInstallName(dylibInstallName) {} + ~MachOSharedLibraryAtom() override = default; + + StringRef loadName() const override { return _dylibInstallName; } + + bool canBeNullAtRuntime() const override { + // FIXME: this may actually be changeable. For now, all symbols are strongly + // defined though. + return false; + } + + const File &file() const override { return _file; } + + StringRef name() const override { return _name; } + + Type type() const override { + // Unused in MachO (I think). 
+ return Type::Unknown; + } + + uint64_t size() const override { + // Unused in MachO (I think) + return 0; + } + +private: + const File &_file; + StringRef _name; + StringRef _dylibInstallName; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_ATOMS_H diff --git a/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/lld/lib/ReaderWriter/MachO/CMakeLists.txt new file mode 100644 index 000000000000..c3e2497b8c9e --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/CMakeLists.txt @@ -0,0 +1,36 @@ +add_lld_library(lldMachOOld + ArchHandler.cpp + ArchHandler_arm.cpp + ArchHandler_arm64.cpp + ArchHandler_x86.cpp + ArchHandler_x86_64.cpp + CompactUnwindPass.cpp + GOTPass.cpp + LayoutPass.cpp + MachOLinkingContext.cpp + MachONormalizedFileBinaryReader.cpp + MachONormalizedFileBinaryWriter.cpp + MachONormalizedFileFromAtoms.cpp + MachONormalizedFileToAtoms.cpp + MachONormalizedFileYAML.cpp + ObjCPass.cpp + ShimPass.cpp + StubsPass.cpp + TLVPass.cpp + WriterMachO.cpp + + LINK_COMPONENTS + DebugInfoDWARF + Demangle + Object + Support + TextAPI + + LINK_LIBS + lldCommon + lldCore + lldYAML + ${LLVM_PTHREAD_LIB} + ) + +include_directories(.) diff --git a/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp new file mode 100644 index 000000000000..f3636feb217b --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp @@ -0,0 +1,580 @@ +//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file A pass to convert MachO's __compact_unwind sections into the final +/// __unwind_info format used during runtime. 
See +/// mach-o/compact_unwind_encoding.h for more details on the formats involved. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "MachOPasses.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +#define DEBUG_TYPE "macho-compact-unwind" + +namespace lld { +namespace mach_o { + +namespace { +struct CompactUnwindEntry { + const Atom *rangeStart; + const Atom *personalityFunction; + const Atom *lsdaLocation; + const Atom *ehFrame; + + uint32_t rangeLength; + + // There are 3 types of compact unwind entry, distinguished by the encoding + // value: 0 indicates a function with no unwind info; + // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to + // __eh_frame, and that the ehFrame entry will be valid; any other value is a + // real compact unwind entry -- personalityFunction will be set and + // lsdaLocation may be. 
+ uint32_t encoding; + + CompactUnwindEntry(const DefinedAtom *function) + : rangeStart(function), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), + encoding(0) {} + + CompactUnwindEntry() + : rangeStart(nullptr), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} +}; + +struct UnwindInfoPage { + ArrayRef entries; +}; +} + +class UnwindInfoAtom : public SimpleDefinedAtom { +public: + UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, + std::vector &personalities, + std::vector &commonEncodings, + std::vector &pages, uint32_t numLSDAs) + : SimpleDefinedAtom(file), _archHandler(archHandler), + _commonEncodingsOffset(7 * sizeof(uint32_t)), + _personalityArrayOffset(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)), + _topLevelIndexOffset(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)), + _lsdaIndexOffset(_topLevelIndexOffset + + 3 * (pages.size() + 1) * sizeof(uint32_t)), + _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), + _isBig(isBig) { + + addHeader(commonEncodings.size(), personalities.size(), pages.size()); + addCommonEncodings(commonEncodings); + addPersonalityFunctions(personalities); + addTopLevelIndexes(pages); + addLSDAIndexes(pages, numLSDAs); + addSecondLevelPages(pages); + } + + ~UnwindInfoAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeProcessedUnwindInfo; + } + + Alignment alignment() const override { return 4; } + + uint64_t size() const override { return _contents.size(); } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef rawContent() const override { return _contents; } + + void addHeader(uint32_t numCommon, uint32_t numPersonalities, + uint32_t numPages) { + using normalized::write32; + + uint32_t headerSize = 7 * sizeof(uint32_t); + 
_contents.resize(headerSize); + + uint8_t *headerEntries = _contents.data(); + // version + write32(headerEntries, 1, _isBig); + // commonEncodingsArraySectionOffset + write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); + // commonEncodingsArrayCount + write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); + // personalityArraySectionOffset + write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, + _isBig); + // personalityArrayCount + write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); + // indexSectionOffset + write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); + // indexCount + write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); + } + + /// Add the list of common encodings to the section; this is simply an array + /// of uint32_t compact values. Size has already been specified in the header. + void addCommonEncodings(std::vector &commonEncodings) { + using normalized::write32; + + _contents.resize(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)); + uint8_t *commonEncodingsArea = + reinterpret_cast(_contents.data() + _commonEncodingsOffset); + + for (uint32_t encoding : commonEncodings) { + write32(commonEncodingsArea, encoding, _isBig); + commonEncodingsArea += sizeof(uint32_t); + } + } + + void addPersonalityFunctions(std::vector personalities) { + _contents.resize(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)); + + for (unsigned i = 0; i < personalities.size(); ++i) + addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), + personalities[i]); + } + + void addTopLevelIndexes(std::vector &pages) { + using normalized::write32; + + uint32_t numIndexes = pages.size() + 1; + _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); + + uint32_t pageLoc = _firstPageOffset; + + // The most difficult job here is calculating the LSDAs; everything else + // follows fairly 
naturally, but we can't state where the first + uint8_t *indexData = &_contents[_topLevelIndexOffset]; + uint32_t numLSDAs = 0; + for (unsigned i = 0; i < pages.size(); ++i) { + // functionOffset + addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), + pages[i].entries[0].rangeStart); + // secondLevelPagesSectionOffset + write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); + write32(indexData + (3 * i + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + + for (auto &entry : pages[i].entries) + if (entry.lsdaLocation) + ++numLSDAs; + } + + // Finally, write out the final sentinel index + auto &finalEntry = pages[pages.size() - 1].entries.back(); + addImageReference(_topLevelIndexOffset + + 3 * pages.size() * sizeof(uint32_t), + finalEntry.rangeStart, finalEntry.rangeLength); + // secondLevelPagesSectionOffset => 0 + write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + } + + void addLSDAIndexes(std::vector &pages, uint32_t numLSDAs) { + _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); + + uint32_t curOffset = _lsdaIndexOffset; + for (auto &page : pages) { + for (auto &entry : page.entries) { + if (!entry.lsdaLocation) + continue; + + addImageReference(curOffset, entry.rangeStart); + addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); + curOffset += 2 * sizeof(uint32_t); + } + } + } + + void addSecondLevelPages(std::vector &pages) { + for (auto &page : pages) { + addRegularSecondLevelPage(page); + } + } + + void addRegularSecondLevelPage(const UnwindInfoPage &page) { + uint32_t curPageOffset = _contents.size(); + const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); + uint32_t curPageSize = + headerSize + 2 * page.entries.size() * sizeof(uint32_t); + _contents.resize(curPageOffset + curPageSize); + + using normalized::write32; + using normalized::write16; + // 2 => regular 
page + write32(&_contents[curPageOffset], 2, _isBig); + // offset of 1st entry + write16(&_contents[curPageOffset + 4], headerSize, _isBig); + write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); + + uint32_t pagePos = curPageOffset + headerSize; + for (auto &entry : page.entries) { + addImageReference(pagePos, entry.rangeStart); + + write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, + _isBig); + if ((entry.encoding & 0x0f000000U) == + _archHandler.dwarfCompactUnwindType()) + addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); + + pagePos += 2 * sizeof(uint32_t); + } + } + + void addEhFrameReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); + } + + void addImageReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKind(), offset, dest, addend); + } + + void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKindIndirect(), offset, dest, 0); + } + +private: + mach_o::ArchHandler &_archHandler; + std::vector _contents; + uint32_t _commonEncodingsOffset; + uint32_t _personalityArrayOffset; + uint32_t _topLevelIndexOffset; + uint32_t _lsdaIndexOffset; + uint32_t _firstPageOffset; + bool _isBig; +}; + +/// Pass for instantiating and optimizing GOT slots. 
+/// +class CompactUnwindPass : public Pass { +public: + CompactUnwindPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file("")), + _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + LLVM_DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); + + std::map unwindLocs; + std::map dwarfFrames; + std::vector personalities; + uint32_t numLSDAs = 0; + + // First collect all __compact_unwind and __eh_frame entries, addressable by + // the function referred to. + collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, + numLSDAs); + + collectDwarfFrameEntries(mergedFile, dwarfFrames); + + // Skip rest of pass if no unwind info. + if (unwindLocs.empty() && dwarfFrames.empty()) + return llvm::Error::success(); + + // FIXME: if there are more than 4 personality functions then we need to + // defer to DWARF info for the ones we don't put in the list. They should + // also probably be sorted by frequency. + assert(personalities.size() <= 4); + + // TODO: Find common encodings for use by compressed pages. + std::vector commonEncodings; + + // Now sort the entries by final address and fixup the compact encoding to + // its final form (i.e. set personality function bits & create DWARF + // references where needed). + std::vector unwindInfos = createUnwindInfoEntries( + mergedFile, unwindLocs, personalities, dwarfFrames); + + // Remove any unused eh-frame atoms. + pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames); + + // Finally, we can start creating pages based on these entries. + + LLVM_DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); + // FIXME: we split the entries into pages naively: lots of 4k pages followed + // by a small one. ld64 tried to minimize space and align them to real 4k + // boundaries. 
That might be worth doing, or perhaps we could perform some + // minor balancing for expected number of lookups. + std::vector pages; + auto remainingInfos = llvm::makeArrayRef(unwindInfos); + do { + pages.push_back(UnwindInfoPage()); + + // FIXME: we only create regular pages at the moment. These can hold up to + // 1021 entries according to the documentation. + unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size()); + + pages.back().entries = remainingInfos.slice(0, entriesInPage); + remainingInfos = remainingInfos.slice(entriesInPage); + + LLVM_DEBUG(llvm::dbgs() + << " Page from " + << pages.back().entries[0].rangeStart->name() << " to " + << pages.back().entries.back().rangeStart->name() << " + " + << llvm::format("0x%x", + pages.back().entries.back().rangeLength) + << " has " << entriesInPage << " entries\n"); + } while (!remainingInfos.empty()); + + auto *unwind = new (_file.allocator()) + UnwindInfoAtom(_archHandler, _file, _isBig, personalities, + commonEncodings, pages, numLSDAs); + mergedFile.addAtom(*unwind); + + // Finally, remove all __compact_unwind atoms now that we've processed them. 
+ mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) { + return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; + }); + + return llvm::Error::success(); + } + + void collectCompactUnwindEntries( + const SimpleFile &mergedFile, + std::map &unwindLocs, + std::vector &personalities, uint32_t &numLSDAs) { + LLVM_DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); + + for (const DefinedAtom *atom : mergedFile.defined()) { + if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) + continue; + + auto unwindEntry = extractCompactUnwindEntry(atom); + unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); + + LLVM_DEBUG(llvm::dbgs() << " Entry for " + << unwindEntry.rangeStart->name() << ", encoding=" + << llvm::format("0x%08x", unwindEntry.encoding)); + if (unwindEntry.personalityFunction) + LLVM_DEBUG(llvm::dbgs() + << ", personality=" + << unwindEntry.personalityFunction->name() + << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); + LLVM_DEBUG(llvm::dbgs() << '\n'); + + // Count number of LSDAs we see, since we need to know how big the index + // will be while laying out the section. + if (unwindEntry.lsdaLocation) + ++numLSDAs; + + // Gather the personality functions now, so that they're in deterministic + // order (derived from the DefinedAtom order). + if (unwindEntry.personalityFunction && + !llvm::count(personalities, unwindEntry.personalityFunction)) + personalities.push_back(unwindEntry.personalityFunction); + } + } + + CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { + CompactUnwindEntry entry; + + for (const Reference *ref : *atom) { + switch (ref->offsetInAtom()) { + case 0: + // FIXME: there could legitimately be functions with multiple encoding + // entries. However, nothing produces them at the moment. 
+ assert(ref->addend() == 0 && "unexpected offset into function"); + entry.rangeStart = ref->target(); + break; + case 0x10: + assert(ref->addend() == 0 && "unexpected offset into personality fn"); + entry.personalityFunction = ref->target(); + break; + case 0x18: + assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); + entry.lsdaLocation = ref->target(); + break; + } + } + + if (atom->rawContent().size() < 4 * sizeof(uint32_t)) + return entry; + + using normalized::read32; + entry.rangeLength = + read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); + entry.encoding = + read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); + return entry; + } + + void + collectDwarfFrameEntries(const SimpleFile &mergedFile, + std::map &dwarfFrames) { + for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) { + if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) + continue; + if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) + continue; + + if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) + dwarfFrames[function] = ehFrameAtom; + } + } + + /// Every atom defined in __TEXT,__text needs an entry in the final + /// __unwind_info section (in order). These comes from two sources: + /// + Input __compact_unwind sections where possible (after adding the + /// personality function offset which is only known now). + /// + A synthesised reference to __eh_frame if there's no __compact_unwind + /// or too many personality functions to be accommodated. + std::vector createUnwindInfoEntries( + const SimpleFile &mergedFile, + const std::map &unwindLocs, + const std::vector &personalities, + const std::map &dwarfFrames) { + std::vector unwindInfos; + + LLVM_DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); + // The final order in the __unwind_info section must be derived from the + // order of typeCode atoms, since that's how they'll be put into the object + // file eventually (yuck!). 
+ for (const DefinedAtom *atom : mergedFile.defined()) { + if (atom->contentType() != DefinedAtom::typeCode) + continue; + + unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( + atom, unwindLocs, personalities, dwarfFrames)); + + LLVM_DEBUG(llvm::dbgs() + << " Entry for " << atom->name() << ", final encoding=" + << llvm::format("0x%08x", unwindInfos.back().encoding) + << '\n'); + } + + return unwindInfos; + } + + /// Remove unused EH frames. + /// + /// An EH frame is considered unused if there is a corresponding compact + /// unwind atom that doesn't require the EH frame. + void pruneUnusedEHFrames( + SimpleFile &mergedFile, + const std::vector &unwindInfos, + const std::map &unwindLocs, + const std::map &dwarfFrames) { + + // Worklist of all 'used' FDEs. + std::vector usedDwarfWorklist; + + // We have to check two conditions when building the worklist: + // (1) EH frames used by compact unwind entries. + for (auto &entry : unwindInfos) + if (entry.ehFrame) + usedDwarfWorklist.push_back(cast(entry.ehFrame)); + + // (2) EH frames that reference functions with no corresponding compact + // unwind info. + for (auto &entry : dwarfFrames) + if (!unwindLocs.count(entry.first)) + usedDwarfWorklist.push_back(cast(entry.second)); + + // Add all transitively referenced CFI atoms by processing the worklist. + std::set usedDwarfFrames; + while (!usedDwarfWorklist.empty()) { + const DefinedAtom *cfiAtom = usedDwarfWorklist.back(); + usedDwarfWorklist.pop_back(); + usedDwarfFrames.insert(cfiAtom); + for (const auto *ref : *cfiAtom) { + const DefinedAtom *cfiTarget = dyn_cast(ref->target()); + if (cfiTarget->contentType() == DefinedAtom::typeCFI) + usedDwarfWorklist.push_back(cfiTarget); + } + } + + // Finally, delete all unreferenced CFI atoms. 
+ mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) { + if ((atom->contentType() == DefinedAtom::typeCFI) && + !usedDwarfFrames.count(atom)) + return true; + return false; + }); + } + + CompactUnwindEntry finalizeUnwindInfoEntryForAtom( + const DefinedAtom *function, + const std::map &unwindLocs, + const std::vector &personalities, + const std::map &dwarfFrames) { + auto unwindLoc = unwindLocs.find(function); + + CompactUnwindEntry entry; + if (unwindLoc == unwindLocs.end()) { + // Default entry has correct encoding (0 => no unwind), but we need to + // synthesise the function. + entry.rangeStart = function; + entry.rangeLength = function->size(); + } else + entry = unwindLoc->second; + + + // If there's no __compact_unwind entry, or it explicitly says to use + // __eh_frame, we need to try and fill in the correct DWARF atom. + if (entry.encoding == _archHandler.dwarfCompactUnwindType() || + entry.encoding == 0) { + auto dwarfFrame = dwarfFrames.find(function); + if (dwarfFrame != dwarfFrames.end()) { + entry.encoding = _archHandler.dwarfCompactUnwindType(); + entry.ehFrame = dwarfFrame->second; + } + } + + auto personality = llvm::find(personalities, entry.personalityFunction); + uint32_t personalityIdx = personality == personalities.end() + ? 0 + : personality - personalities.begin() + 1; + + // FIXME: We should also use DWARF when there isn't enough room for the + // personality function in the compact encoding. 
+ assert(personalityIdx < 4 && "too many personality functions"); + + entry.encoding |= personalityIdx << 28; + + if (entry.lsdaLocation) + entry.encoding |= 1U << 30; + + return entry; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + bool _isBig; +}; + +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsCompactUnwindPass()); + pm.add(std::make_unique(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/DebugInfo.h b/lld/lib/ReaderWriter/MachO/DebugInfo.h new file mode 100644 index 000000000000..591dd1ebad86 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/DebugInfo.h @@ -0,0 +1,106 @@ +//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_DEBUGINFO_H +#define LLD_READER_WRITER_MACHO_DEBUGINFO_H + +#include "lld/Core/Atom.h" +#include + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + + +namespace lld { +namespace mach_o { + +class DebugInfo { +public: + enum class Kind { + Dwarf, + Stabs + }; + + Kind kind() const { return _kind; } + + void setAllocator(std::unique_ptr allocator) { + _allocator = std::move(allocator); + } + +protected: + DebugInfo(Kind kind) : _kind(kind) {} + +private: + std::unique_ptr _allocator; + Kind _kind; +}; + +struct TranslationUnitSource { + StringRef name; + StringRef path; +}; + +class DwarfDebugInfo : public DebugInfo { +public: + DwarfDebugInfo(TranslationUnitSource tu) + : DebugInfo(Kind::Dwarf), _tu(std::move(tu)) {} + + static inline bool classof(const DebugInfo *di) { + return 
di->kind() == Kind::Dwarf; + } + + const TranslationUnitSource &translationUnitSource() const { return _tu; } + +private: + TranslationUnitSource _tu; +}; + +struct Stab { + Stab(const Atom* atom, uint8_t type, uint8_t other, uint16_t desc, + uint32_t value, StringRef str) + : atom(atom), type(type), other(other), desc(desc), value(value), + str(str) {} + + const class Atom* atom; + uint8_t type; + uint8_t other; + uint16_t desc; + uint32_t value; + StringRef str; +}; + +inline raw_ostream& operator<<(raw_ostream &os, Stab &s) { + os << "Stab -- atom: " << llvm::format("%p", s.atom) << ", type: " << (uint32_t)s.type + << ", other: " << (uint32_t)s.other << ", desc: " << s.desc << ", value: " << s.value + << ", str: '" << s.str << "'"; + return os; +} + +class StabsDebugInfo : public DebugInfo { +public: + + typedef std::vector StabsList; + + StabsDebugInfo(StabsList stabs) + : DebugInfo(Kind::Stabs), _stabs(std::move(stabs)) {} + + static inline bool classof(const DebugInfo *di) { + return di->kind() == Kind::Stabs; + } + + const StabsList& stabs() const { return _stabs; } + +public: + StabsList _stabs; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_DEBUGINFO_H diff --git a/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h b/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h new file mode 100644 index 000000000000..ce94be457026 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h @@ -0,0 +1,154 @@ +//===- lib/ReaderWriter/MachO/ExecutableAtoms.h ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H +#define LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H + +#include "Atoms.h" +#include "File.h" + +#include "llvm/BinaryFormat/MachO.h" + +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + + +// +// CEntryFile adds an UndefinedAtom for "_main" so that the Resolving +// phase will fail if "_main" is undefined. +// +class CEntryFile : public SimpleFile { +public: + CEntryFile(const MachOLinkingContext &context) + : SimpleFile("C entry", kindCEntryObject), + _undefMain(*this, context.entrySymbolName()) { + this->addAtom(_undefMain); + } + +private: + SimpleUndefinedAtom _undefMain; +}; + + +// +// StubHelperFile adds an UndefinedAtom for "dyld_stub_binder" so that +// the Resolveing phase will fail if "dyld_stub_binder" is undefined. +// +class StubHelperFile : public SimpleFile { +public: + StubHelperFile(const MachOLinkingContext &context) + : SimpleFile("stub runtime", kindStubHelperObject), + _undefBinder(*this, context.binderSymbolName()) { + this->addAtom(_undefBinder); + } + +private: + SimpleUndefinedAtom _undefBinder; +}; + + +// +// MachHeaderAliasFile lazily instantiates the magic symbols that mark the start +// of the mach_header for final linked images. 
+// +class MachHeaderAliasFile : public SimpleFile { +public: + MachHeaderAliasFile(const MachOLinkingContext &context) + : SimpleFile("mach_header symbols", kindHeaderObject) { + StringRef machHeaderSymbolName; + DefinedAtom::Scope symbolScope = DefinedAtom::scopeLinkageUnit; + StringRef dsoHandleName; + switch (context.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + machHeaderSymbolName = "__mh_object_header"; + break; + case llvm::MachO::MH_EXECUTE: + machHeaderSymbolName = "__mh_execute_header"; + symbolScope = DefinedAtom::scopeGlobal; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_FVMLIB: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_CORE: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_PRELOAD: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_DYLIB: + machHeaderSymbolName = "__mh_dylib_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_DYLINKER: + machHeaderSymbolName = "__mh_dylinker_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_BUNDLE: + machHeaderSymbolName = "__mh_bundle_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_DYLIB_STUB: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_DSYM: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_KEXT_BUNDLE: + dsoHandleName = "___dso_handle"; + break; + } + if (!machHeaderSymbolName.empty()) + _definedAtoms.push_back(new (allocator()) MachODefinedAtom( + *this, machHeaderSymbolName, symbolScope, + DefinedAtom::typeMachHeader, DefinedAtom::mergeNo, false, + true /* noDeadStrip */, + ArrayRef(), DefinedAtom::Alignment(4096))); + + if (!dsoHandleName.empty()) + _definedAtoms.push_back(new (allocator()) MachODefinedAtom( + *this, dsoHandleName, DefinedAtom::scopeLinkageUnit, + DefinedAtom::typeDSOHandle, DefinedAtom::mergeNo, false, + true /* 
noDeadStrip */, + ArrayRef(), DefinedAtom::Alignment(1))); + } + + const AtomRange defined() const override { + return _definedAtoms; + } + const AtomRange undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + + +private: + mutable AtomVector _definedAtoms; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H diff --git a/lld/lib/ReaderWriter/MachO/File.h b/lld/lib/ReaderWriter/MachO/File.h new file mode 100644 index 000000000000..77832969c6b3 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/File.h @@ -0,0 +1,467 @@ +//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FILE_H +#define LLD_READER_WRITER_MACHO_FILE_H + +#include "Atoms.h" +#include "DebugInfo.h" +#include "MachONormalizedFile.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Format.h" +#include "llvm/TextAPI/InterfaceFile.h" +#include "llvm/TextAPI/TextAPIReader.h" +#include + +namespace lld { +namespace mach_o { + +using lld::mach_o::normalized::Section; + +class MachOFile : public SimpleFile { +public: + + /// Real file constructor - for on-disk files. 
+ MachOFile(std::unique_ptr mb, MachOLinkingContext *ctx) + : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject), + _mb(std::move(mb)), _ctx(ctx) {} + + /// Dummy file constructor - for virtual files. + MachOFile(StringRef path) + : SimpleFile(path, File::kindMachObject) {} + + void addDefinedAtom(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + uint64_t sectionOffset, uint64_t contentSize, bool thumb, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. + name = name.copy(allocator()); + content = content.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + auto *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, merge, + thumb, noDeadStrip, content, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + bool thumb, bool noDeadStrip, uint64_t sectionOffset, + uint64_t contentSize, StringRef sectionName, + bool copyRefs, const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + content = content.copy(allocator()); + sectionName = sectionName.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + auto *atom = + new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type, + merge, thumb, + noDeadStrip, content, + sectionName, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope, + uint64_t sectionOffset, uint64_t size, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. + name = name.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + + DefinedAtom::ContentType type = DefinedAtom::typeUnknown; + switch (inSection->type) { + case llvm::MachO::S_ZEROFILL: + type = DefinedAtom::typeZeroFill; + break; + case llvm::MachO::S_THREAD_LOCAL_ZEROFILL: + type = DefinedAtom::typeTLVInitialZeroFill; + break; + default: + llvm_unreachable("Unrecognized zero-fill section"); + } + + auto *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, size, + noDeadStrip, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addUndefinedAtom(StringRef name, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + auto *atom = new (allocator()) SimpleUndefinedAtom(*this, name); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size, + DefinedAtom::Alignment align, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. 
+ name = name.copy(allocator()); + } + auto *atom = + new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + /// Search this file for the atom from 'section' that covers + /// 'offsetInSect'. Returns nullptr is no atom found. + MachODefinedAtom *findAtomCoveringAddress(const Section §ion, + uint64_t offsetInSect, + uint32_t *foundOffsetAtom=nullptr) { + const auto &pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return nullptr; + const auto &vec = pos->second; + assert(offsetInSect < section.content.size()); + // Vector of atoms for section are already sorted, so do binary search. + const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect, + [offsetInSect](const SectionOffsetAndAtom &ao, + uint64_t targetAddr) -> bool { + // Each atom has a start offset of its slice of the + // section's content. This compare function must return true + // iff the atom's range is before the offset being searched for. + uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size(); + return (atomsEndOffset <= offsetInSect); + }); + if (atomPos == vec.end()) + return nullptr; + if (foundOffsetAtom) + *foundOffsetAtom = offsetInSect - atomPos->offset; + return atomPos->atom; + } + + /// Searches this file for an UndefinedAtom named 'name'. Returns + /// nullptr is no such atom found. 
+ const lld::Atom *findUndefAtom(StringRef name) { + auto pos = _undefAtoms.find(name); + if (pos == _undefAtoms.end()) + return nullptr; + return pos->second; + } + + typedef std::function DefinedAtomVisitor; + + void eachDefinedAtom(DefinedAtomVisitor vistor) { + for (auto §AndAtoms : _sectionAtoms) { + for (auto &offAndAtom : sectAndAtoms.second) { + vistor(offAndAtom.atom); + } + } + } + + typedef std::function + SectionAtomVisitor; + + void eachAtomInSection(const Section §ion, SectionAtomVisitor visitor) { + auto pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return; + auto vec = pos->second; + + for (auto &offAndAtom : vec) + visitor(offAndAtom.atom, offAndAtom.offset); + } + + MachOLinkingContext::Arch arch() const { return _arch; } + void setArch(MachOLinkingContext::Arch arch) { _arch = arch; } + + MachOLinkingContext::OS OS() const { return _os; } + void setOS(MachOLinkingContext::OS os) { _os = os; } + + MachOLinkingContext::ObjCConstraint objcConstraint() const { + return _objcConstraint; + } + void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) { + _objcConstraint = v; + } + + uint32_t minVersion() const { return _minVersion; } + void setMinVersion(uint32_t v) { _minVersion = v; } + + LoadCommandType minVersionLoadCommandKind() const { + return _minVersionLoadCommandKind; + } + void setMinVersionLoadCommandKind(LoadCommandType v) { + _minVersionLoadCommandKind = v; + } + + uint32_t swiftVersion() const { return _swiftVersion; } + void setSwiftVersion(uint32_t v) { _swiftVersion = v; } + + bool subsectionsViaSymbols() const { + return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + } + void setFlags(normalized::FileFlags v) { _flags = v; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const File *F) { + return F->kind() == File::kindMachObject; + } + + void setDebugInfo(std::unique_ptr debugInfo) { + _debugInfo = std::move(debugInfo); + } + + DebugInfo* 
debugInfo() const { return _debugInfo.get(); } + std::unique_ptr takeDebugInfo() { return std::move(_debugInfo); } + +protected: + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (auto ec = normFile.takeError()) + return llvm::errorToErrorCode(std::move(ec)); + // Convert normalized mach-o to atoms. + if (auto ec = normalized::normalizedObjectToAtoms(this, **normFile, false)) + return llvm::errorToErrorCode(std::move(ec)); + return std::error_code(); + } + +private: + struct SectionOffsetAndAtom { uint64_t offset; MachODefinedAtom *atom; }; + + void addAtomForSection(const Section *inSection, MachODefinedAtom* atom, + uint64_t sectionOffset) { + SectionOffsetAndAtom offAndAtom; + offAndAtom.offset = sectionOffset; + offAndAtom.atom = atom; + _sectionAtoms[inSection].push_back(offAndAtom); + addAtom(*atom); + } + + typedef llvm::DenseMap> SectionToAtoms; + typedef llvm::StringMap NameToAtom; + + std::unique_ptr _mb; + MachOLinkingContext *_ctx; + SectionToAtoms _sectionAtoms; + NameToAtom _undefAtoms; + MachOLinkingContext::Arch _arch = MachOLinkingContext::arch_unknown; + MachOLinkingContext::OS _os = MachOLinkingContext::OS::unknown; + uint32_t _minVersion = 0; + LoadCommandType _minVersionLoadCommandKind = (LoadCommandType)0; + MachOLinkingContext::ObjCConstraint _objcConstraint = + MachOLinkingContext::objc_unknown; + uint32_t _swiftVersion = 0; + normalized::FileFlags _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + std::unique_ptr _debugInfo; +}; + +class MachODylibFile : public SharedLibraryFile { +public: + MachODylibFile(std::unique_ptr mb, MachOLinkingContext *ctx) + : SharedLibraryFile(mb->getBufferIdentifier()), + _mb(std::move(mb)), _ctx(ctx) {} + + MachODylibFile(StringRef path) : SharedLibraryFile(path) {} + + OwningAtomPtr exports(StringRef name) const override { + // Pass down _installName so that if this requested symbol + // is re-exported 
through this dylib, the SharedLibraryAtom's loadName() + // is this dylib installName and not the implementation dylib's. + // NOTE: isData is not needed for dylibs (it matters for static libs). + return exports(name, _installName); + } + + /// Adds symbol name that this dylib exports. The corresponding + /// SharedLibraryAtom is created lazily (since most symbols are not used). + void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) { + if (copyRefs) { + name = name.copy(allocator()); + } + AtomAndFlags info(weakDef); + _nameToAtom[name] = info; + } + + void addReExportedDylib(StringRef dylibPath) { + _reExportedDylibs.emplace_back(dylibPath); + } + + StringRef installName() const { return _installName; } + uint32_t currentVersion() { return _currentVersion; } + uint32_t compatVersion() { return _compatVersion; } + + void setInstallName(StringRef name) { _installName = name; } + void setCompatVersion(uint32_t version) { _compatVersion = version; } + void setCurrentVersion(uint32_t version) { _currentVersion = version; } + + typedef std::function FindDylib; + + void loadReExportedDylibs(FindDylib find) { + for (ReExportedDylib &entry : _reExportedDylibs) { + if (!entry.file) + entry.file = find(entry.path); + } + } + + StringRef getDSOName() const override { return _installName; } + + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (auto ec = normFile.takeError()) + return llvm::errorToErrorCode(std::move(ec)); + // Convert normalized mach-o to atoms. + if (auto ec = normalized::normalizedDylibToAtoms(this, **normFile, false)) + return llvm::errorToErrorCode(std::move(ec)); + return std::error_code(); + } + +protected: + OwningAtomPtr exports(StringRef name, + StringRef installName) const { + // First, check if requested symbol is directly implemented by this dylib. 
+ auto entry = _nameToAtom.find(name); + if (entry != _nameToAtom.end()) { + // FIXME: Make this map a set and only used in assert builds. + // Note, its safe to assert here as the resolver is the only client of + // this API and it only requests exports for undefined symbols. + // If we return from here we are no longer undefined so we should never + // get here again. + assert(!entry->second.atom && "Duplicate shared library export"); + bool weakDef = entry->second.weakDef; + auto *atom = new (allocator()) MachOSharedLibraryAtom(*this, name, + installName, + weakDef); + entry->second.atom = atom; + return atom; + } + + // Next, check if symbol is implemented in some re-exported dylib. + for (const ReExportedDylib &dylib : _reExportedDylibs) { + assert(dylib.file); + auto atom = dylib.file->exports(name, installName); + if (atom.get()) + return atom; + } + + // Symbol not exported or re-exported by this dylib. + return nullptr; + } + + struct ReExportedDylib { + ReExportedDylib(StringRef p) : path(p), file(nullptr) { } + ReExportedDylib(StringRef p, MachODylibFile *file) : path(p), file(file) { } + StringRef path; + MachODylibFile *file; + }; + + struct AtomAndFlags { + AtomAndFlags() : atom(nullptr), weakDef(false) { } + AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { } + const SharedLibraryAtom *atom; + bool weakDef; + }; + + std::unique_ptr _mb; + MachOLinkingContext *_ctx; + StringRef _installName; + uint32_t _currentVersion; + uint32_t _compatVersion; + std::vector _reExportedDylibs; + mutable std::unordered_map _nameToAtom; +}; + +class TAPIFile : public MachODylibFile { +public: + + TAPIFile(std::unique_ptr mb, MachOLinkingContext *ctx) + : MachODylibFile(std::move(mb), ctx) {} + + std::error_code doParse() override { + + llvm::Expected> result = + llvm::MachO::TextAPIReader::get(*_mb); + if (!result) + return std::make_error_code(std::errc::invalid_argument); + + std::unique_ptr interface{std::move(*result)}; + return 
loadFromInterface(*interface); + } + +private: + std::error_code loadFromInterface(llvm::MachO::InterfaceFile &interface) { + llvm::MachO::Architecture arch; + switch(_ctx->arch()) { + case MachOLinkingContext::arch_x86: + arch = llvm::MachO::AK_i386; + break; + case MachOLinkingContext::arch_x86_64: + arch = llvm::MachO::AK_x86_64; + break; + case MachOLinkingContext::arch_arm64: + arch = llvm::MachO::AK_arm64; + break; + default: + return std::make_error_code(std::errc::invalid_argument); + } + + setInstallName(interface.getInstallName().copy(allocator())); + // TODO(compnerd) filter out symbols based on the target platform + for (const auto symbol : interface.symbols()) + if (symbol->getArchitectures().has(arch)) + addExportedSymbol(symbol->getName(), symbol->isWeakDefined(), true); + + for (const llvm::MachO::InterfaceFileRef &reexport : + interface.reexportedLibraries()) + addReExportedDylib(reexport.getInstallName().copy(allocator())); + + for (const auto& document : interface.documents()) { + for (auto& reexport : _reExportedDylibs) { + if (reexport.path != document->getInstallName()) + continue; + assert(!reexport.file); + _ownedFiles.push_back(std::make_unique( + MemoryBuffer::getMemBuffer("", _mb->getBufferIdentifier()), _ctx)); + reexport.file = _ownedFiles.back().get(); + std::error_code err = _ownedFiles.back()->loadFromInterface(*document); + if (err) + return err; + } + } + + return std::error_code(); + } + + std::vector> _ownedFiles; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h b/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h new file mode 100644 index 000000000000..1885effef49f --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h @@ -0,0 +1,62 @@ +//===- lib/ReaderWriter/MachO/FlatNamespaceFile.h -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H +#define LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H + +#include "Atoms.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/Support/Debug.h" + +namespace lld { +namespace mach_o { + +// +// A FlateNamespaceFile instance may be added as a resolution source of last +// resort, depending on how -flat_namespace and -undefined are set. +// +class FlatNamespaceFile : public SharedLibraryFile { +public: + FlatNamespaceFile(const MachOLinkingContext &context) + : SharedLibraryFile("flat namespace") { } + + OwningAtomPtr exports(StringRef name) const override { + return new (allocator()) MachOSharedLibraryAtom(*this, name, getDSOName(), + false); + } + + StringRef getDSOName() const override { return "flat-namespace"; } + + const AtomRange defined() const override { + return _noDefinedAtoms; + } + const AtomRange undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/lld/lib/ReaderWriter/MachO/GOTPass.cpp new file mode 100644 index 000000000000..10e611c1bd2b --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/GOTPass.cpp @@ -0,0 +1,183 @@ +//===- lib/ReaderWriter/MachO/GOTPass.cpp -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This linker pass transforms all GOT kind references to real references. +/// That is, in assembly you can write something like: +/// movq foo@GOTPCREL(%rip), %rax +/// which means you want to load a pointer to "foo" out of the GOT (global +/// Offsets Table). In the object file, the Atom containing this instruction +/// has a Reference whose target is an Atom named "foo" and the Reference +/// kind is a GOT load. The linker needs to instantiate a pointer sized +/// GOT entry. This is done be creating a GOT Atom to represent that pointer +/// sized data in this pass, and altering the Atom graph so the Reference now +/// points to the GOT Atom entry (corresponding to "foo") and changing the +/// Reference Kind to reflect it is now pointing to a GOT entry (rather +/// then needing a GOT entry). +/// +/// There is one optimization the linker can do here. If the target of the GOT +/// is in the same linkage unit and does not need to be interposable, and +/// the GOT use is just a load (not some other operation), this pass can +/// transform that load into an LEA (add). This optimizes away one memory load +/// which at runtime that could stall the pipeline. This optimization only +/// works for architectures in which a (GOT) load instruction can be change to +/// an LEA instruction that is the same size. The method isGOTAccess() should +/// only return true for "canBypassGOT" if this optimization is supported. 
+/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +// +// GOT Entry Atom created by the GOT pass. +// +class GOTEntryAtom : public SimpleDefinedAtom { +public: + GOTEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) { } + + ~GOTEntryAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeGOT; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + +/// Pass for instantiating and optimizing GOT slots. +/// +class GOTPass : public Pass { +public: + GOTPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file("")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at instructions accessing the GOT. 
+ bool canBypassGOT; + if (!_archHandler.isGOTAccess(*ref, canBypassGOT)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + + if (!shouldReplaceTargetWithGOTAtom(target, canBypassGOT)) { + // Update reference kind to reflect that target is a direct access. + _archHandler.updateReferenceToGOT(ref, false); + } else { + // Replace the target with a reference to a GOT entry. + const DefinedAtom *gotEntry = makeGOTEntry(target); + const_cast(ref)->setTarget(gotEntry); + // Update reference kind to reflect that target is now a GOT entry. + _archHandler.updateReferenceToGOT(ref, true); + } + } + } + + // Sort and add all created GOT Atoms to master file + std::vector entries; + entries.reserve(_targetToGOT.size()); + for (auto &it : _targetToGOT) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const GOTEntryAtom *left, const GOTEntryAtom *right) { + return (left->slotName().compare(right->slotName()) < 0); + }); + for (const GOTEntryAtom *slot : entries) + mergedFile.addAtom(*slot); + + return llvm::Error::success(); + } + + bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) { + // Accesses to shared library symbols must go through GOT. + if (isa(target)) + return true; + // Accesses to interposable symbols in same linkage unit must also go + // through GOT. + const DefinedAtom *defTarget = dyn_cast(target); + if (defTarget != nullptr && + defTarget->interposable() != DefinedAtom::interposeNo) { + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + return true; + } + // Target does not require indirection. So, if instruction allows GOT to be + // by-passed, do that optimization and don't create GOT entry. 
+ return !canBypassGOT; + } + + const DefinedAtom *makeGOTEntry(const Atom *target) { + auto pos = _targetToGOT.find(target); + if (pos == _targetToGOT.end()) { + auto *gotEntry = new (_file.allocator()) + GOTEntryAtom(_file, _ctx.is64Bit(), target->name()); + _targetToGOT[target] = gotEntry; + const ArchHandler::ReferenceInfo &nlInfo = _archHandler.stubInfo(). + nonLazyPointerReferenceToBinder; + gotEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return gotEntry; + } + return pos->second; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + llvm::DenseMap _targetToGOT; +}; + +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsGOTPass()); + pm.add(std::make_unique(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp new file mode 100644 index 000000000000..e92fdf1b4913 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -0,0 +1,490 @@ +//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LayoutPass.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" +#include +#include +#include + +using namespace lld; + +#define DEBUG_TYPE "LayoutPass" + +namespace lld { +namespace mach_o { + +static bool compareAtoms(const LayoutPass::SortKey &, + const LayoutPass::SortKey &, + LayoutPass::SortOverride customSorter); + +#ifndef NDEBUG +// Return "reason (leftval, rightval)" +static std::string formatReason(StringRef reason, int leftVal, int rightVal) { + return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")") + .str(); +} + +// Less-than relationship of two atoms must be transitive, which is, if a < b +// and b < c, a < c must be true. This function checks the transitivity by +// checking the sort results. +static void checkTransitivity(std::vector &vec, + LayoutPass::SortOverride customSorter) { + for (auto i = vec.begin(), e = vec.end(); (i + 1) != e; ++i) { + for (auto j = i + 1; j != e; ++j) { + assert(compareAtoms(*i, *j, customSorter)); + assert(!compareAtoms(*j, *i, customSorter)); + } + } +} + +// Helper functions to check follow-on graph. 
+typedef llvm::DenseMap AtomToAtomT; + +static std::string atomToDebugString(const Atom *atom) { + const DefinedAtom *definedAtom = dyn_cast(atom); + std::string str; + llvm::raw_string_ostream s(str); + if (definedAtom->name().empty()) + s << ""; + else + s << definedAtom->name(); + s << " in "; + if (definedAtom->customSectionName().empty()) + s << ""; + else + s << definedAtom->customSectionName(); + s.flush(); + return str; +} + +static void showCycleDetectedError(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *atom) { + const DefinedAtom *start = atom; + llvm::dbgs() << "There's a cycle in a follow-on chain!\n"; + do { + llvm::dbgs() << " " << atomToDebugString(atom) << "\n"; + for (const Reference *ref : *atom) { + StringRef kindValStr; + if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(), + ref->kindValue(), kindValStr)) { + kindValStr = ""; + } + llvm::dbgs() << " " << kindValStr + << ": " << atomToDebugString(ref->target()) << "\n"; + } + atom = followOnNexts[atom]; + } while (atom != start); + llvm::report_fatal_error("Cycle detected"); +} + +/// Exit if there's a cycle in a followon chain reachable from the +/// given root atom. Uses the tortoise and hare algorithm to detect a +/// cycle. 
+static void checkNoCycleInFollowonChain(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *root) { + const DefinedAtom *tortoise = root; + const DefinedAtom *hare = followOnNexts[root]; + while (true) { + if (!tortoise || !hare) + return; + if (tortoise == hare) + showCycleDetectedError(registry, followOnNexts, tortoise); + tortoise = followOnNexts[tortoise]; + hare = followOnNexts[followOnNexts[hare]]; + } +} + +static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots, + const DefinedAtom *atom) { + if (!atom) return; + auto i = followOnRoots.find(atom); + if (i == followOnRoots.end()) { + llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) + + "> has no follow-on root!")) + .str() + .c_str()); + } + const DefinedAtom *ap = i->second; + while (true) { + const DefinedAtom *next = followOnRoots[ap]; + if (!next) { + llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) + + "> is not reachable from its root!")) + .str() + .c_str()); + } + if (next == ap) + return; + ap = next; + } +} + +static void printDefinedAtoms(const File::AtomRange &atomRange) { + for (const DefinedAtom *atom : atomRange) { + llvm::dbgs() << " file=" << atom->file().path() + << ", name=" << atom->name() + << ", size=" << atom->size() + << ", type=" << atom->contentType() + << ", ordinal=" << atom->ordinal() + << "\n"; + } +} + +/// Verify that the followon chain is sane. Should not be called in +/// release binary. +void LayoutPass::checkFollowonChain(const File::AtomRange &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain"); + + // Verify that there's no cycle in follow-on chain. + std::set roots; + for (const auto &ai : _followOnRoots) + roots.insert(ai.second); + for (const DefinedAtom *root : roots) + checkNoCycleInFollowonChain(_registry, _followOnNexts, root); + + // Verify that all the atoms in followOnNexts have references to + // their roots. 
+ for (const auto &ai : _followOnNexts) { + checkReachabilityFromRoot(_followOnRoots, ai.first); + checkReachabilityFromRoot(_followOnRoots, ai.second); + } +} +#endif // #ifndef NDEBUG + +/// The function compares atoms by sorting atoms in the following order +/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup) +/// b) Sorts atoms by their permissions +/// c) Sorts atoms by their content +/// d) Sorts atoms by custom sorter +/// e) Sorts atoms on how they appear using File Ordinality +/// f) Sorts atoms on how they appear within the File +static bool compareAtomsSub(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter, + std::string &reason) { + const DefinedAtom *left = lc._atom.get(); + const DefinedAtom *right = rc._atom.get(); + if (left == right) { + reason = "same"; + return false; + } + + // Find the root of the chain if it is a part of a follow-on chain. + const DefinedAtom *leftRoot = lc._root; + const DefinedAtom *rightRoot = rc._root; + + // Sort atoms by their ordinal overrides only if they fall in the same + // chain. + if (leftRoot == rightRoot) { + LLVM_DEBUG(reason = formatReason("override", lc._override, rc._override)); + return lc._override < rc._override; + } + + // Sort same permissions together. + DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions(); + DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions(); + + if (leftPerms != rightPerms) { + LLVM_DEBUG( + reason = formatReason("contentPerms", (int)leftPerms, (int)rightPerms)); + return leftPerms < rightPerms; + } + + // Sort same content types together. + DefinedAtom::ContentType leftType = leftRoot->contentType(); + DefinedAtom::ContentType rightType = rightRoot->contentType(); + + if (leftType != rightType) { + LLVM_DEBUG(reason = + formatReason("contentType", (int)leftType, (int)rightType)); + return leftType < rightType; + } + + // Use custom sorter if supplied. 
+ if (customSorter) { + bool leftBeforeRight; + if (customSorter(leftRoot, rightRoot, leftBeforeRight)) + return leftBeforeRight; + } + + // Sort by .o order. + const File *leftFile = &leftRoot->file(); + const File *rightFile = &rightRoot->file(); + + if (leftFile != rightFile) { + LLVM_DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(), + (int)rightFile->ordinal())); + return leftFile->ordinal() < rightFile->ordinal(); + } + + // Sort by atom order with .o file. + uint64_t leftOrdinal = leftRoot->ordinal(); + uint64_t rightOrdinal = rightRoot->ordinal(); + + if (leftOrdinal != rightOrdinal) { + LLVM_DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(), + (int)rightRoot->ordinal())); + return leftOrdinal < rightOrdinal; + } + + llvm::errs() << "Unordered: <" << left->name() << "> <" << right->name() + << ">\n"; + llvm_unreachable("Atoms with Same Ordinal!"); +} + +static bool compareAtoms(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter) { + std::string reason; + bool result = compareAtomsSub(lc, rc, customSorter, reason); + LLVM_DEBUG({ + StringRef comp = result ? "<" : ">="; + llvm::dbgs() << "Layout: '" << lc._atom.get()->name() + << "' " << comp << " '" + << rc._atom.get()->name() << "' (" << reason << ")\n"; + }); + return result; +} + +LayoutPass::LayoutPass(const Registry ®istry, SortOverride sorter) + : _registry(registry), _customSorter(std::move(sorter)) {} + +// Returns the atom immediately followed by the given atom in the followon +// chain. +const DefinedAtom *LayoutPass::findAtomFollowedBy( + const DefinedAtom *targetAtom) { + // Start from the beginning of the chain and follow the chain until + // we find the targetChain. + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + const DefinedAtom *prevAtom = atom; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. 
+ assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + if (atom == targetAtom) + return prevAtom; + } +} + +// Check if all the atoms followed by the given target atom are of size zero. +// When this method is called, an atom being added is not of size zero and +// will be added to the head of the followon chain. All the atoms between the +// atom and the targetAtom (specified by layout-after) need to be of size zero +// in this case. Otherwise the desired layout is impossible. +bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) { + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + if (atom == targetAtom) + return true; + if (atom->size() != 0) + // TODO: print warning that an impossible layout is being desired by the + // user. + return false; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. + assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + } +} + +// Set the root of all atoms in targetAtom's chain to the given root. +void LayoutPass::setChainRoot(const DefinedAtom *targetAtom, + const DefinedAtom *root) { + // Walk through the followon chain and override each node's root. + while (true) { + _followOnRoots[targetAtom] = root; + AtomToAtomT::iterator targetFollowOnAtomsIter = + _followOnNexts.find(targetAtom); + if (targetFollowOnAtomsIter == _followOnNexts.end()) + return; + targetAtom = targetFollowOnAtomsIter->second; + } +} + +/// This pass builds the followon tables described by two DenseMaps +/// followOnRoots and followonNexts. 
+/// The followOnRoots map contains a mapping of a DefinedAtom to its root +/// The followOnNexts map contains a mapping of what DefinedAtom follows the +/// current Atom +/// The algorithm follows a very simple approach +/// a) If the atom is first seen, then make that as the root atom +/// b) The targetAtom which this Atom contains, has the root thats set to the +/// root of the current atom +/// c) If the targetAtom is part of a different tree and the root of the +/// targetAtom is itself, Chain all the atoms that are contained in the tree +/// to the current Tree +/// d) If the targetAtom is part of a different chain and the root of the +/// targetAtom until the targetAtom has all atoms of size 0, then chain the +/// targetAtoms and its tree to the current chain +void LayoutPass::buildFollowOnTable(const File::AtomRange &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable"); + // Set the initial size of the followon and the followonNext hash to the + // number of atoms that we have. + _followOnRoots.reserve(range.size()); + _followOnNexts.reserve(range.size()); + for (const DefinedAtom *ai : range) { + for (const Reference *r : *ai) { + if (r->kindNamespace() != lld::Reference::KindNamespace::all || + r->kindValue() != lld::Reference::kindLayoutAfter) + continue; + const DefinedAtom *targetAtom = dyn_cast(r->target()); + _followOnNexts[ai] = targetAtom; + + // If we find a followon for the first time, let's make that atom as the + // root atom. + if (_followOnRoots.count(ai) == 0) + _followOnRoots[ai] = ai; + + auto iter = _followOnRoots.find(targetAtom); + if (iter == _followOnRoots.end()) { + // If the targetAtom is not a root of any chain, let's make the root of + // the targetAtom to the root of the current chain. + + // The expression m[i] = m[j] where m is a DenseMap and i != j is not + // safe. m[j] returns a reference, which would be invalidated when a + // rehashing occurs. 
If rehashing occurs to make room for m[i], m[j] + // becomes invalid, and that invalid reference would be used as the RHS + // value of the expression. + // Copy the value to workaround. + const DefinedAtom *tmp = _followOnRoots[ai]; + _followOnRoots[targetAtom] = tmp; + continue; + } + if (iter->second == targetAtom) { + // If the targetAtom is the root of a chain, the chain becomes part of + // the current chain. Rewrite the subchain's root to the current + // chain's root. + setChainRoot(targetAtom, _followOnRoots[ai]); + continue; + } + // The targetAtom is already a part of a chain. If the current atom is + // of size zero, we can insert it in the middle of the chain just + // before the target atom, while not breaking other atom's followon + // relationships. If it's not, we can only insert the current atom at + // the beginning of the chain. All the atoms followed by the target + // atom must be of size zero in that case to satisfy the followon + // relationships. + size_t currentAtomSize = ai->size(); + if (currentAtomSize == 0) { + const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom); + _followOnNexts[targetPrevAtom] = ai; + const DefinedAtom *tmp = _followOnRoots[targetPrevAtom]; + _followOnRoots[ai] = tmp; + continue; + } + if (!checkAllPrevAtomsZeroSize(targetAtom)) + break; + _followOnNexts[ai] = _followOnRoots[targetAtom]; + setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]); + } + } +} + +/// Build an ordinal override map by traversing the followon chain, and +/// assigning ordinals to each atom, if the atoms have their ordinals +/// already assigned skip the atom and move to the next. 
This is the +/// main map thats used to sort the atoms while comparing two atoms together +void +LayoutPass::buildOrdinalOverrideMap(const File::AtomRange &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap"); + uint64_t index = 0; + for (const DefinedAtom *ai : range) { + const DefinedAtom *atom = ai; + if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end()) + continue; + AtomToAtomT::iterator start = _followOnRoots.find(atom); + if (start == _followOnRoots.end()) + continue; + for (const DefinedAtom *nextAtom = start->second; nextAtom; + nextAtom = _followOnNexts[nextAtom]) { + AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom); + if (pos == _ordinalOverrideMap.end()) + _ordinalOverrideMap[nextAtom] = index++; + } + } +} + +std::vector +LayoutPass::decorate(File::AtomRange &atomRange) const { + std::vector ret; + for (OwningAtomPtr &atom : atomRange.owning_ptrs()) { + auto ri = _followOnRoots.find(atom.get()); + auto oi = _ordinalOverrideMap.find(atom.get()); + const auto *root = (ri == _followOnRoots.end()) ? atom.get() : ri->second; + uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second; + ret.push_back(SortKey(std::move(atom), root, override)); + } + return ret; +} + +void LayoutPass::undecorate(File::AtomRange &atomRange, + std::vector &keys) const { + size_t i = 0; + for (SortKey &k : keys) + atomRange[i++] = std::move(k._atom); +} + +/// Perform the actual pass +llvm::Error LayoutPass::perform(SimpleFile &mergedFile) { + LLVM_DEBUG(llvm::dbgs() << "******** Laying out atoms:\n"); + // sort the atoms + ScopedTask task(getDefaultDomain(), "LayoutPass"); + File::AtomRange atomRange = mergedFile.defined(); + + // Build follow on tables + buildFollowOnTable(atomRange); + + // Check the structure of followon graph if running in debug mode. 
+ LLVM_DEBUG(checkFollowonChain(atomRange)); + + // Build override maps + buildOrdinalOverrideMap(atomRange); + + LLVM_DEBUG({ + llvm::dbgs() << "unsorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + std::vector vec = decorate(atomRange); + llvm::parallelSort( + vec, + [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { + return compareAtoms(l, r, _customSorter); + }); + LLVM_DEBUG(checkTransitivity(vec, _customSorter)); + undecorate(atomRange, vec); + + LLVM_DEBUG({ + llvm::dbgs() << "sorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + LLVM_DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n"); + return llvm::Error::success(); +} + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::make_unique( + ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right, + bool & leftBeforeRight) ->bool { + return ctx.customAtomOrderer(left, right, leftBeforeRight); + })); +} + +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.h b/lld/lib/ReaderWriter/MachO/LayoutPass.h new file mode 100644 index 000000000000..904e16b7fb0e --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/LayoutPass.h @@ -0,0 +1,118 @@ +//===------ lib/ReaderWriter/MachO/LayoutPass.h - Handles Layout of atoms -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LAYOUT_PASS_H +#define LLD_READER_WRITER_MACHO_LAYOUT_PASS_H + +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include +#include +#include + +namespace lld { +class DefinedAtom; +class SimpleFile; + +namespace mach_o { + +/// This linker pass does the layout of the atoms. The pass is done after the +/// order their .o files were found on the command line, then by order of the +/// atoms (address) in the .o file. But some atoms have a preferred location +/// in their section (such as pinned to the start or end of the section), so +/// the sort must take that into account too. +class LayoutPass : public Pass { +public: + struct SortKey { + SortKey(OwningAtomPtr &&atom, + const DefinedAtom *root, uint64_t override) + : _atom(std::move(atom)), _root(root), _override(override) {} + OwningAtomPtr _atom; + const DefinedAtom *_root; + uint64_t _override; + + // Note, these are only here to appease MSVC bots which didn't like + // the same methods being implemented/deleted in OwningAtomPtr. + SortKey(SortKey &&key) : _atom(std::move(key._atom)), _root(key._root), + _override(key._override) { + key._root = nullptr; + } + + SortKey &operator=(SortKey &&key) { + _atom = std::move(key._atom); + _root = key._root; + key._root = nullptr; + _override = key._override; + return *this; + } + + private: + SortKey(const SortKey &) = delete; + void operator=(const SortKey&) = delete; + }; + + typedef std::function SortOverride; + + LayoutPass(const Registry ®istry, SortOverride sorter); + + /// Sorts atoms in mergedFile by content type then by command line order. 
+ llvm::Error perform(SimpleFile &mergedFile) override; + + ~LayoutPass() override = default; + +private: + // Build the followOn atoms chain as specified by the kindLayoutAfter + // reference type + void buildFollowOnTable(const File::AtomRange &range); + + // Build a map of Atoms to ordinals for sorting the atoms + void buildOrdinalOverrideMap(const File::AtomRange &range); + + const Registry &_registry; + SortOverride _customSorter; + + typedef llvm::DenseMap AtomToAtomT; + typedef llvm::DenseMap AtomToOrdinalT; + + // A map to be used to sort atoms. It represents the order of atoms in the + // result; if Atom X is mapped to atom Y in this map, X will be located + // immediately before Y in the output file. Y might be mapped to another + // atom, constructing a follow-on chain. An atom cannot be mapped to more + // than one atom unless all but one atom are of size zero. + AtomToAtomT _followOnNexts; + + // A map to be used to sort atoms. It's a map from an atom to its root of + // follow-on chain. A root atom is mapped to itself. If an atom is not in + // _followOnNexts, the atom is not in this map, and vice versa. + AtomToAtomT _followOnRoots; + + AtomToOrdinalT _ordinalOverrideMap; + + // Helper methods for buildFollowOnTable(). + const DefinedAtom *findAtomFollowedBy(const DefinedAtom *targetAtom); + bool checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom); + + void setChainRoot(const DefinedAtom *targetAtom, const DefinedAtom *root); + + std::vector decorate(File::AtomRange &atomRange) const; + + void undecorate(File::AtomRange &atomRange, + std::vector &keys) const; + + // Check if the follow-on graph is a correct structure. For debugging only. 
+ void checkFollowonChain(const File::AtomRange &range); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_LAYOUT_PASS_H diff --git a/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp new file mode 100644 index 000000000000..acd919e4d411 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp @@ -0,0 +1,1104 @@ +//===- lib/ReaderWriter/MachO/MachOLinkingContext.cpp ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/ErrorHandler.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "ArchHandler.h" +#include "File.h" +#include "FlatNamespaceFile.h" +#include "MachONormalizedFile.h" +#include "MachOPasses.h" +#include "SectCreateFile.h" +#include "lld/Common/Driver.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/Path.h" +#include + +using lld::mach_o::ArchHandler; +using lld::mach_o::MachOFile; +using lld::mach_o::MachODylibFile; +using namespace llvm::MachO; + +namespace lld { + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return true; + if (num > 65535) + 
return true; + result = num << 16; + + if (parts.size() > 1) { + if (llvm::getAsUnsignedInteger(parts[1], 10, num)) + return true; + if (num > 255) + return true; + result |= (num << 8); + } + + if (parts.size() > 2) { + if (llvm::getAsUnsignedInteger(parts[2], 10, num)) + return true; + if (num > 255) + return true; + result |= num; + } + + return false; +} + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint64_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return true; + if (num > 0xFFFFFF) + return true; + result = num << 40; + + unsigned Shift = 30; + for (StringRef str : llvm::makeArrayRef(parts).slice(1)) { + if (llvm::getAsUnsignedInteger(str, 10, num)) + return true; + if (num > 0x3FF) + return true; + result |= (num << Shift); + Shift -= 10; + } + + return false; +} + +MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { + { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL }, + { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL }, + { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL }, + { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 }, + { "armv7", arch_armv7, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }, + { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, + { "arm64", arch_arm64, true, CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL }, + { "", arch_unknown,false, 0, 0 } +}; + +MachOLinkingContext::Arch +MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) + return info->arch; + } + return arch_unknown; +} + +MachOLinkingContext::Arch +MachOLinkingContext::archFromName(StringRef archName) { + for (ArchInfo *info = _s_archInfos; 
!info->archName.empty(); ++info) { + if (info->archName.equals(archName)) + return info->arch; + } + return arch_unknown; +} + +StringRef MachOLinkingContext::nameFromArch(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->archName; + } + return ""; +} + +uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cputype; + } + llvm_unreachable("Unknown arch type"); +} + +uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cpusubtype; + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isThinObjectFile(StringRef path, Arch &arch) { + return mach_o::normalized::isThinObjectFile(path, arch); +} + +bool MachOLinkingContext::sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, + uint32_t &size) { + return mach_o::normalized::sliceFromFatFile(mb, _arch, offset, size); +} + +MachOLinkingContext::MachOLinkingContext() {} + +MachOLinkingContext::~MachOLinkingContext() { + // Atoms are allocated on BumpPtrAllocator's on File's. + // As we transfer atoms from one file to another, we need to clear all of the + // atoms before we remove any of the BumpPtrAllocator's. + auto &nodes = getNodes(); + for (unsigned i = 0, e = nodes.size(); i != e; ++i) { + FileNode *node = dyn_cast(nodes[i].get()); + if (!node) + continue; + File *file = node->getFile(); + file->clearAtoms(); + } +} + +void MachOLinkingContext::configure(HeaderFileType type, Arch arch, OS os, + uint32_t minOSVersion, + bool exportDynamicSymbols) { + _outputMachOType = type; + _arch = arch; + _os = os; + _osMinVersion = minOSVersion; + + // If min OS not specified on command line, use reasonable defaults. 
+ // Note that we only do sensible defaults when emitting something other than + // object and preload. + if (_outputMachOType != llvm::MachO::MH_OBJECT && + _outputMachOType != llvm::MachO::MH_PRELOAD) { + if (minOSVersion == 0) { + switch (_arch) { + case arch_x86_64: + case arch_x86: + parsePackedVersion("10.8", _osMinVersion); + _os = MachOLinkingContext::OS::macOSX; + break; + case arch_armv6: + case arch_armv7: + case arch_armv7s: + case arch_arm64: + parsePackedVersion("7.0", _osMinVersion); + _os = MachOLinkingContext::OS::iOS; + break; + default: + break; + } + } + } + + switch (_outputMachOType) { + case llvm::MachO::MH_EXECUTE: + // If targeting newer OS, use _main + if (minOS("10.8", "6.0")) { + _entrySymbolName = "_main"; + } else { + // If targeting older OS, use start (in crt1.o) + _entrySymbolName = "start"; + } + + // __PAGEZERO defaults to 4GB on 64-bit (except for PP64 which lld does not + // support) and 4KB on 32-bit. + if (is64Bit(_arch)) { + _pageZeroSize = 0x100000000; + } else { + _pageZeroSize = 0x1000; + } + + // Initial base address is __PAGEZERO size. + _baseAddress = _pageZeroSize; + + // Make PIE by default when targetting newer OSs. + switch (os) { + case OS::macOSX: + if (minOSVersion >= 0x000A0700) // MacOSX 10.7 + _pie = true; + break; + case OS::iOS: + if (minOSVersion >= 0x00040300) // iOS 4.3 + _pie = true; + break; + case OS::iOS_simulator: + _pie = true; + break; + case OS::unknown: + break; + } + setGlobalsAreDeadStripRoots(exportDynamicSymbols); + break; + case llvm::MachO::MH_DYLIB: + setGlobalsAreDeadStripRoots(exportDynamicSymbols); + break; + case llvm::MachO::MH_BUNDLE: + break; + case llvm::MachO::MH_OBJECT: + _printRemainingUndefines = false; + _allowRemainingUndefines = true; + break; + default: + break; + } + + // Set default segment page sizes based on arch. 
+ if (arch == arch_arm64) + _pageSize = 4*4096; +} + +uint32_t MachOLinkingContext::getCPUType() const { + return cpuTypeFromArch(_arch); +} + +uint32_t MachOLinkingContext::getCPUSubType() const { + return cpuSubtypeFromArch(_arch); +} + +bool MachOLinkingContext::is64Bit(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->cputype & CPU_ARCH_ABI64); + } + } + // unknown archs are not 64-bit. + return false; +} + +bool MachOLinkingContext::isHostEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->littleEndian == llvm::sys::IsLittleEndianHost); + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isBigEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return ! info->littleEndian; + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::is64Bit() const { + return is64Bit(_arch); +} + +bool MachOLinkingContext::outputTypeHasEntry() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsStubsPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + return !_outputMachOTypeStatic; + case MH_DYLIB: + case MH_BUNDLE: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsGOTPass() const { + // GOT pass not used in -r mode. + if (_outputMachOType == MH_OBJECT) + return false; + // Only some arches use GOT pass. 
+ switch (_arch) { + case arch_x86_64: + case arch_arm64: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsCompactUnwindPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLIB: + case MH_BUNDLE: + return archHandler().needsCompactUnwind(); + default: + return false; + } +} + +bool MachOLinkingContext::needsObjCPass() const { + // ObjC pass is only needed if any of the inputs were ObjC. + return _objcConstraint != objc_unknown; +} + +bool MachOLinkingContext::needsShimPass() const { + // Shim pass only used in final executables. + if (_outputMachOType == MH_OBJECT) + return false; + // Only 32-bit arm arches use Shim pass. + switch (_arch) { + case arch_armv6: + case arch_armv7: + case arch_armv7s: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsTLVPass() const { + switch (_outputMachOType) { + case MH_BUNDLE: + case MH_EXECUTE: + case MH_DYLIB: + return true; + default: + return false; + } +} + +StringRef MachOLinkingContext::binderSymbolName() const { + return archHandler().stubInfo().binderSymbolName; +} + +bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { + uint32_t parsedVersion; + switch (_os) { + case OS::macOSX: + if (parsePackedVersion(mac, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::iOS: + case OS::iOS_simulator: + if (parsePackedVersion(iOS, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::unknown: + // If we don't know the target, then assume that we don't meet the min OS. 
+ // This matches the ld64 behaviour + return false; + } + llvm_unreachable("invalid OS enum"); +} + +bool MachOLinkingContext::addEntryPointLoadCommand() const { + if ((_outputMachOType == MH_EXECUTE) && !_outputMachOTypeStatic) { + return minOS("10.8", "6.0"); + } + return false; +} + +bool MachOLinkingContext::addUnixThreadLoadCommand() const { + switch (_outputMachOType) { + case MH_EXECUTE: + if (_outputMachOTypeStatic) + return true; + else + return !minOS("10.8", "6.0"); + break; + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::pathExists(StringRef path) const { + if (!_testingFileUsage) + return llvm::sys::fs::exists(path.str()); + + // Otherwise, we're in test mode: only files explicitly provided on the + // command-line exist. + std::string key = path.str(); + std::replace(key.begin(), key.end(), '\\', '/'); + return _existingPaths.find(key) != _existingPaths.end(); +} + +bool MachOLinkingContext::fileExists(StringRef path) const { + bool found = pathExists(path); + // Log search misses. + if (!found) + addInputFileNotFound(path); + + // When testing, file is never opened, so logging is done here. + if (_testingFileUsage && found) + addInputFileDependency(path); + + return found; +} + +void MachOLinkingContext::setSysLibRoots(const StringRefVector &paths) { + _syslibRoots = paths; +} + +void MachOLinkingContext::addRpath(StringRef rpath) { + _rpaths.push_back(rpath); +} + +void MachOLinkingContext::addModifiedSearchDir(StringRef libPath, + bool isSystemPath) { + bool addedModifiedPath = false; + + // -syslibroot only applies to absolute paths. 
+ if (libPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, libPath); + if (pathExists(path)) { + _searchDirs.push_back(path.str().copy(_allocator)); + addedModifiedPath = true; + } + } + } + + if (addedModifiedPath) + return; + + // Finally, if only one -syslibroot is given, system paths which aren't in it + // get suppressed. + if (_syslibRoots.size() != 1 || !isSystemPath) { + if (pathExists(libPath)) { + _searchDirs.push_back(libPath); + } + } +} + +void MachOLinkingContext::addFrameworkSearchDir(StringRef fwPath, + bool isSystemPath) { + bool pathAdded = false; + + // -syslibroot only used with to absolute framework search paths. + if (fwPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, fwPath); + if (pathExists(path)) { + _frameworkDirs.push_back(path.str().copy(_allocator)); + pathAdded = true; + } + } + } + // If fwPath found in any -syslibroot, then done. + if (pathAdded) + return; + + // If only one -syslibroot, system paths not in that SDK are suppressed. + if (isSystemPath && (_syslibRoots.size() == 1)) + return; + + // Only use raw fwPath if that directory exists. + if (pathExists(fwPath)) + _frameworkDirs.push_back(fwPath); +} + +llvm::Optional +MachOLinkingContext::searchDirForLibrary(StringRef path, + StringRef libName) const { + SmallString<256> fullPath; + if (libName.endswith(".o")) { + // A request ending in .o is special: just search for the file directly. 
+ fullPath.assign(path); + llvm::sys::path::append(fullPath, libName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + return llvm::None; + } + + // Search for stub library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".tbd"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + // Search for dynamic library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".dylib"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + // If not, try for a static library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".a"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + return llvm::None; +} + +llvm::Optional +MachOLinkingContext::searchLibrary(StringRef libName) const { + SmallString<256> path; + for (StringRef dir : searchDirs()) { + llvm::Optional searchDir = searchDirForLibrary(dir, libName); + if (searchDir) + return searchDir; + } + + return llvm::None; +} + +llvm::Optional +MachOLinkingContext::findPathForFramework(StringRef fwName) const{ + SmallString<256> fullPath; + for (StringRef dir : frameworkDirs()) { + fullPath.assign(dir); + llvm::sys::path::append(fullPath, Twine(fwName) + ".framework", fwName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + } + + return llvm::None; +} + +bool MachOLinkingContext::validateImpl() { + // TODO: if -arch not specified, look at arch of first .o file. 
+ + if (_currentVersion && _outputMachOType != MH_DYLIB) { + error("-current_version can only be used with dylibs"); + return false; + } + + if (_compatibilityVersion && _outputMachOType != MH_DYLIB) { + error("-compatibility_version can only be used with dylibs"); + return false; + } + + if (_deadStrippableDylib && _outputMachOType != MH_DYLIB) { + error("-mark_dead_strippable_dylib can only be used with dylibs"); + return false; + } + + if (!_bundleLoader.empty() && outputMachOType() != MH_BUNDLE) { + error("-bundle_loader can only be used with Mach-O bundles"); + return false; + } + + // If -exported_symbols_list used, all exported symbols must be defined. + if (_exportMode == ExportMode::exported) { + for (const auto &symbol : _exportedSymbols) + addInitialUndefinedSymbol(symbol.getKey()); + } + + // If -dead_strip, set up initial live symbols. + if (deadStrip()) { + // Entry point is live. + if (outputTypeHasEntry()) + addDeadStripRoot(entrySymbolName()); + // Lazy binding helper is live. + if (needsStubsPass()) + addDeadStripRoot(binderSymbolName()); + // If using -exported_symbols_list, make all exported symbols live. + if (_exportMode == ExportMode::exported) { + setGlobalsAreDeadStripRoots(false); + for (const auto &symbol : _exportedSymbols) + addDeadStripRoot(symbol.getKey()); + } + } + + addOutputFileDependency(outputPath()); + + return true; +} + +void MachOLinkingContext::addPasses(PassManager &pm) { + // objc pass should be before layout pass. Otherwise test cases may contain + // no atoms which confuses the layout pass. + if (needsObjCPass()) + mach_o::addObjCPass(pm, *this); + mach_o::addLayoutPass(pm, *this); + if (needsStubsPass()) + mach_o::addStubsPass(pm, *this); + if (needsCompactUnwindPass()) + mach_o::addCompactUnwindPass(pm, *this); + if (needsGOTPass()) + mach_o::addGOTPass(pm, *this); + if (needsTLVPass()) + mach_o::addTLVPass(pm, *this); + if (needsShimPass()) + mach_o::addShimPass(pm, *this); // Shim pass must run after stubs pass. 
+} + +Writer &MachOLinkingContext::writer() const { + if (!_writer) + _writer = createWriterMachO(*this); + return *_writer; +} + +ErrorOr> +MachOLinkingContext::getMemoryBuffer(StringRef path) { + addInputFileDependency(path); + + ErrorOr> mbOrErr = + MemoryBuffer::getFileOrSTDIN(path); + if (std::error_code ec = mbOrErr.getError()) + return ec; + std::unique_ptr mb = std::move(mbOrErr.get()); + + // If buffer contains a fat file, find required arch in fat buffer + // and switch buffer to point to just that required slice. + uint32_t offset; + uint32_t size; + if (sliceFromFatFile(mb->getMemBufferRef(), offset, size)) + return MemoryBuffer::getFileSlice(path, size, offset); + return std::move(mb); +} + +MachODylibFile* MachOLinkingContext::loadIndirectDylib(StringRef path) { + ErrorOr> mbOrErr = getMemoryBuffer(path); + if (mbOrErr.getError()) + return nullptr; + + ErrorOr> fileOrErr = + registry().loadFile(std::move(mbOrErr.get())); + if (!fileOrErr) + return nullptr; + std::unique_ptr &file = fileOrErr.get(); + file->parse(); + MachODylibFile *result = reinterpret_cast(file.get()); + // Node object now owned by _indirectDylibs vector. + _indirectDylibs.push_back(std::move(file)); + return result; +} + +MachODylibFile* MachOLinkingContext::findIndirectDylib(StringRef path) { + // See if already loaded. + auto pos = _pathToDylibMap.find(path); + if (pos != _pathToDylibMap.end()) + return pos->second; + + // Search -L paths if of the form "libXXX.dylib" + std::pair split = path.rsplit('/'); + StringRef leafName = split.second; + if (leafName.startswith("lib") && leafName.endswith(".dylib")) { + // FIXME: Need to enhance searchLibrary() to only look for .dylib + auto libPath = searchLibrary(leafName); + if (libPath) + return loadIndirectDylib(libPath.getValue()); + } + + // Try full path with sysroot. 
+ for (StringRef sysPath : _syslibRoots) { + SmallString<256> fullPath; + fullPath.assign(sysPath); + llvm::sys::path::append(fullPath, path); + if (pathExists(fullPath)) + return loadIndirectDylib(fullPath); + } + + // Try full path. + if (pathExists(path)) { + return loadIndirectDylib(path); + } + + return nullptr; +} + +uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->currentVersion(); + else + return 0x10000; // 1.0 +} + +uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->compatVersion(); + else + return 0x10000; // 1.0 +} + +void MachOLinkingContext::createImplicitFiles( + std::vector > &result) { + // Add indirect dylibs by asking each linked dylib to add its indirects. + // Iterate until no more dylibs get loaded. + size_t dylibCount = 0; + while (dylibCount != _allDylibs.size()) { + dylibCount = _allDylibs.size(); + for (MachODylibFile *dylib : _allDylibs) { + dylib->loadReExportedDylibs([this] (StringRef path) -> MachODylibFile* { + return findIndirectDylib(path); }); + } + } + + // Let writer add output type specific extras. + writer().createImplicitFiles(result); + + // If undefinedMode is != error, add a FlatNamespaceFile instance. This will + // provide a SharedLibraryAtom for symbols that aren't defined elsewhere. + if (undefinedMode() != UndefinedMode::error) { + result.emplace_back(new mach_o::FlatNamespaceFile(*this)); + _flatNamespaceFile = result.back().get(); + } +} + +void MachOLinkingContext::registerDylib(MachODylibFile *dylib, + bool upward) const { + std::lock_guard lock(_dylibsMutex); + + if (!llvm::count(_allDylibs, dylib)) + _allDylibs.push_back(dylib); + _pathToDylibMap[dylib->installName()] = dylib; + // If path is different than install name, register path too. 
+ if (!dylib->path().equals(dylib->installName())) + _pathToDylibMap[dylib->path()] = dylib; + if (upward) + _upwardDylibs.insert(dylib); +} + +bool MachOLinkingContext::isUpwardDylib(StringRef installName) const { + for (MachODylibFile *dylib : _upwardDylibs) { + if (dylib->installName().equals(installName)) + return true; + } + return false; +} + +ArchHandler &MachOLinkingContext::archHandler() const { + if (!_archHandler) + _archHandler = ArchHandler::create(_arch); + return *_archHandler; +} + +void MachOLinkingContext::addSectionAlignment(StringRef seg, StringRef sect, + uint16_t align) { + SectionAlign entry = { seg, sect, align }; + _sectAligns.push_back(entry); +} + +void MachOLinkingContext::addSectCreateSection( + StringRef seg, StringRef sect, + std::unique_ptr content) { + + if (!_sectCreateFile) { + auto sectCreateFile = std::make_unique(); + _sectCreateFile = sectCreateFile.get(); + getNodes().push_back(std::make_unique(std::move(sectCreateFile))); + } + + assert(_sectCreateFile && "sectcreate file does not exist."); + _sectCreateFile->addSection(seg, sect, std::move(content)); +} + +bool MachOLinkingContext::sectionAligned(StringRef seg, StringRef sect, + uint16_t &align) const { + for (const SectionAlign &entry : _sectAligns) { + if (seg.equals(entry.segmentName) && sect.equals(entry.sectionName)) { + align = entry.align; + return true; + } + } + return false; +} + +void MachOLinkingContext::addExportSymbol(StringRef sym) { + // Support old crufty export lists with bogus entries. + if (sym.endswith(".eh") || sym.startswith(".objc_category_name_")) { + llvm::errs() << "warning: ignoring " << sym << " in export list\n"; + return; + } + // Only i386 MacOSX uses old ABI, so don't change those. + if ((_os != OS::macOSX) || (_arch != arch_x86)) { + // ObjC has two different ABIs. Be nice and allow one export list work for + // both ABIs by renaming symbols. 
+ if (sym.startswith(".objc_class_name_")) { + std::string abi2className("_OBJC_CLASS_$_"); + abi2className += sym.substr(17); + _exportedSymbols.insert(copy(abi2className)); + std::string abi2metaclassName("_OBJC_METACLASS_$_"); + abi2metaclassName += sym.substr(17); + _exportedSymbols.insert(copy(abi2metaclassName)); + return; + } + } + + // FIXME: Support wildcards. + _exportedSymbols.insert(sym); +} + +bool MachOLinkingContext::exportSymbolNamed(StringRef sym) const { + switch (_exportMode) { + case ExportMode::globals: + llvm_unreachable("exportSymbolNamed() should not be called in this mode"); + break; + case ExportMode::exported: + return _exportedSymbols.count(sym); + case ExportMode::unexported: + return !_exportedSymbols.count(sym); + } + llvm_unreachable("_exportMode unknown enum value"); +} + +std::string MachOLinkingContext::demangle(StringRef symbolName) const { + // Only try to demangle symbols if -demangle on command line + if (!demangleSymbols()) + return std::string(symbolName); + + // Only try to demangle symbols that look like C++ symbols + if (!symbolName.startswith("__Z")) + return std::string(symbolName); + + SmallString<256> symBuff; + StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); + // Mach-O has extra leading underscore that needs to be removed. + const char *cstr = nullTermSym.data() + 1; + int status; + char *demangled = llvm::itaniumDemangle(cstr, nullptr, nullptr, &status); + if (demangled) { + std::string result(demangled); + // __cxa_demangle() always uses a malloc'ed buffer to return the result. 
+ free(demangled); + return result; + } + + return std::string(symbolName); +} + +static void addDependencyInfoHelper(llvm::raw_fd_ostream *DepInfo, + char Opcode, StringRef Path) { + if (!DepInfo) + return; + + *DepInfo << Opcode; + *DepInfo << Path; + *DepInfo << '\0'; +} + +std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { + std::error_code ec; + _dependencyInfo = std::unique_ptr( + new llvm::raw_fd_ostream(path, ec, llvm::sys::fs::OF_None)); + if (ec) { + _dependencyInfo.reset(); + return ec; + } + + addDependencyInfoHelper(_dependencyInfo.get(), 0x00, "lld" /*FIXME*/); + return std::error_code(); +} + +void MachOLinkingContext::addInputFileDependency(StringRef path) const { + addDependencyInfoHelper(_dependencyInfo.get(), 0x10, path); +} + +void MachOLinkingContext::addInputFileNotFound(StringRef path) const { + addDependencyInfoHelper(_dependencyInfo.get(), 0x11, path); +} + +void MachOLinkingContext::addOutputFileDependency(StringRef path) const { + addDependencyInfoHelper(_dependencyInfo.get(), 0x40, path); +} + +void MachOLinkingContext::appendOrderedSymbol(StringRef symbol, + StringRef filename) { + // To support sorting static functions which may have the same name in + // multiple .o files, _orderFiles maps the symbol name to a vector + // of OrderFileNode each of which can specify a file prefix. + OrderFileNode info; + if (!filename.empty()) + info.fileFilter = copy(filename); + info.order = _orderFileEntries++; + _orderFiles[symbol].push_back(info); +} + +bool +MachOLinkingContext::findOrderOrdinal(const std::vector &nodes, + const DefinedAtom *atom, + unsigned &ordinal) { + const File *objFile = &atom->file(); + assert(objFile); + StringRef objName = objFile->path(); + std::pair dirAndLeaf = objName.rsplit('/'); + if (!dirAndLeaf.second.empty()) + objName = dirAndLeaf.second; + for (const OrderFileNode &info : nodes) { + if (info.fileFilter.empty()) { + // Have unprefixed symbol name in order file that matches this atom. 
+ ordinal = info.order; + return true; + } + if (info.fileFilter.equals(objName)) { + // Have prefixed symbol name in order file that matches atom's path. + ordinal = info.order; + return true; + } + } + return false; +} + +bool MachOLinkingContext::customAtomOrderer(const DefinedAtom *left, + const DefinedAtom *right, + bool &leftBeforeRight) const { + // No custom sorting if no order file entries. + if (!_orderFileEntries) + return false; + + // Order files can only order named atoms. + StringRef leftName = left->name(); + StringRef rightName = right->name(); + if (leftName.empty() || rightName.empty()) + return false; + + // If neither is in order file list, no custom sorter. + auto leftPos = _orderFiles.find(leftName); + auto rightPos = _orderFiles.find(rightName); + bool leftIsOrdered = (leftPos != _orderFiles.end()); + bool rightIsOrdered = (rightPos != _orderFiles.end()); + if (!leftIsOrdered && !rightIsOrdered) + return false; + + // There could be multiple symbols with same name but different file prefixes. + unsigned leftOrder; + unsigned rightOrder; + bool foundLeft = + leftIsOrdered && findOrderOrdinal(leftPos->getValue(), left, leftOrder); + bool foundRight = rightIsOrdered && + findOrderOrdinal(rightPos->getValue(), right, rightOrder); + if (!foundLeft && !foundRight) + return false; + + // If only one is in order file list, ordered one goes first. + if (foundLeft != foundRight) + leftBeforeRight = foundLeft; + else + leftBeforeRight = (leftOrder < rightOrder); + + return true; +} + +static bool isLibrary(const std::unique_ptr &elem) { + if (FileNode *node = dyn_cast(const_cast(elem.get()))) { + File *file = node->getFile(); + return isa(file) || isa(file); + } + return false; +} + +// The darwin linker processes input files in two phases. The first phase +// links in all object (.o) files in command line order. The second phase +// links in libraries in command line order. 
+// In this function we reorder the input files so that all the object files +// comes before any library file. We also make a group for the library files +// so that the Resolver will reiterate over the libraries as long as we find +// new undefines from libraries. +void MachOLinkingContext::finalizeInputFiles() { + std::vector> &elements = getNodes(); + llvm::stable_sort(elements, [](const std::unique_ptr &a, + const std::unique_ptr &b) { + return !isLibrary(a) && isLibrary(b); + }); + size_t numLibs = std::count_if(elements.begin(), elements.end(), isLibrary); + elements.push_back(std::make_unique(numLibs)); +} + +llvm::Error MachOLinkingContext::handleLoadedFile(File &file) { + auto *machoFile = dyn_cast(&file); + if (!machoFile) + return llvm::Error::success(); + + // Check that the arch of the context matches that of the file. + // Also set the arch of the context if it didn't have one. + if (_arch == arch_unknown) { + _arch = machoFile->arch(); + } else if (machoFile->arch() != arch_unknown && machoFile->arch() != _arch) { + // Archs are different. + return llvm::make_error(file.path() + + Twine(" cannot be linked due to incompatible architecture")); + } + + // Check that the OS of the context matches that of the file. + // Also set the OS of the context if it didn't have one. + if (_os == OS::unknown) { + _os = machoFile->OS(); + } else if (machoFile->OS() != OS::unknown && machoFile->OS() != _os) { + // OSes are different. + return llvm::make_error(file.path() + + Twine(" cannot be linked due to incompatible operating systems")); + } + + // Check that if the objc info exists, that it is compatible with the target + // OS. + switch (machoFile->objcConstraint()) { + case objc_unknown: + // The file is not compiled with objc, so skip the checks. 
+ break; + case objc_gc_only: + case objc_supports_gc: + llvm_unreachable("GC support should already have thrown an error"); + case objc_retainReleaseForSimulator: + // The file is built with simulator objc, so make sure that the context + // is also building with simulator support. + if (_os != OS::iOS_simulator) + return llvm::make_error(file.path() + + Twine(" cannot be linked. It contains ObjC built for the simulator" + " while we are linking a non-simulator target")); + assert((_objcConstraint == objc_unknown || + _objcConstraint == objc_retainReleaseForSimulator) && + "Must be linking with retain/release for the simulator"); + _objcConstraint = objc_retainReleaseForSimulator; + break; + case objc_retainRelease: + // The file is built without simulator objc, so make sure that the + // context is also building without simulator support. + if (_os == OS::iOS_simulator) + return llvm::make_error(file.path() + + Twine(" cannot be linked. It contains ObjC built for a non-simulator" + " target while we are linking a simulator target")); + assert((_objcConstraint == objc_unknown || + _objcConstraint == objc_retainRelease) && + "Must be linking with retain/release for a non-simulator target"); + _objcConstraint = objc_retainRelease; + break; + } + + // Check that the swift version of the context matches that of the file. + // Also set the swift version of the context if it didn't have one. + if (!_swiftVersion) { + _swiftVersion = machoFile->swiftVersion(); + } else if (machoFile->swiftVersion() && + machoFile->swiftVersion() != _swiftVersion) { + // Swift versions are different. 
+ return llvm::make_error("different swift versions"); + } + + return llvm::Error::success(); +} + +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h new file mode 100644 index 000000000000..3ef2949addab --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h @@ -0,0 +1,336 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file These data structures comprise the "normalized" view of +/// mach-o object files. The normalized view is an in-memory only data structure +/// which is always in native endianness and pointer size. +/// +/// The normalized view easily converts to and from YAML using YAML I/O. +/// +/// The normalized view converts to and from binary mach-o object files using +/// the writeBinary() and readBinary() functions. +/// +/// The normalized view converts to and from lld::Atoms using the +/// normalizedToAtoms() and normalizedFromAtoms(). 
+/// +/// Overall, the conversion paths available look like: +/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// v +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ +/// ^ +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ +/// + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H +#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H + +#include "DebugInfo.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/YAMLTraits.h" + +using llvm::BumpPtrAllocator; +using llvm::yaml::Hex64; +using llvm::yaml::Hex32; +using llvm::yaml::Hex16; +using llvm::yaml::Hex8; +using llvm::yaml::SequenceTraits; +using llvm::MachO::HeaderFileType; +using llvm::MachO::BindType; +using llvm::MachO::RebaseType; +using llvm::MachO::NListType; +using llvm::MachO::RelocationInfoType; +using llvm::MachO::SectionType; +using llvm::MachO::LoadCommandType; +using llvm::MachO::ExportSymbolKind; +using llvm::MachO::DataRegionType; + +namespace lld { +namespace mach_o { +namespace normalized { + + +/// The real mach-o relocation record is 8-bytes on disk and is +/// encoded in one of two different bit-field patterns. This +/// normalized form has the union of all possible fields. +struct Relocation { + Relocation() : offset(0), scattered(false), + type(llvm::MachO::GENERIC_RELOC_VANILLA), + length(0), pcRel(false), isExtern(false), value(0), + symbol(0) { } + + Hex32 offset; + bool scattered; + RelocationInfoType type; + uint8_t length; + bool pcRel; + bool isExtern; + Hex32 value; + uint32_t symbol; +}; + +/// A typedef so that YAML I/O can treat this vector as a sequence. 
+typedef std::vector Relocations; + +/// A typedef so that YAML I/O can process the raw bytes in a section. +typedef std::vector ContentBytes; + +/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence. +typedef std::vector IndirectSymbols; + +/// A typedef so that YAML I/O can encode/decode section attributes. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr) + +/// A typedef so that YAML I/O can encode/decode section alignment. +LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment) + +/// Mach-O has a 32-bit and 64-bit section record. This normalized form +/// can support either kind. +struct Section { + Section() : type(llvm::MachO::S_REGULAR), + attributes(0), alignment(1), address(0) { } + + StringRef segmentName; + StringRef sectionName; + SectionType type; + SectionAttr attributes; + SectionAlignment alignment; + Hex64 address; + ArrayRef content; + Relocations relocations; + IndirectSymbols indirectSymbols; +}; + + +/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope) + +/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist. +LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc) + +/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol +/// type and scope and mixed in the same n_type field. This normalized form +/// works for any pointer size and separates out the type and scope. +struct Symbol { + Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { } + + StringRef name; + NListType type; + SymbolScope scope; + uint8_t sect; + SymbolDesc desc; + Hex64 value; +}; + +/// Check whether the given section type indicates a zero-filled section. +// FIXME: Utility functions of this kind should probably be moved into +// llvm/Support. 
+inline bool isZeroFillSection(SectionType T) { + return (T == llvm::MachO::S_ZEROFILL || + T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL); +} + +/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect) + +/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz +LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion) + +/// Segments are only used in normalized final linked images (not in relocatable +/// object files). They specify how a range of the file is loaded. +struct Segment { + StringRef name; + Hex64 address; + Hex64 size; + VMProtect init_access; + VMProtect max_access; +}; + +/// Only used in normalized final linked images to specify on which dylibs +/// it depends. +struct DependentDylib { + StringRef path; + LoadCommandType kind; + PackedVersion compatVersion; + PackedVersion currentVersion; +}; + +/// A normalized rebasing entry. Only used in normalized final linked images. +struct RebaseLocation { + Hex32 segOffset; + uint8_t segIndex; + RebaseType kind; +}; + +/// A normalized binding entry. Only used in normalized final linked images. +struct BindLocation { + Hex32 segOffset; + uint8_t segIndex; + BindType kind; + bool canBeNull; + int ordinal; + StringRef symbolName; + Hex64 addend; +}; + +/// A typedef so that YAML I/O can encode/decode export flags. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags) + +/// A normalized export entry. Only used in normalized final linked images. +struct Export { + StringRef name; + Hex64 offset; + ExportSymbolKind kind; + ExportFlags flags; + Hex32 otherOffset; + StringRef otherName; +}; + +/// A normalized data-in-code entry. +struct DataInCode { + Hex32 offset; + Hex16 length; + DataRegionType kind; +}; + +/// A typedef so that YAML I/O can encode/decode mach_header.flags. 
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags) + +/// +struct NormalizedFile { + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + HeaderFileType fileType = llvm::MachO::MH_OBJECT; + FileFlags flags = 0; + std::vector segments; // Not used in object files. + std::vector
sections; + + // Symbols sorted by kind. + std::vector localSymbols; + std::vector globalSymbols; + std::vector undefinedSymbols; + std::vector stabsSymbols; + + // Maps to load commands with no LINKEDIT content (final linked images only). + std::vector dependentDylibs; + StringRef installName; // dylibs only + PackedVersion compatVersion = 0; // dylibs only + PackedVersion currentVersion = 0; // dylibs only + bool hasUUID = false; + bool hasMinVersionLoadCommand = false; + bool generateDataInCodeLoadCommand = false; + std::vector rpaths; + Hex64 entryAddress = 0; + Hex64 stackSize = 0; + MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; + Hex64 sourceVersion = 0; + PackedVersion minOSverson = 0; + PackedVersion sdkVersion = 0; + LoadCommandType minOSVersionKind = (LoadCommandType)0; + + // Maps to load commands with LINKEDIT content (final linked images only). + Hex32 pageSize = 0; + std::vector rebasingInfo; + std::vector bindingInfo; + std::vector weakBindingInfo; + std::vector lazyBindingInfo; + std::vector exportInfo; + std::vector functionStarts; + std::vector dataInCode; + + // TODO: + // code-signature + // split-seg-info + // function-starts + + // For any allocations in this struct which need to be owned by this struct. + BumpPtrAllocator ownedAllocations; +}; + +/// Tests if a file is a non-fat mach-o object file. +bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch); + +/// If the buffer is a fat file with the request arch, then this function +/// returns true with 'offset' and 'size' set to location of the arch slice +/// within the buffer. Otherwise returns false; +bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, + uint32_t &offset, uint32_t &size); + +/// Reads a mach-o file and produces an in-memory normalized view. +llvm::Expected> +readBinary(std::unique_ptr &mb, + const MachOLinkingContext::Arch arch); + +/// Takes in-memory normalized view and writes a mach-o object file. 
+llvm::Error writeBinary(const NormalizedFile &file, StringRef path); + +size_t headerAndLoadCommandsSize(const NormalizedFile &file, + bool includeFunctionStarts); + + +/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. +llvm::Expected> +readYaml(std::unique_ptr &mb); + +/// Writes a yaml encoded mach-o files given an in-memory normalized view. +std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out); + +llvm::Error +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs); + +llvm::Error +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs); + +/// Takes in-memory normalized dylib or object and parses it into lld::File +llvm::Expected> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs); + +/// Takes atoms and generates a normalized macho-o view. +llvm::Expected> +normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt); + + +} // namespace normalized + +/// Class for interfacing mach-o yaml files into generic yaml parsing +class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { +public: + MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch) + : _arch(arch) { } + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override; +private: + const MachOLinkingContext::Arch _arch; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp new file mode 100644 index 000000000000..87601ca1be8b --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp @@ -0,0 +1,614 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts from +/// mach-o on-disk binary format to in-memory normalized mach-o. +/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// | +/// | +/// v +/// +------------+ +/// | normalized | +/// +------------+ + +#include "ArchHandler.h" +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm::MachO; +using llvm::object::ExportEntry; +using llvm::file_magic; +using llvm::object::MachOObjectFile; + +namespace lld { +namespace mach_o { +namespace normalized { + +// Utility to call a lambda expression on each load command. 
+static llvm::Error forEachLoadCommand( + StringRef lcRange, unsigned lcCount, bool isBig, bool is64, + std::function func) { + const char* p = lcRange.begin(); + for (unsigned i=0; i < lcCount; ++i) { + const load_command *lc = reinterpret_cast(p); + load_command lcCopy; + const load_command *slc = lc; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&lcCopy, lc, sizeof(load_command)); + swapStruct(lcCopy); + slc = &lcCopy; + } + if ( (p + slc->cmdsize) > lcRange.end() ) + return llvm::make_error("Load command exceeds range"); + + if (func(slc->cmd, slc->cmdsize, p)) + return llvm::Error::success(); + + p += slc->cmdsize; + } + + return llvm::Error::success(); +} + +static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, + bool bigEndian, + uint32_t reloff, uint32_t nreloc) { + if ((reloff + nreloc*8) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + const any_relocation_info* relocsArray = + reinterpret_cast(buffer.begin()+reloff); + + for(uint32_t i=0; i < nreloc; ++i) { + relocs.push_back(unpackRelocation(relocsArray[i], bigEndian)); + } + return std::error_code(); +} + +static std::error_code +appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig, + uint32_t istOffset, uint32_t istCount, + uint32_t startIndex, uint32_t count) { + if ((istOffset + istCount*4) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + if (startIndex+count > istCount) + return make_error_code(llvm::errc::executable_format_error); + const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data(); + + for(uint32_t i=0; i < count; ++i) { + isyms.push_back(read32( + indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig)); + } + return std::error_code(); +} + + +template static T readBigEndian(T t) { + if (llvm::sys::IsLittleEndianHost) + llvm::sys::swapByteOrder(t); + return t; +} + + +static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) { + 
switch (read32(&mh->magic, false)) { + case llvm::MachO::MH_MAGIC: + is64 = false; + isBig = false; + return true; + case llvm::MachO::MH_MAGIC_64: + is64 = true; + isBig = false; + return true; + case llvm::MachO::MH_CIGAM: + is64 = false; + isBig = true; + return true; + case llvm::MachO::MH_CIGAM_64: + is64 = true; + isBig = true; + return true; + default: + return false; + } +} + + +bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) { + // Try opening and mapping file at path. + ErrorOr> b = MemoryBuffer::getFileOrSTDIN(path); + if (b.getError()) + return false; + + // If file length < 32 it is too small to be mach-o object file. + StringRef fileBuffer = b->get()->getBuffer(); + if (fileBuffer.size() < 32) + return false; + + // If file buffer does not start with MH_MAGIC (and variants), not obj file. + const mach_header *mh = reinterpret_cast( + fileBuffer.begin()); + bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return false; + + // If not MH_OBJECT, not object file. + if (read32(&mh->filetype, isBig) != MH_OBJECT) + return false; + + // Lookup up arch from cpu/subtype pair. 
+ arch = MachOLinkingContext::archFromCpuType( + read32(&mh->cputype, isBig), + read32(&mh->cpusubtype, isBig)); + return true; +} + +bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, + uint32_t &offset, uint32_t &size) { + const char *start = mb.getBufferStart(); + const llvm::MachO::fat_header *fh = + reinterpret_cast(start); + if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC) + return false; + uint32_t nfat_arch = readBigEndian(fh->nfat_arch); + const fat_arch *fstart = + reinterpret_cast(start + sizeof(fat_header)); + const fat_arch *fend = + reinterpret_cast(start + sizeof(fat_header) + + sizeof(fat_arch) * nfat_arch); + const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch); + const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch); + for (const fat_arch *fa = fstart; fa < fend; ++fa) { + if ((readBigEndian(fa->cputype) == reqCpuType) && + (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) { + offset = readBigEndian(fa->offset); + size = readBigEndian(fa->size); + if ((offset + size) > mb.getBufferSize()) + return false; + return true; + } + } + return false; +} + +/// Reads a mach-o file and produces an in-memory normalized view. +llvm::Expected> +readBinary(std::unique_ptr &mb, + const MachOLinkingContext::Arch arch) { + // Make empty NormalizedFile. + std::unique_ptr f(new NormalizedFile()); + + const char *start = mb->getBufferStart(); + size_t objSize = mb->getBufferSize(); + const mach_header *mh = reinterpret_cast(start); + + uint32_t sliceOffset; + uint32_t sliceSize; + if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) { + start = &start[sliceOffset]; + objSize = sliceSize; + mh = reinterpret_cast(start); + } + + // Determine endianness and pointer size for mach-o file. + bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return llvm::make_error("File is not a mach-o"); + + // Endian swap header, if needed. 
+ mach_header headerCopy; + const mach_header *smh = mh; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&headerCopy, mh, sizeof(mach_header)); + swapStruct(headerCopy); + smh = &headerCopy; + } + + // Validate head and load commands fit in buffer. + const uint32_t lcCount = smh->ncmds; + const char *lcStart = + start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)); + StringRef lcRange(lcStart, smh->sizeofcmds); + if (lcRange.end() > (start + objSize)) + return llvm::make_error("Load commands exceed file size"); + + // Get architecture from mach_header. + f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); + if (f->arch != arch) { + return llvm::make_error( + Twine("file is wrong architecture. Expected " + "(" + MachOLinkingContext::nameFromArch(arch) + + ") found (" + + MachOLinkingContext::nameFromArch(f->arch) + + ")" )); + } + // Copy file type and flags + f->fileType = HeaderFileType(smh->filetype); + f->flags = smh->flags; + + + // Pre-scan load commands looking for indirect symbol table. + uint32_t indirectSymbolTableOffset = 0; + uint32_t indirectSymbolTableCount = 0; + auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&](uint32_t cmd, uint32_t size, + const char *lc) -> bool { + if (cmd == LC_DYSYMTAB) { + const dysymtab_command *d = reinterpret_cast(lc); + indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig); + indirectSymbolTableCount = read32(&d->nindirectsyms, isBig); + return true; + } + return false; + }); + if (ec) + return std::move(ec); + + // Walk load commands looking for segments/sections and the symbol table. 
+ const data_in_code_entry *dataInCode = nullptr; + const dyld_info_command *dyldInfo = nullptr; + uint32_t dataInCodeSize = 0; + ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { + switch(cmd) { + case LC_SEGMENT_64: + if (is64) { + const segment_command_64 *seg = + reinterpret_cast(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section_64 *sects = reinterpret_cast + (lc + sizeof(segment_command_64)); + const unsigned lcSize = sizeof(segment_command_64) + + sectionCount*sizeof(section_64); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section_64 *sect = §s[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(§->flags, isBig) & + SECTION_TYPE); + section.attributes = read32(§->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = 1 << read32(§->align, isBig); + section.address = read64(§->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(§->offset, isBig); + size_t contentSize = read64(§->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(§->reloff, isBig), + read32(§->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(), + isBig, + indirectSymbolTableOffset, + indirectSymbolTableCount, + read32(§->reserved1, isBig), + contentSize/4); + } + f->sections.push_back(section); + } + } + break; + case LC_SEGMENT: + if (!is64) { + const segment_command *seg = + reinterpret_cast(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section *sects = reinterpret_cast + (lc + sizeof(segment_command)); + const unsigned lcSize = sizeof(segment_command) + + sectionCount*sizeof(section); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section *sect = §s[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(§->flags, isBig) & + SECTION_TYPE); + section.attributes = + read32((const uint8_t *)§->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = 1 << read32(§->align, isBig); + section.address = read32(§->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(§->offset, isBig); + size_t contentSize = read32(§->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(§->reloff, isBig), + read32(§->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols( + section.indirectSymbols, mb->getBuffer(), isBig, + indirectSymbolTableOffset, indirectSymbolTableCount, + read32(§->reserved1, isBig), contentSize / 4); + } + f->sections.push_back(section); + } + } + break; + case LC_SYMTAB: { + const symtab_command *st = reinterpret_cast(lc); + const char *strings = start + read32(&st->stroff, isBig); + const uint32_t strSize = read32(&st->strsize, isBig); + // Validate string pool and symbol table all in buffer. + if (read32((const uint8_t *)&st->stroff, isBig) + + read32((const uint8_t *)&st->strsize, isBig) > + objSize) + return true; + if (is64) { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist_64)) > objSize) + return true; + const nlist_64 *symbols = + reinterpret_cast(start + symOffset); + // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + nlist_64 tempSym; + memcpy(&tempSym, &symbols[i], sizeof(nlist_64)); + const nlist_64 *sin = &tempSym; + if (isBig != llvm::sys::IsBigEndianHost) + swapStruct(tempSym); + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = static_cast(sin->n_type & (N_STAB|N_TYPE)); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sin->n_type & N_STAB) + f->stabsSymbols.push_back(sout); + else if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sin->n_type & N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } else { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist)) > objSize) + return true; + const nlist *symbols = + reinterpret_cast(start + symOffset); + // Convert each nlist to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + const nlist *sin = &symbols[i]; + nlist tempSym; + if (isBig != llvm::sys::IsBigEndianHost) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sout.scope == (SymbolScope)N_EXT) + f->globalSymbols.push_back(sout); + else if (sin->n_type & N_STAB) + f->stabsSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } + } + break; + case LC_ID_DYLIB: { + const dylib_command *dl = reinterpret_cast(lc); + f->installName = lc + read32(&dl->dylib.name, isBig); + f->currentVersion = read32(&dl->dylib.current_version, isBig); + f->compatVersion = read32(&dl->dylib.compatibility_version, isBig); + } + break; + case LC_DATA_IN_CODE: { + const linkedit_data_command *ldc = + reinterpret_cast(lc); + dataInCode = reinterpret_cast( + start + read32(&ldc->dataoff, isBig)); + dataInCodeSize = read32(&ldc->datasize, isBig); + } + break; + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: { + const dylib_command *dl = reinterpret_cast(lc); + DependentDylib entry; + entry.path = lc + read32(&dl->dylib.name, isBig); + entry.kind = LoadCommandType(cmd); + entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig); + entry.currentVersion = read32(&dl->dylib.current_version, isBig); + f->dependentDylibs.push_back(entry); + } + break; + case LC_RPATH: { + const rpath_command *rpc = reinterpret_cast(lc); + f->rpaths.push_back(lc + read32(&rpc->path, isBig)); + } + break; + case LC_DYLD_INFO: + case LC_DYLD_INFO_ONLY: + dyldInfo = reinterpret_cast(lc); + break; + case LC_VERSION_MIN_MACOSX: + case 
LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_WATCHOS: + case LC_VERSION_MIN_TVOS: + // If we are emitting an object file, then we may take the load command + // kind from these commands and pass it on to the output + // file. + f->minOSVersionKind = (LoadCommandType)cmd; + break; + } + return false; + }); + if (ec) + return std::move(ec); + + if (dataInCode) { + // Convert on-disk data_in_code_entry array to DataInCode vector. + for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { + DataInCode entry; + entry.offset = read32(&dataInCode[i].offset, isBig); + entry.length = read16(&dataInCode[i].length, isBig); + entry.kind = + (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig); + f->dataInCode.push_back(entry); + } + } + + if (dyldInfo) { + // If any exports, extract and add to normalized exportInfo vector. + if (dyldInfo->export_size) { + const uint8_t *trieStart = reinterpret_cast( + start + read32(&dyldInfo->export_off, isBig)); + ArrayRef trie(trieStart, read32(&dyldInfo->export_size, isBig)); + Error Err = Error::success(); + for (const ExportEntry &trieExport : MachOObjectFile::exports(Err, trie)) { + Export normExport; + normExport.name = trieExport.name().copy(f->ownedAllocations); + normExport.offset = trieExport.address(); + normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK); + normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK; + normExport.otherOffset = trieExport.other(); + if (!trieExport.otherName().empty()) + normExport.otherName = trieExport.otherName().copy(f->ownedAllocations); + f->exportInfo.push_back(normExport); + } + if (Err) + return std::move(Err); + } + } + + return std::move(f); +} + +class MachOObjectReader : public Reader { +public: + MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + return (magic == file_magic::macho_object && mb.getBufferSize() > 32); + } + + 
ErrorOr> + loadFile(std::unique_ptr mb, + const Registry ®istry) const override { + std::unique_ptr ret = + std::make_unique(std::move(mb), &_ctx); + return std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +class MachODylibReader : public Reader { +public: + MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + switch (magic) { + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamically_linked_shared_lib_stub: + return mb.getBufferSize() > 32; + default: + return false; + } + } + + ErrorOr> + loadFile(std::unique_ptr mb, + const Registry ®istry) const override { + std::unique_ptr ret = + std::make_unique(std::move(mb), &_ctx); + return std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +class MachOTAPIReader : public Reader { +public: + MachOTAPIReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + return magic == file_magic::tapi_file; + } + + ErrorOr> + loadFile(std::unique_ptr mb, + const Registry ®istry) const override { + std::unique_ptr ret = + std::make_unique(std::move(mb), &_ctx); + return std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +} // namespace normalized +} // namespace mach_o + +void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { + MachOLinkingContext::Arch arch = ctx.arch(); + add(std::unique_ptr(new mach_o::normalized::MachOObjectReader(ctx))); + add(std::unique_ptr(new mach_o::normalized::MachODylibReader(ctx))); + add(std::unique_ptr(new mach_o::normalized::MachOTAPIReader(ctx))); + addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(), + ctx.archHandler().kindStrings()); + add(std::unique_ptr( + new mach_o::MachOYamlIOTaggedDocumentHandler(arch))); +} + + +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h 
b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h new file mode 100644 index 000000000000..aeb04ef4508a --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h @@ -0,0 +1,213 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H +#define LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H + +#include "MachONormalizedFile.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include + +namespace lld { +namespace mach_o { +namespace normalized { + +class ByteBuffer { +public: + ByteBuffer() : _ostream(_bytes) { } + + void append_byte(uint8_t b) { + _ostream << b; + } + void append_uleb128(uint64_t value) { + llvm::encodeULEB128(value, _ostream); + } + void append_uleb128Fixed(uint64_t value, unsigned byteCount) { + unsigned min = llvm::getULEB128Size(value); + assert(min <= byteCount); + unsigned pad = byteCount - min; + llvm::encodeULEB128(value, _ostream, pad); + } + void append_sleb128(int64_t value) { + llvm::encodeSLEB128(value, _ostream); + } + void append_string(StringRef str) { + _ostream << str; + append_byte(0); + } + void align(unsigned alignment) { + while ( (_ostream.tell() % alignment) != 0 ) + append_byte(0); + } + size_t size() { + return _ostream.tell(); + } + const uint8_t *bytes() { + return reinterpret_cast(_ostream.str().data()); + } + +private: + SmallVector _bytes; + // 
Stream ivar must be after SmallVector ivar to construct properly. + llvm::raw_svector_ostream _ostream; +}; + +using namespace llvm::support::endian; +using llvm::sys::getSwappedBytes; + +template +static inline uint16_t read16(const T *loc, bool isBig) { + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); + return isBig ? read16be(loc) : read16le(loc); +} + +template +static inline uint32_t read32(const T *loc, bool isBig) { + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); + return isBig ? read32be(loc) : read32le(loc); +} + +template +static inline uint64_t read64(const T *loc, bool isBig) { + assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); + return isBig ? read64be(loc) : read64le(loc); +} + +inline void write16(uint8_t *loc, uint16_t value, bool isBig) { + if (isBig) + write16be(loc, value); + else + write16le(loc, value); +} + +inline void write32(uint8_t *loc, uint32_t value, bool isBig) { + if (isBig) + write32be(loc, value); + else + write32le(loc, value); +} + +inline void write64(uint8_t *loc, uint64_t value, bool isBig) { + if (isBig) + write64be(loc, value); + else + write64le(loc, value); +} + +inline uint32_t +bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit, + uint8_t bitCount) { + const uint32_t mask = ((1<> shift) & mask; +} + +inline void +bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits, + uint8_t firstBit, uint8_t bitCount) { + const uint32_t mask = ((1< 16) ? 16: str.size()); +} + +// Implemented in normalizedToAtoms() and used by normalizedFromAtoms() so +// that the same table can be used to map mach-o sections to and from +// DefinedAtom::ContentType. 
+void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit); + +} // namespace normalized +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp new file mode 100644 index 000000000000..17b45b9ca827 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -0,0 +1,1560 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts normalized +/// mach-o in memory to mach-o binary on disk. 
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// | +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm::MachO; + +namespace lld { +namespace mach_o { +namespace normalized { + +struct TrieNode; // Forward declaration. 
+ +struct TrieEdge : public llvm::ilist_node { + TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} + + StringRef _subString; + struct TrieNode *_child; +}; + +} // namespace normalized +} // namespace mach_o +} // namespace lld + + +namespace llvm { +using lld::mach_o::normalized::TrieEdge; +template <> +struct ilist_alloc_traits : ilist_noalloc_traits {}; +} // namespace llvm + + +namespace lld { +namespace mach_o { +namespace normalized { + +struct TrieNode { + typedef llvm::ilist TrieEdgeList; + + TrieNode(StringRef s) + : _cummulativeString(s), _address(0), _flags(0), _other(0), + _trieOffset(0), _hasExportInfo(false) {} + ~TrieNode() = default; + + void addSymbol(const Export &entry, BumpPtrAllocator &allocator, + std::vector &allNodes); + + void addOrderedNodes(const Export &entry, + std::vector &allNodes); + bool updateOffset(uint32_t &offset); + void appendToByteBuffer(ByteBuffer &out); + +private: + StringRef _cummulativeString; + TrieEdgeList _children; + uint64_t _address; + uint64_t _flags; + uint64_t _other; + StringRef _importedName; + uint32_t _trieOffset; + bool _hasExportInfo; + bool _ordered = false; +}; + +/// Utility class for writing a mach-o binary file given an in-memory +/// normalized file. +class MachOFileLayout { +public: + /// All layout computation is done in the constructor. + MachOFileLayout(const NormalizedFile &file, bool alwaysIncludeFunctionStarts); + + /// Returns the final file size as computed in the constructor. + size_t size() const; + + // Returns size of the mach_header and load commands. + size_t headerAndLoadCommandsSize() const; + + /// Writes the normalized file as a binary mach-o file to the specified + /// path. This does not have a stream interface because the generated + /// file may need the 'x' bit set. 
+ llvm::Error writeBinary(StringRef path); + +private: + uint32_t loadCommandsSize(uint32_t &count, + bool alwaysIncludeFunctionStarts); + void buildFileOffsets(); + void writeMachHeader(); + llvm::Error writeLoadCommands(); + void writeSectionContent(); + void writeRelocations(); + void writeSymbolTable(); + void writeRebaseInfo(); + void writeBindingInfo(); + void writeLazyBindingInfo(); + void writeExportInfo(); + void writeFunctionStartsInfo(); + void writeDataInCodeInfo(); + void writeLinkEditContent(); + void buildLinkEditInfo(); + void buildRebaseInfo(); + void buildBindInfo(); + void buildLazyBindInfo(); + void buildExportTrie(); + void computeFunctionStartsSize(); + void computeDataInCodeSize(); + void computeSymbolTableSizes(); + void buildSectionRelocations(); + void appendSymbols(const std::vector &symbols, + uint32_t &symOffset, uint32_t &strOffset); + uint32_t indirectSymbolIndex(const Section §, uint32_t &index); + uint32_t indirectSymbolElementSize(const Section §); + + // For use as template parameter to load command methods. + struct MachO64Trait { + typedef llvm::MachO::segment_command_64 command; + typedef llvm::MachO::section_64 section; + enum { LC = llvm::MachO::LC_SEGMENT_64 }; + }; + + // For use as template parameter to load command methods. 
+ struct MachO32Trait { + typedef llvm::MachO::segment_command command; + typedef llvm::MachO::section section; + enum { LC = llvm::MachO::LC_SEGMENT }; + }; + + template + llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); + template llvm::Error writeSegmentLoadCommands(uint8_t *&lc); + + uint32_t pointerAlign(uint32_t value); + static StringRef dyldPath(); + + struct SegExtraInfo { + uint32_t fileOffset; + uint32_t fileSize; + std::vector sections; + }; + typedef std::map SegMap; + struct SectionExtraInfo { + uint32_t fileOffset; + }; + typedef std::map SectionMap; + + const NormalizedFile &_file; + std::error_code _ec; + uint8_t *_buffer; + const bool _is64; + const bool _swap; + const bool _bigEndianArch; + uint64_t _seg1addr; + uint32_t _startOfLoadCommands; + uint32_t _countOfLoadCommands; + uint32_t _endOfLoadCommands; + uint32_t _startOfRelocations; + uint32_t _startOfFunctionStarts; + uint32_t _startOfDataInCode; + uint32_t _startOfSymbols; + uint32_t _startOfIndirectSymbols; + uint32_t _startOfSymbolStrings; + uint32_t _endOfSymbolStrings; + uint32_t _symbolTableLocalsStartIndex; + uint32_t _symbolTableGlobalsStartIndex; + uint32_t _symbolTableUndefinesStartIndex; + uint32_t _symbolStringPoolSize; + uint32_t _symbolTableSize; + uint32_t _functionStartsSize; + uint32_t _dataInCodeSize; + uint32_t _indirectSymbolTableCount; + // Used in object file creation only + uint32_t _startOfSectionsContent; + uint32_t _endOfSectionsContent; + // Used in final linked image only + uint32_t _startOfLinkEdit; + uint32_t _startOfRebaseInfo; + uint32_t _endOfRebaseInfo; + uint32_t _startOfBindingInfo; + uint32_t _endOfBindingInfo; + uint32_t _startOfLazyBindingInfo; + uint32_t _endOfLazyBindingInfo; + uint32_t _startOfExportTrie; + uint32_t _endOfExportTrie; + uint32_t _endOfLinkEdit; + uint64_t _addressOfLinkEdit; + SegMap _segInfo; + SectionMap _sectInfo; + ByteBuffer _rebaseInfo; + ByteBuffer _bindingInfo; + ByteBuffer _lazyBindingInfo; + ByteBuffer 
_weakBindingInfo; + ByteBuffer _exportTrie; +}; + +size_t headerAndLoadCommandsSize(const NormalizedFile &file, + bool includeFunctionStarts) { + MachOFileLayout layout(file, includeFunctionStarts); + return layout.headerAndLoadCommandsSize(); +} + +StringRef MachOFileLayout::dyldPath() { + return "/usr/lib/dyld"; +} + +uint32_t MachOFileLayout::pointerAlign(uint32_t value) { + return llvm::alignTo(value, _is64 ? 8 : 4); +} + + +size_t MachOFileLayout::headerAndLoadCommandsSize() const { + return _endOfLoadCommands; +} + +MachOFileLayout::MachOFileLayout(const NormalizedFile &file, + bool alwaysIncludeFunctionStarts) + : _file(file), + _is64(MachOLinkingContext::is64Bit(file.arch)), + _swap(!MachOLinkingContext::isHostEndian(file.arch)), + _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), + _seg1addr(INT64_MAX) { + _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); + const size_t segCommandBaseSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); + if (file.fileType == llvm::MachO::MH_OBJECT) { + // object files have just one segment load command containing all sections + _endOfLoadCommands = _startOfLoadCommands + + segCommandBaseSize + + file.sections.size() * sectsSize + + sizeof(symtab_command); + _countOfLoadCommands = 2; + if (file.hasMinVersionLoadCommand) { + _endOfLoadCommands += sizeof(version_min_command); + _countOfLoadCommands++; + } + if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + if (_file.generateDataInCodeLoadCommand) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + // Assign file offsets to each section. 
+ _startOfSectionsContent = _endOfLoadCommands; + unsigned relocCount = 0; + uint64_t offset = _startOfSectionsContent; + for (const Section § : file.sections) { + if (isZeroFillSection(sect.type)) + _sectInfo[§].fileOffset = 0; + else { + offset = llvm::alignTo(offset, sect.alignment); + _sectInfo[§].fileOffset = offset; + offset += sect.content.size(); + } + relocCount += sect.relocations.size(); + } + _endOfSectionsContent = offset; + + computeSymbolTableSizes(); + computeFunctionStartsSize(); + computeDataInCodeSize(); + + // Align start of relocations. + _startOfRelocations = pointerAlign(_endOfSectionsContent); + _startOfFunctionStarts = _startOfRelocations + relocCount * 8; + _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + // Add Indirect symbol table. + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + // Align start of symbol table and symbol strings. + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfRelocations=" << _startOfRelocations << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " startOfSectionsContent=" << _startOfSectionsContent << "\n" + << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); + } else { + // Final linked images have one load command per segment. 
+ _endOfLoadCommands = _startOfLoadCommands + + loadCommandsSize(_countOfLoadCommands, + alwaysIncludeFunctionStarts); + + // Assign section file offsets. + buildFileOffsets(); + buildLinkEditInfo(); + + // LINKEDIT of final linked images has in order: + // rebase info, binding info, lazy binding info, weak binding info, + // data-in-code, symbol table, indirect symbol table, symbol table strings. + _startOfRebaseInfo = _startOfLinkEdit; + _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); + _startOfBindingInfo = _endOfRebaseInfo; + _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); + _startOfLazyBindingInfo = _endOfBindingInfo; + _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); + _startOfExportTrie = _endOfLazyBindingInfo; + _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); + _startOfFunctionStarts = _endOfExportTrie; + _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfLinkEdit=" << _startOfLinkEdit << "\n" + << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" + << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" + << " startOfBindingInfo=" << _startOfBindingInfo << "\n" + << " endOfBindingInfo=" << _endOfBindingInfo << "\n" + << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" + << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" + << " 
startOfExportTrie=" << _startOfExportTrie << "\n" + << " endOfExportTrie=" << _endOfExportTrie << "\n" + << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" + << " startOfDataInCode=" << _startOfDataInCode << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); + } +} + +uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count, + bool alwaysIncludeFunctionStarts) { + uint32_t size = 0; + count = 0; + + const size_t segCommandSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); + + // Add LC_SEGMENT for each segment. + size += _file.segments.size() * segCommandSize; + count += _file.segments.size(); + // Add section record for each section. + size += _file.sections.size() * sectionSize; + + // If creating a dylib, add LC_ID_DYLIB. 
+ if (_file.fileType == llvm::MachO::MH_DYLIB) { + size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); + ++count; + } + + // Add LC_DYLD_INFO + size += sizeof(dyld_info_command); + ++count; + + // Add LC_SYMTAB + size += sizeof(symtab_command); + ++count; + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + size += sizeof(dysymtab_command); + ++count; + } + + // If main executable add LC_LOAD_DYLINKER + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); + ++count; + } + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, + // LC_VERSION_MIN_TVOS + if (_file.hasMinVersionLoadCommand) { + size += sizeof(version_min_command); + ++count; + } + + // Add LC_SOURCE_VERSION + size += sizeof(source_version_command); + ++count; + + // If main executable add LC_MAIN + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += sizeof(entry_point_command); + ++count; + } + + // Add LC_LOAD_DYLIB for each dependent dylib. + for (const DependentDylib &dep : _file.dependentDylibs) { + size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + ++count; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + size += pointerAlign(sizeof(rpath_command) + path.size() + 1); + ++count; + } + + // Add LC_FUNCTION_STARTS if needed + if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) { + size += sizeof(linkedit_data_command); + ++count; + } + + // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. + // FIXME: Zero length entries is only to match ld64. Should we change this? 
+ if (_file.generateDataInCodeLoadCommand) { + size += sizeof(linkedit_data_command); + ++count; + } + + return size; +} + +static bool overlaps(const Segment &s1, const Segment &s2) { + if (s2.address >= s1.address+s1.size) + return false; + if (s1.address >= s2.address+s2.size) + return false; + return true; +} + +static bool overlaps(const Section &s1, const Section &s2) { + if (s2.address >= s1.address+s1.content.size()) + return false; + if (s1.address >= s2.address+s2.content.size()) + return false; + return true; +} + +void MachOFileLayout::buildFileOffsets() { + // Verify no segments overlap + for (const Segment &sg1 : _file.segments) { + for (const Segment &sg2 : _file.segments) { + if (&sg1 == &sg2) + continue; + if (overlaps(sg1,sg2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Verify no sections overlap + for (const Section &s1 : _file.sections) { + for (const Section &s2 : _file.sections) { + if (&s1 == &s2) + continue; + if (overlaps(s1,s2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Build side table of extra info about segments and sections. + SegExtraInfo t; + t.fileOffset = 0; + for (const Segment &sg : _file.segments) { + _segInfo[&sg] = t; + } + SectionExtraInfo t2; + t2.fileOffset = 0; + // Assign sections to segments. + for (const Section &s : _file.sections) { + _sectInfo[&s] = t2; + bool foundSegment = false; + for (const Segment &sg : _file.segments) { + if (sg.name.equals(s.segmentName)) { + if ((s.address >= sg.address) + && (s.address+s.content.size() <= sg.address+sg.size)) { + _segInfo[&sg].sections.push_back(&s); + foundSegment = true; + break; + } + } + } + if (!foundSegment) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + + // Assign file offsets. 
+ uint32_t fileOffset = 0; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "buildFileOffsets()\n"); + for (const Segment &sg : _file.segments) { + _segInfo[&sg].fileOffset = fileOffset; + if ((_seg1addr == INT64_MAX) && sg.init_access) + _seg1addr = sg.address; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " segment=" << sg.name + << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); + + uint32_t segFileSize = 0; + // A segment that is not zero-fill must use a least one page of disk space. + if (sg.init_access) + segFileSize = _file.pageSize; + for (const Section *s : _segInfo[&sg].sections) { + uint32_t sectOffset = s->address - sg.address; + uint32_t sectFileSize = + isZeroFillSection(s->type) ? 0 : s->content.size(); + segFileSize = std::max(segFileSize, sectOffset + sectFileSize); + + _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " section=" << s->sectionName + << ", fileOffset=" << fileOffset << "\n"); + } + + // round up all segments to page aligned, except __LINKEDIT + if (!sg.name.equals("__LINKEDIT")) { + _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); + fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); + } + _addressOfLinkEdit = sg.address + sg.size; + } + _startOfLinkEdit = fileOffset; +} + +size_t MachOFileLayout::size() const { + return _endOfSymbolStrings; +} + +void MachOFileLayout::writeMachHeader() { + auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); + // dynamic x86 executables on newer OS version should also set the + // CPU_SUBTYPE_LIB64 mask in the CPU subtype. + // FIXME: Check that this is a dynamic executable, not a static one. 
+ if (_file.fileType == llvm::MachO::MH_EXECUTE && + cpusubtype == CPU_SUBTYPE_X86_64_ALL && + _file.os == MachOLinkingContext::OS::macOSX) { + uint32_t version; + bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); + if (!failed && _file.minOSverson >= version) + cpusubtype |= CPU_SUBTYPE_LIB64; + } + + mach_header *mh = reinterpret_cast(_buffer); + mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; + mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); + mh->cpusubtype = cpusubtype; + mh->filetype = _file.fileType; + mh->ncmds = _countOfLoadCommands; + mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; + mh->flags = _file.flags; + if (_swap) + swapStruct(*mh); +} + +uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, + uint32_t &index) { + if (sect.indirectSymbols.empty()) + return 0; + uint32_t result = index; + index += sect.indirectSymbols.size(); + return result; +} + +uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { + if (sect.indirectSymbols.empty()) + return 0; + if (sect.type != S_SYMBOL_STUBS) + return 0; + return sect.content.size() / sect.indirectSymbols.size(); +} + +template +llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { + typename T::command* seg = reinterpret_cast(lc); + seg->cmd = T::LC; + seg->cmdsize = sizeof(typename T::command) + + _file.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + seg->cmdsize; + memset(seg->segname, 0, 16); + seg->flags = 0; + seg->vmaddr = 0; + seg->fileoff = _endOfLoadCommands; + seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->nsects = _file.sections.size(); + if (seg->nsects) { + seg->vmsize = _file.sections.back().address + + _file.sections.back().content.size(); + seg->filesize = _sectInfo[&_file.sections.back()].fileOffset + + _file.sections.back().content.size() - + 
_sectInfo[&_file.sections.front()].fileOffset; + } + if (_swap) + swapStruct(*seg); + typename T::section *sout = reinterpret_cast + (lc+sizeof(typename T::command)); + uint32_t relOffset = _startOfRelocations; + uint32_t indirectSymRunningIndex = 0; + for (const Section &sin : _file.sections) { + setString16(sin.sectionName, sout->sectname); + setString16(sin.segmentName, sout->segname); + sout->addr = sin.address; + sout->size = sin.content.size(); + sout->offset = _sectInfo[&sin].fileOffset; + sout->align = llvm::Log2_32(sin.alignment); + sout->reloff = sin.relocations.empty() ? 0 : relOffset; + sout->nreloc = sin.relocations.size(); + sout->flags = sin.type | sin.attributes; + sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); + sout->reserved2 = indirectSymbolElementSize(sin); + relOffset += sin.relocations.size() * sizeof(any_relocation_info); + if (_swap) + swapStruct(*sout); + ++sout; + } + lc = next; + return llvm::Error::success(); +} + +template +llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { + uint32_t indirectSymRunningIndex = 0; + for (const Segment &seg : _file.segments) { + // Link edit has no sections and a custom range of address, so handle it + // specially. + SegExtraInfo &segInfo = _segInfo[&seg]; + if (seg.name.equals("__LINKEDIT")) { + size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; + typename T::command* cmd = reinterpret_cast(lc); + cmd->cmd = T::LC; + cmd->cmdsize = sizeof(typename T::command); + uint8_t *next = lc + cmd->cmdsize; + setString16("__LINKEDIT", cmd->segname); + cmd->vmaddr = _addressOfLinkEdit; + cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); + cmd->fileoff = _startOfLinkEdit; + cmd->filesize = linkeditSize; + cmd->initprot = seg.init_access; + cmd->maxprot = seg.max_access; + cmd->nsects = 0; + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + lc = next; + continue; + } + // Write segment command with trailing sections. 
+ typename T::command* cmd = reinterpret_cast(lc); + cmd->cmd = T::LC; + cmd->cmdsize = sizeof(typename T::command) + + segInfo.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + cmd->cmdsize; + setString16(seg.name, cmd->segname); + cmd->vmaddr = seg.address; + cmd->vmsize = seg.size; + cmd->fileoff = segInfo.fileOffset; + cmd->filesize = segInfo.fileSize; + cmd->initprot = seg.init_access; + cmd->maxprot = seg.max_access; + cmd->nsects = segInfo.sections.size(); + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + typename T::section *sect = reinterpret_cast + (lc+sizeof(typename T::command)); + for (const Section *section : segInfo.sections) { + setString16(section->sectionName, sect->sectname); + setString16(section->segmentName, sect->segname); + sect->addr = section->address; + sect->size = section->content.size(); + if (isZeroFillSection(section->type)) + sect->offset = 0; + else + sect->offset = section->address - seg.address + segInfo.fileOffset; + sect->align = llvm::Log2_32(section->alignment); + sect->reloff = 0; + sect->nreloc = 0; + sect->flags = section->type | section->attributes; + sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); + sect->reserved2 = indirectSymbolElementSize(*section); + if (_swap) + swapStruct(*sect); + ++sect; + } + lc = reinterpret_cast(next); + } + return llvm::Error::success(); +} + +static void writeVersionMinLoadCommand(const NormalizedFile &_file, + bool _swap, + uint8_t *&lc) { + if (!_file.hasMinVersionLoadCommand) + return; + version_min_command *vm = reinterpret_cast(lc); + switch (_file.os) { + case MachOLinkingContext::OS::unknown: + vm->cmd = _file.minOSVersionKind; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = 0; + break; + case MachOLinkingContext::OS::macOSX: + vm->cmd = LC_VERSION_MIN_MACOSX; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = _file.sdkVersion; + break; + case 
MachOLinkingContext::OS::iOS: + case MachOLinkingContext::OS::iOS_simulator: + vm->cmd = LC_VERSION_MIN_IPHONEOS; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = _file.sdkVersion; + break; + } + if (_swap) + swapStruct(*vm); + lc += sizeof(version_min_command); +} + +llvm::Error MachOFileLayout::writeLoadCommands() { + uint8_t *lc = &_buffer[_startOfLoadCommands]; + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have one unnamed segment which holds all sections. + if (_is64) { + if (auto ec = writeSingleSegmentLoadCommand(lc)) + return ec; + } else { + if (auto ec = writeSingleSegmentLoadCommand(lc)) + return ec; + } + // Add LC_SYMTAB with symbol table info + symtab_command* st = reinterpret_cast(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + + _file.globalSymbols.size() + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, + // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS + writeVersionMinLoadCommand(_file, _swap, lc); + + // Add LC_FUNCTION_STARTS if needed. + if (_functionStartsSize != 0) { + linkedit_data_command* dl = reinterpret_cast(lc); + dl->cmd = LC_FUNCTION_STARTS; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfFunctionStarts; + dl->datasize = _functionStartsSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + + // Add LC_DATA_IN_CODE if requested. 
+ if (_file.generateDataInCodeLoadCommand) { + linkedit_data_command* dl = reinterpret_cast(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } else { + // Final linked images have sections under segments. + if (_is64) { + if (auto ec = writeSegmentLoadCommands(lc)) + return ec; + } else { + if (auto ec = writeSegmentLoadCommands(lc)) + return ec; + } + + // Add LC_ID_DYLIB command for dynamic libraries. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + dylib_command *dc = reinterpret_cast(lc); + StringRef path = _file.installName; + uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); + dc->cmd = LC_ID_DYLIB; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_LOAD_DYLIB + dc->dylib.timestamp = 1; + dc->dylib.current_version = _file.currentVersion; + dc->dylib.compatibility_version = _file.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); + lc[sizeof(dylib_command) + path.size()] = '\0'; + lc += size; + } + + // Add LC_DYLD_INFO_ONLY. + dyld_info_command* di = reinterpret_cast(lc); + di->cmd = LC_DYLD_INFO_ONLY; + di->cmdsize = sizeof(dyld_info_command); + di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; + di->rebase_size = _rebaseInfo.size(); + di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; + di->bind_size = _bindingInfo.size(); + di->weak_bind_off = 0; + di->weak_bind_size = 0; + di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; + di->lazy_bind_size = _lazyBindingInfo.size(); + di->export_off = _exportTrie.size() ? 
_startOfExportTrie : 0; + di->export_size = _exportTrie.size(); + if (_swap) + swapStruct(*di); + lc += sizeof(dyld_info_command); + + // Add LC_SYMTAB with symbol table info. + symtab_command* st = reinterpret_cast(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + + _file.globalSymbols.size() + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + dysymtab_command* dst = reinterpret_cast(lc); + dst->cmd = LC_DYSYMTAB; + dst->cmdsize = sizeof(dysymtab_command); + dst->ilocalsym = _symbolTableLocalsStartIndex; + dst->nlocalsym = _file.stabsSymbols.size() + + _file.localSymbols.size(); + dst->iextdefsym = _symbolTableGlobalsStartIndex; + dst->nextdefsym = _file.globalSymbols.size(); + dst->iundefsym = _symbolTableUndefinesStartIndex; + dst->nundefsym = _file.undefinedSymbols.size(); + dst->tocoff = 0; + dst->ntoc = 0; + dst->modtaboff = 0; + dst->nmodtab = 0; + dst->extrefsymoff = 0; + dst->nextrefsyms = 0; + dst->indirectsymoff = _startOfIndirectSymbols; + dst->nindirectsyms = _indirectSymbolTableCount; + dst->extreloff = 0; + dst->nextrel = 0; + dst->locreloff = 0; + dst->nlocrel = 0; + if (_swap) + swapStruct(*dst); + lc += sizeof(dysymtab_command); + } + + // If main executable, add LC_LOAD_DYLINKER + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_LOAD_DYLINKER load command. 
+ uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); + dylinker_command* dl = reinterpret_cast(lc); + dl->cmd = LC_LOAD_DYLINKER; + dl->cmdsize = size; + dl->name = sizeof(dylinker_command); // offset + if (_swap) + swapStruct(*dl); + memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); + lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; + lc += size; + } + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, + // LC_VERSION_MIN_TVOS + writeVersionMinLoadCommand(_file, _swap, lc); + + // Add LC_SOURCE_VERSION + { + // Note, using a temporary here to appease UB as we may not be aligned + // enough for a struct containing a uint64_t when emitting a 32-bit binary + source_version_command sv; + sv.cmd = LC_SOURCE_VERSION; + sv.cmdsize = sizeof(source_version_command); + sv.version = _file.sourceVersion; + if (_swap) + swapStruct(sv); + memcpy(lc, &sv, sizeof(source_version_command)); + lc += sizeof(source_version_command); + } + + // If main executable, add LC_MAIN. + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_MAIN load command. 
+ // Note, using a temporary here to appease UB as we may not be aligned + // enough for a struct containing a uint64_t when emitting a 32-bit binary + entry_point_command ep; + ep.cmd = LC_MAIN; + ep.cmdsize = sizeof(entry_point_command); + ep.entryoff = _file.entryAddress - _seg1addr; + ep.stacksize = _file.stackSize; + if (_swap) + swapStruct(ep); + memcpy(lc, &ep, sizeof(entry_point_command)); + lc += sizeof(entry_point_command); + } + + // Add LC_LOAD_DYLIB commands + for (const DependentDylib &dep : _file.dependentDylibs) { + dylib_command* dc = reinterpret_cast(lc); + uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + dc->cmd = dep.kind; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_ID_DYLIB + dc->dylib.timestamp = 2; + dc->dylib.current_version = dep.currentVersion; + dc->dylib.compatibility_version = dep.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); + lc[sizeof(dylib_command)+dep.path.size()] = '\0'; + lc += size; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + rpath_command *rpc = reinterpret_cast(lc); + uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); + rpc->cmd = LC_RPATH; + rpc->cmdsize = size; + rpc->path = sizeof(rpath_command); // offset + if (_swap) + swapStruct(*rpc); + memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); + lc[sizeof(rpath_command)+path.size()] = '\0'; + lc += size; + } + + // Add LC_FUNCTION_STARTS if needed. + if (_functionStartsSize != 0) { + linkedit_data_command* dl = reinterpret_cast(lc); + dl->cmd = LC_FUNCTION_STARTS; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfFunctionStarts; + dl->datasize = _functionStartsSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + + // Add LC_DATA_IN_CODE if requested. 
+ if (_file.generateDataInCodeLoadCommand) { + linkedit_data_command* dl = reinterpret_cast(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } + assert(lc == &_buffer[_endOfLoadCommands]); + return llvm::Error::success(); +} + +void MachOFileLayout::writeSectionContent() { + for (const Section &s : _file.sections) { + // Copy all section content to output buffer. + if (isZeroFillSection(s.type)) + continue; + if (s.content.empty()) + continue; + uint32_t offset = _sectInfo[&s].fileOffset; + assert(offset >= _endOfLoadCommands); + uint8_t *p = &_buffer[offset]; + memcpy(p, &s.content[0], s.content.size()); + p += s.content.size(); + } +} + +void MachOFileLayout::writeRelocations() { + uint32_t relOffset = _startOfRelocations; + for (Section sect : _file.sections) { + for (Relocation r : sect.relocations) { + any_relocation_info* rb = reinterpret_cast( + &_buffer[relOffset]); + *rb = packRelocation(r, _swap, _bigEndianArch); + relOffset += sizeof(any_relocation_info); + } + } +} + +void MachOFileLayout::appendSymbols(const std::vector &symbols, + uint32_t &symOffset, uint32_t &strOffset) { + for (const Symbol &sym : symbols) { + if (_is64) { + nlist_64* nb = reinterpret_cast(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist_64); + } else { + nlist* nb = reinterpret_cast(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist); + } + memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); + strOffset += 
sym.name.size(); + _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. + } +} + +void MachOFileLayout::writeFunctionStartsInfo() { + if (!_functionStartsSize) + return; + memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), + _functionStartsSize); +} + +void MachOFileLayout::writeDataInCodeInfo() { + uint32_t offset = _startOfDataInCode; + for (const DataInCode &entry : _file.dataInCode) { + data_in_code_entry *dst = reinterpret_cast( + &_buffer[offset]); + dst->offset = entry.offset; + dst->length = entry.length; + dst->kind = entry.kind; + if (_swap) + swapStruct(*dst); + offset += sizeof(data_in_code_entry); + } +} + +void MachOFileLayout::writeSymbolTable() { + // Write symbol table and symbol strings in parallel. + uint32_t symOffset = _startOfSymbols; + uint32_t strOffset = _startOfSymbolStrings; + // Reserve n_strx offset of zero to mean no name. + _buffer[strOffset++] = ' '; + _buffer[strOffset++] = '\0'; + appendSymbols(_file.stabsSymbols, symOffset, strOffset); + appendSymbols(_file.localSymbols, symOffset, strOffset); + appendSymbols(_file.globalSymbols, symOffset, strOffset); + appendSymbols(_file.undefinedSymbols, symOffset, strOffset); + // Write indirect symbol table array. + uint32_t *indirects = reinterpret_cast + (&_buffer[_startOfIndirectSymbols]); + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have sections in same order as input normalized file. + for (const Section §ion : _file.sections) { + for (uint32_t index : section.indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } else { + // Final linked images must sort sections from normalized file. 
+ for (const Segment &seg : _file.segments) { + SegExtraInfo &segInfo = _segInfo[&seg]; + for (const Section *section : segInfo.sections) { + for (uint32_t index : section->indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } + } +} + +void MachOFileLayout::writeRebaseInfo() { + memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); +} + +void MachOFileLayout::writeBindingInfo() { + memcpy(&_buffer[_startOfBindingInfo], + _bindingInfo.bytes(), _bindingInfo.size()); +} + +void MachOFileLayout::writeLazyBindingInfo() { + memcpy(&_buffer[_startOfLazyBindingInfo], + _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); +} + +void MachOFileLayout::writeExportInfo() { + memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); +} + +void MachOFileLayout::buildLinkEditInfo() { + buildRebaseInfo(); + buildBindInfo(); + buildLazyBindInfo(); + buildExportTrie(); + computeSymbolTableSizes(); + computeFunctionStartsSize(); + computeDataInCodeSize(); +} + +void MachOFileLayout::buildSectionRelocations() { + +} + +void MachOFileLayout::buildRebaseInfo() { + // TODO: compress rebasing info. + for (const RebaseLocation& entry : _file.rebasingInfo) { + _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); + _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _rebaseInfo.append_uleb128(entry.segOffset); + _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); + } + _rebaseInfo.append_byte(REBASE_OPCODE_DONE); + _rebaseInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildBindInfo() { + // TODO: compress bind info. 
+ uint64_t lastAddend = 0; + int lastOrdinal = 0x80000000; + StringRef lastSymbolName; + BindType lastType = (BindType)0; + Hex32 lastSegOffset = ~0U; + uint8_t lastSegIndex = (uint8_t)~0U; + for (const BindLocation& entry : _file.bindingInfo) { + if (entry.ordinal != lastOrdinal) { + if (entry.ordinal <= 0) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & BIND_IMMEDIATE_MASK)); + else if (entry.ordinal <= BIND_IMMEDIATE_MASK) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + entry.ordinal); + else { + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + _bindingInfo.append_uleb128(entry.ordinal); + } + lastOrdinal = entry.ordinal; + } + + if (lastSymbolName != entry.symbolName) { + _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _bindingInfo.append_string(entry.symbolName); + lastSymbolName = entry.symbolName; + } + + if (lastType != entry.kind) { + _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + lastType = entry.kind; + } + + if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) { + _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _bindingInfo.append_uleb128(entry.segOffset); + lastSegIndex = entry.segIndex; + lastSegOffset = entry.segOffset; + } + if (entry.addend != lastAddend) { + _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); + _bindingInfo.append_sleb128(entry.addend); + lastAddend = entry.addend; + } + _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); + } + _bindingInfo.append_byte(BIND_OPCODE_DONE); + _bindingInfo.align(_is64 ? 
8 : 4); +} + +void MachOFileLayout::buildLazyBindInfo() { + for (const BindLocation& entry : _file.lazyBindingInfo) { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _lazyBindingInfo.append_uleb128(entry.segOffset); + if (entry.ordinal <= 0) + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & BIND_IMMEDIATE_MASK)); + else if (entry.ordinal <= BIND_IMMEDIATE_MASK) + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + entry.ordinal); + else { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + _lazyBindingInfo.append_uleb128(entry.ordinal); + } + // FIXME: We need to | the opcode here with flags. + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _lazyBindingInfo.append_string(entry.symbolName); + _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + } + _lazyBindingInfo.align(_is64 ? 8 : 4); +} + +void TrieNode::addSymbol(const Export& entry, + BumpPtrAllocator &allocator, + std::vector &allNodes) { + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge._child->addSymbol(entry, allocator, allNodes); + return; + } + // See if string has common prefix with existing edge. 
+ for (int n=edgeStr.size()-1; n > 0; --n) { + if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { + // Splice in new node: was A -> C, now A -> B -> C + StringRef bNodeStr = edge._child->_cummulativeString; + bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); + auto *bNode = new (allocator) TrieNode(bNodeStr); + allNodes.push_back(bNode); + TrieNode* cNode = edge._child; + StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); + StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "splice in TrieNode('" << bNodeStr + << "') between edge '" + << abEdgeStr << "' and edge='" + << bcEdgeStr<< "'\n"); + TrieEdge& abEdge = edge; + abEdge._subString = abEdgeStr; + abEdge._child = bNode; + auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); + bNode->_children.insert(bNode->_children.end(), bcEdge); + bNode->addSymbol(entry, allocator, allNodes); + return; + } + } + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + assert(entry.otherOffset != 0); + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + assert(entry.otherOffset != 0); + } + // No commonality with any existing child, make a new edge. 
+ auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); + auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); + _children.insert(_children.end(), newEdge); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "new TrieNode('" << entry.name << "') with edge '" + << partialStr << "' from node='" + << _cummulativeString << "'\n"); + newNode->_address = entry.offset; + newNode->_flags = entry.flags | entry.kind; + newNode->_other = entry.otherOffset; + if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) + newNode->_importedName = entry.otherName.copy(allocator); + newNode->_hasExportInfo = true; + allNodes.push_back(newNode); +} + +void TrieNode::addOrderedNodes(const Export& entry, + std::vector &orderedNodes) { + if (!_ordered) { + orderedNodes.push_back(this); + _ordered = true; + } + + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge._child->addOrderedNodes(entry, orderedNodes); + return; + } + } +} + +bool TrieNode::updateOffset(uint32_t& offset) { + uint32_t nodeSize = 1; // Length when no export info + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + nodeSize = llvm::getULEB128Size(_flags); + nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. + nodeSize += _importedName.size(); + ++nodeSize; // Trailing zero in imported name. + } else { + nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); + if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + nodeSize += llvm::getULEB128Size(_other); + } + // Overall node size so far is uleb128 of export info + actual export info. + nodeSize += llvm::getULEB128Size(nodeSize); + } + // Compute size of all child edges. + ++nodeSize; // Byte for number of children. 
+ for (TrieEdge &edge : _children) { + nodeSize += edge._subString.size() + 1 // String length. + + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. + } + // On input, 'offset' is new prefered location for this node. + bool result = (_trieOffset != offset); + // Store new location in node object for use by parents. + _trieOffset = offset; + // Update offset for next iteration. + offset += nodeSize; + // Return true if _trieOffset was changed. + return result; +} + +void TrieNode::appendToByteBuffer(ByteBuffer &out) { + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + if (!_importedName.empty()) { + // nodes with re-export info: size, flags, ordinal, import-name + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + + _importedName.size() + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_string(_importedName); + } else { + // nodes without re-export info: size, flags, ordinal, empty-string + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_byte(0); + } + } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { + // Nodes with export info: size, flags, address, other + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address) + + llvm::getULEB128Size(_other); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + out.append_uleb128(_other); + } else { + // Nodes with export info: size, flags, address + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + } + } else { + // Node with no export info. 
+ uint32_t nodeSize = 0; + out.append_byte(nodeSize); + } + // Add number of children. + assert(_children.size() < 256); + out.append_byte(_children.size()); + // Append each child edge substring and node offset. + for (TrieEdge &edge : _children) { + out.append_string(edge._subString); + out.append_uleb128(edge._child->_trieOffset); + } +} + +void MachOFileLayout::buildExportTrie() { + if (_file.exportInfo.empty()) + return; + + // For all temporary strings and objects used building trie. + BumpPtrAllocator allocator; + + // Build trie of all exported symbols. + auto *rootNode = new (allocator) TrieNode(StringRef()); + std::vector allNodes; + allNodes.reserve(_file.exportInfo.size()*2); + allNodes.push_back(rootNode); + for (const Export& entry : _file.exportInfo) { + rootNode->addSymbol(entry, allocator, allNodes); + } + + std::vector orderedNodes; + orderedNodes.reserve(allNodes.size()); + + for (const Export& entry : _file.exportInfo) + rootNode->addOrderedNodes(entry, orderedNodes); + + // Assign each node in the vector an offset in the trie stream, iterating + // until all uleb128 sizes have stabilized. + bool more; + do { + uint32_t offset = 0; + more = false; + for (TrieNode* node : orderedNodes) { + if (node->updateOffset(offset)) + more = true; + } + } while (more); + + // Serialize trie to ByteBuffer. + for (TrieNode* node : orderedNodes) { + node->appendToByteBuffer(_exportTrie); + } + _exportTrie.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::computeSymbolTableSizes() { + // MachO symbol tables have three ranges: locals, globals, and undefines + const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); + _symbolTableSize = nlistSize * (_file.stabsSymbols.size() + + _file.localSymbols.size() + + _file.globalSymbols.size() + + _file.undefinedSymbols.size()); + // Always reserve 1-byte for the empty string and 1-byte for its terminator. 
+ _symbolStringPoolSize = 2; + for (const Symbol &sym : _file.stabsSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.localSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.globalSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.undefinedSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + _symbolTableLocalsStartIndex = 0; + _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() + + _file.localSymbols.size(); + _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex + + _file.globalSymbols.size(); + + _indirectSymbolTableCount = 0; + for (const Section § : _file.sections) { + _indirectSymbolTableCount += sect.indirectSymbols.size(); + } +} + +void MachOFileLayout::computeFunctionStartsSize() { + _functionStartsSize = _file.functionStarts.size(); +} + +void MachOFileLayout::computeDataInCodeSize() { + _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); +} + +void MachOFileLayout::writeLinkEditContent() { + if (_file.fileType == llvm::MachO::MH_OBJECT) { + writeRelocations(); + writeFunctionStartsInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } else { + writeRebaseInfo(); + writeBindingInfo(); + writeLazyBindingInfo(); + // TODO: add weak binding info + writeExportInfo(); + writeFunctionStartsInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } +} + +llvm::Error MachOFileLayout::writeBinary(StringRef path) { + // Check for pending error from constructor. + if (_ec) + return llvm::errorCodeToError(_ec); + // Create FileOutputBuffer with calculated size. + unsigned flags = 0; + if (_file.fileType != llvm::MachO::MH_OBJECT) + flags = llvm::FileOutputBuffer::F_executable; + Expected> fobOrErr = + llvm::FileOutputBuffer::create(path, size(), flags); + if (Error E = fobOrErr.takeError()) + return E; + std::unique_ptr &fob = *fobOrErr; + // Write content. 
+ _buffer = fob->getBufferStart(); + writeMachHeader(); + if (auto ec = writeLoadCommands()) + return ec; + writeSectionContent(); + writeLinkEditContent(); + if (Error E = fob->commit()) + return E; + + return llvm::Error::success(); +} + +/// Takes in-memory normalized view and writes a mach-o object file. +llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { + MachOFileLayout layout(file, false); + return layout.writeBinary(path); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp new file mode 100644 index 000000000000..ddfd1764f7e1 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -0,0 +1,1657 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory Atoms to in-memory normalized mach-o. 
+/// +/// +------------+ +/// | normalized | +/// +------------+ +/// ^ +/// | +/// | +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "ArchHandler.h" +#include "DebugInfo.h" +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include +#include +#include + +using llvm::StringRef; +using llvm::isa; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using namespace lld; + +namespace { + +struct AtomInfo { + const DefinedAtom *atom; + uint64_t offsetInSection; +}; + +struct SectionInfo { + SectionInfo(StringRef seg, StringRef sect, SectionType type, + const MachOLinkingContext &ctxt, uint32_t attr, + bool relocsToDefinedCanBeImplicit); + + StringRef segmentName; + StringRef sectionName; + SectionType type; + uint32_t attributes; + uint64_t address; + uint64_t size; + uint16_t alignment; + + /// If this is set, the any relocs in this section which point to defined + /// addresses can be implicitly generated. This is the case for the + /// __eh_frame section where references to the function can be implicit if the + /// function is defined. 
+ bool relocsToDefinedCanBeImplicit; + + + std::vector atomsAndOffsets; + uint32_t normalizedSectionIndex; + uint32_t finalSectionIndex; +}; + +SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, + const MachOLinkingContext &ctxt, uint32_t attrs, + bool relocsToDefinedCanBeImplicit) + : segmentName(sg), sectionName(sct), type(t), attributes(attrs), + address(0), size(0), alignment(1), + relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit), + normalizedSectionIndex(0), finalSectionIndex(0) { + uint16_t align = 1; + if (ctxt.sectionAligned(segmentName, sectionName, align)) { + alignment = align; + } +} + +struct SegmentInfo { + SegmentInfo(StringRef name); + + StringRef name; + uint64_t address; + uint64_t size; + uint32_t init_access; + uint32_t max_access; + std::vector sections; + uint32_t normalizedSegmentIndex; +}; + +SegmentInfo::SegmentInfo(StringRef n) + : name(n), address(0), size(0), init_access(0), max_access(0), + normalizedSegmentIndex(0) { +} + +class Util { +public: + Util(const MachOLinkingContext &ctxt) + : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr), + _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {} + ~Util(); + + void processDefinedAtoms(const lld::File &atomFile); + void processAtomAttributes(const DefinedAtom *atom); + void assignAtomToSection(const DefinedAtom *atom); + void organizeSections(); + void assignAddressesToSections(const NormalizedFile &file); + uint32_t fileFlags(); + void copySegmentInfo(NormalizedFile &file); + void copySectionInfo(NormalizedFile &file); + void updateSectionInfo(NormalizedFile &file); + void buildAtomToAddressMap(); + llvm::Error synthesizeDebugNotes(NormalizedFile &file); + llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file); + void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); + void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); + void addExportInfo(const lld::File &, NormalizedFile &file); + 
void addSectionRelocs(const lld::File &, NormalizedFile &file); + void addFunctionStarts(const lld::File &, NormalizedFile &file); + void buildDataInCodeArray(const lld::File &, NormalizedFile &file); + void addDependentDylibs(const lld::File &, NormalizedFile &file); + void copyEntryPointAddress(NormalizedFile &file); + void copySectionContent(NormalizedFile &file); + + bool allSourceFilesHaveMinVersions() const { + return _allSourceFilesHaveMinVersions; + } + + uint32_t minVersion() const { + return _minVersion; + } + + LoadCommandType minVersionCommandType() const { + return _minVersionCommandType; + } + +private: + typedef std::map TypeToSection; + typedef llvm::DenseMap AtomToAddress; + + struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; + typedef llvm::StringMap DylibPathToInfo; + + SectionInfo *sectionForAtom(const DefinedAtom*); + SectionInfo *getRelocatableSection(DefinedAtom::ContentType type); + SectionInfo *getFinalSection(DefinedAtom::ContentType type); + void appendAtom(SectionInfo *sect, const DefinedAtom *atom); + SegmentInfo *segmentForName(StringRef segName); + void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); + void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &); + void copySectionContent(SectionInfo *si, ContentBytes &content); + uint16_t descBits(const DefinedAtom* atom); + int dylibOrdinal(const SharedLibraryAtom *sa); + void segIndexForSection(const SectionInfo *sect, + uint8_t &segmentIndex, uint64_t &segmentStartAddr); + const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); + const Atom *targetOfStub(const DefinedAtom *stubAtom); + llvm::Error getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &symbolScope); + void appendSection(SectionInfo *si, NormalizedFile &file); + uint32_t sectionIndexForAtom(const Atom *atom); + void fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset, + NormalizedFile &file); + + typedef llvm::DenseMap AtomToIndex; + 
struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; }; + struct AtomSorter { + bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); + }; + struct SegmentSorter { + bool operator()(const SegmentInfo *left, const SegmentInfo *right); + static unsigned weight(const SegmentInfo *); + }; + struct TextSectionSorter { + bool operator()(const SectionInfo *left, const SectionInfo *right); + static unsigned weight(const SectionInfo *); + }; + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + llvm::BumpPtrAllocator _allocator; + std::vector _sectionInfos; + std::vector _segmentInfos; + TypeToSection _sectionMap; + std::vector _customSections; + AtomToAddress _atomToAddress; + DylibPathToInfo _dylibInfo; + const DefinedAtom *_entryAtom; + AtomToIndex _atomToSymbolIndex; + std::vector _machHeaderAliasAtoms; + bool _hasTLVDescriptors; + bool _subsectionsViaSymbols; + bool _allSourceFilesHaveMinVersions = true; + LoadCommandType _minVersionCommandType = (LoadCommandType)0; + uint32_t _minVersion = 0; + std::vector _stabs; +}; + +Util::~Util() { + // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs + // to be deleted. + for (SectionInfo *si : _sectionInfos) { + // clear() destroys vector elements, but does not deallocate. + // Instead use swap() to deallocate vector buffer. + std::vector empty; + si->atomsAndOffsets.swap(empty); + } + // The SegmentInfo structs are BumpPtr allocated, but sections needs + // to be deleted. + for (SegmentInfo *sgi : _segmentInfos) { + std::vector empty2; + sgi->sections.swap(empty2); + } +} + +SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + SectionAttr sectionAttrs; + bool relocsToDefinedCanBeImplicit; + + // Use same table used by when parsing .o files. 
+ relocatableSectionInfoForContentType(type, segmentName, sectionName, + sectionType, sectionAttrs, + relocsToDefinedCanBeImplicit); + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(sectionName) && + sect.second->segmentName.equals(segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. + auto *sect = new (_allocator) + SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs, + relocsToDefinedCanBeImplicit); + _sectionInfos.push_back(sect); + _sectionMap[type] = sect; + return sect; +} + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachOFinalSectionFromAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachOFinalSectionFromAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub), + ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__la_symbol_ptr", 
S_LAZY_SYMBOL_POINTERS, + typeLazyPointer), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS, + typeNonLazyPointer), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, + typeTLVInitialData), + ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS, + typeTLVInitializerPtr), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), +}; +#undef ENTRY + +SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) { + for (auto &p : sectsToAtomType) { + if (p.atomType != atomType) + continue; + SectionAttr sectionAttrs = 0; + switch (atomType) { + case DefinedAtom::typeMachHeader: + case DefinedAtom::typeCode: + case DefinedAtom::typeStub: + case DefinedAtom::typeStubHelper: + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; + break; + case DefinedAtom::typeThunkTLV: + _hasTLVDescriptors = true; + break; + default: + break; + } + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(p.sectionName) && + sect.second->segmentName.equals(p.segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. 
+ auto *sect = new (_allocator) SectionInfo( + p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs, + /* relocsToDefinedCanBeImplicit */ false); + _sectionInfos.push_back(sect); + _sectionMap[atomType] = sect; + return sect; + } + llvm_unreachable("content type not yet supported"); +} + +SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { + if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + // Section for this atom is derived from content type. + DefinedAtom::ContentType type = atom->contentType(); + auto pos = _sectionMap.find(type); + if ( pos != _sectionMap.end() ) + return pos->second; + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + return rMode ? getRelocatableSection(type) : getFinalSection(type); + } else { + // This atom needs to be in a custom section. + StringRef customName = atom->customSectionName(); + // Look to see if we have already allocated the needed custom section. + for(SectionInfo *sect : _customSections) { + const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom; + if (firstAtom->customSectionName().equals(customName)) { + return sect; + } + } + // Not found, so need to create a new custom section. + size_t seperatorIndex = customName.find('/'); + assert(seperatorIndex != StringRef::npos); + StringRef segName = customName.slice(0, seperatorIndex); + StringRef sectName = customName.drop_front(seperatorIndex + 1); + auto *sect = + new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx, + 0, /* relocsToDefinedCanBeImplicit */ false); + _customSections.push_back(sect); + _sectionInfos.push_back(sect); + return sect; + } +} + +void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { + // Figure out offset for atom in this section given alignment constraints. 
+ uint64_t offset = sect->size; + DefinedAtom::Alignment atomAlign = atom->alignment(); + uint64_t align = atomAlign.value; + uint64_t requiredModulus = atomAlign.modulus; + uint64_t currentModulus = (offset % align); + if ( currentModulus != requiredModulus ) { + if ( requiredModulus > currentModulus ) + offset += requiredModulus-currentModulus; + else + offset += align+requiredModulus-currentModulus; + } + // Record max alignment of any atom in this section. + if (align > sect->alignment) + sect->alignment = atomAlign.value; + // Assign atom to this section with this offset. + AtomInfo ai = {atom, offset}; + sect->atomsAndOffsets.push_back(ai); + // Update section size to include this atom. + sect->size = offset + atom->size(); +} + +void Util::processDefinedAtoms(const lld::File &atomFile) { + for (const DefinedAtom *atom : atomFile.defined()) { + processAtomAttributes(atom); + assignAtomToSection(atom); + } +} + +void Util::processAtomAttributes(const DefinedAtom *atom) { + if (auto *machoFile = dyn_cast(&atom->file())) { + // If the file doesn't use subsections via symbols, then make sure we don't + // add that flag to the final output file if we have a relocatable file. + if (!machoFile->subsectionsViaSymbols()) + _subsectionsViaSymbols = false; + + // All the source files must have min versions for us to output an object + // file with a min version. + if (auto v = machoFile->minVersion()) + _minVersion = std::max(_minVersion, v); + else + _allSourceFilesHaveMinVersions = false; + + // If we don't have a platform load command, but one of the source files + // does, then take the one from the file. + if (!_minVersionCommandType) + if (auto v = machoFile->minVersionLoadCommandKind()) + _minVersionCommandType = v; + } +} + +void Util::assignAtomToSection(const DefinedAtom *atom) { + if (atom->contentType() == DefinedAtom::typeMachHeader) { + _machHeaderAliasAtoms.push_back(atom); + // Assign atom to this section with this offset. 
+ AtomInfo ai = {atom, 0}; + sectionForAtom(atom)->atomsAndOffsets.push_back(ai); + } else if (atom->contentType() == DefinedAtom::typeDSOHandle) + _machHeaderAliasAtoms.push_back(atom); + else + appendAtom(sectionForAtom(atom), atom); +} + +SegmentInfo *Util::segmentForName(StringRef segName) { + for (SegmentInfo *si : _segmentInfos) { + if ( si->name.equals(segName) ) + return si; + } + auto *info = new (_allocator) SegmentInfo(segName); + + // Set the initial segment protection. + if (segName.equals("__TEXT")) + info->init_access = VM_PROT_READ | VM_PROT_EXECUTE; + else if (segName.equals("__PAGEZERO")) + info->init_access = 0; + else if (segName.equals("__LINKEDIT")) + info->init_access = VM_PROT_READ; + else { + // All others default to read-write + info->init_access = VM_PROT_READ | VM_PROT_WRITE; + } + + // Set max segment protection + // Note, its overkill to use a switch statement here, but makes it so much + // easier to use switch coverage to catch new cases. + switch (_ctx.os()) { + case lld::MachOLinkingContext::OS::unknown: + case lld::MachOLinkingContext::OS::macOSX: + case lld::MachOLinkingContext::OS::iOS_simulator: + if (segName.equals("__PAGEZERO")) { + info->max_access = 0; + break; + } + // All others default to all + info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; + break; + case lld::MachOLinkingContext::OS::iOS: + // iPhoneOS always uses same protection for max and initial + info->max_access = info->init_access; + break; + } + _segmentInfos.push_back(info); + return info; +} + +unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { + return llvm::StringSwitch(seg->name) + .Case("__PAGEZERO", 1) + .Case("__TEXT", 2) + .Case("__DATA", 3) + .Default(100); +} + +bool Util::SegmentSorter::operator()(const SegmentInfo *left, + const SegmentInfo *right) { + return (weight(left) < weight(right)); +} + +unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { + return llvm::StringSwitch(sect->sectionName) + 
.Case("__text", 1) + .Case("__stubs", 2) + .Case("__stub_helper", 3) + .Case("__const", 4) + .Case("__cstring", 5) + .Case("__unwind_info", 98) + .Case("__eh_frame", 99) + .Default(10); +} + +bool Util::TextSectionSorter::operator()(const SectionInfo *left, + const SectionInfo *right) { + return (weight(left) < weight(right)); +} + +void Util::organizeSections() { + // NOTE!: Keep this in sync with assignAddressesToSections. + switch (_ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + // Main executables, need a zero-page segment + segmentForName("__PAGEZERO"); + // Fall into next case. + LLVM_FALLTHROUGH; + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + // All dynamic code needs TEXT segment to hold the load commands. + segmentForName("__TEXT"); + break; + default: + break; + } + segmentForName("__LINKEDIT"); + + // Group sections into segments. + for (SectionInfo *si : _sectionInfos) { + SegmentInfo *seg = segmentForName(si->segmentName); + seg->sections.push_back(si); + } + // Sort segments. + std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter()); + + // Sort sections within segments. + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__TEXT")) { + std::sort(seg->sections.begin(), seg->sections.end(), + TextSectionSorter()); + } + } + + // Record final section indexes. + uint32_t segmentIndex = 0; + uint32_t sectionIndex = 1; + for (SegmentInfo *seg : _segmentInfos) { + seg->normalizedSegmentIndex = segmentIndex++; + for (SectionInfo *sect : seg->sections) + sect->finalSectionIndex = sectionIndex++; + } +} + +void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) { + seg->address = addr; + for (SectionInfo *sect : seg->sections) { + sect->address = llvm::alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); +} + +// __TEXT segment lays out backwards so padding is at front after load commands. 
+void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg, + uint64_t &addr) { + seg->address = addr; + // Walks sections starting at end to calculate padding for start. + int64_t taddr = 0; + for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) { + SectionInfo *sect = *it; + taddr -= sect->size; + taddr = taddr & (0 - sect->alignment); + } + int64_t padding = taddr - hlcSize; + while (padding < 0) + padding += _ctx.pageSize(); + // Start assigning section address starting at padded offset. + addr += (padding + hlcSize); + for (SectionInfo *sect : seg->sections) { + sect->address = llvm::alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); +} + +void Util::assignAddressesToSections(const NormalizedFile &file) { + // NOTE!: Keep this in sync with organizeSections. + size_t hlcSize = headerAndLoadCommandsSize(file, + _ctx.generateFunctionStartsLoadCommand()); + uint64_t address = 0; + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__PAGEZERO")) { + seg->size = _ctx.pageZeroSize(); + address += seg->size; + } + else if (seg->name.equals("__TEXT")) { + // _ctx.baseAddress() == 0 implies it was either unspecified or + // pageZeroSize is also 0. In either case resetting address is safe. + address = _ctx.baseAddress() ? 
_ctx.baseAddress() : address; + layoutSectionsInTextSegment(hlcSize, seg, address); + } else + layoutSectionsInSegment(seg, address); + + address = llvm::alignTo(address, _ctx.pageSize()); + } + DEBUG_WITH_TYPE("WriterMachO-norm", + llvm::dbgs() << "assignAddressesToSections()\n"; + for (SegmentInfo *sgi : _segmentInfos) { + llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address) + << ", size=" << llvm::format("0x%08llX", sgi->size) + << ", segment-name='" << sgi->name + << "'\n"; + for (SectionInfo *si : sgi->sections) { + llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address) + << ", size=" << llvm::format("0x%08llX", si->size) + << ", section-name='" << si->sectionName + << "\n"; + } + } + ); +} + +void Util::copySegmentInfo(NormalizedFile &file) { + for (SegmentInfo *sgi : _segmentInfos) { + Segment seg; + seg.name = sgi->name; + seg.address = sgi->address; + seg.size = sgi->size; + seg.init_access = sgi->init_access; + seg.max_access = sgi->max_access; + file.segments.push_back(seg); + } +} + +void Util::appendSection(SectionInfo *si, NormalizedFile &file) { + // Add new empty section to end of file.sections. + Section temp; + file.sections.push_back(std::move(temp)); + Section* normSect = &file.sections.back(); + // Copy fields to normalized section. + normSect->segmentName = si->segmentName; + normSect->sectionName = si->sectionName; + normSect->type = si->type; + normSect->attributes = si->attributes; + normSect->address = si->address; + normSect->alignment = si->alignment; + // Record where normalized section is. + si->normalizedSectionIndex = file.sections.size()-1; +} + +void Util::copySectionContent(NormalizedFile &file) { + const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + + // Utility function for ArchHandler to find address of atom in output file. 
+ auto addrForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t { + for (const SectionInfo *sectInfo : _sectionInfos) + for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) + if (atomInfo.atom == &atom) + return sectInfo->address; + llvm_unreachable("atom not assigned to section"); + }; + + for (SectionInfo *si : _sectionInfos) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + if (isZeroFillSection(si->type)) { + const uint8_t *empty = nullptr; + normSect->content = llvm::makeArrayRef(empty, si->size); + continue; + } + // Copy content from atoms to content buffer for section. + llvm::MutableArrayRef sectionContent; + if (si->size) { + uint8_t *sectContent = file.ownedAllocations.Allocate(si->size); + sectionContent = llvm::MutableArrayRef(sectContent, si->size); + normSect->content = sectionContent; + } + for (AtomInfo &ai : si->atomsAndOffsets) { + if (!ai.atom->size()) { + assert(ai.atom->begin() == ai.atom->end() && + "Cannot have references without content"); + continue; + } + auto atomContent = sectionContent.slice(ai.offsetInSection, + ai.atom->size()); + _archHandler.generateAtomContent(*ai.atom, r, addrForAtom, + sectionAddrForAtom, _ctx.baseAddress(), + atomContent); + } + } +} + +void Util::copySectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // Write sections grouped by segment. + for (SegmentInfo *sgi : _segmentInfos) { + for (SectionInfo *si : sgi->sections) { + appendSection(si, file); + } + } +} + +void Util::updateSectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // sections grouped by segment. 
+ for (SegmentInfo *sgi : _segmentInfos) { + Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex]; + normSeg->address = sgi->address; + normSeg->size = sgi->size; + for (SectionInfo *si : sgi->sections) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + normSect->address = si->address; + } + } +} + +void Util::copyEntryPointAddress(NormalizedFile &nFile) { + if (!_entryAtom) { + nFile.entryAddress = 0; + return; + } + + if (_ctx.outputTypeHasEntry()) { + if (_archHandler.isThumbFunction(*_entryAtom)) + nFile.entryAddress = (_atomToAddress[_entryAtom] | 1); + else + nFile.entryAddress = _atomToAddress[_entryAtom]; + } +} + +void Util::buildAtomToAddressMap() { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign atom addresses:\n"); + const bool lookForEntry = _ctx.outputTypeHasEntry(); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + _atomToAddress[info.atom] = sect->address + info.offsetInSection; + if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && + (info.atom->size() != 0) && + info.atom->name() == _ctx.entrySymbolName()) { + _entryAtom = info.atom; + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[info.atom]) + << llvm::format(" 0x%09lX", info.atom) + << ", file=#" + << info.atom->file().ordinal() + << ", atom=#" + << info.atom->ordinal() + << ", name=" + << info.atom->name() + << ", type=" + << info.atom->contentType() + << "\n"); + } + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign header alias atom addresses:\n"); + for (const Atom *atom : _machHeaderAliasAtoms) { + _atomToAddress[atom] = _ctx.baseAddress(); +#ifndef NDEBUG + if (auto *definedAtom = dyn_cast(atom)) { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[atom]) + << llvm::format(" 0x%09lX", atom) + << ", file=#" + << 
definedAtom->file().ordinal() + << ", atom=#" + << definedAtom->ordinal() + << ", name=" + << definedAtom->name() + << ", type=" + << definedAtom->contentType() + << "\n"); + } else { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[atom]) + << " atom=" << atom + << " name=" << atom->name() << "\n"); + } +#endif + } +} + +llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) { + + // Bail out early if we don't need to generate a debug map. + if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap) + return llvm::Error::success(); + + std::vector atomsNeedingDebugNotes; + std::set filesWithStabs; + bool objFileHasDwarf = false; + const File *objFile = nullptr; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + if (const DefinedAtom *atom = dyn_cast(info.atom)) { + + // FIXME: No stabs/debug-notes for symbols that wouldn't be in the + // symbol table. + // FIXME: No stabs/debug-notes for kernel dtrace probes. + + if (atom->contentType() == DefinedAtom::typeCFI || + atom->contentType() == DefinedAtom::typeCString) + continue; + + // Whenever we encounter a new file, update the 'objfileHasDwarf' flag. + if (&info.atom->file() != objFile) { + objFileHasDwarf = false; + if (const mach_o::MachOFile *atomFile = + dyn_cast(&info.atom->file())) { + if (atomFile->debugInfo()) { + if (isa(atomFile->debugInfo())) + objFileHasDwarf = true; + else if (isa(atomFile->debugInfo())) + filesWithStabs.insert(atomFile); + } + } + } + + // If this atom is from a file that needs dwarf, add it to the list. + if (objFileHasDwarf) + atomsNeedingDebugNotes.push_back(info.atom); + } + } + } + + // Sort atoms needing debug notes by file ordinal, then atom ordinal. 
+ std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(), + [](const DefinedAtom *lhs, const DefinedAtom *rhs) { + if (lhs->file().ordinal() != rhs->file().ordinal()) + return (lhs->file().ordinal() < rhs->file().ordinal()); + return (lhs->ordinal() < rhs->ordinal()); + }); + + // FIXME: Handle : Add -add_ast_path option to \ + // linker which add N_AST stab entry to output + // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64. + + StringRef oldFileName = ""; + StringRef oldDirPath = ""; + bool wroteStartSO = false; + std::unordered_set seenFiles; + for (const DefinedAtom *atom : atomsNeedingDebugNotes) { + const auto &atomFile = cast(atom->file()); + assert(dyn_cast_or_null(atomFile.debugInfo()) + && "file for atom needing debug notes does not contain dwarf"); + auto &dwarf = cast(*atomFile.debugInfo()); + + auto &tu = dwarf.translationUnitSource(); + StringRef newFileName = tu.name; + StringRef newDirPath = tu.path; + + // Add an SO whenever the TU source file changes. + if (newFileName != oldFileName || newDirPath != oldDirPath) { + // Translation unit change, emit ending SO + if (oldFileName != "") + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); + + oldFileName = newFileName; + oldDirPath = newDirPath; + + // If newDirPath doesn't end with a '/' we need to add one: + if (newDirPath.back() != '/') { + char *p = + file.ownedAllocations.Allocate(newDirPath.size() + 2); + memcpy(p, newDirPath.data(), newDirPath.size()); + p[newDirPath.size()] = '/'; + p[newDirPath.size() + 1] = '\0'; + newDirPath = p; + } + + // New translation unit, emit start SOs: + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath)); + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName)); + + // Synthesize OSO for start of file. 
+ char *fullPath = nullptr; + { + SmallString<1024> pathBuf(atomFile.path()); + if (auto EC = llvm::sys::fs::make_absolute(pathBuf)) + return llvm::errorCodeToError(EC); + fullPath = file.ownedAllocations.Allocate(pathBuf.size() + 1); + memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1); + } + + // Get mod time. + uint32_t modTime = 0; + llvm::sys::fs::file_status stat; + if (!llvm::sys::fs::status(fullPath, stat)) + if (llvm::sys::fs::exists(stat)) + modTime = llvm::sys::toTimeT(stat.getLastModificationTime()); + + _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1, + modTime, fullPath)); + // linker should put cpusubtype in n_sect field + // of nlist entry for N_OSO debug note entries. + wroteStartSO = true; + } + + if (atom->contentType() == DefinedAtom::typeCode) { + // Synthesize BNSYM and start FUN stabs. + _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, "")); + _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name())); + // Synthesize any SOL stabs needed + // FIXME: add SOL stabs. + _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0, + atom->rawContent().size(), "")); + _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0, + atom->rawContent().size(), "")); + } else { + if (atom->scope() == Atom::scopeTranslationUnit) + _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name())); + else + _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name())); + } + } + + // Emit ending SO if necessary. + if (wroteStartSO) + _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); + + // Copy any stabs from .o file. + for (const auto *objFile : filesWithStabs) { + const auto &stabsList = + cast(objFile->debugInfo())->stabs(); + for (auto &stab : stabsList) { + // FIXME: Drop stabs whose atoms have been dead-stripped. 
+ _stabs.push_back(stab); + } + } + + return llvm::Error::success(); +} + +uint16_t Util::descBits(const DefinedAtom* atom) { + uint16_t desc = 0; + switch (atom->merge()) { + case lld::DefinedAtom::mergeNo: + case lld::DefinedAtom::mergeAsTentative: + break; + case lld::DefinedAtom::mergeAsWeak: + case lld::DefinedAtom::mergeAsWeakAndAddressUsed: + desc |= N_WEAK_DEF; + break; + case lld::DefinedAtom::mergeSameNameAndSize: + case lld::DefinedAtom::mergeByLargestSection: + case lld::DefinedAtom::mergeByContent: + llvm_unreachable("Unsupported DefinedAtom::merge()"); + break; + } + if (atom->contentType() == lld::DefinedAtom::typeResolver) + desc |= N_SYMBOL_RESOLVER; + if (atom->contentType() == lld::DefinedAtom::typeMachHeader) + desc |= REFERENCED_DYNAMICALLY; + if (_archHandler.isThumbFunction(*atom)) + desc |= N_ARM_THUMB_DEF; + if (atom->deadStrip() == DefinedAtom::deadStripNever && + _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) { + if ((atom->contentType() != DefinedAtom::typeInitializerPtr) + && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) + desc |= N_NO_DEAD_STRIP; + } + return desc; +} + +bool Util::AtomSorter::operator()(const AtomAndIndex &left, + const AtomAndIndex &right) { + return (left.atom->name().compare(right.atom->name()) < 0); +} + +llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &scope) { + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + switch (atom->scope()) { + case Atom::scopeTranslationUnit: + scope = 0; + inGlobalsRegion = false; + return llvm::Error::success(); + case Atom::scopeLinkageUnit: + if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::exported) && + _ctx.exportSymbolNamed(atom->name())) { + return llvm::make_error( + Twine("cannot export hidden symbol ") + atom->name()); + } + if (rMode) { + if (_ctx.keepPrivateExterns()) { + // -keep_private_externs means keep in globals region as N_PEXT. 
+ scope = N_PEXT | N_EXT; + inGlobalsRegion = true; + return llvm::Error::success(); + } + } + // scopeLinkageUnit symbols are no longer global once linked. + scope = N_PEXT; + inGlobalsRegion = false; + return llvm::Error::success(); + case Atom::scopeGlobal: + if (_ctx.exportRestrictMode()) { + if (_ctx.exportSymbolNamed(atom->name())) { + scope = N_EXT; + inGlobalsRegion = true; + return llvm::Error::success(); + } else { + scope = N_PEXT; + inGlobalsRegion = false; + return llvm::Error::success(); + } + } else { + scope = N_EXT; + inGlobalsRegion = true; + return llvm::Error::success(); + } + break; + } + llvm_unreachable("atom->scope() unknown enum value"); +} + + + +llvm::Error Util::addSymbols(const lld::File &atomFile, + NormalizedFile &file) { + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + // Mach-O symbol table has four regions: stabs, locals, globals, undefs. + + // Add all stabs. + for (auto &stab : _stabs) { + lld::mach_o::normalized::Symbol sym; + sym.type = static_cast(stab.type); + sym.scope = 0; + sym.sect = stab.other; + sym.desc = stab.desc; + if (stab.atom) + sym.value = _atomToAddress[stab.atom]; + else + sym.value = stab.value; + sym.name = stab.str; + file.stabsSymbols.push_back(sym); + } + + // Add all local (non-global) symbols in address order + std::vector globals; + globals.reserve(512); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (!atom->name().empty()) { + SymbolScope symbolScope; + bool inGlobalsRegion; + if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){ + return ec; + } + if (inGlobalsRegion) { + AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope }; + globals.push_back(ai); + } else { + lld::mach_o::normalized::Symbol sym; + sym.name = atom->name(); + sym.type = N_SECT; + sym.scope = symbolScope; + sym.sect = sect->finalSectionIndex; + sym.desc = descBits(atom); + sym.value = 
_atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){ + // Create 'Lxxx' labels for anonymous atoms if archHandler says so. + static unsigned tempNum = 1; + char tmpName[16]; + sprintf(tmpName, "L%04u", tempNum++); + StringRef tempRef(tmpName); + lld::mach_o::normalized::Symbol sym; + sym.name = tempRef.copy(file.ownedAllocations); + sym.type = N_SECT; + sym.scope = 0; + sym.sect = sect->finalSectionIndex; + sym.desc = 0; + sym.value = _atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } + } + + // Sort global symbol alphabetically, then add to symbol table. + std::sort(globals.begin(), globals.end(), AtomSorter()); + const uint32_t globalStartIndex = file.localSymbols.size(); + for (AtomAndIndex &ai : globals) { + lld::mach_o::normalized::Symbol sym; + sym.name = ai.atom->name(); + sym.type = N_SECT; + sym.scope = ai.scope; + sym.sect = ai.index; + sym.desc = descBits(static_cast(ai.atom)); + sym.value = _atomToAddress[ai.atom]; + _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size(); + file.globalSymbols.push_back(sym); + } + + // Sort undefined symbol alphabetically, then add to symbol table. 
+ std::vector undefs; + undefs.reserve(128); + for (const UndefinedAtom *atom : atomFile.undefined()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + std::sort(undefs.begin(), undefs.end(), AtomSorter()); + const uint32_t start = file.globalSymbols.size() + file.localSymbols.size(); + for (AtomAndIndex &ai : undefs) { + lld::mach_o::normalized::Symbol sym; + uint16_t desc = 0; + if (!rMode) { + uint8_t ordinal = 0; + if (!_ctx.useFlatNamespace()) + ordinal = dylibOrdinal(dyn_cast(ai.atom)); + llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal); + } + sym.name = ai.atom->name(); + sym.type = N_UNDF; + sym.scope = ai.scope; + sym.sect = 0; + sym.desc = desc; + sym.value = 0; + _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start; + file.undefinedSymbols.push_back(sym); + } + + return llvm::Error::success(); +} + +const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { + for (const Reference *ref : *lpAtom) { + if (_archHandler.isLazyPointer(*ref)) { + return ref->target(); + } + } + return nullptr; +} + +const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) { + for (const Reference *ref : *stubAtom) { + if (const Atom *ta = ref->target()) { + if (const DefinedAtom *lpAtom = dyn_cast(ta)) { + const Atom *target = targetOfLazyPointer(lpAtom); + if (target) + return target; + } + } + } + return nullptr; +} + +void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + switch (si->type) { + case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + bool foundTarget = false; + for (const Reference *ref : *info.atom) { + const Atom *target = ref->target(); + if (target) { + if (isa(target)) { + uint32_t index = 
_atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + foundTarget = true; + } else { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_LOCAL); + } + } + } + if (!foundTarget) { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_ABS); + } + } + break; + case llvm::MachO::S_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfLazyPointer(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + case llvm::MachO::S_SYMBOL_STUBS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfStub(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + default: + break; + } + } +} + +void Util::addDependentDylibs(const lld::File &atomFile, + NormalizedFile &nFile) { + // Scan all imported symbols and build up list of dylibs they are from. + int ordinal = 1; + for (const auto *dylib : _ctx.allDylibs()) { + DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName()); + if (pos == _dylibInfo.end()) { + DylibInfo info; + bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile(); + + // If we're in -flat_namespace mode (or this atom came from the flat + // namespace file under -undefined dynamic_lookup) then use the flat + // lookup ordinal. + if (flatNamespaceAtom || _ctx.useFlatNamespace()) + info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + else + info.ordinal = ordinal++; + info.hasWeak = false; + info.hasNonWeak = !info.hasWeak; + _dylibInfo[dylib->installName()] = info; + + // Unless this was a flat_namespace atom, record the source dylib. 
+ if (!flatNamespaceAtom) { + DependentDylib depInfo; + depInfo.path = dylib->installName(); + depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; + depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path()); + depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path()); + nFile.dependentDylibs.push_back(depInfo); + } + } else { + pos->second.hasWeak = false; + pos->second.hasNonWeak = !pos->second.hasWeak; + } + } + // Automatically weak link dylib in which all symbols are weak (canBeNull). + for (DependentDylib &dep : nFile.dependentDylibs) { + DylibInfo &info = _dylibInfo[dep.path]; + if (info.hasWeak && !info.hasNonWeak) + dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB; + else if (_ctx.isUpwardDylib(dep.path)) + dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB; + } +} + +int Util::dylibOrdinal(const SharedLibraryAtom *sa) { + return _dylibInfo[sa->loadName()].ordinal; +} + +void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex, + uint64_t &segmentStartAddr) { + segmentIndex = 0; + for (const SegmentInfo *seg : _segmentInfos) { + if ((seg->address <= sect->address) + && (seg->address+seg->size >= sect->address+sect->size)) { + segmentStartAddr = seg->address; + return; + } + ++segmentIndex; + } + llvm_unreachable("section not in any segment"); +} + +uint32_t Util::sectionIndexForAtom(const Atom *atom) { + uint64_t address = _atomToAddress[atom]; + for (const SectionInfo *si : _sectionInfos) { + if ((si->address <= address) && (address < si->address+si->size)) + return si->finalSectionIndex; + } + llvm_unreachable("atom not in any section"); +} + +void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { + if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + return; + + // Utility function for ArchHandler to find symbol index for an atom. 
+ auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t { + auto pos = _atomToSymbolIndex.find(&atom); + assert(pos != _atomToSymbolIndex.end()); + return pos->second; + }; + + // Utility function for ArchHandler to find section index for an atom. + auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t { + return sectionIndexForAtom(&atom); + }; + + // Utility function for ArchHandler to find address of atom in output file. + auto addressForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + for (const AtomInfo &info : si->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + // Skip emitting relocs for sections which are always able to be + // implicitly regenerated and where the relocation targets an address + // which is defined. + if (si->relocsToDefinedCanBeImplicit && isa(ref->target())) + continue; + _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref, + symIndexForAtom, + sectIndexForAtom, + addressForAtom, + normSect.relocations); + } + } + } +} + +void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) { + if (!_ctx.generateFunctionStartsLoadCommand()) + return; + file.functionStarts.reserve(8192); + // Delta compress function starts, starting with the mach header symbol. + const uint64_t badAddress = ~0ULL; + uint64_t addr = badAddress; + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + auto type = info.atom->contentType(); + if (type == DefinedAtom::typeMachHeader) { + addr = _atomToAddress[info.atom]; + continue; + } + if (type != DefinedAtom::typeCode) + continue; + assert(addr != badAddress && "Missing mach header symbol"); + // Skip atoms which have 0 size. 
This is so that LC_FUNCTION_STARTS + // can't spill in to the next section. + if (!info.atom->size()) + continue; + uint64_t nextAddr = _atomToAddress[info.atom]; + if (_archHandler.isThumbFunction(*info.atom)) + nextAddr |= 1; + uint64_t delta = nextAddr - addr; + if (delta) { + ByteBuffer buffer; + buffer.append_uleb128(delta); + file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(), + buffer.bytes() + buffer.size()); + } + addr = nextAddr; + } + } + + // Null terminate, and pad to pointer size for this arch. + file.functionStarts.push_back(0); + + auto size = file.functionStarts.size(); + for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4); + i != e; ++i) + file.functionStarts.push_back(0); +} + +void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { + if (!_ctx.generateDataInCodeLoadCommand()) + return; + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + // Atoms that contain data-in-code have "transition" references + // which mark a point where the embedded data starts of ends. + // This needs to be converted to the mach-o format which is an array + // of data-in-code ranges. + uint32_t startOffset = 0; + DataRegionType mode = DataRegionType(0); + for (const Reference *ref : *info.atom) { + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + if (_archHandler.isDataInCodeTransition(ref->kindValue())) { + DataRegionType nextMode = (DataRegionType)ref->addend(); + if (mode != nextMode) { + if (mode != 0) { + // Found end data range, so make range entry. + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = ref->offsetInAtom() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + mode = nextMode; + startOffset = ref->offsetInAtom(); + } + } + if (mode != 0) { + // Function ends with data (no end transition). 
+ DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = info.atom->size() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + } +} + +void Util::addRebaseAndBindingInfo(const lld::File &atomFile, + NormalizedFile &nFile) { + if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + uint8_t segmentIndex; + uint64_t segmentStartAddr; + uint32_t offsetInBindInfo = 0; + + for (SectionInfo *sect : _sectionInfos) { + segIndexForSection(sect, segmentIndex, segmentStartAddr); + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom() + - segmentStartAddr; + const Atom* targ = ref->target(); + if (_archHandler.isPointer(*ref)) { + // A pointer to a DefinedAtom requires rebasing. + if (isa(targ)) { + RebaseLocation rebase; + rebase.segIndex = segmentIndex; + rebase.segOffset = segmentOffset; + rebase.kind = llvm::MachO::REBASE_TYPE_POINTER; + nFile.rebasingInfo.push_back(rebase); + } + // A pointer to an SharedLibraryAtom requires binding. 
+ if (const SharedLibraryAtom *sa = dyn_cast(targ)) { + BindLocation bind; + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = sa->canBeNullAtRuntime(); + bind.ordinal = dylibOrdinal(sa); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.bindingInfo.push_back(bind); + } + } + else if (_archHandler.isLazyPointer(*ref)) { + BindLocation bind; + if (const SharedLibraryAtom *sa = dyn_cast(targ)) { + bind.ordinal = dylibOrdinal(sa); + } else { + bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF; + } + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = false; //sa->canBeNullAtRuntime(); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.lazyBindingInfo.push_back(bind); + + // Now that we know the segmentOffset and the ordinal attribute, + // we can fix the helper's code + + fixLazyReferenceImm(atom, offsetInBindInfo, nFile); + + // 5 bytes for opcodes + variable sizes (target name + \0 and offset + // encode's size) + offsetInBindInfo += + 6 + targ->name().size() + llvm::getULEB128Size(bind.segOffset); + if (bind.ordinal > BIND_IMMEDIATE_MASK) + offsetInBindInfo += llvm::getULEB128Size(bind.ordinal); + } + } + } + } +} + +void Util::fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset, + NormalizedFile &file) { + for (const Reference *ref : *atom) { + const DefinedAtom *da = dyn_cast(ref->target()); + if (da == nullptr) + return; + + const Reference *helperRef = nullptr; + for (const Reference *hr : *da) { + if (hr->kindValue() == _archHandler.lazyImmediateLocationKind()) { + helperRef = hr; + break; + } + } + if (helperRef == nullptr) + continue; + + // TODO: maybe get the fixed atom content from _archHandler ? 
+ for (SectionInfo *sectInfo : _sectionInfos) { + for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) { + if (atomInfo.atom == helperRef->target()) { + auto sectionContent = + file.sections[sectInfo->normalizedSectionIndex].content; + uint8_t *rawb = + file.ownedAllocations.Allocate(sectionContent.size()); + llvm::MutableArrayRef newContent{rawb, + sectionContent.size()}; + std::copy(sectionContent.begin(), sectionContent.end(), + newContent.begin()); + llvm::support::ulittle32_t *loc = + reinterpret_cast( + &newContent[atomInfo.offsetInSection + + helperRef->offsetInAtom()]); + *loc = offset; + file.sections[sectInfo->normalizedSectionIndex].content = newContent; + } + } + } + } +} + +void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) { + if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (atom->scope() != Atom::scopeGlobal) + continue; + if (_ctx.exportRestrictMode()) { + if (!_ctx.exportSymbolNamed(atom->name())) + continue; + } + Export exprt; + exprt.name = atom->name(); + exprt.offset = _atomToAddress[atom] - _ctx.baseAddress(); + exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (atom->merge() == DefinedAtom::mergeAsWeak) + exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + else + exprt.flags = 0; + exprt.otherOffset = 0; + exprt.otherName = StringRef(); + nFile.exportInfo.push_back(exprt); + } + } +} + +uint32_t Util::fileFlags() { + // FIXME: these need to determined at runtime. + if (_ctx.outputMachOType() == MH_OBJECT) { + return _subsectionsViaSymbols ? 
(uint32_t)MH_SUBSECTIONS_VIA_SYMBOLS : 0; + } else { + uint32_t flags = MH_DYLDLINK; + if (!_ctx.useFlatNamespace()) + flags |= MH_TWOLEVEL | MH_NOUNDEFS; + if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE()) + flags |= MH_PIE; + if (_hasTLVDescriptors) + flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS); + return flags; + } +} + +} // end anonymous namespace + +namespace lld { +namespace mach_o { +namespace normalized { + +/// Convert a set of Atoms into a normalized mach-o file. +llvm::Expected> +normalizedFromAtoms(const lld::File &atomFile, + const MachOLinkingContext &context) { + // The util object buffers info until the normalized file can be made. + Util util(context); + util.processDefinedAtoms(atomFile); + util.organizeSections(); + + std::unique_ptr f(new NormalizedFile()); + NormalizedFile &normFile = *f.get(); + normFile.arch = context.arch(); + normFile.fileType = context.outputMachOType(); + normFile.flags = util.fileFlags(); + normFile.stackSize = context.stackSize(); + normFile.installName = context.installName(); + normFile.currentVersion = context.currentVersion(); + normFile.compatVersion = context.compatibilityVersion(); + normFile.os = context.os(); + + // If we are emitting an object file, then the min version is the maximum + // of the min's of all the source files and the cmdline. 
+ if (normFile.fileType == llvm::MachO::MH_OBJECT) + normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion()); + else + normFile.minOSverson = context.osMinVersion(); + + normFile.minOSVersionKind = util.minVersionCommandType(); + + normFile.sdkVersion = context.sdkVersion(); + normFile.sourceVersion = context.sourceVersion(); + + if (context.generateVersionLoadCommand() && + context.os() != MachOLinkingContext::OS::unknown) + normFile.hasMinVersionLoadCommand = true; + else if (normFile.fileType == llvm::MachO::MH_OBJECT && + util.allSourceFilesHaveMinVersions() && + ((normFile.os != MachOLinkingContext::OS::unknown) || + util.minVersionCommandType())) { + // If we emit an object file, then it should contain a min version load + // command if all of the source files also contained min version commands. + // Also, we either need to have a platform, or found a platform from the + // source object files. + normFile.hasMinVersionLoadCommand = true; + } + normFile.generateDataInCodeLoadCommand = + context.generateDataInCodeLoadCommand(); + normFile.pageSize = context.pageSize(); + normFile.rpaths = context.rpaths(); + util.addDependentDylibs(atomFile, normFile); + util.copySegmentInfo(normFile); + util.copySectionInfo(normFile); + util.assignAddressesToSections(normFile); + util.buildAtomToAddressMap(); + if (auto err = util.synthesizeDebugNotes(normFile)) + return std::move(err); + util.updateSectionInfo(normFile); + util.copySectionContent(normFile); + if (auto ec = util.addSymbols(atomFile, normFile)) { + return std::move(ec); + } + util.addIndirectSymbols(atomFile, normFile); + util.addRebaseAndBindingInfo(atomFile, normFile); + util.addExportInfo(atomFile, normFile); + util.addSectionRelocs(atomFile, normFile); + util.addFunctionStarts(atomFile, normFile); + util.buildDataInCodeArray(atomFile, normFile); + util.copyEntryPointAddress(normFile); + + return std::move(f); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff 
--git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp new file mode 100644 index 000000000000..164a283b972b --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -0,0 +1,1635 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory normalized mach-o to in-memory Atoms. +/// +/// +------------+ +/// | normalized | +/// +------------+ +/// | +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "ArchHandler.h" +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +#define DEBUG_TYPE "normalized-file-to-atoms" + +namespace lld { +namespace mach_o { + + +namespace { // anonymous + + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachORelocatableSectionToAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachORelocatableSectionToAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", 
S_REGULAR, typeResolver), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("", "", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), + ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), + ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), + ENTRY("", "", S_INTERPOSING, typeInterposingTuples), + ENTRY("__LD", "__compact_unwind", S_REGULAR, + typeCompactUnwindInfo), + ENTRY("", "", S_REGULAR, typeUnknown) +}; +#undef ENTRY + + +/// Figures out ContentType of a mach-o section. 
+DefinedAtom::ContentType atomTypeFromSection(const Section §ion, + bool &customSectionName) { + // First look for match of name and type. Empty names in table are wildcards. + customSectionName = false; + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->sectionType != section.type) + continue; + if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) + continue; + if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) + continue; + customSectionName = p->segmentName.empty() && p->sectionName.empty(); + return p->atomType; + } + // Look for code denoted by section attributes + if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) + return DefinedAtom::typeCode; + + return DefinedAtom::typeUnknown; +} + +enum AtomizeModel { + atomizeAtSymbols, + atomizeFixedSize, + atomizePointerSize, + atomizeUTF8, + atomizeUTF16, + atomizeCFI, + atomizeCU, + atomizeCFString +}; + +/// Returns info on how to atomize a section of the specified ContentType. 
+void sectionParseInfo(DefinedAtom::ContentType atomType, + unsigned int &sizeMultiple, + DefinedAtom::Scope &scope, + DefinedAtom::Merge &merge, + AtomizeModel &atomizeModel) { + struct ParseInfo { + DefinedAtom::ContentType atomType; + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + }; + + #define ENTRY(type, size, scope, merge, model) \ + {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } + + static const ParseInfo parseInfo[] = { + ENTRY(typeCode, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstant, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF8), + ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF16), + ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, + atomizeCFI), + ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, + atomizeCFString), + ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, + atomizeCU), + ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, + atomizeAtSymbols) + }; + #undef ENTRY + const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); + for (int i=0; i < tableLen; 
++i) { + if (parseInfo[i].atomType == atomType) { + sizeMultiple = parseInfo[i].sizeMultiple; + scope = parseInfo[i].scope; + merge = parseInfo[i].merge; + atomizeModel = parseInfo[i].atomizeModel; + return; + } + } + + // Unknown type is atomized by symbols. + sizeMultiple = 1; + scope = DefinedAtom::scopeGlobal; + merge = DefinedAtom::mergeNo; + atomizeModel = atomizeAtSymbols; +} + + +Atom::Scope atomScope(uint8_t scope) { + switch (scope) { + case N_EXT: + return Atom::scopeGlobal; + case N_PEXT: + case N_PEXT | N_EXT: + return Atom::scopeLinkageUnit; + case 0: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("unknown scope value!"); +} + +void appendSymbolsInSection( + const std::vector &inSymbols, + uint32_t sectionIndex, + SmallVector &outSyms) { + for (const lld::mach_o::normalized::Symbol &sym : inSymbols) { + // Only look at definition symbols. + if ((sym.type & N_TYPE) != N_SECT) + continue; + if (sym.sect != sectionIndex) + continue; + outSyms.push_back(&sym); + } +} + +void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, + MachOFile &file, uint64_t symbolAddr, StringRef symbolName, + uint16_t symbolDescFlags, Atom::Scope symbolScope, + uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { + // Mach-O symbol table does have size in it. Instead the size is the + // difference between this and the next symbol. + uint64_t size = nextSymbolAddr - symbolAddr; + uint64_t offset = symbolAddr - section.address; + bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; + if (isZeroFillSection(section.type)) { + file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, + noDeadStrip, copyRefs, §ion); + } else { + DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) + ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; + bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); + if (atomType == DefinedAtom::typeUnknown) { + // Mach-O needs a segment and section name. 
Concatenate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, + merge, thumb, noDeadStrip, offset, + size, segSectName, true, §ion); + } else { + if ((atomType == lld::DefinedAtom::typeCode) && + (symbolDescFlags & N_SYMBOL_RESOLVER)) { + atomType = lld::DefinedAtom::typeResolver; + } + file.addDefinedAtom(symbolName, symbolScope, atomType, merge, + offset, size, thumb, noDeadStrip, copyRefs, §ion); + } + } +} + +llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, + const Section §ion, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + // Find section's index. + uint32_t sectIndex = 1; + for (auto § : normalizedFile.sections) { + if (§ == §ion) + break; + ++sectIndex; + } + + // Find all symbols in this section. + SmallVector symbols; + appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); + appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); + + // Sort symbols. + std::sort(symbols.begin(), symbols.end(), + [](const lld::mach_o::normalized::Symbol *lhs, + const lld::mach_o::normalized::Symbol *rhs) -> bool { + if (lhs == rhs) + return false; + // First by address. + uint64_t lhsAddr = lhs->value; + uint64_t rhsAddr = rhs->value; + if (lhsAddr != rhsAddr) + return lhsAddr < rhsAddr; + // If same address, one is an alias so sort by scope. + Atom::Scope lScope = atomScope(lhs->scope); + Atom::Scope rScope = atomScope(rhs->scope); + if (lScope != rScope) + return lScope < rScope; + // If same address and scope, see if one might be better as + // the alias. + bool lPrivate = (lhs->name.front() == 'l'); + bool rPrivate = (rhs->name.front() == 'l'); + if (lPrivate != rPrivate) + return lPrivate; + // If same address and scope, sort by name. 
+ return lhs->name < rhs->name; + }); + + // Debug logging of symbols. + // for (const Symbol *sym : symbols) + // llvm::errs() << " sym: " + // << llvm::format("0x%08llx ", (uint64_t)sym->value) + // << ", " << sym->name << "\n"; + + // If section has no symbols and no content, there are no atoms. + if (symbols.empty() && section.content.empty()) + return llvm::Error::success(); + + if (symbols.empty()) { + // Section has no symbols, put all content in one anonymous atom. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, + section.address + section.content.size(), + scatterable, copyRefs); + } + else if (symbols.front()->value != section.address) { + // Section has anonymous content before first symbol. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, symbols.front()->value, + scatterable, copyRefs); + } + + const lld::mach_o::normalized::Symbol *lastSym = nullptr; + for (const lld::mach_o::normalized::Symbol *sym : symbols) { + if (lastSym != nullptr) { + // Ignore any assembler added "ltmpNNN" symbol at start of section + // if there is another symbol at the start. + if ((lastSym->value != sym->value) + || lastSym->value != section.address + || !lastSym->name.startswith("ltmp")) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), sym->value, + scatterable, copyRefs); + } + } + lastSym = sym; + } + if (lastSym != nullptr) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), + section.address + section.content.size(), + scatterable, copyRefs); + } + + // If object built without .subsections_via_symbols, add reference chain. 
+ if (!scatterable) { + MachODefinedAtom *prevAtom = nullptr; + file.eachAtomInSection(section, + [&](MachODefinedAtom *atom, uint64_t offset)->void { + if (prevAtom) + prevAtom->addReference(Reference::KindNamespace::all, + Reference::KindArch::all, + Reference::kindLayoutAfter, 0, atom, 0); + prevAtom = atom; + }); + } + + return llvm::Error::success(); +} + +llvm::Error processSection(DefinedAtom::ContentType atomType, + const Section §ion, + bool customSectionName, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + + // Get info on how to atomize section. + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); + + // Validate section size. + if ((section.content.size() % sizeMultiple) != 0) + return llvm::make_error(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " has size (" + + Twine(section.content.size()) + + ") which is not a multiple of " + + Twine(sizeMultiple)); + + if (atomizeModel == atomizeAtSymbols) { + // Break section up into atoms each with a fixed size. + return processSymboledSection(atomType, section, normalizedFile, file, + scatterable, copyRefs); + } else { + unsigned int size; + for (unsigned int offset = 0, e = section.content.size(); offset != e;) { + switch (atomizeModel) { + case atomizeFixedSize: + // Break section up into atoms each with a fixed size. + size = sizeMultiple; + break; + case atomizePointerSize: + // Break section up into atoms each the size of a pointer. + size = is64 ? 8 : 4; + break; + case atomizeUTF8: + // Break section up into zero terminated c-strings. 
+ size = 0; + for (unsigned int i = offset; i < e; ++i) { + if (section.content[i] == 0) { + size = i + 1 - offset; + break; + } + } + break; + case atomizeUTF16: + // Break section up into zero terminated UTF16 strings. + size = 0; + for (unsigned int i = offset; i < e; i += 2) { + if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { + size = i + 2 - offset; + break; + } + } + break; + case atomizeCFI: + // Break section up into dwarf unwind CFIs (FDE or CIE). + size = read32(§ion.content[offset], isBig) + 4; + if (offset+size > section.content.size()) { + return llvm::make_error(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. Size of CFI " + "starting at offset (" + + Twine(offset) + + ") is past end of section."); + } + break; + case atomizeCU: + // Break section up into compact unwind entries. + size = is64 ? 32 : 20; + break; + case atomizeCFString: + // Break section up into NS/CFString objects. + size = is64 ? 32 : 16; + break; + case atomizeAtSymbols: + break; + } + if (size == 0) { + return llvm::make_error(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. The last atom " + "is not zero terminated."); + } + if (customSectionName) { + // Mach-O needs a segment and section name. Concatenate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. 
+ std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, + merge, false, false, offset, + size, segSectName, true, §ion); + } else { + file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, + false, false, copyRefs, §ion); + } + offset += size; + } + } + return llvm::Error::success(); +} + +const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, + uint64_t address) { + for (const Section &s : normalizedFile.sections) { + uint64_t sAddr = s.address; + if ((sAddr <= address) && (address < sAddr+s.content.size())) { + return &s; + } + } + return nullptr; +} + +const MachODefinedAtom * +findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, + uint64_t addr, Reference::Addend &addend) { + const Section *sect = nullptr; + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return nullptr; + + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + auto atom = + file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + addend = offsetInTarget; + return atom; +} + +// Walks all relocations for a section in a normalized .o file and +// creates corresponding lld::Reference objects. +llvm::Error convertRelocs(const Section §ion, + const NormalizedFile &normalizedFile, + bool scatterable, + MachOFile &file, + ArchHandler &handler) { + // Utility function for ArchHandler to find atom by its address. 
+ auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, + const lld::Atom **atom, Reference::Addend *addend) + -> llvm::Error { + if (sectIndex > normalizedFile.sections.size()) + return llvm::make_error(Twine("out of range section " + "index (") + Twine(sectIndex) + ")"); + const Section *sect = nullptr; + if (sectIndex == 0) { + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return llvm::make_error(Twine("address (" + Twine(addr) + + ") is not in any section")); + } else { + sect = &normalizedFile.sections[sectIndex-1]; + } + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return llvm::Error::success(); + }; + + // Utility function for ArchHandler to find atom by its symbol index. + auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) + -> llvm::Error { + // Find symbol from index. + const lld::mach_o::normalized::Symbol *sym = nullptr; + uint32_t numStabs = normalizedFile.stabsSymbols.size(); + uint32_t numLocal = normalizedFile.localSymbols.size(); + uint32_t numGlobal = normalizedFile.globalSymbols.size(); + uint32_t numUndef = normalizedFile.undefinedSymbols.size(); + assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); + if (symbolIndex < numStabs+numLocal) { + sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; + } else if (symbolIndex < numStabs+numLocal+numGlobal) { + sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; + } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { + sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- + numGlobal]; + } else { + return llvm::make_error(Twine("symbol index (") + + Twine(symbolIndex) + ") out of range"); + } + + // Find atom from symbol. 
+ if ((sym->type & N_TYPE) == N_SECT) { + if (sym->sect > normalizedFile.sections.size()) + return llvm::make_error(Twine("symbol section index (") + + Twine(sym->sect) + ") out of range "); + const Section &symSection = normalizedFile.sections[sym->sect-1]; + uint64_t targetOffsetInSect = sym->value - symSection.address; + MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, + targetOffsetInSect); + if (target) { + *result = target; + return llvm::Error::success(); + } + return llvm::make_error("no atom found for defined symbol"); + } else if ((sym->type & N_TYPE) == N_UNDF) { + const lld::Atom *target = file.findUndefAtom(sym->name); + if (target) { + *result = target; + return llvm::Error::success(); + } + return llvm::make_error("no undefined atom found for sym"); + } else { + // Search undefs + return llvm::make_error("no atom found for symbol"); + } + }; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + // Use old-school iterator so that paired relocations can be grouped. + for (auto it=section.relocations.begin(), e=section.relocations.end(); + it != e; ++it) { + const Relocation &reloc = *it; + // Find atom this relocation is in. + if (reloc.offset > section.content.size()) + return llvm::make_error( + Twine("r_address (") + Twine(reloc.offset) + + ") is larger than section size (" + + Twine(section.content.size()) + ")"); + uint32_t offsetInAtom; + MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, + reloc.offset, + &offsetInAtom); + assert(inAtom && "r_address in range, should have found atom"); + uint64_t fixupAddress = section.address + reloc.offset; + + const lld::Atom *target = nullptr; + Reference::Addend addend = 0; + Reference::KindValue kind; + if (handler.isPairedReloc(reloc)) { + // Handle paired relocations together. 
+ const Relocation &reloc2 = *++it; + auto relocErr = handler.getPairReferenceInfo( + reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, + atomByAddr, atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr GE) { + return llvm::make_error( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r1_address=" + Twine::utohexstr(reloc.offset) + + ", r1_type=" + Twine(reloc.type) + + ", r1_extern=" + Twine(reloc.isExtern) + + ", r1_length=" + Twine((int)reloc.length) + + ", r1_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r1_symbolnum=") + + Twine(reloc.symbol)) + : (Twine(", r1_scattered=1, r1_value=") + + Twine(reloc.value))) + + ")" + + ", (r2_address=" + Twine::utohexstr(reloc2.offset) + + ", r2_type=" + Twine(reloc2.type) + + ", r2_extern=" + Twine(reloc2.isExtern) + + ", r2_length=" + Twine((int)reloc2.length) + + ", r2_pcrel=" + Twine(reloc2.pcRel) + + (!reloc2.scattered ? (Twine(", r2_symbolnum=") + + Twine(reloc2.symbol)) + : (Twine(", r2_scattered=1, r2_value=") + + Twine(reloc2.value))) + + ")" ); + }); + } + } + else { + // Use ArchHandler to convert relocation record into information + // needed to instantiate an lld::Reference object. + auto relocErr = handler.getReferenceInfo( + reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, + atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr GE) { + return llvm::make_error( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r_address=" + Twine::utohexstr(reloc.offset) + + ", r_type=" + Twine(reloc.type) + + ", r_extern=" + Twine(reloc.isExtern) + + ", r_length=" + Twine((int)reloc.length) + + ", r_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? 
(Twine(", r_symbolnum=") + Twine(reloc.symbol)) + : (Twine(", r_scattered=1, r_value=") + + Twine(reloc.value))) + + ")" ); + }); + } + } + // Instantiate an lld::Reference object and add to its atom. + inAtom->addReference(Reference::KindNamespace::mach_o, + handler.kindArch(), + kind, offsetInAtom, target, addend); + } + + return llvm::Error::success(); +} + +bool isDebugInfoSection(const Section §ion) { + if ((section.attributes & S_ATTR_DEBUG) == 0) + return false; + return section.segmentName.equals("__DWARF"); +} + +static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { + std::string strName = name.str(); + for (auto *atom : file.defined()) + if (atom->name() == strName) + return atom; + return nullptr; +} + +static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { + char *strCopy = alloc.Allocate(str.size() + 1); + memcpy(strCopy, str.data(), str.size()); + strCopy[str.size()] = '\0'; + return strCopy; +} + +llvm::Error parseStabs(MachOFile &file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + + if (normalizedFile.stabsSymbols.empty()) + return llvm::Error::success(); + + // FIXME: Kill this off when we can move to sane yaml parsing. 
+ std::unique_ptr allocator; + if (copyRefs) + allocator = std::make_unique(); + + enum { start, inBeginEnd } state = start; + + const Atom *currentAtom = nullptr; + uint64_t currentAtomAddress = 0; + StabsDebugInfo::StabsList stabsList; + for (const auto &stabSym : normalizedFile.stabsSymbols) { + Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, + stabSym.value, stabSym.name); + switch (state) { + case start: + switch (static_cast(stabSym.type)) { + case N_BNSYM: + state = inBeginEnd; + currentAtomAddress = stabSym.value; + Reference::Addend addend; + currentAtom = findAtomCoveringAddress(normalizedFile, file, + currentAtomAddress, addend); + if (addend != 0) + return llvm::make_error( + "Non-zero addend for BNSYM '" + stabSym.name + "' in " + + file.path()); + if (currentAtom) + stab.atom = currentAtom; + else { + // FIXME: ld64 just issues a warning here - should we match that? + return llvm::make_error( + "can't find atom for stabs BNSYM at " + + Twine::utohexstr(stabSym.value) + " in " + file.path()); + } + break; + case N_SO: + case N_OSO: + // Not associated with an atom, just copy. 
+ if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + case N_GSYM: { + auto colonIdx = stabSym.name.find(':'); + if (colonIdx != StringRef::npos) { + StringRef name = stabSym.name.substr(0, colonIdx); + currentAtom = findDefinedAtomByName(file, "_" + name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } else { + currentAtom = findDefinedAtomByName(file, stabSym.name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } + if (stab.atom == nullptr) + return llvm::make_error( + "can't find atom for N_GSYM stabs" + stabSym.name + + " in " + file.path()); + break; + } + case N_FUN: + return llvm::make_error( + "old-style N_FUN stab '" + stabSym.name + "' unsupported"); + default: + return llvm::make_error( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + break; + case inBeginEnd: + stab.atom = currentAtom; + switch (static_cast(stabSym.type)) { + case N_ENSYM: + state = start; + currentAtom = nullptr; + break; + case N_FUN: + // Just copy the string. + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + default: + return llvm::make_error( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + } + llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; + stabsList.push_back(stab); + } + + file.setDebugInfo(std::make_unique(std::move(stabsList))); + + // FIXME: Kill this off when we fix YAML memory ownership. 
+ file.debugInfo()->setAllocator(std::move(allocator)); + + return llvm::Error::success(); +} + +static llvm::DataExtractor +dataExtractorFromSection(const NormalizedFile &normalizedFile, + const Section &S) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + StringRef SecData(reinterpret_cast(S.content.data()), + S.content.size()); + return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, + uint64_t abbrCode) { + uint64_t offset = 0; + while (abbrevData.getULEB128(&offset) != abbrCode) { + // Tag + abbrevData.getULEB128(&offset); + // DW_CHILDREN + abbrevData.getU8(&offset); + // Attributes + while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) + ; + } + return offset; +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static Expected +getIndexedString(const NormalizedFile &normalizedFile, + llvm::dwarf::Form form, llvm::DataExtractor infoData, + uint64_t &infoOffset, const Section &stringsSection) { + if (form == llvm::dwarf::DW_FORM_string) + return infoData.getCStr(&infoOffset); + if (form != llvm::dwarf::DW_FORM_strp) + return llvm::make_error( + "string field encoded without DW_FORM_strp"); + uint64_t stringOffset = infoData.getU32(&infoOffset); + llvm::DataExtractor stringsData = + dataExtractorFromSection(normalizedFile, stringsSection); + return stringsData.getCStr(&stringOffset); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. 
+static llvm::Expected +readCompUnit(const NormalizedFile &normalizedFile, + const Section &info, + const Section &abbrev, + const Section &strings, + StringRef path) { + // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE + // inspection" code if possible. + uint64_t offset = 0; + llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; + auto infoData = dataExtractorFromSection(normalizedFile, info); + uint32_t length = infoData.getU32(&offset); + if (length == llvm::dwarf::DW_LENGTH_DWARF64) { + Format = llvm::dwarf::DwarfFormat::DWARF64; + infoData.getU64(&offset); + } + else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved) + return llvm::make_error("Malformed DWARF in " + path); + + uint16_t version = infoData.getU16(&offset); + + if (version < 2 || version > 4) + return llvm::make_error("Unsupported DWARF version in " + + path); + + infoData.getU32(&offset); // Abbrev offset (should be zero) + uint8_t addrSize = infoData.getU8(&offset); + + uint32_t abbrCode = infoData.getULEB128(&offset); + auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); + uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); + uint64_t tag = abbrevData.getULEB128(&abbrevOffset); + if (tag != llvm::dwarf::DW_TAG_compile_unit) + return llvm::make_error("top level DIE is not a compile unit"); + // DW_CHILDREN + abbrevData.getU8(&abbrevOffset); + uint32_t name; + llvm::dwarf::Form form; + llvm::dwarf::FormParams formParams = {version, addrSize, Format}; + TranslationUnitSource tu; + while ((name = abbrevData.getULEB128(&abbrevOffset)) | + (form = static_cast( + abbrevData.getULEB128(&abbrevOffset))) && + (name != 0 || form != 0)) { + switch (name) { + case llvm::dwarf::DW_AT_name: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + tu.name = *eName; + else + return eName.takeError(); + break; + } + case llvm::dwarf::DW_AT_comp_dir: { + if (auto eName = getIndexedString(normalizedFile, form, 
infoData, offset, + strings)) + tu.path = *eName; + else + return eName.takeError(); + break; + } + default: + llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); + } + } + return tu; +} + +llvm::Error parseDebugInfo(MachOFile &file, + const NormalizedFile &normalizedFile, bool copyRefs) { + + // Find the interesting debug info sections. + const Section *debugInfo = nullptr; + const Section *debugAbbrev = nullptr; + const Section *debugStrings = nullptr; + + for (auto &s : normalizedFile.sections) { + if (s.segmentName == "__DWARF") { + if (s.sectionName == "__debug_info") + debugInfo = &s; + else if (s.sectionName == "__debug_abbrev") + debugAbbrev = &s; + else if (s.sectionName == "__debug_str") + debugStrings = &s; + } + } + + if (!debugInfo) + return parseStabs(file, normalizedFile, copyRefs); + + if (debugInfo->content.size() == 0) + return llvm::Error::success(); + + if (debugInfo->content.size() < 12) + return llvm::make_error("Malformed __debug_info section in " + + file.path() + ": too small"); + + if (!debugAbbrev) + return llvm::make_error("Missing __dwarf_abbrev section in " + + file.path()); + + if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, + *debugStrings, file.path())) { + // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML + // memory ownership. 
+ std::unique_ptr allocator; + if (copyRefs) { + allocator = std::make_unique(); + tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); + tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); + } + file.setDebugInfo(std::make_unique(std::move(*tuOrErr))); + if (copyRefs) + file.debugInfo()->setAllocator(std::move(allocator)); + } else + return tuOrErr.takeError(); + + return llvm::Error::success(); +} + +static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { + if (is64) + return read64(addr, isBig); + + int32_t res = read32(addr, isBig); + return res; +} + +/// --- Augmentation String Processing --- + +struct CIEInfo { + bool _augmentationDataPresent = false; + bool _mayHaveEH = false; + uint32_t _offsetOfLSDA = ~0U; + uint32_t _offsetOfPersonality = ~0U; + uint32_t _offsetOfFDEPointerEncoding = ~0U; + uint32_t _augmentationDataLength = ~0U; +}; + +typedef llvm::DenseMap CIEInfoMap; + +static llvm::Error processAugmentationString(const uint8_t *augStr, + CIEInfo &cieInfo, + unsigned &len) { + + if (augStr[0] == '\0') { + len = 1; + return llvm::Error::success(); + } + + if (augStr[0] != 'z') + return llvm::make_error("expected 'z' at start of " + "augmentation string"); + + cieInfo._augmentationDataPresent = true; + uint64_t idx = 1; + + uint32_t offsetInAugmentationData = 0; + while (augStr[idx] != '\0') { + if (augStr[idx] == 'L') { + cieInfo._offsetOfLSDA = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'P') { + cieInfo._offsetOfPersonality = offsetInAugmentationData; + // This adds a single byte to the augmentation data for the encoding, + // then a number of bytes for the pointer data. + // FIXME: We are assuming 4 is correct here for the pointer size as we + // always currently use delta32ToGOT. 
+ offsetInAugmentationData += 5; + ++idx; + continue; + } + if (augStr[idx] == 'R') { + cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'e') { + if (augStr[idx + 1] != 'h') + return llvm::make_error("expected 'eh' in " + "augmentation string"); + cieInfo._mayHaveEH = true; + idx += 2; + continue; + } + ++idx; + } + + cieInfo._augmentationDataLength = offsetInAugmentationData; + + len = idx + 1; + return llvm::Error::success(); +} + +static llvm::Error processCIE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + CIEInfoMap &cieInfos) { + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const uint8_t *frameData = atom->rawContent().data(); + + CIEInfo cieInfo; + + uint32_t size = read32(frameData, isBig); + uint64_t cieIDField = size == 0xffffffffU + ? sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + uint64_t versionField = cieIDField + sizeof(uint32_t); + uint64_t augmentationStringField = versionField + sizeof(uint8_t); + + unsigned augmentationStringLength = 0; + if (auto err = processAugmentationString(frameData + augmentationStringField, + cieInfo, augmentationStringLength)) + return err; + + if (cieInfo._offsetOfPersonality != ~0U) { + // If we have augmentation data for the personality function, then we may + // need to implicitly generate its relocation. + + // Parse the EH Data field which is pointer sized. + uint64_t EHDataField = augmentationStringField + augmentationStringLength; + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); + + // Parse Code Align Factor which is a ULEB128. 
+ uint64_t CodeAlignField = EHDataField + EHDataFieldSize; + unsigned lengthFieldSize = 0; + llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); + + // Parse Data Align Factor which is a SLEB128. + uint64_t DataAlignField = CodeAlignField + lengthFieldSize; + llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); + + // Parse Return Address Register which is a byte. + uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; + + // Parse the augmentation length which is a ULEB128. + uint64_t AugmentationLengthField = ReturnAddressField + 1; + uint64_t AugmentationLength = + llvm::decodeULEB128(frameData + AugmentationLengthField, + &lengthFieldSize); + + if (AugmentationLength != cieInfo._augmentationDataLength) + return llvm::make_error("CIE augmentation data length " + "mismatch"); + + // Get the start address of the augmentation data. + uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; + + // Parse the personality function from the augmentation data. + uint64_t PersonalityField = + AugmentationDataField + cieInfo._offsetOfPersonality; + + // Parse the personality encoding. + // FIXME: Verify that this is a 32-bit pcrel offset. + uint64_t PersonalityFunctionField = PersonalityField + 1; + + if (atom->begin() != atom->end()) { + // If we have an explicit relocation, then make sure it matches this + // offset as this is where we'd expect it to be applied to. + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + if (CurrentRef->offsetInAtom() != PersonalityFunctionField) + return llvm::make_error("CIE personality reloc at " + "wrong offset"); + + if (++CurrentRef != atom->end()) + return llvm::make_error("CIE contains too many relocs"); + } else { + // Implicitly generate the personality function reloc. It's assumed to + // be a delta32 offset to a GOT entry. + // FIXME: Parse the encoding and check this. 
+ int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); + uint64_t funcAddress = ehFrameSection->address + offset + + PersonalityFunctionField; + funcAddress += funcDelta; + + const MachODefinedAtom *func = nullptr; + Reference::Addend addend; + func = findAtomCoveringAddress(normalizedFile, file, funcAddress, + addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + handler.unwindRefToPersonalityFunctionKind(), + PersonalityFunctionField, func, addend); + } + } else if (atom->begin() != atom->end()) { + // Otherwise, we expect there to be no relocations in this atom as the only + // relocation would have been to the personality function. + return llvm::make_error("unexpected relocation in CIE"); + } + + + cieInfos[atom] = std::move(cieInfo); + + return llvm::Error::success(); +} + +static llvm::Error processFDE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + const CIEInfoMap &cieInfos) { + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + + // Compiler wasn't lazy and actually told us what it meant. + // Unfortunately, the compiler may not have generated references for all of + // [cie, func, lsda] and so we still need to parse the FDE and add references + // for any the compiler didn't generate. + if (atom->begin() != atom->end()) + atom->sortReferences(); + + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + + // This helper returns the reference (if one exists) at the offset we are + // currently processing. It automatically increments the ref iterator if we + // do return a ref, and throws an error if we pass over a ref without + // comsuming it. 
+ auto currentRefGetter = [&CurrentRef, + &atom](uint64_t Offset)->const Reference* { + // If there are no more refs found, then we are done. + if (CurrentRef == atom->end()) + return nullptr; + + const Reference *Ref = *CurrentRef; + + // If we haven't reached the offset for this reference, then return that + // we don't yet have a reference to process. + if (Offset < Ref->offsetInAtom()) + return nullptr; + + // If the offset is equal, then we want to process this ref. + if (Offset == Ref->offsetInAtom()) { + ++CurrentRef; + return Ref; + } + + // The current ref is at an offset which is earlier than the current + // offset, then we failed to consume it when we should have. In this case + // throw an error. + llvm::report_fatal_error("Skipped reference when processing FDE"); + }; + + // Helper to either get the reference at this current location, and verify + // that it is of the expected type, or add a reference of that type. + // Returns the reference target. + auto verifyOrAddReference = [&](uint64_t targetAddress, + Reference::KindValue refKind, + uint64_t refAddress, + bool allowsAddend)->const Atom* { + if (auto *ref = currentRefGetter(refAddress)) { + // The compiler already emitted a relocation for the CIE ref. This should + // have been converted to the correct type of reference in + // get[Pair]ReferenceInfo(). + assert(ref->kindValue() == refKind && + "Incorrect EHFrame reference kind"); + return ref->target(); + } + Reference::Addend addend; + auto *target = findAtomCoveringAddress(normalizedFile, file, + targetAddress, addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + refKind, refAddress, target, addend); + + if (!allowsAddend) + assert(!addend && "EHFrame reference cannot have addend"); + return target; + }; + + const uint8_t *startFrameData = atom->rawContent().data(); + const uint8_t *frameData = startFrameData; + + uint32_t size = read32(frameData, isBig); + uint64_t cieFieldInFDE = size == 0xffffffffU + ? 
sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + + // Linker needs to fixup a reference from the FDE to its parent CIE (a + // 32-bit byte offset backwards in the __eh_frame section). + uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); + uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; + cieAddress -= cieDelta; + + auto *cieRefTarget = verifyOrAddReference(cieAddress, + handler.unwindRefToCIEKind(), + cieFieldInFDE, false); + const MachODefinedAtom *cie = dyn_cast(cieRefTarget); + assert(cie && cie->contentType() == DefinedAtom::typeCFI && + "FDE's CIE field does not point at the start of a CIE."); + + const CIEInfo &cieInfo = cieInfos.find(cie)->second; + + // Linker needs to fixup reference from the FDE to the function it's + // describing. FIXME: there are actually different ways to do this, and the + // particular method used is specified in the CIE's augmentation fields + // (hopefully) + uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); + + int64_t functionFromFDE = readSPtr(is64, isBig, + frameData + rangeFieldInFDE); + uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; + rangeStart += functionFromFDE; + + verifyOrAddReference(rangeStart, + handler.unwindRefToFunctionKind(), + rangeFieldInFDE, true); + + // Handle the augmentation data if there is any. + if (cieInfo._augmentationDataPresent) { + // First process the augmentation data length field. + uint64_t augmentationDataLengthFieldInFDE = + rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); + unsigned lengthFieldSize = 0; + uint64_t augmentationDataLength = + llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, + &lengthFieldSize); + + if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { + + // Look at the augmentation data field. 
+ uint64_t augmentationDataFieldInFDE = + augmentationDataLengthFieldInFDE + lengthFieldSize; + + int64_t lsdaFromFDE = readSPtr(is64, isBig, + frameData + augmentationDataFieldInFDE); + uint64_t lsdaStart = + ehFrameSection->address + offset + augmentationDataFieldInFDE + + lsdaFromFDE; + + verifyOrAddReference(lsdaStart, + handler.unwindRefToFunctionKind(), + augmentationDataFieldInFDE, true); + } + } + + return llvm::Error::success(); +} + +llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler) { + + const Section *ehFrameSection = nullptr; + for (auto §ion : normalizedFile.sections) + if (section.segmentName == "__TEXT" && + section.sectionName == "__eh_frame") { + ehFrameSection = §ion; + break; + } + + // No __eh_frame so nothing to do. + if (!ehFrameSection) + return llvm::Error::success(); + + llvm::Error ehFrameErr = llvm::Error::success(); + CIEInfoMap cieInfos; + + file.eachAtomInSection(*ehFrameSection, + [&](MachODefinedAtom *atom, uint64_t offset) -> void { + assert(atom->contentType() == DefinedAtom::typeCFI); + + // Bail out if we've encountered an error. 
+ if (ehFrameErr) + return; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + if (ArchHandler::isDwarfCIE(isBig, atom)) + ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + else + ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + }); + + return ehFrameErr; +} + +llvm::Error parseObjCImageInfo(const Section §, + const NormalizedFile &normalizedFile, + MachOFile &file) { + + // struct objc_image_info { + // uint32_t version; // initially 0 + // uint32_t flags; + // }; + + ArrayRef content = sect.content; + if (content.size() != 8) + return llvm::make_error(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should be 8 bytes in size"); + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + uint32_t version = read32(content.data(), isBig); + if (version) + return llvm::make_error(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should have version=0"); + + uint32_t flags = read32(content.data() + 4, isBig); + if (flags & (MachOLinkingContext::objc_supports_gc | + MachOLinkingContext::objc_gc_only)) + return llvm::make_error(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " uses GC. This is not supported"); + + if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) + file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); + else + file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); + + file.setSwiftVersion((flags >> 8) & 0xFF); + + return llvm::Error::success(); +} + +/// Converts normalized mach-o file into an lld::File and lld::Atoms. 
+llvm::Expected> +objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + auto file = std::make_unique(path); + if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr(std::move(file)); +} + +llvm::Expected> +dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + // Instantiate SharedLibraryFile object. + auto file = std::make_unique(path); + if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr(std::move(file)); +} + +} // anonymous namespace + +namespace normalized { + +static bool isObjCImageInfo(const Section §) { + return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || + (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); +} + +llvm::Error +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " + << file->path() << "\n"); + bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); + + // Create atoms from each section. + for (auto § : normalizedFile.sections) { + + // If this is a debug-info section parse it specially. + if (isDebugInfoSection(sect)) + continue; + + // If the file contains an objc_image_info struct, then we should parse the + // ObjC flags and Swift version. + if (isObjCImageInfo(sect)) { + if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) + return ec; + // We then skip adding atoms for this section as we use the ObjCPass to + // re-emit this data after it has been aggregated for all files. 
+ continue; + } + + bool customSectionName; + DefinedAtom::ContentType atomType = atomTypeFromSection(sect, + customSectionName); + if (auto ec = processSection(atomType, sect, customSectionName, + normalizedFile, *file, scatterable, copyRefs)) + return ec; + } + // Create atoms from undefined symbols. + for (auto &sym : normalizedFile.undefinedSymbols) { + // Undefined symbols with n_value != 0 are actually tentative definitions. + if (sym.value == Hex64(0)) { + file->addUndefinedAtom(sym.name, copyRefs); + } else { + file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, + DefinedAtom::Alignment(1 << (sym.desc >> 8)), + copyRefs); + } + } + + // Convert mach-o relocations to References + std::unique_ptr handler + = ArchHandler::create(normalizedFile.arch); + for (auto § : normalizedFile.sections) { + if (isDebugInfoSection(sect)) + continue; + if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, + *file, *handler)) + return ec; + } + + // Add additional arch-specific References + file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { + handler->addAdditionalReferences(*atom); + }); + + // Each __eh_frame section needs references to both __text (the function we're + // providing unwind info for) and itself (FDE -> CIE). These aren't + // represented in the relocations on some architectures, so we have to add + // them back in manually there. + if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) + return ec; + + // Process mach-o data-in-code regions array. That information is encoded in + // atoms as References at each transition point. 
+ unsigned nextIndex = 0; + for (const DataInCode &entry : normalizedFile.dataInCode) { + ++nextIndex; + const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); + if (!s) { + return llvm::make_error(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any section")); + } + uint64_t offsetInSect = entry.offset - s->address; + uint32_t offsetInAtom; + MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, + &offsetInAtom); + if (offsetInAtom + entry.length > atom->size()) { + return llvm::make_error(Twine("LC_DATA_IN_CODE entry " + "(offset=" + + Twine(entry.offset) + + ", length=" + + Twine(entry.length) + + ") crosses atom boundary.")); + } + // Add reference that marks start of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionStart(*atom), + offsetInAtom, atom, entry.kind); + + // Peek at next entry, if it starts where this one ends, skip ending ref. + if (nextIndex < normalizedFile.dataInCode.size()) { + const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; + if (nextEntry.offset == (entry.offset + entry.length)) + continue; + } + + // If data goes to end of function, skip ending ref. + if ((offsetInAtom + entry.length) == atom->size()) + continue; + + // Add reference that marks end of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionEnd(*atom), + offsetInAtom+entry.length, atom, 0); + } + + // Cache some attributes on the file for use later. + file->setFlags(normalizedFile.flags); + file->setArch(normalizedFile.arch); + file->setOS(normalizedFile.os); + file->setMinVersion(normalizedFile.minOSverson); + file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); + + // Sort references in each atom to their canonical order. 
+ for (const DefinedAtom* defAtom : file->defined()) { + reinterpret_cast(defAtom)->sortReferences(); + } + + if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) + return err; + + return llvm::Error::success(); +} + +llvm::Error +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + file->setInstallName(normalizedFile.installName); + file->setCompatVersion(normalizedFile.compatVersion); + file->setCurrentVersion(normalizedFile.currentVersion); + + // Tell MachODylibFile object about all symbols it exports. + if (!normalizedFile.exportInfo.empty()) { + // If exports trie exists, use it instead of traditional symbol table. + for (const Export &exp : normalizedFile.exportInfo) { + bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + // StringRefs from export iterator are ephemeral, so force copy. + file->addExportedSymbol(exp.name, weakDef, true); + } + } else { + for (auto &sym : normalizedFile.globalSymbols) { + assert((sym.scope & N_EXT) && "only expect external symbols here"); + bool weakDef = (sym.desc & N_WEAK_DEF); + file->addExportedSymbol(sym.name, weakDef, copyRefs); + } + } + // Tell MachODylibFile object about all dylibs it re-exports. + for (const DependentDylib &dep : normalizedFile.dependentDylibs) { + if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) + file->addReExportedDylib(dep.path); + } + return llvm::Error::success(); +} + +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit) { + + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->atomType != atomType) + continue; + // Wild carded entries are ignored for reverse lookups. 
+ if (p->segmentName.empty() || p->sectionName.empty()) + continue; + segmentName = p->segmentName; + sectionName = p->sectionName; + sectionType = p->sectionType; + sectionAttrs = 0; + relocsToDefinedCanBeImplicit = false; + if (atomType == DefinedAtom::typeCode) + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; + if (atomType == DefinedAtom::typeCFI) + relocsToDefinedCanBeImplicit = true; + return; + } + llvm_unreachable("content type not yet supported"); +} + +llvm::Expected> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + switch (normalizedFile.fileType) { + case MH_DYLIB: + case MH_DYLIB_STUB: + return dylibToAtoms(normalizedFile, path, copyRefs); + case MH_OBJECT: + return objectToAtoms(normalizedFile, path, copyRefs); + default: + llvm_unreachable("unhandled MachO file type!"); + } +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp new file mode 100644 index 000000000000..3826e97d62b9 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -0,0 +1,840 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation uses YAML I/O to +/// provide the convert between YAML and the normalized mach-o (NM). 
+/// +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ + +#include "MachONormalizedFile.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include + +using llvm::StringRef; +using namespace llvm::yaml; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using lld::YamlContext; + +LLVM_YAML_IS_SEQUENCE_VECTOR(Segment) +LLVM_YAML_IS_SEQUENCE_VECTOR(DependentDylib) +LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(Export) +LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode) + + +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// A vector of Sections is a sequence. +template<> +struct SequenceTraits< std::vector
> { + static size_t size(IO &io, std::vector
&seq) { + return seq.size(); + } + static Section& element(IO &io, std::vector
&seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +template<> +struct SequenceTraits< std::vector > { + static size_t size(IO &io, std::vector &seq) { + return seq.size(); + } + static Symbol& element(IO &io, std::vector &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// A vector of Relocations is a sequence. +template<> +struct SequenceTraits< Relocations > { + static size_t size(IO &io, Relocations &seq) { + return seq.size(); + } + static Relocation& element(IO &io, Relocations &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// The content for a section is represented as a flow sequence of hex bytes. +template<> +struct SequenceTraits< ContentBytes > { + static size_t size(IO &io, ContentBytes &seq) { + return seq.size(); + } + static Hex8& element(IO &io, ContentBytes &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +// The indirect symbols for a section is represented as a flow sequence +// of numbers (symbol table indexes). 
+template<> +struct SequenceTraits< IndirectSymbols > { + static size_t size(IO &io, IndirectSymbols &seq) { + return seq.size(); + } + static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) { + io.enumCase(value, "unknown",lld::MachOLinkingContext::arch_unknown); + io.enumCase(value, "ppc", lld::MachOLinkingContext::arch_ppc); + io.enumCase(value, "x86", lld::MachOLinkingContext::arch_x86); + io.enumCase(value, "x86_64", lld::MachOLinkingContext::arch_x86_64); + io.enumCase(value, "armv6", lld::MachOLinkingContext::arch_armv6); + io.enumCase(value, "armv7", lld::MachOLinkingContext::arch_armv7); + io.enumCase(value, "armv7s", lld::MachOLinkingContext::arch_armv7s); + io.enumCase(value, "arm64", lld::MachOLinkingContext::arch_arm64); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) { + io.enumCase(value, "unknown", + lld::MachOLinkingContext::OS::unknown); + io.enumCase(value, "Mac OS X", + lld::MachOLinkingContext::OS::macOSX); + io.enumCase(value, "iOS", + lld::MachOLinkingContext::OS::iOS); + io.enumCase(value, "iOS Simulator", + lld::MachOLinkingContext::OS::iOS_simulator); + } +}; + + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, HeaderFileType &value) { + io.enumCase(value, "MH_OBJECT", llvm::MachO::MH_OBJECT); + io.enumCase(value, "MH_DYLIB", llvm::MachO::MH_DYLIB); + io.enumCase(value, "MH_EXECUTE", llvm::MachO::MH_EXECUTE); + io.enumCase(value, "MH_BUNDLE", llvm::MachO::MH_BUNDLE); + } +}; + + +template <> +struct ScalarBitSetTraits { + static void bitset(IO &io, FileFlags &value) { + io.bitSetCase(value, "MH_TWOLEVEL", + llvm::MachO::MH_TWOLEVEL); + io.bitSetCase(value, 
"MH_SUBSECTIONS_VIA_SYMBOLS", + llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + } +}; + + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, SectionType &value) { + io.enumCase(value, "S_REGULAR", + llvm::MachO::S_REGULAR); + io.enumCase(value, "S_ZEROFILL", + llvm::MachO::S_ZEROFILL); + io.enumCase(value, "S_CSTRING_LITERALS", + llvm::MachO::S_CSTRING_LITERALS); + io.enumCase(value, "S_4BYTE_LITERALS", + llvm::MachO::S_4BYTE_LITERALS); + io.enumCase(value, "S_8BYTE_LITERALS", + llvm::MachO::S_8BYTE_LITERALS); + io.enumCase(value, "S_LITERAL_POINTERS", + llvm::MachO::S_LITERAL_POINTERS); + io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_SYMBOL_STUBS", + llvm::MachO::S_SYMBOL_STUBS); + io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS", + llvm::MachO::S_MOD_INIT_FUNC_POINTERS); + io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS", + llvm::MachO::S_MOD_TERM_FUNC_POINTERS); + io.enumCase(value, "S_COALESCED", + llvm::MachO::S_COALESCED); + io.enumCase(value, "S_GB_ZEROFILL", + llvm::MachO::S_GB_ZEROFILL); + io.enumCase(value, "S_INTERPOSING", + llvm::MachO::S_INTERPOSING); + io.enumCase(value, "S_16BYTE_LITERALS", + llvm::MachO::S_16BYTE_LITERALS); + io.enumCase(value, "S_DTRACE_DOF", + llvm::MachO::S_DTRACE_DOF); + io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_DYLIB_SYMBOL_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_REGULAR", + llvm::MachO::S_THREAD_LOCAL_REGULAR); + io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL", + llvm::MachO::S_THREAD_LOCAL_ZEROFILL); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLES", + llvm::MachO::S_THREAD_LOCAL_VARIABLES); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLE_POINTERS", + llvm::MachO::S_THREAD_LOCAL_VARIABLE_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS", + llvm::MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } 
+}; + +template <> +struct ScalarBitSetTraits { + static void bitset(IO &io, SectionAttr &value) { + io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS", + llvm::MachO::S_ATTR_PURE_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS", + llvm::MachO::S_ATTR_SOME_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP", + llvm::MachO::S_ATTR_NO_DEAD_STRIP); + io.bitSetCase(value, "S_ATTR_EXT_RELOC", + llvm::MachO::S_ATTR_EXT_RELOC); + io.bitSetCase(value, "S_ATTR_LOC_RELOC", + llvm::MachO::S_ATTR_LOC_RELOC); + io.bitSetCase(value, "S_ATTR_DEBUG", + llvm::MachO::S_ATTR_DEBUG); + } +}; + +/// This is a custom formatter for SectionAlignment. Values are +/// the power to raise by, ie, the n in 2^n. +template <> struct ScalarTraits { + static void output(const SectionAlignment &value, void *ctxt, + raw_ostream &out) { + out << llvm::format("%d", (uint32_t)value); + } + + static StringRef input(StringRef scalar, void *ctxt, + SectionAlignment &value) { + uint32_t alignment; + if (scalar.getAsInteger(0, alignment)) { + return "malformed alignment value"; + } + if (!llvm::isPowerOf2_32(alignment)) + return "alignment must be a power of 2"; + value = alignment; + return StringRef(); // returning empty string means success + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, NListType &value) { + io.enumCase(value, "N_UNDF", llvm::MachO::N_UNDF); + io.enumCase(value, "N_ABS", llvm::MachO::N_ABS); + io.enumCase(value, "N_SECT", llvm::MachO::N_SECT); + io.enumCase(value, "N_PBUD", llvm::MachO::N_PBUD); + io.enumCase(value, "N_INDR", llvm::MachO::N_INDR); + } +}; + +template <> +struct ScalarBitSetTraits { + static void bitset(IO &io, SymbolScope &value) { + io.bitSetCase(value, "N_EXT", llvm::MachO::N_EXT); + io.bitSetCase(value, "N_PEXT", llvm::MachO::N_PEXT); + } +}; + +template <> +struct ScalarBitSetTraits { + static void bitset(IO &io, SymbolDesc 
&value) { + io.bitSetCase(value, "N_NO_DEAD_STRIP", llvm::MachO::N_NO_DEAD_STRIP); + io.bitSetCase(value, "N_WEAK_REF", llvm::MachO::N_WEAK_REF); + io.bitSetCase(value, "N_WEAK_DEF", llvm::MachO::N_WEAK_DEF); + io.bitSetCase(value, "N_ARM_THUMB_DEF", llvm::MachO::N_ARM_THUMB_DEF); + io.bitSetCase(value, "N_SYMBOL_RESOLVER", llvm::MachO::N_SYMBOL_RESOLVER); + } +}; + + +template <> +struct MappingTraits
{ + struct NormalizedContentBytes; + static void mapping(IO &io, Section §) { + io.mapRequired("segment", sect.segmentName); + io.mapRequired("section", sect.sectionName); + io.mapRequired("type", sect.type); + io.mapOptional("attributes", sect.attributes); + io.mapOptional("alignment", sect.alignment, (SectionAlignment)1); + io.mapRequired("address", sect.address); + if (isZeroFillSection(sect.type)) { + // S_ZEROFILL sections use "size:" instead of "content:" + uint64_t size = sect.content.size(); + io.mapOptional("size", size); + if (!io.outputting()) { + uint8_t *bytes = nullptr; + sect.content = makeArrayRef(bytes, size); + } + } else { + MappingNormalization> content( + io, sect.content); + io.mapOptional("content", content->_normalizedContent); + } + io.mapOptional("relocations", sect.relocations); + io.mapOptional("indirect-syms", sect.indirectSymbols); + } + + struct NormalizedContent { + NormalizedContent(IO &io) : _io(io) {} + NormalizedContent(IO &io, ArrayRef content) : _io(io) { + // When writing yaml, copy content byte array to Hex8 vector. + for (auto &c : content) { + _normalizedContent.push_back(c); + } + } + ArrayRef denormalize(IO &io) { + // When reading yaml, allocate byte array owned by NormalizedFile and + // copy Hex8 vector to byte array. 
+ YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + size_t size = _normalizedContent.size(); + if (!size) + return None; + uint8_t *bytes = file->ownedAllocations.Allocate(size); + std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes); + return makeArrayRef(bytes, size); + } + + IO &_io; + ContentBytes _normalizedContent; + }; +}; + + +template <> +struct MappingTraits { + static void mapping(IO &io, Relocation &reloc) { + io.mapRequired("offset", reloc.offset); + io.mapOptional("scattered", reloc.scattered, false); + io.mapRequired("type", reloc.type); + io.mapRequired("length", reloc.length); + io.mapRequired("pc-rel", reloc.pcRel); + if ( !reloc.scattered ) + io.mapRequired("extern", reloc.isExtern); + if ( reloc.scattered ) + io.mapRequired("value", reloc.value); + if ( !reloc.scattered ) + io.mapRequired("symbol", reloc.symbol); + } +}; + + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, RelocationInfoType &value) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + switch (file->arch) { + case lld::MachOLinkingContext::arch_x86_64: + io.enumCase(value, "X86_64_RELOC_UNSIGNED", + llvm::MachO::X86_64_RELOC_UNSIGNED); + io.enumCase(value, "X86_64_RELOC_SIGNED", + llvm::MachO::X86_64_RELOC_SIGNED); + io.enumCase(value, "X86_64_RELOC_BRANCH", + llvm::MachO::X86_64_RELOC_BRANCH); + io.enumCase(value, "X86_64_RELOC_GOT_LOAD", + llvm::MachO::X86_64_RELOC_GOT_LOAD); + io.enumCase(value, "X86_64_RELOC_GOT", + llvm::MachO::X86_64_RELOC_GOT); + io.enumCase(value, "X86_64_RELOC_SUBTRACTOR", + llvm::MachO::X86_64_RELOC_SUBTRACTOR); + io.enumCase(value, "X86_64_RELOC_SIGNED_1", + llvm::MachO::X86_64_RELOC_SIGNED_1); + io.enumCase(value, "X86_64_RELOC_SIGNED_2", + llvm::MachO::X86_64_RELOC_SIGNED_2); + 
io.enumCase(value, "X86_64_RELOC_SIGNED_4", + llvm::MachO::X86_64_RELOC_SIGNED_4); + io.enumCase(value, "X86_64_RELOC_TLV", + llvm::MachO::X86_64_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_x86: + io.enumCase(value, "GENERIC_RELOC_VANILLA", + llvm::MachO::GENERIC_RELOC_VANILLA); + io.enumCase(value, "GENERIC_RELOC_PAIR", + llvm::MachO::GENERIC_RELOC_PAIR); + io.enumCase(value, "GENERIC_RELOC_SECTDIFF", + llvm::MachO::GENERIC_RELOC_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF", + llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_TLV", + llvm::MachO::GENERIC_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_armv6: + case lld::MachOLinkingContext::arch_armv7: + case lld::MachOLinkingContext::arch_armv7s: + io.enumCase(value, "ARM_RELOC_VANILLA", + llvm::MachO::ARM_RELOC_VANILLA); + io.enumCase(value, "ARM_RELOC_PAIR", + llvm::MachO::ARM_RELOC_PAIR); + io.enumCase(value, "ARM_RELOC_SECTDIFF", + llvm::MachO::ARM_RELOC_SECTDIFF); + io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF", + llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "ARM_RELOC_BR24", + llvm::MachO::ARM_RELOC_BR24); + io.enumCase(value, "ARM_THUMB_RELOC_BR22", + llvm::MachO::ARM_THUMB_RELOC_BR22); + io.enumCase(value, "ARM_RELOC_HALF", + llvm::MachO::ARM_RELOC_HALF); + io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF", + llvm::MachO::ARM_RELOC_HALF_SECTDIFF); + break; + case lld::MachOLinkingContext::arch_arm64: + io.enumCase(value, "ARM64_RELOC_UNSIGNED", + llvm::MachO::ARM64_RELOC_UNSIGNED); + io.enumCase(value, "ARM64_RELOC_SUBTRACTOR", + llvm::MachO::ARM64_RELOC_SUBTRACTOR); + io.enumCase(value, "ARM64_RELOC_BRANCH26", + llvm::MachO::ARM64_RELOC_BRANCH26); + io.enumCase(value, "ARM64_RELOC_PAGE21", + llvm::MachO::ARM64_RELOC_PAGE21); + io.enumCase(value, "ARM64_RELOC_PAGEOFF12", + llvm::MachO::ARM64_RELOC_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21); + 
io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT", + llvm::MachO::ARM64_RELOC_POINTER_TO_GOT); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_ADDEND", + llvm::MachO::ARM64_RELOC_ADDEND); + break; + default: + llvm_unreachable("unknown architecture"); + } + } +}; + + +template <> +struct MappingTraits { + static void mapping(IO &io, Symbol& sym) { + io.mapRequired("name", sym.name); + io.mapRequired("type", sym.type); + io.mapOptional("scope", sym.scope, SymbolScope(0)); + io.mapOptional("sect", sym.sect, (uint8_t)0); + if (sym.type == llvm::MachO::N_UNDF) { + // In undef symbols, desc field contains alignment/ordinal info + // which is better represented as a hex vaule. + uint16_t t1 = sym.desc; + Hex16 t2 = t1; + io.mapOptional("desc", t2, Hex16(0)); + sym.desc = t2; + } else { + // In defined symbols, desc fit is a set of option bits. + io.mapOptional("desc", sym.desc, SymbolDesc(0)); + } + io.mapRequired("value", sym.value); + } +}; + +// Custom mapping for VMProtect (e.g. "r-x"). +template <> +struct ScalarTraits { + static void output(const VMProtect &value, void*, raw_ostream &out) { + out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-'); + out << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-'); + out << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-'); + } + static StringRef input(StringRef scalar, void*, VMProtect &value) { + value = 0; + if (scalar.size() != 3) + return "segment access protection must be three chars (e.g. 
\"r-x\")"; + switch (scalar[0]) { + case 'r': + value = llvm::MachO::VM_PROT_READ; + break; + case '-': + break; + default: + return "segment access protection first char must be 'r' or '-'"; + } + switch (scalar[1]) { + case 'w': + value = value | llvm::MachO::VM_PROT_WRITE; + break; + case '-': + break; + default: + return "segment access protection second char must be 'w' or '-'"; + } + switch (scalar[2]) { + case 'x': + value = value | llvm::MachO::VM_PROT_EXECUTE; + break; + case '-': + break; + default: + return "segment access protection third char must be 'x' or '-'"; + } + // Return the empty string on success, + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + + +template <> +struct MappingTraits { + static void mapping(IO &io, Segment& seg) { + io.mapRequired("name", seg.name); + io.mapRequired("address", seg.address); + io.mapRequired("size", seg.size); + io.mapRequired("init-access", seg.init_access); + io.mapRequired("max-access", seg.max_access); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, LoadCommandType &value) { + io.enumCase(value, "LC_LOAD_DYLIB", + llvm::MachO::LC_LOAD_DYLIB); + io.enumCase(value, "LC_LOAD_WEAK_DYLIB", + llvm::MachO::LC_LOAD_WEAK_DYLIB); + io.enumCase(value, "LC_REEXPORT_DYLIB", + llvm::MachO::LC_REEXPORT_DYLIB); + io.enumCase(value, "LC_LOAD_UPWARD_DYLIB", + llvm::MachO::LC_LOAD_UPWARD_DYLIB); + io.enumCase(value, "LC_LAZY_LOAD_DYLIB", + llvm::MachO::LC_LAZY_LOAD_DYLIB); + io.enumCase(value, "LC_VERSION_MIN_MACOSX", + llvm::MachO::LC_VERSION_MIN_MACOSX); + io.enumCase(value, "LC_VERSION_MIN_IPHONEOS", + llvm::MachO::LC_VERSION_MIN_IPHONEOS); + io.enumCase(value, "LC_VERSION_MIN_TVOS", + llvm::MachO::LC_VERSION_MIN_TVOS); + io.enumCase(value, "LC_VERSION_MIN_WATCHOS", + llvm::MachO::LC_VERSION_MIN_WATCHOS); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &io, DependentDylib& dylib) { + 
io.mapRequired("path", dylib.path); + io.mapOptional("kind", dylib.kind, + llvm::MachO::LC_LOAD_DYLIB); + io.mapOptional("compat-version", dylib.compatVersion, + PackedVersion(0x10000)); + io.mapOptional("current-version", dylib.currentVersion, + PackedVersion(0x10000)); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, RebaseType &value) { + io.enumCase(value, "REBASE_TYPE_POINTER", + llvm::MachO::REBASE_TYPE_POINTER); + io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32", + llvm::MachO::REBASE_TYPE_TEXT_PCREL32); + io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::REBASE_TYPE_TEXT_ABSOLUTE32); + } +}; + + +template <> +struct MappingTraits { + static void mapping(IO &io, RebaseLocation& rebase) { + io.mapRequired("segment-index", rebase.segIndex); + io.mapRequired("segment-offset", rebase.segOffset); + io.mapOptional("kind", rebase.kind, + llvm::MachO::REBASE_TYPE_POINTER); + } +}; + + + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, BindType &value) { + io.enumCase(value, "BIND_TYPE_POINTER", + llvm::MachO::BIND_TYPE_POINTER); + io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::BIND_TYPE_TEXT_ABSOLUTE32); + io.enumCase(value, "BIND_TYPE_TEXT_PCREL32", + llvm::MachO::BIND_TYPE_TEXT_PCREL32); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &io, BindLocation &bind) { + io.mapRequired("segment-index", bind.segIndex); + io.mapRequired("segment-offset", bind.segOffset); + io.mapOptional("kind", bind.kind, + llvm::MachO::BIND_TYPE_POINTER); + io.mapOptional("can-be-null", bind.canBeNull, false); + io.mapRequired("ordinal", bind.ordinal); + io.mapRequired("symbol-name", bind.symbolName); + io.mapOptional("addend", bind.addend, Hex64(0)); + } +}; + + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, ExportSymbolKind &value) { + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_REGULAR", + 
llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); + } +}; + +template <> +struct ScalarBitSetTraits { + static void bitset(IO &io, ExportFlags &value) { + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION", + llvm::MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT", + llvm::MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER", + llvm::MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); + } +}; + + +template <> +struct MappingTraits { + static void mapping(IO &io, Export &exp) { + io.mapRequired("name", exp.name); + io.mapOptional("offset", exp.offset); + io.mapOptional("kind", exp.kind, + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + if (!io.outputting() || exp.flags) + io.mapOptional("flags", exp.flags); + io.mapOptional("other", exp.otherOffset, Hex32(0)); + io.mapOptional("other-name", exp.otherName, StringRef()); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, DataRegionType &value) { + io.enumCase(value, "DICE_KIND_DATA", + llvm::MachO::DICE_KIND_DATA); + io.enumCase(value, "DICE_KIND_JUMP_TABLE8", + llvm::MachO::DICE_KIND_JUMP_TABLE8); + io.enumCase(value, "DICE_KIND_JUMP_TABLE16", + llvm::MachO::DICE_KIND_JUMP_TABLE16); + io.enumCase(value, "DICE_KIND_JUMP_TABLE32", + llvm::MachO::DICE_KIND_JUMP_TABLE32); + io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32", + llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &io, DataInCode &entry) { + io.mapRequired("offset", entry.offset); + io.mapRequired("length", entry.length); + io.mapRequired("kind", entry.kind); + } +}; + +template <> +struct ScalarTraits { + static void output(const 
PackedVersion &value, void*, raw_ostream &out) { + out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF); + if (value & 0xFF) { + out << llvm::format(".%d", (value & 0xFF)); + } + } + static StringRef input(StringRef scalar, void*, PackedVersion &result) { + uint32_t value; + if (lld::MachOLinkingContext::parsePackedVersion(scalar, value)) + return "malformed version number"; + result = value; + // Return the empty string on success, + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> +struct MappingTraits { + static void mapping(IO &io, NormalizedFile &file) { + io.mapRequired("arch", file.arch); + io.mapRequired("file-type", file.fileType); + io.mapOptional("flags", file.flags); + io.mapOptional("dependents", file.dependentDylibs); + io.mapOptional("install-name", file.installName, StringRef()); + io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000)); + io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000)); + io.mapOptional("has-UUID", file.hasUUID, true); + io.mapOptional("rpaths", file.rpaths); + io.mapOptional("entry-point", file.entryAddress, Hex64(0)); + io.mapOptional("stack-size", file.stackSize, Hex64(0)); + io.mapOptional("source-version", file.sourceVersion, Hex64(0)); + io.mapOptional("OS", file.os); + io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0)); + io.mapOptional("min-os-version-kind", file.minOSVersionKind, (LoadCommandType)0); + io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0)); + io.mapOptional("segments", file.segments); + io.mapOptional("sections", file.sections); + io.mapOptional("local-symbols", file.localSymbols); + io.mapOptional("global-symbols", file.globalSymbols); + io.mapOptional("undefined-symbols",file.undefinedSymbols); + io.mapOptional("page-size", file.pageSize, Hex32(4096)); + io.mapOptional("rebasings", file.rebasingInfo); + io.mapOptional("bindings", file.bindingInfo); 
+ io.mapOptional("weak-bindings", file.weakBindingInfo); + io.mapOptional("lazy-bindings", file.lazyBindingInfo); + io.mapOptional("exports", file.exportInfo); + io.mapOptional("dataInCode", file.dataInCode); + } + static std::string validate(IO &io, NormalizedFile &file) { return {}; } +}; + +} // namespace llvm +} // namespace yaml + + +namespace lld { +namespace mach_o { + +/// Handles !mach-o tagged yaml documents. +bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io, + const lld::File *&file) const { + if (!io.mapTag("!mach-o")) + return false; + // Step 1: parse yaml into normalized mach-o struct. + NormalizedFile nf; + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + assert(info->_normalizeMachOFile == nullptr); + info->_normalizeMachOFile = &nf; + MappingTraits::mapping(io, nf); + // Step 2: parse normalized mach-o struct into atoms. + auto fileOrError = normalizedToAtoms(nf, info->_path, true); + + // Check that we parsed successfully. + if (!fileOrError) { + std::string buffer; + llvm::raw_string_ostream stream(buffer); + handleAllErrors(fileOrError.takeError(), + [&](const llvm::ErrorInfoBase &EI) { + EI.log(stream); + stream << "\n"; + }); + io.setError(stream.str()); + return false; + } + + if (nf.arch != _arch) { + io.setError(Twine("file is wrong architecture. Expected (" + + MachOLinkingContext::nameFromArch(_arch) + + ") found (" + + MachOLinkingContext::nameFromArch(nf.arch) + + ")")); + return false; + } + info->_normalizeMachOFile = nullptr; + file = fileOrError->release(); + return true; +} + + + +namespace normalized { + +/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. +llvm::Expected> +readYaml(std::unique_ptr &mb) { + // Make empty NormalizedFile. + std::unique_ptr f(new NormalizedFile()); + + // Create YAML Input parser. 
+ YamlContext yamlContext; + yamlContext._normalizeMachOFile = f.get(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill NormalizedFile by parsing yaml. + yin >> *f; + + // Return error if there were parsing problems. + if (auto ec = yin.error()) + return llvm::make_error(Twine("YAML parsing error: ") + + ec.message()); + + // Hand ownership of instantiated NormalizedFile to caller. + return std::move(f); +} + + +/// Writes a yaml encoded mach-o files from an in-memory normalized view. +std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) { + // YAML I/O is not const aware, so need to cast away ;-( + NormalizedFile *f = const_cast(&file); + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f; + llvm::yaml::Output yout(out, &yamlContext); + + // Stream out yaml. + yout << *f; + + return std::error_code(); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachOPasses.h b/lld/lib/ReaderWriter/MachO/MachOPasses.h new file mode 100644 index 000000000000..93cd3e4df281 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachOPasses.h @@ -0,0 +1,29 @@ +//===- lib/ReaderWriter/MachO/MachOPasses.h -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_PASSES_H +#define LLD_READER_WRITER_MACHO_PASSES_H + +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx); +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx); +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx); +void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx); +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx); +void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx); +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx); + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_PASSES_H diff --git a/lld/lib/ReaderWriter/MachO/ObjCPass.cpp b/lld/lib/ReaderWriter/MachO/ObjCPass.cpp new file mode 100644 index 000000000000..02a95b5aa0c0 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/ObjCPass.cpp @@ -0,0 +1,131 @@ +//===- lib/ReaderWriter/MachO/ObjCPass.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "MachOPasses.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +/// +/// ObjC Image Info Atom created by the ObjC pass. +/// +class ObjCImageInfoAtom : public SimpleDefinedAtom { +public: + ObjCImageInfoAtom(const File &file, bool isBig, + MachOLinkingContext::ObjCConstraint objCConstraint, + uint32_t swiftVersion) + : SimpleDefinedAtom(file) { + + Data.info.version = 0; + + switch (objCConstraint) { + case MachOLinkingContext::objc_unknown: + llvm_unreachable("Shouldn't run the objc pass without a constraint"); + case MachOLinkingContext::objc_supports_gc: + case MachOLinkingContext::objc_gc_only: + llvm_unreachable("GC is not supported"); + case MachOLinkingContext::objc_retainReleaseForSimulator: + // The retain/release for simulator flag is already the correct + // encoded value for the data so just set it here. + Data.info.flags = (uint32_t)objCConstraint; + break; + case MachOLinkingContext::objc_retainRelease: + // We don't need to encode this flag, so just leave the flags as 0. 
+ Data.info.flags = 0; + break; + } + + Data.info.flags |= (swiftVersion << 8); + + normalized::write32(Data.bytes + 4, Data.info.flags, isBig); + } + + ~ObjCImageInfoAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeObjCImageInfo; + } + + Alignment alignment() const override { + return 4; + } + + uint64_t size() const override { + return 8; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef rawContent() const override { + return llvm::makeArrayRef(Data.bytes, size()); + } + +private: + + struct objc_image_info { + uint32_t version; + uint32_t flags; + }; + + union { + objc_image_info info; + uint8_t bytes[8]; + } Data; +}; + +class ObjCPass : public Pass { +public: + ObjCPass(const MachOLinkingContext &context) + : _ctx(context), + _file(*_ctx.make_file("")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Add the image info. + mergedFile.addAtom(*getImageInfo()); + + return llvm::Error::success(); + } + +private: + + const DefinedAtom* getImageInfo() { + bool IsBig = MachOLinkingContext::isBigEndian(_ctx.arch()); + return new (_file.allocator()) ObjCImageInfoAtom(_file, IsBig, + _ctx.objcConstraint(), + _ctx.swiftVersion()); + } + + const MachOLinkingContext &_ctx; + MachOFile &_file; +}; + + + +void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::make_unique(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/SectCreateFile.h b/lld/lib/ReaderWriter/MachO/SectCreateFile.h new file mode 100644 index 000000000000..7bb98e16695c --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/SectCreateFile.h @@ -0,0 +1,101 @@ +//===---- lib/ReaderWriter/MachO/SectCreateFile.h ---------------*- c++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H
+#define LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H
+
+#include "lld/Core/DefinedAtom.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+
+namespace lld {
+namespace mach_o {
+
+//
+// A SectCreateFile holds the atoms created from -sectcreate command line
+// options; each added section's content is exposed as a SectCreateAtom.
+//
+class SectCreateFile : public File {
+public:
+  class SectCreateAtom : public SimpleDefinedAtom {
+  public:
+    SectCreateAtom(const File &file, StringRef segName, StringRef sectName,
+                   std::unique_ptr content)
+      : SimpleDefinedAtom(file),
+        _combinedName((segName + "/" + sectName).str()),
+        _content(std::move(content)) {}
+
+    ~SectCreateAtom() override = default;
+
+    uint64_t size() const override { return _content->getBufferSize(); }
+
+    Scope scope() const override { return scopeGlobal; }
+
+    ContentType contentType() const override { return typeSectCreate; }
+
+    SectionChoice sectionChoice() const override { return sectionCustomRequired; }
+
+    StringRef customSectionName() const override { return _combinedName; }
+
+    DeadStripKind deadStrip() const override { return deadStripNever; }
+
+    ArrayRef rawContent() const override {
+      const uint8_t *data =
+        reinterpret_cast(_content->getBufferStart());
+      return ArrayRef(data, _content->getBufferSize());
+    }
+
+    StringRef segmentName() const { return _segName; }
+    StringRef sectionName() const { return _sectName; }
+
+  private:
+    std::string _combinedName;
+    StringRef _segName;
+    StringRef _sectName;
+    std::unique_ptr _content;
+  };
+
+  SectCreateFile() : File("sectcreate", kindSectCreateObject) {}
+
+  void addSection(StringRef seg, StringRef sect,
+                  std::unique_ptr content) {
+    _definedAtoms.push_back(
+
new (allocator()) SectCreateAtom(*this, seg, sect, std::move(content)));
+  }
+
+  const AtomRange defined() const override {
+    return _definedAtoms;
+  }
+
+  const AtomRange undefined() const override {
+    return _noUndefinedAtoms;
+  }
+
+  const AtomRange sharedLibrary() const override {
+    return _noSharedLibraryAtoms;
+  }
+
+  const AtomRange absolute() const override {
+    return _noAbsoluteAtoms;
+  }
+
+  void clearAtoms() override {
+    _definedAtoms.clear();
+    _noUndefinedAtoms.clear();
+    _noSharedLibraryAtoms.clear();
+    _noAbsoluteAtoms.clear();
+  }
+
+private:
+  AtomVector _definedAtoms;
+};
+
+} // namespace mach_o
+} // namespace lld
+
+#endif // LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H
diff --git a/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/lld/lib/ReaderWriter/MachO/ShimPass.cpp
new file mode 100644
index 000000000000..a5b34cfe8de6
--- /dev/null
+++ b/lld/lib/ReaderWriter/MachO/ShimPass.cpp
@@ -0,0 +1,128 @@
+//===- lib/ReaderWriter/MachO/ShimPass.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This linker pass updates branch-sites whose target is a different mode
+// (thumb vs arm).
+//
+// Arm code has two instruction encodings thumb and arm. When branching from
+// one code encoding to another, you need to use an instruction that switches
+// the instruction mode. Usually the transition only happens at call sites, and
+// the linker can transform a BL instruction into a BLX (or vice versa). But if
+// the compiler did a tail call optimization and a function ends with a branch
+// (not branch and link), there is no pc-rel BX instruction.
+//
+// The ShimPass looks for pc-rel B instructions that will need to switch mode.
+// For those cases it synthesizes a shim which does the transition, then +// modifies the original atom with the B instruction to target to the shim atom. +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +class ShimPass : public Pass { +public: + ShimPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _stubInfo(_archHandler.stubInfo()), + _file(*_ctx.make_file("")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at non-call branches. + if (!_archHandler.isNonCallBranch(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (const lld::DefinedAtom *daTarget = dyn_cast(target)) { + bool atomIsThumb = _archHandler.isThumbFunction(*atom); + bool targetIsThumb = _archHandler.isThumbFunction(*daTarget); + if (atomIsThumb != targetIsThumb) + updateBranchToUseShim(atomIsThumb, *daTarget, ref); + } + } + } + // Exit early if no shims needed. + if (_targetToShim.empty()) + return llvm::Error::success(); + + // Sort shim atoms so the layout order is stable. + std::vector shims; + shims.reserve(_targetToShim.size()); + for (auto element : _targetToShim) { + shims.push_back(element.second); + } + std::sort(shims.begin(), shims.end(), + [](const DefinedAtom *l, const DefinedAtom *r) { + return (l->name() < r->name()); + }); + + // Add all shims to master file. 
+ for (const DefinedAtom *shim : shims) + mergedFile.addAtom(*shim); + + return llvm::Error::success(); + } + +private: + + void updateBranchToUseShim(bool thumbToArm, const DefinedAtom& target, + const Reference *ref) { + // Make file-format specific stub and other support atoms. + const DefinedAtom *shim = this->getShim(thumbToArm, target); + assert(shim != nullptr); + // Switch branch site to target shim atom. + const_cast(ref)->setTarget(shim); + } + + const DefinedAtom* getShim(bool thumbToArm, const DefinedAtom& target) { + auto pos = _targetToShim.find(&target); + if ( pos != _targetToShim.end() ) { + // Reuse an existing shim. + assert(pos->second != nullptr); + return pos->second; + } else { + // There is no existing shim, so create a new one. + const DefinedAtom *shim = _archHandler.createShim(_file, thumbToArm, + target); + _targetToShim[&target] = shim; + return shim; + } + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile &_file; + llvm::DenseMap _targetToShim; +}; + + + +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::make_unique(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/StubsPass.cpp b/lld/lib/ReaderWriter/MachO/StubsPass.cpp new file mode 100644 index 000000000000..fbbd8b2c7584 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/StubsPass.cpp @@ -0,0 +1,377 @@ +//===- lib/ReaderWriter/MachO/StubsPass.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This linker pass updates call-sites which have references to shared library +// atoms to instead have a reference to a stub (PLT entry) for the specified +// symbol. Each file format defines a subclass of StubsPass which implements +// the abstract methods for creating the file format specific StubAtoms. +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace lld { +namespace mach_o { + +// +// Lazy Pointer Atom created by the stubs pass. +// +class LazyPointerAtom : public SimpleDefinedAtom { +public: + LazyPointerAtom(const File &file, bool is64) + : SimpleDefinedAtom(file), _is64(is64) { } + + ~LazyPointerAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeLazyPointer; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; +}; + +// +// NonLazyPointer (GOT) Atom created by the stubs pass. 
+// +class NonLazyPointerAtom : public SimpleDefinedAtom { +public: + NonLazyPointerAtom(const File &file, bool is64, ContentType contentType) + : SimpleDefinedAtom(file), _is64(is64), _contentType(contentType) { } + + ~NonLazyPointerAtom() override = default; + + ContentType contentType() const override { + return _contentType; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; + const ContentType _contentType; +}; + +// +// Stub Atom created by the stubs pass. +// +class StubAtom : public SimpleDefinedAtom { +public: + StubAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo){ } + + ~StubAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStub; + } + + Alignment alignment() const override { + return 1 << _stubInfo.codeAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubBytes, _stubInfo.stubSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +// +// Stub Helper Atom created by the stubs pass. 
+// +class StubHelperAtom : public SimpleDefinedAtom { +public: + StubHelperAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ~StubHelperAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return 1 << _stubInfo.codeAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubHelperSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperBytes, + _stubInfo.stubHelperSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +// +// Stub Helper Common Atom created by the stubs pass. +// +class StubHelperCommonAtom : public SimpleDefinedAtom { +public: + StubHelperCommonAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ~StubHelperCommonAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return 1 << _stubInfo.stubHelperCommonAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubHelperCommonSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperCommonBytes, + _stubInfo.stubHelperCommonSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +class StubsPass : public Pass { +public: + StubsPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _stubInfo(_archHandler.stubInfo()), + _file(*_ctx.make_file("")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Skip this pass if output 
format uses text relocations instead of stubs. + if (!this->noTextRelocs()) + return llvm::Error::success(); + + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at call-sites. + if (!this->isCallSite(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (isa(target)) { + // Calls to shared libraries go through stubs. + _targetToUses[target].push_back(ref); + continue; + } + const DefinedAtom *defTarget = dyn_cast(target); + if (defTarget && defTarget->interposable() != DefinedAtom::interposeNo){ + // Calls to interposable functions in same linkage unit must also go + // through a stub. + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + _targetToUses[target].push_back(ref); + } + } + } + + // Exit early if no stubs needed. + if (_targetToUses.empty()) + return llvm::Error::success(); + + // First add help-common and GOT slots used by lazy binding. 
+ SimpleDefinedAtom *helperCommonAtom = + new (_file.allocator()) StubHelperCommonAtom(_file, _stubInfo); + SimpleDefinedAtom *helperCacheNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), + _stubInfo.stubHelperImageCacheContentType); + SimpleDefinedAtom *helperBinderNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), + _stubInfo.stubHelperImageCacheContentType); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + helperCacheNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + _stubInfo.optStubHelperCommonReferenceToCache, helperCacheNLPAtom); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + helperBinderNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + _stubInfo.optStubHelperCommonReferenceToBinder, helperBinderNLPAtom); + mergedFile.addAtom(*helperCommonAtom); + mergedFile.addAtom(*helperBinderNLPAtom); + mergedFile.addAtom(*helperCacheNLPAtom); + + // Add reference to dyld_stub_binder in libSystem.dylib + auto I = llvm::find_if( + mergedFile.sharedLibrary(), [&](const SharedLibraryAtom *atom) { + return atom->name().equals(_stubInfo.binderSymbolName); + }); + assert(I != mergedFile.sharedLibrary().end() && + "dyld_stub_binder not found"); + addReference(helperBinderNLPAtom, _stubInfo.nonLazyPointerReferenceToBinder, *I); + + // Sort targets by name, so stubs and lazy pointers are consistent + std::vector targetsNeedingStubs; + for (auto it : _targetToUses) + targetsNeedingStubs.push_back(it.first); + std::sort(targetsNeedingStubs.begin(), targetsNeedingStubs.end(), + [](const Atom * left, const Atom * right) { + return (left->name().compare(right->name()) < 0); + }); + + // Make and append stubs, lazy pointers, and helpers in alphabetical order. 
+ unsigned lazyOffset = 0; + for (const Atom *target : targetsNeedingStubs) { + auto *stub = new (_file.allocator()) StubAtom(_file, _stubInfo); + auto *lp = + new (_file.allocator()) LazyPointerAtom(_file, _ctx.is64Bit()); + auto *helper = new (_file.allocator()) StubHelperAtom(_file, _stubInfo); + + addReference(stub, _stubInfo.stubReferenceToLP, lp); + addOptReference(stub, _stubInfo.stubReferenceToLP, + _stubInfo.optStubReferenceToLP, lp); + addReference(lp, _stubInfo.lazyPointerReferenceToHelper, helper); + addReference(lp, _stubInfo.lazyPointerReferenceToFinal, target); + addReference(helper, _stubInfo.stubHelperReferenceToImm, helper); + addReferenceAddend(helper, _stubInfo.stubHelperReferenceToImm, helper, + lazyOffset); + addReference(helper, _stubInfo.stubHelperReferenceToHelperCommon, + helperCommonAtom); + + mergedFile.addAtom(*stub); + mergedFile.addAtom(*lp); + mergedFile.addAtom(*helper); + + // Update each reference to use stub. + for (const Reference *ref : _targetToUses[target]) { + assert(ref->target() == target); + // Switch call site to reference stub atom instead. 
+ const_cast(ref)->setTarget(stub); + } + + // Calculate new offset + lazyOffset += target->name().size() + 12; + } + + return llvm::Error::success(); + } + +private: + bool noTextRelocs() { + return true; + } + + bool isCallSite(const Reference &ref) { + return _archHandler.isCallSite(ref); + } + + void addReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom* target) { + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, refInfo.kind, refInfo.offset, + target, refInfo.addend); + } + + void addReferenceAddend(SimpleDefinedAtom *atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom *target, uint64_t addend) { + atom->addReference(Reference::KindNamespace::mach_o, refInfo.arch, + refInfo.kind, refInfo.offset, target, addend); + } + + void addOptReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const ArchHandler::OptionalRefInfo &optRef, + const lld::Atom* target) { + if (!optRef.used) + return; + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, optRef.kind, optRef.offset, + target, optRef.addend); + } + + typedef llvm::DenseMap> TargetToUses; + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile &_file; + TargetToUses _targetToUses; +}; + +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::unique_ptr(new StubsPass(ctx))); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/lld/lib/ReaderWriter/MachO/TLVPass.cpp new file mode 100644 index 000000000000..e0a031cfb07b --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/TLVPass.cpp @@ -0,0 +1,140 @@ +//===- lib/ReaderWriter/MachO/TLVPass.cpp -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This linker pass transforms all TLV references to real references. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" + +namespace lld { +namespace mach_o { + +// +// TLVP Entry Atom created by the TLV pass. +// +class TLVPEntryAtom : public SimpleDefinedAtom { +public: + TLVPEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) {} + + ~TLVPEntryAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeTLVInitializerPtr; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 
8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + +class TLVPass : public Pass { +public: + TLVPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file("")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + bool allowTLV = _ctx.minOS("10.7", "1.0"); + + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + if (!_archHandler.isTLVAccess(*ref)) + continue; + + if (!allowTLV) + return llvm::make_error( + "targeted OS version does not support use of thread local " + "variables in " + atom->name() + " for architecture " + + _ctx.archName()); + + const Atom *target = ref->target(); + assert(target != nullptr); + + const DefinedAtom *tlvpEntry = makeTLVPEntry(target); + const_cast(ref)->setTarget(tlvpEntry); + _archHandler.updateReferenceToTLV(ref); + } + } + + std::vector entries; + entries.reserve(_targetToTLVP.size()); + for (auto &it : _targetToTLVP) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const TLVPEntryAtom *lhs, const TLVPEntryAtom *rhs) { + return (lhs->slotName().compare(rhs->slotName()) < 0); + }); + + for (const TLVPEntryAtom *slot : entries) + mergedFile.addAtom(*slot); + + return llvm::Error::success(); + } + + const DefinedAtom *makeTLVPEntry(const Atom *target) { + auto pos = _targetToTLVP.find(target); + + if (pos != _targetToTLVP.end()) + return pos->second; + + auto *tlvpEntry = new (_file.allocator()) + TLVPEntryAtom(_file, _ctx.is64Bit(), target->name()); + _targetToTLVP[target] = tlvpEntry; + const 
ArchHandler::ReferenceInfo &nlInfo = + _archHandler.stubInfo().nonLazyPointerReferenceToBinder; + tlvpEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return tlvpEntry; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + llvm::DenseMap _targetToTLVP; +}; + +void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsTLVPass()); + pm.add(std::make_unique(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/WriterMachO.cpp b/lld/lib/ReaderWriter/MachO/WriterMachO.cpp new file mode 100644 index 000000000000..60e0e9dd9a81 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/WriterMachO.cpp @@ -0,0 +1,70 @@ +//===- lib/ReaderWriter/MachO/WriterMachO.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ExecutableAtoms.h" +#include "MachONormalizedFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include + +using lld::mach_o::normalized::NormalizedFile; + +namespace lld { +namespace mach_o { + +class MachOWriter : public Writer { +public: + MachOWriter(const MachOLinkingContext &ctxt) : _ctx(ctxt) {} + + llvm::Error writeFile(const lld::File &file, StringRef path) override { + // Construct empty normalized file from atoms. 
+ llvm::Expected> nFile = + normalized::normalizedFromAtoms(file, _ctx); + if (auto ec = nFile.takeError()) + return ec; + + // For testing, write out yaml form of normalized file. + if (_ctx.printAtoms()) { + std::unique_ptr yamlWriter = createWriterYAML(_ctx); + if (auto ec = yamlWriter->writeFile(file, "-")) + return ec; + } + + // Write normalized file as mach-o binary. + return writeBinary(*nFile->get(), path); + } + + void createImplicitFiles(std::vector> &r) override { + // When building main executables, add _main as required entry point. + if (_ctx.outputTypeHasEntry()) + r.emplace_back(new CEntryFile(_ctx)); + // If this can link with dylibs, need helper function (dyld_stub_binder). + if (_ctx.needsStubsPass()) + r.emplace_back(new StubHelperFile(_ctx)); + // Final linked images can access a symbol for their mach_header. + if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + r.emplace_back(new MachHeaderAliasFile(_ctx)); + } +private: + const MachOLinkingContext &_ctx; + }; + + +} // namespace mach_o + +std::unique_ptr createWriterMachO(const MachOLinkingContext &context) { + return std::unique_ptr(new lld::mach_o::MachOWriter(context)); +} + +} // namespace lld diff --git a/lld/lib/ReaderWriter/YAML/CMakeLists.txt b/lld/lib/ReaderWriter/YAML/CMakeLists.txt new file mode 100644 index 000000000000..0e63574a63d2 --- /dev/null +++ b/lld/lib/ReaderWriter/YAML/CMakeLists.txt @@ -0,0 +1,9 @@ +add_lld_library(lldYAML + ReaderWriterYAML.cpp + + LINK_COMPONENTS + Support + + LINK_LIBS + lldCore + ) diff --git a/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp new file mode 100644 index 000000000000..c0e6e0334fa6 --- /dev/null +++ b/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp @@ -0,0 +1,1403 @@ +//===- lib/ReaderWriter/YAML/ReaderWriterYAML.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Reference.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +using llvm::file_magic; +using llvm::yaml::MappingTraits; +using llvm::yaml::ScalarEnumerationTraits; +using llvm::yaml::ScalarTraits; +using llvm::yaml::IO; +using llvm::yaml::SequenceTraits; +using llvm::yaml::DocumentListTraits; + +using namespace lld; + +/// The conversion of Atoms to and from YAML uses LLVM's YAML I/O. This +/// file just defines template specializations on the lld types which control +/// how the mapping is done to and from YAML. + +namespace { + +/// Used when writing yaml files. +/// In most cases, atoms names are unambiguous, so references can just +/// use the atom name as the target (e.g. target: foo). But in a few +/// cases that does not work, so ref-names are added. 
These are labels +/// used only in yaml. The labels do not exist in the Atom model. +/// +/// One need for ref-names are when atoms have no user supplied name +/// (e.g. c-string literal). Another case is when two object files with +/// identically named static functions are merged (ld -r) into one object file. +/// In that case referencing the function by name is ambiguous, so a unique +/// ref-name is added. +class RefNameBuilder { +public: + RefNameBuilder(const lld::File &file) + : _collisionCount(0), _unnamedCounter(0) { + // visit all atoms + for (const lld::DefinedAtom *atom : file.defined()) { + // Build map of atoms names to detect duplicates + if (!atom->name().empty()) + buildDuplicateNameMap(*atom); + + // Find references to unnamed atoms and create ref-names for them. + for (const lld::Reference *ref : *atom) { + // create refname for any unnamed reference target + const lld::Atom *target = ref->target(); + if ((target != nullptr) && target->name().empty()) { + std::string storage; + llvm::raw_string_ostream buffer(storage); + buffer << llvm::format("L%03d", _unnamedCounter++); + StringRef newName = copyString(buffer.str()); + _refNames[target] = std::string(newName); + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "unnamed atom: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() << ", " + << newName.size() << ")\n"); + } + } + } + for (const lld::UndefinedAtom *undefAtom : file.undefined()) { + buildDuplicateNameMap(*undefAtom); + } + for (const lld::SharedLibraryAtom *shlibAtom : file.sharedLibrary()) { + buildDuplicateNameMap(*shlibAtom); + } + for (const lld::AbsoluteAtom *absAtom : file.absolute()) { + if (!absAtom->name().empty()) + buildDuplicateNameMap(*absAtom); + } + } + + void buildDuplicateNameMap(const lld::Atom &atom) { + assert(!atom.name().empty()); + NameToAtom::iterator pos = _nameMap.find(atom.name()); + if (pos != _nameMap.end()) { + // Found name collision, give each a unique ref-name. 
+ std::string Storage; + llvm::raw_string_ostream buffer(Storage); + buffer << atom.name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName = copyString(buffer.str()); + _refNames[&atom] = std::string(newName); + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collision: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() + << ", " << newName.size() << ")\n"); + const lld::Atom *prevAtom = pos->second; + AtomToRefName::iterator pos2 = _refNames.find(prevAtom); + if (pos2 == _refNames.end()) { + // Only create ref-name for previous if none already created. + std::string Storage2; + llvm::raw_string_ostream buffer2(Storage2); + buffer2 << prevAtom->name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName2 = copyString(buffer2.str()); + _refNames[prevAtom] = std::string(newName2); + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collision: creating ref-name: '" + << newName2 << "' (" + << (const void *)newName2.data() << ", " + << newName2.size() << ")\n"); + } + } else { + // First time we've seen this name, just add it to map. + _nameMap[atom.name()] = &atom; + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "atom name seen for first time: '" + << atom.name() << "' (" + << (const void *)atom.name().data() + << ", " << atom.name().size() << ")\n"); + } + } + + bool hasRefName(const lld::Atom *atom) { return _refNames.count(atom); } + + StringRef refName(const lld::Atom *atom) { + return _refNames.find(atom)->second; + } + +private: + typedef llvm::StringMap NameToAtom; + typedef llvm::DenseMap AtomToRefName; + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when RefNameBuilder is destroyed. 
+ StringRef copyString(StringRef str) { + char *s = _storage.Allocate(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + unsigned int _collisionCount; + unsigned int _unnamedCounter; + NameToAtom _nameMap; + AtomToRefName _refNames; + llvm::BumpPtrAllocator _storage; +}; + +/// Used when reading yaml files to find the target of a reference +/// that could be a name or ref-name. +class RefNameResolver { +public: + RefNameResolver(const lld::File *file, IO &io); + + const lld::Atom *lookup(StringRef name) const { + NameToAtom::const_iterator pos = _nameMap.find(name); + if (pos != _nameMap.end()) + return pos->second; + _io.setError(Twine("no such atom name: ") + name); + return nullptr; + } + +private: + typedef llvm::StringMap NameToAtom; + + void add(StringRef name, const lld::Atom *atom) { + if (_nameMap.count(name)) { + _io.setError(Twine("duplicate atom name: ") + name); + } else { + _nameMap[name] = atom; + } + } + + IO &_io; + NameToAtom _nameMap; +}; + +/// Mapping of Atoms. +template class AtomList { + using Ty = std::vector>; + +public: + typename Ty::iterator begin() { return _atoms.begin(); } + typename Ty::iterator end() { return _atoms.end(); } + Ty _atoms; +}; + +/// Mapping of kind: field in yaml files. +enum FileKinds { + fileKindObjectAtoms, // atom based object file encoded in yaml + fileKindArchive, // static archive library encoded in yaml + fileKindObjectMachO // mach-o object files encoded in yaml +}; + +struct ArchMember { + FileKinds _kind; + StringRef _name; + const lld::File *_content; +}; + +// The content bytes in a DefinedAtom are just uint8_t but we want +// special formatting, so define a strong type. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, ImplicitHex8) + +// SharedLibraryAtoms have a bool canBeNull() method which we'd like to be +// more readable than just true/false. +LLVM_YAML_STRONG_TYPEDEF(bool, ShlibCanBeNull) + +// lld::Reference::Kind is a tuple of . 
+// For yaml, we just want one string that encapsulates the tuple. +struct RefKind { + Reference::KindNamespace ns; + Reference::KindArch arch; + Reference::KindValue value; +}; + +} // end anonymous namespace + +LLVM_YAML_IS_SEQUENCE_VECTOR(ArchMember) +LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *) +// Always write DefinedAtoms content bytes as a flow sequence. +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(ImplicitHex8) + +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// This is a custom formatter for RefKind +template <> struct ScalarTraits { + static void output(const RefKind &kind, void *ctxt, raw_ostream &out) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast(ctxt); + assert(info->_registry); + StringRef str; + if (info->_registry->referenceKindToString(kind.ns, kind.arch, kind.value, + str)) + out << str; + else + out << (int)(kind.ns) << "-" << (int)(kind.arch) << "-" << kind.value; + } + + static StringRef input(StringRef scalar, void *ctxt, RefKind &kind) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast(ctxt); + assert(info->_registry); + if (info->_registry->referenceKindFromString(scalar, kind.ns, kind.arch, + kind.value)) + return StringRef(); + return StringRef("unknown reference kind"); + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::File::Kind &value) { + io.enumCase(value, "error-object", lld::File::kindErrorObject); + io.enumCase(value, "object", lld::File::kindMachObject); + io.enumCase(value, "shared-library", lld::File::kindSharedLibrary); + io.enumCase(value, "static-library", lld::File::kindArchiveLibrary); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::Atom::Scope &value) { + io.enumCase(value, "global", lld::Atom::scopeGlobal); + io.enumCase(value, "hidden", 
lld::Atom::scopeLinkageUnit); + io.enumCase(value, "static", lld::Atom::scopeTranslationUnit); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::SectionChoice &value) { + io.enumCase(value, "content", lld::DefinedAtom::sectionBasedOnContent); + io.enumCase(value, "custom", lld::DefinedAtom::sectionCustomPreferred); + io.enumCase(value, "custom-required", + lld::DefinedAtom::sectionCustomRequired); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::Interposable &value) { + io.enumCase(value, "no", DefinedAtom::interposeNo); + io.enumCase(value, "yes", DefinedAtom::interposeYes); + io.enumCase(value, "yes-and-weak", DefinedAtom::interposeYesAndRuntimeWeak); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::Merge &value) { + io.enumCase(value, "no", lld::DefinedAtom::mergeNo); + io.enumCase(value, "as-tentative", lld::DefinedAtom::mergeAsTentative); + io.enumCase(value, "as-weak", lld::DefinedAtom::mergeAsWeak); + io.enumCase(value, "as-addressed-weak", + lld::DefinedAtom::mergeAsWeakAndAddressUsed); + io.enumCase(value, "by-content", lld::DefinedAtom::mergeByContent); + io.enumCase(value, "same-name-and-size", + lld::DefinedAtom::mergeSameNameAndSize); + io.enumCase(value, "largest", lld::DefinedAtom::mergeByLargestSection); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::DeadStripKind &value) { + io.enumCase(value, "normal", lld::DefinedAtom::deadStripNormal); + io.enumCase(value, "never", lld::DefinedAtom::deadStripNever); + io.enumCase(value, "always", lld::DefinedAtom::deadStripAlways); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::DynamicExport &value) { + io.enumCase(value, "normal", lld::DefinedAtom::dynamicExportNormal); + io.enumCase(value, "always", 
lld::DefinedAtom::dynamicExportAlways); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::CodeModel &value) { + io.enumCase(value, "none", lld::DefinedAtom::codeNA); + io.enumCase(value, "mips-pic", lld::DefinedAtom::codeMipsPIC); + io.enumCase(value, "mips-micro", lld::DefinedAtom::codeMipsMicro); + io.enumCase(value, "mips-micro-pic", lld::DefinedAtom::codeMipsMicroPIC); + io.enumCase(value, "mips-16", lld::DefinedAtom::codeMips16); + io.enumCase(value, "arm-thumb", lld::DefinedAtom::codeARMThumb); + io.enumCase(value, "arm-a", lld::DefinedAtom::codeARM_a); + io.enumCase(value, "arm-d", lld::DefinedAtom::codeARM_d); + io.enumCase(value, "arm-t", lld::DefinedAtom::codeARM_t); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::ContentPermissions &value) { + io.enumCase(value, "---", lld::DefinedAtom::perm___); + io.enumCase(value, "r--", lld::DefinedAtom::permR__); + io.enumCase(value, "r-x", lld::DefinedAtom::permR_X); + io.enumCase(value, "rw-", lld::DefinedAtom::permRW_); + io.enumCase(value, "rwx", lld::DefinedAtom::permRWX); + io.enumCase(value, "rw-l", lld::DefinedAtom::permRW_L); + io.enumCase(value, "unknown", lld::DefinedAtom::permUnknown); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::DefinedAtom::ContentType &value) { + io.enumCase(value, "unknown", DefinedAtom::typeUnknown); + io.enumCase(value, "code", DefinedAtom::typeCode); + io.enumCase(value, "stub", DefinedAtom::typeStub); + io.enumCase(value, "constant", DefinedAtom::typeConstant); + io.enumCase(value, "data", DefinedAtom::typeData); + io.enumCase(value, "quick-data", DefinedAtom::typeDataFast); + io.enumCase(value, "zero-fill", DefinedAtom::typeZeroFill); + io.enumCase(value, "zero-fill-quick", DefinedAtom::typeZeroFillFast); + io.enumCase(value, "const-data", DefinedAtom::typeConstData); + io.enumCase(value, "got", 
DefinedAtom::typeGOT); + io.enumCase(value, "resolver", DefinedAtom::typeResolver); + io.enumCase(value, "branch-island", DefinedAtom::typeBranchIsland); + io.enumCase(value, "branch-shim", DefinedAtom::typeBranchShim); + io.enumCase(value, "stub-helper", DefinedAtom::typeStubHelper); + io.enumCase(value, "c-string", DefinedAtom::typeCString); + io.enumCase(value, "utf16-string", DefinedAtom::typeUTF16String); + io.enumCase(value, "unwind-cfi", DefinedAtom::typeCFI); + io.enumCase(value, "unwind-lsda", DefinedAtom::typeLSDA); + io.enumCase(value, "const-4-byte", DefinedAtom::typeLiteral4); + io.enumCase(value, "const-8-byte", DefinedAtom::typeLiteral8); + io.enumCase(value, "const-16-byte", DefinedAtom::typeLiteral16); + io.enumCase(value, "lazy-pointer", DefinedAtom::typeLazyPointer); + io.enumCase(value, "lazy-dylib-pointer", + DefinedAtom::typeLazyDylibPointer); + io.enumCase(value, "cfstring", DefinedAtom::typeCFString); + io.enumCase(value, "initializer-pointer", + DefinedAtom::typeInitializerPtr); + io.enumCase(value, "terminator-pointer", + DefinedAtom::typeTerminatorPtr); + io.enumCase(value, "c-string-pointer",DefinedAtom::typeCStringPtr); + io.enumCase(value, "objc-class-pointer", + DefinedAtom::typeObjCClassPtr); + io.enumCase(value, "objc-category-list", + DefinedAtom::typeObjC2CategoryList); + io.enumCase(value, "objc-image-info", + DefinedAtom::typeObjCImageInfo); + io.enumCase(value, "objc-method-list", + DefinedAtom::typeObjCMethodList); + io.enumCase(value, "objc-class1", DefinedAtom::typeObjC1Class); + io.enumCase(value, "dtraceDOF", DefinedAtom::typeDTraceDOF); + io.enumCase(value, "interposing-tuples", + DefinedAtom::typeInterposingTuples); + io.enumCase(value, "lto-temp", DefinedAtom::typeTempLTO); + io.enumCase(value, "compact-unwind", DefinedAtom::typeCompactUnwindInfo); + io.enumCase(value, "unwind-info", DefinedAtom::typeProcessedUnwindInfo); + io.enumCase(value, "tlv-thunk", DefinedAtom::typeThunkTLV); + io.enumCase(value, "tlv-data", 
DefinedAtom::typeTLVInitialData); + io.enumCase(value, "tlv-zero-fill", DefinedAtom::typeTLVInitialZeroFill); + io.enumCase(value, "tlv-initializer-ptr", + DefinedAtom::typeTLVInitializerPtr); + io.enumCase(value, "mach_header", DefinedAtom::typeMachHeader); + io.enumCase(value, "dso_handle", DefinedAtom::typeDSOHandle); + io.enumCase(value, "sectcreate", DefinedAtom::typeSectCreate); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::UndefinedAtom::CanBeNull &value) { + io.enumCase(value, "never", lld::UndefinedAtom::canBeNullNever); + io.enumCase(value, "at-runtime", lld::UndefinedAtom::canBeNullAtRuntime); + io.enumCase(value, "at-buildtime",lld::UndefinedAtom::canBeNullAtBuildtime); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, ShlibCanBeNull &value) { + io.enumCase(value, "never", false); + io.enumCase(value, "at-runtime", true); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &io, lld::SharedLibraryAtom::Type &value) { + io.enumCase(value, "code", lld::SharedLibraryAtom::Type::Code); + io.enumCase(value, "data", lld::SharedLibraryAtom::Type::Data); + io.enumCase(value, "unknown", lld::SharedLibraryAtom::Type::Unknown); + } +}; + +/// This is a custom formatter for lld::DefinedAtom::Alignment. 
Values look +/// like: +/// 8 # 8-byte aligned +/// 7 mod 16 # 16-byte aligned plus 7 bytes +template <> struct ScalarTraits { + static void output(const lld::DefinedAtom::Alignment &value, void *ctxt, + raw_ostream &out) { + if (value.modulus == 0) { + out << llvm::format("%d", value.value); + } else { + out << llvm::format("%d mod %d", value.modulus, value.value); + } + } + + static StringRef input(StringRef scalar, void *ctxt, + lld::DefinedAtom::Alignment &value) { + value.modulus = 0; + size_t modStart = scalar.find("mod"); + if (modStart != StringRef::npos) { + StringRef modStr = scalar.slice(0, modStart); + modStr = modStr.rtrim(); + unsigned int modulus; + if (modStr.getAsInteger(0, modulus)) { + return "malformed alignment modulus"; + } + value.modulus = modulus; + scalar = scalar.drop_front(modStart + 3); + scalar = scalar.ltrim(); + } + unsigned int power; + if (scalar.getAsInteger(0, power)) { + return "malformed alignment power"; + } + value.value = power; + if (value.modulus >= power) { + return "malformed alignment, modulus too large for power"; + } + return StringRef(); // returning empty string means success + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &io, FileKinds &value) { + io.enumCase(value, "object", fileKindObjectAtoms); + io.enumCase(value, "archive", fileKindArchive); + io.enumCase(value, "object-mach-o", fileKindObjectMachO); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, ArchMember &member) { + io.mapOptional("kind", member._kind, fileKindObjectAtoms); + io.mapOptional("name", member._name); + io.mapRequired("content", member._content); + } +}; + +// Declare that an AtomList is a yaml sequence. 
+template struct SequenceTraits > { + static size_t size(IO &io, AtomList &seq) { return seq._atoms.size(); } + static T *&element(IO &io, AtomList &seq, size_t index) { + if (index >= seq._atoms.size()) + seq._atoms.resize(index + 1); + return seq._atoms[index].get(); + } +}; + +// Declare that an AtomRange is a yaml sequence. +template struct SequenceTraits > { + static size_t size(IO &io, File::AtomRange &seq) { return seq.size(); } + static T *&element(IO &io, File::AtomRange &seq, size_t index) { + assert(io.outputting() && "AtomRange only used when outputting"); + assert(index < seq.size() && "Out of range access"); + return seq[index].get(); + } +}; + +// Used to allow DefinedAtom content bytes to be a flow sequence of +// two-digit hex numbers without the leading 0x (e.g. FF, 04, 0A) +template <> struct ScalarTraits { + static void output(const ImplicitHex8 &val, void *, raw_ostream &out) { + uint8_t num = val; + out << llvm::format("%02X", num); + } + + static StringRef input(StringRef str, void *, ImplicitHex8 &val) { + unsigned long long n; + if (getAsUnsignedInteger(str, 16, n)) + return "invalid two-digit-hex number"; + if (n > 0xFF) + return "out of range two-digit-hex number"; + val = n; + return StringRef(); // returning empty string means success + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +// YAML conversion for std::vector +template <> struct DocumentListTraits > { + static size_t size(IO &io, std::vector &seq) { + return seq.size(); + } + static const lld::File *&element(IO &io, std::vector &seq, + size_t index) { + if (index >= seq.size()) + seq.resize(index + 1); + return seq[index]; + } +}; + +// YAML conversion for const lld::File* +template <> struct MappingTraits { + class NormArchiveFile : public lld::ArchiveLibraryFile { + public: + NormArchiveFile(IO &io) : ArchiveLibraryFile("") {} + + NormArchiveFile(IO &io, const lld::File *file) + : ArchiveLibraryFile(file->path()), _path(file->path()) { + // 
If we want to support writing archives, this constructor would + // need to populate _members. + } + + const lld::File *denormalize(IO &io) { return this; } + + const AtomRange defined() const override { + return _noDefinedAtoms; + } + + const AtomRange undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + + File *find(StringRef name) override { + for (const ArchMember &member : _members) + for (const lld::DefinedAtom *atom : member._content->defined()) + if (name == atom->name()) + return const_cast(member._content); + return nullptr; + } + + std::error_code + parseAllMembers(std::vector> &result) override { + return std::error_code(); + } + + StringRef _path; + std::vector _members; + }; + + class NormalizedFile : public lld::File { + public: + NormalizedFile(IO &io) + : File("", kindNormalizedObject), _io(io), _rnb(nullptr), + _definedAtomsRef(_definedAtoms._atoms), + _undefinedAtomsRef(_undefinedAtoms._atoms), + _sharedLibraryAtomsRef(_sharedLibraryAtoms._atoms), + _absoluteAtomsRef(_absoluteAtoms._atoms) {} + + NormalizedFile(IO &io, const lld::File *file) + : File(file->path(), kindNormalizedObject), _io(io), + _rnb(new RefNameBuilder(*file)), _path(file->path()), + _definedAtomsRef(file->defined()), + _undefinedAtomsRef(file->undefined()), + _sharedLibraryAtomsRef(file->sharedLibrary()), + _absoluteAtomsRef(file->absolute()) { + } + + ~NormalizedFile() override { + } + + const lld::File *denormalize(IO &io); + + const AtomRange defined() const override { + return _definedAtomsRef; + } + + const AtomRange undefined() const override { + return _undefinedAtomsRef; + } + + const AtomRange sharedLibrary() const override { + return 
_sharedLibraryAtomsRef; + } + + const AtomRange absolute() const override { + return _absoluteAtomsRef; + } + + void clearAtoms() override { + _definedAtoms._atoms.clear(); + _undefinedAtoms._atoms.clear(); + _sharedLibraryAtoms._atoms.clear(); + _absoluteAtoms._atoms.clear(); + } + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when File is destroyed. + StringRef copyString(StringRef str) { + char *s = _storage.Allocate(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + IO &_io; + std::unique_ptr _rnb; + StringRef _path; + AtomList _definedAtoms; + AtomList _undefinedAtoms; + AtomList _sharedLibraryAtoms; + AtomList _absoluteAtoms; + AtomRange _definedAtomsRef; + AtomRange _undefinedAtomsRef; + AtomRange _sharedLibraryAtomsRef; + AtomRange _absoluteAtomsRef; + llvm::BumpPtrAllocator _storage; + }; + + static void mapping(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + // Let any register tag handler process this. + if (info->_registry && info->_registry->handleTaggedDoc(io, file)) + return; + // If no registered handler claims this tag and there is no tag, + // grandfather in as "!native". 
+ if (io.mapTag("!native", true) || io.mapTag("tag:yaml.org,2002:map")) + mappingAtoms(io, file); + } + + static void mappingAtoms(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap + keys(io, file, nullptr); + assert(info != nullptr); + info->_file = keys.operator->(); + + io.mapOptional("path", keys->_path); + + if (io.outputting()) { + io.mapOptional("defined-atoms", keys->_definedAtomsRef); + io.mapOptional("undefined-atoms", keys->_undefinedAtomsRef); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtomsRef); + io.mapOptional("absolute-atoms", keys->_absoluteAtomsRef); + } else { + io.mapOptional("defined-atoms", keys->_definedAtoms); + io.mapOptional("undefined-atoms", keys->_undefinedAtoms); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtoms); + io.mapOptional("absolute-atoms", keys->_absoluteAtoms); + } + } + + static void mappingArchive(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap + keys(io, file, &info->_file->allocator()); + + io.mapOptional("path", keys->_path); + io.mapOptional("members", keys->_members); + } +}; + +// YAML conversion for const lld::Reference* +template <> struct MappingTraits { + class NormalizedReference : public lld::Reference { + public: + NormalizedReference(IO &io) + : lld::Reference(lld::Reference::KindNamespace::all, + lld::Reference::KindArch::all, 0), + _target(nullptr), _offset(0), _addend(0), _tag(0) {} + + NormalizedReference(IO &io, const lld::Reference *ref) + : lld::Reference(ref->kindNamespace(), ref->kindArch(), + ref->kindValue()), + _target(nullptr), _targetName(targetName(io, ref)), + _offset(ref->offsetInAtom()), _addend(ref->addend()), + _tag(ref->tag()) { + _mappedKind.ns = ref->kindNamespace(); + _mappedKind.arch = ref->kindArch(); + _mappedKind.value = ref->kindValue(); + } + + const lld::Reference *denormalize(IO &io) { + YamlContext *info = 
reinterpret_cast(io.getContext()); + assert(info != nullptr); + typedef MappingTraits::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast(info->_file); + if (!_targetName.empty()) + _targetName = f->copyString(_targetName); + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "created Reference to name: '" + << _targetName << "' (" + << (const void *)_targetName.data() + << ", " << _targetName.size() << ")\n"); + setKindNamespace(_mappedKind.ns); + setKindArch(_mappedKind.arch); + setKindValue(_mappedKind.value); + return this; + } + + void bind(const RefNameResolver &); + static StringRef targetName(IO &io, const lld::Reference *ref); + + uint64_t offsetInAtom() const override { return _offset; } + const lld::Atom *target() const override { return _target; } + Addend addend() const override { return _addend; } + void setAddend(Addend a) override { _addend = a; } + void setTarget(const lld::Atom *a) override { _target = a; } + + const lld::Atom *_target; + StringRef _targetName; + uint32_t _offset; + Addend _addend; + RefKind _mappedKind; + uint32_t _tag; + }; + + static void mapping(IO &io, const lld::Reference *&ref) { + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap keys( + io, ref, &info->_file->allocator()); + + io.mapRequired("kind", keys->_mappedKind); + io.mapOptional("offset", keys->_offset); + io.mapOptional("target", keys->_targetName); + io.mapOptional("addend", keys->_addend, (lld::Reference::Addend)0); + io.mapOptional("tag", keys->_tag, 0u); + } +}; + +// YAML conversion for const lld::DefinedAtom* +template <> struct MappingTraits { + + class NormalizedAtom : public lld::DefinedAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _contentType(), _alignment(1) { + static uint32_t ordinalCounter = 1; + _ordinal = ordinalCounter++; + } + + NormalizedAtom(IO &io, const lld::DefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _scope(atom->scope()), 
_interpose(atom->interposable()), + _merge(atom->merge()), _contentType(atom->contentType()), + _alignment(atom->alignment()), _sectionChoice(atom->sectionChoice()), + _deadStrip(atom->deadStrip()), _dynamicExport(atom->dynamicExport()), + _codeModel(atom->codeModel()), + _permissions(atom->permissions()), _size(atom->size()), + _sectionName(atom->customSectionName()), + _sectionSize(atom->sectionSize()) { + for (const lld::Reference *r : *atom) + _references.push_back(r); + if (!atom->occupiesDiskSpace()) + return; + ArrayRef cont = atom->rawContent(); + _content.reserve(cont.size()); + for (uint8_t x : cont) + _content.push_back(x); + } + + ~NormalizedAtom() override = default; + + const lld::DefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + typedef MappingTraits::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_refName.empty()) + _refName = f->copyString(_refName); + if (!_sectionName.empty()) + _sectionName = f->copyString(_sectionName); + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created DefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + void bind(const RefNameResolver &); + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t size() const override { return _size; } + Scope scope() const override { return _scope; } + Interposable interposable() const override { return _interpose; } + Merge merge() const override { return _merge; } + ContentType contentType() const override { return 
_contentType; } + Alignment alignment() const override { return _alignment; } + SectionChoice sectionChoice() const override { return _sectionChoice; } + StringRef customSectionName() const override { return _sectionName; } + uint64_t sectionSize() const override { return _sectionSize; } + DeadStripKind deadStrip() const override { return _deadStrip; } + DynamicExport dynamicExport() const override { return _dynamicExport; } + CodeModel codeModel() const override { return _codeModel; } + ContentPermissions permissions() const override { return _permissions; } + ArrayRef rawContent() const override { + if (!occupiesDiskSpace()) + return ArrayRef(); + return ArrayRef( + reinterpret_cast(_content.data()), _content.size()); + } + + uint64_t ordinal() const override { return _ordinal; } + + reference_iterator begin() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast(index); + return reference_iterator(*this, it); + } + reference_iterator end() const override { + uintptr_t index = _references.size(); + const void *it = reinterpret_cast(index); + return reference_iterator(*this, it); + } + const lld::Reference *derefIterator(const void *it) const override { + uintptr_t index = reinterpret_cast(it); + assert(index < _references.size()); + return _references[index]; + } + void incrementIterator(const void *&it) const override { + uintptr_t index = reinterpret_cast(it); + ++index; + it = reinterpret_cast(index); + } + + void addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) override { + assert(target && "trying to create reference to nothing"); + auto node = new (file().allocator()) SimpleReference(ns, arch, kindValue, + off, target, a); + _references.push_back(node); + } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Interposable _interpose; + Merge _merge; + ContentType _contentType; + Alignment 
_alignment; + SectionChoice _sectionChoice; + DeadStripKind _deadStrip; + DynamicExport _dynamicExport; + CodeModel _codeModel; + ContentPermissions _permissions; + uint32_t _ordinal; + std::vector _content; + uint64_t _size; + StringRef _sectionName; + uint64_t _sectionSize; + std::vector _references; + }; + + static void mapping(IO &io, const lld::DefinedAtom *&atom) { + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap keys( + io, atom, &info->_file->allocator()); + if (io.outputting()) { + // If writing YAML, check if atom needs a ref-name. + typedef MappingTraits::NormalizedFile NormalizedFile; + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapOptional("name", keys->_name, StringRef()); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope, + DefinedAtom::scopeTranslationUnit); + io.mapOptional("type", keys->_contentType, + DefinedAtom::typeCode); + io.mapOptional("content", keys->_content); + io.mapOptional("size", keys->_size, (uint64_t)keys->_content.size()); + io.mapOptional("interposable", keys->_interpose, + DefinedAtom::interposeNo); + io.mapOptional("merge", keys->_merge, DefinedAtom::mergeNo); + io.mapOptional("alignment", keys->_alignment, + DefinedAtom::Alignment(1)); + io.mapOptional("section-choice", keys->_sectionChoice, + DefinedAtom::sectionBasedOnContent); + io.mapOptional("section-name", keys->_sectionName, StringRef()); + io.mapOptional("section-size", keys->_sectionSize, (uint64_t)0); + io.mapOptional("dead-strip", keys->_deadStrip, + DefinedAtom::deadStripNormal); + io.mapOptional("dynamic-export", keys->_dynamicExport, + DefinedAtom::dynamicExportNormal); + io.mapOptional("code-model", keys->_codeModel, DefinedAtom::codeNA); + // default permissions based on content type + io.mapOptional("permissions", 
keys->_permissions, + DefinedAtom::permissions( + keys->_contentType)); + io.mapOptional("references", keys->_references); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, lld::DefinedAtom *&atom) { + const lld::DefinedAtom *atomPtr = atom; + MappingTraits::mapping(io, atomPtr); + atom = const_cast(atomPtr); + } +}; + +// YAML conversion for const lld::UndefinedAtom* +template <> struct MappingTraits { + class NormalizedAtom : public lld::UndefinedAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _canBeNull(canBeNullNever) {} + + NormalizedAtom(IO &io, const lld::UndefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _canBeNull(atom->canBeNull()) {} + + ~NormalizedAtom() override = default; + + const lld::UndefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + typedef MappingTraits::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created UndefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() << ", " + << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + CanBeNull canBeNull() const override { return _canBeNull; } + + const lld::File &_file; + StringRef _name; + CanBeNull _canBeNull; + }; + + static void mapping(IO &io, const lld::UndefinedAtom *&atom) { + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap keys( + io, atom, &info->_file->allocator()); + + io.mapRequired("name", 
keys->_name); + io.mapOptional("can-be-null", keys->_canBeNull, + lld::UndefinedAtom::canBeNullNever); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, lld::UndefinedAtom *&atom) { + const lld::UndefinedAtom *atomPtr = atom; + MappingTraits::mapping(io, atomPtr); + atom = const_cast(atomPtr); + } +}; + +// YAML conversion for const lld::SharedLibraryAtom* +template <> struct MappingTraits { + class NormalizedAtom : public lld::SharedLibraryAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _canBeNull(false), + _type(Type::Unknown), _size(0) {} + + NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()), + _type(atom->type()), _size(atom->size()) {} + + ~NormalizedAtom() override = default; + + const lld::SharedLibraryAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + typedef MappingTraits::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_loadName.empty()) + _loadName = f->copyString(_loadName); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created SharedLibraryAtom named: '" + << _name << "' (" + << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + StringRef loadName() const override { return _loadName; } + bool canBeNullAtRuntime() const override { return _canBeNull; } + Type type() const override { return _type; } + uint64_t size() 
const override { return _size; } + + const lld::File &_file; + StringRef _name; + StringRef _loadName; + ShlibCanBeNull _canBeNull; + Type _type; + uint64_t _size; + }; + + static void mapping(IO &io, const lld::SharedLibraryAtom *&atom) { + + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap + keys(io, atom, &info->_file->allocator()); + + io.mapRequired("name", keys->_name); + io.mapOptional("load-name", keys->_loadName); + io.mapOptional("can-be-null", keys->_canBeNull, (ShlibCanBeNull) false); + io.mapOptional("type", keys->_type, SharedLibraryAtom::Type::Code); + io.mapOptional("size", keys->_size, uint64_t(0)); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, lld::SharedLibraryAtom *&atom) { + const lld::SharedLibraryAtom *atomPtr = atom; + MappingTraits::mapping(io, atomPtr); + atom = const_cast(atomPtr); + } +}; + +// YAML conversion for const lld::AbsoluteAtom* +template <> struct MappingTraits { + class NormalizedAtom : public lld::AbsoluteAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _scope(), _value(0) {} + + NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _scope(atom->scope()), _value(atom->value()) {} + + ~NormalizedAtom() override = default; + + const lld::AbsoluteAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + typedef MappingTraits::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created AbsoluteAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != 
nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t value() const override { return _value; } + Scope scope() const override { return _scope; } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Hex64 _value; + }; + + static void mapping(IO &io, const lld::AbsoluteAtom *&atom) { + YamlContext *info = reinterpret_cast(io.getContext()); + MappingNormalizationHeap keys( + io, atom, &info->_file->allocator()); + + if (io.outputting()) { + typedef MappingTraits::NormalizedFile NormalizedFile; + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapRequired("name", keys->_name); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope); + io.mapRequired("value", keys->_value); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, lld::AbsoluteAtom *&atom) { + const lld::AbsoluteAtom *atomPtr = atom; + MappingTraits::mapping(io, atomPtr); + atom = const_cast(atomPtr); + } +}; + +} // end namespace llvm +} // end namespace yaml + +RefNameResolver::RefNameResolver(const lld::File *file, IO &io) : _io(io) { + typedef MappingTraits::NormalizedAtom + NormalizedAtom; + for (const lld::DefinedAtom *a : file->defined()) { + const auto *na = (const NormalizedAtom *)a; + if (!na->_refName.empty()) + add(na->_refName, a); + else if (!na->_name.empty()) + add(na->_name, a); + } + + for (const lld::UndefinedAtom *a : file->undefined()) + add(a->name(), a); + + for (const lld::SharedLibraryAtom *a : file->sharedLibrary()) + add(a->name(), a); + + typedef MappingTraits::NormalizedAtom NormAbsAtom; + for (const lld::AbsoluteAtom *a : 
file->absolute()) { + const auto *na = (const NormAbsAtom *)a; + if (na->_refName.empty()) + add(na->_name, a); + else + add(na->_refName, a); + } +} + +inline const lld::File * +MappingTraits::NormalizedFile::denormalize(IO &io) { + typedef MappingTraits::NormalizedAtom + NormalizedAtom; + + RefNameResolver nameResolver(this, io); + // Now that all atoms are parsed, references can be bound. + for (const lld::DefinedAtom *a : this->defined()) { + auto *normAtom = (NormalizedAtom *)const_cast(a); + normAtom->bind(nameResolver); + } + + return this; +} + +inline void MappingTraits::NormalizedAtom::bind( + const RefNameResolver &resolver) { + typedef MappingTraits::NormalizedReference + NormalizedReference; + for (const lld::Reference *ref : _references) { + auto *normRef = (NormalizedReference *)const_cast(ref); + normRef->bind(resolver); + } +} + +inline void MappingTraits::NormalizedReference::bind( + const RefNameResolver &resolver) { + _target = resolver.lookup(_targetName); +} + +inline StringRef +MappingTraits::NormalizedReference::targetName( + IO &io, const lld::Reference *ref) { + if (ref->target() == nullptr) + return StringRef(); + YamlContext *info = reinterpret_cast(io.getContext()); + assert(info != nullptr); + typedef MappingTraits::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast(info->_file); + RefNameBuilder &rnb = *f->_rnb; + if (rnb.hasRefName(ref->target())) + return rnb.refName(ref->target()); + return ref->target()->name(); +} + +namespace lld { +namespace yaml { + +class Writer : public lld::Writer { +public: + Writer(const LinkingContext &context) : _ctx(context) {} + + llvm::Error writeFile(const lld::File &file, StringRef outPath) override { + // Create stream to path. + std::error_code ec; + llvm::raw_fd_ostream out(outPath, ec, llvm::sys::fs::OF_TextWithCRLF); + if (ec) + return llvm::errorCodeToError(ec); + + // Create yaml Output writer, using yaml options for context. 
+ YamlContext yamlContext; + yamlContext._ctx = &_ctx; + yamlContext._registry = &_ctx.registry(); + llvm::yaml::Output yout(out, &yamlContext); + + // Write yaml output. + const lld::File *fileRef = &file; + yout << fileRef; + + return llvm::Error::success(); + } + +private: + const LinkingContext &_ctx; +}; + +} // end namespace yaml + +namespace { + +/// Handles !native tagged yaml documents. +class NativeYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!native")) { + MappingTraits::mappingAtoms(io, file); + return true; + } + return false; + } +}; + +/// Handles !archive tagged yaml documents. +class ArchiveYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!archive")) { + MappingTraits::mappingArchive(io, file); + return true; + } + return false; + } +}; + +class YAMLReader : public Reader { +public: + YAMLReader(const Registry ®istry) : _registry(registry) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + StringRef name = mb.getBufferIdentifier(); + return name.endswith(".objtxt") || name.endswith(".yaml"); + } + + ErrorOr> + loadFile(std::unique_ptr mb, + const class Registry &) const override { + // Create YAML Input Reader. + YamlContext yamlContext; + yamlContext._registry = &_registry; + yamlContext._path = mb->getBufferIdentifier(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill vector with File objects created by parsing yaml. + std::vector createdFiles; + yin >> createdFiles; + assert(createdFiles.size() == 1); + + // Error out now if there were parsing errors. 
+ if (yin.error()) + return make_error_code(lld::YamlReaderError::illegal_value); + + std::shared_ptr smb(mb.release()); + const File *file = createdFiles[0]; + // Note: loadFile() should return vector of *const* File + File *f = const_cast(file); + f->setLastError(std::error_code()); + f->setSharedMemoryBuffer(smb); + return std::unique_ptr(f); + } + +private: + const Registry &_registry; +}; + +} // end anonymous namespace + +void Registry::addSupportYamlFiles() { + add(std::unique_ptr(new YAMLReader(*this))); + add(std::unique_ptr( + new NativeYamlIOTaggedDocumentHandler())); + add(std::unique_ptr( + new ArchiveYamlIOTaggedDocumentHandler())); +} + +std::unique_ptr createWriterYAML(const LinkingContext &context) { + return std::unique_ptr(new lld::yaml::Writer(context)); +} + +} // end namespace lld diff --git a/lld/test/ELF/Inputs/copy-rel.s b/lld/test/ELF/Inputs/copy-rel.s new file mode 100644 index 000000000000..bcfc7a58a33f --- /dev/null +++ b/lld/test/ELF/Inputs/copy-rel.s @@ -0,0 +1,11 @@ +.globl foo +.type foo, @object +.size foo, 4 +foo: +.long 1 + +.weak bar +.type bar, @object +.size bar, 4 +bar: +.long 2 diff --git a/lld/test/ELF/copy-rel.s b/lld/test/ELF/copy-rel.s new file mode 100644 index 000000000000..7a957d2c290c --- /dev/null +++ b/lld/test/ELF/copy-rel.s @@ -0,0 +1,25 @@ +# REQUIRES: x86 + +## Test copy relocations can be created for -no-pie and -pie. 
+ +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/copy-rel.s -o %t1.o +# RUN: ld.lld %t1.o -o %t1.so -shared -soname=so + +# RUN: ld.lld %t.o %t1.so -o %t +# RUN: llvm-readobj -r %t | FileCheck %s + +# RUN: ld.lld %t.o %t1.so -o %t -pie +# RUN: llvm-readobj -r %t | FileCheck %s + +# CHECK: Relocations [ +# CHECK-NEXT: .rela.dyn { +# CHECK-NEXT: R_X86_64_COPY foo 0x0 +# CHECK-NEXT: R_X86_64_COPY bar 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: ] + +.global _start +_start: + mov $foo - ., %eax + movabs $bar, %rax diff --git a/lld/test/ELF/relocation-copy-alias.s b/lld/test/ELF/relocation-copy-alias.s new file mode 100644 index 000000000000..f2251bbeefc2 --- /dev/null +++ b/lld/test/ELF/relocation-copy-alias.s @@ -0,0 +1,69 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/relocation-copy-alias.s -o %t2.o +// RUN: ld.lld --hash-style=sysv -shared %t2.o -o %t.so +// RUN: ld.lld --hash-style=sysv %t.o %t.so -o %t3 +// RUN: llvm-readobj --dyn-symbols -r --expand-relocs %t3 | FileCheck %s +// RUN: ld.lld --hash-style=sysv --gc-sections %t.o %t.so -o %t3 +// RUN: llvm-readobj --dyn-symbols -r --expand-relocs %t3 | FileCheck %s + +.global _start +_start: +movl $5, a1 +movl $5, b1 +movl $5, b2 + +// CHECK: .rela.dyn { +// CHECK-NEXT: Relocation { +// CHECK-NEXT: Offset: +// CHECK-NEXT: Type: R_X86_64_COPY +// CHECK-NEXT: Symbol: a1 +// CHECK-NEXT: Addend: 0x0 +// CHECK-NEXT: } +// CHECK-NEXT: Relocation { +// CHECK-NEXT: Offset: +// CHECK-NEXT: Type: R_X86_64_COPY +// CHECK-NEXT: Symbol: b1 +// CHECK-NEXT: Addend: 0x0 +// CHECK-NEXT: } +// CHECK-NEXT: } + +// CHECK: Name: a1 +// CHECK-NEXT: Value: [[A:.*]] +// CHECK-NEXT: Size: 1 +// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .bss (0x7) + +// CHECK: Name: b1 +// CHECK-NEXT: Value: [[B:.*]] +// 
CHECK-NEXT: Size: 1 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .bss + +// CHECK: Name: b2 +// CHECK-NEXT: Value: [[B]] +// CHECK-NEXT: Size: 1 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .bss + +// CHECK: Name: a2 +// CHECK-NEXT: Value: [[A]] +// CHECK-NEXT: Size: 1 +// CHECK-NEXT: Binding: Weak +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .bss + +// CHECK: Name: b3 +// CHECK-NEXT: Value: [[B]] +// CHECK-NEXT: Size: 1 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .bss diff --git a/lld/test/darwin/Inputs/native-and-mach-o.objtxt b/lld/test/darwin/Inputs/native-and-mach-o.objtxt new file mode 100644 index 000000000000..58124eb83321 --- /dev/null +++ b/lld/test/darwin/Inputs/native-and-mach-o.objtxt @@ -0,0 +1,17 @@ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + address: 0 + content: [ 0xC3 ] +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ ] + value: 0 diff --git a/lld/test/darwin/Inputs/native-and-mach-o2.objtxt b/lld/test/darwin/Inputs/native-and-mach-o2.objtxt new file mode 100644 index 000000000000..344c9bc0b0d2 --- /dev/null +++ b/lld/test/darwin/Inputs/native-and-mach-o2.objtxt @@ -0,0 +1,19 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +flags: [ ] +install-name: /usr/lib/libSystem.B.dylib +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55 ] + +global-symbols: + - name: dyld_stub_binder + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 diff --git a/lld/test/darwin/cmdline-lto_library.objtxt 
b/lld/test/darwin/cmdline-lto_library.objtxt new file mode 100644 index 000000000000..48226ec81361 --- /dev/null +++ b/lld/test/darwin/cmdline-lto_library.objtxt @@ -0,0 +1,11 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -lto_library %t -print-atoms -r %s 2>&1 | FileCheck %s +# +# Test that the -lto_library option does not result in an error. +# + +# CHECK-NOT: -lto_library + +--- !native +defined-atoms: + - name: _foo +... diff --git a/lld/test/darwin/cmdline-objc_gc.objtxt b/lld/test/darwin/cmdline-objc_gc.objtxt new file mode 100644 index 000000000000..f6db8e06ef9a --- /dev/null +++ b/lld/test/darwin/cmdline-objc_gc.objtxt @@ -0,0 +1,15 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -objc_gc %s 2>&1 | FileCheck %s +# +# Test that the -objc_gc is rejected. +# + +# CHECK: error: -objc_gc is not supported + +--- !native +defined-atoms: + - name: _main + type: code + scope: global + content: [ 0x90 ] + +... diff --git a/lld/test/darwin/cmdline-objc_gc_compaction.objtxt b/lld/test/darwin/cmdline-objc_gc_compaction.objtxt new file mode 100644 index 000000000000..47620b9b39a1 --- /dev/null +++ b/lld/test/darwin/cmdline-objc_gc_compaction.objtxt @@ -0,0 +1,15 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -objc_gc_compaction %s 2>&1 | FileCheck %s +# +# Test that the -objc_gc_compaction is rejected. +# + +# CHECK: error: -objc_gc_compaction is not supported + +--- !native +defined-atoms: + - name: _main + type: code + scope: global + content: [ 0x90 ] + +... diff --git a/lld/test/darwin/cmdline-objc_gc_only.objtxt b/lld/test/darwin/cmdline-objc_gc_only.objtxt new file mode 100644 index 000000000000..4c5f1b338bd6 --- /dev/null +++ b/lld/test/darwin/cmdline-objc_gc_only.objtxt @@ -0,0 +1,15 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -objc_gc_only %s 2>&1 | FileCheck %s +# +# Test that the -objc_gc_only is rejected. +# + +# CHECK: error: -objc_gc_only is not supported + +--- !native +defined-atoms: + - name: _main + type: code + scope: global + content: [ 0x90 ] + +... 
diff --git a/lld/test/darwin/native-and-mach-o.objtxt b/lld/test/darwin/native-and-mach-o.objtxt new file mode 100644 index 000000000000..41a9ef64dddb --- /dev/null +++ b/lld/test/darwin/native-and-mach-o.objtxt @@ -0,0 +1,27 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: %p/Inputs/native-and-mach-o.objtxt \ +# RUN: %p/Inputs/native-and-mach-o2.objtxt -o %t && \ +# RUN: llvm-nm %t | FileCheck %s +# +# Test a mix of atoms and mach-o both encoded in yaml +# + +--- !native +defined-atoms: + - name: _main + type: code + scope: global + content: [ 55, 48, 89, E5, 30, C0, E8, 00, + 00, 00, 00, 31, C0, 5D, C3 ] + references: + - offset: 7 + kind: branch32 + target: _foo + +undefined-atoms: + - name: _foo + +... + +# CHECK: {{[0-9a-f]+}} T _foo +# CHECK: {{[0-9a-f]+}} T _main diff --git a/lld/test/mach-o/Inputs/DependencyDump.py b/lld/test/mach-o/Inputs/DependencyDump.py new file mode 100755 index 000000000000..0f4d49d6fb9a --- /dev/null +++ b/lld/test/mach-o/Inputs/DependencyDump.py @@ -0,0 +1,30 @@ +# -*- Python -*- + + +# +# Dump out Xcode binary dependency file. 
+# + +import sys + +f = open(sys.argv[1], "rb") +byte = f.read(1) +while byte != b'': + if byte == b'\000': + sys.stdout.write("linker-vers: ") + elif byte == b'\020': + sys.stdout.write("input-file: ") + elif byte == b'\021': + sys.stdout.write("not-found: ") + elif byte == b'\100': + sys.stdout.write("output-file: ") + byte = f.read(1) + while byte != b'\000': + if byte != b'\012': + sys.stdout.write(byte.decode("ascii")) + byte = f.read(1) + sys.stdout.write("\n") + byte = f.read(1) + +f.close() + diff --git a/lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd b/lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd new file mode 100644 index 000000000000..fddd192630d1 --- /dev/null +++ b/lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd @@ -0,0 +1,42 @@ +--- !tapi-tbd-v3 +archs: [ x86_64 ] +uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000000' ] +platform: macosx +install-name: '/usr/lib/libSystem.B.dylib' +current-version: 0001.001.1 +exports: + - archs: [ 'x86_64' ] + re-exports: [ '/usr/lib/system/libdyld.dylib', + '/usr/lib/system/libsystem_c.dylib', + '/usr/lib/system/libsystem_m.dylib' ] +--- !tapi-tbd-v3 +archs: [ x86_64 ] +uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000001' ] +platform: macosx +install-name: '/usr/lib/system/libdyld.dylib' +current-version: 0001.001.1 +parent-umbrella: System +exports: + - archs: [ 'x86_64' ] + symbols: [ dyld_stub_binder ] +--- !tapi-tbd-v3 +archs: [ x86_64 ] +uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000002' ] +platform: macosx +install-name: '/usr/lib/system/libsystem_c.dylib' +current-version: 0001.001.1 +parent-umbrella: System +exports: + - archs: [ 'x86_64' ] + symbols: [ ] +--- !tapi-tbd-v3 +archs: [ x86_64 ] +uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000003' ] +platform: macosx +install-name: '/usr/lib/system/libsystem_m.dylib' +current-version: 0001.001.1 +parent-umbrella: System +exports: + - archs: [ 'x86_64' ] + symbols: [ ___nan ] +... 
diff --git a/lld/test/mach-o/Inputs/PIE.yaml b/lld/test/mach-o/Inputs/PIE.yaml new file mode 100644 index 000000000000..0463154fcf28 --- /dev/null +++ b/lld/test/mach-o/Inputs/PIE.yaml @@ -0,0 +1,6 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/arm-interworking.yaml b/lld/test/mach-o/Inputs/arm-interworking.yaml new file mode 100644 index 000000000000..d78a2997fe33 --- /dev/null +++ b/lld/test/mach-o/Inputs/arm-interworking.yaml @@ -0,0 +1,83 @@ +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0xFE, 0xFF, 0xFF, 0xEB, 0x02, 0x00, 0x00, 0xFA, + 0xFC, 0xFF, 0xFF, 0xEB, 0xFB, 0xFF, 0xFF, 0xFA, + 0x1E, 0xFF, 0x2F, 0xE1, 0x1E, 0xFF, 0x2F, 0xE1 ] + relocations: + - offset: 0x0000000C + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000008 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000004 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x0000000000000018 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000004 + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 3 +local-symbols: + - name: _d2 + type: N_SECT + sect: 2 + value: 0x0000000000000018 +global-symbols: + - name: _a1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - 
name: _a2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000014 +undefined-symbols: + - name: _t1 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _t2 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/arm-shims.yaml b/lld/test/mach-o/Inputs/arm-shims.yaml new file mode 100644 index 000000000000..8baebef17d86 --- /dev/null +++ b/lld/test/mach-o/Inputs/arm-shims.yaml @@ -0,0 +1,60 @@ +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x00, 0xBF, 0xFF, 0xF7, 0xFE, 0xEF, 0xFF, 0xF7, + 0xFB, 0xBF, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3, + 0xFA, 0xFF, 0xFF, 0xFA, 0xF9, 0xFF, 0xFF, 0xEA ] + relocations: + - offset: 0x00000014 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000010 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000006 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x00000002 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 2 +global-symbols: + - name: _a2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000C + - name: _t2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 +undefined-symbols: + - name: _a1 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _t1 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/arm64/libSystem.yaml b/lld/test/mach-o/Inputs/arm64/libSystem.yaml new file mode 100644 index 000000000000..76cba1bc3255 --- /dev/null +++ b/lld/test/mach-o/Inputs/arm64/libSystem.yaml @@ -0,0 +1,13 @@ +# +# For use by test cases that create 
dynamic output types which may needs stubs +# and therefore will need a dylib definition of dyld_stub_binder. +# + +--- !mach-o +arch: arm64 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: dyld_stub_binder + +... diff --git a/lld/test/mach-o/Inputs/armv7/libSystem.yaml b/lld/test/mach-o/Inputs/armv7/libSystem.yaml new file mode 100644 index 000000000000..2539f9003540 --- /dev/null +++ b/lld/test/mach-o/Inputs/armv7/libSystem.yaml @@ -0,0 +1,13 @@ +# +# For use by test cases that create dynamic output types which may needs stubs +# and therefore will need a dylib definition of dyld_stub_binder. +# + +--- !mach-o +arch: armv7 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: dyld_stub_binder + +... diff --git a/lld/test/mach-o/Inputs/bar.yaml b/lld/test/mach-o/Inputs/bar.yaml new file mode 100644 index 000000000000..5605e67e7c35 --- /dev/null +++ b/lld/test/mach-o/Inputs/bar.yaml @@ -0,0 +1,18 @@ + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3 ] +global-symbols: + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/cstring-sections.yaml b/lld/test/mach-o/Inputs/cstring-sections.yaml new file mode 100644 index 000000000000..eb227f29f8f5 --- /dev/null +++ b/lld/test/mach-o/Inputs/cstring-sections.yaml @@ -0,0 +1,25 @@ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __objc_methname + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x61, 0x62, 0x63, 0x00 ] + - segment: __TEXT + section: __objc_classname + type: S_CSTRING_LITERALS + attributes: [ ] + address: 
0x0000000000000006 + content: [ 0x61, 0x62, 0x63, 0x00 ] + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x000000000000000A + content: [ 0x61, 0x62, 0x63, 0x00 ] diff --git a/lld/test/mach-o/Inputs/exported_symbols_list.exp b/lld/test/mach-o/Inputs/exported_symbols_list.exp new file mode 100644 index 000000000000..ff6653342472 --- /dev/null +++ b/lld/test/mach-o/Inputs/exported_symbols_list.exp @@ -0,0 +1,6 @@ +# +# For use with exported_symbols_list.yaml +# +_foo +_b + diff --git a/lld/test/mach-o/Inputs/full.filelist b/lld/test/mach-o/Inputs/full.filelist new file mode 100644 index 000000000000..abf98b633377 --- /dev/null +++ b/lld/test/mach-o/Inputs/full.filelist @@ -0,0 +1,3 @@ +/foo/bar/a.o +/foo/bar/b.o +/foo/x.a diff --git a/lld/test/mach-o/Inputs/got-order.yaml b/lld/test/mach-o/Inputs/got-order.yaml new file mode 100644 index 000000000000..d256e9d7d463 --- /dev/null +++ b/lld/test/mach-o/Inputs/got-order.yaml @@ -0,0 +1,53 @@ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x0D, 0x00, + 0x00, 0x00, 0x00, 0x48, 0x8B, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x8B, 0x00, 0x03, 0x01, 0x48, 0x8B, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000019 + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x0000000E + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000007 + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 3 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 
0x0000000000000000 + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _zazzle + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/got-order2.yaml b/lld/test/mach-o/Inputs/got-order2.yaml new file mode 100644 index 000000000000..faddeda924db --- /dev/null +++ b/lld/test/mach-o/Inputs/got-order2.yaml @@ -0,0 +1,11 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libfoobar.dylib +exports: + - name: _bar + - name: _zazzle + - name: _foo + - name: _aaa + - name: _fff + - name: _zzz diff --git a/lld/test/mach-o/Inputs/hello-world-arm64.yaml b/lld/test/mach-o/Inputs/hello-world-arm64.yaml new file mode 100644 index 000000000000..31de71ef4941 --- /dev/null +++ b/lld/test/mach-o/Inputs/hello-world-arm64.yaml @@ -0,0 +1,8 @@ +--- !mach-o +arch: arm64 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: _fprintf + - name: ___stdoutp + - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-armv6.yaml b/lld/test/mach-o/Inputs/hello-world-armv6.yaml new file mode 100644 index 000000000000..0b29f65ab126 --- /dev/null +++ b/lld/test/mach-o/Inputs/hello-world-armv6.yaml @@ -0,0 +1,7 @@ +--- !mach-o +arch: armv6 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: _printf + - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-armv7.yaml b/lld/test/mach-o/Inputs/hello-world-armv7.yaml new file mode 100644 index 000000000000..4e26120fe216 --- /dev/null +++ b/lld/test/mach-o/Inputs/hello-world-armv7.yaml @@ -0,0 +1,7 @@ +--- !mach-o +arch: armv7 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: _printf + - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-x86.yaml b/lld/test/mach-o/Inputs/hello-world-x86.yaml new file mode 100644 index 000000000000..dbec62b77f3b --- /dev/null +++ b/lld/test/mach-o/Inputs/hello-world-x86.yaml @@ 
-0,0 +1,7 @@ +--- !mach-o +arch: x86 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: _printf + - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-x86_64.yaml b/lld/test/mach-o/Inputs/hello-world-x86_64.yaml new file mode 100644 index 000000000000..7840d5c1932e --- /dev/null +++ b/lld/test/mach-o/Inputs/hello-world-x86_64.yaml @@ -0,0 +1,8 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: _fprintf + - name: dyld_stub_binder + - name: ___stdoutp diff --git a/lld/test/mach-o/Inputs/hw.raw_bytes b/lld/test/mach-o/Inputs/hw.raw_bytes new file mode 100644 index 000000000000..ce013625030b --- /dev/null +++ b/lld/test/mach-o/Inputs/hw.raw_bytes @@ -0,0 +1 @@ +hello diff --git a/lld/test/mach-o/Inputs/interposing-section.yaml b/lld/test/mach-o/Inputs/interposing-section.yaml new file mode 100644 index 000000000000..45966b6870cc --- /dev/null +++ b/lld/test/mach-o/Inputs/interposing-section.yaml @@ -0,0 +1,6 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: _open diff --git a/lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml b/lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml new file mode 100644 index 000000000000..50a97bc9c09b --- /dev/null +++ b/lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml @@ -0,0 +1,8 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libfoo.dylib +compat-version: 2.0 +current-version: 3.4 +exports: + - name: _foo diff --git a/lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml b/lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml new file mode 100644 index 000000000000..2f61cc0cda1a --- /dev/null +++ b/lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml @@ -0,0 +1,8 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libbaz.dylib +compat-version: 3.0 +current-version: 4.5 +exports: + - name: _baz diff --git 
a/lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml b/lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml new file mode 100644 index 000000000000..7e6cd9007bf3 --- /dev/null +++ b/lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml @@ -0,0 +1,8 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libbar.dylib +compat-version: 1.0 +current-version: 2.3 +exports: + - name: _bar diff --git a/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmyshared.dylib b/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmyshared.dylib new file mode 100755 index 0000000000000000000000000000000000000000..71185fbdf73600ff2b795605d217a3c609752edd GIT binary patch literal 20628 zcmeI4J!n%=6vt26`av5_tqv6nhAN^KS_icq99n6yMG-$@=_1$Gq!=)*Bq`dhi$lSo zgMzqqbL^xd_yNVmK?Qel5eE^RI~4!_ym!+_n$SRTko*svoO|wj_rCLfna({Y`MCc2 zt(i5&%yz*xo3+CvpW`DA4d(_iY=0ci!AL|v1VlgtL_h>YKm zey=0Rk0J55ozT6|?@eY8OXmP)2jM@3I+q%mNMTSZJT8bz==0!U2NdTDC2_8pc~A@; zm`(fD*r&Xr#TLqA3^w8#9!d>GuK0aw*ziCj5?6Y%IJvRjx$671_*<|q@YEE>H41g^ z(%9vTBg12!Su$MW;|hHqv``qAXGMPx^c1uc8lAtJoyyG@?o8%0>E84_JY1)An=SDE zEhQQ3$#>x1x8QTU0DBy^4SEF{6+Yi730^ekU}dF0ud@x|M{AHMWw_aDVv z2}NtG8T6$0VJSR(8h~;w<1>4MuOMc9bA@~#UYmM-zEI5Mde28e9K)U5w=-$B8}>x# zC*f0ipuA3h5LO}rA|L`HAOa#F0wN#+A|L{dLLkVk8s%;Eo+2OuA|L`HAOa#F0wN#+ zA|L`H(4YijZaP1kbH1o-9nGa$u&1&1EzUCjIVelOlmnQfWx1GhY#ZP)LR+LH3zxqo z2reXewN&gi+lL_*`MoS@70*XeJM9X^xhXf5ok?f%+(aS*A|L`HAOa#F0wN#+A|L`H zAOa$=Edrl{{{K?4&P1Ml=PBgt?Ehzy{vNuiS08gepQrV`{=S!Bt6u+=MDW`H+Hzk# z_l)%TJ2y9T9|r&~J2O3tOO-La!rD>H)B5Lfi>NSu_KsQgodYQ6V7++go{ z`Je*h%C`INdVB8iEZ@gYI{}|D--Bfz{J-kC|Jw2A-RjT&U(08&U3m0lV7#85`_qWW z$68O%eYxKr@51lW_uF^Dr}%z*`aDFEh=2%)fCz|y2#A0Ph=2%)fC$t_;Kt&Qo2yGp zOUz5V!X;75ypnk}^Em1Yj42$mARk9Y8)WvJg;c5kuN=l6(CyUz8Ak#^GL~q~zn*&k EU$)S+n*aa+ literal 0 HcmV?d00001 diff --git a/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmystatic.a b/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmystatic.a new file mode 100644 index 0000000000000000000000000000000000000000..b12062941f376d739ba0bee9161c165f25c6a2a0 GIT 
binary patch literal 556 zcmY$iNi0gvu;bEKKm~>-1}5evX6B}b3JL~bDP&SX!N>%rK7osYfq@Z-W8%|_@^j;J zD~n4KOEQxg7+@^De2D2VGfgZN6if|(1}hkW4K*|{Ff##~2ecAuXad*yUvK}h1I=ZI z8V;fZK(;6VF%U=s@e42uJHY}KE&%BP0vI1?CIbjFfdqjdKE5Qiq6ET*@Izc9LJ)j( z7R)>(@dhL|(7cqyl0<|^e7uWeh$DgzW5L`5G7sh!m>e<dAI2c(xD*)eDl3IOc4H2MGl literal 0 HcmV?d00001 diff --git a/lld/test/mach-o/Inputs/lib-search-paths/usr/local/lib/file.o b/lld/test/mach-o/Inputs/lib-search-paths/usr/local/lib/file.o new file mode 100644 index 0000000000000000000000000000000000000000..f9a923d37db381942611b306f0c87ed7266b2723 GIT binary patch literal 404 zcmX^A>+L^w1_nlE1|R{%AUXiVPyk{ekOblvU>0_Q1u9$s(gOrAKF~}C5M}}i0zrIy zNoqw2gbm?`xJHB^_~0we^0G&c|j05LKE$%8OCKnth|q?aGrG4W|d`ML3FnK`NXATt4O85SG> literal 0 HcmV?d00001 diff --git a/lld/test/mach-o/Inputs/libbar.a b/lld/test/mach-o/Inputs/libbar.a new file mode 100644 index 0000000000000000000000000000000000000000..64cae6c749eee95e83a7273ad80f991a93b74ff3 GIT binary patch literal 824 zcma)4yG{Z@6rBYFxPoqBp^3#9ZN**O1x$>=2!=u<0Zg8pwZqn*PP9iEvIl?XDjPv(%!a|j%|Cc zbR7N)opxO*nLcVJQL=J&lrJ2}%5k|`$O|CG3(yBy(?xv%lS3aTrBh*bpHxQo@u$#z z(w>g4nJAI$p$Oyo^W!Tjgb{`|z!`ANLW00Dm}UPD@LY@+6{CIB6ilwmAR0DDLtp!T zwQyRU`dTHF;CehuF67Sz=T674C2I6K{d!|a*WH_|?j_OT+QqOKiK$up%TLt1`~blQyoNzRt$tt34>fswW*=4NQH>OB(eEZjA9Up z&4AHAAhFmCV!-d$cke?&B4Y4M&OPUzd+zzT=e&3Ma(X6&2!WbFb)cva);)DrpbJHC z@)UBy^Q0vCShbZJSXiD(r6=XW z?0hDjf?pBl0&IXNt3ss$c8)l1%v=RGapLgq$MLf;apG zz+;ODf-p`W7QROdXb$udJSY7Ii-&hB7r4m#9ma`%bQ}2ZeNF^Zv1)(p^R?e+i-XLv z`0mFPBnapNB~86G&F7`4w>R3x>`m|Km-tEVY1haFpug{Cfagl(Dy5BVuBx`m+w0{u zrKVHV8*l~)SzCp;g+Jz!#K7oOIXVH9$`@5(Gh530Dy2TpX1}%^)BH*M(_7A)DZQrp zZcfK^{5f#u_~&yvr3Y0frr%SSY_b)iQ;T)G^TOJ3ET^?8d;}}z-3>Z<^&1 | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 
0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ] +... diff --git a/lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml b/lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml new file mode 100644 index 000000000000..5f7ae50717ba --- /dev/null +++ b/lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml @@ -0,0 +1,13 @@ +--- !mach-o +arch: arm64 +file-type: MH_DYLIB +install-name: /usr/lib/libc++.dylib +exports: + - name: __Unwind_Resume + - name: __ZTIl + - name: __ZTIi + - name: ___cxa_end_catch + - name: ___cxa_begin_catch + - name: ___cxa_allocate_exception + - name: ___cxa_throw + - name: ___gxx_personality_v0 diff --git a/lld/test/mach-o/Inputs/use-dylib-install-names.yaml b/lld/test/mach-o/Inputs/use-dylib-install-names.yaml new file mode 100644 index 000000000000..cec2559f2435 --- /dev/null +++ b/lld/test/mach-o/Inputs/use-dylib-install-names.yaml @@ -0,0 +1,28 @@ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, + 0xE8, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, + 0x00, 0x00, 0x00 ] +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _myGlobal + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/use-simple-dylib.yaml b/lld/test/mach-o/Inputs/use-simple-dylib.yaml new file mode 100644 index 000000000000..9081bcf76932 --- /dev/null +++ b/lld/test/mach-o/Inputs/use-simple-dylib.yaml @@ -0,0 +1,58 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: 
S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, + 0x90, 0x90, 0xC3, 0x90, 0x90, 0x90, 0x90, 0xC3, + 0x31, 0xC0, 0xC3 ] +local-symbols: + - name: _myStatic + type: N_SECT + sect: 1 + value: 0x000000000000000B + - name: _myVariablePreviouslyKnownAsPrivateExtern + type: N_SECT + scope: [ N_PEXT ] + sect: 1 + desc: [ N_SYMBOL_RESOLVER ] + value: 0x0000000000000011 +global-symbols: + - name: _myGlobal + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myGlobalWeak + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_WEAK_DEF ] + value: 0x0000000000000002 + - name: _myHidden + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000004 + - name: _myHiddenWeak + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + desc: [ N_WEAK_DEF ] + value: 0x0000000000000007 + - name: _myResolver + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_SYMBOL_RESOLVER ] + value: 0x0000000000000010 + +install-name: libspecial.dylib diff --git a/lld/test/mach-o/Inputs/write-final-sections.yaml b/lld/test/mach-o/Inputs/write-final-sections.yaml new file mode 100644 index 000000000000..ed434917f2cf --- /dev/null +++ b/lld/test/mach-o/Inputs/write-final-sections.yaml @@ -0,0 +1,20 @@ +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +flags: [ ] +install-name: /usr/lib/libSystem.B.dylib +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55 ] + +global-symbols: + - name: dyld_stub_binder + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + diff --git a/lld/test/mach-o/Inputs/wrong-arch-error.yaml b/lld/test/mach-o/Inputs/wrong-arch-error.yaml new file mode 100644 index 000000000000..714ce9f1631d --- /dev/null +++ b/lld/test/mach-o/Inputs/wrong-arch-error.yaml 
@@ -0,0 +1,24 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2> %t.err +# RUN: FileCheck %s < %t.err + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xC3 ] + +global-symbols: + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/x86/libSystem.yaml b/lld/test/mach-o/Inputs/x86/libSystem.yaml new file mode 100644 index 000000000000..87a4895c9f4a --- /dev/null +++ b/lld/test/mach-o/Inputs/x86/libSystem.yaml @@ -0,0 +1,13 @@ +# +# For use by test cases that create dynamic output types which may needs stubs +# and therefore will need a dylib definition of dyld_stub_binder. +# + +--- !mach-o +arch: x86 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: dyld_stub_binder + +... diff --git a/lld/test/mach-o/Inputs/x86_64/libSystem.yaml b/lld/test/mach-o/Inputs/x86_64/libSystem.yaml new file mode 100644 index 000000000000..fbbf794f3264 --- /dev/null +++ b/lld/test/mach-o/Inputs/x86_64/libSystem.yaml @@ -0,0 +1,13 @@ +# +# For use by test cases that create dynamic output types which may needs stubs +# and therefore will need a dylib definition of dyld_stub_binder. +# + +--- !mach-o +arch: x86_64 +file-type: MH_DYLIB +install-name: /usr/lib/libSystem.B.dylib +exports: + - name: dyld_stub_binder + +... 
diff --git a/lld/test/mach-o/PIE.yaml b/lld/test/mach-o/PIE.yaml new file mode 100644 index 000000000000..0195f5059cdf --- /dev/null +++ b/lld/test/mach-o/PIE.yaml @@ -0,0 +1,40 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: %p/Inputs/PIE.yaml -o %t && \ +# RUN: llvm-objdump --macho --private-headers %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: %p/Inputs/PIE.yaml -pie -o %t\ +# RUN: && llvm-objdump --macho --private-headers %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: %p/Inputs/PIE.yaml -no_pie -o %t\ +# RUN: && llvm-objdump --macho --private-headers %t \ +# RUN: | FileCheck --check-prefix=CHECK_NO_PIE %s +# +# Test various PIE options. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + +... 
+ +# CHECK: MH_MAGIC_64 {{[0-9a-zA-Z _]+}} TWOLEVEL PIE +# CHECK_NO_PIE-NOT: MH_MAGIC_64 {{[0-9a-zA-Z _]+}} TWOLEVEL PIE diff --git a/lld/test/mach-o/align_text.yaml b/lld/test/mach-o/align_text.yaml new file mode 100644 index 000000000000..6278ee251636 --- /dev/null +++ b/lld/test/mach-o/align_text.yaml @@ -0,0 +1,45 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -r %t -o %t2 -print_atoms | FileCheck %s +# +# Test that alignment info round trips through -r +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 16 + address: 0x0000000000000000 + content: [ 0x90, 0x90, 0x90, 0xC3, 0xC3, 0xC3 ] +local-symbols: + - name: _f1 + type: N_SECT + sect: 1 + value: 0x0000000000000003 + - name: _f2 + type: N_SECT + sect: 1 + value: 0x0000000000000004 + - name: _f3 + type: N_SECT + sect: 1 + value: 0x0000000000000005 +... 
+ +# CHECK: defined-atoms: +# CHECK: - content: [ 90, 90, 90 ] +# CHECK: alignment: 16 +# CHECK: - name: _f1 +# CHECK: content: [ C3 ] +# CHECK: alignment: 3 mod 16 +# CHECK: - name: _f2 +# CHECK: content: [ C3 ] +# CHECK: alignment: 4 mod 16 +# CHECK: - name: _f3 +# CHECK: content: [ C3 ] +# CHECK: alignment: 5 mod 16 diff --git a/lld/test/mach-o/arm-interworking-movw.yaml b/lld/test/mach-o/arm-interworking-movw.yaml new file mode 100644 index 000000000000..3f61fafd1943 --- /dev/null +++ b/lld/test/mach-o/arm-interworking-movw.yaml @@ -0,0 +1,393 @@ +# REQUIRES: arm +# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch armv7 -dylib -print_atoms %t -o %t2 \ +# RUN: %p/Inputs/armv7/libSystem.yaml -sectalign __TEXT __text 0x1000 | FileCheck %s +# RUN: llvm-objdump -d --macho --no-symbolic-operands %t2 | FileCheck --check-prefix=CODE %s +# +# Test thumb and arm branches round trip through -r. +# Test movw/movt pairs have low bit set properly for thumb vs arm. 
+# +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x40, 0xF2, 0x25, 0x00, 0xC0, 0xF2, 0x00, 0x00, + 0x40, 0xF2, 0x01, 0x01, 0xC0, 0xF2, 0x00, 0x01, + 0x40, 0xF2, 0x4E, 0x02, 0xC0, 0xF2, 0x00, 0x02, + 0x40, 0xF2, 0x2A, 0x03, 0xC0, 0xF2, 0x00, 0x03, + 0x78, 0x44, 0x70, 0x47, 0x70, 0x47, 0x25, 0x00, + 0x00, 0xE3, 0x00, 0x00, 0x40, 0xE3, 0xD7, 0x1F, + 0x0F, 0xE3, 0xFF, 0x1F, 0x4F, 0xE3, 0x4E, 0x20, + 0x00, 0xE3, 0x00, 0x20, 0x40, 0xE3, 0x00, 0x30, + 0x00, 0xE3, 0x00, 0x30, 0x40, 0xE3, 0x0F, 0x00, + 0x80, 0xE0, 0x1E, 0xFF, 0x2F, 0xE1, 0x1E, 0xFF, + 0x2F, 0xE1 ] + relocations: + - offset: 0x00000042 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 1 + pc-rel: false + value: 0x0000004E + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + value: 0x00000046 + - offset: 0x0000003E + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 0 + pc-rel: false + value: 0x0000004E + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + value: 0x00000046 + - offset: 0x0000003A + type: ARM_RELOC_HALF + length: 1 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x0000004E + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000036 + type: ARM_RELOC_HALF + length: 0 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000032 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 1 + pc-rel: false + value: 0x00000024 + - offset: 0x0000FFD6 + scattered: true + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + value: 0x00000046 + - offset: 0x0000002E + scattered: true + type: 
ARM_RELOC_HALF_SECTDIFF + length: 0 + pc-rel: false + value: 0x00000024 + - offset: 0x0000FFFF + scattered: true + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + value: 0x00000046 + - offset: 0x0000002A + type: ARM_RELOC_HALF + length: 1 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000025 + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000026 + type: ARM_RELOC_HALF + length: 0 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000001C + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 3 + pc-rel: false + value: 0x0000004E + - offset: 0x0000002A + scattered: true + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + value: 0x00000020 + - offset: 0x00000018 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 2 + pc-rel: false + value: 0x0000004E + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000020 + - offset: 0x00000014 + type: ARM_RELOC_HALF + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x0000004E + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000010 + type: ARM_RELOC_HALF + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000000C + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 3 + pc-rel: false + value: 0x00000024 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + value: 0x00000020 + - offset: 0x00000008 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000024 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000020 + - offset: 0x00000004 + type: 
ARM_RELOC_HALF + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000025 + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000000 + type: ARM_RELOC_HALF + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 +local-symbols: + - name: _t1 + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 + - name: _t2 + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000024 + - name: _a2 + type: N_SECT + sect: 1 + value: 0x000000000000004E + - name: _a1 + type: N_SECT + sect: 1 + value: 0x0000000000000026 +... + +# CHECK: defined-atoms: +# CHECK: - name: _t1 +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: target: _t1 +# CHECK: - kind: thumb_movw +# CHECK: offset: 0 +# CHECK: target: _t2 +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movt +# CHECK: offset: 4 +# CHECK: target: _t2 +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movw_funcRel +# CHECK: offset: 8 +# CHECK: target: _t2 +# CHECK: addend: -36 +# CHECK: - kind: thumb_movt_funcRel +# CHECK: offset: 12 +# CHECK: target: _t2 +# CHECK: addend: -36 +# CHECK: - kind: thumb_movw +# CHECK: offset: 16 +# CHECK: target: _a2 +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movt +# CHECK: offset: 20 +# CHECK: target: _a2 +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movw_funcRel +# CHECK: offset: 24 +# CHECK: target: _a2 +# CHECK: addend: -36 +# CHECK: - kind: thumb_movt_funcRel +# CHECK: offset: 28 +# CHECK: target: _a2 +# CHECK: addend: -36 +# CHECK: - name: _t2 +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: target: _t2 +# CHECK: - name: _a1 +# CHECK: references: +# CHECK: - kind: arm_movw +# CHECK: offset: 0 +# CHECK: target: _t2 +# CHECK-NOT: addend: +# CHECK: - kind: arm_movt +# CHECK: offset: 4 +# CHECK: target: _t2 +# CHECK-NOT: 
addend: +# CHECK: - kind: arm_movw_funcRel +# CHECK: offset: 8 +# CHECK: target: _t2 +# CHECK: addend: -40 +# CHECK: - kind: arm_movt_funcRel +# CHECK: offset: 12 +# CHECK: target: _t2 +# CHECK: addend: -40 +# CHECK: - kind: arm_movw +# CHECK: offset: 16 +# CHECK: target: _a2 +# CHECK-NOT: addend: +# CHECK: - kind: arm_movt +# CHECK: offset: 20 +# CHECK: target: _a2 +# CHECK-NOT: addend: +# CHECK: - kind: arm_movw_funcRel +# CHECK: offset: 24 +# CHECK: target: _a2 +# CHECK: addend: -40 +# CHECK: - kind: arm_movt_funcRel +# CHECK: offset: 28 +# CHECK: target: _a2 +# CHECK: addend: -40 +# CHECK: - name: _a2 + + +# CODE: _t1: +# CODE-NEXT: movw r0, #4133 +# CODE-NEXT: movt r0, #0 +# CODE-NEXT: movw r1, #1 +# CODE-NEXT: movt r1, #0 +# CODE-NEXT: movw r2, #4174 +# CODE-NEXT: movt r2, #0 +# CODE-NEXT: movw r3, #42 +# CODE-NEXT: movt r3, #0 + + +# CODE: _a1: +# CODE-NEXT: movw r0, #4133 +# CODE-NEXT: movt r0, #0 +# CODE-NEXT: movw r1, #65495 +# CODE-NEXT: movt r1, #65535 +# CODE-NEXT: movw r2, #4174 +# CODE-NEXT: movt r2, #0 +# CODE-NEXT: movw r3, #0 +# CODE-NEXT: movt r3, #0 + + + +# .syntax unified +# .align 2 +# +# .code 16 +# .thumb_func _t1 +#_t1: +# movw r0, :lower16:(_t2) +# movt r0, :upper16:(_t2) +# movw r1, :lower16:(_t2-(L0+4)) +# movt r1, :upper16:(_t2-(L0+4)) +# movw r2, :lower16:(_a2) +# movt r2, :upper16:(_a2) +# movw r3, :lower16:(_a2-(L0+4)) +# movt r3, :upper16:(_a2-(L0+4)) +#L0: +# add r0, pc +# bx lr +# +# +# .code 16 +# .thumb_func _t2 +#_t2: +# bx lr +# +# +# +# .code 32 +#_a1: +# movw r0, :lower16:(_t2) +# movt r0, :upper16:(_t2) +# movw r1, :lower16:(_t2-(L1+8)) +# movt r1, :upper16:(_t2-(L1+8)) +# movw r2, :lower16:(_a2) +# movt r2, :upper16:(_a2) +# movw r3, :lower16:(_a2-(L1+8)) +# movt r3, :upper16:(_a2-(L1+8)) +#L1: +# add r0, pc +# bx lr +# +#_a2: +# bx lr + diff --git a/lld/test/mach-o/arm-interworking.yaml b/lld/test/mach-o/arm-interworking.yaml new file mode 100644 index 000000000000..4196c12d0943 --- /dev/null +++ 
b/lld/test/mach-o/arm-interworking.yaml @@ -0,0 +1,288 @@ +# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s \ +# RUN: %p/Inputs/arm-interworking.yaml -o %t | FileCheck %s \ +# RUN: && ld64.lld.darwinold -arch armv7 -dylib -print_atoms \ +# RUN: %p/Inputs/armv7/libSystem.yaml %t -o %t2 | FileCheck %s \ +# RUN: && llvm-readobj -S --section-data %t2 | FileCheck -check-prefix=CODE %s +# +# Test thumb and arm branches round trip through -r. +# Test bl/blx instructions are fixed up properly. +# +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0xFF, 0xF7, 0xFE, 0xFF, 0xC0, 0x46, 0xFF, 0xF7, + 0xFC, 0xEF, 0xC0, 0x46, 0xFF, 0xF7, 0xF8, 0xEF, + 0xFF, 0xF7, 0xF6, 0xFF, 0xC0, 0x46, 0xFF, 0xF7, + 0xF3, 0xFF, 0xC0, 0x46, 0x00, 0xF0, 0x06, 0xE8, + 0xC0, 0x46, 0x00, 0xF0, 0x03, 0xF8, 0x00, 0xF0, + 0x02, 0xF8, 0x70, 0x47, 0x70, 0x47, 0x70, 0x47 ] + relocations: + - offset: 0x00000026 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000022 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x0000001C + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000016 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000010 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x0000000C + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x00000006 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x00000000 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - segment: __DATA + section: __data + 
type: S_REGULAR + attributes: [ ] + address: 0x0000000000000030 + content: [ 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000004 + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 +local-symbols: + - name: _t3 + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x000000000000002E + - name: _d1 + type: N_SECT + sect: 2 + value: 0x0000000000000030 +global-symbols: + - name: _t1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 + - name: _t2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x000000000000002C +undefined-symbols: + - name: _a1 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _a2 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + + +# CHECK: defined-atoms: +# CHECK: - name: _d1 +# CHECK: type: data +# CHECK: references: +# CHECK: - kind: pointer32 +# CHECK: offset: 0 +# CHECK: target: _t2 +# CHECK: - kind: pointer32 +# CHECK: offset: 4 +# CHECK: target: _a1 +# CHECK: - name: _d2 +# CHECK: type: data +# CHECK: references: +# CHECK: - kind: pointer32 +# CHECK: offset: 0 +# CHECK: target: _t1 +# CHECK: - kind: pointer32 +# CHECK: offset: 4 +# CHECK: target: _a1 +# CHECK: - name: _t1 +# CHECK: scope: global +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: target: _t1 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 0 +# CHECK: target: _a1 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 6 +# CHECK: target: _a2 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 12 +# CHECK: target: _a2 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 16 +# CHECK: target: _t1 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 22 +# CHECK: target: _t1 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 28 +# CHECK: target: _t2 +# CHECK: - kind: thumb_bl22 +# 
CHECK: offset: 34 +# CHECK: target: _t2 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 38 +# CHECK: target: _t3 +# CHECK: - name: _t2 +# CHECK: scope: global +# CHECK: content: [ 70, 47 ] +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: target: _t2 +# CHECK: - name: _t3 +# CHECK: content: [ 70, 47 ] +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: target: _t3 +# CHECK: - name: _a1 +# CHECK: scope: global +# CHECK: references: +# CHECK: - kind: arm_bl24 +# CHECK: offset: 0 +# CHECK: target: _a1 +# CHECK: - kind: arm_bl24 +# CHECK: offset: 4 +# CHECK: target: _a2 +# CHECK: - kind: arm_bl24 +# CHECK: offset: 8 +# CHECK: target: _t1 +# CHECK: - kind: arm_bl24 +# CHECK: offset: 12 +# CHECK: target: _t2 +# CHECK: - name: _a2 +# CHECK: scope: global + +# CODE: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00) +# CODE: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00) +# CODE: SectionData ( +# CODE: 0000: 00F016E8 C04600F0 1EE8C046 00F01AE8 +# CODE: 0010: FFF7F6FF C046FFF7 F3FFC046 00F006F8 +# CODE: 0020: C04600F0 03F800F0 02F87047 70477047 +# CODE: 0030: FEFFFFEB 020000EB F0FFFFFA FAFFFFFA +# CODE: 0040: 1EFF2FE1 1EFF2FE1 +# CODE: ) + +# CODE: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00) +# CODE: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) +# CODE: SectionData ( +# CODE: 0000: E50F0000 E80F0000 B90F0000 E80F0000 +# CODE: ) + +# When we get a good mach-o disassembler the above __text section content check can be change to be symbolic. +# Verify the low (thumb) bit is set on the first and third pointers but not the second and fourth. 
+ + + +# Input file one: +# +# .align 2 +# .code 16 +# .globl _t1 +# .thumb_func _t1 +#_t1: +# bl _a1 +# nop +# blx _a2 +# nop +# blx _a2 +# bl _t1 +# nop +# bl _t1 +# nop +# blx _t2 +# nop +# blx _t2 +# bx lr +# +# .globl _t2 +# .thumb_func _t2 +#_t2: +# bx lr +# +# .data +#_d1: .long _t2 +# .long _a1 + + + +# Input file two: +# +# .align 2 +# .code 32 +# .globl _a1 +#_a1: +# bl _a1 +# blx _a2 +# bl _t1 +# blx _t2 +# bx lr +# +# .globl _a2 +#_a2: +# bx lr +# +# .data +#_d2: .long _t1 +# .long _a1 + + + + diff --git a/lld/test/mach-o/arm-shims.yaml b/lld/test/mach-o/arm-shims.yaml new file mode 100644 index 000000000000..7c4f04677f1c --- /dev/null +++ b/lld/test/mach-o/arm-shims.yaml @@ -0,0 +1,126 @@ +# RUN: ld64.lld.darwinold -arch armv7 %s %p/Inputs/arm-shims.yaml \ +# RUN: -dylib %p/Inputs/armv7/libSystem.yaml -o %t +# RUN: llvm-readobj -S --section-data %t | FileCheck %s +# +# Test b from arm to thumb or vice versa has shims added.s +# +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x00, 0xBF, 0xFF, 0xF7, 0xFE, 0xEF, 0xFF, 0xF7, + 0xFB, 0xBF, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3, + 0xFA, 0xFF, 0xFF, 0xFA, 0xF9, 0xFF, 0xFF, 0xEA ] + relocations: + - offset: 0x00000014 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000010 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000006 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x00000002 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 2 +global-symbols: + - name: _a1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000C + - name: _t1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ 
N_ARM_THUMB_DEF ] + value: 0x0000000000000000 +undefined-symbols: + - name: _a2 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _t2 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + +# CHECK: Section { +# CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00) +# CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00) +# CHECK: SectionData ( +# CHECK: 0000: 00BF00F0 10E800F0 19B80000 00F020E3 +# CHECK: 0010: 000000FA 0F0000EA 00BFFFF7 F8EF00F0 +# CHECK: 0020: 07B80000 00F020E3 F4FFFFFA 050000EA +# CHECK: 0030: DFF804C0 FF446047 D4FFFFFF DFF804C0 +# CHECK: 0040: FF446047 E0FFFFFF 04C09FE5 0CC08FE0 +# CHECK: 0050: 1CFF2FE1 ADFFFFFF 04C09FE5 0CC08FE0 +# CHECK: 0060: 1CFF2FE1 B5FFFFFF +# CHECK: ) + +# When we get a good mach-o disassembler the above __text section content check can be change to be symbolic. + + +# Input file one: +# +# .align 2 +# .code 16 +# .globl _t1 +# .thumb_func _t1 +#_t1: +# nop +# blx _a2 +# b _a2 +# +# .code 32 +# .align 2 +# .globl _a1 +#_a1: +# nop +# blx _t2 +# b _t2 + + + +# Input file two: +# +# .align 2 +# .code 16 +# .globl _t2 +# .thumb_func _t2 +#_t2: +# nop +# blx _a1 +# b _a1 +# +# .code 32 +# .align 2 +# .globl _a2 +#_a2: +# nop +# blx _t1 +# b _t1 diff --git a/lld/test/mach-o/arm-subsections-via-symbols.yaml b/lld/test/mach-o/arm-subsections-via-symbols.yaml new file mode 100644 index 000000000000..96346e845c9a --- /dev/null +++ b/lld/test/mach-o/arm-subsections-via-symbols.yaml @@ -0,0 +1,60 @@ +# RUN: ld64.lld.darwinold -arch armv7 %s -r -print_atoms -o %t | FileCheck %s +# +# Test that assembly written without .subsections_via_symbols is parsed so +# that atoms are non-dead-strip and there is a layout-after references +# chaining atoms together. 
+# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x04, 0x10, 0x9F, 0xE5, 0x04, 0x20, 0x9F, 0xE5, + 0x1E, 0xFF, 0x2F, 0xE1, 0x78, 0x56, 0x34, 0x12, + 0x21, 0x43, 0x65, 0x87 ] +local-symbols: + - name: constants1 + type: N_SECT + sect: 1 + value: 0x000000000000000C + - name: constants2 + type: N_SECT + sect: 1 + value: 0x0000000000000010 +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... + + +# CHECK:defined-atoms: +# CHECK: - name: _foo +# CHECK: scope: global +# CHECK: content: [ 04, 10, 9F, E5, 04, 20, 9F, E5, 1E, FF, 2F, E1 ] +# CHECK: dead-strip: never +# CHECK: references: +# CHECK: - kind: layout-after +# CHECK: offset: 0 +# CHECK: target: constants1 +# CHECK: - name: constants1 +# CHECK: content: [ 78, 56, 34, 12 ] +# CHECK: dead-strip: never +# CHECK: references: +# CHECK: - kind: layout-after +# CHECK: offset: 0 +# CHECK: target: constants2 +# CHECK: - name: constants2 +# CHECK: content: [ 21, 43, 65, 87 ] +# CHECK: dead-strip: never diff --git a/lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml b/lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml new file mode 100644 index 000000000000..02200908d7ca --- /dev/null +++ b/lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml @@ -0,0 +1,124 @@ +# RUN: ld64.lld.darwinold -arch arm64 -r %s -o %t +# RUN: ld64.lld.darwinold -arch arm64 -r %t -o %t2 +# RUN: llvm-objdump -s --section="__eh_frame" %t | FileCheck %s +# RUN: llvm-objdump -s --section="__eh_frame" %t2 | FileCheck %s + +# The reference from FDE->CIE is implicitly created as a negDelta32. +# We don't emit these in to the binary as relocations, so we need to +# make sure that the offset in the FDE to the CIE is the correct value. 
+# CHECK: {{[0-9abcdef]*}} 10000000 00000000 017a5200 01781e01 +# CHECK: {{[0-9abcdef]*}} 100c1f00 20000000 18000000 b8ffffff +# Note, this one that matters ^~~~~~~~ +# It needs to be 0x18 as that is the offset back to 0 where the CIE is. +# CHECK: {{[0-9abcdef]*}} ffffffff 20000000 00000000 00480e10 +# CHECK: {{[0-9abcdef]*}} 9e019d02 00000000 + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x91, + 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x80, 0x52, + 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6 ] + relocations: + - offset: 0x00000010 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 6 + - offset: 0x0000000C + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000008 + type: ARM64_RELOC_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000020 + content: [ 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, + 0x72, 0x6C, 0x64, 0x00 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000030 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + 
address: 0x0000000000000050 + content: [ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x1E, 0x01, + 0x10, 0x0C, 0x1F, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x94, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x0E, 0x10, + 0x9E, 0x01, 0x9D, 0x02, 0x00, 0x00, 0x00, 0x00 ] +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: L_str + type: N_SECT + sect: 2 + value: 0x0000000000000020 + - name: ltmp1 + type: N_SECT + sect: 2 + value: 0x0000000000000020 + - name: ltmp2 + type: N_SECT + sect: 3 + value: 0x0000000000000030 + - name: ltmp3 + type: N_SECT + sect: 4 + value: 0x0000000000000050 +global-symbols: + - name: __Z3fooi + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _puts + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +page-size: 0x00000000 +... diff --git a/lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml b/lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml new file mode 100644 index 000000000000..d0f7389a4cbe --- /dev/null +++ b/lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml @@ -0,0 +1,65 @@ +# RUN: not ld64.lld.darwinold -arch arm64 %s -r \ +# RUN: 2> %t.err +# RUN: FileCheck %s < %t.err + + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xFF, 0x83, 0x00, 0xD1, 0xE0, 0x0B, 0x00, 0xF9, + 0x08, 0x00, 0x40, 0xB9, 0x08, 0x0D, 0x00, 0x71, + 0x08, 0x09, 0x00, 0x71, 0xE8, 0x0F, 0x00, 0xB9, + 0xC8, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x14, + 0xE8, 0x03, 0x00, 0x32, 0x08, 0x01, 0x00, 0x12, + 0xE8, 0x7F, 0x00, 0x39, 0x02, 0x00, 0x00, 0x14 ] + - segment: __DATA + section: __data + type: S_REGULAR + 
attributes: [ ] + alignment: 8 + address: 0x000000000001C348 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + +# Make sure that the offsets of the subtractor and unsigned both match. +# CHECK: bad relocation (paired relocs must have the same offset) in section __DATA/__data (r1_address=1, r1_type=1, r1_extern=1, r1_length=3, r1_pcrel=0, r1_symbolnum=1), (r2_address=0, r2_type=0, r2_extern=1, r2_length=3, r2_pcrel=0, r2_symbolnum=1) + - offset: 0x00000001 + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000000 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 +global-symbols: + - name: _f1 + type: N_SECT + sect: 2 + value: 0x000000000001C348 + - name: _f2 + type: N_SECT + sect: 1 + value: 0x0000000000000010 + - name: _f3 + type: N_SECT + sect: 1 + value: 0x0000000000000020 diff --git a/lld/test/mach-o/arm64-section-order.yaml b/lld/test/mach-o/arm64-section-order.yaml new file mode 100644 index 000000000000..e4174b64f67c --- /dev/null +++ b/lld/test/mach-o/arm64-section-order.yaml @@ -0,0 +1,67 @@ +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 +# RUN: llvm-objdump --section-headers %t | FileCheck %s +# RUN: llvm-objdump --section-headers %t2 | FileCheck %s + +# Make sure that the sections are sorted. 
Currently we want this order: +# __text, __unwind_info + +# CHECK: Sections: +# CHECK: 0 __text {{.*}} TEXT +# CHECK: 1 __compact_unwind {{.*}} + + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 8 + address: 0x0000000000000000 + content: [ 0xC0, 0x03, 0x5F, 0xD6, 0xC0, 0x03, 0x5F, 0xD6 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000008 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000020 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 +global-symbols: + - name: __Z3fooi + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: __Z4foo2i + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000004 diff --git a/lld/test/mach-o/bind-opcodes.yaml b/lld/test/mach-o/bind-opcodes.yaml new file mode 100644 index 000000000000..4a33b54cb4e1 --- /dev/null +++ b/lld/test/mach-o/bind-opcodes.yaml @@ -0,0 +1,140 @@ +# RUN: ld64.lld.darwinold -arch arm64 %s %p/Inputs/hello-world-arm64.yaml -o %t +# RUN: obj2yaml %t | FileCheck %s +# + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: 
S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xF9, + 0x00, 0x01, 0x40, 0xF9, 0x01, 0x00, 0x00, 0x90, + 0x21, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x94, + 0x00, 0x00, 0x80, 0x52, 0xFD, 0x7B, 0xC1, 0xA8, + 0xC0, 0x03, 0x5F, 0xD6 ] + relocations: + - offset: 0x0000001C + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x00000018 + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000014 + type: ARM64_RELOC_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000000C + type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000008 + type: ARM64_RELOC_GOT_LOAD_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x000000000000002C + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: l_.str + type: N_SECT + sect: 2 + value: 0x000000000000002C + - name: ltmp1 + type: N_SECT + sect: 2 + value: 0x000000000000002C +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: ___stdoutp + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _fprintf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ + +# CHECK: BindOpcodes: +# CHECK: - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM +# CHECK: Imm: 1 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK: Imm: 0 +# CHECK: Symbol: dyld_stub_binder +# CHECK: - Opcode: BIND_OPCODE_SET_TYPE_IMM +# CHECK: Imm: 1 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB +# CHECK: Imm: 2 +# CHECK: ULEBExtraData: [ 0x0 ] +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_DO_BIND +# CHECK: Imm: 0 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK: Imm: 0 +# CHECK: Symbol: ___stdoutp +# CHECK: - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB +# CHECK: Imm: 2 +# CHECK: ULEBExtraData: [ 0x10 ] +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_DO_BIND +# CHECK: Imm: 0 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_DONE +# CHECK: Imm: 0 +# CHECK: Symbol: '' + +# CHECK: LazyBindOpcodes: +# CHECK: - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB +# CHECK: Imm: 2 +# CHECK: ULEBExtraData: [ 0x18 ] +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM +# CHECK: Imm: 1 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK: Imm: 0 +# CHECK: Symbol: _fprintf +# CHECK: - Opcode: BIND_OPCODE_DO_BIND +# CHECK: Imm: 0 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_DONE +# CHECK: Imm: 0 +# CHECK: Symbol: '' +# CHECK: - Opcode: BIND_OPCODE_DONE +# CHECK: Imm: 0 +# CHECK: Symbol: '' \ No newline at end of file diff --git a/lld/test/mach-o/cstring-sections.yaml b/lld/test/mach-o/cstring-sections.yaml new file mode 100644 index 000000000000..251df8e3587e --- /dev/null +++ b/lld/test/mach-o/cstring-sections.yaml @@ -0,0 +1,65 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s +# +# Test -keep_private_externs in -r mode. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __objc_methname + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x61, 0x62, 0x63, 0x00, 0x64, 0x65, 0x66, 0x00 ] + - segment: __TEXT + section: __objc_classname + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000006 + content: [ 0x61, 0x62, 0x63, 0x00, 0x67, 0x68, 0x69, 0x00 ] + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x000000000000000A + content: [ 0x61, 0x62, 0x63, 0x00, 0x6A, 0x6B, 0x6C, 0x00 ] + + +... + +# CHECK: defined-atoms: +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 61, 62, 63, 00 ] +# CHECK: merge: by-content +# CHECK: section-choice: custom-required +# CHECK: section-name: '__TEXT/__objc_methname' +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 64, 65, 66, 00 ] +# CHECK: merge: by-content +# CHECK: section-choice: custom-required +# CHECK: section-name: '__TEXT/__objc_methname' +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 61, 62, 63, 00 ] +# CHECK: merge: by-content +# CHECK: section-choice: custom-required +# CHECK: section-name: '__TEXT/__objc_classname' +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 67, 68, 69, 00 ] +# CHECK: merge: by-content +# CHECK: section-choice: custom-required +# CHECK: section-name: '__TEXT/__objc_classname' +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 61, 62, 63, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 6A, 6B, 6C, 00 ] +# CHECK: merge: by-content diff --git a/lld/test/mach-o/data-in-code-load-command.yaml b/lld/test/mach-o/data-in-code-load-command.yaml new file mode 100644 index 000000000000..e2131783619a --- /dev/null +++ b/lld/test/mach-o/data-in-code-load-command.yaml @@ -0,0 +1,35 
@@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -no_data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -data_in_code_info -no_data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -r && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -r -data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -r -no_data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + 
value: 0x0000000000000000 +... + +# CHECK: Load command {{[0-9]*}} +# CHECK: cmd LC_DATA_IN_CODE +# CHECK: cmdsize 16 +# CHECK: dataoff +# CHECK: datasize + +# NO_DATA_IN_CODE_INFO-NOT: LC_DATA_IN_CODE diff --git a/lld/test/mach-o/data-only-dylib.yaml b/lld/test/mach-o/data-only-dylib.yaml new file mode 100644 index 000000000000..f865755e3c52 --- /dev/null +++ b/lld/test/mach-o/data-only-dylib.yaml @@ -0,0 +1,27 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-nm %t | FileCheck %s +# +# Test that a data-only dylib can be built. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _myData + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... + +# CHECK: _myData diff --git a/lld/test/mach-o/dead-strip-globals.yaml b/lld/test/mach-o/dead-strip-globals.yaml new file mode 100644 index 000000000000..cacc44f4b93c --- /dev/null +++ b/lld/test/mach-o/dead-strip-globals.yaml @@ -0,0 +1,31 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -dead_strip -export_dynamic %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t.dylib -print_atoms | FileCheck -check-prefix=CHECK1 %s +# RUN: ld64.lld.darwinold -arch x86_64 -export_dynamic -dead_strip %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t.dylib -print_atoms | FileCheck -check-prefix=CHECK1 %s +# RUN: ld64.lld.darwinold -arch x86_64 -dead_strip %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t2.dylib -print_atoms | FileCheck -check-prefix=CHECK2 %s + +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t3.o +# RUN: llvm-nm -m %t3.o | FileCheck -check-prefix=RELOCATABLE_SYMBOLS %s + +# +# Test that -export_dynamic -dead-strip from removing globals. 
+# + +--- +defined-atoms: + - name: def + scope: global + dead-strip: never + - name: dead + scope: global +shared-library-atoms: + - name: dyld_stub_binder + load-name: /usr/lib/libSystem.B.dylib + type: unknown +... + +# CHECK1: name: def +# CHECK1: name: dead + +# CHECK2: name: def +# CHECK2-NOT: name: dead + +# RELOCATABLE_SYMBOLS: external def diff --git a/lld/test/mach-o/debug-syms.yaml b/lld/test/mach-o/debug-syms.yaml new file mode 100644 index 000000000000..901c2528fc72 --- /dev/null +++ b/lld/test/mach-o/debug-syms.yaml @@ -0,0 +1,249 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -o %t %s -dylib %p/Inputs/x86_64/libSystem.yaml && \ +# RUN: llvm-nm --no-sort --debug-syms %t | FileCheck %s + +# CHECK: 0000000000000000 - 00 0000 SO /Users/lhames/Projects/lld/lld-svn-tot/scratch/ +# CHECK-NEXT: 0000000000000000 - 00 0000 SO hw.c +# CHECK-NEXT: {{[0-9a-f]+}} - 03 0001 OSO {{.*}}{{/|\\}}test{{/|\\}}mach-o{{/|\\}}debug-syms.yaml +# CHECK-NEXT: 0000000000000fa0 - 01 0000 BNSYM +# CHECK-NEXT: 0000000000000fa0 - 01 0000 FUN _main +# CHECK-NEXT: 0000000000000016 - 00 0000 FUN +# CHECK-NEXT: 0000000000000016 - 01 0000 ENSYM +# CHECK-NEXT: 0000000000000000 - 01 0000 SO + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +min-os-version-kind: LC_VERSION_MIN_MACOSX +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 16 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0xC7, 0x45, + 0xFC, 0x00, 0x00, 0x00, 0x00, 0x89, 0x7D, 0xF8, + 0x48, 0x89, 0x75, 0xF0, 0x5D, 0xC3 ] + - segment: __DWARF + section: __debug_str + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x0000000000000016 + content: [ 0x41, 0x70, 0x70, 0x6C, 0x65, 0x20, 0x4C, 0x4C, + 0x56, 0x4D, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6F, 0x6E, 0x20, 0x38, 0x2E, 0x30, 0x2E, 
0x30, + 0x20, 0x28, 0x63, 0x6C, 0x61, 0x6E, 0x67, 0x2D, + 0x38, 0x30, 0x30, 0x2E, 0x30, 0x2E, 0x32, 0x34, + 0x2E, 0x31, 0x29, 0x00, 0x68, 0x77, 0x2E, 0x63, + 0x00, 0x2F, 0x55, 0x73, 0x65, 0x72, 0x73, 0x2F, + 0x6C, 0x68, 0x61, 0x6D, 0x65, 0x73, 0x2F, 0x50, + 0x72, 0x6F, 0x6A, 0x65, 0x63, 0x74, 0x73, 0x2F, + 0x6C, 0x6C, 0x64, 0x2F, 0x6C, 0x6C, 0x64, 0x2D, + 0x73, 0x76, 0x6E, 0x2D, 0x74, 0x6F, 0x74, 0x2F, + 0x73, 0x63, 0x72, 0x61, 0x74, 0x63, 0x68, 0x00, + 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x69, 0x6E, 0x74, + 0x00, 0x61, 0x72, 0x67, 0x63, 0x00, 0x61, 0x72, + 0x67, 0x76, 0x00, 0x63, 0x68, 0x61, 0x72, 0x00 ] + - segment: __DWARF + section: __debug_loc + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x000000000000008E + - segment: __DWARF + section: __debug_abbrev + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x000000000000008E + content: [ 0x01, 0x11, 0x01, 0x25, 0x0E, 0x13, 0x05, 0x03, + 0x0E, 0x10, 0x06, 0x1B, 0x0E, 0x11, 0x01, 0x12, + 0x01, 0x00, 0x00, 0x02, 0x2E, 0x01, 0x11, 0x01, + 0x12, 0x01, 0x40, 0x0A, 0x03, 0x0E, 0x3A, 0x0B, + 0x3B, 0x0B, 0x27, 0x0C, 0x49, 0x13, 0x3F, 0x0C, + 0x00, 0x00, 0x03, 0x05, 0x00, 0x02, 0x0A, 0x03, + 0x0E, 0x3A, 0x0B, 0x3B, 0x0B, 0x49, 0x13, 0x00, + 0x00, 0x04, 0x24, 0x00, 0x03, 0x0E, 0x3E, 0x0B, + 0x0B, 0x0B, 0x00, 0x00, 0x05, 0x0F, 0x00, 0x49, + 0x13, 0x00, 0x00, 0x00 ] + - segment: __DWARF + section: __debug_info + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x00000000000000DA + content: [ 0x7F, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x56, 0x60, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, + 0x6A, 0x00, 0x00, 0x00, 0x01, 0x03, 0x02, 0x91, + 0x78, 0x69, 0x00, 
0x00, 0x00, 0x01, 0x01, 0x6A, + 0x00, 0x00, 0x00, 0x03, 0x02, 0x91, 0x70, 0x6E, + 0x00, 0x00, 0x00, 0x01, 0x01, 0x71, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x65, 0x00, 0x00, 0x00, 0x05, + 0x04, 0x05, 0x76, 0x00, 0x00, 0x00, 0x05, 0x7B, + 0x00, 0x00, 0x00, 0x04, 0x73, 0x00, 0x00, 0x00, + 0x06, 0x01, 0x00 ] + relocations: + - offset: 0x00000037 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x0000002F + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000026 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x0000001E + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - segment: __DWARF + section: __debug_ranges + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x000000000000015D + - segment: __DWARF + section: __debug_macinfo + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x000000000000015D + content: [ 0x00 ] + - segment: __DWARF + section: __apple_names + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x000000000000015E + content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x6A, 0x7F, 0x9A, 0x7C, + 0x2C, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 ] + - segment: __DWARF + section: __apple_objc + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x000000000000019A + content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF ] + - segment: __DWARF + section: __apple_namespac + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + 
address: 0x00000000000001BE + content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF ] + - segment: __DWARF + section: __apple_types + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x00000000000001E2 + content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, + 0x03, 0x00, 0x05, 0x00, 0x04, 0x00, 0x0B, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x30, 0x80, 0x88, 0x0B, 0x63, 0x20, 0x95, 0x7C, + 0x40, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, + 0x65, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x6A, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x24, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + - segment: __DWARF + section: __apple_exttypes + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x0000000000000248 + content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x06, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + alignment: 8 + address: 0x0000000000000270 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + address: 
0x0000000000000290 + content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, + 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x50, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + - segment: __DWARF + section: __debug_line + type: S_REGULAR + attributes: [ S_ATTR_DEBUG ] + address: 0x00000000000002D0 + content: [ 0x37, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1B, 0x00, + 0x00, 0x00, 0x01, 0x01, 0xFB, 0x0E, 0x0D, 0x00, + 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x01, 0x00, 0x68, 0x77, 0x2E, 0x63, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x02, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x05, 0x03, 0x0A, 0x08, 0x3D, 0x02, 0x02, + 0x00, 0x01, 0x01 ] + relocations: + - offset: 0x00000028 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +page-size: 0x00000000 +... diff --git a/lld/test/mach-o/demangle.yaml b/lld/test/mach-o/demangle.yaml new file mode 100644 index 000000000000..2f1cba527f26 --- /dev/null +++ b/lld/test/mach-o/demangle.yaml @@ -0,0 +1,74 @@ +# REQUIRES: system-linker-mach-o +# +# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: -dylib -o %t %p/Inputs/x86_64/libSystem.yaml 2> %t.err +# RUN: FileCheck %s < %t.err +# +# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: -dylib -o %t %p/Inputs/x86_64/libSystem.yaml -demangle 2> %t.err2 +# RUN: FileCheck %s --check-prefix=DCHECK < %t.err2 +# +# Test -demangle option works on undefined symbol errors. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, + 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x0000000B + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x00000006 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000001 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: __Z1xv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: __Znam + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: __Znotcpp + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... 
+ +# CHECK: __Znotcpp +# CHECK: __Znam +# CHECK: _foo + +# DCHECK: __Znotcpp +# DCHECK: operator new[](unsigned long) +# DCHECK: _foo + diff --git a/lld/test/mach-o/dependency_info.yaml b/lld/test/mach-o/dependency_info.yaml new file mode 100644 index 000000000000..1195c9e91967 --- /dev/null +++ b/lld/test/mach-o/dependency_info.yaml @@ -0,0 +1,19 @@ +# Test -dependency_info option +# +# RUN: ld64.lld.darwinold -arch x86_64 -test_file_usage \ +# RUN: -dependency_info %t.info \ +# RUN: -path_exists /System/Library/Frameworks \ +# RUN: -path_exists /System/Library/Frameworks/Foo.framework/Foo \ +# RUN: -path_exists /Custom/Frameworks \ +# RUN: -path_exists /Custom/Frameworks/Bar.framework/Bar \ +# RUN: -F/Custom/Frameworks \ +# RUN: -framework Bar \ +# RUN: -framework Foo +# RUN: %python %p/Inputs/DependencyDump.py %t.info | FileCheck %s + + +# CHECK: linker-vers: lld +# CHECK: input-file: /Custom/Frameworks{{[/\\]}}Bar.framework{{[/\\]}}Bar +# CHECK: not-found: /Custom/Frameworks{{[/\\]}}Foo.framework{{[/\\]}}Foo +# CHECK: input-file: /System/Library/Frameworks{{[/\\]}}Foo.framework{{[/\\]}}Foo +# CHECK: output-file: a.out diff --git a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml new file mode 100644 index 000000000000..0cb3655d49a1 --- /dev/null +++ b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml @@ -0,0 +1,208 @@ +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s +# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t | FileCheck --check-prefix=CODE %s +# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t2 | FileCheck --check-prefix=CODE %s + + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ 
S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x91, + 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x80, 0x52, + 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6 ] + relocations: + - offset: 0x00000010 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 9 + - offset: 0x0000000C + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000008 + type: ARM64_RELOC_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000020 + content: [ 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, + 0x72, 0x6C, 0x64, 0x00 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000030 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + address: 0x0000000000000050 + content: [ 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, + 0x1E, 0x07, 0x00, 0x9D, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x10, 0x0C, 0x1F, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x88, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x48, 0x0E, 0x10, 0x9E, 0x01, 0x9D, 0x02 ] + - segment: __TEXT + section: __gcc_except_tab + type: S_REGULAR + attributes: [ ] + address: 
0x00000000000000A0 + content: [ 0x00, 0x00, 0x00, 0x00 ] +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: L_str + type: N_SECT + sect: 2 + value: 0x0000000000000020 + - name: ltmp1 + type: N_SECT + sect: 2 + value: 0x0000000000000020 + - name: ltmp2 + type: N_SECT + sect: 3 + value: 0x0000000000000030 + - name: ltmp3 + type: N_SECT + sect: 4 + value: 0x0000000000000050 + - name: ltmp4 + type: N_SECT + sect: 4 + value: 0x0000000000000070 +global-symbols: + - name: __Z3fooi + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: __gxx_personality_v0 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _puts + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +page-size: 0x00000000 + +# CHECK: defined-atoms: +# CHECK: - ref-name: L{{[0-9]*}} +# CHECK: scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 48, 65, 6C, 6C, 6F, 20, 77, 6F, 72, 6C, 64, 00 ] +# CHECK: merge: by-content +# CHECK: - ref-name: L{{[0-9]*}} +# CHECK: type: unwind-cfi +# CHECK: content: [ 1C, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 50, 4C, +# CHECK: 52, 00, 01, 78, 1E, 07, 00, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, +# CHECK: {{..}}, {{..}}, {{..}}, 00, 10, 0C, 1F, 00 ] +# CHECK: - type: unwind-cfi +# CHECK: content: [ 24, 00, 00, 00, 24, 00, 00, 00, {{..}}, {{..}}, {{..}}, {{..}}, +# CHECK: {{..}}, {{..}}, {{..}}, {{..}}, 20, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 08, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, 48, 0E, 10, +# CHECK: 9E, 01, 9D, 02 ] +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: L{{[0-9]*}} +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 8 +# CHECK: target: __Z3fooi +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 25 +# CHECK: target: L{{[0-9]*}} +# CHECK: - ref-name: L{{[0-9]*}} +# CHECK: type: unwind-lsda 
+# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: - type: compact-unwind +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 20, 00, 00, 00, +# CHECK: 00, 00, 00, 03, 00, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: alignment: 8 +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: __Z3fooi +# CHECK: - name: __Z3fooi +# CHECK: scope: global +# CHECK: content: [ FD, 7B, BF, A9, FD, 03, 00, 91, 00, 00, 00, 90, +# CHECK: 00, 00, 00, 91, 00, 00, 00, 94, 00, 00, 80, 52, +# CHECK: FD, 7B, C1, A8, C0, 03, 5F, D6 ] +# CHECK: alignment: 4 +# CHECK: references: +# CHECK: - kind: page21 +# CHECK: offset: 8 +# CHECK: target: L{{[0-9]*}} +# CHECK: - kind: offset12 +# CHECK: offset: 12 +# CHECK: target: L{{[0-9]*}} +# CHECK: - kind: branch26 +# CHECK: offset: 16 +# CHECK: target: _puts + +# Make sure we don't have any relocations in the __eh_frame section +# CODE-NOT: RELOCATION RECORDS FOR [__eh_frame] + +# Also make sure the reloc for the FDE->function is the correct offset +# It should be the offset from the fixup location back to the address +# of the function we are referencing +# CODE: Contents of section __TEXT,__eh_frame: +# This is the CIE: +# CODE-NEXT: {{[0-9abcdef]*}} 1c000000 00000000 017a504c 52000178 +# CODE-NEXT: {{[0-9abcdef]*}} 1e0700bd ffffffff ffffff00 100c1f00 +# This is the FDE: +# CODE-NEXT: {{[0-9abcdef]*}} 24000000 24000000 a8ffffff ffffffff +# This is the important offset for FDE->func ^~~~~~~~ ~~~~~~~~ + +# CODE-NEXT: {{[0-9abcdef]*}} 20000000 00000000 08c3ffff ffffffff +# And this is the offset for FDE->lsda ^~~~~~~~ ~~~~~~ +# CODE-NEXT: {{[0-9abcdef]*}} ff480e10 9e019d02 +# And this byte ^~ diff --git a/lld/test/mach-o/dso_handle.yaml b/lld/test/mach-o/dso_handle.yaml new file mode 100644 index 000000000000..f35f8b1cd6f8 --- /dev/null +++ b/lld/test/mach-o/dso_handle.yaml @@ -0,0 +1,62 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -o %t1 +# RUN: 
llvm-nm -m -n %t1 | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -dead_strip -o %t2 +# RUN: llvm-nm -m -n %t2 | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -dylib -o %t3 +# RUN: llvm-nm -m -n %t3 | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -bundle -o %t4 +# RUN: llvm-nm -m -n %t4 | FileCheck %s +# +# Test that ___dso_handle symbol is available for executables, bundles, and dylibs +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000008 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +global-symbols: + - name: _d + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000008 + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: ___dso_handle + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + + +... 
+ +# CHECK_NOT: ___dso_handle +# CHECK: _main diff --git a/lld/test/mach-o/dylib-install-names.yaml b/lld/test/mach-o/dylib-install-names.yaml new file mode 100644 index 000000000000..869b19bdab17 --- /dev/null +++ b/lld/test/mach-o/dylib-install-names.yaml @@ -0,0 +1,74 @@ +# Check we accept -install_name correctly: +# RUN: ld64.lld.darwinold -arch x86_64 -install_name libwibble.dylib -dylib \ +# RUN: -compatibility_version 2.0 -current_version 5.3 \ +# RUN: %p/Inputs/x86_64/libSystem.yaml %s -o %t.dylib +# RUN: llvm-objdump --private-headers %t.dylib | FileCheck %s --check-prefix=CHECK-BINARY-WRITE + +# Check we read LC_ID_DYLIB correctly: +# RUN: ld64.lld.darwinold -arch x86_64 %p/Inputs/use-dylib-install-names.yaml \ +# RUN: %p/Inputs/x86_64/libSystem.yaml %t.dylib -dylib -o %t2.dylib +# RUN: llvm-objdump --private-headers %t2.dylib | FileCheck %s --check-prefix=CHECK-BINARY-READ + +# Check we default the install-name to the output file: +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -o libwibble.dylib \ +# RUN: -compatibility_version 2.0 -current_version 5.3 \ +# RUN: %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --private-headers libwibble.dylib | FileCheck %s --check-prefix=CHECK-BINARY-WRITE +# RUN: rm -f libwibble.dylib + +# Check -single_module does nothing +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -install_name libwibble.dylib \ +# RUN: -compatibility_version 2.0 -current_version 5.3 \ +# RUN: -single_module -o %t2.dylib %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --private-headers %t2.dylib | FileCheck %s --check-prefix=CHECK-BINARY-WRITE + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, + 0x90, 0x90, 0xC3, 0x90, 0x90, 
0x90, 0x90, 0xC3, + 0x31, 0xC0, 0xC3 ] +local-symbols: + - name: _myStatic + type: N_SECT + sect: 1 + value: 0x000000000000000B +global-symbols: + - name: _myGlobal + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 +... + + +# CHECK-BINARY-WRITE: cmd LC_ID_DYLIB +# CHECK-BINARY-WRITE-NEXT: cmdsize 40 +# CHECK-BINARY-WRITE-NEXT: name libwibble.dylib (offset 24) +# CHECK-BINARY-WRITE-NEXT: time stamp 1 +# CHECK-BINARY-WRITE-NEXT: current version 5.3.0 +# CHECK-BINARY-WRITE-NEXT: compatibility version 2.0.0 + +# CHECK-BINARY-READ: cmd LC_LOAD_DYLIB +# CHECK-BINARY-READ-NEXT: cmdsize 56 +# CHECK-BINARY-READ-NEXT: name /usr/lib/libSystem.B.dylib (offset 24) +# CHECK-BINARY-READ-NEXT: time stamp 2 +# CHECK-BINARY-READ-NEXT: current version 1.0.0 +# CHECK-BINARY-READ-NEXT: compatibility version 1.0.0 + +# CHECK-BINARY-READ: cmd LC_LOAD_DYLIB +# CHECK-BINARY-READ-NEXT: cmdsize 40 +# CHECK-BINARY-READ-NEXT: name libwibble.dylib (offset 24) +# CHECK-BINARY-READ-NEXT: time stamp 2 +# CHECK-BINARY-READ-NEXT: current version 5.3.0 +# CHECK-BINARY-READ-NEXT: compatibility version 2.0.0 diff --git a/lld/test/mach-o/eh-frame-relocs-arm64.yaml b/lld/test/mach-o/eh-frame-relocs-arm64.yaml new file mode 100644 index 000000000000..3d7245e5d114 --- /dev/null +++ b/lld/test/mach-o/eh-frame-relocs-arm64.yaml @@ -0,0 +1,318 @@ +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s +# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t | FileCheck --check-prefix=CODE %s +# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t2 | FileCheck --check-prefix=CODE %s + + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] 
+ alignment: 4 + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC0, 0x03, 0x5F, 0xD6, 0xC0, 0x03, 0x5F, 0xD6, + 0xC0, 0x03, 0x5F, 0xD6 ] + - segment: __TEXT + section: __gcc_except_tab + type: S_REGULAR + attributes: [ ] + address: 0x0000000000000014 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x000000000000001C + content: [ 0x00, 0x00, 0x00, 0x00 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000020 + content: [ 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000020 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + address: 0x0000000000000060 + content: [ 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, + 0x1E, 0x07, 0x9B, 0xED, 0xFF, 0xFF, 0xFF, 0x10, + 0x10, 0x0C, 0x1F, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0xDC, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0xCB, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0E, 0x10, 0x9E, + 0x01, 0x9D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, + 0x1E, 0x07, 0x9B, 0xA9, 0xFF, 0xFF, 
0xFF, 0x10, + 0x10, 0x0C, 0x1F, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x94, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x83, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0E, 0x10, 0x9E, + 0x01, 0x9D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x0000007D + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 6 + - offset: 0x0000007D + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 3 + - offset: 0x0000006C + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 6 + - offset: 0x0000006C + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 8 + - offset: 0x0000005B + type: ARM64_RELOC_POINTER_TO_GOT + length: 2 + pc-rel: true + extern: true + symbol: 10 + - offset: 0x00000035 + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 6 + - offset: 0x00000035 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000024 + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 6 + - offset: 0x00000024 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 7 + - offset: 0x00000013 + type: ARM64_RELOC_POINTER_TO_GOT + length: 2 + pc-rel: true + extern: true + symbol: 9 +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: ltmp1 + type: N_SECT + sect: 2 + value: 0x0000000000000014 + - name: _bar1 + type: N_SECT + sect: 2 + value: 0x0000000000000014 + - name: _bar2 + type: N_SECT + sect: 2 + value: 0x0000000000000018 + - name: ltmp12 + type: N_SECT + sect: 3 + value: 0x000000000000001C + - name: ltmp13 + type: N_SECT + sect: 4 + value: 0x0000000000000020 + - name: ltmp16 + type: N_SECT + sect: 5 + value: 0x0000000000000060 +global-symbols: + - name: __Z3fooi + type: N_SECT + scope: [ N_EXT ] + sect: 1 
+ value: 0x0000000000000008 + - name: __Z4foo2i + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000C + - name: __gxx_personality_v0 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: __gxx_personality_v1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000004 + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000010 + - name: _someData + type: N_SECT + scope: [ N_EXT ] + sect: 3 + value: 0x000000000000001C +page-size: 0x00000000 +... + +# CHECK: --- !native +# CHECK: path: '' +# CHECK: defined-atoms: +# CHECK: - ref-name: L000 +# CHECK: type: unwind-cfi +# CHECK: content: [ 18, 00, 00, 00, 00, 00, 00, 00, 03, 7A, 50, 4C, +# CHECK: 52, 00, 01, 78, 1E, 07, 9B, {{..}}, {{..}}, {{..}}, {{..}}, 10, +# CHECK: 10, 0C, 1F, 00 ] +# CHECK: alignment: 8 +# CHECK: references: +# CHECK: - kind: unwindCIEToPersonalityFunction +# CHECK: offset: 19 +# CHECK: target: __gxx_personality_v0 +# CHECK: - type: unwind-cfi +# CHECK: content: [ 28, 00, 00, 00, 20, 00, 00, 00, {{..}}, {{..}}, {{..}}, {{..}}, +# CHECK: {{..}}, {{..}}, {{..}}, {{..}}, 04, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 08, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, 0E, 10, 9E, +# CHECK: 01, 9D, 02, 00, 00, 00, 00, 00 ] +# CHECK: alignment: 4 mod 8 +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: L000 +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 8 +# CHECK: target: __Z3fooi +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 25 +# CHECK: target: _bar1 +# CHECK: - ref-name: L001 +# CHECK: type: unwind-cfi +# CHECK: content: [ 18, 00, 00, 00, 00, 00, 00, 00, 03, 7A, 50, 4C, +# CHECK: 52, 00, 01, 78, 1E, 07, 9B, {{..}}, {{..}}, {{..}}, {{..}}, 10, +# CHECK: 10, 0C, 1F, 00 ] +# CHECK: alignment: 8 +# CHECK: references: +# CHECK: - kind: unwindCIEToPersonalityFunction +# CHECK: offset: 19 +# CHECK: target: __gxx_personality_v1 +# CHECK: - type: unwind-cfi 
+# CHECK: content: [ 28, 00, 00, 00, 20, 00, 00, 00, {{..}}, {{..}}, {{..}}, {{..}}, +# CHECK: {{..}}, {{..}}, {{..}}, {{..}}, 04, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 08, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, 0E, 10, 9E, +# CHECK: 01, 9D, 02, 00, 00, 00, 00, 00 ] +# CHECK: alignment: 4 mod 8 +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: L001 +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 8 +# CHECK: target: __Z4foo2i +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 25 +# CHECK: target: _bar2 +# CHECK: - name: _bar1 +# CHECK: type: unwind-lsda +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: - name: _bar2 +# CHECK: type: unwind-lsda +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: - name: _someData +# CHECK: scope: global +# CHECK: type: data +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: - name: __gxx_personality_v0 +# CHECK: scope: global +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: alignment: 4 +# CHECK: - name: __gxx_personality_v1 +# CHECK: scope: global +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: alignment: 4 +# CHECK: - name: __Z3fooi +# CHECK: scope: global +# CHECK: content: [ C0, 03, 5F, D6 ] +# CHECK: alignment: 4 +# CHECK: - name: __Z4foo2i +# CHECK: scope: global +# CHECK: content: [ C0, 03, 5F, D6 ] +# CHECK: alignment: 4 +# CHECK: - name: _main +# CHECK: scope: global +# CHECK: content: [ C0, 03, 5F, D6 ] +# CHECK: alignment: 4 +# CHECK: ... 
+ +# # Make sure we don't have any relocations in the __eh_frame section +# CODE-NOT: RELOCATION RECORDS FOR [__eh_frame] + +# Also make sure the reloc for the CIE->personality function is the +# correct offset +# It should be the offset from the fixup location back to the address +# of the function we are referencing +# CODE: Contents of section __TEXT,__eh_frame: +# This is the CIE: +# CODE-NEXT: {{[0-9abcdef]*}} 18000000 00000000 037a504c 52000178 +# CODE-NEXT: {{[0-9abcdef]*}} 1e079bd1 ffffff10 100c1f00 28000000 +# This is the important offset for CIE->pfunc +# ^~~~~~~~~ +# Then we have an FDE starting from 28000000 above +# CODE-NEXT: {{[0-9abcdef]*}} 20000000 c8ffffff ffffffff 04000000 +# CODE-NEXT: {{[0-9abcdef]*}} 00000000 08c3ffff ffffffff ff0e109e +# And a new CIE starts at this 00000018 right below here +# CODE-NEXT: {{[0-9abcdef]*}} 019d0200 00000000 18000000 00000000 +# CODE-NEXT: {{[0-9abcdef]*}} 037a504c 52000178 1e079b8d ffffff10 +# This is the important offset for its CIE->pfunc ^~~~~~~~~ diff --git a/lld/test/mach-o/empty-sections.yaml b/lld/test/mach-o/empty-sections.yaml new file mode 100644 index 000000000000..83cd97aeac39 --- /dev/null +++ b/lld/test/mach-o/empty-sections.yaml @@ -0,0 +1,9 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t +# +# Test that writing empty mach-o sections does not segfault the linker. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +... 
diff --git a/lld/test/mach-o/error-simulator-vs-macosx.yaml b/lld/test/mach-o/error-simulator-vs-macosx.yaml new file mode 100644 index 000000000000..94b73d6c5334 --- /dev/null +++ b/lld/test/mach-o/error-simulator-vs-macosx.yaml @@ -0,0 +1,30 @@ +# RUN: ld64.lld.darwinold -arch i386 -macosx_version_min 10.8 %s %p/Inputs/hello-world-x86.yaml -o %t && llvm-nm -m %t | FileCheck %s +# RUN: not ld64.lld.darwinold -arch i386 -ios_simulator_version_min 5.0 %s %p/Inputs/hello-world-x86.yaml -o %t 2>&1 | FileCheck %s --check-prefix=ERROR +# +# Test that i386 can link with a macos version but gives an error with a simulator version. +# + +--- !mach-o +arch: x86 +OS: Mac OS X +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x90 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... 
+ +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: (undefined) external dyld_stub_binder (from libSystem) + +# ERROR: cannot be linked due to incompatible operating systems diff --git a/lld/test/mach-o/exe-offsets.yaml b/lld/test/mach-o/exe-offsets.yaml new file mode 100644 index 000000000000..65025febf4e4 --- /dev/null +++ b/lld/test/mach-o/exe-offsets.yaml @@ -0,0 +1,45 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -e start %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-readobj --sections %t | FileCheck %s + +# Make sure data gets put at offset + +--- !native +defined-atoms: + - name: start + scope: global + content: [ 90 ] + + - name: _s1 + type: data + content: [ 31, 32, 33, 34 ] + + - name: _s2 + type: zero-fill + size: 8192 + + - name: _s3 + type: zero-fill + size: 100 + + - name: _s4 + type: data + content: [ 01 ] + + +# CHECK-LABEL: Section { +# CHECK: Name: __text +# CHECK: Segment: __TEXT +# CHECK: Size: 0x1 +# CHECK: Offset: 0 + +# CHECK-LABEL: Section { +# CHECK: Name: __data +# CHECK: Segment: __DATA +# CHECK: Size: 0x5 +# CHECK: Offset: 4096 + +# CHECK-LABEL: Section { +# CHECK: Name: __bss +# CHECK: Segment: __DATA +# CHECK: Size: 0x2064 +# CHECK: Offset: 0 diff --git a/lld/test/mach-o/exe-segment-overlap.yaml b/lld/test/mach-o/exe-segment-overlap.yaml new file mode 100644 index 000000000000..f1bf67bd0908 --- /dev/null +++ b/lld/test/mach-o/exe-segment-overlap.yaml @@ -0,0 +1,44 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-readobj --sections --section-data %t | FileCheck %s + +--- !native +defined-atoms: + - name: _main + scope: global + content: [ 90 ] + + - name: _s2 + type: data + content: [ 31, 32, 33, 34 ] + + - name: _kustom + scope: global + type: unknown + content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] + section-choice: custom-required + section-name: __CUST/__custom + + +# CHECK-LABEL: Section { +# CHECK: Name: __text +# CHECK: Segment: __TEXT +# CHECK: Size: 0x1 +# CHECK: 
Offset: 4095 + +# CHECK-LABEL: Section { +# CHECK: Name: __data +# CHECK: Segment: __DATA +# CHECK: Size: 0x4 +# CHECK: Offset: 4096 +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 31323334 +# CHECK-NEXT: ) + +# CHECK-LABEL: Section { +# CHECK: Name: __custom{{ }} +# CHECK: Segment: __CUST{{ }} +# CHECK: Size: 0x8 +# CHECK: Offset: 8192 +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 01020304 05060708 +# CHECK-NEXT: ) diff --git a/lld/test/mach-o/executable-exports.yaml b/lld/test/mach-o/executable-exports.yaml new file mode 100644 index 000000000000..8f0f3146e421 --- /dev/null +++ b/lld/test/mach-o/executable-exports.yaml @@ -0,0 +1,46 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 \ +# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t && \ +# RUN: llvm-objdump --macho --exports-trie %t | FileCheck %s +# +# +# Tests that exports trie builds properly. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3, 0xC3, 0xC3, 0xC3 ] +global-symbols: + - name: _myHidden + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000000 + - name: _myRegular + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myWeak + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_WEAK_DEF ] + value: 0x0000000000000002 + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000003 +... 
+ +# CHECK-NOT: _myHidden +# CHECK: 0x100000FFD _myRegular +# CHECK: 0x100000FFE _myWeak [weak_def] diff --git a/lld/test/mach-o/export-trie-order.yaml b/lld/test/mach-o/export-trie-order.yaml new file mode 100644 index 000000000000..e8819e00b400 --- /dev/null +++ b/lld/test/mach-o/export-trie-order.yaml @@ -0,0 +1,62 @@ +# RUN: ld64.lld.darwinold -arch i386 %s %p/Inputs/hello-world-x86.yaml -o %t +# RUN: llvm-objdump --macho --exports-trie %t | FileCheck %s +# +# Test that the export trie is emitted in order. +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0x58, 0x8D, 0x80, 0x16, 0x00, + 0x00, 0x00, 0x89, 0x04, 0x24, 0xE8, 0xE6, 0xFF, + 0xFF, 0xFF, 0x31, 0xC0, 0x83, 0xC4, 0x08, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000016 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000000E + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000021 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000B + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000021 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _printf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: Exports trie: +# CHECK-NEXT: __mh_execute_header +# CHECK-NEXT: _main diff --git a/lld/test/mach-o/exported_symbols_list-dylib.yaml b/lld/test/mach-o/exported_symbols_list-dylib.yaml new file mode 100644 index 000000000000..1c417d860341 --- /dev/null +++ b/lld/test/mach-o/exported_symbols_list-dylib.yaml @@ -0,0 +1,77 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ +# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t \ +# RUN: -exported_symbols_list %p/Inputs/exported_symbols_list.exp && \ +# RUN: llvm-nm -m %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ +# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t2 \ +# RUN: -exported_symbol _foo -exported_symbol _b && \ +# RUN: llvm-nm -m %t2 | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ +# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t3 \ +# RUN: -unexported_symbol _bar -unexported_symbol _a && \ +# RUN: llvm-nm -m %t3 | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ +# RUN: %s %p/Inputs/x86_64/libSystem.yaml -dead_strip -o %t \ +# RUN: -exported_symbols_list %p/Inputs/exported_symbols_list.exp && \ +# RUN: llvm-nm -m %t | FileCheck -check-prefix=CHECK_DEAD %s +# +# Test -exported_symbols_list and -exported_symbol properly changes visibility. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, + 0x89, 0xE5, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x000000000000000C + content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] + +global-symbols: + - name: _a + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x000000000000000C + - name: _b + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000010 + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000006 + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + + +... + +# CHECK: (__DATA,__data) non-external (was a private external) _a +# CHECK: (__DATA,__data) external _b +# CHECK: (__TEXT,__text) non-external (was a private external) _bar +# CHECK: (__TEXT,__text) external _foo + +# CHECK_DEAD-NOT: (__DATA,__data) non-external (was a private external) _a +# CHECK_DEAD: (__DATA,__data) external _b +# CHECK_DEAD-NOT: (__TEXT,__text) non-external (was a private external) _bar +# CHECK_DEAD: (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/exported_symbols_list-obj.yaml b/lld/test/mach-o/exported_symbols_list-obj.yaml new file mode 100644 index 000000000000..420b9cc60317 --- /dev/null +++ b/lld/test/mach-o/exported_symbols_list-obj.yaml @@ -0,0 +1,67 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -exported_symbol _bar \ +# RUN: && llvm-nm -m %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t2 -keep_private_externs \ +# RUN: -exported_symbol _bar && \ +# RUN: llvm-nm -m %t2 | FileCheck -check-prefix=CHECK_KPE %s +# +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s -o %t3 \ +# RUN: -exported_symbol _foo 2> %t4 
+ +# Test -exported_symbols_list properly changes visibility in -r mode. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, + 0x89, 0xE5, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x000000000000000C + content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] + +global-symbols: + - name: _a + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x000000000000000C + - name: _b + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 2 + value: 0x0000000000000010 + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000006 + - name: _foo + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000000 + + +... + +# CHECK: (__DATA,__data) non-external (was a private external) _a +# CHECK: (__DATA,__data) non-external (was a private external) _b +# CHECK: (__TEXT,__text) external _bar +# CHECK: (__TEXT,__text) non-external (was a private external) _foo + +# CHECK_KPE: (__DATA,__data) non-external (was a private external) _a +# CHECK_KPE: (__DATA,__data) private external _b +# CHECK_KPE: (__TEXT,__text) external _bar +# CHECK_KPE: (__TEXT,__text) private external _foo diff --git a/lld/test/mach-o/exported_symbols_list-undef.yaml b/lld/test/mach-o/exported_symbols_list-undef.yaml new file mode 100644 index 000000000000..85480af7fdae --- /dev/null +++ b/lld/test/mach-o/exported_symbols_list-undef.yaml @@ -0,0 +1,55 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ +# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t -exported_symbol _foobar 2> %t2 +# +# Test -exported_symbol fails if exported symbol not found. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, + 0x89, 0xE5, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x000000000000000C + content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] + +global-symbols: + - name: _a + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x000000000000000C + - name: _b + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000010 + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000006 + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + + +... + +# CHECK: (__DATA,__data) private external _a +# CHECK: (__DATA,__data) external _b +# CHECK: (__TEXT,__text) private external _bar +# CHECK: (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/fat-archive.yaml b/lld/test/mach-o/fat-archive.yaml new file mode 100644 index 000000000000..33631ed3b1f3 --- /dev/null +++ b/lld/test/mach-o/fat-archive.yaml @@ -0,0 +1,45 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t \ +# RUN: -L %p/Inputs -lfoo %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# Test that fat archives are handled. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, + 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, + 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, + 0x48, 0x83, 0xC4, 0x10, 0x5D, 0xC3 ] + relocations: + - offset: 0x00000012 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/filelist.yaml b/lld/test/mach-o/filelist.yaml new file mode 100644 index 000000000000..e2ffa9fce7fe --- /dev/null +++ b/lld/test/mach-o/filelist.yaml @@ -0,0 +1,18 @@ +# RUN: ld64.lld.darwinold -test_file_usage \ +# RUN: -filelist %p/Inputs/full.filelist \ +# RUN: -path_exists /foo/bar/a.o \ +# RUN: -path_exists /foo/bar/b.o \ +# RUN: -path_exists /foo/x.a \ +# RUN: 2>&1 | FileCheck %s +# +# RUN: ld64.lld.darwinold -test_file_usage -t \ +# RUN: -filelist %p/Inputs/partial.filelist,/foo \ +# RUN: -path_exists /foo/bar/a.o \ +# RUN: -path_exists /foo/bar/b.o \ +# RUN: -path_exists /foo/x.a \ +# RUN: 2>&1 | FileCheck %s + + +# CHECK: Found filelist entry /foo/bar/a.o +# CHECK: Found filelist entry /foo/bar/b.o +# CHECK: Found filelist entry /foo/x.a diff --git a/lld/test/mach-o/flat_namespace_undef_error.yaml b/lld/test/mach-o/flat_namespace_undef_error.yaml new file mode 100644 index 000000000000..004ab3b8add3 --- /dev/null +++ b/lld/test/mach-o/flat_namespace_undef_error.yaml @@ -0,0 +1,17 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 
-macosx_version_min 10.9 -flat_namespace -undefined error %s -o %t %p/Inputs/x86_64/libSystem.yaml 2>&1 | FileCheck %s + +--- !native +defined-atoms: + - name: _main + scope: global + content: [ E9, 00, 00, 00, 00 ] + alignment: 16 + references: + - kind: branch32 + offset: 1 + target: _bar +undefined-atoms: + - name: _bar + +# Make sure we error out for -flat_namespace -undefined error. +# CHECK: Undefined symbol: : _bar diff --git a/lld/test/mach-o/flat_namespace_undef_suppress.yaml b/lld/test/mach-o/flat_namespace_undef_suppress.yaml new file mode 100644 index 000000000000..9ad0db86332d --- /dev/null +++ b/lld/test/mach-o/flat_namespace_undef_suppress.yaml @@ -0,0 +1,17 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -flat_namespace -undefined suppress %s -o %t %p/Inputs/x86_64/libSystem.yaml +# +# Sanity check '-flat_namespace -undefined suppress'. +# This should pass without error, even though '_bar' is undefined. + +--- !native +defined-atoms: + - name: _main + scope: global + content: [ E9, 00, 00, 00, 00 ] + alignment: 16 + references: + - kind: branch32 + offset: 1 + target: _bar +undefined-atoms: + - name: _bar diff --git a/lld/test/mach-o/force_load-dylib.yaml b/lld/test/mach-o/force_load-dylib.yaml new file mode 100644 index 000000000000..fb40aa9b67b5 --- /dev/null +++ b/lld/test/mach-o/force_load-dylib.yaml @@ -0,0 +1,45 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %p/Inputs/bar.yaml \ +# RUN: -install_name /usr/lib/libbar.dylib %p/Inputs/x86_64/libSystem.yaml -o %t1.dylib +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -all_load %t1.dylib \ +# RUN: -install_name /usr/lib/libfoo.dylib %p/Inputs/x86_64/libSystem.yaml -o %t +# RUN: llvm-nm -m %t | FileCheck %s +# +# +# Test -all_load does not break linking with dylibs +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, 
S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, + 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000008 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + + +# CHECK: (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/force_load-x86_64.yaml b/lld/test/mach-o/force_load-x86_64.yaml new file mode 100644 index 000000000000..295217c8b3c1 --- /dev/null +++ b/lld/test/mach-o/force_load-x86_64.yaml @@ -0,0 +1,38 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml \ +# RUN: %p/Inputs/libfoo.a %p/Inputs/libbar.a -o %t1 +# RUN: llvm-nm -m -n %t1 | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml \ +# RUN: -force_load %p/Inputs/libfoo.a %p/Inputs/libbar.a -o %t2 +# RUN: llvm-nm -m -n %t2 | FileCheck --check-prefix=CHECKF %s +# +# Test that -force_load causes members of static library to be loaded. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... 
+ +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK-NOT: {{[0-9a-f]+}} (__TEXT,__text) external _main + +# CHECKF: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECKF: {{[0-9a-f]+}} (__TEXT,__text) external _foo +# CHECKF-NOT: {{[0-9a-f]+}} (__TEXT,__text) external _bar diff --git a/lld/test/mach-o/framework-user-paths.yaml b/lld/test/mach-o/framework-user-paths.yaml new file mode 100644 index 000000000000..a96cfed7ab75 --- /dev/null +++ b/lld/test/mach-o/framework-user-paths.yaml @@ -0,0 +1,41 @@ +# +# Test framework and SDK search paths. +# myFrameworks is not an absolute path, so it should not by found in SDK +# /Custom/Frameworks should be found in SDK +# /opt/Frameworks should not be found in SDK +# /System/Library/Frameworks is implicit and should be in SDK +# +# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ +# RUN: -path_exists myFrameworks \ +# RUN: -path_exists myFrameworks/my.framework/my \ +# RUN: -path_exists /opt/Frameworks \ +# RUN: -path_exists /opt/Frameworks/other.framework/other \ +# RUN: -path_exists /Custom/Frameworks \ +# RUN: -path_exists /Custom/Frameworks/Bar.framework/Bar \ +# RUN: -path_exists /System/Library/Frameworks \ +# RUN: -path_exists /System/Library/Frameworks/Foo.framework/Foo \ +# RUN: -path_exists /SDK/myFrameworks \ +# RUN: -path_exists /SDK/myFrameworks/my.framework/my \ +# RUN: -path_exists /SDK/Custom/Frameworks \ +# RUN: -path_exists /SDK/Custom/Frameworks/Bar.framework/Bar \ +# RUN: -path_exists /SDK/System/Library/Frameworks \ +# RUN: -path_exists /SDK/System/Library/Frameworks/Foo.framework/Foo \ +# RUN: -syslibroot /SDK \ +# RUN: -FmyFrameworks \ +# RUN: -F/Custom/Frameworks \ +# RUN: -F/opt/Frameworks \ +# RUN: -framework my \ +# RUN: -framework Bar \ +# RUN: -framework Foo \ +# RUN: -framework other \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: Framework search paths: +# CHECK-NEXT: myFrameworks +# CHECK-NEXT: /SDK/Custom/Frameworks +# CHECK-NEXT: /opt/Frameworks +# CHECK-NEXT: 
/SDK/System/Library/Frameworks +# CHECK: Found framework myFrameworks/my.framework/my +# CHECK: Found framework /SDK/Custom/Frameworks/Bar.framework/Bar +# CHECK: Found framework /SDK/System/Library/Frameworks/Foo.framework/Foo +# CHECK: Found framework /opt/Frameworks/other.framework/other diff --git a/lld/test/mach-o/function-starts-load-command.yaml b/lld/test/mach-o/function-starts-load-command.yaml new file mode 100644 index 000000000000..cb558ad688e2 --- /dev/null +++ b/lld/test/mach-o/function-starts-load-command.yaml @@ -0,0 +1,32 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -function_starts && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -no_function_starts && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -function_starts -no_function_starts && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... 
+ +# CHECK: Load command {{[0-9]*}} +# CHECK: cmd LC_FUNCTION_STARTS +# CHECK: cmdsize 16 +# CHECK: dataoff +# CHECK: datasize + +# NO_FUNCTION_STARTS-NOT: LC_FUNCTION_STARTS diff --git a/lld/test/mach-o/gcc_except_tab-got-arm64.yaml b/lld/test/mach-o/gcc_except_tab-got-arm64.yaml new file mode 100644 index 000000000000..caccf4f2fe14 --- /dev/null +++ b/lld/test/mach-o/gcc_except_tab-got-arm64.yaml @@ -0,0 +1,53 @@ +# RUN: ld64.lld.darwinold -arch arm64 %s \ +# RUN: -dylib %p/Inputs/arm64/libSystem.yaml -o %t +# RUN: llvm-objdump --section-headers %t | FileCheck %s + +# Make sure that the GOT relocation from gcc_except_tab to the data +# is not removed. + +--- !native +defined-atoms: + - name: _main + scope: global + content: [ FD, 7B, BF, A9, FD, 03, 00, 91, FF, 43, 00, D1, + BF, C3, 1F, B8, 00, 00, 00, 94, BF, 03, 00, 91, + FD, 7B, C1, A8, C0, 03, 5F, D6 ] + alignment: 4 + - name: __ZTSP1A + scope: hidden + type: constant + content: [ 50, 31, 41, 00 ] + merge: as-weak + - name: GCC_except_table0 + type: unwind-lsda + content: [ FF, 9B, E7, 80, 00, 03, 5B, 00, 00, 00, 00, 1C, + 00, 00, 00, 00, 00, 00, 00, 00, 1C, 00, 00, 00, + 18, 00, 00, 00, 84, 00, 00, 00, 03, 40, 00, 00, + 00, 10, 00, 00, 00, 94, 00, 00, 00, 03, 60, 00, + 00, 00, 20, 00, 00, 00, B4, 00, 00, 00, 05, 80, + 00, 00, 00, 68, 00, 00, 00, 00, 00, 00, 00, 00, + E8, 00, 00, 00, 08, 00, 00, 00, 28, 01, 00, 00, + 00, F0, 00, 00, 00, 74, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 01, 7D, 01, 00, A8, FF, FF, FF ] + alignment: 4 + references: + - kind: delta32ToGOT + offset: 104 + target: __ZTIP1A + - name: __ZTIP1A + scope: hidden + type: data + content: [ 10, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 80, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00 ] + merge: as-weak + alignment: 16 +shared-library-atoms: + - name: dyld_stub_binder + load-name: /usr/lib/libSystem.B.dylib + type: unknown +... + +# Make sure we have a GOT relocation. 
+# This could only have come from __gcc_except_tab to __ZTIP1A +# CHECK: __got \ No newline at end of file diff --git a/lld/test/mach-o/got-order.yaml b/lld/test/mach-o/got-order.yaml new file mode 100644 index 000000000000..5ebb86042245 --- /dev/null +++ b/lld/test/mach-o/got-order.yaml @@ -0,0 +1,69 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/got-order.yaml \ +# RUN: %p/Inputs/got-order2.yaml -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --macho --bind %t | FileCheck %s +# +# Test that GOT slots are sorted by name +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x0D, 0x00, + 0x00, 0x00, 0x00, 0x48, 0x8B, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x8B, 0x00, 0x03, 0x01, 0x48, 0x8B, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000019 + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x0000000E + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000007 + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 3 +global-symbols: + - name: _func + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _aaa + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _fff + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _zzz + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ + +# CHECK: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _aaa +# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _bar +# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _fff +# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _foo +# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _zazzle +# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _zzz diff --git a/lld/test/mach-o/hello-world-arm64.yaml b/lld/test/mach-o/hello-world-arm64.yaml new file mode 100644 index 000000000000..c2e232233dc6 --- /dev/null +++ b/lld/test/mach-o/hello-world-arm64.yaml @@ -0,0 +1,102 @@ +# RUN: ld64.lld.darwinold -arch arm64 %s %p/Inputs/hello-world-arm64.yaml -o %t +# RUN: llvm-nm -m -n %t | FileCheck %s +# RUN: llvm-objdump --private-headers %t | FileCheck %s --check-prefix=CHECK-PRIVATE-HEADER +# +# Test that arm64 hello-world can be linked into a mach-o executable +# + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xF9, + 0x00, 0x01, 0x40, 0xF9, 0x01, 0x00, 0x00, 0x90, + 0x21, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x94, + 0x00, 0x00, 0x80, 0x52, 0xFD, 0x7B, 0xC1, 0xA8, + 0xC0, 0x03, 0x5F, 0xD6 ] + relocations: + - offset: 0x0000001C + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x00000018 + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000014 + type: ARM64_RELOC_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000000C + type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000008 + type: 
ARM64_RELOC_GOT_LOAD_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x000000000000002C + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: l_.str + type: N_SECT + sect: 2 + value: 0x000000000000002C + - name: ltmp1 + type: N_SECT + sect: 2 + value: 0x000000000000002C +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: ___stdoutp + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _fprintf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: (undefined) external ___stdoutp (from libSystem) +# CHECK: (undefined) external _fprintf (from libSystem) +# CHECK: (undefined) external dyld_stub_binder (from libSystem) +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main + +# CHECK-PRIVATE-HEADER: sectname __stubs +# CHECK-PRIVATE-HEADER-NEXT: segname __TEXT +# CHECK-PRIVATE-HEADER-NEXT: addr +# CHECK-PRIVATE-HEADER-NEXT: size +# CHECK-PRIVATE-HEADER-NEXT: offset +# CHECK-PRIVATE-HEADER-NEXT: align 2^1 (2) diff --git a/lld/test/mach-o/hello-world-armv6.yaml b/lld/test/mach-o/hello-world-armv6.yaml new file mode 100644 index 000000000000..4004c963da89 --- /dev/null +++ b/lld/test/mach-o/hello-world-armv6.yaml @@ -0,0 +1,64 @@ +# RUN: ld64.lld.darwinold -arch armv6 %s %p/Inputs/hello-world-armv6.yaml -o %t +# RUN: llvm-nm -m %t | FileCheck %s +# +# Test that armv6 (arm) hello-world can be linked into a mach-o executable +# + +--- !mach-o +arch: armv6 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x80, 0x40, 
0x2D, 0xE9, 0x10, 0x00, 0x9F, 0xE5, + 0x0D, 0x70, 0xA0, 0xE1, 0x00, 0x00, 0x8F, 0xE0, + 0xFA, 0xFF, 0xFF, 0xEB, 0x00, 0x00, 0xA0, 0xE3, + 0x80, 0x80, 0xBD, 0xE8, 0x0C, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x0000001C + scattered: true + type: ARM_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000020 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000C + - offset: 0x00000010 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000020 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _printf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: (undefined) external _printf (from libSystem) +# CHECK: (undefined) external dyld_stub_binder (from libSystem) diff --git a/lld/test/mach-o/hello-world-armv7.yaml b/lld/test/mach-o/hello-world-armv7.yaml new file mode 100644 index 000000000000..0407e924aa5b --- /dev/null +++ b/lld/test/mach-o/hello-world-armv7.yaml @@ -0,0 +1,76 @@ +# RUN: ld64.lld.darwinold -arch armv7 %s %p/Inputs/hello-world-armv7.yaml -o %t +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# Test that armv7 (thumb) hello-world can be linked into a mach-o executable +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x80, 0xB5, 0x40, 0xF2, 0x06, 0x00, 0x6F, 0x46, + 0xC0, 0xF2, 0x00, 0x00, 0x78, 0x44, 0xFF, 0xF7, + 0xF8, 0xEF, 0x00, 0x20, 0x80, 0xBD ] + relocations: + - offset: 
0x0000000E + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000008 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 3 + pc-rel: false + value: 0x00000016 + - offset: 0x00000006 + scattered: true + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + value: 0x0000000C + - offset: 0x00000002 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000016 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000C + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000016 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 +undefined-symbols: + - name: _printf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: (undefined) external _printf (from libSystem) +# CHECK: (undefined) external dyld_stub_binder (from libSystem) +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external [Thumb] _main diff --git a/lld/test/mach-o/hello-world-x86.yaml b/lld/test/mach-o/hello-world-x86.yaml new file mode 100644 index 000000000000..5c3bc6731cd4 --- /dev/null +++ b/lld/test/mach-o/hello-world-x86.yaml @@ -0,0 +1,62 @@ +# RUN: ld64.lld.darwinold -arch i386 %s %p/Inputs/hello-world-x86.yaml -o %t +# RUN: llvm-nm -m %t | FileCheck %s +# +# Test that i386 hello-world can be linked into a mach-o executable +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0x58, 0x8D, 0x80, 0x16, 0x00, + 0x00, 0x00, 0x89, 0x04, 0x24, 0xE8, 0xE6, 0xFF, + 0xFF, 0xFF, 0x31, 
0xC0, 0x83, 0xC4, 0x08, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000016 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000000E + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000021 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000B + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000021 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _printf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: (undefined) external _printf (from libSystem) +# CHECK: (undefined) external dyld_stub_binder (from libSystem) diff --git a/lld/test/mach-o/hello-world-x86_64.yaml b/lld/test/mach-o/hello-world-x86_64.yaml new file mode 100644 index 000000000000..c49565244a19 --- /dev/null +++ b/lld/test/mach-o/hello-world-x86_64.yaml @@ -0,0 +1,120 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/hello-world-x86_64.yaml \ +# RUN: -o %t +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/hello-world-x86_64.yaml \ +# RUN: -dead_strip -o %t2 +# RUN: llvm-nm -m -n %t2 | FileCheck %s +# +# Test that x86_64 hello-world can be linked into a mach-o executable +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x48, 0x8B, 0x38, 0x48, 0x8D, + 0x35, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, 0xE8, + 
0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, 0x5D, 0xC3 ] + relocations: + - offset: 0x00000018 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x00000011 + type: X86_64_RELOC_SIGNED + length: 2 + pc-rel: true + extern: true + symbol: 0 + - offset: 0x00000007 + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 4 + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000020 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000028 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + address: 0x0000000000000048 + content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, + 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x98, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] +local-symbols: + - name: L1 + type: N_SECT + sect: 2 + value: 0x0000000000000020 + - name: EH_frame0 + type: N_SECT + sect: 4 + value: 0x0000000000000048 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _main.eh + type: N_SECT + scope: [ N_EXT ] + sect: 4 + value: 0x0000000000000060 +undefined-symbols: + - name: ___stdoutp + type: N_UNDF + scope: [ N_EXT ] + value: 
0x0000000000000000 + - name: _fprintf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + +# CHECK: (undefined) external ___stdoutp (from libSystem) +# CHECK: (undefined) external _fprintf (from libSystem) +# CHECK: (undefined) external dyld_stub_binder (from libSystem) +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) [referenced dynamically] external __mh_execute_header +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main diff --git a/lld/test/mach-o/image-base.yaml b/lld/test/mach-o/image-base.yaml new file mode 100644 index 000000000000..c56eed199e5d --- /dev/null +++ b/lld/test/mach-o/image-base.yaml @@ -0,0 +1,28 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 %s -o %t -image_base 31415926000 %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-readobj --macho-segment %t | FileCheck %s +# RUN: not ld64.lld.darwinold -arch x86_64 -image_base 0x31415926530 %s >/dev/null 2> %t +# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-MISPAGED +# RUN: not ld64.lld.darwinold -arch x86_64 -image_base 1000 %s >/dev/null 2> %t +# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-OVERLAP +# RUN: not ld64.lld.darwinold -arch x86_64 -image_base hithere %s >/dev/null 2> %t +# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-NOTHEX + +--- !native +defined-atoms: + - name: _main + scope: global + content: [] + +# CHECK: Segment { +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK: Name: __TEXT +# CHECK-NEXT: Size: 152 +# CHECK-NEXT: vmaddr: 0x31415926000 +# CHECK-NEXT: vmsize: 0x1000 + + +# CHECK-ERROR-MISPAGED: error: image_base must be a multiple of page size (0x1000) + +# CHECK-ERROR-OVERLAP: error: image_base overlaps with __PAGEZERO + +# CHECK-ERROR-NOTHEX: error: image_base expects a hex number diff --git a/lld/test/mach-o/infer-arch.yaml b/lld/test/mach-o/infer-arch.yaml new file mode 100644 index 000000000000..a66d17bc58df --- /dev/null +++ b/lld/test/mach-o/infer-arch.yaml @@ -0,0 +1,29 @@ +# RUN: ld64.lld.darwinold -arch i386 -macosx_version_min 10.8 %s -r 
-o %t \ +# RUN: && ld64.lld.darwinold -r %t -o %t2 -print_atoms | FileCheck %s +# +# Test linker can detect architecture without -arch option. +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3 ] +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + +... + + +# CHECK: defined-atoms: +# CHECK: - name: _foo diff --git a/lld/test/mach-o/interposing-section.yaml b/lld/test/mach-o/interposing-section.yaml new file mode 100644 index 000000000000..340ea8cc4b71 --- /dev/null +++ b/lld/test/mach-o/interposing-section.yaml @@ -0,0 +1,72 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/interposing-section.yaml \ +# RUN: -dylib -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --private-headers %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 %s -r -o %t1 +# RUN: llvm-objdump --private-headers %t1 | FileCheck %s +# +# Test that interposing section is preserved by linker. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, + 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000008 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 2 + - segment: __DATA + section: __interpose + type: S_INTERPOSING + attributes: [ ] + alignment: 8 + address: 0x0000000000000010 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000008 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 +local-symbols: + - name: _my_open + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: __interpose_open + type: N_SECT + sect: 2 + desc: [ N_NO_DEAD_STRIP ] + value: 0x0000000000000010 +undefined-symbols: + - name: _open + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + + +# CHECK: sectname __interposing +# CHECK: segname __DATA +# CHECK: type S_INTERPOSING + diff --git a/lld/test/mach-o/keep_private_externs.yaml b/lld/test/mach-o/keep_private_externs.yaml new file mode 100644 index 000000000000..b8f0e4f7a065 --- /dev/null +++ b/lld/test/mach-o/keep_private_externs.yaml @@ -0,0 +1,63 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t \ +# RUN: && llvm-nm -m %t | FileCheck %s +# +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t2 -keep_private_externs \ +# RUN: && llvm-nm -m %t2 | FileCheck -check-prefix=CHECK_KPE %s +# +# Test -keep_private_externs in -r mode. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, + 0x89, 0xE5, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x000000000000000C + content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] + +global-symbols: + - name: _a + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x000000000000000C + - name: _b + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 2 + value: 0x0000000000000010 + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000006 + - name: _foo + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000000 + + +... + +# CHECK: (__DATA,__data) external _a +# CHECK: (__DATA,__data) non-external (was a private external) _b +# CHECK: (__TEXT,__text) external _bar +# CHECK: (__TEXT,__text) non-external (was a private external) _foo + +# CHECK_KPE: (__DATA,__data) external _a +# CHECK_KPE: (__DATA,__data) private external _b +# CHECK_KPE: (__TEXT,__text) external _bar +# CHECK_KPE: (__TEXT,__text) private external _foo diff --git a/lld/test/mach-o/lazy-bind-x86_64.yaml b/lld/test/mach-o/lazy-bind-x86_64.yaml new file mode 100644 index 000000000000..3bad4c2ad83c --- /dev/null +++ b/lld/test/mach-o/lazy-bind-x86_64.yaml @@ -0,0 +1,111 @@ +# REQUIRES: x86 + +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: %p/Inputs/lazy-bind-x86_64.yaml %p/Inputs/lazy-bind-x86_64-2.yaml \ +# RUN: %p/Inputs/lazy-bind-x86_64-3.yaml -o %t \ +# RUN: %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --macho --lazy-bind %t | FileCheck %s +# RUN: llvm-nm -m %t | FileCheck --check-prefix=CHECK-NM %s +# RUN: llvm-objdump --disassemble %t | FileCheck --check-prefix=CHECK-HELPERS %s 
+# RUN: llvm-objdump --private-headers %t | FileCheck --check-prefix=CHECK-DYLIBS %s +# +# Test that correct two-level namespace ordinals are used for lazy bindings. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0x31, 0xC0, 0xE8, 0x00, 0x00, + 0x00, 0x00, 0x31, 0xC0, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0x31, 0xC0, 0x5D, 0xC3 ] + relocations: + - offset: 0x00000015 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x0000000E + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x00000007 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _baz + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... 
+
+
+# CHECK: libbar _bar
+# CHECK: libbaz _baz
+# CHECK: libfoo _foo
+
+
+# CHECK-NM: (undefined) external _bar (from libbar)
+# CHECK-NM: (undefined) external _baz (from libbaz)
+# CHECK-NM: (undefined) external _foo (from libfoo)
+
+
+# CHECK-HELPERS:Disassembly of section __TEXT,__stub_helper:
+# CHECK-HELPERS: 68 00 00 00 00 pushq $0
+# CHECK-HELPERS: 68 0b 00 00 00 pushq $11
+# CHECK-HELPERS: 68 16 00 00 00 pushq $22
+
+# Make sure the stub helper is correctly aligned
+# CHECK-DYLIBS: sectname __stub_helper
+# CHECK-DYLIBS-NEXT: segname __TEXT
+# CHECK-DYLIBS-NEXT: addr
+# CHECK-DYLIBS-NEXT: size
+# CHECK-DYLIBS-NEXT: offset
+# CHECK-DYLIBS-NEXT: align 2^2 (4)
+
+# Make sure the __nl_symbol_ptr section is used instead of __got as this is x86_64
+# CHECK-DYLIBS: sectname __nl_symbol_ptr
+# CHECK-DYLIBS-NEXT: segname __DATA
+
+# CHECK-DYLIBS: cmd LC_LOAD_DYLIB
+# CHECK-DYLIBS: name /usr/lib/libbar.dylib (offset 24)
+# CHECK-DYLIBS: current version 2.3.0
+# CHECK-DYLIBS: compatibility version 1.0.0
+# CHECK-DYLIBS: cmd LC_LOAD_DYLIB
+# CHECK-DYLIBS: name /usr/lib/libfoo.dylib (offset 24)
+# CHECK-DYLIBS: current version 3.4.0
+# CHECK-DYLIBS: compatibility version 2.0.0
+# CHECK-DYLIBS: cmd LC_LOAD_DYLIB
+# CHECK-DYLIBS: name /usr/lib/libbaz.dylib (offset 24)
+# CHECK-DYLIBS: current version 4.5.0
+# CHECK-DYLIBS: compatibility version 3.0.0
+
+
diff --git a/lld/test/mach-o/lc_segment_filesize.yaml b/lld/test/mach-o/lc_segment_filesize.yaml
new file mode 100644
index 000000000000..fea5008adbc5
--- /dev/null
+++ b/lld/test/mach-o/lc_segment_filesize.yaml
@@ -0,0 +1,31 @@
+# RUN: ld64.lld.darwinold -arch x86_64 -r -o %t %s && llvm-objdump --private-headers %t | FileCheck %s
+
+# CHECK: filesize 19
+
+--- !mach-o
+arch: x86_64
+file-type: MH_OBJECT
+flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+sections:
+  - segment: __TEXT
+    section: __text
+    type: S_REGULAR
+    attributes: [ S_ATTR_PURE_INSTRUCTIONS ]
+    alignment: 16
+    address: 0x0000000000000000
+    content: [ 0x00, 0x00,
0x00 ] + - segment: __TEXT + section: __alt + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + alignment: 16 + address: 0x0000000000000010 + content: [ 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +page-size: 0x00000000 +... diff --git a/lld/test/mach-o/lib-search-paths.yaml b/lld/test/mach-o/lib-search-paths.yaml new file mode 100644 index 000000000000..29c5e62ce429 --- /dev/null +++ b/lld/test/mach-o/lib-search-paths.yaml @@ -0,0 +1,16 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -syslibroot %p/Inputs/lib-search-paths -lmyshared -lmystatic -lfile.o -r -print_atoms 2>&1 | FileCheck %s + +--- !native +undefined-atoms: + - name: _from_myshared + - name: _from_mystatic + - name: _from_fileo + +# CHECK: defined-atoms: +# CHECK: - name: _from_fileo +# CHECK: content: [ 2A, 00, 00, 00 ] +# CHECK: - name: _from_mystatic +# CHECK: content: [ 02, 00, 00, 00 ] +# CHECK: shared-library-atoms: +# CHECK: - name: _from_myshared +# CHECK: load-name: libmyshared.dylib diff --git a/lld/test/mach-o/library-order.yaml b/lld/test/mach-o/library-order.yaml new file mode 100644 index 000000000000..02d31c578a4b --- /dev/null +++ b/lld/test/mach-o/library-order.yaml @@ -0,0 +1,45 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %p/Inputs/libfoo.a %s -o %t \ +# RUN: %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# Test that if library is before object file on command line, it still is used. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, + 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, + 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, + 0x48, 0x83, 0xC4, 0x10, 0x5D, 0xC3 ] + relocations: + - offset: 0x00000012 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/library-rescan.yaml b/lld/test/mach-o/library-rescan.yaml new file mode 100644 index 000000000000..138a696fe5ff --- /dev/null +++ b/lld/test/mach-o/library-rescan.yaml @@ -0,0 +1,46 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %p/Inputs/libfoo.a %p/Inputs/libbar.a \ +# RUN: %s -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# Test that static libraries are automatically rescanned (bar needs foo). 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, + 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, + 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, + 0x48, 0x83, 0xC4, 0x10, 0x5D, 0xC3 ] + relocations: + - offset: 0x00000012 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _bar +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/libresolve-bizarre-root-override.yaml b/lld/test/mach-o/libresolve-bizarre-root-override.yaml new file mode 100644 index 000000000000..0fda09a9b8d8 --- /dev/null +++ b/lld/test/mach-o/libresolve-bizarre-root-override.yaml @@ -0,0 +1,17 @@ +# RUN: not ld64.lld.darwinold -test_file_usage -v \ +# RUN: -path_exists /usr/lib \ +# RUN: -path_exists /Applications/MySDK/usr/local/lib \ +# RUN: -path_exists /Applications/MySDK/usr/lib \ +# RUN: -path_exists /Applications/MySDK/usr/lib/libSystem.dylib \ +# RUN: -syslibroot /Applications/MySDK \ +# RUN: -syslibroot / \ +# RUN: -lSystem \ +# RUN: 2>&1 | FileCheck %s + +# When the last -syslibroot is simply "/", all of them get discarded. So in this +# case, only /usr/lib should show up. 
+ +# CHECK: Library search paths: +# CHECK: /usr/lib +# CHECK-NOT: /usr/local/lib +# CHECK: Unable to find library for -lSystem diff --git a/lld/test/mach-o/libresolve-multiple-syslibroots.yaml b/lld/test/mach-o/libresolve-multiple-syslibroots.yaml new file mode 100644 index 000000000000..66627056afd3 --- /dev/null +++ b/lld/test/mach-o/libresolve-multiple-syslibroots.yaml @@ -0,0 +1,17 @@ +# RUN: ld64.lld.darwinold -test_file_usage -v \ +# RUN: -path_exists /usr/lib \ +# RUN: -path_exists /Applications/MyFirstSDK/usr/local/lib \ +# RUN: -path_exists /Applications/MySecondSDK/usr/local/lib \ +# RUN: -path_exists /Applications/MyFirstSDK/usr/local/lib/libSystem.a \ +# RUN: -path_exists /Applications/MySecondSDK/usr/local/lib/libSystem.a \ +# RUN: -syslibroot /Applications/MyFirstSDK \ +# RUN: -syslibroot /Applications/MySecondSDK \ +# RUN: -lSystem \ +# RUN: 2>&1 | FileCheck %s + + +# CHECK: Library search paths: +# CHECK: /usr/lib +# CHECK: /Applications/MyFirstSDK/usr/local/lib +# CHECK: /Applications/MySecondSDK/usr/local/lib +# CHECK: Found library /Applications/MyFirstSDK/usr/local/lib/libSystem.a diff --git a/lld/test/mach-o/libresolve-one-syslibroot.yaml b/lld/test/mach-o/libresolve-one-syslibroot.yaml new file mode 100644 index 000000000000..7ca2670a7277 --- /dev/null +++ b/lld/test/mach-o/libresolve-one-syslibroot.yaml @@ -0,0 +1,25 @@ +# RUN: ld64.lld.darwinold -test_file_usage -v \ +# RUN: -path_exists /usr/lib \ +# RUN: -path_exists /Applications/MySDK/usr/local/lib \ +# RUN: -path_exists /Applications/MySDK/usr/local/lib/libSystem.a \ +# RUN: -path_exists /hasFoo \ +# RUN: -path_exists /hasFoo/foo.o \ +# RUN: -syslibroot /Applications/MySDK \ +# RUN: -L/hasFoo \ +# RUN: -lSystem -lfoo.o \ +# RUN: 2>&1 | FileCheck %s + +# When just one -syslibroot is specified, we apparently want to skip *system* +# paths that aren't found. User ones should still get added. 
In this case +# /usr/lib exists, but not the equivalent in the -syslibroot, so there should be +# no mention of /usr/lib. + +# CHECK: Library search paths: +# CHECK: /hasFoo +# CHECK-NOT: /usr/lib +# CHECK-NOT: /usr/local/lib +# CHECK: /Applications/MySDK/usr/local/lib +# CHECK-NOT: /usr/lib +# CHECK-NOT: /usr/local/lib +# CHECK: Found library /Applications/MySDK/usr/local/lib/libSystem.a +# CHECK: Found library /hasFoo/foo.o diff --git a/lld/test/mach-o/libresolve-simple.yaml b/lld/test/mach-o/libresolve-simple.yaml new file mode 100644 index 000000000000..00e35734ebde --- /dev/null +++ b/lld/test/mach-o/libresolve-simple.yaml @@ -0,0 +1,21 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ +# RUN: -path_exists /usr/lib \ +# RUN: -path_exists /usr/local/lib \ +# RUN: -path_exists /usr/lib/libSystem.dylib \ +# RUN: -path_exists hasFoo \ +# RUN: -path_exists hasFoo/libFoo.dylib \ +# RUN: -path_exists /hasBar \ +# RUN: -path_exists /hasBar/libBar.dylib \ +# RUN: -L hasFoo \ +# RUN: -L /hasBar \ +# RUN: -lSystem -lFoo -lBar \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: Library search paths: +# CHECK: hasFoo +# CHECK: /hasBar +# CHECK: /usr/lib +# CHECK: /usr/local/lib +# CHECK: Found library /usr/lib/libSystem.dylib +# CHECK: Found library hasFoo/libFoo.dylib +# CHECK: Found library /hasBar/libBar.dylib diff --git a/lld/test/mach-o/libresolve-user-paths.yaml b/lld/test/mach-o/libresolve-user-paths.yaml new file mode 100644 index 000000000000..3fbb205eba5f --- /dev/null +++ b/lld/test/mach-o/libresolve-user-paths.yaml @@ -0,0 +1,20 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ +# RUN: -path_exists hasFoo \ +# RUN: -path_exists hasFoo/libFoo.dylib \ +# RUN: -path_exists /hasBar \ +# RUN: -path_exists /hasBar/libBar.dylib \ +# RUN: -path_exists /SDK/hasFoo \ +# RUN: -path_exists /SDK/hasFoo/libFoo.dylib \ +# RUN: -path_exists /SDK/hasBar \ +# RUN: -path_exists /SDK/hasBar/libBar.dylib \ +# RUN: -syslibroot /SDK \ +# RUN: -L hasFoo \ +# 
RUN: -L /hasBar \ +# RUN: -lFoo -lBar \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: Library search paths: +# CHECK: hasFoo +# CHECK: /SDK/hasBar +# CHECK: Found library hasFoo/libFoo.dylib +# CHECK: Found library /SDK/hasBar/libBar.dylib diff --git a/lld/test/mach-o/libresolve-z.yaml b/lld/test/mach-o/libresolve-z.yaml new file mode 100644 index 000000000000..aaf98ebec46b --- /dev/null +++ b/lld/test/mach-o/libresolve-z.yaml @@ -0,0 +1,21 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ +# RUN: -path_exists /usr/lib \ +# RUN: -path_exists /usr/local/lib \ +# RUN: -path_exists /usr/lib/libSystem.dylib \ +# RUN: -path_exists hasFoo \ +# RUN: -path_exists hasFoo/libFoo.dylib \ +# RUN: -path_exists /hasBar \ +# RUN: -path_exists /hasBar/libBar.dylib \ +# RUN: -L hasFoo \ +# RUN: -L /hasBar \ +# RUN: -Z \ +# RUN: -lFoo -lBar \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: Library search paths: +# CHECK: hasFoo +# CHECK: /hasBar +# CHECK-NOT: /usr/lib +# CHECK-NOT: /usr/local/lib +# CHECK: Found library hasFoo/libFoo.dylib +# CHECK: Found library /hasBar/libBar.dylib diff --git a/lld/test/mach-o/lit.local.cfg b/lld/test/mach-o/lit.local.cfg new file mode 100644 index 000000000000..ccbf4e12fbf0 --- /dev/null +++ b/lld/test/mach-o/lit.local.cfg @@ -0,0 +1,4 @@ + +# mach-o test cases encode input files in yaml and use .yaml extension +config.suffixes = ['.yaml'] +config.excludes = ['Inputs'] diff --git a/lld/test/mach-o/load-commands-size.yaml b/lld/test/mach-o/load-commands-size.yaml new file mode 100644 index 000000000000..25314d7eb4ba --- /dev/null +++ b/lld/test/mach-o/load-commands-size.yaml @@ -0,0 +1,305 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -dylib \ +# RUN: -macosx_version_min 10.10 -sdk_version 10.10 \ +# RUN: -install_name /usr/lib/foo.dylib \ +# RUN: %p/Inputs/x86_64/libSystem.yaml && \ +# RUN: llvm-readobj %t + +# (Tests that lld doesn't crash or produce an invalid file.) 
+ +--- !native +path: '' +defined-atoms: + - name: _foo + scope: global + type: unknown + content: [ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
+ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00 ] + alignment: 16 + section-choice: custom-required + section-name: '__TEXT/__foo' diff --git a/lld/test/mach-o/mach_header-cpusubtype.yaml b/lld/test/mach-o/mach_header-cpusubtype.yaml new file mode 100644 index 000000000000..7c97e4063e2d --- /dev/null +++ b/lld/test/mach-o/mach_header-cpusubtype.yaml @@ -0,0 +1,34 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.4 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_LIB64 +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.5 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=LIB64 +# RUN: ld64.lld.darwinold -arch x86_64 -dylib -macosx_version_min 10.5 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=DYLIB + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: start + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + +... 
+ +# NO_LIB64: MH_MAGIC_64 X86_64 ALL 0x00 EXECUTE +# LIB64: MH_MAGIC_64 X86_64 ALL LIB64 EXECUTE +# DYLIB: MH_MAGIC_64 X86_64 ALL 0x00 DYLIB diff --git a/lld/test/mach-o/mh_bundle_header.yaml b/lld/test/mach-o/mh_bundle_header.yaml new file mode 100644 index 000000000000..3db78b103eab --- /dev/null +++ b/lld/test/mach-o/mh_bundle_header.yaml @@ -0,0 +1,54 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -bundle -o %t %p/Inputs/x86_64/libSystem.yaml && llvm-nm -m -n %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 %s -bundle -dead_strip -o %t %p/Inputs/x86_64/libSystem.yaml && llvm-nm -m -n %t | FileCheck %s +# +# Test that __mh_bundle_header symbol is available for bundles +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000008 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +global-symbols: + - name: _d + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000008 + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_NO_DEAD_STRIP ] + value: 0x0000000000000000 +undefined-symbols: + - name: __mh_bundle_header + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + + +... 
+ +# CHECK: __mh_bundle_header +# CHECK: _foo diff --git a/lld/test/mach-o/mh_dylib_header.yaml b/lld/test/mach-o/mh_dylib_header.yaml new file mode 100644 index 000000000000..ce03d3b3c487 --- /dev/null +++ b/lld/test/mach-o/mh_dylib_header.yaml @@ -0,0 +1,53 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -dylib -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# Test that __mh_dylib_header symbol is available for dylibs +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000008 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +global-symbols: + - name: _d + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000008 + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: __mh_dylib_header + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + + +... 
+ +# CHECK_NOT: __mh_dylib_header +# CHECK: _foo diff --git a/lld/test/mach-o/objc-category-list-atom.yaml b/lld/test/mach-o/objc-category-list-atom.yaml new file mode 100644 index 000000000000..cf4d4966903a --- /dev/null +++ b/lld/test/mach-o/objc-category-list-atom.yaml @@ -0,0 +1,70 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %t -o %t2 | FileCheck %s + + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_catlist + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + alignment: 8 + address: 0x00000000000003F8 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000008 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 +undefined-symbols: + - name: __category1 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: __category2 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +page-size: 0x00000000 +... + +# Make sure we atomize the category list section by pointer sized atoms. 
+ +# CHECK: path: '' +# CHECK: defined-atoms: +# CHECK: - type: objc-category-list +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: merge: by-content +# CHECK: alignment: 8 +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: __category2 +# CHECK: - type: objc-category-list +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: merge: by-content +# CHECK: alignment: 8 +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: __category1 +# CHECK: undefined-atoms: +# CHECK: - name: __category1 +# CHECK: - name: __category2 +# CHECK: ... diff --git a/lld/test/mach-o/objc-image-info-host-vs-simulator.yaml b/lld/test/mach-o/objc-image-info-host-vs-simulator.yaml new file mode 100644 index 000000000000..06913009936b --- /dev/null +++ b/lld/test/mach-o/objc-image-info-host-vs-simulator.yaml @@ -0,0 +1,23 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s + +# The file is built for the host, but the objc image info flags are for +# the simulator. + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00 ] +... + +# CHECK: {{.*}} cannot be linked. 
It contains ObjC built for the simulator while we are linking a non-simulator target \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-invalid-size.yaml b/lld/test/mach-o/objc-image-info-invalid-size.yaml new file mode 100644 index 000000000000..ea00bfae077d --- /dev/null +++ b/lld/test/mach-o/objc-image-info-invalid-size.yaml @@ -0,0 +1,20 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] +... + +# CHECK: error: __DATA/__objc_imageinfo in file {{.*}} should be 8 bytes in size \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-invalid-version.yaml b/lld/test/mach-o/objc-image-info-invalid-version.yaml new file mode 100644 index 000000000000..c64206c436e8 --- /dev/null +++ b/lld/test/mach-o/objc-image-info-invalid-version.yaml @@ -0,0 +1,20 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00 ] +... 
+ +# CHECK: error: __DATA/__objc_imageinfo in file {{.*}} should have version=0 \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml b/lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml new file mode 100644 index 000000000000..35539ca21cac --- /dev/null +++ b/lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml @@ -0,0 +1,20 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s %p/Inputs/swift-version-1.yaml 2>&1 | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00 ] +... + +# CHECK: different swift versions \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-pass-output.yaml b/lld/test/mach-o/objc-image-info-pass-output.yaml new file mode 100644 index 000000000000..7f7953e3c892 --- /dev/null +++ b/lld/test/mach-o/objc-image-info-pass-output.yaml @@ -0,0 +1,30 @@ +# RUN: ld64.lld.darwinold -ios_simulator_version_min 5.0 -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s + +# Make sure that we have an objc image info in the output. It should have +# been generated by the objc pass. + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00 ] +... 
+ +# CHECK: --- !native +# CHECK: path: '' +# CHECK: defined-atoms: +# CHECK: - scope: hidden +# CHECK: type: objc-image-info +# CHECK: content: [ 00, 00, 00, 00, 20, 02, 00, 00 ] +# CHECK: alignment: 4 +# CHECK: ... \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-simulator-vs-host.yaml b/lld/test/mach-o/objc-image-info-simulator-vs-host.yaml new file mode 100644 index 000000000000..37e5f7489d80 --- /dev/null +++ b/lld/test/mach-o/objc-image-info-simulator-vs-host.yaml @@ -0,0 +1,23 @@ +# RUN: not ld64.lld.darwinold -ios_simulator_version_min 5.0 -arch x86_64 -r %s 2>&1 | FileCheck %s + +# The file is built for the simulator, but the objc image info flags are for +# the host. + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] +... + +# CHECK: {{.*}} cannot be linked. It contains ObjC built for a non-simulator target while we are linking a simulator target \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-unsupported-gc.yaml b/lld/test/mach-o/objc-image-info-unsupported-gc.yaml new file mode 100644 index 000000000000..4615e7ebed49 --- /dev/null +++ b/lld/test/mach-o/objc-image-info-unsupported-gc.yaml @@ -0,0 +1,20 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 ] +... 
+ +# CHECK: error: __DATA/__objc_imageinfo in file {{.*}} uses GC. This is not supported \ No newline at end of file diff --git a/lld/test/mach-o/objc_export_list.yaml b/lld/test/mach-o/objc_export_list.yaml new file mode 100644 index 000000000000..1629c1880130 --- /dev/null +++ b/lld/test/mach-o/objc_export_list.yaml @@ -0,0 +1,63 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -o %t \ +# RUN: -exported_symbol .objc_class_name_Foo %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-nm -m %t | FileCheck %s +# +# Test that exported objc classes can be specificed using old naming +# (.e.g .objc_class_name_Foo instead of _OBJC_CLASS_$_Foo) +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __DATA + section: __objc_data + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000030 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000028 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 +global-symbols: + - name: '_OBJC_CLASS_$_Foo' + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: '_OBJC_METACLASS_$_Foo' + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000028 +... 
+ +# CHECK: (__DATA,__objc_data) external _OBJC_CLASS_$_Foo +# CHECK: (__DATA,__objc_data) external _OBJC_METACLASS_$_Foo diff --git a/lld/test/mach-o/order_file-basic.yaml b/lld/test/mach-o/order_file-basic.yaml new file mode 100644 index 000000000000..9dc1c009921f --- /dev/null +++ b/lld/test/mach-o/order_file-basic.yaml @@ -0,0 +1,75 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml \ +# RUN: -order_file %p/Inputs/order_file-basic.order \ +# RUN: -force_load %p/Inputs/libfoo.a -o %t +# RUN: llvm-nm -m -n %t | FileCheck %s +# +# Test -order_file +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3, 0xC3, 0xC3, 0xC3 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000014 + content: [ 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _data1 + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000014 + - name: _data2 + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000018 + - name: _data3 + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x000000000000001C + - name: _func1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _func2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _func3 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000002 + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000003 +... 
+ + +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _func2 +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _func1 +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _func3 +# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main +# CHECK: {{[0-9a-f]+}} (__DATA,__data) external _data3 +# CHECK: {{[0-9a-f]+}} (__DATA,__data) external _data1 +# CHECK: {{[0-9a-f]+}} (__DATA,__data) external _data2 + diff --git a/lld/test/mach-o/parse-aliases.yaml b/lld/test/mach-o/parse-aliases.yaml new file mode 100644 index 000000000000..59dcb546c7c6 --- /dev/null +++ b/lld/test/mach-o/parse-aliases.yaml @@ -0,0 +1,90 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test multiple labels to same address parse into aliases. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xCC, 0xC3 ] +local-symbols: + - name: _pad + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _myStaticAlias1 + type: N_SECT + sect: 1 + value: 0x0000000000000001 + - name: _myStaticAlias3 + type: N_SECT + sect: 1 + value: 0x0000000000000001 + - name: _myStaticAlias2 + type: N_SECT + sect: 1 + value: 0x0000000000000001 +global-symbols: + - name: _myGlobalFunc1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myGlobalFunc2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myGlobalFunc3 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myHiddenAlias1 + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myHiddenAlias2 + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000001 + - 
name: _myHiddenAlias3 + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000001 +... + +# CHECK: defined-atoms: +# CHECK: - name: _pad +# CHECK: scope: global +# CHECK: content: [ CC ] +# CHECK: - name: _myStaticAlias1 +# CHECK: - name: _myStaticAlias2 +# CHECK: - name: _myStaticAlias3 +# CHECK: - name: _myHiddenAlias1 +# CHECK: scope: hidden +# CHECK: - name: _myHiddenAlias2 +# CHECK: scope: hidden +# CHECK: - name: _myHiddenAlias3 +# CHECK: scope: hidden +# CHECK: - name: _myGlobalFunc1 +# CHECK: scope: global +# CHECK: - name: _myGlobalFunc2 +# CHECK: scope: global +# CHECK: - name: _myGlobalFunc3 +# CHECK: scope: global +# CHECK: content: [ C3 ] diff --git a/lld/test/mach-o/parse-arm-relocs.yaml b/lld/test/mach-o/parse-arm-relocs.yaml new file mode 100644 index 000000000000..26e1dcf9dae7 --- /dev/null +++ b/lld/test/mach-o/parse-arm-relocs.yaml @@ -0,0 +1,818 @@ +# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing of armv7 relocations. 
+# +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x00, 0xF0, 0x4E, 0xF8, 0x00, 0xF0, 0x4E, 0xF8, + 0xFF, 0xF7, 0xFA, 0xFF, 0xFF, 0xF7, 0xFA, 0xFF, + 0xFF, 0xF7, 0xF6, 0xBF, 0x40, 0xF2, 0x72, 0x01, + 0xC0, 0xF2, 0x00, 0x01, 0x40, 0xF2, 0x7A, 0x02, + 0xC0, 0xF2, 0x00, 0x02, 0x40, 0xF2, 0x29, 0x01, + 0xC0, 0xF2, 0x00, 0x01, 0x79, 0x44, 0x40, 0xF2, + 0xA0, 0x03, 0xC0, 0xF2, 0x00, 0x03, 0x40, 0xF2, + 0xA8, 0x04, 0xC0, 0xF2, 0x00, 0x04, 0x40, 0xF2, + 0x57, 0x03, 0xC0, 0xF2, 0x00, 0x03, 0x40, 0xF2, + 0x00, 0x05, 0xC0, 0xF2, 0x00, 0x05, 0x40, 0xF2, + 0x08, 0x06, 0xC0, 0xF2, 0x00, 0x06, 0xC0, 0x46, + 0x10, 0x00, 0x00, 0xEB, 0x10, 0x00, 0x00, 0xEB, + 0xE6, 0xFF, 0xFF, 0xEB, 0xE6, 0xFF, 0xFF, 0xEB, + 0xE4, 0xFF, 0xFF, 0xEA, 0x20, 0x10, 0x00, 0xE3, + 0x00, 0x10, 0x40, 0xE3, 0x28, 0x20, 0x00, 0xE3, + 0x00, 0x20, 0x40, 0xE3, 0x0F, 0x10, 0x81, 0xE0, + 0xA0, 0x30, 0x00, 0xE3, 0x00, 0x30, 0x40, 0xE3, + 0xA8, 0x40, 0x00, 0xE3, 0x00, 0x40, 0x40, 0xE3, + 0x00, 0x50, 0x00, 0xE3, 0x00, 0x50, 0x40, 0xE3, + 0x08, 0x60, 0x00, 0xE3, 0x00, 0x60, 0x40, 0xE3 ] + relocations: + - offset: 0x0000009C + type: ARM_RELOC_HALF + length: 1 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000008 + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000098 + type: ARM_RELOC_HALF + length: 0 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000094 + type: ARM_RELOC_HALF + length: 1 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000090 + type: ARM_RELOC_HALF + length: 0 + 
pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000008C + scattered: true + type: ARM_RELOC_HALF + length: 1 + pc-rel: false + value: 0x000000A0 + - offset: 0x000000A8 + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000088 + scattered: true + type: ARM_RELOC_HALF + length: 0 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000084 + type: ARM_RELOC_HALF + length: 1 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x000000A0 + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000080 + type: ARM_RELOC_HALF + length: 0 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000078 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 1 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000028 + scattered: true + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + value: 0x00000080 + - offset: 0x00000074 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 0 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + value: 0x00000080 + - offset: 0x00000070 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 1 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000020 + scattered: true + type: ARM_RELOC_PAIR + length: 1 + pc-rel: false + value: 0x00000080 + - offset: 0x0000006C + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 0 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 0 + pc-rel: false + value: 0x00000080 + - offset: 0x00000068 + type: ARM_RELOC_BR24 + 
length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000064 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000060 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x0000005C + scattered: true + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + value: 0x000000A0 + - offset: 0x00000058 + type: ARM_RELOC_BR24 + length: 2 + pc-rel: true + extern: false + symbol: 2 + - offset: 0x00000052 + type: ARM_RELOC_HALF + length: 3 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000008 + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000004E + type: ARM_RELOC_HALF + length: 2 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000004A + type: ARM_RELOC_HALF + length: 3 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000046 + type: ARM_RELOC_HALF + length: 2 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000042 + type: ARM_RELOC_HALF + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000057 + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000003E + type: ARM_RELOC_HALF + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000003A + scattered: true + type: ARM_RELOC_HALF + length: 3 + pc-rel: false + value: 0x000000A0 + - offset: 0x000000A8 + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000036 + scattered: true + type: 
ARM_RELOC_HALF + length: 2 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000032 + type: ARM_RELOC_HALF + length: 3 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x000000A0 + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x0000002E + type: ARM_RELOC_HALF + length: 2 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000000 + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + extern: false + symbol: 16777215 + - offset: 0x00000028 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 3 + pc-rel: false + value: 0x00000056 + - offset: 0x00000028 + scattered: true + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + value: 0x0000002E + - offset: 0x00000024 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000056 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000002E + - offset: 0x00000020 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 3 + pc-rel: false + value: 0x000000A0 + - offset: 0x0000007A + scattered: true + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + value: 0x0000002E + - offset: 0x0000001C + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 2 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000002E + - offset: 0x00000018 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 3 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000072 + scattered: true + type: ARM_RELOC_PAIR + length: 3 + pc-rel: false + value: 0x0000002E + - offset: 0x00000014 + scattered: true + type: ARM_RELOC_HALF_SECTDIFF + length: 2 + pc-rel: false + value: 0x000000A0 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000002E + - 
offset: 0x00000010 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x0000000C + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000008 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000004 + scattered: true + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + value: 0x000000A0 + - offset: 0x00000000 + type: ARM_THUMB_RELOC_BR22 + length: 2 + pc-rel: true + extern: false + symbol: 2 + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x00000000000000A0 + content: [ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xA4, 0xFF, 0xFF, 0xFF, + 0xA4, 0xFF, 0xFF, 0xFF, 0x45, 0xFF, 0xFF, 0xFF, + 0x45, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000020 + scattered: true + type: ARM_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000000 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x000000C0 + - offset: 0x0000001C + scattered: true + type: ARM_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000000 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x000000BC + - offset: 0x00000018 + scattered: true + type: ARM_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000058 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x000000B8 + - offset: 0x00000014 + scattered: true + type: ARM_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000058 + - offset: 0x00000000 + scattered: true + type: ARM_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x000000B4 + - offset: 0x00000010 + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 4 + - offset: 0x0000000C + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: 
true + symbol: 4 + - offset: 0x00000008 + scattered: true + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + value: 0x00000000 + - offset: 0x00000004 + type: ARM_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 +local-symbols: + - name: _foo_thumb + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 + - name: _x + type: N_SECT + sect: 2 + value: 0x00000000000000A0 + - name: _t1 + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000056 + - name: _foo_arm + type: N_SECT + sect: 1 + value: 0x0000000000000058 +undefined-symbols: + - name: _undef + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: defined-atoms: +# CHECK: - name: _x +# CHECK: type: data +# CHECK: references: +# CHECK: - kind: pointer32 +# CHECK: offset: 4 +# CHECK: target: _foo_thumb +# CHECK-NOT: addend: +# CHECK: - kind: pointer32 +# CHECK: offset: 8 +# CHECK: target: _foo_thumb +# CHECK: addend: 4 +# CHECK: - kind: pointer32 +# CHECK: offset: 12 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: pointer32 +# CHECK: offset: 16 +# CHECK: target: _undef +# CHECK: addend: 4 +# CHECK: - kind: delta32 +# CHECK: offset: 20 +# CHECK: target: _foo_arm +# CHECK-NOT: addend: +# CHECK: - kind: delta32 +# CHECK: offset: 24 +# CHECK: target: _foo_arm +# CHECK: addend: 4 +# CHECK: - kind: delta32 +# CHECK: offset: 28 +# CHECK: target: _foo_thumb +# CHECK-NOT: addend: +# CHECK: - kind: delta32 +# CHECK: offset: 32 +# CHECK: target: _foo_thumb +# CHECK: addend: 4 +# CHECK: - name: _foo_thumb +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 0 +# CHECK: target: _x +# CHECK-NOT: addend: +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 4 +# CHECK: target: _x +# CHECK: addend: 4 +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 8 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: thumb_bl22 +# CHECK: offset: 12 +# CHECK: 
target: _undef +# CHECK: addend: 4 +# CHECK: - kind: thumb_b22 +# CHECK: offset: 16 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movw_funcRel +# CHECK: offset: 20 +# CHECK: target: _x +# CHECK: addend: -46 +# CHECK: - kind: thumb_movt_funcRel +# CHECK: offset: 24 +# CHECK: target: _x +# CHECK: addend: -46 +# CHECK: - kind: thumb_movw_funcRel +# CHECK: offset: 28 +# CHECK: target: _x +# CHECK: addend: -38 +# CHECK: - kind: thumb_movt_funcRel +# CHECK: offset: 32 +# CHECK: target: _x +# CHECK: addend: -38 +# CHECK: - kind: thumb_movw_funcRel +# CHECK: offset: 36 +# CHECK: target: _t1 +# CHECK: addend: -46 +# CHECK: - kind: thumb_movt_funcRel +# CHECK: offset: 40 +# CHECK: target: _t1 +# CHECK: addend: -46 +# CHECK: - kind: thumb_movw +# CHECK: offset: 46 +# CHECK: target: _x +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movt +# CHECK: offset: 50 +# CHECK: target: _x +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movw +# CHECK: offset: 54 +# CHECK: target: _x +# CHECK: addend: 8 +# CHECK: - kind: thumb_movt +# CHECK: offset: 58 +# CHECK: target: _x +# CHECK: addend: 8 +# CHECK: - kind: thumb_movw +# CHECK: offset: 62 +# CHECK: target: _t1 +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movt +# CHECK: offset: 66 +# CHECK: target: _t1 +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movw +# CHECK: offset: 70 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movt +# CHECK: offset: 74 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: thumb_movw +# CHECK: offset: 78 +# CHECK: target: _undef +# CHECK: addend: 8 +# CHECK: - kind: thumb_movt +# CHECK: offset: 82 +# CHECK: target: _undef +# CHECK: addend: 8 +# CHECK: - name: _t1 +# CHECK: content: [ C0, 46 ] +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: - name: _foo_arm +# CHECK: references: +# CHECK-NOT: - kind: modeThumbCode +# CHECK: - kind: arm_bl24 +# CHECK: offset: 0 +# CHECK: target: _x +# CHECK-NOT: addend: +# CHECK: - kind: 
arm_bl24 +# CHECK: offset: 4 +# CHECK: target: _x +# CHECK: addend: 4 +# CHECK: - kind: arm_bl24 +# CHECK: offset: 8 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: arm_bl24 +# CHECK: offset: 12 +# CHECK: target: _undef +# CHECK: addend: 4 +# CHECK: - kind: arm_b24 +# CHECK: offset: 16 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: arm_movw_funcRel +# CHECK: offset: 20 +# CHECK: target: _x +# CHECK: addend: -40 +# CHECK: - kind: arm_movt_funcRel +# CHECK: offset: 24 +# CHECK: target: _x +# CHECK: addend: -40 +# CHECK: - kind: arm_movw_funcRel +# CHECK: offset: 28 +# CHECK: target: _x +# CHECK: addend: -32 +# CHECK: - kind: arm_movt_funcRel +# CHECK: offset: 32 +# CHECK: target: _x +# CHECK: addend: -32 +# CHECK: - kind: arm_movw +# CHECK: offset: 40 +# CHECK: target: _x +# CHECK-NOT: addend: +# CHECK: - kind: arm_movt +# CHECK: offset: 44 +# CHECK: target: _x +# CHECK-NOT: addend: +# CHECK: - kind: arm_movw +# CHECK: offset: 48 +# CHECK: target: _x +# CHECK: addend: 8 +# CHECK: - kind: arm_movt +# CHECK: offset: 52 +# CHECK: target: _x +# CHECK: addend: 8 +# CHECK: - kind: arm_movw +# CHECK: offset: 56 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: arm_movt +# CHECK: offset: 60 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: arm_movw +# CHECK: offset: 64 +# CHECK: target: _undef +# CHECK: addend: 8 +# CHECK: - kind: arm_movt +# CHECK: offset: 68 +# CHECK: target: _undef +# CHECK: addend: 8 +# CHECK: undefined-atoms: +# CHECK: - name: _undef + + + + +# .align 2 +# .code 16 +# .thumb_func _foo_thumb +#_foo_thumb: +# bl _x +# bl _x+4 +# bl _undef +# bl _undef+4 +# b _undef +# movw r1, :lower16:(_x-L1) +# movt r1, :upper16:(_x-L1) +# movw r2, :lower16:(_x+8-L1) +# movt r2, :upper16:(_x+8-L1) +# movw r1, :lower16:(_t1-L1) +# movt r1, :upper16:(_t1-L1) +# add r1, pc +#L1: +# movw r3, :lower16:_x +# movt r3, :upper16:_x +# movw r4, :lower16:_x+8 +# movt r4, :upper16:_x+8 +# movw r3, :lower16:_t1 +# movt 
r3, :upper16:_t1 +# movw r5, :lower16:_undef +# movt r5, :upper16:_undef +# movw r6, :lower16:_undef+8 +# movt r6, :upper16:_undef+8 +# +# .thumb_func _t1 +#_t1: +# nop +# +# +# .code 32 +# .align 2 +#_foo_arm: +# bl _x +# bl _x+4 +# bl _undef +# bl _undef+4 +# b _undef +# movw r1, :lower16:(_x-L2) +# movt r1, :upper16:(_x-L2) +# movw r2, :lower16:(_x+8-L2) +# movt r2, :upper16:(_x+8-L2) +# add r1, pc +#L2: +# movw r3, :lower16:_x +# movt r3, :upper16:_x +# movw r4, :lower16:_x+8 +# movt r4, :upper16:_x+8 +# movw r5, :lower16:_undef +# movt r5, :upper16:_undef +# movw r6, :lower16:_undef+8 +# movt r6, :upper16:_undef+8 +# +# +# .data +#_x: .long 0 +# .long _foo_thumb +# .long _foo_thumb+4 +# .long _undef +# .long _undef+4 +# .long _foo_arm - . +# .long _foo_arm+4- . +# .long _foo_thumb - . +# .long _foo_thumb+4 - . +# diff --git a/lld/test/mach-o/parse-cfstring32.yaml b/lld/test/mach-o/parse-cfstring32.yaml new file mode 100644 index 000000000000..aee244ab931b --- /dev/null +++ b/lld/test/mach-o/parse-cfstring32.yaml @@ -0,0 +1,94 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of mach-o functions. 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, + 0x65, 0x72, 0x65, 0x00 ] + - segment: __DATA + section: __cfstring + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000010 + content: [ 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000018 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000010 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000008 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 0 +undefined-symbols: + - name: ___CFConstantStringClassReference + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - ref-name: [[STR1:L[L0-9]+]] +# CHECK: scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] +# CHECK: merge: by-content +# CHECK: - ref-name: [[STR2:L[L0-9]+]] +# CHECK: scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK: references: +# CHECK: - kind: pointer32 +# CHECK: offset: 0 +# CHECK: target: ___CFConstantStringClassReference +# CHECK: - kind: pointer32 +# CHECK: offset: 8 +# CHECK: target: [[STR1]] +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK: references: +# CHECK: - kind: pointer32 +# CHECK: offset: 0 +# CHECK: target: ___CFConstantStringClassReference +# CHECK: - kind: pointer32 +# CHECK: offset: 8 +# CHECK: target: [[STR2]] +# CHECK:undefined-atoms: +# CHECK: - name: ___CFConstantStringClassReference diff --git a/lld/test/mach-o/parse-cfstring64.yaml b/lld/test/mach-o/parse-cfstring64.yaml new file mode 100644 index 000000000000..2725047a5b6f --- /dev/null +++ b/lld/test/mach-o/parse-cfstring64.yaml @@ -0,0 +1,108 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of CFString constants. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, + 0x65, 0x72, 0x65, 0x00 ] + - segment: __DATA + section: __cfstring + type: S_REGULAR + attributes: [ ] + alignment: 4 + address: 0x0000000000000010 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000030 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000020 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000010 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +local-symbols: + - name: Lstr1 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: Lstr2 + type: N_SECT + sect: 1 + value: 0x0000000000000006 +undefined-symbols: + - name: ___CFConstantStringClassReference + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK:defined-atoms: +# CHECK: - ref-name: L000 +# CHECK: scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] +# CHECK: merge: by-content +# CHECK: - ref-name: L001 +# CHECK: scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: ___CFConstantStringClassReference +# CHECK: - kind: pointer64 +# CHECK: offset: 16 +# CHECK: target: L000 +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: ___CFConstantStringClassReference +# CHECK: - kind: pointer64 +# CHECK: offset: 16 +# CHECK: target: L001 +# CHECK:undefined-atoms: +# CHECK: - name: ___CFConstantStringClassReference + diff --git a/lld/test/mach-o/parse-compact-unwind32.yaml b/lld/test/mach-o/parse-compact-unwind32.yaml new file mode 100644 index 000000000000..3b0edff5cfca --- /dev/null +++ b/lld/test/mach-o/parse-compact-unwind32.yaml @@ -0,0 +1,72 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of __LD/__compact_unwind (compact unwind) section. 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0xB8, 0x0A, 0x00, 0x00, 0x00, + 0x5D, 0xC3, 0x55, 0x89, 0xE5, 0xB8, 0x0A, 0x00, + 0x00, 0x00, 0x5D, 0xC3 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x000000000000001C + content: [ 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000014 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 +global-symbols: + - name: __Z3barv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000A + - name: __Z3foov + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - type: compact-unwind +# CHECK: content: [ 00, 00, 00, 00, 0A, 00, 00, 00, 00, 00, 00, 01, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: - type: compact-unwind +# CHECK: content: [ 10, 00, 00, 00, 0A, 00, 00, 00, 00, 00, 00, 01, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: - name: __Z3foov +# CHECK: scope: global +# CHECK: content: [ 55, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] +# CHECK: - name: __Z3barv +# CHECK: scope: global +# CHECK: content: [ 55, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] + diff --git a/lld/test/mach-o/parse-compact-unwind64.yaml b/lld/test/mach-o/parse-compact-unwind64.yaml new file mode 100644 index 000000000000..a3d2cdfcaf3c --- /dev/null +++ b/lld/test/mach-o/parse-compact-unwind64.yaml @@ -0,0 +1,76 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of __LD/__compact_unwind (compact unwind) section. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0xB8, 0x0A, 0x00, 0x00, + 0x00, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, 0xB8, + 0x0A, 0x00, 0x00, 0x00, 0x5D, 0xC3 ] + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000020 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000020 + type: X86_64_RELOC_UNSIGNED + 
length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 +global-symbols: + - name: __Z3barv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: __Z3foov + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000B +... + +# CHECK: defined-atoms: +# CHECK: - type: compact-unwind +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 0B, 00, 00, 00, +# CHECK: 00, 00, 00, 01, 00, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: - type: compact-unwind +# CHECK: content: [ 10, 00, 00, 00, 00, 00, 00, 00, 0B, 00, 00, 00, +# CHECK: 00, 00, 00, 01, 00, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: - name: __Z3barv +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] +# CHECK: - name: __Z3foov +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] diff --git a/lld/test/mach-o/parse-data-in-code-armv7.yaml b/lld/test/mach-o/parse-data-in-code-armv7.yaml new file mode 100644 index 000000000000..6f8ffa914025 --- /dev/null +++ b/lld/test/mach-o/parse-data-in-code-armv7.yaml @@ -0,0 +1,157 @@ +# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %t -o %t2 | FileCheck %s +# RUN: ld64.lld.darwinold -arch armv7 -dylib %s -o %t3.dylib %p/Inputs/armv7/libSystem.yaml \ +# RUN: && llvm-objdump --macho --private-headers %t3.dylib | FileCheck --check-prefix=CHECK2 %s +# +# Test parsing LC_DATA_IN_CODE +# +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x00, 0xBF, 0x00, 0xBF, 0x00, 0x00, 0x00, 
0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0xBF, 0x00, 0xBF, + 0x00, 0xF0, 0x20, 0xE3, 0x0A, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3 ] +local-symbols: + - name: _foo_thumb + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 + - name: _foo_arm + type: N_SECT + sect: 1 + value: 0x0000000000000018 +dataInCode: + - offset: 0x00000004 + length: 0x0004 + kind: DICE_KIND_DATA + - offset: 0x00000008 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE32 + - offset: 0x0000000C + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE16 + - offset: 0x00000010 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE8 + - offset: 0x0000001C + length: 0x0004 + kind: DICE_KIND_DATA + - offset: 0x00000020 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE32 + - offset: 0x00000024 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE16 + - offset: 0x00000028 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE8 +... 
+ + + +# CHECK: defined-atoms: +# CHECK: - name: _foo_thumb +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: - kind: modeData +# CHECK: offset: 4 +# CHECK: addend: 1 +# CHECK: - kind: modeData +# CHECK: offset: 8 +# CHECK: addend: 4 +# CHECK: - kind: modeData +# CHECK: offset: 12 +# CHECK: addend: 3 +# CHECK: - kind: modeData +# CHECK: offset: 16 +# CHECK: addend: 2 +# CHECK: - kind: modeThumbCode +# CHECK: offset: 20 +# CHECK: - name: _foo_arm +# CHECK: references: +# CHECK: - kind: modeData +# CHECK: offset: 4 +# CHECK: addend: 1 +# CHECK: - kind: modeData +# CHECK: offset: 8 +# CHECK: addend: 4 +# CHECK: - kind: modeData +# CHECK: offset: 12 +# CHECK: addend: 3 +# CHECK: - kind: modeData +# CHECK: offset: 16 +# CHECK: addend: 2 +# CHECK: - kind: modeArmCode +# CHECK: offset: 20 + + +# CHECK2: cmd LC_DATA_IN_CODE +# CHECK2: cmdsize 16 +# CHECK2: datasize 64 + + +# .code 16 +# .thumb_func _foo_thumb +#_foo_thumb: +# nop +# nop +# +# .data_region +# .long 0 +# .end_data_region +# +# .data_region jt32 +# .long 1 +# .end_data_region +# +# .data_region jt16 +# .long 2 +# .end_data_region +# +# .data_region jt8 +# .long 3 +# .end_data_region +# +# nop +# nop +# +# +# +# .code 32 +# .align 2 +#_foo_arm: +# nop +# +# .data_region +# .long 10 +# .end_data_region +# +# .data_region jt32 +# .long 11 +# .end_data_region +# +# .data_region jt16 +# .long 12 +# .end_data_region +# +# .data_region jt8 +# .long 13 +# .end_data_region +# +# nop +# diff --git a/lld/test/mach-o/parse-data-in-code-x86.yaml b/lld/test/mach-o/parse-data-in-code-x86.yaml new file mode 100644 index 000000000000..34de2d6f9439 --- /dev/null +++ b/lld/test/mach-o/parse-data-in-code-x86.yaml @@ -0,0 +1,77 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s \ +# RUN: && ld64.lld.darwinold -arch i386 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing LC_DATA_IN_CODE +# +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ 
MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x90, 0x90, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x03, 0x00, + 0x00, 0x00 ] +local-symbols: + - name: _func1 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: _func2 + type: N_SECT + sect: 1 + value: 0x000000000000000B +dataInCode: + - offset: 0x00000002 + length: 0x0008 + kind: DICE_KIND_JUMP_TABLE32 + - offset: 0x0000000E + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE32 +... + + + +# CHECK: defined-atoms: +# CHECK: - name: _func1 +# CHECK: references: +# CHECK: - kind: modeData +# CHECK: offset: 2 +# CHECK: addend: 4 +# CHECK: - kind: modeCode +# CHECK: offset: 10 +# CHECK: - name: _func2 +# CHECK: references: +# CHECK: - kind: modeData +# CHECK: offset: 3 +# CHECK: addend: 4 +# CHECK-NOT: - kind: modeData + + + + +# +#_func1: +# nop +# nop +# .data_region jt32 +# .long 1 +# .long 2 +# .end_data_region +# nop +# +# +# _func2: +# nop +# nop +# nop +# .data_region jt32 +# .long 3 +# .end_data_region +# diff --git a/lld/test/mach-o/parse-data-relocs-arm64.yaml b/lld/test/mach-o/parse-data-relocs-arm64.yaml new file mode 100644 index 000000000000..504d69f99712 --- /dev/null +++ b/lld/test/mach-o/parse-data-relocs-arm64.yaml @@ -0,0 +1,244 @@ +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing and writing of arm64 data relocations. +# +# The first step tests if the supplied mach-o file is parsed into the correct +# set of references. The second step verifies relocations can be round-tripped +# by writing to a new .o file, then parsing that file which should result in +# the same references. 
+# +#_test: + + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xC0, 0x03, 0x5F, 0xD6 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x0000000000000004 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xDC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC0, 0xFF, 0xFF, 0xFF, 0xBE, 0xFF, 0xFF, 0xFF, + 0xB0, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000050 + type: ARM64_RELOC_POINTER_TO_GOT + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x0000004C + type: ARM64_RELOC_SUBTRACTOR + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x0000004C + type: ARM64_RELOC_UNSIGNED + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000048 + type: ARM64_RELOC_SUBTRACTOR + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000048 + type: ARM64_RELOC_UNSIGNED + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000040 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000038 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000030 + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000030 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000028 + type: 
ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000028 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000020 + type: ARM64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000020 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000018 + type: ARM64_RELOC_POINTER_TO_GOT + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000010 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000008 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +local-symbols: + - name: _v1 + type: N_SECT + sect: 2 + value: 0x000000000000000C +global-symbols: + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +page-size: 0x00000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - ref-name: L000 +# CHECK: type: data +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: - name: _v1 +# CHECK: type: data +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 08, 00, 00, 00, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 00, 00, 00, 00, 00, 00, 00, E0, FF, FF, FF, +# CHECK: FF, FF, FF, FF, DC, FF, FF, FF, FF, FF, FF, FF, +# CHECK: {{..}}, {{..}}, 00, 00, 00, 00, 00, 00, 04, 00, 00, 00, +# CHECK: 00, 00, 00, 00, C0, FF, FF, FF, BE, FF, FF, FF, +# CHECK: {{B0|B8}}, {{..}}, FF, FF ] +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: pointer64 +# CHECK: offset: 8 +# CHECK: target: _foo +# CHECK: addend: 8 +# CHECK: - kind: pointer64ToGOT +# CHECK: offset: 16 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: delta64 +# CHECK: offset: 24 +# CHECK: target: _foo +# CHECK: addend: 24 +# CHECK: - kind: delta64 +# CHECK: offset: 32 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: delta64 +# CHECK: offset: 40 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: pointer64 +# CHECK: offset: 48 +# CHECK: target: L000 +# CHECK-NOT: addend: +# CHECK: - kind: pointer64 +# CHECK: offset: 56 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: delta32 +# CHECK: offset: 64 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: delta32 +# CHECK: offset: 68 +# CHECK: target: _foo +# CHECK: addend: 2 +# CHECK: - kind: delta32ToGOT +# CHECK: offset: 72 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - name: _bar +# CHECK: scope: global +# CHECK: content: [ C0, 03, 5F, D6 ] +# CHECK: alignment: 4 +# CHECK: undefined-atoms: +# CHECK: - name: _foo + +# .subsections_via_symbols +# .text +# .globl_foo +# .align2 +# _foo: +# ret +# .data +#Lanon: +# .quad 0 +#_v1: +# .quad _foo +# .quad _foo + 8 +# .quad _foo@GOT +# .quad _foo + 24 - . +# .quad _foo - . 
+# .quad _foo + 4 - . +# .quad Lanon +# .quad Lanon + 4 +# .long _foo - . +# .long _foo +2 - . +# .long _foo@GOT - . + diff --git a/lld/test/mach-o/parse-data-relocs-x86_64.yaml b/lld/test/mach-o/parse-data-relocs-x86_64.yaml new file mode 100644 index 000000000000..8aea7cc88778 --- /dev/null +++ b/lld/test/mach-o/parse-data-relocs-x86_64.yaml @@ -0,0 +1,372 @@ + +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s \ +# RUN: && ld64.lld.darwinold -arch x86_64 %t -r -print_atoms -o %t2 | FileCheck %s +# +# Test parsing and writing of x86_64 data relocations. +# +# The first step tests if the supplied mach-o file is parsed into the correct +# set of references. The second step verifies relocations can be round-tripped +# by writing to a new .o file, then parsing that file which should result in +# the same references. +# +#_foo: +# ret +# +#_bar: +# ret +# +# .section __DATA,__custom +#L1: +# .quad 0 +# +# .data +#_d: +# .quad _foo +# .quad _foo+4 +# .quad _foo - . +# .quad L1 +# .quad L1 + 2 +# .quad _foo - . +# .quad _foo + 4 - . +# .quad L1 - . +# .long _foo - . +# .long _foo + 4 - . +# .long L1 - . 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xC3, 0xC3 ] + - segment: __DATA + section: __custom + type: S_REGULAR + attributes: [ ] + address: 0x0000000000000002 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x000000000000000A + content: [ +# .quad _foo +# No addend is needed here as we are referencing _foo directly and that is +# encoded entirely in the X86_64_RELOC_UNSIGNED + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +# .quad _foo+4 +# Addend of 4 is needed here as we are referencing _foo from the +# X86_64_RELOC_UNSIGNED, then the addend gives us 4 more. + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +# .quad _foo - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -16 because that is the offset from here back +# to _d. + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +# .quad . - _foo +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -16 because that is the offset from here back +# to _d. + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +# .quad L1 +# This is a X86_64_RELOC_UNSIGNED without extern set. +# In this case, we encode the section number for L1 in the relocation, and +# the addend here is the absolute address of the location in that section +# we want to reference. 
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +# .quad L1 + 2 +# This is a X86_64_RELOC_UNSIGNED without extern set. +# In this case, we encode the section number for L1 in the relocation, and +# the addend here is the absolute address of the location in that section +# we want to reference. We have a 4 because the section is at address 2 +# and we want an offset of 2 from there. + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +# .quad _foo - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -40 because that is the offset from here back +# to _d. + 0xD0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +# .quad _foo + 4 - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -52. It would have been -56 because that +# would take us from the address of this relocation back to _d. But as +# we also add 4 for the offset, we get -52. + 0xCC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +# .quad L1 - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned does not have extern set, so the relocation +# number is the section number for L1. +# Note the addend here is -62. Of that, -64 would be the offset from +# this location from _d. The remaining 2 is the absolute address +# of L1. + 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +# .long _foo - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -72 because that is the offset from here back +# to _d. 
+ 0xB8, 0xFF, 0xFF, 0xFF, +# .long . - _foo +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -76 because that is the offset from here back +# to _d. + 0xB4, 0xFF, 0xFF, 0xFF, +# .long _foo + 4 - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned references _foo. +# Note the addend here is -76. It would have been -80 because that +# would take us from the address of this relocation back to _d. But as +# we also add 4 for the offset, we get -76. + 0xB4, 0xFF, 0xFF, 0xFF, +# .long L1 - . +# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. +# The subtractor references _d which is the first nonlocal label in this +# section. The unsigned does not have extern set, so the relocation +# number is the section number for L1. +# Note the addend here is -82. Of that, -84 would be the offset from +# this location from _d. The remaining 2 is the absolute address +# of L1. 
+ 0xAE, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000054 + type: X86_64_RELOC_SUBTRACTOR + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000054 + type: X86_64_RELOC_UNSIGNED + length: 2 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000050 + type: X86_64_RELOC_SUBTRACTOR + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000050 + type: X86_64_RELOC_UNSIGNED + length: 2 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x0000004C + type: X86_64_RELOC_SUBTRACTOR + length: 2 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x0000004C + type: X86_64_RELOC_UNSIGNED + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000048 + type: X86_64_RELOC_SUBTRACTOR + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000048 + type: X86_64_RELOC_UNSIGNED + length: 2 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000040 + type: X86_64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000040 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000038 + type: X86_64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000038 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000030 + type: X86_64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000030 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000028 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000020 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000018 + type: X86_64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000018 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + 
symbol: 2 + - offset: 0x00000010 + type: X86_64_RELOC_SUBTRACTOR + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000010 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000008 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 +local-symbols: + - name: _foo + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: _bar + type: N_SECT + sect: 1 + value: 0x0000000000000001 + - name: _d + type: N_SECT + sect: 3 + value: 0x000000000000000A +page-size: 0x00000000 +... + + +# CHECK:defined-atoms: +# CHECK: - name: _d +# CHECK: type: data +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 04, 00, 00, 00, +# CHECK: 00, 00, 00, 00, F0, FF, FF, FF, FF, FF, FF, FF, +# CHECK: 18, 00, 00, 00, 00, 00, 00, 00, {{..}}, {{..}}, 00, 00, +# CHECK: 00, 00, 00, 00, {{..}}, {{..}}, 00, 00, 00, 00, 00, 00, +# CHECK: D0, FF, FF, FF, FF, FF, FF, FF, CC, FF, FF, FF, +# CHECK: FF, FF, FF, FF, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, +# CHECK: B8, FF, FF, FF, B4, FF, FF, FF, B4, FF, FF, FF, +# CHECK: {{..}}, {{..}}, {{..}}, {{..}} ] +# CHECK: dead-strip: never +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: _foo +# CHECK: - kind: pointer64 +# CHECK: offset: 8 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: delta64 +# CHECK: offset: 16 +# CHECK: target: _foo +# CHECK: - kind: negDelta64 +# CHECK: offset: 24 +# CHECK: target: _foo +# CHECK: - kind: pointer64Anon +# CHECK: offset: 32 +# CHECK: target: L003 +# CHECK: - kind: pointer64Anon +# CHECK: offset: 40 +# CHECK: target: L003 +# CHECK: addend: 2 +# CHECK: - kind: delta64 +# CHECK: offset: 48 +# CHECK: target: _foo +# CHECK: - kind: delta64 +# CHECK: offset: 56 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: delta64Anon +# CHECK: 
offset: 64 +# CHECK: target: L003 +# CHECK: - kind: delta32 +# CHECK: offset: 72 +# CHECK: target: _foo +# CHECK: - kind: negDelta32 +# CHECK: offset: 76 +# CHECK: target: _foo +# CHECK: - kind: delta32 +# CHECK: offset: 80 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: delta32Anon +# CHECK: offset: 84 +# CHECK: target: L003 +# CHECK: - name: _foo +# CHECK: content: [ C3 ] +# CHECK: dead-strip: never +# CHECK: - name: _bar +# CHECK: content: [ C3 ] +# CHECK: dead-strip: never +# CHECK: - ref-name: L003 +# CHECK: type: unknown +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: section-choice: custom-required +# CHECK: section-name: '__DATA/__custom' +# CHECK: dead-strip: never + diff --git a/lld/test/mach-o/parse-data.yaml b/lld/test/mach-o/parse-data.yaml new file mode 100644 index 000000000000..ec8a2ad439c4 --- /dev/null +++ b/lld/test/mach-o/parse-data.yaml @@ -0,0 +1,119 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of mach-o data symbols. 
+# +# long a = 0x0807060504030201; +# int b = 0x14131211; +# int c = 0x24232221; +# static int s1; +# static int s2 = 0x34333231; +# +# + + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000000 + content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x21, 0x22, 0x23, 0x24, + 0x31, 0x32, 0x33, 0x34, 0x41, 0x42, 0x43, 0x44 ] + - segment: __CUST + section: __custom + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x0000000000000018 + content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 ] + - segment: __DATA + section: __bss + type: S_ZEROFILL + attributes: [ ] + alignment: 2 + address: 0x0000000000000020 + size: 4 +local-symbols: + - name: _s1 + type: N_SECT + sect: 3 + value: 0x0000000000000020 + - name: _s2 + type: N_SECT + sect: 1 + value: 0x0000000000000010 +global-symbols: + - name: _a + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _b + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000008 + - name: _c + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000C + - name: _cWeak + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_WEAK_DEF ] + value: 0x0000000000000014 + - name: _kustom + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000018 +... 
+ +# CHECK: defined-atoms: + +# CHECK: - name: _a +# CHECK: scope: global +# CHECK: type: data +# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] + +# CHECK: - name: _b +# CHECK: scope: global +# CHECK: type: data +# CHECK: content: [ 11, 12, 13, 14 ] + +# CHECK: - name: _c +# CHECK: scope: global +# CHECK: type: data +# CHECK: content: [ 21, 22, 23, 24 ] + +# CHECK: - name: _s2 +# CHECK: type: data +# CHECK: content: [ 31, 32, 33, 34 ] + +# CHECK: - name: _cWeak +# CHECK: scope: global +# CHECK: type: data +# CHECK: content: [ 41, 42, 43, 44 ] +# CHECK: merge: as-weak + +# CHECK: - name: _s1 +# CHECK: type: zero-fill +# CHECK: size: 4 + +# CHECK: - name: _kustom +# CHECK: scope: global +# CHECK: type: unknown +# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] +# CHECK: section-choice: custom-required +# CHECK: section-name: '__CUST/__custom' + diff --git a/lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml b/lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml new file mode 100644 index 000000000000..c82c85f0ec0c --- /dev/null +++ b/lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml @@ -0,0 +1,176 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of x86_64 __eh_frame (dwarf unwind) relocations. 
+ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 16 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0x5D, 0xC3, 0x48, 0x89, 0xC7, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, 0x00, 0x00, + 0x00, 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x66, 0x2E, + 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0x5D, 0xC3, 0x48, 0x89, 0xC7, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, 0x00, 0x00, + 0x00 ] + - segment: __TEXT + section: __gcc_except_tab + type: S_REGULAR + attributes: [ ] + alignment: 4 + address: 0x000000000000004C + content: [ 0xFF, 0x9B, 0xA2, 0x80, 0x80, 0x00, 0x03, 0x1A, + 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x09, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0x9B, 0xA2, 0x80, 0x80, 0x00, 0x03, 0x1A, + 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x09, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 ] + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + address: 0x0000000000000100 + content: [ 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, + 0x10, 0x07, 0x9B, 0x04, 0x00, 0x00, 0x00, 0x10, + 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0xD8, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 
0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, + 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0xB0, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, + 0x98, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0xCB, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000013 + type: X86_64_RELOC_GOT + length: 2 + pc-rel: true + extern: true + symbol: 8 +local-symbols: + - name: GCC_except_table0 + type: N_SECT + sect: 2 + value: 0x000000000000004C + - name: GCC_except_table2 + type: N_SECT + sect: 2 + value: 0x0000000000000074 +global-symbols: + - name: _catchMyException1 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _catchMyException2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000030 + - name: _bar + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000020 +undefined-symbols: + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_begin_catch + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_end_catch + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___gxx_personality_v0 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +page-size: 0x00000000 +... + +# Check that LSDA fields are fixed up correctly, even when there are multiple +# CIEs involved. +# +# (1) Check that we can relocate an LSDA at all. 
Requires correct interpretation +# of augmentation data strings in CIEs and augmentation data fields of FDEs. +# +# CHECK: - type: unwind-cfi +# CHECK-NOT: - type: +# CHECK: references: +# CHECK-NEXT: - kind: negDelta32 +# CHECK-NEXT: offset: 4 +# CHECK-NEXT: target: L002 +# CHECK-NEXT: - kind: unwindFDEToFunction +# CHECK-NEXT: offset: 8 +# CHECK-NEXT: target: _catchMyException1 +# CHECK-NEXT: - kind: unwindFDEToFunction +# CHECK-NEXT: offset: 25 +# CHECK-NEXT: target: GCC_except_table0 +# +# (2) Check that we have an intervening FDE with a different CIE. +# If the test fails here then test (3) probably isn't testing what it +# should, and this test-case should be updated. +# +# CHECK: - type: unwind-cfi +# CHECK-NOT: - type: +# CHECK: references: +# CHECK-NEXT: - kind: negDelta32 +# CHECK-NEXT: offset: 4 +# CHECK-NEXT: target: L001 +# CHECK-NEXT: - kind: unwindFDEToFunction +# CHECK-NEXT: offset: 8 +# CHECK-NEXT: target: _bar +# +# (3) Check that we can relocate the LSDA on a second FDE that references the +# original CIE from (1). Requires us to match this FDE up with the correct +# CIE. +# +# CHECK-NEXT: - type: unwind-cfi +# CHECK-NOT: - type: +# CHECK: references: +# CHECK-NEXT: - kind: negDelta32 +# CHECK-NEXT: offset: 4 +# CHECK-NEXT: target: L002 +# CHECK-NEXT: - kind: unwindFDEToFunction +# CHECK-NEXT: offset: 8 +# CHECK-NEXT: target: _catchMyException2 +# CHECK-NEXT: - kind: unwindFDEToFunction +# CHECK-NEXT: offset: 25 +# CHECK-NEXT: target: GCC_except_table2 diff --git a/lld/test/mach-o/parse-eh-frame-x86-anon.yaml b/lld/test/mach-o/parse-eh-frame-x86-anon.yaml new file mode 100644 index 000000000000..feb914b581b9 --- /dev/null +++ b/lld/test/mach-o/parse-eh-frame-x86-anon.yaml @@ -0,0 +1,129 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of new __eh_frame (dwarf unwind) section that has no .eh labels +# and no relocations. 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0x56, 0x83, 0xEC, 0x14, 0xE8, + 0x00, 0x00, 0x00, 0x00, 0x5E, 0xC7, 0x04, 0x24, + 0x04, 0x00, 0x00, 0x00, 0xE8, 0xE7, 0xFF, 0xFF, + 0xFF, 0xC7, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x8B, + 0x8E, 0x38, 0x00, 0x00, 0x00, 0x89, 0x4C, 0x24, + 0x04, 0x89, 0x04, 0x24, 0xC7, 0x44, 0x24, 0x08, + 0x00, 0x00, 0x00, 0x00, 0xE8, 0xC7, 0xFF, 0xFF, + 0xFF, 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, + 0xBC, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000040 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000035 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000021 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000044 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000C + - offset: 0x00000015 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 3 + - segment: __IMPORT + section: __pointers + type: S_NON_LAZY_SYMBOL_POINTERS + attributes: [ ] + address: 0x0000000000000044 + content: [ 0x00, 0x00, 0x00, 0x00 ] + indirect-syms: [ 5 ] + - segment: __TEXT + section: __eh_frame + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000048 + content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x52, 0x00, 0x01, 0x7C, 0x08, 0x01, + 0x10, 0x0C, 0x05, 0x04, 0x88, 0x01, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x98, 0xFF, 0xFF, 0xFF, 0x39, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x08, 0x84, 0x02, 0x42, 0x0D, + 0x04, 0x44, 0x86, 0x03, 0x18, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0xB5, 0xFF, 
0xFF, 0xFF, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x41, 0x0E, 0x08, + 0x84, 0x02, 0x42, 0x0D, 0x04, 0x00, 0x00, 0x00 ] +global-symbols: + - name: __Z3barv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000039 + - name: __Z3foov + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: __ZTIi + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_allocate_exception + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_throw + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: defined-atoms: +# CHECK: - ref-name: [[CIE:L[L0-9]+]] +# CHECK: type: unwind-cfi +# CHECK: content: +# CHECK: - type: unwind-cfi +# CHECK: content: +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: [[CIE]] +# CHECK: - kind: delta32 +# CHECK: offset: 8 +# CHECK: target: __Z3foov +# CHECK: - type: unwind-cfi +# CHECK: content: +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: [[CIE]] +# CHECK: - kind: delta32 +# CHECK: offset: 8 +# CHECK: target: __Z3barv + diff --git a/lld/test/mach-o/parse-eh-frame-x86-labeled.yaml b/lld/test/mach-o/parse-eh-frame-x86-labeled.yaml new file mode 100644 index 000000000000..b1853ae9ac46 --- /dev/null +++ b/lld/test/mach-o/parse-eh-frame-x86-labeled.yaml @@ -0,0 +1,193 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of old __eh_frame (dwarf unwind) section that has .eh labels +# and relocations. 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0x56, 0x83, 0xEC, 0x14, 0xE8, + 0x00, 0x00, 0x00, 0x00, 0x5E, 0xC7, 0x04, 0x24, + 0x04, 0x00, 0x00, 0x00, 0xE8, 0xE7, 0xFF, 0xFF, + 0xFF, 0xC7, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x8B, + 0x8E, 0x38, 0x00, 0x00, 0x00, 0x89, 0x4C, 0x24, + 0x04, 0x89, 0x04, 0x24, 0xC7, 0x44, 0x24, 0x08, + 0x00, 0x00, 0x00, 0x00, 0xE8, 0xC7, 0xFF, 0xFF, + 0xFF, 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, + 0xBC, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000040 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000035 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 7 + - offset: 0x00000021 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000044 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000C + - offset: 0x00000015 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 6 + - segment: __IMPORT + section: __pointers + type: S_NON_LAZY_SYMBOL_POINTERS + attributes: [ ] + address: 0x0000000000000044 + content: [ 0x00, 0x00, 0x00, 0x00 ] + indirect-syms: [ 5 ] + - segment: __TEXT + section: __eh_frame + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000048 + content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x52, 0x00, 0x01, 0x7C, 0x08, 0x01, + 0x10, 0x0C, 0x05, 0x04, 0x88, 0x01, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x98, 0xFF, 0xFF, 0xFF, 0x39, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x08, 0x84, 0x02, 0x42, 0x0D, + 0x04, 0x44, 0x86, 0x03, 0x18, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0xB5, 0xFF, 
0xFF, 0xFF, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x41, 0x0E, 0x08, + 0x84, 0x02, 0x42, 0x0D, 0x04, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x0000001C + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000064 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000048 + - offset: 0x00000020 + scattered: true + type: GENERIC_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000000 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000068 + - offset: 0x00000038 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000080 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000048 + - offset: 0x0000003C + scattered: true + type: GENERIC_RELOC_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000039 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000084 +local-symbols: + - name: EH_frame0 + type: N_SECT + sect: 3 + value: 0x0000000000000048 +global-symbols: + - name: __Z3barv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000039 + - name: __Z3barv.eh + type: N_SECT + scope: [ N_EXT ] + sect: 3 + value: 0x000000000000007C + - name: __Z3foov + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: __Z3foov.eh + type: N_SECT + scope: [ N_EXT ] + sect: 3 + value: 0x0000000000000060 +undefined-symbols: + - name: __ZTIi + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_allocate_exception + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_throw + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - ref-name: [[CIE:L[L0-9]+]] +# CHECK: type: unwind-cfi +# CHECK: content: +# CHECK: - type: unwind-cfi +# CHECK: content: +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: [[CIE]] +# CHECK: - kind: delta32 +# CHECK: offset: 8 +# CHECK: target: __Z3foov +# CHECK: - type: unwind-cfi +# CHECK: content: +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: [[CIE]] +# CHECK: - kind: delta32 +# CHECK: offset: 8 +# CHECK: target: __Z3barv + diff --git a/lld/test/mach-o/parse-eh-frame.yaml b/lld/test/mach-o/parse-eh-frame.yaml new file mode 100644 index 000000000000..014e1f21c005 --- /dev/null +++ b/lld/test/mach-o/parse-eh-frame.yaml @@ -0,0 +1,88 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of __eh_frame (dwarf unwind) section. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0xB8, 0x09, 0x00, 0x00, + 0x00, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, 0xB8, + 0x0A, 0x00, 0x00, 0x00, 0x5D, 0xC3 ] + - segment: __TEXT + section: __eh_frame + type: S_COALESCED + attributes: [ ] + alignment: 8 + address: 0x0000000000000058 + content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, + 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x88, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, + 0x6B, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0B, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: __Z3barv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: __Z3foov + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000B +... + +# CHECK: defined-atoms: +# CHECK: - ref-name: [[CIE:L[0-9]+]] +# CHECK: type: unwind-cfi +# CHECK: content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00, +# CHECK: 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ] +# CHECK: - type: unwind-cfi +# CHECK: content: [ 24, 00, 00, 00, 1C, 00, 00, 00, 88, FF, FF, FF, +# CHECK: FF, FF, FF, FF, 0B, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, +# CHECK: 00, 00, 00, 00 ] +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: [[CIE]] +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 8 +# CHECK: target: __Z3barv +# CHECK: - type: unwind-cfi +# CHECK: content: [ 24, 00, 00, 00, 44, 00, 00, 00, 6B, FF, FF, FF, +# CHECK: FF, FF, FF, FF, 0B, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, +# CHECK: 00, 00, 00, 00 ] +# CHECK: references: +# CHECK: - kind: negDelta32 +# CHECK: offset: 4 +# CHECK: target: [[CIE]] +# CHECK: - kind: unwindFDEToFunction +# CHECK: offset: 8 +# CHECK: target: __Z3foov +# CHECK: - name: __Z3barv +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, B8, 09, 00, 00, 00, 5D, C3 ] +# CHECK: - name: __Z3foov +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] + diff --git a/lld/test/mach-o/parse-function.yaml b/lld/test/mach-o/parse-function.yaml new file mode 100644 index 000000000000..b6d24fee6ff6 --- /dev/null +++ b/lld/test/mach-o/parse-function.yaml @@ -0,0 +1,100 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t +# RUN: ld64.lld.darwinold -arch x86_64 -r %t -print_atoms -o %t2 | FileCheck %s +# +# Test 
parsing of mach-o functions. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, + 0x90, 0x90, 0xC3, 0x90, 0x90, 0x90, 0x90, 0xC3, + 0xCC, 0x31, 0xC0, 0xC3 ] +local-symbols: + - name: _myStatic + type: N_SECT + sect: 1 + value: 0x000000000000000B +global-symbols: + - name: _myGlobal + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + - name: _myGlobalWeak + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_WEAK_DEF ] + value: 0x0000000000000002 + - name: _myHidden + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + value: 0x0000000000000004 + - name: _myHiddenWeak + type: N_SECT + scope: [ N_EXT, N_PEXT ] + sect: 1 + desc: [ N_WEAK_DEF ] + value: 0x0000000000000007 + - name: _myStripNot + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_NO_DEAD_STRIP ] + value: 0x0000000000000010 + - name: _myResolver + type: N_SECT + scope: [ N_EXT ] + sect: 1 + desc: [ N_SYMBOL_RESOLVER ] + value: 0x0000000000000011 +... 
+ +# CHECK-NOT: name: +# CHECK: content: [ CC ] + +# CHECK: name: _myGlobal +# CHECK: scope: global +# CHECK: content: [ C3 ] + +# CHECK: name: _myGlobalWeak +# CHECK: scope: global +# CHECK: content: [ 90, C3 ] +# CHECK: merge: as-weak + +# CHECK: name: _myHidden +# CHECK: scope: hidden +# CHECK: content: [ 90, 90, C3 ] + +# CHECK: name: _myHiddenWeak +# CHECK: scope: hidden +# CHECK: content: [ 90, 90, 90, C3 ] +# CHECK: merge: as-weak + +# CHECK: name: _myStatic +# CHECK-NOT: scope: global +# CHECK-NOT: scope: hidden +# CHECK: content: [ 90, 90, 90, 90, C3 ] + +# CHECK: name: _myStripNot +# CHECK: scope: global +# CHECK: content: [ CC ] +# CHECK: dead-strip: never + +# CHECK: name: _myResolver +# CHECK: scope: global +# CHECK: type: resolver +# CHECK: content: [ 31, C0, C3 ] + diff --git a/lld/test/mach-o/parse-initializers32.yaml b/lld/test/mach-o/parse-initializers32.yaml new file mode 100644 index 000000000000..5f243198050e --- /dev/null +++ b/lld/test/mach-o/parse-initializers32.yaml @@ -0,0 +1,84 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of literal sections. 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x89, 0xE5, + 0x5D, 0xC3, 0x55, 0x89, 0xE5, 0x5D, 0xC3 ] + - segment: __DATA + section: __mod_init_func + type: S_MOD_INIT_FUNC_POINTERS + attributes: [ ] + alignment: 2 + address: 0x0000000000000044 + content: [ 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000004 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - segment: __DATA + section: __mod_term_func + type: S_MOD_TERM_FUNC_POINTERS + attributes: [ ] + alignment: 2 + address: 0x0000000000000104 + content: [ 0x0A, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _init + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _init2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000005 + - name: _term + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000A +... 
+ + +# CHECK:defined-atoms: +# CHECK: - type: initializer-pointer +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: dead-strip: never +# CHECK: - type: initializer-pointer +# CHECK: content: [ 05, 00, 00, 00 ] +# CHECK: dead-strip: never +# CHECK: - type: terminator-pointer +# CHECK: content: [ 0A, 00, 00, 00 ] +# CHECK: dead-strip: never +# CHECK: - name: _init +# CHECK: scope: global +# CHECK: content: [ 55, 89, E5, 5D, C3 ] +# CHECK: - name: _init2 +# CHECK: scope: global +# CHECK: content: [ 55, 89, E5, 5D, C3 ] +# CHECK: - name: _term +# CHECK: scope: global +# CHECK: content: [ 55, 89, E5, 5D, C3 ] diff --git a/lld/test/mach-o/parse-initializers64.yaml b/lld/test/mach-o/parse-initializers64.yaml new file mode 100644 index 000000000000..8d1503ba6665 --- /dev/null +++ b/lld/test/mach-o/parse-initializers64.yaml @@ -0,0 +1,105 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of literal sections. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, + 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, + 0x5D, 0xC3 ] + - segment: __DATA + section: __mod_init_func + type: S_MOD_INIT_FUNC_POINTERS + attributes: [ ] + alignment: 1 + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000008 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 + - segment: __DATA + section: __mod_term_func + type: S_MOD_TERM_FUNC_POINTERS + attributes: [ ] + alignment: 8 + address: 
0x0000000000000108 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +global-symbols: + - name: _init + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _init2 + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000006 + - name: _term + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x000000000000000C +... + + +# CHECK:defined-atoms: +# CHECK: - type: initializer-pointer +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: dead-strip: never +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: _init +# CHECK: - type: initializer-pointer +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: dead-strip: never +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: _init2 +# CHECK: - type: terminator-pointer +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: dead-strip: never +# CHECK: references: +# CHECK: - kind: pointer64 +# CHECK: offset: 0 +# CHECK: target: _term +# CHECK: - name: _init +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, 5D, C3 ] +# CHECK: - name: _init2 +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, 5D, C3 ] +# CHECK: - name: _term +# CHECK: scope: global +# CHECK: content: [ 55, 48, 89, E5, 5D, C3 ] diff --git a/lld/test/mach-o/parse-literals-error.yaml b/lld/test/mach-o/parse-literals-error.yaml new file mode 100644 index 000000000000..b426c819422f --- /dev/null +++ b/lld/test/mach-o/parse-literals-error.yaml @@ -0,0 +1,25 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t 2> %t.err +# RUN: FileCheck %s < %t.err +# +# Test for error if literal section is not correct size multiple. 
+# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __literal8 + type: S_8BYTE_LITERALS + attributes: [ ] + alignment: 0 + address: 0x0000000000000120 + content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D ] +... + +# CHECK: error: + diff --git a/lld/test/mach-o/parse-literals.yaml b/lld/test/mach-o/parse-literals.yaml new file mode 100644 index 000000000000..8792e9524206 --- /dev/null +++ b/lld/test/mach-o/parse-literals.yaml @@ -0,0 +1,93 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of literal sections. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + alignment: 1 + address: 0x0000000000000100 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, + 0x65, 0x72, 0x65, 0x00, 0x77, 0x6F, 0x72, 0x6C, + 0x00 ] + - segment: __TEXT + section: __literal4 + type: S_4BYTE_LITERALS + attributes: [ ] + alignment: 1 + address: 0x0000000000000114 + content: [ 0x01, 0x02, 0x03, 0x04, 0x11, 0x12, 0x13, 0x14, + 0x28, 0x29, 0x2A, 0x2B ] + - segment: __TEXT + section: __literal8 + type: S_8BYTE_LITERALS + attributes: [ ] + alignment: 1 + address: 0x0000000000000120 + content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F ] + - segment: __TEXT + section: __literal16 + type: S_16BYTE_LITERALS + attributes: [ ] + alignment: 1 + address: 0x0000000000000130 + content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00 ] + - segment: __TEXT + section: __ustring + type: S_REGULAR + attributes: [ ] + alignment: 1 + address: 0x0000000000000100 + content: [ 0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, + 
0x6F, 0x00, 0x00, 0x00, 0x74, 0x00, 0x68, 0x00, + 0x65, 0x00, 0x72, 0x00, 0x00, 0x00 ] +... + + +# CHECK:defined-atoms: +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 77, 6F, 72, 6C, 00 ] +# CHECK: - scope: hidden +# CHECK: type: utf16-string +# CHECK: content: [ 68, 00, 65, 00, 6C, 00, 6C, 00, 6F, 00, 00, 00 ] +# CHECK: - scope: hidden +# CHECK: type: utf16-string +# CHECK: content: [ 74, 00, 68, 00, 65, 00, 72, 00, 00, 00 ] +# CHECK: - scope: hidden +# CHECK: type: const-4-byte +# CHECK: content: [ 01, 02, 03, 04 ] +# CHECK: - scope: hidden +# CHECK: type: const-4-byte +# CHECK: content: [ 11, 12, 13, 14 ] +# CHECK: - scope: hidden +# CHECK: type: const-4-byte +# CHECK: content: [ 28, 29, 2A, 2B ] +# CHECK: - scope: hidden +# CHECK: type: const-8-byte +# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] +# CHECK: - scope: hidden +# CHECK: type: const-8-byte +# CHECK: content: [ 28, 29, 2A, 2B, 2C, 2D, 2E, 2F ] +# CHECK: - scope: hidden +# CHECK: type: const-16-byte +# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08, 09, 0A, 0B, 0C, +# CHECK: 0D, 0E, 0F, 00 ] + diff --git a/lld/test/mach-o/parse-non-lazy-pointers.yaml b/lld/test/mach-o/parse-non-lazy-pointers.yaml new file mode 100644 index 000000000000..591c116e00c6 --- /dev/null +++ b/lld/test/mach-o/parse-non-lazy-pointers.yaml @@ -0,0 +1,98 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of non-lazy-pointer sections. 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, 0x00, + 0x59, 0x8D, 0x81, 0x14, 0x00, 0x00, 0x00, 0x8D, + 0x81, 0x18, 0x00, 0x00, 0x00, 0x5D, 0xC3, 0x55, + 0x89, 0xE5, 0x5D, 0xC3 ] + relocations: + - offset: 0x00000011 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000020 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000008 + - offset: 0x0000000B + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x0000001C + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000008 + - segment: __IMPORT + section: __pointers + type: S_NON_LAZY_SYMBOL_POINTERS + attributes: [ ] + address: 0x000000000000001C + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + indirect-syms: [ 2, 2147483648 ] +local-symbols: + - name: _foo + type: N_SECT + sect: 1 + value: 0x0000000000000017 +global-symbols: + - name: _get + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ + +# CHECK:defined-atoms: +# CHECK: - ref-name: [[GOT1:L[L0-9]+]] +# CHECK: scope: hidden +# CHECK: type: got +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: merge: by-content +# CHECK: - ref-name: [[GOT2:L[L0-9]+]] +# CHECK: scope: hidden +# CHECK: type: got +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: merge: by-content +# CHECK: - name: _get +# CHECK: scope: global +# CHECK: content: [ 55, 89, E5, E8, 00, 00, 00, 00, 59, 8D, 81, 14, +# CHECK: 00, 00, 00, 8D, 81, 18, 00, 00, 00, 5D, C3 ] +# CHECK: references: +# CHECK: - kind: funcRel32 +# CHECK: offset: 11 +# CHECK: target: [[GOT1]] +# CHECK: - kind: funcRel32 +# CHECK: offset: 17 +# CHECK: target: [[GOT2]] +# CHECK: - name: _foo +# CHECK: content: [ 55, 89, E5, 5D, C3 ] + + diff --git a/lld/test/mach-o/parse-relocs-x86.yaml b/lld/test/mach-o/parse-relocs-x86.yaml new file mode 100644 index 000000000000..b1bd6199ff0a --- /dev/null +++ b/lld/test/mach-o/parse-relocs-x86.yaml @@ -0,0 +1,296 @@ +# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s \ +# RUN: && ld64.lld.darwinold -arch i386 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing and writing of x86 relocations. +# +# The first step tests if the supplied mach-o file is parsed into the correct +# set of references. The second step verifies relocations can be round-tripped +# by writing to a new .o file, then parsing that file which should result in +# the same references. +# +# .text +#_test: +# call _undef +# call _undef+2 +# call _foo +# call _foo+2 +# callw _undef +# callw _foo +# callw _foo+2 +#L1: +# movl _undef, %eax +# movl _x, %eax +# movl _x+4, %eax +# movl _x-L1(%eax), %eax +# movl _x+4-L1(%eax), %eax +# +#_foo: +# ret +# +# .data +#_x: +# .long _undef +# .long _undef+7 +# .long _foo +# .long _foo+3 +# .long _test - . +# .long _test+3 - . 
+# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xE8, 0xFB, 0xFF, 0xFF, 0xFF, 0xE8, 0xF8, 0xFF, + 0xFF, 0xFF, 0xE8, 0x2C, 0x00, 0x00, 0x00, 0xE8, + 0x29, 0x00, 0x00, 0x00, 0x66, 0xE8, 0xE8, 0xFF, + 0x66, 0xE8, 0x1F, 0x00, 0x66, 0xE8, 0x1D, 0x00, + 0xA1, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x3C, 0x00, + 0x00, 0x00, 0xA1, 0x40, 0x00, 0x00, 0x00, 0x8B, + 0x80, 0x1C, 0x00, 0x00, 0x00, 0x8B, 0x80, 0x20, + 0x00, 0x00, 0x00, 0xC3 ] + relocations: + - offset: 0x00000037 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x0000003C + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000020 + - offset: 0x00000031 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x0000003C + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000020 + - offset: 0x0000002B + scattered: true + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + value: 0x0000003C + - offset: 0x00000026 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000021 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 3 + - offset: 0x0000001E + scattered: true + type: GENERIC_RELOC_VANILLA + length: 1 + pc-rel: true + value: 0x0000003B + - offset: 0x0000001A + type: GENERIC_RELOC_VANILLA + length: 1 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000016 + type: GENERIC_RELOC_VANILLA + length: 1 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000010 + scattered: true + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + value: 0x0000003B + - offset: 0x0000000B + type: GENERIC_RELOC_VANILLA + 
length: 2 + pc-rel: true + extern: false + symbol: 1 + - offset: 0x00000006 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 3 + - offset: 0x00000001 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 3 + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x000000000000003C + content: [ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, + 0xB4, 0xFF, 0xFF, 0xFF, 0xB3, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000014 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000000 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x00000050 + - offset: 0x00000010 + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000000 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000004C + - offset: 0x0000000C + scattered: true + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + value: 0x0000003B + - offset: 0x00000008 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000004 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 3 + - offset: 0x00000000 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 3 +local-symbols: + - name: _test + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: _foo + type: N_SECT + sect: 1 + value: 0x000000000000003B + - name: _x + type: N_SECT + sect: 2 + value: 0x000000000000003C +undefined-symbols: + - name: _undef + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - name: _x +# CHECK: type: data +# CHECK: references: +# CHECK: - kind: pointer32 +# CHECK: offset: 0 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: pointer32 +# CHECK: offset: 4 +# CHECK: target: _undef +# CHECK: addend: 7 +# CHECK: - kind: pointer32 +# CHECK: offset: 8 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: pointer32 +# CHECK: offset: 12 +# CHECK: target: _foo +# CHECK: addend: 3 +# CHECK: - kind: delta32 +# CHECK: offset: 16 +# CHECK: target: _test +# CHECK: - kind: delta32 +# CHECK: offset: 20 +# CHECK: target: _test +# CHECK: addend: 3 +# CHECK: - name: _test +# CHECK: references: +# CHECK: - kind: branch32 +# CHECK: offset: 1 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: branch32 +# CHECK: offset: 6 +# CHECK: target: _undef +# CHECK: addend: 2 +# CHECK: - kind: branch32 +# CHECK: offset: 11 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: branch32 +# CHECK: offset: 16 +# CHECK: target: _foo +# CHECK: addend: 2 +# CHECK: - kind: branch16 +# CHECK: offset: 22 +# CHECK: target: _undef +# CHECK-NOT: addend: +# CHECK: - kind: branch16 +# CHECK: offset: 26 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: branch16 +# CHECK: offset: 30 +# CHECK: target: _foo +# CHECK: addend: 2 +# CHECK: - kind: abs32 +# CHECK: offset: 33 +# CHECK: target: _undef +# CHECK: - kind: abs32 +# CHECK: offset: 38 +# CHECK: target: _x +# CHECK: - kind: abs32 +# CHECK: offset: 43 +# CHECK: target: _x +# CHECK: addend: 4 +# CHECK: - kind: funcRel32 +# CHECK: offset: 49 +# CHECK: target: _x +# CHECK: addend: -32 +# CHECK: - kind: funcRel32 +# CHECK: offset: 55 +# CHECK: target: _x +# CHECK: addend: -28 + diff --git a/lld/test/mach-o/parse-section-no-symbol.yaml b/lld/test/mach-o/parse-section-no-symbol.yaml new file mode 100644 index 000000000000..a1747e97c126 --- /dev/null +++ b/lld/test/mach-o/parse-section-no-symbol.yaml @@ -0,0 +1,23 @@ +# RUN: ld64.lld.darwinold 
-arch x86_64 -r %s -print_atoms -o %t2 | FileCheck %s +# +# Test parsing of mach-o functions with no symbols at all. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xCC ] +... + +# CHECK-NOT: name: +# CHECK: content: [ CC ] diff --git a/lld/test/mach-o/parse-tentative-defs.yaml b/lld/test/mach-o/parse-tentative-defs.yaml new file mode 100644 index 000000000000..345038b00859 --- /dev/null +++ b/lld/test/mach-o/parse-tentative-defs.yaml @@ -0,0 +1,88 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of tentative definitions, including size, scope, and alignment. +# +# +# int tent4; +# long tent8; +# __attribute__((visibility("hidden"))) int tentHidden; +# __attribute__((aligned(16))) int tent4_16; +# __attribute__((aligned(32))) long tent64_32[8]; +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __tex + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + address: 0x0000000000000000 +undefined-symbols: + - name: _tent4 + type: N_UNDF + scope: [ N_EXT ] + desc: 0x0200 + value: 0x0000000000000004 + - name: _tent4_16 + type: N_UNDF + scope: [ N_EXT ] + desc: 0x0400 + value: 0x0000000000000004 + - name: _tent64_32 + type: N_UNDF + scope: [ N_EXT ] + desc: 0x0500 + value: 0x0000000000000040 + - name: _tent8 + type: N_UNDF + scope: [ N_EXT ] + desc: 0x0300 + value: 0x0000000000000008 + - name: _tentHidden + type: N_UNDF + scope: [ N_EXT, N_PEXT ] + desc: 0x0200 + value: 0x0000000000000004 +... 
+ + +# CHECK: defined-atoms: +# CHECK: name: _tent4 +# CHECK: scope: global +# CHECK: type: zero-fill +# CHECK: size: 4 +# CHECK: merge: as-tentative +# CHECK: alignment: 4 + +# CHECK: name: _tent4_16 +# CHECK: scope: global +# CHECK: type: zero-fill +# CHECK: size: 4 +# CHECK: merge: as-tentative +# CHECK: alignment: 16 + +# CHECK: name: _tent64_32 +# CHECK: scope: global +# CHECK: type: zero-fill +# CHECK: size: 64 +# CHECK: merge: as-tentative +# CHECK: alignment: 32 + +# CHECK: name: _tent8 +# CHECK: scope: global +# CHECK: type: zero-fill +# CHECK: size: 8 +# CHECK: merge: as-tentative +# CHECK: alignment: 8 + +# CHECK: name: _tentHidden +# CHECK: scope: hidden +# CHECK: type: zero-fill +# CHECK: size: 4 +# CHECK: merge: as-tentative +# CHECK: alignment: 4 diff --git a/lld/test/mach-o/parse-text-relocs-arm64.yaml b/lld/test/mach-o/parse-text-relocs-arm64.yaml new file mode 100644 index 000000000000..1c0922bd29f9 --- /dev/null +++ b/lld/test/mach-o/parse-text-relocs-arm64.yaml @@ -0,0 +1,237 @@ +# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s \ +# RUN: && ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing and writing of arm64 text relocations. +# +# The first step tests if the supplied mach-o file is parsed into the correct +# set of references. The second step verifies relocations can be round-tripped +# by writing to a new .o file, then parsing that file which should result in +# the same references. 
+# +#_test: + + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x94, + 0x01, 0x00, 0x00, 0x90, 0x20, 0x00, 0x40, 0x39, + 0x20, 0x00, 0x40, 0x79, 0x20, 0x00, 0x40, 0xB9, + 0x20, 0x00, 0x40, 0xF9, 0x20, 0x00, 0xC0, 0x3D, + 0x01, 0x00, 0x00, 0x90, 0x20, 0x00, 0x40, 0xB9, + 0x01, 0x00, 0x00, 0x90, 0x20, 0x00, 0x40, 0xF9, + 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x40, 0xF9 ] + relocations: + - offset: 0x00000034 + type: ARM64_RELOC_TLVP_LOAD_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 5 + - offset: 0x00000030 + type: ARM64_RELOC_TLVP_LOAD_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x0000002C + type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 6 + - offset: 0x00000028 + type: ARM64_RELOC_GOT_LOAD_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 6 + - offset: 0x00000024 + type: ARM64_RELOC_ADDEND + length: 2 + pc-rel: false + extern: false + symbol: 16 + - offset: 0x00000024 + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000020 + type: ARM64_RELOC_ADDEND + length: 2 + pc-rel: false + extern: false + symbol: 16 + - offset: 0x00000020 + type: ARM64_RELOC_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x0000001C + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000018 + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000014 + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000010 + type: ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x0000000C + type: 
ARM64_RELOC_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000008 + type: ARM64_RELOC_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 2 + - offset: 0x00000004 + type: ARM64_RELOC_ADDEND + length: 2 + pc-rel: false + extern: false + symbol: 8 + - offset: 0x00000004 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - offset: 0x00000000 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 4 + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000038 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: _func + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: _v1 + type: N_SECT + sect: 2 + value: 0x0000000000000038 + - name: ltmp1 + type: N_SECT + sect: 2 + value: 0x0000000000000038 +undefined-symbols: + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _tlv + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _v2 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - name: _v1 +# CHECK: type: data +# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, +# CHECK: 00, 00, 00, 00 ] +# CHECK: - name: _func +# CHECK: content: [ 00, 00, 00, 94, 00, 00, 00, 94, 01, 00, 00, 90, +# CHECK: 20, 00, 40, 39, 20, 00, 40, 79, 20, 00, 40, B9, +# CHECK: 20, 00, 40, F9, 20, 00, C0, 3D, 01, 00, 00, 90, +# CHECK: 20, 00, 40, B9, 01, 00, 00, 90, 20, 00, 40, F9, +# CHECK: 00, 00, 00, 90, 00, 00, 40, F9 ] +# CHECK: references: +# CHECK: - kind: branch26 +# CHECK: offset: 0 +# CHECK: target: _foo +# CHECK: - kind: branch26 +# CHECK: offset: 4 +# CHECK: target: _foo +# CHECK: addend: 8 +# CHECK: - kind: page21 +# CHECK: offset: 8 +# CHECK: target: _v1 +# CHECK: - kind: offset12 +# CHECK: offset: 12 +# CHECK: target: _v1 +# CHECK: - kind: offset12scale2 +# CHECK: offset: 16 +# CHECK: target: _v1 +# CHECK: - kind: offset12scale4 +# CHECK: offset: 20 +# CHECK: target: _v1 +# CHECK: - kind: offset12scale8 +# CHECK: offset: 24 +# CHECK: target: _v1 +# CHECK: - kind: offset12scale16 +# CHECK: offset: 28 +# CHECK: target: _v1 +# CHECK: - kind: page21 +# CHECK: offset: 32 +# CHECK: target: _v1 +# CHECK: addend: 16 +# CHECK: - kind: offset12scale4 +# CHECK: offset: 36 +# CHECK: target: _v1 +# CHECK: addend: 16 +# CHECK: - kind: gotPage21 +# CHECK: offset: 40 +# CHECK: target: _v2 +# CHECK: - kind: gotOffset12 +# CHECK: offset: 44 +# CHECK: target: _v2 +# CHECK: - kind: tlvPage21 +# CHECK: offset: 48 +# CHECK: target: _tlv +# CHECK: - kind: tlvOffset12 +# CHECK: offset: 52 +# CHECK: target: _tlv +# CHECK: undefined-atoms: +# CHECK: - name: _foo +# CHECK: - name: _tlv +# CHECK: - name: _v2 + diff --git a/lld/test/mach-o/parse-text-relocs-x86_64.yaml b/lld/test/mach-o/parse-text-relocs-x86_64.yaml new file mode 100644 index 000000000000..d5ce60dca2e9 --- /dev/null +++ b/lld/test/mach-o/parse-text-relocs-x86_64.yaml @@ -0,0 +1,204 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s \ 
+# RUN: && ld64.lld.darwinold -arch x86_64 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing and writing of x86_64 text relocations. +# +# The first step tests if the supplied mach-o file is parsed into the correct +# set of references. The second step verifies relocations can be round-tripped +# by writing to a new .o file, then parsing that file which should result in +# the same references. +# +#_test: +# call _foo +# call _foo+4 +# movq _foo@GOTPCREL(%rip), %rax +# pushq _foo@GOTPCREL(%rip) +# movl _foo(%rip), %eax +# movl _foo+4(%rip), %eax +# movb $0x12, _foo(%rip) +# movw $0x1234, _foo(%rip) +# movl $0x12345678, _foo(%rip) +# movl L2(%rip), %eax +# movb $0x12, L2(%rip) +# movw $0x1234, L2(%rip) +# movl $0x12345678, L2(%rip) +# +# .data +#L2: .long 0 + + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x04, 0x00, + 0x00, 0x00, 0x48, 0x8B, 0x05, 0x04, 0x00, 0x00, + 0x00, 0xFF, 0x35, 0x04, 0x00, 0x00, 0x00, 0x8B, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x8B, 0x05, 0x04, + 0x00, 0x00, 0x00, 0xC6, 0x05, 0xFF, 0xFF, 0xFF, + 0xFF, 0x12, 0x66, 0xC7, 0x05, 0xFE, 0xFF, 0xFF, + 0xFF, 0x34, 0x12, 0xC7, 0x05, 0xFC, 0xFF, 0xFF, + 0xFF, 0x78, 0x56, 0x34, 0x12, 0x8B, 0x05, 0x1A, + 0x00, 0x00, 0x00, 0xc6, 0x05, 0x13, 0x00, 0x00, + 0x00, 0x12, 0x66, 0xc7, 0x05, 0x0a, 0x00, 0x00, + 0x00, 0x34, 0x12, 0xc7, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x78, 0x56, 0x34, 0x12 ] + relocations: + - offset: 0x00000055 + type: X86_64_RELOC_SIGNED_4 + length: 2 + pc-rel: true + extern: false + symbol: 2 + - offset: 0x0000004d + type: X86_64_RELOC_SIGNED_2 + length: 2 + pc-rel: true + extern: false + symbol: 2 + - offset: 0x00000045 + type: X86_64_RELOC_SIGNED_1 + length: 2 + pc-rel: true + extern: false + symbol: 2 + - offset: 0x0000003F + type: X86_64_RELOC_SIGNED + 
length: 2 + pc-rel: true + extern: false + symbol: 2 + - offset: 0x00000035 + type: X86_64_RELOC_SIGNED_4 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000002D + type: X86_64_RELOC_SIGNED_2 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000025 + type: X86_64_RELOC_SIGNED_1 + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000001F + type: X86_64_RELOC_SIGNED + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000019 + type: X86_64_RELOC_SIGNED + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000013 + type: X86_64_RELOC_GOT + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000000D + type: X86_64_RELOC_GOT_LOAD + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000006 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000001 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + address: 0x000000000000005D + content: [ 0x00, 0x00, 0x00, 0x00 ] +local-symbols: + - name: _test + type: N_SECT + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _foo + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - ref-name: [[LABEL:L[0-9]+]] +# CHECK: type: data +# CHECK: content: [ 00, 00, 00, 00 ] +# CHECK: - name: _test +# CHECK: references: +# CHECK: - kind: branch32 +# CHECK: offset: 1 +# CHECK: target: _foo +# CHECK: - kind: branch32 +# CHECK: offset: 6 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: ripRel32GotLoad +# CHECK: offset: 13 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: ripRel32Got +# CHECK: offset: 19 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: ripRel32 +# CHECK: offset: 25 +# CHECK: target: _foo +# CHECK: - kind: ripRel32 +# CHECK: offset: 31 +# CHECK: target: _foo +# CHECK: addend: 4 +# CHECK: - kind: ripRel32Minus1 +# CHECK: offset: 37 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: ripRel32Minus2 +# CHECK: offset: 45 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: ripRel32Minus4 +# CHECK: offset: 53 +# CHECK: target: _foo +# CHECK-NOT: addend: +# CHECK: - kind: ripRel32Anon +# CHECK: offset: 63 +# CHECK: target: [[LABEL]] +# CHECK-NOT: addend: +# CHECK: - kind: ripRel32Minus1Anon +# CHECK: offset: 69 +# CHECK: target: [[LABEL]] +# CHECK-NOT: addend: +# CHECK: - kind: ripRel32Minus2Anon +# CHECK: offset: 77 +# CHECK: target: [[LABEL]] +# CHECK-NOT: addend: +# CHECK: - kind: ripRel32Minus4Anon +# CHECK: offset: 85 +# CHECK: target: [[LABEL]] +# CHECK-NOT: addend: diff --git a/lld/test/mach-o/parse-tlv-relocs-x86-64.yaml b/lld/test/mach-o/parse-tlv-relocs-x86-64.yaml new file mode 100644 index 000000000000..7c968dc094a7 --- /dev/null +++ b/lld/test/mach-o/parse-tlv-relocs-x86-64.yaml @@ -0,0 +1,100 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s \ +# RUN: && ld64.lld.darwinold -arch x86_64 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing of x86_64 tlv relocations. 
+ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 16 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x3D, 0x00, + 0x00, 0x00, 0x00, 0xFF, 0x17, 0x8B, 0x00, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000007 + type: X86_64_RELOC_TLV + length: 2 + pc-rel: true + extern: true + symbol: 2 + - segment: __DATA + section: __thread_data + type: S_THREAD_LOCAL_REGULAR + attributes: [ ] + alignment: 4 + address: 0x0000000000000014 + content: [ 0x07, 0x00, 0x00, 0x00 ] + - segment: __DATA + section: __thread_vars + type: S_THREAD_LOCAL_VARIABLES + attributes: [ ] + address: 0x0000000000000018 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000010 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 3 +local-symbols: + - name: '_x$tlv$init' + type: N_SECT + sect: 2 + value: 0x0000000000000014 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _x + type: N_SECT + scope: [ N_EXT ] + sect: 3 + value: 0x0000000000000018 +undefined-symbols: + - name: __tlv_bootstrap + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +page-size: 0x00000000 +... 
+ +# CHECK: - name: _x +# CHECK-NEXT: scope: global +# CHECK-NEXT: type: tlv-thunk +# CHECK-NOT: - name: +# CHECK: references: +# CHECK-NEXT: - kind: pointer64 +# CHECK-NEXT: offset: 0 +# CHECK-NEXT: target: __tlv_bootstrap +# CHECK-NEXT: - kind: tlvInitSectionOffset +# CHECK-NEXT: offset: 16 +# CHECK-NEXT: target: '_x$tlv$init' +# CHECK: - name: _main +# CHECK-NOT: - name: +# CHECK-NEXT: scope: global +# CHECK: references: +# CHECK-NEXT: - kind: ripRel32Tlv +# CHECK-NEXT: offset: 7 +# CHECK-NEXT: target: _x diff --git a/lld/test/mach-o/re-exported-dylib-ordinal.yaml b/lld/test/mach-o/re-exported-dylib-ordinal.yaml new file mode 100644 index 000000000000..8fb5ac8b9967 --- /dev/null +++ b/lld/test/mach-o/re-exported-dylib-ordinal.yaml @@ -0,0 +1,46 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ +# RUN: %p/Inputs/re-exported-dylib-ordinal.yaml \ +# RUN: %p/Inputs/re-exported-dylib-ordinal2.yaml \ +# RUN: %p/Inputs/re-exported-dylib-ordinal3.yaml -dylib -o %t \ +# RUN: && llvm-nm -m %t | FileCheck %s +# +# Test that when one dylib A re-exports dylib B that using a symbol from B +# gets recorded as coming from A. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, + 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000008 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _test + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: (undefined) external _bar (from libfoo) +# CHECK: (undefined) external dyld_stub_binder (from libSystem) diff --git a/lld/test/mach-o/rpath.yaml b/lld/test/mach-o/rpath.yaml new file mode 100644 index 000000000000..604eafefb119 --- /dev/null +++ b/lld/test/mach-o/rpath.yaml @@ -0,0 +1,38 @@ +# Check we handle -rpath correctly: +# RUN: ld64.lld.darwinold -arch x86_64 -rpath @loader_path/../Frameworks \ +# RUN: %p/Inputs/x86_64/libSystem.yaml %s -o %t +# RUN: llvm-objdump --private-headers %t | FileCheck %s --check-prefix=CHECK-BINARY-WRITE + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 4 + address: 0x0000000000000000 + content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, + 0x90, 0x90, 0xC3, 0x90, 0x90, 0x90, 0x90, 0xC3, + 0x31, 0xC0, 0xC3 ] +local-symbols: + - name: _myStatic + type: N_SECT + sect: 1 + value: 0x000000000000000B +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 +... + + +# CHECK-BINARY-WRITE: cmd LC_RPATH +# CHECK-BINARY-WRITE-NEXT: cmdsize 40 +# CHECK-BINARY-WRITE-NEXT: path @loader_path/../Frameworks (offset 12) diff --git a/lld/test/mach-o/run-tlv-pass-x86-64.yaml b/lld/test/mach-o/run-tlv-pass-x86-64.yaml new file mode 100644 index 000000000000..73b8c37f10af --- /dev/null +++ b/lld/test/mach-o/run-tlv-pass-x86-64.yaml @@ -0,0 +1,144 @@ +# RUN: ld64.lld.darwinold -macosx_version_min 10.7 -arch x86_64 -print_atoms %s -o %t | FileCheck %s +# RUN: not ld64.lld.darwinold -macosx_version_min 10.6 -arch x86_64 -o %t %s 2> %t2 +# RUN: FileCheck < %t2 %s --check-prefix=CHECK-ERROR +# RUN: llvm-objdump --macho --private-headers %t | FileCheck %s --check-prefix=CHECK-LOADCMDS +# +# Test parsing of x86_64 tlv relocations. 
+ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 16 + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x3D, 0x00, + 0x00, 0x00, 0x00, 0xFF, 0x17, 0x8B, 0x00, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000007 + type: X86_64_RELOC_TLV + length: 2 + pc-rel: true + extern: true + symbol: 2 + - segment: __DATA + section: __thread_bss + type: S_THREAD_LOCAL_ZEROFILL + attributes: [ ] + alignment: 4 + address: 0x0000000000000014 + size: 4 + - segment: __DATA + section: __thread_vars + type: S_THREAD_LOCAL_VARIABLES + attributes: [ ] + address: 0x0000000000000018 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000010 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 3 + - segment: __DATA + section: __dummy + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x00000000000000C0 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] +local-symbols: + - name: '_x$tlv$init' + type: N_SECT + sect: 2 + value: 0x0000000000000014 +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _x + type: N_SECT + scope: [ N_EXT ] + sect: 3 + value: 0x0000000000000018 + - name: '__tlv_bootstrap' + type: N_SECT + scope: [ N_EXT ] + sect: 4 + value: 0x00000000000000C0 + - name: 'dyld_stub_binder' + type: N_SECT + scope: [ N_EXT ] + sect: 4 + value: 
0x00000000000000C8 + - name: 'start' + type: N_SECT + scope: [ N_EXT ] + sect: 4 + value: 0x00000000000000D0 +page-size: 0x00000000 +... + +# CHECK: - name: _x +# CHECK-NEXT: scope: global +# CHECK-NEXT: type: tlv-thunk +# CHECK-NOT: - name: +# CHECK: references: +# CHECK-NEXT: - kind: pointer64 +# CHECK-NEXT: offset: 0 +# CHECK-NEXT: target: __tlv_bootstrap +# CHECK-NEXT: - kind: tlvInitSectionOffset +# CHECK-NEXT: offset: 16 +# CHECK-NEXT: target: '_x$tlv$init' +# CHECK: - name: '_x$tlv$init' +# CHECK-NEXT: type: tlv-zero-fill +# CHECK: - name: _main +# CHECK-NOT: - name: +# CHECK: references: +# CHECK-NEXT: - kind: ripRel32 +# CHECK-NEXT: offset: 7 +# CHECK-NEXT: target: L[[ID:[0-9]+]] +# CHECK: - ref-name: L[[ID]] +# CHECK-NEXT: scope: hidden +# CHECK-NEXT: type: tlv-initializer-ptr +# CHECK-NEXT: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK-NEXT: alignment: 8 +# CHECK-NEXT: permissions: rw- +# CHECK-NEXT: references: +# CHECK-NEXT: - kind: pointer64 +# CHECK-NEXT: offset: 0 +# CHECK-NEXT: target: _x + +# CHECK-ERROR: targeted OS version does not support use of thread local variables in _main for architecture x86_64 + +# CHECK-LOADCMDS: sectname __thread_bss +# CHECK-LOADCMDS: segname __DATA +# CHECK-LOADCMDS: addr 0x{{[0-9A-F]*}} +# CHECK-LOADCMDS: size 0x0000000000000004 +# CHECK-LOADCMDS: offset 0 +# CHECK-LOADCMDS: align 2^2 (4) +# CHECK-LOADCMDS: reloff 0 +# CHECK-LOADCMDS: nreloc 0 +# CHECK-LOADCMDS: type S_THREAD_LOCAL_ZEROFILL diff --git a/lld/test/mach-o/sdk-version-error.yaml b/lld/test/mach-o/sdk-version-error.yaml new file mode 100644 index 000000000000..c3c497022d84 --- /dev/null +++ b/lld/test/mach-o/sdk-version-error.yaml @@ -0,0 +1,22 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -sdk_version 10.blah %s -o %t 2>&1 | FileCheck %s --check-prefix=ERROR + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ 
S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... + +# ERROR: malformed sdkVersion value \ No newline at end of file diff --git a/lld/test/mach-o/sectalign.yaml b/lld/test/mach-o/sectalign.yaml new file mode 100644 index 000000000000..2270faff04e4 --- /dev/null +++ b/lld/test/mach-o/sectalign.yaml @@ -0,0 +1,80 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -dylib \ +# RUN: -sectalign __DATA __custom 0x800 -sectalign __TEXT __text 0x400 \ +# RUN: %p/Inputs/x86_64/libSystem.yaml -o %t \ +# RUN: && llvm-readobj --sections %t | FileCheck %s +# +# Test -sectalign option on __text and a custom section. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x8B, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00, + 0x5D, 0xC3 ] + relocations: + - offset: 0x0000000C + type: X86_64_RELOC_SIGNED + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x00000006 + type: X86_64_RELOC_SIGNED + length: 2 + pc-rel: true + extern: true + symbol: 2 + - segment: __DATA + section: __data + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000014 + content: [ 0x0A, 0x00, 0x00, 0x00 ] + - segment: __DATA + section: __custom + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000018 + content: [ 0x0A, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _a + type: N_SECT + scope: [ N_EXT ] + sect: 2 + value: 0x0000000000000014 + - name: _b + type: N_SECT + scope: [ N_EXT ] + sect: 3 + value: 0x0000000000000018 + - name: _get + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 
0x0000000000000000 + +... + + +# CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00) +# CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00) +# CHECK: Address: 0xC00 + +# CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00) +# CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) +# CHECK: Address: 0x1000 + +# CHECK: Name: __custom (5F 5F 63 75 73 74 6F 6D 00 00 00 00 00 00 00 00) +# CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) +# CHECK: Address: 0x1800 + diff --git a/lld/test/mach-o/sectattrs.yaml b/lld/test/mach-o/sectattrs.yaml new file mode 100644 index 000000000000..b2a71720c5eb --- /dev/null +++ b/lld/test/mach-o/sectattrs.yaml @@ -0,0 +1,30 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -dylib \ +# RUN: %p/Inputs/x86_64/libSystem.yaml -o %t \ +# RUN: && llvm-objdump --private-headers %t | FileCheck %s +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x8B, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00, + 0x5D, 0xC3 ] +global-symbols: + - name: _get + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + +... 
+ + +# CHECK: PURE_INSTRUCTIONS SOME_INSTRUCTIONS + diff --git a/lld/test/mach-o/sectcreate.yaml b/lld/test/mach-o/sectcreate.yaml new file mode 100644 index 000000000000..1252b492dda5 --- /dev/null +++ b/lld/test/mach-o/sectcreate.yaml @@ -0,0 +1,12 @@ +# RUN: ld64.lld.darwinold -r -arch x86_64 -o %t -sectcreate __DATA __data \ +# RUN: %p/Inputs/hw.raw_bytes -print_atoms | FileCheck %s + +# CHECK: --- !native +# CHECK: path: '' +# CHECK: defined-atoms: +# CHECK: - scope: global +# CHECK: type: sectcreate +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 0A ] +# CHECK: section-choice: custom-required +# CHECK: section-name: '__DATA/__data' +# CHECK: dead-strip: never diff --git a/lld/test/mach-o/seg-protection-arm64.yaml b/lld/test/mach-o/seg-protection-arm64.yaml new file mode 100644 index 000000000000..0a17574ef32b --- /dev/null +++ b/lld/test/mach-o/seg-protection-arm64.yaml @@ -0,0 +1,78 @@ +# RUN: ld64.lld.darwinold -arch arm64 %s %p/Inputs/hello-world-arm64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: start + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + +... 
+ +# CHECK: Load command 0 +# CHECK: cmd LC_SEGMENT_64 +# CHECK: cmdsize 72 +# CHECK: segname __PAGEZERO +# CHECK: vmaddr +# CHECK: vmsize +# CHECK: fileoff +# CHECK: filesize +# CHECK: maxprot --- +# CHECK: initprot --- +# CHECK: nsects 0 +# CHECK: flags (none) +# CHECK: Load command 1 +# CHECK: cmd LC_SEGMENT_64 +# CHECK: cmdsize 152 +# CHECK: segname __TEXT +# CHECK: vmaddr +# CHECK: vmsize +# CHECK: fileoff +# CHECK: filesize +# CHECK: maxprot r-x +# CHECK: initprot r-x +# CHECK: nsects 1 +# CHECK: flags (none) +# CHECK: Section +# CHECK: sectname __text +# CHECK: segname __TEXT +# CHECK: addr +# CHECK: size +# CHECK: offset +# CHECK: align 2^0 (1) +# CHECK: reloff 0 +# CHECK: nreloc 0 +# CHECK: type S_REGULAR +# CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS +# CHECK: reserved1 0 +# CHECK: reserved2 0 +# CHECK: Load command 2 +# CHECK: cmd LC_SEGMENT_64 +# CHECK: cmdsize 72 +# CHECK: segname __LINKEDIT +# CHECK: vmaddr +# CHECK: vmsize +# CHECK: fileoff +# CHECK: filesize +# CHECK: maxprot r-- +# CHECK: initprot r-- +# CHECK: nsects 0 +# CHECK: flags (none) diff --git a/lld/test/mach-o/seg-protection-x86_64.yaml b/lld/test/mach-o/seg-protection-x86_64.yaml new file mode 100644 index 000000000000..b68929d69e61 --- /dev/null +++ b/lld/test/mach-o/seg-protection-x86_64.yaml @@ -0,0 +1,78 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: start + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000001 + +... 
+ +# CHECK: Load command 0 +# CHECK: cmd LC_SEGMENT_64 +# CHECK: cmdsize 72 +# CHECK: segname __PAGEZERO +# CHECK: vmaddr +# CHECK: vmsize +# CHECK: fileoff +# CHECK: filesize +# CHECK: maxprot --- +# CHECK: initprot --- +# CHECK: nsects 0 +# CHECK: flags (none) +# CHECK: Load command 1 +# CHECK: cmd LC_SEGMENT_64 +# CHECK: cmdsize 152 +# CHECK: segname __TEXT +# CHECK: vmaddr +# CHECK: vmsize +# CHECK: fileoff +# CHECK: filesize +# CHECK: maxprot rwx +# CHECK: initprot r-x +# CHECK: nsects 1 +# CHECK: flags (none) +# CHECK: Section +# CHECK: sectname __text +# CHECK: segname __TEXT +# CHECK: addr +# CHECK: size +# CHECK: offset +# CHECK: align 2^0 (1) +# CHECK: reloff 0 +# CHECK: nreloc 0 +# CHECK: type S_REGULAR +# CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS +# CHECK: reserved1 0 +# CHECK: reserved2 0 +# CHECK: Load command 2 +# CHECK: cmd LC_SEGMENT_64 +# CHECK: cmdsize 72 +# CHECK: segname __LINKEDIT +# CHECK: vmaddr +# CHECK: vmsize +# CHECK: fileoff +# CHECK: filesize +# CHECK: maxprot rwx +# CHECK: initprot r-- +# CHECK: nsects 0 +# CHECK: flags (none) diff --git a/lld/test/mach-o/source-version.yaml b/lld/test/mach-o/source-version.yaml new file mode 100644 index 000000000000..6a19ec6fa0b0 --- /dev/null +++ b/lld/test/mach-o/source-version.yaml @@ -0,0 +1,28 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -source_version 10.blah %s -o %t 2>&1 | FileCheck %s --check-prefix=ERROR +# RUN: ld64.lld.darwinold -arch x86_64 -source_version 10.1.2.3.4 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... 
+ +# ERROR: malformed source_version value + +# CHECK: Load command {{[0-9]*}} +# CHECK: cmd LC_SOURCE_VERSION +# CHECK: cmdsize 16 +# CHECK: version 10.1.2.3.4 \ No newline at end of file diff --git a/lld/test/mach-o/stack-size.yaml b/lld/test/mach-o/stack-size.yaml new file mode 100644 index 000000000000..35a1700b7dd5 --- /dev/null +++ b/lld/test/mach-o/stack-size.yaml @@ -0,0 +1,24 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 %s -o %t %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --private-headers %t | FileCheck --check-prefix=CHECK-DEFAULT %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 %s -o %t -stack_size 31415926000 %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --private-headers %t | FileCheck --check-prefix=CHECK-EXPLICIT %s +# RUN: not ld64.lld.darwinold -arch x86_64 -stack_size 0x31415926530 %s >/dev/null 2> %t +# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-MISPAGED +# RUN: not ld64.lld.darwinold -arch x86_64 -stack_size hithere %s >/dev/null 2> %t +# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-NOTHEX + +--- !native +defined-atoms: + - name: _main + scope: global + content: [] + +# CHECK-DEFAULT: cmd LC_MAIN +# CHECK-DEFAULT: stacksize 0 + +# CHECK-EXPLICIT: cmd LC_MAIN +# CHECK-EXPLICIT: stacksize 3384796143616 + +# CHECK-ERROR-MISPAGED: error: stack_size must be a multiple of page size (0x1000) + +# CHECK-ERROR-NOTHEX: error: stack_size expects a hex number diff --git a/lld/test/mach-o/string-table.yaml b/lld/test/mach-o/string-table.yaml new file mode 100644 index 000000000000..d22c13945b42 --- /dev/null +++ b/lld/test/mach-o/string-table.yaml @@ -0,0 +1,66 @@ +# RUN: ld64.lld.darwinold -arch i386 %s %p/Inputs/hello-world-x86.yaml -o %t +# RUN: obj2yaml %t | FileCheck %s +# +# Test that the string table contains a ' ' as its first symbol +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: 
S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0x58, 0x8D, 0x80, 0x16, 0x00, + 0x00, 0x00, 0x89, 0x04, 0x24, 0xE8, 0xE6, 0xFF, + 0xFF, 0xFF, 0x31, 0xC0, 0x83, 0xC4, 0x08, 0x5D, + 0xC3 ] + relocations: + - offset: 0x00000016 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: true + extern: true + symbol: 1 + - offset: 0x0000000E + scattered: true + type: GENERIC_RELOC_LOCAL_SECTDIFF + length: 2 + pc-rel: false + value: 0x00000021 + - offset: 0x00000000 + scattered: true + type: GENERIC_RELOC_PAIR + length: 2 + pc-rel: false + value: 0x0000000B + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000021 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _printf + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: StringTable: +# CHECK-NEXT: - ' ' +# CHECK-NEXT: - __mh_execute_header +# CHECK-NEXT: - _main +# CHECK-NEXT: - _printf +# CHECK-NEXT: - dyld_stub_binder +# CHECK-NEXT: - '' diff --git a/lld/test/mach-o/stub-link.s b/lld/test/mach-o/stub-link.s new file mode 100644 index 000000000000..e0b053605582 --- /dev/null +++ b/lld/test/mach-o/stub-link.s @@ -0,0 +1,21 @@ +# REQUIRES: x86 + +# RUN: mkdir -p %t +# +# RUN: llvm-mc -filetype obj -triple x86_64-apple-darwin %s -o %t/test.o +# RUN: ld64.lld.darwinold -o %t/test -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem %t/test.o +# +# RUN: llvm-objdump --bind --no-show-raw-insn -d -r %t/test | FileCheck %s + +# CHECK: Disassembly of section __TEXT,__text: +# CHECK: movq {{.*}} # [[ADDR:[0-9a-f]+]] + +# CHECK: Bind table: +# CHECK: __DATA __got 0x[[ADDR]] pointer 0 libSystem ___nan + +.section __TEXT,__text +.global _main + +_main: + movq ___nan@GOTPCREL(%rip), %rax + ret diff --git a/lld/test/mach-o/subsections-via-symbols-default.yaml b/lld/test/mach-o/subsections-via-symbols-default.yaml new file mode 100644 index 000000000000..44e11bcc6df7 --- /dev/null +++ b/lld/test/mach-o/subsections-via-symbols-default.yaml @@ -0,0 +1,28 @@ +# RUN: ld64.lld.darwinold -ios_simulator_version_min 5.0 -arch x86_64 -r %s -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck %s + +# Make sure that we have an objc image info in the output. It should have +# been generated by the objc pass. + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +compat-version: 0.0 +current-version: 0.0 +has-UUID: false +OS: unknown +sections: + - segment: __DATA + section: __objc_imageinfo + type: S_REGULAR + attributes: [ S_ATTR_NO_DEAD_STRIP ] + address: 0x0000000000000100 + content: [ 0x00, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00 ] +... + +# The ObjC pass creates a new image info in a new MachoFile internal to the pass. 
+# Make sure that we still have MH_SUBSECTIONS_VIA_SYMBOLS in the output file, even +# though that file in the ObjCPass didn't get it set from being parsed. + +# CHECK: MH_SUBSECTIONS_VIA_SYMBOLS \ No newline at end of file diff --git a/lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml b/lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml new file mode 100644 index 000000000000..cc0e61b7b7d9 --- /dev/null +++ b/lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml @@ -0,0 +1,17 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -twolevel_namespace -undefined dynamic_lookup %s -o %t %p/Inputs/x86_64/libSystem.yaml +# +# Sanity check '-twolevel_namespace -undefined dynamic_lookup'. +# This should pass without error, even though '_bar' is undefined. + +--- !native +defined-atoms: + - name: _main + scope: global + content: [ E9, 00, 00, 00, 00 ] + alignment: 16 + references: + - kind: branch32 + offset: 1 + target: _bar +undefined-atoms: + - name: _bar diff --git a/lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml b/lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml new file mode 100644 index 000000000000..1195c090f86a --- /dev/null +++ b/lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml @@ -0,0 +1,23 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -twolevel_namespace -undefined warning %s -o %t %p/Inputs/x86_64/libSystem.yaml 2>&1 | \ +# RUN: FileCheck --check-prefix=CHECK-WARNING %s +# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -twolevel_namespace -undefined suppress %s -o %t %p/Inputs/x86_64/libSystem.yaml 2>&1 | \ +# RUN: FileCheck --check-prefix=CHECK-SUPPRESS %s + +--- !native +defined-atoms: + - name: _main + scope: global + content: [ E9, 00, 00, 00, 00 ] + alignment: 16 + references: + - kind: branch32 + offset: 1 + target: _bar +undefined-atoms: + - name: _bar + +# Make sure that the driver issues an error diagnostic about this 
combination +# being invalid. +# +# CHECK-WARNING: can't use -undefined warning or suppress with -twolevel_namespace +# CHECK-SUPPRESS: can't use -undefined warning or suppress with -twolevel_namespace \ No newline at end of file diff --git a/lld/test/mach-o/unwind-info-simple-arm64.yaml b/lld/test/mach-o/unwind-info-simple-arm64.yaml new file mode 100644 index 000000000000..75d817a57989 --- /dev/null +++ b/lld/test/mach-o/unwind-info-simple-arm64.yaml @@ -0,0 +1,267 @@ +# RUN: ld64.lld.darwinold -arch arm64 -o %t %s \ +# RUN: %p/Inputs/unwind-info-simple-arm64.yaml -e _main %p/Inputs/arm64/libSystem.yaml +# RUN: llvm-objdump --unwind-info %t | FileCheck %s + +--- !mach-o +arch: arm64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0xE0, 0x03, 0x1E, 0x32, 0x00, 0x00, 0x00, 0x94, + 0x48, 0x01, 0x80, 0x52, 0x08, 0x00, 0x00, 0xB9, + 0x02, 0x00, 0x80, 0xD2, 0x01, 0x00, 0x00, 0x90, + 0x21, 0x00, 0x40, 0xF9, 0x00, 0x00, 0x00, 0x94, + 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0xE0, 0x03, 0x1E, 0x32, 0x00, 0x00, 0x00, 0x94, + 0x48, 0x01, 0x80, 0x52, 0x08, 0x00, 0x00, 0xB9, + 0x02, 0x00, 0x80, 0xD2, 0x01, 0x00, 0x00, 0x90, + 0x21, 0x00, 0x40, 0xF9, 0x00, 0x00, 0x00, 0x94, + 0x3F, 0x04, 0x00, 0x71, 0x81, 0x00, 0x00, 0x54, + 0x00, 0x00, 0x00, 0x94, 0xFD, 0x7B, 0xC1, 0xA8, + 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x94, + 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, + 0x00, 0x00, 0x00, 0x94 ] + relocations: + - offset: 0x00000070 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 5 + - offset: 0x00000064 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 7 + - offset: 0x00000060 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + 
symbol: 12 + - offset: 0x00000058 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 11 + - offset: 0x0000004C + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 13 + - offset: 0x00000048 + type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 8 + - offset: 0x00000044 + type: ARM64_RELOC_GOT_LOAD_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 8 + - offset: 0x00000034 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 10 + - offset: 0x00000024 + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 13 + - offset: 0x00000020 + type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 + length: 2 + pc-rel: false + extern: true + symbol: 8 + - offset: 0x0000001C + type: ARM64_RELOC_GOT_LOAD_PAGE21 + length: 2 + pc-rel: true + extern: true + symbol: 8 + - offset: 0x0000000C + type: ARM64_RELOC_BRANCH26 + length: 2 + pc-rel: true + extern: true + symbol: 10 + - segment: __TEXT + section: __gcc_except_tab + type: S_REGULAR + attributes: [ ] + alignment: 2 + address: 0x0000000000000074 + content: [ 0xFF, 0x9B, 0xAF, 0x80, 0x00, 0x03, 0x27, 0x00, + 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x28, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0xD0, 0xFF, 0xFF, 0xFF ] + relocations: + - offset: 0x00000030 + type: ARM64_RELOC_POINTER_TO_GOT + length: 2 + pc-rel: true + extern: true + symbol: 9 + - segment: __LD + section: __compact_unwind + type: S_REGULAR + attributes: [ ] + alignment: 8 + address: 0x00000000000000A8 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000040 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000038 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 2 + - offset: 0x00000030 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 14 + - offset: 0x00000020 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: ARM64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: false + symbol: 1 +local-symbols: + - name: ltmp0 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: ltmp14 + type: N_SECT + sect: 2 + value: 0x0000000000000074 + - name: GCC_except_table1 + type: N_SECT + sect: 2 + value: 0x0000000000000074 + - name: ltmp21 + type: N_SECT + sect: 3 + value: 0x00000000000000A8 +global-symbols: + - name: __Z3barv + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000028 + - name: __Z3foov + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000068 +undefined-symbols: + - name: __Unwind_Resume + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: __ZTIi + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: __ZTIl + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_allocate_exception + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_begin_catch + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_end_catch + type: N_UNDF + scope: [ 
N_EXT ] + value: 0x0000000000000000 + - name: ___cxa_throw + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: ___gxx_personality_v0 + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... + + +# CHECK: Contents of __unwind_info section: +# CHECK: Version: 0x1 +# CHECK: Common encodings array section offset: 0x1c +# CHECK: Number of common encodings in array: 0x0 +# CHECK: Personality function array section offset: 0x1c +# CHECK: Number of personality functions in array: 0x1 +# CHECK: Index array section offset: 0x20 +# CHECK: Number of indices in array: 0x2 +# CHECK: Common encodings: (count = 0) +# CHECK: Personality functions: (count = 1) +# CHECK: personality[1]: 0x00004020 +# CHECK: Top level indices: (count = 2) +# CHECK: [0]: function offset=0x00003e68, 2nd level page offset=0x00000040, LSDA offset=0x00000038 +# CHECK: [1]: function offset=0x00003edc, 2nd level page offset=0x00000000, LSDA offset=0x00000040 +# CHECK: LSDA descriptors: +# CHECK: [0]: function offset=0x00003e90, LSDA offset=0x00003f6c +# CHECK: Second level indices: +# CHECK: Second level index[0]: offset in section=0x00000040, base function offset=0x00003e68 +# CHECK: [0]: function offset=0x00003e68, encoding=0x04000000 +# CHECK: [1]: function offset=0x00003e90, encoding=0x54000000 +# CHECK: [2]: function offset=0x00003ed0, encoding=0x04000000 +# CHECK-NOT: Contents of __compact_unwind section + + + diff --git a/lld/test/mach-o/unwind-info-simple-x86_64.yaml b/lld/test/mach-o/unwind-info-simple-x86_64.yaml new file mode 100644 index 000000000000..a711f4457c2a --- /dev/null +++ b/lld/test/mach-o/unwind-info-simple-x86_64.yaml @@ -0,0 +1,133 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -e _main %p/Inputs/x86_64/libSystem.yaml +# RUN: llvm-objdump --unwind-info %t | FileCheck %s + +# CHECK: Contents of __unwind_info section: +# CHECK: Version: 0x1 +# CHECK: Common encodings array section offset: 0x1c +# CHECK: Number of common encodings in array: 0x0 +# 
CHECK: Personality function array section offset: 0x1c +# CHECK: Number of personality functions in array: 0x1 +# CHECK: Index array section offset: 0x20 +# CHECK: Number of indices in array: 0x2 +# CHECK: Common encodings: (count = 0) +# CHECK: Personality functions: (count = 1) +# CHECK: personality[1]: 0x00001000 +# CHECK: Top level indices: (count = 2) +# CHECK: [0]: function offset=0x00000efb, 2nd level page offset=0x00000040, LSDA offset=0x00000038 +# CHECK: [1]: function offset=0x00000f00, 2nd level page offset=0x00000000, LSDA offset=0x00000040 +# CHECK: LSDA descriptors: +# CHECK: [0]: function offset=0x00000efb, LSDA offset=0x00000f00 +# CHECK: Second level indices: +# CHECK: Second level index[0]: offset in section=0x00000040, base function offset=0x00000efb +# CHECK: [0]: function offset=0x00000efb, encoding=0x51000000 +# CHECK: [1]: function offset=0x00000efc, encoding=0x01000000 +# CHECK: [2]: function offset=0x00000efd, encoding=0x04000018 +# CHECK: [3]: function offset=0x00000efe, encoding=0x04000040 +# CHECK: [4]: function offset=0x00000eff, encoding=0x00000000 +# CHECK-NOT: Contents of __compact_unwind section + +--- !native +path: '' +defined-atoms: + - name: GCC_except_table1 + type: unwind-lsda + content: [ FF, 9B, A2, 80, 80, 00, 03, 1A, 08, 00, 00, 00, + 05, 00, 00, 00, 1A, 00, 00, 00, 01, 0D, 00, 00, + 00, 64, 00, 00, 00, 00, 00, 00, 00, 00, 01, 00, + 04, 00, 00, 00 ] + - type: compact-unwind + content: [ 40, 00, 00, 00, 00, 00, 00, 00, 01, 00, 00, 00, + 00, 00, 00, 41, 00, 00, 00, 00, 00, 00, 00, 00, + E0, 00, 00, 00, 00, 00, 00, 00 ] + references: + - kind: pointer64Anon + offset: 0 + target: __Z3barv + - kind: pointer64 + offset: 16 + target: ___gxx_personality_v0 + - kind: pointer64Anon + offset: 24 + target: GCC_except_table1 + - type: compact-unwind + content: [ C0, 00, 00, 00, 00, 00, 00, 00, 01, 00, 00, 00, + 00, 00, 00, 01, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00 ] + references: + - kind: pointer64Anon + 
offset: 0 + target: _main + - type: compact-unwind + content: [ C1, 00, 00, 00, 00, 00, 00, 00, 01, 00, 00, 00, + 00, 00, 00, 04, 00, 00, 00, 00, 00, 00, 00, 00, + 00, 00, 00, 00, 00, 00, 00, 00 ] + references: + - kind: pointer64Anon + offset: 0 + target: _needsDwarfButNoCompactUnwind + +# Generic x86_64 CIE: + - name: LCIE + type: unwind-cfi + content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00, + 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ] + + - type: unwind-cfi + content: [ 24, 00, 00, 00, 1C, 00, 00, 00, C8, FE, FF, FF, + FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, + 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, + 00, 00, 00, 00 ] + references: + - kind: unwindFDEToFunction + offset: 8 + target: _needsDwarfButNoCompactUnwind + - kind: negDelta32 + offset: 4 + target: LCIE + + - type: unwind-cfi + content: [ 24, 00, 00, 00, 44, 00, 00, 00, C8, FE, FF, FF, + FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, + 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, + 00, 00, 00, 00 ] + references: + - kind: unwindFDEToFunction + offset: 8 + target: _needsDwarfSaysCompactUnwind + - kind: negDelta32 + offset: 4 + target: LCIE + + - type: unwind-cfi + content: [ 24, 00, 00, 00, 6C, 00, 00, 00, C8, FE, FF, FF, + FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, + 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, + 00, 00, 00, 00 ] + references: + - kind: unwindFDEToFunction + offset: 8 + target: _main + - kind: negDelta32 + offset: 4 + target: LCIE + + - name: __Z3barv + scope: global + content: [ C3 ] + - name: _main + scope: global + content: [ C3 ] + - name: _needsDwarfButNoCompactUnwind + scope: global + content: [ C3 ] + - name: _needsDwarfSaysCompactUnwind + scope: global + content: [ C3 ] + - name: _noUnwindData + scope: global + content: [ C3 ] + +shared-library-atoms: + - name: ___gxx_personality_v0 + load-name: '/usr/lib/libc++abi.dylib' + type: unknown diff --git a/lld/test/mach-o/upward-dylib-load-command.yaml 
b/lld/test/mach-o/upward-dylib-load-command.yaml new file mode 100644 index 000000000000..6dbeb44895a4 --- /dev/null +++ b/lld/test/mach-o/upward-dylib-load-command.yaml @@ -0,0 +1,48 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %p/Inputs/bar.yaml \ +# RUN: -install_name /usr/lib/libbar.dylib %p/Inputs/x86_64/libSystem.yaml -o %t1.dylib +# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -upward_library %t1.dylib \ +# RUN: -install_name /usr/lib/libfoo.dylib %p/Inputs/x86_64/libSystem.yaml -o %t +# RUN: llvm-objdump --private-headers %t | FileCheck %s +# +# +# Test upward linking: 1) build libbar.dylib, 2) build libfoo.dylib and upward +# like with libbar.dylib, 3) dump load commands of libfoo and verify upward link. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, + 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000008 + type: X86_64_RELOC_BRANCH + length: 2 + pc-rel: true + extern: true + symbol: 1 +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _bar + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + +... 
+ + +# CHECK: cmd LC_LOAD_UPWARD_DYLIB +# CHECK-NEXT: cmdsize 48 +# CHECK-NEXT: name /usr/lib/libbar.dylib (offset 24) diff --git a/lld/test/mach-o/upward-dylib-paths.yaml b/lld/test/mach-o/upward-dylib-paths.yaml new file mode 100644 index 000000000000..509edd3d5ca8 --- /dev/null +++ b/lld/test/mach-o/upward-dylib-paths.yaml @@ -0,0 +1,18 @@ +# +# +# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ +# RUN: -path_exists /Custom/Frameworks \ +# RUN: -path_exists /Custom/Frameworks/Bar.framework/Bar \ +# RUN: -path_exists /usr/lib \ +# RUN: -path_exists /usr/lib/libfoo.dylib \ +# RUN: -path_exists /opt/stuff/libstuff.dylib \ +# RUN: -F/Custom/Frameworks \ +# RUN: -upward_framework Bar \ +# RUN: -upward-lfoo \ +# RUN: -upward_library /opt/stuff/libstuff.dylib \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: Found upward framework /Custom/Frameworks/Bar.framework/Bar +# CHECK: Found upward library /usr/lib/libfoo.dylib + + diff --git a/lld/test/mach-o/usage.yaml b/lld/test/mach-o/usage.yaml new file mode 100644 index 000000000000..efae4d0d3144 --- /dev/null +++ b/lld/test/mach-o/usage.yaml @@ -0,0 +1,8 @@ +# RUN: not ld64.lld.darwinold | FileCheck %s +# +# Test that running darwin linker with no option prints out usage message. +# + + +# CHECK: USAGE: +# CHECK: -arch diff --git a/lld/test/mach-o/use-dylib.yaml b/lld/test/mach-o/use-dylib.yaml new file mode 100644 index 000000000000..5717a9316fb7 --- /dev/null +++ b/lld/test/mach-o/use-dylib.yaml @@ -0,0 +1,39 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s \ +# RUN: %p/Inputs/use-simple-dylib.yaml %p/Inputs/x86_64/libSystem.yaml -dylib -o %t.dylib +# RUN: llvm-objdump --private-headers %t.dylib | FileCheck %s + +# This test ensures that we have a LC_LOAD_DYLIB for libspecial.dylib even though we don't +# use any atoms from it. This matches the ld64 behaviour. 
+--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, + 0xE8, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, + 0x00, 0x00, 0x00 ] +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 + + +# CHECK: cmd LC_LOAD_DYLIB +# CHECK: name libspecial.dylib (offset 24) +# CHECK: current version 1.0.0 +# CHECK: compatibility version 1.0.0 +# CHECK: cmd LC_LOAD_DYLIB +# CHECK: name /usr/lib/libSystem.B.dylib (offset 24) +# CHECK: current version 1.0.0 +# CHECK: compatibility version 1.0.0 diff --git a/lld/test/mach-o/use-simple-dylib.yaml b/lld/test/mach-o/use-simple-dylib.yaml new file mode 100644 index 000000000000..62fb18f1f089 --- /dev/null +++ b/lld/test/mach-o/use-simple-dylib.yaml @@ -0,0 +1,73 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -print_atoms -r %s \ +# RUN: %p/Inputs/use-simple-dylib.yaml -o %t | FileCheck %s + + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, + 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, + 0xE8, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, + 0x00, 0x00, 0x00 ] +global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +undefined-symbols: + - name: _myGlobal + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _myGlobalWeak + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _myHidden + type: 
N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _myHiddenWeak + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _myResolver + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _myStatic + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 + - name: _myVariablePreviouslyKnownAsPrivateExtern + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + + +# CHECK: undefined-atoms: +# CHECK: - name: _myStatic +# CHECK: - name: _myVariablePreviouslyKnownAsPrivateExtern +# CHECK: shared-library-atoms: +# CHECK: - name: _myGlobal +# CHECK: load-name: libspecial.dylib +# CHECK: - name: _myGlobalWeak +# CHECK: load-name: libspecial.dylib +# CHECK: - name: _myHidden +# CHECK: load-name: libspecial.dylib +# CHECK: - name: _myHiddenWeak +# CHECK: load-name: libspecial.dylib +# CHECK: - name: _myResolver +# CHECK: load-name: libspecial.dylib diff --git a/lld/test/mach-o/version-min-load-command-object.yaml b/lld/test/mach-o/version-min-load-command-object.yaml new file mode 100644 index 000000000000..8d9089bc6c01 --- /dev/null +++ b/lld/test/mach-o/version-min-load-command-object.yaml @@ -0,0 +1,35 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -r -macosx_version_min 10.8 && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -r && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -r %p/Inputs/no-version-min-load-command-object.yaml && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN + +# If we are emitting an object file, then we only emit a min version load command if the source object file(s) all have +# version(s) and either known platforms or contain min version load commands themselves. 
+ +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +min-os-version-kind: LC_VERSION_MIN_MACOSX +min-os-version: 10.8 +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... + +# CHECK: Load command {{[0-9]*}} +# CHECK: cmd LC_VERSION_MIN_MACOSX +# CHECK: cmdsize 16 +# CHECK: version 10.8 +# CHECK: sdk n/a + +# NO_VERSION_MIN-NOT: LC_VERSION_MIN_MACOSX \ No newline at end of file diff --git a/lld/test/mach-o/version-min-load-command.yaml b/lld/test/mach-o/version-min-load-command.yaml new file mode 100644 index 000000000000..e31319dfb790 --- /dev/null +++ b/lld/test/mach-o/version-min-load-command.yaml @@ -0,0 +1,43 @@ +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml 2>&1 | FileCheck %s --check-prefix=WARNING +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -version_load_command && llvm-objdump --private-headers %t | FileCheck %s +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -no_version_load_command && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -version_load_command -no_version_load_command && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib 
%p/Inputs/x86_64/libSystem.yaml -static && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN + +# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -sdk_version 10.9 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=SDK_VERSION + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x00, 0x00, 0x00, 0x00 ] +global-symbols: + - name: _main + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... + +# CHECK: Load command {{[0-9]*}} +# CHECK: cmd LC_VERSION_MIN_MACOSX +# CHECK: cmdsize 16 +# CHECK: version 10.8 +# CHECK: sdk 10.8 + +# SDK_VERSION: Load command {{[0-9]*}} +# SDK_VERSION: cmd LC_VERSION_MIN_MACOSX +# SDK_VERSION: cmdsize 16 +# SDK_VERSION: version 10.8 +# SDK_VERSION: sdk 10.9 + +# WARNING: warning: -sdk_version is required when emitting min version load command. 
Setting sdk version to match provided min version + +# NO_VERSION_MIN-NOT: LC_VERSION_MIN_MACOSX diff --git a/lld/test/mach-o/write-final-sections.yaml b/lld/test/mach-o/write-final-sections.yaml new file mode 100644 index 000000000000..d0c0f3c8b777 --- /dev/null +++ b/lld/test/mach-o/write-final-sections.yaml @@ -0,0 +1,165 @@ +# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/write-final-sections.yaml \ +# RUN: -o %t -e _foo +# RUN: llvm-readobj --sections --section-data %t | FileCheck %s + +--- !native +defined-atoms: +# For __TEXT, __text (with typeCode) + - name: _foo + scope: global + content: [ 55 ] +# CHECK: Name: __text +# CHECK: Segment: __TEXT +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 55 +# CHECK-NEXT: ) + +# For __TEXT, __const (with typeConstant), + - type: constant + content: [ 01, 00, 00, 00 ] +# From __TEXT, __literal4, (with typeLiteral4) + - scope: hidden + type: const-4-byte + content: [ 02, 00, 00, 00 ] +# From __TEXT, __literal8, (with typeLiteral8) + - scope: hidden + type: const-8-byte + content: [ 03, 00, 00, 00, 00, 00, 00, 00 ] +# From __TEXT, __literal16, (with typeLiteral16) + - scope: hidden + type: const-16-byte + content: [ 04, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: Name: __const +# CHECK: Segment: __TEXT +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 01000000 02000000 03000000 00000000 +# CHECK-NEXT: 0010: 04000000 00000000 00000000 00000000 +# CHECK-NEXT: ) + +# For __TEXT, __cstring (with typeCString) + - scope: hidden + type: c-string + content: [ 57, 69, 62, 62, 6C, 65, 00 ] + merge: by-content +# CHECK: Name: __cstring +# CHECK: Segment: __TEXT +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 57696262 6C6500 +# CHECK-NEXT: ) + +# For __TEXT, __ustring (with typeUTF16String) + - scope: hidden + type: utf16-string + content: [ 05, 00 ] + merge: by-content +# CHECK: Name: __ustring +# CHECK: Segment: __TEXT +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 0500 +# CHECK-NEXT: ) + +# For __TEXT, 
__gcc_except_tab, (with typeLSDA) + - name: GCC_except_table0 + type: unwind-lsda + content: [ 06, 00 ] +# CHECK: Name: __gcc_except_tab +# CHECK: Segment: __TEXT +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 0600 +# CHECK-NEXT: ) + +# For __TEXT, __eh_frame, (with typeCFI) + - name: LCIE + type: unwind-cfi + content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00, + 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ] + + - type: unwind-cfi + content: [ 24, 00, 00, 00, 1C, 00, 00, 00, C8, FE, FF, FF, + FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, + 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, + 00, 00, 00, 00 ] + references: + - kind: unwindFDEToFunction + offset: 8 + target: _foo + - kind: negDelta32 + offset: 4 + target: LCIE + +# CHECK: Name: __eh_frame +# CHECK: Segment: __TEXT +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +# CHECK-NEXT: 0010: 100C0708 90010000 24000000 1C000000 +# CHECK-NEXT: 0020: 70FFFFFF FFFFFFFF 01000000 00000000 +# CHECK-NEXT: 0030: 00410E10 8602430D 06000000 00000000 +# CHECK-NEXT: ) + +# For __DATA, __data, (with typeData) + - name: var + type: data + content: [ 08 ] +# CHECK: Name: __data +# CHECK: Segment: __DATA +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 08 +# CHECK-NEXT: ) + +# For __DATA, __bss (with typeZeroFill) +# FIXME: Attributes & tags of __bss are mostly broken. Should be at end of +# __DATA, should have size, should have S_ZEROFILL flag. 
+ - type: zero-fill + size: 8 +# CHECK: Name: __bss +# CHECK: Segment: __DATA + +# For __DATA, __const, (with typeConstData) + - type: const-data + content: [ 09, 00, 00, 00 ] +# CHECK: Name: __const +# CHECK: Segment: __DATA +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 09000000 +# CHECK-NEXT: ) + +# For __DATA, __cfstring, (with typeCFString) + - type: cfstring + content: [ 0A, 00 ] +# CHECK: Name: __cfstring +# CHECK: Segment: __DATA +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 0A00 +# CHECK-NEXT: ) + +# For __DATA, __got (with typeGOT) + - type: got + content: [ 0B, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: Name: __got +# CHECK: Segment: __DATA +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 0B000000 00000000 +# CHECK-NEXT: ) + + +# For __DATA, __mod_init_func (with typeInitializerPtr) + - type: initializer-pointer + content: [ 0C, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: Name: __mod_init_func +# CHECK: Segment: __DATA +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 0C000000 00000000 +# CHECK-NEXT: ) + +# For __DATA, __mod_term_func (with typeTerminatorPointer) + - type: terminator-pointer + content: [ 0D, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK: Name: __mod_term_func +# CHECK: Segment: __DATA +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 0D000000 00000000 +# CHECK-NEXT: ) + + - type: compact-unwind + content: [ 0E, 00, 00, 00, 00, 00, 00, 00 ] +# CHECK-NOT: Name: __compact_unwind diff --git a/lld/test/mach-o/wrong-arch-error.yaml b/lld/test/mach-o/wrong-arch-error.yaml new file mode 100644 index 000000000000..17bd024bc2b4 --- /dev/null +++ b/lld/test/mach-o/wrong-arch-error.yaml @@ -0,0 +1,28 @@ +# RUN: not ld64.lld.darwinold -arch x86_64 -r %s \ +# RUN: %p/Inputs/wrong-arch-error.yaml 2> %t.err +# RUN: FileCheck %s < %t.err + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0xCC ] + 
+global-symbols: + - name: _foo + type: N_SECT + scope: [ N_EXT ] + sect: 1 + value: 0x0000000000000000 +... + + +# CHECK: wrong architecture diff --git a/lld/unittests/CMakeLists.txt b/lld/unittests/CMakeLists.txt new file mode 100644 index 000000000000..84d35d43f4e8 --- /dev/null +++ b/lld/unittests/CMakeLists.txt @@ -0,0 +1,16 @@ +add_custom_target(LLDUnitTests) +set_target_properties(LLDUnitTests PROPERTIES FOLDER "lld tests") + +set(CMAKE_BUILD_WITH_INSTALL_RPATH OFF) + +# add_lld_unittest(test_dirname file1.cpp file2.cpp) +# +# Will compile the list of files together and link against lld +# Produces a binary named 'basename(test_dirname)'. +function(add_lld_unittest test_dirname) + add_unittest(LLDUnitTests ${test_dirname} ${ARGN}) + target_link_libraries(${test_dirname} ${LLVM_COMMON_LIBS}) +endfunction() + +add_subdirectory(DriverTests) +add_subdirectory(MachOTests) diff --git a/lld/unittests/DriverTests/CMakeLists.txt b/lld/unittests/DriverTests/CMakeLists.txt new file mode 100644 index 000000000000..e750bf6b069b --- /dev/null +++ b/lld/unittests/DriverTests/CMakeLists.txt @@ -0,0 +1,9 @@ +add_lld_unittest(DriverTests + DarwinLdDriverTest.cpp + ) + +target_link_libraries(DriverTests + PRIVATE + lldDriver + lldMachOOld + ) diff --git a/lld/unittests/DriverTests/DarwinLdDriverTest.cpp b/lld/unittests/DriverTests/DarwinLdDriverTest.cpp new file mode 100644 index 000000000000..af0fbbeef2a5 --- /dev/null +++ b/lld/unittests/DriverTests/DarwinLdDriverTest.cpp @@ -0,0 +1,263 @@ +//===- lld/unittest/DarwinLdDriverTest.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Darwin's ld driver tests. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/Common/Driver.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace lld; + +namespace lld { +namespace mach_o { +bool parse(llvm::ArrayRef args, MachOLinkingContext &ctx); +} +} + +namespace { +class DarwinLdParserTest : public testing::Test { +protected: + int inputFileCount() { return _ctx.getNodes().size(); } + + StringRef inputFile(int index) { + Node &node = *_ctx.getNodes()[index]; + if (node.kind() == Node::Kind::File) + return cast(&node)->getFile()->path(); + llvm_unreachable("not handling other types of input files"); + } + + bool parse(std::vector args) { + args.insert(args.begin(), "ld"); + return mach_o::parse(args, _ctx); + } + + MachOLinkingContext _ctx; +}; +} + +TEST_F(DarwinLdParserTest, Basic) { + EXPECT_TRUE(parse({"foo.o", "bar.o", "-arch", "i386"})); + EXPECT_FALSE(_ctx.allowRemainingUndefines()); + EXPECT_FALSE(_ctx.deadStrip()); + EXPECT_EQ(2, inputFileCount()); + EXPECT_EQ("foo.o", inputFile(0)); + EXPECT_EQ("bar.o", inputFile(1)); +} + +TEST_F(DarwinLdParserTest, Output) { + EXPECT_TRUE(parse({"-o", "my.out", "foo.o", "-arch", "i386"})); + EXPECT_EQ("my.out", _ctx.outputPath()); +} + +TEST_F(DarwinLdParserTest, Dylib) { + EXPECT_TRUE(parse({"-dylib", "foo.o", "-arch", "i386"})); + EXPECT_EQ(llvm::MachO::MH_DYLIB, _ctx.outputMachOType()); +} + +TEST_F(DarwinLdParserTest, Relocatable) { + EXPECT_TRUE(parse({"-r", "foo.o", "-arch", "i386"})); + EXPECT_EQ(llvm::MachO::MH_OBJECT, _ctx.outputMachOType()); +} + +TEST_F(DarwinLdParserTest, Bundle) { + EXPECT_TRUE(parse({"-bundle", "foo.o", "-arch", "i386"})); + EXPECT_EQ(llvm::MachO::MH_BUNDLE, _ctx.outputMachOType()); +} + +TEST_F(DarwinLdParserTest, Preload) { + EXPECT_TRUE(parse({"-preload", "foo.o", "-arch", "i386"})); + 
EXPECT_EQ(llvm::MachO::MH_PRELOAD, _ctx.outputMachOType()); +} + +TEST_F(DarwinLdParserTest, Static) { + EXPECT_TRUE(parse({"-static", "foo.o", "-arch", "i386"})); + EXPECT_EQ(llvm::MachO::MH_EXECUTE, _ctx.outputMachOType()); +} + +TEST_F(DarwinLdParserTest, Entry) { + EXPECT_TRUE(parse({"-e", "entryFunc", "foo.o", "-arch", "i386"})); + EXPECT_EQ("entryFunc", _ctx.entrySymbolName()); +} + +TEST_F(DarwinLdParserTest, DeadStrip) { + EXPECT_TRUE(parse({"-arch", "x86_64", "-dead_strip", "foo.o"})); + EXPECT_TRUE(_ctx.deadStrip()); +} + +TEST_F(DarwinLdParserTest, DeadStripRootsExe) { + EXPECT_TRUE(parse({"-arch", "x86_64", "-dead_strip", "foo.o"})); + EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); +} + +TEST_F(DarwinLdParserTest, DeadStripRootsDylib) { + EXPECT_TRUE(parse({"-arch", "x86_64", "-dylib", "-dead_strip", "foo.o"})); + EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); +} + +TEST_F(DarwinLdParserTest, DeadStripRootsRelocatable) { + EXPECT_TRUE(parse({"-arch", "x86_64", "-r", "-dead_strip", "foo.o"})); + EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); +} + +TEST_F(DarwinLdParserTest, DeadStripRootsExportDynamicExe) { + EXPECT_TRUE( + parse({"-arch", "x86_64", "-dead_strip", "-export_dynamic", "foo.o"})); + EXPECT_TRUE(_ctx.globalsAreDeadStripRoots()); +} + +TEST_F(DarwinLdParserTest, DeadStripRootsExportDynamicDylib) { + EXPECT_TRUE(parse({"-arch", "x86_64", "-dylib", "-dead_strip", + "-export_dynamic", "foo.o"})); + EXPECT_TRUE(_ctx.globalsAreDeadStripRoots()); +} + +TEST_F(DarwinLdParserTest, DeadStripRootsExportDynamicRelocatable) { + EXPECT_TRUE(parse( + {"-arch", "x86_64", "-r", "-dead_strip", "-export_dynamic", "foo.o"})); + EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); +} + +TEST_F(DarwinLdParserTest, Arch) { + EXPECT_TRUE(parse({"-arch", "x86_64", "foo.o"})); + EXPECT_EQ(MachOLinkingContext::arch_x86_64, _ctx.arch()); + EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_X86_64, _ctx.getCPUType()); + EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_X86_64_ALL, 
_ctx.getCPUSubType()); +} + +TEST_F(DarwinLdParserTest, Arch_x86) { + EXPECT_TRUE(parse({"-arch", "i386", "foo.o"})); + EXPECT_EQ(MachOLinkingContext::arch_x86, _ctx.arch()); + EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_I386, _ctx.getCPUType()); + EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_X86_ALL, _ctx.getCPUSubType()); +} + +TEST_F(DarwinLdParserTest, Arch_armv6) { + EXPECT_TRUE(parse({"-arch", "armv6", "foo.o"})); + EXPECT_EQ(MachOLinkingContext::arch_armv6, _ctx.arch()); + EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_ARM, _ctx.getCPUType()); + EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_ARM_V6, _ctx.getCPUSubType()); +} + +TEST_F(DarwinLdParserTest, Arch_armv7) { + EXPECT_TRUE(parse({"-arch", "armv7", "foo.o"})); + EXPECT_EQ(MachOLinkingContext::arch_armv7, _ctx.arch()); + EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_ARM, _ctx.getCPUType()); + EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_ARM_V7, _ctx.getCPUSubType()); +} + +TEST_F(DarwinLdParserTest, Arch_armv7s) { + EXPECT_TRUE(parse({"-arch", "armv7s", "foo.o"})); + EXPECT_EQ(MachOLinkingContext::arch_armv7s, _ctx.arch()); + EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_ARM, _ctx.getCPUType()); + EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_ARM_V7S, _ctx.getCPUSubType()); +} + +TEST_F(DarwinLdParserTest, MinMacOSX10_7) { + EXPECT_TRUE( + parse({"-macosx_version_min", "10.7", "foo.o", "-arch", "x86_64"})); + EXPECT_EQ(MachOLinkingContext::OS::macOSX, _ctx.os()); + EXPECT_TRUE(_ctx.minOS("10.7", "")); + EXPECT_FALSE(_ctx.minOS("10.8", "")); +} + +TEST_F(DarwinLdParserTest, MinMacOSX10_8) { + EXPECT_TRUE( + parse({"-macosx_version_min", "10.8.3", "foo.o", "-arch", "x86_64"})); + EXPECT_EQ(MachOLinkingContext::OS::macOSX, _ctx.os()); + EXPECT_TRUE(_ctx.minOS("10.7", "")); + EXPECT_TRUE(_ctx.minOS("10.8", "")); +} + +TEST_F(DarwinLdParserTest, iOS5) { + EXPECT_TRUE(parse({"-ios_version_min", "5.0", "foo.o", "-arch", "armv7"})); + EXPECT_EQ(MachOLinkingContext::OS::iOS, _ctx.os()); + EXPECT_TRUE(_ctx.minOS("", "5.0")); + EXPECT_FALSE(_ctx.minOS("", "6.0")); +} + 
+TEST_F(DarwinLdParserTest, iOS6) { + EXPECT_TRUE(parse({"-ios_version_min", "6.0", "foo.o", "-arch", "armv7"})); + EXPECT_EQ(MachOLinkingContext::OS::iOS, _ctx.os()); + EXPECT_TRUE(_ctx.minOS("", "5.0")); + EXPECT_TRUE(_ctx.minOS("", "6.0")); +} + +TEST_F(DarwinLdParserTest, iOS_Simulator5) { + EXPECT_TRUE( + parse({"-ios_simulator_version_min", "5.0", "a.o", "-arch", "i386"})); + EXPECT_EQ(MachOLinkingContext::OS::iOS_simulator, _ctx.os()); + EXPECT_TRUE(_ctx.minOS("", "5.0")); + EXPECT_FALSE(_ctx.minOS("", "6.0")); +} + +TEST_F(DarwinLdParserTest, iOS_Simulator6) { + EXPECT_TRUE( + parse({"-ios_simulator_version_min", "6.0", "a.o", "-arch", "i386"})); + EXPECT_EQ(MachOLinkingContext::OS::iOS_simulator, _ctx.os()); + EXPECT_TRUE(_ctx.minOS("", "5.0")); + EXPECT_TRUE(_ctx.minOS("", "6.0")); +} + +TEST_F(DarwinLdParserTest, compatibilityVersion) { + EXPECT_TRUE(parse( + {"-dylib", "-compatibility_version", "1.2.3", "a.o", "-arch", "i386"})); + EXPECT_EQ(_ctx.compatibilityVersion(), 0x10203U); +} + +TEST_F(DarwinLdParserTest, compatibilityVersionInvalidType) { + EXPECT_FALSE(parse( + {"-bundle", "-compatibility_version", "1.2.3", "a.o", "-arch", "i386"})); +} + +TEST_F(DarwinLdParserTest, compatibilityVersionInvalidValue) { + EXPECT_FALSE(parse( + {"-bundle", "-compatibility_version", "1,2,3", "a.o", "-arch", "i386"})); +} + +TEST_F(DarwinLdParserTest, currentVersion) { + EXPECT_TRUE( + parse({"-dylib", "-current_version", "1.2.3", "a.o", "-arch", "i386"})); + EXPECT_EQ(_ctx.currentVersion(), 0x10203U); +} + +TEST_F(DarwinLdParserTest, currentVersionInvalidType) { + EXPECT_FALSE( + parse({"-bundle", "-current_version", "1.2.3", "a.o", "-arch", "i386"})); +} + +TEST_F(DarwinLdParserTest, currentVersionInvalidValue) { + EXPECT_FALSE( + parse({"-bundle", "-current_version", "1,2,3", "a.o", "-arch", "i386"})); +} + +TEST_F(DarwinLdParserTest, bundleLoader) { + EXPECT_TRUE( + parse({"-bundle", "-bundle_loader", "/bin/ls", "a.o", "-arch", "i386"})); + 
EXPECT_EQ(_ctx.bundleLoader(), "/bin/ls"); +} + +TEST_F(DarwinLdParserTest, bundleLoaderInvalidType) { + EXPECT_FALSE(parse({"-bundle_loader", "/bin/ls", "a.o", "-arch", "i386"})); +} + +TEST_F(DarwinLdParserTest, deadStrippableDylib) { + EXPECT_TRUE( + parse({"-dylib", "-mark_dead_strippable_dylib", "a.o", "-arch", "i386"})); + EXPECT_EQ(true, _ctx.deadStrippableDylib()); +} + +TEST_F(DarwinLdParserTest, deadStrippableDylibInvalidType) { + EXPECT_FALSE(parse({"-mark_dead_strippable_dylib", "a.o", "-arch", "i386"})); +} diff --git a/lld/unittests/MachOTests/CMakeLists.txt b/lld/unittests/MachOTests/CMakeLists.txt new file mode 100644 index 000000000000..7cc71380cd62 --- /dev/null +++ b/lld/unittests/MachOTests/CMakeLists.txt @@ -0,0 +1,14 @@ + +add_lld_unittest(lldMachOOldTests + MachONormalizedFileBinaryReaderTests.cpp + MachONormalizedFileBinaryWriterTests.cpp + MachONormalizedFileToAtomsTests.cpp + MachONormalizedFileYAMLTests.cpp + ) + +target_link_libraries(lldMachOOldTests + PRIVATE + lldDriver + lldMachOOld + lldYAML + ) diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp new file mode 100644 index 000000000000..fbf18a8d9e00 --- /dev/null +++ b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp @@ -0,0 +1,753 @@ +//===- lld/unittest/MachOTests/MachONormalizedFileBinaryReaderTests.cpp ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include "gtest/gtest.h" +#include +#include + +using llvm::SmallString; +using llvm::StringRef; +using llvm::MemoryBuffer; +using llvm::Twine; + +using namespace lld::mach_o::normalized; +using namespace llvm::MachO; + +static std::unique_ptr +fromBinary(const uint8_t bytes[], unsigned length, StringRef archStr) { + StringRef sr((const char*)bytes, length); + std::unique_ptr mb(MemoryBuffer::getMemBuffer(sr, "", false)); + llvm::Expected> r = + lld::mach_o::normalized::readBinary( + mb, lld::MachOLinkingContext::archFromName(archStr)); + EXPECT_FALSE(!r); + return std::move(*r); +} + +// The Mach-O object reader uses functions such as read32 or read64 +// which don't allow unaligned access. Our in-memory object file +// needs to be aligned to a larger boundary than uint8_t's. 
+#if _MSC_VER +#define FILEBYTES __declspec(align(64)) const uint8_t fileBytes[] +#else +#define FILEBYTES const uint8_t fileBytes[] __attribute__((aligned(64))) +#endif + +TEST(BinaryReaderTest, empty_obj_x86_64) { + FILEBYTES = { + 0xcf, 0xfa, 0xed, 0xfe, 0x07, 0x00, 0x00, 0x01, + 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(BinaryReaderTest, empty_obj_x86) { + FILEBYTES = { + 0xce, 0xfa, 0xed, 0xfe, 0x07, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x01, 
0x00, 0x00, 0x00, + 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, 0x65, + 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, + 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "i386"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(BinaryReaderTest, empty_obj_ppc) { + FILEBYTES = { + 0xfe, 0xed, 0xfa, 0xce, 0x00, 0x00, 0x00, 0x12, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, 0x65, + 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, + 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "ppc"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(BinaryReaderTest, empty_obj_armv7) { + FILEBYTES = { + 0xce, 0xfa, 0xed, 0xfe, 0x0c, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, 0x65, + 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, + 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "armv7"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { + FILEBYTES = { +#include 
"empty_obj_x86_armv7.txt" + }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); + + std::unique_ptr f2 = + fromBinary(fileBytes, sizeof(fileBytes), "armv7"); + EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_armv7); + EXPECT_EQ((int)(f2->fileType), MH_OBJECT); + EXPECT_EQ((int)(f2->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f2->localSymbols.empty()); + EXPECT_TRUE(f2->globalSymbols.empty()); + EXPECT_TRUE(f2->undefinedSymbols.empty()); +} + +TEST(BinaryReaderTest, hello_obj_x86_64) { + FILEBYTES = { + 0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01, + 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x70, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0xA4, 0x01, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 
0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, + 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x9D, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0xB4, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xE4, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, + 0x48, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, 0xC7, + 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, 0x00, + 0xE8, 0x00, 0x00, 0x00, 0x00, 0xB9, 0x00, 0x00, + 0x00, 0x00, 0x89, 0x45, 0xF8, 0x89, 0xC8, 0x48, + 0x83, 0xC4, 0x10, 0x5D, 0xC3, 0x68, 0x65, 0x6C, + 0x6C, 0x6F, 0x0A, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x2D, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x1D, 0x0F, 0x00, 0x00, 0x00, + 0x0E, 0x02, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x6D, 0x61, + 0x69, 0x6E, 0x00, 0x5F, 0x70, 0x72, 0x69, 0x6E, + 0x74, 0x66, 0x00, 0x4C, 0x5F, 0x2E, 0x73, 0x74, + 0x72, 
0x00, 0x00, 0x00 }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); + + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + const Section& text = f->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(text.type, S_REGULAR); + EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 16U); + EXPECT_EQ(text.address, Hex64(0x0)); + EXPECT_EQ(text.content.size(), 45UL); + EXPECT_EQ((int)(text.content[0]), 0x55); + EXPECT_EQ((int)(text.content[1]), 0x48); + EXPECT_TRUE(text.indirectSymbols.empty()); + EXPECT_EQ(text.relocations.size(), 2UL); + const Relocation& call = text.relocations[0]; + EXPECT_EQ(call.offset, Hex32(0x19)); + EXPECT_EQ(call.type, X86_64_RELOC_BRANCH); + EXPECT_EQ(call.length, 2); + EXPECT_EQ(call.isExtern, true); + EXPECT_EQ(call.symbol, 2U); + const Relocation& str = text.relocations[1]; + EXPECT_EQ(str.offset, Hex32(0xB)); + EXPECT_EQ(str.type, X86_64_RELOC_SIGNED); + EXPECT_EQ(str.length, 2); + EXPECT_EQ(str.isExtern, true); + EXPECT_EQ(str.symbol, 0U); + + const Section& cstring = f->sections[1]; + EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); + EXPECT_TRUE(cstring.sectionName.equals("__cstring")); + EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); + EXPECT_EQ(cstring.attributes, SectionAttr(0)); + EXPECT_EQ((uint16_t)cstring.alignment, 1U); + EXPECT_EQ(cstring.address, Hex64(0x02D)); + EXPECT_EQ(cstring.content.size(), 7UL); + EXPECT_EQ((int)(cstring.content[0]), 0x68); + EXPECT_EQ((int)(cstring.content[1]), 0x65); + EXPECT_EQ((int)(cstring.content[2]), 0x6c); + EXPECT_TRUE(cstring.indirectSymbols.empty()); + EXPECT_TRUE(cstring.relocations.empty()); + + EXPECT_EQ(f->localSymbols.size(), 1UL); + const Symbol& strLabel = 
f->localSymbols[0]; + EXPECT_EQ(strLabel.type, N_SECT); + EXPECT_EQ(strLabel.sect, 2); + EXPECT_EQ(strLabel.value, Hex64(0x2D)); + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& mainLabel = f->globalSymbols[0]; + EXPECT_TRUE(mainLabel.name.equals("_main")); + EXPECT_EQ(mainLabel.type, N_SECT); + EXPECT_EQ(mainLabel.sect, 1); + EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); + EXPECT_EQ(mainLabel.value, Hex64(0x0)); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& printfLabel = f->undefinedSymbols[0]; + EXPECT_TRUE(printfLabel.name.equals("_printf")); + EXPECT_EQ(printfLabel.type, N_UNDF); + EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); +} + +TEST(BinaryReaderTest, hello_obj_x86) { + FILEBYTES = { + 0xCE, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x28, 0x01, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x74, 0x65, + 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x54, 0x45, + 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x7C, 0x01, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, + 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x74, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0xAC, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x55, 0x89, 0xE5, 0x83, + 0xEC, 0x18, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x58, + 0x8D, 0x80, 0x25, 0x00, 0x00, 0x00, 0xC7, 0x45, + 0xFC, 0x00, 0x00, 0x00, 0x00, 0x89, 0x04, 0x24, + 0xE8, 0xDF, 0xFF, 0xFF, 0xFF, 0xB9, 0x00, 0x00, + 0x00, 0x00, 0x89, 0x45, 0xF8, 0x89, 0xC8, 0x83, + 0xC4, 0x18, 0x5D, 0xC3, 0x68, 0x65, 0x6C, 0x6C, + 0x6F, 0x0A, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x0D, 0x0E, 0x00, 0x00, 0xA4, + 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x6D, 0x61, + 0x69, 0x6E, 0x00, 0x5F, 0x70, 0x72, 0x69, 0x6E, + 0x74, 0x66, 0x00, 0x00 + }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "i386"); + + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + const Section& text = f->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(text.type, S_REGULAR); + 
EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 16U); + EXPECT_EQ(text.address, Hex64(0x0)); + EXPECT_EQ(text.content.size(), 48UL); + EXPECT_EQ((int)(text.content[0]), 0x55); + EXPECT_EQ((int)(text.content[1]), 0x89); + EXPECT_TRUE(text.indirectSymbols.empty()); + EXPECT_EQ(text.relocations.size(), 3UL); + const Relocation& call = text.relocations[0]; + EXPECT_EQ(call.offset, Hex32(0x1D)); + EXPECT_EQ(call.scattered, false); + EXPECT_EQ(call.type, GENERIC_RELOC_VANILLA); + EXPECT_EQ(call.pcRel, true); + EXPECT_EQ(call.length, 2); + EXPECT_EQ(call.isExtern, true); + EXPECT_EQ(call.symbol, 1U); + const Relocation& sectDiff = text.relocations[1]; + EXPECT_EQ(sectDiff.offset, Hex32(0xE)); + EXPECT_EQ(sectDiff.scattered, true); + EXPECT_EQ(sectDiff.type, GENERIC_RELOC_LOCAL_SECTDIFF); + EXPECT_EQ(sectDiff.pcRel, false); + EXPECT_EQ(sectDiff.length, 2); + EXPECT_EQ(sectDiff.value, 0x30U); + const Relocation& pair = text.relocations[2]; + EXPECT_EQ(pair.offset, Hex32(0x0)); + EXPECT_EQ(pair.scattered, true); + EXPECT_EQ(pair.type, GENERIC_RELOC_PAIR); + EXPECT_EQ(pair.pcRel, false); + EXPECT_EQ(pair.length, 2); + EXPECT_EQ(pair.value, 0x0BU); + + const Section& cstring = f->sections[1]; + EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); + EXPECT_TRUE(cstring.sectionName.equals("__cstring")); + EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); + EXPECT_EQ(cstring.attributes, SectionAttr(0)); + EXPECT_EQ((uint16_t)cstring.alignment, 1U); + EXPECT_EQ(cstring.address, Hex64(0x030)); + EXPECT_EQ(cstring.content.size(), 7UL); + EXPECT_EQ((int)(cstring.content[0]), 0x68); + EXPECT_EQ((int)(cstring.content[1]), 0x65); + EXPECT_EQ((int)(cstring.content[2]), 0x6c); + EXPECT_TRUE(cstring.indirectSymbols.empty()); + EXPECT_TRUE(cstring.relocations.empty()); + + EXPECT_EQ(f->localSymbols.size(), 0UL); + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& mainLabel = f->globalSymbols[0]; + 
EXPECT_TRUE(mainLabel.name.equals("_main")); + EXPECT_EQ(mainLabel.type, N_SECT); + EXPECT_EQ(mainLabel.sect, 1); + EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); + EXPECT_EQ(mainLabel.value, Hex64(0x0)); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& printfLabel = f->undefinedSymbols[0]; + EXPECT_TRUE(printfLabel.name.equals("_printf")); + EXPECT_EQ(printfLabel.type, N_UNDF); + EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); +} + +TEST(BinaryReaderTest, hello_obj_armv7) { + FILEBYTES = { + 0xCE, 0xFA, 0xED, 0xFE, 0x0C, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x28, 0x01, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x74, 0x65, + 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x54, 0x45, + 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, + 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x6E, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0xA0, 0x01, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0xB8, 0x01, 
0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0xB5, 0x6F, 0x46, + 0x82, 0xB0, 0x40, 0xF2, 0x18, 0x00, 0xC0, 0xF2, + 0x00, 0x00, 0x78, 0x44, 0x00, 0x21, 0xC0, 0xF2, + 0x00, 0x01, 0x01, 0x91, 0xFF, 0xF7, 0xF2, 0xFF, + 0x00, 0x21, 0xC0, 0xF2, 0x00, 0x01, 0x00, 0x90, + 0x08, 0x46, 0x02, 0xB0, 0x80, 0xBD, 0x68, 0x65, + 0x6C, 0x6C, 0x6F, 0x0A, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x6D, + 0x0A, 0x00, 0x00, 0xB9, 0x2A, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0xB1, 0x0E, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0xA9, 0x2A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xA1, 0x0E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x01, 0x08, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x5F, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x5F, + 0x70, 0x72, 0x69, 0x6E, 0x74, 0x66, 0x00, 0x00 + }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "armv7"); + + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + const Section& text = f->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(text.type, S_REGULAR); + EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 4U); + EXPECT_EQ(text.address, 
Hex64(0x0)); + EXPECT_EQ(text.content.size(), 42UL); + EXPECT_EQ((int)(text.content[0]), 0x80); + EXPECT_EQ((int)(text.content[1]), 0xB5); + EXPECT_TRUE(text.indirectSymbols.empty()); + EXPECT_EQ(text.relocations.size(), 5UL); + const Relocation& call = text.relocations[0]; + EXPECT_EQ(call.offset, Hex32(0x18)); + EXPECT_EQ(call.scattered, false); + EXPECT_EQ(call.type, ARM_THUMB_RELOC_BR22); + EXPECT_EQ(call.length, 2); + EXPECT_EQ(call.isExtern, true); + EXPECT_EQ(call.symbol, 1U); + const Relocation& movt = text.relocations[1]; + EXPECT_EQ(movt.offset, Hex32(0xA)); + EXPECT_EQ(movt.scattered, true); + EXPECT_EQ(movt.type, ARM_RELOC_HALF_SECTDIFF); + EXPECT_EQ(movt.length, 3); + EXPECT_EQ(movt.value, Hex32(0x2A)); + const Relocation& movtPair = text.relocations[2]; + EXPECT_EQ(movtPair.offset, Hex32(0x18)); + EXPECT_EQ(movtPair.scattered, true); + EXPECT_EQ(movtPair.type, ARM_RELOC_PAIR); + EXPECT_EQ(movtPair.length, 3); + EXPECT_EQ(movtPair.value, Hex32(0xE)); + const Relocation& movw = text.relocations[3]; + EXPECT_EQ(movw.offset, Hex32(0x6)); + EXPECT_EQ(movw.scattered, true); + EXPECT_EQ(movw.type, ARM_RELOC_HALF_SECTDIFF); + EXPECT_EQ(movw.length, 2); + EXPECT_EQ(movw.value, Hex32(0x2A)); + const Relocation& movwPair = text.relocations[4]; + EXPECT_EQ(movwPair.offset, Hex32(0x0)); + EXPECT_EQ(movwPair.scattered, true); + EXPECT_EQ(movwPair.type, ARM_RELOC_PAIR); + EXPECT_EQ(movwPair.length, 2); + EXPECT_EQ(movwPair.value, Hex32(0xE)); + + const Section& cstring = f->sections[1]; + EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); + EXPECT_TRUE(cstring.sectionName.equals("__cstring")); + EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); + EXPECT_EQ(cstring.attributes, SectionAttr(0)); + EXPECT_EQ((uint16_t)cstring.alignment, 1U); + EXPECT_EQ(cstring.address, Hex64(0x02A)); + EXPECT_EQ(cstring.content.size(), 7UL); + EXPECT_EQ((int)(cstring.content[0]), 0x68); + EXPECT_EQ((int)(cstring.content[1]), 0x65); + EXPECT_EQ((int)(cstring.content[2]), 0x6c); + 
EXPECT_TRUE(cstring.indirectSymbols.empty()); + EXPECT_TRUE(cstring.relocations.empty()); + + EXPECT_EQ(f->localSymbols.size(), 0UL); + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& mainLabel = f->globalSymbols[0]; + EXPECT_TRUE(mainLabel.name.equals("_main")); + EXPECT_EQ(mainLabel.type, N_SECT); + EXPECT_EQ(mainLabel.sect, 1); + EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); + EXPECT_EQ(mainLabel.value, Hex64(0x0)); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& printfLabel = f->undefinedSymbols[0]; + EXPECT_TRUE(printfLabel.name.equals("_printf")); + EXPECT_EQ(printfLabel.type, N_UNDF); + EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); +} + +TEST(BinaryReaderTest, hello_obj_ppc) { + FILEBYTES = { + 0xFE, 0xED, 0xFA, 0xCE, 0x00, 0x00, 0x00, 0x12, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x01, 0x28, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x01, 0x44, + 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x07, + 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x74, 0x65, + 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x54, 0x45, + 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x01, 0x44, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, 0x90, + 0x00, 0x00, 0x00, 0x05, 0x80, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, + 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x07, + 0x00, 0x00, 0x01, 0x88, 0x00, 0x00, 0x00, 0x02, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x01, 0xB8, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, 0xD0, + 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0B, + 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7C, 0x08, 0x02, 0xA6, + 0xBF, 0xC1, 0xFF, 0xF8, 0x90, 0x01, 0x00, 0x08, + 0x94, 0x21, 0xFF, 0xB0, 0x7C, 0x3E, 0x0B, 0x78, + 0x42, 0x9F, 0x00, 0x05, 0x7F, 0xE8, 0x02, 0xA6, + 0x3C, 0x5F, 0x00, 0x00, 0x38, 0x62, 0x00, 0x2C, + 0x4B, 0xFF, 0xFF, 0xDD, 0x38, 0x00, 0x00, 0x00, + 0x7C, 0x03, 0x03, 0x78, 0x80, 0x21, 0x00, 0x00, + 0x80, 0x01, 0x00, 0x08, 0x7C, 0x08, 0x03, 0xA6, + 0xBB, 0xC1, 0xFF, 0xF8, 0x4E, 0x80, 0x00, 0x20, + 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x01, 0xD3, + 0xAB, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x44, + 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, + 0xAC, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x44, + 0xA1, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x18, + 0x00, 0x00, 0x00, 0x01, 0x0F, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x5F, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x5F, + 0x70, 0x72, 0x69, 0x6E, 0x74, 0x66, 0x00, 0x00 + }; + std::unique_ptr f = + fromBinary(fileBytes, sizeof(fileBytes), "ppc"); + + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); + EXPECT_EQ((int)(f->fileType), MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); + 
EXPECT_EQ(f->sections.size(), 2UL); + const Section& text = f->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(text.type, S_REGULAR); + EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 4U); + EXPECT_EQ(text.address, Hex64(0x0)); + EXPECT_EQ(text.content.size(), 68UL); + EXPECT_EQ((int)(text.content[0]), 0x7C); + EXPECT_EQ((int)(text.content[1]), 0x08); + EXPECT_TRUE(text.indirectSymbols.empty()); + EXPECT_EQ(text.relocations.size(), 5UL); + const Relocation& bl = text.relocations[0]; + EXPECT_EQ(bl.offset, Hex32(0x24)); + EXPECT_EQ(bl.type, PPC_RELOC_BR24); + EXPECT_EQ(bl.length, 2); + EXPECT_EQ(bl.isExtern, true); + EXPECT_EQ(bl.symbol, 1U); + const Relocation& lo = text.relocations[1]; + EXPECT_EQ(lo.offset, Hex32(0x20)); + EXPECT_EQ(lo.scattered, true); + EXPECT_EQ(lo.type, PPC_RELOC_LO16_SECTDIFF); + EXPECT_EQ(lo.length, 2); + EXPECT_EQ(lo.value, Hex32(0x44)); + const Relocation& loPair = text.relocations[2]; + EXPECT_EQ(loPair.offset, Hex32(0x0)); + EXPECT_EQ(loPair.scattered, true); + EXPECT_EQ(loPair.type, PPC_RELOC_PAIR); + EXPECT_EQ(loPair.length, 2); + EXPECT_EQ(loPair.value, Hex32(0x18)); + const Relocation& ha = text.relocations[3]; + EXPECT_EQ(ha.offset, Hex32(0x1C)); + EXPECT_EQ(ha.scattered, true); + EXPECT_EQ(ha.type, PPC_RELOC_HA16_SECTDIFF); + EXPECT_EQ(ha.length, 2); + EXPECT_EQ(ha.value, Hex32(0x44)); + const Relocation& haPair = text.relocations[4]; + EXPECT_EQ(haPair.offset, Hex32(0x2c)); + EXPECT_EQ(haPair.scattered, true); + EXPECT_EQ(haPair.type, PPC_RELOC_PAIR); + EXPECT_EQ(haPair.length, 2); + EXPECT_EQ(haPair.value, Hex32(0x18)); + + const Section& cstring = f->sections[1]; + EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); + EXPECT_TRUE(cstring.sectionName.equals("__cstring")); + EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); + EXPECT_EQ(cstring.attributes, SectionAttr(0)); + 
EXPECT_EQ((uint16_t)cstring.alignment, 4U); + EXPECT_EQ(cstring.address, Hex64(0x044)); + EXPECT_EQ(cstring.content.size(), 7UL); + EXPECT_EQ((int)(cstring.content[0]), 0x68); + EXPECT_EQ((int)(cstring.content[1]), 0x65); + EXPECT_EQ((int)(cstring.content[2]), 0x6c); + EXPECT_TRUE(cstring.indirectSymbols.empty()); + EXPECT_TRUE(cstring.relocations.empty()); + + EXPECT_EQ(f->localSymbols.size(), 0UL); + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& mainLabel = f->globalSymbols[0]; + EXPECT_TRUE(mainLabel.name.equals("_main")); + EXPECT_EQ(mainLabel.type, N_SECT); + EXPECT_EQ(mainLabel.sect, 1); + EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); + EXPECT_EQ(mainLabel.value, Hex64(0x0)); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& printfLabel = f->undefinedSymbols[0]; + EXPECT_TRUE(printfLabel.name.equals("_printf")); + EXPECT_EQ(printfLabel.type, N_UNDF); + EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); + + SmallString<128> tmpFl; + std::error_code ec = + llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); + EXPECT_FALSE(ec); + llvm::Error ec2 = writeBinary(*f, tmpFl); + EXPECT_FALSE(ec2); + llvm::sys::fs::remove(tmpFl); +} diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp new file mode 100644 index 000000000000..f2314da28a4f --- /dev/null +++ b/lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp @@ -0,0 +1,695 @@ +//===- lld/unittest/MachOTests/MachONormalizedFileBinaryWriterTests.cpp ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/FileSystem.h" +#include "gtest/gtest.h" +#include +#include +#include +#include + +using llvm::StringRef; +using llvm::MemoryBuffer; +using llvm::SmallString; +using llvm::Twine; +using llvm::ErrorOr; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +// Parses binary mach-o file at specified path and returns +// ownership of buffer to mb parameter and ownership of +// Normalized file to nf parameter. +static void fromBinary(StringRef path, std::unique_ptr &mb, + std::unique_ptr &nf, StringRef archStr) { + ErrorOr> mbOrErr = MemoryBuffer::getFile(path); + std::error_code ec = mbOrErr.getError(); + EXPECT_FALSE(ec); + mb = std::move(mbOrErr.get()); + + llvm::Expected> r = + lld::mach_o::normalized::readBinary( + mb, lld::MachOLinkingContext::archFromName(archStr)); + EXPECT_FALSE(!r); + nf.reset(r->release()); +} + +static Relocation +makeReloc(unsigned addr, bool rel, bool ext, RelocationInfoType type, + unsigned sym) { + Relocation result; + result.offset = addr; + result.scattered = false; + result.type = type; + result.length = 2; + result.pcRel = rel; + result.isExtern = ext; + result.value = 0; + result.symbol = sym; + return result; +} + +static Relocation +makeScatReloc(unsigned addr, RelocationInfoType type, unsigned value) { + Relocation result; + result.offset = addr; + result.scattered = true; + result.type = type; + result.length = 2; + result.pcRel = false; + result.isExtern = true; + result.value = value; + result.symbol = 0; + return result; +} + +static Symbol +makeUndefSymbol(StringRef name) { + Symbol sym; + sym.name = name; + sym.type = N_UNDF; + sym.scope = N_EXT; + sym.sect = NO_SECT; + sym.desc = 0; + sym.value = 0; + 
return sym; +} + + +static Symbol +makeSymbol(StringRef name, unsigned addr) { + Symbol sym; + sym.name = name; + sym.type = N_SECT; + sym.scope = N_EXT; + sym.sect = 1; + sym.desc = 0; + sym.value = addr; + return sym; +} + +static Symbol +makeThumbSymbol(StringRef name, unsigned addr) { + Symbol sym; + sym.name = name; + sym.type = N_SECT; + sym.scope = N_EXT; + sym.sect = 1; + sym.desc = N_ARM_THUMB_DEF; + sym.value = addr; + return sym; +} + +TEST(BinaryWriterTest, obj_relocs_x86_64) { + SmallString<128> tmpFl; + { + NormalizedFile f; + f.arch = lld::MachOLinkingContext::arch_x86_64; + f.fileType = MH_OBJECT; + f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; + f.os = lld::MachOLinkingContext::OS::macOSX; + f.sections.resize(1); + Section& text = f.sections.front(); + text.segmentName = "__TEXT"; + text.sectionName = "__text"; + text.type = S_REGULAR; + text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS); + text.alignment = 16; + text.address = 0; + const uint8_t textBytes[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x05, + 0x00, 0x00, 0x00, 0x00, 0xff, 0x35, 0x00, 0x00, + 0x00, 0x00, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, + 0xc6, 0x05, 0xff, 0xff, 0xff, 0xff, 0x12, 0xc7, + 0x05, 0xfc, 0xff, 0xff, 0xff, 0x78, 0x56, 0x34, + 0x12, 0x48, 0x8b, 0x3d, 0x00, 0x00, 0x00, 0x00 }; + + text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); + text.relocations.push_back(makeReloc(0x01, false, true, X86_64_RELOC_BRANCH, 1)); + text.relocations.push_back(makeReloc(0x08, false, true, X86_64_RELOC_GOT_LOAD, 1)); + text.relocations.push_back(makeReloc(0x0E, false, true, X86_64_RELOC_GOT, 1)); + text.relocations.push_back(makeReloc(0x14, false, true, X86_64_RELOC_SIGNED, 1)); + text.relocations.push_back(makeReloc(0x1A, false, true, X86_64_RELOC_SIGNED_1, 1)); + text.relocations.push_back(makeReloc(0x21, false, true, X86_64_RELOC_SIGNED_4, 1)); + text.relocations.push_back(makeReloc(0x2C, false, true, X86_64_RELOC_TLV, 2)); + + 
f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); + f.undefinedSymbols.push_back(makeUndefSymbol("_tbar")); + + std::error_code ec = + llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); + EXPECT_FALSE(ec); + llvm::Error ec2 = writeBinary(f, tmpFl); + EXPECT_FALSE(ec2); + } + + std::unique_ptr bufferOwner; + std::unique_ptr f2; + fromBinary(tmpFl, bufferOwner, f2, "x86_64"); + + EXPECT_EQ(lld::MachOLinkingContext::arch_x86_64, f2->arch); + EXPECT_EQ(MH_OBJECT, f2->fileType); + EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); + + EXPECT_TRUE(f2->localSymbols.empty()); + EXPECT_TRUE(f2->globalSymbols.empty()); + EXPECT_EQ(2UL, f2->undefinedSymbols.size()); + const Symbol& barUndef = f2->undefinedSymbols[0]; + EXPECT_TRUE(barUndef.name.equals("_bar")); + EXPECT_EQ(N_UNDF, barUndef.type); + EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); + const Symbol& tbarUndef = f2->undefinedSymbols[1]; + EXPECT_TRUE(tbarUndef.name.equals("_tbar")); + EXPECT_EQ(N_UNDF, tbarUndef.type); + EXPECT_EQ(SymbolScope(N_EXT), tbarUndef.scope); + + EXPECT_EQ(1UL, f2->sections.size()); + const Section& text = f2->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(S_REGULAR, text.type); + EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 16U); + EXPECT_EQ(text.address, Hex64(0x0)); + EXPECT_EQ(48UL, text.content.size()); + const Relocation& call = text.relocations[0]; + EXPECT_EQ(call.offset, Hex32(0x1)); + EXPECT_EQ(call.type, X86_64_RELOC_BRANCH); + EXPECT_EQ(call.length, 2); + EXPECT_EQ(call.isExtern, true); + EXPECT_EQ(call.symbol, 1U); + const Relocation& gotLoad = text.relocations[1]; + EXPECT_EQ(gotLoad.offset, Hex32(0x8)); + EXPECT_EQ(gotLoad.type, X86_64_RELOC_GOT_LOAD); + EXPECT_EQ(gotLoad.length, 2); + EXPECT_EQ(gotLoad.isExtern, true); + EXPECT_EQ(gotLoad.symbol, 1U); + const Relocation& gotUse = 
text.relocations[2]; + EXPECT_EQ(gotUse.offset, Hex32(0xE)); + EXPECT_EQ(gotUse.type, X86_64_RELOC_GOT); + EXPECT_EQ(gotUse.length, 2); + EXPECT_EQ(gotUse.isExtern, true); + EXPECT_EQ(gotUse.symbol, 1U); + const Relocation& signed0 = text.relocations[3]; + EXPECT_EQ(signed0.offset, Hex32(0x14)); + EXPECT_EQ(signed0.type, X86_64_RELOC_SIGNED); + EXPECT_EQ(signed0.length, 2); + EXPECT_EQ(signed0.isExtern, true); + EXPECT_EQ(signed0.symbol, 1U); + const Relocation& signed1 = text.relocations[4]; + EXPECT_EQ(signed1.offset, Hex32(0x1A)); + EXPECT_EQ(signed1.type, X86_64_RELOC_SIGNED_1); + EXPECT_EQ(signed1.length, 2); + EXPECT_EQ(signed1.isExtern, true); + EXPECT_EQ(signed1.symbol, 1U); + const Relocation& signed4 = text.relocations[5]; + EXPECT_EQ(signed4.offset, Hex32(0x21)); + EXPECT_EQ(signed4.type, X86_64_RELOC_SIGNED_4); + EXPECT_EQ(signed4.length, 2); + EXPECT_EQ(signed4.isExtern, true); + EXPECT_EQ(signed4.symbol, 1U); + + bufferOwner.reset(nullptr); + std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); + EXPECT_FALSE(ec); +} + + + +TEST(BinaryWriterTest, obj_relocs_x86) { + SmallString<128> tmpFl; + { + NormalizedFile f; + f.arch = lld::MachOLinkingContext::arch_x86; + f.fileType = MH_OBJECT; + f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; + f.os = lld::MachOLinkingContext::OS::macOSX; + f.sections.resize(1); + Section& text = f.sections.front(); + text.segmentName = "__TEXT"; + text.sectionName = "__text"; + text.type = S_REGULAR; + text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS); + text.alignment = 16; + text.address = 0; + const uint8_t textBytes[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff, 0xa1, 0x00, 0x00, + 0x00, 0x00, 0x8b, 0xb0, 0xfb, 0xff, 0xff, 0xff, + 0x8b, 0x80, 0x11, 0x00, 0x00, 0x00 }; + + text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); + text.relocations.push_back(makeReloc(0x01, true, true, GENERIC_RELOC_VANILLA, 0)); + text.relocations.push_back(makeReloc(0x06, false, true, GENERIC_RELOC_VANILLA, 
0)); + text.relocations.push_back(makeScatReloc(0x0c, GENERIC_RELOC_LOCAL_SECTDIFF, 0)); + text.relocations.push_back(makeScatReloc(0x0, GENERIC_RELOC_PAIR, 5)); + text.relocations.push_back(makeReloc(0x12, true, true, GENERIC_RELOC_TLV, 1)); + + f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); + f.undefinedSymbols.push_back(makeUndefSymbol("_tbar")); + + std::error_code ec = + llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); + EXPECT_FALSE(ec); + llvm::Error ec2 = writeBinary(f, tmpFl); + EXPECT_FALSE(ec2); + } + std::unique_ptr bufferOwner; + std::unique_ptr f2; + fromBinary(tmpFl, bufferOwner, f2, "i386"); + + EXPECT_EQ(lld::MachOLinkingContext::arch_x86, f2->arch); + EXPECT_EQ(MH_OBJECT, f2->fileType); + EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); + + EXPECT_TRUE(f2->localSymbols.empty()); + EXPECT_TRUE(f2->globalSymbols.empty()); + EXPECT_EQ(2UL, f2->undefinedSymbols.size()); + const Symbol& barUndef = f2->undefinedSymbols[0]; + EXPECT_TRUE(barUndef.name.equals("_bar")); + EXPECT_EQ(N_UNDF, barUndef.type); + EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); + const Symbol& tbarUndef = f2->undefinedSymbols[1]; + EXPECT_TRUE(tbarUndef.name.equals("_tbar")); + EXPECT_EQ(N_UNDF, tbarUndef.type); + EXPECT_EQ(SymbolScope(N_EXT), tbarUndef.scope); + + EXPECT_EQ(1UL, f2->sections.size()); + const Section& text = f2->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(S_REGULAR, text.type); + EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 16U); + EXPECT_EQ(text.address, Hex64(0x0)); + EXPECT_EQ(22UL, text.content.size()); + const Relocation& call = text.relocations[0]; + EXPECT_EQ(call.offset, Hex32(0x1)); + EXPECT_EQ(call.scattered, false); + EXPECT_EQ(call.type, GENERIC_RELOC_VANILLA); + EXPECT_EQ(call.pcRel, true); + EXPECT_EQ(call.length, 2); + EXPECT_EQ(call.isExtern, true); + 
EXPECT_EQ(call.symbol, 0U); + const Relocation& absLoad = text.relocations[1]; + EXPECT_EQ(absLoad.offset, Hex32(0x6)); + EXPECT_EQ(absLoad.scattered, false); + EXPECT_EQ(absLoad.type, GENERIC_RELOC_VANILLA); + EXPECT_EQ(absLoad.pcRel, false); + EXPECT_EQ(absLoad.length, 2); + EXPECT_EQ(absLoad.isExtern, true); + EXPECT_EQ(absLoad.symbol,0U); + const Relocation& pic1 = text.relocations[2]; + EXPECT_EQ(pic1.offset, Hex32(0xc)); + EXPECT_EQ(pic1.scattered, true); + EXPECT_EQ(pic1.type, GENERIC_RELOC_LOCAL_SECTDIFF); + EXPECT_EQ(pic1.length, 2); + EXPECT_EQ(pic1.value, 0U); + const Relocation& pic2 = text.relocations[3]; + EXPECT_EQ(pic2.offset, Hex32(0x0)); + EXPECT_EQ(pic1.scattered, true); + EXPECT_EQ(pic2.type, GENERIC_RELOC_PAIR); + EXPECT_EQ(pic2.length, 2); + EXPECT_EQ(pic2.value, 5U); + const Relocation& tlv = text.relocations[4]; + EXPECT_EQ(tlv.offset, Hex32(0x12)); + EXPECT_EQ(tlv.type, GENERIC_RELOC_TLV); + EXPECT_EQ(tlv.length, 2); + EXPECT_EQ(tlv.isExtern, true); + EXPECT_EQ(tlv.symbol, 1U); + + // lld::errs() << "temp = " << tmpFl << "\n"; + bufferOwner.reset(nullptr); + std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); + EXPECT_FALSE(ec); +} + + + +TEST(BinaryWriterTest, obj_relocs_armv7) { + SmallString<128> tmpFl; + { + NormalizedFile f; + f.arch = lld::MachOLinkingContext::arch_armv7; + f.fileType = MH_OBJECT; + f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; + f.os = lld::MachOLinkingContext::OS::macOSX; + f.sections.resize(1); + Section& text = f.sections.front(); + text.segmentName = "__TEXT"; + text.sectionName = "__text"; + text.type = S_REGULAR; + text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS); + text.alignment = 4; + text.address = 0; + const uint8_t textBytes[] = { + 0xff, 0xf7, 0xfe, 0xef, 0x40, 0xf2, 0x05, 0x01, + 0xc0, 0xf2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xbf }; + + text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); + text.relocations.push_back(makeReloc(0x00, true, true, + 
ARM_THUMB_RELOC_BR22, 2)); + text.relocations.push_back(makeScatReloc(0x04, + ARM_RELOC_HALF_SECTDIFF, 0x10)); + text.relocations.push_back(makeScatReloc(0x00, + ARM_RELOC_PAIR, 0xC)); + text.relocations.push_back(makeScatReloc(0x08, + ARM_RELOC_HALF_SECTDIFF, 0x10)); + text.relocations.push_back(makeScatReloc(0x00, + ARM_RELOC_PAIR, 0xC)); + text.relocations.push_back(makeReloc(0x0C, false, true, + ARM_RELOC_VANILLA, 2)); + + f.globalSymbols.push_back(makeThumbSymbol("_foo", 0x00)); + f.globalSymbols.push_back(makeThumbSymbol("_foo2", 0x10)); + f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); + + std::error_code ec = + llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); + EXPECT_FALSE(ec); + llvm::Error ec2 = writeBinary(f, tmpFl); + EXPECT_FALSE(ec2); + } + std::unique_ptr bufferOwner; + std::unique_ptr f2; + fromBinary(tmpFl, bufferOwner, f2, "armv7"); + + EXPECT_EQ(lld::MachOLinkingContext::arch_armv7, f2->arch); + EXPECT_EQ(MH_OBJECT, f2->fileType); + EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); + + EXPECT_TRUE(f2->localSymbols.empty()); + EXPECT_EQ(2UL, f2->globalSymbols.size()); + const Symbol& fooDef = f2->globalSymbols[0]; + EXPECT_TRUE(fooDef.name.equals("_foo")); + EXPECT_EQ(N_SECT, fooDef.type); + EXPECT_EQ(1, fooDef.sect); + EXPECT_EQ(SymbolScope(N_EXT), fooDef.scope); + const Symbol& foo2Def = f2->globalSymbols[1]; + EXPECT_TRUE(foo2Def.name.equals("_foo2")); + EXPECT_EQ(N_SECT, foo2Def.type); + EXPECT_EQ(1, foo2Def.sect); + EXPECT_EQ(SymbolScope(N_EXT), foo2Def.scope); + + EXPECT_EQ(1UL, f2->undefinedSymbols.size()); + const Symbol& barUndef = f2->undefinedSymbols[0]; + EXPECT_TRUE(barUndef.name.equals("_bar")); + EXPECT_EQ(N_UNDF, barUndef.type); + EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); + + EXPECT_EQ(1UL, f2->sections.size()); + const Section& text = f2->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(S_REGULAR, text.type); + 
EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 4U); + EXPECT_EQ(text.address, Hex64(0x0)); + EXPECT_EQ(18UL, text.content.size()); + const Relocation& blx = text.relocations[0]; + EXPECT_EQ(blx.offset, Hex32(0x0)); + EXPECT_EQ(blx.scattered, false); + EXPECT_EQ(blx.type, ARM_THUMB_RELOC_BR22); + EXPECT_EQ(blx.pcRel, true); + EXPECT_EQ(blx.length, 2); + EXPECT_EQ(blx.isExtern, true); + EXPECT_EQ(blx.symbol, 2U); + const Relocation& movw1 = text.relocations[1]; + EXPECT_EQ(movw1.offset, Hex32(0x4)); + EXPECT_EQ(movw1.scattered, true); + EXPECT_EQ(movw1.type, ARM_RELOC_HALF_SECTDIFF); + EXPECT_EQ(movw1.length, 2); + EXPECT_EQ(movw1.value, 0x10U); + const Relocation& movw2 = text.relocations[2]; + EXPECT_EQ(movw2.offset, Hex32(0x0)); + EXPECT_EQ(movw2.scattered, true); + EXPECT_EQ(movw2.type, ARM_RELOC_PAIR); + EXPECT_EQ(movw2.length, 2); + EXPECT_EQ(movw2.value, Hex32(0xC)); + const Relocation& movt1 = text.relocations[3]; + EXPECT_EQ(movt1.offset, Hex32(0x8)); + EXPECT_EQ(movt1.scattered, true); + EXPECT_EQ(movt1.type, ARM_RELOC_HALF_SECTDIFF); + EXPECT_EQ(movt1.length, 2); + EXPECT_EQ(movt1.value, Hex32(0x10)); + const Relocation& movt2 = text.relocations[4]; + EXPECT_EQ(movt2.offset, Hex32(0x0)); + EXPECT_EQ(movt2.scattered, true); + EXPECT_EQ(movt2.type, ARM_RELOC_PAIR); + EXPECT_EQ(movt2.length, 2); + EXPECT_EQ(movt2.value, Hex32(0xC)); + const Relocation& absPointer = text.relocations[5]; + EXPECT_EQ(absPointer.offset, Hex32(0xC)); + EXPECT_EQ(absPointer.type, ARM_RELOC_VANILLA); + EXPECT_EQ(absPointer.length, 2); + EXPECT_EQ(absPointer.isExtern, true); + EXPECT_EQ(absPointer.symbol, 2U); + + // lld::errs() << "temp = " << tmpFl << "\n"; + bufferOwner.reset(nullptr); + std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); + EXPECT_FALSE(ec); +} + + + +TEST(BinaryWriterTest, obj_relocs_ppc) { + SmallString<128> tmpFl; + { + NormalizedFile f; + f.arch = 
lld::MachOLinkingContext::arch_ppc; + f.fileType = MH_OBJECT; + f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; + f.os = lld::MachOLinkingContext::OS::macOSX; + f.sections.resize(1); + Section& text = f.sections.front(); + text.segmentName = "__TEXT"; + text.sectionName = "__text"; + text.type = S_REGULAR; + text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS); + text.alignment = 4; + text.address = 0; + const uint8_t textBytes[] = { + 0x48, 0x00, 0x00, 0x01, 0x40, 0x82, 0xff, 0xfc, + 0x3c, 0x62, 0x00, 0x00, 0x3c, 0x62, 0x00, 0x00, + 0x80, 0x63, 0x00, 0x24, 0x80, 0x63, 0x00, 0x24, + 0x3c, 0x40, 0x00, 0x00, 0x3c, 0x60, 0x00, 0x00, + 0x80, 0x42, 0x00, 0x28, 0x80, 0x63, 0x00, 0x28, + 0x60, 0x00, 0x00, 0x00 }; + + text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); + text.relocations.push_back(makeReloc(0x00, true, true, + PPC_RELOC_BR24, 2)); + text.relocations.push_back(makeReloc(0x04, true, true, + PPC_RELOC_BR14, 2)); + text.relocations.push_back(makeScatReloc(0x08, + PPC_RELOC_HI16_SECTDIFF, 0x28)); + text.relocations.push_back(makeScatReloc(0x24, + PPC_RELOC_PAIR, 0x4)); + text.relocations.push_back(makeScatReloc(0x0C, + PPC_RELOC_HA16_SECTDIFF, 0x28)); + text.relocations.push_back(makeScatReloc(0x24, + PPC_RELOC_PAIR, 0x4)); + text.relocations.push_back(makeScatReloc(0x10, + PPC_RELOC_LO16_SECTDIFF, 0x28)); + text.relocations.push_back(makeScatReloc(0x00, + PPC_RELOC_PAIR, 0x4)); + text.relocations.push_back(makeScatReloc(0x14, + PPC_RELOC_LO14_SECTDIFF, 0x28)); + text.relocations.push_back(makeScatReloc(0x00, + PPC_RELOC_PAIR, 0x4)); + text.relocations.push_back(makeReloc(0x18, false, false, + PPC_RELOC_HI16, 1)); + text.relocations.push_back(makeReloc(0x28, false, false, + PPC_RELOC_PAIR, 0)); + text.relocations.push_back(makeReloc(0x1C, false, false, + PPC_RELOC_HA16, 1)); + text.relocations.push_back(makeReloc(0x28, false, false, + PPC_RELOC_PAIR, 0)); + text.relocations.push_back(makeReloc(0x20, false, false, + 
PPC_RELOC_LO16, 1)); + text.relocations.push_back(makeReloc(0x00, false, false, + PPC_RELOC_PAIR, 0)); + text.relocations.push_back(makeReloc(0x24, false, false, + PPC_RELOC_LO14, 1)); + text.relocations.push_back(makeReloc(0x00, false, false, + PPC_RELOC_PAIR, 0)); + + f.globalSymbols.push_back(makeSymbol("_foo", 0x00)); + f.globalSymbols.push_back(makeSymbol("_foo2", 0x28)); + f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); + + std::error_code ec = + llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); + EXPECT_FALSE(ec); + llvm::Error ec2 = writeBinary(f, tmpFl); + EXPECT_FALSE(ec2); + } + std::unique_ptr bufferOwner; + std::unique_ptr f2; + fromBinary(tmpFl, bufferOwner, f2, "ppc"); + + EXPECT_EQ(lld::MachOLinkingContext::arch_ppc, f2->arch); + EXPECT_EQ(MH_OBJECT, f2->fileType); + EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); + + EXPECT_TRUE(f2->localSymbols.empty()); + EXPECT_EQ(2UL, f2->globalSymbols.size()); + const Symbol& fooDef = f2->globalSymbols[0]; + EXPECT_TRUE(fooDef.name.equals("_foo")); + EXPECT_EQ(N_SECT, fooDef.type); + EXPECT_EQ(1, fooDef.sect); + EXPECT_EQ(SymbolScope(N_EXT), fooDef.scope); + const Symbol& foo2Def = f2->globalSymbols[1]; + EXPECT_TRUE(foo2Def.name.equals("_foo2")); + EXPECT_EQ(N_SECT, foo2Def.type); + EXPECT_EQ(1, foo2Def.sect); + EXPECT_EQ(SymbolScope(N_EXT), foo2Def.scope); + + EXPECT_EQ(1UL, f2->undefinedSymbols.size()); + const Symbol& barUndef = f2->undefinedSymbols[0]; + EXPECT_TRUE(barUndef.name.equals("_bar")); + EXPECT_EQ(N_UNDF, barUndef.type); + EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); + + EXPECT_EQ(1UL, f2->sections.size()); + const Section& text = f2->sections[0]; + EXPECT_TRUE(text.segmentName.equals("__TEXT")); + EXPECT_TRUE(text.sectionName.equals("__text")); + EXPECT_EQ(S_REGULAR, text.type); + EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)text.alignment, 4U); + EXPECT_EQ(text.address, Hex64(0x0)); + 
EXPECT_EQ(44UL, text.content.size()); + const Relocation& br24 = text.relocations[0]; + EXPECT_EQ(br24.offset, Hex32(0x0)); + EXPECT_EQ(br24.scattered, false); + EXPECT_EQ(br24.type, PPC_RELOC_BR24); + EXPECT_EQ(br24.pcRel, true); + EXPECT_EQ(br24.length, 2); + EXPECT_EQ(br24.isExtern, true); + EXPECT_EQ(br24.symbol, 2U); + const Relocation& br14 = text.relocations[1]; + EXPECT_EQ(br14.offset, Hex32(0x4)); + EXPECT_EQ(br14.scattered, false); + EXPECT_EQ(br14.type, PPC_RELOC_BR14); + EXPECT_EQ(br14.pcRel, true); + EXPECT_EQ(br14.length, 2); + EXPECT_EQ(br14.isExtern, true); + EXPECT_EQ(br14.symbol, 2U); + const Relocation& pichi1 = text.relocations[2]; + EXPECT_EQ(pichi1.offset, Hex32(0x8)); + EXPECT_EQ(pichi1.scattered, true); + EXPECT_EQ(pichi1.type, PPC_RELOC_HI16_SECTDIFF); + EXPECT_EQ(pichi1.length, 2); + EXPECT_EQ(pichi1.value, 0x28U); + const Relocation& pichi2 = text.relocations[3]; + EXPECT_EQ(pichi2.offset, Hex32(0x24)); + EXPECT_EQ(pichi2.scattered, true); + EXPECT_EQ(pichi2.type, PPC_RELOC_PAIR); + EXPECT_EQ(pichi2.length, 2); + EXPECT_EQ(pichi2.value, 0x4U); + const Relocation& picha1 = text.relocations[4]; + EXPECT_EQ(picha1.offset, Hex32(0xC)); + EXPECT_EQ(picha1.scattered, true); + EXPECT_EQ(picha1.type, PPC_RELOC_HA16_SECTDIFF); + EXPECT_EQ(picha1.length, 2); + EXPECT_EQ(picha1.value, 0x28U); + const Relocation& picha2 = text.relocations[5]; + EXPECT_EQ(picha2.offset, Hex32(0x24)); + EXPECT_EQ(picha2.scattered, true); + EXPECT_EQ(picha2.type, PPC_RELOC_PAIR); + EXPECT_EQ(picha2.length, 2); + EXPECT_EQ(picha2.value, 0x4U); + const Relocation& piclo1 = text.relocations[6]; + EXPECT_EQ(piclo1.offset, Hex32(0x10)); + EXPECT_EQ(piclo1.scattered, true); + EXPECT_EQ(piclo1.type, PPC_RELOC_LO16_SECTDIFF); + EXPECT_EQ(piclo1.length, 2); + EXPECT_EQ(piclo1.value, 0x28U); + const Relocation& piclo2 = text.relocations[7]; + EXPECT_EQ(piclo2.offset, Hex32(0x0)); + EXPECT_EQ(piclo2.scattered, true); + EXPECT_EQ(piclo2.type, PPC_RELOC_PAIR); + 
EXPECT_EQ(piclo2.length, 2); + EXPECT_EQ(piclo2.value, 0x4U); + const Relocation& picloa1 = text.relocations[8]; + EXPECT_EQ(picloa1.offset, Hex32(0x14)); + EXPECT_EQ(picloa1.scattered, true); + EXPECT_EQ(picloa1.type, PPC_RELOC_LO14_SECTDIFF); + EXPECT_EQ(picloa1.length, 2); + EXPECT_EQ(picloa1.value, 0x28U); + const Relocation& picloa2 = text.relocations[9]; + EXPECT_EQ(picloa2.offset, Hex32(0x0)); + EXPECT_EQ(picloa2.scattered, true); + EXPECT_EQ(picloa2.type, PPC_RELOC_PAIR); + EXPECT_EQ(picloa2.length, 2); + EXPECT_EQ(picloa2.value, 0x4U); + const Relocation& abshi1 = text.relocations[10]; + EXPECT_EQ(abshi1.offset, Hex32(0x18)); + EXPECT_EQ(abshi1.scattered, false); + EXPECT_EQ(abshi1.type, PPC_RELOC_HI16); + EXPECT_EQ(abshi1.length, 2); + EXPECT_EQ(abshi1.symbol, 1U); + const Relocation& abshi2 = text.relocations[11]; + EXPECT_EQ(abshi2.offset, Hex32(0x28)); + EXPECT_EQ(abshi2.scattered, false); + EXPECT_EQ(abshi2.type, PPC_RELOC_PAIR); + EXPECT_EQ(abshi2.length, 2); + EXPECT_EQ(abshi2.symbol, 0U); + const Relocation& absha1 = text.relocations[12]; + EXPECT_EQ(absha1.offset, Hex32(0x1C)); + EXPECT_EQ(absha1.scattered, false); + EXPECT_EQ(absha1.type, PPC_RELOC_HA16); + EXPECT_EQ(absha1.length, 2); + EXPECT_EQ(absha1.symbol, 1U); + const Relocation& absha2 = text.relocations[13]; + EXPECT_EQ(absha2.offset, Hex32(0x28)); + EXPECT_EQ(absha2.scattered, false); + EXPECT_EQ(absha2.type, PPC_RELOC_PAIR); + EXPECT_EQ(absha2.length, 2); + EXPECT_EQ(absha2.symbol, 0U); + const Relocation& abslo1 = text.relocations[14]; + EXPECT_EQ(abslo1.offset, Hex32(0x20)); + EXPECT_EQ(abslo1.scattered, false); + EXPECT_EQ(abslo1.type, PPC_RELOC_LO16); + EXPECT_EQ(abslo1.length, 2); + EXPECT_EQ(abslo1.symbol, 1U); + const Relocation& abslo2 = text.relocations[15]; + EXPECT_EQ(abslo2.offset, Hex32(0x00)); + EXPECT_EQ(abslo2.scattered, false); + EXPECT_EQ(abslo2.type, PPC_RELOC_PAIR); + EXPECT_EQ(abslo2.length, 2); + EXPECT_EQ(abslo2.symbol, 0U); + const Relocation& absloa1 = 
text.relocations[16]; + EXPECT_EQ(absloa1.offset, Hex32(0x24)); + EXPECT_EQ(absloa1.scattered, false); + EXPECT_EQ(absloa1.type, PPC_RELOC_LO14); + EXPECT_EQ(absloa1.length, 2); + EXPECT_EQ(absloa1.symbol, 1U); + const Relocation& absloa2 = text.relocations[17]; + EXPECT_EQ(absloa2.offset, Hex32(0x00)); + EXPECT_EQ(absloa2.scattered, false); + EXPECT_EQ(absloa2.type, PPC_RELOC_PAIR); + EXPECT_EQ(absloa2.length, 2); + EXPECT_EQ(absloa2.symbol, 0U); + + bufferOwner.reset(nullptr); + std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); + EXPECT_FALSE(ec); +} diff --git a/lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp new file mode 100644 index 000000000000..19534eadaf5b --- /dev/null +++ b/lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp @@ -0,0 +1,140 @@ +//===- lld/unittest/MachOTests/MachONormalizedFileToAtomsTests.cpp --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include "gtest/gtest.h" +#include +#include + +using namespace lld::mach_o::normalized; +using namespace llvm::MachO; + +TEST(ToAtomsTest, empty_obj_x86_64) { + NormalizedFile f; + f.arch = lld::MachOLinkingContext::arch_x86_64; + llvm::Expected> atom_f = + normalizedToAtoms(f, "", false); + EXPECT_FALSE(!atom_f); + EXPECT_EQ(0U, (*atom_f)->defined().size()); +} + +TEST(ToAtomsTest, basic_obj_x86_64) { + NormalizedFile f; + f.arch = lld::MachOLinkingContext::arch_x86_64; + Section textSection; + static const uint8_t contentBytes[] = { 0x90, 0xC3, 0xC3, 0xC4 }; + const unsigned contentSize = sizeof(contentBytes) / sizeof(contentBytes[0]); + textSection.content = llvm::makeArrayRef(contentBytes, contentSize); + f.sections.push_back(textSection); + Symbol fooSymbol; + fooSymbol.name = "_foo"; + fooSymbol.type = N_SECT; + fooSymbol.scope = N_EXT; + fooSymbol.sect = 1; + fooSymbol.value = 0; + f.globalSymbols.push_back(fooSymbol); + Symbol barSymbol; + barSymbol.name = "_bar"; + barSymbol.type = N_SECT; + barSymbol.scope = N_EXT; + barSymbol.sect = 1; + barSymbol.value = 2; + f.globalSymbols.push_back(barSymbol); + Symbol undefSym; + undefSym.name = "_undef"; + undefSym.type = N_UNDF; + f.undefinedSymbols.push_back(undefSym); + Symbol bazSymbol; + bazSymbol.name = "_baz"; + bazSymbol.type = N_SECT; + bazSymbol.scope = N_EXT | N_PEXT; + bazSymbol.sect = 1; + bazSymbol.value = 3; + f.localSymbols.push_back(bazSymbol); + + llvm::Expected> atom_f = + 
normalizedToAtoms(f, "", false); + EXPECT_FALSE(!atom_f); + const lld::File &file = **atom_f; + EXPECT_EQ(3U, file.defined().size()); + auto it = file.defined().begin(); + const lld::DefinedAtom *atom1 = *it; + ++it; + const lld::DefinedAtom *atom2 = *it; + ++it; + const lld::DefinedAtom *atom3 = *it; + const lld::UndefinedAtom *atom4 = *file.undefined().begin(); + EXPECT_TRUE(atom1->name().equals("_foo")); + EXPECT_EQ(2U, atom1->rawContent().size()); + EXPECT_EQ(0x90, atom1->rawContent()[0]); + EXPECT_EQ(0xC3, atom1->rawContent()[1]); + EXPECT_EQ(lld::Atom::scopeGlobal, atom1->scope()); + + EXPECT_TRUE(atom2->name().equals("_bar")); + EXPECT_EQ(1U, atom2->rawContent().size()); + EXPECT_EQ(0xC3, atom2->rawContent()[0]); + EXPECT_EQ(lld::Atom::scopeGlobal, atom2->scope()); + + EXPECT_TRUE(atom3->name().equals("_baz")); + EXPECT_EQ(1U, atom3->rawContent().size()); + EXPECT_EQ(0xC4, atom3->rawContent()[0]); + EXPECT_EQ(lld::Atom::scopeLinkageUnit, atom3->scope()); + + EXPECT_TRUE(atom4->name().equals("_undef")); + EXPECT_EQ(lld::Atom::definitionUndefined, atom4->definition()); +} + +TEST(ToAtomsTest, reservedUnitLength) { + static const uint8_t debugInfoWithReservedLengthContent[12] = { + 0xf0, 0xff, 0xff, 0xff // Reserved length value + }; + static const uint8_t debugInfoWithValidBigLengthContent[12] = { + 0xef, 0xff, 0xff, 0xff, // The maximum valid length value for DWARF32 + 0x00, 0x00 // Wrong version + }; + static const uint8_t dummyContent[] = {0x00}; + + NormalizedFile fReservedLength, fValidBigLength; + fReservedLength.arch = lld::MachOLinkingContext::arch_x86; + fValidBigLength.arch = lld::MachOLinkingContext::arch_x86; + Section section; + section.segmentName = "__DWARF"; + section.sectionName = "__debug_info"; + section.content = llvm::makeArrayRef(debugInfoWithReservedLengthContent); + fReservedLength.sections.push_back(section); + section.content = llvm::makeArrayRef(debugInfoWithValidBigLengthContent); + fValidBigLength.sections.push_back(section); + 
section.sectionName = "__debug_abbrev"; + section.content = llvm::makeArrayRef(dummyContent); + fReservedLength.sections.push_back(section); + fValidBigLength.sections.push_back(section); + section.sectionName = "__debug_str"; + fReservedLength.sections.push_back(section); + fValidBigLength.sections.push_back(section); + + auto resultReservedLength = normalizedToAtoms(fReservedLength, "foo", false); + auto resultValidBigLength = normalizedToAtoms(fValidBigLength, "foo", false); + + // Both cases should return errors, but different. + ASSERT_FALSE(resultReservedLength); + ASSERT_FALSE(resultValidBigLength); + + EXPECT_STREQ("Malformed DWARF in foo", + toString(resultReservedLength.takeError()).c_str()); + EXPECT_STREQ("Unsupported DWARF version in foo", + toString(resultValidBigLength.takeError()).c_str()); +} diff --git a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp new file mode 100644 index 000000000000..dbfe3a051811 --- /dev/null +++ b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp @@ -0,0 +1,762 @@ +//===- lld/unittest/MachOTests/MachONormalizedFileYAMLTests.cpp -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include +#include +#include +#include + +using llvm::StringRef; +using llvm::MemoryBuffer; +using lld::mach_o::normalized::NormalizedFile; +using lld::mach_o::normalized::Symbol; +using lld::mach_o::normalized::Section; +using lld::mach_o::normalized::Relocation; + +static std::unique_ptr fromYAML(StringRef str) { + std::unique_ptr mb(MemoryBuffer::getMemBuffer(str)); + llvm::Expected> r + = lld::mach_o::normalized::readYaml(mb); + EXPECT_FALSE(!r); + return std::move(*r); +} + +static void toYAML(const NormalizedFile &f, std::string &out) { + llvm::raw_string_ostream ostr(out); + std::error_code ec = lld::mach_o::normalized::writeYaml(f, ostr); + EXPECT_TRUE(!ec); +} + +// ppc is no longer supported, but it is here to test endianness handling. 
+TEST(ObjectFileYAML, empty_ppc) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: ppc\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->sections.empty()); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(ObjectFileYAML, empty_x86_64) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: x86_64\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->sections.empty()); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(ObjectFileYAML, empty_x86) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: x86\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->sections.empty()); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(ObjectFileYAML, empty_armv6) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: armv6\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->sections.empty()); 
+ EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(ObjectFileYAML, empty_armv7) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: armv7\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->sections.empty()); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(ObjectFileYAML, empty_armv7s) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: armv7s\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7s); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f->sections.empty()); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); +} + +TEST(ObjectFileYAML, roundTrip) { + std::string intermediate; + { + NormalizedFile f; + f.arch = lld::MachOLinkingContext::arch_x86_64; + f.fileType = llvm::MachO::MH_OBJECT; + f.flags = (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + f.os = lld::MachOLinkingContext::OS::macOSX; + toYAML(f, intermediate); + } + { + std::unique_ptr f2 = fromYAML(intermediate); + EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ((int)(f2->fileType), llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f2->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_TRUE(f2->sections.empty()); + EXPECT_TRUE(f2->localSymbols.empty()); + EXPECT_TRUE(f2->globalSymbols.empty()); + EXPECT_TRUE(f2->undefinedSymbols.empty()); + } +} + +TEST(ObjectFileYAML, oneSymbol) { + std::unique_ptr f = fromYAML( + 
"---\n" + "arch: x86_64\n" + "file-type: MH_OBJECT\n" + "global-symbols:\n" + " - name: _main\n" + " type: N_SECT\n" + " scope: [ N_EXT ]\n" + " sect: 1\n" + " desc: [ ]\n" + " value: 0x100\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_TRUE(f->sections.empty()); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& sym = f->globalSymbols[0]; + EXPECT_TRUE(sym.name.equals("_main")); + EXPECT_EQ((int)(sym.type), llvm::MachO::N_SECT); + EXPECT_EQ((int)(sym.scope), llvm::MachO::N_EXT); + EXPECT_EQ(sym.sect, 1); + EXPECT_EQ((int)(sym.desc), 0); + EXPECT_EQ((uint64_t)sym.value, 0x100ULL); +} + +TEST(ObjectFileYAML, oneSection) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: x86_64\n" + "file-type: MH_OBJECT\n" + "sections:\n" + " - segment: __TEXT\n" + " section: __text\n" + " type: S_REGULAR\n" + " attributes: [ S_ATTR_PURE_INSTRUCTIONS ]\n" + " alignment: 2\n" + " address: 0x12345678\n" + " content: [ 0x90, 0x90 ]\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_TRUE(f->localSymbols.empty()); + EXPECT_TRUE(f->globalSymbols.empty()); + EXPECT_TRUE(f->undefinedSymbols.empty()); + EXPECT_EQ(f->sections.size(), 1UL); + const Section& sect = f->sections[0]; + EXPECT_TRUE(sect.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect.sectionName.equals("__text")); + EXPECT_EQ((uint32_t)(sect.type), (uint32_t)(llvm::MachO::S_REGULAR)); + EXPECT_EQ((uint32_t)(sect.attributes), + (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)sect.alignment, 2U); + EXPECT_EQ((uint64_t)sect.address, 0x12345678ULL); + EXPECT_EQ(sect.content.size(), 2UL); + EXPECT_EQ((int)(sect.content[0]), 0x90); + EXPECT_EQ((int)(sect.content[1]), 0x90); +} + +TEST(ObjectFileYAML, hello_x86_64) { + std::unique_ptr f = fromYAML( + 
"---\n" + "arch: x86_64\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "sections:\n" + " - segment: __TEXT\n" + " section: __text\n" + " type: S_REGULAR\n" + " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" + " alignment: 1\n" + " address: 0x0000\n" + " content: [ 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8d, 0x3d, 0x00,\n" + " 0x00, 0x00, 0x00, 0x30, 0xc0, 0xe8, 0x00, 0x00,\n" + " 0x00, 0x00, 0x31, 0xc0, 0x5d, 0xc3 ]\n" + " relocations:\n" + " - offset: 0x0e\n" + " type: X86_64_RELOC_BRANCH\n" + " length: 2\n" + " pc-rel: true\n" + " extern: true\n" + " symbol: 2\n" + " - offset: 0x07\n" + " type: X86_64_RELOC_SIGNED\n" + " length: 2\n" + " pc-rel: true\n" + " extern: true\n" + " symbol: 1\n" + " - segment: __TEXT\n" + " section: __cstring\n" + " type: S_CSTRING_LITERALS\n" + " attributes: [ ]\n" + " alignment: 1\n" + " address: 0x0016\n" + " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" + "global-symbols:\n" + " - name: _main\n" + " type: N_SECT\n" + " scope: [ N_EXT ]\n" + " sect: 1\n" + " value: 0x0\n" + "local-symbols:\n" + " - name: L_.str\n" + " type: N_SECT\n" + " scope: [ ]\n" + " sect: 2\n" + " value: 0x16\n" + "undefined-symbols:\n" + " - name: _printf\n" + " type: N_UNDF\n" + " value: 0x0\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + + const Section& sect1 = f->sections[0]; + EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect1.sectionName.equals("__text")); + EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); + EXPECT_EQ((uint32_t)(sect1.attributes), + (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS + | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)sect1.alignment, 1U); + EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); + EXPECT_EQ(sect1.content.size(), 22UL); 
+ EXPECT_EQ((int)(sect1.content[0]), 0x55); + EXPECT_EQ((int)(sect1.content[1]), 0x48); + EXPECT_EQ(sect1.relocations.size(), 2UL); + const Relocation& reloc1 = sect1.relocations[0]; + EXPECT_EQ(reloc1.offset, 0x0eU); + EXPECT_FALSE(reloc1.scattered); + EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::X86_64_RELOC_BRANCH); + EXPECT_EQ(reloc1.length, 2); + EXPECT_TRUE(reloc1.pcRel); + EXPECT_TRUE(reloc1.isExtern); + EXPECT_EQ(reloc1.symbol, 2U); + EXPECT_EQ((int)(reloc1.value), 0); + const Relocation& reloc2 = sect1.relocations[1]; + EXPECT_EQ(reloc2.offset, 0x07U); + EXPECT_FALSE(reloc2.scattered); + EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::X86_64_RELOC_SIGNED); + EXPECT_EQ(reloc2.length, 2); + EXPECT_TRUE(reloc2.pcRel); + EXPECT_TRUE(reloc2.isExtern); + EXPECT_EQ(reloc2.symbol, 1U); + EXPECT_EQ((int)(reloc2.value), 0); + + const Section& sect2 = f->sections[1]; + EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect2.sectionName.equals("__cstring")); + EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); + EXPECT_EQ((uint32_t)(sect2.attributes), 0U); + EXPECT_EQ((uint16_t)sect2.alignment, 1U); + EXPECT_EQ((uint64_t)sect2.address, 0x016ULL); + EXPECT_EQ(sect2.content.size(), 7UL); + EXPECT_EQ((int)(sect2.content[0]), 0x68); + EXPECT_EQ((int)(sect2.content[1]), 0x65); + EXPECT_EQ((int)(sect2.content[2]), 0x6c); + + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& sym1 = f->globalSymbols[0]; + EXPECT_TRUE(sym1.name.equals("_main")); + EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); + EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); + EXPECT_EQ(sym1.sect, 1); + EXPECT_EQ((int)(sym1.desc), 0); + EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); + EXPECT_EQ(f->localSymbols.size(), 1UL); + const Symbol& sym2 = f->localSymbols[0]; + EXPECT_TRUE(sym2.name.equals("L_.str")); + EXPECT_EQ((int)(sym2.type), llvm::MachO::N_SECT); + EXPECT_EQ((int)(sym2.scope), 0); + EXPECT_EQ(sym2.sect, 2); + EXPECT_EQ((int)(sym2.desc), 0); + 
EXPECT_EQ((uint64_t)sym2.value, 0x16ULL); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& sym3 = f->undefinedSymbols[0]; + EXPECT_TRUE(sym3.name.equals("_printf")); + EXPECT_EQ((int)(sym3.type), llvm::MachO::N_UNDF); + EXPECT_EQ((int)(sym3.scope), 0); + EXPECT_EQ(sym3.sect, 0); + EXPECT_EQ((int)(sym3.desc), 0); + EXPECT_EQ((uint64_t)sym3.value, 0x0ULL); +} + +TEST(ObjectFileYAML, hello_x86) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: x86\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "sections:\n" + " - segment: __TEXT\n" + " section: __text\n" + " type: S_REGULAR\n" + " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" + " alignment: 1\n" + " address: 0x0000\n" + " content: [ 0x55, 0x89, 0xe5, 0x83, 0xec, 0x08, 0xe8, 0x00,\n" + " 0x00, 0x00, 0x00, 0x58, 0x8d, 0x80, 0x16, 0x00,\n" + " 0x00, 0x00, 0x89, 0x04, 0x24, 0xe8, 0xe6, 0xff,\n" + " 0xff, 0xff, 0x31, 0xc0, 0x83, 0xc4, 0x08, 0x5d,\n" + " 0xc3 ]\n" + " relocations:\n" + " - offset: 0x16\n" + " type: GENERIC_RELOC_VANILLA\n" + " length: 2\n" + " pc-rel: true\n" + " extern: true\n" + " symbol: 1\n" + " - offset: 0x0e\n" + " scattered: true\n" + " type: GENERIC_RELOC_LOCAL_SECTDIFF\n" + " length: 2\n" + " pc-rel: false\n" + " value: 0x21\n" + " - offset: 0x0\n" + " scattered: true\n" + " type: GENERIC_RELOC_PAIR\n" + " length: 2\n" + " pc-rel: false\n" + " value: 0xb\n" + " - segment: __TEXT\n" + " section: __cstring\n" + " type: S_CSTRING_LITERALS\n" + " attributes: [ ]\n" + " alignment: 1\n" + " address: 0x0021\n" + " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" + "global-symbols:\n" + " - name: _main\n" + " type: N_SECT\n" + " scope: [ N_EXT ]\n" + " sect: 1\n" + " value: 0x0\n" + "undefined-symbols:\n" + " - name: _printf\n" + " type: N_UNDF\n" + " value: 0x0\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), 
(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + + const Section& sect1 = f->sections[0]; + EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect1.sectionName.equals("__text")); + EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); + EXPECT_EQ((uint32_t)(sect1.attributes), + (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS + | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)sect1.alignment, 1U); + EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); + EXPECT_EQ(sect1.content.size(), 33UL); + EXPECT_EQ((int)(sect1.content[0]), 0x55); + EXPECT_EQ((int)(sect1.content[1]), 0x89); + EXPECT_EQ(sect1.relocations.size(), 3UL); + const Relocation& reloc1 = sect1.relocations[0]; + EXPECT_EQ(reloc1.offset, 0x16U); + EXPECT_FALSE(reloc1.scattered); + EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::GENERIC_RELOC_VANILLA); + EXPECT_EQ(reloc1.length, 2); + EXPECT_TRUE(reloc1.pcRel); + EXPECT_TRUE(reloc1.isExtern); + EXPECT_EQ(reloc1.symbol, 1U); + EXPECT_EQ((int)(reloc1.value), 0); + const Relocation& reloc2 = sect1.relocations[1]; + EXPECT_EQ(reloc2.offset, 0x0eU); + EXPECT_TRUE(reloc2.scattered); + EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); + EXPECT_EQ(reloc2.length, 2); + EXPECT_FALSE(reloc2.pcRel); + EXPECT_EQ(reloc2.symbol, 0U); + EXPECT_EQ((int)(reloc2.value), 0x21); + const Relocation& reloc3 = sect1.relocations[2]; + EXPECT_EQ(reloc3.offset, 0U); + EXPECT_TRUE(reloc3.scattered); + EXPECT_EQ((int)reloc3.type, (int)llvm::MachO::GENERIC_RELOC_PAIR); + EXPECT_EQ(reloc3.length, 2); + EXPECT_FALSE(reloc3.pcRel); + EXPECT_EQ(reloc3.symbol, 0U); + EXPECT_EQ((int)(reloc3.value), 0xb); + + const Section& sect2 = f->sections[1]; + EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect2.sectionName.equals("__cstring")); + EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); + EXPECT_EQ((uint32_t)(sect2.attributes), 0U); + 
EXPECT_EQ((uint16_t)sect2.alignment, 1U); + EXPECT_EQ((uint64_t)sect2.address, 0x021ULL); + EXPECT_EQ(sect2.content.size(), 7UL); + EXPECT_EQ((int)(sect2.content[0]), 0x68); + EXPECT_EQ((int)(sect2.content[1]), 0x65); + EXPECT_EQ((int)(sect2.content[2]), 0x6c); + + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& sym1 = f->globalSymbols[0]; + EXPECT_TRUE(sym1.name.equals("_main")); + EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); + EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); + EXPECT_EQ(sym1.sect, 1); + EXPECT_EQ((int)(sym1.desc), 0); + EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& sym2 = f->undefinedSymbols[0]; + EXPECT_TRUE(sym2.name.equals("_printf")); + EXPECT_EQ((int)(sym2.type), llvm::MachO::N_UNDF); + EXPECT_EQ((int)(sym2.scope), 0); + EXPECT_EQ(sym2.sect, 0); + EXPECT_EQ((int)(sym2.desc), 0); + EXPECT_EQ((uint64_t)sym2.value, 0x0ULL); +} + +TEST(ObjectFileYAML, hello_armv6) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: armv6\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "sections:\n" + " - segment: __TEXT\n" + " section: __text\n" + " type: S_REGULAR\n" + " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" + " alignment: 4\n" + " address: 0x0000\n" + " content: [ 0x80, 0x40, 0x2d, 0xe9, 0x10, 0x00, 0x9f, 0xe5,\n" + " 0x0d, 0x70, 0xa0, 0xe1, 0x00, 0x00, 0x8f, 0xe0,\n" + " 0xfa, 0xff, 0xff, 0xeb, 0x00, 0x00, 0xa0, 0xe3,\n" + " 0x80, 0x80, 0xbd, 0xe8, 0x0c, 0x00, 0x00, 0x00 ]\n" + " relocations:\n" + " - offset: 0x1c\n" + " scattered: true\n" + " type: ARM_RELOC_SECTDIFF\n" + " length: 2\n" + " pc-rel: false\n" + " value: 0x20\n" + " - offset: 0x0\n" + " scattered: true\n" + " type: ARM_RELOC_PAIR\n" + " length: 2\n" + " pc-rel: false\n" + " value: 0xc\n" + " - offset: 0x10\n" + " type: ARM_RELOC_BR24\n" + " length: 2\n" + " pc-rel: true\n" + " extern: true\n" + " symbol: 1\n" + " - segment: __TEXT\n" + " section: __cstring\n" + " 
type: S_CSTRING_LITERALS\n" + " attributes: [ ]\n" + " alignment: 1\n" + " address: 0x0020\n" + " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" + "global-symbols:\n" + " - name: _main\n" + " type: N_SECT\n" + " scope: [ N_EXT ]\n" + " sect: 1\n" + " value: 0x0\n" + "undefined-symbols:\n" + " - name: _printf\n" + " type: N_UNDF\n" + " value: 0x0\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + + const Section& sect1 = f->sections[0]; + EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect1.sectionName.equals("__text")); + EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); + EXPECT_EQ((uint32_t)(sect1.attributes), + (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS + | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)sect1.alignment, 4U); + EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); + EXPECT_EQ(sect1.content.size(), 32UL); + EXPECT_EQ((int)(sect1.content[0]), 0x80); + EXPECT_EQ((int)(sect1.content[1]), 0x40); + EXPECT_EQ(sect1.relocations.size(), 3UL); + const Relocation& reloc1 = sect1.relocations[0]; + EXPECT_EQ(reloc1.offset, 0x1cU); + EXPECT_TRUE(reloc1.scattered); + EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::ARM_RELOC_SECTDIFF); + EXPECT_EQ(reloc1.length, 2); + EXPECT_FALSE(reloc1.pcRel); + EXPECT_EQ(reloc1.symbol, 0U); + EXPECT_EQ((int)(reloc1.value), 0x20); + const Relocation& reloc2 = sect1.relocations[1]; + EXPECT_EQ(reloc2.offset, 0x0U); + EXPECT_TRUE(reloc2.scattered); + EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::ARM_RELOC_PAIR); + EXPECT_EQ(reloc2.length, 2); + EXPECT_FALSE(reloc2.pcRel); + EXPECT_EQ(reloc2.symbol, 0U); + EXPECT_EQ((int)(reloc2.value), 0xc); + const Relocation& reloc3 = sect1.relocations[2]; + EXPECT_EQ(reloc3.offset, 0x10U); + EXPECT_FALSE(reloc3.scattered); + 
EXPECT_EQ((int)reloc3.type, (int)llvm::MachO::ARM_RELOC_BR24); + EXPECT_EQ(reloc3.length, 2); + EXPECT_TRUE(reloc3.pcRel); + EXPECT_TRUE(reloc3.isExtern); + EXPECT_EQ(reloc3.symbol, 1U); + EXPECT_EQ((int)(reloc3.value), 0); + + const Section& sect2 = f->sections[1]; + EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect2.sectionName.equals("__cstring")); + EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); + EXPECT_EQ((uint32_t)(sect2.attributes), 0U); + EXPECT_EQ((uint16_t)sect2.alignment, 1U); + EXPECT_EQ((uint64_t)sect2.address, 0x020ULL); + EXPECT_EQ(sect2.content.size(), 7UL); + EXPECT_EQ((int)(sect2.content[0]), 0x68); + EXPECT_EQ((int)(sect2.content[1]), 0x65); + EXPECT_EQ((int)(sect2.content[2]), 0x6c); + + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& sym1 = f->globalSymbols[0]; + EXPECT_TRUE(sym1.name.equals("_main")); + EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); + EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); + EXPECT_EQ(sym1.sect, 1); + EXPECT_EQ((int)(sym1.desc), 0); + EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& sym2 = f->undefinedSymbols[0]; + EXPECT_TRUE(sym2.name.equals("_printf")); + EXPECT_EQ((int)(sym2.type), llvm::MachO::N_UNDF); + EXPECT_EQ((int)(sym2.scope), 0); + EXPECT_EQ(sym2.sect, 0); + EXPECT_EQ((int)(sym2.desc), 0); + EXPECT_EQ((uint64_t)sym2.value, 0x0ULL); +} + +TEST(ObjectFileYAML, hello_armv7) { + std::unique_ptr f = fromYAML( + "---\n" + "arch: armv7\n" + "file-type: MH_OBJECT\n" + "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" + "sections:\n" + " - segment: __TEXT\n" + " section: __text\n" + " type: S_REGULAR\n" + " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" + " alignment: 2\n" + " address: 0x0000\n" + " content: [ 0x80, 0xb5, 0x40, 0xf2, 0x06, 0x00, 0x6f, 0x46,\n" + " 0xc0, 0xf2, 0x00, 0x00, 0x78, 0x44, 0xff, 0xf7,\n" + " 0xf8, 0xef, 0x00, 0x20, 0x80, 0xbd ]\n" + " relocations:\n" + " - 
offset: 0x0e\n" + " type: ARM_THUMB_RELOC_BR22\n" + " length: 2\n" + " pc-rel: true\n" + " extern: true\n" + " symbol: 1\n" + " - offset: 0x08\n" + " scattered: true\n" + " type: ARM_RELOC_HALF_SECTDIFF\n" + " length: 3\n" + " pc-rel: false\n" + " value: 0x16\n" + " - offset: 0x06\n" + " scattered: true\n" + " type: ARM_RELOC_PAIR\n" + " length: 3\n" + " pc-rel: false\n" + " value: 0xc\n" + " - offset: 0x02\n" + " scattered: true\n" + " type: ARM_RELOC_HALF_SECTDIFF\n" + " length: 2\n" + " pc-rel: false\n" + " value: 0x16\n" + " - offset: 0x0\n" + " scattered: true\n" + " type: ARM_RELOC_PAIR\n" + " length: 2\n" + " pc-rel: false\n" + " value: 0xc\n" + " - segment: __TEXT\n" + " section: __cstring\n" + " type: S_CSTRING_LITERALS\n" + " attributes: [ ]\n" + " alignment: 1\n" + " address: 0x0016\n" + " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" + "global-symbols:\n" + " - name: _main\n" + " type: N_SECT\n" + " scope: [ N_EXT ]\n" + " sect: 1\n" + " desc: [ N_ARM_THUMB_DEF ]\n" + " value: 0x0\n" + "undefined-symbols:\n" + " - name: _printf\n" + " type: N_UNDF\n" + " value: 0x0\n" + "...\n"); + EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); + EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ(f->sections.size(), 2UL); + + const Section& sect1 = f->sections[0]; + EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect1.sectionName.equals("__text")); + EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); + EXPECT_EQ((uint32_t)(sect1.attributes), + (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS + | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); + EXPECT_EQ((uint16_t)sect1.alignment, 2U); + EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); + EXPECT_EQ(sect1.content.size(), 22UL); + EXPECT_EQ((int)(sect1.content[0]), 0x80); + EXPECT_EQ((int)(sect1.content[1]), 0xb5); + EXPECT_EQ(sect1.relocations.size(), 5UL); + const Relocation& reloc1 = 
sect1.relocations[0]; + EXPECT_EQ(reloc1.offset, 0x0eU); + EXPECT_FALSE(reloc1.scattered); + EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::ARM_THUMB_RELOC_BR22); + EXPECT_EQ(reloc1.length, 2); + EXPECT_TRUE(reloc1.pcRel); + EXPECT_TRUE(reloc1.isExtern); + EXPECT_EQ(reloc1.symbol, 1U); + EXPECT_EQ((int)(reloc1.value), 0); + const Relocation& reloc2 = sect1.relocations[1]; + EXPECT_EQ(reloc2.offset, 0x8U); + EXPECT_TRUE(reloc2.scattered); + EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::ARM_RELOC_HALF_SECTDIFF); + EXPECT_EQ(reloc2.length, 3); + EXPECT_FALSE(reloc2.pcRel); + EXPECT_EQ(reloc2.symbol, 0U); + EXPECT_EQ((int)(reloc2.value), 0x16); + const Relocation& reloc3 = sect1.relocations[2]; + EXPECT_EQ(reloc3.offset, 0x6U); + EXPECT_TRUE(reloc3.scattered); + EXPECT_EQ((int)reloc3.type, (int)llvm::MachO::ARM_RELOC_PAIR); + EXPECT_EQ(reloc3.length, 3); + EXPECT_FALSE(reloc3.pcRel); + EXPECT_EQ(reloc3.symbol, 0U); + EXPECT_EQ((int)(reloc3.value), 0xc); + const Relocation& reloc4 = sect1.relocations[3]; + EXPECT_EQ(reloc4.offset, 0x2U); + EXPECT_TRUE(reloc4.scattered); + EXPECT_EQ((int)reloc4.type, (int)llvm::MachO::ARM_RELOC_HALF_SECTDIFF); + EXPECT_EQ(reloc4.length, 2); + EXPECT_FALSE(reloc4.pcRel); + EXPECT_EQ(reloc4.symbol, 0U); + EXPECT_EQ((int)(reloc4.value), 0x16); + const Relocation& reloc5 = sect1.relocations[4]; + EXPECT_EQ(reloc5.offset, 0x0U); + EXPECT_TRUE(reloc5.scattered); + EXPECT_EQ((int)reloc5.type, (int)llvm::MachO::ARM_RELOC_PAIR); + EXPECT_EQ(reloc5.length, 2); + EXPECT_FALSE(reloc5.pcRel); + EXPECT_EQ(reloc5.symbol, 0U); + EXPECT_EQ((int)(reloc5.value), 0xc); + + const Section& sect2 = f->sections[1]; + EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); + EXPECT_TRUE(sect2.sectionName.equals("__cstring")); + EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); + EXPECT_EQ((uint32_t)(sect2.attributes), 0U); + EXPECT_EQ((uint16_t)sect2.alignment, 1U); + EXPECT_EQ((uint64_t)sect2.address, 0x016ULL); + 
EXPECT_EQ(sect2.content.size(), 7UL); + EXPECT_EQ((int)(sect2.content[0]), 0x68); + EXPECT_EQ((int)(sect2.content[1]), 0x65); + EXPECT_EQ((int)(sect2.content[2]), 0x6c); + + EXPECT_EQ(f->globalSymbols.size(), 1UL); + const Symbol& sym1 = f->globalSymbols[0]; + EXPECT_TRUE(sym1.name.equals("_main")); + EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); + EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); + EXPECT_EQ(sym1.sect, 1); + EXPECT_EQ((int)(sym1.desc), (int)(llvm::MachO::N_ARM_THUMB_DEF)); + EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); + EXPECT_EQ(f->undefinedSymbols.size(), 1UL); + const Symbol& sym2 = f->undefinedSymbols[0]; + EXPECT_TRUE(sym2.name.equals("_printf")); + EXPECT_EQ((int)(sym2.type), llvm::MachO::N_UNDF); + EXPECT_EQ((int)(sym2.scope), 0); + EXPECT_EQ(sym2.sect, 0); + EXPECT_EQ((int)(sym2.desc), 0); + EXPECT_EQ((uint64_t)sym2.value, 0x0ULL); +} diff --git a/lld/unittests/MachOTests/empty_obj_x86_armv7.txt b/lld/unittests/MachOTests/empty_obj_x86_armv7.txt new file mode 100644 index 000000000000..9d340cb7132e --- /dev/null +++ b/lld/unittests/MachOTests/empty_obj_x86_armv7.txt @@ -0,0 +1,1272 @@ +0xca, 0xfe, 0xba, 0xbe, 0x00, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x07, 0x00, +0x00, 0x00, 0x03, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, +0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x40, +0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, +0x00, 0xcf, 0xfa, 0xed, 0xfe, 0x07, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00, +0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x00, +0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x98, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, +0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0xce, 0xfa, 0xed, 0xfe, 0x0c, 0x00, 0x00, 0x00, 0x09, +0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, +0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, +0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, +0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp new file mode 100644 index 000000000000..c0c819632851 --- /dev/null +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp @@ -0,0 +1,84 @@ +//===-- LibCxxOptional.cpp 
------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LibCxx.h" +#include "lldb/DataFormatters/FormattersHelpers.h" + +using namespace lldb; +using namespace lldb_private; + +namespace { + +class OptionalFrontEnd : public SyntheticChildrenFrontEnd { +public: + OptionalFrontEnd(ValueObject &valobj) : SyntheticChildrenFrontEnd(valobj) { + Update(); + } + + size_t GetIndexOfChildWithName(ConstString name) override { + return formatters::ExtractIndexFromString(name.GetCString()); + } + + bool MightHaveChildren() override { return true; } + bool Update() override; + size_t CalculateNumChildren() override { return m_has_value ? 1U : 0U; } + ValueObjectSP GetChildAtIndex(size_t idx) override; + +private: + /// True iff the option contains a value. + bool m_has_value = false; +}; +} // namespace + +bool OptionalFrontEnd::Update() { + ValueObjectSP engaged_sp( + m_backend.GetChildMemberWithName(ConstString("__engaged_"), true)); + + if (!engaged_sp) + return false; + + // __engaged_ is a bool flag and is true if the optional contains a value. + // Converting it to unsigned gives us a size of 1 if it contains a value + // and 0 if not. + m_has_value = engaged_sp->GetValueAsUnsigned(0) == 1; + + return false; +} + +ValueObjectSP OptionalFrontEnd::GetChildAtIndex(size_t idx) { + if (!m_has_value) + return ValueObjectSP(); + + // __val_ contains the underlying value of an optional if it has one. + // Currently because it is part of an anonymous union GetChildMemberWithName() + // does not peer through and find it unless we are at the parent itself. + // We can obtain the parent through __engaged_. 
+ ValueObjectSP val_sp( + m_backend.GetChildMemberWithName(ConstString("__engaged_"), true) + ->GetParent() + ->GetChildAtIndex(0, true) + ->GetChildMemberWithName(ConstString("__val_"), true)); + + if (!val_sp) + return ValueObjectSP(); + + CompilerType holder_type = val_sp->GetCompilerType(); + + if (!holder_type) + return ValueObjectSP(); + + return val_sp->Clone(ConstString("Value")); +} + +SyntheticChildrenFrontEnd * +formatters::LibcxxOptionalFrontEndCreator(CXXSyntheticChildren *, + lldb::ValueObjectSP valobj_sp) { + if (valobj_sp) + return new OptionalFrontEnd(*valobj_sp); + return nullptr; +} diff --git a/lldb/source/lldb.cpp b/lldb/source/lldb.cpp new file mode 100644 index 000000000000..371902f6c1b5 --- /dev/null +++ b/lldb/source/lldb.cpp @@ -0,0 +1,77 @@ +//===-- lldb.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VCSVersion.inc" +#include "lldb/lldb-private.h" +#include "clang/Basic/Version.h" + +using namespace lldb; +using namespace lldb_private; + +// LLDB_VERSION_STRING is set through a define so unlike the other defines +// expanded with CMake, it lacks the double quotes. 
+#define QUOTE(str) #str +#define EXPAND_AND_QUOTE(str) QUOTE(str) + +static const char *GetLLDBVersion() { +#ifdef LLDB_VERSION_STRING + return EXPAND_AND_QUOTE(LLDB_VERSION_STRING); +#else + return "lldb version " CLANG_VERSION_STRING; +#endif +} + +static const char *GetLLDBRevision() { +#ifdef LLDB_REVISION + return LLDB_REVISION; +#else + return NULL; +#endif +} + +static const char *GetLLDBRepository() { +#ifdef LLDB_REPOSITORY + return LLDB_REPOSITORY; +#else + return NULL; +#endif +} + +const char *lldb_private::GetVersion() { + static std::string g_version_str; + if (g_version_str.empty()) { + const char *lldb_version = GetLLDBVersion(); + const char *lldb_repo = GetLLDBRepository(); + const char *lldb_rev = GetLLDBRevision(); + g_version_str += lldb_version; + if (lldb_repo || lldb_rev) { + g_version_str += " ("; + if (lldb_repo) + g_version_str += lldb_repo; + if (lldb_repo && lldb_rev) + g_version_str += " "; + if (lldb_rev) { + g_version_str += "revision "; + g_version_str += lldb_rev; + } + g_version_str += ")"; + } + + std::string clang_rev(clang::getClangRevision()); + if (clang_rev.length() > 0) { + g_version_str += "\n clang revision "; + g_version_str += clang_rev; + } + std::string llvm_rev(clang::getLLVMRevision()); + if (llvm_rev.length() > 0) { + g_version_str += "\n llvm revision "; + g_version_str += llvm_rev; + } + } + return g_version_str.c_str(); +} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile new file mode 100644 index 000000000000..c5df567e01a2 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBCPP := 1 + +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py 
b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py new file mode 100644 index 000000000000..b9949288c989 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py @@ -0,0 +1,25 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibcxxDequeDataFormatterTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @add_test_categories(["libc++"]) + def test(self): + self.build() + lldbutil.run_to_source_breakpoint(self, "break here", + lldb.SBFileSpec("main.cpp")) + + self.expect_expr("empty", result_children=[]) + self.expect_expr("deque_1", result_children=[ + ValueCheck(name="[0]", value="1"), + ]) + self.expect_expr("deque_3", result_children=[ + ValueCheck(name="[0]", value="3"), + ValueCheck(name="[1]", value="1"), + ValueCheck(name="[2]", value="2") + ]) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp new file mode 100644 index 000000000000..43c3f374a0f9 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp @@ -0,0 +1,8 @@ +#include + +int main() { + std::deque empty; + std::deque deque_1 = {1}; + std::deque deque_3 = {3, 1, 2}; + return empty.size() + deque_1.front() + deque_3.front(); // break here +} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile new file mode 100644 index 000000000000..564cbada74e0 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +USE_LIBCPP := 1 + +CXXFLAGS_EXTRAS := -O0 +include 
Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py new file mode 100644 index 000000000000..8de749d74f03 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py @@ -0,0 +1,218 @@ +""" +Test lldb data formatter subsystem. +""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibcxxListDataFormatterTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line number to break at. + self.line = line_number('main.cpp', '// Set break point at this line.') + self.line2 = line_number('main.cpp', + '// Set second break point at this line.') + self.line3 = line_number('main.cpp', + '// Set third break point at this line.') + self.line4 = line_number('main.cpp', + '// Set fourth break point at this line.') + + @add_test_categories(["libc++"]) + def test_with_run_command(self): + """Test that that file and class static variables display correctly.""" + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.line, num_expected_locations=-1) + lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.line2, num_expected_locations=-1) + lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.line3, num_expected_locations=-1) + lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.line4, num_expected_locations=-1) + + self.runCmd("run", RUN_SUCCEEDED) + + lldbutil.skip_if_library_missing( + self, self.target(), lldbutil.PrintableRegex("libc\+\+")) + + # The stop reason of the thread should be 
breakpoint. + self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + # This is the function to remove the custom formats in order to have a + # clean slate for the next test case. + def cleanup(): + self.runCmd('type format clear', check=False) + self.runCmd('type summary clear', check=False) + self.runCmd('type filter clear', check=False) + self.runCmd('type synth clear', check=False) + self.runCmd( + "settings set target.max-children-count 256", + check=False) + + # Execute the cleanup function during test case tear down. + self.addTearDownHook(cleanup) + + self.runCmd("frame variable numbers_list --show-types") + self.runCmd( + "type summary add std::int_list std::string_list int_list string_list --summary-string \"list has ${svar%#} items\" -e") + self.runCmd("type format add -f hex int") + + self.expect("frame variable numbers_list --raw", matching=False, + substrs=['list has 0 items', + '{}']) + + self.expect("frame variable numbers_list", + substrs=['list has 0 items', + '{}']) + + self.expect("p numbers_list", + substrs=['list has 0 items', + '{}']) + + self.runCmd("n") # This gets up past the printf + self.runCmd("n") # Now advance over the first push_back. 
+ + self.expect("frame variable numbers_list", + substrs=['list has 1 items', + '[0] = ', + '0x12345678']) + + self.runCmd("n") + self.runCmd("n") + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['list has 4 items', + '[0] = ', + '0x12345678', + '[1] =', + '0x11223344', + '[2] =', + '0xbeeffeed', + '[3] =', + '0x00abba00']) + + self.runCmd("n") + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['list has 6 items', + '[0] = ', + '0x12345678', + '0x11223344', + '0xbeeffeed', + '0x00abba00', + '[4] =', + '0x0abcdef0', + '[5] =', + '0x0cab0cab']) + + self.expect("p numbers_list", + substrs=['list has 6 items', + '[0] = ', + '0x12345678', + '0x11223344', + '0xbeeffeed', + '0x00abba00', + '[4] =', + '0x0abcdef0', + '[5] =', + '0x0cab0cab']) + + # check access-by-index + self.expect("frame variable numbers_list[0]", + substrs=['0x12345678']) + self.expect("frame variable numbers_list[1]", + substrs=['0x11223344']) + + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['list has 0 items', + '{}']) + + self.runCmd("n") + self.runCmd("n") + self.runCmd("n") + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['list has 4 items', + '[0] = ', '1', + '[1] = ', '2', + '[2] = ', '3', + '[3] = ', '4']) + + ListPtr = self.frame().FindVariable("list_ptr") + self.assertTrue(ListPtr.GetChildAtIndex( + 0).GetValueAsUnsigned(0) == 1, "[0] = 1") + + # check that MightHaveChildren() gets it right + self.assertTrue( + self.frame().FindVariable("numbers_list").MightHaveChildren(), + "numbers_list.MightHaveChildren() says False for non empty!") + + self.runCmd("type format delete int") + + self.runCmd("c") + + self.expect("frame variable text_list", + substrs=['list has 3 items', + '[0]', 'goofy', + '[1]', 'is', + '[2]', 'smart']) + + # check that MightHaveChildren() gets it right + self.assertTrue( + self.frame().FindVariable("text_list").MightHaveChildren(), + 
"text_list.MightHaveChildren() says False for non empty!") + + self.expect("p text_list", + substrs=['list has 3 items', + '\"goofy\"', + '\"is\"', + '\"smart\"']) + + self.runCmd("n") # This gets us past the printf + self.runCmd("n") + self.runCmd("n") + + # check access-by-index + self.expect("frame variable text_list[0]", + substrs=['goofy']) + self.expect("frame variable text_list[3]", + substrs=['!!!']) + + self.runCmd("continue") + + # check that the list provider correctly updates if elements move + countingList = self.frame().FindVariable("countingList") + countingList.SetPreferDynamicValue(True) + countingList.SetPreferSyntheticValue(True) + + self.assertTrue(countingList.GetChildAtIndex( + 0).GetValueAsUnsigned(0) == 3141, "list[0] == 3141") + self.assertTrue(countingList.GetChildAtIndex( + 1).GetValueAsUnsigned(0) == 3141, "list[1] == 3141") + + self.runCmd("continue") + + self.assertEqual( + countingList.GetChildAtIndex(0).GetValueAsUnsigned(0), 3141, + "uniqued list[0] == 3141") + self.assertEqual( + countingList.GetChildAtIndex(1).GetValueAsUnsigned(0), 3142, + "uniqued list[1] == 3142") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile new file mode 100644 index 000000000000..564cbada74e0 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +USE_LIBCPP := 1 + +CXXFLAGS_EXTRAS := -O0 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py new file mode 100644 index 000000000000..1678c513e50b --- /dev/null +++ 
b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py @@ -0,0 +1,69 @@ +""" +Test that the debugger handles loops in std::list (which can appear as a result of e.g. memory +corruption). +""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibcxxListDataFormatterTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True + + @add_test_categories(["libc++"]) + @expectedFailureAndroid(bugnumber="llvm.org/pr32592") + def test_with_run_command(self): + self.build() + exe = self.getBuildArtifact("a.out") + target = self.dbg.CreateTarget(exe) + self.assertTrue(target and target.IsValid(), "Target is valid") + + file_spec = lldb.SBFileSpec("main.cpp", False) + breakpoint1 = target.BreakpointCreateBySourceRegex( + '// Set break point at this line.', file_spec) + self.assertTrue(breakpoint1 and breakpoint1.IsValid()) + breakpoint2 = target.BreakpointCreateBySourceRegex( + '// Set second break point at this line.', file_spec) + self.assertTrue(breakpoint2 and breakpoint2.IsValid()) + + # Run the program, it should stop at breakpoint 1. + process = target.LaunchSimple( + None, None, self.get_process_working_directory()) + self.assertTrue(process and process.IsValid(), PROCESS_IS_VALID) + self.assertEqual( + len(lldbutil.get_threads_stopped_at_breakpoint(process, breakpoint1)), 1) + + # verify our list is displayed correctly + self.expect( + "frame variable *numbers_list", + substrs=[ + '[0] = 1', + '[1] = 2', + '[2] = 3', + '[3] = 4', + '[5] = 6']) + + # Continue to breakpoint 2. + process.Continue() + self.assertTrue(process and process.IsValid(), PROCESS_IS_VALID) + self.assertEqual( + len(lldbutil.get_threads_stopped_at_breakpoint(process, breakpoint2)), 1) + + # The list is now inconsistent. 
However, we should be able to get the first three + # elements at least (and most importantly, not crash). + self.expect( + "frame variable *numbers_list", + substrs=[ + '[0] = 1', + '[1] = 2', + '[2] = 3']) + + # Run to completion. + process.Continue() + self.assertEqual(process.GetState(), lldb.eStateExited, PROCESS_EXITED) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp new file mode 100644 index 000000000000..e07e93838b9f --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp @@ -0,0 +1,35 @@ +// Evil hack: To simulate memory corruption, we want to fiddle with some internals of std::list. +// Make those accessible to us. +#define private public +#define protected public + +#include +#include +#include + +typedef std::list int_list; + +int main() +{ +#ifdef LLDB_USING_LIBCPP + int_list *numbers_list = new int_list{1,2,3,4,5,6,7,8,9,10}; + + printf("// Set break point at this line."); + +#if _LIBCPP_VERSION >= 3800 + auto *third_elem = numbers_list->__end_.__next_->__next_->__next_; + assert(third_elem->__as_node()->__value_ == 3); + auto *fifth_elem = third_elem->__next_->__next_; + assert(fifth_elem->__as_node()->__value_ == 5); +#else + auto *third_elem = numbers_list->__end_.__next_->__next_->__next_; + assert(third_elem->__value_ == 3); + auto *fifth_elem = third_elem->__next_->__next_; + assert(fifth_elem->__value_ == 5); +#endif + fifth_elem->__next_ = third_elem; +#endif + + // Any attempt to free the list will probably crash the program. Let's just leak it. + return 0; // Set second break point at this line. 
+} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp new file mode 100644 index 000000000000..a3ef06b18e74 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp @@ -0,0 +1,44 @@ +#include +#include +#include + +typedef std::list int_list; +typedef std::list string_list; + +int main() +{ + int_list numbers_list; + std::list* list_ptr = &numbers_list; + + printf("// Set break point at this line."); + (numbers_list.push_back(0x12345678)); + (numbers_list.push_back(0x11223344)); + (numbers_list.push_back(0xBEEFFEED)); + (numbers_list.push_back(0x00ABBA00)); + (numbers_list.push_back(0x0ABCDEF0)); + (numbers_list.push_back(0x0CAB0CAB)); + + numbers_list.clear(); + + (numbers_list.push_back(1)); + (numbers_list.push_back(2)); + (numbers_list.push_back(3)); + (numbers_list.push_back(4)); + + string_list text_list; + (text_list.push_back(std::string("goofy"))); + (text_list.push_back(std::string("is"))); + (text_list.push_back(std::string("smart"))); + + printf("// Set second break point at this line."); + (text_list.push_back(std::string("!!!"))); + + std::list countingList = {3141, 3142, 3142,3142,3142, 3142, 3142, 3141}; + countingList.sort(); + printf("// Set third break point at this line."); + countingList.unique(); + printf("// Set fourth break point at this line."); + countingList.size(); + + return 0; +} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile new file mode 100644 index 000000000000..23496eb20657 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +USE_LIBCPP := 1 + +CXXFLAGS_EXTRAS := -std=c++17 -fno-exceptions +include 
Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py new file mode 100644 index 000000000000..27c8d7f474ed --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py @@ -0,0 +1,73 @@ +""" +Test lldb data formatter subsystem. +""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibcxxOptionalDataFormatterTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @add_test_categories(["libc++"]) + ## Clang 7.0 is the oldest Clang that can reliably parse newer libc++ versions + ## with -std=c++17. + @skipIf(oslist=no_match(["macosx"]), compiler="clang", compiler_version=['<', '7.0']) + ## We are skipping gcc version less that 5.1 since this test requires -std=c++17 + @skipIf(compiler="gcc", compiler_version=['<', '5.1']) + + def test_with_run_command(self): + """Test that that file and class static variables display correctly.""" + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + bkpt = self.target().FindBreakpointByID( + lldbutil.run_break_set_by_source_regexp( + self, "break here")) + + self.runCmd("run", RUN_SUCCEEDED) + + # The stop reason of the thread should be breakpoint. 
+ self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + self.runCmd( "frame variable has_optional" ) + + output = self.res.GetOutput() + + ## The variable has_optional tells us if the test program + ## detected we have a sufficient libc++ version to support optional + ## false means we do not and therefore should skip the test + if output.find("(bool) has_optional = false") != -1 : + self.skipTest( "Optional not supported" ) + + lldbutil.continue_to_breakpoint(self.process(), bkpt) + + self.expect("frame variable number_not_engaged", + substrs=['Has Value=false']) + + self.expect("frame variable number_engaged", + substrs=['Has Value=true', + 'Value = 42', + '}']) + + self.expect("frame var numbers", + substrs=['(optional_int_vect) numbers = Has Value=true {', + 'Value = size=4 {', + '[0] = 1', + '[1] = 2', + '[2] = 3', + '[3] = 4', + '}', + '}']) + + self.expect("frame var ostring", + substrs=['(optional_string) ostring = Has Value=true {', + 'Value = "hello"', + '}']) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp new file mode 100644 index 000000000000..16bb98c61056 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp @@ -0,0 +1,42 @@ +#include +#include +#include + +// If we have libc++ 4.0 or greater we should have +// According to libc++ C++1z status page https://libcxx.llvm.org/cxx1z_status.html +#if _LIBCPP_VERSION >= 4000 +#include +#define HAVE_OPTIONAL 1 +#else +#define HAVE_OPTIONAL 0 +#endif + + +int main() +{ + bool has_optional = HAVE_OPTIONAL ; + + printf( "%d\n", has_optional ) ; // break here + +#if HAVE_OPTIONAL == 1 + using int_vect = std::vector ; + using optional_int = std::optional ; + using optional_int_vect = std::optional ; + using optional_string = std::optional ; + + optional_int 
number_not_engaged ; + optional_int number_engaged = 42 ; + + printf( "%d\n", *number_engaged) ; + + optional_int_vect numbers{{1,2,3,4}} ; + + printf( "%d %d\n", numbers.value()[0], numbers.value()[1] ) ; + + optional_string ostring = "hello" ; + + printf( "%s\n", ostring->c_str() ) ; +#endif + + return 0; // break here +} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile new file mode 100644 index 000000000000..913a52fb191c --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile @@ -0,0 +1,9 @@ +CXX_SOURCES := main.cpp + +# Work around "exception specification in declaration does not match previous +# declaration" errors present in older libc++ releases. This error was fixed in +# the 3.8 release. +CFLAGS_EXTRAS := -fno-exceptions + +USE_LIBCPP := 1 +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py new file mode 100644 index 000000000000..3519daec6ec4 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py @@ -0,0 +1,78 @@ +""" +Test lldb data formatter subsystem. 
+""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibcxxUnorderedDataFormatterTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + TestBase.setUp(self) + self.namespace = 'std' + + @add_test_categories(["libc++"]) + def test_with_run_command(self): + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_source_regexp( + self, "Set break point at this line.") + + self.runCmd("run", RUN_SUCCEEDED) + + # The stop reason of the thread should be breakpoint. + self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + # This is the function to remove the custom formats in order to have a + # clean slate for the next test case. + def cleanup(): + self.runCmd('type format clear', check=False) + self.runCmd('type summary clear', check=False) + self.runCmd('type filter clear', check=False) + self.runCmd('type synth clear', check=False) + self.runCmd( + "settings set target.max-children-count 256", + check=False) + + # Execute the cleanup function during test case tear down. 
+ self.addTearDownHook(cleanup) + + ns = self.namespace + self.look_for_content_and_continue( + "map", ['%s::unordered_map' % + ns, 'size=5 {', 'hello', 'world', 'this', 'is', 'me']) + + self.look_for_content_and_continue( + "mmap", ['%s::unordered_multimap' % ns, 'size=6 {', 'first = 3', 'second = "this"', + 'first = 2', 'second = "hello"']) + + self.look_for_content_and_continue( + "iset", ['%s::unordered_set' % + ns, 'size=5 {', '\[\d\] = 5', '\[\d\] = 3', '\[\d\] = 2']) + + self.look_for_content_and_continue( + "sset", ['%s::unordered_set' % ns, 'size=5 {', '\[\d\] = "is"', '\[\d\] = "world"', + '\[\d\] = "hello"']) + + self.look_for_content_and_continue( + "imset", ['%s::unordered_multiset' % ns, 'size=6 {', '(\[\d\] = 3(\\n|.)+){3}', + '\[\d\] = 2', '\[\d\] = 1']) + + self.look_for_content_and_continue( + "smset", ['%s::unordered_multiset' % ns, 'size=5 {', '(\[\d\] = "is"(\\n|.)+){2}', + '(\[\d\] = "world"(\\n|.)+){2}']) + + def look_for_content_and_continue(self, var_name, patterns): + self.expect(("frame variable %s" % var_name), patterns=patterns) + self.expect(("frame variable %s" % var_name), patterns=patterns) + self.runCmd("continue") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp new file mode 100644 index 000000000000..81a5763559d3 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp @@ -0,0 +1,80 @@ +#include +#include +#include + +using std::string; + +#define intstr_map std::unordered_map +#define intstr_mmap std::unordered_multimap + +#define int_set std::unordered_set +#define str_set std::unordered_set +#define int_mset std::unordered_multiset +#define str_mset std::unordered_multiset + +int g_the_foo = 0; + +int thefoo_rw(int arg = 1) +{ + if (arg < 0) + arg = 0; + if (!arg) + arg = 1; + g_the_foo += arg; + return g_the_foo; +} + +int main() 
+{ + intstr_map map; + map.emplace(1,"hello"); + map.emplace(2,"world"); + map.emplace(3,"this"); + map.emplace(4,"is"); + map.emplace(5,"me"); + thefoo_rw(); // Set break point at this line. + + intstr_mmap mmap; + mmap.emplace(1,"hello"); + mmap.emplace(2,"hello"); + mmap.emplace(2,"world"); + mmap.emplace(3,"this"); + mmap.emplace(3,"this"); + mmap.emplace(3,"this"); + thefoo_rw(); // Set break point at this line. + + int_set iset; + iset.emplace(1); + iset.emplace(2); + iset.emplace(3); + iset.emplace(4); + iset.emplace(5); + thefoo_rw(); // Set break point at this line. + + str_set sset; + sset.emplace("hello"); + sset.emplace("world"); + sset.emplace("this"); + sset.emplace("is"); + sset.emplace("me"); + thefoo_rw(); // Set break point at this line. + + int_mset imset; + imset.emplace(1); + imset.emplace(2); + imset.emplace(2); + imset.emplace(3); + imset.emplace(3); + imset.emplace(3); + thefoo_rw(); // Set break point at this line. + + str_mset smset; + smset.emplace("hello"); + smset.emplace("world"); + smset.emplace("world"); + smset.emplace("is"); + smset.emplace("is"); + thefoo_rw(); // Set break point at this line. 
+ + return 0; +} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile new file mode 100644 index 000000000000..c825977b1a5d --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +CFLAGS_EXTRAS := -O0 +USE_LIBSTDCPP := 1 + +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py new file mode 100644 index 000000000000..03131ccfde2f --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py @@ -0,0 +1,207 @@ +""" +Test lldb data formatter subsystem. +""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class StdListDataFormatterTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line numbers to break at for the different tests. 
+ self.line = line_number('main.cpp', '// Set break point at this line.') + self.optional_line = line_number( + 'main.cpp', '// Optional break point at this line.') + self.final_line = line_number( + 'main.cpp', '// Set final break point at this line.') + + @add_test_categories(["libstdcxx"]) + @expectedFailureAll(bugnumber="llvm.org/pr50861", compiler="gcc") + def test_with_run_command(self): + """Test that that file and class static variables display correctly.""" + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.line, num_expected_locations=-1) + + self.runCmd("run", RUN_SUCCEEDED) + + # The stop reason of the thread should be breakpoint. + self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + # This is the function to remove the custom formats in order to have a + # clean slate for the next test case. + def cleanup(): + self.runCmd('type format clear', check=False) + self.runCmd('type summary clear', check=False) + self.runCmd('type filter clear', check=False) + self.runCmd('type synth clear', check=False) + self.runCmd( + "settings set target.max-children-count 256", + check=False) + + # Execute the cleanup function during test case tear down. 
+ self.addTearDownHook(cleanup) + + self.runCmd("frame variable numbers_list --show-types") + + self.runCmd("type format add -f hex int") + + self.expect("frame variable numbers_list --raw", matching=False, + substrs=['size=0', + '{}']) + self.expect( + "frame variable &numbers_list._M_impl._M_node --raw", + matching=False, + substrs=[ + 'size=0', + '{}']) + + self.expect("frame variable numbers_list", + substrs=['size=0', + '{}']) + + self.expect("p numbers_list", + substrs=['size=0', + '{}']) + + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['size=1', + '[0] = ', + '0x12345678']) + + self.runCmd("n") + self.runCmd("n") + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['size=4', + '[0] = ', + '0x12345678', + '[1] =', + '0x11223344', + '[2] =', + '0xbeeffeed', + '[3] =', + '0x00abba00']) + + self.runCmd("n") + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['size=6', + '[0] = ', + '0x12345678', + '0x11223344', + '0xbeeffeed', + '0x00abba00', + '[4] =', + '0x0abcdef0', + '[5] =', + '0x0cab0cab']) + + self.expect("p numbers_list", + substrs=['size=6', + '[0] = ', + '0x12345678', + '0x11223344', + '0xbeeffeed', + '0x00abba00', + '[4] =', + '0x0abcdef0', + '[5] =', + '0x0cab0cab']) + + # check access-by-index + self.expect("frame variable numbers_list[0]", + substrs=['0x12345678']) + self.expect("frame variable numbers_list[1]", + substrs=['0x11223344']) + + # but check that expression does not rely on us + self.expect("expression numbers_list[0]", matching=False, error=True, + substrs=['0x12345678']) + + # check that MightHaveChildren() gets it right + self.assertTrue( + self.frame().FindVariable("numbers_list").MightHaveChildren(), + "numbers_list.MightHaveChildren() says False for non empty!") + + self.runCmd("n") + + self.expect("frame variable numbers_list", + substrs=['size=0', + '{}']) + + self.runCmd("n") + self.runCmd("n") + self.runCmd("n") + self.runCmd("n") + + 
self.expect("frame variable numbers_list", + substrs=['size=4', + '[0] = ', '1', + '[1] = ', '2', + '[2] = ', '3', + '[3] = ', '4']) + + self.runCmd("type format delete int") + + self.runCmd("n") + + self.expect("frame variable text_list", + substrs=['size=0', + '{}']) + + lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.final_line, num_expected_locations=-1) + + self.runCmd("c", RUN_SUCCEEDED) + + # The stop reason of the thread should be breakpoint. + self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + self.expect("frame variable text_list", + substrs=['size=4', + '[0]', 'goofy', + '[1]', 'is', + '[2]', 'smart', + '[3]', '!!!']) + + self.expect("p text_list", + substrs=['size=4', + '\"goofy\"', + '\"is\"', + '\"smart\"', + '\"!!!\"']) + + # check access-by-index + self.expect("frame variable text_list[0]", + substrs=['goofy']) + self.expect("frame variable text_list[3]", + substrs=['!!!']) + + # but check that expression does not rely on us + self.expect("expression text_list[0]", matching=False, error=True, + substrs=['goofy']) + + # check that MightHaveChildren() gets it right + self.assertTrue( + self.frame().FindVariable("text_list").MightHaveChildren(), + "text_list.MightHaveChildren() says False for non empty!") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp new file mode 100644 index 000000000000..191acdcc97be --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp @@ -0,0 +1,34 @@ +#include +#include + +typedef std::list int_list; +typedef std::list string_list; + +int main() +{ + int_list numbers_list; + + numbers_list.push_back(0x12345678); // Set break point at this line. 
+ numbers_list.push_back(0x11223344);
+ numbers_list.push_back(0xBEEFFEED);
+ numbers_list.push_back(0x00ABBA00);
+ numbers_list.push_back(0x0ABCDEF0);
+ numbers_list.push_back(0x0CAB0CAB);
+
+ numbers_list.clear();
+
+ numbers_list.push_back(1);
+ numbers_list.push_back(2);
+ numbers_list.push_back(3);
+ numbers_list.push_back(4);
+
+ string_list text_list;
+ text_list.push_back(std::string("goofy")); // Optional break point at this line.
+ text_list.push_back(std::string("is"));
+ text_list.push_back(std::string("smart"));
+
+ text_list.push_back(std::string("!!!"));
+
+ return 0; // Set final break point at this line.
+}
+
diff --git a/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py b/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py
new file mode 100644
index 000000000000..a1ab7ab052e2
--- /dev/null
+++ b/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py
@@ -0,0 +1,717 @@
+import ctypes
+import errno
+import io
+import os
+import os.path
+import threading
+import socket
+import lldb
+import binascii
+import traceback
+from lldbsuite.support import seven
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbtest_config
+
+
+def checksum(message):
+ """
+ Calculate the GDB server protocol checksum of the message.
+
+ The GDB server protocol uses a simple modulo 256 sum.
+ """
+ check = 0
+ for c in message:
+ check += ord(c)
+ return check % 256
+
+
+def frame_packet(message):
+ """
+ Create a framed packet that's ready to send over the GDB connection
+ channel.
+
+ Framing includes surrounding the message between $ and #, and appending
+ a two character hex checksum.
+ """
+ return "$%s#%02x" % (message, checksum(message))
+
+
+def escape_binary(message):
+ """
+ Escape the binary message using the process described in the GDB server
+ protocol documentation.
+
+ Most bytes are sent through as-is, but $, #, and } are escaped by writing
+ a } followed by the original byte XORed with 0x20.
+ """ + out = "" + for c in message: + d = ord(c) + if d in (0x23, 0x24, 0x7d): + out += chr(0x7d) + out += chr(d ^ 0x20) + else: + out += c + return out + + +def hex_encode_bytes(message): + """ + Encode the binary message by converting each byte into a two-character + hex string. + """ + out = "" + for c in message: + out += "%02x" % ord(c) + return out + + +def hex_decode_bytes(hex_bytes): + """ + Decode the hex string into a binary message by converting each two-character + hex string into a single output byte. + """ + out = "" + hex_len = len(hex_bytes) + while i < hex_len - 1: + out += chr(int(hex_bytes[i:i + 2]), 16) + i += 2 + return out + + +class MockGDBServerResponder: + """ + A base class for handling client packets and issuing server responses for + GDB tests. + + This handles many typical situations, while still allowing subclasses to + completely customize their responses. + + Most subclasses will be interested in overriding the other() method, which + handles any packet not recognized in the common packet handling code. + """ + + registerCount = 40 + packetLog = None + + def __init__(self): + self.packetLog = [] + + def respond(self, packet): + """ + Return the unframed packet data that the server should issue in response + to the given packet received from the client. 
+ """ + self.packetLog.append(packet) + if packet is MockGDBServer.PACKET_INTERRUPT: + return self.interrupt() + if packet == "c": + return self.cont() + if packet.startswith("vCont;c"): + return self.vCont(packet) + if packet[0] == "A": + return self.A(packet) + if packet[0] == "D": + return self.D(packet) + if packet[0] == "g": + return self.readRegisters() + if packet[0] == "G": + # Gxxxxxxxxxxx + # Gxxxxxxxxxxx;thread:1234; + return self.writeRegisters(packet[1:].split(';')[0]) + if packet[0] == "p": + regnum = packet[1:].split(';')[0] + return self.readRegister(int(regnum, 16)) + if packet[0] == "P": + register, value = packet[1:].split("=") + return self.writeRegister(int(register, 16), value) + if packet[0] == "m": + addr, length = [int(x, 16) for x in packet[1:].split(',')] + return self.readMemory(addr, length) + if packet[0] == "M": + location, encoded_data = packet[1:].split(":") + addr, length = [int(x, 16) for x in location.split(',')] + return self.writeMemory(addr, encoded_data) + if packet[0:7] == "qSymbol": + return self.qSymbol(packet[8:]) + if packet[0:10] == "qSupported": + return self.qSupported(packet[11:].split(";")) + if packet == "qfThreadInfo": + return self.qfThreadInfo() + if packet == "qsThreadInfo": + return self.qsThreadInfo() + if packet == "qC": + return self.qC() + if packet == "QEnableErrorStrings": + return self.QEnableErrorStrings() + if packet == "?": + return self.haltReason() + if packet == "s": + return self.haltReason() + if packet[0] == "H": + tid = packet[2:] + if "." in tid: + assert tid.startswith("p") + # TODO: do we want to do anything with PID? 
+ tid = tid.split(".", 1)[1] + return self.selectThread(packet[1], int(tid, 16)) + if packet[0:6] == "qXfer:": + obj, read, annex, location = packet[6:].split(":") + offset, length = [int(x, 16) for x in location.split(',')] + data, has_more = self.qXferRead(obj, annex, offset, length) + if data is not None: + return self._qXferResponse(data, has_more) + return "" + if packet.startswith("vAttach;"): + pid = packet.partition(';')[2] + return self.vAttach(int(pid, 16)) + if packet[0] == "Z": + return self.setBreakpoint(packet) + if packet.startswith("qThreadStopInfo"): + threadnum = int (packet[15:], 16) + return self.threadStopInfo(threadnum) + if packet == "QThreadSuffixSupported": + return self.QThreadSuffixSupported() + if packet == "QListThreadsInStopReply": + return self.QListThreadsInStopReply() + if packet.startswith("qMemoryRegionInfo:"): + return self.qMemoryRegionInfo(int(packet.split(':')[1], 16)) + if packet == "qQueryGDBServer": + return self.qQueryGDBServer() + if packet == "qHostInfo": + return self.qHostInfo() + if packet == "qGetWorkingDir": + return self.qGetWorkingDir() + if packet == "qOffsets": + return self.qOffsets(); + if packet == "qsProcessInfo": + return self.qsProcessInfo() + if packet.startswith("qfProcessInfo"): + return self.qfProcessInfo(packet) + if packet.startswith("qPathComplete:"): + return self.qPathComplete() + if packet.startswith("vFile:"): + return self.vFile(packet) + if packet.startswith("vRun;"): + return self.vRun(packet) + if packet.startswith("qLaunchSuccess"): + return self.qLaunchSuccess() + if packet.startswith("QEnvironment:"): + return self.QEnvironment(packet) + if packet.startswith("QEnvironmentHexEncoded:"): + return self.QEnvironmentHexEncoded(packet) + if packet.startswith("qRegisterInfo"): + regnum = int(packet[len("qRegisterInfo"):], 16) + return self.qRegisterInfo(regnum) + if packet == "k": + return self.k() + + return self.other(packet) + + def qsProcessInfo(self): + return "E04" + + def 
qfProcessInfo(self, packet): + return "E04" + + def qGetWorkingDir(self): + return "2f" + + def qOffsets(self): + return "" + + def qHostInfo(self): + return "ptrsize:8;endian:little;" + + def qQueryGDBServer(self): + return "E04" + + def interrupt(self): + raise self.UnexpectedPacketException() + + def cont(self): + raise self.UnexpectedPacketException() + + def vCont(self, packet): + raise self.UnexpectedPacketException() + + def A(self, packet): + return "" + + def D(self, packet): + return "OK" + + def readRegisters(self): + return "00000000" * self.registerCount + + def readRegister(self, register): + return "00000000" + + def writeRegisters(self, registers_hex): + return "OK" + + def writeRegister(self, register, value_hex): + return "OK" + + def readMemory(self, addr, length): + return "00" * length + + def writeMemory(self, addr, data_hex): + return "OK" + + def qSymbol(self, symbol_args): + return "OK" + + def qSupported(self, client_supported): + return "qXfer:features:read+;PacketSize=3fff;QStartNoAckMode+" + + def qfThreadInfo(self): + return "l" + + def qsThreadInfo(self): + return "l" + + def qC(self): + return "QC0" + + def QEnableErrorStrings(self): + return "OK" + + def haltReason(self): + # SIGINT is 2, return type is 2 digit hex string + return "S02" + + def qXferRead(self, obj, annex, offset, length): + return None, False + + def _qXferResponse(self, data, has_more): + return "%s%s" % ("m" if has_more else "l", escape_binary(data)) + + def vAttach(self, pid): + raise self.UnexpectedPacketException() + + def selectThread(self, op, thread_id): + return "OK" + + def setBreakpoint(self, packet): + raise self.UnexpectedPacketException() + + def threadStopInfo(self, threadnum): + return "" + + def other(self, packet): + # empty string means unsupported + return "" + + def QThreadSuffixSupported(self): + return "" + + def QListThreadsInStopReply(self): + return "" + + def qMemoryRegionInfo(self, addr): + return "" + + def qPathComplete(self): + return 
"" + + def vFile(self, packet): + return "" + + def vRun(self, packet): + return "" + + def qLaunchSuccess(self): + return "" + + def QEnvironment(self, packet): + return "OK" + + def QEnvironmentHexEncoded(self, packet): + return "OK" + + def qRegisterInfo(self, num): + return "" + + def k(self): + return "" + + """ + Raised when we receive a packet for which there is no default action. + Override the responder class to implement behavior suitable for the test at + hand. + """ + class UnexpectedPacketException(Exception): + pass + + +class ServerSocket: + """ + A wrapper class for TCP or pty-based server. + """ + + def get_connect_address(self): + """Get address for the client to connect to.""" + + def get_connect_url(self): + """Get URL suitable for process connect command.""" + + def close_server(self): + """Close all resources used by the server.""" + + def accept(self): + """Accept a single client connection to the server.""" + + def close_connection(self): + """Close all resources used by the accepted connection.""" + + def recv(self): + """Receive a data packet from the connected client.""" + + def sendall(self, data): + """Send the data to the connected client.""" + + +class TCPServerSocket(ServerSocket): + def __init__(self): + family, type, proto, _, addr = socket.getaddrinfo( + "localhost", 0, proto=socket.IPPROTO_TCP)[0] + self._server_socket = socket.socket(family, type, proto) + self._connection = None + + self._server_socket.bind(addr) + self._server_socket.listen(1) + + def get_connect_address(self): + return "[{}]:{}".format(*self._server_socket.getsockname()) + + def get_connect_url(self): + return "connect://" + self.get_connect_address() + + def close_server(self): + self._server_socket.close() + + def accept(self): + assert self._connection is None + # accept() is stubborn and won't fail even when the socket is + # shutdown, so we'll use a timeout + self._server_socket.settimeout(30.0) + client, client_addr = self._server_socket.accept() + # 
The connected client inherits its timeout from self._socket, + # but we'll use a blocking socket for the client + client.settimeout(None) + self._connection = client + + def close_connection(self): + assert self._connection is not None + self._connection.close() + self._connection = None + + def recv(self): + assert self._connection is not None + return self._connection.recv(4096) + + def sendall(self, data): + assert self._connection is not None + return self._connection.sendall(data) + + +class PtyServerSocket(ServerSocket): + def __init__(self): + import pty + import tty + master, slave = pty.openpty() + tty.setraw(master) + self._master = io.FileIO(master, 'r+b') + self._slave = io.FileIO(slave, 'r+b') + + def get_connect_address(self): + libc = ctypes.CDLL(None) + libc.ptsname.argtypes = (ctypes.c_int,) + libc.ptsname.restype = ctypes.c_char_p + return libc.ptsname(self._master.fileno()).decode() + + def get_connect_url(self): + return "serial://" + self.get_connect_address() + + def close_server(self): + self._slave.close() + self._master.close() + + def recv(self): + try: + return self._master.read(4096) + except OSError as e: + # closing the pty results in EIO on Linux, convert it to EOF + if e.errno == errno.EIO: + return b'' + raise + + def sendall(self, data): + return self._master.write(data) + + +class MockGDBServer: + """ + A simple TCP-based GDB server that can test client behavior by receiving + commands and issuing custom-tailored responses. + + Responses are generated via the .responder property, which should be an + instance of a class based on MockGDBServerResponder. + """ + + responder = None + _socket = None + _thread = None + _receivedData = None + _receivedDataOffset = None + _shouldSendAck = True + + def __init__(self, socket_class): + self._socket_class = socket_class + self.responder = MockGDBServerResponder() + + def start(self): + self._socket = self._socket_class() + # Start a thread that waits for a client connection. 
+ self._thread = threading.Thread(target=self._run) + self._thread.start() + + def stop(self): + self._socket.close_server() + self._thread.join() + self._thread = None + + def get_connect_address(self): + return self._socket.get_connect_address() + + def get_connect_url(self): + return self._socket.get_connect_url() + + def _run(self): + # For testing purposes, we only need to worry about one client + # connecting just one time. + try: + self._socket.accept() + except: + return + self._shouldSendAck = True + self._receivedData = "" + self._receivedDataOffset = 0 + data = None + while True: + try: + data = seven.bitcast_to_string(self._socket.recv()) + if data is None or len(data) == 0: + break + self._receive(data) + except Exception as e: + print("An exception happened when receiving the response from the gdb server. Closing the client...") + traceback.print_exc() + self._socket.close_connection() + break + + def _receive(self, data): + """ + Collects data, parses and responds to as many packets as exist. + Any leftover data is kept for parsing the next time around. + """ + self._receivedData += data + try: + packet = self._parsePacket() + while packet is not None: + self._handlePacket(packet) + packet = self._parsePacket() + except self.InvalidPacketException: + self._socket.close_connection() + + def _parsePacket(self): + """ + Reads bytes from self._receivedData, returning: + - a packet's contents if a valid packet is found + - the PACKET_ACK unique object if we got an ack + - None if we only have a partial packet + + Raises an InvalidPacketException if unexpected data is received + or if checksums fail. + + Once a complete packet is found at the front of self._receivedData, + its data is removed form self._receivedData. 
+ """ + data = self._receivedData + i = self._receivedDataOffset + data_len = len(data) + if data_len == 0: + return None + if i == 0: + # If we're looking at the start of the received data, that means + # we're looking for the start of a new packet, denoted by a $. + # It's also possible we'll see an ACK here, denoted by a + + if data[0] == '+': + self._receivedData = data[1:] + return self.PACKET_ACK + if ord(data[0]) == 3: + self._receivedData = data[1:] + return self.PACKET_INTERRUPT + if data[0] == '$': + i += 1 + else: + raise self.InvalidPacketException( + "Unexpected leading byte: %s" % data[0]) + + # If we're looking beyond the start of the received data, then we're + # looking for the end of the packet content, denoted by a #. + # Note that we pick up searching from where we left off last time + while i < data_len and data[i] != '#': + i += 1 + + # If there isn't enough data left for a checksum, just remember where + # we left off so we can pick up there the next time around + if i > data_len - 3: + self._receivedDataOffset = i + return None + + # If we have enough data remaining for the checksum, extract it and + # compare to the packet contents + packet = data[1:i] + i += 1 + try: + check = int(data[i:i + 2], 16) + except ValueError: + raise self.InvalidPacketException("Checksum is not valid hex") + i += 2 + if check != checksum(packet): + raise self.InvalidPacketException( + "Checksum %02x does not match content %02x" % + (check, checksum(packet))) + # remove parsed bytes from _receivedData and reset offset so parsing + # can start on the next packet the next time around + self._receivedData = data[i:] + self._receivedDataOffset = 0 + return packet + + def _handlePacket(self, packet): + if packet is self.PACKET_ACK: + # Ignore ACKs from the client. For the future, we can consider + # adding validation code to make sure the client only sends ACKs + # when it's supposed to. 
+ return + response = "" + # We'll handle the ack stuff here since it's not something any of the + # tests will be concerned about, and it'll get turned off quickly anyway. + if self._shouldSendAck: + self._socket.sendall(seven.bitcast_to_bytes('+')) + if packet == "QStartNoAckMode": + self._shouldSendAck = False + response = "OK" + elif self.responder is not None: + # Delegate everything else to our responder + response = self.responder.respond(packet) + # Handle packet framing since we don't want to bother tests with it. + if response is not None: + framed = frame_packet(response) + self._socket.sendall(seven.bitcast_to_bytes(framed)) + + PACKET_ACK = object() + PACKET_INTERRUPT = object() + + class InvalidPacketException(Exception): + pass + + +class GDBRemoteTestBase(TestBase): + """ + Base class for GDB client tests. + + This class will setup and start a mock GDB server for the test to use. + It also provides assertPacketLogContains, which simplifies the checking + of packets sent by the client. + """ + + NO_DEBUG_INFO_TESTCASE = True + mydir = TestBase.compute_mydir(__file__) + server = None + server_socket_class = TCPServerSocket + + def setUp(self): + TestBase.setUp(self) + self.server = MockGDBServer(socket_class=self.server_socket_class) + self.server.start() + + def tearDown(self): + # TestBase.tearDown will kill the process, but we need to kill it early + # so its client connection closes and we can stop the server before + # finally calling the base tearDown. + if self.process() is not None: + self.process().Kill() + self.server.stop() + TestBase.tearDown(self) + + def createTarget(self, yaml_path): + """ + Create a target by auto-generating the object based on the given yaml + instructions. + + This will track the generated object so it can be automatically removed + during tearDown. 
+ """ + yaml_base, ext = os.path.splitext(yaml_path) + obj_path = self.getBuildArtifact(yaml_base) + self.yaml2obj(yaml_path, obj_path) + return self.dbg.CreateTarget(obj_path) + + def connect(self, target): + """ + Create a process by connecting to the mock GDB server. + + Includes assertions that the process was successfully created. + """ + listener = self.dbg.GetListener() + error = lldb.SBError() + process = target.ConnectRemote(listener, + self.server.get_connect_url(), "gdb-remote", error) + self.assertTrue(error.Success(), error.description) + self.assertTrue(process, PROCESS_IS_VALID) + return process + + def assertPacketLogContains(self, packets): + """ + Assert that the mock server's packet log contains the given packets. + + The packet log includes all packets sent by the client and received + by the server. This fuction makes it easy to verify that the client + sent the expected packets to the server. + + The check does not require that the packets be consecutive, but does + require that they are ordered in the log as they ordered in the arg. + """ + i = 0 + j = 0 + log = self.server.responder.packetLog + + while i < len(packets) and j < len(log): + if log[j] == packets[i]: + i += 1 + j += 1 + if i < len(packets): + self.fail(u"Did not receive: %s\nLast 10 packets:\n\t%s" % + (packets[i], u'\n\t'.join(log))) + + +class GDBPlatformClientTestBase(GDBRemoteTestBase): + """ + Base class for platform server clients. + + This class extends GDBRemoteTestBase by automatically connecting + via "platform connect" in the setUp() method. 
+ """ + + def setUp(self): + super().setUp() + self.runCmd("platform select remote-gdb-server") + self.runCmd("platform connect " + self.server.get_connect_url()) + self.assertTrue(self.dbg.GetSelectedPlatform().IsConnected()) + + def tearDown(self): + self.dbg.GetSelectedPlatform().DisconnectRemote() + super().tearDown() diff --git a/lldb/test/API/functionalities/memory/read/Makefile b/lldb/test/API/functionalities/memory/read/Makefile new file mode 100644 index 000000000000..99998b20bcb0 --- /dev/null +++ b/lldb/test/API/functionalities/memory/read/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/functionalities/memory/read/TestMemoryRead.py b/lldb/test/API/functionalities/memory/read/TestMemoryRead.py new file mode 100644 index 000000000000..ceea4ab2f067 --- /dev/null +++ b/lldb/test/API/functionalities/memory/read/TestMemoryRead.py @@ -0,0 +1,177 @@ +""" +Test the 'memory read' command. +""" + +import lldb +import lldbsuite.test.lldbutil as lldbutil + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class MemoryReadTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line number to break inside main(). + self.line = line_number('main.cpp', '// Set break point at this line.') + + def build_run_stop(self): + self.build() + exe = self.getBuildArtifact("a.out") + self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) + + # Break in main() after the variables are assigned values. + lldbutil.run_break_set_by_file_and_line(self, + "main.cpp", + self.line, + num_expected_locations=1, + loc_exact=True) + + self.runCmd("run", RUN_SUCCEEDED) + + # The stop reason of the thread should be breakpoint. + self.expect("thread list", + STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', 'stop reason = breakpoint']) + + # The breakpoint should have a hit count of 1. 
+ self.expect("breakpoint list -f", + BREAKPOINT_HIT_ONCE, + substrs=[' resolved, hit count = 1']) + + @no_debug_info_test + def test_memory_read(self): + """Test the 'memory read' command with plain and vector formats.""" + self.build_run_stop() + + # (lldb) memory read -f d -c 1 `&argc` + # 0x7fff5fbff9a0: 1 + self.runCmd("memory read -f d -c 1 `&argc`") + + # Find the starting address for variable 'argc' to verify later that the + # '--format uint32_t[] --size 4 --count 4' option increments the address + # correctly. + line = self.res.GetOutput().splitlines()[0] + items = line.split(':') + address = int(items[0], 0) + argc = int(items[1], 0) + self.assertGreater(address, 0) + self.assertEquals(argc, 1) + + # (lldb) memory read --format uint32_t[] --size 4 --count 4 `&argc` + # 0x7fff5fbff9a0: {0x00000001} + # 0x7fff5fbff9a4: {0x00000000} + # 0x7fff5fbff9a8: {0x0ec0bf27} + # 0x7fff5fbff9ac: {0x215db505} + self.runCmd( + "memory read --format uint32_t[] --size 4 --count 4 `&argc`") + lines = self.res.GetOutput().splitlines() + for i in range(4): + if i == 0: + # Verify that the printout for argc is correct. + self.assertEqual( + argc, int(lines[i].split(':')[1].strip(' {}'), 0)) + addr = int(lines[i].split(':')[0], 0) + # Verify that the printout for addr is incremented correctly. 
+ self.assertEqual(addr, (address + i * 4)) + + # (lldb) memory read --format char[] --size 7 --count 1 `&my_string` + # 0x7fff5fbff990: {abcdefg} + self.expect( + "memory read --format char[] --size 7 --count 1 `&my_string`", + substrs=['abcdefg']) + + # (lldb) memory read --format 'hex float' --size 16 `&argc` + # 0x7fff5fbff5b0: error: unsupported byte size (16) for hex float + # format + self.expect( + "memory read --format 'hex float' --size 16 `&argc`", + substrs=['unsupported byte size (16) for hex float format']) + + self.expect( + "memory read --format 'float' --count 1 --size 8 `&my_double`", + substrs=['1234.']) + + # (lldb) memory read --format 'float' --count 1 --size 20 `&my_double` + # 0x7fff5fbff598: error: unsupported byte size (20) for float format + self.expect( + "memory read --format 'float' --count 1 --size 20 `&my_double`", + substrs=['unsupported byte size (20) for float format']) + + self.expect('memory read --type int --count 5 `&my_ints[0]`', + substrs=['(int) 0x', '2', '4', '6', '8', '10']) + + self.expect( + 'memory read --type int --count 5 --format hex `&my_ints[0]`', + substrs=[ + '(int) 0x', + '0x', + '0a']) + + self.expect( + 'memory read --type int --count 5 --offset 5 `&my_ints[0]`', + substrs=[ + '(int) 0x', + '12', + '14', + '16', + '18', + '20']) + + # the gdb format specifier and the size in characters for + # the returned values including the 0x prefix. 
+ variations = [['b', 4], ['h', 6], ['w', 10], ['g', 18]] + for v in variations: + formatter = v[0] + expected_object_length = v[1] + self.runCmd( + "memory read --gdb-format 4%s &my_uint64s" % formatter) + lines = self.res.GetOutput().splitlines() + objects_read = [] + for l in lines: + objects_read.extend(l.split(':')[1].split()) + # Check that we got back 4 0x0000 etc bytes + for o in objects_read: + self.assertEqual(len(o), expected_object_length) + self.assertEquals(len(objects_read), 4) + + @no_debug_info_test + def test_memory_read_file(self): + self.build_run_stop() + res = lldb.SBCommandReturnObject() + self.ci.HandleCommand("memory read -f d -c 1 `&argc`", res) + self.assertTrue(res.Succeeded(), "memory read failed:" + res.GetError()) + + # Record golden output. + golden_output = res.GetOutput() + + memory_read_file = self.getBuildArtifact("memory-read-output") + + def check_file_content(expected): + with open(memory_read_file) as f: + lines = f.readlines() + lines = [s.strip() for s in lines] + expected = [s.strip() for s in expected] + self.assertEqual(lines, expected) + + # Sanity check. + self.runCmd("memory read -f d -c 1 -o '{}' `&argc`".format(memory_read_file)) + check_file_content([golden_output]) + + # Write some garbage to the file. + with open(memory_read_file, 'w') as f: + f.write("some garbage") + + # Make sure the file is truncated when we run the command again. + self.runCmd("memory read -f d -c 1 -o '{}' `&argc`".format(memory_read_file)) + check_file_content([golden_output]) + + # Make sure the file is appended when we run the command with --append-outfile. 
+ self.runCmd( + "memory read -f d -c 1 -o '{}' --append-outfile `&argc`".format( + memory_read_file)) + check_file_content([golden_output, golden_output]) diff --git a/lldb/test/API/functionalities/memory/read/main.cpp b/lldb/test/API/functionalities/memory/read/main.cpp new file mode 100644 index 000000000000..5a33ac1343c2 --- /dev/null +++ b/lldb/test/API/functionalities/memory/read/main.cpp @@ -0,0 +1,13 @@ +#include +#include + +int main (int argc, char const *argv[]) +{ + char my_string[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 0}; + double my_double = 1234.5678; + int my_ints[] = {2,4,6,8,10,12,14,16,18,20,22}; + uint64_t my_uint64s[] = {0, 1, 2, 3, 4, 5, 6, 7}; + printf("my_string=%s\n", my_string); // Set break point at this line. + printf("my_double=%g\n", my_double); + return 0; +} diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile b/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile new file mode 100644 index 000000000000..10495940055b --- /dev/null +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py new file mode 100644 index 000000000000..b175f6234b10 --- /dev/null +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py @@ -0,0 +1,42 @@ +""" +Test that "memory region" lookup uses the ABI plugin to remove +non address bits from addresses before lookup. 
+""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class AArch64LinuxTaggedMemoryRegionTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + NO_DEBUG_INFO_TESTCASE = True + + # AArch64 Linux always enables the top byte ignore feature + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_mte_regions(self): + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line(self, "main.c", + line_number('main.c', '// Set break point at this line.'), + num_expected_locations=1) + + self.runCmd("run", RUN_SUCCEEDED) + + if self.process().GetState() == lldb.eStateExited: + self.fail("Test program failed to run.") + + self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + # Despite the non address bits we should find a region + self.expect("memory region the_page", patterns=[ + "\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"]) diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/main.c b/lldb/test/API/linux/aarch64/tagged_memory_region/main.c new file mode 100644 index 000000000000..29f99d73e12d --- /dev/null +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/main.c @@ -0,0 +1,17 @@ +#include +#include +#include +#include + +int main(int argc, char const *argv[]) { + void *the_page = mmap(0, sysconf(_SC_PAGESIZE), PROT_READ | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (the_page == MAP_FAILED) + return 1; + + // Put something in the top byte (AArch64 Linux always enables top byte + // ignore) + the_page = (void *)((size_t)the_page | ((size_t)0x34 << 56)); + + return 0; // Set break point at this line. 
+} diff --git a/llvm/lib/Analysis/ReleaseModeModelRunner.cpp b/llvm/lib/Analysis/ReleaseModeModelRunner.cpp new file mode 100644 index 000000000000..d2bf95388066 --- /dev/null +++ b/llvm/lib/Analysis/ReleaseModeModelRunner.cpp @@ -0,0 +1,90 @@ +//===- ReleaseModeModelRunner.cpp - Fast, precompiled model runner -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a model runner wrapping an AOT compiled ML model. +// Only inference is supported. +// +//===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" +#if defined(LLVM_HAVE_TF_AOT) + +#include "llvm/Analysis/InlineModelFeatureMaps.h" +#include "llvm/Analysis/MLInlineAdvisor.h" + +// codegen-ed file +#include "InlinerSizeModel.h" // NOLINT + +#include +#include + +using namespace llvm; +namespace { + +const char FeedPrefix[] = "feed_"; +const char FetchPrefix[] = "fetch_"; + +/// MLModelRunner - production mode implementation. It uses a AOT-compiled +/// SavedModel for efficient execution. 
+class ReleaseModeModelRunner final : public MLModelRunner { +public: + ReleaseModeModelRunner(LLVMContext &Ctx); + virtual ~ReleaseModeModelRunner() = default; + + bool run() override; + + void setFeature(FeatureIndex Index, int64_t Value) override; + int64_t getFeature(int Index) const override; + +private: + std::vector FeatureIndices; + int32_t ResultIndex = -1; + std::unique_ptr CompiledModel; +}; +} // namespace + +ReleaseModeModelRunner::ReleaseModeModelRunner(LLVMContext &Ctx) + : MLModelRunner(Ctx), + CompiledModel(std::make_unique()) { + assert(CompiledModel && "The CompiledModel should be valid"); + + FeatureIndices.resize(NumberOfFeatures); + + for (size_t I = 0; I < NumberOfFeatures; ++I) { + const int Index = + CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]); + assert(Index >= 0 && "Cannot find Feature in inlining model"); + FeatureIndices[I] = Index; + } + + ResultIndex = + CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName); + assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model"); +} + +int64_t ReleaseModeModelRunner::getFeature(int Index) const { + return *static_cast( + CompiledModel->arg_data(FeatureIndices[Index])); +} + +void ReleaseModeModelRunner::setFeature(FeatureIndex Index, int64_t Value) { + *static_cast(CompiledModel->arg_data( + FeatureIndices[static_cast(Index)])) = Value; +} + +bool ReleaseModeModelRunner::run() { + CompiledModel->Run(); + return static_cast( + *static_cast(CompiledModel->result_data(ResultIndex))); +} + +std::unique_ptr +llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { + auto AOTRunner = std::make_unique(M.getContext()); + return std::make_unique(M, MAM, std::move(AOTRunner)); +} +#endif // defined(LLVM_HAVE_TF_AOT) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9f138136e6e9..ddb77bf00e42 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8413,16 +8413,6 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Cond = Op.getOperand(0); - if (Subtarget->hasScalarCompareEq64() && Op->getOperand(0)->hasOneUse() && - !Op->isDivergent()) { - if (VT == MVT::i64) - return Op; - SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(1)); - SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(2)); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS)); - } - SDValue Zero = DAG.getConstant(0, DL, MVT::i32); SDValue One = DAG.getConstant(1, DL, MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1755b93538ce..4b7f06996ed6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6122,11 +6122,8 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; case AMDGPU::S_CSELECT_B32: - lowerSelect32(Worklist, Inst, MDT); - Inst.eraseFromParent(); - continue; case AMDGPU::S_CSELECT_B64: - splitSelect64(Worklist, Inst, MDT); + lowerSelect(Worklist, Inst, MDT); Inst.eraseFromParent(); continue; case AMDGPU::S_CMP_EQ_I32: @@ -6304,8 +6301,8 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, return std::make_pair(false, nullptr); } -void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst, - MachineDominatorTree *MDT) const { +void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -6380,95 +6377,6 @@ void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst, addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::splitSelect64(SetVectorType &Worklist, MachineInstr &Inst, - MachineDominatorTree *MDT) const 
{ - // Split S_CSELECT_B64 into a pair of S_CSELECT_B32 and lower them - // further. - const DebugLoc &DL = Inst.getDebugLoc(); - MachineBasicBlock::iterator MII = Inst; - MachineBasicBlock &MBB = *Inst.getParent(); - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - - // Get the original operands. - MachineOperand &Dest = Inst.getOperand(0); - MachineOperand &Src0 = Inst.getOperand(1); - MachineOperand &Src1 = Inst.getOperand(2); - MachineOperand &Cond = Inst.getOperand(3); - - Register SCCSource = Cond.getReg(); - bool IsSCC = (SCCSource == AMDGPU::SCC); - - // If this is a trivial select where the condition is effectively not SCC - // (SCCSource is a source of copy to SCC), then the select is semantically - // equivalent to copying SCCSource. Hence, there is no need to create - // V_CNDMASK, we can just use that and bail out. - if (!IsSCC && (Src0.isImm() && Src0.getImm() == -1) && - (Src1.isImm() && Src1.getImm() == 0)) { - MRI.replaceRegWith(Dest.getReg(), SCCSource); - return; - } - - // Prepare the split destination. - Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); - Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - - // Split the source operands. - const TargetRegisterClass *Src0RC = nullptr; - const TargetRegisterClass *Src0SubRC = nullptr; - if (Src0.isReg()) { - Src0RC = MRI.getRegClass(Src0.getReg()); - Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); - } - const TargetRegisterClass *Src1RC = nullptr; - const TargetRegisterClass *Src1SubRC = nullptr; - if (Src1.isReg()) { - Src1RC = MRI.getRegClass(Src1.getReg()); - Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); - } - // Split lo. - MachineOperand SrcReg0Sub0 = - buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); - MachineOperand SrcReg1Sub0 = - buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); - // Split hi. 
- MachineOperand SrcReg0Sub1 = - buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); - MachineOperand SrcReg1Sub1 = - buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); - // Select the lo part. - MachineInstr *LoHalf = - BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub0) - .add(SrcReg0Sub0) - .add(SrcReg1Sub0); - // Replace the condition operand with the original one. - LoHalf->getOperand(3).setReg(SCCSource); - Worklist.insert(LoHalf); - // Select the hi part. - MachineInstr *HiHalf = - BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub1) - .add(SrcReg0Sub1) - .add(SrcReg1Sub1); - // Replace the condition operand with the original one. - HiHalf->getOperand(3).setReg(SCCSource); - Worklist.insert(HiHalf); - // Merge them back to the original 64-bit one. - BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) - .addReg(DestSub0) - .addImm(AMDGPU::sub0) - .addReg(DestSub1) - .addImm(AMDGPU::sub1); - MRI.replaceRegWith(Dest.getReg(), FullDestReg); - - // Try to legalize the operands in case we need to swap the order to keep - // it valid. - legalizeOperands(*LoHalf, MDT); - legalizeOperands(*HiHalf, MDT); - - // Move all users of this moved value. 
- addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); -} - void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index dd9ea2b53ca2..70a48cd58e38 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -78,11 +78,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, MachineDominatorTree *MDT = nullptr) const; - void lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst, - MachineDominatorTree *MDT = nullptr) const; - - void splitSelect64(SetVectorType &Worklist, MachineInstr &Inst, - MachineDominatorTree *MDT = nullptr) const; + void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT = nullptr) const; void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 1713586dcf5b..96438e9247a2 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -518,10 +518,9 @@ let Uses = [SCC] in { def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32", [(set i32:$sdst, (SelectPat i64:$src0, i64:$src1))] - >; } + + def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">; } // End Uses = [SCC] let Defs = [SCC] in { diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp new file mode 100644 index 000000000000..9cd959012e6f --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp @@ -0,0 +1,152 @@ +//===-- M68kCallLowering.cpp - Call lowering -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements the lowering of LLVM calls to machine code calls for +/// GlobalISel. +// +//===----------------------------------------------------------------------===// + +#include "M68kCallLowering.h" +#include "M68kISelLowering.h" +#include "M68kInstrInfo.h" +#include "M68kSubtarget.h" +#include "M68kTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/TargetCallingConv.h" + +using namespace llvm; + +M68kCallLowering::M68kCallLowering(const M68kTargetLowering &TLI) + : CallLowering(&TLI) {} + +struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { + OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB) + : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {} + + void assignValueToReg(Register ValVReg, Register PhysReg, + CCValAssign VA) override { + MIB.addUse(PhysReg, RegState::Implicit); + Register ExtReg = extendRegister(ValVReg, VA); + MIRBuilder.buildCopy(PhysReg, ExtReg); + } + + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, + MachinePointerInfo &MPO, CCValAssign &VA) override { + llvm_unreachable("unimplemented"); + } + + Register getStackAddress(uint64_t Size, int64_t Offset, + MachinePointerInfo &MPO, + ISD::ArgFlagsTy Flags) override { + llvm_unreachable("unimplemented"); + } + + MachineInstrBuilder MIB; +}; +bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, + const Value *Val, ArrayRef VRegs, + FunctionLoweringInfo &FLI, + Register SwiftErrorVReg) const { + + auto MIB = MIRBuilder.buildInstrNoInsert(M68k::RTS); + bool Success = true; + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + MachineRegisterInfo &MRI = 
MF.getRegInfo(); + const M68kTargetLowering &TLI = *getTLI(); + CCAssignFn *AssignFn = + TLI.getCCAssignFn(F.getCallingConv(), true, F.isVarArg()); + auto &DL = F.getParent()->getDataLayout(); + if (!VRegs.empty()) { + SmallVector SplitArgs; + ArgInfo OrigArg{VRegs, Val->getType(), 0}; + setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); + splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); + OutgoingValueAssigner ArgAssigner(AssignFn); + OutgoingArgHandler ArgHandler(MIRBuilder, MRI, MIB); + Success = determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs, + MIRBuilder, F.getCallingConv(), + F.isVarArg()); + } + MIRBuilder.insertInstr(MIB); + return Success; +} + +bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, + const Function &F, + ArrayRef> VRegs, + FunctionLoweringInfo &FLI) const { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &DL = F.getParent()->getDataLayout(); + auto &TLI = *getTLI(); + + SmallVector SplitArgs; + unsigned I = 0; + for (const auto &Arg : F.args()) { + ArgInfo OrigArg{VRegs[I], Arg.getType(), I}; + setArgFlags(OrigArg, I + AttributeList::FirstArgIndex, DL, F); + splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); + ++I; + } + + CCAssignFn *AssignFn = + TLI.getCCAssignFn(F.getCallingConv(), false, F.isVarArg()); + IncomingValueAssigner ArgAssigner(AssignFn); + FormalArgHandler ArgHandler(MIRBuilder, MRI); + return determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs, + MIRBuilder, F.getCallingConv(), + F.isVarArg()); +} + +void M68kIncomingValueHandler::assignValueToReg(Register ValVReg, + Register PhysReg, + CCValAssign VA) { + MIRBuilder.getMRI()->addLiveIn(PhysReg); + MIRBuilder.getMBB().addLiveIn(PhysReg); + IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); +} + +void M68kIncomingValueHandler::assignValueToAddress(Register ValVReg, + Register Addr, + LLT MemTy, + MachinePointerInfo &MPO, + 
CCValAssign &VA) { + MachineFunction &MF = MIRBuilder.getMF(); + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy, + inferAlignFromPtrInfo(MF, MPO)); + MIRBuilder.buildLoad(ValVReg, Addr, *MMO); +} + +Register M68kIncomingValueHandler::getStackAddress(uint64_t Size, + int64_t Offset, + MachinePointerInfo &MPO, + ISD::ArgFlagsTy Flags) { + auto &MFI = MIRBuilder.getMF().getFrameInfo(); + const bool IsImmutable = !Flags.isByVal(); + int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable); + MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); + + // Build Frame Index + llvm::LLT FramePtr = LLT::pointer( + 0, MIRBuilder.getMF().getDataLayout().getPointerSizeInBits()); + MachineInstrBuilder AddrReg = MIRBuilder.buildFrameIndex(FramePtr, FI); + StackUsed = std::max(StackUsed, Size + Offset); + return AddrReg.getReg(0); +} + +bool M68kCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { + return false; +} + +bool M68kCallLowering::enableBigEndian() const { return true; } diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h new file mode 100644 index 000000000000..47cdefdba100 --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h @@ -0,0 +1,72 @@ +//===-- M68kCallLowering.h - Call lowering -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements the lowering of LLVM calls to machine code calls for +/// GlobalISel. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H +#define LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/ValueTypes.h" + +namespace llvm { + +class M68kTargetLowering; + +class M68kCallLowering : public CallLowering { + // TODO: We are only supporting return instruction with no value at this time + // point + +public: + M68kCallLowering(const M68kTargetLowering &TLI); + + bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, + ArrayRef VRegs, FunctionLoweringInfo &FLI, + Register SwiftErrorVReg) const override; + + bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs, + FunctionLoweringInfo &FLI) const override; + + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; + + bool enableBigEndian() const override; +}; +struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler { + M68kIncomingValueHandler(MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI) + : CallLowering::IncomingValueHandler(MIRBuilder, MRI) {} + + uint64_t StackUsed; + +private: + void assignValueToReg(Register ValVReg, Register PhysReg, + CCValAssign VA) override; + + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, + MachinePointerInfo &MPO, CCValAssign &VA) override; + + Register getStackAddress(uint64_t Size, int64_t Offset, + MachinePointerInfo &MPO, + ISD::ArgFlagsTy Flags) override; +}; + +struct FormalArgHandler : public M68kIncomingValueHandler { + FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) + : M68kIncomingValueHandler(MIRBuilder, MRI) {} +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H diff --git a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp 
b/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp new file mode 100644 index 000000000000..9ac4ab9a5ba1 --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp @@ -0,0 +1,90 @@ +//===- M68kInstructionSelector.cpp ------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the InstructionSelector class for +/// M68k. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "M68kRegisterBankInfo.h" +#include "M68kSubtarget.h" +#include "M68kTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "m68k-isel" + +using namespace llvm; + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "M68kGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +namespace { + +class M68kInstructionSelector : public InstructionSelector { +public: + M68kInstructionSelector(const M68kTargetMachine &TM, const M68kSubtarget &STI, + const M68kRegisterBankInfo &RBI); + + bool select(MachineInstr &I) override; + static const char *getName() { return DEBUG_TYPE; } + +private: + bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + + const M68kTargetMachine &TM; + const M68kInstrInfo &TII; + const M68kRegisterInfo &TRI; + const M68kRegisterBankInfo &RBI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "M68kGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "M68kGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; + +} // end anonymous namespace + +#define GET_GLOBALISEL_IMPL +#include "M68kGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +M68kInstructionSelector::M68kInstructionSelector( + const M68kTargetMachine &TM, const 
M68kSubtarget &STI, + const M68kRegisterBankInfo &RBI) + : InstructionSelector(), TM(TM), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), + +#define GET_GLOBALISEL_PREDICATES_INIT +#include "M68kGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "M68kGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} + +bool M68kInstructionSelector::select(MachineInstr &I) { + // Certain non-generic instructions also need some special handling. + if (!isPreISelGenericOpcode(I.getOpcode())) + return true; + + if (selectImpl(I, *CoverageInfo)) + return true; + + return false; +} + +namespace llvm { +InstructionSelector * +createM68kInstructionSelector(const M68kTargetMachine &TM, + const M68kSubtarget &Subtarget, + const M68kRegisterBankInfo &RBI) { + return new M68kInstructionSelector(TM, Subtarget, RBI); +} +} // end namespace llvm diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp new file mode 100644 index 000000000000..bcbe62816beb --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp @@ -0,0 +1,33 @@ +//===-- M68kLegalizerInfo.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the Machinelegalizer class for M68k. 
+//===----------------------------------------------------------------------===// + +#include "M68kLegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +M68kLegalizerInfo::M68kLegalizerInfo(const M68kSubtarget &ST) { + using namespace TargetOpcode; + const LLT S32 = LLT::scalar(32); + const LLT P0 = LLT::pointer(0, 32); + getActionDefinitionsBuilder(G_LOAD).legalFor({S32}); + getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({P0}); + getActionDefinitionsBuilder(G_ADD).legalFor({S32}); + getActionDefinitionsBuilder(G_SUB).legalFor({S32}); + getActionDefinitionsBuilder(G_MUL).legalFor({S32}); + getActionDefinitionsBuilder(G_UDIV).legalFor({S32}); + getLegacyLegalizerInfo().computeTables(); +} diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h new file mode 100644 index 000000000000..205aa81aedcc --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h @@ -0,0 +1,29 @@ +//===- M68kLegalizerInfo --------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the MachineLegalizer class for +/// M68k. +/// \todo This should be generated by TableGen. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KLEGALIZERINFO_H +#define LLVM_LIB_TARGET_M68K_GLSEL_M68KLEGALIZERINFO_H + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +namespace llvm { + +class M68kSubtarget; + +/// This struct provides the information for the target register banks. +struct M68kLegalizerInfo : public LegalizerInfo { +public: + M68kLegalizerInfo(const M68kSubtarget &ST); +}; +} // end namespace llvm +#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KLEGALIZERINFO_H diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp new file mode 100644 index 000000000000..5c0f5dae8e37 --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp @@ -0,0 +1,105 @@ +//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the RegisterBankInfo class for M68k. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "M68kRegisterBankInfo.h" +#include "M68kInstrInfo.h" // For the register classes +#include "M68kSubtarget.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_TARGET_REGBANK_IMPL +#include "M68kGenRegisterBank.inc" + +using namespace llvm; + +// FIXME: TableGen this. 
+// If it grows too much and TableGen still isn't ready to do the job, extract it +// into an M68kGenRegisterBankInfo.def (similar to AArch64). +namespace llvm { +namespace M68k { +enum PartialMappingIdx { + PMI_GPR, + PMI_Min = PMI_GPR, +}; + +RegisterBankInfo::PartialMapping PartMappings[]{ + // GPR Partial Mapping + {0, 32, GPRRegBank}, +}; + +enum ValueMappingIdx { + InvalidIdx = 0, + GPR3OpsIdx = 1, +}; + +RegisterBankInfo::ValueMapping ValueMappings[] = { + // invalid + {nullptr, 0}, + // 3 operands in GPRs + {&PartMappings[PMI_GPR - PMI_Min], 1}, + {&PartMappings[PMI_GPR - PMI_Min], 1}, + {&PartMappings[PMI_GPR - PMI_Min], 1}, + +}; +} // end namespace M68k +} // end namespace llvm + +M68kRegisterBankInfo::M68kRegisterBankInfo(const TargetRegisterInfo &TRI) + : M68kGenRegisterBankInfo() {} + +const RegisterBank & +M68kRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, + LLT) const { + return getRegBank(M68k::GPRRegBankID); +} + +const RegisterBankInfo::InstructionMapping & +M68kRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { + auto Opc = MI.getOpcode(); + + if (!isPreISelGenericOpcode(Opc)) { + const InstructionMapping &Mapping = getInstrMappingImpl(MI); + if (Mapping.isValid()) + return Mapping; + } + + using namespace TargetOpcode; + + unsigned NumOperands = MI.getNumOperands(); + const ValueMapping *OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx]; + + switch (Opc) { + case G_ADD: + case G_SUB: + case G_MUL: + case G_SDIV: + case G_UDIV: + case G_LOAD: + case G_STORE: { + OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx]; + break; + } + + case G_CONSTANT: + case G_FRAME_INDEX: + OperandsMapping = + getOperandsMapping({&M68k::ValueMappings[M68k::GPR3OpsIdx], nullptr}); + break; + default: + return getInvalidInstructionMapping(); + } + + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping, + NumOperands); +} diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h 
b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h new file mode 100644 index 000000000000..853c75df2bb3 --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h @@ -0,0 +1,45 @@ +//===-- M68kRegisterBankInfo.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the RegisterBankInfo class for M68k. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H +#define LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H + +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" + +#define GET_REGBANK_DECLARATIONS +#include "M68kGenRegisterBank.inc" +#undef GET_REGBANK_DECLARATIONS + +namespace llvm { + +class TargetRegisterInfo; + +class M68kGenRegisterBankInfo : public RegisterBankInfo { +protected: +#define GET_TARGET_REGBANK_CLASS +#include "M68kGenRegisterBank.inc" +#undef GET_TARGET_REGBANK_CLASS +}; + +/// This class provides the information for the target register banks. 
+class M68kRegisterBankInfo final : public M68kGenRegisterBankInfo { +public: + M68kRegisterBankInfo(const TargetRegisterInfo &TRI); + + const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC, + LLT) const override; + + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; +}; +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td b/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td new file mode 100644 index 000000000000..942677a60e6c --- /dev/null +++ b/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td @@ -0,0 +1,15 @@ +//===-- M68kRegisterBanks.td - Describe the M68k Banks -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Define the M68k register banks used for GlobalISel. +/// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers. 
Here we define a register bank with name AnyGPR +def GPRRegBank : RegisterBank<"AnyGPR", [XR32]>; diff --git a/llvm/test/Analysis/BasicAA/memset_pattern.ll b/llvm/test/Analysis/BasicAA/memset_pattern.ll new file mode 100644 index 000000000000..1096d3896fda --- /dev/null +++ b/llvm/test/Analysis/BasicAA/memset_pattern.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -basic-aa -gvn < %s | FileCheck %s +; PR10872 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7" + +@z = internal global i32 0, align 4 +@y = internal global i32 0, align 4 +@x = internal constant i32 0, align 4 + +; CHECK: @test +define i32 @test() nounwind uwtable ssp { +entry: + store i32 1, i32* @z + tail call void @memset_pattern16(i8* bitcast (i32* @y to i8*), i8* bitcast (i32* @x to i8*), i64 4) nounwind +; CHECK-NOT: load + %l = load i32, i32* @z +; CHECK: ret i32 1 + ret i32 %l +} + +declare void @memset_pattern16(i8*, i8* readonly, i64) argmemonly diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll new file mode 100644 index 000000000000..5b7a7bdbadfe --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll @@ -0,0 +1,204 @@ +; RUN: opt -loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED +; RUN: opt -loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED +; REQUIRES: asserts + +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; (1) Interleave-group with factor 4, storing only 2 members out of the 4. +; Check that when we allow masked-memops to support interleave-group with gaps, +; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask. +; Check that when we don't allow masked-memops to support interleave-group with gaps, +; the store is scalarized. +; The input IR was generated from this source: +; for(i=0;i<1024;i++){ +; points[i*4] = x[i]; +; points[i*4 + 1] = y[i]; +; } +; (relates to the testcase in PR50566) + +; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 + +; 
ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 36 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 73 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 + +define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv + %0 = load i16, i16* %arrayidx, align 2 + %1 = shl nuw nsw i64 %indvars.iv, 2 + %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1 + store i16 %0, i16* %arrayidx2, align 2 + %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv + %2 = load i16, i16* %arrayidx4, align 2 + %3 = or i64 %1, 1 + %arrayidx7 = getelementptr inbounds i16, i16* 
%points, i64 %3 + store i16 %2, i16* %arrayidx7, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +; (2) Same as above, but this time the gaps mask of the store is also And-ed with the +; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is +; not allowed, the store is scalarized and predicated. +; The input IR was generated from this source: +; for(i=0;i 0). +; If using masked memops to vectorize interleaved-group with gaps is +; not allowed, the store is scalarized and predicated. +; Here the Interleave-group is with factor 3, storing only 1 member out of the 3. +; The input IR was generated from this source: +; for(i=0;i<1024;i++){ +; if (x[i] > 0) +; points[i*3] = x[i]; +; } + +; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 + +; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; 
ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 + +define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv + %0 = load i16, i16* %arrayidx, align 2 + %cmp1 = icmp sgt i16 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %1 = mul nuw nsw i64 %indvars.iv, 3 + %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1 + store i16 %0, i16* %arrayidx6, align 2 + br label %for.inc + +for.inc: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll b/llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll new file mode 100644 index 000000000000..35e4935d3ee2 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll @@ -0,0 +1,21 @@ +; RUN: opt -loop-load-elim -mtriple=aarch64--linux-gnu -mattr=+sve < %s + +; This regression test is verifying that a GEP instruction performed on a +; scalable vector does not produce a 'assumption that TypeSize is not scalable' +; warning in the llvm::getGEPInductionOperand function. 
+ +define void @get_gep_induction_operand_typesize_warning(i64 %n, * %a) { +entry: + br label %loop.body + +loop.body: + %0 = phi i64 [ 0, %entry ], [ %1, %loop.body ] + %idx = getelementptr , * %a, i64 %0 + store zeroinitializer, * %idx + %1 = add i64 %0, 1 + %2 = icmp eq i64 %1, %n + br i1 %2, label %loop.end, label %loop.body + +loop.end: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index 27215568482b..355158783b52 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -17,15 +17,17 @@ ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; GFX9-DAG: s_load_dword s[[PTR:[0-9]+]], s[4:5], 0x0{{$}} +; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16) -; GFX9-DAG: s_lshl_b32 s[[SSRC_SHARED_BASE:[0-9]+]], [[SSRC_SHARED]], 16 +; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16 +; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]] ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base -; GFX9: s_cmp_lg_u32 s[[PTR]], -1 -; GFX9: s_cselect_b64 s{{\[}}[[SEL_LO:[0-9]+]]:[[SEL_HI:[0-9]+]]{{\]}}, s{{\[}}[[PTR]]:[[SSRC_SHARED_BASE]]{{\]}}, 0 -; GFX9-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[SEL_LO]] -; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[SEL_HI]] +; GFX9: s_cmp_lg_u32 [[PTR]], -1 +; GFX9: s_cselect_b64 vcc, -1, 0 +; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] @@ -82,17 +84,19 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] -; GFX9-DAG: s_load_dword 
s[[PTR:[0-9]+]], s[4:5], 0x0{{$}} +; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16) -; GFX9-DAG: s_lshl_b32 s[[SSRC_PRIVATE_BASE:[0-9]+]], [[SSRC_PRIVATE]], 16 +; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16 +; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]] ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; GFX9: s_cmp_lg_u32 s[[PTR]], -1 -; GFX9: s_cselect_b64 s{{\[}}[[SEL_LO:[0-9]+]]:[[SEL_HI:[0-9]+]]{{\]}}, s{{\[}}[[PTR]]:[[SSRC_PRIVATE_BASE]]{{\]}}, 0 -; GFX9-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[SEL_LO]] -; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[SEL_HI]] +; GFX9: s_cmp_lg_u32 [[PTR]], -1 +; GFX9: s_cselect_b64 vcc, -1, 0 +; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index a8cefd4e50cf..7cd0add30ccc 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -2,7 +2,6 @@ ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare -amdgpu-bypass-slow-div=0 %s | FileCheck %s ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-- -mcpu=gfx90a -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GFX90A %s define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; CHECK-LABEL: @udiv_i32( @@ -95,34 +94,6 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, 
i32 %x, i32 %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: udiv_i32: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_sub_i32 s4, 0, s3 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, s4, v0 -; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v2, s2, v2 -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = udiv i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -213,32 +184,6 @@ define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: urem_i32: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_sub_i32 s4, 0, s3 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, s4, v0 
-; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = urem i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -362,43 +307,6 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: sdiv_i32: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 -; GFX90A-NEXT: s_add_i32 s3, s3, s4 -; GFX90A-NEXT: s_xor_b32 s3, s3, s4 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_ashr_i32 s5, s2, 31 -; GFX90A-NEXT: s_add_i32 s2, s2, s5 -; GFX90A-NEXT: s_xor_b32 s4, s5, s4 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_xor_b32 s2, s2, s5 -; GFX90A-NEXT: s_sub_i32 s5, 0, s3 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v0 -; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v2, s2, v2 -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 
-; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v0, s4, v0 -; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = sdiv i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -513,40 +421,6 @@ define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: srem_i32: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 -; GFX90A-NEXT: s_add_i32 s3, s3, s4 -; GFX90A-NEXT: s_xor_b32 s3, s3, s4 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_sub_i32 s5, 0, s3 -; GFX90A-NEXT: s_ashr_i32 s4, s2, 31 -; GFX90A-NEXT: s_add_i32 s2, s2, s4 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_xor_b32 s2, s2, s4 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v0 -; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v0, s4, v0 -; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = srem i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -614,26 +488,6 @@ define 
amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc ; GFX9-NEXT: global_store_short v3, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: udiv_i16: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshr_b32 s3, s2, 16 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_and_b32 s2, s2, 0xffff -; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GFX90A-NEXT: v_mul_f32_e32 v2, v1, v2 -; GFX90A-NEXT: v_trunc_f32_e32 v2, v2 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX90A-NEXT: v_mad_f32 v1, -v2, v0, v1 -; GFX90A-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc -; GFX90A-NEXT: global_store_short v3, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = udiv i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -708,28 +562,6 @@ define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_short v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: urem_i16: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshr_b32 s3, s2, 16 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_and_b32 s4, s2, 0xffff -; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s4 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GFX90A-NEXT: v_mul_f32_e32 v2, v1, v2 -; GFX90A-NEXT: v_trunc_f32_e32 v2, v2 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v4, v2 -; GFX90A-NEXT: v_mad_f32 v1, -v2, v0, v1 -; GFX90A-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc -; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 -; 
GFX90A-NEXT: global_store_short v3, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = urem i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -811,6 +643,7 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX9-NEXT: global_store_short v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i16: ; GFX90A: ; %bb.0: @@ -836,6 +669,8 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX90A-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX90A-NEXT: global_store_short v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -924,6 +759,7 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_short v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i16: ; GFX90A: ; %bb.0: @@ -951,6 +787,8 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX90A-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX90A-NEXT: global_store_short v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -1014,24 +852,6 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc ; GFX9-NEXT: global_store_byte v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: udiv_i8: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_cvt_f32_ubyte1_e32 v0, s2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v0 -; GFX90A-NEXT: v_cvt_f32_ubyte0_e32 v3, s2 -; GFX90A-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX90A-NEXT: v_trunc_f32_e32 v1, v1 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v4, v1 -; GFX90A-NEXT: v_mad_f32 v1, -v1, v0, v3 -; GFX90A-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 -; GFX90A-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc -; GFX90A-NEXT: global_store_byte v2, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %r = udiv i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1104,6 +924,7 @@ define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i8: ; GFX90A: ; %bb.0: @@ -1125,6 +946,8 @@ define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX90A-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX90A-NEXT: global_store_byte v2, v0, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1206,6 +1029,7 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX9-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i8: ; GFX90A: ; %bb.0: @@ -1231,6 +1055,8 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX90A-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX90A-NEXT: global_store_byte v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1320,6 +1146,7 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX9-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i8: ; GFX90A: ; %bb.0: @@ -1348,6 +1175,8 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX90A-NEXT: v_sub_u32_e32 v1, s4, v1 ; GFX90A-NEXT: global_store_byte v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1658,6 +1487,7 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v4i32: ; GFX90A: ; %bb.0: @@ -1744,6 +1574,8 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = udiv <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -2030,84 +1862,6 @@ define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: urem_v4i32: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GFX90A-NEXT: s_mov_b32 s12, 0x4f7ffffe -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX90A-NEXT: s_sub_i32 s2, 0, s8 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s9 -; GFX90A-NEXT: s_sub_i32 s3, 0, s9 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX90A-NEXT: v_mul_f32_e32 v0, s12, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_f32_e32 v1, s12, v1 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX90A-NEXT: v_mul_lo_u32 v2, s2, v0 -; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s8 -; GFX90A-NEXT: v_sub_u32_e32 v0, s4, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v2, s8, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v2, s8, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_cvt_f32_u32_e32 v2, s10 -; GFX90A-NEXT: v_mul_lo_u32 v3, s3, v1 -; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX90A-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX90A-NEXT: v_mul_lo_u32 v1, v1, s9 -; GFX90A-NEXT: v_sub_u32_e32 v1, s5, v1 -; GFX90A-NEXT: v_mul_f32_e32 v2, s12, v2 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX90A-NEXT: v_subrev_u32_e32 v3, s9, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 
vcc, s9, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s9, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; GFX90A-NEXT: s_sub_i32 s2, 0, s10 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: v_mul_lo_u32 v3, s2, v2 -; GFX90A-NEXT: v_mul_hi_u32 v3, v2, v3 -; GFX90A-NEXT: v_add_u32_e32 v2, v2, v3 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v3, s11 -; GFX90A-NEXT: v_mul_hi_u32 v2, s6, v2 -; GFX90A-NEXT: v_mul_lo_u32 v2, v2, s10 -; GFX90A-NEXT: v_sub_u32_e32 v2, s6, v2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; GFX90A-NEXT: v_subrev_u32_e32 v5, s10, v2 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX90A-NEXT: v_mul_f32_e32 v3, s12, v3 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX90A-NEXT: v_subrev_u32_e32 v5, s10, v2 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 -; GFX90A-NEXT: s_sub_i32 s2, 0, s11 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX90A-NEXT: v_mul_lo_u32 v5, s2, v3 -; GFX90A-NEXT: v_mul_hi_u32 v5, v3, v5 -; GFX90A-NEXT: v_add_u32_e32 v3, v3, v5 -; GFX90A-NEXT: v_mul_hi_u32 v3, s7, v3 -; GFX90A-NEXT: v_mul_lo_u32 v3, v3, s11 -; GFX90A-NEXT: v_sub_u32_e32 v3, s7, v3 -; GFX90A-NEXT: v_subrev_u32_e32 v5, s11, v3 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v5, s11, v3 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] -; GFX90A-NEXT: s_endpgm %r = urem <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -2526,128 +2280,6 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_subrev_u32_e32 v3, s2, v3 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: sdiv_v4i32: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; 
GFX90A-NEXT: s_mov_b32 s13, 0x4f7ffffe -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_ashr_i32 s2, s8, 31 -; GFX90A-NEXT: s_add_i32 s3, s8, s2 -; GFX90A-NEXT: s_xor_b32 s3, s3, s2 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_ashr_i32 s8, s4, 31 -; GFX90A-NEXT: s_add_i32 s4, s4, s8 -; GFX90A-NEXT: s_xor_b32 s2, s8, s2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_xor_b32 s4, s4, s8 -; GFX90A-NEXT: s_sub_i32 s8, 0, s3 -; GFX90A-NEXT: s_ashr_i32 s12, s9, 31 -; GFX90A-NEXT: v_mul_f32_e32 v0, s13, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v1, s8, v0 -; GFX90A-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v1 -; GFX90A-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX90A-NEXT: v_mul_lo_u32 v1, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v1, s4, v1 -; GFX90A-NEXT: s_add_i32 s4, s9, s12 -; GFX90A-NEXT: s_xor_b32 s4, s4, s12 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v3, s4 -; GFX90A-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v3 -; GFX90A-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_xor_b32_e32 v0, s2, v0 -; GFX90A-NEXT: v_mul_f32_e32 v1, s13, v1 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX90A-NEXT: v_subrev_u32_e32 v0, s2, v0 -; GFX90A-NEXT: s_ashr_i32 s2, s5, 31 -; GFX90A-NEXT: s_add_i32 s5, s5, s2 -; GFX90A-NEXT: s_xor_b32 s3, s2, s12 -; GFX90A-NEXT: s_xor_b32 s2, s5, s2 -; GFX90A-NEXT: s_sub_i32 s5, 0, s4 -; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v1 -; GFX90A-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX90A-NEXT: v_add_u32_e32 v1, v1, v2 -; GFX90A-NEXT: v_mul_hi_u32 v1, s2, v1 -; GFX90A-NEXT: v_mul_lo_u32 v2, v1, s4 -; 
GFX90A-NEXT: v_sub_u32_e32 v2, s2, v2 -; GFX90A-NEXT: s_ashr_i32 s2, s10, 31 -; GFX90A-NEXT: s_add_i32 s5, s10, s2 -; GFX90A-NEXT: s_xor_b32 s5, s5, s2 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v5, s5 -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s4, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v5 -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: v_xor_b32_e32 v1, s3, v1 -; GFX90A-NEXT: v_mul_f32_e32 v2, s13, v2 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX90A-NEXT: v_subrev_u32_e32 v1, s3, v1 -; GFX90A-NEXT: s_ashr_i32 s3, s6, 31 -; GFX90A-NEXT: s_add_i32 s4, s6, s3 -; GFX90A-NEXT: s_xor_b32 s2, s3, s2 -; GFX90A-NEXT: s_xor_b32 s3, s4, s3 -; GFX90A-NEXT: s_sub_i32 s4, 0, s5 -; GFX90A-NEXT: v_mul_lo_u32 v3, s4, v2 -; GFX90A-NEXT: v_mul_hi_u32 v3, v2, v3 -; GFX90A-NEXT: v_add_u32_e32 v2, v2, v3 -; GFX90A-NEXT: v_mul_hi_u32 v2, s3, v2 -; GFX90A-NEXT: v_mul_lo_u32 v3, v2, s5 -; GFX90A-NEXT: v_sub_u32_e32 v3, s3, v3 -; GFX90A-NEXT: s_ashr_i32 s3, s11, 31 -; GFX90A-NEXT: s_add_i32 s4, s11, s3 -; GFX90A-NEXT: s_xor_b32 s4, s4, s3 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v6, s4 -; GFX90A-NEXT: v_add_u32_e32 v5, 1, v2 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v5, s5, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v3, v6 -; GFX90A-NEXT: v_add_u32_e32 v5, 1, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX90A-NEXT: v_xor_b32_e32 v2, s2, v2 -; GFX90A-NEXT: v_mul_f32_e32 v3, s13, v3 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX90A-NEXT: v_subrev_u32_e32 v2, s2, v2 -; GFX90A-NEXT: s_ashr_i32 s2, s7, 31 -; GFX90A-NEXT: s_add_i32 s5, s7, s2 
-; GFX90A-NEXT: s_xor_b32 s3, s2, s3 -; GFX90A-NEXT: s_xor_b32 s2, s5, s2 -; GFX90A-NEXT: s_sub_i32 s5, 0, s4 -; GFX90A-NEXT: v_mul_lo_u32 v5, s5, v3 -; GFX90A-NEXT: v_mul_hi_u32 v5, v3, v5 -; GFX90A-NEXT: v_add_u32_e32 v3, v3, v5 -; GFX90A-NEXT: v_mul_hi_u32 v3, s2, v3 -; GFX90A-NEXT: v_mul_lo_u32 v5, v3, s4 -; GFX90A-NEXT: v_sub_u32_e32 v5, s2, v5 -; GFX90A-NEXT: v_add_u32_e32 v6, 1, v3 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v5 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v6, s4, v5 -; GFX90A-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc -; GFX90A-NEXT: v_add_u32_e32 v6, 1, v3 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v5 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; GFX90A-NEXT: v_xor_b32_e32 v3, s3, v3 -; GFX90A-NEXT: v_subrev_u32_e32 v3, s3, v3 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] -; GFX90A-NEXT: s_endpgm %r = sdiv <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -3030,6 +2662,7 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v3 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v4i32: ; GFX90A: ; %bb.0: @@ -3140,6 +2773,8 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v3 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -3349,6 +2984,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v4i16: ; GFX90A: ; %bb.0: @@ -3408,6 +3044,8 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -3641,6 +3279,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v4i16: ; GFX90A: ; %bb.0: @@ -3708,6 +3347,8 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -3972,6 +3613,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v4i16: ; GFX90A: ; %bb.0: @@ -4050,6 +3692,8 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -4338,6 +3982,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v4i16: ; GFX90A: ; %bb.0: @@ -4424,6 +4069,8 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -4493,6 +4140,7 @@ define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: global_store_byte v2, v0, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i3: ; GFX90A: ; %bb.0: @@ -4514,6 +4162,8 @@ define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX90A-NEXT: global_store_byte v2, v0, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4592,6 +4242,7 @@ define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i3: ; GFX90A: ; %bb.0: @@ -4616,6 +4267,8 @@ define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX90A-NEXT: global_store_byte v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = urem i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4699,6 +4352,7 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i3: ; GFX90A: ; %bb.0: @@ -4725,6 +4379,8 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX90A-NEXT: global_store_byte v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4817,6 +4473,7 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i3: ; GFX90A: ; %bb.0: @@ -4846,6 +4503,8 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX90A-NEXT: global_store_byte v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -5011,6 +4670,7 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v1, v3, s[4:5] offset:4 ; GFX9-NEXT: global_store_dword v1, v0, s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v3i16: ; GFX90A: ; %bb.0: @@ -5058,6 +4718,8 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v3, s[4:5] offset:4 ; GFX90A-NEXT: global_store_dword v1, v0, s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -5245,6 +4907,7 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v3, v2, s[6:7] offset:4 ; GFX9-NEXT: global_store_dword v3, v0, s[6:7] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v3i16: ; GFX90A: ; %bb.0: @@ -5298,6 +4961,8 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v3, s[4:5] offset:4 ; GFX90A-NEXT: global_store_dword v1, v0, s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -5503,6 +5168,7 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v1, v0, s[6:7] offset:4 ; GFX9-NEXT: global_store_dword v1, v2, s[6:7] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v3i16: ; GFX90A: ; %bb.0: @@ -5564,6 +5230,8 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v0, s[6:7] offset:4 ; GFX90A-NEXT: global_store_dword v1, v2, s[6:7] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -5791,6 +5459,7 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v3, v2, s[0:1] offset:4 ; GFX9-NEXT: global_store_dword v3, v0, s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v3i16: ; GFX90A: ; %bb.0: @@ -5858,6 +5527,8 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v3, s[6:7] offset:4 ; GFX90A-NEXT: global_store_dword v1, v0, s[6:7] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -6041,6 +5712,7 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v3i15: ; GFX90A: ; %bb.0: @@ -6097,6 +5769,8 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -6302,6 +5976,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v3i15: ; GFX90A: ; %bb.0: @@ -6366,6 +6041,8 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -6589,6 +6266,7 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v2, v0, s[6:7] offset:4 ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v3i15: ; GFX90A: ; %bb.0: @@ -6659,6 +6337,8 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[6:7] offset:4 ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -6916,6 +6596,7 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v4, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v3i15: ; GFX90A: ; %bb.0: @@ -7000,6 +6681,8 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -7041,6 +6724,7 @@ define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -7056,6 +6740,8 @@ define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -7089,6 +6775,7 @@ define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7100,6 +6787,8 @@ define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -7136,6 +6825,7 @@ define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i32_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -7148,6 +6838,8 @@ define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl i32 4096, %y %r = udiv i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -7191,6 +6883,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7204,6 +6897,8 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -7254,6 +6949,7 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i32_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7271,6 +6967,8 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -7452,58 +7150,6 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: udiv_v2i32_pow2_shl_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 -; GFX90A-NEXT: s_movk_i32 s8, 0x1000 -; GFX90A-NEXT: s_mov_b32 s9, 0x4f7ffffe -; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshl_b32 s2, s8, s2 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX90A-NEXT: s_lshl_b32 s0, s8, s3 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s0 -; GFX90A-NEXT: s_sub_i32 s1, 0, s2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX90A-NEXT: v_mul_f32_e32 v0, s9, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_f32_e32 v1, s9, v1 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX90A-NEXT: v_mul_hi_u32 v3, v0, v3 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v3 -; GFX90A-NEXT: 
v_mul_hi_u32 v0, s6, v0 -; GFX90A-NEXT: v_mul_lo_u32 v3, v0, s2 -; GFX90A-NEXT: v_sub_u32_e32 v3, s6, v3 -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v4, s2, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX90A-NEXT: s_sub_i32 s1, 0, s0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v1 -; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX90A-NEXT: v_mul_hi_u32 v1, s7, v1 -; GFX90A-NEXT: v_mul_lo_u32 v3, v1, s0 -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX90A-NEXT: v_sub_u32_e32 v3, s7, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v4, s0, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = udiv <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -7551,6 +7197,7 @@ define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -7568,6 +7215,8 @@ define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -7601,6 +7250,7 @@ define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7612,6 +7262,8 @@ define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = urem i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -7650,6 +7302,7 @@ define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i32_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -7663,6 +7316,8 @@ define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl i32 4096, %y %r = urem i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -7708,6 +7363,7 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7722,6 +7378,8 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -7891,54 +7549,6 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: urem_v2i32_pow2_shl_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 -; GFX90A-NEXT: s_movk_i32 s8, 0x1000 -; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshl_b32 s2, s8, s2 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX90A-NEXT: s_lshl_b32 s0, s8, s3 -; GFX90A-NEXT: s_mov_b32 s3, 0x4f7ffffe -; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s0 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_sub_i32 s1, 0, s2 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX90A-NEXT: v_mul_f32_e32 v0, s3, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_f32_e32 v1, s3, v1 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v0 -; GFX90A-NEXT: v_mul_hi_u32 v3, v0, v3 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v3 -; GFX90A-NEXT: v_mul_hi_u32 v0, s6, v0 -; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s2 -; GFX90A-NEXT: v_sub_u32_e32 v0, s6, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 -; GFX90A-NEXT: s_sub_i32 s1, 0, s0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v1 -; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX90A-NEXT: v_mul_hi_u32 v1, s7, v1 -; GFX90A-NEXT: v_mul_lo_u32 v1, v1, s0 -; GFX90A-NEXT: v_sub_u32_e32 v1, s7, v1 -; GFX90A-NEXT: v_subrev_u32_e32 v3, s0, v1 -; GFX90A-NEXT: 
v_cmp_le_u32_e32 vcc, s0, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s0, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = urem <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -7981,6 +7591,7 @@ define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -7996,6 +7607,8 @@ define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -8035,6 +7648,7 @@ define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8049,6 +7663,8 @@ define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -8138,44 +7754,6 @@ define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: v_subrev_u32_e32 v0, s2, v0 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: sdiv_i32_pow2_shl_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshl_b32 s3, 0x1000, s3 -; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 -; GFX90A-NEXT: s_add_i32 s3, s3, s4 -; GFX90A-NEXT: s_xor_b32 s3, s3, s4 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_sub_i32 s6, 0, s3 -; GFX90A-NEXT: s_ashr_i32 s5, s2, 31 -; GFX90A-NEXT: s_add_i32 s2, s2, s5 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_xor_b32 s2, s2, s5 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, s6, v0 -; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX90A-NEXT: v_mul_lo_u32 v3, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v3, s2, v3 -; GFX90A-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX90A-NEXT: s_xor_b32 s2, s5, s4 -; GFX90A-NEXT: v_xor_b32_e32 v0, s2, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v0, s2, v0 -; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %shl.y = shl i32 4096, %y %r = sdiv i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -8231,6 +7809,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; 
GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8250,6 +7829,8 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -8306,6 +7887,7 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: ssdiv_v2i32_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8326,6 +7908,8 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -8559,76 +8143,6 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_subrev_u32_e32 v1, s1, v1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: sdiv_v2i32_pow2_shl_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 -; GFX90A-NEXT: s_movk_i32 s8, 0x1000 -; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c -; GFX90A-NEXT: s_mov_b32 s10, 0x4f7ffffe -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshl_b32 s2, s8, s2 -; GFX90A-NEXT: s_ashr_i32 s9, s2, 31 -; GFX90A-NEXT: s_add_i32 s2, s2, s9 -; GFX90A-NEXT: s_xor_b32 s2, s2, s9 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX90A-NEXT: s_ashr_i32 s1, s6, 31 -; GFX90A-NEXT: s_lshl_b32 s0, s8, s3 -; GFX90A-NEXT: s_add_i32 s3, s6, s1 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_xor_b32 s6, s1, s9 -; GFX90A-NEXT: s_xor_b32 s1, s3, s1 -; GFX90A-NEXT: s_sub_i32 s3, 0, s2 -; GFX90A-NEXT: v_mul_f32_e32 v0, s10, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v1, s3, v0 -; GFX90A-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v1 -; GFX90A-NEXT: v_mul_hi_u32 v0, s1, v0 -; GFX90A-NEXT: v_mul_lo_u32 v1, v0, s2 -; GFX90A-NEXT: v_sub_u32_e32 v1, s1, v1 -; GFX90A-NEXT: s_ashr_i32 s1, s0, 31 -; GFX90A-NEXT: s_add_i32 s0, s0, s1 -; GFX90A-NEXT: s_xor_b32 s0, s0, s1 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v4, s0 -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v1 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v4 -; GFX90A-NEXT: s_ashr_i32 s2, s7, 31 -; GFX90A-NEXT: s_add_i32 s3, 
s7, s2 -; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX90A-NEXT: v_mul_f32_e32 v1, s10, v1 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX90A-NEXT: s_xor_b32 s1, s2, s1 -; GFX90A-NEXT: s_xor_b32 s2, s3, s2 -; GFX90A-NEXT: s_sub_i32 s3, 0, s0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX90A-NEXT: v_mul_lo_u32 v3, s3, v1 -; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX90A-NEXT: v_mul_hi_u32 v1, s2, v1 -; GFX90A-NEXT: v_mul_lo_u32 v3, v1, s0 -; GFX90A-NEXT: v_sub_u32_e32 v3, s2, v3 -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v4, s0, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 -; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX90A-NEXT: v_xor_b32_e32 v0, s6, v0 -; GFX90A-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX90A-NEXT: v_subrev_u32_e32 v0, s6, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v1, s1, v1 -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = sdiv <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -8676,6 +8190,7 @@ define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -8693,6 +8208,8 @@ define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -8734,6 +8251,7 @@ define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8749,6 +8267,8 @@ define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -8832,41 +8352,6 @@ define amdgpu_kernel void @srem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: srem_i32_pow2_shl_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshl_b32 s3, 0x1000, s3 -; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 -; GFX90A-NEXT: s_add_i32 s3, s3, s4 -; GFX90A-NEXT: s_xor_b32 s3, s3, s4 -; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_sub_i32 s5, 0, s3 -; GFX90A-NEXT: s_ashr_i32 s4, s2, 31 -; GFX90A-NEXT: s_add_i32 s2, s2, s4 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: s_xor_b32 s2, s2, s4 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v0 -; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 -; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; GFX90A-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 -; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX90A-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX90A-NEXT: v_subrev_u32_e32 v0, s4, v0 -; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] -; GFX90A-NEXT: s_endpgm %shl.y = shl i32 4096, %y %r = srem i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -8928,6 +8413,7 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8950,6 +8436,8 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -9166,6 +8654,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_subrev_u32_e32 v1, s6, v1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i32_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -9230,6 +8719,8 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX90A-NEXT: v_subrev_u32_e32 v1, s1, v1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl <2 x i32> , %y %r = srem <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -9460,20 +8951,28 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s6, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v4 +<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v5, s[0:1] +======= +; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v6, s[0:1], 2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v8, s[0:1], 1, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v7, s[0:1] +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_mov_b32_e32 v7, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v7, v2, vcc ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s3, v2 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s6, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 1, 2, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[0:1], v0, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, v1, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] @@ -9593,6 +9092,13 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 
v0, v2, vcc +; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[4:5] +; GFX9-NEXT: s_endpgm +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i64 %x, 1235195949943 store i64 %r, i64 addrspace(1)* %out ret void @@ -9628,16 +9134,6 @@ define amdgpu_kernel void @udiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: udiv_i64_pow2k_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_lshr_b64 s[2:3], s[2:3], 12 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX90A-NEXT: s_endpgm %r = udiv i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -9678,6 +9174,7 @@ define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -9690,6 +9187,8 @@ define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl i64 4096, %y %r = udiv i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -9737,6 +9236,7 @@ define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -9752,6 +9252,8 @@ define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -9891,11 +9393,17 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 +<<<<<<< HEAD ; GFX9-NEXT: s_movk_i32 s8, 0xfff ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s2 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, s2 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, s2 +======= +; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, s4 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, s4 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 ; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 ; GFX9-NEXT: v_mul_hi_u32 v6, v0, v3 @@ -9911,7 +9419,37 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +======= +; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] +; GFX9-NEXT: v_mul_hi_u32 v4, v0, s4 +; GFX9-NEXT: v_mul_lo_u32 v6, v2, s4 +; GFX9-NEXT: v_mul_lo_u32 v8, v0, s4 +; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v0 +; GFX9-NEXT: v_add_u32_e32 v4, v4, v6 +; GFX9-NEXT: v_mul_lo_u32 v6, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v9, v0, v8 +; GFX9-NEXT: v_mul_hi_u32 v10, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v11, v2, v4 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v9, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v7, v10, vcc +; GFX9-NEXT: v_mul_lo_u32 v10, v2, v8 +; GFX9-NEXT: v_mul_hi_u32 v8, v2, v8 +; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 +; GFX9-NEXT: s_movk_i32 s0, 0xfff +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v10 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v9, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v11, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v4, s[2:3] +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s2 @@ -9947,10 +9485,15 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc ; GFX9-NEXT: v_mul_lo_u32 v4, s7, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 +<<<<<<< HEAD +======= +; GFX9-NEXT: s_lshr_b64 s[2:3], s[4:5], 12 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 @@ -9976,10 +9519,42 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v6, vcc +======= +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, s0 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s0 +; GFX9-NEXT: v_mul_lo_u32 v9, v0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 +; GFX9-NEXT: v_mov_b32_e32 v6, s7 +; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s6, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v6, v4, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s0, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v4, vcc +; GFX9-NEXT: s_movk_i32 s0, 0xffe +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX9-NEXT: global_store_dwordx4 v5, v[0:3], s[8:9] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i64_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -10084,6 +9659,8 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -10139,6 +9716,7 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -10157,6 +9735,8 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl <2 x i64> , %y %r = udiv <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -10391,6 +9971,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[2:3] ; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 +<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v4, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc @@ -10523,6 +10104,23 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v6, s7 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s8, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[4:5] +; GFX9-NEXT: s_endpgm +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem i64 %x, 1235195393993 store i64 %r, i64 addrspace(1)* %out ret void @@ -10557,16 +10155,6 @@ define amdgpu_kernel void @urem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: urem_i64_pow2k_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_and_b32 s2, s2, 0xfff -; GFX90A-NEXT: v_mov_b32_e32 v0, s2 -; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX90A-NEXT: s_endpgm %r = urem i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -10613,6 +10201,7 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -10628,6 +10217,8 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl i64 4096, %y %r = urem i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -10676,6 +10267,7 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: global_store_dwordx4 v1, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -10691,6 +10283,8 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 ; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = urem <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -10756,6 +10350,7 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -10779,6 +10374,8 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl <2 x i64> , %y %r = urem <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -10921,10 +10518,17 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s4 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s4 ; GFX9-NEXT: v_mul_lo_u32 v4, v0, s4 +======= +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, v4 @@ -10940,6 +10544,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v4, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -10980,10 +10585,55 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_mul_lo_u32 v4, s3, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s3, v0 ; GFX9-NEXT: s_mov_b32 s5, 0x12d8fb +======= +; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v10, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v11, v0, v8 +; GFX9-NEXT: v_mul_hi_u32 v12, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v9, v2, v8 +; GFX9-NEXT: v_mul_lo_u32 v8, v2, v8 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 +; GFX9-NEXT: v_mul_hi_u32 v6, v2, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v7, v12, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v4, s[2:3] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_ashr_i32 s2, s7, 31 +; GFX9-NEXT: s_add_u32 s0, s6, s2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: 
s_mov_b32 s3, s2 +; GFX9-NEXT: s_addc_u32 s1, s7, s2 +; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, s0, v0 +; GFX9-NEXT: v_mul_hi_u32 v4, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, s1, v1 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, s1, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX9-NEXT: s_mov_b32 s3, 0x12d8fb +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s5 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s5 @@ -11014,9 +10664,45 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 +======= +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, s3 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s3 +; GFX9-NEXT: v_mul_lo_u32 v9, v0, s3 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 +; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s0, v9 +; GFX9-NEXT: v_mov_b32_e32 v6, s1 +; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v6, v4, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s3, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v4, vcc +; GFX9-NEXT: s_mov_b32 s0, 0x12d8fa +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s2, v0 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i64_oddk_denom: ; GFX90A: ; %bb.0: @@ -11127,6 +10813,8 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv i64 %x, 1235195 store i64 %r, i64 addrspace(1)* %out ret void @@ -11170,20 +10858,6 @@ define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: sdiv_i64_pow2k_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 -; GFX90A-NEXT: s_lshr_b32 s4, s4, 20 -; GFX90A-NEXT: s_add_u32 s2, s2, s4 -; GFX90A-NEXT: s_addc_u32 s3, s3, 0 -; GFX90A-NEXT: s_ashr_i64 s[2:3], s[2:3], 12 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX90A-NEXT: s_endpgm %r = sdiv i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -11441,8 +11115,15 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v6, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v6, s[0:1], 2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v8, s[0:1], 1, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v7, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v7, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v7, v3, vcc +<<<<<<< HEAD ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc @@ -11451,12 +11132,25 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s9, v3 ; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v0, v5 +======= +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX9-NEXT: 
v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s11, v3 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], 0, v1, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GFX9-NEXT: s_xor_b64 s[0:1], s[10:11], s[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +======= +; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[8:9] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 ; GFX9-NEXT: v_xor_b32_e32 v1, s1, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 @@ -11464,6 +11158,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -11595,6 +11290,8 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl i64 4096, %y %r = sdiv i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -11658,6 +11355,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -11681,6 +11379,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -11855,13 +11555,48 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v7, vcc ; GFX9-NEXT: v_mul_lo_u32 v7, v1, v5 ; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 +<<<<<<< HEAD ; GFX9-NEXT: s_ashr_i64 s[2:3], s[2:3], 12 ; GFX9-NEXT: s_ashr_i32 s4, s7, 31 +======= +; GFX9-NEXT: s_ashr_i64 s[4:5], s[4:5], 12 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v8, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc +======= +; GFX9-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v5, v2, s8 +; GFX9-NEXT: v_mul_hi_u32 v7, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 +; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 +; GFX9-NEXT: v_sub_u32_e32 v5, v5, v0 +; GFX9-NEXT: v_mul_lo_u32 v10, v0, v5 +; GFX9-NEXT: v_mul_hi_u32 v11, v0, v8 +; GFX9-NEXT: v_mul_hi_u32 v12, v0, v5 +; GFX9-NEXT: v_mul_hi_u32 v9, v2, v8 +; GFX9-NEXT: v_mul_lo_u32 v8, v2, v8 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 +; GFX9-NEXT: v_mul_hi_u32 v7, v2, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v12, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v2, v5 +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v5, s[2:3] +; GFX9-NEXT: s_ashr_i32 s2, s7, 31 +; GFX9-NEXT: s_add_u32 s6, s6, s2 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 @@ -11898,12 +11633,17 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc ; GFX9-NEXT: v_mul_lo_u32 v5, s7, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 +<<<<<<< HEAD ; GFX9-NEXT: s_movk_i32 s5, 0xfff ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +======= +; GFX9-NEXT: s_movk_i32 s0, 0xfff +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s5 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s5 @@ -11934,12 +11674,48 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, s4 ; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s4, v0 +======= +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v2, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v1, s0 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s0 +; GFX9-NEXT: v_mul_lo_u32 v9, v0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 +; GFX9-NEXT: v_mov_b32_e32 v6, s7 +; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s6, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s0, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v5, vcc +; GFX9-NEXT: s_movk_i32 s0, 0xffe +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GFX9-NEXT: 
v_cndmask_b32_e32 v6, -1, v6, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, -1, v6, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: ssdiv_v2i64_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -12059,6 +11835,8 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -12451,6 +12229,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_subbrev_co_u32_e64 v5, s[0:1], 0, v5, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] +<<<<<<< HEAD ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v5 @@ -12563,6 +12342,123 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v7, vcc ; GFX9-NEXT: v_add_u32_e32 v5, v5, v8 ; GFX9-NEXT: v_sub_u32_e32 v7, s7, v5 +======= +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v7, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v7, s[0:1], 2, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v8, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], 1, v0 +; GFX9-NEXT: v_addc_co_u32_e64 v10, s[0:1], 0, v1, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v8, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v8, s5 +; GFX9-NEXT: s_xor_b64 s[4:5], s[14:15], s[12:13] +; GFX9-NEXT: s_ashr_i32 s12, s9, 31 +; GFX9-NEXT: s_add_u32 s8, s8, s12 +; GFX9-NEXT: s_mov_b32 s13, s12 +; GFX9-NEXT: s_addc_u32 s9, s9, s12 +; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] +; GFX9-NEXT: v_cvt_f32_u32_e32 v10, s8 +; GFX9-NEXT: v_cvt_f32_u32_e32 v11, s9 +; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v8, v2, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s11, v2 +; GFX9-NEXT: v_mac_f32_e32 v10, s16, v11 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc +; GFX9-NEXT: v_rcp_f32_e32 v3, v10 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, 
vcc +; GFX9-NEXT: s_sub_u32 s10, 0, s8 +; GFX9-NEXT: v_mul_f32_e32 v3, s17, v3 +; GFX9-NEXT: v_mul_f32_e32 v4, s18, v3 +; GFX9-NEXT: v_trunc_f32_e32 v4, v4 +; GFX9-NEXT: v_mac_f32_e32 v3, s19, v4 +; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, v7, s[0:1] +; GFX9-NEXT: s_subb_u32 s11, 0, s9 +; GFX9-NEXT: v_mul_lo_u32 v8, s10, v4 +; GFX9-NEXT: v_mul_hi_u32 v7, s10, v3 +; GFX9-NEXT: v_mul_lo_u32 v9, s11, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s10, v3 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_lo_u32 v8, v3, v7 +; GFX9-NEXT: v_mul_hi_u32 v9, v3, v2 +; GFX9-NEXT: v_mul_hi_u32 v10, v3, v7 +; GFX9-NEXT: v_mul_hi_u32 v11, v4, v7 +; GFX9-NEXT: v_mul_lo_u32 v7, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v9, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v10, vcc +; GFX9-NEXT: v_mul_lo_u32 v10, v4, v2 +; GFX9-NEXT: v_mul_hi_u32 v2, v4, v2 +; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1 +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v10 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v9, v2, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 +; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v5, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v3, vcc, v4, v7, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v8, s10, v3 +; GFX9-NEXT: v_mul_hi_u32 v9, s10, v2 +; GFX9-NEXT: v_mul_lo_u32 v10, s11, v2 +; GFX9-NEXT: v_mul_lo_u32 v11, s10, v2 +; GFX9-NEXT: s_ashr_i32 s10, s7, 31 +; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 +; GFX9-NEXT: v_add_u32_e32 v8, v8, v10 +; GFX9-NEXT: v_mul_lo_u32 v12, v2, v8 +; GFX9-NEXT: v_mul_hi_u32 v13, v2, v11 +; GFX9-NEXT: v_mul_hi_u32 v14, v2, v8 +; GFX9-NEXT: v_mul_hi_u32 v10, v3, v11 +; GFX9-NEXT: v_mul_lo_u32 v11, v3, v11 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v13, v12 +; GFX9-NEXT: v_mul_hi_u32 
v9, v3, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v14, vcc +; GFX9-NEXT: v_mul_lo_u32 v3, v3, v8 +; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v12, v11 +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v13, v10, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v9, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v10, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v5, v8, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v4, v7 +; GFX9-NEXT: v_addc_co_u32_e64 v4, vcc, v4, v8, s[0:1] +; GFX9-NEXT: s_add_u32 s0, s6, s10 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: s_mov_b32 s11, s10 +; GFX9-NEXT: s_addc_u32 s1, s7, s10 +; GFX9-NEXT: s_xor_b64 s[6:7], s[0:1], s[10:11] +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, s6, v3 +; GFX9-NEXT: v_mul_hi_u32 v7, s6, v2 +; GFX9-NEXT: v_mul_hi_u32 v9, s6, v3 +; GFX9-NEXT: v_mul_hi_u32 v10, s7, v3 +; GFX9-NEXT: v_mul_lo_u32 v3, s7, v3 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v7, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v9, vcc +; GFX9-NEXT: v_mul_lo_u32 v9, s7, v2 +; GFX9-NEXT: v_mul_hi_u32 v2, s7, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, s5 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v7, v2, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v10, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, s8, v3 +; GFX9-NEXT: v_mul_hi_u32 v5, s8, v2 +; GFX9-NEXT: v_mul_lo_u32 v7, s9, v2 +; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v8, vcc +; GFX9-NEXT: v_add_u32_e32 v4, v5, v4 +; GFX9-NEXT: v_mul_lo_u32 v5, s8, v2 +; GFX9-NEXT: v_add_u32_e32 v4, v4, v7 +; GFX9-NEXT: v_sub_u32_e32 v7, s7, v4 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_mov_b32_e32 v8, s9 ; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, s6, v6 ; GFX9-NEXT: v_subb_co_u32_e64 v7, s[0:1], v7, v8, vcc @@ -12574,7 +12470,14 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v8, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v8, s[0:1], 2, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v9, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v9, s7 +<<<<<<< HEAD ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v9, v5, vcc ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v5 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 @@ -12595,9 +12498,29 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mov_b32_e32 v5, s1 ; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s0, v3 ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v5, vcc +======= +; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v9, v4, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s9, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v8, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: s_xor_b64 s[0:1], s[10:11], s[12:13] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX9-NEXT: v_xor_b32_e32 v2, s0, v2 +; GFX9-NEXT: v_xor_b32_e32 v3, s1, v3 +; GFX9-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -12854,6 +12777,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v5, vcc ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl <2 x i64> , %y %r = sdiv <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -12994,10 +12919,17 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s4 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s4 ; GFX9-NEXT: v_mul_lo_u32 v4, v0, s4 +======= +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, v4 @@ -13013,6 +12945,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v4, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -13053,30 +12986,91 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_mul_lo_u32 v4, s3, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s3, v0 ; GFX9-NEXT: s_mov_b32 s5, 0x12d8fb +======= +; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v0 +; GFX9-NEXT: v_mul_lo_u32 v10, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v11, v0, v8 +; GFX9-NEXT: v_mul_hi_u32 v12, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v9, v2, v8 +; GFX9-NEXT: v_mul_lo_u32 v8, v2, v8 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 +; GFX9-NEXT: v_mul_hi_u32 v6, v2, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v7, v12, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v4, s[2:3] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_ashr_i32 s2, s7, 31 +; GFX9-NEXT: s_add_u32 s0, s6, s2 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: 
s_mov_b32 s3, s2 +; GFX9-NEXT: s_addc_u32 s1, s7, s2 +; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, s0, v0 +; GFX9-NEXT: v_mul_hi_u32 v4, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, s1, v1 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, s1, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX9-NEXT: s_mov_b32 s3, 0x12d8fb +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v1, v1, s5 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s5 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s5 +======= +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc +; GFX9-NEXT: v_mul_hi_u32 v2, v0, s3 +; GFX9-NEXT: v_mul_lo_u32 v1, v1, s3 +; GFX9-NEXT: v_mul_lo_u32 v0, v0, s3 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s2, v0 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc +<<<<<<< HEAD ; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s5, v0 ; GFX9-NEXT: v_subbrev_co_u32_e32 v3, vcc, 0, v1, vcc ; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s5, v2 ; GFX9-NEXT: v_subbrev_co_u32_e32 v6, vcc, 0, v3, vcc ; GFX9-NEXT: s_mov_b32 s2, 0x12d8fa ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s2, v2 +======= +; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s3, v0 +; GFX9-NEXT: v_subbrev_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s3, v2 +; GFX9-NEXT: v_subbrev_co_u32_e32 v6, vcc, 0, v3, vcc +; GFX9-NEXT: s_mov_b32 s0, 0x12d8fa +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v2 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +<<<<<<< HEAD ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s2, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -13088,9 +13082,23 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 +======= +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX9-NEXT: v_xor_b32_e32 v0, 
s2, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s2, v0 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i64_oddk_denom: ; GFX90A: ; %bb.0: @@ -13202,6 +13210,8 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem i64 %x, 1235195 store i64 %r, i64 addrspace(1)* %out ret void @@ -13249,22 +13259,6 @@ define amdgpu_kernel void @srem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -; -; GFX90A-LABEL: srem_i64_pow2k_denom: -; GFX90A: ; %bb.0: -; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 -; GFX90A-NEXT: s_lshr_b32 s4, s4, 20 -; GFX90A-NEXT: s_add_u32 s4, s2, s4 -; GFX90A-NEXT: s_addc_u32 s5, s3, 0 -; GFX90A-NEXT: s_and_b32 s4, s4, 0xfffff000 -; GFX90A-NEXT: s_sub_u32 s2, s2, s4 -; GFX90A-NEXT: s_subb_u32 s3, s3, s5 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX90A-NEXT: s_endpgm %r = srem i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -13524,19 +13518,19 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[2:3] ; GFX9-NEXT: v_subbrev_co_u32_e64 v3, s[0:1], 0, v3, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v5, 
v4, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v5, s7 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v6, s7 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v4, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, s10, v0 ; GFX9-NEXT: v_xor_b32_e32 v1, s10, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, s10 @@ -13544,6 +13538,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -13676,6 +13671,8 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl i64 4096, %y %r = srem i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -13749,6 +13746,7 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -13777,6 +13775,8 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -14155,6 +14155,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mul_hi_u32 v3, s12, v1 ; GFX9-NEXT: v_mul_lo_u32 v4, s13, v1 ; GFX9-NEXT: v_mul_lo_u32 v1, s12, v1 +<<<<<<< HEAD ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 ; GFX9-NEXT: v_sub_u32_e32 v3, s15, v2 @@ -14218,6 +14219,74 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v9, vcc ; GFX9-NEXT: v_mul_lo_u32 v9, v5, v3 ; GFX9-NEXT: v_mul_hi_u32 v3, v5, v3 +======= +; GFX9-NEXT: v_mul_hi_u32 v2, s12, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, s13, v0 +; GFX9-NEXT: v_mul_lo_u32 v0, s12, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_sub_u32_e32 v2, s15, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, s13 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s14, v0 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v2, v3, vcc +; GFX9-NEXT: v_subrev_co_u32_e64 v4, s[0:1], s12, v0 +; GFX9-NEXT: v_subbrev_co_u32_e64 v7, s[2:3], 0, v2, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e64 s[2:3], s13, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[2:3] +; GFX9-NEXT: v_cmp_le_u32_e64 s[2:3], s12, v4 +; 
GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[2:3] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], s13, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[2:3] +; GFX9-NEXT: s_ashr_i32 s2, s11, 31 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v2, v3, s[0:1] +; GFX9-NEXT: s_add_u32 s10, s10, s2 +; GFX9-NEXT: v_subrev_co_u32_e64 v3, s[0:1], s12, v4 +; GFX9-NEXT: s_mov_b32 s3, s2 +; GFX9-NEXT: s_addc_u32 s11, s11, s2 +; GFX9-NEXT: s_xor_b64 s[10:11], s[10:11], s[2:3] +; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[0:1], 0, v2, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 +; GFX9-NEXT: v_cvt_f32_u32_e32 v8, s10 +; GFX9-NEXT: v_cvt_f32_u32_e32 v9, s11 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v7, s15 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v7, v1, vcc +; GFX9-NEXT: v_mac_f32_e32 v8, s16, v9 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; GFX9-NEXT: v_rcp_f32_e32 v8, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s13, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GFX9-NEXT: v_mul_f32_e32 v3, s17, v8 +; GFX9-NEXT: v_mul_f32_e32 v4, s18, v3 +; GFX9-NEXT: v_trunc_f32_e32 v4, v4 +; GFX9-NEXT: v_mac_f32_e32 v3, s19, v4 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX9-NEXT: s_sub_u32 s2, 0, s10 +; GFX9-NEXT: s_subb_u32 s3, 0, s11 +; GFX9-NEXT: v_mul_hi_u32 v7, s2, v3 +; GFX9-NEXT: v_mul_lo_u32 v8, s2, v4 +; GFX9-NEXT: v_mul_lo_u32 v9, s3, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s2, v3 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_lo_u32 v8, v3, v7 +; GFX9-NEXT: v_mul_hi_u32 v9, v3, v2 +; GFX9-NEXT: v_mul_hi_u32 v10, v3, v7 +; GFX9-NEXT: v_mul_hi_u32 
v11, v4, v7 +; GFX9-NEXT: v_mul_lo_u32 v7, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v9, v8 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v10, vcc +; GFX9-NEXT: v_mul_lo_u32 v10, v4, v2 +; GFX9-NEXT: v_mul_hi_u32 v2, v4, v2 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: s_ashr_i32 s12, s7, 31 ; GFX9-NEXT: s_mov_b32 s13, s12 ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9 @@ -14293,6 +14362,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[2:3] ; GFX9-NEXT: v_subbrev_co_u32_e64 v5, s[0:1], 0, v5, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 +<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v7, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v7, v4, vcc @@ -14306,6 +14376,22 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +======= +; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v8, s7 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v8, v3, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s11, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v5, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_xor_b32_e32 v2, s12, v2 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_xor_b32_e32 v3, s12, v3 ; GFX9-NEXT: v_xor_b32_e32 v4, s12, v4 ; GFX9-NEXT: v_mov_b32_e32 v5, s12 @@ -14314,6 +14400,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v0, v[1:4], s[4:5] ; GFX9-NEXT: s_endpgm +<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -14572,6 +14659,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v5, vcc ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl <2 x i64> , %y %r = srem <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll new file mode 100644 index 000000000000..a6ba6a16223f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -0,0 +1,615 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s + +; GCN-LABEL: {{^}}use_dispatch_ptr: +; GCN: s_load_dword s{{[0-9]+}}, s[4:5] +define hidden void @use_dispatch_ptr() #1 { + %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 + %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* + %value = load volatile i32, i32 addrspace(4)* %header_ptr + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr: +; GCN: enable_sgpr_dispatch_ptr = 1 +; GCN-NOT: 
s[4:5] +; GCN-NOT: s4 +; GCN-NOT: s5 +define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 { + call void @use_dispatch_ptr() + ret void +} + +; GCN-LABEL: {{^}}use_queue_ptr: +; GCN: s_load_dword s{{[0-9]+}}, s[4:5] +define hidden void @use_queue_ptr() #1 { + %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 + %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* + %value = load volatile i32, i32 addrspace(4)* %header_ptr + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr: +; GCN: enable_sgpr_queue_ptr = 1 +; GCN-NOT: s[4:5] +; GCN-NOT: s4 +; GCN-NOT: s5 +define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 { + call void @use_queue_ptr() + ret void +} + +; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast: +; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x10 +; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]] +; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]] +; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}} +; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}} +define hidden void @use_queue_ptr_addrspacecast() #1 { + %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32* + store volatile i32 0, i32* %asc + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast: +; CIVI: enable_sgpr_queue_ptr = 1 +; CIVI-NOT: s[4:5] +; CIVI-NOT: s4 +; CIVI-NOT: s5 +define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 { + call void @use_queue_ptr_addrspacecast() + ret void +} + +; Not really supported in callable functions. 
+; GCN-LABEL: {{^}}use_kernarg_segment_ptr: +; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}} +define hidden void @use_kernarg_segment_ptr() #1 { + %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 + %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)* + %value = load volatile i32, i32 addrspace(4)* %header_ptr + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr: +; GCN: enable_sgpr_kernarg_segment_ptr = 1 +define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 { + call void @use_kernarg_segment_ptr() + ret void +} + +; GCN-LABEL: {{^}}use_dispatch_id: +; GCN: ; use s[4:5] +define hidden void @use_dispatch_id() #1 { + %id = call i64 @llvm.amdgcn.dispatch.id() + call void asm sideeffect "; use $0", "s"(i64 %id) + ret void +} + +; No kernarg segment so that there is a mov to check. With kernarg +; pointer enabled, it happens to end up in the right place anyway. 
+ +; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id: +; GCN: enable_sgpr_dispatch_id = 1 +; GCN-NOT: s[4:5] +; GCN-NOT: s4 +; GCN-NOT: s5 +define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 { + call void @use_dispatch_id() + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_x: +; GCN: s_waitcnt +; GCN: ; use s4 +define hidden void @use_workgroup_id_x() #1 { + %val = call i32 @llvm.amdgcn.workgroup.id.x() + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}use_stack_workgroup_id_x: +; GCN: s_waitcnt +; GCN-NOT: s32 +; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}} +; GCN: ; use s4 +; GCN: s_setpc_b64 +define hidden void @use_stack_workgroup_id_x() #1 { + %alloca = alloca i32, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca + %val = call i32 @llvm.amdgcn.workgroup.id.x() + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_y: +; GCN: s_waitcnt +; GCN: ; use s4 +define hidden void @use_workgroup_id_y() #1 { + %val = call i32 @llvm.amdgcn.workgroup.id.y() + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_z: +; GCN: s_waitcnt +; GCN: ; use s4 +define hidden void @use_workgroup_id_z() #1 { + %val = call i32 @llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_xy: +; GCN: ; use s4 +; GCN: ; use s5 +define hidden void @use_workgroup_id_xy() #1 { + %val0 = call i32 @llvm.amdgcn.workgroup.id.x() + %val1 = call i32 @llvm.amdgcn.workgroup.id.y() + call void asm sideeffect "; use $0", "s"(i32 %val0) + call void asm sideeffect "; use $0", "s"(i32 %val1) + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_xyz: +; GCN: ; use s4 +; GCN: ; use s5 +; GCN: ; use s6 +define hidden void @use_workgroup_id_xyz() #1 { + %val0 = call i32 @llvm.amdgcn.workgroup.id.x() + %val1 = call i32 @llvm.amdgcn.workgroup.id.y() + %val2 = call i32 
@llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val0) + call void asm sideeffect "; use $0", "s"(i32 %val1) + call void asm sideeffect "; use $0", "s"(i32 %val2) + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_xz: +; GCN: ; use s4 +; GCN: ; use s5 +define hidden void @use_workgroup_id_xz() #1 { + %val0 = call i32 @llvm.amdgcn.workgroup.id.x() + %val1 = call i32 @llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val0) + call void asm sideeffect "; use $0", "s"(i32 %val1) + ret void +} + +; GCN-LABEL: {{^}}use_workgroup_id_yz: +; GCN: ; use s4 +; GCN: ; use s5 +define hidden void @use_workgroup_id_yz() #1 { + %val0 = call i32 @llvm.amdgcn.workgroup.id.y() + %val1 = call i32 @llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val0) + call void asm sideeffect "; use $0", "s"(i32 %val1) + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 0 +; GCN: enable_sgpr_workgroup_id_z = 0 + +; GCN-NOT: s6 +; GCN: s_mov_b32 s4, s6 +; GCN: s_mov_b32 s32, 0 +; GCN: s_getpc_b64 s[6:7] +; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+12 +; GCN: s_swappc_b64 +; GCN-NEXT: s_endpgm +define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 { + call void @use_workgroup_id_x() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 0 + +; GCN: s_mov_b32 s4, s7 +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { + call void @use_workgroup_id_y() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 0 +; GCN: enable_sgpr_workgroup_id_z = 1 + +; GCN: s_mov_b32 s4, 
s7 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { + call void @use_workgroup_id_z() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 0 + +; GCN: s_mov_b32 s5, s7 +; GCN: s_mov_b32 s4, s6 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 { + call void @use_workgroup_id_xy() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 1 + +; GCN: s_mov_b32 s5, s7 +; GCN: s_mov_b32 s4, s6 +; GCN: s_mov_b32 s6, s8 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 { + call void @use_workgroup_id_xyz() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 0 +; GCN: enable_sgpr_workgroup_id_z = 1 + +; GCN: s_mov_b32 s5, s7 +; GCN: s_mov_b32 s4, s6 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 { + call void @use_workgroup_id_xz() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 1 + +; GCN: s_mov_b32 s5, s8 +; GCN: s_mov_b32 s4, s7 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { + call void @use_workgroup_id_yz() + ret void +} + +; Argument is in right place already +; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x: +; GCN-NOT: s4 +; GCN: v_readlane_b32 s4, v40, 0 +define hidden void @func_indirect_use_workgroup_id_x() #1 { + call void @use_workgroup_id_x() + 
ret void +} + +; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: +; GCN-NOT: s4 +; GCN: v_readlane_b32 s4, v40, 0 +define hidden void @func_indirect_use_workgroup_id_y() #1 { + call void @use_workgroup_id_y() + ret void +} + +; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: +; GCN-NOT: s4 +; GCN: v_readlane_b32 s4, v40, 0 +define hidden void @func_indirect_use_workgroup_id_z() #1 { + call void @use_workgroup_id_z() + ret void +} + +; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x: +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 +; GCN: ; use s4 +define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 { + %val = call i32 @llvm.amdgcn.workgroup.id.x() + store volatile i32 %arg0, i32 addrspace(1)* undef + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y: +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 +; GCN: ; use s4 +define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 { + %val = call i32 @llvm.amdgcn.workgroup.id.y() + store volatile i32 %arg0, i32 addrspace(1)* undef + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z: +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 +; GCN: ; use s4 +define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 { + %val = call i32 @llvm.amdgcn.workgroup.id.z() + store volatile i32 %arg0, i32 addrspace(1)* undef + call void asm sideeffect "; use $0", "s"(i32 %val) + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 0 +; GCN: enable_sgpr_workgroup_id_z = 0 + +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: s_mov_b32 s4, s6 + +; GCN-DAG: s_mov_b32 s32, 0 +; GCN-NOT: s4 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 { + call void @other_arg_use_workgroup_id_x(i32 555) + ret void +} + +; 
GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 0 + +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: s_mov_b32 s4, s7 + +; GCN-DAG: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 { + call void @other_arg_use_workgroup_id_y(i32 555) + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 0 +; GCN: enable_sgpr_workgroup_id_z = 1 + +; GCN-DAG: v_mov_b32_e32 v0, 0x22b + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 { + call void @other_arg_use_workgroup_id_z(i32 555) + ret void +} + +; GCN-LABEL: {{^}}use_every_sgpr_input: +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} +; GCN: s_load_dword s{{[0-9]+}}, s[4:5] +; GCN: s_load_dword s{{[0-9]+}}, s[6:7] +; GCN: s_load_dword s{{[0-9]+}}, s[8:9] + +; GCN: ; use s[10:11] +; GCN: ; use s12 +; GCN: ; use s13 +; GCN: ; use s14 +define hidden void @use_every_sgpr_input() #1 { + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca + + %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 + %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* + %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc + + %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 + %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* + %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc + + %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 + %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* + %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc + + %val3 = call 
i64 @llvm.amdgcn.dispatch.id() + call void asm sideeffect "; use $0", "s"(i64 %val3) + + %val4 = call i32 @llvm.amdgcn.workgroup.id.x() + call void asm sideeffect "; use $0", "s"(i32 %val4) + + %val5 = call i32 @llvm.amdgcn.workgroup.id.y() + call void asm sideeffect "; use $0", "s"(i32 %val5) + + %val6 = call i32 @llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val6) + + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 1 +; GCN: enable_sgpr_workgroup_info = 0 + +; GCN: enable_sgpr_private_segment_buffer = 1 +; GCN: enable_sgpr_dispatch_ptr = 1 +; GCN: enable_sgpr_queue_ptr = 1 +; GCN: enable_sgpr_kernarg_segment_ptr = 1 +; GCN: enable_sgpr_dispatch_id = 1 +; GCN: enable_sgpr_flat_scratch_init = 1 + +; GCN: s_mov_b32 s13, s15 +; GCN: s_mov_b32 s12, s14 +; GCN: s_mov_b32 s14, s16 +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 { + call void @use_every_sgpr_input() + ret void +} + +; We have to pass the kernarg segment, but there are no kernel +; arguments so null is passed. 
+; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input_no_kernargs: +; GCN: enable_sgpr_workgroup_id_x = 1 +; GCN: enable_sgpr_workgroup_id_y = 1 +; GCN: enable_sgpr_workgroup_id_z = 1 +; GCN: enable_sgpr_workgroup_info = 0 + +; GCN: enable_sgpr_private_segment_buffer = 1 +; GCN: enable_sgpr_dispatch_ptr = 1 +; GCN: enable_sgpr_queue_ptr = 1 +; GCN: enable_sgpr_kernarg_segment_ptr = 0 +; GCN: enable_sgpr_dispatch_id = 1 +; GCN: enable_sgpr_flat_scratch_init = 1 + +; GCN: s_mov_b64 s[10:11], s[8:9] +; GCN: s_mov_b64 s[8:9], 0{{$}} +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #1 { + call void @use_every_sgpr_input() + ret void +} + +; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input: +; GCN-NOT: s6 +; GCN-NOT: s7 +; GCN-NOT: s8 +; GCN-NOT: s9 +; GCN-NOT: s10 +; GCN-NOT: s11 +; GCN-NOT: s12 +; GCN-NOT: s13 +; GCN-NOT: s[6:7] +; GCN-NOT: s[8:9] +; GCN-NOT: s[10:11] +; GCN-NOT: s[12:13] +; GCN: s_or_saveexec_b64 s[16:17], -1 +define hidden void @func_indirect_use_every_sgpr_input() #1 { + call void @use_every_sgpr_input() + ret void +} + +; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz: +; GCN: s_mov_b32 s4, s12 +; GCN: s_mov_b32 s5, s13 +; GCN: s_mov_b32 s6, s14 +; GCN: ; use s[10:11] +; GCN: ; use s12 +; GCN: ; use s13 +; GCN: ; use s14 + +; GCN: s_swappc_b64 +define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 { + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca + + %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 + %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* + %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc + + %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 + %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* + %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc + + 
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 + %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* + %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc + + %val3 = call i64 @llvm.amdgcn.dispatch.id() + call void asm sideeffect "; use $0", "s"(i64 %val3) + + %val4 = call i32 @llvm.amdgcn.workgroup.id.x() + call void asm sideeffect "; use $0", "s"(i32 %val4) + + %val5 = call i32 @llvm.amdgcn.workgroup.id.y() + call void asm sideeffect "; use $0", "s"(i32 %val5) + + %val6 = call i32 @llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val6) + + call void @use_workgroup_id_xyz() + ret void +} + +; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill: +; GCN-DAG: s_mov_b32 s33, s32 +; GCN-DAG: s_addk_i32 s32, 0x400 +; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5] +; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7] + +; GCN: s_mov_b32 s4, s12 +; GCN: s_mov_b32 s5, s13 +; GCN: s_mov_b32 s6, s14 + +; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14 +; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13 +; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12 +; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9] + +; GCN: s_swappc_b64 + +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} +; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_X]]:[[HI_X]]{{\]}}, 0x0 +; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Y]]:[[HI_Y]]{{\]}}, 0x0 +; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Z]]:[[HI_Z]]{{\]}}, 0x0 +; GCN: ; use +; GCN: ; use [[SAVE_X]] +; GCN: ; use [[SAVE_Y]] +; GCN: ; use [[SAVE_Z]] +define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 { + %alloca = alloca i32, align 4, addrspace(5) + call void @use_workgroup_id_xyz() + + store volatile i32 0, i32 addrspace(5)* %alloca + + %dispatch_ptr = call noalias i8 
addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 + %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* + %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc + + %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 + %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* + %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc + + %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 + %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* + %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc + + %val3 = call i64 @llvm.amdgcn.dispatch.id() + call void asm sideeffect "; use $0", "s"(i64 %val3) + + %val4 = call i32 @llvm.amdgcn.workgroup.id.x() + call void asm sideeffect "; use $0", "s"(i32 %val4) + + %val5 = call i32 @llvm.amdgcn.workgroup.id.y() + call void asm sideeffect "; use $0", "s"(i32 %val5) + + %val6 = call i32 @llvm.amdgcn.workgroup.id.z() + call void asm sideeffect "; use $0", "s"(i32 %val6) + + ret void +} + +declare i32 @llvm.amdgcn.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.workgroup.id.z() #0 +declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 +declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 +declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 +declare i64 @llvm.amdgcn.dispatch.id() #0 +declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 + +attributes #0 = { nounwind readnone speculatable } +attributes #1 = { nounwind noinline } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll index 0be2e867e3cf..d10d0dd74741 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll @@ -159,7 +159,7 @@ define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i } ; GCN-LABEL: 
{{^}}sdiv_constant_sel_constants_i64: -; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 0, 5 +; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5 define amdgpu_kernel void @sdiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 121, i64 23 %bo = sdiv i64 120, %sel @@ -177,7 +177,7 @@ define amdgpu_kernel void @sdiv_constant_sel_constants_i32(i32 addrspace(1)* %p, } ; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64: -; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 0, 5 +; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5 define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 -4, i64 23 %bo = udiv i64 120, %sel @@ -186,7 +186,7 @@ define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, } ; GCN-LABEL: {{^}}srem_constant_sel_constants: -; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 33, 3 +; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3 define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 34, i64 15 %bo = srem i64 33, %sel @@ -195,7 +195,7 @@ define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 } ; GCN-LABEL: {{^}}urem_constant_sel_constants: -; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 33, 3 +; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3 define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 34, i64 15 %bo = urem i64 33, %sel diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll index b66ab4e577aa..ad255818c9fe 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -38,23 +38,16 @@ entry: ; GCN-LABEL: {{^}}double4_extelt: ; GCN-NOT: buffer_ -; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b -; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 -; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 -; GCN-DAG: 
s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} -; GCN-DAG: s_mov_b32 s[[L2LO:[0-9]+]], 0xe147ae14 -; GCN-DAG: s_mov_b32 s[[L2HI:[0-9]+]], 0x4000147a -; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 -; GCN: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}, s{{\[}}[[L2LO]]:[[L2HI]]{{\]}} -; GCN-DAG: s_mov_b32 s[[L3LO:[0-9]+]], 0x70a3d70a -; GCN-DAG: s_mov_b32 s[[L3HI:[0-9]+]], 0x40100a3d -; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 -; GCN: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L3HI]]{{\]}} -; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] -; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] -; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <4 x double> , i32 %sel @@ -64,27 +57,19 @@ entry: ; GCN-LABEL: {{^}}double5_extelt: ; GCN-NOT: buffer_ -; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b -; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 -; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 -; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, 
s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} -; GCN-DAG: s_mov_b32 s[[L2LO:[0-9]+]], 0xe147ae14 -; GCN-DAG: s_mov_b32 s[[L2HI:[0-9]+]], 0x4000147a -; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 -; GCN: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}, s{{\[}}[[L2LO]]:[[L2HI]]{{\]}} -; GCN-DAG: s_mov_b32 s[[L3LO:[0-9]+]], 0x70a3d70a -; GCN-DAG: s_mov_b32 s[[L3HI:[0-9]+]], 0x40100a3d -; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 -; GCN: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L3HI]]{{\]}} -; Double literals 5.01 and 4.01 share the same low 32 bits. -; GCN-DAG: s_mov_b32 s[[L4HI:[0-9]+]], 0x40140a3d -; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 -; GCN: s_cselect_b64 s{{\[}}[[T3LO:[0-9]+]]:[[T3HI:[0-9]+]]{{\]}}, s{{\[}}[[T2LO]]:[[T2HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L4HI]]{{\]}} -; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T3LO]] -; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T3HI]] -; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 4 +; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C4]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <5 x double> , i32 %sel @@ -122,15 +107,11 @@ entry: ; GCN-LABEL: {{^}}double2_extelt: ; GCN-NOT: buffer_ -; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b -; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 -; GCN-DAG: 
s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 -; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} -; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T0LO]] -; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T0HI]] -; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <2 x double> , i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll index 35b2d4d8306d..094ae27b5c57 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2: ; GCN: buffer_load_dwordx4 @@ -14,22 +14,15 @@ define amdgpu_kernel void @extract_vector_elt_v3f64_2(double addrspace(1)* %out, ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3f64: ; GCN-NOT: buffer_load -; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; SI-DAG: s_cselect_b64 [[C1:[^,]+]], 
-1, 0 -; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 -; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI: store_dwordx2 v[{{[0-9:]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 -; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T1LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T1HI]] -; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(double addrspace(1)* %out, <3 x double> %foo, i32 %elt) #0 { %dynelt = extractelement <3 x double> %foo, i32 %elt store volatile double %dynelt, double addrspace(1)* %out @@ -38,28 +31,19 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(double addrspace(1)* %ou ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4f64: ; GCN-NOT: buffer_load -; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 -; SI-DAG: 
s_cselect_b64 [[C2:[^,]+]], -1, 0 -; SI-DAG: s_cmp_eq_u32 [[IDX]], 3 -; SI-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; SI: store_dwordx2 v[{{[0-9:]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 -; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 3 -; VI: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T1LO]]:[[T1HI]]{{\]}} -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] -; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, 
v{{[0-9]+}}, [[C3]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v4f64(double addrspace(1)* %out, <4 x double> %foo, i32 %elt) #0 { %dynelt = extractelement <4 x double> %foo, i32 %elt store volatile double %dynelt, double addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll index 985490592487..248f5fc985ee 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; How the replacement of i64 stores with v2i32 stores resulted in ; breaking other users of the bitcast if they already existed @@ -32,14 +32,10 @@ define amdgpu_kernel void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64: ; GCN-NOT: buffer_load ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI: store_dwordx2 v[{{[0-9:]+}}] -; VI: s_cselect_b64 s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]] -; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: 
v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <2 x i64> %foo, i32 %elt store volatile i64 %dynelt, i64 addrspace(1)* %out @@ -63,23 +59,16 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64: -; SI-NOT: buffer_load -; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 -; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI: store_dwordx2 v[{{[0-9:]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 -; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T1LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T1HI]] -; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-NOT: buffer_load +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, 
v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, <3 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <3 x i64> %foo, i32 %elt store volatile i64 %dynelt, i64 addrspace(1)* %out @@ -88,28 +77,19 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64: ; GCN-NOT: buffer_load -; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 -; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; SI-DAG: s_cmp_eq_u32 [[IDX]], 3 -; SI-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; SI: store_dwordx2 v[{{[0-9:]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 -; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} -; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 3 -; VI: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T1LO]]:[[T1HI]]{{\]}} -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] -; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 
[[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(i64 addrspace(1)* %out, <4 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <4 x i64> %foo, i32 %elt store volatile i64 %dynelt, i64 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll index 86fd814d95fa..80d0b3499c70 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -5,6 +5,7 @@ declare void @extern_func() #0 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { +<<<<<<< HEAD ; FLAT_SCR_OPT-LABEL: stack_object_addrspacecast_in_kernel_no_calls: ; FLAT_SCR_OPT: ; %bb.0: ; FLAT_SCR_OPT-NEXT: s_add_u32 s0, s0, s3 @@ -34,6 +35,24 @@ define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { ; FLAT_SCR_ARCH-NEXT: flat_store_dword v[0:1], v2 ; FLAT_SCR_ARCH-NEXT: s_waitcnt_vscnt null, 0x0 ; FLAT_SCR_ARCH-NEXT: s_endpgm +======= +; GCN-LABEL: stack_object_addrspacecast_in_kernel_no_calls: +; GCN: ; %bb.0: +; GCN-NEXT: s_add_u32 s0, s0, s3 +; GCN-NEXT: s_addc_u32 s1, s1, 0 +; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GCN-NEXT: v_mov_b32_e32 v0, 4 +; GCN-NEXT: 
s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0 +; GCN-NEXT: v_cndmask_b32_e64 v1, 0, s0, vcc_lo +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: s_endpgm +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %alloca = alloca i32, addrspace(5) %cast = addrspacecast i32 addrspace(5)* %alloca to i32* store volatile i32 0, i32* %cast diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index 2ea25fe7a31c..3a4923693470 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -285,18 +285,16 @@ entry: } ; GCN-LABEL: {{^}}double2_inselt: -; GCN: s_load_dwordx4 s{{\[}}[[FIRST:[0-9]+]]:[[LAST:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}] ; GCN-NOT: v_movrel ; GCN-NOT: buffer_ -; GCN: s_cmp_lg_u32 [[IDX:s[0-9]+]], 1 -; GCN: s_cselect_b64 s{{\[}}[[P0_LO:[0-9]+]]:[[P0_HI:[0-9]+]]{{\]}}, s{{\[}}{{[0-9]+}}:[[LAST]]{{\]}}, 1.0 -; GCN: s_cmp_lg_u32 [[IDX]], 0 -; GCN: s_cselect_b64 s{{\[}}[[P1_LO:[0-9]+]]:[[P1_HI:[0-9]+]]{{\]}}, s{{\[}}[[FIRST]]:{{[0-9]+}}{{\]}}, 1.0 -; GCN: v_mov_b32_e32 v[[V_FIRST:[0-9]+]], s[[P1_LO]] -; GCN: v_mov_b32_e32 v[[V_SECOND:[0-9]+]], s[[P1_HI]] -; GCN: v_mov_b32_e32 v[[V_THIRD:[0-9]+]], s[[P0_LO]] -; GCN: v_mov_b32_e32 v[[V_LAST:[0-9]+]], s[[P0_HI]] -; GCN: flat_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_FIRST]]:[[V_LAST]]{{\]}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[CC1:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]] +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC1]] +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 0 +; GCN-DAG: s_cselect_b64 [[CC2:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, 
v{{[0-9]+}}, [[CC2]] +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC2]] define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) { entry: %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel @@ -307,7 +305,7 @@ entry: ; GCN-LABEL: {{^}}double5_inselt: ; GCN-NOT: v_movrel ; GCN-NOT: buffer_ -; GCN-COUNT-5: s_cselect_b64 +; GCN-COUNT-10: v_cndmask_b32 define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) { entry: %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index bbdff9c4e897..c269811008c5 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1627,6 +1627,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) ; ; VI-LABEL: dynamic_insertelement_v2f64: ; VI: ; %bb.0: +<<<<<<< HEAD ; VI-NEXT: s_load_dword s10, s[4:5], 0x60 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x30 @@ -1638,11 +1639,27 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) ; VI-NEXT: s_cselect_b64 s[6:7], s[6:7], s[8:9] ; VI-NEXT: s_cmp_lg_u32 s10, 0 ; VI-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] +======= +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x30 +; VI-NEXT: s_load_dword s4, s[4:5], 0x60 +; VI-NEXT: v_mov_b32_e32 v1, 0x40200000 +; VI-NEXT: s_mov_b32 s3, 0x1100f000 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: s_cmp_eq_u32 s4, 1 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v0, s10 +; VI-NEXT: s_cmp_eq_u32 s4, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; VI-NEXT: v_mov_b32_e32 v0, s9 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v0, s8 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <2 x double> %a, double 8.0, i32 %b @@ -1682,6 +1699,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* % ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) +<<<<<<< HEAD ; VI-NEXT: s_cmp_lg_u32 s8, 1 ; VI-NEXT: s_cselect_b64 s[2:3], s[2:3], 5 ; VI-NEXT: s_cmp_lg_u32 s8, 0 @@ -1691,6 +1709,21 @@ define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* % ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +======= +; VI-NEXT: s_cmp_eq_u32 s6, 1 +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s10 +; VI-NEXT: s_cmp_eq_u32 s6, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s9 +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s8 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 5, s[4:5] +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; VI-NEXT: s_endpgm %vecins = insertelement <2 x i64> %a, i64 5, i32 %b store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8 @@ -1737,6 +1770,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* % ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x30 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; VI-NEXT: s_waitcnt lgkmcnt(0) +<<<<<<< HEAD ; VI-NEXT: s_cmp_lg_u32 s12, 1 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_cselect_b64 s[6:7], s[10:11], 5 @@ -1751,6 +1785,27 @@ define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* % ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 +======= +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: s_cmp_eq_u32 s12, 1 +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s10 +; VI-NEXT: s_cmp_eq_u32 s12, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[4:5] +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_mov_b32_e32 v0, s9 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s8 +; VI-NEXT: s_cmp_eq_u32 s12, 2 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 5, s[4:5] +; VI-NEXT: v_mov_b32_e32 v4, s7 +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[4:5] +; VI-NEXT: v_mov_b32_e32 v4, s6 +; VI-NEXT: v_cndmask_b32_e64 v4, v4, 5, s[4:5] +; VI-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:16 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <3 x i64> %a, i64 5, i32 %b @@ -1798,32 +1853,46 @@ define amdgpu_kernel void @dynamic_insertelement_v4f64(<4 x double> addrspace(1) ; ; VI-LABEL: dynamic_insertelement_v4f64: ; VI: ; %bb.0: +<<<<<<< HEAD ; VI-NEXT: s_load_dword s16, s[4:5], 0x40 ; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_mov_b32 s4, 0 ; VI-NEXT: s_mov_b32 s5, 0x40200000 +======= +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; VI-NEXT: s_load_dword s4, s[4:5], 0x40 +; VI-NEXT: v_mov_b32_e32 v4, 0x40200000 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; VI-NEXT: s_mov_b32 s3, 0x1100f000 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_cmp_lg_u32 s16, 1 -; VI-NEXT: s_cselect_b64 s[6:7], s[10:11], s[4:5] -; VI-NEXT: s_cmp_lg_u32 s16, 0 -; VI-NEXT: s_cselect_b64 s[8:9], s[8:9], s[4:5] -; VI-NEXT: s_cmp_lg_u32 s16, 3 -; VI-NEXT: s_cselect_b64 s[10:11], s[14:15], s[4:5] -; VI-NEXT: s_cmp_lg_u32 s16, 2 -; VI-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s10 -; VI-NEXT: v_mov_b32_e32 v3, s11 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 -; VI-NEXT: s_nop 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: s_cmp_eq_u32 s4, 1 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc +; VI-NEXT: v_mov_b32_e32 v0, s10 +; VI-NEXT: s_cmp_eq_u32 s4, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_mov_b32_e32 v0, s9 +; VI-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_cmp_eq_u32 s4, 3 +; 
VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_mov_b32_e32 v5, s15 +; VI-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc +; VI-NEXT: v_mov_b32_e32 v5, s14 +; VI-NEXT: s_cmp_eq_u32 s4, 2 +; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc +; VI-NEXT: v_mov_b32_e32 v5, s13 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc +; VI-NEXT: v_mov_b32_e32 v4, s12 +; VI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <4 x double> %a, double 8.0, i32 %b diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll index 618236dd8645..af2100705794 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -345,6 +345,7 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +<<<<<<< HEAD ; GFX9-NEXT: s_mul_i32 s9, s0, s3 ; GFX9-NEXT: s_mul_hi_u32 s10, s0, s2 ; GFX9-NEXT: s_mul_hi_u32 s5, s0, s3 @@ -361,23 +362,48 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX9-NEXT: s_addc_u32 s5, 0, s5 ; GFX9-NEXT: s_sub_u32 s6, s4, s2 ; GFX9-NEXT: s_subb_u32 s7, s5, 0 +======= +; GFX9-NEXT: s_mul_i32 s7, s0, s3 +; GFX9-NEXT: s_mul_hi_u32 s8, s0, s2 +; GFX9-NEXT: s_mul_hi_u32 s6, s0, s3 +; GFX9-NEXT: s_add_u32 s9, s8, s7 +; GFX9-NEXT: s_mul_i32 s5, s1, s2 +; GFX9-NEXT: s_addc_u32 s6, 0, s6 +; GFX9-NEXT: s_add_u32 s9, s9, s5 +; GFX9-NEXT: s_mul_hi_u32 s4, s1, s2 +; GFX9-NEXT: s_mul_hi_i32 s10, s1, s3 +; GFX9-NEXT: s_addc_u32 s4, s6, s4 +; GFX9-NEXT: s_addc_u32 s6, s10, 0 +; GFX9-NEXT: s_mul_i32 s9, s1, s3 +; GFX9-NEXT: s_add_u32 s4, s4, s9 +; GFX9-NEXT: s_addc_u32 s6, 0, s6 +; GFX9-NEXT: s_sub_u32 s9, s4, s2 +; GFX9-NEXT: s_subb_u32 s10, s6, 0 +>>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: s_cmp_lt_i32 s1, 0 -; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] -; GFX9-NEXT: s_sub_u32 s6, s4, s0 -; GFX9-NEXT: s_subb_u32 s7, s5, 0 +; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s0, v2 +; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v0, vcc ; GFX9-NEXT: s_cmp_lt_i32 s3, 0 -; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] -; GFX9-NEXT: s_add_i32 s1, s10, s9 -; GFX9-NEXT: s_add_i32 s1, s1, s8 -; GFX9-NEXT: s_ashr_i32 s6, s1, 31 -; GFX9-NEXT: s_mov_b32 s7, s6 -; GFX9-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] -; GFX9-NEXT: s_mul_i32 s2, s0, s2 -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] +; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: s_add_i32 s1, s8, s7 +; GFX9-NEXT: s_add_i32 s1, s1, s5 +; GFX9-NEXT: s_ashr_i32 s4, s1, 31 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-NEXT: s_mov_b32 s5, s4 +; GFX9-NEXT: s_mul_i32 s0, s0, s2 +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_endpgm ; @@ -385,6 +411,7 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) +<<<<<<< HEAD ; GFX10-NEXT: s_mul_i32 s9, s0, s3 ; GFX10-NEXT: s_mul_hi_u32 s10, s0, s2 ; GFX10-NEXT: s_mul_hi_u32 s5, s0, 
s3 @@ -401,21 +428,44 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX10-NEXT: s_addc_u32 s5, 0, s5 ; GFX10-NEXT: s_sub_u32 s6, s4, s2 ; GFX10-NEXT: s_subb_u32 s7, s5, 0 +======= +; GFX10-NEXT: s_mul_i32 s7, s0, s3 +; GFX10-NEXT: s_mul_hi_u32 s8, s0, s2 +; GFX10-NEXT: s_mul_hi_u32 s6, s0, s3 +; GFX10-NEXT: s_add_u32 s11, s8, s7 +; GFX10-NEXT: s_mul_i32 s5, s1, s2 +; GFX10-NEXT: s_addc_u32 s6, 0, s6 +; GFX10-NEXT: s_mul_hi_u32 s4, s1, s2 +; GFX10-NEXT: s_add_u32 s11, s11, s5 +; GFX10-NEXT: s_mul_hi_i32 s9, s1, s3 +; GFX10-NEXT: s_addc_u32 s4, s6, s4 +; GFX10-NEXT: s_mul_i32 s10, s1, s3 +; GFX10-NEXT: s_addc_u32 s6, s9, 0 +; GFX10-NEXT: s_add_u32 s4, s4, s10 +; GFX10-NEXT: s_addc_u32 s6, 0, s6 +; GFX10-NEXT: s_sub_u32 s9, s4, s2 +; GFX10-NEXT: s_subb_u32 s10, s6, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, s9 +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX10-NEXT: s_cmp_lt_i32 s1, 0 -; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] -; GFX10-NEXT: s_sub_u32 s6, s4, s0 -; GFX10-NEXT: s_subb_u32 s7, s5, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s10 +; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX10-NEXT: s_cmp_lt_i32 s3, 0 +; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, s6, v0, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v2, s0 ; GFX10-NEXT: s_mul_i32 s0, s0, s2 -; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] -; GFX10-NEXT: s_add_i32 s1, s10, s9 -; GFX10-NEXT: s_add_i32 s1, s1, s8 -; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_mov_b32 s7, s6 -; GFX10-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] -; GFX10-NEXT: s_cselect_b32 s2, -1, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, s1, 0, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v0, s0, 0, s2 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v0, vcc_lo +; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 +; GFX10-NEXT: s_add_i32 s1, s8, s7 +; GFX10-NEXT: s_add_i32 s1, s1, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 
v0, v2, v3, vcc_lo +; GFX10-NEXT: s_ashr_i32 s4, s1, 31 +; GFX10-NEXT: s_mov_b32 s5, s4 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX10-NEXT: v_cndmask_b32_e64 v1, s1, 0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, s0, 0, vcc_lo ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_endpgm bb: diff --git a/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll b/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll index 3634cedfb0bb..407a4e5f1b76 100644 --- a/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll @@ -9,7 +9,8 @@ ; GCN: s_load_dwordx2 ; GCN: s_cmp_eq_u32 -; GCN: s_cselect_b64 +; GCN: v_cndmask_b32 +; GCN: v_cndmask_b32 ; GCN-NOT: load_dword ; GCN: flat_load_dwordx2 @@ -34,7 +35,8 @@ define amdgpu_kernel void @select_ptr_crash_i64_flat(i32 %tmp, [8 x i32], i64* % ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} -; GCN: s_cselect_b64 +; GCN: v_cndmask_b32 +; GCN: v_cndmask_b32 ; GCN: flat_store_dwordx2 define amdgpu_kernel void @select_ptr_crash_i64_global(i32 %tmp, [8 x i32], i64 addrspace(1)* %ptr0, [8 x i32], i64 addrspace(1)* %ptr1, [8 x i32], i64 addrspace(1)* %ptr2) { %tmp2 = icmp eq i32 %tmp, 0 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll new file mode 100644 index 000000000000..3ea52f9309f6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll @@ -0,0 +1,55 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s + +;. +; Kernel LDS lowering. +;. +; @lds.1: is part of @llvm.used list, and also it is used within kernel, hence it is lowered. +; @lds.2: is part of @llvm.compiler.used list, and also it is used within kernel, hence it is lowered. 
+; @lds.3: is used as initializer to @gptr.3, hence @lds.3 is not lowered, though it is used within kernel. +; @lds.4: is used as initializer to @gptr.4, hence @lds.4 is not lowered, though it is used within kernel, +; irrespective of the uses of @gptr.4 itself ( @gptr.4 is part of llvm.compiler.used list ). +; @lds.5: is part of @llvm.used list, but is not used within kernel, hence it is not lowered. +; @lds.6: is part of @llvm.compiler.used list, but is not used within kernel, hence it is not lowered. +;. + +; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { i32, i16 } + +; CHECK-NOT: @lds.1 +; CHECK-NOT: @lds.2 +; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8 +; CHECK: @lds.4 = addrspace(3) global float undef, align 4 +; CHECK: @lds.5 = addrspace(3) global i16 undef, align 2 +; CHECK: @lds.6 = addrspace(3) global i32 undef, align 4 +@lds.1 = addrspace(3) global i16 undef, align 2 +@lds.2 = addrspace(3) global i32 undef, align 4 +@lds.3 = addrspace(3) global i64 undef, align 8 +@lds.4 = addrspace(3) global float undef, align 4 +@lds.5 = addrspace(3) global i16 undef, align 2 +@lds.6 = addrspace(3) global i32 undef, align 4 + +; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 +; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8 +@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 +@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8 + +; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 4 + +; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (i64* 
addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" +@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" +@llvm.compiler.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" + +; CHECK-LABEL: @k0() +; CHECK: %ld.lds.1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 4 +; CHECK: %ld.lds.2 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0), align 4 +; CHECK: %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3, align 4 +; CHECK: %ld.lds.4 = load float, float addrspace(3)* @lds.4, align 4 +; CHECK: ret void +define amdgpu_kernel void @k0() { + %ld.lds.1 = load i16, i16 addrspace(3)* @lds.1 + %ld.lds.2 = load i32, i32 addrspace(3)* @lds.2 + %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3 + %ld.lds.4 = load float, float addrspace(3)* @lds.4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll new file mode 100644 index 000000000000..104c87774a72 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll @@ -0,0 +1,93 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | 
FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s + +;. +; @lds.1: is aliased with @alias.to.lds.1, and @alias.to.lds.1 is used within kernel @k0. +; Hence, @lds.1 is lowered. +; @lds.2: is aliased with @alias.to.lds.2, and @alias.to.lds.2 is used within non-kernel @f0, +; Hence, @lds.2 is lowered. +; @lds.3: is used as initializer to global @gptr.3, and @gptr.3 is aliased with @alias.to.gptr.3, +; and @alias.to.gptr.3 is used within kernel @k1. Hence, @lds.3 is lowered. +; @lds.4: is used as initializer to global @gptr.4, and @gptr.4 is aliased with @alias.to.gptr.4, +; and @alias.to.gptr.4 is used within non-kernel @f1. Hence, @lds.4 is lowered. +; @lds.5: is aliased with @alias.to.lds.5, but neither @lds.5 nor @alias.to.lds.5 is used anywhere. +; Hence, @lds.5 is not lowered. +; @lds.6: is used as initializer to global @gptr.6, and @gptr.6 is aliased with @alias.to.gptr.6. +; But none of them are used anywhere. Hence, @lds.6 is not lowered. +;. 
+ +; CHECK: %llvm.amdgcn.module.lds.t = type { [4 x i8], [3 x i8], [1 x i8], [2 x i8] } + +; CHECK-NOT: @lds.1 +; CHECK-NOT: @lds.2 +; CHECK-NOT: @lds.3 +; CHECK-NOT: @lds.4 +; CHECK: @lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8 +; CHECK: @lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8 +@lds.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1 +@lds.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2 +@lds.3 = internal unnamed_addr addrspace(3) global [3 x i8] undef, align 4 +@lds.4 = internal unnamed_addr addrspace(3) global [4 x i8] undef, align 4 +@lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8 +@lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8 + +; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([3 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8 +; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8 +; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([6 x i8] addrspace(3)* @lds.6 to i64 addrspace(3)*) to i64*), align 8 +@gptr.3 = addrspace(1) global i64* addrspacecast ([3 x i8] addrspace(3)* @lds.3 to i64*), align 8 +@gptr.4 = addrspace(1) global i64* addrspacecast ([4 x i8] addrspace(3)* @lds.4 to i64*), align 8 +@gptr.6 = addrspace(1) global i64* addrspacecast ([6 x i8] addrspace(3)* @lds.6 to i64*), align 8 + +; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4 +; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, 
%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata" + +; CHECK: @alias.to.lds.1 = alias [1 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) +; CHECK: @alias.to.lds.2 = alias [2 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3) +; CHECK: @alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3 +; CHECK: @alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4 +; CHECK: @alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5 +; CHECK: @alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6 +@alias.to.lds.1 = alias [1 x i8], [1 x i8] addrspace(3)* @lds.1 +@alias.to.lds.2 = alias [2 x i8], [2 x i8] addrspace(3)* @lds.2 +@alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3 +@alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4 +@alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5 +@alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6 + +; CHECK-LABEL: @f1 +; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4, align 8 +; CHECK: ret void +define void @f1() { + %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4 + ret void +} + +; CHECK-LABEL: @f0 +; CHECK: %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)* +; CHECK: store i8 1, i8 addrspace(3)* %bc, align 2 +; CHECK: ret void +define void @f0() { + %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)* + store i8 1, i8 addrspace(3)* %bc, align 2 + ret void +} + +; CHECK-LABEL: @k1 +; CHECK-LABEL: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] +; CHECK-LABEL: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3, align 8 +; CHECK-LABEL: ret void +define amdgpu_kernel void @k1() { + %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3 + 
ret void +} + +; CHECK-LABEL: @k0 +; CHECK-LABEL: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] +; CHECK-LABEL: %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)* +; CHECK-LABEL: store i8 1, i8 addrspace(3)* %bc, align 1 +; CHECK-LABEL: ret void +define amdgpu_kernel void @k0() { + %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)* + store i8 1, i8 addrspace(3)* %bc, align 1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll new file mode 100644 index 000000000000..77fcefa7944d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll @@ -0,0 +1,88 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s + +;. +; @lds.1: is part of @llvm.used list, and is no-where used. Hence it is not lowered. +; @lds.2: is part of @llvm.compiler.used list, and is no-where used. Hence it is not lowered. +; @lds.3: is used as initializer to @gptr.3, and is no-where used. @gptr.3 itself is also not +; used anywhere else, hence @lds.3 is not lowered. +; @lds.4: is used as initializer to @gptr.4, and is no-where used. @gptr.4 is part of +; @llvm.compiler.used list, but is no-where else used. hence @lds.4 is not lowered. +; +; @lds.5: is used as initializer to @gptr.5, and is no-where used. @gptr.5 is part of +; @llvm.compiler.used list, but is also used within kernel @k0. Hence @lds.5 is lowered. +; @lds.6: is used as initializer to @gptr.6, and is no-where used. @gptr.6 is part of +; @llvm.compiler.used list, but is also used within non-kernel function @f0. Hence @lds.6 is lowered. +; @lds.7: is used as initializer to @gptr.7, and is no-where used. @gptr.7 is used as initializer to @gptr.8, +; and @gptr.8 is used within non-kernel function @f1. 
Hence @lds.7 is lowered. +;. + +; CHECK: %llvm.amdgcn.module.lds.t = type { [3 x float], [1 x float], [2 x float] } + +; CHECK: @lds.1 = addrspace(3) global i16 undef, align 2 +; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4 +; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8 +; CHECK: @lds.4 = addrspace(3) global float undef, align 4 +; CHECK-NOT: @lds.5 +; CHECK-NOT: @lds.6 +; CHECK-NOT: @lds.7 +@lds.1 = addrspace(3) global i16 undef, align 2 +@lds.2 = addrspace(3) global i32 undef, align 4 +@lds.3 = addrspace(3) global i64 undef, align 8 +@lds.4 = addrspace(3) global float undef, align 4 +@lds.5 = addrspace(3) global [1 x float] undef, align 4 +@lds.6 = addrspace(3) global [2 x float] undef, align 8 +@lds.7 = addrspace(3) global [3 x float] undef, align 16 + +; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 +; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8 +; CHECK: @gptr.5 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([1 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8 +; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([2 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) to i64 addrspace(3)*) to i64*), align 8 +; CHECK: @gptr.7 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8 +; CHECK: @gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8 +@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 
8 +@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8 +@gptr.5 = addrspace(1) global i64* addrspacecast ([1 x float] addrspace(3)* @lds.5 to i64*), align 8 +@gptr.6 = addrspace(1) global i64* addrspacecast ([2 x float] addrspace(3)* @lds.6 to i64*), align 8 +@gptr.7 = addrspace(1) global i64* addrspacecast ([3 x float] addrspace(3)* @lds.7 to i64*), align 8 +@gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8 + +; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" +; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16 +; CHECK: @llvm.compiler.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata" +@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" +@llvm.compiler.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* 
addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata" + +; CHECK-LABEL: @f1() +; CHECK: %ld = load i64**, i64** addrspace(1)* @gptr.8, align 8 +; CHECK: ret void +define void @f1() { + %ld = load i64**, i64** addrspace(1)* @gptr.8 + ret void +} + +; CHECK-LABEL: @f0() +; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 +; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 +; CHECK: ret void +define void @f0() { + %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 + ret void +} + +; CHECK-LABEL: @k0() +; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] +; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 +; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 +; CHECK: ret void +define amdgpu_kernel void @k0() { + %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 + ret void +} + +; CHECK-LABEL: @k1() +; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* 
@llvm.amdgcn.module.lds) ] +; CHECK: ret void +define amdgpu_kernel void @k1() { + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll new file mode 100644 index 000000000000..c3fd1c0f9e82 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s + +; CHECK: %llvm.amdgcn.module.lds.t = type { double, float } + +; CHECK: @function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to float*), align 8 + +; CHECK: @kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to double*), align 8 + +; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8 + +@function_target = addrspace(3) global float undef, align 4 +@function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* @function_target to float*), align 8 + +@kernel_target = addrspace(3) global double undef, align 8 +@kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* @kernel_target to double*), align 8 + +; CHECK-LABEL: @function(float %x) +; CHECK: %0 = load float*, float* addrspace(1)* @function_indirect, align 8 +define void @function(float %x) local_unnamed_addr #5 { +entry: + %0 = load float*, float* addrspace(1)* @function_indirect, align 8 + store float %x, float* %0, align 4 + ret void +} + +; CHECK-LABEL: @kernel(double %x) +; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* 
@llvm.amdgcn.module.lds) ] +; CHECK: %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8 +define amdgpu_kernel void @kernel(double %x) local_unnamed_addr #5 { +entry: + %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8 + store double %x, double* %0, align 8 + ret void +} + + + + diff --git a/llvm/test/CodeGen/AMDGPU/select64.ll b/llvm/test/CodeGen/AMDGPU/select64.ll index 5a8b83c52370..ad4a5a6d1cb4 100644 --- a/llvm/test/CodeGen/AMDGPU/select64.ll +++ b/llvm/test/CodeGen/AMDGPU/select64.ll @@ -1,9 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefix=SI %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=VI %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefix=GFX90A %s +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=SI,GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=VI,GCN %s +; GCN-LABEL: {{^}}select0: +; i64 select should be split into two i32 selects, and we shouldn't need +; to use a shfit to extract the hi dword of the input. +; GCN-NOT: s_lshr_b64 +; GCN: v_cndmask +; GCN: v_cndmask define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) { +<<<<<<< HEAD ; SI-LABEL: select0: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s6, s[0:1], 0xb @@ -48,6 +53,8 @@ define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) { ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
entry: %0 = icmp ugt i32 %cond, 5 %1 = select i1 %0, i64 0, i64 %in @@ -55,7 +62,13 @@ entry: ret void } +; GCN-LABEL: {{^}}select_trunc_i64: +; VI: s_cselect_b32 +; VI-NOT: s_cselect_b32 +; SI: v_cndmask_b32 +; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind { +<<<<<<< HEAD ; SI-LABEL: select_trunc_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -97,6 +110,8 @@ define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %cmp = icmp ugt i32 %cond, 5 %sel = select i1 %cmp, i64 0, i64 %in %trunc = trunc i64 %sel to i32 @@ -104,7 +119,13 @@ define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i ret void } +; GCN-LABEL: {{^}}select_trunc_i64_2: +; VI: s_cselect_b32 +; VI-NOT: s_cselect_b32 +; SI: v_cndmask_b32 +; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind { +<<<<<<< HEAD ; SI-LABEL: select_trunc_i64_2: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s8, s[0:1], 0xb @@ -147,6 +168,8 @@ define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%cmp = icmp ugt i32 %cond, 5 %sel = select i1 %cmp, i64 %a, i64 %b %trunc = trunc i64 %sel to i32 @@ -154,7 +177,13 @@ define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, ret void } +; GCN-LABEL: {{^}}v_select_trunc_i64_2: +; VI: s_cselect_b32 +; VI-NOT: s_cselect_b32 +; SI: v_cndmask_b32 +; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { +<<<<<<< HEAD ; SI-LABEL: v_select_trunc_i64_2: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -206,6 +235,8 @@ define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %con ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %cmp = icmp ugt i32 %cond, 5 %a = load i64, i64 addrspace(1)* %aptr, align 8 %b = load i64, i64 addrspace(1)* %bptr, align 8 @@ -215,7 +246,12 @@ define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %con ret void } +; GCN-LABEL: {{^}}v_select_i64_split_imm: +; GCN-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +; GCN-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}} +; GCN: s_endpgm define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { +<<<<<<< HEAD ; SI-LABEL: v_select_i64_split_imm: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -270,6 +306,8 @@ define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %c ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm +======= +>>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%cmp = icmp ugt i32 %cond, 5 %a = load i64, i64 addrspace(1)* %aptr, align 8 %b = load i64, i64 addrspace(1)* %bptr, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/selectcc.ll b/llvm/test/CodeGen/AMDGPU/selectcc.ll index 48127d493fbc..54a26a4cf676 100644 --- a/llvm/test/CodeGen/AMDGPU/selectcc.ll +++ b/llvm/test/CodeGen/AMDGPU/selectcc.ll @@ -1,6 +1,6 @@ ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}selectcc_i64: ; EG: XOR_INT @@ -9,10 +9,9 @@ ; EG: CNDE_INT ; EG: CNDE_INT ; SI: v_cmp_eq_u64 -; SI: v_cndmask -; SI: v_cndmask ; VI: s_cmp_eq_u64 -; VI: s_cselect_b64 +; GCN: v_cndmask +; GCN: v_cndmask define amdgpu_kernel void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) { entry: %0 = icmp eq i64 %lhs, %rhs diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index 651567fe602a..62ae206572b6 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -16,10 +16,10 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}sint_to_fp_i1_f64: ; VI-DAG: s_cmp_eq_u32 -; VI-DAG: s_cselect_b64 s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, -1.0, 0 -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]] -; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; VI-DAG: 
s_cselect_b32 s[[SSEL:[0-9]+]], 0xbff00000, 0 +; VI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] +; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}} ; VI: s_endpgm ; SI-DAG: s_cmp_eq_u32 diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index d35af1510218..1f26cd39c4b8 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -76,15 +76,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1) ; GCN-LABEL: {{^}}uint_to_fp_i1_to_f64: ; VI-DAG: s_cmp_eq_u32 -; VI-DAG: s_cselect_b64 s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, 1.0, 0 -; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] -; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]] -; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} +; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0x3ff00000, 0 +; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] ; SI-DAG: s_cmp_eq_u32 ; SI-DAG: s_cselect_b64 vcc, -1, 0 ; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, {{v[0-9]+}}, vcc -; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; SI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}} ; GCN: s_endpgm define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { %cmp = icmp eq i32 %in, 0 diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll new file mode 100644 index 000000000000..2530cf6576b8 --- /dev/null +++ b/llvm/test/CodeGen/X86/peep-test-5.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s +; Example of a decref operation with "immortal" objects. 
+; void decref(long* refcount) { +; long count = *refcount; +; if (count == 1) { free_object() } +; else if (count > 1) { *refcount = count - 1; } +; else { /* immortal */ } +; } +; Resulting assembly should share flags from single CMP instruction for both +; conditions! +define void @decref(i32* %p) { +; CHECK-LABEL: decref: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb_free +; CHECK-NEXT: callq free_object@PLT +; CHECK-NEXT: .LBB0_4: # %end +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: jle .LBB0_4 +; CHECK-NEXT: # %bb.3: # %bb_dec +; CHECK-NEXT: decl %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %count = load i32, i32* %p, align 4 + %cmp0 = icmp eq i32 %count, 1 + br i1 %cmp0, label %bb_free, label %bb2 + +bb2: + %cmp1 = icmp sgt i32 %count, 1 + br i1 %cmp1, label %bb_dec, label %end + +bb_dec: + %dec = add nsw i32 %count, -1 + store i32 %dec, i32* %p, align 4 + br label %end + +bb_free: + call void @free_object() + br label %end + +end: + ret void +} + +declare void @free_object() diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s new file mode 100644 index 000000000000..b25bb8a3079b --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s @@ -0,0 +1,19 @@ +# Supplies a weak def, WeakDef, and a pointer holding its address, +# WeakDefAddrInExtraFile. 
+ + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 10, 14 sdk_version 10, 14 + .section __DATA,__data + .globl WeakDef + .weak_definition WeakDef + .p2align 2 +WeakDef: + .long 2 + + .globl WeakDefAddrInExtraFile + .p2align 3 +WeakDefAddrInExtraFile: + .quad WeakDef + + +.subsections_via_symbols diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s new file mode 100644 index 000000000000..4d43ade6f3b7 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s @@ -0,0 +1,21 @@ +# REQUIRES: asserts +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %s +# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s +# +# Check that debug sections are not emitted, and consequently that we don't +# error out due to buggy past-the-end anonymous relocations in __debug_ranges. +# +# CHECK: __debug_ranges is a debug section: No graph section will be created. 
+ .section __TEXT,__text,regular,pure_instructions + .macosx_version_min 10, 15 + .globl _main + .p2align 4, 0x90 +_main: + retq +Lpast_the_end: + + .section __DWARF,__debug_ranges + .p2align 4 + .quad Lpast_the_end + +.subsections_via_symbols diff --git a/llvm/test/Transforms/LICM/no-hoist-prof.ll b/llvm/test/Transforms/LICM/no-hoist-prof.ll new file mode 100644 index 000000000000..1775ecc21c4d --- /dev/null +++ b/llvm/test/Transforms/LICM/no-hoist-prof.ll @@ -0,0 +1,88 @@ +; RUN: opt -passes='sample-profile,function(loop-mssa(licm))' -aa-pipeline=basic-aa -S -sample-profile-file='%S/Inputs/no-hoist-prof.prof' < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM +; RUN: opt -passes=licm -S < %s | FileCheck %s --check-prefix=CHECK-LICM + +; Original source code: +; +; int bar(int); +; int foo(int iter, int explode) { +; int base = bar(explode); +; for (int i = 0; i != iter; ++i) +; if (i == explode) +; iter = (base * base) + bar(iter); +; return iter; +; } + +; We need debug information in this .ll in order to leverage the pgo file, so: +; .ll generated by running `clang++ -O3 -g -S -emit-llvm`, then: +; - move hoisted mul back into cold section +; - give labels names +; - reindex variables +; - remove metadata calls, attributes, module header +; - remove unnecessary metadata + +; CHECK-LICM: .l.check.preheader:{{.*}} +; CHECK-LICM-NEXT: {{.*}} = mul {{.*}} +; CHECK-LICM-NEXT: br{{.*}} + +; CHECK-BFI-LICM: .l.cold:{{.*}} +; CHECK-BFI-LICM-NEXT: {{.*}} = mul {{.*}} + +define dso_local i32 @_Z3fooii(i32, i32) local_unnamed_addr #0 !dbg !7 { + %3 = tail call i32 @_Z3bari(i32 %1), !dbg !19 + %4 = icmp eq i32 %0, 0, !dbg !22 + br i1 %4, label %.l.ret, label %.l.check.preheader, !dbg !24 + +.l.check.preheader: + br label %.l.check, !dbg !24 + +.l.ret: + %5 = phi i32 [ 0, %2 ], [ %12, %.l.iterate ] + ret i32 %5, !dbg !25 + +.l.check: + %6 = phi i32 [ 0, %.l.check.preheader ], [ %13, %.l.iterate ] + %7 = phi i32 [ %0, %.l.check.preheader ], [ %12, %.l.iterate ] + %8 = icmp eq 
i32 %6, %1, !dbg !26 + br i1 %8, label %.l.cold, label %.l.iterate, !dbg !28 + +.l.cold: + %9 = mul nsw i32 %3, %3 + %10 = tail call i32 @_Z3bari(i32 %7), !dbg !29 + %11 = add nsw i32 %10, %9, !dbg !30 + br label %.l.iterate, !dbg !31 + +.l.iterate: + %12 = phi i32 [ %11, %.l.cold ], [ %7, %.l.check ] + %13 = add nuw nsw i32 %6, 1, !dbg !32 + %14 = icmp eq i32 %13, %12, !dbg !22 + br i1 %14, label %.l.ret, label %.l.check, !dbg !24, !llvm.loop !33 +} + +attributes #0 = { "use-sample-profile" } + +declare dso_local i32 @_Z3bari(i32) local_unnamed_addr #1 + +!llvm.module.flags = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: None) +!1 = !DIFile(filename: "foo.cpp", directory: "/tmp/gather_pgo") +!4 = !{i32 2, !"Debug Info Version", i32 3} +!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooii", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3) +!19 = !DILocation(line: 3, column: 14, scope: !7) +!22 = !DILocation(line: 4, column: 21, scope: !23) +!23 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 3) +!24 = !DILocation(line: 4, column: 3, scope: !16) +!25 = !DILocation(line: 7, column: 3, scope: !7) +!26 = !DILocation(line: 5, column: 11, scope: !27) +!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 5, column: 9) +!28 = !DILocation(line: 5, column: 9, scope: !23) +!29 = !DILocation(line: 6, column: 30, scope: !27) +!30 = !DILocation(line: 6, column: 28, scope: !27) +!31 = !DILocation(line: 6, column: 7, scope: !27) +!32 = !DILocation(line: 4, column: 30, scope: !23) +!33 = distinct !{!33, !24, 
!34} +!34 = !DILocation(line: 6, column: 38, scope: !16) diff --git a/llvm/test/Transforms/LICM/sink.ll b/llvm/test/Transforms/LICM/sink.ll new file mode 100644 index 000000000000..d82168b147cc --- /dev/null +++ b/llvm/test/Transforms/LICM/sink.ll @@ -0,0 +1,69 @@ +; RUN: opt -S -licm -licm-coldness-threshold=0 < %s | FileCheck %s --check-prefix=CHECK-LICM +; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK +; RUN: opt -S < %s -passes='require,loop-mssa(licm),loop-sink' \ +; RUN: | FileCheck %s --check-prefix=CHECK-SINK +; RUN: opt -S -licm -licm-coldness-threshold=0 -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM +; RUN: opt -S -licm -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM + +; Original source code: +; int g; +; int foo(int p, int x) { +; for (int i = 0; i != x; i++) +; if (__builtin_expect(i == p, 0)) { +; x += g; x *= g; +; } +; return x; +; } +; +; Load of global value g should not be hoisted to preheader. 
+ +@g = global i32 0, align 4 + +define i32 @foo(i32, i32) #0 !prof !2 { + %3 = icmp eq i32 %1, 0 + br i1 %3, label %._crit_edge, label %.lr.ph.preheader + +.lr.ph.preheader: + br label %.lr.ph + +; CHECK-LICM: .lr.ph.preheader: +; CHECK-LICM: load i32, i32* @g +; CHECK-LICM: br label %.lr.ph + +; CHECK-BFI-LICM: .lr.ph.preheader: +; CHECK-BFI-LICM-NOT: load i32, i32* @g +; CHECK-BFI-LICM: br label %.lr.ph + +.lr.ph: + %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ] + %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ] + %4 = icmp eq i32 %.03, %0 + br i1 %4, label %.then, label %.combine, !prof !1 + +.then: + %5 = load i32, i32* @g, align 4 + %6 = add nsw i32 %5, %.012 + %7 = mul nsw i32 %6, %5 + br label %.combine + +; CHECK-SINK: .then: +; CHECK-SINK: load i32, i32* @g +; CHECK-SINK: br label %.combine + +.combine: + %.1 = phi i32 [ %7, %.then ], [ %.012, %.lr.ph ] + %8 = add nuw nsw i32 %.03, 1 + %9 = icmp eq i32 %8, %.1 + br i1 %9, label %._crit_edge.loopexit, label %.lr.ph + +._crit_edge.loopexit: + %.1.lcssa = phi i32 [ %.1, %.combine ] + br label %._crit_edge + +._crit_edge: + %.01.lcssa = phi i32 [ 0, %2 ], [ %.1.lcssa, %._crit_edge.loopexit ] + ret i32 %.01.lcssa +} + +!1 = !{!"branch_weights", i32 1, i32 2000} +!2 = !{!"function_entry_count", i64 1} diff --git a/llvm/test/Verifier/dbg-invalid-enum-as-scope.ll b/llvm/test/Verifier/dbg-invalid-enum-as-scope.ll new file mode 100644 index 000000000000..4053d4aede2e --- /dev/null +++ b/llvm/test/Verifier/dbg-invalid-enum-as-scope.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s +; CHECK: enum type is not a scope; check enum type ODR violation +; CHECK: warning: ignoring invalid debug info + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!llvm.dbg.cu = !{!1} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !13, enums: !3) +!2 = 
!DIFile(filename: "file.c", directory: "dir") +!3 = !{!4} +!4 = distinct !DICompositeType(tag: DW_TAG_enumeration_type, name: "Stage", file: !2, line: 3, baseType: !10, size: 32, elements: !11, identifier: "_ZTS5Stage") +!6 = !DIDerivedType(tag: DW_TAG_member, name: "Var", scope: !4, file: !2, line: 5, baseType: !10) +!10 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!11 = !{!12} +!12 = !DIEnumerator(name: "A1", value: 0, isUnsigned: true) +!13 = !{!6} diff --git a/llvm/test/tools/llvm-nm/lit.local.cfg b/llvm/test/tools/llvm-nm/lit.local.cfg new file mode 100644 index 000000000000..c8625f4d9d24 --- /dev/null +++ b/llvm/test/tools/llvm-nm/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True From 06cc2f2f122ab2ce6c2ffb5cd6f87f0334919ca3 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Wed, 5 Jan 2022 18:38:10 +0100 Subject: [PATCH 709/992] [mlir] Align LLVM_Type ODS constraint on type verifiers Verify only the outer type being LLVM-compatible, the elemental types if present are already checked by the type verifiers. This makes some LLVM dialect operations compatible with mixed-dialect types that appear during progressive lowering. Reviewed By: wsmoses Differential Revision: https://reviews.llvm.org/D116671 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 2 +- mlir/test/Dialect/LLVMIR/types.mlir | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index f4f73a8ddb14..386bd76bf1d5 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -57,7 +57,7 @@ def LLVM_Dialect : Dialect { // LLVM dialect type. def LLVM_Type : DialectType, + CPred<"::mlir::LLVM::isCompatibleOuterType($_self)">, "LLVM dialect-compatible type">; // Type constraint accepting LLVM token type. 
diff --git a/mlir/test/Dialect/LLVMIR/types.mlir b/mlir/test/Dialect/LLVMIR/types.mlir index 9a53f56ce70e..6e1a571d0ca1 100644 --- a/mlir/test/Dialect/LLVMIR/types.mlir +++ b/mlir/test/Dialect/LLVMIR/types.mlir @@ -178,9 +178,11 @@ func @verbose() { // CHECK-LABEL: @ptr_elem_interface // CHECK-COUNT-3: !llvm.ptr +// CHECK: llvm.mlir.undef : !llvm.ptr func @ptr_elem_interface(%arg0: !llvm.ptr) { %0 = llvm.load %arg0 : !llvm.ptr llvm.store %0, %arg0 : !llvm.ptr + llvm.mlir.undef : !llvm.ptr return } From e2165e096869ef5bd5150c150a1cbfc25f693a0f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 5 Jan 2022 11:53:59 -0500 Subject: [PATCH 710/992] [InstCombine] remove trunc user restriction for match of bswap This does not appear to cause any problems, and it fixes #50910 Extra tests with a trunc user were added with: 3a239379 ...but they don't match either way, so there's an opportunity to improve the matching further. --- llvm/lib/Transforms/Utils/Local.cpp | 6 +----- llvm/test/Transforms/InstCombine/bswap.ll | 5 +---- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index ecad79b68185..efc53968b1e9 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3147,11 +3147,6 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) return false; // Can't do integer/elements > 128 bits. - Type *DemandedTy = ITy; - if (I->hasOneUse()) - if (auto *Trunc = dyn_cast(I->user_back())) - DemandedTy = Trunc->getType(); - // Try to find all the pieces corresponding to the bswap. bool FoundRoot = false; std::map> BPS; @@ -3165,6 +3160,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( "Illegal bit provenance index"); // If the upper bits are zero, then attempt to perform as a truncated op. 
+ Type *DemandedTy = ITy; if (BitProvenance.back() == BitPart::Unset) { while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) BitProvenance = BitProvenance.drop_back(); diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 7777e4fa3ad0..86d8718073d5 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -919,10 +919,7 @@ declare i64 @llvm.bswap.i64(i64) define i32 @PR50910(i64 %t0) { ; CHECK-LABEL: @PR50910( -; CHECK-NEXT: [[T2:%.*]] = and i64 [[T0:%.*]], 72057594037927935 -; CHECK-NEXT: [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[T2]]) -; CHECK-NEXT: [[T4:%.*]] = lshr i64 [[T0]], 56 -; CHECK-NEXT: [[T5:%.*]] = or i64 [[T3]], [[T4]] +; CHECK-NEXT: [[T5:%.*]] = call i64 @llvm.bswap.i64(i64 [[T0:%.*]]) ; CHECK-NEXT: [[T6:%.*]] = trunc i64 [[T5]] to i32 ; CHECK-NEXT: ret i32 [[T6]] ; From dde7388ad5bb19ddd1cb2ac4669ff3012723dd69 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Wed, 5 Jan 2022 23:19:36 +0530 Subject: [PATCH 711/992] [MLIR] Add clearAndCopyFrom to IntegerPolyhedron This patch adds clearAndCopyFrom to IntegerPolyhedron. This requires moving LLVM-style RTTI from FlatAffineConstraints to IntegerPolyhedron. This patch is part of a series of patches to move presburger math to Presburger directory. 
Reviewed By: arjunp Differential Revision: https://reviews.llvm.org/D116533 --- mlir/include/mlir/Analysis/AffineStructures.h | 15 +++++----- .../Analysis/Presburger/IntegerPolyhedron.h | 15 ++++++++++ mlir/lib/Analysis/AffineStructures.cpp | 28 +++++++++++++------ .../Analysis/Presburger/IntegerPolyhedron.cpp | 4 +++ 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index bb76b4ff13d2..3991628a6100 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -59,9 +59,6 @@ struct MutableAffineMap; /// class FlatAffineConstraints : public IntegerPolyhedron { public: - /// All derived classes of FlatAffineConstraints. - enum class Kind { FlatAffineConstraints, FlatAffineValueConstraints }; - /// Constructs a constraint system reserving memory for the specified number /// of constraints and identifiers. FlatAffineConstraints(unsigned numReservedInequalities, @@ -99,9 +96,11 @@ class FlatAffineConstraints : public IntegerPolyhedron { virtual ~FlatAffineConstraints() = default; /// Return the kind of this FlatAffineConstraints. - virtual Kind getKind() const { return Kind::FlatAffineConstraints; } + Kind getKind() const override { return Kind::FlatAffineConstraints; } - static bool classof(const FlatAffineConstraints *cst) { return true; } + static bool classof(const IntegerPolyhedron *cst) { + return cst->getKind() == Kind::FlatAffineConstraints; + } /// Checks for emptiness by performing variable elimination on all /// identifiers, running the GCD test on each equality constraint, and @@ -250,7 +249,7 @@ class FlatAffineConstraints : public IntegerPolyhedron { LogicalResult unionBoundingBox(const FlatAffineConstraints &other); /// Replaces the contents of this FlatAffineConstraints with `other`. 
- virtual void clearAndCopyFrom(const FlatAffineConstraints &other); + void clearAndCopyFrom(const IntegerPolyhedron &other) override; /// Returns the smallest known constant bound for the extent of the specified /// identifier (pos^th), i.e., the smallest known constant that is greater @@ -499,7 +498,7 @@ class FlatAffineValueConstraints : public FlatAffineConstraints { /// Return the kind of this FlatAffineConstraints. Kind getKind() const override { return Kind::FlatAffineValueConstraints; } - static bool classof(const FlatAffineConstraints *cst) { + static bool classof(const IntegerPolyhedron *cst) { return cst->getKind() == Kind::FlatAffineValueConstraints; } @@ -698,7 +697,7 @@ class FlatAffineValueConstraints : public FlatAffineConstraints { bool areIdsAlignedWithOther(const FlatAffineValueConstraints &other); /// Replaces the contents of this FlatAffineValueConstraints with `other`. - void clearAndCopyFrom(const FlatAffineConstraints &other) override; + void clearAndCopyFrom(const IntegerPolyhedron &other) override; /// Returns the Value associated with the pos^th identifier. Asserts if /// no Value identifier was associated. diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h index 9185ea14cd53..eae56e3acd0a 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h @@ -50,6 +50,13 @@ namespace mlir { /// class IntegerPolyhedron { public: + /// All derived classes of IntegerPolyhedron. + enum class Kind { + FlatAffineConstraints, + FlatAffineValueConstraints, + IntegerPolyhedron + }; + /// Kind of identifier (column). enum IdKind { Dimension, Symbol, Local }; @@ -77,6 +84,11 @@ class IntegerPolyhedron { virtual ~IntegerPolyhedron() = default; + /// Return the kind of this IntegerPolyhedron. 
+ virtual Kind getKind() const { return Kind::IntegerPolyhedron; } + + static bool classof(const IntegerPolyhedron *cst) { return true; } + // Clones this object. std::unique_ptr clone() const; @@ -189,6 +201,9 @@ class IntegerPolyhedron { /// values and removes them. void setAndEliminate(unsigned pos, ArrayRef values); + /// Replaces the contents of this IntegerPolyhedron with `other`. + virtual void clearAndCopyFrom(const IntegerPolyhedron &other); + /// Gather positions of all lower and upper bounds of the identifier at `pos`, /// and optionally any equalities on it. In addition, the bounds are to be /// independent of identifiers in position range [`offset`, `offset` + `num`). diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index b6f592a71023..6c11bf74a414 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -2643,25 +2643,35 @@ void FlatAffineConstraints::removeTrivialRedundancy() { // the savings. } -void FlatAffineConstraints::clearAndCopyFrom( - const FlatAffineConstraints &other) { +void FlatAffineConstraints::clearAndCopyFrom(const IntegerPolyhedron &other) { if (auto *otherValueSet = dyn_cast(&other)) assert(!otherValueSet->hasValues() && "cannot copy associated Values into FlatAffineConstraints"); - // Note: Assigment operator does not vtable pointer, so kind does not change. - *this = other; + + // Note: Assigment operator does not vtable pointer, so kind does not + // change. 
+ if (auto *otherValueSet = dyn_cast(&other)) + *this = *otherValueSet; + else + *static_cast(this) = other; } void FlatAffineValueConstraints::clearAndCopyFrom( - const FlatAffineConstraints &other) { + const IntegerPolyhedron &other) { + if (auto *otherValueSet = dyn_cast(&other)) { *this = *otherValueSet; - } else { - *static_cast(this) = other; - values.clear(); - values.resize(numIds, None); + return; } + + if (auto *otherValueSet = dyn_cast(&other)) + *static_cast(this) = *otherValueSet; + else + *static_cast(this) = other; + + values.clear(); + values.resize(numIds, None); } static std::pair diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 03de0e97aaee..06b39cbc0f8b 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -303,6 +303,10 @@ void IntegerPolyhedron::setAndEliminate(unsigned pos, removeIdRange(pos, pos + values.size()); } +void IntegerPolyhedron::clearAndCopyFrom(const IntegerPolyhedron &other) { + *this = other; +} + void IntegerPolyhedron::printSpace(raw_ostream &os) const { os << "\nConstraints (" << getNumDimIds() << " dims, " << getNumSymbolIds() << " symbols, " << getNumLocalIds() << " locals), (" << getNumConstraints() From 085f078307bac264301b07f6e47e2a04e90a6f1d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 5 Jan 2022 13:09:25 -0500 Subject: [PATCH 712/992] Revert "Revert D109159 "[amdgpu] Enable selection of `s_cselect_b64`."" This reverts commit 859ebca744e634dcc89a2294ffa41574f947bd62. The change contained many unrelated changes and e.g. restored unit test failes for the old lld port. 
--- .../clangd/unittests/TestScheme.h | 0 clang/lib/Driver/ToolChains/HIP.cpp | 460 ----- clang/lib/Driver/ToolChains/HIP.h | 108 -- .../sanitizer-blacklist-vfsoverlay.yaml | 15 - .../catch-alignment-assumption-blacklist.c | 28 - ...tch-nullptr-and-nonzero-offset-blacklist.c | 34 - clang/test/CodeGen/ubsan-blacklist.c | 28 - clang/test/CodeGenCXX/cfi-blacklist.cpp | 41 - .../Driver/debug-var-experimental-switch.c | 2 - clang/test/Sema/branch-protection-attr-err.c | 22 - .../cmake/Modules/CustomLibcxx/CMakeLists.txt | 29 - .../sanitizer_persistent_allocator.h | 110 -- .../lib/tsan/rtl/tsan_update_shadow_word.inc | 59 - .../fuzzer/EntropicScalePerExecTimeTest.cpp | 51 - .../fuzzer/entropic-scale-per-exec-time.test | 8 - .../TestCases/mem_info_cache_entries.cpp | 10 - .../memprof/TestCases/print_miss_rate.cpp | 14 - .../alignment-assumption-ignorelist.cppp | 20 - libcxx/cmake/caches/Generic-32bits.cmake | 1 - libcxx/include/__memory/pointer_safety.h | 52 - libcxx/test/libcxx/atomics/ext-int.verify.cpp | 28 - .../libcpp-has-no-threads.compile.fail.cpp | 22 - .../atomics/libcpp-has-no-threads.pass.cpp | 19 - .../algorithms/robust_against_adl.pass.cpp | 183 -- .../trivially_copyable.compile.fail.cpp | 71 - .../insert_iter_value.pass.cpp | 98 - .../support.limits.general/charconv.pass.cpp | 33 - .../memory_resource.version.pass.cpp | 35 - libcxx/test/std/numerics/c.math/abs.fail.cpp | 31 - .../string.view.cons/deduct.pass.cpp | 49 - .../any.cast/const_correctness.fail.cpp | 50 - .../any.cast/not_copy_constructible.fail.cpp | 62 - .../declare_no_pointers.pass.cpp | 28 - .../declare_reachable.pass.cpp | 29 - .../get_pointer_safety.pass.cpp | 44 - .../result_of.deprecated.fail.cpp | 22 - libcxx/test/support/coroutine_types.h | 74 - libcxx/test/support/tracked_value.h | 59 - libcxx/utils/google-benchmark/.clang-format | 5 - .../google-benchmark/.github/.libcxx-setup.sh | 24 - .../.github/ISSUE_TEMPLATE/bug_report.md | 32 - .../.github/ISSUE_TEMPLATE/feature_request.md | 20 - 
.../.github/workflows/bazel.yml | 30 - .../workflows/build-and-test-perfcounters.yml | 44 - .../.github/workflows/build-and-test.yml | 110 -- .../.github/workflows/pylint.yml | 26 - .../.github/workflows/sanitizer.yml | 78 - .../.github/workflows/test_bindings.yml | 24 - libcxx/utils/google-benchmark/.gitignore | 66 - libcxx/utils/google-benchmark/.travis.yml | 208 --- .../utils/google-benchmark/.ycm_extra_conf.py | 115 -- libcxx/utils/google-benchmark/AUTHORS | 60 - libcxx/utils/google-benchmark/BUILD.bazel | 44 - libcxx/utils/google-benchmark/CMakeLists.txt | 313 ---- libcxx/utils/google-benchmark/CONTRIBUTING.md | 58 - libcxx/utils/google-benchmark/CONTRIBUTORS | 85 - libcxx/utils/google-benchmark/LICENSE | 202 -- libcxx/utils/google-benchmark/README.md | 1378 -------------- libcxx/utils/google-benchmark/WORKSPACE | 51 - libcxx/utils/google-benchmark/_config.yml | 2 - libcxx/utils/google-benchmark/appveyor.yml | 50 - .../google-benchmark/bindings/python/BUILD | 3 - .../bindings/python/build_defs.bzl | 25 - .../bindings/python/google_benchmark/BUILD | 38 - .../python/google_benchmark/__init__.py | 158 -- .../python/google_benchmark/benchmark.cc | 181 -- .../python/google_benchmark/example.py | 136 -- .../bindings/python/pybind11.BUILD | 20 - .../bindings/python/python_headers.BUILD | 6 - .../bindings/python/requirements.txt | 2 - .../cmake/AddCXXCompilerFlag.cmake | 78 - .../cmake/CXXFeatureCheck.cmake | 69 - .../google-benchmark/cmake/Config.cmake.in | 1 - .../cmake/GetGitVersion.cmake | 58 - .../google-benchmark/cmake/GoogleTest.cmake | 41 - .../cmake/GoogleTest.cmake.in | 58 - .../google-benchmark/cmake/benchmark.pc.in | 12 - .../cmake/gnu_posix_regex.cpp | 12 - .../cmake/llvm-toolchain.cmake | 8 - .../google-benchmark/cmake/posix_regex.cpp | 14 - .../google-benchmark/cmake/split_list.cmake | 3 - .../google-benchmark/cmake/std_regex.cpp | 10 - .../google-benchmark/cmake/steady_clock.cpp | 7 - .../cmake/thread_safety_attributes.cpp | 4 - 
libcxx/utils/google-benchmark/dependencies.md | 18 - .../google-benchmark/docs/AssemblyTests.md | 147 -- .../utils/google-benchmark/docs/_config.yml | 1 - .../google-benchmark/docs/perf_counters.md | 34 - .../docs/random_interleaving.md | 13 - .../utils/google-benchmark/docs/releasing.md | 22 - libcxx/utils/google-benchmark/docs/tools.md | 203 -- .../include/benchmark/benchmark.h | 1654 ---------------- .../utils/google-benchmark/requirements.txt | 2 - libcxx/utils/google-benchmark/setup.py | 140 -- .../utils/google-benchmark/src/CMakeLists.txt | 120 -- libcxx/utils/google-benchmark/src/arraysize.h | 33 - .../utils/google-benchmark/src/benchmark.cc | 617 ------ .../src/benchmark_api_internal.cc | 94 - .../src/benchmark_api_internal.h | 78 - .../google-benchmark/src/benchmark_main.cc | 17 - .../google-benchmark/src/benchmark_name.cc | 58 - .../src/benchmark_register.cc | 461 ----- .../google-benchmark/src/benchmark_register.h | 108 -- .../google-benchmark/src/benchmark_runner.cc | 349 ---- .../google-benchmark/src/benchmark_runner.h | 106 -- libcxx/utils/google-benchmark/src/check.h | 82 - .../utils/google-benchmark/src/colorprint.cc | 188 -- .../utils/google-benchmark/src/colorprint.h | 33 - .../google-benchmark/src/commandlineflags.cc | 286 --- .../google-benchmark/src/commandlineflags.h | 116 -- .../utils/google-benchmark/src/complexity.cc | 240 --- .../utils/google-benchmark/src/complexity.h | 55 - .../google-benchmark/src/console_reporter.cc | 177 -- libcxx/utils/google-benchmark/src/counter.cc | 80 - libcxx/utils/google-benchmark/src/counter.h | 32 - .../google-benchmark/src/csv_reporter.cc | 154 -- .../utils/google-benchmark/src/cycleclock.h | 225 --- .../google-benchmark/src/internal_macros.h | 102 - .../google-benchmark/src/json_reporter.cc | 269 --- libcxx/utils/google-benchmark/src/log.h | 74 - libcxx/utils/google-benchmark/src/mutex.h | 155 -- .../google-benchmark/src/perf_counters.cc | 132 -- .../google-benchmark/src/perf_counters.h | 172 -- 
libcxx/utils/google-benchmark/src/re.h | 158 -- libcxx/utils/google-benchmark/src/reporter.cc | 116 -- libcxx/utils/google-benchmark/src/sleep.cc | 67 - libcxx/utils/google-benchmark/src/sleep.h | 15 - .../utils/google-benchmark/src/statistics.cc | 195 -- .../utils/google-benchmark/src/statistics.h | 37 - .../utils/google-benchmark/src/string_util.cc | 268 --- .../utils/google-benchmark/src/string_util.h | 61 - libcxx/utils/google-benchmark/src/sysinfo.cc | 726 -------- .../google-benchmark/src/thread_manager.h | 64 - .../utils/google-benchmark/src/thread_timer.h | 86 - libcxx/utils/google-benchmark/src/timers.cc | 253 --- libcxx/utils/google-benchmark/src/timers.h | 48 - .../google-benchmark/test/AssemblyTests.cmake | 46 - libcxx/utils/google-benchmark/test/BUILD | 74 - .../google-benchmark/test/CMakeLists.txt | 271 --- .../test/args_product_test.cc | 77 - .../utils/google-benchmark/test/basic_test.cc | 151 -- .../google-benchmark/test/benchmark_gtest.cc | 165 -- .../test/benchmark_name_gtest.cc | 74 - .../benchmark_random_interleaving_gtest.cc | 126 -- .../google-benchmark/test/benchmark_test.cc | 245 --- .../test/clobber_memory_assembly_test.cc | 64 - .../test/commandlineflags_gtest.cc | 228 --- .../google-benchmark/test/complexity_test.cc | 222 --- .../utils/google-benchmark/test/cxx03_test.cc | 63 - .../google-benchmark/test/diagnostics_test.cc | 80 - .../test/display_aggregates_only_test.cc | 43 - .../test/donotoptimize_assembly_test.cc | 163 -- .../test/donotoptimize_test.cc | 52 - .../google-benchmark/test/filter_test.cc | 118 -- .../google-benchmark/test/fixture_test.cc | 51 - .../test/internal_threading_test.cc | 184 -- .../google-benchmark/test/link_main_test.cc | 8 - .../utils/google-benchmark/test/map_test.cc | 57 - .../test/memory_manager_test.cc | 46 - .../test/multiple_ranges_test.cc | 96 - .../google-benchmark/test/options_test.cc | 76 - .../utils/google-benchmark/test/output_test.h | 213 --- .../test/output_test_helper.cc | 520 ------ 
.../test/perf_counters_gtest.cc | 145 -- .../test/perf_counters_test.cc | 27 - .../test/register_benchmark_test.cc | 184 -- .../google-benchmark/test/repetitions_test.cc | 208 --- .../test/report_aggregates_only_test.cc | 39 - .../test/reporter_output_test.cc | 956 ---------- .../test/skip_with_error_test.cc | 195 -- .../test/state_assembly_test.cc | 68 - .../google-benchmark/test/statistics_gtest.cc | 28 - .../test/string_util_gtest.cc | 161 -- .../test/templated_fixture_test.cc | 28 - .../test/user_counters_tabular_test.cc | 500 ----- .../test/user_counters_test.cc | 555 ------ .../test/user_counters_thousands_test.cc | 183 -- .../utils/google-benchmark/tools/BUILD.bazel | 19 - .../utils/google-benchmark/tools/compare.py | 429 ----- .../tools/gbench/Inputs/test1_run1.json | 119 -- .../tools/gbench/Inputs/test1_run2.json | 119 -- .../tools/gbench/Inputs/test2_run.json | 81 - .../tools/gbench/Inputs/test3_run0.json | 65 - .../tools/gbench/Inputs/test3_run1.json | 65 - .../tools/gbench/Inputs/test4_run.json | 96 - .../google-benchmark/tools/gbench/__init__.py | 8 - .../google-benchmark/tools/gbench/report.py | 991 ---------- .../google-benchmark/tools/gbench/util.py | 181 -- .../google-benchmark/tools/requirements.txt | 1 - .../utils/google-benchmark/tools/strip_asm.py | 151 -- .../lld/ReaderWriter/MachOLinkingContext.h | 505 ----- lld/include/lld/ReaderWriter/YamlContext.h | 42 - lld/lib/CMakeLists.txt | 3 - lld/lib/Core/CMakeLists.txt | 24 - lld/lib/Core/DefinedAtom.cpp | 81 - lld/lib/Core/Error.cpp | 93 - lld/lib/Core/File.cpp | 28 - lld/lib/Core/LinkingContext.cpp | 69 - lld/lib/Core/Reader.cpp | 113 -- lld/lib/Core/Resolver.cpp | 496 ----- lld/lib/Core/SymbolTable.cpp | 284 --- lld/lib/Core/Writer.cpp | 17 - lld/lib/Driver/CMakeLists.txt | 23 - lld/lib/Driver/DarwinLdDriver.cpp | 1229 ------------ lld/lib/Driver/DarwinLdOptions.td | 250 --- lld/lib/ReaderWriter/CMakeLists.txt | 20 - lld/lib/ReaderWriter/FileArchive.cpp | 227 --- 
lld/lib/ReaderWriter/MachO/ArchHandler.cpp | 171 -- lld/lib/ReaderWriter/MachO/ArchHandler.h | 322 ---- .../ReaderWriter/MachO/ArchHandler_arm.cpp | 1522 --------------- .../ReaderWriter/MachO/ArchHandler_arm64.cpp | 897 --------- .../ReaderWriter/MachO/ArchHandler_x86.cpp | 643 ------- .../ReaderWriter/MachO/ArchHandler_x86_64.cpp | 899 --------- lld/lib/ReaderWriter/MachO/Atoms.h | 180 -- lld/lib/ReaderWriter/MachO/CMakeLists.txt | 36 - .../ReaderWriter/MachO/CompactUnwindPass.cpp | 580 ------ lld/lib/ReaderWriter/MachO/DebugInfo.h | 106 -- lld/lib/ReaderWriter/MachO/ExecutableAtoms.h | 154 -- lld/lib/ReaderWriter/MachO/File.h | 467 ----- .../ReaderWriter/MachO/FlatNamespaceFile.h | 62 - lld/lib/ReaderWriter/MachO/GOTPass.cpp | 183 -- lld/lib/ReaderWriter/MachO/LayoutPass.cpp | 490 ----- lld/lib/ReaderWriter/MachO/LayoutPass.h | 118 -- .../MachO/MachOLinkingContext.cpp | 1104 ----------- .../ReaderWriter/MachO/MachONormalizedFile.h | 336 ---- .../MachO/MachONormalizedFileBinaryReader.cpp | 614 ------ .../MachO/MachONormalizedFileBinaryUtils.h | 213 --- .../MachO/MachONormalizedFileBinaryWriter.cpp | 1560 ---------------- .../MachO/MachONormalizedFileFromAtoms.cpp | 1657 ----------------- .../MachO/MachONormalizedFileToAtoms.cpp | 1635 ---------------- .../MachO/MachONormalizedFileYAML.cpp | 840 --------- lld/lib/ReaderWriter/MachO/MachOPasses.h | 29 - lld/lib/ReaderWriter/MachO/ObjCPass.cpp | 131 -- lld/lib/ReaderWriter/MachO/SectCreateFile.h | 101 - lld/lib/ReaderWriter/MachO/ShimPass.cpp | 128 -- lld/lib/ReaderWriter/MachO/StubsPass.cpp | 377 ---- lld/lib/ReaderWriter/MachO/TLVPass.cpp | 140 -- lld/lib/ReaderWriter/MachO/WriterMachO.cpp | 70 - lld/lib/ReaderWriter/YAML/CMakeLists.txt | 9 - .../ReaderWriter/YAML/ReaderWriterYAML.cpp | 1403 -------------- lld/test/ELF/Inputs/copy-rel.s | 11 - lld/test/ELF/copy-rel.s | 25 - lld/test/ELF/relocation-copy-alias.s | 69 - .../darwin/Inputs/native-and-mach-o.objtxt | 17 - .../darwin/Inputs/native-and-mach-o2.objtxt | 19 
- lld/test/darwin/cmdline-lto_library.objtxt | 11 - lld/test/darwin/cmdline-objc_gc.objtxt | 15 - .../darwin/cmdline-objc_gc_compaction.objtxt | 15 - lld/test/darwin/cmdline-objc_gc_only.objtxt | 15 - lld/test/darwin/native-and-mach-o.objtxt | 27 - lld/test/mach-o/Inputs/DependencyDump.py | 30 - .../Inputs/MacOSX.sdk/usr/lib/libSystem.tbd | 42 - lld/test/mach-o/Inputs/PIE.yaml | 6 - lld/test/mach-o/Inputs/arm-interworking.yaml | 83 - lld/test/mach-o/Inputs/arm-shims.yaml | 60 - lld/test/mach-o/Inputs/arm64/libSystem.yaml | 13 - lld/test/mach-o/Inputs/armv7/libSystem.yaml | 13 - lld/test/mach-o/Inputs/bar.yaml | 18 - lld/test/mach-o/Inputs/cstring-sections.yaml | 25 - .../mach-o/Inputs/exported_symbols_list.exp | 6 - lld/test/mach-o/Inputs/full.filelist | 3 - lld/test/mach-o/Inputs/got-order.yaml | 53 - lld/test/mach-o/Inputs/got-order2.yaml | 11 - lld/test/mach-o/Inputs/hello-world-arm64.yaml | 8 - lld/test/mach-o/Inputs/hello-world-armv6.yaml | 7 - lld/test/mach-o/Inputs/hello-world-armv7.yaml | 7 - lld/test/mach-o/Inputs/hello-world-x86.yaml | 7 - .../mach-o/Inputs/hello-world-x86_64.yaml | 8 - lld/test/mach-o/Inputs/hw.raw_bytes | 1 - .../mach-o/Inputs/interposing-section.yaml | 6 - .../mach-o/Inputs/lazy-bind-x86_64-2.yaml | 8 - .../mach-o/Inputs/lazy-bind-x86_64-3.yaml | 8 - lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml | 8 - .../usr/lib/libmyshared.dylib | Bin 20628 -> 0 bytes .../lib-search-paths/usr/lib/libmystatic.a | Bin 556 -> 0 bytes .../lib-search-paths/usr/local/lib/file.o | Bin 404 -> 0 bytes lld/test/mach-o/Inputs/libbar.a | Bin 824 -> 0 bytes lld/test/mach-o/Inputs/libfoo.a | Bin 1320 -> 0 bytes .../no-version-min-load-command-object.yaml | 22 - lld/test/mach-o/Inputs/order_file-basic.order | 11 - lld/test/mach-o/Inputs/partial.filelist | 3 - .../Inputs/re-exported-dylib-ordinal.yaml | 21 - .../Inputs/re-exported-dylib-ordinal2.yaml | 18 - .../Inputs/re-exported-dylib-ordinal3.yaml | 19 - lld/test/mach-o/Inputs/swift-version-1.yaml | 18 - 
.../Inputs/unwind-info-simple-arm64.yaml | 13 - .../Inputs/use-dylib-install-names.yaml | 28 - lld/test/mach-o/Inputs/use-simple-dylib.yaml | 58 - .../mach-o/Inputs/write-final-sections.yaml | 20 - lld/test/mach-o/Inputs/wrong-arch-error.yaml | 24 - lld/test/mach-o/Inputs/x86/libSystem.yaml | 13 - lld/test/mach-o/Inputs/x86_64/libSystem.yaml | 13 - lld/test/mach-o/PIE.yaml | 40 - lld/test/mach-o/align_text.yaml | 45 - lld/test/mach-o/arm-interworking-movw.yaml | 393 ---- lld/test/mach-o/arm-interworking.yaml | 288 --- lld/test/mach-o/arm-shims.yaml | 126 -- .../mach-o/arm-subsections-via-symbols.yaml | 60 - .../mach-o/arm64-reloc-negDelta32-fixup.yaml | 124 -- .../arm64-relocs-errors-delta64-offset.yaml | 65 - lld/test/mach-o/arm64-section-order.yaml | 67 - lld/test/mach-o/bind-opcodes.yaml | 140 -- lld/test/mach-o/cstring-sections.yaml | 65 - .../mach-o/data-in-code-load-command.yaml | 35 - lld/test/mach-o/data-only-dylib.yaml | 27 - lld/test/mach-o/dead-strip-globals.yaml | 31 - lld/test/mach-o/debug-syms.yaml | 249 --- lld/test/mach-o/demangle.yaml | 74 - lld/test/mach-o/dependency_info.yaml | 19 - .../mach-o/do-not-emit-unwind-fde-arm64.yaml | 208 --- lld/test/mach-o/dso_handle.yaml | 62 - lld/test/mach-o/dylib-install-names.yaml | 74 - lld/test/mach-o/eh-frame-relocs-arm64.yaml | 318 ---- lld/test/mach-o/empty-sections.yaml | 9 - .../mach-o/error-simulator-vs-macosx.yaml | 30 - lld/test/mach-o/exe-offsets.yaml | 45 - lld/test/mach-o/exe-segment-overlap.yaml | 44 - lld/test/mach-o/executable-exports.yaml | 46 - lld/test/mach-o/export-trie-order.yaml | 62 - .../mach-o/exported_symbols_list-dylib.yaml | 77 - .../mach-o/exported_symbols_list-obj.yaml | 67 - .../mach-o/exported_symbols_list-undef.yaml | 55 - lld/test/mach-o/fat-archive.yaml | 45 - lld/test/mach-o/filelist.yaml | 18 - .../mach-o/flat_namespace_undef_error.yaml | 17 - .../mach-o/flat_namespace_undef_suppress.yaml | 17 - lld/test/mach-o/force_load-dylib.yaml | 45 - 
lld/test/mach-o/force_load-x86_64.yaml | 38 - lld/test/mach-o/framework-user-paths.yaml | 41 - .../mach-o/function-starts-load-command.yaml | 32 - lld/test/mach-o/gcc_except_tab-got-arm64.yaml | 53 - lld/test/mach-o/got-order.yaml | 69 - lld/test/mach-o/hello-world-arm64.yaml | 102 - lld/test/mach-o/hello-world-armv6.yaml | 64 - lld/test/mach-o/hello-world-armv7.yaml | 76 - lld/test/mach-o/hello-world-x86.yaml | 62 - lld/test/mach-o/hello-world-x86_64.yaml | 120 -- lld/test/mach-o/image-base.yaml | 28 - lld/test/mach-o/infer-arch.yaml | 29 - lld/test/mach-o/interposing-section.yaml | 72 - lld/test/mach-o/keep_private_externs.yaml | 63 - lld/test/mach-o/lazy-bind-x86_64.yaml | 111 -- lld/test/mach-o/lc_segment_filesize.yaml | 31 - lld/test/mach-o/lib-search-paths.yaml | 16 - lld/test/mach-o/library-order.yaml | 45 - lld/test/mach-o/library-rescan.yaml | 46 - .../libresolve-bizarre-root-override.yaml | 17 - .../libresolve-multiple-syslibroots.yaml | 17 - .../mach-o/libresolve-one-syslibroot.yaml | 25 - lld/test/mach-o/libresolve-simple.yaml | 21 - lld/test/mach-o/libresolve-user-paths.yaml | 20 - lld/test/mach-o/libresolve-z.yaml | 21 - lld/test/mach-o/lit.local.cfg | 4 - lld/test/mach-o/load-commands-size.yaml | 305 --- lld/test/mach-o/mach_header-cpusubtype.yaml | 34 - lld/test/mach-o/mh_bundle_header.yaml | 54 - lld/test/mach-o/mh_dylib_header.yaml | 53 - lld/test/mach-o/objc-category-list-atom.yaml | 70 - .../objc-image-info-host-vs-simulator.yaml | 23 - .../mach-o/objc-image-info-invalid-size.yaml | 20 - .../objc-image-info-invalid-version.yaml | 20 - ...c-image-info-mismatched-swift-version.yaml | 20 - .../mach-o/objc-image-info-pass-output.yaml | 30 - .../objc-image-info-simulator-vs-host.yaml | 23 - .../objc-image-info-unsupported-gc.yaml | 20 - lld/test/mach-o/objc_export_list.yaml | 63 - lld/test/mach-o/order_file-basic.yaml | 75 - lld/test/mach-o/parse-aliases.yaml | 90 - lld/test/mach-o/parse-arm-relocs.yaml | 818 -------- 
lld/test/mach-o/parse-cfstring32.yaml | 94 - lld/test/mach-o/parse-cfstring64.yaml | 108 -- lld/test/mach-o/parse-compact-unwind32.yaml | 72 - lld/test/mach-o/parse-compact-unwind64.yaml | 76 - lld/test/mach-o/parse-data-in-code-armv7.yaml | 157 -- lld/test/mach-o/parse-data-in-code-x86.yaml | 77 - lld/test/mach-o/parse-data-relocs-arm64.yaml | 244 --- lld/test/mach-o/parse-data-relocs-x86_64.yaml | 372 ---- lld/test/mach-o/parse-data.yaml | 119 -- .../mach-o/parse-eh-frame-relocs-x86_64.yaml | 176 -- lld/test/mach-o/parse-eh-frame-x86-anon.yaml | 129 -- .../mach-o/parse-eh-frame-x86-labeled.yaml | 193 -- lld/test/mach-o/parse-eh-frame.yaml | 88 - lld/test/mach-o/parse-function.yaml | 100 - lld/test/mach-o/parse-initializers32.yaml | 84 - lld/test/mach-o/parse-initializers64.yaml | 105 -- lld/test/mach-o/parse-literals-error.yaml | 25 - lld/test/mach-o/parse-literals.yaml | 93 - lld/test/mach-o/parse-non-lazy-pointers.yaml | 98 - lld/test/mach-o/parse-relocs-x86.yaml | 296 --- lld/test/mach-o/parse-section-no-symbol.yaml | 23 - lld/test/mach-o/parse-tentative-defs.yaml | 88 - lld/test/mach-o/parse-text-relocs-arm64.yaml | 237 --- lld/test/mach-o/parse-text-relocs-x86_64.yaml | 204 -- lld/test/mach-o/parse-tlv-relocs-x86-64.yaml | 100 - .../mach-o/re-exported-dylib-ordinal.yaml | 46 - lld/test/mach-o/rpath.yaml | 38 - lld/test/mach-o/run-tlv-pass-x86-64.yaml | 144 -- lld/test/mach-o/sdk-version-error.yaml | 22 - lld/test/mach-o/sectalign.yaml | 80 - lld/test/mach-o/sectattrs.yaml | 30 - lld/test/mach-o/sectcreate.yaml | 12 - lld/test/mach-o/seg-protection-arm64.yaml | 78 - lld/test/mach-o/seg-protection-x86_64.yaml | 78 - lld/test/mach-o/source-version.yaml | 28 - lld/test/mach-o/stack-size.yaml | 24 - lld/test/mach-o/string-table.yaml | 66 - lld/test/mach-o/stub-link.s | 21 - .../subsections-via-symbols-default.yaml | 28 - ...olevel_namespace_undef_dynamic_lookup.yaml | 17 - ...evel_namespace_undef_warning_suppress.yaml | 23 - 
lld/test/mach-o/unwind-info-simple-arm64.yaml | 267 --- .../mach-o/unwind-info-simple-x86_64.yaml | 133 -- .../mach-o/upward-dylib-load-command.yaml | 48 - lld/test/mach-o/upward-dylib-paths.yaml | 18 - lld/test/mach-o/usage.yaml | 8 - lld/test/mach-o/use-dylib.yaml | 39 - lld/test/mach-o/use-simple-dylib.yaml | 73 - .../version-min-load-command-object.yaml | 35 - lld/test/mach-o/version-min-load-command.yaml | 43 - lld/test/mach-o/write-final-sections.yaml | 165 -- lld/test/mach-o/wrong-arch-error.yaml | 28 - lld/unittests/CMakeLists.txt | 16 - lld/unittests/DriverTests/CMakeLists.txt | 9 - .../DriverTests/DarwinLdDriverTest.cpp | 263 --- lld/unittests/MachOTests/CMakeLists.txt | 14 - .../MachONormalizedFileBinaryReaderTests.cpp | 753 -------- .../MachONormalizedFileBinaryWriterTests.cpp | 695 ------- .../MachONormalizedFileToAtomsTests.cpp | 140 -- .../MachONormalizedFileYAMLTests.cpp | 762 -------- .../MachOTests/empty_obj_x86_armv7.txt | 1272 ------------- .../Language/CPlusPlus/LibCxxOptional.cpp | 84 - lldb/source/lldb.cpp | 77 - .../data-formatter-stl/libcxx/deque/Makefile | 5 - .../deque/TestDataFormatterLibcxxDeque.py | 25 - .../data-formatter-stl/libcxx/deque/main.cpp | 8 - .../data-formatter-stl/libcxx/list/Makefile | 6 - .../list/TestDataFormatterLibcxxList.py | 218 --- .../libcxx/list/loop/Makefile | 6 - .../loop/TestDataFormatterLibcxxListLoop.py | 69 - .../libcxx/list/loop/main.cpp | 35 - .../data-formatter-stl/libcxx/list/main.cpp | 44 - .../libcxx/optional/Makefile | 6 - .../TestDataFormatterLibcxxOptional.py | 73 - .../libcxx/optional/main.cpp | 42 - .../libcxx/unordered/Makefile | 9 - .../unordered/TestDataFormatterUnordered.py | 78 - .../libcxx/unordered/main.cpp | 80 - .../libstdcpp/list/Makefile | 6 - .../list/TestDataFormatterStdList.py | 207 -- .../libstdcpp/list/main.cpp | 34 - .../gdb_remote_client/gdbclientutils.py | 717 ------- .../API/functionalities/memory/read/Makefile | 3 - .../memory/read/TestMemoryRead.py | 177 -- 
.../API/functionalities/memory/read/main.cpp | 13 - .../aarch64/tagged_memory_region/Makefile | 3 - .../TestAArch64LinuxTaggedMemoryRegion.py | 42 - .../linux/aarch64/tagged_memory_region/main.c | 17 - llvm/lib/Analysis/ReleaseModeModelRunner.cpp | 90 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 98 +- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 7 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 5 +- .../Target/M68k/GlSel/M68kCallLowering.cpp | 152 -- llvm/lib/Target/M68k/GlSel/M68kCallLowering.h | 72 - .../M68k/GlSel/M68kInstructionSelector.cpp | 90 - .../Target/M68k/GlSel/M68kLegalizerInfo.cpp | 33 - .../lib/Target/M68k/GlSel/M68kLegalizerInfo.h | 29 - .../M68k/GlSel/M68kRegisterBankInfo.cpp | 105 -- .../Target/M68k/GlSel/M68kRegisterBankInfo.h | 45 - .../Target/M68k/GlSel/M68kRegisterBanks.td | 15 - llvm/test/Analysis/BasicAA/memset_pattern.ll | 21 - .../interleaved-store-accesses-with-gaps.ll | 204 -- .../gep-induction-operand-typesize-warning.ll | 21 - llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 28 +- .../AMDGPU/amdgpu-codegenprepare-idiv.ll | 1497 +++++++-------- .../AMDGPU/callee-special-input-sgprs.ll | 615 ------ llvm/test/CodeGen/AMDGPU/dagcombine-select.ll | 8 +- .../CodeGen/AMDGPU/extract_vector_dynelt.ll | 69 +- .../CodeGen/AMDGPU/extract_vector_elt-f64.ll | 64 +- .../CodeGen/AMDGPU/extract_vector_elt-i64.ll | 78 +- llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll | 19 - .../CodeGen/AMDGPU/insert_vector_dynelt.ll | 20 +- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 113 +- llvm/test/CodeGen/AMDGPU/llvm.mulo.ll | 104 +- llvm/test/CodeGen/AMDGPU/load-select-ptr.ll | 6 +- .../AMDGPU/lower-kernel-lds-global-uses.ll | 55 - .../AMDGPU/lower-module-lds-global-alias.ll | 93 - .../AMDGPU/lower-module-lds-global-uses.ll | 88 - .../AMDGPU/lower-module-lds-indirect.ll | 39 - llvm/test/CodeGen/AMDGPU/select64.ll | 46 +- llvm/test/CodeGen/AMDGPU/selectcc.ll | 9 +- llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 8 +- 
llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 10 +- llvm/test/CodeGen/X86/peep-test-5.ll | 56 - .../X86/Inputs/MachO_weak_defs_extra.s | 19 - .../JITLink/X86/MachO_skip_debug_sections.s | 21 - llvm/test/Transforms/LICM/no-hoist-prof.ll | 88 - llvm/test/Transforms/LICM/sink.ll | 69 - .../Verifier/dbg-invalid-enum-as-scope.ll | 16 - llvm/test/tools/llvm-nm/lit.local.cfg | 2 - 500 files changed, 1047 insertions(+), 65847 deletions(-) delete mode 100644 clang-tools-extra/clangd/unittests/TestScheme.h delete mode 100644 clang/lib/Driver/ToolChains/HIP.cpp delete mode 100644 clang/lib/Driver/ToolChains/HIP.h delete mode 100644 clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml delete mode 100644 clang/test/CodeGen/catch-alignment-assumption-blacklist.c delete mode 100644 clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c delete mode 100644 clang/test/CodeGen/ubsan-blacklist.c delete mode 100644 clang/test/CodeGenCXX/cfi-blacklist.cpp delete mode 100644 clang/test/Driver/debug-var-experimental-switch.c delete mode 100644 clang/test/Sema/branch-protection-attr-err.c delete mode 100644 compiler-rt/cmake/Modules/CustomLibcxx/CMakeLists.txt delete mode 100644 compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h delete mode 100644 compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc delete mode 100644 compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp delete mode 100644 compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test delete mode 100644 compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp delete mode 100644 compiler-rt/test/memprof/TestCases/print_miss_rate.cpp delete mode 100644 compiler-rt/test/ubsan/TestCases/Pointer/alignment-assumption-ignorelist.cppp delete mode 100644 libcxx/cmake/caches/Generic-32bits.cmake delete mode 100644 libcxx/include/__memory/pointer_safety.h delete mode 100644 libcxx/test/libcxx/atomics/ext-int.verify.cpp delete mode 100644 
libcxx/test/libcxx/atomics/libcpp-has-no-threads.compile.fail.cpp delete mode 100644 libcxx/test/libcxx/atomics/libcpp-has-no-threads.pass.cpp delete mode 100644 libcxx/test/std/algorithms/robust_against_adl.pass.cpp delete mode 100644 libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.compile.fail.cpp delete mode 100644 libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.pass.cpp delete mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/charconv.pass.cpp delete mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/memory_resource.version.pass.cpp delete mode 100644 libcxx/test/std/numerics/c.math/abs.fail.cpp delete mode 100644 libcxx/test/std/strings/string.view/string.view.cons/deduct.pass.cpp delete mode 100644 libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp delete mode 100644 libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp delete mode 100644 libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp delete mode 100644 libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp delete mode 100644 libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp delete mode 100644 libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp delete mode 100644 libcxx/test/support/coroutine_types.h delete mode 100644 libcxx/test/support/tracked_value.h delete mode 100644 libcxx/utils/google-benchmark/.clang-format delete mode 100755 libcxx/utils/google-benchmark/.github/.libcxx-setup.sh delete mode 100644 libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/bug_report.md delete mode 100644 libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/feature_request.md delete mode 100644 libcxx/utils/google-benchmark/.github/workflows/bazel.yml delete mode 100644 
libcxx/utils/google-benchmark/.github/workflows/build-and-test-perfcounters.yml delete mode 100644 libcxx/utils/google-benchmark/.github/workflows/build-and-test.yml delete mode 100644 libcxx/utils/google-benchmark/.github/workflows/pylint.yml delete mode 100644 libcxx/utils/google-benchmark/.github/workflows/sanitizer.yml delete mode 100644 libcxx/utils/google-benchmark/.github/workflows/test_bindings.yml delete mode 100644 libcxx/utils/google-benchmark/.gitignore delete mode 100644 libcxx/utils/google-benchmark/.travis.yml delete mode 100644 libcxx/utils/google-benchmark/.ycm_extra_conf.py delete mode 100644 libcxx/utils/google-benchmark/AUTHORS delete mode 100644 libcxx/utils/google-benchmark/BUILD.bazel delete mode 100644 libcxx/utils/google-benchmark/CMakeLists.txt delete mode 100644 libcxx/utils/google-benchmark/CONTRIBUTING.md delete mode 100644 libcxx/utils/google-benchmark/CONTRIBUTORS delete mode 100644 libcxx/utils/google-benchmark/LICENSE delete mode 100644 libcxx/utils/google-benchmark/README.md delete mode 100644 libcxx/utils/google-benchmark/WORKSPACE delete mode 100644 libcxx/utils/google-benchmark/_config.yml delete mode 100644 libcxx/utils/google-benchmark/appveyor.yml delete mode 100644 libcxx/utils/google-benchmark/bindings/python/BUILD delete mode 100644 libcxx/utils/google-benchmark/bindings/python/build_defs.bzl delete mode 100644 libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD delete mode 100644 libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py delete mode 100644 libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc delete mode 100644 libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py delete mode 100644 libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD delete mode 100644 libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD delete mode 100644 libcxx/utils/google-benchmark/bindings/python/requirements.txt delete mode 100644 
libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake delete mode 100644 libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake delete mode 100644 libcxx/utils/google-benchmark/cmake/Config.cmake.in delete mode 100644 libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake delete mode 100644 libcxx/utils/google-benchmark/cmake/GoogleTest.cmake delete mode 100644 libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in delete mode 100644 libcxx/utils/google-benchmark/cmake/benchmark.pc.in delete mode 100644 libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp delete mode 100644 libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake delete mode 100644 libcxx/utils/google-benchmark/cmake/posix_regex.cpp delete mode 100644 libcxx/utils/google-benchmark/cmake/split_list.cmake delete mode 100644 libcxx/utils/google-benchmark/cmake/std_regex.cpp delete mode 100644 libcxx/utils/google-benchmark/cmake/steady_clock.cpp delete mode 100644 libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp delete mode 100644 libcxx/utils/google-benchmark/dependencies.md delete mode 100644 libcxx/utils/google-benchmark/docs/AssemblyTests.md delete mode 100644 libcxx/utils/google-benchmark/docs/_config.yml delete mode 100644 libcxx/utils/google-benchmark/docs/perf_counters.md delete mode 100644 libcxx/utils/google-benchmark/docs/random_interleaving.md delete mode 100644 libcxx/utils/google-benchmark/docs/releasing.md delete mode 100644 libcxx/utils/google-benchmark/docs/tools.md delete mode 100644 libcxx/utils/google-benchmark/include/benchmark/benchmark.h delete mode 100644 libcxx/utils/google-benchmark/requirements.txt delete mode 100644 libcxx/utils/google-benchmark/setup.py delete mode 100644 libcxx/utils/google-benchmark/src/CMakeLists.txt delete mode 100644 libcxx/utils/google-benchmark/src/arraysize.h delete mode 100644 libcxx/utils/google-benchmark/src/benchmark.cc delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_api_internal.cc delete mode 
100644 libcxx/utils/google-benchmark/src/benchmark_api_internal.h delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_main.cc delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_name.cc delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_register.cc delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_register.h delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_runner.cc delete mode 100644 libcxx/utils/google-benchmark/src/benchmark_runner.h delete mode 100644 libcxx/utils/google-benchmark/src/check.h delete mode 100644 libcxx/utils/google-benchmark/src/colorprint.cc delete mode 100644 libcxx/utils/google-benchmark/src/colorprint.h delete mode 100644 libcxx/utils/google-benchmark/src/commandlineflags.cc delete mode 100644 libcxx/utils/google-benchmark/src/commandlineflags.h delete mode 100644 libcxx/utils/google-benchmark/src/complexity.cc delete mode 100644 libcxx/utils/google-benchmark/src/complexity.h delete mode 100644 libcxx/utils/google-benchmark/src/console_reporter.cc delete mode 100644 libcxx/utils/google-benchmark/src/counter.cc delete mode 100644 libcxx/utils/google-benchmark/src/counter.h delete mode 100644 libcxx/utils/google-benchmark/src/csv_reporter.cc delete mode 100644 libcxx/utils/google-benchmark/src/cycleclock.h delete mode 100644 libcxx/utils/google-benchmark/src/internal_macros.h delete mode 100644 libcxx/utils/google-benchmark/src/json_reporter.cc delete mode 100644 libcxx/utils/google-benchmark/src/log.h delete mode 100644 libcxx/utils/google-benchmark/src/mutex.h delete mode 100644 libcxx/utils/google-benchmark/src/perf_counters.cc delete mode 100644 libcxx/utils/google-benchmark/src/perf_counters.h delete mode 100644 libcxx/utils/google-benchmark/src/re.h delete mode 100644 libcxx/utils/google-benchmark/src/reporter.cc delete mode 100644 libcxx/utils/google-benchmark/src/sleep.cc delete mode 100644 libcxx/utils/google-benchmark/src/sleep.h delete mode 100644 
libcxx/utils/google-benchmark/src/statistics.cc delete mode 100644 libcxx/utils/google-benchmark/src/statistics.h delete mode 100644 libcxx/utils/google-benchmark/src/string_util.cc delete mode 100644 libcxx/utils/google-benchmark/src/string_util.h delete mode 100644 libcxx/utils/google-benchmark/src/sysinfo.cc delete mode 100644 libcxx/utils/google-benchmark/src/thread_manager.h delete mode 100644 libcxx/utils/google-benchmark/src/thread_timer.h delete mode 100644 libcxx/utils/google-benchmark/src/timers.cc delete mode 100644 libcxx/utils/google-benchmark/src/timers.h delete mode 100644 libcxx/utils/google-benchmark/test/AssemblyTests.cmake delete mode 100644 libcxx/utils/google-benchmark/test/BUILD delete mode 100644 libcxx/utils/google-benchmark/test/CMakeLists.txt delete mode 100644 libcxx/utils/google-benchmark/test/args_product_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/basic_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/benchmark_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/benchmark_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/complexity_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/cxx03_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/diagnostics_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/donotoptimize_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/filter_test.cc delete mode 100644 
libcxx/utils/google-benchmark/test/fixture_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/internal_threading_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/link_main_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/map_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/memory_manager_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/multiple_ranges_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/options_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/output_test.h delete mode 100644 libcxx/utils/google-benchmark/test/output_test_helper.cc delete mode 100644 libcxx/utils/google-benchmark/test/perf_counters_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/perf_counters_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/register_benchmark_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/repetitions_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/reporter_output_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/skip_with_error_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/state_assembly_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/statistics_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/string_util_gtest.cc delete mode 100644 libcxx/utils/google-benchmark/test/templated_fixture_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/user_counters_test.cc delete mode 100644 libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc delete mode 100644 libcxx/utils/google-benchmark/tools/BUILD.bazel delete mode 100755 libcxx/utils/google-benchmark/tools/compare.py delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json delete mode 100644 
libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/__init__.py delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/report.py delete mode 100644 libcxx/utils/google-benchmark/tools/gbench/util.py delete mode 100644 libcxx/utils/google-benchmark/tools/requirements.txt delete mode 100755 libcxx/utils/google-benchmark/tools/strip_asm.py delete mode 100644 lld/include/lld/ReaderWriter/MachOLinkingContext.h delete mode 100644 lld/include/lld/ReaderWriter/YamlContext.h delete mode 100644 lld/lib/CMakeLists.txt delete mode 100644 lld/lib/Core/CMakeLists.txt delete mode 100644 lld/lib/Core/DefinedAtom.cpp delete mode 100644 lld/lib/Core/Error.cpp delete mode 100644 lld/lib/Core/File.cpp delete mode 100644 lld/lib/Core/LinkingContext.cpp delete mode 100644 lld/lib/Core/Reader.cpp delete mode 100644 lld/lib/Core/Resolver.cpp delete mode 100644 lld/lib/Core/SymbolTable.cpp delete mode 100644 lld/lib/Core/Writer.cpp delete mode 100644 lld/lib/Driver/CMakeLists.txt delete mode 100644 lld/lib/Driver/DarwinLdDriver.cpp delete mode 100644 lld/lib/Driver/DarwinLdOptions.td delete mode 100644 lld/lib/ReaderWriter/CMakeLists.txt delete mode 100644 lld/lib/ReaderWriter/FileArchive.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/ArchHandler.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/ArchHandler.h delete mode 100644 lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp 
delete mode 100644 lld/lib/ReaderWriter/MachO/Atoms.h delete mode 100644 lld/lib/ReaderWriter/MachO/CMakeLists.txt delete mode 100644 lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/DebugInfo.h delete mode 100644 lld/lib/ReaderWriter/MachO/ExecutableAtoms.h delete mode 100644 lld/lib/ReaderWriter/MachO/File.h delete mode 100644 lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h delete mode 100644 lld/lib/ReaderWriter/MachO/GOTPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/LayoutPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/LayoutPass.h delete mode 100644 lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFile.h delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/MachOPasses.h delete mode 100644 lld/lib/ReaderWriter/MachO/ObjCPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/SectCreateFile.h delete mode 100644 lld/lib/ReaderWriter/MachO/ShimPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/StubsPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/TLVPass.cpp delete mode 100644 lld/lib/ReaderWriter/MachO/WriterMachO.cpp delete mode 100644 lld/lib/ReaderWriter/YAML/CMakeLists.txt delete mode 100644 lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp delete mode 100644 lld/test/ELF/Inputs/copy-rel.s delete mode 100644 lld/test/ELF/copy-rel.s delete mode 100644 lld/test/ELF/relocation-copy-alias.s delete mode 100644 lld/test/darwin/Inputs/native-and-mach-o.objtxt delete mode 100644 
lld/test/darwin/Inputs/native-and-mach-o2.objtxt delete mode 100644 lld/test/darwin/cmdline-lto_library.objtxt delete mode 100644 lld/test/darwin/cmdline-objc_gc.objtxt delete mode 100644 lld/test/darwin/cmdline-objc_gc_compaction.objtxt delete mode 100644 lld/test/darwin/cmdline-objc_gc_only.objtxt delete mode 100644 lld/test/darwin/native-and-mach-o.objtxt delete mode 100755 lld/test/mach-o/Inputs/DependencyDump.py delete mode 100644 lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd delete mode 100644 lld/test/mach-o/Inputs/PIE.yaml delete mode 100644 lld/test/mach-o/Inputs/arm-interworking.yaml delete mode 100644 lld/test/mach-o/Inputs/arm-shims.yaml delete mode 100644 lld/test/mach-o/Inputs/arm64/libSystem.yaml delete mode 100644 lld/test/mach-o/Inputs/armv7/libSystem.yaml delete mode 100644 lld/test/mach-o/Inputs/bar.yaml delete mode 100644 lld/test/mach-o/Inputs/cstring-sections.yaml delete mode 100644 lld/test/mach-o/Inputs/exported_symbols_list.exp delete mode 100644 lld/test/mach-o/Inputs/full.filelist delete mode 100644 lld/test/mach-o/Inputs/got-order.yaml delete mode 100644 lld/test/mach-o/Inputs/got-order2.yaml delete mode 100644 lld/test/mach-o/Inputs/hello-world-arm64.yaml delete mode 100644 lld/test/mach-o/Inputs/hello-world-armv6.yaml delete mode 100644 lld/test/mach-o/Inputs/hello-world-armv7.yaml delete mode 100644 lld/test/mach-o/Inputs/hello-world-x86.yaml delete mode 100644 lld/test/mach-o/Inputs/hello-world-x86_64.yaml delete mode 100644 lld/test/mach-o/Inputs/hw.raw_bytes delete mode 100644 lld/test/mach-o/Inputs/interposing-section.yaml delete mode 100644 lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml delete mode 100644 lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml delete mode 100644 lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml delete mode 100755 lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmyshared.dylib delete mode 100644 lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmystatic.a delete mode 100644 
lld/test/mach-o/Inputs/lib-search-paths/usr/local/lib/file.o delete mode 100644 lld/test/mach-o/Inputs/libbar.a delete mode 100644 lld/test/mach-o/Inputs/libfoo.a delete mode 100644 lld/test/mach-o/Inputs/no-version-min-load-command-object.yaml delete mode 100644 lld/test/mach-o/Inputs/order_file-basic.order delete mode 100644 lld/test/mach-o/Inputs/partial.filelist delete mode 100644 lld/test/mach-o/Inputs/re-exported-dylib-ordinal.yaml delete mode 100644 lld/test/mach-o/Inputs/re-exported-dylib-ordinal2.yaml delete mode 100644 lld/test/mach-o/Inputs/re-exported-dylib-ordinal3.yaml delete mode 100644 lld/test/mach-o/Inputs/swift-version-1.yaml delete mode 100644 lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml delete mode 100644 lld/test/mach-o/Inputs/use-dylib-install-names.yaml delete mode 100644 lld/test/mach-o/Inputs/use-simple-dylib.yaml delete mode 100644 lld/test/mach-o/Inputs/write-final-sections.yaml delete mode 100644 lld/test/mach-o/Inputs/wrong-arch-error.yaml delete mode 100644 lld/test/mach-o/Inputs/x86/libSystem.yaml delete mode 100644 lld/test/mach-o/Inputs/x86_64/libSystem.yaml delete mode 100644 lld/test/mach-o/PIE.yaml delete mode 100644 lld/test/mach-o/align_text.yaml delete mode 100644 lld/test/mach-o/arm-interworking-movw.yaml delete mode 100644 lld/test/mach-o/arm-interworking.yaml delete mode 100644 lld/test/mach-o/arm-shims.yaml delete mode 100644 lld/test/mach-o/arm-subsections-via-symbols.yaml delete mode 100644 lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml delete mode 100644 lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml delete mode 100644 lld/test/mach-o/arm64-section-order.yaml delete mode 100644 lld/test/mach-o/bind-opcodes.yaml delete mode 100644 lld/test/mach-o/cstring-sections.yaml delete mode 100644 lld/test/mach-o/data-in-code-load-command.yaml delete mode 100644 lld/test/mach-o/data-only-dylib.yaml delete mode 100644 lld/test/mach-o/dead-strip-globals.yaml delete mode 100644 lld/test/mach-o/debug-syms.yaml 
delete mode 100644 lld/test/mach-o/demangle.yaml delete mode 100644 lld/test/mach-o/dependency_info.yaml delete mode 100644 lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml delete mode 100644 lld/test/mach-o/dso_handle.yaml delete mode 100644 lld/test/mach-o/dylib-install-names.yaml delete mode 100644 lld/test/mach-o/eh-frame-relocs-arm64.yaml delete mode 100644 lld/test/mach-o/empty-sections.yaml delete mode 100644 lld/test/mach-o/error-simulator-vs-macosx.yaml delete mode 100644 lld/test/mach-o/exe-offsets.yaml delete mode 100644 lld/test/mach-o/exe-segment-overlap.yaml delete mode 100644 lld/test/mach-o/executable-exports.yaml delete mode 100644 lld/test/mach-o/export-trie-order.yaml delete mode 100644 lld/test/mach-o/exported_symbols_list-dylib.yaml delete mode 100644 lld/test/mach-o/exported_symbols_list-obj.yaml delete mode 100644 lld/test/mach-o/exported_symbols_list-undef.yaml delete mode 100644 lld/test/mach-o/fat-archive.yaml delete mode 100644 lld/test/mach-o/filelist.yaml delete mode 100644 lld/test/mach-o/flat_namespace_undef_error.yaml delete mode 100644 lld/test/mach-o/flat_namespace_undef_suppress.yaml delete mode 100644 lld/test/mach-o/force_load-dylib.yaml delete mode 100644 lld/test/mach-o/force_load-x86_64.yaml delete mode 100644 lld/test/mach-o/framework-user-paths.yaml delete mode 100644 lld/test/mach-o/function-starts-load-command.yaml delete mode 100644 lld/test/mach-o/gcc_except_tab-got-arm64.yaml delete mode 100644 lld/test/mach-o/got-order.yaml delete mode 100644 lld/test/mach-o/hello-world-arm64.yaml delete mode 100644 lld/test/mach-o/hello-world-armv6.yaml delete mode 100644 lld/test/mach-o/hello-world-armv7.yaml delete mode 100644 lld/test/mach-o/hello-world-x86.yaml delete mode 100644 lld/test/mach-o/hello-world-x86_64.yaml delete mode 100644 lld/test/mach-o/image-base.yaml delete mode 100644 lld/test/mach-o/infer-arch.yaml delete mode 100644 lld/test/mach-o/interposing-section.yaml delete mode 100644 
lld/test/mach-o/keep_private_externs.yaml delete mode 100644 lld/test/mach-o/lazy-bind-x86_64.yaml delete mode 100644 lld/test/mach-o/lc_segment_filesize.yaml delete mode 100644 lld/test/mach-o/lib-search-paths.yaml delete mode 100644 lld/test/mach-o/library-order.yaml delete mode 100644 lld/test/mach-o/library-rescan.yaml delete mode 100644 lld/test/mach-o/libresolve-bizarre-root-override.yaml delete mode 100644 lld/test/mach-o/libresolve-multiple-syslibroots.yaml delete mode 100644 lld/test/mach-o/libresolve-one-syslibroot.yaml delete mode 100644 lld/test/mach-o/libresolve-simple.yaml delete mode 100644 lld/test/mach-o/libresolve-user-paths.yaml delete mode 100644 lld/test/mach-o/libresolve-z.yaml delete mode 100644 lld/test/mach-o/lit.local.cfg delete mode 100644 lld/test/mach-o/load-commands-size.yaml delete mode 100644 lld/test/mach-o/mach_header-cpusubtype.yaml delete mode 100644 lld/test/mach-o/mh_bundle_header.yaml delete mode 100644 lld/test/mach-o/mh_dylib_header.yaml delete mode 100644 lld/test/mach-o/objc-category-list-atom.yaml delete mode 100644 lld/test/mach-o/objc-image-info-host-vs-simulator.yaml delete mode 100644 lld/test/mach-o/objc-image-info-invalid-size.yaml delete mode 100644 lld/test/mach-o/objc-image-info-invalid-version.yaml delete mode 100644 lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml delete mode 100644 lld/test/mach-o/objc-image-info-pass-output.yaml delete mode 100644 lld/test/mach-o/objc-image-info-simulator-vs-host.yaml delete mode 100644 lld/test/mach-o/objc-image-info-unsupported-gc.yaml delete mode 100644 lld/test/mach-o/objc_export_list.yaml delete mode 100644 lld/test/mach-o/order_file-basic.yaml delete mode 100644 lld/test/mach-o/parse-aliases.yaml delete mode 100644 lld/test/mach-o/parse-arm-relocs.yaml delete mode 100644 lld/test/mach-o/parse-cfstring32.yaml delete mode 100644 lld/test/mach-o/parse-cfstring64.yaml delete mode 100644 lld/test/mach-o/parse-compact-unwind32.yaml delete mode 100644 
lld/test/mach-o/parse-compact-unwind64.yaml delete mode 100644 lld/test/mach-o/parse-data-in-code-armv7.yaml delete mode 100644 lld/test/mach-o/parse-data-in-code-x86.yaml delete mode 100644 lld/test/mach-o/parse-data-relocs-arm64.yaml delete mode 100644 lld/test/mach-o/parse-data-relocs-x86_64.yaml delete mode 100644 lld/test/mach-o/parse-data.yaml delete mode 100644 lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml delete mode 100644 lld/test/mach-o/parse-eh-frame-x86-anon.yaml delete mode 100644 lld/test/mach-o/parse-eh-frame-x86-labeled.yaml delete mode 100644 lld/test/mach-o/parse-eh-frame.yaml delete mode 100644 lld/test/mach-o/parse-function.yaml delete mode 100644 lld/test/mach-o/parse-initializers32.yaml delete mode 100644 lld/test/mach-o/parse-initializers64.yaml delete mode 100644 lld/test/mach-o/parse-literals-error.yaml delete mode 100644 lld/test/mach-o/parse-literals.yaml delete mode 100644 lld/test/mach-o/parse-non-lazy-pointers.yaml delete mode 100644 lld/test/mach-o/parse-relocs-x86.yaml delete mode 100644 lld/test/mach-o/parse-section-no-symbol.yaml delete mode 100644 lld/test/mach-o/parse-tentative-defs.yaml delete mode 100644 lld/test/mach-o/parse-text-relocs-arm64.yaml delete mode 100644 lld/test/mach-o/parse-text-relocs-x86_64.yaml delete mode 100644 lld/test/mach-o/parse-tlv-relocs-x86-64.yaml delete mode 100644 lld/test/mach-o/re-exported-dylib-ordinal.yaml delete mode 100644 lld/test/mach-o/rpath.yaml delete mode 100644 lld/test/mach-o/run-tlv-pass-x86-64.yaml delete mode 100644 lld/test/mach-o/sdk-version-error.yaml delete mode 100644 lld/test/mach-o/sectalign.yaml delete mode 100644 lld/test/mach-o/sectattrs.yaml delete mode 100644 lld/test/mach-o/sectcreate.yaml delete mode 100644 lld/test/mach-o/seg-protection-arm64.yaml delete mode 100644 lld/test/mach-o/seg-protection-x86_64.yaml delete mode 100644 lld/test/mach-o/source-version.yaml delete mode 100644 lld/test/mach-o/stack-size.yaml delete mode 100644 
lld/test/mach-o/string-table.yaml delete mode 100644 lld/test/mach-o/stub-link.s delete mode 100644 lld/test/mach-o/subsections-via-symbols-default.yaml delete mode 100644 lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml delete mode 100644 lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml delete mode 100644 lld/test/mach-o/unwind-info-simple-arm64.yaml delete mode 100644 lld/test/mach-o/unwind-info-simple-x86_64.yaml delete mode 100644 lld/test/mach-o/upward-dylib-load-command.yaml delete mode 100644 lld/test/mach-o/upward-dylib-paths.yaml delete mode 100644 lld/test/mach-o/usage.yaml delete mode 100644 lld/test/mach-o/use-dylib.yaml delete mode 100644 lld/test/mach-o/use-simple-dylib.yaml delete mode 100644 lld/test/mach-o/version-min-load-command-object.yaml delete mode 100644 lld/test/mach-o/version-min-load-command.yaml delete mode 100644 lld/test/mach-o/write-final-sections.yaml delete mode 100644 lld/test/mach-o/wrong-arch-error.yaml delete mode 100644 lld/unittests/CMakeLists.txt delete mode 100644 lld/unittests/DriverTests/CMakeLists.txt delete mode 100644 lld/unittests/DriverTests/DarwinLdDriverTest.cpp delete mode 100644 lld/unittests/MachOTests/CMakeLists.txt delete mode 100644 lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp delete mode 100644 lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp delete mode 100644 lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp delete mode 100644 lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp delete mode 100644 lld/unittests/MachOTests/empty_obj_x86_armv7.txt delete mode 100644 lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp delete mode 100644 lldb/source/lldb.cpp delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py delete mode 100644 
lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py delete mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp delete mode 100644 lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py delete mode 100644 lldb/test/API/functionalities/memory/read/Makefile delete mode 100644 
lldb/test/API/functionalities/memory/read/TestMemoryRead.py delete mode 100644 lldb/test/API/functionalities/memory/read/main.cpp delete mode 100644 lldb/test/API/linux/aarch64/tagged_memory_region/Makefile delete mode 100644 lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py delete mode 100644 lldb/test/API/linux/aarch64/tagged_memory_region/main.c delete mode 100644 llvm/lib/Analysis/ReleaseModeModelRunner.cpp delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kCallLowering.h delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h delete mode 100644 llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td delete mode 100644 llvm/test/Analysis/BasicAA/memset_pattern.ll delete mode 100644 llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll delete mode 100644 llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll delete mode 100644 llvm/test/CodeGen/X86/peep-test-5.ll delete mode 100644 llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s delete mode 100644 llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s delete mode 100644 llvm/test/Transforms/LICM/no-hoist-prof.ll delete mode 100644 llvm/test/Transforms/LICM/sink.ll delete mode 100644 
llvm/test/Verifier/dbg-invalid-enum-as-scope.ll delete mode 100644 llvm/test/tools/llvm-nm/lit.local.cfg diff --git a/clang-tools-extra/clangd/unittests/TestScheme.h b/clang-tools-extra/clangd/unittests/TestScheme.h deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp deleted file mode 100644 index 665d5bab7218..000000000000 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ /dev/null @@ -1,460 +0,0 @@ -//===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "HIP.h" -#include "AMDGPU.h" -#include "CommonArgs.h" -#include "clang/Basic/Cuda.h" -#include "clang/Basic/TargetID.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" -#include "llvm/Support/Alignment.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/TargetParser.h" - -using namespace clang::driver; -using namespace clang::driver::toolchains; -using namespace clang::driver::tools; -using namespace clang; -using namespace llvm::opt; - -#if defined(_WIN32) || defined(_WIN64) -#define NULL_FILE "nul" -#else -#define NULL_FILE "/dev/null" -#endif - -namespace { -const unsigned HIPCodeObjectAlign = 4096; -} // namespace - -void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, - const InputInfo &Output, - const llvm::opt::ArgList &Args) const { - // Construct lld command. - // The output from ld.lld is an HSA code object file. 
- ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared", - "-plugin-opt=-amdgpu-internalize-symbols"}; - - auto &TC = getToolChain(); - auto &D = TC.getDriver(); - assert(!Inputs.empty() && "Must have at least one input."); - bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin; - addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO); - - // Extract all the -m options - std::vector Features; - amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features); - - // Add features to mattr such as cumode - std::string MAttrString = "-plugin-opt=-mattr="; - for (auto OneFeature : unifyTargetFeatures(Features)) { - MAttrString.append(Args.MakeArgString(OneFeature)); - if (OneFeature != Features.back()) - MAttrString.append(","); - } - if (!Features.empty()) - LldArgs.push_back(Args.MakeArgString(MAttrString)); - - // ToDo: Remove this option after AMDGPU backend supports ISA-level linking. - // Since AMDGPU backend currently does not support ISA-level linking, all - // called functions need to be imported. 
- if (IsThinLTO) - LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all")); - - for (const Arg *A : Args.filtered(options::OPT_mllvm)) { - LldArgs.push_back( - Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0))); - } - - if (C.getDriver().isSaveTempsEnabled()) - LldArgs.push_back("-save-temps"); - - addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); - - LldArgs.append({"-o", Output.getFilename()}); - for (auto Input : Inputs) - LldArgs.push_back(Input.getFilename()); - - if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, - false)) - llvm::for_each(TC.getHIPDeviceLibs(Args), [&](auto BCFile) { - LldArgs.push_back(Args.MakeArgString(BCFile.Path)); - }); - - const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); - C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Lld, LldArgs, Inputs, Output)); -} - -// Construct a clang-offload-bundler command to bundle code objects for -// different GPU's into a HIP fat binary. -void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, - StringRef OutputFileName, const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, const Tool& T) { - // Construct clang-offload-bundler command to bundle object files for - // for different GPU archs. - ArgStringList BundlerArgs; - BundlerArgs.push_back(Args.MakeArgString("-type=o")); - BundlerArgs.push_back( - Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign))); - - // ToDo: Remove the dummy host binary entry which is required by - // clang-offload-bundler. - std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; - std::string BundlerInputArg = "-inputs=" NULL_FILE; - - // For code object version 2 and 3, the offload kind in bundle ID is 'hip' - // for backward compatibility. For code object version 4 and greater, the - // offload kind in bundle ID is 'hipv4'. 
- std::string OffloadKind = "hip"; - if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4) - OffloadKind = OffloadKind + "v4"; - for (const auto &II : Inputs) { - const auto* A = II.getAction(); - BundlerTargetArg = BundlerTargetArg + "," + OffloadKind + - "-amdgcn-amd-amdhsa--" + - StringRef(A->getOffloadingArch()).str(); - BundlerInputArg = BundlerInputArg + "," + II.getFilename(); - } - BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); - BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); - - std::string Output = std::string(OutputFileName); - auto BundlerOutputArg = - Args.MakeArgString(std::string("-outputs=").append(Output)); - BundlerArgs.push_back(BundlerOutputArg); - - const char *Bundler = Args.MakeArgString( - T.getToolChain().GetProgramPath("clang-offload-bundler")); - C.addCommand(std::make_unique( - JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs, - InputInfo(&JA, Args.MakeArgString(Output)))); -} - -/// Add Generated HIP Object File which has device images embedded into the -/// host to the argument list for linking. Using MC directives, embed the -/// device code and also define symbols required by the code generation so that -/// the image can be retrieved at runtime. -void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( - Compilation &C, const InputInfo &Output, - const InputInfoList &Inputs, const ArgList &Args, - const JobAction &JA) const { - const ToolChain &TC = getToolChain(); - std::string Name = - std::string(llvm::sys::path::stem(Output.getFilename())); - - // Create Temp Object File Generator, - // Offload Bundled file and Bundled Object file. - // Keep them if save-temps is enabled. 
- const char *McinFile; - const char *BundleFile; - if (C.getDriver().isSaveTempsEnabled()) { - McinFile = C.getArgs().MakeArgString(Name + ".mcin"); - BundleFile = C.getArgs().MakeArgString(Name + ".hipfb"); - } else { - auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin"); - McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin)); - auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb"); - BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb)); - } - constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this); - - // Create a buffer to write the contents of the temp obj generator. - std::string ObjBuffer; - llvm::raw_string_ostream ObjStream(ObjBuffer); - - // Add MC directives to embed target binaries. We ensure that each - // section and image is 16-byte aligned. This is not mandatory, but - // increases the likelihood of data to be aligned with a cache block - // in several main host machines. - ObjStream << "# HIP Object Generator\n"; - ObjStream << "# *** Automatically generated by Clang ***\n"; - ObjStream << " .protected __hip_fatbin\n"; - ObjStream << " .type __hip_fatbin,@object\n"; - ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; - ObjStream << " .globl __hip_fatbin\n"; - ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign)) - << "\n"; - ObjStream << "__hip_fatbin:\n"; - ObjStream << " .incbin \"" << BundleFile << "\"\n"; - ObjStream.flush(); - - // Dump the contents of the temp object file gen if the user requested that. - // We support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) - llvm::errs() << ObjBuffer; - - // Open script file and write the contents. 
- std::error_code EC; - llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Objf << ObjBuffer; - - ArgStringList McArgs{"-o", Output.getFilename(), - McinFile, "--filetype=obj"}; - const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); - C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Mc, McArgs, Inputs, Output)); -} - -// For amdgcn the inputs of the linker job are device bitcode and output is -// object file. It calls llvm-link, opt, llc, then lld steps. -void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - if (Inputs.size() > 0 && - Inputs[0].getType() == types::TY_Image && - JA.getType() == types::TY_Object) - return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA); - - if (JA.getType() == types::TY_HIP_FATBIN) - return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); - - return constructLldCommand(C, JA, Inputs, Output, Args); -} - -HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, - const ToolChain &HostTC, const ArgList &Args) - : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { - // Lookup binaries into the driver directory, this is used to - // discover the clang-offload-bundler executable. 
- getProgramPaths().push_back(getDriver().Dir); -} - -void HIPToolChain::addClangTargetOptions( - const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - Action::OffloadKind DeviceOffloadingKind) const { - HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - - assert(DeviceOffloadingKind == Action::OFK_HIP && - "Only HIP offloading kinds are supported for GPUs."); - - CC1Args.push_back("-fcuda-is-device"); - - if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, - options::OPT_fno_cuda_approx_transcendentals, false)) - CC1Args.push_back("-fcuda-approx-transcendentals"); - - if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, - false)) - CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); - - StringRef MaxThreadsPerBlock = - DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); - if (!MaxThreadsPerBlock.empty()) { - std::string ArgStr = - std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str(); - CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr)); - } - - CC1Args.push_back("-fcuda-allow-variadic-functions"); - - // Default to "hidden" visibility, as object level linking will not be - // supported for the foreseeable future. - if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, - options::OPT_fvisibility_ms_compat)) { - CC1Args.append({"-fvisibility", "hidden"}); - CC1Args.push_back("-fapply-global-visibility-to-externs"); - } - - llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](auto BCFile) { - CC1Args.push_back(BCFile.ShouldInternalize ? 
"-mlink-builtin-bitcode" - : "-mlink-bitcode-file"); - CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); - }); -} - -llvm::opt::DerivedArgList * -HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch, - Action::OffloadKind DeviceOffloadKind) const { - DerivedArgList *DAL = - HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); - if (!DAL) - DAL = new DerivedArgList(Args.getBaseArgs()); - - const OptTable &Opts = getDriver().getOpts(); - - for (Arg *A : Args) { - if (!shouldSkipArgument(A)) - DAL->append(A); - } - - if (!BoundArch.empty()) { - DAL->eraseArg(options::OPT_mcpu_EQ); - DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); - checkTargetID(*DAL); - } - - return DAL; -} - -Tool *HIPToolChain::buildLinker() const { - assert(getTriple().getArch() == llvm::Triple::amdgcn); - return new tools::AMDGCN::Linker(*this); -} - -void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { - HostTC.addClangWarningOptions(CC1Args); -} - -ToolChain::CXXStdlibType -HIPToolChain::GetCXXStdlibType(const ArgList &Args) const { - return HostTC.GetCXXStdlibType(Args); -} - -void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, - ArgStringList &CC1Args) const { - HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); -} - -void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, - ArgStringList &CC1Args) const { - HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); -} - -void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, - ArgStringList &CC1Args) const { - HostTC.AddIAMCUIncludeArgs(Args, CC1Args); -} - -void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, - ArgStringList &CC1Args) const { - RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args); -} - -SanitizerMask HIPToolChain::getSupportedSanitizers() const { - // The HIPToolChain only supports sanitizers in the sense that it allows - // sanitizer arguments on the command line if they are 
supported by the host - // toolchain. The HIPToolChain will actually ignore any command line - // arguments for any of these "supported" sanitizers. That means that no - // sanitization of device code is actually supported at this time. - // - // This behavior is necessary because the host and device toolchains - // invocations often share the command line, so the device toolchain must - // tolerate flags meant only for the host toolchain. - return HostTC.getSupportedSanitizers(); -} - -VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D, - const ArgList &Args) const { - return HostTC.computeMSVCVersion(D, Args); -} - -llvm::SmallVector -HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { - llvm::SmallVector BCLibs; - if (DriverArgs.hasArg(options::OPT_nogpulib)) - return {}; - ArgStringList LibraryPaths; - - // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. - for (auto Path : RocmInstallation.getRocmDeviceLibPathArg()) - LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); - - addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH"); - - // Maintain compatability with --hip-device-lib. - auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); - if (!BCLibArgs.empty()) { - llvm::for_each(BCLibArgs, [&](StringRef BCName) { - StringRef FullName; - for (std::string LibraryPath : LibraryPaths) { - SmallString<128> Path(LibraryPath); - llvm::sys::path::append(Path, BCName); - FullName = Path; - if (llvm::sys::fs::exists(FullName)) { - BCLibs.push_back(FullName); - return; - } - } - getDriver().Diag(diag::err_drv_no_such_file) << BCName; - }); - } else { - if (!RocmInstallation.hasDeviceLibrary()) { - getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; - return {}; - } - StringRef GpuArch = getGPUArch(DriverArgs); - assert(!GpuArch.empty() && "Must have an explicit GPU arch."); - - // If --hip-device-lib is not set, add the default bitcode libraries. 
- if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, false)) { - auto AsanRTL = RocmInstallation.getAsanRTLPath(); - if (AsanRTL.empty()) { - unsigned DiagID = getDriver().getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "AMDGPU address sanitizer runtime library (asanrtl) is not found. " - "Please install ROCm device library which supports address " - "sanitizer"); - getDriver().Diag(DiagID); - return {}; - } else - BCLibs.push_back({AsanRTL.str(), /*ShouldInternalize=*/false}); - } - - // Add the HIP specific bitcode library. - BCLibs.push_back(RocmInstallation.getHIPPath()); - - // Add common device libraries like ocml etc. - for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str())) - BCLibs.push_back(StringRef(N)); - - // Add instrument lib. - auto InstLib = - DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); - if (InstLib.empty()) - return BCLibs; - if (llvm::sys::fs::exists(InstLib)) - BCLibs.push_back(InstLib); - else - getDriver().Diag(diag::err_drv_no_such_file) << InstLib; - } - - return BCLibs; -} - -void HIPToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const { - auto PTID = getParsedTargetID(DriverArgs); - if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { - getDriver().Diag(clang::diag::err_drv_bad_target_id) - << PTID.OptionalTargetID.getValue(); - return; - } - - assert(PTID.OptionalFeatures && "Invalid return from getParsedTargetID"); - auto &FeatureMap = PTID.OptionalFeatures.getValue(); - // Sanitizer is not supported with xnack-. - if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, false)) { - auto Loc = FeatureMap.find("xnack"); - if (Loc != FeatureMap.end() && !Loc->second) { - auto &Diags = getDriver().getDiags(); - auto DiagID = Diags.getCustomDiagID( - DiagnosticsEngine::Error, - "'-fgpu-sanitize' is not compatible with offload arch '%0'. 
" - "Use an offload arch without 'xnack-' instead"); - Diags.Report(DiagID) << PTID.OptionalTargetID.getValue(); - } - } -} diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h deleted file mode 100644 index 60b3d69b3f52..000000000000 --- a/clang/lib/Driver/ToolChains/HIP.h +++ /dev/null @@ -1,108 +0,0 @@ -//===--- HIP.h - HIP ToolChain Implementations ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_HIP_H -#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_HIP_H - -#include "clang/Driver/ToolChain.h" -#include "clang/Driver/Tool.h" -#include "AMDGPU.h" - -namespace clang { -namespace driver { - -namespace tools { - -namespace AMDGCN { - // Construct command for creating HIP fatbin. - void constructHIPFatbinCommand(Compilation &C, const JobAction &JA, - StringRef OutputFileName, const InputInfoList &Inputs, - const llvm::opt::ArgList &TCArgs, const Tool& T); - -// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with -// device library, then compiles it to ISA in a shared object. -class LLVM_LIBRARY_VISIBILITY Linker : public Tool { -public: - Linker(const ToolChain &TC) : Tool("AMDGCN::Linker", "amdgcn-link", TC) {} - - bool hasIntegratedCPP() const override { return false; } - - void ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, const InputInfoList &Inputs, - const llvm::opt::ArgList &TCArgs, - const char *LinkingOutput) const override; - -private: - - void constructLldCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, const InputInfo &Output, - const llvm::opt::ArgList &Args) const; - - // Construct command for creating Object from HIP fatbin. 
- void constructGenerateObjFileFromHIPFatBinary(Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - const JobAction &JA) const; -}; - -} // end namespace AMDGCN -} // end namespace tools - -namespace toolchains { - -class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain { -public: - HIPToolChain(const Driver &D, const llvm::Triple &Triple, - const ToolChain &HostTC, const llvm::opt::ArgList &Args); - - const llvm::Triple *getAuxTriple() const override { - return &HostTC.getTriple(); - } - - llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, - Action::OffloadKind DeviceOffloadKind) const override; - void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - Action::OffloadKind DeviceOffloadKind) const override; - void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override; - CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; - void - AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const override; - void AddClangCXXStdlibIncludeArgs( - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CC1Args) const override; - void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const override; - void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const override; - llvm::SmallVector - getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override; - - SanitizerMask getSupportedSanitizers() const override; - - VersionTuple - computeMSVCVersion(const Driver *D, - const llvm::opt::ArgList &Args) const override; - - unsigned GetDefaultDwarfVersion() const override { return 5; } - - const ToolChain &HostTC; - void checkTargetID(const llvm::opt::ArgList &DriverArgs) const override; - -protected: - Tool *buildLinker() 
const override; -}; - -} // end namespace toolchains -} // end namespace driver -} // end namespace clang - -#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_HIP_H diff --git a/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml b/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml deleted file mode 100644 index df2b22189769..000000000000 --- a/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{ - 'version': 0, - 'roots': [ - { 'name': '@DIR@', 'type': 'directory', - 'contents': [ - { 'name': 'only-virtual-file.blacklist', 'type': 'file', - 'external-contents': '@REAL_FILE@' - }, - { 'name': 'invalid-virtual-file.blacklist', 'type': 'file', - 'external-contents': '@NONEXISTENT_FILE@' - } - ] - } - ] -} diff --git a/clang/test/CodeGen/catch-alignment-assumption-blacklist.c b/clang/test/CodeGen/catch-alignment-assumption-blacklist.c deleted file mode 100644 index 67da7ff7627b..000000000000 --- a/clang/test/CodeGen/catch-alignment-assumption-blacklist.c +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=alignment -fsanitize-recover=alignment -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_alignment_assumption" --check-prefixes=CHECK - -// CHECK-LABEL: @baseline -void *baseline(void *x) { - // CHECK: call void @__ubsan_handle_alignment_assumption( - return __builtin_assume_aligned(x, 1); -} - -// CHECK-LABEL: blacklist_0 -__attribute__((no_sanitize("undefined"))) void *blacklist_0(void *x) { - return __builtin_assume_aligned(x, 1); -} - -// CHECK-LABEL: blacklist_1 -__attribute__((no_sanitize("alignment"))) void *blacklist_1(void *x) { - return __builtin_assume_aligned(x, 1); -} - -// CHECK-LABEL: dont_ignore_volatile_ptrs -void *dont_ignore_volatile_ptrs(void * volatile x) { - // CHECK: call void @__ubsan_handle_alignment_assumption( - return __builtin_assume_aligned(x, 1); -} - -// CHECK-LABEL: ignore_volatiles -void 
*ignore_volatiles(volatile void * x) { - return __builtin_assume_aligned(x, 1); -} diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c deleted file mode 100644 index 476f5852b8e4..000000000000 --- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" -// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" - -// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" -// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" - -#ifdef __cplusplus -extern "C" { -#endif - -// CHECK-LABEL: @baseline -char *baseline(char *base, unsigned long offset) { - // CHECK: call void @__ubsan_handle_pointer_overflow( - return base + offset; -} - -// CHECK-LABEL: @blacklist_0 -__attribute__((no_sanitize("undefined"))) char *blacklist_0(char *base, unsigned long offset) { - return base + offset; -} - -// CHECK-LABEL: @blacklist_1 -__attribute__((no_sanitize("pointer-overflow"))) char *blacklist_1(char *base, unsigned long offset) { - return base + offset; -} - -// CHECK-LABEL: @ignore_non_default_address_space -__attribute__((address_space(1))) char *ignore_non_default_address_space(__attribute__((address_space(1))) char 
*base, unsigned long offset) { - return base + offset; -} - -#ifdef __cplusplus -} -#endif diff --git a/clang/test/CodeGen/ubsan-blacklist.c b/clang/test/CodeGen/ubsan-blacklist.c deleted file mode 100644 index 666003bd9233..000000000000 --- a/clang/test/CodeGen/ubsan-blacklist.c +++ /dev/null @@ -1,28 +0,0 @@ -// Verify ubsan doesn't emit checks for blacklisted functions and files -// RUN: echo "fun:hash" > %t-func.blacklist -// RUN: echo "src:%s" | sed -e 's/\\/\\\\/g' > %t-file.blacklist -// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -emit-llvm %s -o - | FileCheck %s --check-prefix=DEFAULT -// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-func.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC -// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FILE - -unsigned i; - -// DEFAULT: @hash -// FUNC: @hash -// FILE: @hash -unsigned hash() { -// DEFAULT: call {{.*}}void @__ubsan -// FUNC-NOT: call {{.*}}void @__ubsan -// FILE-NOT: call {{.*}}void @__ubsan - return i * 37; -} - -// DEFAULT: @add -// FUNC: @add -// FILE: @add -unsigned add() { -// DEFAULT: call {{.*}}void @__ubsan -// FUNC: call {{.*}}void @__ubsan -// FILE-NOT: call {{.*}}void @__ubsan - return i + 1; -} diff --git a/clang/test/CodeGenCXX/cfi-blacklist.cpp b/clang/test/CodeGenCXX/cfi-blacklist.cpp deleted file mode 100644 index c01e5fcd9260..000000000000 --- a/clang/test/CodeGenCXX/cfi-blacklist.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// RUN: %clang_cc1 -triple %itanium_abi_triple -fvisibility hidden -fms-extensions -fsanitize=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOBL %s - -// Check that blacklisting cfi and cfi-vcall work correctly -// RUN: echo "[cfi-vcall]" > %t.vcall.txt -// RUN: echo "type:std::*" >> %t.vcall.txt -// RUN: %clang_cc1 -triple %itanium_abi_triple -fvisibility hidden -fms-extensions -fsanitize=cfi-vcall 
-fsanitize-blacklist=%t.vcall.txt -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTD %s -// -// RUN: echo "[cfi]" > %t.cfi.txt -// RUN: echo "type:std::*" >> %t.cfi.txt -// RUN: %clang_cc1 -triple %itanium_abi_triple -fvisibility hidden -fms-extensions -fsanitize=cfi-vcall -fsanitize-blacklist=%t.cfi.txt -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTD %s - -// Check that blacklisting non-vcall modes does not affect vcalls -// RUN: echo "[cfi-icall|cfi-nvcall|cfi-cast-strict|cfi-derived-cast|cfi-unrelated-cast]" > %t.other.txt -// RUN: echo "type:std::*" >> %t.other.txt -// RUN: %clang_cc1 -triple %itanium_abi_triple -fvisibility hidden -fms-extensions -fsanitize=cfi-vcall -fsanitize-blacklist=%t.other.txt -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOBL %s - -struct S1 { - virtual void f(); -}; - -namespace std { - -struct S2 { - virtual void f(); -}; - -} - -// CHECK: define{{.*}}s1f -// NOBL: llvm.type.test -// NOSTD: llvm.type.test -void s1f(S1 *s1) { - s1->f(); -} - -// CHECK: define{{.*}}s2f -// NOBL: llvm.type.test -// NOSTD-NOT: llvm.type.test -void s2f(std::S2 *s2) { - s2->f(); -} diff --git a/clang/test/Driver/debug-var-experimental-switch.c b/clang/test/Driver/debug-var-experimental-switch.c deleted file mode 100644 index 9c7a782e9e2b..000000000000 --- a/clang/test/Driver/debug-var-experimental-switch.c +++ /dev/null @@ -1,2 +0,0 @@ -// RUN: %clang -Xclang -fexperimental-debug-variable-locations -fsyntax-only -disable-llvm-passes %s -int main() {} diff --git a/clang/test/Sema/branch-protection-attr-err.c b/clang/test/Sema/branch-protection-attr-err.c deleted file mode 100644 index cfb53eb26f3e..000000000000 --- a/clang/test/Sema/branch-protection-attr-err.c +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -triple aarch64 -verify -fsyntax-only %s - -__attribute__((target("branch-protection=foo"))) // expected-error {{invalid or misplaced branch protection specification 'foo'}} -void 
badvalue0() {} - -__attribute__((target("branch-protection=+bti"))) // expected-error {{invalid or misplaced branch protection specification ''}} -void badvalue1() {} - -__attribute__((target("branch-protection=bti+"))) // expected-error {{invalid or misplaced branch protection specification ''}} -void badvalue2() {} - -__attribute__((target("branch-protection=pac-ret+bkey"))) // expected-error {{invalid or misplaced branch protection specification 'bkey'}} -void badvalue3() {} - -__attribute__((target("branch-protection=bti+leaf"))) // expected-error {{invalid or misplaced branch protection specification 'leaf'}} -void badoption0() {} - -__attribute__((target("branch-protection=bti+leaf+pac-ret"))) // expected-error {{invalid or misplaced branch protection specification 'leaf'}} -void badorder0() {} - -__attribute__ ((target("branch-protection=pac-ret+bti+leaf"))) // expected-error {{invalid or misplaced branch protection specification 'leaf'}} -void badorder1() {} diff --git a/compiler-rt/cmake/Modules/CustomLibcxx/CMakeLists.txt b/compiler-rt/cmake/Modules/CustomLibcxx/CMakeLists.txt deleted file mode 100644 index 6c6680023c7f..000000000000 --- a/compiler-rt/cmake/Modules/CustomLibcxx/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -# TODO(phosek): We should use the runtimes build instead configured with -# LLVM_ENABLE_RUNTIMES=libcxxabi;libcxx to avoid duplication of logic. - -cmake_minimum_required(VERSION 3.13.4) -project(custom-libcxx C CXX) - -find_package(Python3 REQUIRED COMPONENTS Interpreter) - -# Build static libcxxabi. -set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "") -set(LIBCXXABI_ENABLE_EXCEPTIONS ON CACHE BOOL "") -set(LIBCXXABI_HERMETIC_STATIC_LIBRARY ON CACHE STRING "") -set(LIBCXXABI_LIBCXX_PATH ${COMPILER_RT_LIBCXX_PATH} CACHE PATH "") -set(LIBCXXABI_INCLUDE_TESTS OFF CACHE BOOL "") -add_subdirectory(${COMPILER_RT_LIBCXXABI_PATH} ${CMAKE_CURRENT_BINARY_DIR}/cxxabi) - -# Build static libcxx without exceptions. 
-set(LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY OFF CACHE BOOL "") -set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") -set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") -set(LIBCXX_HERMETIC_STATIC_LIBRARY ON CACHE BOOL "") - -# Use above libcxxabi. -set(LIBCXX_CXX_ABI "libcxxabi" CACHE STRING "") -set(LIBCXX_CXX_ABI_INTREE 1) -set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") -set(LIBCXX_CXX_ABI_INCLUDE_PATHS ${COMPILER_RT_LIBCXXABI_PATH}/include CACHE PATH "") - -add_subdirectory(${COMPILER_RT_LIBCXX_PATH} ${CMAKE_CURRENT_BINARY_DIR}/cxx) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h b/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h deleted file mode 100644 index e18b0030567f..000000000000 --- a/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h +++ /dev/null @@ -1,110 +0,0 @@ -//===-- sanitizer_persistent_allocator.h ------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A fast memory allocator that does not support free() nor realloc(). -// All allocations are forever. -//===----------------------------------------------------------------------===// - -#ifndef SANITIZER_PERSISTENT_ALLOCATOR_H -#define SANITIZER_PERSISTENT_ALLOCATOR_H - -#include "sanitizer_internal_defs.h" -#include "sanitizer_mutex.h" -#include "sanitizer_atomic.h" -#include "sanitizer_common.h" - -namespace __sanitizer { - -template -class PersistentAllocator { - public: - T *alloc(uptr count = 1); - uptr allocated() const { return atomic_load_relaxed(&mapped_size); } - - void TestOnlyUnmap(); - - private: - T *tryAlloc(uptr count); - T *refillAndAlloc(uptr count); - mutable StaticSpinMutex mtx; // Protects alloc of new blocks. 
- atomic_uintptr_t region_pos; // Region allocator for Node's. - atomic_uintptr_t region_end; - atomic_uintptr_t mapped_size; - - struct BlockInfo { - const BlockInfo *next; - uptr ptr; - uptr size; - }; - const BlockInfo *curr; -}; - -template -inline T *PersistentAllocator::tryAlloc(uptr count) { - // Optimisic lock-free allocation, essentially try to bump the region ptr. - for (;;) { - uptr cmp = atomic_load(®ion_pos, memory_order_acquire); - uptr end = atomic_load(®ion_end, memory_order_acquire); - uptr size = count * sizeof(T); - if (cmp == 0 || cmp + size > end) - return nullptr; - if (atomic_compare_exchange_weak(®ion_pos, &cmp, cmp + size, - memory_order_acquire)) - return reinterpret_cast(cmp); - } -} - -template -inline T *PersistentAllocator::alloc(uptr count) { - // First, try to allocate optimisitically. - T *s = tryAlloc(count); - if (LIKELY(s)) - return s; - return refillAndAlloc(count); -} - -template -inline T *PersistentAllocator::refillAndAlloc(uptr count) { - // If failed, lock, retry and alloc new superblock. 
- SpinMutexLock l(&mtx); - for (;;) { - T *s = tryAlloc(count); - if (s) - return s; - atomic_store(®ion_pos, 0, memory_order_relaxed); - uptr size = count * sizeof(T) + sizeof(BlockInfo); - uptr allocsz = RoundUpTo(Max(size, 64u * 1024u), GetPageSizeCached()); - uptr mem = (uptr)MmapOrDie(allocsz, "stack depot"); - BlockInfo *new_block = (BlockInfo *)(mem + allocsz) - 1; - new_block->next = curr; - new_block->ptr = mem; - new_block->size = allocsz; - curr = new_block; - - atomic_fetch_add(&mapped_size, allocsz, memory_order_relaxed); - - allocsz -= sizeof(BlockInfo); - atomic_store(®ion_end, mem + allocsz, memory_order_release); - atomic_store(®ion_pos, mem, memory_order_release); - } -} - -template -void PersistentAllocator::TestOnlyUnmap() { - while (curr) { - uptr mem = curr->ptr; - uptr allocsz = curr->size; - curr = curr->next; - UnmapOrDie((void *)mem, allocsz); - } - internal_memset(this, 0, sizeof(*this)); -} - -} // namespace __sanitizer - -#endif // SANITIZER_PERSISTENT_ALLOCATOR_H diff --git a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc b/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc deleted file mode 100644 index a58ef0f17efa..000000000000 --- a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc +++ /dev/null @@ -1,59 +0,0 @@ -//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of ThreadSanitizer (TSan), a race detector. -// -// Body of the hottest inner loop. -// If we wrap this body into a function, compilers (both gcc and clang) -// produce sligtly less efficient code. 
-//===----------------------------------------------------------------------===// -do { - const unsigned kAccessSize = 1 << kAccessSizeLog; - u64 *sp = &shadow_mem[idx]; - old = LoadShadow(sp); - if (LIKELY(old.IsZero())) { - if (!stored) { - StoreIfNotYetStored(sp, &store_word); - stored = true; - } - break; - } - // is the memory access equal to the previous? - if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) { - // same thread? - if (LIKELY(Shadow::TidsAreEqual(old, cur))) { - if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) { - StoreIfNotYetStored(sp, &store_word); - stored = true; - } - break; - } - if (HappensBefore(old, thr)) { - if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) { - StoreIfNotYetStored(sp, &store_word); - stored = true; - } - break; - } - if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))) - break; - goto RACE; - } - // Do the memory access intersect? - if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) { - if (Shadow::TidsAreEqual(old, cur)) - break; - if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)) - break; - if (LIKELY(HappensBefore(old, thr))) - break; - goto RACE; - } - // The accesses do not intersect. - break; -} while (0); diff --git a/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp deleted file mode 100644 index 44d58a087453..000000000000 --- a/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -// Tests whether scaling the Entropic scheduling weight based on input execution -// time is effective or not. Inputs of size less than 7 will take at least 100 -// microseconds more than inputs of size greater than or equal to 7. 
Inputs of -// size greater than 7 in the corpus should be favored by the exec-time-scaled -// Entropic scheduling policy than the input of size less than 7 in the corpus, -// eventually finding the crashing input with less executions. -#include -#include -#include - -static volatile int Sink; -static volatile int *Nil = nullptr; - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size > 10) - return 0; // To make the test quicker. - - if (Size < 7) { - // execute a lot slower than the crashing input below. - size_t ExecTimeUSec = 100; - std::this_thread::sleep_for(std::chrono::microseconds(ExecTimeUSec)); - Sink = 7; - - if (Size > 0 && Data[0] == 0xaa && Size > 1 && Data[1] == 0xbb && - Size > 2 && Data[2] == 0xcc && Size > 3 && Data[3] == 0xdd && - Size > 4 && Data[4] == 0xee && Size > 5 && Data[5] == 0xff) - Sink += 7; - } - - // Give unique coverage for each input of size (7, 8, 9, 10) - if (Size == 7) - Sink = -7; - - if (Size == 8) - Sink = -8; - - if (Size == 9) - Sink = -9; - - if (Size == 10) - Sink = -10; - - if (Sink < 0 && Data[0] == 0xab && Data[1] == 0xcd) - *Nil = 42; // crash. - - return 0; -} diff --git a/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test deleted file mode 100644 index d8baf59da97a..000000000000 --- a/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test +++ /dev/null @@ -1,8 +0,0 @@ -REQUIRES: linux, x86_64 -RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest -RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=200000 -max_len=10 - -# Without -entropic_scale_per_exec_time=1, the crash takes longer to find since -# the slow path is explored first. This test is disabled because it sometimes -# finds the bug under certain configs. 
-DISABLED: %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10 diff --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp deleted file mode 100644 index c253855fbf03..000000000000 --- a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Check mem_info_cache_entries option. - -// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=log_path=stderr:mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s - -// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00% -// CHECK-NOT: Set - -int main() { - return 0; -} diff --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp deleted file mode 100644 index e32a0de0d913..000000000000 --- a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Check print_mem_info_cache_miss_rate and -// print_mem_info_cache_miss_rate_details options. 
- -// RUN: %clangxx_memprof -O0 %s -o %t -// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s -// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS - -// CHECK: Overall miss rate: 0 / {{.*}} = 0.00% -// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00% -// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00% - -int main() { - return 0; -} diff --git a/compiler-rt/test/ubsan/TestCases/Pointer/alignment-assumption-ignorelist.cppp b/compiler-rt/test/ubsan/TestCases/Pointer/alignment-assumption-ignorelist.cppp deleted file mode 100644 index e5c166497dcf..000000000000 --- a/compiler-rt/test/ubsan/TestCases/Pointer/alignment-assumption-ignorelist.cppp +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang -fsanitize=alignment -fno-sanitize-recover=alignment -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --implicit-check-not=" assumption " - -// RUN: rm -f %tmp -// RUN: echo "[alignment]" >> %tmp -// RUN: echo "fun:main" >> %tmp -// RUN: %clang -fsanitize=alignment -fno-sanitize-recover=alignment -fsanitize-ignorelist=%tmp -O0 %s -o %t && %run %t 2>&1 - -#include - -int main(int argc, char* argv[]) { - char *ptr = (char *)malloc(2); - - __builtin_assume_aligned(ptr + 1, 0x8000); - // CHECK: {{.*}}alignment-assumption-ignorelist.cpp:[[@LINE-1]]:32: runtime error: assumption of 32768 byte alignment for pointer of type 'char *' failed - // CHECK: 0x{{.*}}: note: address is {{.*}} aligned, misalignment offset is {{.*}} byte - - free(ptr); - - return 0; -} diff --git a/libcxx/cmake/caches/Generic-32bits.cmake b/libcxx/cmake/caches/Generic-32bits.cmake deleted file mode 100644 index ae7b2ac3e8d8..000000000000 --- a/libcxx/cmake/caches/Generic-32bits.cmake +++ /dev/null @@ -1 +0,0 @@ -set(LLVM_BUILD_32_BITS ON CACHE BOOL "") diff --git a/libcxx/include/__memory/pointer_safety.h b/libcxx/include/__memory/pointer_safety.h deleted file 
mode 100644 index e72080393dc9..000000000000 --- a/libcxx/include/__memory/pointer_safety.h +++ /dev/null @@ -1,52 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___MEMORY_POINTER_SAFETY_H -#define _LIBCPP___MEMORY_POINTER_SAFETY_H - -#include <__config> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -#pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -#if !defined(_LIBCPP_CXX03_LANG) - -enum class pointer_safety : unsigned char { - relaxed, - preferred, - strict -}; - -inline _LIBCPP_INLINE_VISIBILITY -pointer_safety get_pointer_safety() _NOEXCEPT { - return pointer_safety::relaxed; -} - -_LIBCPP_FUNC_VIS void declare_reachable(void* __p); -_LIBCPP_FUNC_VIS void declare_no_pointers(char* __p, size_t __n); -_LIBCPP_FUNC_VIS void undeclare_no_pointers(char* __p, size_t __n); -_LIBCPP_FUNC_VIS void* __undeclare_reachable(void* __p); - -template -inline _LIBCPP_INLINE_VISIBILITY -_Tp* -undeclare_reachable(_Tp* __p) -{ - return static_cast<_Tp*>(__undeclare_reachable(__p)); -} - -#endif // !C++03 - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___MEMORY_POINTER_SAFETY_H diff --git a/libcxx/test/libcxx/atomics/ext-int.verify.cpp b/libcxx/test/libcxx/atomics/ext-int.verify.cpp deleted file mode 100644 index 90034426fada..000000000000 --- a/libcxx/test/libcxx/atomics/ext-int.verify.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// Make sure that `std::atomic` doesn't work with `_ExtInt`. The intent is to -// disable them for now until their behavior can be designed better later. -// See https://reviews.llvm.org/D84049 for details. - -// UNSUPPORTED: apple-clang-12 - -// UNSUPPORTED: libcpp-has-no-threads -// UNSUPPORTED: c++03 - -#include - -int main(int, char**) -{ - // expected-error@atomic:*1 {{_Atomic cannot be applied to integer type '_ExtInt(32)'}} - std::atomic<_ExtInt(32)> x(42); - - return 0; -} diff --git a/libcxx/test/libcxx/atomics/libcpp-has-no-threads.compile.fail.cpp b/libcxx/test/libcxx/atomics/libcpp-has-no-threads.compile.fail.cpp deleted file mode 100644 index 9c6a8f7f55f6..000000000000 --- a/libcxx/test/libcxx/atomics/libcpp-has-no-threads.compile.fail.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// Test that including fails to compile when _LIBCPP_HAS_NO_THREADS -// is defined. - -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_HAS_NO_THREADS - -#include - -int main(int, char**) -{ - - return 0; -} diff --git a/libcxx/test/libcxx/atomics/libcpp-has-no-threads.pass.cpp b/libcxx/test/libcxx/atomics/libcpp-has-no-threads.pass.cpp deleted file mode 100644 index b52ba6425223..000000000000 --- a/libcxx/test/libcxx/atomics/libcpp-has-no-threads.pass.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// XFAIL: libcpp-has-no-threads - -#ifdef _LIBCPP_HAS_NO_THREADS -#error This should be XFAILed for the purpose of detecting that the LIT feature\ - 'libcpp-has-no-threads' is available iff _LIBCPP_HAS_NO_THREADS is defined -#endif - -int main(int, char**) -{ - - return 0; -} diff --git a/libcxx/test/std/algorithms/robust_against_adl.pass.cpp b/libcxx/test/std/algorithms/robust_against_adl.pass.cpp deleted file mode 100644 index fca6f98f8b0a..000000000000 --- a/libcxx/test/std/algorithms/robust_against_adl.pass.cpp +++ /dev/null @@ -1,183 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -#include -#include - -#include "test_macros.h" - -struct Incomplete; -template struct Holder { T t; }; - -template -struct Intable { - TEST_CONSTEXPR operator int() const { return 1; } -}; - -struct Tester { - using Element = Holder*; - Element data[10]; -}; - -TEST_CONSTEXPR_CXX20 bool test() -{ - Tester t {}; - Tester u {}; - Tester::Element value = nullptr; - Intable count; - - // THESE RELY ON ADL SWAP IN PRACTICE: - // swap_ranges, iter_swap, reverse, rotate, partition - // sort, nth_element - // pop_heap, sort_heap, partial_sort, partial_sort_copy - // next_permutation, prev_permutation - // stable_partition, stable_sort, inplace_merge - // THESE RELY ON ADL SWAP IN THEORY: - // push_heap, make_heap - - (void)std::all_of(t.data, t.data+10, [](void*){ return true; }); - (void)std::any_of(t.data, t.data+10, 
[](void*){ return true; }); - (void)std::copy(t.data, t.data+10, u.data); - (void)std::copy_n(t.data, count, u.data); - (void)std::copy_backward(t.data, t.data+10, u.data+10); - (void)std::count(t.data, t.data+10, value); - (void)std::count_if(t.data, t.data+10, [](void*){ return true; }); - (void)std::distance(t.data, t.data+10); - (void)std::fill(t.data, t.data+10, value); - (void)std::fill_n(t.data, count, value); - (void)std::find_if(t.data, t.data+10, [](void*){ return true; }); - (void)std::find_if_not(t.data, t.data+10, [](void*){ return true; }); - (void)std::for_each(t.data, t.data+10, [](void*){}); -#if TEST_STD_VER >= 17 - (void)std::for_each_n(t.data, count, [](void*){}); -#endif - (void)std::generate(t.data, t.data+10, [](){ return nullptr; }); - (void)std::generate_n(t.data, count, [](){ return nullptr; }); - (void)std::is_partitioned(t.data, t.data+10, [](void*){ return true; }); - (void)std::move(t.data, t.data+10, u.data); - (void)std::move_backward(t.data, t.data+10, u.data+10); - (void)std::none_of(t.data, t.data+10, [](void*){ return true; }); - (void)std::partition_copy(t.data, t.data+5, u.data, u.data+5, [](void*){ return true; }); - (void)std::partition_point(t.data, t.data+10, [](void*){ return true; }); - (void)std::remove(t.data, t.data+10, value); - (void)std::remove_copy(t.data, t.data+10, u.data, value); - (void)std::remove_copy_if(t.data, t.data+10, u.data, [](void*){ return true; }); - (void)std::remove_if(t.data, t.data+10, [](void*){ return true; }); - (void)std::replace(t.data, t.data+10, value, value); - (void)std::replace_copy(t.data, t.data+10, u.data, value, value); - (void)std::replace_copy_if(t.data, t.data+10, u.data, [](void*){ return true; }, value); - (void)std::replace_if(t.data, t.data+10, [](void*){ return true; }, value); - (void)std::reverse_copy(t.data, t.data+10, u.data); - (void)std::rotate_copy(t.data, t.data+5, t.data+10, u.data); - // TODO: shift_left - // TODO: shift_right - (void)std::transform(t.data, 
t.data+10, u.data, [](void*){ return nullptr; }); - - // WITHOUT COMPARATORS - (void)std::adjacent_find(t.data, t.data+10); - (void)std::binary_search(t.data, t.data+10, t.data[5]); - (void)std::equal(t.data, t.data+10, u.data); - (void)std::equal_range(t.data, t.data+10, t.data[5]); - (void)std::find_end(t.data, t.data+10, u.data, u.data+5); - (void)std::includes(t.data, t.data+10, u.data, u.data+10); - (void)std::is_heap(t.data, t.data+10); - (void)std::is_heap_until(t.data, t.data+10); - (void)std::is_permutation(t.data, t.data+10, u.data); - (void)std::is_sorted(t.data, t.data+10); - (void)std::is_sorted_until(t.data, t.data+10); - (void)std::lexicographical_compare(t.data, t.data+10, u.data, u.data+10); - // TODO: lexicographical_compare_three_way - (void)std::lower_bound(t.data, t.data+10, t.data[5]); - (void)std::max(value, value); - (void)std::max({ value, value }); - (void)std::max_element(t.data, t.data+10); - (void)std::merge(t.data, t.data+5, t.data+5, t.data+10, u.data); - (void)std::min(value, value); - (void)std::min({ value, value }); - (void)std::min_element(t.data, t.data+10); - (void)std::minmax(value, value); - (void)std::minmax({ value, value }); - (void)std::minmax_element(t.data, t.data+10); - (void)std::mismatch(t.data, t.data+10, u.data); - (void)std::search(t.data, t.data+10, u.data, u.data+5); - (void)std::search_n(t.data, t.data+10, count, value); - (void)std::set_difference(t.data, t.data+5, t.data+5, t.data+10, u.data); - (void)std::set_intersection(t.data, t.data+5, t.data+5, t.data+10, u.data); - (void)std::set_symmetric_difference(t.data, t.data+5, t.data+5, t.data+10, u.data); - (void)std::set_union(t.data, t.data+5, t.data+5, t.data+10, u.data); - (void)std::unique(t.data, t.data+10); - (void)std::unique_copy(t.data, t.data+10, u.data); - (void)std::upper_bound(t.data, t.data+10, t.data[5]); -#if TEST_STD_VER >= 14 - (void)std::equal(t.data, t.data+10, u.data, u.data+10); - (void)std::is_permutation(t.data, t.data+10, u.data, 
u.data+10); - (void)std::mismatch(t.data, t.data+10, u.data, u.data+10); -#endif -#if TEST_STD_VER >= 20 - (void)std::clamp(value, value, value); -#endif - - // WITH COMPARATORS - (void)std::adjacent_find(t.data, t.data+10, std::equal_to()); - (void)std::binary_search(t.data, t.data+10, value, std::less()); - (void)std::equal(t.data, t.data+10, u.data, std::equal_to()); - (void)std::equal_range(t.data, t.data+10, value, std::less()); - (void)std::find_end(t.data, t.data+10, u.data, u.data+5, std::equal_to()); - (void)std::includes(t.data, t.data+10, u.data, u.data+10, std::less()); - (void)std::is_heap(t.data, t.data+10, std::less()); - (void)std::is_heap_until(t.data, t.data+10, std::less()); - (void)std::is_permutation(t.data, t.data+10, u.data, std::equal_to()); - (void)std::is_sorted(t.data, t.data+10, std::less()); - (void)std::is_sorted_until(t.data, t.data+10, std::less()); - (void)std::lexicographical_compare(t.data, t.data+10, u.data, u.data+10, std::less()); - // TODO: lexicographical_compare_three_way - (void)std::lower_bound(t.data, t.data+10, value, std::less()); - (void)std::max(value, value, std::less()); - (void)std::max({ value, value }, std::less()); - (void)std::max_element(t.data, t.data+10, std::less()); - (void)std::merge(t.data, t.data+5, t.data+5, t.data+10, u.data, std::less()); - (void)std::min(value, value, std::less()); - (void)std::min({ value, value }, std::less()); - (void)std::min_element(t.data, t.data+10, std::less()); - (void)std::minmax(value, value, std::less()); - (void)std::minmax({ value, value }, std::less()); - (void)std::minmax_element(t.data, t.data+10, std::less()); - (void)std::mismatch(t.data, t.data+10, u.data, std::equal_to()); - (void)std::search(t.data, t.data+10, u.data, u.data+5, std::equal_to()); - (void)std::search_n(t.data, t.data+10, count, value, std::equal_to()); - (void)std::set_difference(t.data, t.data+5, t.data+5, t.data+10, u.data, std::less()); - (void)std::set_intersection(t.data, t.data+5, t.data+5, 
t.data+10, u.data, std::less()); - (void)std::set_symmetric_difference(t.data, t.data+5, t.data+5, t.data+10, u.data, std::less()); - (void)std::set_union(t.data, t.data+5, t.data+5, t.data+10, u.data, std::less()); - (void)std::unique(t.data, t.data+10, std::equal_to()); - (void)std::unique_copy(t.data, t.data+10, u.data, std::equal_to()); - (void)std::upper_bound(t.data, t.data+10, value, std::less()); -#if TEST_STD_VER >= 14 - (void)std::equal(t.data, t.data+10, u.data, u.data+10, std::equal_to()); - (void)std::is_permutation(t.data, t.data+10, u.data, u.data+10, std::equal_to()); - (void)std::mismatch(t.data, t.data+10, u.data, u.data+10, std::equal_to()); -#endif -#if TEST_STD_VER >= 20 - (void)std::clamp(value, value, value, std::less()); -#endif - - return true; -} - -int main(int, char**) -{ - test(); -#if TEST_STD_VER >= 20 - static_assert(test()); -#endif - return 0; -} diff --git a/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.compile.fail.cpp b/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.compile.fail.cpp deleted file mode 100644 index 3ec8ed25c976..000000000000 --- a/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.compile.fail.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template -// struct atomic -// { -// bool is_lock_free() const volatile noexcept; -// bool is_lock_free() const noexcept; -// void store(T desr, memory_order m = memory_order_seq_cst) volatile noexcept; -// void store(T desr, memory_order m = memory_order_seq_cst) noexcept; -// T load(memory_order m = memory_order_seq_cst) const volatile noexcept; -// T load(memory_order m = memory_order_seq_cst) const noexcept; -// operator T() const volatile noexcept; -// operator T() const noexcept; -// T exchange(T desr, memory_order m = memory_order_seq_cst) volatile noexcept; -// T exchange(T desr, memory_order m = memory_order_seq_cst) noexcept; -// bool compare_exchange_weak(T& expc, T desr, -// memory_order s, memory_order f) volatile noexcept; -// bool compare_exchange_weak(T& expc, T desr, memory_order s, memory_order f) noexcept; -// bool compare_exchange_strong(T& expc, T desr, -// memory_order s, memory_order f) volatile noexcept; -// bool compare_exchange_strong(T& expc, T desr, -// memory_order s, memory_order f) noexcept; -// bool compare_exchange_weak(T& expc, T desr, -// memory_order m = memory_order_seq_cst) volatile noexcept; -// bool compare_exchange_weak(T& expc, T desr, -// memory_order m = memory_order_seq_cst) noexcept; -// bool compare_exchange_strong(T& expc, T desr, -// memory_order m = memory_order_seq_cst) volatile noexcept; -// bool compare_exchange_strong(T& expc, T desr, -// memory_order m = memory_order_seq_cst) noexcept; -// -// atomic() noexcept = default; -// constexpr atomic(T desr) noexcept; -// atomic(const atomic&) = delete; -// atomic& operator=(const atomic&) = delete; -// atomic& operator=(const atomic&) volatile = delete; -// T operator=(T) volatile noexcept; -// T operator=(T) noexcept; -// }; - -#include -#include -#include -#include // for thread_id -#include // for nanoseconds 
- -struct NotTriviallyCopyable { - NotTriviallyCopyable ( int i ) : i_(i) {} - NotTriviallyCopyable ( const NotTriviallyCopyable &rhs) : i_(rhs.i_) {} - int i_; -}; - -template -void test ( T t ) { - std::atomic t0(t); -} - -int main(int, char**) -{ - test(NotTriviallyCopyable(42)); - - return 0; -} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.pass.cpp deleted file mode 100644 index 2edadd0fe777..000000000000 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_value.pass.cpp +++ /dev/null @@ -1,98 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// iterator insert(const_iterator position, const value_type& x); - -#include -#include -#include - -#include "test_macros.h" -#include "test_allocator.h" -#include "min_allocator.h" -#include "asan_testing.h" - -int main(int, char**) -{ - { - std::vector v(100); - std::vector::iterator i = v.insert(v.cbegin() + 10, 1); - assert(v.size() == 101); - assert(is_contiguous_container_asan_correct(v)); - assert(i == v.begin() + 10); - int j; - for (j = 0; j < 10; ++j) - assert(v[j] == 0); - assert(v[j] == 1); - for (++j; j < 101; ++j) - assert(v[j] == 0); - } - { - std::vector v(100); - while(v.size() < v.capacity()) v.push_back(0); // force reallocation - size_t sz = v.size(); - std::vector::iterator i = v.insert(v.cbegin() + 10, 1); - assert(v.size() == sz + 1); - assert(is_contiguous_container_asan_correct(v)); - assert(i == v.begin() + 10); - std::size_t j; - for (j = 0; j < 10; ++j) - assert(v[j] == 0); - assert(v[j] == 1); - 
for (++j; j < v.size(); ++j) - assert(v[j] == 0); - } - { - std::vector v(100); - while(v.size() < v.capacity()) v.push_back(0); - v.pop_back(); v.pop_back(); // force no reallocation - size_t sz = v.size(); - std::vector::iterator i = v.insert(v.cbegin() + 10, 1); - assert(v.size() == sz + 1); - assert(is_contiguous_container_asan_correct(v)); - assert(i == v.begin() + 10); - std::size_t j; - for (j = 0; j < 10; ++j) - assert(v[j] == 0); - assert(v[j] == 1); - for (++j; j < v.size(); ++j) - assert(v[j] == 0); - } - { - std::vector > v(100); - std::vector >::iterator i = v.insert(v.cbegin() + 10, 1); - assert(v.size() == 101); - assert(is_contiguous_container_asan_correct(v)); - assert(i == v.begin() + 10); - int j; - for (j = 0; j < 10; ++j) - assert(v[j] == 0); - assert(v[j] == 1); - for (++j; j < 101; ++j) - assert(v[j] == 0); - } -#if TEST_STD_VER >= 11 - { - std::vector> v(100); - std::vector>::iterator i = v.insert(v.cbegin() + 10, 1); - assert(v.size() == 101); - assert(is_contiguous_container_asan_correct(v)); - assert(i == v.begin() + 10); - int j; - for (j = 0; j < 10; ++j) - assert(v[j] == 0); - assert(v[j] == 1); - for (++j; j < 101; ++j) - assert(v[j] == 0); - } -#endif - - return 0; -} diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/charconv.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/charconv.pass.cpp deleted file mode 100644 index 2afe2e26ba55..000000000000 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/charconv.pass.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// feature macros - -/* Constant Value - __cpp_lib_to_chars 201611L - -*/ - -#include -#include -#include "test_macros.h" - -int main(int, char**) -{ -// ensure that the macros that are supposed to be defined in are defined. - -/* -#if !defined(__cpp_lib_fooby) -# error "__cpp_lib_fooby is not defined" -#elif __cpp_lib_fooby < 201606L -# error "__cpp_lib_fooby has an invalid value" -#endif -*/ - - return 0; -} diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory_resource.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory_resource.version.pass.cpp deleted file mode 100644 index d712a8bca8d1..000000000000 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory_resource.version.pass.cpp +++ /dev/null @@ -1,35 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// feature macros - -/* Constant Value - __cpp_lib_memory_resource 201603L - -*/ - -// XFAIL -// #include -#include -#include "test_macros.h" - -int main(int, char**) -{ -// ensure that the macros that are supposed to be defined in are defined. 
- -/* -#if !defined(__cpp_lib_fooby) -# error "__cpp_lib_fooby is not defined" -#elif __cpp_lib_fooby < 201606L -# error "__cpp_lib_fooby has an invalid value" -#endif -*/ - - return 0; -} diff --git a/libcxx/test/std/numerics/c.math/abs.fail.cpp b/libcxx/test/std/numerics/c.math/abs.fail.cpp deleted file mode 100644 index d58cf0d563d1..000000000000 --- a/libcxx/test/std/numerics/c.math/abs.fail.cpp +++ /dev/null @@ -1,31 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "test_macros.h" - -int main(int, char**) -{ - unsigned int ui = -5; - ui = std::abs(ui); // expected-error {{call to 'abs' is ambiguous}} - - unsigned char uc = -5; - uc = std::abs(uc); // expected-warning {{taking the absolute value of unsigned type 'unsigned char' has no effect}} - - unsigned short us = -5; - us = std::abs(us); // expected-warning {{taking the absolute value of unsigned type 'unsigned short' has no effect}} - - unsigned long ul = -5; - ul = std::abs(ul); // expected-error {{call to 'abs' is ambiguous}} - - unsigned long long ull = -5; - ull = ::abs(ull); // expected-error {{call to 'abs' is ambiguous}} - - return 0; -} diff --git a/libcxx/test/std/strings/string.view/string.view.cons/deduct.pass.cpp b/libcxx/test/std/strings/string.view/string.view.cons/deduct.pass.cpp deleted file mode 100644 index e0ff5f6c0cd5..000000000000 --- a/libcxx/test/std/strings/string.view/string.view.cons/deduct.pass.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts - -// - -// template _End> -// basic_string_view(_It, _End) -> basic_string_view>; - -#include -#include - -#include "make_string.h" -#include "test_macros.h" -#include "test_iterators.h" - -template -constexpr void test() { - auto val = MAKE_STRING_VIEW(CharT, "test"); - auto sv = std::basic_string_view(val.begin(), Sentinel(val.end())); - ASSERT_SAME_TYPE(decltype(sv), std::basic_string_view); - assert(sv.size() == val.size()); - assert(sv.data() == val.data()); -} - -constexpr void test() { - test(); -#ifndef TEST_HAS_NO_WIDE_CHARACTERS - test(); -#endif - test(); - test(); - test(); - test(); - test>(); -} - -int main(int, char**) { - test(); - - return 0; -} - diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp deleted file mode 100644 index 234efc83423b..000000000000 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14 - -// Throwing bad_any_cast is supported starting in macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} - -// - -// template -// ValueType any_cast(any const &); - -// Try and cast away const. 
- -#include - -struct TestType {}; -struct TestType2 {}; - -int main(int, char**) -{ - using std::any; - using std::any_cast; - - any a; - - // expected-error@any:* {{drops 'const' qualifier}} - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be a const lvalue reference or a CopyConstructible type"}} - any_cast(static_cast(a)); // expected-note {{requested here}} - - // expected-error@any:* {{cannot cast from lvalue of type 'const TestType' to rvalue reference type 'TestType &&'; types are not compatible}} - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be a const lvalue reference or a CopyConstructible type"}} - any_cast(static_cast(a)); // expected-note {{requested here}} - - // expected-error@any:* {{drops 'const' qualifier}} - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be a const lvalue reference or a CopyConstructible type"}} - any_cast(static_cast(a)); // expected-note {{requested here}} - - // expected-error@any:* {{cannot cast from lvalue of type 'const TestType2' to rvalue reference type 'TestType2 &&'; types are not compatible}} - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be a const lvalue reference or a CopyConstructible type"}} - any_cast(static_cast(a)); // expected-note {{requested here}} - - return 0; -} diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp deleted file mode 100644 index 44a67f7aa03d..000000000000 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14 - -// Throwing bad_any_cast is supported starting in macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} - -// - -// template -// ValueType const any_cast(any const&); -// -// template -// ValueType any_cast(any &); -// -// template -// ValueType any_cast(any &&); - -// Test instantiating the any_cast with a non-copyable type. - -#include - -using std::any; -using std::any_cast; - -struct no_copy -{ - no_copy() {} - no_copy(no_copy &&) {} - no_copy(no_copy const &) = delete; -}; - -struct no_move { - no_move() {} - no_move(no_move&&) = delete; - no_move(no_move const&) {} -}; - -int main(int, char**) { - any a; - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be an lvalue reference or a CopyConstructible type"}} - // expected-error@any:* {{static_cast from 'no_copy' to 'no_copy' uses deleted function}} - any_cast(static_cast(a)); // expected-note {{requested here}} - - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be a const lvalue reference or a CopyConstructible type"}} - // expected-error@any:* {{static_cast from 'const no_copy' to 'no_copy' uses deleted function}} - any_cast(static_cast(a)); // expected-note {{requested here}} - - any_cast(static_cast(a)); // OK - - // expected-error-re@any:* {{static_assert failed{{.*}} "ValueType is required to be an rvalue reference or a CopyConstructible type"}} - // expected-error@any:* {{static_cast from 'typename remove_reference::type' (aka 'no_move') to 'no_move' uses deleted function}} - any_cast(static_cast(a)); - - return 0; -} diff --git a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp b/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp deleted file mode 
100644 index 562f0058d672..000000000000 --- a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// void declare_no_pointers(char* p, size_t n); -// void undeclare_no_pointers(char* p, size_t n); - -#include - -#include "test_macros.h" - -int main(int, char**) -{ - char* p = new char[10]; - std::declare_no_pointers(p, 10); - std::undeclare_no_pointers(p, 10); - delete [] p; - - return 0; -} diff --git a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp b/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp deleted file mode 100644 index 08d06d582212..000000000000 --- a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// void declare_reachable(void* p); -// template T* undeclare_reachable(T* p); - -#include -#include - -#include "test_macros.h" - -int main(int, char**) -{ - int* p = new int; - std::declare_reachable(p); - assert(std::undeclare_reachable(p) == p); - delete p; - - return 0; -} diff --git a/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp b/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp deleted file mode 100644 index e6125721761b..000000000000 --- a/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// pointer_safety get_pointer_safety(); - -// UNSUPPORTED: c++03 - -#include -#include - -#include "test_macros.h" - -int main(int, char**) -{ - { - static_assert(std::is_enum::value, ""); - static_assert(!std::is_convertible::value, ""); - static_assert(std::is_same< - std::underlying_type::type, - unsigned char - >::value, ""); - } - { - std::pointer_safety r = std::get_pointer_safety(); - assert(r == std::pointer_safety::relaxed || - r == std::pointer_safety::preferred || - r == std::pointer_safety::strict); - } - // Regression test for https://llvm.org/PR26961 - { - std::pointer_safety d; - d = std::get_pointer_safety(); - assert(d == std::get_pointer_safety()); - } - - return 0; -} diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp deleted file mode 100644 index aece06674cf0..000000000000 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14 -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS - -// type_traits - -// result_of - -#include - -#include "test_macros.h" - -int main(int, char**) { - [[maybe_unused]] std::result_of a; // expected-warning {{'result_of' is deprecated}} -} diff --git a/libcxx/test/support/coroutine_types.h b/libcxx/test/support/coroutine_types.h deleted file mode 100644 index 04b27a161eb3..000000000000 --- a/libcxx/test/support/coroutine_types.h +++ /dev/null @@ -1,74 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef SUPPORT_COROUTINE_TYPES_H -#define SUPPORT_COROUTINE_TYPES_H - -#include - -template struct generator { - struct promise_type { - Ty current_value; - std::experimental::suspend_always yield_value(Ty value) { - this->current_value = value; - return {}; - } - std::experimental::suspend_always initial_suspend() { return {}; } - std::experimental::suspend_always final_suspend() noexcept { return {}; } - generator get_return_object() { return generator{this}; }; - void return_void() {} - void unhandled_exception() {} - }; - - struct iterator { - std::experimental::coroutine_handle _Coro; - bool _Done; - - iterator(std::experimental::coroutine_handle Coro, bool Done) - : _Coro(Coro), _Done(Done) {} - - iterator &operator++() { - _Coro.resume(); - _Done = _Coro.done(); - return *this; - } - - bool operator==(iterator const &_Right) const { - return _Done == _Right._Done; - } - - bool operator!=(iterator const 
&_Right) const { return !(*this == _Right); } - - Ty const &operator*() const { return _Coro.promise().current_value; } - - Ty const *operator->() const { return &(operator*()); } - }; - - iterator begin() { - p.resume(); - return {p, p.done()}; - } - - iterator end() { return {p, true}; } - - generator(generator &&rhs) : p(rhs.p) { rhs.p = nullptr; } - - ~generator() { - if (p) - p.destroy(); - } - -private: - explicit generator(promise_type *p) - : p(std::experimental::coroutine_handle::from_promise(*p)) {} - - std::experimental::coroutine_handle p; -}; - -#endif // SUPPORT_COROUTINE_TYPES_H diff --git a/libcxx/test/support/tracked_value.h b/libcxx/test/support/tracked_value.h deleted file mode 100644 index 01b8c840d19b..000000000000 --- a/libcxx/test/support/tracked_value.h +++ /dev/null @@ -1,59 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#ifndef SUPPORT_TRACKED_VALUE_H -#define SUPPORT_TRACKED_VALUE_H - -#include - -#include "test_macros.h" - -struct TrackedValue { - enum State { CONSTRUCTED, MOVED_FROM, DESTROYED }; - State state; - - TrackedValue() : state(State::CONSTRUCTED) {} - - TrackedValue(TrackedValue const& t) : state(State::CONSTRUCTED) { - assert(t.state != State::MOVED_FROM && "copying a moved-from object"); - assert(t.state != State::DESTROYED && "copying a destroyed object"); - } - -#if TEST_STD_VER >= 11 - TrackedValue(TrackedValue&& t) : state(State::CONSTRUCTED) { - assert(t.state != State::MOVED_FROM && "double moving from an object"); - assert(t.state != State::DESTROYED && "moving from a destroyed object"); - t.state = State::MOVED_FROM; - } -#endif - - TrackedValue& operator=(TrackedValue const& t) { - assert(state != State::DESTROYED && "copy assigning into destroyed object"); - assert(t.state != State::MOVED_FROM && "copying a moved-from object"); - assert(t.state != State::DESTROYED && "copying a destroyed object"); - state = t.state; - return *this; - } - -#if TEST_STD_VER >= 11 - TrackedValue& operator=(TrackedValue&& t) { - assert(state != State::DESTROYED && "move assigning into destroyed object"); - assert(t.state != State::MOVED_FROM && "double moving from an object"); - assert(t.state != State::DESTROYED && "moving from a destroyed object"); - state = t.state; - t.state = State::MOVED_FROM; - return *this; - } -#endif - - ~TrackedValue() { - assert(state != State::DESTROYED && "double-destroying an object"); - state = State::DESTROYED; - } -}; - -#endif // SUPPORT_TRACKED_VALUE_H diff --git a/libcxx/utils/google-benchmark/.clang-format b/libcxx/utils/google-benchmark/.clang-format deleted file mode 100644 index e7d00feaa08a..000000000000 --- a/libcxx/utils/google-benchmark/.clang-format +++ /dev/null @@ -1,5 +0,0 @@ ---- 
-Language: Cpp -BasedOnStyle: Google -PointerAlignment: Left -... diff --git a/libcxx/utils/google-benchmark/.github/.libcxx-setup.sh b/libcxx/utils/google-benchmark/.github/.libcxx-setup.sh deleted file mode 100755 index 56008403ae92..000000000000 --- a/libcxx/utils/google-benchmark/.github/.libcxx-setup.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -# Checkout LLVM sources -git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project - -# Setup libc++ options -if [ -z "$BUILD_32_BITS" ]; then - export BUILD_32_BITS=OFF && echo disabling 32 bit build -fi - -# Build and install libc++ (Use unstable ABI for better sanitizer coverage) -cd ./llvm-project -cmake -DCMAKE_C_COMPILER=${C_COMPILER} \ - -DCMAKE_CXX_COMPILER=${COMPILER} \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DLIBCXX_ABI_UNSTABLE=OFF \ - -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \ - -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \ - -DLLVM_ENABLE_PROJECTS='libcxx;libcxxabi' \ - -S llvm -B llvm-build -G "Unix Makefiles" -make -C llvm-build -j3 cxx cxxabi -sudo make -C llvm-build install-cxx install-cxxabi -cd .. diff --git a/libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/bug_report.md b/libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 6c2ced9b2ec5..000000000000 --- a/libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: "[BUG]" -labels: '' -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**System** -Which OS, compiler, and compiler version are you using: - - OS: - - Compiler and version: - -**To reproduce** -Steps to reproduce the behavior: -1. sync to commit ... -2. cmake/bazel... -3. make ... -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. 
- -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**Additional context** -Add any other context about the problem here. diff --git a/libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/feature_request.md b/libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 9e8ab6a673f6..000000000000 --- a/libcxx/utils/google-benchmark/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: "[FR]" -labels: '' -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. diff --git a/libcxx/utils/google-benchmark/.github/workflows/bazel.yml b/libcxx/utils/google-benchmark/.github/workflows/bazel.yml deleted file mode 100644 index a53661b2f9b1..000000000000 --- a/libcxx/utils/google-benchmark/.github/workflows/bazel.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: bazel - -on: - push: {} - pull_request: {} - -jobs: - build-and-test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v1 - - - name: mount bazel cache - uses: actions/cache@v2.0.0 - env: - cache-name: bazel-cache - with: - path: "~/.cache/bazel" - key: ${{ env.cache-name }}-${{ runner.os }}-${{ github.ref }} - restore-keys: | - ${{ env.cache-name }}-${{ runner.os }}-main - - - name: build - run: | - bazel build //:benchmark //:benchmark_main //test/... - - - name: test - run: | - bazel test --test_output=all //test/... 
diff --git a/libcxx/utils/google-benchmark/.github/workflows/build-and-test-perfcounters.yml b/libcxx/utils/google-benchmark/.github/workflows/build-and-test-perfcounters.yml deleted file mode 100644 index b2b541919766..000000000000 --- a/libcxx/utils/google-benchmark/.github/workflows/build-and-test-perfcounters.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: build-and-test-perfcounters - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - job: - # TODO(dominic): Extend this to include compiler and set through env: CC/CXX. - name: ${{ matrix.os }}.${{ matrix.build_type }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04] - build_type: ['Release', 'Debug'] - steps: - - uses: actions/checkout@v2 - - - name: install libpfm - run: sudo apt install libpfm4-dev - - - name: create build environment - run: cmake -E make_directory ${{ runner.workspace }}/_build - - - name: configure cmake - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake -DBENCHMARK_ENABLE_LIBPFM=1 -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - - name: build - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake --build . --config ${{ matrix.build_type }} - - # Skip testing, for now. It seems perf_event_open does not succeed on the - # hosting machine, very likely a permissions issue. - # TODO(mtrofin): Enable test. 
- # - name: test - # shell: bash - # working-directory: ${{ runner.workspace }}/_build - # run: sudo ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure diff --git a/libcxx/utils/google-benchmark/.github/workflows/build-and-test.yml b/libcxx/utils/google-benchmark/.github/workflows/build-and-test.yml deleted file mode 100644 index 9e5be3b1dc17..000000000000 --- a/libcxx/utils/google-benchmark/.github/workflows/build-and-test.yml +++ /dev/null @@ -1,110 +0,0 @@ -name: build-and-test - -on: - push: {} - pull_request: {} - -jobs: - # TODO: add 32-bit builds (g++ and clang++) for ubuntu - # (requires g++-multilib and libc6:i386) - # TODO: add coverage build (requires lcov) - # TODO: add clang + libc++ builds for ubuntu - # TODO: add clang + ubsan/asan/msan + libc++ builds for ubuntu - job: - name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.compiler }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04, macos-latest] - build_type: ['Release', 'Debug'] - compiler: [g++, clang++] - include: - - displayTargetName: windows-latest-release - os: windows-latest - build_type: 'Release' - - displayTargetName: windows-latest-debug - os: windows-latest - build_type: 'Debug' - steps: - - uses: actions/checkout@v2 - - - name: create build environment - run: cmake -E make_directory ${{ runner.workspace }}/_build - - - name: configure cmake - env: - CXX: ${{ matrix.compiler }} - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: > - cmake $GITHUB_WORKSPACE - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - - name: build - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake --build . 
--config ${{ matrix.build_type }} - - - name: test - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} -VV - - ubuntu-14_04: - name: ubuntu-14.04.${{ matrix.build_type }}.${{ matrix.compiler }} - runs-on: [ubuntu-latest] - strategy: - fail-fast: false - matrix: - build_type: ['Release', 'Debug'] - compiler: [g++-4.8, clang++-3.6] - include: - - compiler: g++-6 - build_type: 'Debug' - run_tests: true - - compiler: g++-6 - build_type: 'Release' - run_tests: true - container: ubuntu:14.04 - steps: - - uses: actions/checkout@v2 - - - name: install required bits - run: | - sudo apt update - sudo apt -y install clang-3.6 cmake3 g++-4.8 git - - - name: install other bits - if: ${{ matrix.compiler }} == g++-6 - run: | - sudo apt -y install software-properties-common - sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test" - sudo apt update - sudo apt -y install g++-6 - - - name: create build environment - run: cmake -E make_directory $GITHUB_WORKSPACE/_build - - - name: configure cmake - env: - CXX: ${{ matrix.compiler }} - shell: bash - working-directory: ${{ github.workspace }}/_build - run: > - cmake $GITHUB_WORKSPACE - -DBENCHMARK_ENABLE_TESTING=${{ matrix.run_tests }} - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=${{ matrix.run_tests }} - - - name: build - shell: bash - working-directory: ${{ github.workspace }}/_build - run: cmake --build . 
--config ${{ matrix.build_type }} - - - name: test - if: ${{ matrix.run_tests }} - shell: bash - working-directory: ${{ github.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} -VV diff --git a/libcxx/utils/google-benchmark/.github/workflows/pylint.yml b/libcxx/utils/google-benchmark/.github/workflows/pylint.yml deleted file mode 100644 index 0f73a5823206..000000000000 --- a/libcxx/utils/google-benchmark/.github/workflows/pylint.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: pylint - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - pylint: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint pylint-exit conan - - name: Run pylint - run: | - pylint `find . -name '*.py'|xargs` || pylint-exit $? diff --git a/libcxx/utils/google-benchmark/.github/workflows/sanitizer.yml b/libcxx/utils/google-benchmark/.github/workflows/sanitizer.yml deleted file mode 100644 index fbc984492df6..000000000000 --- a/libcxx/utils/google-benchmark/.github/workflows/sanitizer.yml +++ /dev/null @@ -1,78 +0,0 @@ -name: sanitizer - -on: - push: {} - pull_request: {} - -env: - CC: clang - CXX: clang++ - EXTRA_CXX_FLAGS: "-stdlib=libc++" - UBSAN_OPTIONS: "print_stacktrace=1" - -jobs: - job: - name: ${{ matrix.sanitizer }}.${{ matrix.build_type }} - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - build_type: ['Debug', 'RelWithDebInfo'] - sanitizer: ['asan', 'ubsan', 'tsan'] - # TODO: add 'msan' above. currently failing and needs investigation. 
- steps: - - uses: actions/checkout@v2 - - - name: configure msan env - if: matrix.sanitizer == 'msan' - run: | - echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV - echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV - - - name: configure ubsan env - if: matrix.sanitizer == 'ubsan' - run: | - echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV - echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV - - - name: configure asan env - if: matrix.sanitizer == 'asan' - run: | - echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV - echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV - - - name: configure tsan env - if: matrix.sanitizer == 'tsan' - run: | - echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV - echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV - - - name: install llvm stuff - run: "${GITHUB_WORKSPACE}/.github/.libcxx-setup.sh" - - - name: create build environment - run: cmake -E make_directory ${{ runner.workspace }}/_build - - - name: configure cmake - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: > - cmake $GITHUB_WORKSPACE - -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF - -DBENCHMARK_ENABLE_LIBPFM=OFF - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - -DCMAKE_C_COMPILER=${{ env.CC }} - -DCMAKE_CXX_COMPILER=${{ env.CXX }} - -DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}" - -DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}" - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - - name: build - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake --build . 
--config ${{ matrix.build_type }} - - - name: test - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} -VV diff --git a/libcxx/utils/google-benchmark/.github/workflows/test_bindings.yml b/libcxx/utils/google-benchmark/.github/workflows/test_bindings.yml deleted file mode 100644 index 4a580ebe047a..000000000000 --- a/libcxx/utils/google-benchmark/.github/workflows/test_bindings.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: test-bindings - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - python_bindings: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - name: Install benchmark - run: - python setup.py install - - name: Run example bindings - run: - python bindings/python/google_benchmark/example.py diff --git a/libcxx/utils/google-benchmark/.gitignore b/libcxx/utils/google-benchmark/.gitignore deleted file mode 100644 index be55d774e21b..000000000000 --- a/libcxx/utils/google-benchmark/.gitignore +++ /dev/null @@ -1,66 +0,0 @@ -*.a -*.so -*.so.?* -*.dll -*.exe -*.dylib -*.cmake -!/cmake/*.cmake -!/test/AssemblyTests.cmake -*~ -*.swp -*.pyc -__pycache__ - -# lcov -*.lcov -/lcov - -# cmake files. -/Testing -CMakeCache.txt -CMakeFiles/ -cmake_install.cmake - -# makefiles. -Makefile - -# in-source build. -bin/ -lib/ -/test/*_test - -# exuberant ctags. -tags - -# YouCompleteMe configuration. -.ycm_extra_conf.pyc - -# ninja generated files. -.ninja_deps -.ninja_log -build.ninja -install_manifest.txt -rules.ninja - -# bazel output symlinks. -bazel-* - -# out-of-source build top-level folders. 
-build/ -_build/ -build*/ - -# in-source dependencies -/googletest/ - -# Visual Studio 2015/2017 cache/options directory -.vs/ -CMakeSettings.json - -# Visual Studio Code cache/options directory -.vscode/ - -# Python build stuff -dist/ -*.egg-info* diff --git a/libcxx/utils/google-benchmark/.travis.yml b/libcxx/utils/google-benchmark/.travis.yml deleted file mode 100644 index 8cfed3d10dab..000000000000 --- a/libcxx/utils/google-benchmark/.travis.yml +++ /dev/null @@ -1,208 +0,0 @@ -sudo: required -dist: trusty -language: cpp - -matrix: - include: - - compiler: gcc - addons: - apt: - packages: - - lcov - env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage - - compiler: gcc - addons: - apt: - packages: - - g++-multilib - - libc6:i386 - env: - - COMPILER=g++ - - C_COMPILER=gcc - - BUILD_TYPE=Debug - - BUILD_32_BITS=ON - - EXTRA_FLAGS="-m32" - - compiler: gcc - addons: - apt: - packages: - - g++-multilib - - libc6:i386 - env: - - COMPILER=g++ - - C_COMPILER=gcc - - BUILD_TYPE=Release - - BUILD_32_BITS=ON - - EXTRA_FLAGS="-m32" - - compiler: gcc - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug - - ENABLE_SANITIZER=1 - - EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold" - # Clang w/ libc++ - - compiler: clang - dist: xenial - addons: - apt: - packages: - clang-3.8 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug - - LIBCXX_BUILD=1 - - EXTRA_CXX_FLAGS="-stdlib=libc++" - - compiler: clang - dist: xenial - addons: - apt: - packages: - clang-3.8 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release - - LIBCXX_BUILD=1 - - EXTRA_CXX_FLAGS="-stdlib=libc++" - # Clang w/ 32bit libc++ - - compiler: clang - dist: xenial - addons: - apt: - packages: - - clang-3.8 - - g++-multilib - - libc6:i386 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug - - LIBCXX_BUILD=1 - - 
BUILD_32_BITS=ON - - EXTRA_FLAGS="-m32" - - EXTRA_CXX_FLAGS="-stdlib=libc++" - # Clang w/ 32bit libc++ - - compiler: clang - dist: xenial - addons: - apt: - packages: - - clang-3.8 - - g++-multilib - - libc6:i386 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release - - LIBCXX_BUILD=1 - - BUILD_32_BITS=ON - - EXTRA_FLAGS="-m32" - - EXTRA_CXX_FLAGS="-stdlib=libc++" - # Clang w/ libc++, ASAN, UBSAN - - compiler: clang - dist: xenial - addons: - apt: - packages: - clang-3.8 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug - - LIBCXX_BUILD=1 LIBCXX_SANITIZER="Undefined;Address" - - ENABLE_SANITIZER=1 - - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=undefined,address -fno-sanitize-recover=all" - - EXTRA_CXX_FLAGS="-stdlib=libc++" - - UBSAN_OPTIONS=print_stacktrace=1 - # Clang w/ libc++ and MSAN - - compiler: clang - dist: xenial - addons: - apt: - packages: - clang-3.8 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug - - LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins - - ENABLE_SANITIZER=1 - - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" - - EXTRA_CXX_FLAGS="-stdlib=libc++" - # Clang w/ libc++ and MSAN - - compiler: clang - dist: xenial - addons: - apt: - packages: - clang-3.8 - env: - - INSTALL_GCC6_FROM_PPA=1 - - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo - - LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread - - ENABLE_SANITIZER=1 - - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" - - EXTRA_CXX_FLAGS="-stdlib=libc++" - - os: osx - osx_image: xcode8.3 - compiler: clang - env: - - COMPILER=clang++ - - BUILD_TYPE=Release - - BUILD_32_BITS=ON - - EXTRA_FLAGS="-m32" - -before_script: - - if [ -n "${LIBCXX_BUILD}" ]; then - source .libcxx-setup.sh; - fi - - if [ -n "${ENABLE_SANITIZER}" ]; then - export 
EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF"; - else - export EXTRA_OPTIONS=""; - fi - - mkdir -p build && cd build - -before_install: - - if [ -z "$BUILD_32_BITS" ]; then - export BUILD_32_BITS=OFF && echo disabling 32 bit build; - fi - - if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then - sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test"; - sudo apt-get update --option Acquire::Retries=100 --option Acquire::http::Timeout="60"; - fi - -install: - - if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then - travis_wait sudo -E apt-get -yq --no-install-suggests --no-install-recommends install g++-6; - fi - - if [ "${TRAVIS_OS_NAME}" == "linux" -a "${BUILD_32_BITS}" == "OFF" ]; then - travis_wait sudo -E apt-get -y --no-install-suggests --no-install-recommends install llvm-3.9-tools; - sudo cp /usr/lib/llvm-3.9/bin/FileCheck /usr/local/bin/; - fi - - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then - PATH=~/.local/bin:${PATH}; - pip install --user --upgrade pip; - travis_wait pip install --user cpp-coveralls; - fi - - if [ "${C_COMPILER}" == "gcc-7" -a "${TRAVIS_OS_NAME}" == "osx" ]; then - rm -f /usr/local/include/c++; - brew update; - travis_wait brew install gcc@7; - fi - - if [ "${TRAVIS_OS_NAME}" == "linux" ]; then - sudo apt-get update -qq; - sudo apt-get install -qq unzip cmake3; - wget https://github.com/bazelbuild/bazel/releases/download/3.2.0/bazel-3.2.0-installer-linux-x86_64.sh --output-document bazel-installer.sh; - travis_wait sudo bash bazel-installer.sh; - fi - - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - curl -L -o bazel-installer.sh https://github.com/bazelbuild/bazel/releases/download/3.2.0/bazel-3.2.0-installer-darwin-x86_64.sh; - travis_wait sudo bash bazel-installer.sh; - fi - -script: - - cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_C_FLAGS="${EXTRA_FLAGS}" -DCMAKE_CXX_FLAGS="${EXTRA_FLAGS} ${EXTRA_CXX_FLAGS}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON 
-DBENCHMARK_BUILD_32_BITS=${BUILD_32_BITS} ${EXTRA_OPTIONS} .. - - make - - ctest -C ${BUILD_TYPE} --output-on-failure - - bazel test -c dbg --define google_benchmark.have_regex=posix --announce_rc --verbose_failures --test_output=errors --keep_going //test/... - -after_success: - - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then - coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .; - fi diff --git a/libcxx/utils/google-benchmark/.ycm_extra_conf.py b/libcxx/utils/google-benchmark/.ycm_extra_conf.py deleted file mode 100644 index 5649ddcc749f..000000000000 --- a/libcxx/utils/google-benchmark/.ycm_extra_conf.py +++ /dev/null @@ -1,115 +0,0 @@ -import os -import ycm_core - -# These are the compilation flags that will be used in case there's no -# compilation database set (by default, one is not set). -# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. -flags = [ -'-Wall', -'-Werror', -'-pedantic-errors', -'-std=c++0x', -'-fno-strict-aliasing', -'-O3', -'-DNDEBUG', -# ...and the same thing goes for the magic -x option which specifies the -# language that the files to be compiled are written in. This is mostly -# relevant for c++ headers. -# For a C project, you would set this to 'c' instead of 'c++'. -'-x', 'c++', -'-I', 'include', -'-isystem', '/usr/include', -'-isystem', '/usr/local/include', -] - - -# Set this to the absolute path to the folder (NOT the file!) containing the -# compile_commands.json file to use that instead of 'flags'. See here for -# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html -# -# Most projects will NOT need to set this to anything; you can just change the -# 'flags' list of compilation flags. Notice that YCM itself uses that approach. 
-compilation_database_folder = '' - -if os.path.exists( compilation_database_folder ): - database = ycm_core.CompilationDatabase( compilation_database_folder ) -else: - database = None - -SOURCE_EXTENSIONS = [ '.cc' ] - -def DirectoryOfThisScript(): - return os.path.dirname( os.path.abspath( __file__ ) ) - - -def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): - if not working_directory: - return list( flags ) - new_flags = [] - make_next_absolute = False - path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] - for flag in flags: - new_flag = flag - - if make_next_absolute: - make_next_absolute = False - if not flag.startswith( '/' ): - new_flag = os.path.join( working_directory, flag ) - - for path_flag in path_flags: - if flag == path_flag: - make_next_absolute = True - break - - if flag.startswith( path_flag ): - path = flag[ len( path_flag ): ] - new_flag = path_flag + os.path.join( working_directory, path ) - break - - if new_flag: - new_flags.append( new_flag ) - return new_flags - - -def IsHeaderFile( filename ): - extension = os.path.splitext( filename )[ 1 ] - return extension in [ '.h', '.hxx', '.hpp', '.hh' ] - - -def GetCompilationInfoForFile( filename ): - # The compilation_commands.json file generated by CMake does not have entries - # for header files. So we do our best by asking the db for flags for a - # corresponding source file, if any. If one exists, the flags for that file - # should be good enough. 
- if IsHeaderFile( filename ): - basename = os.path.splitext( filename )[ 0 ] - for extension in SOURCE_EXTENSIONS: - replacement_file = basename + extension - if os.path.exists( replacement_file ): - compilation_info = database.GetCompilationInfoForFile( - replacement_file ) - if compilation_info.compiler_flags_: - return compilation_info - return None - return database.GetCompilationInfoForFile( filename ) - - -def FlagsForFile( filename, **kwargs ): - if database: - # Bear in mind that compilation_info.compiler_flags_ does NOT return a - # python list, but a "list-like" StringVec object - compilation_info = GetCompilationInfoForFile( filename ) - if not compilation_info: - return None - - final_flags = MakeRelativePathsInFlagsAbsolute( - compilation_info.compiler_flags_, - compilation_info.compiler_working_dir_ ) - else: - relative_to = DirectoryOfThisScript() - final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) - - return { - 'flags': final_flags, - 'do_cache': True - } diff --git a/libcxx/utils/google-benchmark/AUTHORS b/libcxx/utils/google-benchmark/AUTHORS deleted file mode 100644 index 838dd4f5bd5e..000000000000 --- a/libcxx/utils/google-benchmark/AUTHORS +++ /dev/null @@ -1,60 +0,0 @@ -# This is the official list of benchmark authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. -# -# Names should be added to this file as: -# Name or Organization -# The email address is not required for organizations. -# -# Please keep the list sorted. - -Albert Pretorius -Alex Steele -Andriy Berestovskyy -Arne Beer -Carto -Christian Wassermann -Christopher Seymour -Colin Braley -Daniel Harvey -David Coeurjolly -Deniz Evrenci -Dirac Research -Dominik Czarnota -Eric Backus -Eric Fiselier -Eugene Zhuk -Evgeny Safronov -Federico Ficarelli -Felix Homann -Gergő Szitár -Google Inc. 
-International Business Machines Corporation -Ismael Jimenez Martinez -Jern-Kuan Leong -JianXiong Zhou -Joao Paulo Magalhaes -Jordan Williams -Jussi Knuuttila -Kaito Udagawa -Kishan Kumar -Lei Xu -Matt Clarkson -Maxim Vafin -MongoDB Inc. -Nick Hutchinson -Norman Heino -Oleksandr Sochka -Ori Livneh -Paul Redmond -Radoslav Yovchev -Roman Lebedev -Sayan Bhattacharjee -Shuo Chen -Steinar H. Gunderson -Stripe, Inc. -Tobias Schmidt -Yixuan Qiu -Yusuke Suzuki -Zbigniew Skowron -Min-Yih Hsu diff --git a/libcxx/utils/google-benchmark/BUILD.bazel b/libcxx/utils/google-benchmark/BUILD.bazel deleted file mode 100644 index eb35b62730c6..000000000000 --- a/libcxx/utils/google-benchmark/BUILD.bazel +++ /dev/null @@ -1,44 +0,0 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") - -licenses(["notice"]) - -config_setting( - name = "windows", - values = { - "cpu": "x64_windows", - }, - visibility = [":__subpackages__"], -) - -cc_library( - name = "benchmark", - srcs = glob( - [ - "src/*.cc", - "src/*.h", - ], - exclude = ["src/benchmark_main.cc"], - ), - hdrs = ["include/benchmark/benchmark.h"], - linkopts = select({ - ":windows": ["-DEFAULTLIB:shlwapi.lib"], - "//conditions:default": ["-pthread"], - }), - strip_include_prefix = "include", - visibility = ["//visibility:public"], -) - -cc_library( - name = "benchmark_main", - srcs = ["src/benchmark_main.cc"], - hdrs = ["include/benchmark/benchmark.h"], - strip_include_prefix = "include", - visibility = ["//visibility:public"], - deps = [":benchmark"], -) - -cc_library( - name = "benchmark_internal_headers", - hdrs = glob(["src/*.h"]), - visibility = ["//test:__pkg__"], -) diff --git a/libcxx/utils/google-benchmark/CMakeLists.txt b/libcxx/utils/google-benchmark/CMakeLists.txt deleted file mode 100644 index ef8dcdc68cfb..000000000000 --- a/libcxx/utils/google-benchmark/CMakeLists.txt +++ /dev/null @@ -1,313 +0,0 @@ -cmake_minimum_required (VERSION 3.5.1) - -foreach(p - CMP0048 # OK to clear PROJECT_VERSION on project() - CMP0054 # CMake 
3.1 - CMP0056 # export EXE_LINKER_FLAGS to try_run - CMP0057 # Support no if() IN_LIST operator - CMP0063 # Honor visibility properties for all targets - CMP0077 # Allow option() overrides in importing projects - ) - if(POLICY ${p}) - cmake_policy(SET ${p} NEW) - endif() -endforeach() - -project (benchmark VERSION 1.5.4 LANGUAGES CXX) - -option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) -option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) -option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) -option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF) -if(NOT MSVC) - option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF) -else() - set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE) -endif() -option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON) - -# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which -# may require downloading the source code. -option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree building of unmet dependencies" OFF) - -# This option can be used to disable building and running unit tests which depend on gtest -# in cases where it is not possible to build or find a valid version of gtest. -option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON) - -option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) - -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -if(MSVC) - # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and - # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the - # undocumented, but working variable. 
- # See https://gitlab.kitware.com/cmake/cmake/-/issues/15170 - set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID}) - if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM") - set(CMAKE_CROSSCOMPILING TRUE) - endif() -endif() - -set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF) -function(should_enable_assembly_tests) - if(CMAKE_BUILD_TYPE) - string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) - if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") - # FIXME: The --coverage flag needs to be removed when building assembly - # tests for this to work. - return() - endif() - endif() - if (MSVC) - return() - elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") - return() - elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) - # FIXME: Make these work on 32 bit builds - return() - elseif(BENCHMARK_BUILD_32_BITS) - # FIXME: Make these work on 32 bit builds - return() - endif() - find_program(LLVM_FILECHECK_EXE FileCheck) - if (LLVM_FILECHECK_EXE) - set(LLVM_FILECHECK_EXE "${LLVM_FILECHECK_EXE}" CACHE PATH "llvm filecheck" FORCE) - message(STATUS "LLVM FileCheck Found: ${LLVM_FILECHECK_EXE}") - else() - message(STATUS "Failed to find LLVM FileCheck") - return() - endif() - set(ENABLE_ASSEMBLY_TESTS_DEFAULT ON PARENT_SCOPE) -endfunction() -should_enable_assembly_tests() - -# This option disables the building and running of the assembly verification tests -option(BENCHMARK_ENABLE_ASSEMBLY_TESTS "Enable building and running the assembly tests" - ${ENABLE_ASSEMBLY_TESTS_DEFAULT}) - -# Make sure we can import out CMake functions -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") - - -# Read the git tags to determine the project version -include(GetGitVersion) -get_git_version(GIT_VERSION) - -# If no git version can be determined, use the version -# from the project() command -if ("${GIT_VERSION}" STREQUAL "0.0.0") - set(VERSION "${benchmark_VERSION}") -else() - set(VERSION "${GIT_VERSION}") -endif() -# Tell the user 
what versions we are using -message(STATUS "Version: ${VERSION}") - -# The version of the libraries -set(GENERIC_LIB_VERSION ${VERSION}) -string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) - -# Import our CMake modules -include(CheckCXXCompilerFlag) -include(AddCXXCompilerFlag) -include(CXXFeatureCheck) - -if (BENCHMARK_BUILD_32_BITS) - add_required_cxx_compiler_flag(-m32) -endif() - -if (MSVC) - # Turn compiler warnings up to 11 - string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") - add_definitions(-D_CRT_SECURE_NO_WARNINGS) - - if (NOT BENCHMARK_ENABLE_EXCEPTIONS) - add_cxx_compiler_flag(-EHs-) - add_cxx_compiler_flag(-EHa-) - add_definitions(-D_HAS_EXCEPTIONS=0) - endif() - # Link time optimisation - if (BENCHMARK_ENABLE_LTO) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL") - set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /LTCG") - set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /LTCG") - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG") - - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /GL") - string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO}") - set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") - string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}") - set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") - string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}") - set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") - - set(CMAKE_CXX_FLAGS_MINSIZEREL 
"${CMAKE_CXX_FLAGS_MINSIZEREL} /GL") - set(CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL "${CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL} /LTCG") - set(CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL "${CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL} /LTCG") - set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") - endif() -else() - # Try and enable C++11. Don't use C++14 because it doesn't work in some - # configurations. - add_cxx_compiler_flag(-std=c++11) - if (NOT HAVE_CXX_FLAG_STD_CXX11) - add_cxx_compiler_flag(-std=c++0x) - endif() - - # Turn compiler warnings up to 11 - add_cxx_compiler_flag(-Wall) - add_cxx_compiler_flag(-Wextra) - add_cxx_compiler_flag(-Wshadow) - add_cxx_compiler_flag(-Werror RELEASE) - add_cxx_compiler_flag(-Werror RELWITHDEBINFO) - add_cxx_compiler_flag(-Werror MINSIZEREL) - if (NOT BENCHMARK_ENABLE_TESTING) - # Disable warning when compiling tests as gtest does not use 'override'. - add_cxx_compiler_flag(-Wsuggest-override) - endif() - add_cxx_compiler_flag(-pedantic) - add_cxx_compiler_flag(-pedantic-errors) - add_cxx_compiler_flag(-Wshorten-64-to-32) - add_cxx_compiler_flag(-fstrict-aliasing) - # Disable warnings regarding deprecated parts of the library while building - # and testing those parts of the library. - add_cxx_compiler_flag(-Wno-deprecated-declarations) - if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - # Intel silently ignores '-Wno-deprecated-declarations', - # warning no. 1786 must be explicitly disabled. - # See #631 for rationale. - add_cxx_compiler_flag(-wd1786) - endif() - # Disable deprecation warnings for release builds (when -Werror is enabled). 
- add_cxx_compiler_flag(-Wno-deprecated RELEASE) - add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) - add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) - if (NOT BENCHMARK_ENABLE_EXCEPTIONS) - add_cxx_compiler_flag(-fno-exceptions) - endif() - - if (HAVE_CXX_FLAG_FSTRICT_ALIASING) - if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing - add_cxx_compiler_flag(-Wstrict-aliasing) - endif() - endif() - # ICC17u2: overloaded virtual function "benchmark::Fixture::SetUp" is only partially overridden - # (because of deprecated overload) - add_cxx_compiler_flag(-wd654) - add_cxx_compiler_flag(-Wthread-safety) - if (HAVE_CXX_FLAG_WTHREAD_SAFETY) - cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) - endif() - - # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a - # predefined macro, which turns on all of the wonderful libc extensions. - # However g++ doesn't do this in Cygwin so we have to define it ourselfs - # since we depend on GNU/POSIX/BSD extensions. - if (CYGWIN) - add_definitions(-D_GNU_SOURCE=1) - endif() - - if (QNXNTO) - add_definitions(-D_QNX_SOURCE) - endif() - - # Link time optimisation - if (BENCHMARK_ENABLE_LTO) - add_cxx_compiler_flag(-flto) - add_cxx_compiler_flag(-Wno-lto-type-mismatch) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - find_program(GCC_AR gcc-ar) - if (GCC_AR) - set(CMAKE_AR ${GCC_AR}) - endif() - find_program(GCC_RANLIB gcc-ranlib) - if (GCC_RANLIB) - set(CMAKE_RANLIB ${GCC_RANLIB}) - endif() - elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - include(llvm-toolchain) - endif() - endif() - - # Coverage build type - set(BENCHMARK_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" - CACHE STRING "Flags used by the C++ compiler during coverage builds." - FORCE) - set(BENCHMARK_EXE_LINKER_FLAGS_COVERAGE "${CMAKE_EXE_LINKER_FLAGS_DEBUG}" - CACHE STRING "Flags used for linking binaries during coverage builds." 
- FORCE) - set(BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" - CACHE STRING "Flags used by the shared libraries linker during coverage builds." - FORCE) - mark_as_advanced( - BENCHMARK_CXX_FLAGS_COVERAGE - BENCHMARK_EXE_LINKER_FLAGS_COVERAGE - BENCHMARK_SHARED_LINKER_FLAGS_COVERAGE) - set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING - "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage.") - add_cxx_compiler_flag(--coverage COVERAGE) -endif() - -if (BENCHMARK_USE_LIBCXX) - if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - add_cxx_compiler_flag(-stdlib=libc++) - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR - "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") - add_cxx_compiler_flag(-nostdinc++) - message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS") - # Adding -nodefaultlibs directly to CMAKE__LINKER_FLAGS will break - # configuration checks such as 'find_package(Threads)' - list(APPEND BENCHMARK_CXX_LINKER_FLAGS -nodefaultlibs) - # -lc++ cannot be added directly to CMAKE__LINKER_FLAGS because - # linker flags appear before all linker inputs and -lc++ must appear after. 
- list(APPEND BENCHMARK_CXX_LIBRARIES c++) - else() - message(FATAL_ERROR "-DBENCHMARK_USE_LIBCXX:BOOL=ON is not supported for compiler") - endif() -endif(BENCHMARK_USE_LIBCXX) - -set(EXTRA_CXX_FLAGS "") -if (WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - # Clang on Windows fails to compile the regex feature check under C++11 - set(EXTRA_CXX_FLAGS "-DCMAKE_CXX_STANDARD=14") -endif() - -# C++ feature checks -# Determine the correct regular expression engine to use -cxx_feature_check(STD_REGEX ${EXTRA_CXX_FLAGS}) -cxx_feature_check(GNU_POSIX_REGEX ${EXTRA_CXX_FLAGS}) -cxx_feature_check(POSIX_REGEX ${EXTRA_CXX_FLAGS}) -if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) - message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") -endif() -if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX - AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) - message(WARNING "Using std::regex with exceptions disabled is not fully supported") -endif() - -cxx_feature_check(STEADY_CLOCK) -# Ensure we have pthreads -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) - -if (BENCHMARK_ENABLE_LIBPFM) - find_package(PFM) -endif() - -# Set up directories -include_directories(${PROJECT_SOURCE_DIR}/include) - -# Build the targets -add_subdirectory(src) - -if (BENCHMARK_ENABLE_TESTING) - enable_testing() - if (BENCHMARK_ENABLE_GTEST_TESTS AND - NOT (TARGET gtest AND TARGET gtest_main AND - TARGET gmock AND TARGET gmock_main)) - include(GoogleTest) - endif() - add_subdirectory(test) -endif() diff --git a/libcxx/utils/google-benchmark/CONTRIBUTING.md b/libcxx/utils/google-benchmark/CONTRIBUTING.md deleted file mode 100644 index 43de4c9d4709..000000000000 --- a/libcxx/utils/google-benchmark/CONTRIBUTING.md +++ /dev/null @@ -1,58 +0,0 @@ -# How to contribute # - -We'd love to accept your patches and contributions to this project. There are -a just a few small guidelines you need to follow. 
- - -## Contributor License Agreement ## - -Contributions to any Google project must be accompanied by a Contributor -License Agreement. This is not a copyright **assignment**, it simply gives -Google permission to use and redistribute your contributions as part of the -project. - - * If you are an individual writing original source code and you're sure you - own the intellectual property, then you'll need to sign an [individual - CLA][]. - - * If you work for a company that wants to allow you to contribute your work, - then you'll need to sign a [corporate CLA][]. - -You generally only need to submit a CLA once, so if you've already submitted -one (even if it was for a different project), you probably don't need to do it -again. - -[individual CLA]: https://developers.google.com/open-source/cla/individual -[corporate CLA]: https://developers.google.com/open-source/cla/corporate - -Once your CLA is submitted (or if you already submitted one for -another Google project), make a commit adding yourself to the -[AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part -of your first [pull request][]. - -[AUTHORS]: AUTHORS -[CONTRIBUTORS]: CONTRIBUTORS - - -## Submitting a patch ## - - 1. It's generally best to start by opening a new issue describing the bug or - feature you're intending to fix. Even if you think it's relatively minor, - it's helpful to know what people are working on. Mention in the initial - issue that you are planning to work on that bug or feature so that it can - be assigned to you. - - 1. Follow the normal process of [forking][] the project, and setup a new - branch to work in. It's important that each group of changes be done in - separate branches in order to ensure that a pull request only includes the - commits related to that bug or feature. - - 1. Do your best to have [well-formed commit messages][] for each change. 
- This provides consistency throughout the project, and ensures that commit - messages are able to be formatted properly by various git tools. - - 1. Finally, push the commits to your fork and submit a [pull request][]. - -[forking]: https://help.github.com/articles/fork-a-repo -[well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html -[pull request]: https://help.github.com/articles/creating-a-pull-request diff --git a/libcxx/utils/google-benchmark/CONTRIBUTORS b/libcxx/utils/google-benchmark/CONTRIBUTORS deleted file mode 100644 index 7489731de5a8..000000000000 --- a/libcxx/utils/google-benchmark/CONTRIBUTORS +++ /dev/null @@ -1,85 +0,0 @@ -# People who have agreed to one of the CLAs and can contribute patches. -# The AUTHORS file lists the copyright holders; this file -# lists people. For example, Google employees are listed here -# but not in AUTHORS, because Google holds the copyright. -# -# Names should be added to this file only after verifying that -# the individual or the individual's organization has agreed to -# the appropriate Contributor License Agreement, found here: -# -# https://developers.google.com/open-source/cla/individual -# https://developers.google.com/open-source/cla/corporate -# -# The agreement for individuals can be filled out on the web. -# -# When adding J Random Contributor's name to this file, -# either J's name or J's organization's name should be -# added to the AUTHORS file, depending on whether the -# individual or corporate CLA was used. -# -# Names should be added to this file as: -# Name -# -# Please keep the list sorted. 
- -Abhina Sreeskantharajan -Albert Pretorius -Alex Steele -Andriy Berestovskyy -Arne Beer -Billy Robert O'Neal III -Chris Kennelly -Christian Wassermann -Christopher Seymour -Colin Braley -Cyrille Faucheux -Daniel Harvey -David Coeurjolly -Deniz Evrenci -Dominic Hamon -Dominik Czarnota -Eric Backus -Eric Fiselier -Eugene Zhuk -Evgeny Safronov -Fanbo Meng -Federico Ficarelli -Felix Homann -Geoffrey Martin-Noble -Gergő Szitár -Hannes Hauswedell -Ismael Jimenez Martinez -Jern-Kuan Leong -JianXiong Zhou -Joao Paulo Magalhaes -John Millikin -Jordan Williams -Jussi Knuuttila -Kai Wolf -Kaito Udagawa -Kishan Kumar -Lei Xu -Matt Clarkson -Maxim Vafin -Nick Hutchinson -Norman Heino -Oleksandr Sochka -Ori Livneh -Pascal Leroy -Paul Redmond -Pierre Phaneuf -Radoslav Yovchev -Raul Marin -Ray Glover -Robert Guo -Roman Lebedev -Sayan Bhattacharjee -Shuo Chen -Steven Wan -Tobias Schmidt -Tobias Ulvgård -Tom Madams -Yixuan Qiu -Yusuke Suzuki -Zbigniew Skowron -Min-Yih Hsu diff --git a/libcxx/utils/google-benchmark/LICENSE b/libcxx/utils/google-benchmark/LICENSE deleted file mode 100644 index d64569567334..000000000000 --- a/libcxx/utils/google-benchmark/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/libcxx/utils/google-benchmark/README.md b/libcxx/utils/google-benchmark/README.md deleted file mode 100644 index aa61cef1b162..000000000000 --- a/libcxx/utils/google-benchmark/README.md +++ /dev/null @@ -1,1378 +0,0 @@ -# Benchmark - -[![build-and-test](https://github.com/google/benchmark/workflows/build-and-test/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Abuild-and-test) -[![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml) -[![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint) -[![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) - -[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) -[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) - - -A library to benchmark code snippets, similar to unit tests. 
Example: - -```c++ -#include - -static void BM_SomeFunction(benchmark::State& state) { - // Perform setup here - for (auto _ : state) { - // This code gets timed - SomeFunction(); - } -} -// Register the function as a benchmark -BENCHMARK(BM_SomeFunction); -// Run the benchmark -BENCHMARK_MAIN(); -``` - -To get started, see [Requirements](#requirements) and -[Installation](#installation). See [Usage](#usage) for a full example and the -[User Guide](#user-guide) for a more comprehensive feature overview. - -It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md) -as some of the structural aspects of the APIs are similar. - -### Resources - -[Discussion group](https://groups.google.com/d/forum/benchmark-discuss) - -IRC channels: -* [libera](https://libera.chat) #benchmark - -[Additional Tooling Documentation](docs/tools.md) - -[Assembly Testing Documentation](docs/AssemblyTests.md) - -## Requirements - -The library can be used with C++03. However, it requires C++11 to build, -including compiler and standard library support. - -The following minimum versions are required to build the library: - -* GCC 4.8 -* Clang 3.4 -* Visual Studio 14 2015 -* Intel 2015 Update 1 - -See [Platform-Specific Build Instructions](#platform-specific-build-instructions). - -## Installation - -This describes the installation process using cmake. As pre-requisites, you'll -need git and cmake installed. - -_See [dependencies.md](dependencies.md) for more details regarding supported -versions of build tools._ - -```bash -# Check out the library. -$ git clone https://github.com/google/benchmark.git -# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory. -$ git clone https://github.com/google/googletest.git benchmark/googletest -# Go to the library root directory -$ cd benchmark -# Make a build directory to place the build output. 
-$ cmake -E make_directory "build" -# Generate build system files with cmake. -$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../ -# or, starting with CMake 3.13, use a simpler form: -# cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build" -# Build the library. -$ cmake --build "build" --config Release -``` -This builds the `benchmark` and `benchmark_main` libraries and tests. -On a unix system, the build directory should now look something like this: - -``` -/benchmark - /build - /src - /libbenchmark.a - /libbenchmark_main.a - /test - ... -``` - -Next, you can run the tests to check the build. - -```bash -$ cmake -E chdir "build" ctest --build-config Release -``` - -If you want to install the library globally, also run: - -``` -sudo cmake --build "build" --config Release --target install -``` - -Note that Google Benchmark requires Google Test to build and run the tests. This -dependency can be provided two ways: - -* Checkout the Google Test sources into `benchmark/googletest` as above. -* Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during - configuration, the library will automatically download and build any required - dependencies. - -If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF` -to `CMAKE_ARGS`. - -### Debug vs Release - -By default, benchmark builds as a debug library. You will see a warning in the -output when this is the case. To build it as a release library instead, add -`-DCMAKE_BUILD_TYPE=Release` when generating the build system files, as shown -above. The use of `--config Release` in build commands is needed to properly -support multi-configuration tools (like Visual Studio for example) and can be -skipped for other build systems (like Makefile). - -To enable link-time optimisation, also add `-DBENCHMARK_ENABLE_LTO=true` when -generating the build system files. - -If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake -cache variables, if autodetection fails. 
- -If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, -`LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. - -### Stable and Experimental Library Versions - -The main branch contains the latest stable version of the benchmarking library; -the API of which can be considered largely stable, with source breaking changes -being made only upon the release of a new major version. - -Newer, experimental, features are implemented and tested on the -[`v2` branch](https://github.com/google/benchmark/tree/v2). Users who wish -to use, test, and provide feedback on the new features are encouraged to try -this branch. However, this branch provides no stability guarantees and reserves -the right to change and break the API at any time. - -## Usage - -### Basic usage - -Define a function that executes the code to measure, register it as a benchmark -function using the `BENCHMARK` macro, and ensure an appropriate `main` function -is available: - -```c++ -#include - -static void BM_StringCreation(benchmark::State& state) { - for (auto _ : state) - std::string empty_string; -} -// Register the function as a benchmark -BENCHMARK(BM_StringCreation); - -// Define another benchmark -static void BM_StringCopy(benchmark::State& state) { - std::string x = "hello"; - for (auto _ : state) - std::string copy(x); -} -BENCHMARK(BM_StringCopy); - -BENCHMARK_MAIN(); -``` - -To run the benchmark, compile and link against the `benchmark` library -(libbenchmark.a/.so). If you followed the build steps above, this library will -be under the build directory you created. - -```bash -# Example on linux after running the build steps above. Assumes the -# `benchmark` and `build` directories are under the current directory. -$ g++ mybenchmark.cc -std=c++11 -isystem benchmark/include \ - -Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark -``` - -Alternatively, link against the `benchmark_main` library and remove -`BENCHMARK_MAIN();` above to get the same behavior. 
- -The compiled executable will run all benchmarks by default. Pass the `--help` -flag for option information or see the guide below. - -### Usage with CMake - -If using CMake, it is recommended to link against the project-provided -`benchmark::benchmark` and `benchmark::benchmark_main` targets using -`target_link_libraries`. -It is possible to use ```find_package``` to import an installed version of the -library. -```cmake -find_package(benchmark REQUIRED) -``` -Alternatively, ```add_subdirectory``` will incorporate the library directly in -to one's CMake project. -```cmake -add_subdirectory(benchmark) -``` -Either way, link to the library as follows. -```cmake -target_link_libraries(MyTarget benchmark::benchmark) -``` - -## Platform Specific Build Instructions - -### Building with GCC - -When the library is built using GCC it is necessary to link with the pthread -library due to how GCC implements `std::thread`. Failing to link to pthread will -lead to runtime exceptions (unless you're using libc++), not linker errors. See -[issue #67](https://github.com/google/benchmark/issues/67) for more details. You -can link to pthread by adding `-pthread` to your linker command. Note, you can -also use `-lpthread`, but there are potential issues with ordering of command -line parameters if you use that. - -### Building with Visual Studio 2015 or 2017 - -The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: - -``` -// Alternatively, can add libraries using linker options. -#ifdef _WIN32 -#pragma comment ( lib, "Shlwapi.lib" ) -#ifdef _DEBUG -#pragma comment ( lib, "benchmarkd.lib" ) -#else -#pragma comment ( lib, "benchmark.lib" ) -#endif -#endif -``` - -Can also use the graphical version of CMake: -* Open `CMake GUI`. -* Under `Where to build the binaries`, same path as source plus `build`. 
-* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. -* Click `Configure`, `Generate`, `Open Project`. -* If build fails, try deleting entire directory and starting again, or unticking options to build less. - -### Building with Intel 2015 Update 1 or Intel System Studio Update 4 - -See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. - -### Building on Solaris - -If you're running benchmarks on solaris, you'll want the kstat library linked in -too (`-lkstat`). - -## User Guide - -### Command Line - -[Output Formats](#output-formats) - -[Output Files](#output-files) - -[Running Benchmarks](#running-benchmarks) - -[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) - -[Result Comparison](#result-comparison) - -[Extra Context](#extra-context) - -### Library - -[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) - -[Passing Arguments](#passing-arguments) - -[Custom Benchmark Name](#custom-benchmark-name) - -[Calculating Asymptotic Complexity](#asymptotic-complexity) - -[Templated Benchmarks](#templated-benchmarks) - -[Fixtures](#fixtures) - -[Custom Counters](#custom-counters) - -[Multithreaded Benchmarks](#multithreaded-benchmarks) - -[CPU Timers](#cpu-timers) - -[Manual Timing](#manual-timing) - -[Setting the Time Unit](#setting-the-time-unit) - -[Random Interleaving](docs/random_interleaving.md) - -[User-Requested Performance Counters](docs/perf_counters.md) - -[Preventing Optimization](#preventing-optimization) - -[Reporting Statistics](#reporting-statistics) - -[Custom Statistics](#custom-statistics) - -[Using RegisterBenchmark](#using-register-benchmark) - -[Exiting with an Error](#exiting-with-an-error) - -[A Faster KeepRunning Loop](#a-faster-keep-running-loop) - -[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) - - - - -### Output Formats - -The library supports multiple output formats. 
Use the -`--benchmark_format=` flag (or set the -`BENCHMARK_FORMAT=` environment variable) to set -the format type. `console` is the default format. - -The Console format is intended to be a human readable format. By default -the format generates color output. Context is output on stderr and the -tabular data on stdout. Example tabular output looks like: - -``` -Benchmark Time(ns) CPU(ns) Iterations ----------------------------------------------------------------------- -BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s -BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s -BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s -``` - -The JSON format outputs human readable json split into two top level attributes. -The `context` attribute contains information about the run in general, including -information about the CPU and the date. -The `benchmarks` attribute contains a list of every benchmark run. Example json -output looks like: - -```json -{ - "context": { - "date": "2015/03/17-18:40:25", - "num_cpus": 40, - "mhz_per_cpu": 2801, - "cpu_scaling_enabled": false, - "build_type": "debug" - }, - "benchmarks": [ - { - "name": "BM_SetInsert/1024/1", - "iterations": 94877, - "real_time": 29275, - "cpu_time": 29836, - "bytes_per_second": 134066, - "items_per_second": 33516 - }, - { - "name": "BM_SetInsert/1024/8", - "iterations": 21609, - "real_time": 32317, - "cpu_time": 32429, - "bytes_per_second": 986770, - "items_per_second": 246693 - }, - { - "name": "BM_SetInsert/1024/10", - "iterations": 21393, - "real_time": 32724, - "cpu_time": 33355, - "bytes_per_second": 1199226, - "items_per_second": 299807 - } - ] -} -``` - -The CSV format outputs comma-separated values. The `context` is output on stderr -and the CSV itself on stdout. 
Example CSV output looks like: - -``` -name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label -"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, -"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, -"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, -``` - - - -### Output Files - -Write benchmark results to a file with the `--benchmark_out=` option -(or set `BENCHMARK_OUT`). Specify the output format with -`--benchmark_out_format={json|console|csv}` (or set -`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is -deprecated and the saved `.csv` file -[is not parsable](https://github.com/google/benchmark/issues/794) by csv -parsers. - -Specifying `--benchmark_out` does not suppress the console output. - - - -### Running Benchmarks - -Benchmarks are executed by running the produced binaries. Benchmarks binaries, -by default, accept options that may be specified either through their command -line interface or by setting environment variables before execution. For every -`--option_flag=` CLI switch, a corresponding environment variable -`OPTION_FLAG=` exist and is used as default if set (CLI switches always - prevails). A complete list of CLI options is available running benchmarks - with the `--help` switch. - - - -### Running a Subset of Benchmarks - -The `--benchmark_filter=` option (or `BENCHMARK_FILTER=` -environment variable) can be used to only run the benchmarks that match -the specified ``. For example: - -```bash -$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 -Run on (1 X 2300 MHz CPU ) -2016-06-25 19:34:24 -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_memcpy/32 11 ns 11 ns 79545455 -BM_memcpy/32k 2181 ns 2185 ns 324074 -BM_memcpy/32 12 ns 12 ns 54687500 -BM_memcpy/32k 1834 ns 1837 ns 357143 -``` - - - -### Result comparison - -It is possible to compare the benchmarking results. 
-See [Additional Tooling Documentation](docs/tools.md) - - - -### Extra Context - -Sometimes it's useful to add extra context to the content printed before the -results. By default this section includes information about the CPU on which -the benchmarks are running. If you do want to add more context, you can use -the `benchmark_context` command line flag: - -```bash -$ ./run_benchmarks --benchmark_context=pwd=`pwd` -Run on (1 x 2300 MHz CPU) -pwd: /home/user/benchmark/ -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_memcpy/32 11 ns 11 ns 79545455 -BM_memcpy/32k 2181 ns 2185 ns 324074 -``` - -You can get the same effect with the API: - -```c++ - benchmark::AddCustomContext("foo", "bar"); -``` - -Note that attempts to add a second value with the same key will fail with an -error message. - - - -### Runtime and Reporting Considerations - -When the benchmark binary is executed, each benchmark function is run serially. -The number of iterations to run is determined dynamically by running the -benchmark a few times and measuring the time taken and ensuring that the -ultimate result will be statistically stable. As such, faster benchmark -functions will be run for more iterations than slower benchmark functions, and -the number of iterations is thus reported. - -In all cases, the number of iterations for which the benchmark is run is -governed by the amount of time the benchmark takes. Concretely, the number of -iterations is at least one, not more than 1e9, until CPU time is greater than -the minimum time, or the wallclock time is 5x minimum time. The minimum time is -set per benchmark by calling `MinTime` on the registered benchmark object. - -Average timings are then reported over the iterations run. 
If multiple -repetitions are requested using the `--benchmark_repetitions` command-line -option, or at registration time, the benchmark function will be run several -times and statistical results across these repetitions will also be reported. - -As well as the per-benchmark entries, a preamble in the report will include -information about the machine on which the benchmarks are run. - - - -### Passing Arguments - -Sometimes a family of benchmarks can be implemented with just one routine that -takes an extra argument to specify which one of the family of benchmarks to -run. For example, the following code defines a family of benchmarks for -measuring the speed of `memcpy()` calls of different lengths: - -```c++ -static void BM_memcpy(benchmark::State& state) { - char* src = new char[state.range(0)]; - char* dst = new char[state.range(0)]; - memset(src, 'x', state.range(0)); - for (auto _ : state) - memcpy(dst, src, state.range(0)); - state.SetBytesProcessed(int64_t(state.iterations()) * - int64_t(state.range(0))); - delete[] src; - delete[] dst; -} -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following invocation will pick a few appropriate arguments in -the specified range and will generate a benchmark for each such argument. - -```c++ -BENCHMARK(BM_memcpy)->Range(8, 8<<10); -``` - -By default the arguments in the range are generated in multiples of eight and -the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the -range multiplier is changed to multiples of two. - -```c++ -BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); -``` - -Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. - -The preceding code shows a method of defining a sparse range. The following -example shows a method of defining a dense range. 
It is then used to benchmark -the performance of `std::vector` initialization for uniformly increasing sizes. - -```c++ -static void BM_DenseRange(benchmark::State& state) { - for(auto _ : state) { - std::vector v(state.range(0), state.range(0)); - benchmark::DoNotOptimize(v.data()); - benchmark::ClobberMemory(); - } -} -BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); -``` - -Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. - -You might have a benchmark that depends on two or more inputs. For example, the -following code defines a family of benchmarks for measuring the speed of set -insertion. - -```c++ -static void BM_SetInsert(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); - data = ConstructRandomSet(state.range(0)); - state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 128}) - ->Args({2<<10, 128}) - ->Args({4<<10, 128}) - ->Args({8<<10, 128}) - ->Args({1<<10, 512}) - ->Args({2<<10, 512}) - ->Args({4<<10, 512}) - ->Args({8<<10, 512}); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following macro will pick a few appropriate arguments in the -product of the two specified ranges and will generate a benchmark for each such -pair. - -```c++ -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - -Some benchmarks may require specific argument values that cannot be expressed -with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a -benchmark input for each combination in the product of the supplied vectors. 
- -```c++ -BENCHMARK(BM_SetInsert) - ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) -// would generate the same benchmark arguments as -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 20}) - ->Args({3<<10, 20}) - ->Args({8<<10, 20}) - ->Args({3<<10, 40}) - ->Args({8<<10, 40}) - ->Args({1<<10, 40}) - ->Args({1<<10, 60}) - ->Args({3<<10, 60}) - ->Args({8<<10, 60}) - ->Args({1<<10, 80}) - ->Args({3<<10, 80}) - ->Args({8<<10, 80}); -``` - -For more complex patterns of inputs, passing a custom function to `Apply` allows -programmatic specification of an arbitrary set of arguments on which to run the -benchmark. The following example enumerates a dense range on one parameter, -and a sparse range on the second. - -```c++ -static void CustomArguments(benchmark::internal::Benchmark* b) { - for (int i = 0; i <= 10; ++i) - for (int j = 32; j <= 1024*1024; j *= 8) - b->Args({i, j}); -} -BENCHMARK(BM_SetInsert)->Apply(CustomArguments); -``` - -#### Passing Arbitrary Arguments to a Benchmark - -In C++11 it is possible to define a benchmark that takes an arbitrary number -of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` -macro creates a benchmark that invokes `func` with the `benchmark::State` as -the first argument followed by the specified `args...`. -The `test_case_name` is appended to the name of the benchmark and -should describe the values passed. - -```c++ -template -void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { - [...] -} -// Registers a benchmark named "BM_takes_args/int_string_test" that passes -// the specified values to `extra_args`. -BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); -``` - -Note that elements of `...args` may refer to global variables. Users should -avoid modifying global state inside of a benchmark. - - - -### Calculating Asymptotic Complexity (Big O) - -Asymptotic complexity might be calculated for a family of benchmarks. 
The -following code will calculate the coefficient for the high-order term in the -running time and the normalized root-mean square error of string comparison. - -```c++ -static void BM_StringCompare(benchmark::State& state) { - std::string s1(state.range(0), '-'); - std::string s2(state.range(0), '-'); - for (auto _ : state) { - benchmark::DoNotOptimize(s1.compare(s2)); - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); -``` - -As shown in the following invocation, asymptotic complexity might also be -calculated automatically. - -```c++ -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); -``` - -The following code will specify asymptotic complexity with a lambda function, -that might be used to customize high-order term calculation. - -```c++ -BENCHMARK(BM_StringCompare)->RangeMultiplier(2) - ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); -``` - - - -### Custom Benchmark Name - -You can change the benchmark's name as follows: - -```c++ -BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); -``` - -The invocation will execute the benchmark as before using `BM_memcpy` but changes -the prefix in the report to `memcpy`. - - - -### Templated Benchmarks - -This example produces and consumes messages of size `sizeof(v)` `range_x` -times. It also outputs throughput in the absence of multiprogramming. - -```c++ -template void BM_Sequential(benchmark::State& state) { - Q q; - typename Q::value_type v; - for (auto _ : state) { - for (int i = state.range(0); i--; ) - q.push(v); - for (int e = state.range(0); e--; ) - q.Wait(&v); - } - // actually messages, not bytes: - state.SetBytesProcessed( - static_cast(state.iterations())*state.range(0)); -} -BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -``` - -Three macros are provided for adding benchmark templates. 
- -```c++ -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. -#else // C++ < C++11 -#define BENCHMARK_TEMPLATE(func, arg1) -#endif -#define BENCHMARK_TEMPLATE1(func, arg1) -#define BENCHMARK_TEMPLATE2(func, arg1, arg2) -``` - - - -### Fixtures - -Fixture tests are created by first defining a type that derives from -`::benchmark::Fixture` and then creating/registering the tests using the -following macros: - -* `BENCHMARK_F(ClassName, Method)` -* `BENCHMARK_DEFINE_F(ClassName, Method)` -* `BENCHMARK_REGISTER_F(ClassName, Method)` - -For Example: - -```c++ -class MyFixture : public benchmark::Fixture { -public: - void SetUp(const ::benchmark::State& state) { - } - - void TearDown(const ::benchmark::State& state) { - } -}; - -BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} -/* BarTest is NOT registered */ -BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); -/* BarTest is now registered */ -``` - -#### Templated Fixtures - -Also you can create templated fixture by using the following macros: - -* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` -* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` - -For example: - -```c++ -template -class MyFixture : public benchmark::Fixture {}; - -BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); -``` - - - -### Custom Counters - -You can add your own counters with user-defined names. 
The example below -will add columns "Foo", "Bar" and "Baz" in its output: - -```c++ -static void UserCountersExample1(benchmark::State& state) { - double numFoos = 0, numBars = 0, numBazs = 0; - for (auto _ : state) { - // ... count Foo,Bar,Baz events - } - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -} -``` - -The `state.counters` object is a `std::map` with `std::string` keys -and `Counter` values. The latter is a `double`-like class, via an implicit -conversion to `double&`. Thus you can use all of the standard arithmetic -assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. - -In multithreaded benchmarks, each counter is set on the calling thread only. -When the benchmark finishes, the counters from each thread will be summed; -the resulting sum is the value which will be shown for the benchmark. - -The `Counter` constructor accepts three parameters: the value as a `double` -; a bit flag which allows you to show counters as rates, and/or as per-thread -iteration, and/or as per-thread averages, and/or iteration invariants, -and/or finally inverting the result; and a flag specifying the 'unit' - i.e. -is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 -(`benchmark::Counter::OneK::kIs1024`)? - -```c++ - // sets a simple counter - state.counters["Foo"] = numFoos; - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark. - // Meaning: per one second, how many 'foo's are processed? - state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark, and the result inverted. - // Meaning: how many seconds it takes to process one 'foo'? - state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); - - // Set the counter as a thread-average quantity. 
It will - // be presented divided by the number of threads. - state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); - - // There's also a combined flag: - state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); - - // This says that we process with the rate of state.range(0) bytes every iteration: - state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); -``` - -When you're compiling in C++11 mode or later you can use `insert()` with -`std::initializer_list`: - -```c++ - // With C++11, this can be done: - state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); - // ... instead of: - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -``` - -#### Counter Reporting - -When using the console reporter, by default, user counters are printed at -the end after the table, the same way as ``bytes_processed`` and -``items_processed``. This is best for cases in which there are few counters, -or where there are only a couple of lines per benchmark. Here's an example of -the default output: - -``` ------------------------------------------------------------------------------- -Benchmark Time CPU Iterations UserCounters... 
------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m -BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 -BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 -BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 -BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 -BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 -BM_Factorial 26 ns 26 ns 26608979 40320 -BM_Factorial/real_time 26 ns 26 ns 26587936 40320 -BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 -BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 -BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 -BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 -``` - -If this doesn't suit you, you can print each counter as a table column by -passing the flag `--benchmark_counters_tabular=true` to the benchmark -application. This is best for cases in which there are a lot of counters, or -a lot of lines per individual benchmark. Note that this will trigger a -reprinting of the table header any time the counter set changes between -individual benchmarks. 
Here's an example of corresponding output when -`--benchmark_counters_tabular=true` is passed: - -``` ---------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Bar Bat Baz Foo ---------------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 -BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 -BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 -BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 -BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 -BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 -BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 -BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 --------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------- -BM_Factorial 26 ns 26 ns 26392245 40320 -BM_Factorial/real_time 26 ns 26 ns 26494107 40320 -BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 -BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 -BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 -BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 -BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 -BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 -BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 -BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 -BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 -``` - -Note above the additional header printed when the benchmark changes from -``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does -not have the same counter set as ``BM_UserCounter``. 
- - - -### Multithreaded Benchmarks - -In a multithreaded test (benchmark invoked by multiple threads simultaneously), -it is guaranteed that none of the threads will start until all have reached -the start of the benchmark loop, and all will have finished before any thread -exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` -API) As such, any global setup or teardown can be wrapped in a check against the thread -index: - -```c++ -static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { - // Setup code here. - } - for (auto _ : state) { - // Run the test as normal. - } - if (state.thread_index == 0) { - // Teardown code here. - } -} -BENCHMARK(BM_MultiThreaded)->Threads(2); -``` - -If the benchmarked code itself uses threads and you want to compare it to -single-threaded code, you may want to use real-time ("wallclock") measurements -for latency comparisons: - -```c++ -BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); -``` - -Without `UseRealTime`, CPU time is used by default. - - - -### CPU Timers - -By default, the CPU timer only measures the time spent by the main thread. -If the benchmark itself uses threads internally, this measurement may not -be what you are looking for. Instead, there is a way to measure the total -CPU usage of the process, by all the threads. - -```c++ -void callee(int i); - -static void MyMain(int size) { -#pragma omp parallel for - for(int i = 0; i < size; i++) - callee(i); -} - -static void BM_OpenMP(benchmark::State& state) { - for (auto _ : state) - MyMain(state.range(0)); -} - -// Measure the time spent by the main thread, use it to decide for how long to -// run the benchmark loop. Depending on the internal implementation detail may -// measure to anywhere from near-zero (the overhead spent before/after work -// handoff to worker thread[s]) to the whole single-thread time. 
-BENCHMARK(BM_OpenMP)->Range(8, 8<<10); - -// Measure the user-visible time, the wall clock (literally, the time that -// has passed on the clock on the wall), use it to decide for how long to -// run the benchmark loop. This will always be meaningful, an will match the -// time spent by the main thread in single-threaded case, in general decreasing -// with the number of internal threads doing the work. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); - -// Measure the total CPU consumption, use it to decide for how long to -// run the benchmark loop. This will always measure to no less than the -// time spent by the main thread in single-threaded case. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); - -// A mixture of the last two. Measure the total CPU consumption, but use the -// wall clock to decide for how long to run the benchmark loop. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); -``` - -#### Controlling Timers - -Normally, the entire duration of the work loop (`for (auto _ : state) {}`) -is measured. But sometimes, it is necessary to do some work inside of -that loop, every iteration, but without counting that time to the benchmark time. -That is possible, although it is not recommended, since it has high overhead. - -```c++ -static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); // Stop timers. They will not count until they are resumed. - data = ConstructRandomSet(state.range(0)); // Do something that should not be measured - state.ResumeTiming(); // And resume timers. They are now counting again. - // The rest will be measured. 
- for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - - - -### Manual Timing - -For benchmarking something for which neither CPU time nor real-time are -correct or accurate enough, completely manual timing is supported using -the `UseManualTime` function. - -When `UseManualTime` is used, the benchmarked code must call -`SetIterationTime` once per iteration of the benchmark loop to -report the manually measured time. - -An example use case for this is benchmarking GPU execution (e.g. OpenCL -or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot -be accurately measured using CPU time or real-time. Instead, they can be -measured accurately using a dedicated API, and these measurement results -can be reported back with `SetIterationTime`. - -```c++ -static void BM_ManualTiming(benchmark::State& state) { - int microseconds = state.range(0); - std::chrono::duration sleep_duration { - static_cast(microseconds) - }; - - for (auto _ : state) { - auto start = std::chrono::high_resolution_clock::now(); - // Simulate some useful workload with a sleep - std::this_thread::sleep_for(sleep_duration); - auto end = std::chrono::high_resolution_clock::now(); - - auto elapsed_seconds = - std::chrono::duration_cast>( - end - start); - - state.SetIterationTime(elapsed_seconds.count()); - } -} -BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); -``` - - - -### Setting the Time Unit - -If a benchmark runs a few milliseconds it may be hard to visually compare the -measured times, since the output data is given in nanoseconds per default. 
In -order to manually set the time unit, you can specify it manually: - -```c++ -BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); -``` - - - -### Preventing Optimization - -To prevent a value or expression from being optimized away by the compiler -the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` -functions can be used. - -```c++ -static void BM_test(benchmark::State& state) { - for (auto _ : state) { - int x = 0; - for (int i=0; i < 64; ++i) { - benchmark::DoNotOptimize(x += i); - } - } -} -``` - -`DoNotOptimize()` forces the *result* of `` to be stored in either -memory or a register. For GNU based compilers it acts as read/write barrier -for global memory. More specifically it forces the compiler to flush pending -writes to memory and reload any other values as necessary. - -Note that `DoNotOptimize()` does not prevent optimizations on `` -in any way. `` may even be removed entirely when the result is already -known. For example: - -```c++ - /* Example 1: `` is removed entirely. */ - int foo(int x) { return x + 42; } - while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); - - /* Example 2: Result of '' is only reused */ - int bar(int) __attribute__((const)); - while (...) DoNotOptimize(bar(0)); // Optimized to: - // int __result__ = bar(0); - // while (...) DoNotOptimize(__result__); -``` - -The second tool for preventing optimizations is `ClobberMemory()`. In essence -`ClobberMemory()` forces the compiler to perform all pending writes to global -memory. Memory managed by block scope objects must be "escaped" using -`DoNotOptimize(...)` before it can be clobbered. In the below example -`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized -away. - -```c++ -static void BM_vector_push_back(benchmark::State& state) { - for (auto _ : state) { - std::vector v; - v.reserve(1); - benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. 
- v.push_back(42); - benchmark::ClobberMemory(); // Force 42 to be written to memory. - } -} -``` - -Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. - - - -### Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks - -By default each benchmark is run once and that single result is reported. -However benchmarks are often noisy and a single result may not be representative -of the overall behavior. For this reason it's possible to repeatedly rerun the -benchmark. - -The number of runs of each benchmark is specified globally by the -`--benchmark_repetitions` flag or on a per benchmark basis by calling -`Repetitions` on the registered benchmark object. When a benchmark is run more -than once the mean, median and standard deviation of the runs will be reported. - -Additionally the `--benchmark_report_aggregates_only={true|false}`, -`--benchmark_display_aggregates_only={true|false}` flags or -`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be -used to change how repeated tests are reported. By default the result of each -repeated run is reported. When `report aggregates only` option is `true`, -only the aggregates (i.e. mean, median and standard deviation, maybe complexity -measurements if they were requested) of the runs is reported, to both the -reporters - standard output (console), and the file. -However when only the `display aggregates only` option is `true`, -only the aggregates are displayed in the standard output, while the file -output still contains everything. -Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a -registered benchmark object overrides the value of the appropriate flag for that -benchmark. - - - -### Custom Statistics - -While having mean, median and standard deviation is nice, this may not be -enough for everyone. For example you may want to know what the largest -observation is, e.g. because you have some real-time constraints. 
This is easy. -The following code will specify a custom statistic to be calculated, defined -by a lambda function. - -```c++ -void BM_spin_empty(benchmark::State& state) { - for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { - benchmark::DoNotOptimize(x); - } - } -} - -BENCHMARK(BM_spin_empty) - ->ComputeStatistics("max", [](const std::vector& v) -> double { - return *(std::max_element(std::begin(v), std::end(v))); - }) - ->Arg(512); -``` - - - -### Using RegisterBenchmark(name, fn, args...) - -The `RegisterBenchmark(name, func, args...)` function provides an alternative -way to create and register benchmarks. -`RegisterBenchmark(name, func, args...)` creates, registers, and returns a -pointer to a new benchmark with the specified `name` that invokes -`func(st, args...)` where `st` is a `benchmark::State` object. - -Unlike the `BENCHMARK` registration macros, which can only be used at the global -scope, the `RegisterBenchmark` can be called anywhere. This allows for -benchmark tests to be registered programmatically. - -Additionally `RegisterBenchmark` allows any callable object to be registered -as a benchmark. Including capturing lambdas and function objects. - -For Example: -```c++ -auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; - -int main(int argc, char** argv) { - for (auto& test_input : { /* ... */ }) - benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} -``` - - - -### Exiting with an Error - -When errors caused by external influences, such as file I/O and network -communication, occur within a benchmark the -`State::SkipWithError(const char* msg)` function can be used to skip that run -of benchmark and report the error. Note that only future iterations of the -`KeepRunning()` are skipped. 
For the ranged-for version of the benchmark loop -Users must explicitly exit the loop, otherwise all iterations will be performed. -Users may explicitly return to exit the benchmark immediately. - -The `SkipWithError(...)` function may be used at any point within the benchmark, -including before and after the benchmark loop. Moreover, if `SkipWithError(...)` -has been used, it is not required to reach the benchmark loop and one may return -from the benchmark function early. - -For example: - -```c++ -static void BM_test(benchmark::State& state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - // KeepRunning() loop will not be entered. - } - while (state.KeepRunning()) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // Needed to skip the rest of the iteration. - } - do_stuff(data); - } -} - -static void BM_test_ranged_fo(benchmark::State & state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - return; // Early return is allowed when SkipWithError() has been used. - } - for (auto _ : state) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // REQUIRED to prevent all further iterations. - } - do_stuff(data); - } -} -``` - - -### A Faster KeepRunning Loop - -In C++11 mode, a ranged-based for loop should be used in preference to -the `KeepRunning` loop for running the benchmarks. For example: - -```c++ -static void BM_Fast(benchmark::State &state) { - for (auto _ : state) { - FastOperation(); - } -} -BENCHMARK(BM_Fast); -``` - -The reason the ranged-for loop is faster than using `KeepRunning`, is -because `KeepRunning` requires a memory load and store of the iteration count -ever iteration, whereas the ranged-for variant is able to keep the iteration count -in a register. 
- -For example, an empty inner loop of using the ranged-based for method looks like: - -```asm -# Loop Init - mov rbx, qword ptr [r14 + 104] - call benchmark::State::StartKeepRunning() - test rbx, rbx - je .LoopEnd -.LoopHeader: # =>This Inner Loop Header: Depth=1 - add rbx, -1 - jne .LoopHeader -.LoopEnd: -``` - -Compared to an empty `KeepRunning` loop, which looks like: - -```asm -.LoopHeader: # in Loop: Header=BB0_3 Depth=1 - cmp byte ptr [rbx], 1 - jne .LoopInit -.LoopBody: # =>This Inner Loop Header: Depth=1 - mov rax, qword ptr [rbx + 8] - lea rcx, [rax + 1] - mov qword ptr [rbx + 8], rcx - cmp rax, qword ptr [rbx + 104] - jb .LoopHeader - jmp .LoopEnd -.LoopInit: - mov rdi, rbx - call benchmark::State::StartKeepRunning() - jmp .LoopBody -.LoopEnd: -``` - -Unless C++03 compatibility is required, the ranged-for variant of writing -the benchmark loop should be preferred. - - - -### Disabling CPU Frequency Scaling - -If you see this error: - -``` -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
-``` - -you might want to disable the CPU frequency scaling while running the benchmark: - -```bash -sudo cpupower frequency-set --governor performance -./mybench -sudo cpupower frequency-set --governor powersave -``` diff --git a/libcxx/utils/google-benchmark/WORKSPACE b/libcxx/utils/google-benchmark/WORKSPACE deleted file mode 100644 index 631f3ba05de5..000000000000 --- a/libcxx/utils/google-benchmark/WORKSPACE +++ /dev/null @@ -1,51 +0,0 @@ -workspace(name = "com_github_google_benchmark") - -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -http_archive( - name = "rules_cc", - strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912", - urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"], - sha256 = "d7dc12c1d5bc1a87474de8e3d17b7731a4dcebcfb8aa3990fe8ac7734ef12f2f", -) - -http_archive( - name = "com_google_absl", - sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", - strip_prefix = "abseil-cpp-20200225.2", - urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], -) - -http_archive( - name = "com_google_googletest", - strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e", - urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"], - sha256 = "8f827dd550db8b4fdf73904690df0be9fccc161017c9038a724bc9a0617a1bc8", -) - -http_archive( - name = "pybind11", - build_file = "@//bindings/python:pybind11.BUILD", - sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d", - strip_prefix = "pybind11-2.4.3", - urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"], -) - -new_local_repository( - name = "python_headers", - build_file = "@//bindings/python:python_headers.BUILD", - path = "/usr/include/python3.6", # May be overwritten by setup.py. 
-) - -http_archive( - name = "rules_python", - url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", - sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", -) - -load("@rules_python//python:pip.bzl", pip3_install="pip_install") - -pip3_install( - name = "py_deps", - requirements = "//:requirements.txt", -) diff --git a/libcxx/utils/google-benchmark/_config.yml b/libcxx/utils/google-benchmark/_config.yml deleted file mode 100644 index 1fa5ff852bda..000000000000 --- a/libcxx/utils/google-benchmark/_config.yml +++ /dev/null @@ -1,2 +0,0 @@ -theme: jekyll-theme-midnight -markdown: GFM diff --git a/libcxx/utils/google-benchmark/appveyor.yml b/libcxx/utils/google-benchmark/appveyor.yml deleted file mode 100644 index 81da955f0281..000000000000 --- a/libcxx/utils/google-benchmark/appveyor.yml +++ /dev/null @@ -1,50 +0,0 @@ -version: '{build}' - -image: Visual Studio 2017 - -configuration: - - Debug - - Release - -environment: - matrix: - - compiler: msvc-15-seh - generator: "Visual Studio 15 2017" - - - compiler: msvc-15-seh - generator: "Visual Studio 15 2017 Win64" - - - compiler: msvc-14-seh - generator: "Visual Studio 14 2015" - - - compiler: msvc-14-seh - generator: "Visual Studio 14 2015 Win64" - - - compiler: gcc-5.3.0-posix - generator: "MinGW Makefiles" - cxx_path: 'C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin' - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 - -matrix: - fast_finish: true - -install: - # git bash conflicts with MinGW makefiles - - if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files\Git\usr\bin;=%") - - if not "%cxx_path%"=="" (set "PATH=%PATH%;%cxx_path%") - -build_script: - - md _build -Force - - cd _build - - echo %configuration% - - cmake -G "%generator%" "-DCMAKE_BUILD_TYPE=%configuration%" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON .. - - cmake --build . 
--config %configuration% - -test_script: - - ctest --build-config %configuration% --timeout 300 --output-on-failure - -artifacts: - - path: '_build/CMakeFiles/*.log' - name: logs - - path: '_build/Testing/**/*.xml' - name: test_results diff --git a/libcxx/utils/google-benchmark/bindings/python/BUILD b/libcxx/utils/google-benchmark/bindings/python/BUILD deleted file mode 100644 index 9559a76b30a9..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -exports_files(glob(["*.BUILD"])) -exports_files(["build_defs.bzl"]) - diff --git a/libcxx/utils/google-benchmark/bindings/python/build_defs.bzl b/libcxx/utils/google-benchmark/bindings/python/build_defs.bzl deleted file mode 100644 index 45907aaa5e2d..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/build_defs.bzl +++ /dev/null @@ -1,25 +0,0 @@ -_SHARED_LIB_SUFFIX = { - "//conditions:default": ".so", - "//:windows": ".dll", -} - -def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []): - for shared_lib_suffix in _SHARED_LIB_SUFFIX.values(): - shared_lib_name = name + shared_lib_suffix - native.cc_binary( - name = shared_lib_name, - linkshared = 1, - linkstatic = 1, - srcs = srcs + hdrs, - copts = copts, - features = features, - deps = deps, - ) - - return native.py_library( - name = name, - data = select({ - platform: [name + shared_lib_suffix] - for platform, shared_lib_suffix in _SHARED_LIB_SUFFIX.items() - }), - ) diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD deleted file mode 100644 index 3c1561f48eee..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/BUILD +++ /dev/null @@ -1,38 +0,0 @@ -load("//bindings/python:build_defs.bzl", "py_extension") - -py_library( - name = "google_benchmark", - srcs = ["__init__.py"], - visibility = ["//visibility:public"], - deps = [ - ":_benchmark", - # pip; 
absl:app - ], -) - -py_extension( - name = "_benchmark", - srcs = ["benchmark.cc"], - copts = [ - "-fexceptions", - "-fno-strict-aliasing", - ], - features = ["-use_header_modules"], - deps = [ - "//:benchmark", - "@pybind11", - "@python_headers", - ], -) - -py_test( - name = "example", - srcs = ["example.py"], - python_version = "PY3", - srcs_version = "PY3", - visibility = ["//visibility:public"], - deps = [ - ":google_benchmark", - ], -) - diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py deleted file mode 100644 index 1055bf241856..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/__init__.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2020 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Python benchmarking utilities. - -Example usage: - import google_benchmark as benchmark - - @benchmark.register - def my_benchmark(state): - ... # Code executed outside `while` loop is not timed. - - while state: - ... # Code executed within `while` loop is timed. 
- - if __name__ == '__main__': - benchmark.main() -""" - -from absl import app -from google_benchmark import _benchmark -from google_benchmark._benchmark import ( - Counter, - kNanosecond, - kMicrosecond, - kMillisecond, - kSecond, - oNone, - o1, - oN, - oNSquared, - oNCubed, - oLogN, - oNLogN, - oAuto, - oLambda, -) - - -__all__ = [ - "register", - "main", - "Counter", - "kNanosecond", - "kMicrosecond", - "kMillisecond", - "kSecond", - "oNone", - "o1", - "oN", - "oNSquared", - "oNCubed", - "oLogN", - "oNLogN", - "oAuto", - "oLambda", -] - -__version__ = "0.2.0" - - -class __OptionMaker: - """A stateless class to collect benchmark options. - - Collect all decorator calls like @option.range(start=0, limit=1<<5). - """ - - class Options: - """Pure data class to store options calls, along with the benchmarked function.""" - - def __init__(self, func): - self.func = func - self.builder_calls = [] - - @classmethod - def make(cls, func_or_options): - """Make Options from Options or the benchmarked function.""" - if isinstance(func_or_options, cls.Options): - return func_or_options - return cls.Options(func_or_options) - - def __getattr__(self, builder_name): - """Append option call in the Options.""" - - # The function that get returned on @option.range(start=0, limit=1<<5). - def __builder_method(*args, **kwargs): - - # The decorator that get called, either with the benchmared function - # or the previous Options - def __decorator(func_or_options): - options = self.make(func_or_options) - options.builder_calls.append((builder_name, args, kwargs)) - # The decorator returns Options so it is not technically a decorator - # and needs a final call to @regiser - return options - - return __decorator - - return __builder_method - - -# Alias for nicer API. 
-# We have to instantiate an object, even if stateless, to be able to use __getattr__ -# on option.range -option = __OptionMaker() - - -def register(undefined=None, *, name=None): - """Register function for benchmarking.""" - if undefined is None: - # Decorator is called without parenthesis so we return a decorator - return lambda f: register(f, name=name) - - # We have either the function to benchmark (simple case) or an instance of Options - # (@option._ case). - options = __OptionMaker.make(undefined) - - if name is None: - name = options.func.__name__ - - # We register the benchmark and reproduce all the @option._ calls onto the - # benchmark builder pattern - benchmark = _benchmark.RegisterBenchmark(name, options.func) - for name, args, kwargs in options.builder_calls[::-1]: - getattr(benchmark, name)(*args, **kwargs) - - # return the benchmarked function because the decorator does not modify it - return options.func - - -def _flags_parser(argv): - argv = _benchmark.Initialize(argv) - return app.parse_flags_with_usage(argv) - - -def _run_benchmarks(argv): - if len(argv) > 1: - raise app.UsageError("Too many command-line arguments.") - return _benchmark.RunSpecifiedBenchmarks() - - -def main(argv=None): - return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser) - - -# Methods for use with custom main function. -initialize = _benchmark.Initialize -run_benchmarks = _benchmark.RunSpecifiedBenchmarks diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc deleted file mode 100644 index 1b01fe7f7f0f..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/benchmark.cc +++ /dev/null @@ -1,181 +0,0 @@ -// Benchmark for Python. 
- -#include -#include -#include - -#include "pybind11/operators.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" -#include "pybind11/stl_bind.h" - -#include "benchmark/benchmark.h" - -PYBIND11_MAKE_OPAQUE(benchmark::UserCounters); - -namespace { -namespace py = ::pybind11; - -std::vector Initialize(const std::vector& argv) { - // The `argv` pointers here become invalid when this function returns, but - // benchmark holds the pointer to `argv[0]`. We create a static copy of it - // so it persists, and replace the pointer below. - static std::string executable_name(argv[0]); - std::vector ptrs; - ptrs.reserve(argv.size()); - for (auto& arg : argv) { - ptrs.push_back(const_cast(arg.c_str())); - } - ptrs[0] = const_cast(executable_name.c_str()); - int argc = static_cast(argv.size()); - benchmark::Initialize(&argc, ptrs.data()); - std::vector remaining_argv; - remaining_argv.reserve(argc); - for (int i = 0; i < argc; ++i) { - remaining_argv.emplace_back(ptrs[i]); - } - return remaining_argv; -} - -benchmark::internal::Benchmark* RegisterBenchmark(const char* name, - py::function f) { - return benchmark::RegisterBenchmark( - name, [f](benchmark::State& state) { f(&state); }); -} - -PYBIND11_MODULE(_benchmark, m) { - using benchmark::TimeUnit; - py::enum_(m, "TimeUnit") - .value("kNanosecond", TimeUnit::kNanosecond) - .value("kMicrosecond", TimeUnit::kMicrosecond) - .value("kMillisecond", TimeUnit::kMillisecond) - .value("kSecond", TimeUnit::kSecond) - .export_values(); - - using benchmark::BigO; - py::enum_(m, "BigO") - .value("oNone", BigO::oNone) - .value("o1", BigO::o1) - .value("oN", BigO::oN) - .value("oNSquared", BigO::oNSquared) - .value("oNCubed", BigO::oNCubed) - .value("oLogN", BigO::oLogN) - .value("oNLogN", BigO::oLogN) - .value("oAuto", BigO::oAuto) - .value("oLambda", BigO::oLambda) - .export_values(); - - using benchmark::internal::Benchmark; - py::class_(m, "Benchmark") - // For methods returning a pointer tor the current object, reference - 
// return policy is used to ask pybind not to take ownership oof the - // returned object and avoid calling delete on it. - // https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies - // - // For methods taking a const std::vector<...>&, a copy is created - // because a it is bound to a Python list. - // https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html - .def("unit", &Benchmark::Unit, py::return_value_policy::reference) - .def("arg", &Benchmark::Arg, py::return_value_policy::reference) - .def("args", &Benchmark::Args, py::return_value_policy::reference) - .def("range", &Benchmark::Range, py::return_value_policy::reference, - py::arg("start"), py::arg("limit")) - .def("dense_range", &Benchmark::DenseRange, - py::return_value_policy::reference, py::arg("start"), - py::arg("limit"), py::arg("step") = 1) - .def("ranges", &Benchmark::Ranges, py::return_value_policy::reference) - .def("args_product", &Benchmark::ArgsProduct, - py::return_value_policy::reference) - .def("arg_name", &Benchmark::ArgName, py::return_value_policy::reference) - .def("arg_names", &Benchmark::ArgNames, - py::return_value_policy::reference) - .def("range_pair", &Benchmark::RangePair, - py::return_value_policy::reference, py::arg("lo1"), py::arg("hi1"), - py::arg("lo2"), py::arg("hi2")) - .def("range_multiplier", &Benchmark::RangeMultiplier, - py::return_value_policy::reference) - .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference) - .def("iterations", &Benchmark::Iterations, - py::return_value_policy::reference) - .def("repetitions", &Benchmark::Repetitions, - py::return_value_policy::reference) - .def("report_aggregates_only", &Benchmark::ReportAggregatesOnly, - py::return_value_policy::reference, py::arg("value") = true) - .def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly, - py::return_value_policy::reference, py::arg("value") = true) - .def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime, - 
py::return_value_policy::reference) - .def("use_real_time", &Benchmark::UseRealTime, - py::return_value_policy::reference) - .def("use_manual_time", &Benchmark::UseManualTime, - py::return_value_policy::reference) - .def( - "complexity", - (Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity, - py::return_value_policy::reference, - py::arg("complexity") = benchmark::oAuto); - - using benchmark::Counter; - py::class_ py_counter(m, "Counter"); - - py::enum_(py_counter, "Flags") - .value("kDefaults", Counter::Flags::kDefaults) - .value("kIsRate", Counter::Flags::kIsRate) - .value("kAvgThreads", Counter::Flags::kAvgThreads) - .value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate) - .value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant) - .value("kIsIterationInvariantRate", - Counter::Flags::kIsIterationInvariantRate) - .value("kAvgIterations", Counter::Flags::kAvgIterations) - .value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate) - .value("kInvert", Counter::Flags::kInvert) - .export_values() - .def(py::self | py::self); - - py::enum_(py_counter, "OneK") - .value("kIs1000", Counter::OneK::kIs1000) - .value("kIs1024", Counter::OneK::kIs1024) - .export_values(); - - py_counter - .def(py::init(), - py::arg("value") = 0., py::arg("flags") = Counter::kDefaults, - py::arg("k") = Counter::kIs1000) - .def(py::init([](double value) { return Counter(value); })) - .def_readwrite("value", &Counter::value) - .def_readwrite("flags", &Counter::flags) - .def_readwrite("oneK", &Counter::oneK); - py::implicitly_convertible(); - py::implicitly_convertible(); - - py::bind_map(m, "UserCounters"); - - using benchmark::State; - py::class_(m, "State") - .def("__bool__", &State::KeepRunning) - .def_property_readonly("keep_running", &State::KeepRunning) - .def("pause_timing", &State::PauseTiming) - .def("resume_timing", &State::ResumeTiming) - .def("skip_with_error", &State::SkipWithError) - .def_property_readonly("error_occurred", 
&State::error_occurred) - .def("set_iteration_time", &State::SetIterationTime) - .def_property("bytes_processed", &State::bytes_processed, - &State::SetBytesProcessed) - .def_property("complexity_n", &State::complexity_length_n, - &State::SetComplexityN) - .def_property("items_processed", &State::items_processed, - &State::SetItemsProcessed) - .def("set_label", (void (State::*)(const char*)) & State::SetLabel) - .def("range", &State::range, py::arg("pos") = 0) - .def_property_readonly("iterations", &State::iterations) - .def_readwrite("counters", &State::counters) - .def_readonly("thread_index", &State::thread_index) - .def_readonly("threads", &State::threads); - - m.def("Initialize", Initialize); - m.def("RegisterBenchmark", RegisterBenchmark, - py::return_value_policy::reference); - m.def("RunSpecifiedBenchmarks", - []() { benchmark::RunSpecifiedBenchmarks(); }); -}; -} // namespace diff --git a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py b/libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py deleted file mode 100644 index 9134e8cffeaf..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/google_benchmark/example.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2020 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Example of Python using C++ benchmark framework. - -To run this example, you must first install the `google_benchmark` Python package. 
- -To install using `setup.py`, download and extract the `google_benchmark` source. -In the extracted directory, execute: - python setup.py install -""" - -import random -import time - -import google_benchmark as benchmark -from google_benchmark import Counter - - -@benchmark.register -def empty(state): - while state: - pass - - -@benchmark.register -def sum_million(state): - while state: - sum(range(1_000_000)) - -@benchmark.register -def pause_timing(state): - """Pause timing every iteration.""" - while state: - # Construct a list of random ints every iteration without timing it - state.pause_timing() - random_list = [random.randint(0, 100) for _ in range(100)] - state.resume_timing() - # Time the in place sorting algorithm - random_list.sort() - - -@benchmark.register -def skipped(state): - if True: # Test some predicate here. - state.skip_with_error("some error") - return # NOTE: You must explicitly return, or benchmark will continue. - - ... # Benchmark code would be here. - - -@benchmark.register -def manual_timing(state): - while state: - # Manually count Python CPU time - start = time.perf_counter() # perf_counter_ns() in Python 3.7+ - # Something to benchmark - time.sleep(0.01) - end = time.perf_counter() - state.set_iteration_time(end - start) - - -@benchmark.register -def custom_counters(state): - """Collect cutom metric using benchmark.Counter.""" - num_foo = 0.0 - while state: - # Benchmark some code here - pass - # Collect some custom metric named foo - num_foo += 0.13 - - # Automatic Counter from numbers. - state.counters["foo"] = num_foo - # Set a counter as a rate. - state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate) - # Set a counter as an inverse of rate. - state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert) - # Set a counter as a thread-average quantity. 
- state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads) - # There's also a combined flag: - state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate) - - -@benchmark.register -@benchmark.option.measure_process_cpu_time() -@benchmark.option.use_real_time() -def with_options(state): - while state: - sum(range(1_000_000)) - - -@benchmark.register(name="sum_million_microseconds") -@benchmark.option.unit(benchmark.kMicrosecond) -def with_options(state): - while state: - sum(range(1_000_000)) - - -@benchmark.register -@benchmark.option.arg(100) -@benchmark.option.arg(1000) -def passing_argument(state): - while state: - sum(range(state.range(0))) - - -@benchmark.register -@benchmark.option.range(8, limit=8 << 10) -def using_range(state): - while state: - sum(range(state.range(0))) - - -@benchmark.register -@benchmark.option.range_multiplier(2) -@benchmark.option.range(1 << 10, 1 << 18) -@benchmark.option.complexity(benchmark.oN) -def computing_complexity(state): - while state: - sum(range(state.range(0))) - state.complexity_n = state.range(0) - - -if __name__ == "__main__": - benchmark.main() diff --git a/libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD b/libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD deleted file mode 100644 index bc833500383a..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/pybind11.BUILD +++ /dev/null @@ -1,20 +0,0 @@ -cc_library( - name = "pybind11", - hdrs = glob( - include = [ - "include/pybind11/*.h", - "include/pybind11/detail/*.h", - ], - exclude = [ - "include/pybind11/common.h", - "include/pybind11/eigen.h", - ], - ), - copts = [ - "-fexceptions", - "-Wno-undefined-inline", - "-Wno-pragma-once-outside-header", - ], - includes = ["include"], - visibility = ["//visibility:public"], -) diff --git a/libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD b/libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD deleted file mode 100644 index 
9c34cf6ca4bd..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/python_headers.BUILD +++ /dev/null @@ -1,6 +0,0 @@ -cc_library( - name = "python_headers", - hdrs = glob(["**/*.h"]), - includes = ["."], - visibility = ["//visibility:public"], -) diff --git a/libcxx/utils/google-benchmark/bindings/python/requirements.txt b/libcxx/utils/google-benchmark/bindings/python/requirements.txt deleted file mode 100644 index f5bbe7eca5ce..000000000000 --- a/libcxx/utils/google-benchmark/bindings/python/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -absl-py>=0.7.1 - diff --git a/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake b/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake deleted file mode 100644 index 858589e9775c..000000000000 --- a/libcxx/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake +++ /dev/null @@ -1,78 +0,0 @@ -# - Adds a compiler flag if it is supported by the compiler -# -# This function checks that the supplied compiler flag is supported and then -# adds it to the corresponding compiler flags -# -# add_cxx_compiler_flag( []) -# -# - Example -# -# include(AddCXXCompilerFlag) -# add_cxx_compiler_flag(-Wall) -# add_cxx_compiler_flag(-no-strict-aliasing RELEASE) -# Requires CMake 2.6+ - -if(__add_cxx_compiler_flag) - return() -endif() -set(__add_cxx_compiler_flag INCLUDED) - -include(CheckCXXCompilerFlag) - -function(mangle_compiler_flag FLAG OUTPUT) - string(TOUPPER "HAVE_CXX_FLAG_${FLAG}" SANITIZED_FLAG) - string(REPLACE "+" "X" SANITIZED_FLAG ${SANITIZED_FLAG}) - string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) - string(REGEX REPLACE "_+" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) - set(${OUTPUT} "${SANITIZED_FLAG}" PARENT_SCOPE) -endfunction(mangle_compiler_flag) - -function(add_cxx_compiler_flag FLAG) - mangle_compiler_flag("${FLAG}" MANGLED_FLAG) - set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}") - 
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) - set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") - if(${MANGLED_FLAG}) - if(ARGC GREATER 1) - set(VARIANT ${ARGV1}) - string(TOUPPER "_${VARIANT}" VARIANT) - else() - set(VARIANT "") - endif() - set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) - endif() -endfunction() - -function(add_required_cxx_compiler_flag FLAG) - mangle_compiler_flag("${FLAG}" MANGLED_FLAG) - set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}") - check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) - set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") - if(${MANGLED_FLAG}) - if(ARGC GREATER 1) - set(VARIANT ${ARGV1}) - string(TOUPPER "_${VARIANT}" VARIANT) - else() - set(VARIANT "") - endif() - set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) - set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${FLAG}" PARENT_SCOPE) - else() - message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler") - endif() -endfunction() - -function(check_cxx_warning_flag FLAG) - mangle_compiler_flag("${FLAG}" MANGLED_FLAG) - set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - # Add -Werror to ensure the compiler generates an error if the warning flag - # doesn't exist. 
- set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror ${FLAG}") - check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) - set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") -endfunction() diff --git a/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake b/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake deleted file mode 100644 index 62e6741fe3de..000000000000 --- a/libcxx/utils/google-benchmark/cmake/CXXFeatureCheck.cmake +++ /dev/null @@ -1,69 +0,0 @@ -# - Compile and run code to check for C++ features -# -# This functions compiles a source file under the `cmake` folder -# and adds the corresponding `HAVE_[FILENAME]` flag to the CMake -# environment -# -# cxx_feature_check( []) -# -# - Example -# -# include(CXXFeatureCheck) -# cxx_feature_check(STD_REGEX) -# Requires CMake 2.8.12+ - -if(__cxx_feature_check) - return() -endif() -set(__cxx_feature_check INCLUDED) - -function(cxx_feature_check FILE) - string(TOLOWER ${FILE} FILE) - string(TOUPPER ${FILE} VAR) - string(TOUPPER "HAVE_${VAR}" FEATURE) - if (DEFINED HAVE_${VAR}) - set(HAVE_${VAR} 1 PARENT_SCOPE) - add_definitions(-DHAVE_${VAR}) - return() - endif() - - if (ARGC GREATER 1) - message(STATUS "Enabling additional flags: ${ARGV1}") - list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1}) - endif() - - if (NOT DEFINED COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE}") - if(CMAKE_CROSSCOMPILING) - try_compile(COMPILE_${FEATURE} - ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) - if(COMPILE_${FEATURE}) - message(WARNING - "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0") - set(RUN_${FEATURE} 0 CACHE INTERNAL "") - else() - set(RUN_${FEATURE} 1 CACHE INTERNAL "") - endif() - else() - message(STATUS "Performing Test ${FEATURE}") - try_run(RUN_${FEATURE} COMPILE_${FEATURE} - ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - 
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) - endif() - endif() - - if(RUN_${FEATURE} EQUAL 0) - message(STATUS "Performing Test ${FEATURE} -- success") - set(HAVE_${VAR} 1 PARENT_SCOPE) - add_definitions(-DHAVE_${VAR}) - else() - if(NOT COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE} -- failed to compile") - else() - message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run") - endif() - endif() -endfunction() diff --git a/libcxx/utils/google-benchmark/cmake/Config.cmake.in b/libcxx/utils/google-benchmark/cmake/Config.cmake.in deleted file mode 100644 index 6e9256eea8a2..000000000000 --- a/libcxx/utils/google-benchmark/cmake/Config.cmake.in +++ /dev/null @@ -1 +0,0 @@ -include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") diff --git a/libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake b/libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake deleted file mode 100644 index 04a1f9b70d68..000000000000 --- a/libcxx/utils/google-benchmark/cmake/GetGitVersion.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# - Returns a version string from Git tags -# -# This function inspects the annotated git tags for the project and returns a string -# into a CMake variable -# -# get_git_version() -# -# - Example -# -# include(GetGitVersion) -# get_git_version(GIT_VERSION) -# -# Requires CMake 2.8.11+ -find_package(Git) - -if(__get_git_version) - return() -endif() -set(__get_git_version INCLUDED) - -function(get_git_version var) - if(GIT_EXECUTABLE) - execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - RESULT_VARIABLE status - OUTPUT_VARIABLE GIT_DESCRIBE_VERSION - ERROR_QUIET) - if(status) - set(GIT_DESCRIBE_VERSION "v0.0.0") - endif() - - string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION) - if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-) - string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION 
${GIT_DESCRIBE_VERSION}) - else() - string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION}) - endif() - - # Work out if the repository is dirty - execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - OUTPUT_QUIET - ERROR_QUIET) - execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD -- - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - OUTPUT_VARIABLE GIT_DIFF_INDEX - ERROR_QUIET) - string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) - if (${GIT_DIRTY}) - set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty") - endif() - message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}") - else() - set(GIT_VERSION "0.0.0") - endif() - - set(${var} ${GIT_VERSION} PARENT_SCOPE) -endfunction() diff --git a/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake b/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake deleted file mode 100644 index dd611fc875f1..000000000000 --- a/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# Download and unpack googletest at configure time -set(GOOGLETEST_PREFIX "${benchmark_BINARY_DIR}/third_party/googletest") -configure_file(${benchmark_SOURCE_DIR}/cmake/GoogleTest.cmake.in ${GOOGLETEST_PREFIX}/CMakeLists.txt @ONLY) - -set(GOOGLETEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/googletest" CACHE PATH "") # Mind the quotes -execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" - -DALLOW_DOWNLOADING_GOOGLETEST=${BENCHMARK_DOWNLOAD_DEPENDENCIES} -DGOOGLETEST_PATH:PATH=${GOOGLETEST_PATH} . - RESULT_VARIABLE result - WORKING_DIRECTORY ${GOOGLETEST_PREFIX} -) - -if(result) - message(FATAL_ERROR "CMake step for googletest failed: ${result}") -endif() - -execute_process( - COMMAND ${CMAKE_COMMAND} --build . 
- RESULT_VARIABLE result - WORKING_DIRECTORY ${GOOGLETEST_PREFIX} -) - -if(result) - message(FATAL_ERROR "Build step for googletest failed: ${result}") -endif() - -# Prevent overriding the parent project's compiler/linker -# settings on Windows -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) - -include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) - -# Add googletest directly to our build. This defines -# the gtest and gtest_main targets. -add_subdirectory(${GOOGLETEST_SOURCE_DIR} - ${GOOGLETEST_BINARY_DIR} - EXCLUDE_FROM_ALL) - -set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) diff --git a/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in b/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in deleted file mode 100644 index fd957ff56409..000000000000 --- a/libcxx/utils/google-benchmark/cmake/GoogleTest.cmake.in +++ /dev/null @@ -1,58 +0,0 @@ -cmake_minimum_required(VERSION 2.8.12) - -project(googletest-download NONE) - -# Enable ExternalProject CMake module -include(ExternalProject) - -option(ALLOW_DOWNLOADING_GOOGLETEST "If googletest src tree is not found in location specified by GOOGLETEST_PATH, do fetch the archive from internet" OFF) -set(GOOGLETEST_PATH "/usr/src/googletest" CACHE PATH - "Path to the googletest root tree. Should contain googletest and googlemock subdirs. 
And CMakeLists.txt in root, and in both of these subdirs") - -# Download and install GoogleTest - -message(STATUS "Looking for Google Test sources") -message(STATUS "Looking for Google Test sources in ${GOOGLETEST_PATH}") -if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" AND EXISTS "${GOOGLETEST_PATH}/CMakeLists.txt" AND - EXISTS "${GOOGLETEST_PATH}/googletest" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googletest" AND EXISTS "${GOOGLETEST_PATH}/googletest/CMakeLists.txt" AND - EXISTS "${GOOGLETEST_PATH}/googlemock" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googlemock" AND EXISTS "${GOOGLETEST_PATH}/googlemock/CMakeLists.txt") - message(STATUS "Found Google Test in ${GOOGLETEST_PATH}") - - ExternalProject_Add( - googletest - PREFIX "${CMAKE_BINARY_DIR}" - DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" - SOURCE_DIR "${GOOGLETEST_PATH}" # use existing src dir. - BINARY_DIR "${CMAKE_BINARY_DIR}/build" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" - ) -else() - if(NOT ALLOW_DOWNLOADING_GOOGLETEST) - message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") - else() - message(WARNING "Did not find Google Test sources! 
Fetching from web...") - ExternalProject_Add( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG master - PREFIX "${CMAKE_BINARY_DIR}" - STAMP_DIR "${CMAKE_BINARY_DIR}/stamp" - DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" - SOURCE_DIR "${CMAKE_BINARY_DIR}/src" - BINARY_DIR "${CMAKE_BINARY_DIR}/build" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" - ) - endif() -endif() - -ExternalProject_Get_Property(googletest SOURCE_DIR BINARY_DIR) -file(WRITE googletest-paths.cmake -"set(GOOGLETEST_SOURCE_DIR \"${SOURCE_DIR}\") -set(GOOGLETEST_BINARY_DIR \"${BINARY_DIR}\") -") diff --git a/libcxx/utils/google-benchmark/cmake/benchmark.pc.in b/libcxx/utils/google-benchmark/cmake/benchmark.pc.in deleted file mode 100644 index 34beb012eef1..000000000000 --- a/libcxx/utils/google-benchmark/cmake/benchmark.pc.in +++ /dev/null @@ -1,12 +0,0 @@ -prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - -Name: @PROJECT_NAME@ -Description: Google microbenchmark framework -Version: @VERSION@ - -Libs: -L${libdir} -lbenchmark -Libs.private: -lpthread -Cflags: -I${includedir} diff --git a/libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp b/libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp deleted file mode 100644 index b5b91cdab7c2..000000000000 --- a/libcxx/utils/google-benchmark/cmake/gnu_posix_regex.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include -int main() { - std::string str = "test0159"; - regex_t re; - int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); - if (ec != 0) { - return ec; - } - return regexec(&re, str.c_str(), 0, nullptr, 0) ? 
-1 : 0; -} - diff --git a/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake b/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake deleted file mode 100644 index fc119e52fd26..000000000000 --- a/libcxx/utils/google-benchmark/cmake/llvm-toolchain.cmake +++ /dev/null @@ -1,8 +0,0 @@ -find_package(LLVMAr REQUIRED) -set(CMAKE_AR "${LLVMAR_EXECUTABLE}" CACHE FILEPATH "" FORCE) - -find_package(LLVMNm REQUIRED) -set(CMAKE_NM "${LLVMNM_EXECUTABLE}" CACHE FILEPATH "" FORCE) - -find_package(LLVMRanLib REQUIRED) -set(CMAKE_RANLIB "${LLVMRANLIB_EXECUTABLE}" CACHE FILEPATH "" FORCE) diff --git a/libcxx/utils/google-benchmark/cmake/posix_regex.cpp b/libcxx/utils/google-benchmark/cmake/posix_regex.cpp deleted file mode 100644 index 466dc62560a2..000000000000 --- a/libcxx/utils/google-benchmark/cmake/posix_regex.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include -int main() { - std::string str = "test0159"; - regex_t re; - int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); - if (ec != 0) { - return ec; - } - int ret = regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; - regfree(&re); - return ret; -} - diff --git a/libcxx/utils/google-benchmark/cmake/split_list.cmake b/libcxx/utils/google-benchmark/cmake/split_list.cmake deleted file mode 100644 index 67aed3fdc857..000000000000 --- a/libcxx/utils/google-benchmark/cmake/split_list.cmake +++ /dev/null @@ -1,3 +0,0 @@ -macro(split_list listname) - string(REPLACE ";" " " ${listname} "${${listname}}") -endmacro() diff --git a/libcxx/utils/google-benchmark/cmake/std_regex.cpp b/libcxx/utils/google-benchmark/cmake/std_regex.cpp deleted file mode 100644 index 696f2a26bce0..000000000000 --- a/libcxx/utils/google-benchmark/cmake/std_regex.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include -#include -int main() { - const std::string str = "test0159"; - std::regex re; - re = std::regex("^[a-z]+[0-9]+$", - std::regex_constants::extended | std::regex_constants::nosubs); - return std::regex_search(str, re) ? 
0 : -1; -} - diff --git a/libcxx/utils/google-benchmark/cmake/steady_clock.cpp b/libcxx/utils/google-benchmark/cmake/steady_clock.cpp deleted file mode 100644 index 66d50d17e9e6..000000000000 --- a/libcxx/utils/google-benchmark/cmake/steady_clock.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include - -int main() { - typedef std::chrono::steady_clock Clock; - Clock::time_point tp = Clock::now(); - ((void)tp); -} diff --git a/libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp b/libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp deleted file mode 100644 index 46161babdb10..000000000000 --- a/libcxx/utils/google-benchmark/cmake/thread_safety_attributes.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#define HAVE_THREAD_SAFETY_ATTRIBUTES -#include "../src/mutex.h" - -int main() {} diff --git a/libcxx/utils/google-benchmark/dependencies.md b/libcxx/utils/google-benchmark/dependencies.md deleted file mode 100644 index 6289b4e3548b..000000000000 --- a/libcxx/utils/google-benchmark/dependencies.md +++ /dev/null @@ -1,18 +0,0 @@ -# Build tool dependency policy - -To ensure the broadest compatibility when building the benchmark library, but -still allow forward progress, we require any build tooling to be available for: - -* Debian stable AND -* The last two Ubuntu LTS releases AND - -Currently, this means using build tool versions that are available for Ubuntu -16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch. - -_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._ - -## cmake -The current supported version is cmake 3.5.1 as of 2018-06-06. 
- -_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS -release, as `cmake3`._ diff --git a/libcxx/utils/google-benchmark/docs/AssemblyTests.md b/libcxx/utils/google-benchmark/docs/AssemblyTests.md deleted file mode 100644 index 1fbdc269b53d..000000000000 --- a/libcxx/utils/google-benchmark/docs/AssemblyTests.md +++ /dev/null @@ -1,147 +0,0 @@ -# Assembly Tests - -The Benchmark library provides a number of functions whose primary -purpose in to affect assembly generation, including `DoNotOptimize` -and `ClobberMemory`. In addition there are other functions, -such as `KeepRunning`, for which generating good assembly is paramount. - -For these functions it's important to have tests that verify the -correctness and quality of the implementation. This requires testing -the code generated by the compiler. - -This document describes how the Benchmark library tests compiler output, -as well as how to properly write new tests. - - -## Anatomy of a Test - -Writing a test has two steps: - -* Write the code you want to generate assembly for. -* Add `// CHECK` lines to match against the verified assembly. - -Example: -```c++ - -// CHECK-LABEL: test_add: -extern "C" int test_add() { - extern int ExternInt; - return ExternInt + 1; - - // CHECK: movl ExternInt(%rip), %eax - // CHECK: addl %eax - // CHECK: ret -} - -``` - -#### LLVM Filecheck - -[LLVM's Filecheck](https://llvm.org/docs/CommandGuide/FileCheck.html) -is used to test the generated assembly against the `// CHECK` lines -specified in the tests source file. Please see the documentation -linked above for information on how to write `CHECK` directives. - -#### Tips and Tricks: - -* Tests should match the minimal amount of output required to establish -correctness. `CHECK` directives don't have to match on the exact next line -after the previous match, so tests should omit checks for unimportant -bits of assembly. 
([`CHECK-NEXT`](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive) -can be used to ensure a match occurs exactly after the previous match). - -* The tests are compiled with `-O3 -g0`. So we're only testing the -optimized output. - -* The assembly output is further cleaned up using `tools/strip_asm.py`. -This removes comments, assembler directives, and unused labels before -the test is run. - -* The generated and stripped assembly file for a test is output under -`/test/.s` - -* Filecheck supports using [`CHECK` prefixes](https://llvm.org/docs/CommandGuide/FileCheck.html#cmdoption-check-prefixes) -to specify lines that should only match in certain situations. -The Benchmark tests use `CHECK-CLANG` and `CHECK-GNU` for lines that -are only expected to match Clang or GCC's output respectively. Normal -`CHECK` lines match against all compilers. (Note: `CHECK-NOT` and -`CHECK-LABEL` are NOT prefixes. They are versions of non-prefixed -`CHECK` lines) - -* Use `extern "C"` to disable name mangling for specific functions. This -makes them easier to name in the `CHECK` lines. - - -## Problems Writing Portable Tests - -Writing tests which check the code generated by a compiler are -inherently non-portable. Different compilers and even different compiler -versions may generate entirely different code. The Benchmark tests -must tolerate this. - -LLVM Filecheck provides a number of mechanisms to help write -"more portable" tests; including [matching using regular expressions](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-pattern-matching-syntax), -allowing the creation of [named variables](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-variables) -for later matching, and [checking non-sequential matches](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-dag-directive). - -#### Capturing Variables - -For example, say GCC stores a variable in a register but Clang stores -it in memory. 
To write a test that tolerates both cases we "capture" -the destination of the store, and then use the captured expression -to write the remainder of the test. - -```c++ -// CHECK-LABEL: test_div_no_op_into_shr: -extern "C" void test_div_no_op_into_shr(int value) { - int divisor = 2; - benchmark::DoNotOptimize(divisor); // hide the value from the optimizer - return value / divisor; - - // CHECK: movl $2, [[DEST:.*]] - // CHECK: idivl [[DEST]] - // CHECK: ret -} -``` - -#### Using Regular Expressions to Match Differing Output - -Often tests require testing assembly lines which may subtly differ -between compilers or compiler versions. A common example of this -is matching stack frame addresses. In this case regular expressions -can be used to match the differing bits of output. For example: - -```c++ -int ExternInt; -struct Point { int x, y, z; }; - -// CHECK-LABEL: test_store_point: -extern "C" void test_store_point() { - Point p{ExternInt, ExternInt, ExternInt}; - benchmark::DoNotOptimize(p); - - // CHECK: movl ExternInt(%rip), %eax - // CHECK: movl %eax, -{{[0-9]+}}(%rsp) - // CHECK: movl %eax, -{{[0-9]+}}(%rsp) - // CHECK: movl %eax, -{{[0-9]+}}(%rsp) - // CHECK: ret -} -``` - -## Current Requirements and Limitations - -The tests require Filecheck to be installed along the `PATH` of the -build machine. Otherwise the tests will be disabled. - -Additionally, as mentioned in the previous section, codegen tests are -inherently non-portable. Currently the tests are limited to: - -* x86_64 targets. -* Compiled with GCC or Clang - -Further work could be done, at least on a limited basis, to extend the -tests to other architectures and compilers (using `CHECK` prefixes). - -Furthermore, the tests fail for builds which specify additional flags -that modify code generation, including `--coverage` or `-fsanitize=`. 
- diff --git a/libcxx/utils/google-benchmark/docs/_config.yml b/libcxx/utils/google-benchmark/docs/_config.yml deleted file mode 100644 index fc24e7a62dc2..000000000000 --- a/libcxx/utils/google-benchmark/docs/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-hacker \ No newline at end of file diff --git a/libcxx/utils/google-benchmark/docs/perf_counters.md b/libcxx/utils/google-benchmark/docs/perf_counters.md deleted file mode 100644 index 74560e966971..000000000000 --- a/libcxx/utils/google-benchmark/docs/perf_counters.md +++ /dev/null @@ -1,34 +0,0 @@ - - -# User-Requested Performance Counters - -When running benchmarks, the user may choose to request collection of -performance counters. This may be useful in investigation scenarios - narrowing -down the cause of a regression; or verifying that the underlying cause of a -performance improvement matches expectations. - -This feature is available if: - -* The benchmark is run on an architecture featuring a Performance Monitoring - Unit (PMU), -* The benchmark is compiled with support for collecting counters. Currently, - this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build - time - -The feature does not require modifying benchmark code. Counter collection is -handled at the boundaries where timer collection is also handled. - -To opt-in: - -* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. -* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM. - -To use, pass a comma-separated list of counter names through the -`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, -they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are -mapped by libpfm to platform-specifics - see libpfm -[documentation](http://perfmon2.sourceforge.net/docs.html) for more details. - -The counter values are reported back through the [User Counters](../README.md#custom-counters) -mechanism, meaning, they are available in all the formats (e.g. 
JSON) supported -by User Counters. \ No newline at end of file diff --git a/libcxx/utils/google-benchmark/docs/random_interleaving.md b/libcxx/utils/google-benchmark/docs/random_interleaving.md deleted file mode 100644 index c08303684148..000000000000 --- a/libcxx/utils/google-benchmark/docs/random_interleaving.md +++ /dev/null @@ -1,13 +0,0 @@ - - -# Random Interleaving - -[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a -technique to lower run-to-run variance. It randomly interleaves repetitions of a -microbenchmark with repetitions from other microbenchmarks in the same benchmark -test. Data shows it is able to lower run-to-run variance by -[40%](https://github.com/google/benchmark/issues/1051) on average. - -To use, you mainly need to set `--benchmark_enable_random_interleaving=true`, -and optionally specify non-zero repetition count `--benchmark_repetitions=9` -and optionally decrease the per-repetition time `--benchmark_min_time=0.1`. diff --git a/libcxx/utils/google-benchmark/docs/releasing.md b/libcxx/utils/google-benchmark/docs/releasing.md deleted file mode 100644 index 7a6dfc4017b2..000000000000 --- a/libcxx/utils/google-benchmark/docs/releasing.md +++ /dev/null @@ -1,22 +0,0 @@ -# How to release - -* Make sure you're on main and synced to HEAD -* Ensure the project builds and tests run (sanity check only, obviously) - * `parallel -j0 exec ::: test/*_test` can help ensure everything at least - passes -* Prepare release notes - * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of - commits between the last annotated tag and HEAD - * Pick the most interesting. -* Create one last commit that updates the version saved in `CMakeLists.txt` to the release version you're creating. (This version will be used if benchmark is installed from the archive you'll be creating in the next step.) 
- -``` -project (benchmark VERSION 1.5.3 LANGUAGES CXX) -``` - -* Create a release through github's interface - * Note this will create a lightweight tag. - * Update this to an annotated tag: - * `git pull --tags` - * `git tag -a -f ` - * `git push --force origin` diff --git a/libcxx/utils/google-benchmark/docs/tools.md b/libcxx/utils/google-benchmark/docs/tools.md deleted file mode 100644 index f2d0c497f3fc..000000000000 --- a/libcxx/utils/google-benchmark/docs/tools.md +++ /dev/null @@ -1,203 +0,0 @@ -# Benchmark Tools - -## compare.py - -The `compare.py` can be used to compare the result of benchmarks. - -### Dependencies -The utility relies on the [scipy](https://www.scipy.org) package which can be installed using pip: -```bash -pip3 install -r requirements.txt -``` - -### Displaying aggregates only - -The switch `-a` / `--display_aggregates_only` can be used to control the -displayment of the normal iterations vs the aggregates. When passed, it will -be passthrough to the benchmark binaries to be run, and will be accounted for -in the tool itself; only the aggregates will be displayed, but not normal runs. -It only affects the display, the separate runs will still be used to calculate -the U test. - -### Modes of operation - -There are three modes of operation: - -1. Just compare two benchmarks -The program is invoked like: - -``` bash -$ compare.py benchmarks [benchmark options]... -``` -Where `` and `` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. - -`[benchmark options]` will be passed to the benchmarks invocations. They can be anything that binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. 
- -Example output: -``` -$ ./compare.py benchmarks ./a.out ./a.out -RUNNING: ./a.out --benchmark_out=/tmp/tmprBT5nW -Run on (8 X 4000 MHz CPU s) -2017-11-07 21:16:44 ------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------- -BM_memcpy/8 36 ns 36 ns 19101577 211.669MB/s -BM_memcpy/64 76 ns 76 ns 9412571 800.199MB/s -BM_memcpy/512 84 ns 84 ns 8249070 5.64771GB/s -BM_memcpy/1024 116 ns 116 ns 6181763 8.19505GB/s -BM_memcpy/8192 643 ns 643 ns 1062855 11.8636GB/s -BM_copy/8 222 ns 222 ns 3137987 34.3772MB/s -BM_copy/64 1608 ns 1608 ns 432758 37.9501MB/s -BM_copy/512 12589 ns 12589 ns 54806 38.7867MB/s -BM_copy/1024 25169 ns 25169 ns 27713 38.8003MB/s -BM_copy/8192 201165 ns 201112 ns 3486 38.8466MB/s -RUNNING: ./a.out --benchmark_out=/tmp/tmpt1wwG_ -Run on (8 X 4000 MHz CPU s) -2017-11-07 21:16:53 ------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------- -BM_memcpy/8 36 ns 36 ns 19397903 211.255MB/s -BM_memcpy/64 73 ns 73 ns 9691174 839.635MB/s -BM_memcpy/512 85 ns 85 ns 8312329 5.60101GB/s -BM_memcpy/1024 118 ns 118 ns 6438774 8.11608GB/s -BM_memcpy/8192 656 ns 656 ns 1068644 11.6277GB/s -BM_copy/8 223 ns 223 ns 3146977 34.2338MB/s -BM_copy/64 1611 ns 1611 ns 435340 37.8751MB/s -BM_copy/512 12622 ns 12622 ns 54818 38.6844MB/s -BM_copy/1024 25257 ns 25239 ns 27779 38.6927MB/s -BM_copy/8192 205013 ns 205010 ns 3479 38.108MB/s -Comparing ./a.out to ./a.out -Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------- -BM_memcpy/8 +0.0020 +0.0020 36 36 36 36 -BM_memcpy/64 -0.0468 -0.0470 76 73 76 73 -BM_memcpy/512 +0.0081 +0.0083 84 85 84 85 -BM_memcpy/1024 +0.0098 +0.0097 116 118 116 118 -BM_memcpy/8192 +0.0200 +0.0203 643 656 643 656 -BM_copy/8 +0.0046 +0.0042 222 223 222 223 -BM_copy/64 +0.0020 +0.0020 1608 
1611 1608 1611 -BM_copy/512 +0.0027 +0.0026 12589 12622 12589 12622 -BM_copy/1024 +0.0035 +0.0028 25169 25257 25169 25239 -BM_copy/8192 +0.0191 +0.0194 201165 205013 201112 205010 -``` - -What it does is for the every benchmark from the first run it looks for the benchmark with exactly the same name in the second run, and then compares the results. If the names differ, the benchmark is omitted from the diff. -As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. - -2. Compare two different filters of one benchmark -The program is invoked like: - -``` bash -$ compare.py filters [benchmark options]... -``` -Where `` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. - -Where `` and `` are the same regex filters that you would pass to the `[--benchmark_filter=]` parameter of the benchmark binary. - -`[benchmark options]` will be passed to the benchmarks invocations. They can be anything that binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. 
- -Example output: -``` -$ ./compare.py filters ./a.out BM_memcpy BM_copy -RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmpBWKk0k -Run on (8 X 4000 MHz CPU s) -2017-11-07 21:37:28 ------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------- -BM_memcpy/8 36 ns 36 ns 17891491 211.215MB/s -BM_memcpy/64 74 ns 74 ns 9400999 825.646MB/s -BM_memcpy/512 87 ns 87 ns 8027453 5.46126GB/s -BM_memcpy/1024 111 ns 111 ns 6116853 8.5648GB/s -BM_memcpy/8192 657 ns 656 ns 1064679 11.6247GB/s -RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpAvWcOM -Run on (8 X 4000 MHz CPU s) -2017-11-07 21:37:33 ----------------------------------------------------- -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_copy/8 227 ns 227 ns 3038700 33.6264MB/s -BM_copy/64 1640 ns 1640 ns 426893 37.2154MB/s -BM_copy/512 12804 ns 12801 ns 55417 38.1444MB/s -BM_copy/1024 25409 ns 25407 ns 27516 38.4365MB/s -BM_copy/8192 202986 ns 202990 ns 3454 38.4871MB/s -Comparing BM_memcpy to BM_copy (from ./a.out) -Benchmark Time CPU Time Old Time New CPU Old CPU New --------------------------------------------------------------------------------------------------------------------- -[BM_memcpy vs. BM_copy]/8 +5.2829 +5.2812 36 227 36 227 -[BM_memcpy vs. BM_copy]/64 +21.1719 +21.1856 74 1640 74 1640 -[BM_memcpy vs. BM_copy]/512 +145.6487 +145.6097 87 12804 87 12801 -[BM_memcpy vs. BM_copy]/1024 +227.1860 +227.1776 111 25409 111 25407 -[BM_memcpy vs. BM_copy]/8192 +308.1664 +308.2898 657 202986 656 202990 -``` - -As you can see, it applies filter to the benchmarks, both when running the benchmark, and before doing the diff. And to make the diff work, the matches are replaced with some common string. Thus, you can compare two different benchmark families within one benchmark binary. 
-As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. - -3. Compare filter one from benchmark one to filter two from benchmark two: -The program is invoked like: - -``` bash -$ compare.py filters [benchmark options]... -``` - -Where `` and `` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file. - -Where `` and `` are the same regex filters that you would pass to the `[--benchmark_filter=]` parameter of the benchmark binary. - -`[benchmark options]` will be passed to the benchmarks invocations. They can be anything that binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes. - -Example output: -``` -$ ./compare.py benchmarksfiltered ./a.out BM_memcpy ./a.out BM_copy -RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmp_FvbYg -Run on (8 X 4000 MHz CPU s) -2017-11-07 21:38:27 ------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------- -BM_memcpy/8 37 ns 37 ns 18953482 204.118MB/s -BM_memcpy/64 74 ns 74 ns 9206578 828.245MB/s -BM_memcpy/512 91 ns 91 ns 8086195 5.25476GB/s -BM_memcpy/1024 120 ns 120 ns 5804513 7.95662GB/s -BM_memcpy/8192 664 ns 664 ns 1028363 11.4948GB/s -RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpDfL5iE -Run on (8 X 4000 MHz CPU s) -2017-11-07 21:38:32 ----------------------------------------------------- -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_copy/8 230 ns 230 ns 2985909 33.1161MB/s -BM_copy/64 1654 ns 1653 ns 419408 36.9137MB/s -BM_copy/512 13122 ns 13120 ns 53403 37.2156MB/s -BM_copy/1024 26679 ns 26666 ns 26575 36.6218MB/s -BM_copy/8192 215068 ns 215053 ns 3221 
36.3283MB/s -Comparing BM_memcpy (from ./a.out) to BM_copy (from ./a.out) -Benchmark Time CPU Time Old Time New CPU Old CPU New --------------------------------------------------------------------------------------------------------------------- -[BM_memcpy vs. BM_copy]/8 +5.1649 +5.1637 37 230 37 230 -[BM_memcpy vs. BM_copy]/64 +21.4352 +21.4374 74 1654 74 1653 -[BM_memcpy vs. BM_copy]/512 +143.6022 +143.5865 91 13122 91 13120 -[BM_memcpy vs. BM_copy]/1024 +221.5903 +221.4790 120 26679 120 26666 -[BM_memcpy vs. BM_copy]/8192 +322.9059 +323.0096 664 215068 664 215053 -``` -This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one. -As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. - -### U test - -If there is a sufficient repetition count of the benchmarks, the tool can do -a [U Test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), of the -null hypothesis that it is equally likely that a randomly selected value from -one sample will be less than or greater than a randomly selected value from a -second sample. - -If the calculated p-value is below this value is lower than the significance -level alpha, then the result is said to be statistically significant and the -null hypothesis is rejected. Which in other words means that the two benchmarks -aren't identical. - -**WARNING**: requires **LARGE** (no less than 9) number of repetitions to be -meaningful! diff --git a/libcxx/utils/google-benchmark/include/benchmark/benchmark.h b/libcxx/utils/google-benchmark/include/benchmark/benchmark.h deleted file mode 100644 index 9b5480244d6f..000000000000 --- a/libcxx/utils/google-benchmark/include/benchmark/benchmark.h +++ /dev/null @@ -1,1654 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Support for registering benchmarks for functions. - -/* Example usage: -// Define a function that executes the code to be measured a -// specified number of times: -static void BM_StringCreation(benchmark::State& state) { - for (auto _ : state) - std::string empty_string; -} - -// Register the function as a benchmark -BENCHMARK(BM_StringCreation); - -// Define another benchmark -static void BM_StringCopy(benchmark::State& state) { - std::string x = "hello"; - for (auto _ : state) - std::string copy(x); -} -BENCHMARK(BM_StringCopy); - -// Augment the main() program to invoke benchmarks if specified -// via the --benchmarks command line flag. E.g., -// my_unittest --benchmark_filter=all -// my_unittest --benchmark_filter=BM_StringCreation -// my_unittest --benchmark_filter=String -// my_unittest --benchmark_filter='Copy|Creation' -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} - -// Sometimes a family of microbenchmarks can be implemented with -// just one routine that takes an extra argument to specify which -// one of the family of benchmarks to run. 
For example, the following -// code defines a family of microbenchmarks for measuring the speed -// of memcpy() calls of different lengths: - -static void BM_memcpy(benchmark::State& state) { - char* src = new char[state.range(0)]; char* dst = new char[state.range(0)]; - memset(src, 'x', state.range(0)); - for (auto _ : state) - memcpy(dst, src, state.range(0)); - state.SetBytesProcessed(state.iterations() * state.range(0)); - delete[] src; delete[] dst; -} -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); - -// The preceding code is quite repetitive, and can be replaced with the -// following short-hand. The following invocation will pick a few -// appropriate arguments in the specified range and will generate a -// microbenchmark for each such argument. -BENCHMARK(BM_memcpy)->Range(8, 8<<10); - -// You might have a microbenchmark that depends on two inputs. For -// example, the following code defines a family of microbenchmarks for -// measuring the speed of set insertion. -static void BM_SetInsert(benchmark::State& state) { - set data; - for (auto _ : state) { - state.PauseTiming(); - data = ConstructRandomSet(state.range(0)); - state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 128}) - ->Args({2<<10, 128}) - ->Args({4<<10, 128}) - ->Args({8<<10, 128}) - ->Args({1<<10, 512}) - ->Args({2<<10, 512}) - ->Args({4<<10, 512}) - ->Args({8<<10, 512}); - -// The preceding code is quite repetitive, and can be replaced with -// the following short-hand. The following macro will pick a few -// appropriate arguments in the product of the two specified ranges -// and will generate a microbenchmark for each such pair. -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); - -// For more complex patterns of inputs, passing a custom function -// to Apply allows programmatic specification of an -// arbitrary set of arguments to run the microbenchmark on. 
-// The following example enumerates a dense range on -// one parameter, and a sparse range on the second. -static void CustomArguments(benchmark::internal::Benchmark* b) { - for (int i = 0; i <= 10; ++i) - for (int j = 32; j <= 1024*1024; j *= 8) - b->Args({i, j}); -} -BENCHMARK(BM_SetInsert)->Apply(CustomArguments); - -// Templated microbenchmarks work the same way: -// Produce then consume 'size' messages 'iters' times -// Measures throughput in the absence of multiprogramming. -template int BM_Sequential(benchmark::State& state) { - Q q; - typename Q::value_type v; - for (auto _ : state) { - for (int i = state.range(0); i--; ) - q.push(v); - for (int e = state.range(0); e--; ) - q.Wait(&v); - } - // actually messages, not bytes: - state.SetBytesProcessed(state.iterations() * state.range(0)); -} -BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); - -Use `Benchmark::MinTime(double t)` to set the minimum time used to run the -benchmark. This option overrides the `benchmark_min_time` flag. - -void BM_test(benchmark::State& state) { - ... body ... -} -BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds. - -In a multithreaded test, it is guaranteed that none of the threads will start -until all have reached the loop start, and all will have finished before any -thread exits the loop body. As such, any global setup or teardown you want to -do can be wrapped in a check against the thread index: - -static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { - // Setup code here. - } - for (auto _ : state) { - // Run the test as normal. - } - if (state.thread_index == 0) { - // Teardown code here. - } -} -BENCHMARK(BM_MultiThreaded)->Threads(4); - - -If a benchmark runs a few milliseconds it may be hard to visually compare the -measured times, since the output data is given in nanoseconds per default. 
In -order to manually set the time unit, you can specify it manually: - -BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); -*/ - -#ifndef BENCHMARK_BENCHMARK_H_ -#define BENCHMARK_BENCHMARK_H_ - -// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) -#define BENCHMARK_HAS_CXX11 -#endif - -// This _MSC_VER check should detect VS 2017 v15.3 and newer. -#if __cplusplus >= 201703L || \ - (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L) -#define BENCHMARK_HAS_CXX17 -#endif - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(BENCHMARK_HAS_CXX11) -#include -#include -#include -#endif - -#if defined(_MSC_VER) -#include // for _ReadWriteBarrier -#endif - -#ifndef BENCHMARK_HAS_CXX11 -#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - TypeName& operator=(const TypeName&) -#else -#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&) = delete; \ - TypeName& operator=(const TypeName&) = delete -#endif - -#ifdef BENCHMARK_HAS_CXX17 -#define BENCHMARK_UNUSED [[maybe_unused]] -#elif defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_UNUSED __attribute__((unused)) -#else -#define BENCHMARK_UNUSED -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#elif defined(_MSC_VER) && !defined(__clang__) -#define BENCHMARK_ALWAYS_INLINE __forceinline -#if _MSC_VER >= 1900 -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#else -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif -#define __func__ __FUNCTION__ -#else -#define BENCHMARK_ALWAYS_INLINE -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif - -#define 
BENCHMARK_INTERNAL_TOSTRING2(x) #x -#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) - -#if defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) -#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) -#else -#define BENCHMARK_BUILTIN_EXPECT(x, y) x -#define BENCHMARK_DEPRECATED_MSG(msg) -#define BENCHMARK_WARNING_MSG(msg) \ - __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \ - __LINE__) ") : warning note: " msg)) -#endif - -#if defined(__GNUC__) && !defined(__clang__) -#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -#if defined(__GNUC__) || __has_builtin(__builtin_unreachable) -#define BENCHMARK_UNREACHABLE() __builtin_unreachable() -#elif defined(_MSC_VER) -#define BENCHMARK_UNREACHABLE() __assume(false) -#else -#define BENCHMARK_UNREACHABLE() ((void)0) -#endif - -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_OVERRIDE override -#else -#define BENCHMARK_OVERRIDE -#endif - -namespace benchmark { -class BenchmarkReporter; -class MemoryManager; - -void Initialize(int* argc, char** argv); -void Shutdown(); - -// Report to stdout all arguments in 'argv' as unrecognized except the first. -// Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). -bool ReportUnrecognizedArguments(int argc, char** argv); - -// Generate a list of benchmarks matching the specified --benchmark_filter flag -// and if --benchmark_list_tests is specified return after printing the name -// of each matching benchmark. Otherwise run each matching benchmark and -// report the results. -// -// The second and third overload use the specified 'display_reporter' and -// 'file_reporter' respectively. 'file_reporter' will write to the file -// specified -// by '--benchmark_output'. If '--benchmark_output' is not given the -// 'file_reporter' is ignored. 
-// -// RETURNS: The number of matching benchmarks. -size_t RunSpecifiedBenchmarks(); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter); - -// Register a MemoryManager instance that will be used to collect and report -// allocation measurements for benchmark runs. -void RegisterMemoryManager(MemoryManager* memory_manager); - -// Add a key-value pair to output as part of the context stanza in the report. -void AddCustomContext(const std::string& key, const std::string& value); - -namespace internal { -class Benchmark; -class BenchmarkImp; -class BenchmarkFamilies; - -void UseCharPointer(char const volatile*); - -// Take ownership of the pointer and register the benchmark. Return the -// registered benchmark. -Benchmark* RegisterBenchmarkInternal(Benchmark*); - -// Ensure that the standard streams are properly initialized in every TU. -int InitializeStreams(); -BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); - -} // namespace internal - -#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \ - defined(__EMSCRIPTEN__) -#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY -#endif - -// The DoNotOptimize(...) function can be used to prevent a value or -// expression from being optimized away by the compiler. This function is -// intended to add little to no overhead. -// See: https://youtu.be/nXaxk27zwlk?t=2441 -#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY -template -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - asm volatile("" : : "r,m"(value) : "memory"); -} - -template -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { -#if defined(__clang__) - asm volatile("" : "+r,m"(value) : : "memory"); -#else - asm volatile("" : "+m,r"(value) : : "memory"); -#endif -} - -// Force the compiler to flush pending writes to global memory. 
Acts as an -// effective read/write barrier -inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { - asm volatile("" : : : "memory"); -} -#elif defined(_MSC_VER) -template -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - internal::UseCharPointer(&reinterpret_cast(value)); - _ReadWriteBarrier(); -} - -inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } -#else -template -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - internal::UseCharPointer(&reinterpret_cast(value)); -} -// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers -#endif - -// This class is used for user-defined counters. -class Counter { - public: - enum Flags { - kDefaults = 0, - // Mark the counter as a rate. It will be presented divided - // by the duration of the benchmark. - kIsRate = 1U << 0U, - // Mark the counter as a thread-average quantity. It will be - // presented divided by the number of threads. - kAvgThreads = 1U << 1U, - // Mark the counter as a thread-average rate. See above. - kAvgThreadsRate = kIsRate | kAvgThreads, - // Mark the counter as a constant value, valid/same for *every* iteration. - // When reporting, it will be *multiplied* by the iteration count. - kIsIterationInvariant = 1U << 2U, - // Mark the counter as a constant rate. - // When reporting, it will be *multiplied* by the iteration count - // and then divided by the duration of the benchmark. - kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, - // Mark the counter as a iteration-average quantity. - // It will be presented divided by the number of iterations. - kAvgIterations = 1U << 3U, - // Mark the counter as a iteration-average rate. See above. - kAvgIterationsRate = kIsRate | kAvgIterations, - - // In the end, invert the result. This is always done last! 
- kInvert = 1U << 31U - }; - - enum OneK { - // 1'000 items per 1k - kIs1000 = 1000, - // 1'024 items per 1k - kIs1024 = 1024 - }; - - double value; - Flags flags; - OneK oneK; - - BENCHMARK_ALWAYS_INLINE - Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) - : value(v), flags(f), oneK(k) {} - - BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } - BENCHMARK_ALWAYS_INLINE operator double&() { return value; } -}; - -// A helper for user code to create unforeseen combinations of Flags, without -// having to do this cast manually each time, or providing this operator. -Counter::Flags inline operator|(const Counter::Flags& LHS, - const Counter::Flags& RHS) { - return static_cast(static_cast(LHS) | - static_cast(RHS)); -} - -// This is the container for the user-defined counters. -typedef std::map UserCounters; - -// TimeUnit is passed to a benchmark in order to specify the order of magnitude -// for the measured time. -enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; - -// BigO is passed to a benchmark in order to specify the asymptotic -// computational -// complexity for the benchmark. In case oAuto is selected, complexity will be -// calculated automatically to the best fit. -enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; - -typedef uint64_t IterationCount; - -// BigOFunc is passed to a benchmark in order to specify the asymptotic -// computational complexity for the benchmark. 
-typedef double(BigOFunc)(IterationCount); - -// StatisticsFunc is passed to a benchmark in order to compute some descriptive -// statistics over all the measurements of some type -typedef double(StatisticsFunc)(const std::vector&); - -namespace internal { -struct Statistics { - std::string name_; - StatisticsFunc* compute_; - - Statistics(const std::string& name, StatisticsFunc* compute) - : name_(name), compute_(compute) {} -}; - -class BenchmarkInstance; -class ThreadTimer; -class ThreadManager; -class PerfCountersMeasurement; - -enum AggregationReportMode -#if defined(BENCHMARK_HAS_CXX11) - : unsigned -#else -#endif -{ - // The mode has not been manually specified - ARM_Unspecified = 0, - // The mode is user-specified. - // This may or may not be set when the following bit-flags are set. - ARM_Default = 1U << 0U, - // File reporter should only output aggregates. - ARM_FileReportAggregatesOnly = 1U << 1U, - // Display reporter should only output aggregates - ARM_DisplayReportAggregatesOnly = 1U << 2U, - // Both reporters should only display aggregates. - ARM_ReportAggregatesOnly = - ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly -}; - -} // namespace internal - -// State is passed to a running Benchmark and contains state for the -// benchmark to use. -class State { - public: - struct StateIterator; - friend struct StateIterator; - - // Returns iterators used to run each iteration of a benchmark using a - // C++11 ranged-based for loop. These functions should not be called directly. - // - // REQUIRES: The benchmark has not started running yet. Neither begin nor end - // have been called previously. - // - // NOTE: KeepRunning may not be used after calling either of these functions. - BENCHMARK_ALWAYS_INLINE StateIterator begin(); - BENCHMARK_ALWAYS_INLINE StateIterator end(); - - // Returns true if the benchmark should continue through another iteration. 
- // NOTE: A benchmark may not return from the test until KeepRunning() has - // returned false. - bool KeepRunning(); - - // Returns true iff the benchmark should run n more iterations. - // REQUIRES: 'n' > 0. - // NOTE: A benchmark must not return from the test until KeepRunningBatch() - // has returned false. - // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations. - // - // Intended usage: - // while (state.KeepRunningBatch(1000)) { - // // process 1000 elements - // } - bool KeepRunningBatch(IterationCount n); - - // REQUIRES: timer is running and 'SkipWithError(...)' has not been called - // by the current thread. - // Stop the benchmark timer. If not called, the timer will be - // automatically stopped after the last iteration of the benchmark loop. - // - // For threaded benchmarks the PauseTiming() function only pauses the timing - // for the current thread. - // - // NOTE: The "real time" measurement is per-thread. If different threads - // report different measurements the largest one is reported. - // - // NOTE: PauseTiming()/ResumeTiming() are relatively - // heavyweight, and so their use should generally be avoided - // within each benchmark iteration, if possible. - void PauseTiming(); - - // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called - // by the current thread. - // Start the benchmark timer. The timer is NOT running on entrance to the - // benchmark function. It begins running after control flow enters the - // benchmark loop. - // - // NOTE: PauseTiming()/ResumeTiming() are relatively - // heavyweight, and so their use should generally be avoided - // within each benchmark iteration, if possible. - void ResumeTiming(); - - // REQUIRES: 'SkipWithError(...)' has not been called previously by the - // current thread. - // Report the benchmark as resulting in an error with the specified 'msg'. - // After this call the user may explicitly 'return' from the benchmark. 
- // - // If the ranged-for style of benchmark loop is used, the user must explicitly - // break from the loop, otherwise all future iterations will be run. - // If the 'KeepRunning()' loop is used the current thread will automatically - // exit the loop at the end of the current iteration. - // - // For threaded benchmarks only the current thread stops executing and future - // calls to `KeepRunning()` will block until all threads have completed - // the `KeepRunning()` loop. If multiple threads report an error only the - // first error message is used. - // - // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit - // the current scope immediately. If the function is called from within - // the 'KeepRunning()' loop the current iteration will finish. It is the users - // responsibility to exit the scope as needed. - void SkipWithError(const char* msg); - - // Returns true if an error has been reported with 'SkipWithError(...)'. - bool error_occurred() const { return error_occurred_; } - - // REQUIRES: called exactly once per iteration of the benchmarking loop. - // Set the manually measured time for this benchmark iteration, which - // is used instead of automatically measured time if UseManualTime() was - // specified. - // - // For threaded benchmarks the final value will be set to the largest - // reported values. - void SetIterationTime(double seconds); - - // Set the number of bytes processed by the current benchmark - // execution. This routine is typically called once at the end of a - // throughput oriented benchmark. - // - // REQUIRES: a benchmark has exited its benchmarking loop. 
- BENCHMARK_ALWAYS_INLINE - void SetBytesProcessed(int64_t bytes) { - counters["bytes_per_second"] = - Counter(static_cast(bytes), Counter::kIsRate, Counter::kIs1024); - } - - BENCHMARK_ALWAYS_INLINE - int64_t bytes_processed() const { - if (counters.find("bytes_per_second") != counters.end()) - return static_cast(counters.at("bytes_per_second")); - return 0; - } - - // If this routine is called with complexity_n > 0 and complexity report is - // requested for the - // family benchmark, then current benchmark will be part of the computation - // and complexity_n will - // represent the length of N. - BENCHMARK_ALWAYS_INLINE - void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; } - - BENCHMARK_ALWAYS_INLINE - int64_t complexity_length_n() const { return complexity_n_; } - - // If this routine is called with items > 0, then an items/s - // label is printed on the benchmark report line for the currently - // executing benchmark. It is typically called at the end of a processing - // benchmark where a processing items/second output is desired. - // - // REQUIRES: a benchmark has exited its benchmarking loop. - BENCHMARK_ALWAYS_INLINE - void SetItemsProcessed(int64_t items) { - counters["items_per_second"] = - Counter(static_cast(items), benchmark::Counter::kIsRate); - } - - BENCHMARK_ALWAYS_INLINE - int64_t items_processed() const { - if (counters.find("items_per_second") != counters.end()) - return static_cast(counters.at("items_per_second")); - return 0; - } - - // If this routine is called, the specified label is printed at the - // end of the benchmark report line for the currently executing - // benchmark. Example: - // static void BM_Compress(benchmark::State& state) { - // ... 
- // double compress = input_size / output_size; - // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression)); - // } - // Produces output that looks like: - // BM_Compress 50 50 14115038 compress:27.3% - // - // REQUIRES: a benchmark has exited its benchmarking loop. - void SetLabel(const char* label); - - void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) { - this->SetLabel(str.c_str()); - } - - // Range arguments for this run. CHECKs if the argument has been set. - BENCHMARK_ALWAYS_INLINE - int64_t range(std::size_t pos = 0) const { - assert(range_.size() > pos); - return range_[pos]; - } - - BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead") - int64_t range_x() const { return range(0); } - - BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") - int64_t range_y() const { return range(1); } - - BENCHMARK_ALWAYS_INLINE - IterationCount iterations() const { - if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { - return 0; - } - return max_iterations - total_iterations_ + batch_leftover_; - } - - private - : // items we expect on the first cache line (ie 64 bytes of the struct) - // When total_iterations_ is 0, KeepRunning() and friends will return false. - // May be larger than max_iterations. - IterationCount total_iterations_; - - // When using KeepRunningBatch(), batch_leftover_ holds the number of - // iterations beyond max_iters that were run. Used to track - // completed_iterations_ accurately. - IterationCount batch_leftover_; - - public: - const IterationCount max_iterations; - - private: - bool started_; - bool finished_; - bool error_occurred_; - - private: // items we don't need on the first cache line - std::vector range_; - - int64_t complexity_n_; - - public: - // Container for user-defined counters. - UserCounters counters; - // Index of the executing thread. Values from [0, threads). - const int thread_index; - // Number of threads concurrently executing the benchmark. 
- const int threads; - - private: - State(IterationCount max_iters, const std::vector& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager, - internal::PerfCountersMeasurement* perf_counters_measurement); - - void StartKeepRunning(); - // Implementation of KeepRunning() and KeepRunningBatch(). - // is_batch must be true unless n is 1. - bool KeepRunningInternal(IterationCount n, bool is_batch); - void FinishKeepRunning(); - internal::ThreadTimer* const timer_; - internal::ThreadManager* const manager_; - internal::PerfCountersMeasurement* const perf_counters_measurement_; - - friend class internal::BenchmarkInstance; -}; - -inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { - return KeepRunningInternal(1, /*is_batch=*/false); -} - -inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) { - return KeepRunningInternal(n, /*is_batch=*/true); -} - -inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n, - bool is_batch) { - // total_iterations_ is set to 0 by the constructor, and always set to a - // nonzero value by StartKepRunning(). - assert(n > 0); - // n must be 1 unless is_batch is true. - assert(is_batch || n == 1); - if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) { - total_iterations_ -= n; - return true; - } - if (!started_) { - StartKeepRunning(); - if (!error_occurred_ && total_iterations_ >= n) { - total_iterations_ -= n; - return true; - } - } - // For non-batch runs, total_iterations_ must be 0 by now. 
- if (is_batch && total_iterations_ != 0) { - batch_leftover_ = n - total_iterations_; - total_iterations_ = 0; - return true; - } - FinishKeepRunning(); - return false; -} - -struct State::StateIterator { - struct BENCHMARK_UNUSED Value {}; - typedef std::forward_iterator_tag iterator_category; - typedef Value value_type; - typedef Value reference; - typedef Value pointer; - typedef std::ptrdiff_t difference_type; - - private: - friend class State; - BENCHMARK_ALWAYS_INLINE - StateIterator() : cached_(0), parent_() {} - - BENCHMARK_ALWAYS_INLINE - explicit StateIterator(State* st) - : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {} - - public: - BENCHMARK_ALWAYS_INLINE - Value operator*() const { return Value(); } - - BENCHMARK_ALWAYS_INLINE - StateIterator& operator++() { - assert(cached_ > 0); - --cached_; - return *this; - } - - BENCHMARK_ALWAYS_INLINE - bool operator!=(StateIterator const&) const { - if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true; - parent_->FinishKeepRunning(); - return false; - } - - private: - IterationCount cached_; - State* const parent_; -}; - -inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() { - return StateIterator(this); -} -inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() { - StartKeepRunning(); - return StateIterator(); -} - -namespace internal { - -typedef void(Function)(State&); - -// ------------------------------------------------------ -// Benchmark registration object. The BENCHMARK() macro expands -// into an internal::Benchmark* object. Various methods can -// be called on this object to change the properties of the benchmark. -// Each method returns "this" so that multiple method calls can -// chained into one expression. -class Benchmark { - public: - virtual ~Benchmark(); - - // Note: the following methods all return "this" so that multiple - // method calls can be chained together in one expression. 
- - // Specify the name of the benchmark - Benchmark* Name(const std::string& name); - - // Run this benchmark once with "x" as the extra argument passed - // to the function. - // REQUIRES: The function passed to the constructor must accept an arg1. - Benchmark* Arg(int64_t x); - - // Run this benchmark with the given time unit for the generated output report - Benchmark* Unit(TimeUnit unit); - - // Run this benchmark once for a number of values picked from the - // range [start..limit]. (start and limit are always picked.) - // REQUIRES: The function passed to the constructor must accept an arg1. - Benchmark* Range(int64_t start, int64_t limit); - - // Run this benchmark once for all values in the range [start..limit] with - // specific step - // REQUIRES: The function passed to the constructor must accept an arg1. - Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1); - - // Run this benchmark once with "args" as the extra arguments passed - // to the function. - // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... - Benchmark* Args(const std::vector& args); - - // Equivalent to Args({x, y}) - // NOTE: This is a legacy C++03 interface provided for compatibility only. - // New code should use 'Args'. - Benchmark* ArgPair(int64_t x, int64_t y) { - std::vector args; - args.push_back(x); - args.push_back(y); - return Args(args); - } - - // Run this benchmark once for a number of values picked from the - // ranges [start..limit]. (starts and limits are always picked.) - // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... - Benchmark* Ranges(const std::vector >& ranges); - - // Run this benchmark once for each combination of values in the (cartesian) - // product of the supplied argument lists. - // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... 
- Benchmark* ArgsProduct(const std::vector >& arglists); - - // Equivalent to ArgNames({name}) - Benchmark* ArgName(const std::string& name); - - // Set the argument names to display in the benchmark name. If not called, - // only argument values will be shown. - Benchmark* ArgNames(const std::vector& names); - - // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}). - // NOTE: This is a legacy C++03 interface provided for compatibility only. - // New code should use 'Ranges'. - Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) { - std::vector > ranges; - ranges.push_back(std::make_pair(lo1, hi1)); - ranges.push_back(std::make_pair(lo2, hi2)); - return Ranges(ranges); - } - - // Pass this benchmark object to *func, which can customize - // the benchmark by calling various methods like Arg, Args, - // Threads, etc. - Benchmark* Apply(void (*func)(Benchmark* benchmark)); - - // Set the range multiplier for non-dense range. If not called, the range - // multiplier kRangeMultiplier will be used. - Benchmark* RangeMultiplier(int multiplier); - - // Set the minimum amount of time to use when running this benchmark. This - // option overrides the `benchmark_min_time` flag. - // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. - Benchmark* MinTime(double t); - - // Specify the amount of iterations that should be run by this benchmark. - // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. - // - // NOTE: This function should only be used when *exact* iteration control is - // needed and never to control or limit how long a benchmark runs, where - // `--benchmark_min_time=N` or `MinTime(...)` should be used instead. - Benchmark* Iterations(IterationCount n); - - // Specify the amount of times to repeat this benchmark. This option overrides - // the `benchmark_repetitions` flag. 
- // REQUIRES: `n > 0` - Benchmark* Repetitions(int n); - - // Specify if each repetition of the benchmark should be reported separately - // or if only the final statistics should be reported. If the benchmark - // is not repeated then the single result is always reported. - // Applies to *ALL* reporters (display and file). - Benchmark* ReportAggregatesOnly(bool value = true); - - // Same as ReportAggregatesOnly(), but applies to display reporter only. - Benchmark* DisplayAggregatesOnly(bool value = true); - - // By default, the CPU time is measured only for the main thread, which may - // be unrepresentative if the benchmark uses threads internally. If called, - // the total CPU time spent by all the threads will be measured instead. - // By default, the only the main thread CPU time will be measured. - Benchmark* MeasureProcessCPUTime(); - - // If a particular benchmark should use the Wall clock instead of the CPU time - // (be it either the CPU time of the main thread only (default), or the - // total CPU usage of the benchmark), call this method. If called, the elapsed - // (wall) time will be used to control how many iterations are run, and in the - // printing of items/second or MB/seconds values. - // If not called, the CPU time used by the benchmark will be used. - Benchmark* UseRealTime(); - - // If a benchmark must measure time manually (e.g. if GPU execution time is - // being - // measured), call this method. If called, each benchmark iteration should - // call - // SetIterationTime(seconds) to report the measured time, which will be used - // to control how many iterations are run, and in the printing of items/second - // or MB/second values. - Benchmark* UseManualTime(); - - // Set the asymptotic computational complexity for the benchmark. If called - // the asymptotic computational complexity will be shown on the output. - Benchmark* Complexity(BigO complexity = benchmark::oAuto); - - // Set the asymptotic computational complexity for the benchmark. 
If called - // the asymptotic computational complexity will be shown on the output. - Benchmark* Complexity(BigOFunc* complexity); - - // Add this statistics to be computed over all the values of benchmark run - Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics); - - // Support for running multiple copies of the same benchmark concurrently - // in multiple threads. This may be useful when measuring the scaling - // of some piece of code. - - // Run one instance of this benchmark concurrently in t threads. - Benchmark* Threads(int t); - - // Pick a set of values T from [min_threads,max_threads]. - // min_threads and max_threads are always included in T. Run this - // benchmark once for each value in T. The benchmark run for a - // particular value t consists of t threads running the benchmark - // function concurrently. For example, consider: - // BENCHMARK(Foo)->ThreadRange(1,16); - // This will run the following benchmarks: - // Foo in 1 thread - // Foo in 2 threads - // Foo in 4 threads - // Foo in 8 threads - // Foo in 16 threads - Benchmark* ThreadRange(int min_threads, int max_threads); - - // For each value n in the range, run this benchmark once using n threads. - // min_threads and max_threads are always included in the range. - // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts - // a benchmark with 1, 4, 7 and 8 threads. 
- Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1); - - // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) - Benchmark* ThreadPerCpu(); - - virtual void Run(State& state) = 0; - - protected: - explicit Benchmark(const char* name); - Benchmark(Benchmark const&); - void SetName(const char* name); - - int ArgsCnt() const; - - private: - friend class BenchmarkFamilies; - friend class BenchmarkInstance; - - std::string name_; - AggregationReportMode aggregation_report_mode_; - std::vector arg_names_; // Args for all benchmark runs - std::vector > args_; // Args for all benchmark runs - TimeUnit time_unit_; - int range_multiplier_; - double min_time_; - IterationCount iterations_; - int repetitions_; - bool measure_process_cpu_time_; - bool use_real_time_; - bool use_manual_time_; - BigO complexity_; - BigOFunc* complexity_lambda_; - std::vector statistics_; - std::vector thread_counts_; - - Benchmark& operator=(Benchmark const&); -}; - -} // namespace internal - -// Create and register a benchmark with the specified 'name' that invokes -// the specified functor 'fn'. -// -// RETURNS: A pointer to the registered benchmark. -internal::Benchmark* RegisterBenchmark(const char* name, - internal::Function* fn); - -#if defined(BENCHMARK_HAS_CXX11) -template -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn); -#endif - -// Remove all registered benchmarks. All pointers to previously registered -// benchmarks are invalidated. -void ClearRegisteredBenchmarks(); - -namespace internal { -// The class used to hold all Benchmarks created from static function. -// (ie those created using the BENCHMARK(...) macros. 
-class FunctionBenchmark : public Benchmark { - public: - FunctionBenchmark(const char* name, Function* func) - : Benchmark(name), func_(func) {} - - virtual void Run(State& st) BENCHMARK_OVERRIDE; - - private: - Function* func_; -}; - -#ifdef BENCHMARK_HAS_CXX11 -template -class LambdaBenchmark : public Benchmark { - public: - virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } - - private: - template - LambdaBenchmark(const char* name, OLambda&& lam) - : Benchmark(name), lambda_(std::forward(lam)) {} - - LambdaBenchmark(LambdaBenchmark const&) = delete; - - private: - template - friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); - - Lambda lambda_; -}; -#endif - -} // namespace internal - -inline internal::Benchmark* RegisterBenchmark(const char* name, - internal::Function* fn) { - return internal::RegisterBenchmarkInternal( - ::new internal::FunctionBenchmark(name, fn)); -} - -#ifdef BENCHMARK_HAS_CXX11 -template -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { - using BenchType = - internal::LambdaBenchmark::type>; - return internal::RegisterBenchmarkInternal( - ::new BenchType(name, std::forward(fn))); -} -#endif - -#if defined(BENCHMARK_HAS_CXX11) && \ - (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) -template -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn, - Args&&... args) { - return benchmark::RegisterBenchmark( - name, [=](benchmark::State& st) { fn(st, args...); }); -} -#else -#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK -#endif - -// The base class for all fixture tests. -class Fixture : public internal::Benchmark { - public: - Fixture() : internal::Benchmark("") {} - - virtual void Run(State& st) BENCHMARK_OVERRIDE { - this->SetUp(st); - this->BenchmarkCase(st); - this->TearDown(st); - } - - // These will be deprecated ... - virtual void SetUp(const State&) {} - virtual void TearDown(const State&) {} - // ... In favor of these. 
- virtual void SetUp(State& st) { SetUp(const_cast(st)); } - virtual void TearDown(State& st) { TearDown(const_cast(st)); } - - protected: - virtual void BenchmarkCase(State&) = 0; -}; - -} // namespace benchmark - -// ------------------------------------------------------ -// Macro to register benchmarks - -// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1 -// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be -// empty. If X is empty the expression becomes (+1 == +0). -#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) -#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ -#else -#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ -#endif - -// Helpers for generating unique variable names -#define BENCHMARK_PRIVATE_NAME(n) \ - BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n) -#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) -#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c -// Helper for concatenation with macro name expansion -#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ - BaseClass##_##Method##_Benchmark - -#define BENCHMARK_PRIVATE_DECLARE(n) \ - static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ - BENCHMARK_UNUSED - -#define BENCHMARK(n) \ - BENCHMARK_PRIVATE_DECLARE(n) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark(#n, n))) - -// Old-style macros -#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) -#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)}) -#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t)) -#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) -#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ - BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}}) - -#ifdef BENCHMARK_HAS_CXX11 - -// Register a benchmark which invokes the function specified by `func` -// with the additional arguments specified by `...`. 
-// -// For example: -// -// template ` -// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { -// [...] -//} -// /* Registers a benchmark named "BM_takes_args/int_string_test` */ -// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); -#define BENCHMARK_CAPTURE(func, test_case_name, ...) \ - BENCHMARK_PRIVATE_DECLARE(func) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark( \ - #func "/" #test_case_name, \ - [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) - -#endif // BENCHMARK_HAS_CXX11 - -// This will register a benchmark for a templatized function. For example: -// -// template -// void BM_Foo(int iters); -// -// BENCHMARK_TEMPLATE(BM_Foo, 1); -// -// will register BM_Foo<1> as a benchmark. -#define BENCHMARK_TEMPLATE1(n, a) \ - BENCHMARK_PRIVATE_DECLARE(n) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n))) - -#define BENCHMARK_TEMPLATE2(n, a, b) \ - BENCHMARK_PRIVATE_DECLARE(n) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \ - n))) - -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE(n, ...) 
\ - BENCHMARK_PRIVATE_DECLARE(n) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark( \ - #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) -#else -#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) -#endif - -#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - -#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "<" #a ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - -#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) 
\ - class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \ - this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; -#else -#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ - BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a) -#endif - -#define BENCHMARK_DEFINE_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - -#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - -#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \ - BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \ - BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase -#else -#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) -#endif - -#define BENCHMARK_REGISTER_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)) - -#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ - BENCHMARK_PRIVATE_DECLARE(TestName) = \ - (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) - -// This macro will define and register a benchmark within a fixture class. 
-#define BENCHMARK_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - -#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - -#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \ - BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \ - BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase -#else -#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) -#endif - -// Helper macro to create a main routine in a test that runs the benchmarks -#define BENCHMARK_MAIN() \ - int main(int argc, char** argv) { \ - ::benchmark::Initialize(&argc, argv); \ - if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ - ::benchmark::RunSpecifiedBenchmarks(); \ - ::benchmark::Shutdown(); \ - return 0; \ - } \ - int main(int, char**) - -// ------------------------------------------------------ -// Benchmark Reporters - -namespace benchmark { - -struct CPUInfo { - struct CacheInfo { - std::string type; - int level; - int size; - int num_sharing; - }; - - enum Scaling { - UNKNOWN, - ENABLED, - DISABLED - }; - - int num_cpus; - Scaling scaling; - double cycles_per_second; - std::vector caches; - std::vector load_avg; - - static const CPUInfo& Get(); - - private: - CPUInfo(); - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo); -}; - -// Adding Struct for System Information 
-struct SystemInfo { - std::string name; - static const SystemInfo& Get(); - - private: - SystemInfo(); - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo); -}; - -// BenchmarkName contains the components of the Benchmark's name -// which allows individual fields to be modified or cleared before -// building the final name using 'str()'. -struct BenchmarkName { - std::string function_name; - std::string args; - std::string min_time; - std::string iterations; - std::string repetitions; - std::string time_type; - std::string threads; - - // Return the full name of the benchmark with each non-empty - // field separated by a '/' - std::string str() const; -}; - -// Interface for custom benchmark result printers. -// By default, benchmark reports are printed to stdout. However an application -// can control the destination of the reports by calling -// RunSpecifiedBenchmarks and passing it a custom reporter object. -// The reporter object must implement the following interface. -class BenchmarkReporter { - public: - struct Context { - CPUInfo const& cpu_info; - SystemInfo const& sys_info; - // The number of chars in the longest benchmark name. - size_t name_field_width; - static const char* executable_name; - Context(); - }; - - struct Run { - static const int64_t no_repetition_index = -1; - enum RunType { RT_Iteration, RT_Aggregate }; - - Run() - : run_type(RT_Iteration), - error_occurred(false), - iterations(1), - threads(1), - time_unit(kNanosecond), - real_accumulated_time(0), - cpu_accumulated_time(0), - max_heapbytes_used(0), - complexity(oNone), - complexity_lambda(), - complexity_n(0), - report_big_o(false), - report_rms(false), - counters(), - has_memory_result(false), - allocs_per_iter(0.0), - max_bytes_used(0) {} - - std::string benchmark_name() const; - BenchmarkName run_name; - int64_t family_index; - int64_t per_family_instance_index; - RunType run_type; - std::string aggregate_name; - std::string report_label; // Empty if not set by benchmark. 
- bool error_occurred; - std::string error_message; - - IterationCount iterations; - int64_t threads; - int64_t repetition_index; - int64_t repetitions; - TimeUnit time_unit; - double real_accumulated_time; - double cpu_accumulated_time; - - // Return a value representing the real time per iteration in the unit - // specified by 'time_unit'. - // NOTE: If 'iterations' is zero the returned value represents the - // accumulated time. - double GetAdjustedRealTime() const; - - // Return a value representing the cpu time per iteration in the unit - // specified by 'time_unit'. - // NOTE: If 'iterations' is zero the returned value represents the - // accumulated time. - double GetAdjustedCPUTime() const; - - // This is set to 0.0 if memory tracing is not enabled. - double max_heapbytes_used; - - // Keep track of arguments to compute asymptotic complexity - BigO complexity; - BigOFunc* complexity_lambda; - int64_t complexity_n; - - // what statistics to compute from the measurements - const std::vector* statistics; - - // Inform print function whether the current run is a complexity report - bool report_big_o; - bool report_rms; - - UserCounters counters; - - // Memory metrics. - bool has_memory_result; - double allocs_per_iter; - int64_t max_bytes_used; - }; - - struct PerFamilyRunReports { - PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {} - - // How many runs will all instances of this benchmark perform? - int num_runs_total; - - // How many runs have happened already? - int num_runs_done; - - // The reports about (non-errneous!) runs of this family. - std::vector Runs; - }; - - // Construct a BenchmarkReporter with the output stream set to 'std::cout' - // and the error stream set to 'std::cerr' - BenchmarkReporter(); - - // Called once for every suite of benchmarks run. - // The parameter "context" contains information that the - // reporter may wish to use when generating its report, for example the - // platform under which the benchmarks are running. 
The benchmark run is - // never started if this function returns false, allowing the reporter - // to skip runs based on the context information. - virtual bool ReportContext(const Context& context) = 0; - - // Called once for each group of benchmark runs, gives information about - // cpu-time and heap memory usage during the benchmark run. If the group - // of runs contained more than two entries then 'report' contains additional - // elements representing the mean and standard deviation of those runs. - // Additionally if this group of runs was the last in a family of benchmarks - // 'reports' contains additional entries representing the asymptotic - // complexity and RMS of that benchmark family. - virtual void ReportRuns(const std::vector& report) = 0; - - // Called once and only once after ever group of benchmarks is run and - // reported. - virtual void Finalize() {} - - // REQUIRES: The object referenced by 'out' is valid for the lifetime - // of the reporter. - void SetOutputStream(std::ostream* out) { - assert(out); - output_stream_ = out; - } - - // REQUIRES: The object referenced by 'err' is valid for the lifetime - // of the reporter. - void SetErrorStream(std::ostream* err) { - assert(err); - error_stream_ = err; - } - - std::ostream& GetOutputStream() const { return *output_stream_; } - - std::ostream& GetErrorStream() const { return *error_stream_; } - - virtual ~BenchmarkReporter(); - - // Write a human readable string to 'out' representing the specified - // 'context'. - // REQUIRES: 'out' is non-null. - static void PrintBasicContext(std::ostream* out, Context const& context); - - private: - std::ostream* output_stream_; - std::ostream* error_stream_; -}; - -// Simple reporter that outputs benchmark data to the console. This is the -// default reporter used by RunSpecifiedBenchmarks(). 
-class ConsoleReporter : public BenchmarkReporter { - public: - enum OutputOptions { - OO_None = 0, - OO_Color = 1, - OO_Tabular = 2, - OO_ColorTabular = OO_Color | OO_Tabular, - OO_Defaults = OO_ColorTabular - }; - explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) - : output_options_(opts_), - name_field_width_(0), - prev_counters_(), - printed_header_(false) {} - - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; - - protected: - virtual void PrintRunData(const Run& report); - virtual void PrintHeader(const Run& report); - - OutputOptions output_options_; - size_t name_field_width_; - UserCounters prev_counters_; - bool printed_header_; -}; - -class JSONReporter : public BenchmarkReporter { - public: - JSONReporter() : first_report_(true) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; - virtual void Finalize() BENCHMARK_OVERRIDE; - - private: - void PrintRunData(const Run& report); - - bool first_report_; -}; - -class BENCHMARK_DEPRECATED_MSG( - "The CSV Reporter will be removed in a future release") CSVReporter - : public BenchmarkReporter { - public: - CSVReporter() : printed_header_(false) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; - - private: - void PrintRunData(const Run& report); - - bool printed_header_; - std::set user_counter_names_; -}; - -// If a MemoryManager is registered, it can be used to collect and report -// allocation metrics for a run of the benchmark. -class MemoryManager { - public: - struct Result { - Result() : num_allocs(0), max_bytes_used(0) {} - - // The number of allocations made in total between Start and Stop. - int64_t num_allocs; - - // The peak memory use between Start and Stop. 
- int64_t max_bytes_used; - }; - - virtual ~MemoryManager() {} - - // Implement this to start recording allocation information. - virtual void Start() = 0; - - // Implement this to stop recording and fill out the given Result structure. - virtual void Stop(Result* result) = 0; -}; - -inline const char* GetTimeUnitString(TimeUnit unit) { - switch (unit) { - case kSecond: - return "s"; - case kMillisecond: - return "ms"; - case kMicrosecond: - return "us"; - case kNanosecond: - return "ns"; - } - BENCHMARK_UNREACHABLE(); -} - -inline double GetTimeUnitMultiplier(TimeUnit unit) { - switch (unit) { - case kSecond: - return 1; - case kMillisecond: - return 1e3; - case kMicrosecond: - return 1e6; - case kNanosecond: - return 1e9; - } - BENCHMARK_UNREACHABLE(); -} - -} // namespace benchmark - -#endif // BENCHMARK_BENCHMARK_H_ diff --git a/libcxx/utils/google-benchmark/requirements.txt b/libcxx/utils/google-benchmark/requirements.txt deleted file mode 100644 index 85e898604068..000000000000 --- a/libcxx/utils/google-benchmark/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy == 1.19.4 -scipy == 1.5.4 diff --git a/libcxx/utils/google-benchmark/setup.py b/libcxx/utils/google-benchmark/setup.py deleted file mode 100644 index 5cdab10cf77c..000000000000 --- a/libcxx/utils/google-benchmark/setup.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import posixpath -import re -import shutil -import sys - -from distutils import sysconfig -import setuptools -from setuptools.command import build_ext - - -HERE = os.path.dirname(os.path.abspath(__file__)) - - -IS_WINDOWS = sys.platform.startswith("win") - - -def _get_version(): - """Parse the version string from __init__.py.""" - with open( - os.path.join(HERE, "bindings", "python", "google_benchmark", "__init__.py") - ) as init_file: - try: - version_line = next( - line for line in init_file if line.startswith("__version__") - ) - except StopIteration: - raise ValueError("__version__ not defined in __init__.py") - else: - namespace = 
{} - exec(version_line, namespace) # pylint: disable=exec-used - return namespace["__version__"] - - -def _parse_requirements(path): - with open(os.path.join(HERE, path)) as requirements: - return [ - line.rstrip() - for line in requirements - if not (line.isspace() or line.startswith("#")) - ] - - -class BazelExtension(setuptools.Extension): - """A C/C++ extension that is defined as a Bazel BUILD target.""" - - def __init__(self, name, bazel_target): - self.bazel_target = bazel_target - self.relpath, self.target_name = posixpath.relpath(bazel_target, "//").split( - ":" - ) - setuptools.Extension.__init__(self, name, sources=[]) - - -class BuildBazelExtension(build_ext.build_ext): - """A command that runs Bazel to build a C/C++ extension.""" - - def run(self): - for ext in self.extensions: - self.bazel_build(ext) - build_ext.build_ext.run(self) - - def bazel_build(self, ext): - """Runs the bazel build to create the package.""" - with open("WORKSPACE", "r") as workspace: - workspace_contents = workspace.read() - - with open("WORKSPACE", "w") as workspace: - workspace.write( - re.sub( - r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)', - sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep), - workspace_contents, - ) - ) - - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - - bazel_argv = [ - "bazel", - "build", - ext.bazel_target, - "--symlink_prefix=" + os.path.join(self.build_temp, "bazel-"), - "--compilation_mode=" + ("dbg" if self.debug else "opt"), - ] - - if IS_WINDOWS: - # Link with python*.lib. 
- for library_dir in self.library_dirs: - bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) - - self.spawn(bazel_argv) - - shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' - ext_bazel_bin_path = os.path.join( - self.build_temp, 'bazel-bin', - ext.relpath, ext.target_name + shared_lib_suffix) - - ext_dest_path = self.get_ext_fullpath(ext.name) - ext_dest_dir = os.path.dirname(ext_dest_path) - if not os.path.exists(ext_dest_dir): - os.makedirs(ext_dest_dir) - shutil.copyfile(ext_bazel_bin_path, ext_dest_path) - - -setuptools.setup( - name="google_benchmark", - version=_get_version(), - url="https://github.com/google/benchmark", - description="A library to benchmark code snippets.", - author="Google", - author_email="benchmark-py@google.com", - # Contained modules and scripts. - package_dir={"": "bindings/python"}, - packages=setuptools.find_packages("bindings/python"), - install_requires=_parse_requirements("bindings/python/requirements.txt"), - cmdclass=dict(build_ext=BuildBazelExtension), - ext_modules=[ - BazelExtension( - "google_benchmark._benchmark", - "//bindings/python/google_benchmark:_benchmark", - ) - ], - zip_safe=False, - # PyPI package information. 
- classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Software Development :: Testing", - "Topic :: System :: Benchmark", - ], - license="Apache 2.0", - keywords="benchmark", -) diff --git a/libcxx/utils/google-benchmark/src/CMakeLists.txt b/libcxx/utils/google-benchmark/src/CMakeLists.txt deleted file mode 100644 index a6c8e9a7a0b7..000000000000 --- a/libcxx/utils/google-benchmark/src/CMakeLists.txt +++ /dev/null @@ -1,120 +0,0 @@ -# Allow the source files to find headers in src/ -include(GNUInstallDirs) -include_directories(${PROJECT_SOURCE_DIR}/src) - -if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) - list(APPEND CMAKE_SHARED_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) - list(APPEND CMAKE_MODULE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) -endif() - -file(GLOB - SOURCE_FILES - *.cc - ${PROJECT_SOURCE_DIR}/include/benchmark/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -file(GLOB BENCHMARK_MAIN "benchmark_main.cc") -foreach(item ${BENCHMARK_MAIN}) - list(REMOVE_ITEM SOURCE_FILES "${item}") -endforeach() - -add_library(benchmark ${SOURCE_FILES}) -add_library(benchmark::benchmark ALIAS benchmark) -set_target_properties(benchmark PROPERTIES - OUTPUT_NAME "benchmark" - VERSION ${GENERIC_LIB_VERSION} - SOVERSION ${GENERIC_LIB_SOVERSION} -) -target_include_directories(benchmark PUBLIC - $ - ) - -# libpfm, if available -if (HAVE_LIBPFM) - target_link_libraries(benchmark libpfm.a) - add_definitions(-DHAVE_LIBPFM) -endif() - -# Link threads. 
-target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -find_library(LIBRT rt) -if(LIBRT) - target_link_libraries(benchmark ${LIBRT}) -endif() - -if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) -endif() -if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*") - message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.") - target_link_libraries(benchmark -pthread) -endif() - -# We need extra libraries on Windows -if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") - target_link_libraries(benchmark shlwapi) -endif() - -# We need extra libraries on Solaris -if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") - target_link_libraries(benchmark kstat) -endif() - -# Benchmark main library -add_library(benchmark_main "benchmark_main.cc") -add_library(benchmark::benchmark_main ALIAS benchmark_main) -set_target_properties(benchmark_main PROPERTIES - OUTPUT_NAME "benchmark_main" - VERSION ${GENERIC_LIB_VERSION} - SOVERSION ${GENERIC_LIB_SOVERSION} -) -target_include_directories(benchmark PUBLIC - $ - ) -target_link_libraries(benchmark_main benchmark::benchmark) - - -set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") - -set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") -set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") -set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") -set(targets_export_name "${PROJECT_NAME}Targets") - -set(namespace "${PROJECT_NAME}::") - -include(CMakePackageConfigHelpers) -write_basic_package_version_file( - "${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion -) - -configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY) 
-configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) - -if (BENCHMARK_ENABLE_INSTALL) - # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) - install( - TARGETS benchmark benchmark_main - EXPORT ${targets_export_name} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - - install( - DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - FILES_MATCHING PATTERN "*.*h") - - install( - FILES "${project_config}" "${version_config}" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") - - install( - FILES "${pkg_config}" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") - - install( - EXPORT "${targets_export_name}" - NAMESPACE "${namespace}" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") -endif() diff --git a/libcxx/utils/google-benchmark/src/arraysize.h b/libcxx/utils/google-benchmark/src/arraysize.h deleted file mode 100644 index 51a50f2dff27..000000000000 --- a/libcxx/utils/google-benchmark/src/arraysize.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef BENCHMARK_ARRAYSIZE_H_ -#define BENCHMARK_ARRAYSIZE_H_ - -#include "internal_macros.h" - -namespace benchmark { -namespace internal { -// The arraysize(arr) macro returns the # of elements in an array arr. -// The expression is a compile-time constant, and therefore can be -// used in defining new arrays, for example. If you use arraysize on -// a pointer by mistake, you will get a compile-time error. -// - -// This template function declaration is used in defining arraysize. -// Note that the function doesn't need an implementation, as we only -// use its type. -template -char (&ArraySizeHelper(T (&array)[N]))[N]; - -// That gcc wants both of these prototypes seems mysterious. VC, for -// its part, can't decide which to use (another mystery). 
Matching of -// template overloads: the final frontier. -#ifndef COMPILER_MSVC -template -char (&ArraySizeHelper(const T (&array)[N]))[N]; -#endif - -#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array))) - -} // end namespace internal -} // end namespace benchmark - -#endif // BENCHMARK_ARRAYSIZE_H_ diff --git a/libcxx/utils/google-benchmark/src/benchmark.cc b/libcxx/utils/google-benchmark/src/benchmark.cc deleted file mode 100644 index 89f64967bf18..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark.cc +++ /dev/null @@ -1,617 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark/benchmark.h" - -#include "benchmark_api_internal.h" -#include "benchmark_runner.h" -#include "internal_macros.h" - -#ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "check.h" -#include "colorprint.h" -#include "commandlineflags.h" -#include "complexity.h" -#include "counter.h" -#include "internal_macros.h" -#include "log.h" -#include "mutex.h" -#include "perf_counters.h" -#include "re.h" -#include "statistics.h" -#include "string_util.h" -#include "thread_manager.h" -#include "thread_timer.h" - -// Print a list of benchmarks. 
This option overrides all other options. -DEFINE_bool(benchmark_list_tests, false); - -// A regular expression that specifies the set of benchmarks to execute. If -// this flag is empty, or if this flag is the string \"all\", all benchmarks -// linked into the binary are run. -DEFINE_string(benchmark_filter, "."); - -// Minimum number of seconds we should run benchmark before results are -// considered significant. For cpu-time based tests, this is the lower bound -// on the total cpu time used by all threads that make up the test. For -// real-time based tests, this is the lower bound on the elapsed time of the -// benchmark execution, regardless of number of threads. -DEFINE_double(benchmark_min_time, 0.5); - -// The number of runs of each benchmark. If greater than 1, the mean and -// standard deviation of the runs will be reported. -DEFINE_int32(benchmark_repetitions, 1); - -// If set, enable random interleaving of repetitions of all benchmarks. -// See http://github.com/google/benchmark/issues/1051 for details. -DEFINE_bool(benchmark_enable_random_interleaving, false); - -// Report the result of each benchmark repetitions. When 'true' is specified -// only the mean, standard deviation, and other statistics are reported for -// repeated benchmarks. Affects all reporters. -DEFINE_bool(benchmark_report_aggregates_only, false); - -// Display the result of each benchmark repetitions. When 'true' is specified -// only the mean, standard deviation, and other statistics are displayed for -// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects -// the display reporter, but *NOT* file reporter, which will still contain -// all the output. -DEFINE_bool(benchmark_display_aggregates_only, false); - -// The format to use for console output. -// Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_format, "console"); - -// The format to use for file output. -// Valid values are 'console', 'json', or 'csv'. 
-DEFINE_string(benchmark_out_format, "json"); - -// The file to write additional output to. -DEFINE_string(benchmark_out, ""); - -// Whether to use colors in the output. Valid values: -// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if -// the output is being sent to a terminal and the TERM environment variable is -// set to a terminal type that supports colors. -DEFINE_string(benchmark_color, "auto"); - -// Whether to use tabular format when printing user counters to the console. -// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. -DEFINE_bool(benchmark_counters_tabular, false); - -// The level of verbose logging to output -DEFINE_int32(v, 0); - -// List of additional perf counters to collect, in libpfm format. For more -// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html -DEFINE_string(benchmark_perf_counters, ""); - -namespace benchmark { -namespace internal { - -// Extra context to include in the output formatted as comma-separated key-value -// pairs. Kept internal as it's only used for parsing from env/command line. -DEFINE_kvpairs(benchmark_context, {}); - -std::map* global_context = nullptr; - -// FIXME: wouldn't LTO mess this up? 
-void UseCharPointer(char const volatile*) {} - -} // namespace internal - -State::State(IterationCount max_iters, const std::vector& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager, - internal::PerfCountersMeasurement* perf_counters_measurement) - : total_iterations_(0), - batch_leftover_(0), - max_iterations(max_iters), - started_(false), - finished_(false), - error_occurred_(false), - range_(ranges), - complexity_n_(0), - counters(), - thread_index(thread_i), - threads(n_threads), - timer_(timer), - manager_(manager), - perf_counters_measurement_(perf_counters_measurement) { - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; - - // Note: The use of offsetof below is technically undefined until C++17 - // because State is not a standard layout type. However, all compilers - // currently provide well-defined behavior as an extension (which is - // demonstrated since constexpr evaluation must diagnose all undefined - // behavior). However, GCC and Clang also warn about this use of offsetof, - // which must be suppressed. -#if defined(__INTEL_COMPILER) -#pragma warning push -#pragma warning(disable : 1875) -#elif defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Winvalid-offsetof" -#endif - // Offset tests to ensure commonly accessed data is on the first cache line. 
- const int cache_line_size = 64; - static_assert(offsetof(State, error_occurred_) <= - (cache_line_size - sizeof(error_occurred_)), - ""); -#if defined(__INTEL_COMPILER) -#pragma warning pop -#elif defined(__GNUC__) -#pragma GCC diagnostic pop -#endif -} - -void State::PauseTiming() { - // Add in time accumulated so far - CHECK(started_ && !finished_ && !error_occurred_); - timer_->StopTimer(); - if (perf_counters_measurement_) { - auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); - for (const auto& name_and_measurement : measurements) { - auto name = name_and_measurement.first; - auto measurement = name_and_measurement.second; - CHECK_EQ(counters[name], 0.0); - counters[name] = Counter(measurement, Counter::kAvgIterations); - } - } -} - -void State::ResumeTiming() { - CHECK(started_ && !finished_ && !error_occurred_); - timer_->StartTimer(); - if (perf_counters_measurement_) { - perf_counters_measurement_->Start(); - } -} - -void State::SkipWithError(const char* msg) { - CHECK(msg); - error_occurred_ = true; - { - MutexLock l(manager_->GetBenchmarkMutex()); - if (manager_->results.has_error_ == false) { - manager_->results.error_message_ = msg; - manager_->results.has_error_ = true; - } - } - total_iterations_ = 0; - if (timer_->running()) timer_->StopTimer(); -} - -void State::SetIterationTime(double seconds) { - timer_->SetIterationTime(seconds); -} - -void State::SetLabel(const char* label) { - MutexLock l(manager_->GetBenchmarkMutex()); - manager_->results.report_label_ = label; -} - -void State::StartKeepRunning() { - CHECK(!started_ && !finished_); - started_ = true; - total_iterations_ = error_occurred_ ? 0 : max_iterations; - manager_->StartStopBarrier(); - if (!error_occurred_) ResumeTiming(); -} - -void State::FinishKeepRunning() { - CHECK(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { - PauseTiming(); - } - // Total iterations has now wrapped around past 0. Fix this. 
- total_iterations_ = 0; - finished_ = true; - manager_->StartStopBarrier(); -} - -namespace internal { -namespace { - -// Flushes streams after invoking reporter methods that write to them. This -// ensures users get timely updates even when streams are not line-buffered. -void FlushStreams(BenchmarkReporter* reporter) { - if (!reporter) return; - std::flush(reporter->GetOutputStream()); - std::flush(reporter->GetErrorStream()); -} - -// Reports in both display and file reporters. -void Report(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter, const RunResults& run_results) { - auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only, - const RunResults& results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - aggregates_only &= !results.aggregates_only.empty(); - if (!aggregates_only) reporter->ReportRuns(results.non_aggregates); - if (!results.aggregates_only.empty()) - reporter->ReportRuns(results.aggregates_only); - }; - - report_one(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report_one(file_reporter, run_results.file_report_aggregates_only, - run_results); - - FlushStreams(display_reporter); - FlushStreams(file_reporter); -} - -void RunBenchmarks(const std::vector& benchmarks, - BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter) { - // Note the file_reporter can be null. - CHECK(display_reporter != nullptr); - - // Determine the width of the name field using a minimum width of 10. 
- bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; - size_t name_field_width = 10; - size_t stat_field_width = 0; - for (const BenchmarkInstance& benchmark : benchmarks) { - name_field_width = - std::max(name_field_width, benchmark.name().str().size()); - might_have_aggregates |= benchmark.repetitions() > 1; - - for (const auto& Stat : benchmark.statistics()) - stat_field_width = std::max(stat_field_width, Stat.name_.size()); - } - if (might_have_aggregates) name_field_width += 1 + stat_field_width; - - // Print header here - BenchmarkReporter::Context context; - context.name_field_width = name_field_width; - - // Keep track of running times of all instances of each benchmark family. - std::map - per_family_reports; - - if (display_reporter->ReportContext(context) && - (!file_reporter || file_reporter->ReportContext(context))) { - FlushStreams(display_reporter); - FlushStreams(file_reporter); - - size_t num_repetitions_total = 0; - - std::vector runners; - runners.reserve(benchmarks.size()); - for (const BenchmarkInstance& benchmark : benchmarks) { - BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; - if (benchmark.complexity() != oNone) - reports_for_family = &per_family_reports[benchmark.family_index()]; - - runners.emplace_back(benchmark, reports_for_family); - int num_repeats_of_this_instance = runners.back().GetNumRepeats(); - num_repetitions_total += num_repeats_of_this_instance; - if (reports_for_family) - reports_for_family->num_runs_total += num_repeats_of_this_instance; - } - assert(runners.size() == benchmarks.size() && "Unexpected runner count."); - - std::vector repetition_indices; - repetition_indices.reserve(num_repetitions_total); - for (size_t runner_index = 0, num_runners = runners.size(); - runner_index != num_runners; ++runner_index) { - const internal::BenchmarkRunner& runner = runners[runner_index]; - std::fill_n(std::back_inserter(repetition_indices), - runner.GetNumRepeats(), runner_index); - } - 
assert(repetition_indices.size() == num_repetitions_total && - "Unexpected number of repetition indexes."); - - if (FLAGS_benchmark_enable_random_interleaving) { - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(repetition_indices.begin(), repetition_indices.end(), g); - } - - for (size_t repetition_index : repetition_indices) { - internal::BenchmarkRunner& runner = runners[repetition_index]; - runner.DoOneRepetition(); - if (runner.HasRepeatsRemaining()) continue; - // FIXME: report each repetition separately, not all of them in bulk. - - RunResults run_results = runner.GetResults(); - - // Maybe calculate complexity report - if (const auto* reports_for_family = runner.GetReportsForFamily()) { - if (reports_for_family->num_runs_done == - reports_for_family->num_runs_total) { - auto additional_run_stats = ComputeBigO(reports_for_family->Runs); - run_results.aggregates_only.insert(run_results.aggregates_only.end(), - additional_run_stats.begin(), - additional_run_stats.end()); - per_family_reports.erase( - (int)reports_for_family->Runs.front().family_index); - } - } - - Report(display_reporter, file_reporter, run_results); - } - } - display_reporter->Finalize(); - if (file_reporter) file_reporter->Finalize(); - FlushStreams(display_reporter); - FlushStreams(file_reporter); -} - -// Disable deprecated warnings temporarily because we need to reference -// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -std::unique_ptr CreateReporter( - std::string const& name, ConsoleReporter::OutputOptions output_opts) { - typedef std::unique_ptr PtrType; - if (name == "console") { - return PtrType(new ConsoleReporter(output_opts)); - } else if (name == "json") { - return PtrType(new JSONReporter); - } else if (name == "csv") { - return PtrType(new CSVReporter); - } else { - std::cerr << "Unexpected format: '" << name << "'\n"; - 
std::exit(1); - } -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -} // end namespace - -bool IsZero(double n) { - return std::abs(n) < std::numeric_limits::epsilon(); -} - -ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { - int output_opts = ConsoleReporter::OO_Defaults; - auto is_benchmark_color = [force_no_color]() -> bool { - if (force_no_color) { - return false; - } - if (FLAGS_benchmark_color == "auto") { - return IsColorTerminal(); - } - return IsTruthyFlagValue(FLAGS_benchmark_color); - }; - if (is_benchmark_color()) { - output_opts |= ConsoleReporter::OO_Color; - } else { - output_opts &= ~ConsoleReporter::OO_Color; - } - if (FLAGS_benchmark_counters_tabular) { - output_opts |= ConsoleReporter::OO_Tabular; - } else { - output_opts &= ~ConsoleReporter::OO_Tabular; - } - return static_cast(output_opts); -} - -} // end namespace internal - -size_t RunSpecifiedBenchmarks() { - return RunSpecifiedBenchmarks(nullptr, nullptr); -} - -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) { - return RunSpecifiedBenchmarks(display_reporter, nullptr); -} - -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter) { - std::string spec = FLAGS_benchmark_filter; - if (spec.empty() || spec == "all") - spec = "."; // Regexp that matches all benchmarks - - // Setup the reporters - std::ofstream output_file; - std::unique_ptr default_display_reporter; - std::unique_ptr default_file_reporter; - if (!display_reporter) { - default_display_reporter = internal::CreateReporter( - FLAGS_benchmark_format, internal::GetOutputOptions()); - display_reporter = default_display_reporter.get(); - } - auto& Out = display_reporter->GetOutputStream(); - auto& Err = display_reporter->GetErrorStream(); - - std::string const& fname = FLAGS_benchmark_out; - if (fname.empty() && file_reporter) { - Err << "A custom file reporter was provided but " - "--benchmark_out= was not specified." 
- << std::endl; - std::exit(1); - } - if (!fname.empty()) { - output_file.open(fname); - if (!output_file.is_open()) { - Err << "invalid file name: '" << fname << "'" << std::endl; - std::exit(1); - } - if (!file_reporter) { - default_file_reporter = internal::CreateReporter( - FLAGS_benchmark_out_format, ConsoleReporter::OO_None); - file_reporter = default_file_reporter.get(); - } - file_reporter->SetOutputStream(&output_file); - file_reporter->SetErrorStream(&output_file); - } - - std::vector benchmarks; - if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0; - - if (benchmarks.empty()) { - Err << "Failed to match any benchmarks against regex: " << spec << "\n"; - return 0; - } - - if (FLAGS_benchmark_list_tests) { - for (auto const& benchmark : benchmarks) - Out << benchmark.name().str() << "\n"; - } else { - internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); - } - - return benchmarks.size(); -} - -void RegisterMemoryManager(MemoryManager* manager) { - internal::memory_manager = manager; -} - -void AddCustomContext(const std::string& key, const std::string& value) { - if (internal::global_context == nullptr) { - internal::global_context = new std::map(); - } - if (!internal::global_context->emplace(key, value).second) { - std::cerr << "Failed to add custom context \"" << key << "\" as it already " - << "exists with value \"" << value << "\"\n"; - } -} - -namespace internal { - -void PrintUsageAndExit() { - fprintf(stdout, - "benchmark" - " [--benchmark_list_tests={true|false}]\n" - " [--benchmark_filter=]\n" - " [--benchmark_min_time=]\n" - " [--benchmark_repetitions=]\n" - " [--benchmark_enable_random_interleaving={true|false}]\n" - " [--benchmark_report_aggregates_only={true|false}]\n" - " [--benchmark_display_aggregates_only={true|false}]\n" - " [--benchmark_format=]\n" - " [--benchmark_out=]\n" - " [--benchmark_out_format=]\n" - " [--benchmark_color={auto|true|false}]\n" - " [--benchmark_counters_tabular={true|false}]\n" - " 
[--benchmark_context==,...]\n" - " [--v=]\n"); - exit(0); -} - -void ParseCommandLineFlags(int* argc, char** argv) { - using namespace benchmark; - BenchmarkReporter::Context::executable_name = - (argc && *argc > 0) ? argv[0] : "unknown"; - for (int i = 1; argc && i < *argc; ++i) { - if (ParseBoolFlag(argv[i], "benchmark_list_tests", - &FLAGS_benchmark_list_tests) || - ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag(argv[i], "benchmark_min_time", - &FLAGS_benchmark_min_time) || - ParseInt32Flag(argv[i], "benchmark_repetitions", - &FLAGS_benchmark_repetitions) || - ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", - &FLAGS_benchmark_enable_random_interleaving) || - ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", - &FLAGS_benchmark_report_aggregates_only) || - ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", - &FLAGS_benchmark_display_aggregates_only) || - ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) || - ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || - ParseStringFlag(argv[i], "benchmark_out_format", - &FLAGS_benchmark_out_format) || - ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || - // "color_print" is the deprecated name for "benchmark_color". - // TODO: Remove this. 
- ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || - ParseBoolFlag(argv[i], "benchmark_counters_tabular", - &FLAGS_benchmark_counters_tabular) || - ParseStringFlag(argv[i], "benchmark_perf_counters", - &FLAGS_benchmark_perf_counters) || - ParseKeyValueFlag(argv[i], "benchmark_context", - &FLAGS_benchmark_context) || - ParseInt32Flag(argv[i], "v", &FLAGS_v)) { - for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; - - --(*argc); - --i; - } else if (IsFlag(argv[i], "help")) { - PrintUsageAndExit(); - } - } - for (auto const* flag : - {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) { - if (*flag != "console" && *flag != "json" && *flag != "csv") { - PrintUsageAndExit(); - } - } - if (FLAGS_benchmark_color.empty()) { - PrintUsageAndExit(); - } - for (const auto& kv : FLAGS_benchmark_context) { - AddCustomContext(kv.first, kv.second); - } -} - -int InitializeStreams() { - static std::ios_base::Init init; - return 0; -} - -} // end namespace internal - -void Initialize(int* argc, char** argv) { - internal::ParseCommandLineFlags(argc, argv); - internal::LogLevel() = FLAGS_v; -} - -void Shutdown() { - delete internal::global_context; -} - -bool ReportUnrecognizedArguments(int argc, char** argv) { - for (int i = 1; i < argc; ++i) { - fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], - argv[i]); - } - return argc > 1; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc b/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc deleted file mode 100644 index 89da519afc8c..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc +++ /dev/null @@ -1,94 +0,0 @@ -#include "benchmark_api_internal.h" - -#include - -#include "string_util.h" - -namespace benchmark { -namespace internal { - -BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, - int per_family_instance_idx, - const std::vector& args, - int thread_count) - : 
benchmark_(*benchmark), - family_index_(family_idx), - per_family_instance_index_(per_family_instance_idx), - aggregation_report_mode_(benchmark_.aggregation_report_mode_), - args_(args), - time_unit_(benchmark_.time_unit_), - measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), - use_real_time_(benchmark_.use_real_time_), - use_manual_time_(benchmark_.use_manual_time_), - complexity_(benchmark_.complexity_), - complexity_lambda_(benchmark_.complexity_lambda_), - statistics_(benchmark_.statistics_), - repetitions_(benchmark_.repetitions_), - min_time_(benchmark_.min_time_), - iterations_(benchmark_.iterations_), - threads_(thread_count) { - name_.function_name = benchmark_.name_; - - size_t arg_i = 0; - for (const auto& arg : args) { - if (!name_.args.empty()) { - name_.args += '/'; - } - - if (arg_i < benchmark->arg_names_.size()) { - const auto& arg_name = benchmark_.arg_names_[arg_i]; - if (!arg_name.empty()) { - name_.args += StrFormat("%s:", arg_name.c_str()); - } - } - - name_.args += StrFormat("%" PRId64, arg); - ++arg_i; - } - - if (!IsZero(benchmark->min_time_)) { - name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); - } - - if (benchmark_.iterations_ != 0) { - name_.iterations = StrFormat( - "iterations:%lu", static_cast(benchmark_.iterations_)); - } - - if (benchmark_.repetitions_ != 0) { - name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_); - } - - if (benchmark_.measure_process_cpu_time_) { - name_.time_type = "process_time"; - } - - if (benchmark_.use_manual_time_) { - if (!name_.time_type.empty()) { - name_.time_type += '/'; - } - name_.time_type += "manual_time"; - } else if (benchmark_.use_real_time_) { - if (!name_.time_type.empty()) { - name_.time_type += '/'; - } - name_.time_type += "real_time"; - } - - if (!benchmark_.thread_counts_.empty()) { - name_.threads = StrFormat("threads:%d", threads_); - } -} - -State BenchmarkInstance::Run( - IterationCount iters, int thread_id, internal::ThreadTimer* 
timer, - internal::ThreadManager* manager, - internal::PerfCountersMeasurement* perf_counters_measurement) const { - State st(iters, args_, thread_id, threads_, timer, manager, - perf_counters_measurement); - benchmark_.Run(st); - return st; -} - -} // namespace internal -} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h b/libcxx/utils/google-benchmark/src/benchmark_api_internal.h deleted file mode 100644 index 9296b7d2c816..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_api_internal.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef BENCHMARK_API_INTERNAL_H -#define BENCHMARK_API_INTERNAL_H - -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "commandlineflags.h" - -namespace benchmark { -namespace internal { - -// Information kept per benchmark we may want to run -class BenchmarkInstance { - public: - BenchmarkInstance(Benchmark* benchmark, int family_index, - int per_family_instance_index, - const std::vector& args, int threads); - - const BenchmarkName& name() const { return name_; } - int family_index() const { return family_index_; } - int per_family_instance_index() const { return per_family_instance_index_; } - AggregationReportMode aggregation_report_mode() const { - return aggregation_report_mode_; - } - TimeUnit time_unit() const { return time_unit_; } - bool measure_process_cpu_time() const { return measure_process_cpu_time_; } - bool use_real_time() const { return use_real_time_; } - bool use_manual_time() const { return use_manual_time_; } - BigO complexity() const { return complexity_; } - BigOFunc& complexity_lambda() const { return *complexity_lambda_; } - const std::vector& statistics() const { return statistics_; } - int repetitions() const { return repetitions_; } - double min_time() const { return min_time_; } - IterationCount iterations() const { return iterations_; } - int threads() const { return threads_; } - - State 
Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, - internal::ThreadManager* manager, - internal::PerfCountersMeasurement* perf_counters_measurement) const; - - private: - BenchmarkName name_; - Benchmark& benchmark_; - const int family_index_; - const int per_family_instance_index_; - AggregationReportMode aggregation_report_mode_; - const std::vector& args_; - TimeUnit time_unit_; - bool measure_process_cpu_time_; - bool use_real_time_; - bool use_manual_time_; - BigO complexity_; - BigOFunc* complexity_lambda_; - UserCounters counters_; - const std::vector& statistics_; - int repetitions_; - double min_time_; - IterationCount iterations_; - int threads_; // Number of concurrent threads to us -}; - -bool FindBenchmarksInternal(const std::string& re, - std::vector* benchmarks, - std::ostream* Err); - -bool IsZero(double n); - -ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); - -} // end namespace internal -} // end namespace benchmark - -#endif // BENCHMARK_API_INTERNAL_H diff --git a/libcxx/utils/google-benchmark/src/benchmark_main.cc b/libcxx/utils/google-benchmark/src/benchmark_main.cc deleted file mode 100644 index b3b247831496..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_main.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2018 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "benchmark/benchmark.h" - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/src/benchmark_name.cc b/libcxx/utils/google-benchmark/src/benchmark_name.cc deleted file mode 100644 index 2a17ebce277f..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_name.cc +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -namespace benchmark { - -namespace { - -// Compute the total size of a pack of std::strings -size_t size_impl() { return 0; } - -template -size_t size_impl(const Head& head, const Tail&... tail) { - return head.size() + size_impl(tail...); -} - -// Join a pack of std::strings using a delimiter -// TODO: use absl::StrJoin -void join_impl(std::string&, char) {} - -template -void join_impl(std::string& s, const char delimiter, const Head& head, - const Tail&... tail) { - if (!s.empty() && !head.empty()) { - s += delimiter; - } - - s += head; - - join_impl(s, delimiter, tail...); -} - -template -std::string join(char delimiter, const Ts&... 
ts) { - std::string s; - s.reserve(sizeof...(Ts) + size_impl(ts...)); - join_impl(s, delimiter, ts...); - return s; -} -} // namespace - -std::string BenchmarkName::str() const { - return join('/', function_name, args, min_time, iterations, repetitions, - time_type, threads); -} -} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.cc b/libcxx/utils/google-benchmark/src/benchmark_register.cc deleted file mode 100644 index 574462220e7c..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_register.cc +++ /dev/null @@ -1,461 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark_register.h" - -#ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "benchmark_api_internal.h" -#include "check.h" -#include "commandlineflags.h" -#include "complexity.h" -#include "internal_macros.h" -#include "log.h" -#include "mutex.h" -#include "re.h" -#include "statistics.h" -#include "string_util.h" -#include "timers.h" - -namespace benchmark { - -namespace { -// For non-dense Range, intermediate values are powers of kRangeMultiplier. 
-static const int kRangeMultiplier = 8; -// The size of a benchmark family determines is the number of inputs to repeat -// the benchmark on. If this is "large" then warn the user during configuration. -static const size_t kMaxFamilySize = 100; -} // end namespace - -namespace internal { - -//=============================================================================// -// BenchmarkFamilies -//=============================================================================// - -// Class for managing registered benchmarks. Note that each registered -// benchmark identifies a family of related benchmarks to run. -class BenchmarkFamilies { - public: - static BenchmarkFamilies* GetInstance(); - - // Registers a benchmark family and returns the index assigned to it. - size_t AddBenchmark(std::unique_ptr family); - - // Clear all registered benchmark families. - void ClearBenchmarks(); - - // Extract the list of benchmark instances that match the specified - // regular expression. - bool FindBenchmarks(std::string re, - std::vector* benchmarks, - std::ostream* Err); - - private: - BenchmarkFamilies() {} - - std::vector> families_; - Mutex mutex_; -}; - -BenchmarkFamilies* BenchmarkFamilies::GetInstance() { - static BenchmarkFamilies instance; - return &instance; -} - -size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr family) { - MutexLock l(mutex_); - size_t index = families_.size(); - families_.push_back(std::move(family)); - return index; -} - -void BenchmarkFamilies::ClearBenchmarks() { - MutexLock l(mutex_); - families_.clear(); - families_.shrink_to_fit(); -} - -bool BenchmarkFamilies::FindBenchmarks( - std::string spec, std::vector* benchmarks, - std::ostream* ErrStream) { - CHECK(ErrStream); - auto& Err = *ErrStream; - // Make regular expression out of command-line flag - std::string error_msg; - Regex re; - bool isNegativeFilter = false; - if (spec[0] == '-') { - spec.replace(0, 1, ""); - isNegativeFilter = true; - } - if (!re.Init(spec, &error_msg)) { - Err 
<< "Could not compile benchmark re: " << error_msg << std::endl; - return false; - } - - // Special list of thread counts to use when none are specified - const std::vector one_thread = {1}; - - int next_family_index = 0; - - MutexLock l(mutex_); - for (std::unique_ptr& family : families_) { - int family_index = next_family_index; - int per_family_instance_index = 0; - - // Family was deleted or benchmark doesn't match - if (!family) continue; - - if (family->ArgsCnt() == -1) { - family->Args({}); - } - const std::vector* thread_counts = - (family->thread_counts_.empty() - ? &one_thread - : &static_cast&>(family->thread_counts_)); - const size_t family_size = family->args_.size() * thread_counts->size(); - // The benchmark will be run at least 'family_size' different inputs. - // If 'family_size' is very large warn the user. - if (family_size > kMaxFamilySize) { - Err << "The number of inputs is very large. " << family->name_ - << " will be repeated at least " << family_size << " times.\n"; - } - // reserve in the special case the regex ".", since we know the final - // family size. - if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); - - for (auto const& args : family->args_) { - for (int num_threads : *thread_counts) { - BenchmarkInstance instance(family.get(), family_index, - per_family_instance_index, args, - num_threads); - - const auto full_name = instance.name().str(); - if ((re.Match(full_name) && !isNegativeFilter) || - (!re.Match(full_name) && isNegativeFilter)) { - benchmarks->push_back(std::move(instance)); - - ++per_family_instance_index; - - // Only bump the next family index once we've estabilished that - // at least one instance of this family will be run. 
- if (next_family_index == family_index) ++next_family_index; - } - } - } - } - return true; -} - -Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { - std::unique_ptr bench_ptr(bench); - BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); - families->AddBenchmark(std::move(bench_ptr)); - return bench; -} - -// FIXME: This function is a hack so that benchmark.cc can access -// `BenchmarkFamilies` -bool FindBenchmarksInternal(const std::string& re, - std::vector* benchmarks, - std::ostream* Err) { - return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); -} - -//=============================================================================// -// Benchmark -//=============================================================================// - -Benchmark::Benchmark(const char* name) - : name_(name), - aggregation_report_mode_(ARM_Unspecified), - time_unit_(kNanosecond), - range_multiplier_(kRangeMultiplier), - min_time_(0), - iterations_(0), - repetitions_(0), - measure_process_cpu_time_(false), - use_real_time_(false), - use_manual_time_(false), - complexity_(oNone), - complexity_lambda_(nullptr) { - ComputeStatistics("mean", StatisticsMean); - ComputeStatistics("median", StatisticsMedian); - ComputeStatistics("stddev", StatisticsStdDev); -} - -Benchmark::~Benchmark() {} - -Benchmark* Benchmark::Name(const std::string& name) { - SetName(name.c_str()); - return this; -} - -Benchmark* Benchmark::Arg(int64_t x) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - args_.push_back({x}); - return this; -} - -Benchmark* Benchmark::Unit(TimeUnit unit) { - time_unit_ = unit; - return this; -} - -Benchmark* Benchmark::Range(int64_t start, int64_t limit) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - std::vector arglist; - AddRange(&arglist, start, limit, range_multiplier_); - - for (int64_t i : arglist) { - args_.push_back({i}); - } - return this; -} - -Benchmark* Benchmark::Ranges( - const std::vector>& ranges) { - CHECK(ArgsCnt() == -1 || 
ArgsCnt() == static_cast(ranges.size())); - std::vector> arglists(ranges.size()); - for (std::size_t i = 0; i < ranges.size(); i++) { - AddRange(&arglists[i], ranges[i].first, ranges[i].second, - range_multiplier_); - } - - ArgsProduct(arglists); - - return this; -} - -Benchmark* Benchmark::ArgsProduct( - const std::vector>& arglists) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); - - std::vector indices(arglists.size()); - const std::size_t total = std::accumulate( - std::begin(arglists), std::end(arglists), std::size_t{1}, - [](const std::size_t res, const std::vector& arglist) { - return res * arglist.size(); - }); - std::vector args; - args.reserve(arglists.size()); - for (std::size_t i = 0; i < total; i++) { - for (std::size_t arg = 0; arg < arglists.size(); arg++) { - args.push_back(arglists[arg][indices[arg]]); - } - args_.push_back(args); - args.clear(); - - std::size_t arg = 0; - do { - indices[arg] = (indices[arg] + 1) % arglists[arg].size(); - } while (indices[arg++] == 0 && arg < arglists.size()); - } - - return this; -} - -Benchmark* Benchmark::ArgName(const std::string& name) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - arg_names_ = {name}; - return this; -} - -Benchmark* Benchmark::ArgNames(const std::vector& names) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); - arg_names_ = names; - return this; -} - -Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - CHECK_LE(start, limit); - for (int64_t arg = start; arg <= limit; arg += step) { - args_.push_back({arg}); - } - return this; -} - -Benchmark* Benchmark::Args(const std::vector& args) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); - args_.push_back(args); - return this; -} - -Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { - custom_arguments(this); - return this; -} - -Benchmark* Benchmark::RangeMultiplier(int multiplier) { - 
CHECK(multiplier > 1); - range_multiplier_ = multiplier; - return this; -} - -Benchmark* Benchmark::MinTime(double t) { - CHECK(t > 0.0); - CHECK(iterations_ == 0); - min_time_ = t; - return this; -} - -Benchmark* Benchmark::Iterations(IterationCount n) { - CHECK(n > 0); - CHECK(IsZero(min_time_)); - iterations_ = n; - return this; -} - -Benchmark* Benchmark::Repetitions(int n) { - CHECK(n > 0); - repetitions_ = n; - return this; -} - -Benchmark* Benchmark::ReportAggregatesOnly(bool value) { - aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default; - return this; -} - -Benchmark* Benchmark::DisplayAggregatesOnly(bool value) { - // If we were called, the report mode is no longer 'unspecified', in any case. - aggregation_report_mode_ = static_cast( - aggregation_report_mode_ | ARM_Default); - - if (value) { - aggregation_report_mode_ = static_cast( - aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly); - } else { - aggregation_report_mode_ = static_cast( - aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly); - } - - return this; -} - -Benchmark* Benchmark::MeasureProcessCPUTime() { - // Can be used together with UseRealTime() / UseManualTime(). 
- measure_process_cpu_time_ = true; - return this; -} - -Benchmark* Benchmark::UseRealTime() { - CHECK(!use_manual_time_) - << "Cannot set UseRealTime and UseManualTime simultaneously."; - use_real_time_ = true; - return this; -} - -Benchmark* Benchmark::UseManualTime() { - CHECK(!use_real_time_) - << "Cannot set UseRealTime and UseManualTime simultaneously."; - use_manual_time_ = true; - return this; -} - -Benchmark* Benchmark::Complexity(BigO complexity) { - complexity_ = complexity; - return this; -} - -Benchmark* Benchmark::Complexity(BigOFunc* complexity) { - complexity_lambda_ = complexity; - complexity_ = oLambda; - return this; -} - -Benchmark* Benchmark::ComputeStatistics(std::string name, - StatisticsFunc* statistics) { - statistics_.emplace_back(name, statistics); - return this; -} - -Benchmark* Benchmark::Threads(int t) { - CHECK_GT(t, 0); - thread_counts_.push_back(t); - return this; -} - -Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); - - AddRange(&thread_counts_, min_threads, max_threads, 2); - return this; -} - -Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, - int stride) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); - CHECK_GE(stride, 1); - - for (auto i = min_threads; i < max_threads; i += stride) { - thread_counts_.push_back(i); - } - thread_counts_.push_back(max_threads); - return this; -} - -Benchmark* Benchmark::ThreadPerCpu() { - thread_counts_.push_back(CPUInfo::Get().num_cpus); - return this; -} - -void Benchmark::SetName(const char* name) { name_ = name; } - -int Benchmark::ArgsCnt() const { - if (args_.empty()) { - if (arg_names_.empty()) return -1; - return static_cast(arg_names_.size()); - } - return static_cast(args_.front().size()); -} - -//=============================================================================// -// FunctionBenchmark 
-//=============================================================================// - -void FunctionBenchmark::Run(State& st) { func_(st); } - -} // end namespace internal - -void ClearRegisteredBenchmarks() { - internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_register.h b/libcxx/utils/google-benchmark/src/benchmark_register.h deleted file mode 100644 index 09496607f224..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_register.h +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef BENCHMARK_REGISTER_H -#define BENCHMARK_REGISTER_H - -#include -#include - -#include "check.h" - -namespace benchmark { -namespace internal { - -// Append the powers of 'mult' in the closed interval [lo, hi]. -// Returns iterator to the start of the inserted range. -template -typename std::vector::iterator -AddPowers(std::vector* dst, T lo, T hi, int mult) { - CHECK_GE(lo, 0); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); - - const size_t start_offset = dst->size(); - - static const T kmax = std::numeric_limits::max(); - - // Space out the values in multiples of "mult" - for (T i = static_cast(1); i <= hi; i *= mult) { - if (i >= lo) { - dst->push_back(i); - } - // Break the loop here since multiplying by - // 'mult' would move outside of the range of T - if (i > kmax / mult) break; - } - - return dst->begin() + start_offset; -} - -template -void AddNegatedPowers(std::vector* dst, T lo, T hi, int mult) { - // We negate lo and hi so we require that they cannot be equal to 'min'. - CHECK_GT(lo, std::numeric_limits::min()); - CHECK_GT(hi, std::numeric_limits::min()); - CHECK_GE(hi, lo); - CHECK_LE(hi, 0); - - // Add positive powers, then negate and reverse. - // Casts necessary since small integers get promoted - // to 'int' when negating. 
- const auto lo_complement = static_cast(-lo); - const auto hi_complement = static_cast(-hi); - - const auto it = AddPowers(dst, hi_complement, lo_complement, mult); - - std::for_each(it, dst->end(), [](T& t) { t *= -1; }); - std::reverse(it, dst->end()); -} - -template -void AddRange(std::vector* dst, T lo, T hi, int mult) { - static_assert(std::is_integral::value && std::is_signed::value, - "Args type must be a signed integer"); - - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); - - // Add "lo" - dst->push_back(lo); - - // Handle lo == hi as a special case, so we then know - // lo < hi and so it is safe to add 1 to lo and subtract 1 - // from hi without falling outside of the range of T. - if (lo == hi) return; - - // Ensure that lo_inner <= hi_inner below. - if (lo + 1 == hi) { - dst->push_back(hi); - return; - } - - // Add all powers of 'mult' in the range [lo+1, hi-1] (inclusive). - const auto lo_inner = static_cast(lo + 1); - const auto hi_inner = static_cast(hi - 1); - - // Insert negative values - if (lo_inner < 0) { - AddNegatedPowers(dst, lo_inner, std::min(hi_inner, T{-1}), mult); - } - - // Treat 0 as a special case (see discussion on #762). - if (lo < 0 && hi >= 0) { - dst->push_back(0); - } - - // Insert positive values - if (hi_inner > 0) { - AddPowers(dst, std::max(lo_inner, T{1}), hi_inner, mult); - } - - // Add "hi" (if different from last value). - if (hi != dst->back()) { - dst->push_back(hi); - } -} - -} // namespace internal -} // namespace benchmark - -#endif // BENCHMARK_REGISTER_H diff --git a/libcxx/utils/google-benchmark/src/benchmark_runner.cc b/libcxx/utils/google-benchmark/src/benchmark_runner.cc deleted file mode 100644 index 6742d42dbecd..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_runner.cc +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark_runner.h" - -#include "benchmark/benchmark.h" -#include "benchmark_api_internal.h" -#include "internal_macros.h" - -#ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "check.h" -#include "colorprint.h" -#include "commandlineflags.h" -#include "complexity.h" -#include "counter.h" -#include "internal_macros.h" -#include "log.h" -#include "mutex.h" -#include "perf_counters.h" -#include "re.h" -#include "statistics.h" -#include "string_util.h" -#include "thread_manager.h" -#include "thread_timer.h" - -namespace benchmark { - -namespace internal { - -MemoryManager* memory_manager = nullptr; - -namespace { - -static constexpr IterationCount kMaxIterations = 1000000000; - -BenchmarkReporter::Run CreateRunReport( - const benchmark::internal::BenchmarkInstance& b, - const internal::ThreadManager::Result& results, - IterationCount memory_iterations, - const MemoryManager::Result& memory_result, double seconds, - int64_t repetition_index, int64_t repeats) { - // Create report about this benchmark run. 
- BenchmarkReporter::Run report; - - report.run_name = b.name(); - report.family_index = b.family_index(); - report.per_family_instance_index = b.per_family_instance_index(); - report.error_occurred = results.has_error_; - report.error_message = results.error_message_; - report.report_label = results.report_label_; - // This is the total iterations across all threads. - report.iterations = results.iterations; - report.time_unit = b.time_unit(); - report.threads = b.threads(); - report.repetition_index = repetition_index; - report.repetitions = repeats; - - if (!report.error_occurred) { - if (b.use_manual_time()) { - report.real_accumulated_time = results.manual_time_used; - } else { - report.real_accumulated_time = results.real_time_used; - } - report.cpu_accumulated_time = results.cpu_time_used; - report.complexity_n = results.complexity_n; - report.complexity = b.complexity(); - report.complexity_lambda = b.complexity_lambda(); - report.statistics = &b.statistics(); - report.counters = results.counters; - - if (memory_iterations > 0) { - report.has_memory_result = true; - report.allocs_per_iter = - memory_iterations ? static_cast(memory_result.num_allocs) / - memory_iterations - : 0; - report.max_bytes_used = memory_result.max_bytes_used; - } - - internal::Finish(&report.counters, results.iterations, seconds, - b.threads()); - } - return report; -} - -// Execute one thread of benchmark b for the specified number of iterations. -// Adds the stats collected for the thread into manager->results. -void RunInThread(const BenchmarkInstance* b, IterationCount iters, - int thread_id, ThreadManager* manager, - PerfCountersMeasurement* perf_counters_measurement) { - internal::ThreadTimer timer( - b->measure_process_cpu_time() - ? 
internal::ThreadTimer::CreateProcessCpuTime() - : internal::ThreadTimer::Create()); - State st = - b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); - CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) - << "Benchmark returned before State::KeepRunning() returned false!"; - { - MutexLock l(manager->GetBenchmarkMutex()); - internal::ThreadManager::Result& results = manager->results; - results.iterations += st.iterations(); - results.cpu_time_used += timer.cpu_time_used(); - results.real_time_used += timer.real_time_used(); - results.manual_time_used += timer.manual_time_used(); - results.complexity_n += st.complexity_length_n(); - internal::Increment(&results.counters, st.counters); - } - manager->NotifyThreadComplete(); -} - -} // end namespace - -BenchmarkRunner::BenchmarkRunner( - const benchmark::internal::BenchmarkInstance& b_, - BenchmarkReporter::PerFamilyRunReports* reports_for_family_) - : b(b_), - reports_for_family(reports_for_family_), - min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), - repeats(b.repetitions() != 0 ? b.repetitions() - : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations() != 0), - pool(b.threads() - 1), - iters(has_explicit_iteration_count ? b.iterations() : 1), - perf_counters_measurement( - PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), - perf_counters_measurement_ptr(perf_counters_measurement.IsValid() - ? 
&perf_counters_measurement - : nullptr) { - run_results.display_report_aggregates_only = - (FLAGS_benchmark_report_aggregates_only || - FLAGS_benchmark_display_aggregates_only); - run_results.file_report_aggregates_only = - FLAGS_benchmark_report_aggregates_only; - if (b.aggregation_report_mode() != internal::ARM_Unspecified) { - run_results.display_report_aggregates_only = - (b.aggregation_report_mode() & - internal::ARM_DisplayReportAggregatesOnly); - run_results.file_report_aggregates_only = - (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); - CHECK(FLAGS_benchmark_perf_counters.empty() || - perf_counters_measurement.IsValid()) - << "Perf counters were requested but could not be set up."; - } -} - -BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { - VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; - - std::unique_ptr manager; - manager.reset(new internal::ThreadManager(b.threads())); - - // Run all but one thread in separate threads - for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti + 1), - manager.get(), perf_counters_measurement_ptr); - } - // And run one thread here directly. - // (If we were asked to run just one thread, we don't create new threads.) - // Yes, we need to do this here *after* we start the separate threads. - RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr); - - // The main thread has finished. Now let's wait for the other threads. - manager->WaitForAllThreads(); - for (std::thread& thread : pool) thread.join(); - - IterationResults i; - // Acquire the measurements/counters from the manager, UNDER THE LOCK! - { - MutexLock l(manager->GetBenchmarkMutex()); - i.results = manager->results; - } - - // And get rid of the manager. - manager.reset(); - - // Adjust real/manual time stats since they were reported per thread. 
- i.results.real_time_used /= b.threads(); - i.results.manual_time_used /= b.threads(); - // If we were measuring whole-process CPU usage, adjust the CPU time too. - if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); - - VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" - << i.results.real_time_used << "\n"; - - // By using KeepRunningBatch a benchmark can iterate more times than - // requested, so take the iteration count from i.results. - i.iters = i.results.iterations / b.threads(); - - // Base decisions off of real time if requested by this benchmark. - i.seconds = i.results.cpu_time_used; - if (b.use_manual_time()) { - i.seconds = i.results.manual_time_used; - } else if (b.use_real_time()) { - i.seconds = i.results.real_time_used; - } - - return i; -} - -IterationCount BenchmarkRunner::PredictNumItersNeeded( - const IterationResults& i) const { - // See how much iterations should be increased by. - // Note: Avoid division by zero with max(seconds, 1ns). - double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); - // If our last run was at least 10% of FLAGS_benchmark_min_time then we - // use the multiplier directly. - // Otherwise we use at most 10 times expansion. - // NOTE: When the last run was at least 10% of the min time the max - // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; - multiplier = is_significant ? multiplier : std::min(10.0, multiplier); - if (multiplier <= 1.0) multiplier = 2.0; - - // So what seems to be the sufficiently-large iteration count? Round up. - const IterationCount max_next_iters = static_cast( - std::lround(std::max(multiplier * static_cast(i.iters), - static_cast(i.iters) + 1.0))); - // But we do have *some* sanity limits though.. - const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); - - VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; - return next_iters; // round up before conversion to integer. 
-} - -bool BenchmarkRunner::ShouldReportIterationResults( - const IterationResults& i) const { - // Determine if this run should be reported; - // Either it has run for a sufficient amount of time - // or because an error was reported. - return i.results.has_error_ || - i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); -} - -void BenchmarkRunner::DoOneRepetition() { - assert(HasRepeatsRemaining() && "Already done all repetitions?"); - - const bool is_the_first_repetition = num_repetitions_done == 0; - IterationResults i; - - // We *may* be gradually increasing the length (iteration count) - // of the benchmark until we decide the results are significant. - // And once we do, we report those last results and exit. - // Please do note that the if there are repetitions, the iteration count - // is *only* calculated for the *first* repetition, and other repetitions - // simply use that precomputed iteration count. - for (;;) { - i = DoNIterations(); - - // Do we consider the results to be significant? - // If we are doing repetitions, and the first repetition was already done, - // it has calculated the correct iteration time, so we have run that very - // iteration count just now. No need to calculate anything. Just report. - // Else, the normal rules apply. - const bool results_are_significant = !is_the_first_repetition || - has_explicit_iteration_count || - ShouldReportIterationResults(i); - - if (results_are_significant) break; // Good, let's report them! - - // Nope, bad iteration. Let's re-estimate the hopefully-sufficient - // iteration count, and run the benchmark again... 
- - iters = PredictNumItersNeeded(i); - assert(iters > i.iters && - "if we did more iterations than we want to do the next time, " - "then we should have accepted the current iteration run."); - } - - // Oh, one last thing, we need to also produce the 'memory measurements'.. - MemoryManager::Result memory_result; - IterationCount memory_iterations = 0; - if (memory_manager != nullptr) { - // Only run a few iterations to reduce the impact of one-time - // allocations in benchmarks that are not properly managed. - memory_iterations = std::min(16, iters); - memory_manager->Start(); - std::unique_ptr manager; - manager.reset(new internal::ThreadManager(1)); - RunInThread(&b, memory_iterations, 0, manager.get(), - perf_counters_measurement_ptr); - manager->WaitForAllThreads(); - manager.reset(); - - memory_manager->Stop(&memory_result); - } - - // Ok, now actualy report. - BenchmarkReporter::Run report = - CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, - num_repetitions_done, repeats); - - if (reports_for_family) { - ++reports_for_family->num_runs_done; - if (!report.error_occurred) reports_for_family->Runs.push_back(report); - } - - run_results.non_aggregates.push_back(report); - - ++num_repetitions_done; -} - -RunResults&& BenchmarkRunner::GetResults() { - assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?"); - - // Calculate additional statistics over the repetitions of this instance. - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); - - return std::move(run_results); -} - -} // end namespace internal - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/benchmark_runner.h b/libcxx/utils/google-benchmark/src/benchmark_runner.h deleted file mode 100644 index 8a855236b227..000000000000 --- a/libcxx/utils/google-benchmark/src/benchmark_runner.h +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef BENCHMARK_RUNNER_H_ -#define BENCHMARK_RUNNER_H_ - -#include -#include - -#include "benchmark_api_internal.h" -#include "internal_macros.h" -#include "perf_counters.h" -#include "thread_manager.h" - -DECLARE_double(benchmark_min_time); - -DECLARE_int32(benchmark_repetitions); - -DECLARE_bool(benchmark_report_aggregates_only); - -DECLARE_bool(benchmark_display_aggregates_only); - -DECLARE_string(benchmark_perf_counters); - -namespace benchmark { - -namespace internal { - -extern MemoryManager* memory_manager; - -struct RunResults { - std::vector non_aggregates; - std::vector aggregates_only; - - bool display_report_aggregates_only = false; - bool file_report_aggregates_only = false; -}; - -class BenchmarkRunner { - public: - BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - BenchmarkReporter::PerFamilyRunReports* reports_for_family); - - int GetNumRepeats() const { return repeats; } - - bool HasRepeatsRemaining() const { - return GetNumRepeats() != num_repetitions_done; - } - - void DoOneRepetition(); - - RunResults&& GetResults(); - - BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const { - return reports_for_family; - }; - - private: - RunResults run_results; - - const benchmark::internal::BenchmarkInstance& b; - BenchmarkReporter::PerFamilyRunReports* reports_for_family; - - const double min_time; - const int repeats; - const bool has_explicit_iteration_count; - - 
int num_repetitions_done = 0; - - std::vector pool; - - IterationCount iters; // preserved between repetitions! - // So only the first repetition has to find/calculate it, - // the other repetitions will just use that precomputed iteration count. - - PerfCountersMeasurement perf_counters_measurement; - PerfCountersMeasurement* const perf_counters_measurement_ptr; - - struct IterationResults { - internal::ThreadManager::Result results; - IterationCount iters; - double seconds; - }; - IterationResults DoNIterations(); - - IterationCount PredictNumItersNeeded(const IterationResults& i) const; - - bool ShouldReportIterationResults(const IterationResults& i) const; -}; - -} // namespace internal - -} // end namespace benchmark - -#endif // BENCHMARK_RUNNER_H_ diff --git a/libcxx/utils/google-benchmark/src/check.h b/libcxx/utils/google-benchmark/src/check.h deleted file mode 100644 index f5f8253f8040..000000000000 --- a/libcxx/utils/google-benchmark/src/check.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef CHECK_H_ -#define CHECK_H_ - -#include -#include -#include - -#include "internal_macros.h" -#include "log.h" - -namespace benchmark { -namespace internal { - -typedef void(AbortHandlerT)(); - -inline AbortHandlerT*& GetAbortHandler() { - static AbortHandlerT* handler = &std::abort; - return handler; -} - -BENCHMARK_NORETURN inline void CallAbortHandler() { - GetAbortHandler()(); - std::abort(); // fallback to enforce noreturn -} - -// CheckHandler is the class constructed by failing CHECK macros. CheckHandler -// will log information about the failures and abort when it is destructed. -class CheckHandler { - public: - CheckHandler(const char* check, const char* file, const char* func, int line) - : log_(GetErrorLogInstance()) { - log_ << file << ":" << line << ": " << func << ": Check `" << check - << "' failed. 
"; - } - - LogType& GetLog() { return log_; } - - BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { - log_ << std::endl; - CallAbortHandler(); - } - - CheckHandler& operator=(const CheckHandler&) = delete; - CheckHandler(const CheckHandler&) = delete; - CheckHandler() = delete; - - private: - LogType& log_; -}; - -} // end namespace internal -} // end namespace benchmark - -// The CHECK macro returns a std::ostream object that can have extra information -// written to it. -#ifndef NDEBUG -#define CHECK(b) \ - (b ? ::benchmark::internal::GetNullLogInstance() \ - : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ - .GetLog()) -#else -#define CHECK(b) ::benchmark::internal::GetNullLogInstance() -#endif - -// clang-format off -// preserve whitespacing between operators for alignment -#define CHECK_EQ(a, b) CHECK((a) == (b)) -#define CHECK_NE(a, b) CHECK((a) != (b)) -#define CHECK_GE(a, b) CHECK((a) >= (b)) -#define CHECK_LE(a, b) CHECK((a) <= (b)) -#define CHECK_GT(a, b) CHECK((a) > (b)) -#define CHECK_LT(a, b) CHECK((a) < (b)) - -#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps)) -#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps)) -#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps)) -#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps)) -#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps)) -#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps)) -//clang-format on - -#endif // CHECK_H_ diff --git a/libcxx/utils/google-benchmark/src/colorprint.cc b/libcxx/utils/google-benchmark/src/colorprint.cc deleted file mode 100644 index fff6a98818b8..000000000000 --- a/libcxx/utils/google-benchmark/src/colorprint.cc +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "colorprint.h" - -#include -#include -#include -#include -#include -#include - -#include "check.h" -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include -#include -#else -#include -#endif // BENCHMARK_OS_WINDOWS - -namespace benchmark { -namespace { -#ifdef BENCHMARK_OS_WINDOWS -typedef WORD PlatformColorCode; -#else -typedef const char* PlatformColorCode; -#endif - -PlatformColorCode GetPlatformColorCode(LogColor color) { -#ifdef BENCHMARK_OS_WINDOWS - switch (color) { - case COLOR_RED: - return FOREGROUND_RED; - case COLOR_GREEN: - return FOREGROUND_GREEN; - case COLOR_YELLOW: - return FOREGROUND_RED | FOREGROUND_GREEN; - case COLOR_BLUE: - return FOREGROUND_BLUE; - case COLOR_MAGENTA: - return FOREGROUND_BLUE | FOREGROUND_RED; - case COLOR_CYAN: - return FOREGROUND_BLUE | FOREGROUND_GREEN; - case COLOR_WHITE: // fall through to default - default: - return 0; - } -#else - switch (color) { - case COLOR_RED: - return "1"; - case COLOR_GREEN: - return "2"; - case COLOR_YELLOW: - return "3"; - case COLOR_BLUE: - return "4"; - case COLOR_MAGENTA: - return "5"; - case COLOR_CYAN: - return "6"; - case COLOR_WHITE: - return "7"; - default: - return nullptr; - }; -#endif -} - -} // end namespace - -std::string FormatString(const char* msg, va_list args) { - // we might need a second shot at this, so pre-emptivly make a copy - va_list args_cp; - va_copy(args_cp, args); - - std::size_t size = 256; - char local_buff[256]; - auto ret = vsnprintf(local_buff, size, msg, args_cp); - - va_end(args_cp); - - // currently there is no 
error handling for failure, so this is hack. - CHECK(ret >= 0); - - if (ret == 0) // handle empty expansion - return {}; - else if (static_cast(ret) < size) - return local_buff; - else { - // we did not provide a long enough buffer on our first attempt. - size = (size_t)ret + 1; // + 1 for the null byte - std::unique_ptr buff(new char[size]); - ret = vsnprintf(buff.get(), size, msg, args); - CHECK(ret > 0 && ((size_t)ret) < size); - return buff.get(); - } -} - -std::string FormatString(const char* msg, ...) { - va_list args; - va_start(args, msg); - auto tmp = FormatString(msg, args); - va_end(args); - return tmp; -} - -void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) { - va_list args; - va_start(args, fmt); - ColorPrintf(out, color, fmt, args); - va_end(args); -} - -void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, - va_list args) { -#ifdef BENCHMARK_OS_WINDOWS - ((void)out); // suppress unused warning - - const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); - - // Gets the current text color. - CONSOLE_SCREEN_BUFFER_INFO buffer_info; - GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); - const WORD old_color_attrs = buffer_info.wAttributes; - - // We need to flush the stream buffers into the console before each - // SetConsoleTextAttribute call lest it affect the text that is already - // printed but has not yet reached the console. - fflush(stdout); - SetConsoleTextAttribute(stdout_handle, - GetPlatformColorCode(color) | FOREGROUND_INTENSITY); - vprintf(fmt, args); - - fflush(stdout); - // Restores the text color. 
- SetConsoleTextAttribute(stdout_handle, old_color_attrs); -#else - const char* color_code = GetPlatformColorCode(color); - if (color_code) out << FormatString("\033[0;3%sm", color_code); - out << FormatString(fmt, args) << "\033[m"; -#endif -} - -bool IsColorTerminal() { -#if BENCHMARK_OS_WINDOWS - // On Windows the TERM variable is usually not set, but the - // console there does support colors. - return 0 != _isatty(_fileno(stdout)); -#else - // On non-Windows platforms, we rely on the TERM variable. This list of - // supported TERM values is copied from Google Test: - // . - const char* const SUPPORTED_TERM_VALUES[] = { - "xterm", "xterm-color", "xterm-256color", - "screen", "screen-256color", "tmux", - "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color", - "linux", "cygwin", - }; - - const char* const term = getenv("TERM"); - - bool term_supports_color = false; - for (const char* candidate : SUPPORTED_TERM_VALUES) { - if (term && 0 == strcmp(term, candidate)) { - term_supports_color = true; - break; - } - } - - return 0 != isatty(fileno(stdout)) && term_supports_color; -#endif // BENCHMARK_OS_WINDOWS -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/colorprint.h b/libcxx/utils/google-benchmark/src/colorprint.h deleted file mode 100644 index 9f6fab9b3422..000000000000 --- a/libcxx/utils/google-benchmark/src/colorprint.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef BENCHMARK_COLORPRINT_H_ -#define BENCHMARK_COLORPRINT_H_ - -#include -#include -#include - -namespace benchmark { -enum LogColor { - COLOR_DEFAULT, - COLOR_RED, - COLOR_GREEN, - COLOR_YELLOW, - COLOR_BLUE, - COLOR_MAGENTA, - COLOR_CYAN, - COLOR_WHITE -}; - -std::string FormatString(const char* msg, va_list args); -std::string FormatString(const char* msg, ...); - -void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, - va_list args); -void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...); - -// Returns true if stdout appears to be a 
terminal that supports colored -// output, false otherwise. -bool IsColorTerminal(); - -} // end namespace benchmark - -#endif // BENCHMARK_COLORPRINT_H_ diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.cc b/libcxx/utils/google-benchmark/src/commandlineflags.cc deleted file mode 100644 index 5724aaa29402..000000000000 --- a/libcxx/utils/google-benchmark/src/commandlineflags.cc +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "commandlineflags.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../src/string_util.h" - -namespace benchmark { -namespace { - -// Parses 'str' for a 32-bit signed integer. If successful, writes -// the result to *value and returns true; otherwise leaves *value -// unchanged and returns false. -bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) { - // Parses the environment variable as a decimal integer. - char* end = nullptr; - const long long_value = strtol(str, &end, 10); // NOLINT - - // Has strtol() consumed all characters in the string? - if (*end != '\0') { - // No - an invalid character was encountered. - std::cerr << src_text << " is expected to be a 32-bit integer, " - << "but actually has value \"" << str << "\".\n"; - return false; - } - - // Is the parsed value in the range of an Int32? 
- const int32_t result = static_cast(long_value); - if (long_value == std::numeric_limits::max() || - long_value == std::numeric_limits::min() || - // The parsed value overflows as a long. (strtol() returns - // LONG_MAX or LONG_MIN when the input overflows.) - result != long_value - // The parsed value overflows as an Int32. - ) { - std::cerr << src_text << " is expected to be a 32-bit integer, " - << "but actually has value \"" << str << "\", " - << "which overflows.\n"; - return false; - } - - *value = result; - return true; -} - -// Parses 'str' for a double. If successful, writes the result to *value and -// returns true; otherwise leaves *value unchanged and returns false. -bool ParseDouble(const std::string& src_text, const char* str, double* value) { - // Parses the environment variable as a decimal integer. - char* end = nullptr; - const double double_value = strtod(str, &end); // NOLINT - - // Has strtol() consumed all characters in the string? - if (*end != '\0') { - // No - an invalid character was encountered. - std::cerr << src_text << " is expected to be a double, " - << "but actually has value \"" << str << "\".\n"; - return false; - } - - *value = double_value; - return true; -} - -// Parses 'str' into KV pairs. If successful, writes the result to *value and -// returns true; otherwise leaves *value unchanged and returns false. 
-bool ParseKvPairs(const std::string& src_text, const char* str, - std::map* value) { - std::map kvs; - for (const auto& kvpair : StrSplit(str, ',')) { - const auto kv = StrSplit(kvpair, '='); - if (kv.size() != 2) { - std::cerr << src_text << " is expected to be a comma-separated list of " - << "= strings, but actually has value \"" << str - << "\".\n"; - return false; - } - if (!kvs.emplace(kv[0], kv[1]).second) { - std::cerr << src_text << " is expected to contain unique keys but key \"" - << kv[0] << "\" was repeated.\n"; - return false; - } - } - - *value = kvs; - return true; -} - -// Returns the name of the environment variable corresponding to the -// given flag. For example, FlagToEnvVar("foo") will return -// "BENCHMARK_FOO" in the open-source version. -static std::string FlagToEnvVar(const char* flag) { - const std::string flag_str(flag); - - std::string env_var; - for (size_t i = 0; i != flag_str.length(); ++i) - env_var += static_cast(::toupper(flag_str.c_str()[i])); - - return env_var; -} - -} // namespace - -bool BoolFromEnv(const char* flag, bool default_val) { - const std::string env_var = FlagToEnvVar(flag); - const char* const value_str = getenv(env_var.c_str()); - return value_str == nullptr ? 
default_val : IsTruthyFlagValue(value_str); -} - -int32_t Int32FromEnv(const char* flag, int32_t default_val) { - const std::string env_var = FlagToEnvVar(flag); - const char* const value_str = getenv(env_var.c_str()); - int32_t value = default_val; - if (value_str == nullptr || - !ParseInt32(std::string("Environment variable ") + env_var, value_str, - &value)) { - return default_val; - } - return value; -} - -double DoubleFromEnv(const char* flag, double default_val) { - const std::string env_var = FlagToEnvVar(flag); - const char* const value_str = getenv(env_var.c_str()); - double value = default_val; - if (value_str == nullptr || - !ParseDouble(std::string("Environment variable ") + env_var, value_str, - &value)) { - return default_val; - } - return value; -} - -const char* StringFromEnv(const char* flag, const char* default_val) { - const std::string env_var = FlagToEnvVar(flag); - const char* const value = getenv(env_var.c_str()); - return value == nullptr ? default_val : value; -} - -std::map KvPairsFromEnv( - const char* flag, std::map default_val) { - const std::string env_var = FlagToEnvVar(flag); - const char* const value_str = getenv(env_var.c_str()); - - if (value_str == nullptr) return default_val; - - std::map value; - if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) { - return default_val; - } - return value; -} - -// Parses a string as a command line flag. The string should have -// the format "--flag=value". When def_optional is true, the "=value" -// part can be omitted. -// -// Returns the value of the flag, or nullptr if the parsing failed. -const char* ParseFlagValue(const char* str, const char* flag, - bool def_optional) { - // str and flag must not be nullptr. - if (str == nullptr || flag == nullptr) return nullptr; - - // The flag must start with "--". 
- const std::string flag_str = std::string("--") + std::string(flag); - const size_t flag_len = flag_str.length(); - if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr; - - // Skips the flag name. - const char* flag_end = str + flag_len; - - // When def_optional is true, it's OK to not have a "=value" part. - if (def_optional && (flag_end[0] == '\0')) return flag_end; - - // If def_optional is true and there are more characters after the - // flag name, or if def_optional is false, there must be a '=' after - // the flag name. - if (flag_end[0] != '=') return nullptr; - - // Returns the string after "=". - return flag_end + 1; -} - -bool ParseBoolFlag(const char* str, const char* flag, bool* value) { - // Gets the value of the flag as a string. - const char* const value_str = ParseFlagValue(str, flag, true); - - // Aborts if the parsing failed. - if (value_str == nullptr) return false; - - // Converts the string value to a bool. - *value = IsTruthyFlagValue(value_str); - return true; -} - -bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { - // Gets the value of the flag as a string. - const char* const value_str = ParseFlagValue(str, flag, false); - - // Aborts if the parsing failed. - if (value_str == nullptr) return false; - - // Sets *value to the value of the flag. - return ParseInt32(std::string("The value of flag --") + flag, value_str, - value); -} - -bool ParseDoubleFlag(const char* str, const char* flag, double* value) { - // Gets the value of the flag as a string. - const char* const value_str = ParseFlagValue(str, flag, false); - - // Aborts if the parsing failed. - if (value_str == nullptr) return false; - - // Sets *value to the value of the flag. - return ParseDouble(std::string("The value of flag --") + flag, value_str, - value); -} - -bool ParseStringFlag(const char* str, const char* flag, std::string* value) { - // Gets the value of the flag as a string. 
- const char* const value_str = ParseFlagValue(str, flag, false); - - // Aborts if the parsing failed. - if (value_str == nullptr) return false; - - *value = value_str; - return true; -} - -bool ParseKeyValueFlag( - const char* str, const char* flag, - std::map* value) { - const char* const value_str = ParseFlagValue(str, flag, false); - - if (value_str == nullptr) return false; - - for (const auto& kvpair : StrSplit(value_str, ',')) { - const auto kv = StrSplit(kvpair, '='); - if (kv.size() != 2) return false; - value->emplace(kv[0], kv[1]); - } - - return true; -} - -bool IsFlag(const char* str, const char* flag) { - return (ParseFlagValue(str, flag, true) != nullptr); -} - -bool IsTruthyFlagValue(const std::string& value) { - if (value.size() == 1) { - char v = value[0]; - return isalnum(v) && - !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); - } else if (!value.empty()) { - std::string value_lower(value); - std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), - [](char c) { return static_cast(::tolower(c)); }); - return !(value_lower == "false" || value_lower == "no" || - value_lower == "off"); - } else - return true; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/commandlineflags.h b/libcxx/utils/google-benchmark/src/commandlineflags.h deleted file mode 100644 index 0c988cccb3ae..000000000000 --- a/libcxx/utils/google-benchmark/src/commandlineflags.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef BENCHMARK_COMMANDLINEFLAGS_H_ -#define BENCHMARK_COMMANDLINEFLAGS_H_ - -#include -#include -#include - -// Macro for referencing flags. -#define FLAG(name) FLAGS_##name - -// Macros for declaring flags. 
-#define DECLARE_bool(name) extern bool FLAG(name) -#define DECLARE_int32(name) extern int32_t FLAG(name) -#define DECLARE_double(name) extern double FLAG(name) -#define DECLARE_string(name) extern std::string FLAG(name) -#define DECLARE_kvpairs(name) \ - extern std::map FLAG(name) - -// Macros for defining flags. -#define DEFINE_bool(name, default_val) \ - bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) -#define DEFINE_int32(name, default_val) \ - int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) -#define DEFINE_double(name, default_val) \ - double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) -#define DEFINE_string(name, default_val) \ - std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) -#define DEFINE_kvpairs(name, default_val) \ - std::map FLAG(name) = \ - benchmark::KvPairsFromEnv(#name, default_val) - -namespace benchmark { - -// Parses a bool from the environment variable corresponding to the given flag. -// -// If the variable exists, returns IsTruthyFlagValue() value; if not, -// returns the given default value. -bool BoolFromEnv(const char* flag, bool default_val); - -// Parses an Int32 from the environment variable corresponding to the given -// flag. -// -// If the variable exists, returns ParseInt32() value; if not, returns -// the given default value. -int32_t Int32FromEnv(const char* flag, int32_t default_val); - -// Parses an Double from the environment variable corresponding to the given -// flag. -// -// If the variable exists, returns ParseDouble(); if not, returns -// the given default value. -double DoubleFromEnv(const char* flag, double default_val); - -// Parses a string from the environment variable corresponding to the given -// flag. -// -// If variable exists, returns its value; if not, returns -// the given default value. 
-const char* StringFromEnv(const char* flag, const char* default_val); - -// Parses a set of kvpairs from the environment variable corresponding to the -// given flag. -// -// If variable exists, returns its value; if not, returns -// the given default value. -std::map KvPairsFromEnv( - const char* flag, std::map default_val); - -// Parses a string for a bool flag, in the form of either -// "--flag=value" or "--flag". -// -// In the former case, the value is taken as true if it passes IsTruthyValue(). -// -// In the latter case, the value is taken as true. -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseBoolFlag(const char* str, const char* flag, bool* value); - -// Parses a string for an Int32 flag, in the form of "--flag=value". -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); - -// Parses a string for a Double flag, in the form of "--flag=value". -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseDoubleFlag(const char* str, const char* flag, double* value); - -// Parses a string for a string flag, in the form of "--flag=value". -// -// On success, stores the value of the flag in *value, and returns -// true. On failure, returns false without changing *value. -bool ParseStringFlag(const char* str, const char* flag, std::string* value); - -// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" -// -// On success, stores the value of the flag in *value and returns true. On -// failure returns false, though *value may have been mutated. -bool ParseKeyValueFlag(const char* str, const char* flag, - std::map* value); - -// Returns true if the string matches the flag. 
-bool IsFlag(const char* str, const char* flag); - -// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or -// some non-alphanumeric character. Also returns false if the value matches -// one of 'no', 'false', 'off' (case-insensitive). As a special case, also -// returns true if value is the empty string. -bool IsTruthyFlagValue(const std::string& value); - -} // end namespace benchmark - -#endif // BENCHMARK_COMMANDLINEFLAGS_H_ diff --git a/libcxx/utils/google-benchmark/src/complexity.cc b/libcxx/utils/google-benchmark/src/complexity.cc deleted file mode 100644 index 29f7c3b03155..000000000000 --- a/libcxx/utils/google-benchmark/src/complexity.cc +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Source project : https://github.com/ismaelJimenez/cpp.leastsq -// Adapted to be used with google benchmark - -#include "benchmark/benchmark.h" - -#include -#include -#include "check.h" -#include "complexity.h" - -namespace benchmark { - -// Internal function to calculate the different scalability forms -BigOFunc* FittingCurve(BigO complexity) { - static const double kLog2E = 1.44269504088896340736; - switch (complexity) { - case oN: - return [](IterationCount n) -> double { return static_cast(n); }; - case oNSquared: - return [](IterationCount n) -> double { return std::pow(n, 2); }; - case oNCubed: - return [](IterationCount n) -> double { return std::pow(n, 3); }; - case oLogN: - /* Note: can't use log2 because Android's GNU STL lacks it */ - return - [](IterationCount n) { return kLog2E * log(static_cast(n)); }; - case oNLogN: - /* Note: can't use log2 because Android's GNU STL lacks it */ - return [](IterationCount n) { - return kLog2E * n * log(static_cast(n)); - }; - case o1: - default: - return [](IterationCount) { return 1.0; }; - } -} - -// Function to return an string for the calculated complexity -std::string GetBigOString(BigO complexity) { - switch (complexity) { - case oN: - return "N"; - case oNSquared: - return "N^2"; - case oNCubed: - return "N^3"; - case oLogN: - return "lgN"; - case oNLogN: - return "NlgN"; - case o1: - return "(1)"; - default: - return "f(N)"; - } -} - -// Find the coefficient for the high-order term in the running time, by -// minimizing the sum of squares of relative error, for the fitting curve -// given by the lambda expression. -// - n : Vector containing the size of the benchmark tests. -// - time : Vector containing the times for the benchmark tests. -// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };). 
- -// For a deeper explanation on the algorithm logic, please refer to -// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics - -LeastSq MinimalLeastSq(const std::vector& n, - const std::vector& time, - BigOFunc* fitting_curve) { - double sigma_gn_squared = 0.0; - double sigma_time = 0.0; - double sigma_time_gn = 0.0; - - // Calculate least square fitting parameter - for (size_t i = 0; i < n.size(); ++i) { - double gn_i = fitting_curve(n[i]); - sigma_gn_squared += gn_i * gn_i; - sigma_time += time[i]; - sigma_time_gn += time[i] * gn_i; - } - - LeastSq result; - result.complexity = oLambda; - - // Calculate complexity. - result.coef = sigma_time_gn / sigma_gn_squared; - - // Calculate RMS - double rms = 0.0; - for (size_t i = 0; i < n.size(); ++i) { - double fit = result.coef * fitting_curve(n[i]); - rms += pow((time[i] - fit), 2); - } - - // Normalized RMS by the mean of the observed values - double mean = sigma_time / n.size(); - result.rms = sqrt(rms / n.size()) / mean; - - return result; -} - -// Find the coefficient for the high-order term in the running time, by -// minimizing the sum of squares of relative error. -// - n : Vector containing the size of the benchmark tests. -// - time : Vector containing the times for the benchmark tests. -// - complexity : If different than oAuto, the fitting curve will stick to -// this one. If it is oAuto, it will be calculated the best -// fitting curve. 
-LeastSq MinimalLeastSq(const std::vector& n, - const std::vector& time, const BigO complexity) { - CHECK_EQ(n.size(), time.size()); - CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two - // benchmark runs are given - CHECK_NE(complexity, oNone); - - LeastSq best_fit; - - if (complexity == oAuto) { - std::vector fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed}; - - // Take o1 as default best fitting curve - best_fit = MinimalLeastSq(n, time, FittingCurve(o1)); - best_fit.complexity = o1; - - // Compute all possible fitting curves and stick to the best one - for (const auto& fit : fit_curves) { - LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit)); - if (current_fit.rms < best_fit.rms) { - best_fit = current_fit; - best_fit.complexity = fit; - } - } - } else { - best_fit = MinimalLeastSq(n, time, FittingCurve(complexity)); - best_fit.complexity = complexity; - } - - return best_fit; -} - -std::vector ComputeBigO( - const std::vector& reports) { - typedef BenchmarkReporter::Run Run; - std::vector results; - - if (reports.size() < 2) return results; - - // Accumulators. - std::vector n; - std::vector real_time; - std::vector cpu_time; - - // Populate the accumulators. - for (const Run& run : reports) { - CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; - n.push_back(run.complexity_n); - real_time.push_back(run.real_accumulated_time / run.iterations); - cpu_time.push_back(run.cpu_accumulated_time / run.iterations); - } - - LeastSq result_cpu; - LeastSq result_real; - - if (reports[0].complexity == oLambda) { - result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); - result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); - } else { - result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity); - result_real = MinimalLeastSq(n, real_time, result_cpu.complexity); - } - - // Drop the 'args' when reporting complexity. 
- auto run_name = reports[0].run_name; - run_name.args.clear(); - - // Get the data from the accumulator to BenchmarkReporter::Run's. - Run big_o; - big_o.run_name = run_name; - big_o.family_index = reports[0].family_index; - big_o.per_family_instance_index = reports[0].per_family_instance_index; - big_o.run_type = BenchmarkReporter::Run::RT_Aggregate; - big_o.repetitions = reports[0].repetitions; - big_o.repetition_index = Run::no_repetition_index; - big_o.threads = reports[0].threads; - big_o.aggregate_name = "BigO"; - big_o.report_label = reports[0].report_label; - big_o.iterations = 0; - big_o.real_accumulated_time = result_real.coef; - big_o.cpu_accumulated_time = result_cpu.coef; - big_o.report_big_o = true; - big_o.complexity = result_cpu.complexity; - - // All the time results are reported after being multiplied by the - // time unit multiplier. But since RMS is a relative quantity it - // should not be multiplied at all. So, here, we _divide_ it by the - // multiplier so that when it is multiplied later the result is the - // correct one. - double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); - - // Only add label to mean/stddev if it is same for all runs - Run rms; - rms.run_name = run_name; - rms.family_index = reports[0].family_index; - rms.per_family_instance_index = reports[0].per_family_instance_index; - rms.run_type = BenchmarkReporter::Run::RT_Aggregate; - rms.aggregate_name = "RMS"; - rms.report_label = big_o.report_label; - rms.iterations = 0; - rms.repetition_index = Run::no_repetition_index; - rms.repetitions = reports[0].repetitions; - rms.threads = reports[0].threads; - rms.real_accumulated_time = result_real.rms / multiplier; - rms.cpu_accumulated_time = result_cpu.rms / multiplier; - rms.report_rms = true; - rms.complexity = result_cpu.complexity; - // don't forget to keep the time unit, or we won't be able to - // recover the correct value. 
- rms.time_unit = reports[0].time_unit; - - results.push_back(big_o); - results.push_back(rms); - return results; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/complexity.h b/libcxx/utils/google-benchmark/src/complexity.h deleted file mode 100644 index df29b48d29b4..000000000000 --- a/libcxx/utils/google-benchmark/src/complexity.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Source project : https://github.com/ismaelJimenez/cpp.leastsq -// Adapted to be used with google benchmark - -#ifndef COMPLEXITY_H_ -#define COMPLEXITY_H_ - -#include -#include - -#include "benchmark/benchmark.h" - -namespace benchmark { - -// Return a vector containing the bigO and RMS information for the specified -// list of reports. If 'reports.size() < 2' an empty vector is returned. -std::vector ComputeBigO( - const std::vector& reports); - -// This data structure will contain the result returned by MinimalLeastSq -// - coef : Estimated coeficient for the high-order term as -// interpolated from data. -// - rms : Normalized Root Mean Squared Error. -// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability -// form has been provided to MinimalLeastSq this will return -// the same value. In case BigO::oAuto has been selected, this -// parameter will return the best fitting curve detected. 
- -struct LeastSq { - LeastSq() : coef(0.0), rms(0.0), complexity(oNone) {} - - double coef; - double rms; - BigO complexity; -}; - -// Function to return an string for the calculated complexity -std::string GetBigOString(BigO complexity); - -} // end namespace benchmark - -#endif // COMPLEXITY_H_ diff --git a/libcxx/utils/google-benchmark/src/console_reporter.cc b/libcxx/utils/google-benchmark/src/console_reporter.cc deleted file mode 100644 index 6fd764525e81..000000000000 --- a/libcxx/utils/google-benchmark/src/console_reporter.cc +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "check.h" -#include "colorprint.h" -#include "commandlineflags.h" -#include "complexity.h" -#include "counter.h" -#include "internal_macros.h" -#include "string_util.h" -#include "timers.h" - -namespace benchmark { - -bool ConsoleReporter::ReportContext(const Context& context) { - name_field_width_ = context.name_field_width; - printed_header_ = false; - prev_counters_.clear(); - - PrintBasicContext(&GetErrorStream(), context); - -#ifdef BENCHMARK_OS_WINDOWS - if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) { - GetErrorStream() - << "Color printing is only supported for stdout on windows." 
- " Disabling color printing\n"; - output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color); - } -#endif - - return true; -} - -void ConsoleReporter::PrintHeader(const Run& run) { - std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), - "Benchmark", "Time", "CPU", "Iterations"); - if(!run.counters.empty()) { - if(output_options_ & OO_Tabular) { - for(auto const& c : run.counters) { - str += FormatString(" %10s", c.first.c_str()); - } - } else { - str += " UserCounters..."; - } - } - std::string line = std::string(str.length(), '-'); - GetOutputStream() << line << "\n" << str << "\n" << line << "\n"; -} - -void ConsoleReporter::ReportRuns(const std::vector& reports) { - for (const auto& run : reports) { - // print the header: - // --- if none was printed yet - bool print_header = !printed_header_; - // --- or if the format is tabular and this run - // has different fields from the prev header - print_header |= (output_options_ & OO_Tabular) && - (!internal::SameNames(run.counters, prev_counters_)); - if (print_header) { - printed_header_ = true; - prev_counters_ = run.counters; - PrintHeader(run); - } - // As an alternative to printing the headers like this, we could sort - // the benchmarks by header and then print. But this would require - // waiting for the full results before printing, or printing twice. - PrintRunData(run); - } -} - -static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, - ...) { - va_list args; - va_start(args, fmt); - out << FormatString(fmt, args); - va_end(args); -} - - -static std::string FormatTime(double time) { - // Align decimal places... 
- if (time < 1.0) { - return FormatString("%10.3f", time); - } - if (time < 10.0) { - return FormatString("%10.2f", time); - } - if (time < 100.0) { - return FormatString("%10.1f", time); - } - return FormatString("%10.0f", time); -} - -void ConsoleReporter::PrintRunData(const Run& result) { - typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); - auto& Out = GetOutputStream(); - PrinterFn* printer = (output_options_ & OO_Color) ? - (PrinterFn*)ColorPrintf : IgnoreColorPrint; - auto name_color = - (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN; - printer(Out, name_color, "%-*s ", name_field_width_, - result.benchmark_name().c_str()); - - if (result.error_occurred) { - printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", - result.error_message.c_str()); - printer(Out, COLOR_DEFAULT, "\n"); - return; - } - - const double real_time = result.GetAdjustedRealTime(); - const double cpu_time = result.GetAdjustedCPUTime(); - const std::string real_time_str = FormatTime(real_time); - const std::string cpu_time_str = FormatTime(cpu_time); - - - if (result.report_big_o) { - std::string big_o = GetBigOString(result.complexity); - printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(), - cpu_time, big_o.c_str()); - } else if (result.report_rms) { - printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", - cpu_time * 100, "%"); - } else { - const char* timeLabel = GetTimeUnitString(result.time_unit); - printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, - cpu_time_str.c_str(), timeLabel); - } - - if (!result.report_big_o && !result.report_rms) { - printer(Out, COLOR_CYAN, "%10lld", result.iterations); - } - - for (auto& c : result.counters) { - const std::size_t cNameLen = std::max(std::string::size_type(10), - c.first.length()); - auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); - const char* unit = ""; - if (c.second.flags & Counter::kIsRate) - unit = 
(c.second.flags & Counter::kInvert) ? "s" : "/s"; - if (output_options_ & OO_Tabular) { - printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), - unit); - } else { - printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit); - } - } - - if (!result.report_label.empty()) { - printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str()); - } - - printer(Out, COLOR_DEFAULT, "\n"); -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/counter.cc b/libcxx/utils/google-benchmark/src/counter.cc deleted file mode 100644 index cf5b78ee3ac6..000000000000 --- a/libcxx/utils/google-benchmark/src/counter.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "counter.h" - -namespace benchmark { -namespace internal { - -double Finish(Counter const& c, IterationCount iterations, double cpu_time, - double num_threads) { - double v = c.value; - if (c.flags & Counter::kIsRate) { - v /= cpu_time; - } - if (c.flags & Counter::kAvgThreads) { - v /= num_threads; - } - if (c.flags & Counter::kIsIterationInvariant) { - v *= iterations; - } - if (c.flags & Counter::kAvgIterations) { - v /= iterations; - } - - if (c.flags & Counter::kInvert) { // Invert is *always* last. 
- v = 1.0 / v; - } - return v; -} - -void Finish(UserCounters* l, IterationCount iterations, double cpu_time, - double num_threads) { - for (auto& c : *l) { - c.second.value = Finish(c.second, iterations, cpu_time, num_threads); - } -} - -void Increment(UserCounters* l, UserCounters const& r) { - // add counters present in both or just in *l - for (auto& c : *l) { - auto it = r.find(c.first); - if (it != r.end()) { - c.second.value = c.second + it->second; - } - } - // add counters present in r, but not in *l - for (auto const& tc : r) { - auto it = l->find(tc.first); - if (it == l->end()) { - (*l)[tc.first] = tc.second; - } - } -} - -bool SameNames(UserCounters const& l, UserCounters const& r) { - if (&l == &r) return true; - if (l.size() != r.size()) { - return false; - } - for (auto const& c : l) { - if (r.find(c.first) == r.end()) { - return false; - } - } - return true; -} - -} // end namespace internal -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/counter.h b/libcxx/utils/google-benchmark/src/counter.h deleted file mode 100644 index 1f5a58e31f0c..000000000000 --- a/libcxx/utils/google-benchmark/src/counter.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef BENCHMARK_COUNTER_H_ -#define BENCHMARK_COUNTER_H_ - -#include "benchmark/benchmark.h" - -namespace benchmark { - -// these counter-related functions are hidden to reduce API surface. -namespace internal { -void Finish(UserCounters* l, IterationCount iterations, double time, - double num_threads); -void Increment(UserCounters* l, UserCounters const& r); -bool SameNames(UserCounters const& l, UserCounters const& r); -} // end namespace internal - -} // end namespace benchmark - -#endif // BENCHMARK_COUNTER_H_ diff --git a/libcxx/utils/google-benchmark/src/csv_reporter.cc b/libcxx/utils/google-benchmark/src/csv_reporter.cc deleted file mode 100644 index af2c18fc8a6e..000000000000 --- a/libcxx/utils/google-benchmark/src/csv_reporter.cc +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark/benchmark.h" -#include "complexity.h" - -#include -#include -#include -#include -#include -#include - -#include "check.h" -#include "string_util.h" -#include "timers.h" - -// File format reference: http://edoceo.com/utilitas/csv-file-format. 
- -namespace benchmark { - -namespace { -std::vector elements = { - "name", "iterations", "real_time", "cpu_time", - "time_unit", "bytes_per_second", "items_per_second", "label", - "error_occurred", "error_message"}; -} // namespace - -std::string CsvEscape(const std::string & s) { - std::string tmp; - tmp.reserve(s.size() + 2); - for (char c : s) { - switch (c) { - case '"' : tmp += "\"\""; break; - default : tmp += c; break; - } - } - return '"' + tmp + '"'; -} - -bool CSVReporter::ReportContext(const Context& context) { - PrintBasicContext(&GetErrorStream(), context); - return true; -} - -void CSVReporter::ReportRuns(const std::vector& reports) { - std::ostream& Out = GetOutputStream(); - - if (!printed_header_) { - // save the names of all the user counters - for (const auto& run : reports) { - for (const auto& cnt : run.counters) { - if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") - continue; - user_counter_names_.insert(cnt.first); - } - } - - // print the header - for (auto B = elements.begin(); B != elements.end();) { - Out << *B++; - if (B != elements.end()) Out << ","; - } - for (auto B = user_counter_names_.begin(); - B != user_counter_names_.end();) { - Out << ",\"" << *B++ << "\""; - } - Out << "\n"; - - printed_header_ = true; - } else { - // check that all the current counters are saved in the name set - for (const auto& run : reports) { - for (const auto& cnt : run.counters) { - if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") - continue; - CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) - << "All counters must be present in each run. 
" - << "Counter named \"" << cnt.first - << "\" was not in a run after being added to the header"; - } - } - } - - // print results for each run - for (const auto& run : reports) { - PrintRunData(run); - } -} - -void CSVReporter::PrintRunData(const Run& run) { - std::ostream& Out = GetOutputStream(); - Out << CsvEscape(run.benchmark_name()) << ","; - if (run.error_occurred) { - Out << std::string(elements.size() - 3, ','); - Out << "true,"; - Out << CsvEscape(run.error_message) << "\n"; - return; - } - - // Do not print iteration on bigO and RMS report - if (!run.report_big_o && !run.report_rms) { - Out << run.iterations; - } - Out << ","; - - Out << run.GetAdjustedRealTime() << ","; - Out << run.GetAdjustedCPUTime() << ","; - - // Do not print timeLabel on bigO and RMS report - if (run.report_big_o) { - Out << GetBigOString(run.complexity); - } else if (!run.report_rms) { - Out << GetTimeUnitString(run.time_unit); - } - Out << ","; - - if (run.counters.find("bytes_per_second") != run.counters.end()) { - Out << run.counters.at("bytes_per_second"); - } - Out << ","; - if (run.counters.find("items_per_second") != run.counters.end()) { - Out << run.counters.at("items_per_second"); - } - Out << ","; - if (!run.report_label.empty()) { - Out << CsvEscape(run.report_label); - } - Out << ",,"; // for error_occurred and error_message - - // Print user counters - for (const auto& ucn : user_counter_names_) { - auto it = run.counters.find(ucn); - if (it == run.counters.end()) { - Out << ","; - } else { - Out << "," << it->second; - } - } - Out << '\n'; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h deleted file mode 100644 index f22ca9f7d299..000000000000 --- a/libcxx/utils/google-benchmark/src/cycleclock.h +++ /dev/null @@ -1,225 +0,0 @@ -// ---------------------------------------------------------------------- -// CycleClock -// A CycleClock tells you the current time in Cycles. 
The "time" -// is actually time since power-on. This is like time() but doesn't -// involve a system call and is much more precise. -// -// NOTE: Not all cpu/platform/kernel combinations guarantee that this -// clock increments at a constant rate or is synchronized across all logical -// cpus in a system. -// -// If you need the above guarantees, please consider using a different -// API. There are efforts to provide an interface which provides a millisecond -// granularity and implemented as a memory read. A memory read is generally -// cheaper than the CycleClock for many architectures. -// -// Also, in some out of order CPU implementations, the CycleClock is not -// serializing. So if you're trying to count at cycles granularity, your -// data might be inaccurate due to out of order instruction execution. -// ---------------------------------------------------------------------- - -#ifndef BENCHMARK_CYCLECLOCK_H_ -#define BENCHMARK_CYCLECLOCK_H_ - -#include - -#include "benchmark/benchmark.h" -#include "internal_macros.h" - -#if defined(BENCHMARK_OS_MACOSX) -#include -#endif -// For MSVC, we want to use '_asm rdtsc' when possible (since it works -// with even ancient MSVC compilers), and when not possible the -// __rdtsc intrinsic, declared in . Unfortunately, in some -// environments, and have conflicting -// declarations of some other intrinsics, breaking compilation. -// Therefore, we simply declare __rdtsc ourselves. See also -// http://connect.microsoft.com/VisualStudio/feedback/details/262047 -#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) -extern "C" uint64_t __rdtsc(); -#pragma intrinsic(__rdtsc) -#endif - -#if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW) -#include -#include -#endif - -#ifdef BENCHMARK_OS_EMSCRIPTEN -#include -#endif - -namespace benchmark { -// NOTE: only i386 and x86_64 have been well tested. 
-// PPC, sparc, alpha, and ia64 are based on -// http://peter.kuscsik.com/wordpress/?p=14 -// with modifications by m3b. See also -// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h -namespace cycleclock { -// This should return the number of cycles since power-on. Thread-safe. -inline BENCHMARK_ALWAYS_INLINE int64_t Now() { -#if defined(BENCHMARK_OS_MACOSX) - // this goes at the top because we need ALL Macs, regardless of - // architecture, to return the number of "mach time units" that - // have passed since startup. See sysinfo.cc where - // InitializeSystemInfo() sets the supposed cpu clock frequency of - // macs to the number of mach time units per second, not actual - // CPU clock frequency (which can change in the face of CPU - // frequency scaling). Also note that when the Mac sleeps, this - // counter pauses; it does not continue counting, nor does it - // reset to zero. - return mach_absolute_time(); -#elif defined(BENCHMARK_OS_EMSCRIPTEN) - // this goes above x86-specific code because old versions of Emscripten - // define __x86_64__, although they have nothing to do with it. - return static_cast(emscripten_get_now() * 1e+6); -#elif defined(__i386__) - int64_t ret; - __asm__ volatile("rdtsc" : "=A"(ret)); - return ret; -#elif defined(__x86_64__) || defined(__amd64__) - uint64_t low, high; - __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); - return (high << 32) | low; -#elif defined(__powerpc__) || defined(__ppc__) - // This returns a time-base, which is not always precisely a cycle-count. 
-#if defined(__powerpc64__) || defined(__ppc64__) - int64_t tb; - asm volatile("mfspr %0, 268" : "=r"(tb)); - return tb; -#else - uint32_t tbl, tbu0, tbu1; - asm volatile( - "mftbu %0\n" - "mftb %1\n" - "mftbu %2" - : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); - tbl &= -static_cast(tbu0 == tbu1); - // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed) - return (static_cast(tbu1) << 32) | tbl; -#endif -#elif defined(__sparc__) - int64_t tick; - asm(".byte 0x83, 0x41, 0x00, 0x00"); - asm("mov %%g1, %0" : "=r"(tick)); - return tick; -#elif defined(__ia64__) - int64_t itc; - asm("mov %0 = ar.itc" : "=r"(itc)); - return itc; -#elif defined(COMPILER_MSVC) && defined(_M_IX86) - // Older MSVC compilers (like 7.x) don't seem to support the - // __rdtsc intrinsic properly, so I prefer to use _asm instead - // when I know it will work. Otherwise, I'll use __rdtsc and hope - // the code is being compiled with a non-ancient compiler. - _asm rdtsc -#elif defined(COMPILER_MSVC) && defined(_M_ARM64) - // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019 - // and https://reviews.llvm.org/D53115 - int64_t virtual_timer_value; - virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); - return virtual_timer_value; -#elif defined(COMPILER_MSVC) - return __rdtsc(); -#elif defined(BENCHMARK_OS_NACL) - // Native Client validator on x86/x86-64 allows RDTSC instructions, - // and this case is handled above. Native Client validator on ARM - // rejects MRC instructions (used in the ARM-specific sequence below), - // so we handle it here. Portable Native Client compiles to - // architecture-agnostic bytecode, which doesn't provide any - // cycle counter access mnemonics. - - // Native Client does not provide any API to access cycle counter. - // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday - // because is provides nanosecond resolution (which is noticable at - // least for PNaCl modules running on x86 Mac & Linux). 
- // Initialize to always return 0 if clock_gettime fails. - struct timespec ts = {0, 0}; - clock_gettime(CLOCK_MONOTONIC, &ts); - return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; -#elif defined(__aarch64__) - // System timer of ARMv8 runs at a different frequency than the CPU's. - // The frequency is fixed, typically in the range 1-50MHz. It can be - // read at CNTFRQ special register. We assume the OS has set up - // the virtual timer properly. - int64_t virtual_timer_value; - asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); - return virtual_timer_value; -#elif defined(__ARM_ARCH) - // V6 is the earliest arch that has a standard cyclecount - // Native Client validator doesn't allow MRC instructions. -#if (__ARM_ARCH >= 6) - uint32_t pmccntr; - uint32_t pmuseren; - uint32_t pmcntenset; - // Read the user mode perf monitor counter access permissions. - asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); - if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); - if (pmcntenset & 0x80000000ul) { // Is it counting? - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); - // The counter is set up to count every 64th cycle - return static_cast(pmccntr) * 64; // Should optimize to << 6 - } - } -#endif - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__mips__) || defined(__m68k__) - // mips apparently only allows rdtsc for superusers, so we fall - // back to gettimeofday. It's possible clock_gettime would be better. - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__loongarch__) - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__s390__) // Covers both s390 and s390x. - // Return the CPU clock. 
- uint64_t tsc; -#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) - // z/OS XL compiler HLASM syntax. - asm(" stck %0" : "=m"(tsc) : : "cc"); -#else - asm("stck %0" : "=Q"(tsc) : : "cc"); -#endif - return tsc; -#elif defined(__riscv) // RISC-V - // Use RDCYCLE (and RDCYCLEH on riscv32) -#if __riscv_xlen == 32 - uint32_t cycles_lo, cycles_hi0, cycles_hi1; - // This asm also includes the PowerPC overflow handling strategy, as above. - // Implemented in assembly because Clang insisted on branching. - asm volatile( - "rdcycleh %0\n" - "rdcycle %1\n" - "rdcycleh %2\n" - "sub %0, %0, %2\n" - "seqz %0, %0\n" - "sub %0, zero, %0\n" - "and %1, %1, %0\n" - : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); - return (static_cast(cycles_hi1) << 32) | cycles_lo; -#else - uint64_t cycles; - asm volatile("rdcycle %0" : "=r"(cycles)); - return cycles; -#endif -#elif defined(__e2k__) || defined(__elbrus__) - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#else -// The soft failover to a generic implementation is automatic only for ARM. -// For other platforms the developer is expected to make an attempt to create -// a fast implementation and use generic version if nothing better is available. 
-#error You need to define CycleTimer for your OS and CPU -#endif -} -} // end namespace cycleclock -} // end namespace benchmark - -#endif // BENCHMARK_CYCLECLOCK_H_ diff --git a/libcxx/utils/google-benchmark/src/internal_macros.h b/libcxx/utils/google-benchmark/src/internal_macros.h deleted file mode 100644 index 91f367b894bc..000000000000 --- a/libcxx/utils/google-benchmark/src/internal_macros.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef BENCHMARK_INTERNAL_MACROS_H_ -#define BENCHMARK_INTERNAL_MACROS_H_ - -#include "benchmark/benchmark.h" - -/* Needed to detect STL */ -#include - -// clang-format off - -#ifndef __has_feature -#define __has_feature(x) 0 -#endif - -#if defined(__clang__) - #if defined(__ibmxl__) - #if !defined(COMPILER_IBMXL) - #define COMPILER_IBMXL - #endif - #elif !defined(COMPILER_CLANG) - #define COMPILER_CLANG - #endif -#elif defined(_MSC_VER) - #if !defined(COMPILER_MSVC) - #define COMPILER_MSVC - #endif -#elif defined(__GNUC__) - #if !defined(COMPILER_GCC) - #define COMPILER_GCC - #endif -#endif - -#if __has_feature(cxx_attributes) - #define BENCHMARK_NORETURN [[noreturn]] -#elif defined(__GNUC__) - #define BENCHMARK_NORETURN __attribute__((noreturn)) -#elif defined(COMPILER_MSVC) - #define BENCHMARK_NORETURN __declspec(noreturn) -#else - #define BENCHMARK_NORETURN -#endif - -#if defined(__CYGWIN__) - #define BENCHMARK_OS_CYGWIN 1 -#elif defined(_WIN32) - #define BENCHMARK_OS_WINDOWS 1 - #if defined(__MINGW32__) - #define BENCHMARK_OS_MINGW 1 - #endif -#elif defined(__APPLE__) - #define BENCHMARK_OS_APPLE 1 - #include "TargetConditionals.h" - #if defined(TARGET_OS_MAC) - #define BENCHMARK_OS_MACOSX 1 - #if defined(TARGET_OS_IPHONE) - #define BENCHMARK_OS_IOS 1 - #endif - #endif -#elif defined(__FreeBSD__) - #define BENCHMARK_OS_FREEBSD 1 -#elif defined(__NetBSD__) - #define BENCHMARK_OS_NETBSD 1 -#elif defined(__OpenBSD__) - #define BENCHMARK_OS_OPENBSD 1 -#elif defined(__DragonFly__) - #define BENCHMARK_OS_DRAGONFLY 1 -#elif 
defined(__linux__) - #define BENCHMARK_OS_LINUX 1 -#elif defined(__native_client__) - #define BENCHMARK_OS_NACL 1 -#elif defined(__EMSCRIPTEN__) - #define BENCHMARK_OS_EMSCRIPTEN 1 -#elif defined(__rtems__) - #define BENCHMARK_OS_RTEMS 1 -#elif defined(__Fuchsia__) -#define BENCHMARK_OS_FUCHSIA 1 -#elif defined (__SVR4) && defined (__sun) -#define BENCHMARK_OS_SOLARIS 1 -#elif defined(__QNX__) -#define BENCHMARK_OS_QNX 1 -#elif defined(__MVS__) -#define BENCHMARK_OS_ZOS 1 -#endif - -#if defined(__ANDROID__) && defined(__GLIBCXX__) -#define BENCHMARK_STL_ANDROID_GNUSTL 1 -#endif - -#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \ - && !defined(__EXCEPTIONS) - #define BENCHMARK_HAS_NO_EXCEPTIONS -#endif - -#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) - #define BENCHMARK_MAYBE_UNUSED __attribute__((unused)) -#else - #define BENCHMARK_MAYBE_UNUSED -#endif - -// clang-format on - -#endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/libcxx/utils/google-benchmark/src/json_reporter.cc b/libcxx/utils/google-benchmark/src/json_reporter.cc deleted file mode 100644 index 26898456f854..000000000000 --- a/libcxx/utils/google-benchmark/src/json_reporter.cc +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "benchmark/benchmark.h" -#include "complexity.h" - -#include -#include -#include -#include // for setprecision -#include -#include -#include -#include -#include - -#include "string_util.h" -#include "timers.h" - -namespace benchmark { -namespace internal { -extern std::map* global_context; -} - -namespace { - -std::string StrEscape(const std::string & s) { - std::string tmp; - tmp.reserve(s.size()); - for (char c : s) { - switch (c) { - case '\b': tmp += "\\b"; break; - case '\f': tmp += "\\f"; break; - case '\n': tmp += "\\n"; break; - case '\r': tmp += "\\r"; break; - case '\t': tmp += "\\t"; break; - case '\\': tmp += "\\\\"; break; - case '"' : tmp += "\\\""; break; - default : tmp += c; break; - } - } - return tmp; -} - -std::string FormatKV(std::string const& key, std::string const& value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); -} - -std::string FormatKV(std::string const& key, const char* value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); -} - -std::string FormatKV(std::string const& key, bool value) { - return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); -} - -std::string FormatKV(std::string const& key, int64_t value) { - std::stringstream ss; - ss << '"' << StrEscape(key) << "\": " << value; - return ss.str(); -} - -std::string FormatKV(std::string const& key, IterationCount value) { - std::stringstream ss; - ss << '"' << StrEscape(key) << "\": " << value; - return ss.str(); -} - -std::string FormatKV(std::string const& key, double value) { - std::stringstream ss; - ss << '"' << StrEscape(key) << "\": "; - - if (std::isnan(value)) - ss << (value < 0 ? "-" : "") << "NaN"; - else if (std::isinf(value)) - ss << (value < 0 ? 
"-" : "") << "Infinity"; - else { - const auto max_digits10 = - std::numeric_limits::max_digits10; - const auto max_fractional_digits10 = max_digits10 - 1; - ss << std::scientific << std::setprecision(max_fractional_digits10) - << value; - } - return ss.str(); -} - -int64_t RoundDouble(double v) { return std::lround(v); } - -} // end namespace - -bool JSONReporter::ReportContext(const Context& context) { - std::ostream& out = GetOutputStream(); - - out << "{\n"; - std::string inner_indent(2, ' '); - - // Open context block and print context information. - out << inner_indent << "\"context\": {\n"; - std::string indent(4, ' '); - - std::string walltime_value = LocalDateTimeString(); - out << indent << FormatKV("date", walltime_value) << ",\n"; - - out << indent << FormatKV("host_name", context.sys_info.name) << ",\n"; - - if (Context::executable_name) { - out << indent << FormatKV("executable", Context::executable_name) << ",\n"; - } - - CPUInfo const& info = context.cpu_info; - out << indent << FormatKV("num_cpus", static_cast(info.num_cpus)) - << ",\n"; - out << indent - << FormatKV("mhz_per_cpu", - RoundDouble(info.cycles_per_second / 1000000.0)) - << ",\n"; - if (CPUInfo::Scaling::UNKNOWN != info.scaling) { - out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? 
true : false) - << ",\n"; - } - - out << indent << "\"caches\": [\n"; - indent = std::string(6, ' '); - std::string cache_indent(8, ' '); - for (size_t i = 0; i < info.caches.size(); ++i) { - auto& CI = info.caches[i]; - out << indent << "{\n"; - out << cache_indent << FormatKV("type", CI.type) << ",\n"; - out << cache_indent << FormatKV("level", static_cast(CI.level)) - << ",\n"; - out << cache_indent - << FormatKV("size", static_cast(CI.size)) << ",\n"; - out << cache_indent - << FormatKV("num_sharing", static_cast(CI.num_sharing)) - << "\n"; - out << indent << "}"; - if (i != info.caches.size() - 1) out << ","; - out << "\n"; - } - indent = std::string(4, ' '); - out << indent << "],\n"; - out << indent << "\"load_avg\": ["; - for (auto it = info.load_avg.begin(); it != info.load_avg.end();) { - out << *it++; - if (it != info.load_avg.end()) out << ","; - } - out << "],\n"; - -#if defined(NDEBUG) - const char build_type[] = "release"; -#else - const char build_type[] = "debug"; -#endif - out << indent << FormatKV("library_build_type", build_type) << "\n"; - - if (internal::global_context != nullptr) { - for (const auto& kv: *internal::global_context) { - out << indent << FormatKV(kv.first, kv.second) << "\n"; - } - } - - // Close context block and open the list of benchmarks. - out << inner_indent << "},\n"; - out << inner_indent << "\"benchmarks\": [\n"; - return true; -} - -void JSONReporter::ReportRuns(std::vector const& reports) { - if (reports.empty()) { - return; - } - std::string indent(4, ' '); - std::ostream& out = GetOutputStream(); - if (!first_report_) { - out << ",\n"; - } - first_report_ = false; - - for (auto it = reports.begin(); it != reports.end(); ++it) { - out << indent << "{\n"; - PrintRunData(*it); - out << indent << '}'; - auto it_cp = it; - if (++it_cp != reports.end()) { - out << ",\n"; - } - } -} - -void JSONReporter::Finalize() { - // Close the list of benchmarks and the top level object. 
- GetOutputStream() << "\n ]\n}\n"; -} - -void JSONReporter::PrintRunData(Run const& run) { - std::string indent(6, ' '); - std::ostream& out = GetOutputStream(); - out << indent << FormatKV("name", run.benchmark_name()) << ",\n"; - out << indent << FormatKV("family_index", run.family_index) << ",\n"; - out << indent - << FormatKV("per_family_instance_index", run.per_family_instance_index) - << ",\n"; - out << indent << FormatKV("run_name", run.run_name.str()) << ",\n"; - out << indent << FormatKV("run_type", [&run]() -> const char* { - switch (run.run_type) { - case BenchmarkReporter::Run::RT_Iteration: - return "iteration"; - case BenchmarkReporter::Run::RT_Aggregate: - return "aggregate"; - } - BENCHMARK_UNREACHABLE(); - }()) << ",\n"; - out << indent << FormatKV("repetitions", run.repetitions) << ",\n"; - if (run.run_type != BenchmarkReporter::Run::RT_Aggregate) { - out << indent << FormatKV("repetition_index", run.repetition_index) - << ",\n"; - } - out << indent << FormatKV("threads", run.threads) << ",\n"; - if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { - out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; - } - if (run.error_occurred) { - out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; - out << indent << FormatKV("error_message", run.error_message) << ",\n"; - } - if (!run.report_big_o && !run.report_rms) { - out << indent << FormatKV("iterations", run.iterations) << ",\n"; - out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; - out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); - out << ",\n" - << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); - } else if (run.report_big_o) { - out << indent << FormatKV("cpu_coefficient", run.GetAdjustedCPUTime()) - << ",\n"; - out << indent << FormatKV("real_coefficient", run.GetAdjustedRealTime()) - << ",\n"; - out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n"; - out << indent 
<< FormatKV("time_unit", GetTimeUnitString(run.time_unit)); - } else if (run.report_rms) { - out << indent << FormatKV("rms", run.GetAdjustedCPUTime()); - } - - for (auto& c : run.counters) { - out << ",\n" << indent << FormatKV(c.first, c.second); - } - - if (run.has_memory_result) { - out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter); - out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used); - } - - if (!run.report_label.empty()) { - out << ",\n" << indent << FormatKV("label", run.report_label); - } - out << '\n'; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/log.h b/libcxx/utils/google-benchmark/src/log.h deleted file mode 100644 index 47d0c35c0182..000000000000 --- a/libcxx/utils/google-benchmark/src/log.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef BENCHMARK_LOG_H_ -#define BENCHMARK_LOG_H_ - -#include -#include - -#include "benchmark/benchmark.h" - -namespace benchmark { -namespace internal { - -typedef std::basic_ostream&(EndLType)(std::basic_ostream&); - -class LogType { - friend LogType& GetNullLogInstance(); - friend LogType& GetErrorLogInstance(); - - // FIXME: Add locking to output. 
- template - friend LogType& operator<<(LogType&, Tp const&); - friend LogType& operator<<(LogType&, EndLType*); - - private: - LogType(std::ostream* out) : out_(out) {} - std::ostream* out_; - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); -}; - -template -LogType& operator<<(LogType& log, Tp const& value) { - if (log.out_) { - *log.out_ << value; - } - return log; -} - -inline LogType& operator<<(LogType& log, EndLType* m) { - if (log.out_) { - *log.out_ << m; - } - return log; -} - -inline int& LogLevel() { - static int log_level = 0; - return log_level; -} - -inline LogType& GetNullLogInstance() { - static LogType log(nullptr); - return log; -} - -inline LogType& GetErrorLogInstance() { - static LogType log(&std::clog); - return log; -} - -inline LogType& GetLogInstanceForLevel(int level) { - if (level <= LogLevel()) { - return GetErrorLogInstance(); - } - return GetNullLogInstance(); -} - -} // end namespace internal -} // end namespace benchmark - -// clang-format off -#define VLOG(x) \ - (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ - " ") -// clang-format on -#endif diff --git a/libcxx/utils/google-benchmark/src/mutex.h b/libcxx/utils/google-benchmark/src/mutex.h deleted file mode 100644 index 9cc414ec467e..000000000000 --- a/libcxx/utils/google-benchmark/src/mutex.h +++ /dev/null @@ -1,155 +0,0 @@ -#ifndef BENCHMARK_MUTEX_H_ -#define BENCHMARK_MUTEX_H_ - -#include -#include - -#include "check.h" - -// Enable thread safety attributes only with clang. -// The attributes can be safely erased when compiling with other compilers. 
-#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) -#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) -#else -#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op -#endif - -#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) - -#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) - -#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) - -#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) - -#define ACQUIRED_BEFORE(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) - -#define ACQUIRED_AFTER(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) - -#define REQUIRES(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) - -#define REQUIRES_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) - -#define ACQUIRE(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) - -#define ACQUIRE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) - -#define RELEASE(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) - -#define RELEASE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) - -#define TRY_ACQUIRE(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) - -#define TRY_ACQUIRE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) - -#define EXCLUDES(...) 
THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) - -#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) - -#define ASSERT_SHARED_CAPABILITY(x) \ - THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) - -#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) - -#define NO_THREAD_SAFETY_ANALYSIS \ - THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) - -namespace benchmark { - -typedef std::condition_variable Condition; - -// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that -// we can annotate them with thread safety attributes and use the -// -Wthread-safety warning with clang. The standard library types cannot be -// used directly because they do not provide the required annotations. -class CAPABILITY("mutex") Mutex { - public: - Mutex() {} - - void lock() ACQUIRE() { mut_.lock(); } - void unlock() RELEASE() { mut_.unlock(); } - std::mutex& native_handle() { return mut_; } - - private: - std::mutex mut_; -}; - -class SCOPED_CAPABILITY MutexLock { - typedef std::unique_lock MutexLockImp; - - public: - MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) {} - ~MutexLock() RELEASE() {} - MutexLockImp& native_handle() { return ml_; } - - private: - MutexLockImp ml_; -}; - -class Barrier { - public: - Barrier(int num_threads) : running_threads_(num_threads) {} - - // Called by each thread - bool wait() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = createBarrier(ml); - } - if (last_thread) phase_condition_.notify_all(); - return last_thread; - } - - void removeThread() EXCLUDES(lock_) { - MutexLock ml(lock_); - --running_threads_; - if (entered_ != 0) phase_condition_.notify_all(); - } - - private: - Mutex lock_; - Condition phase_condition_; - int running_threads_; - - // State for barrier management - int phase_number_ = 0; - int entered_ = 0; // Number of threads that have entered this barrier - - // Enter the barrier and wait until 
all other threads have also - // entered the barrier. Returns iff this is the last thread to - // enter the barrier. - bool createBarrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); - entered_++; - if (entered_ < running_threads_) { - // Wait for all threads to enter - int phase_number_cp = phase_number_; - auto cb = [this, phase_number_cp]() { - return this->phase_number_ > phase_number_cp || - entered_ == running_threads_; // A thread has aborted in error - }; - phase_condition_.wait(ml.native_handle(), cb); - if (phase_number_ > phase_number_cp) return false; - // else (running_threads_ == entered_) and we are the last thread. - } - // Last thread has reached the barrier - phase_number_++; - entered_ = 0; - return true; - } -}; - -} // end namespace benchmark - -#endif // BENCHMARK_MUTEX_H_ diff --git a/libcxx/utils/google-benchmark/src/perf_counters.cc b/libcxx/utils/google-benchmark/src/perf_counters.cc deleted file mode 100644 index 4ddf0de2502c..000000000000 --- a/libcxx/utils/google-benchmark/src/perf_counters.cc +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2021 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "perf_counters.h" - -#include -#include - -#if defined HAVE_LIBPFM -#include "perfmon/pfmlib.h" -#include "perfmon/pfmlib_perf_event.h" -#endif - -namespace benchmark { -namespace internal { - -constexpr size_t PerfCounterValues::kMaxCounters; - -#if defined HAVE_LIBPFM -const bool PerfCounters::kSupported = true; - -bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; } - -PerfCounters PerfCounters::Create( - const std::vector& counter_names) { - if (counter_names.empty()) { - return NoCounters(); - } - if (counter_names.size() > PerfCounterValues::kMaxCounters) { - GetErrorLogInstance() - << counter_names.size() - << " counters were requested. The minimum is 1, the maximum is " - << PerfCounterValues::kMaxCounters << "\n"; - return NoCounters(); - } - std::vector counter_ids(counter_names.size()); - - const int mode = PFM_PLM3; // user mode only - for (size_t i = 0; i < counter_names.size(); ++i) { - const bool is_first = i == 0; - struct perf_event_attr attr{}; - attr.size = sizeof(attr); - const int group_id = !is_first ? counter_ids[0] : -1; - const auto& name = counter_names[i]; - if (name.empty()) { - GetErrorLogInstance() << "A counter name was the empty string\n"; - return NoCounters(); - } - pfm_perf_encode_arg_t arg{}; - arg.attr = &attr; - - const int pfm_get = - pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg); - if (pfm_get != PFM_SUCCESS) { - GetErrorLogInstance() << "Unknown counter name: " << name << "\n"; - return NoCounters(); - } - attr.disabled = is_first; - // Note: the man page for perf_event_create suggests inerit = true and - // read_format = PERF_FORMAT_GROUP don't work together, but that's not the - // case. - attr.inherit = true; - attr.pinned = is_first; - attr.exclude_kernel = true; - attr.exclude_user = false; - attr.exclude_hv = true; - // Read all counters in one read. 
- attr.read_format = PERF_FORMAT_GROUP; - - int id = -1; - static constexpr size_t kNrOfSyscallRetries = 5; - // Retry syscall as it was interrupted often (b/64774091). - for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; - ++num_retries) { - id = perf_event_open(&attr, 0, -1, group_id, 0); - if (id >= 0 || errno != EINTR) { - break; - } - } - if (id < 0) { - GetErrorLogInstance() - << "Failed to get a file descriptor for " << name << "\n"; - return NoCounters(); - } - - counter_ids[i] = id; - } - if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) { - GetErrorLogInstance() << "Failed to start counters\n"; - return NoCounters(); - } - - return PerfCounters(counter_names, std::move(counter_ids)); -} - -PerfCounters::~PerfCounters() { - if (counter_ids_.empty()) { - return; - } - ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE); - for (int fd : counter_ids_) { - close(fd); - } -} -#else // defined HAVE_LIBPFM -const bool PerfCounters::kSupported = false; - -bool PerfCounters::Initialize() { return false; } - -PerfCounters PerfCounters::Create( - const std::vector& counter_names) { - if (!counter_names.empty()) { - GetErrorLogInstance() << "Performance counters not supported."; - } - return NoCounters(); -} - -PerfCounters::~PerfCounters() = default; -#endif // defined HAVE_LIBPFM -} // namespace internal -} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/perf_counters.h b/libcxx/utils/google-benchmark/src/perf_counters.h deleted file mode 100644 index b6629b99070b..000000000000 --- a/libcxx/utils/google-benchmark/src/perf_counters.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2021 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef BENCHMARK_PERF_COUNTERS_H -#define BENCHMARK_PERF_COUNTERS_H - -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "check.h" -#include "log.h" - -#ifndef BENCHMARK_OS_WINDOWS -#include -#endif - -namespace benchmark { -namespace internal { - -// Typically, we can only read a small number of counters. There is also a -// padding preceding counter values, when reading multiple counters with one -// syscall (which is desirable). PerfCounterValues abstracts these details. -// The implementation ensures the storage is inlined, and allows 0-based -// indexing into the counter values. -// The object is used in conjunction with a PerfCounters object, by passing it -// to Snapshot(). The values are populated such that -// perfCounters->names()[i]'s value is obtained at position i (as given by -// operator[]) of this object. -class PerfCounterValues { - public: - explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { - CHECK_LE(nr_counters_, kMaxCounters); - } - - uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; } - - static constexpr size_t kMaxCounters = 3; - - private: - friend class PerfCounters; - // Get the byte buffer in which perf counters can be captured. - // This is used by PerfCounters::Read - std::pair get_data_buffer() { - return {reinterpret_cast(values_.data()), - sizeof(uint64_t) * (kPadding + nr_counters_)}; - } - - static constexpr size_t kPadding = 1; - std::array values_; - const size_t nr_counters_; -}; - -// Collect PMU counters. 
The object, once constructed, is ready to be used by -// calling read(). PMU counter collection is enabled from the time create() is -// called, to obtain the object, until the object's destructor is called. -class PerfCounters final { - public: - // True iff this platform supports performance counters. - static const bool kSupported; - - bool IsValid() const { return is_valid_; } - static PerfCounters NoCounters() { return PerfCounters(); } - - ~PerfCounters(); - PerfCounters(PerfCounters&&) = default; - PerfCounters(const PerfCounters&) = delete; - - // Platform-specific implementations may choose to do some library - // initialization here. - static bool Initialize(); - - // Return a PerfCounters object ready to read the counters with the names - // specified. The values are user-mode only. The counter name format is - // implementation and OS specific. - // TODO: once we move to C++-17, this should be a std::optional, and then the - // IsValid() boolean can be dropped. - static PerfCounters Create(const std::vector& counter_names); - - // Take a snapshot of the current value of the counters into the provided - // valid PerfCounterValues storage. 
The values are populated such that: - // names()[i]'s value is (*values)[i] - BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { -#ifndef BENCHMARK_OS_WINDOWS - assert(values != nullptr); - assert(IsValid()); - auto buffer = values->get_data_buffer(); - auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); - return static_cast(read_bytes) == buffer.second; -#else - (void)values; - return false; -#endif - } - - const std::vector& names() const { return counter_names_; } - size_t num_counters() const { return counter_names_.size(); } - - private: - PerfCounters(const std::vector& counter_names, - std::vector&& counter_ids) - : counter_ids_(std::move(counter_ids)), - counter_names_(counter_names), - is_valid_(true) {} - PerfCounters() : is_valid_(false) {} - - std::vector counter_ids_; - const std::vector counter_names_; - const bool is_valid_; -}; - -// Typical usage of the above primitives. -class PerfCountersMeasurement final { - public: - PerfCountersMeasurement(PerfCounters&& c) - : counters_(std::move(c)), - start_values_(counters_.IsValid() ? counters_.names().size() : 0), - end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} - - bool IsValid() const { return counters_.IsValid(); } - - BENCHMARK_ALWAYS_INLINE void Start() { - assert(IsValid()); - // Tell the compiler to not move instructions above/below where we take - // the snapshot. - ClobberMemory(); - counters_.Snapshot(&start_values_); - ClobberMemory(); - } - - BENCHMARK_ALWAYS_INLINE std::vector> - StopAndGetMeasurements() { - assert(IsValid()); - // Tell the compiler to not move instructions above/below where we take - // the snapshot. 
- ClobberMemory(); - counters_.Snapshot(&end_values_); - ClobberMemory(); - - std::vector> ret; - for (size_t i = 0; i < counters_.names().size(); ++i) { - double measurement = static_cast(end_values_[i]) - - static_cast(start_values_[i]); - ret.push_back({counters_.names()[i], measurement}); - } - return ret; - } - - private: - PerfCounters counters_; - PerfCounterValues start_values_; - PerfCounterValues end_values_; -}; - -BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize(); - -} // namespace internal -} // namespace benchmark - -#endif // BENCHMARK_PERF_COUNTERS_H diff --git a/libcxx/utils/google-benchmark/src/re.h b/libcxx/utils/google-benchmark/src/re.h deleted file mode 100644 index fbe25037b463..000000000000 --- a/libcxx/utils/google-benchmark/src/re.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef BENCHMARK_RE_H_ -#define BENCHMARK_RE_H_ - -#include "internal_macros.h" - -// clang-format off - -#if !defined(HAVE_STD_REGEX) && \ - !defined(HAVE_GNU_POSIX_REGEX) && \ - !defined(HAVE_POSIX_REGEX) - // No explicit regex selection; detect based on builtin hints. - #if defined(BENCHMARK_OS_LINUX) || defined(BENCHMARK_OS_APPLE) - #define HAVE_POSIX_REGEX 1 - #elif __cplusplus >= 199711L - #define HAVE_STD_REGEX 1 - #endif -#endif - -// Prefer C regex libraries when compiling w/o exceptions so that we can -// correctly report errors. 
-#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \ - defined(BENCHMARK_HAVE_STD_REGEX) && \ - (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX)) - #undef HAVE_STD_REGEX -#endif - -#if defined(HAVE_STD_REGEX) - #include -#elif defined(HAVE_GNU_POSIX_REGEX) - #include -#elif defined(HAVE_POSIX_REGEX) - #include -#else -#error No regular expression backend was found! -#endif - -// clang-format on - -#include - -#include "check.h" - -namespace benchmark { - -// A wrapper around the POSIX regular expression API that provides automatic -// cleanup -class Regex { - public: - Regex() : init_(false) {} - - ~Regex(); - - // Compile a regular expression matcher from spec. Returns true on success. - // - // On failure (and if error is not nullptr), error is populated with a human - // readable error message if an error occurs. - bool Init(const std::string& spec, std::string* error); - - // Returns whether str matches the compiled regular expression. - bool Match(const std::string& str); - - private: - bool init_; -// Underlying regular expression object -#if defined(HAVE_STD_REGEX) - std::regex re_; -#elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX) - regex_t re_; -#else -#error No regular expression backend implementation available -#endif -}; - -#if defined(HAVE_STD_REGEX) - -inline bool Regex::Init(const std::string& spec, std::string* error) { -#ifdef BENCHMARK_HAS_NO_EXCEPTIONS - ((void)error); // suppress unused warning -#else - try { -#endif - re_ = std::regex(spec, std::regex_constants::extended); - init_ = true; -#ifndef BENCHMARK_HAS_NO_EXCEPTIONS -} -catch (const std::regex_error& e) { - if (error) { - *error = e.what(); - } -} -#endif -return init_; -} - -inline Regex::~Regex() {} - -inline bool Regex::Match(const std::string& str) { - if (!init_) { - return false; - } - return std::regex_search(str, re_); -} - -#else -inline bool Regex::Init(const std::string& spec, std::string* error) { - int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | 
REG_NOSUB); - if (ec != 0) { - if (error) { - size_t needed = regerror(ec, &re_, nullptr, 0); - char* errbuf = new char[needed]; - regerror(ec, &re_, errbuf, needed); - - // regerror returns the number of bytes necessary to null terminate - // the string, so we move that when assigning to error. - CHECK_NE(needed, 0); - error->assign(errbuf, needed - 1); - - delete[] errbuf; - } - - return false; - } - - init_ = true; - return true; -} - -inline Regex::~Regex() { - if (init_) { - regfree(&re_); - } -} - -inline bool Regex::Match(const std::string& str) { - if (!init_) { - return false; - } - return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; -} -#endif - -} // end namespace benchmark - -#endif // BENCHMARK_RE_H_ diff --git a/libcxx/utils/google-benchmark/src/reporter.cc b/libcxx/utils/google-benchmark/src/reporter.cc deleted file mode 100644 index 14dd40dc72f4..000000000000 --- a/libcxx/utils/google-benchmark/src/reporter.cc +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "benchmark/benchmark.h" -#include "timers.h" - -#include - -#include -#include -#include -#include -#include - -#include "check.h" -#include "string_util.h" - -namespace benchmark { -namespace internal { -extern std::map* global_context; -} - -BenchmarkReporter::BenchmarkReporter() - : output_stream_(&std::cout), error_stream_(&std::cerr) {} - -BenchmarkReporter::~BenchmarkReporter() {} - -void BenchmarkReporter::PrintBasicContext(std::ostream *out, - Context const &context) { - CHECK(out) << "cannot be null"; - auto &Out = *out; - - Out << LocalDateTimeString() << "\n"; - - if (context.executable_name) - Out << "Running " << context.executable_name << "\n"; - - const CPUInfo &info = context.cpu_info; - Out << "Run on (" << info.num_cpus << " X " - << (info.cycles_per_second / 1000000.0) << " MHz CPU " - << ((info.num_cpus > 1) ? "s" : "") << ")\n"; - if (info.caches.size() != 0) { - Out << "CPU Caches:\n"; - for (auto &CInfo : info.caches) { - Out << " L" << CInfo.level << " " << CInfo.type << " " - << (CInfo.size / 1024) << " KiB"; - if (CInfo.num_sharing != 0) - Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")"; - Out << "\n"; - } - } - if (!info.load_avg.empty()) { - Out << "Load Average: "; - for (auto It = info.load_avg.begin(); It != info.load_avg.end();) { - Out << StrFormat("%.2f", *It++); - if (It != info.load_avg.end()) Out << ", "; - } - Out << "\n"; - } - - if (internal::global_context != nullptr) { - for (const auto& kv: *internal::global_context) { - Out << kv.first << ": " << kv.second << "\n"; - } - } - - if (CPUInfo::Scaling::ENABLED == info.scaling) { - Out << "***WARNING*** CPU scaling is enabled, the benchmark " - "real time measurements may be noisy and will incur extra " - "overhead.\n"; - } - -#ifndef NDEBUG - Out << "***WARNING*** Library was built as DEBUG. Timings may be " - "affected.\n"; -#endif -} - -// No initializer because it's already initialized to NULL. 
-const char *BenchmarkReporter::Context::executable_name; - -BenchmarkReporter::Context::Context() - : cpu_info(CPUInfo::Get()), sys_info(SystemInfo::Get()) {} - -std::string BenchmarkReporter::Run::benchmark_name() const { - std::string name = run_name.str(); - if (run_type == RT_Aggregate) { - name += "_" + aggregate_name; - } - return name; -} - -double BenchmarkReporter::Run::GetAdjustedRealTime() const { - double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); - if (iterations != 0) new_time /= static_cast(iterations); - return new_time; -} - -double BenchmarkReporter::Run::GetAdjustedCPUTime() const { - double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit); - if (iterations != 0) new_time /= static_cast(iterations); - return new_time; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/sleep.cc b/libcxx/utils/google-benchmark/src/sleep.cc deleted file mode 100644 index 4609d540eade..000000000000 --- a/libcxx/utils/google-benchmark/src/sleep.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "sleep.h" - -#include -#include -#include - -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include -#endif - -#ifdef BENCHMARK_OS_ZOS -#include -#endif - -namespace benchmark { -#ifdef BENCHMARK_OS_WINDOWS -// Window's Sleep takes milliseconds argument. 
-void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } -void SleepForSeconds(double seconds) { - SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); -} -#else // BENCHMARK_OS_WINDOWS -void SleepForMicroseconds(int microseconds) { -#ifdef BENCHMARK_OS_ZOS - // z/OS does not support nanosleep. Instead call sleep() and then usleep() to - // sleep for the remaining microseconds because usleep() will fail if its - // argument is greater than 1000000. - div_t sleepTime = div(microseconds, kNumMicrosPerSecond); - int seconds = sleepTime.quot; - while (seconds != 0) - seconds = sleep(seconds); - while (usleep(sleepTime.rem) == -1 && errno == EINTR) - ; -#else - struct timespec sleep_time; - sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; - sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. -#endif -} - -void SleepForMilliseconds(int milliseconds) { - SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); -} - -void SleepForSeconds(double seconds) { - SleepForMicroseconds(static_cast(seconds * kNumMicrosPerSecond)); -} -#endif // BENCHMARK_OS_WINDOWS -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/sleep.h b/libcxx/utils/google-benchmark/src/sleep.h deleted file mode 100644 index f98551afe284..000000000000 --- a/libcxx/utils/google-benchmark/src/sleep.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BENCHMARK_SLEEP_H_ -#define BENCHMARK_SLEEP_H_ - -namespace benchmark { -const int kNumMillisPerSecond = 1000; -const int kNumMicrosPerMilli = 1000; -const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000; -const int kNumNanosPerMicro = 1000; -const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; - -void SleepForMilliseconds(int milliseconds); -void SleepForSeconds(double seconds); -} // end namespace benchmark - -#endif // 
BENCHMARK_SLEEP_H_ diff --git a/libcxx/utils/google-benchmark/src/statistics.cc b/libcxx/utils/google-benchmark/src/statistics.cc deleted file mode 100644 index 57472b9ff99b..000000000000 --- a/libcxx/utils/google-benchmark/src/statistics.cc +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. -// Copyright 2017 Roman Lebedev. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark/benchmark.h" - -#include -#include -#include -#include -#include -#include "check.h" -#include "statistics.h" - -namespace benchmark { - -auto StatisticsSum = [](const std::vector& v) { - return std::accumulate(v.begin(), v.end(), 0.0); -}; - -double StatisticsMean(const std::vector& v) { - if (v.empty()) return 0.0; - return StatisticsSum(v) * (1.0 / v.size()); -} - -double StatisticsMedian(const std::vector& v) { - if (v.size() < 3) return StatisticsMean(v); - std::vector copy(v); - - auto center = copy.begin() + v.size() / 2; - std::nth_element(copy.begin(), center, copy.end()); - - // did we have an odd number of samples? 
- // if yes, then center is the median - // it no, then we are looking for the average between center and the value - // before - if (v.size() % 2 == 1) return *center; - auto center2 = copy.begin() + v.size() / 2 - 1; - std::nth_element(copy.begin(), center2, copy.end()); - return (*center + *center2) / 2.0; -} - -// Return the sum of the squares of this sample set -auto SumSquares = [](const std::vector& v) { - return std::inner_product(v.begin(), v.end(), v.begin(), 0.0); -}; - -auto Sqr = [](const double dat) { return dat * dat; }; -auto Sqrt = [](const double dat) { - // Avoid NaN due to imprecision in the calculations - if (dat < 0.0) return 0.0; - return std::sqrt(dat); -}; - -double StatisticsStdDev(const std::vector& v) { - const auto mean = StatisticsMean(v); - if (v.empty()) return mean; - - // Sample standard deviation is undefined for n = 1 - if (v.size() == 1) return 0.0; - - const double avg_squares = SumSquares(v) * (1.0 / v.size()); - return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); -} - -std::vector ComputeStats( - const std::vector& reports) { - typedef BenchmarkReporter::Run Run; - std::vector results; - - auto error_count = - std::count_if(reports.begin(), reports.end(), - [](Run const& run) { return run.error_occurred; }); - - if (reports.size() - error_count < 2) { - // We don't report aggregated data if there was a single run. - return results; - } - - // Accumulators. - std::vector real_accumulated_time_stat; - std::vector cpu_accumulated_time_stat; - - real_accumulated_time_stat.reserve(reports.size()); - cpu_accumulated_time_stat.reserve(reports.size()); - - // All repetitions should be run with the same number of iterations so we - // can take this information from the first benchmark. 
- const IterationCount run_iterations = reports.front().iterations; - // create stats for user counters - struct CounterStat { - Counter c; - std::vector s; - }; - std::map counter_stats; - for (Run const& r : reports) { - for (auto const& cnt : r.counters) { - auto it = counter_stats.find(cnt.first); - if (it == counter_stats.end()) { - counter_stats.insert({cnt.first, {cnt.second, std::vector{}}}); - it = counter_stats.find(cnt.first); - it->second.s.reserve(reports.size()); - } else { - CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); - } - } - } - - // Populate the accumulators. - for (Run const& run : reports) { - CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); - CHECK_EQ(run_iterations, run.iterations); - if (run.error_occurred) continue; - real_accumulated_time_stat.emplace_back(run.real_accumulated_time); - cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); - // user counters - for (auto const& cnt : run.counters) { - auto it = counter_stats.find(cnt.first); - CHECK_NE(it, counter_stats.end()); - it->second.s.emplace_back(cnt.second); - } - } - - // Only add label if it is same for all runs - std::string report_label = reports[0].report_label; - for (std::size_t i = 1; i < reports.size(); i++) { - if (reports[i].report_label != report_label) { - report_label = ""; - break; - } - } - - const double iteration_rescale_factor = - double(reports.size()) / double(run_iterations); - - for (const auto& Stat : *reports[0].statistics) { - // Get the data from the accumulator to BenchmarkReporter::Run's. 
- Run data; - data.run_name = reports[0].run_name; - data.family_index = reports[0].family_index; - data.per_family_instance_index = reports[0].per_family_instance_index; - data.run_type = BenchmarkReporter::Run::RT_Aggregate; - data.threads = reports[0].threads; - data.repetitions = reports[0].repetitions; - data.repetition_index = Run::no_repetition_index; - data.aggregate_name = Stat.name_; - data.report_label = report_label; - - // It is incorrect to say that an aggregate is computed over - // run's iterations, because those iterations already got averaged. - // Similarly, if there are N repetitions with 1 iterations each, - // an aggregate will be computed over N measurements, not 1. - // Thus it is best to simply use the count of separate reports. - data.iterations = reports.size(); - - data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); - data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); - - // We will divide these times by data.iterations when reporting, but the - // data.iterations is not nessesairly the scale of these measurements, - // because in each repetition, these timers are sum over all the iterations. - // And if we want to say that the stats are over N repetitions and not - // M iterations, we need to multiply these by (N/M). - data.real_accumulated_time *= iteration_rescale_factor; - data.cpu_accumulated_time *= iteration_rescale_factor; - - data.time_unit = reports[0].time_unit; - - // user counters - for (auto const& kv : counter_stats) { - // Do *NOT* rescale the custom counters. They are already properly scaled. 
- const auto uc_stat = Stat.compute_(kv.second.s); - auto c = Counter(uc_stat, counter_stats[kv.first].c.flags, - counter_stats[kv.first].c.oneK); - data.counters[kv.first] = c; - } - - results.push_back(data); - } - - return results; -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/statistics.h b/libcxx/utils/google-benchmark/src/statistics.h deleted file mode 100644 index 7eccc85536a5..000000000000 --- a/libcxx/utils/google-benchmark/src/statistics.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. -// Copyright 2017 Roman Lebedev. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef STATISTICS_H_ -#define STATISTICS_H_ - -#include - -#include "benchmark/benchmark.h" - -namespace benchmark { - -// Return a vector containing the mean, median and standard devation information -// (and any user-specified info) for the specified list of reports. 
If 'reports' -// contains less than two non-errored runs an empty vector is returned -std::vector ComputeStats( - const std::vector& reports); - -double StatisticsMean(const std::vector& v); -double StatisticsMedian(const std::vector& v); -double StatisticsStdDev(const std::vector& v); - -} // end namespace benchmark - -#endif // STATISTICS_H_ diff --git a/libcxx/utils/google-benchmark/src/string_util.cc b/libcxx/utils/google-benchmark/src/string_util.cc deleted file mode 100644 index 3551418174fd..000000000000 --- a/libcxx/utils/google-benchmark/src/string_util.cc +++ /dev/null @@ -1,268 +0,0 @@ -#include "string_util.h" - -#include -#ifdef BENCHMARK_STL_ANDROID_GNUSTL -#include -#endif -#include -#include -#include -#include -#include - -#include "arraysize.h" - -namespace benchmark { -namespace { - -// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. -const char kBigSIUnits[] = "kMGTPEZY"; -// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. -const char kBigIECUnits[] = "KMGTPEZY"; -// milli, micro, nano, pico, femto, atto, zepto, yocto. -const char kSmallSIUnits[] = "munpfazy"; - -// We require that all three arrays have the same size. -static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), - "SI and IEC unit arrays must be the same size"); -static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), - "Small SI and Big SI unit arrays must be the same size"); - -static const int64_t kUnitsSize = arraysize(kBigSIUnits); - -void ToExponentAndMantissa(double val, double thresh, int precision, - double one_k, std::string* mantissa, - int64_t* exponent) { - std::stringstream mantissa_stream; - - if (val < 0) { - mantissa_stream << "-"; - val = -val; - } - - // Adjust threshold so that it never excludes things which can't be rendered - // in 'precision' digits. 
- const double adjusted_threshold = - std::max(thresh, 1.0 / std::pow(10.0, precision)); - const double big_threshold = adjusted_threshold * one_k; - const double small_threshold = adjusted_threshold; - // Values in ]simple_threshold,small_threshold[ will be printed as-is - const double simple_threshold = 0.01; - - if (val > big_threshold) { - // Positive powers - double scaled = val; - for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) { - scaled /= one_k; - if (scaled <= big_threshold) { - mantissa_stream << scaled; - *exponent = i + 1; - *mantissa = mantissa_stream.str(); - return; - } - } - mantissa_stream << val; - *exponent = 0; - } else if (val < small_threshold) { - // Negative powers - if (val < simple_threshold) { - double scaled = val; - for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) { - scaled *= one_k; - if (scaled >= small_threshold) { - mantissa_stream << scaled; - *exponent = -static_cast(i + 1); - *mantissa = mantissa_stream.str(); - return; - } - } - } - mantissa_stream << val; - *exponent = 0; - } else { - mantissa_stream << val; - *exponent = 0; - } - *mantissa = mantissa_stream.str(); -} - -std::string ExponentToPrefix(int64_t exponent, bool iec) { - if (exponent == 0) return ""; - - const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); - if (index >= kUnitsSize) return ""; - - const char* array = - (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); - if (iec) - return array[index] + std::string("i"); - else - return std::string(1, array[index]); -} - -std::string ToBinaryStringFullySpecified(double value, double threshold, - int precision, double one_k = 1024.0) { - std::string mantissa; - int64_t exponent; - ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa, - &exponent); - return mantissa + ExponentToPrefix(exponent, false); -} - -} // end namespace - -void AppendHumanReadable(int n, std::string* str) { - std::stringstream ss; - // Round down to the nearest SI prefix. 
- ss << ToBinaryStringFullySpecified(n, 1.0, 0); - *str += ss.str(); -} - -std::string HumanReadableNumber(double n, double one_k) { - // 1.1 means that figures up to 1.1k should be shown with the next unit down; - // this softens edge effects. - // 1 means that we should show one decimal place of precision. - return ToBinaryStringFullySpecified(n, 1.1, 1, one_k); -} - -std::string StrFormatImp(const char* msg, va_list args) { - // we might need a second shot at this, so pre-emptivly make a copy - va_list args_cp; - va_copy(args_cp, args); - - // TODO(ericwf): use std::array for first attempt to avoid one memory - // allocation guess what the size might be - std::array local_buff; - std::size_t size = local_buff.size(); - // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation - // in the android-ndk - auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); - - va_end(args_cp); - - // handle empty expansion - if (ret == 0) return std::string{}; - if (static_cast(ret) < size) - return std::string(local_buff.data()); - - // we did not provide a long enough buffer on our first attempt. - // add 1 to size to account for null-byte in size cast to prevent overflow - size = static_cast(ret) + 1; - auto buff_ptr = std::unique_ptr(new char[size]); - // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation - // in the android-ndk - ret = vsnprintf(buff_ptr.get(), size, msg, args); - return std::string(buff_ptr.get()); -} - -std::string StrFormat(const char* format, ...) 
{ - va_list args; - va_start(args, format); - std::string tmp = StrFormatImp(format, args); - va_end(args); - return tmp; -} - -std::vector StrSplit(const std::string& str, char delim) { - if (str.empty()) return {}; - std::vector ret; - size_t first = 0; - size_t next = str.find(delim); - for (; next != std::string::npos; - first = next + 1, next = str.find(delim, first)) { - ret.push_back(str.substr(first, next - first)); - } - ret.push_back(str.substr(first)); - return ret; -} - -#ifdef BENCHMARK_STL_ANDROID_GNUSTL -/* - * GNU STL in Android NDK lacks support for some C++11 functions, including - * stoul, stoi, stod. We reimplement them here using C functions strtoul, - * strtol, strtod. Note that reimplemented functions are in benchmark:: - * namespace, not std:: namespace. - */ -unsigned long stoul(const std::string& str, size_t* pos, int base) { - /* Record previous errno */ - const int oldErrno = errno; - errno = 0; - - const char* strStart = str.c_str(); - char* strEnd = const_cast(strStart); - const unsigned long result = strtoul(strStart, &strEnd, base); - - const int strtoulErrno = errno; - /* Restore previous errno */ - errno = oldErrno; - - /* Check for errors and return */ - if (strtoulErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of unsigned long"); - } else if (strEnd == strStart || strtoulErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); - } - if (pos != nullptr) { - *pos = static_cast(strEnd - strStart); - } - return result; -} - -int stoi(const std::string& str, size_t* pos, int base) { - /* Record previous errno */ - const int oldErrno = errno; - errno = 0; - - const char* strStart = str.c_str(); - char* strEnd = const_cast(strStart); - const long result = strtol(strStart, &strEnd, base); - - const int strtolErrno = errno; - /* Restore previous errno */ - errno = oldErrno; - - /* Check for errors and return */ - if (strtolErrno == ERANGE || 
long(int(result)) != result) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); - } else if (strEnd == strStart || strtolErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); - } - if (pos != nullptr) { - *pos = static_cast(strEnd - strStart); - } - return int(result); -} - -double stod(const std::string& str, size_t* pos) { - /* Record previous errno */ - const int oldErrno = errno; - errno = 0; - - const char* strStart = str.c_str(); - char* strEnd = const_cast(strStart); - const double result = strtod(strStart, &strEnd); - - /* Restore previous errno */ - const int strtodErrno = errno; - errno = oldErrno; - - /* Check for errors and return */ - if (strtodErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); - } else if (strEnd == strStart || strtodErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); - } - if (pos != nullptr) { - *pos = static_cast(strEnd - strStart); - } - return result; -} -#endif - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/string_util.h b/libcxx/utils/google-benchmark/src/string_util.h deleted file mode 100644 index 6bc28b6912a8..000000000000 --- a/libcxx/utils/google-benchmark/src/string_util.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef BENCHMARK_STRING_UTIL_H_ -#define BENCHMARK_STRING_UTIL_H_ - -#include -#include -#include -#include "internal_macros.h" - -namespace benchmark { - -void AppendHumanReadable(int n, std::string* str); - -std::string HumanReadableNumber(double n, double one_k = 1024.0); - -#if defined(__MINGW32__) -__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2))) -#elif defined(__GNUC__) -__attribute__((format(printf, 1, 2))) -#endif -std::string -StrFormat(const char* format, ...); - -inline std::ostream& StrCatImp(std::ostream& out) BENCHMARK_NOEXCEPT { - return out; -} - -template -inline std::ostream& 
StrCatImp(std::ostream& out, First&& f, Rest&&... rest) { - out << std::forward(f); - return StrCatImp(out, std::forward(rest)...); -} - -template -inline std::string StrCat(Args&&... args) { - std::ostringstream ss; - StrCatImp(ss, std::forward(args)...); - return ss.str(); -} - -std::vector StrSplit(const std::string& str, char delim); - -#ifdef BENCHMARK_STL_ANDROID_GNUSTL -/* - * GNU STL in Android NDK lacks support for some C++11 functions, including - * stoul, stoi, stod. We reimplement them here using C functions strtoul, - * strtol, strtod. Note that reimplemented functions are in benchmark:: - * namespace, not std:: namespace. - */ -unsigned long stoul(const std::string& str, size_t* pos = nullptr, - int base = 10); -int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); -double stod(const std::string& str, size_t* pos = nullptr); -#else -using std::stoul; -using std::stoi; -using std::stod; -#endif - -} // end namespace benchmark - -#endif // BENCHMARK_STRING_UTIL_H_ diff --git a/libcxx/utils/google-benchmark/src/sysinfo.cc b/libcxx/utils/google-benchmark/src/sysinfo.cc deleted file mode 100644 index c1969ea2d3fe..000000000000 --- a/libcxx/utils/google-benchmark/src/sysinfo.cc +++ /dev/null @@ -1,726 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include -#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA -#include -#include -#include -#else -#include -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD -#include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ - defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \ - defined BENCHMARK_OS_DRAGONFLY -#define BENCHMARK_HAS_SYSCTL -#include -#endif -#endif -#if defined(BENCHMARK_OS_SOLARIS) -#include -#endif -#if defined(BENCHMARK_OS_QNX) -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "check.h" -#include "cycleclock.h" -#include "internal_macros.h" -#include "log.h" -#include "sleep.h" -#include "string_util.h" - -namespace benchmark { -namespace { - -void PrintImp(std::ostream& out) { out << std::endl; } - -template -void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { - out << std::forward(f); - PrintImp(out, std::forward(rest)...); -} - -template -BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { - PrintImp(std::cerr, std::forward(args)...); - std::exit(EXIT_FAILURE); -} - -#ifdef BENCHMARK_HAS_SYSCTL - -/// ValueUnion - A type used to correctly alias the byte-for-byte output of -/// `sysctl` with the result type it's to be interpreted as. -struct ValueUnion { - union DataT { - uint32_t uint32_value; - uint64_t uint64_value; - // For correct aliasing of union members from bytes. - char bytes[8]; - }; - using DataPtr = std::unique_ptr; - - // The size of the data union member + its trailing array size. 
- size_t Size; - DataPtr Buff; - - public: - ValueUnion() : Size(0), Buff(nullptr, &std::free) {} - - explicit ValueUnion(size_t BuffSize) - : Size(sizeof(DataT) + BuffSize), - Buff(::new (std::malloc(Size)) DataT(), &std::free) {} - - ValueUnion(ValueUnion&& other) = default; - - explicit operator bool() const { return bool(Buff); } - - char* data() const { return Buff->bytes; } - - std::string GetAsString() const { return std::string(data()); } - - int64_t GetAsInteger() const { - if (Size == sizeof(Buff->uint32_value)) - return static_cast(Buff->uint32_value); - else if (Size == sizeof(Buff->uint64_value)) - return static_cast(Buff->uint64_value); - BENCHMARK_UNREACHABLE(); - } - - uint64_t GetAsUnsigned() const { - if (Size == sizeof(Buff->uint32_value)) - return Buff->uint32_value; - else if (Size == sizeof(Buff->uint64_value)) - return Buff->uint64_value; - BENCHMARK_UNREACHABLE(); - } - - template - std::array GetAsArray() { - const int ArrSize = sizeof(T) * N; - CHECK_LE(ArrSize, Size); - std::array Arr; - std::memcpy(Arr.data(), data(), ArrSize); - return Arr; - } -}; - -ValueUnion GetSysctlImp(std::string const& Name) { -#if defined BENCHMARK_OS_OPENBSD - int mib[2]; - - mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){ - ValueUnion buff(sizeof(int)); - - if (Name == "hw.ncpu") { - mib[1] = HW_NCPU; - } else { - mib[1] = HW_CPUSPEED; - } - - if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) { - return ValueUnion(); - } - return buff; - } - return ValueUnion(); -#else - size_t CurBuffSize = 0; - if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) - return ValueUnion(); - - ValueUnion buff(CurBuffSize); - if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) - return buff; - return ValueUnion(); -#endif -} - -BENCHMARK_MAYBE_UNUSED -bool GetSysctl(std::string const& Name, std::string* Out) { - Out->clear(); - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - 
Out->assign(Buff.data()); - return true; -} - -template ::value>::type> -bool GetSysctl(std::string const& Name, Tp* Out) { - *Out = 0; - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = static_cast(Buff.GetAsUnsigned()); - return true; -} - -template -bool GetSysctl(std::string const& Name, std::array* Out) { - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = Buff.GetAsArray(); - return true; -} -#endif - -template -bool ReadFromFile(std::string const& fname, ArgT* arg) { - *arg = ArgT(); - std::ifstream f(fname.c_str()); - if (!f.is_open()) return false; - f >> *arg; - return f.good(); -} - -CPUInfo::Scaling CpuScaling(int num_cpus) { - // We don't have a valid CPU count, so don't even bother. - if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; -#ifdef BENCHMARK_OS_QNX - return CPUInfo::Scaling::UNKNOWN; -#endif -#ifndef BENCHMARK_OS_WINDOWS - // On Linux, the CPUfreq subsystem exposes CPU information as files on the - // local file system. If reading the exported files fails, then we may not be - // running on Linux, so we silently ignore all the read errors. 
- std::string res; - for (int cpu = 0; cpu < num_cpus; ++cpu) { - std::string governor_file = - StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; - } - return CPUInfo::Scaling::DISABLED; -#endif - return CPUInfo::Scaling::UNKNOWN; -} - -int CountSetBitsInCPUMap(std::string Val) { - auto CountBits = [](std::string Part) { - using CPUMask = std::bitset; - Part = "0x" + Part; - CPUMask Mask(benchmark::stoul(Part, nullptr, 16)); - return static_cast(Mask.count()); - }; - size_t Pos; - int total = 0; - while ((Pos = Val.find(',')) != std::string::npos) { - total += CountBits(Val.substr(0, Pos)); - Val = Val.substr(Pos + 1); - } - if (!Val.empty()) { - total += CountBits(Val); - } - return total; -} - -BENCHMARK_MAYBE_UNUSED -std::vector GetCacheSizesFromKVFS() { - std::vector res; - std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; - int Idx = 0; - while (true) { - CPUInfo::CacheInfo info; - std::string FPath = StrCat(dir, "index", Idx++, "/"); - std::ifstream f(StrCat(FPath, "size").c_str()); - if (!f.is_open()) break; - std::string suffix; - f >> info.size; - if (f.fail()) - PrintErrorAndDie("Failed while reading file '", FPath, "size'"); - if (f.good()) { - f >> suffix; - if (f.bad()) - PrintErrorAndDie( - "Invalid cache size format: failed to read size suffix"); - else if (f && suffix != "K") - PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); - else if (suffix == "K") - info.size *= 1024; - } - if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) - PrintErrorAndDie("Failed to read from file ", FPath, "type"); - if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) - PrintErrorAndDie("Failed to read from file ", FPath, "level"); - std::string map_str; - if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) - PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); - info.num_sharing = 
CountSetBitsInCPUMap(map_str); - res.push_back(info); - } - - return res; -} - -#ifdef BENCHMARK_OS_MACOSX -std::vector GetCacheSizesMacOSX() { - std::vector res; - std::array CacheCounts{{0, 0, 0, 0}}; - GetSysctl("hw.cacheconfig", &CacheCounts); - - struct { - std::string name; - std::string type; - int level; - uint64_t num_sharing; - } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, - {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, - {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, - {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; - for (auto& C : Cases) { - int val; - if (!GetSysctl(C.name, &val)) continue; - CPUInfo::CacheInfo info; - info.type = C.type; - info.level = C.level; - info.size = val; - info.num_sharing = static_cast(C.num_sharing); - res.push_back(std::move(info)); - } - return res; -} -#elif defined(BENCHMARK_OS_WINDOWS) -std::vector GetCacheSizesWindows() { - std::vector res; - DWORD buffer_size = 0; - using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; - using CInfo = CACHE_DESCRIPTOR; - - using UPtr = std::unique_ptr; - GetLogicalProcessorInformation(nullptr, &buffer_size); - UPtr buff((PInfo*)malloc(buffer_size), &std::free); - if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) - PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", - GetLastError()); - - PInfo* it = buff.get(); - PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); - - for (; it != end; ++it) { - if (it->Relationship != RelationCache) continue; - using BitSet = std::bitset; - BitSet B(it->ProcessorMask); - // To prevent duplicates, only consider caches where CPU 0 is specified - if (!B.test(0)) continue; - CInfo* Cache = &it->Cache; - CPUInfo::CacheInfo C; - C.num_sharing = static_cast(B.count()); - C.level = Cache->Level; - C.size = Cache->Size; - switch (Cache->Type) { - case CacheUnified: - C.type = "Unified"; - break; - case CacheInstruction: - C.type = "Instruction"; - break; - case CacheData: - C.type = 
"Data"; - break; - case CacheTrace: - C.type = "Trace"; - break; - default: - C.type = "Unknown"; - break; - } - res.push_back(C); - } - return res; -} -#elif BENCHMARK_OS_QNX -std::vector GetCacheSizesQNX() { - std::vector res; - struct cacheattr_entry *cache = SYSPAGE_ENTRY(cacheattr); - uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); - int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize ; - for(int i = 0; i < num; ++i ) { - CPUInfo::CacheInfo info; - switch (cache->flags){ - case CACHE_FLAG_INSTR : - info.type = "Instruction"; - info.level = 1; - break; - case CACHE_FLAG_DATA : - info.type = "Data"; - info.level = 1; - break; - case CACHE_FLAG_UNIFIED : - info.type = "Unified"; - info.level = 2; - break; - case CACHE_FLAG_SHARED : - info.type = "Shared"; - info.level = 3; - break; - default : - continue; - break; - } - info.size = cache->line_size * cache->num_lines; - info.num_sharing = 0; - res.push_back(std::move(info)); - cache = SYSPAGE_ARRAY_ADJ_OFFSET(cacheattr, cache, elsize); - } - return res; -} -#endif - -std::vector GetCacheSizes() { -#ifdef BENCHMARK_OS_MACOSX - return GetCacheSizesMacOSX(); -#elif defined(BENCHMARK_OS_WINDOWS) - return GetCacheSizesWindows(); -#elif defined(BENCHMARK_OS_QNX) - return GetCacheSizesQNX(); -#else - return GetCacheSizesFromKVFS(); -#endif -} - -std::string GetSystemName() { -#if defined(BENCHMARK_OS_WINDOWS) - std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1; - TCHAR hostname[COUNT] = {'\0'}; - DWORD DWCOUNT = COUNT; - if (!GetComputerName(hostname, &DWCOUNT)) - return std::string(""); -#ifndef UNICODE - str = std::string(hostname, DWCOUNT); -#else - //Using wstring_convert, Is deprecated in C++17 - using convert_type = std::codecvt_utf8; - std::wstring_convert converter; - std::wstring wStr(hostname, DWCOUNT); - str = converter.to_bytes(wStr); -#endif - return str; -#else // defined(BENCHMARK_OS_WINDOWS) -#ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX 
defined -#define HOST_NAME_MAX 64 -#elif defined(BENCHMARK_OS_NACL) -#define HOST_NAME_MAX 64 -#elif defined(BENCHMARK_OS_QNX) -#define HOST_NAME_MAX 154 -#elif defined(BENCHMARK_OS_RTEMS) -#define HOST_NAME_MAX 256 -#else -#warning "HOST_NAME_MAX not defined. using 64" -#define HOST_NAME_MAX 64 -#endif -#endif // def HOST_NAME_MAX - char hostname[HOST_NAME_MAX]; - int retVal = gethostname(hostname, HOST_NAME_MAX); - if (retVal != 0) return std::string(""); - return std::string(hostname); -#endif // Catch-all POSIX block. -} - -int GetNumCPUs() { -#ifdef BENCHMARK_HAS_SYSCTL - int NumCPU = -1; - if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; - fprintf(stderr, "Err: %s\n", strerror(errno)); - std::exit(EXIT_FAILURE); -#elif defined(BENCHMARK_OS_WINDOWS) - SYSTEM_INFO sysinfo; - // Use memset as opposed to = {} to avoid GCC missing initializer false - // positives. - std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); - GetSystemInfo(&sysinfo); - return sysinfo.dwNumberOfProcessors; // number of logical - // processors in the current - // group -#elif defined(BENCHMARK_OS_SOLARIS) - // Returns -1 in case of a failure. 
- int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); - if (NumCPU < 0) { - fprintf(stderr, - "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", - strerror(errno)); - } - return NumCPU; -#elif defined(BENCHMARK_OS_QNX) - return static_cast(_syspage_ptr->num_cpu); -#else - int NumCPUs = 0; - int MaxID = -1; - std::ifstream f("/proc/cpuinfo"); - if (!f.is_open()) { - std::cerr << "failed to open /proc/cpuinfo\n"; - return -1; - } - const std::string Key = "processor"; - std::string ln; - while (std::getline(f, ln)) { - if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); - std::string value; -#if defined(__s390__) - // s390 has another format in /proc/cpuinfo - // it needs to be parsed differently - if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1); -#else - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); -#endif - if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { - NumCPUs++; - if (!value.empty()) { - int CurID = benchmark::stoi(value); - MaxID = std::max(CurID, MaxID); - } - } - } - if (f.bad()) { - std::cerr << "Failure reading /proc/cpuinfo\n"; - return -1; - } - if (!f.eof()) { - std::cerr << "Failed to read to end of /proc/cpuinfo\n"; - return -1; - } - f.close(); - - if ((MaxID + 1) != NumCPUs) { - fprintf(stderr, - "CPU ID assignments in /proc/cpuinfo seem messed up." - " This is usually caused by a bad BIOS.\n"); - } - return NumCPUs; -#endif - BENCHMARK_UNREACHABLE(); -} - -double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { - // Currently, scaling is only used on linux path here, - // suppress diagnostics about it being unused on other paths. - (void)scaling; - -#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN - long freq; - - // If the kernel is exporting the tsc frequency use that. 
There are issues - // where cpuinfo_max_freq cannot be relied on because the BIOS may be - // exporintg an invalid p-state (on x86) or p-states may be used to put the - // processor in a new mode (turbo mode). Essentially, those frequencies - // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as - // well. - if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) - // If CPU scaling is disabled, use the the *current* frequency. - // Note that we specifically don't want to read cpuinfo_cur_freq, - // because it is only readable by root. - || (scaling == CPUInfo::Scaling::DISABLED && - ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", - &freq)) - // Otherwise, if CPU scaling may be in effect, we want to use - // the *maximum* frequency, not whatever CPU speed some random processor - // happens to be using now. - || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &freq)) { - // The value is in kHz (as the file name suggests). For example, on a - // 2GHz warpstation, the file contains the value "2000000". - return freq * 1000.0; - } - - const double error_value = -1; - double bogo_clock = error_value; - - std::ifstream f("/proc/cpuinfo"); - if (!f.is_open()) { - std::cerr << "failed to open /proc/cpuinfo\n"; - return error_value; - } - - auto startsWithKey = [](std::string const& Value, std::string const& Key) { - if (Key.size() > Value.size()) return false; - auto Cmp = [&](char X, char Y) { - return std::tolower(X) == std::tolower(Y); - }; - return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); - }; - - std::string ln; - while (std::getline(f, ln)) { - if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); - std::string value; - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); - // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only - // accept positive values. 
Some environments (virtual machines) report zero, - // which would cause infinite looping in WallTime_Init. - if (startsWithKey(ln, "cpu MHz")) { - if (!value.empty()) { - double cycles_per_second = benchmark::stod(value) * 1000000.0; - if (cycles_per_second > 0) return cycles_per_second; - } - } else if (startsWithKey(ln, "bogomips")) { - if (!value.empty()) { - bogo_clock = benchmark::stod(value) * 1000000.0; - if (bogo_clock < 0.0) bogo_clock = error_value; - } - } - } - if (f.bad()) { - std::cerr << "Failure reading /proc/cpuinfo\n"; - return error_value; - } - if (!f.eof()) { - std::cerr << "Failed to read to end of /proc/cpuinfo\n"; - return error_value; - } - f.close(); - // If we found the bogomips clock, but nothing better, we'll use it (but - // we're not happy about it); otherwise, fallback to the rough estimation - // below. - if (bogo_clock >= 0.0) return bogo_clock; - -#elif defined BENCHMARK_HAS_SYSCTL - constexpr auto* FreqStr = -#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) - "machdep.tsc_freq"; -#elif defined BENCHMARK_OS_OPENBSD - "hw.cpuspeed"; -#elif defined BENCHMARK_OS_DRAGONFLY - "hw.tsc_frequency"; -#else - "hw.cpufrequency"; -#endif - unsigned long long hz = 0; -#if defined BENCHMARK_OS_OPENBSD - if (GetSysctl(FreqStr, &hz)) return hz * 1000000; -#else - if (GetSysctl(FreqStr, &hz)) return hz; -#endif - fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - FreqStr, strerror(errno)); - -#elif defined BENCHMARK_OS_WINDOWS - // In NT, read MHz from the registry. If we fail to do so or we're in win9x - // then make a crude estimate. 
- DWORD data, data_size = sizeof(data); - if (IsWindowsXPOrGreater() && - SUCCEEDED( - SHGetValueA(HKEY_LOCAL_MACHINE, - "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", - "~MHz", nullptr, &data, &data_size))) - return static_cast((int64_t)data * - (int64_t)(1000 * 1000)); // was mhz -#elif defined (BENCHMARK_OS_SOLARIS) - kstat_ctl_t *kc = kstat_open(); - if (!kc) { - std::cerr << "failed to open /dev/kstat\n"; - return -1; - } - kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); - if (!ksp) { - std::cerr << "failed to lookup in /dev/kstat\n"; - return -1; - } - if (kstat_read(kc, ksp, NULL) < 0) { - std::cerr << "failed to read from /dev/kstat\n"; - return -1; - } - kstat_named_t *knp = - (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); - if (!knp) { - std::cerr << "failed to lookup data in /dev/kstat\n"; - return -1; - } - if (knp->data_type != KSTAT_DATA_UINT64) { - std::cerr << "current_clock_Hz is of unexpected data type: " - << knp->data_type << "\n"; - return -1; - } - double clock_hz = knp->value.ui64; - kstat_close(kc); - return clock_hz; -#elif defined (BENCHMARK_OS_QNX) - return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * - (int64_t)(1000 * 1000)); -#endif - // If we've fallen through, attempt to roughly estimate the CPU clock rate. 
- const int estimate_time_ms = 1000; - const auto start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return static_cast(cycleclock::Now() - start_ticks); -} - -std::vector GetLoadAvg() { -#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ - defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ - defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ - !defined(__ANDROID__) - constexpr int kMaxSamples = 3; - std::vector res(kMaxSamples, 0.0); - const int nelem = getloadavg(res.data(), kMaxSamples); - if (nelem < 1) { - res.clear(); - } else { - res.resize(nelem); - } - return res; -#else - return {}; -#endif -} - -} // end namespace - -const CPUInfo& CPUInfo::Get() { - static const CPUInfo* info = new CPUInfo(); - return *info; -} - -CPUInfo::CPUInfo() - : num_cpus(GetNumCPUs()), - scaling(CpuScaling(num_cpus)), - cycles_per_second(GetCPUCyclesPerSecond(scaling)), - caches(GetCacheSizes()), - load_avg(GetLoadAvg()) {} - -const SystemInfo& SystemInfo::Get() { - static const SystemInfo* info = new SystemInfo(); - return *info; -} - -SystemInfo::SystemInfo() : name(GetSystemName()) {} -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/thread_manager.h b/libcxx/utils/google-benchmark/src/thread_manager.h deleted file mode 100644 index 28e2dd53aff2..000000000000 --- a/libcxx/utils/google-benchmark/src/thread_manager.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef BENCHMARK_THREAD_MANAGER_H -#define BENCHMARK_THREAD_MANAGER_H - -#include - -#include "benchmark/benchmark.h" -#include "mutex.h" - -namespace benchmark { -namespace internal { - -class ThreadManager { - public: - explicit ThreadManager(int num_threads) - : alive_threads_(num_threads), start_stop_barrier_(num_threads) {} - - Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) { - return benchmark_mutex_; - } - - bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { - return start_stop_barrier_.wait(); - } - - 
void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { - start_stop_barrier_.removeThread(); - if (--alive_threads_ == 0) { - MutexLock lock(end_cond_mutex_); - end_condition_.notify_all(); - } - } - - void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { - MutexLock lock(end_cond_mutex_); - end_condition_.wait(lock.native_handle(), - [this]() { return alive_threads_ == 0; }); - } - - public: - struct Result { - IterationCount iterations = 0; - double real_time_used = 0; - double cpu_time_used = 0; - double manual_time_used = 0; - int64_t complexity_n = 0; - std::string report_label_; - std::string error_message_; - bool has_error_ = false; - UserCounters counters; - }; - GUARDED_BY(GetBenchmarkMutex()) Result results; - - private: - mutable Mutex benchmark_mutex_; - std::atomic alive_threads_; - Barrier start_stop_barrier_; - Mutex end_cond_mutex_; - Condition end_condition_; -}; - -} // namespace internal -} // namespace benchmark - -#endif // BENCHMARK_THREAD_MANAGER_H diff --git a/libcxx/utils/google-benchmark/src/thread_timer.h b/libcxx/utils/google-benchmark/src/thread_timer.h deleted file mode 100644 index 1703ca0d6f87..000000000000 --- a/libcxx/utils/google-benchmark/src/thread_timer.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef BENCHMARK_THREAD_TIMER_H -#define BENCHMARK_THREAD_TIMER_H - -#include "check.h" -#include "timers.h" - -namespace benchmark { -namespace internal { - -class ThreadTimer { - explicit ThreadTimer(bool measure_process_cpu_time_) - : measure_process_cpu_time(measure_process_cpu_time_) {} - - public: - static ThreadTimer Create() { - return ThreadTimer(/*measure_process_cpu_time_=*/false); - } - static ThreadTimer CreateProcessCpuTime() { - return ThreadTimer(/*measure_process_cpu_time_=*/true); - } - - // Called by each thread - void StartTimer() { - running_ = true; - start_real_time_ = ChronoClockNow(); - start_cpu_time_ = ReadCpuTimerOfChoice(); - } - - // Called by each thread - void StopTimer() { - CHECK(running_); - running_ = false; 
- real_time_used_ += ChronoClockNow() - start_real_time_; - // Floating point error can result in the subtraction producing a negative - // time. Guard against that. - cpu_time_used_ += - std::max(ReadCpuTimerOfChoice() - start_cpu_time_, 0); - } - - // Called by each thread - void SetIterationTime(double seconds) { manual_time_used_ += seconds; } - - bool running() const { return running_; } - - // REQUIRES: timer is not running - double real_time_used() const { - CHECK(!running_); - return real_time_used_; - } - - // REQUIRES: timer is not running - double cpu_time_used() const { - CHECK(!running_); - return cpu_time_used_; - } - - // REQUIRES: timer is not running - double manual_time_used() const { - CHECK(!running_); - return manual_time_used_; - } - - private: - double ReadCpuTimerOfChoice() const { - if (measure_process_cpu_time) return ProcessCPUUsage(); - return ThreadCPUUsage(); - } - - // should the thread, or the process, time be measured? - const bool measure_process_cpu_time; - - bool running_ = false; // Is the timer running - double start_real_time_ = 0; // If running_ - double start_cpu_time_ = 0; // If running_ - - // Accumulated time so far (does not contain current slice if running_) - double real_time_used_ = 0; - double cpu_time_used_ = 0; - // Manually set iteration time. User sets this with SetIterationTime(seconds). - double manual_time_used_ = 0; -}; - -} // namespace internal -} // namespace benchmark - -#endif // BENCHMARK_THREAD_TIMER_H diff --git a/libcxx/utils/google-benchmark/src/timers.cc b/libcxx/utils/google-benchmark/src/timers.cc deleted file mode 100644 index af4767dff944..000000000000 --- a/libcxx/utils/google-benchmark/src/timers.cc +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "timers.h" -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include -#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA -#include -#include -#else -#include -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD -#include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_DRAGONFLY || \ - defined BENCHMARK_OS_MACOSX -#include -#endif -#if defined(BENCHMARK_OS_MACOSX) -#include -#include -#include -#endif -#endif - -#ifdef BENCHMARK_OS_EMSCRIPTEN -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "check.h" -#include "log.h" -#include "sleep.h" -#include "string_util.h" - -namespace benchmark { - -// Suppress unused warnings on helper functions. 
-#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wunused-function" -#endif - -namespace { -#if defined(BENCHMARK_OS_WINDOWS) -double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { - ULARGE_INTEGER kernel; - ULARGE_INTEGER user; - kernel.HighPart = kernel_time.dwHighDateTime; - kernel.LowPart = kernel_time.dwLowDateTime; - user.HighPart = user_time.dwHighDateTime; - user.LowPart = user_time.dwLowDateTime; - return (static_cast(kernel.QuadPart) + - static_cast(user.QuadPart)) * - 1e-7; -} -#elif !defined(BENCHMARK_OS_FUCHSIA) -double MakeTime(struct rusage const& ru) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); -} -#endif -#if defined(BENCHMARK_OS_MACOSX) -double MakeTime(thread_basic_info_data_t const& info) { - return (static_cast(info.user_time.seconds) + - static_cast(info.user_time.microseconds) * 1e-6 + - static_cast(info.system_time.seconds) + - static_cast(info.system_time.microseconds) * 1e-6); -} -#endif -#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID) -double MakeTime(struct timespec const& ts) { - return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); -} -#endif - -BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) { - std::cerr << "ERROR: " << msg << std::endl; - std::exit(EXIT_FAILURE); -} - -} // end namespace - -double ProcessCPUUsage() { -#if defined(BENCHMARK_OS_WINDOWS) - HANDLE proc = GetCurrentProcess(); - FILETIME creation_time; - FILETIME exit_time; - FILETIME kernel_time; - FILETIME user_time; - if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, - &user_time)) - return MakeTime(kernel_time, user_time); - DiagnoseAndExit("GetProccessTimes() failed"); -#elif defined(BENCHMARK_OS_EMSCRIPTEN) - // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. - // Use Emscripten-specific API. 
Reported CPU time would be exactly the - // same as total time, but this is ok because there aren't long-latency - // syncronous system calls in Emscripten. - return emscripten_get_now() * 1e-3; -#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 - struct timespec spec; - if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) - return MakeTime(spec); - DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); -#else - struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru); - DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed"); -#endif -} - -double ThreadCPUUsage() { -#if defined(BENCHMARK_OS_WINDOWS) - HANDLE this_thread = GetCurrentThread(); - FILETIME creation_time; - FILETIME exit_time; - FILETIME kernel_time; - FILETIME user_time; - GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, - &user_time); - return MakeTime(kernel_time, user_time); -#elif defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 - mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; - thread_basic_info_data_t info; - mach_port_t thread = pthread_mach_thread_np(pthread_self()); - if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) == - KERN_SUCCESS) { - return MakeTime(info); - } - DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); -#elif defined(BENCHMARK_OS_EMSCRIPTEN) - // Emscripten doesn't support traditional threads - return ProcessCPUUsage(); -#elif defined(BENCHMARK_OS_RTEMS) - // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. 
See - // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c - return ProcessCPUUsage(); -#elif defined(BENCHMARK_OS_SOLARIS) - struct rusage ru; - if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru); - DiagnoseAndExit("getrusage(RUSAGE_LWP, ...) failed"); -#elif defined(CLOCK_THREAD_CPUTIME_ID) - struct timespec ts; - if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts); - DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); -#else -#error Per-thread timing is not available on your system. -#endif -} - -std::string LocalDateTimeString() { - // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM. - typedef std::chrono::system_clock Clock; - std::time_t now = Clock::to_time_t(Clock::now()); - const std::size_t kTzOffsetLen = 6; - const std::size_t kTimestampLen = 19; - - std::size_t tz_len; - std::size_t timestamp_len; - long int offset_minutes; - char tz_offset_sign = '+'; - // tz_offset is set in one of three ways: - // * strftime with %z - This either returns empty or the ISO 8601 time. The maximum length an - // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). - // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to 19 for %02li, - // one for :, up to 19 %02li, plus trailing zero). - // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus trailing zero). - // - // Thus, the maximum size this needs to be is 41. - char tz_offset[41]; - // Long enough buffer to avoid format-overflow warnings - char storage[128]; - -#if defined(BENCHMARK_OS_WINDOWS) - std::tm *timeinfo_p = ::localtime(&now); -#else - std::tm timeinfo; - std::tm *timeinfo_p = &timeinfo; - ::localtime_r(&now, &timeinfo); -#endif - - tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p); - - if (tz_len < kTzOffsetLen && tz_len > 1) { - // Timezone offset was written. 
strftime writes offset as +HHMM or -HHMM, - // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse - // the offset as an integer, then reprint it to a string. - - offset_minutes = ::strtol(tz_offset, NULL, 10); - if (offset_minutes < 0) { - offset_minutes *= -1; - tz_offset_sign = '-'; - } - - tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", - tz_offset_sign, offset_minutes / 100, offset_minutes % 100); - CHECK(tz_len == kTzOffsetLen); - ((void)tz_len); // Prevent unused variable warning in optimized build. - } else { - // Unknown offset. RFC3339 specifies that unknown local offsets should be - // written as UTC time with -00:00 timezone. -#if defined(BENCHMARK_OS_WINDOWS) - // Potential race condition if another thread calls localtime or gmtime. - timeinfo_p = ::gmtime(&now); -#else - ::gmtime_r(&now, &timeinfo); -#endif - - strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); - } - - timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", - timeinfo_p); - CHECK(timestamp_len == kTimestampLen); - // Prevent unused variable warning in optimized build. 
- ((void)kTimestampLen); - - std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1); - return std::string(storage); -} - -} // end namespace benchmark diff --git a/libcxx/utils/google-benchmark/src/timers.h b/libcxx/utils/google-benchmark/src/timers.h deleted file mode 100644 index 65606ccd93d1..000000000000 --- a/libcxx/utils/google-benchmark/src/timers.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef BENCHMARK_TIMERS_H -#define BENCHMARK_TIMERS_H - -#include -#include - -namespace benchmark { - -// Return the CPU usage of the current process -double ProcessCPUUsage(); - -// Return the CPU usage of the children of the current process -double ChildrenCPUUsage(); - -// Return the CPU usage of the current thread -double ThreadCPUUsage(); - -#if defined(HAVE_STEADY_CLOCK) -template -struct ChooseSteadyClock { - typedef std::chrono::high_resolution_clock type; -}; - -template <> -struct ChooseSteadyClock { - typedef std::chrono::steady_clock type; -}; -#endif - -struct ChooseClockType { -#if defined(HAVE_STEADY_CLOCK) - typedef ChooseSteadyClock<>::type type; -#else - typedef std::chrono::high_resolution_clock type; -#endif -}; - -inline double ChronoClockNow() { - typedef ChooseClockType::type ClockType; - using FpSeconds = std::chrono::duration; - return FpSeconds(ClockType::now().time_since_epoch()).count(); -} - -std::string LocalDateTimeString(); - -} // end namespace benchmark - -#endif // BENCHMARK_TIMERS_H diff --git a/libcxx/utils/google-benchmark/test/AssemblyTests.cmake b/libcxx/utils/google-benchmark/test/AssemblyTests.cmake deleted file mode 100644 index 3d078586f1de..000000000000 --- a/libcxx/utils/google-benchmark/test/AssemblyTests.cmake +++ /dev/null @@ -1,46 +0,0 @@ - -include(split_list) - -set(ASM_TEST_FLAGS "") -check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) -if (BENCHMARK_HAS_O3_FLAG) - list(APPEND ASM_TEST_FLAGS -O3) -endif() - -check_cxx_compiler_flag(-g0 BENCHMARK_HAS_G0_FLAG) -if (BENCHMARK_HAS_G0_FLAG) - list(APPEND ASM_TEST_FLAGS 
-g0) -endif() - -check_cxx_compiler_flag(-fno-stack-protector BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG) -if (BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG) - list(APPEND ASM_TEST_FLAGS -fno-stack-protector) -endif() - -split_list(ASM_TEST_FLAGS) -string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER) - -macro(add_filecheck_test name) - cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV}) - add_library(${name} OBJECT ${name}.cc) - set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}") - set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s") - add_custom_target(copy_${name} ALL - COMMAND ${PROJECT_SOURCE_DIR}/tools/strip_asm.py - $ - ${ASM_OUTPUT_FILE} - BYPRODUCTS ${ASM_OUTPUT_FILE}) - add_dependencies(copy_${name} ${name}) - if (NOT ARG_CHECK_PREFIXES) - set(ARG_CHECK_PREFIXES "CHECK") - endif() - foreach(prefix ${ARG_CHECK_PREFIXES}) - add_test(NAME run_${name}_${prefix} - COMMAND - ${LLVM_FILECHECK_EXE} ${name}.cc - --input-file=${ASM_OUTPUT_FILE} - --check-prefixes=CHECK,CHECK-${ASM_TEST_COMPILER} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - endforeach() -endmacro() - diff --git a/libcxx/utils/google-benchmark/test/BUILD b/libcxx/utils/google-benchmark/test/BUILD deleted file mode 100644 index 1f27f99ede9f..000000000000 --- a/libcxx/utils/google-benchmark/test/BUILD +++ /dev/null @@ -1,74 +0,0 @@ -TEST_COPTS = [ - "-pedantic", - "-pedantic-errors", - "-std=c++11", - "-Wall", - "-Wextra", - "-Wshadow", - # "-Wshorten-64-to-32", - "-Wfloat-equal", - "-fstrict-aliasing", -] - -PER_SRC_COPTS = ({ - "cxx03_test.cc": ["-std=c++03"], - # Some of the issues with DoNotOptimize only occur when optimization is enabled - "donotoptimize_test.cc": ["-O3"], -}) - -TEST_ARGS = ["--benchmark_min_time=0.01"] - -PER_SRC_TEST_ARGS = ({ - "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], - "repetitions_test.cc": [" --benchmark_repetitions=3"], -}) - -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") - -cc_library( - 
name = "output_test_helper", - testonly = 1, - srcs = ["output_test_helper.cc"], - hdrs = ["output_test.h"], - copts = TEST_COPTS, - deps = [ - "//:benchmark", - "//:benchmark_internal_headers", - ], -) - -[ - cc_test( - name = test_src[:-len(".cc")], - size = "small", - srcs = [test_src], - args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []), - copts = TEST_COPTS + PER_SRC_COPTS.get(test_src, []), - deps = [ - ":output_test_helper", - "//:benchmark", - "//:benchmark_internal_headers", - "@com_google_googletest//:gtest", - ] + ( - ["@com_google_googletest//:gtest_main"] if (test_src[-len("gtest.cc"):] == "gtest.cc") else [] - ), - # FIXME: Add support for assembly tests to bazel. - # See Issue #556 - # https://github.com/google/benchmark/issues/556 - ) - for test_src in glob( - ["*test.cc"], - exclude = [ - "*_assembly_test.cc", - "link_main_test.cc", - ], - ) -] - -cc_test( - name = "link_main_test", - size = "small", - srcs = ["link_main_test.cc"], - copts = TEST_COPTS, - deps = ["//:benchmark_main"], -) diff --git a/libcxx/utils/google-benchmark/test/CMakeLists.txt b/libcxx/utils/google-benchmark/test/CMakeLists.txt deleted file mode 100644 index 79cdf53b402c..000000000000 --- a/libcxx/utils/google-benchmark/test/CMakeLists.txt +++ /dev/null @@ -1,271 +0,0 @@ -# Enable the tests - -find_package(Threads REQUIRED) -include(CheckCXXCompilerFlag) - -# NOTE: Some tests use `` to perform the test. Therefore we must -# strip -DNDEBUG from the default CMake flags in DEBUG mode. -string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) -if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) - add_definitions( -UNDEBUG ) - add_definitions(-DTEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) - # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines. 
- foreach (flags_var_to_scrub - CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_MINSIZEREL - CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS_MINSIZEREL) - string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " " - "${flags_var_to_scrub}" "${${flags_var_to_scrub}}") - endforeach() -endif() - -check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) -set(BENCHMARK_O3_FLAG "") -if (BENCHMARK_HAS_O3_FLAG) - set(BENCHMARK_O3_FLAG "-O3") -endif() - -# NOTE: These flags must be added after find_package(Threads REQUIRED) otherwise -# they will break the configuration check. -if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) - list(APPEND CMAKE_EXE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) -endif() - -add_library(output_test_helper STATIC output_test_helper.cc output_test.h) - -macro(compile_benchmark_test name) - add_executable(${name} "${name}.cc") - target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) -endmacro(compile_benchmark_test) - -macro(compile_benchmark_test_with_main name) - add_executable(${name} "${name}.cc") - target_link_libraries(${name} benchmark::benchmark_main) -endmacro(compile_benchmark_test_with_main) - -macro(compile_output_test name) - add_executable(${name} "${name}.cc" output_test.h) - target_link_libraries(${name} output_test_helper benchmark::benchmark - ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -endmacro(compile_output_test) - -# Demonstration executable -compile_benchmark_test(benchmark_test) -add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01) - -compile_benchmark_test(filter_test) -macro(add_filter_test name filter expect) - add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) - add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) -endmacro(add_filter_test) - -add_filter_test(filter_simple "Foo" 3) -add_filter_test(filter_simple_negative 
"-Foo" 2) -add_filter_test(filter_suffix "BM_.*" 4) -add_filter_test(filter_suffix_negative "-BM_.*" 1) -add_filter_test(filter_regex_all ".*" 5) -add_filter_test(filter_regex_all_negative "-.*" 0) -add_filter_test(filter_regex_blank "" 5) -add_filter_test(filter_regex_blank_negative "-" 0) -add_filter_test(filter_regex_none "monkey" 0) -add_filter_test(filter_regex_none_negative "-monkey" 5) -add_filter_test(filter_regex_wildcard ".*Foo.*" 3) -add_filter_test(filter_regex_wildcard_negative "-.*Foo.*" 2) -add_filter_test(filter_regex_begin "^BM_.*" 4) -add_filter_test(filter_regex_begin_negative "-^BM_.*" 1) -add_filter_test(filter_regex_begin2 "^N" 1) -add_filter_test(filter_regex_begin2_negative "-^N" 4) -add_filter_test(filter_regex_end ".*Ba$" 1) -add_filter_test(filter_regex_end_negative "-.*Ba$" 4) - -compile_benchmark_test(options_test) -add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01) - -compile_benchmark_test(basic_test) -add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01) - -compile_output_test(repetitions_test) -add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01 --benchmark_repetitions=3) - -compile_benchmark_test(diagnostics_test) -add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01) - -compile_benchmark_test(skip_with_error_test) -add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01) - -compile_benchmark_test(donotoptimize_test) -# Some of the issues with DoNotOptimize only occur when optimization is enabled -check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) -if (BENCHMARK_HAS_O3_FLAG) - set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3") -endif() -add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01) - -compile_benchmark_test(fixture_test) -add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01) - 
-compile_benchmark_test(register_benchmark_test) -add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01) - -compile_benchmark_test(map_test) -add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01) - -compile_benchmark_test(multiple_ranges_test) -add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01) - -compile_benchmark_test(args_product_test) -add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01) - -compile_benchmark_test_with_main(link_main_test) -add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01) - -compile_output_test(reporter_output_test) -add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01) - -compile_output_test(templated_fixture_test) -add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01) - -compile_output_test(user_counters_test) -add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01) - -compile_output_test(perf_counters_test) -add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES) - -compile_output_test(internal_threading_test) -add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01) - -compile_output_test(report_aggregates_only_test) -add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01) - -compile_output_test(display_aggregates_only_test) -add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01) - -compile_output_test(user_counters_tabular_test) -add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01) - -compile_output_test(user_counters_thousands_test) -add_test(NAME user_counters_thousands_test COMMAND 
user_counters_thousands_test --benchmark_min_time=0.01) - -compile_output_test(memory_manager_test) -add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01) - -check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) -if (BENCHMARK_HAS_CXX03_FLAG) - compile_benchmark_test(cxx03_test) - set_target_properties(cxx03_test - PROPERTIES - CXX_STANDARD 98 - CXX_STANDARD_REQUIRED YES) - # libstdc++ provides different definitions within between dialects. When - # LTO is enabled and -Werror is specified GCC diagnoses this ODR violation - # causing the test to fail to compile. To prevent this we explicitly disable - # the warning. - check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR) - if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR) - set_target_properties(cxx03_test - PROPERTIES - LINK_FLAGS "-Wno-odr") - endif() - add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01) -endif() - -# Attempt to work around flaky test failures when running on Appveyor servers. 
-if (DEFINED ENV{APPVEYOR}) - set(COMPLEXITY_MIN_TIME "0.5") -else() - set(COMPLEXITY_MIN_TIME "0.01") -endif() -compile_output_test(complexity_test) -add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME}) - -############################################################################### -# GoogleTest Unit Tests -############################################################################### - -if (BENCHMARK_ENABLE_GTEST_TESTS) - macro(compile_gtest name) - add_executable(${name} "${name}.cc") - target_link_libraries(${name} benchmark::benchmark - gmock_main ${CMAKE_THREAD_LIBS_INIT}) - endmacro(compile_gtest) - - macro(add_gtest name) - compile_gtest(${name}) - add_test(NAME ${name} COMMAND ${name}) - endmacro() - - add_gtest(benchmark_gtest) - add_gtest(benchmark_name_gtest) - add_gtest(benchmark_random_interleaving_gtest) - add_gtest(commandlineflags_gtest) - add_gtest(statistics_gtest) - add_gtest(string_util_gtest) - add_gtest(perf_counters_gtest) -endif(BENCHMARK_ENABLE_GTEST_TESTS) - -############################################################################### -# Assembly Unit Tests -############################################################################### - -if (BENCHMARK_ENABLE_ASSEMBLY_TESTS) - if (NOT LLVM_FILECHECK_EXE) - message(FATAL_ERROR "LLVM FileCheck is required when including this file") - endif() - include(AssemblyTests.cmake) - add_filecheck_test(donotoptimize_assembly_test) - add_filecheck_test(state_assembly_test) - add_filecheck_test(clobber_memory_assembly_test) -endif() - - - -############################################################################### -# Code Coverage Configuration -############################################################################### - -# Add the coverage command(s) -if(CMAKE_BUILD_TYPE) - string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) -endif() -if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") - find_program(GCOV gcov) - 
find_program(LCOV lcov) - find_program(GENHTML genhtml) - find_program(CTEST ctest) - if (GCOV AND LCOV AND GENHTML AND CTEST AND HAVE_CXX_FLAG_COVERAGE) - add_custom_command( - OUTPUT ${CMAKE_BINARY_DIR}/lcov/index.html - COMMAND ${LCOV} -q -z -d . - COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o before.lcov -i - COMMAND ${CTEST} --force-new-ctest-process - COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o after.lcov - COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov - COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov - COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark - DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMENT "Running LCOV" - ) - add_custom_target(coverage - DEPENDS ${CMAKE_BINARY_DIR}/lcov/index.html - COMMENT "LCOV report at lcov/index.html" - ) - message(STATUS "Coverage command added") - else() - if (HAVE_CXX_FLAG_COVERAGE) - set(CXX_FLAG_COVERAGE_MESSAGE supported) - else() - set(CXX_FLAG_COVERAGE_MESSAGE unavailable) - endif() - message(WARNING - "Coverage not available:\n" - " gcov: ${GCOV}\n" - " lcov: ${LCOV}\n" - " genhtml: ${GENHTML}\n" - " ctest: ${CTEST}\n" - " --coverage flag: ${CXX_FLAG_COVERAGE_MESSAGE}") - endif() -endif() diff --git a/libcxx/utils/google-benchmark/test/args_product_test.cc b/libcxx/utils/google-benchmark/test/args_product_test.cc deleted file mode 100644 index 32a75d50dd9e..000000000000 --- a/libcxx/utils/google-benchmark/test/args_product_test.cc +++ /dev/null @@ -1,77 +0,0 @@ -#include "benchmark/benchmark.h" - -#include -#include -#include -#include - -class ArgsProductFixture : public ::benchmark::Fixture { - public: - ArgsProductFixture() - : expectedValues({{0, 100, 2000, 30000}, - {1, 15, 3, 8}, - {1, 15, 3, 9}, - {1, 15, 7, 8}, - {1, 15, 7, 9}, - {1, 15, 10, 8}, - {1, 15, 10, 
9}, - {2, 15, 3, 8}, - {2, 15, 3, 9}, - {2, 15, 7, 8}, - {2, 15, 7, 9}, - {2, 15, 10, 8}, - {2, 15, 10, 9}, - {4, 5, 6, 11}}) {} - - void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { - std::vector ranges = {state.range(0), state.range(1), - state.range(2), state.range(3)}; - - assert(expectedValues.find(ranges) != expectedValues.end()); - - actualValues.insert(ranges); - } - - // NOTE: This is not TearDown as we want to check after _all_ runs are - // complete. - virtual ~ArgsProductFixture() { - if (actualValues != expectedValues) { - std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { - std::cout << "{"; - for (int64_t iv : v) { - std::cout << iv << ", "; - } - std::cout << "}\n"; - } - std::cout << "ACTUAL\n"; - for (auto v : actualValues) { - std::cout << "{"; - for (int64_t iv : v) { - std::cout << iv << ", "; - } - std::cout << "}\n"; - } - } - } - - std::set> expectedValues; - std::set> actualValues; -}; - -BENCHMARK_DEFINE_F(ArgsProductFixture, Empty)(benchmark::State& state) { - for (auto _ : state) { - int64_t product = - state.range(0) * state.range(1) * state.range(2) * state.range(3); - for (int64_t x = 0; x < product; x++) { - benchmark::DoNotOptimize(x); - } - } -} - -BENCHMARK_REGISTER_F(ArgsProductFixture, Empty) - ->Args({0, 100, 2000, 30000}) - ->ArgsProduct({{1, 2}, {15}, {3, 7, 10}, {8, 9}}) - ->Args({4, 5, 6, 11}); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/basic_test.cc b/libcxx/utils/google-benchmark/test/basic_test.cc deleted file mode 100644 index 33642211e205..000000000000 --- a/libcxx/utils/google-benchmark/test/basic_test.cc +++ /dev/null @@ -1,151 +0,0 @@ - -#include "benchmark/benchmark.h" - -#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) - -void BM_empty(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); - } -} -BENCHMARK(BM_empty); -BENCHMARK(BM_empty)->ThreadPerCpu(); - -void BM_spin_empty(benchmark::State& 
state) { - for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { - benchmark::DoNotOptimize(x); - } - } -} -BASIC_BENCHMARK_TEST(BM_spin_empty); -BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); - -void BM_spin_pause_before(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - } -} -BASIC_BENCHMARK_TEST(BM_spin_pause_before); -BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); - -void BM_spin_pause_during(benchmark::State& state) { - for (auto _ : state) { - state.PauseTiming(); - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - state.ResumeTiming(); - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - } -} -BASIC_BENCHMARK_TEST(BM_spin_pause_during); -BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu(); - -void BM_pause_during(benchmark::State& state) { - for (auto _ : state) { - state.PauseTiming(); - state.ResumeTiming(); - } -} -BENCHMARK(BM_pause_during); -BENCHMARK(BM_pause_during)->ThreadPerCpu(); -BENCHMARK(BM_pause_during)->UseRealTime(); -BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); - -void BM_spin_pause_after(benchmark::State& state) { - for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - } - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } -} -BASIC_BENCHMARK_TEST(BM_spin_pause_after); -BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); - -void BM_spin_pause_before_and_after(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } - } - for (int i = 0; i < state.range(0); ++i) { - benchmark::DoNotOptimize(i); - } -} 
-BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after); -BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu(); - -void BM_empty_stop_start(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_empty_stop_start); -BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); - - -void BM_KeepRunning(benchmark::State& state) { - benchmark::IterationCount iter_count = 0; - assert(iter_count == state.iterations()); - while (state.KeepRunning()) { - ++iter_count; - } - assert(iter_count == state.iterations()); -} -BENCHMARK(BM_KeepRunning); - -void BM_KeepRunningBatch(benchmark::State& state) { - // Choose a batch size >1000 to skip the typical runs with iteration - // targets of 10, 100 and 1000. If these are not actually skipped the - // bug would be detectable as consecutive runs with the same iteration - // count. Below we assert that this does not happen. - const benchmark::IterationCount batch_size = 1009; - - static benchmark::IterationCount prior_iter_count = 0; - benchmark::IterationCount iter_count = 0; - while (state.KeepRunningBatch(batch_size)) { - iter_count += batch_size; - } - assert(state.iterations() == iter_count); - - // Verify that the iteration count always increases across runs (see - // comment above). - assert(iter_count == batch_size // max_iterations == 1 - || iter_count > prior_iter_count); // max_iterations > batch_size - prior_iter_count = iter_count; -} -// Register with a fixed repetition count to establish the invariant that -// the iteration count should always change across runs. This overrides -// the --benchmark_repetitions command line flag, which would otherwise -// cause this test to fail if set > 1. 
-BENCHMARK(BM_KeepRunningBatch)->Repetitions(1); - -void BM_RangedFor(benchmark::State& state) { - benchmark::IterationCount iter_count = 0; - for (auto _ : state) { - ++iter_count; - } - assert(iter_count == state.max_iterations); -} -BENCHMARK(BM_RangedFor); - -// Ensure that StateIterator provides all the necessary typedefs required to -// instantiate std::iterator_traits. -static_assert(std::is_same< - typename std::iterator_traits::value_type, - typename benchmark::State::StateIterator::value_type>::value, ""); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/benchmark_gtest.cc b/libcxx/utils/google-benchmark/test/benchmark_gtest.cc deleted file mode 100644 index 14a885ba46da..000000000000 --- a/libcxx/utils/google-benchmark/test/benchmark_gtest.cc +++ /dev/null @@ -1,165 +0,0 @@ -#include -#include -#include - -#include "../src/benchmark_register.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace benchmark { -namespace internal { -extern std::map* global_context; - -namespace { - -TEST(AddRangeTest, Simple) { - std::vector dst; - AddRange(&dst, 1, 2, 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2)); -} - -TEST(AddRangeTest, Simple64) { - std::vector dst; - AddRange(&dst, static_cast(1), static_cast(2), 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2)); -} - -TEST(AddRangeTest, Advanced) { - std::vector dst; - AddRange(&dst, 5, 15, 2); - EXPECT_THAT(dst, testing::ElementsAre(5, 8, 15)); -} - -TEST(AddRangeTest, Advanced64) { - std::vector dst; - AddRange(&dst, static_cast(5), static_cast(15), 2); - EXPECT_THAT(dst, testing::ElementsAre(5, 8, 15)); -} - -TEST(AddRangeTest, FullRange8) { - std::vector dst; - AddRange(&dst, int8_t{1}, std::numeric_limits::max(), 8); - EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127)); -} - -TEST(AddRangeTest, FullRange64) { - std::vector dst; - AddRange(&dst, int64_t{1}, std::numeric_limits::max(), 1024); - EXPECT_THAT( - dst, testing::ElementsAre(1LL, 1024LL, 1048576LL, 1073741824LL, - 
1099511627776LL, 1125899906842624LL, - 1152921504606846976LL, 9223372036854775807LL)); -} - -TEST(AddRangeTest, NegativeRanges) { - std::vector dst; - AddRange(&dst, -8, 0, 2); - EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1, 0)); -} - -TEST(AddRangeTest, StrictlyNegative) { - std::vector dst; - AddRange(&dst, -8, -1, 2); - EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1)); -} - -TEST(AddRangeTest, SymmetricNegativeRanges) { - std::vector dst; - AddRange(&dst, -8, 8, 2); - EXPECT_THAT(dst, testing::ElementsAre(-8, -4, -2, -1, 0, 1, 2, 4, 8)); -} - -TEST(AddRangeTest, SymmetricNegativeRangesOddMult) { - std::vector dst; - AddRange(&dst, -30, 32, 5); - EXPECT_THAT(dst, testing::ElementsAre(-30, -25, -5, -1, 0, 1, 5, 25, 32)); -} - -TEST(AddRangeTest, NegativeRangesAsymmetric) { - std::vector dst; - AddRange(&dst, -3, 5, 2); - EXPECT_THAT(dst, testing::ElementsAre(-3, -2, -1, 0, 1, 2, 4, 5)); -} - -TEST(AddRangeTest, NegativeRangesLargeStep) { - // Always include -1, 0, 1 when crossing zero. - std::vector dst; - AddRange(&dst, -8, 8, 10); - EXPECT_THAT(dst, testing::ElementsAre(-8, -1, 0, 1, 8)); -} - -TEST(AddRangeTest, ZeroOnlyRange) { - std::vector dst; - AddRange(&dst, 0, 0, 2); - EXPECT_THAT(dst, testing::ElementsAre(0)); -} - -TEST(AddRangeTest, ZeroStartingRange) { - std::vector dst; - AddRange(&dst, 0, 2, 2); - EXPECT_THAT(dst, testing::ElementsAre(0, 1, 2)); -} - -TEST(AddRangeTest, NegativeRange64) { - std::vector dst; - AddRange(&dst, -4, 4, 2); - EXPECT_THAT(dst, testing::ElementsAre(-4, -2, -1, 0, 1, 2, 4)); -} - -TEST(AddRangeTest, NegativeRangePreservesExistingOrder) { - // If elements already exist in the range, ensure we don't change - // their ordering by adding negative values. 
- std::vector dst = {1, 2, 3}; - AddRange(&dst, -2, 2, 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2, 3, -2, -1, 0, 1, 2)); -} - -TEST(AddRangeTest, FullNegativeRange64) { - std::vector dst; - const auto min = std::numeric_limits::min(); - const auto max = std::numeric_limits::max(); - AddRange(&dst, min, max, 1024); - EXPECT_THAT( - dst, testing::ElementsAreArray(std::vector{ - min, -1152921504606846976LL, -1125899906842624LL, - -1099511627776LL, -1073741824LL, -1048576LL, -1024LL, -1LL, 0LL, - 1LL, 1024LL, 1048576LL, 1073741824LL, 1099511627776LL, - 1125899906842624LL, 1152921504606846976LL, max})); -} - -TEST(AddRangeTest, Simple8) { - std::vector dst; - AddRange(&dst, 1, 8, 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8)); -} - -TEST(AddCustomContext, Simple) { - EXPECT_THAT(global_context, nullptr); - - AddCustomContext("foo", "bar"); - AddCustomContext("baz", "qux"); - - EXPECT_THAT(*global_context, - testing::UnorderedElementsAre(testing::Pair("foo", "bar"), - testing::Pair("baz", "qux"))); - - delete global_context; - global_context = nullptr; -} - -TEST(AddCustomContext, DuplicateKey) { - EXPECT_THAT(global_context, nullptr); - - AddCustomContext("foo", "bar"); - AddCustomContext("foo", "qux"); - - EXPECT_THAT(*global_context, - testing::UnorderedElementsAre(testing::Pair("foo", "bar"))); - - delete global_context; - global_context = nullptr; -} - -} // namespace -} // namespace internal -} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc b/libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc deleted file mode 100644 index afb401c1f532..000000000000 --- a/libcxx/utils/google-benchmark/test/benchmark_name_gtest.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "benchmark/benchmark.h" -#include "gtest/gtest.h" - -namespace { - -using namespace benchmark; -using namespace benchmark::internal; - -TEST(BenchmarkNameTest, Empty) { - const auto name = BenchmarkName(); - EXPECT_EQ(name.str(), std::string()); 
-} - -TEST(BenchmarkNameTest, FunctionName) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - EXPECT_EQ(name.str(), "function_name"); -} - -TEST(BenchmarkNameTest, FunctionNameAndArgs) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - name.args = "some_args:3/4/5"; - EXPECT_EQ(name.str(), "function_name/some_args:3/4/5"); -} - -TEST(BenchmarkNameTest, MinTime) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - name.args = "some_args:3/4"; - name.min_time = "min_time:3.4s"; - EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s"); -} - -TEST(BenchmarkNameTest, Iterations) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - name.min_time = "min_time:3.4s"; - name.iterations = "iterations:42"; - EXPECT_EQ(name.str(), "function_name/min_time:3.4s/iterations:42"); -} - -TEST(BenchmarkNameTest, Repetitions) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - name.min_time = "min_time:3.4s"; - name.repetitions = "repetitions:24"; - EXPECT_EQ(name.str(), "function_name/min_time:3.4s/repetitions:24"); -} - -TEST(BenchmarkNameTest, TimeType) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - name.min_time = "min_time:3.4s"; - name.time_type = "hammer_time"; - EXPECT_EQ(name.str(), "function_name/min_time:3.4s/hammer_time"); -} - -TEST(BenchmarkNameTest, Threads) { - auto name = BenchmarkName(); - name.function_name = "function_name"; - name.min_time = "min_time:3.4s"; - name.threads = "threads:256"; - EXPECT_EQ(name.str(), "function_name/min_time:3.4s/threads:256"); -} - -TEST(BenchmarkNameTest, TestEmptyFunctionName) { - auto name = BenchmarkName(); - name.args = "first:3/second:4"; - name.threads = "threads:22"; - EXPECT_EQ(name.str(), "first:3/second:4/threads:22"); -} - -} // end namespace diff --git a/libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc 
b/libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc deleted file mode 100644 index 8e28dab3f41d..000000000000 --- a/libcxx/utils/google-benchmark/test/benchmark_random_interleaving_gtest.cc +++ /dev/null @@ -1,126 +0,0 @@ -#include -#include -#include - -#include "../src/commandlineflags.h" -#include "../src/string_util.h" -#include "benchmark/benchmark.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -DECLARE_bool(benchmark_enable_random_interleaving); -DECLARE_string(benchmark_filter); -DECLARE_int32(benchmark_repetitions); - -namespace benchmark { -namespace internal { -namespace { - -class EventQueue : public std::queue { - public: - void Put(const std::string& event) { push(event); } - - void Clear() { - while (!empty()) { - pop(); - } - } - - std::string Get() { - std::string event = front(); - pop(); - return event; - } -}; - -static EventQueue* queue = new EventQueue; - -class NullReporter : public BenchmarkReporter { - public: - bool ReportContext(const Context& /*context*/) override { return true; } - void ReportRuns(const std::vector& /* report */) override {} -}; - -class BenchmarkTest : public testing::Test { - public: - static void SetupHook(int /* num_threads */) { queue->push("Setup"); } - - static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } - - void Execute(const std::string& pattern) { - queue->Clear(); - - BenchmarkReporter* reporter = new NullReporter; - FLAGS_benchmark_filter = pattern; - RunSpecifiedBenchmarks(reporter); - delete reporter; - - queue->Put("DONE"); // End marker - } -}; - -static void BM_Match1(benchmark::State& state) { - const int64_t arg = state.range(0); - - for (auto _ : state) { - } - queue->Put(StrFormat("BM_Match1/%d", static_cast(arg))); -} -BENCHMARK(BM_Match1) - ->Iterations(100) - ->Arg(1) - ->Arg(2) - ->Arg(3) - ->Range(10, 80) - ->Args({90}) - ->Args({100}); - -TEST_F(BenchmarkTest, Match1) { - Execute("BM_Match1"); - ASSERT_EQ("BM_Match1/1", queue->Get()); 
- ASSERT_EQ("BM_Match1/2", queue->Get()); - ASSERT_EQ("BM_Match1/3", queue->Get()); - ASSERT_EQ("BM_Match1/10", queue->Get()); - ASSERT_EQ("BM_Match1/64", queue->Get()); - ASSERT_EQ("BM_Match1/80", queue->Get()); - ASSERT_EQ("BM_Match1/90", queue->Get()); - ASSERT_EQ("BM_Match1/100", queue->Get()); - ASSERT_EQ("DONE", queue->Get()); -} - -TEST_F(BenchmarkTest, Match1WithRepetition) { - FLAGS_benchmark_repetitions = 2; - - Execute("BM_Match1/(64|80)"); - ASSERT_EQ("BM_Match1/64", queue->Get()); - ASSERT_EQ("BM_Match1/64", queue->Get()); - ASSERT_EQ("BM_Match1/80", queue->Get()); - ASSERT_EQ("BM_Match1/80", queue->Get()); - ASSERT_EQ("DONE", queue->Get()); -} - -TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { - FLAGS_benchmark_enable_random_interleaving = true; - FLAGS_benchmark_repetitions = 100; - - std::map element_count; - std::map interleaving_count; - Execute("BM_Match1/(64|80)"); - for (int i = 0; i < 100; ++i) { - std::vector interleaving; - interleaving.push_back(queue->Get()); - interleaving.push_back(queue->Get()); - element_count[interleaving[0].c_str()]++; - element_count[interleaving[1].c_str()]++; - interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), - interleaving[1].c_str())]++; - } - EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions."; - EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions."; - EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; - ASSERT_EQ("DONE", queue->Get()); -} - -} // namespace -} // namespace internal -} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/test/benchmark_test.cc b/libcxx/utils/google-benchmark/test/benchmark_test.cc deleted file mode 100644 index 3cd4f5565fa1..000000000000 --- a/libcxx/utils/google-benchmark/test/benchmark_test.cc +++ /dev/null @@ -1,245 +0,0 @@ -#include "benchmark/benchmark.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include - -#if defined(__GNUC__) -#define BENCHMARK_NOINLINE __attribute__((noinline)) -#else -#define BENCHMARK_NOINLINE -#endif - -namespace { - -int BENCHMARK_NOINLINE Factorial(uint32_t n) { - return (n == 1) ? 1 : n * Factorial(n - 1); -} - -double CalculatePi(int depth) { - double pi = 0.0; - for (int i = 0; i < depth; ++i) { - double numerator = static_cast(((i % 2) * 2) - 1); - double denominator = static_cast((2 * i) - 1); - pi += numerator / denominator; - } - return (pi - 1.0) * 4; -} - -std::set ConstructRandomSet(int64_t size) { - std::set s; - for (int i = 0; i < size; ++i) s.insert(s.end(), i); - return s; -} - -std::mutex test_vector_mu; -std::vector* test_vector = nullptr; - -} // end namespace - -static void BM_Factorial(benchmark::State& state) { - int fac_42 = 0; - for (auto _ : state) fac_42 = Factorial(8); - // Prevent compiler optimizations - std::stringstream ss; - ss << fac_42; - state.SetLabel(ss.str()); -} -BENCHMARK(BM_Factorial); -BENCHMARK(BM_Factorial)->UseRealTime(); - -static void BM_CalculatePiRange(benchmark::State& state) { - double pi = 0.0; - for (auto _ : state) pi = CalculatePi(static_cast(state.range(0))); - std::stringstream ss; - ss << pi; - state.SetLabel(ss.str()); -} -BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); - -static void BM_CalculatePi(benchmark::State& state) { - static const int depth = 1024; - for (auto _ : state) { - benchmark::DoNotOptimize(CalculatePi(static_cast(depth))); - } -} -BENCHMARK(BM_CalculatePi)->Threads(8); -BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); -BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); - -static void BM_SetInsert(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); - data = ConstructRandomSet(state.range(0)); - state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) data.insert(rand()); - } - state.SetItemsProcessed(state.iterations() * state.range(1)); - 
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); -} - -// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower, -// non-timed part of each iteration will make the benchmark take forever. -BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); - -template -static void BM_Sequential(benchmark::State& state) { - ValueType v = 42; - for (auto _ : state) { - Container c; - for (int64_t i = state.range(0); --i;) c.push_back(v); - } - const int64_t items_processed = state.iterations() * state.range(0); - state.SetItemsProcessed(items_processed); - state.SetBytesProcessed(items_processed * sizeof(v)); -} -BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int) - ->Range(1 << 0, 1 << 10); -BENCHMARK_TEMPLATE(BM_Sequential, std::list)->Range(1 << 0, 1 << 10); -// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. -#ifdef BENCHMARK_HAS_CXX11 -BENCHMARK_TEMPLATE(BM_Sequential, std::vector, int)->Arg(512); -#endif - -static void BM_StringCompare(benchmark::State& state) { - size_t len = static_cast(state.range(0)); - std::string s1(len, '-'); - std::string s2(len, '-'); - for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2)); -} -BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); - -static void BM_SetupTeardown(benchmark::State& state) { - if (state.thread_index == 0) { - // No need to lock test_vector_mu here as this is running single-threaded. 
- test_vector = new std::vector(); - } - int i = 0; - for (auto _ : state) { - std::lock_guard l(test_vector_mu); - if (i % 2 == 0) - test_vector->push_back(i); - else - test_vector->pop_back(); - ++i; - } - if (state.thread_index == 0) { - delete test_vector; - } -} -BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); - -static void BM_LongTest(benchmark::State& state) { - double tracker = 0.0; - for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) - benchmark::DoNotOptimize(tracker += i); - } -} -BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); - -static void BM_ParallelMemset(benchmark::State& state) { - int64_t size = state.range(0) / static_cast(sizeof(int)); - int thread_size = static_cast(size) / state.threads; - int from = thread_size * state.thread_index; - int to = from + thread_size; - - if (state.thread_index == 0) { - test_vector = new std::vector(static_cast(size)); - } - - for (auto _ : state) { - for (int i = from; i < to; i++) { - // No need to lock test_vector_mu as ranges - // do not overlap between threads. 
- benchmark::DoNotOptimize(test_vector->at(i) = 1); - } - } - - if (state.thread_index == 0) { - delete test_vector; - } -} -BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); - -static void BM_ManualTiming(benchmark::State& state) { - int64_t slept_for = 0; - int64_t microseconds = state.range(0); - std::chrono::duration sleep_duration{ - static_cast(microseconds)}; - - for (auto _ : state) { - auto start = std::chrono::high_resolution_clock::now(); - // Simulate some useful workload with a sleep - std::this_thread::sleep_for( - std::chrono::duration_cast(sleep_duration)); - auto end = std::chrono::high_resolution_clock::now(); - - auto elapsed = - std::chrono::duration_cast>(end - start); - - state.SetIterationTime(elapsed.count()); - slept_for += microseconds; - } - state.SetItemsProcessed(slept_for); -} -BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); -BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); - -#ifdef BENCHMARK_HAS_CXX11 - -template -void BM_with_args(benchmark::State& state, Args&&...) 
{ - for (auto _ : state) { - } -} -BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); -BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), - std::pair(42, 3.8)); - -void BM_non_template_args(benchmark::State& state, int, double) { - while(state.KeepRunning()) {} -} -BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); - -#endif // BENCHMARK_HAS_CXX11 - -static void BM_DenseThreadRanges(benchmark::State& st) { - switch (st.range(0)) { - case 1: - assert(st.threads == 1 || st.threads == 2 || st.threads == 3); - break; - case 2: - assert(st.threads == 1 || st.threads == 3 || st.threads == 4); - break; - case 3: - assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || - st.threads == 14); - break; - default: - assert(false && "Invalid test case number"); - } - while (st.KeepRunning()) { - } -} -BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3); -BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); -BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc b/libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc deleted file mode 100644 index f41911a39ce7..000000000000 --- a/libcxx/utils/google-benchmark/test/clobber_memory_assembly_test.cc +++ /dev/null @@ -1,64 +0,0 @@ -#include - -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wreturn-type" -#endif - -extern "C" { - -extern int ExternInt; -extern int ExternInt2; -extern int ExternInt3; - -} - -// CHECK-LABEL: test_basic: -extern "C" void test_basic() { - int x; - benchmark::DoNotOptimize(&x); - x = 101; - benchmark::ClobberMemory(); - // CHECK: leaq [[DEST:[^,]+]], %rax - // CHECK: movl $101, [[DEST]] - // CHECK: ret -} - -// CHECK-LABEL: test_redundant_store: -extern "C" void test_redundant_store() { - ExternInt = 3; - benchmark::ClobberMemory(); - ExternInt = 51; - // CHECK-DAG: ExternInt - // CHECK-DAG: movl $3 - // 
CHECK: movl $51 -} - -// CHECK-LABEL: test_redundant_read: -extern "C" void test_redundant_read() { - int x; - benchmark::DoNotOptimize(&x); - x = ExternInt; - benchmark::ClobberMemory(); - x = ExternInt2; - // CHECK: leaq [[DEST:[^,]+]], %rax - // CHECK: ExternInt(%rip) - // CHECK: movl %eax, [[DEST]] - // CHECK-NOT: ExternInt2 - // CHECK: ret -} - -// CHECK-LABEL: test_redundant_read2: -extern "C" void test_redundant_read2() { - int x; - benchmark::DoNotOptimize(&x); - x = ExternInt; - benchmark::ClobberMemory(); - x = ExternInt2; - benchmark::ClobberMemory(); - // CHECK: leaq [[DEST:[^,]+]], %rax - // CHECK: ExternInt(%rip) - // CHECK: movl %eax, [[DEST]] - // CHECK: ExternInt2(%rip) - // CHECK: movl %eax, [[DEST]] - // CHECK: ret -} diff --git a/libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc b/libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc deleted file mode 100644 index 8412008ffe35..000000000000 --- a/libcxx/utils/google-benchmark/test/commandlineflags_gtest.cc +++ /dev/null @@ -1,228 +0,0 @@ -#include - -#include "../src/commandlineflags.h" -#include "../src/internal_macros.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace benchmark { -namespace { - -#if defined(BENCHMARK_OS_WINDOWS) -int setenv(const char* name, const char* value, int overwrite) { - if (!overwrite) { - // NOTE: getenv_s is far superior but not available under mingw. 
- char* env_value = getenv(name); - if (env_value == nullptr) { - return -1; - } - } - return _putenv_s(name, value); -} - -int unsetenv(const char* name) { return _putenv_s(name, ""); } - -#endif // BENCHMARK_OS_WINDOWS - -TEST(BoolFromEnv, Default) { - ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); - EXPECT_EQ(BoolFromEnv("not_in_env", true), true); -} - -TEST(BoolFromEnv, False) { - ASSERT_EQ(setenv("IN_ENV", "0", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "N", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "n", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "NO", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "No", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "no", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "F", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "f", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "FALSE", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "False", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "false", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "OFF", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "Off", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "off", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", true), false); - 
unsetenv("IN_ENV"); -} - -TEST(BoolFromEnv, True) { - ASSERT_EQ(setenv("IN_ENV", "1", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "Y", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "y", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "YES", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "Yes", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "yes", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "T", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "t", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "TRUE", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "True", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "true", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "ON", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "On", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - - ASSERT_EQ(setenv("IN_ENV", "on", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); - -#ifndef BENCHMARK_OS_WINDOWS - ASSERT_EQ(setenv("IN_ENV", "", 1), 0); - EXPECT_EQ(BoolFromEnv("in_env", false), true); - unsetenv("IN_ENV"); -#endif -} - -TEST(Int32FromEnv, NotInEnv) { - ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); - EXPECT_EQ(Int32FromEnv("not_in_env", 42), 42); -} - -TEST(Int32FromEnv, InvalidInteger) { 
- ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); - EXPECT_EQ(Int32FromEnv("in_env", 42), 42); - unsetenv("IN_ENV"); -} - -TEST(Int32FromEnv, ValidInteger) { - ASSERT_EQ(setenv("IN_ENV", "42", 1), 0); - EXPECT_EQ(Int32FromEnv("in_env", 64), 42); - unsetenv("IN_ENV"); -} - -TEST(DoubleFromEnv, NotInEnv) { - ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); - EXPECT_EQ(DoubleFromEnv("not_in_env", 0.51), 0.51); -} - -TEST(DoubleFromEnv, InvalidReal) { - ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); - EXPECT_EQ(DoubleFromEnv("in_env", 0.51), 0.51); - unsetenv("IN_ENV"); -} - -TEST(DoubleFromEnv, ValidReal) { - ASSERT_EQ(setenv("IN_ENV", "0.51", 1), 0); - EXPECT_EQ(DoubleFromEnv("in_env", 0.71), 0.51); - unsetenv("IN_ENV"); -} - -TEST(StringFromEnv, Default) { - ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); - EXPECT_STREQ(StringFromEnv("not_in_env", "foo"), "foo"); -} - -TEST(StringFromEnv, Valid) { - ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); - EXPECT_STREQ(StringFromEnv("in_env", "bar"), "foo"); - unsetenv("IN_ENV"); -} - -TEST(KvPairsFromEnv, Default) { - ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); - EXPECT_THAT(KvPairsFromEnv("not_in_env", {{"foo", "bar"}}), - testing::ElementsAre(testing::Pair("foo", "bar"))); -} - -TEST(KvPairsFromEnv, MalformedReturnsDefault) { - ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); - EXPECT_THAT(KvPairsFromEnv("in_env", {{"foo", "bar"}}), - testing::ElementsAre(testing::Pair("foo", "bar"))); - unsetenv("IN_ENV"); -} - -TEST(KvPairsFromEnv, Single) { - ASSERT_EQ(setenv("IN_ENV", "foo=bar", 1), 0); - EXPECT_THAT(KvPairsFromEnv("in_env", {}), - testing::ElementsAre(testing::Pair("foo", "bar"))); - unsetenv("IN_ENV"); -} - -TEST(KvPairsFromEnv, Multiple) { - ASSERT_EQ(setenv("IN_ENV", "foo=bar,baz=qux", 1), 0); - EXPECT_THAT(KvPairsFromEnv("in_env", {}), - testing::UnorderedElementsAre(testing::Pair("foo", "bar"), - testing::Pair("baz", "qux"))); - unsetenv("IN_ENV"); -} - -} // namespace -} // namespace benchmark diff --git a/libcxx/utils/google-benchmark/test/complexity_test.cc 
b/libcxx/utils/google-benchmark/test/complexity_test.cc deleted file mode 100644 index 0de73c5722b5..000000000000 --- a/libcxx/utils/google-benchmark/test/complexity_test.cc +++ /dev/null @@ -1,222 +0,0 @@ -#undef NDEBUG -#include -#include -#include -#include -#include -#include "benchmark/benchmark.h" -#include "output_test.h" - -namespace { - -#define ADD_COMPLEXITY_CASES(...) \ - int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) - -int AddComplexityTest(std::string test_name, std::string big_o_test_name, - std::string rms_test_name, std::string big_o, - int family_index) { - SetSubstitutions({{"%name", test_name}, - {"%bigo_name", big_o_test_name}, - {"%rms_name", rms_test_name}, - {"%bigo_str", "[ ]* %float " + big_o}, - {"%bigo", big_o}, - {"%rms", "[ ]*[0-9]+ %"}}); - AddCases( - TC_ConsoleOut, - {{"^%bigo_name %bigo_str %bigo_str[ ]*$"}, - {"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name. - {"^%rms_name %rms %rms[ ]*$", MR_Next}}); - AddCases( - TC_JSONOut, - {{"\"name\": \"%bigo_name\",$"}, - {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"%name\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": %int,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"BigO\",$", MR_Next}, - {"\"cpu_coefficient\": %float,$", MR_Next}, - {"\"real_coefficient\": %float,$", MR_Next}, - {"\"big_o\": \"%bigo\",$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}, - {"\"name\": \"%rms_name\",$"}, - {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"%name\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": %int,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"RMS\",$", MR_Next}, - {"\"rms\": %float$", MR_Next}, - {"}", MR_Next}}); - 
AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, - {"^\"%bigo_name\"", MR_Not}, - {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}}); - return 0; -} - -} // end namespace - -// ========================================================================= // -// --------------------------- Testing BigO O(1) --------------------------- // -// ========================================================================= // - -void BM_Complexity_O1(benchmark::State& state) { - for (auto _ : state) { - for (int i = 0; i < 1024; ++i) { - benchmark::DoNotOptimize(&i); - } - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); -BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(); -BENCHMARK(BM_Complexity_O1) - ->Range(1, 1 << 18) - ->Complexity([](benchmark::IterationCount) { return 1.0; }); - -const char *one_test_name = "BM_Complexity_O1"; -const char *big_o_1_test_name = "BM_Complexity_O1_BigO"; -const char *rms_o_1_test_name = "BM_Complexity_O1_RMS"; -const char *enum_big_o_1 = "\\([0-9]+\\)"; -// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto -// deduced. 
-// See https://github.com/google/benchmark/issues/272 -const char *auto_big_o_1 = "(\\([0-9]+\\))|(lgN)"; -const char *lambda_big_o_1 = "f\\(N\\)"; - -// Add enum tests -ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - enum_big_o_1, /*family_index=*/0); - -// Add auto enum tests -ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - auto_big_o_1, /*family_index=*/1); - -// Add lambda tests -ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - lambda_big_o_1, /*family_index=*/2); - -// ========================================================================= // -// --------------------------- Testing BigO O(N) --------------------------- // -// ========================================================================= // - -std::vector ConstructRandomVector(int64_t size) { - std::vector v; - v.reserve(static_cast(size)); - for (int i = 0; i < size; ++i) { - v.push_back(static_cast(std::rand() % size)); - } - return v; -} - -void BM_Complexity_O_N(benchmark::State& state) { - auto v = ConstructRandomVector(state.range(0)); - // Test worst case scenario (item not in vector) - const int64_t item_not_in_vector = state.range(0) * 2; - for (auto _ : state) { - benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_Complexity_O_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity(benchmark::oN); -BENCHMARK(BM_Complexity_O_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity([](benchmark::IterationCount n) -> double { - return static_cast(n); - }); -BENCHMARK(BM_Complexity_O_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity(); - -const char *n_test_name = "BM_Complexity_O_N"; -const char *big_o_n_test_name = "BM_Complexity_O_N_BigO"; -const char *rms_o_n_test_name = "BM_Complexity_O_N_RMS"; -const char *enum_auto_big_o_n = "N"; -const char *lambda_big_o_n = 
"f\\(N\\)"; - -// Add enum tests -ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, - enum_auto_big_o_n, /*family_index=*/3); - -// Add lambda tests -ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, - lambda_big_o_n, /*family_index=*/4); - -// ========================================================================= // -// ------------------------- Testing BigO O(N*lgN) ------------------------- // -// ========================================================================= // - -static void BM_Complexity_O_N_log_N(benchmark::State& state) { - auto v = ConstructRandomVector(state.range(0)); - for (auto _ : state) { - std::sort(v.begin(), v.end()); - } - state.SetComplexityN(state.range(0)); -} -static const double kLog2E = 1.44269504088896340736; -BENCHMARK(BM_Complexity_O_N_log_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity(benchmark::oNLogN); -BENCHMARK(BM_Complexity_O_N_log_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity([](benchmark::IterationCount n) { - return kLog2E * n * log(static_cast(n)); - }); -BENCHMARK(BM_Complexity_O_N_log_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity(); - -const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N"; -const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO"; -const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS"; -const char *enum_auto_big_o_n_lg_n = "NlgN"; -const char *lambda_big_o_n_lg_n = "f\\(N\\)"; - -// Add enum tests -ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, - rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n, - /*family_index=*/6); - -// Add lambda tests -ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, - rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n, - /*family_index=*/7); - -// ========================================================================= // -// -------- Testing formatting of Complexity with captured args ------------ // -// 
========================================================================= // - -void BM_ComplexityCaptureArgs(benchmark::State& state, int n) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - state.SetComplexityN(n); -} - -BENCHMARK_CAPTURE(BM_ComplexityCaptureArgs, capture_test, 100) - ->Complexity(benchmark::oN) - ->Ranges({{1, 2}, {3, 4}}); - -const std::string complexity_capture_name = - "BM_ComplexityCaptureArgs/capture_test"; - -ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO", - complexity_capture_name + "_RMS", "N", /*family_index=*/9); - -// ========================================================================= // -// --------------------------- TEST CASES END ------------------------------ // -// ========================================================================= // - -int main(int argc, char *argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/cxx03_test.cc b/libcxx/utils/google-benchmark/test/cxx03_test.cc deleted file mode 100644 index c4c9a52273e3..000000000000 --- a/libcxx/utils/google-benchmark/test/cxx03_test.cc +++ /dev/null @@ -1,63 +0,0 @@ -#undef NDEBUG -#include -#include - -#include "benchmark/benchmark.h" - -#if __cplusplus >= 201103L -#error C++11 or greater detected. Should be C++03. -#endif - -#ifdef BENCHMARK_HAS_CXX11 -#error C++11 or greater detected by the library. BENCHMARK_HAS_CXX11 is defined. -#endif - -void BM_empty(benchmark::State& state) { - while (state.KeepRunning()) { - volatile benchmark::IterationCount x = state.iterations(); - ((void)x); - } -} -BENCHMARK(BM_empty); - -// The new C++11 interface for args/ranges requires initializer list support. -// Therefore we provide the old interface to support C++03. 
-void BM_old_arg_range_interface(benchmark::State& state) { - assert((state.range(0) == 1 && state.range(1) == 2) || - (state.range(0) == 5 && state.range(1) == 6)); - while (state.KeepRunning()) { - } -} -BENCHMARK(BM_old_arg_range_interface)->ArgPair(1, 2)->RangePair(5, 5, 6, 6); - -template -void BM_template2(benchmark::State& state) { - BM_empty(state); -} -BENCHMARK_TEMPLATE2(BM_template2, int, long); - -template -void BM_template1(benchmark::State& state) { - BM_empty(state); -} -BENCHMARK_TEMPLATE(BM_template1, long); -BENCHMARK_TEMPLATE1(BM_template1, int); - -template -struct BM_Fixture : public ::benchmark::Fixture { -}; - -BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) { - BM_empty(state); -} -BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) { - BM_empty(state); -} - -void BM_counters(benchmark::State& state) { - BM_empty(state); - state.counters["Foo"] = 2; -} -BENCHMARK(BM_counters); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/diagnostics_test.cc b/libcxx/utils/google-benchmark/test/diagnostics_test.cc deleted file mode 100644 index dd64a3365531..000000000000 --- a/libcxx/utils/google-benchmark/test/diagnostics_test.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Testing: -// State::PauseTiming() -// State::ResumeTiming() -// Test that CHECK's within these function diagnose when they are called -// outside of the KeepRunning() loop. -// -// NOTE: Users should NOT include or use src/check.h. This is only done in -// order to test library internals. 
- -#include -#include - -#include "../src/check.h" -#include "benchmark/benchmark.h" - -#if defined(__GNUC__) && !defined(__EXCEPTIONS) -#define TEST_HAS_NO_EXCEPTIONS -#endif - -void TestHandler() { -#ifndef TEST_HAS_NO_EXCEPTIONS - throw std::logic_error(""); -#else - std::abort(); -#endif -} - -void try_invalid_pause_resume(benchmark::State& state) { -#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS) - try { - state.PauseTiming(); - std::abort(); - } catch (std::logic_error const&) { - } - try { - state.ResumeTiming(); - std::abort(); - } catch (std::logic_error const&) { - } -#else - (void)state; // avoid unused warning -#endif -} - -void BM_diagnostic_test(benchmark::State& state) { - static bool called_once = false; - - if (called_once == false) try_invalid_pause_resume(state); - - for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); - } - - if (called_once == false) try_invalid_pause_resume(state); - - called_once = true; -} -BENCHMARK(BM_diagnostic_test); - - -void BM_diagnostic_test_keep_running(benchmark::State& state) { - static bool called_once = false; - - if (called_once == false) try_invalid_pause_resume(state); - - while(state.KeepRunning()) { - benchmark::DoNotOptimize(state.iterations()); - } - - if (called_once == false) try_invalid_pause_resume(state); - - called_once = true; -} -BENCHMARK(BM_diagnostic_test_keep_running); - -int main(int argc, char* argv[]) { - benchmark::internal::GetAbortHandler() = &TestHandler; - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); -} diff --git a/libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc b/libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc deleted file mode 100644 index 3c36d3f03c11..000000000000 --- a/libcxx/utils/google-benchmark/test/display_aggregates_only_test.cc +++ /dev/null @@ -1,43 +0,0 @@ - -#undef NDEBUG -#include -#include - -#include "benchmark/benchmark.h" -#include 
"output_test.h" - -// Ok this test is super ugly. We want to check what happens with the file -// reporter in the presence of DisplayAggregatesOnly(). -// We do not care about console output, the normal tests check that already. - -void BM_SummaryRepeat(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); - -int main(int argc, char* argv[]) { - const std::string output = GetFileReporterOutput(argc, argv); - - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != - 1 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find 6 " - "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" - "\"name\": \"BM_SummaryRepeat/repeats:3\", " - "\"name\": \"BM_SummaryRepeat/repeats:3\", " - "\"name\": \"BM_SummaryRepeat/repeats:3\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " - "output:\n"; - std::cout << output; - return 1; - } - - return 0; -} diff --git a/libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc b/libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc deleted file mode 100644 index d4b0bab70e77..000000000000 --- a/libcxx/utils/google-benchmark/test/donotoptimize_assembly_test.cc +++ /dev/null @@ -1,163 +0,0 @@ -#include - -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wreturn-type" -#endif - -extern "C" { - -extern int ExternInt; -extern int ExternInt2; -extern int ExternInt3; - -inline int Add42(int x) { return x + 42; } - -struct NotTriviallyCopyable { - NotTriviallyCopyable(); - explicit 
NotTriviallyCopyable(int x) : value(x) {} - NotTriviallyCopyable(NotTriviallyCopyable const&); - int value; -}; - -struct Large { - int value; - int data[2]; -}; - -} -// CHECK-LABEL: test_with_rvalue: -extern "C" void test_with_rvalue() { - benchmark::DoNotOptimize(Add42(0)); - // CHECK: movl $42, %eax - // CHECK: ret -} - -// CHECK-LABEL: test_with_large_rvalue: -extern "C" void test_with_large_rvalue() { - benchmark::DoNotOptimize(Large{ExternInt, {ExternInt, ExternInt}}); - // CHECK: ExternInt(%rip) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]] - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) - // CHECK: ret -} - -// CHECK-LABEL: test_with_non_trivial_rvalue: -extern "C" void test_with_non_trivial_rvalue() { - benchmark::DoNotOptimize(NotTriviallyCopyable(ExternInt)); - // CHECK: mov{{l|q}} ExternInt(%rip) - // CHECK: ret -} - -// CHECK-LABEL: test_with_lvalue: -extern "C" void test_with_lvalue() { - int x = 101; - benchmark::DoNotOptimize(x); - // CHECK-GNU: movl $101, %eax - // CHECK-CLANG: movl $101, -{{[0-9]+}}(%[[REG:[a-z]+]]) - // CHECK: ret -} - -// CHECK-LABEL: test_with_large_lvalue: -extern "C" void test_with_large_lvalue() { - Large L{ExternInt, {ExternInt, ExternInt}}; - benchmark::DoNotOptimize(L); - // CHECK: ExternInt(%rip) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) - // CHECK: ret -} - -// CHECK-LABEL: test_with_non_trivial_lvalue: -extern "C" void test_with_non_trivial_lvalue() { - NotTriviallyCopyable NTC(ExternInt); - benchmark::DoNotOptimize(NTC); - // CHECK: ExternInt(%rip) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) - // CHECK: ret -} - -// CHECK-LABEL: test_with_const_lvalue: -extern "C" void test_with_const_lvalue() { - const int x = 123; - benchmark::DoNotOptimize(x); - // CHECK: movl $123, %eax - // CHECK: ret -} - -// CHECK-LABEL: test_with_large_const_lvalue: -extern "C" void 
test_with_large_const_lvalue() { - const Large L{ExternInt, {ExternInt, ExternInt}}; - benchmark::DoNotOptimize(L); - // CHECK: ExternInt(%rip) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) - // CHECK: movl %eax, -{{[0-9]+}}(%[[REG]]) - // CHECK: ret -} - -// CHECK-LABEL: test_with_non_trivial_const_lvalue: -extern "C" void test_with_non_trivial_const_lvalue() { - const NotTriviallyCopyable Obj(ExternInt); - benchmark::DoNotOptimize(Obj); - // CHECK: mov{{q|l}} ExternInt(%rip) - // CHECK: ret -} - -// CHECK-LABEL: test_div_by_two: -extern "C" int test_div_by_two(int input) { - int divisor = 2; - benchmark::DoNotOptimize(divisor); - return input / divisor; - // CHECK: movl $2, [[DEST:.*]] - // CHECK: idivl [[DEST]] - // CHECK: ret -} - -// CHECK-LABEL: test_inc_integer: -extern "C" int test_inc_integer() { - int x = 0; - for (int i=0; i < 5; ++i) - benchmark::DoNotOptimize(++x); - // CHECK: movl $1, [[DEST:.*]] - // CHECK: {{(addl \$1,|incl)}} [[DEST]] - // CHECK: {{(addl \$1,|incl)}} [[DEST]] - // CHECK: {{(addl \$1,|incl)}} [[DEST]] - // CHECK: {{(addl \$1,|incl)}} [[DEST]] - // CHECK-CLANG: movl [[DEST]], %eax - // CHECK: ret - return x; -} - -// CHECK-LABEL: test_pointer_rvalue -extern "C" void test_pointer_rvalue() { - // CHECK: movl $42, [[DEST:.*]] - // CHECK: leaq [[DEST]], %rax - // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) - // CHECK: ret - int x = 42; - benchmark::DoNotOptimize(&x); -} - -// CHECK-LABEL: test_pointer_const_lvalue: -extern "C" void test_pointer_const_lvalue() { - // CHECK: movl $42, [[DEST:.*]] - // CHECK: leaq [[DEST]], %rax - // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) - // CHECK: ret - int x = 42; - int * const xp = &x; - benchmark::DoNotOptimize(xp); -} - -// CHECK-LABEL: test_pointer_lvalue: -extern "C" void test_pointer_lvalue() { - // CHECK: movl $42, [[DEST:.*]] - // CHECK: leaq [[DEST]], %rax - // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z+]+]]) - // 
CHECK: ret - int x = 42; - int *xp = &x; - benchmark::DoNotOptimize(xp); -} diff --git a/libcxx/utils/google-benchmark/test/donotoptimize_test.cc b/libcxx/utils/google-benchmark/test/donotoptimize_test.cc deleted file mode 100644 index 2ce92d1c72be..000000000000 --- a/libcxx/utils/google-benchmark/test/donotoptimize_test.cc +++ /dev/null @@ -1,52 +0,0 @@ -#include "benchmark/benchmark.h" - -#include - -namespace { -#if defined(__GNUC__) -std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); -#endif -std::uint64_t double_up(const std::uint64_t x) { return x * 2; } -} - -// Using DoNotOptimize on types like BitRef seem to cause a lot of problems -// with the inline assembly on both GCC and Clang. -struct BitRef { - int index; - unsigned char &byte; - -public: - static BitRef Make() { - static unsigned char arr[2] = {}; - BitRef b(1, arr[0]); - return b; - } -private: - BitRef(int i, unsigned char& b) : index(i), byte(b) {} -}; - -int main(int, char*[]) { - // this test verifies compilation of DoNotOptimize() for some types - - char buffer8[8] = ""; - benchmark::DoNotOptimize(buffer8); - - char buffer20[20] = ""; - benchmark::DoNotOptimize(buffer20); - - char buffer1024[1024] = ""; - benchmark::DoNotOptimize(buffer1024); - benchmark::DoNotOptimize(&buffer1024[0]); - - int x = 123; - benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(&x); - benchmark::DoNotOptimize(x += 42); - - benchmark::DoNotOptimize(double_up(x)); - - // These tests are to e - benchmark::DoNotOptimize(BitRef::Make()); - BitRef lval = BitRef::Make(); - benchmark::DoNotOptimize(lval); -} diff --git a/libcxx/utils/google-benchmark/test/filter_test.cc b/libcxx/utils/google-benchmark/test/filter_test.cc deleted file mode 100644 index 1c198913b36a..000000000000 --- a/libcxx/utils/google-benchmark/test/filter_test.cc +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" - 
-namespace { - -class TestReporter : public benchmark::ConsoleReporter { - public: - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { - return ConsoleReporter::ReportContext(context); - }; - - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { - ++count_; - max_family_index_ = - std::max(max_family_index_, report[0].family_index); - ConsoleReporter::ReportRuns(report); - }; - - TestReporter() : count_(0), max_family_index_(0) {} - - virtual ~TestReporter() {} - - size_t GetCount() const { return count_; } - - size_t GetMaxFamilyIndex() const { return max_family_index_; } - - private: - mutable size_t count_; - mutable size_t max_family_index_; -}; - -} // end namespace - -static void NoPrefix(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(NoPrefix); - -static void BM_Foo(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_Foo); - -static void BM_Bar(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_Bar); - -static void BM_FooBar(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_FooBar); - -static void BM_FooBa(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_FooBa); - -int main(int argc, char **argv) { - bool list_only = false; - for (int i = 0; i < argc; ++i) - list_only |= std::string(argv[i]).find("--benchmark_list_tests") != - std::string::npos; - - benchmark::Initialize(&argc, argv); - - TestReporter test_reporter; - const size_t returned_count = - benchmark::RunSpecifiedBenchmarks(&test_reporter); - - if (argc == 2) { - // Make sure we ran all of the tests - std::stringstream ss(argv[1]); - size_t expected_return; - ss >> expected_return; - - if (returned_count != expected_return) { - std::cerr << "ERROR: Expected " << expected_return - << " tests to match the filter but returned_count = " - << returned_count << std::endl; - return -1; - } - - const size_t expected_reports = list_only ? 
0 : expected_return; - const size_t reports_count = test_reporter.GetCount(); - if (reports_count != expected_reports) { - std::cerr << "ERROR: Expected " << expected_reports - << " tests to be run but reported_count = " << reports_count - << std::endl; - return -1; - } - - const size_t max_family_index = test_reporter.GetMaxFamilyIndex(); - const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index; - if (num_families != expected_reports) { - std::cerr << "ERROR: Expected " << expected_reports - << " test families to be run but num_families = " - << num_families << std::endl; - return -1; - } - } - - return 0; -} diff --git a/libcxx/utils/google-benchmark/test/fixture_test.cc b/libcxx/utils/google-benchmark/test/fixture_test.cc deleted file mode 100644 index eba0a42d9cb0..000000000000 --- a/libcxx/utils/google-benchmark/test/fixture_test.cc +++ /dev/null @@ -1,51 +0,0 @@ - -#include "benchmark/benchmark.h" - -#include -#include - -#define FIXTURE_BECHMARK_NAME MyFixture - -class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { - public: - void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { - if (state.thread_index == 0) { - assert(data.get() == nullptr); - data.reset(new int(42)); - } - } - - void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE { - if (state.thread_index == 0) { - assert(data.get() != nullptr); - data.reset(); - } - } - - ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); } - - std::unique_ptr data; -}; - -BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) { - assert(data.get() != nullptr); - assert(*data == 42); - for (auto _ : st) { - } -} - -BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) { - if (st.thread_index == 0) { - assert(data.get() != nullptr); - assert(*data == 42); - } - for (auto _ : st) { - assert(data.get() != nullptr); - assert(*data == 42); - } - st.SetItemsProcessed(st.range(0)); -} -BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42); 
-BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, Bar)->Arg(42)->ThreadPerCpu(); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/internal_threading_test.cc b/libcxx/utils/google-benchmark/test/internal_threading_test.cc deleted file mode 100644 index 039d7c14a8c4..000000000000 --- a/libcxx/utils/google-benchmark/test/internal_threading_test.cc +++ /dev/null @@ -1,184 +0,0 @@ - -#undef NDEBUG - -#include -#include -#include "../src/timers.h" -#include "benchmark/benchmark.h" -#include "output_test.h" - -static const std::chrono::duration time_frame(50); -static const double time_frame_in_sec( - std::chrono::duration_cast>>( - time_frame) - .count()); - -void MyBusySpinwait() { - const auto start = benchmark::ChronoClockNow(); - - while (true) { - const auto now = benchmark::ChronoClockNow(); - const auto elapsed = now - start; - - if (std::chrono::duration(elapsed) >= - time_frame) - return; - } -} - -// ========================================================================= // -// --------------------------- TEST CASES BEGIN ---------------------------- // -// ========================================================================= // - -// ========================================================================= // -// BM_MainThread - -void BM_MainThread(benchmark::State& state) { - for (auto _ : state) { - MyBusySpinwait(); - state.SetIterationTime(time_frame_in_sec); - } - state.counters["invtime"] = - benchmark::Counter{1, benchmark::Counter::kIsRate}; -} - -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1); -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseRealTime(); -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseManualTime(); -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime(); -BENCHMARK(BM_MainThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime() - ->UseRealTime(); -BENCHMARK(BM_MainThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime() - ->UseManualTime(); - 
-BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2); -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseRealTime(); -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseManualTime(); -BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime(); -BENCHMARK(BM_MainThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime() - ->UseRealTime(); -BENCHMARK(BM_MainThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime() - ->UseManualTime(); - -// ========================================================================= // -// BM_WorkerThread - -void BM_WorkerThread(benchmark::State& state) { - for (auto _ : state) { - std::thread Worker(&MyBusySpinwait); - Worker.join(); - state.SetIterationTime(time_frame_in_sec); - } - state.counters["invtime"] = - benchmark::Counter{1, benchmark::Counter::kIsRate}; -} - -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1); -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseRealTime(); -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseManualTime(); -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime(); -BENCHMARK(BM_WorkerThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime() - ->UseRealTime(); -BENCHMARK(BM_WorkerThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime() - ->UseManualTime(); - -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2); -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseRealTime(); -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseManualTime(); -BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime(); -BENCHMARK(BM_WorkerThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime() - ->UseRealTime(); -BENCHMARK(BM_WorkerThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime() - ->UseManualTime(); - -// ========================================================================= // -// BM_MainThreadAndWorkerThread - -void 
BM_MainThreadAndWorkerThread(benchmark::State& state) { - for (auto _ : state) { - std::thread Worker(&MyBusySpinwait); - MyBusySpinwait(); - Worker.join(); - state.SetIterationTime(time_frame_in_sec); - } - state.counters["invtime"] = - benchmark::Counter{1, benchmark::Counter::kIsRate}; -} - -BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(1); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(1) - ->UseRealTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(1) - ->UseManualTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime() - ->UseRealTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(1) - ->MeasureProcessCPUTime() - ->UseManualTime(); - -BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(2); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(2) - ->UseRealTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(2) - ->UseManualTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime() - ->UseRealTime(); -BENCHMARK(BM_MainThreadAndWorkerThread) - ->Iterations(1) - ->Threads(2) - ->MeasureProcessCPUTime() - ->UseManualTime(); - -// ========================================================================= // -// ---------------------------- TEST CASES END ----------------------------- // -// ========================================================================= // - -int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/link_main_test.cc b/libcxx/utils/google-benchmark/test/link_main_test.cc deleted file mode 100644 index 241ad5c3905e..000000000000 
--- a/libcxx/utils/google-benchmark/test/link_main_test.cc +++ /dev/null @@ -1,8 +0,0 @@ -#include "benchmark/benchmark.h" - -void BM_empty(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); - } -} -BENCHMARK(BM_empty); diff --git a/libcxx/utils/google-benchmark/test/map_test.cc b/libcxx/utils/google-benchmark/test/map_test.cc deleted file mode 100644 index 86391b36016f..000000000000 --- a/libcxx/utils/google-benchmark/test/map_test.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include "benchmark/benchmark.h" - -#include -#include - -namespace { - -std::map ConstructRandomMap(int size) { - std::map m; - for (int i = 0; i < size; ++i) { - m.insert(std::make_pair(std::rand() % size, std::rand() % size)); - } - return m; -} - -} // namespace - -// Basic version. -static void BM_MapLookup(benchmark::State& state) { - const int size = static_cast(state.range(0)); - std::map m; - for (auto _ : state) { - state.PauseTiming(); - m = ConstructRandomMap(size); - state.ResumeTiming(); - for (int i = 0; i < size; ++i) { - benchmark::DoNotOptimize(m.find(std::rand() % size)); - } - } - state.SetItemsProcessed(state.iterations() * size); -} -BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12); - -// Using fixtures. 
-class MapFixture : public ::benchmark::Fixture { - public: - void SetUp(const ::benchmark::State& st) BENCHMARK_OVERRIDE { - m = ConstructRandomMap(static_cast(st.range(0))); - } - - void TearDown(const ::benchmark::State&) BENCHMARK_OVERRIDE { m.clear(); } - - std::map m; -}; - -BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) { - const int size = static_cast(state.range(0)); - for (auto _ : state) { - for (int i = 0; i < size; ++i) { - benchmark::DoNotOptimize(m.find(std::rand() % size)); - } - } - state.SetItemsProcessed(state.iterations() * size); -} -BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1 << 3, 1 << 12); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/memory_manager_test.cc b/libcxx/utils/google-benchmark/test/memory_manager_test.cc deleted file mode 100644 index f0c192fcbd00..000000000000 --- a/libcxx/utils/google-benchmark/test/memory_manager_test.cc +++ /dev/null @@ -1,46 +0,0 @@ -#include - -#include "../src/check.h" -#include "benchmark/benchmark.h" -#include "output_test.h" - -class TestMemoryManager : public benchmark::MemoryManager { - void Start() BENCHMARK_OVERRIDE {} - void Stop(Result* result) BENCHMARK_OVERRIDE { - result->num_allocs = 42; - result->max_bytes_used = 42000; - } -}; - -void BM_empty(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); - } -} -BENCHMARK(BM_empty); - -ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_empty\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - 
{"\"allocs_per_iter\": %float,$", MR_Next}, - {"\"max_bytes_used\": 42000$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}}); - -int main(int argc, char* argv[]) { - std::unique_ptr mm(new TestMemoryManager()); - - benchmark::RegisterMemoryManager(mm.get()); - RunOutputTests(argc, argv); - benchmark::RegisterMemoryManager(nullptr); -} diff --git a/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc b/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc deleted file mode 100644 index 6b61f3af47bb..000000000000 --- a/libcxx/utils/google-benchmark/test/multiple_ranges_test.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include "benchmark/benchmark.h" - -#include -#include -#include -#include - -class MultipleRangesFixture : public ::benchmark::Fixture { - public: - MultipleRangesFixture() - : expectedValues({{1, 3, 5}, - {1, 3, 8}, - {1, 3, 15}, - {2, 3, 5}, - {2, 3, 8}, - {2, 3, 15}, - {1, 4, 5}, - {1, 4, 8}, - {1, 4, 15}, - {2, 4, 5}, - {2, 4, 8}, - {2, 4, 15}, - {1, 7, 5}, - {1, 7, 8}, - {1, 7, 15}, - {2, 7, 5}, - {2, 7, 8}, - {2, 7, 15}, - {7, 6, 3}}) {} - - void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { - std::vector ranges = {state.range(0), state.range(1), - state.range(2)}; - - assert(expectedValues.find(ranges) != expectedValues.end()); - - actualValues.insert(ranges); - } - - // NOTE: This is not TearDown as we want to check after _all_ runs are - // complete. 
- virtual ~MultipleRangesFixture() { - if (actualValues != expectedValues) { - std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { - std::cout << "{"; - for (int64_t iv : v) { - std::cout << iv << ", "; - } - std::cout << "}\n"; - } - std::cout << "ACTUAL\n"; - for (auto v : actualValues) { - std::cout << "{"; - for (int64_t iv : v) { - std::cout << iv << ", "; - } - std::cout << "}\n"; - } - } - } - - std::set> expectedValues; - std::set> actualValues; -}; - -BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) { - for (auto _ : state) { - int64_t product = state.range(0) * state.range(1) * state.range(2); - for (int64_t x = 0; x < product; x++) { - benchmark::DoNotOptimize(x); - } - } -} - -BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty) - ->RangeMultiplier(2) - ->Ranges({{1, 2}, {3, 7}, {5, 15}}) - ->Args({7, 6, 3}); - -void BM_CheckDefaultArgument(benchmark::State& state) { - // Test that the 'range()' without an argument is the same as 'range(0)'. - assert(state.range() == state.range(0)); - assert(state.range() != state.range(1)); - for (auto _ : state) { - } -} -BENCHMARK(BM_CheckDefaultArgument)->Ranges({{1, 5}, {6, 10}}); - -static void BM_MultipleRanges(benchmark::State& st) { - for (auto _ : st) { - } -} -BENCHMARK(BM_MultipleRanges)->Ranges({{5, 5}, {6, 6}}); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/options_test.cc b/libcxx/utils/google-benchmark/test/options_test.cc deleted file mode 100644 index 9f9a78667c9e..000000000000 --- a/libcxx/utils/google-benchmark/test/options_test.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include "benchmark/benchmark.h" -#include -#include - -#if defined(NDEBUG) -#undef NDEBUG -#endif -#include - -void BM_basic(benchmark::State& state) { - for (auto _ : state) { - } -} - -void BM_basic_slow(benchmark::State& state) { - std::chrono::milliseconds sleep_duration(state.range(0)); - for (auto _ : state) { - std::this_thread::sleep_for( - 
std::chrono::duration_cast(sleep_duration)); - } -} - -BENCHMARK(BM_basic); -BENCHMARK(BM_basic)->Arg(42); -BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond); -BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond); -BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kSecond); -BENCHMARK(BM_basic)->Range(1, 8); -BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8); -BENCHMARK(BM_basic)->DenseRange(10, 15); -BENCHMARK(BM_basic)->Args({42, 42}); -BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}}); -BENCHMARK(BM_basic)->MinTime(0.7); -BENCHMARK(BM_basic)->UseRealTime(); -BENCHMARK(BM_basic)->ThreadRange(2, 4); -BENCHMARK(BM_basic)->ThreadPerCpu(); -BENCHMARK(BM_basic)->Repetitions(3); -BENCHMARK(BM_basic) - ->RangeMultiplier(std::numeric_limits::max()) - ->Range(std::numeric_limits::min(), - std::numeric_limits::max()); - -// Negative ranges -BENCHMARK(BM_basic)->Range(-64, -1); -BENCHMARK(BM_basic)->RangeMultiplier(4)->Range(-8, 8); -BENCHMARK(BM_basic)->DenseRange(-2, 2, 1); -BENCHMARK(BM_basic)->Ranges({{-64, 1}, {-8, -1}}); - -void CustomArgs(benchmark::internal::Benchmark* b) { - for (int i = 0; i < 10; ++i) { - b->Arg(i); - } -} - -BENCHMARK(BM_basic)->Apply(CustomArgs); - -void BM_explicit_iteration_count(benchmark::State& state) { - // Test that benchmarks specified with an explicit iteration count are - // only run once. - static bool invoked_before = false; - assert(!invoked_before); - invoked_before = true; - - // Test that the requested iteration count is respected. 
- assert(state.max_iterations == 42); - size_t actual_iterations = 0; - for (auto _ : state) - ++actual_iterations; - assert(state.iterations() == state.max_iterations); - assert(state.iterations() == 42); - -} -BENCHMARK(BM_explicit_iteration_count)->Iterations(42); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/output_test.h b/libcxx/utils/google-benchmark/test/output_test.h deleted file mode 100644 index 15368f9b6830..000000000000 --- a/libcxx/utils/google-benchmark/test/output_test.h +++ /dev/null @@ -1,213 +0,0 @@ -#ifndef TEST_OUTPUT_TEST_H -#define TEST_OUTPUT_TEST_H - -#undef NDEBUG -#include -#include -#include -#include -#include -#include -#include - -#include "../src/re.h" -#include "benchmark/benchmark.h" - -#define CONCAT2(x, y) x##y -#define CONCAT(x, y) CONCAT2(x, y) - -#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__) - -#define SET_SUBSTITUTIONS(...) \ - int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__) - -enum MatchRules { - MR_Default, // Skip non-matching lines until a match is found. - MR_Next, // Match must occur on the next line. - MR_Not // No line between the current position and the next match matches - // the regex -}; - -struct TestCase { - TestCase(std::string re, int rule = MR_Default); - - std::string regex_str; - int match_rule; - std::string substituted_regex; - std::shared_ptr regex; -}; - -enum TestCaseID { - TC_ConsoleOut, - TC_ConsoleErr, - TC_JSONOut, - TC_JSONErr, - TC_CSVOut, - TC_CSVErr, - - TC_NumID // PRIVATE -}; - -// Add a list of test cases to be run against the output specified by -// 'ID' -int AddCases(TestCaseID ID, std::initializer_list il); - -// Add or set a list of substitutions to be performed on constructed regex's -// See 'output_test_helper.cc' for a list of default substitutions. -int SetSubstitutions( - std::initializer_list> il); - -// Run all output tests. 
-void RunOutputTests(int argc, char* argv[]); - -// Count the number of 'pat' substrings in the 'haystack' string. -int SubstrCnt(const std::string& haystack, const std::string& pat); - -// Run registered benchmarks with file reporter enabled, and return the content -// outputted by the file reporter. -std::string GetFileReporterOutput(int argc, char* argv[]); - -// ========================================================================= // -// ------------------------- Results checking ------------------------------ // -// ========================================================================= // - -// Call this macro to register a benchmark for checking its results. This -// should be all that's needed. It subscribes a function to check the (CSV) -// results of a benchmark. This is done only after verifying that the output -// strings are really as expected. -// bm_name_pattern: a name or a regex pattern which will be matched against -// all the benchmark names. Matching benchmarks -// will be the subject of a call to checker_function -// checker_function: should be of type ResultsCheckFn (see below) -#define CHECK_BENCHMARK_RESULTS(bm_name_pattern, checker_function) \ - size_t CONCAT(dummy, __LINE__) = AddChecker(bm_name_pattern, checker_function) - -struct Results; -typedef std::function ResultsCheckFn; - -size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn); - -// Class holding the results of a benchmark. -// It is passed in calls to checker functions. -struct Results { - // the benchmark name - std::string name; - // the benchmark fields - std::map values; - - Results(const std::string& n) : name(n) {} - - int NumThreads() const; - - double NumIterations() const; - - typedef enum { kCpuTime, kRealTime } BenchmarkTime; - - // get cpu_time or real_time in seconds - double GetTime(BenchmarkTime which) const; - - // get the real_time duration of the benchmark in seconds. 
- // it is better to use fuzzy float checks for this, as the float - // ASCII formatting is lossy. - double DurationRealTime() const { - return NumIterations() * GetTime(kRealTime); - } - // get the cpu_time duration of the benchmark in seconds - double DurationCPUTime() const { - return NumIterations() * GetTime(kCpuTime); - } - - // get the string for a result by name, or nullptr if the name - // is not found - const std::string* Get(const char* entry_name) const { - auto it = values.find(entry_name); - if (it == values.end()) return nullptr; - return &it->second; - } - - // get a result by name, parsed as a specific type. - // NOTE: for counters, use GetCounterAs instead. - template - T GetAs(const char* entry_name) const; - - // counters are written as doubles, so they have to be read first - // as a double, and only then converted to the asked type. - template - T GetCounterAs(const char* entry_name) const { - double dval = GetAs(entry_name); - T tval = static_cast(dval); - return tval; - } -}; - -template -T Results::GetAs(const char* entry_name) const { - auto* sv = Get(entry_name); - CHECK(sv != nullptr && !sv->empty()); - std::stringstream ss; - ss << *sv; - T out; - ss >> out; - CHECK(!ss.fail()); - return out; -} - -//---------------------------------- -// Macros to help in result checking. Do not use them with arguments causing -// side-effects. - -// clang-format off - -#define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \ - CONCAT(CHECK_, relationship) \ - (entry.getfn< var_type >(var_name), (value)) << "\n" \ - << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ - << __FILE__ << ":" << __LINE__ << ": " \ - << "expected (" << #var_type << ")" << (var_name) \ - << "=" << (entry).getfn< var_type >(var_name) \ - << " to be " #relationship " to " << (value) << "\n" - -// check with tolerance. eps_factor is the tolerance window, which is -// interpreted relative to value (eg, 0.1 means 10% of value). 
-#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ - CONCAT(CHECK_FLOAT_, relationship) \ - (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \ - << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ - << __FILE__ << ":" << __LINE__ << ": " \ - << "expected (" << #var_type << ")" << (var_name) \ - << "=" << (entry).getfn< var_type >(var_name) \ - << " to be " #relationship " to " << (value) << "\n" \ - << __FILE__ << ":" << __LINE__ << ": " \ - << "with tolerance of " << (eps_factor) * (value) \ - << " (" << (eps_factor)*100. << "%), " \ - << "but delta was " << ((entry).getfn< var_type >(var_name) - (value)) \ - << " (" << (((entry).getfn< var_type >(var_name) - (value)) \ - / \ - ((value) > 1.e-5 || value < -1.e-5 ? value : 1.e-5)*100.) \ - << "%)" - -#define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \ - CHECK_RESULT_VALUE_IMPL(entry, GetAs, var_type, var_name, relationship, value) - -#define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \ - CHECK_RESULT_VALUE_IMPL(entry, GetCounterAs, var_type, var_name, relationship, value) - -#define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \ - CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetAs, double, var_name, relationship, value, eps_factor) - -#define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \ - CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) - -// clang-format on - -// ========================================================================= // -// --------------------------- Misc Utilities ------------------------------ // -// ========================================================================= // - -namespace { - -const char* const dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; - -} // end namespace - -#endif // TEST_OUTPUT_TEST_H diff --git 
a/libcxx/utils/google-benchmark/test/output_test_helper.cc b/libcxx/utils/google-benchmark/test/output_test_helper.cc deleted file mode 100644 index b8ef1205744a..000000000000 --- a/libcxx/utils/google-benchmark/test/output_test_helper.cc +++ /dev/null @@ -1,520 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../src/benchmark_api_internal.h" -#include "../src/check.h" // NOTE: check.h is for internal use only! -#include "../src/re.h" // NOTE: re.h is for internal use only -#include "output_test.h" - -// ========================================================================= // -// ------------------------------ Internals -------------------------------- // -// ========================================================================= // -namespace internal { -namespace { - -using TestCaseList = std::vector; - -// Use a vector because the order elements are added matters during iteration. -// std::map/unordered_map don't guarantee that. -// For example: -// SetSubstitutions({{"%HelloWorld", "Hello"}, {"%Hello", "Hi"}}); -// Substitute("%HelloWorld") // Always expands to Hello. -using SubMap = std::vector>; - -TestCaseList& GetTestCaseList(TestCaseID ID) { - // Uses function-local statics to ensure initialization occurs - // before first use. - static TestCaseList lists[TC_NumID]; - return lists[ID]; -} - -SubMap& GetSubstitutions() { - // Don't use 'dec_re' from header because it may not yet be initialized. 
- // clang-format off - static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; - static std::string time_re = "([0-9]+[.])?[0-9]+"; - static SubMap map = { - {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, - // human-readable float - {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"}, - {"%int", "[ ]*[0-9]+"}, - {" %s ", "[ ]+"}, - {"%time", "[ ]*" + time_re + "[ ]+ns"}, - {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, - {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, - {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, - {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, - {"%console_time_only_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns"}, - {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, - {"%console_us_time_only_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us"}, - {"%csv_header", - "name,iterations,real_time,cpu_time,time_unit,bytes_per_second," - "items_per_second,label,error_occurred,error_message"}, - {"%csv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,,,"}, - {"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"}, - {"%csv_ms_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ms,,,,,"}, - {"%csv_s_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",s,,,,,"}, - {"%csv_bytes_report", - "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"}, - {"%csv_items_report", - "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,," + safe_dec_re + ",,,"}, - {"%csv_bytes_items_report", - "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + - "," + safe_dec_re + ",,,"}, - {"%csv_label_report_begin", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns,,,"}, - {"%csv_label_report_end", ",,"}}; - // clang-format on - return map; -} 
- -std::string PerformSubstitutions(std::string source) { - SubMap const& subs = GetSubstitutions(); - using SizeT = std::string::size_type; - for (auto const& KV : subs) { - SizeT pos; - SizeT next_start = 0; - while ((pos = source.find(KV.first, next_start)) != std::string::npos) { - next_start = pos + KV.second.size(); - source.replace(pos, KV.first.size(), KV.second); - } - } - return source; -} - -void CheckCase(std::stringstream& remaining_output, TestCase const& TC, - TestCaseList const& not_checks) { - std::string first_line; - bool on_first = true; - std::string line; - while (remaining_output.eof() == false) { - CHECK(remaining_output.good()); - std::getline(remaining_output, line); - if (on_first) { - first_line = line; - on_first = false; - } - for (const auto& NC : not_checks) { - CHECK(!NC.regex->Match(line)) - << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" - << NC.regex_str << "\"" - << "\n actual regex string \"" << TC.substituted_regex << "\"" - << "\n started matching near: " << first_line; - } - if (TC.regex->Match(line)) return; - CHECK(TC.match_rule != MR_Next) - << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str - << "\"" - << "\n actual regex string \"" << TC.substituted_regex << "\"" - << "\n started matching near: " << first_line; - } - CHECK(remaining_output.eof() == false) - << "End of output reached before match for regex \"" << TC.regex_str - << "\" was found" - << "\n actual regex string \"" << TC.substituted_regex << "\"" - << "\n started matching near: " << first_line; -} - -void CheckCases(TestCaseList const& checks, std::stringstream& output) { - std::vector not_checks; - for (size_t i = 0; i < checks.size(); ++i) { - const auto& TC = checks[i]; - if (TC.match_rule == MR_Not) { - not_checks.push_back(TC); - continue; - } - CheckCase(output, TC, not_checks); - not_checks.clear(); - } -} - -class TestReporter : public benchmark::BenchmarkReporter { - public: - TestReporter(std::vector reps) 
- : reporters_(reps) {} - - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { - bool last_ret = false; - bool first = true; - for (auto rep : reporters_) { - bool new_ret = rep->ReportContext(context); - CHECK(first || new_ret == last_ret) - << "Reports return different values for ReportContext"; - first = false; - last_ret = new_ret; - } - (void)first; - return last_ret; - } - - void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { - for (auto rep : reporters_) rep->ReportRuns(report); - } - void Finalize() BENCHMARK_OVERRIDE { - for (auto rep : reporters_) rep->Finalize(); - } - - private: - std::vector reporters_; -}; -} // namespace - -} // end namespace internal - -// ========================================================================= // -// -------------------------- Results checking ----------------------------- // -// ========================================================================= // - -namespace internal { - -// Utility class to manage subscribers for checking benchmark results. -// It works by parsing the CSV output to read the results. 
-class ResultsChecker { - public: - struct PatternAndFn : public TestCase { // reusing TestCase for its regexes - PatternAndFn(const std::string& rx, ResultsCheckFn fn_) - : TestCase(rx), fn(fn_) {} - ResultsCheckFn fn; - }; - - std::vector check_patterns; - std::vector results; - std::vector field_names; - - void Add(const std::string& entry_pattern, ResultsCheckFn fn); - - void CheckResults(std::stringstream& output); - - private: - void SetHeader_(const std::string& csv_header); - void SetValues_(const std::string& entry_csv_line); - - std::vector SplitCsv_(const std::string& line); -}; - -// store the static ResultsChecker in a function to prevent initialization -// order problems -ResultsChecker& GetResultsChecker() { - static ResultsChecker rc; - return rc; -} - -// add a results checker for a benchmark -void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) { - check_patterns.emplace_back(entry_pattern, fn); -} - -// check the results of all subscribed benchmarks -void ResultsChecker::CheckResults(std::stringstream& output) { - // first reset the stream to the start - { - auto start = std::stringstream::pos_type(0); - // clear before calling tellg() - output.clear(); - // seek to zero only when needed - if (output.tellg() > start) output.seekg(start); - // and just in case - output.clear(); - } - // now go over every line and publish it to the ResultsChecker - std::string line; - bool on_first = true; - while (output.eof() == false) { - CHECK(output.good()); - std::getline(output, line); - if (on_first) { - SetHeader_(line); // this is important - on_first = false; - continue; - } - SetValues_(line); - } - // finally we can call the subscribed check functions - for (const auto& p : check_patterns) { - VLOG(2) << "--------------------------------\n"; - VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; - for (const auto& r : results) { - if (!p.regex->Match(r.name)) { - VLOG(2) << p.regex_str << " is not matched 
by " << r.name << "\n"; - continue; - } else { - VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; - } - VLOG(1) << "Checking results of " << r.name << ": ... \n"; - p.fn(r); - VLOG(1) << "Checking results of " << r.name << ": OK.\n"; - } - } -} - -// prepare for the names in this header -void ResultsChecker::SetHeader_(const std::string& csv_header) { - field_names = SplitCsv_(csv_header); -} - -// set the values for a benchmark -void ResultsChecker::SetValues_(const std::string& entry_csv_line) { - if (entry_csv_line.empty()) return; // some lines are empty - CHECK(!field_names.empty()); - auto vals = SplitCsv_(entry_csv_line); - CHECK_EQ(vals.size(), field_names.size()); - results.emplace_back(vals[0]); // vals[0] is the benchmark name - auto& entry = results.back(); - for (size_t i = 1, e = vals.size(); i < e; ++i) { - entry.values[field_names[i]] = vals[i]; - } -} - -// a quick'n'dirty csv splitter (eliminating quotes) -std::vector ResultsChecker::SplitCsv_(const std::string& line) { - std::vector out; - if (line.empty()) return out; - if (!field_names.empty()) out.reserve(field_names.size()); - size_t prev = 0, pos = line.find_first_of(','), curr = pos; - while (pos != line.npos) { - CHECK(curr > 0); - if (line[prev] == '"') ++prev; - if (line[curr - 1] == '"') --curr; - out.push_back(line.substr(prev, curr - prev)); - prev = pos + 1; - pos = line.find_first_of(',', pos + 1); - curr = pos; - } - curr = line.size(); - if (line[prev] == '"') ++prev; - if (line[curr - 1] == '"') --curr; - out.push_back(line.substr(prev, curr - prev)); - return out; -} - -} // end namespace internal - -size_t AddChecker(const char* bm_name, ResultsCheckFn fn) { - auto& rc = internal::GetResultsChecker(); - rc.Add(bm_name, fn); - return rc.results.size(); -} - -int Results::NumThreads() const { - auto pos = name.find("/threads:"); - if (pos == name.npos) return 1; - auto end = name.find('/', pos + 9); - std::stringstream ss; - ss << name.substr(pos + 9, end); - int 
num = 1; - ss >> num; - CHECK(!ss.fail()); - return num; -} - -double Results::NumIterations() const { - return GetAs("iterations"); -} - -double Results::GetTime(BenchmarkTime which) const { - CHECK(which == kCpuTime || which == kRealTime); - const char* which_str = which == kCpuTime ? "cpu_time" : "real_time"; - double val = GetAs(which_str); - auto unit = Get("time_unit"); - CHECK(unit); - if (*unit == "ns") { - return val * 1.e-9; - } else if (*unit == "us") { - return val * 1.e-6; - } else if (*unit == "ms") { - return val * 1.e-3; - } else if (*unit == "s") { - return val; - } else { - CHECK(1 == 0) << "unknown time unit: " << *unit; - return 0; - } -} - -// ========================================================================= // -// -------------------------- Public API Definitions------------------------ // -// ========================================================================= // - -TestCase::TestCase(std::string re, int rule) - : regex_str(std::move(re)), - match_rule(rule), - substituted_regex(internal::PerformSubstitutions(regex_str)), - regex(std::make_shared()) { - std::string err_str; - regex->Init(substituted_regex, &err_str); - CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex - << "\"" - << "\n originally \"" << regex_str << "\"" - << "\n got error: " << err_str; -} - -int AddCases(TestCaseID ID, std::initializer_list il) { - auto& L = internal::GetTestCaseList(ID); - L.insert(L.end(), il); - return 0; -} - -int SetSubstitutions( - std::initializer_list> il) { - auto& subs = internal::GetSubstitutions(); - for (auto KV : il) { - bool exists = false; - KV.second = internal::PerformSubstitutions(KV.second); - for (auto& EKV : subs) { - if (EKV.first == KV.first) { - EKV.second = std::move(KV.second); - exists = true; - break; - } - } - if (!exists) subs.push_back(std::move(KV)); - } - return 0; -} - -// Disable deprecated warnings temporarily because we need to reference -// CSVReporter but don't want to trigger 
-Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif -void RunOutputTests(int argc, char* argv[]) { - using internal::GetTestCaseList; - benchmark::Initialize(&argc, argv); - auto options = benchmark::internal::GetOutputOptions(/*force_no_color*/ true); - benchmark::ConsoleReporter CR(options); - benchmark::JSONReporter JR; - benchmark::CSVReporter CSVR; - struct ReporterTest { - const char* name; - std::vector& output_cases; - std::vector& error_cases; - benchmark::BenchmarkReporter& reporter; - std::stringstream out_stream; - std::stringstream err_stream; - - ReporterTest(const char* n, std::vector& out_tc, - std::vector& err_tc, - benchmark::BenchmarkReporter& br) - : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { - reporter.SetOutputStream(&out_stream); - reporter.SetErrorStream(&err_stream); - } - } TestCases[] = { - {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut), - GetTestCaseList(TC_ConsoleErr), CR}, - {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr), - JR}, - {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr), - CSVR}, - }; - - // Create the test reporter and run the benchmarks. - std::cout << "Running benchmarks...\n"; - internal::TestReporter test_rep({&CR, &JR, &CSVR}); - benchmark::RunSpecifiedBenchmarks(&test_rep); - - for (auto& rep_test : TestCases) { - std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; - std::string banner(msg.size() - 1, '-'); - std::cout << banner << msg << banner << "\n"; - - std::cerr << rep_test.err_stream.str(); - std::cout << rep_test.out_stream.str(); - - internal::CheckCases(rep_test.error_cases, rep_test.err_stream); - internal::CheckCases(rep_test.output_cases, rep_test.out_stream); - - std::cout << "\n"; - } - - // now that we know the output is as expected, we can dispatch - // the checks to subscribees. 
- auto& csv = TestCases[2]; - // would use == but gcc spits a warning - CHECK(std::strcmp(csv.name, "CSVReporter") == 0); - internal::GetResultsChecker().CheckResults(csv.out_stream); -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -int SubstrCnt(const std::string& haystack, const std::string& pat) { - if (pat.length() == 0) return 0; - int count = 0; - for (size_t offset = haystack.find(pat); offset != std::string::npos; - offset = haystack.find(pat, offset + pat.length())) - ++count; - return count; -} - -static char ToHex(int ch) { - return ch < 10 ? static_cast('0' + ch) - : static_cast('a' + (ch - 10)); -} - -static char RandomHexChar() { - static std::mt19937 rd{std::random_device{}()}; - static std::uniform_int_distribution mrand{0, 15}; - return ToHex(mrand(rd)); -} - -static std::string GetRandomFileName() { - std::string model = "test.%%%%%%"; - for (auto & ch : model) { - if (ch == '%') - ch = RandomHexChar(); - } - return model; -} - -static bool FileExists(std::string const& name) { - std::ifstream in(name.c_str()); - return in.good(); -} - -static std::string GetTempFileName() { - // This function attempts to avoid race conditions where two tests - // create the same file at the same time. However, it still introduces races - // similar to tmpnam. 
- int retries = 3; - while (--retries) { - std::string name = GetRandomFileName(); - if (!FileExists(name)) - return name; - } - std::cerr << "Failed to create unique temporary file name" << std::endl; - std::abort(); -} - -std::string GetFileReporterOutput(int argc, char* argv[]) { - std::vector new_argv(argv, argv + argc); - assert(static_cast(argc) == new_argv.size()); - - std::string tmp_file_name = GetTempFileName(); - std::cout << "Will be using this as the tmp file: " << tmp_file_name << '\n'; - - std::string tmp = "--benchmark_out="; - tmp += tmp_file_name; - new_argv.emplace_back(const_cast(tmp.c_str())); - - argc = int(new_argv.size()); - - benchmark::Initialize(&argc, new_argv.data()); - benchmark::RunSpecifiedBenchmarks(); - - // Read the output back from the file, and delete the file. - std::ifstream tmp_stream(tmp_file_name); - std::string output = std::string((std::istreambuf_iterator(tmp_stream)), - std::istreambuf_iterator()); - std::remove(tmp_file_name.c_str()); - - return output; -} diff --git a/libcxx/utils/google-benchmark/test/perf_counters_gtest.cc b/libcxx/utils/google-benchmark/test/perf_counters_gtest.cc deleted file mode 100644 index 2a2868a71536..000000000000 --- a/libcxx/utils/google-benchmark/test/perf_counters_gtest.cc +++ /dev/null @@ -1,145 +0,0 @@ -#include - -#include "../src/perf_counters.h" -#include "gtest/gtest.h" - -#ifndef GTEST_SKIP -struct MsgHandler { - void operator=(std::ostream&){} -}; -#define GTEST_SKIP() return MsgHandler() = std::cout -#endif - -using benchmark::internal::PerfCounters; -using benchmark::internal::PerfCounterValues; - -namespace { -const char kGenericPerfEvent1[] = "CYCLES"; -const char kGenericPerfEvent2[] = "BRANCHES"; -const char kGenericPerfEvent3[] = "INSTRUCTIONS"; - -TEST(PerfCountersTest, Init) { - EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported); -} - -TEST(PerfCountersTest, OneCounter) { - if (!PerfCounters::kSupported) { - GTEST_SKIP() << "Performance counters not 
supported.\n"; - } - EXPECT_TRUE(PerfCounters::Initialize()); - EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid()); -} - -TEST(PerfCountersTest, NegativeTest) { - if (!PerfCounters::kSupported) { - EXPECT_FALSE(PerfCounters::Initialize()); - return; - } - EXPECT_TRUE(PerfCounters::Initialize()); - EXPECT_FALSE(PerfCounters::Create({}).IsValid()); - EXPECT_FALSE(PerfCounters::Create({""}).IsValid()); - EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid()); - { - EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3}) - .IsValid()); - } - EXPECT_FALSE( - PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}) - .IsValid()); - EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name", - kGenericPerfEvent1}) - .IsValid()); - { - EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3}) - .IsValid()); - } - EXPECT_FALSE( - PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"}) - .IsValid()); -} - -TEST(PerfCountersTest, Read1Counter) { - if (!PerfCounters::kSupported) { - GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; - } - EXPECT_TRUE(PerfCounters::Initialize()); - auto counters = PerfCounters::Create({kGenericPerfEvent1}); - EXPECT_TRUE(counters.IsValid()); - PerfCounterValues values1(1); - EXPECT_TRUE(counters.Snapshot(&values1)); - EXPECT_GT(values1[0], 0); - PerfCounterValues values2(1); - EXPECT_TRUE(counters.Snapshot(&values2)); - EXPECT_GT(values2[0], 0); - EXPECT_GT(values2[0], values1[0]); -} - -TEST(PerfCountersTest, Read2Counters) { - if (!PerfCounters::kSupported) { - GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; - } - EXPECT_TRUE(PerfCounters::Initialize()); - auto counters = - PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); - EXPECT_TRUE(counters.IsValid()); - PerfCounterValues values1(2); - 
EXPECT_TRUE(counters.Snapshot(&values1)); - EXPECT_GT(values1[0], 0); - EXPECT_GT(values1[1], 0); - PerfCounterValues values2(2); - EXPECT_TRUE(counters.Snapshot(&values2)); - EXPECT_GT(values2[0], 0); - EXPECT_GT(values2[1], 0); -} - -size_t do_work() { - size_t res = 0; - for (size_t i = 0; i < 100000000; ++i) res += i * i; - return res; -} - -void measure(size_t threadcount, PerfCounterValues* values1, - PerfCounterValues* values2) { - CHECK_NE(values1, nullptr); - CHECK_NE(values2, nullptr); - std::vector threads(threadcount); - auto work = [&]() { CHECK(do_work() > 1000); }; - - // We need to first set up the counters, then start the threads, so the - // threads would inherit the counters. But later, we need to first destroy the - // thread pool (so all the work finishes), then measure the counters. So the - // scopes overlap, and we need to explicitly control the scope of the - // threadpool. - auto counters = - PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3}); - for (auto& t : threads) t = std::thread(work); - counters.Snapshot(values1); - for (auto& t : threads) t.join(); - counters.Snapshot(values2); -} - -TEST(PerfCountersTest, MultiThreaded) { - if (!PerfCounters::kSupported) { - GTEST_SKIP() << "Test skipped because libpfm is not supported."; - } - EXPECT_TRUE(PerfCounters::Initialize()); - PerfCounterValues values1(2); - PerfCounterValues values2(2); - - measure(2, &values1, &values2); - std::vector D1{static_cast(values2[0] - values1[0]), - static_cast(values2[1] - values1[1])}; - - measure(4, &values1, &values2); - std::vector D2{static_cast(values2[0] - values1[0]), - static_cast(values2[1] - values1[1])}; - - // Some extra work will happen on the main thread - like joining the threads - // - so the ratio won't be quite 2.0, but very close. 
- EXPECT_GE(D2[0], 1.9 * D1[0]); - EXPECT_GE(D2[1], 1.9 * D1[1]); -} -} // namespace diff --git a/libcxx/utils/google-benchmark/test/perf_counters_test.cc b/libcxx/utils/google-benchmark/test/perf_counters_test.cc deleted file mode 100644 index d6e0284d4d4b..000000000000 --- a/libcxx/utils/google-benchmark/test/perf_counters_test.cc +++ /dev/null @@ -1,27 +0,0 @@ -#undef NDEBUG - -#include "../src/perf_counters.h" - -#include "benchmark/benchmark.h" -#include "output_test.h" - -void BM_Simple(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); - } -} -BENCHMARK(BM_Simple); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); - -void CheckSimple(Results const& e) { - CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); - CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0); -} -CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); - -int main(int argc, char* argv[]) { - if (!benchmark::internal::PerfCounters::kSupported) { - return 0; - } - RunOutputTests(argc, argv); -} diff --git a/libcxx/utils/google-benchmark/test/register_benchmark_test.cc b/libcxx/utils/google-benchmark/test/register_benchmark_test.cc deleted file mode 100644 index c027eabacae0..000000000000 --- a/libcxx/utils/google-benchmark/test/register_benchmark_test.cc +++ /dev/null @@ -1,184 +0,0 @@ - -#undef NDEBUG -#include -#include - -#include "../src/check.h" // NOTE: check.h is for internal use only! -#include "benchmark/benchmark.h" - -namespace { - -class TestReporter : public benchmark::ConsoleReporter { - public: - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { - all_runs_.insert(all_runs_.end(), begin(report), end(report)); - ConsoleReporter::ReportRuns(report); - } - - std::vector all_runs_; -}; - -struct TestCase { - std::string name; - const char* label; - // Note: not explicit as we rely on it being converted through ADD_CASES. 
- TestCase(const char* xname) : TestCase(xname, nullptr) {} - TestCase(const char* xname, const char* xlabel) - : name(xname), label(xlabel) {} - - typedef benchmark::BenchmarkReporter::Run Run; - - void CheckRun(Run const& run) const { - // clang-format off - CHECK(name == run.benchmark_name()) << "expected " << name << " got " - << run.benchmark_name(); - if (label) { - CHECK(run.report_label == label) << "expected " << label << " got " - << run.report_label; - } else { - CHECK(run.report_label == ""); - } - // clang-format on - } -}; - -std::vector ExpectedResults; - -int AddCases(std::initializer_list const& v) { - for (auto N : v) { - ExpectedResults.push_back(N); - } - return 0; -} - -#define CONCAT(x, y) CONCAT2(x, y) -#define CONCAT2(x, y) x##y -#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__}) - -} // end namespace - -typedef benchmark::internal::Benchmark* ReturnVal; - -//----------------------------------------------------------------------------// -// Test RegisterBenchmark with no additional arguments -//----------------------------------------------------------------------------// -void BM_function(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_function); -ReturnVal dummy = benchmark::RegisterBenchmark( - "BM_function_manual_registration", BM_function); -ADD_CASES({"BM_function"}, {"BM_function_manual_registration"}); - -//----------------------------------------------------------------------------// -// Test RegisterBenchmark with additional arguments -// Note: GCC <= 4.8 do not support this form of RegisterBenchmark because they -// reject the variadic pack expansion of lambda captures. 
-//----------------------------------------------------------------------------// -#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK - -void BM_extra_args(benchmark::State& st, const char* label) { - for (auto _ : st) { - } - st.SetLabel(label); -} -int RegisterFromFunction() { - std::pair cases[] = { - {"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}}; - for (auto const& c : cases) - benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second); - return 0; -} -int dummy2 = RegisterFromFunction(); -ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}); - -#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK - -//----------------------------------------------------------------------------// -// Test RegisterBenchmark with different callable types -//----------------------------------------------------------------------------// - -struct CustomFixture { - void operator()(benchmark::State& st) { - for (auto _ : st) { - } - } -}; - -void TestRegistrationAtRuntime() { -#ifdef BENCHMARK_HAS_CXX11 - { - CustomFixture fx; - benchmark::RegisterBenchmark("custom_fixture", fx); - AddCases({"custom_fixture"}); - } -#endif -#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK - { - const char* x = "42"; - auto capturing_lam = [=](benchmark::State& st) { - for (auto _ : st) { - } - st.SetLabel(x); - }; - benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam); - AddCases({{"lambda_benchmark", x}}); - } -#endif -} - -// Test that all benchmarks, registered at either during static init or runtime, -// are run and the results are passed to the reported. 
-void RunTestOne() { - TestRegistrationAtRuntime(); - - TestReporter test_reporter; - benchmark::RunSpecifiedBenchmarks(&test_reporter); - - typedef benchmark::BenchmarkReporter::Run Run; - auto EB = ExpectedResults.begin(); - - for (Run const& run : test_reporter.all_runs_) { - assert(EB != ExpectedResults.end()); - EB->CheckRun(run); - ++EB; - } - assert(EB == ExpectedResults.end()); -} - -// Test that ClearRegisteredBenchmarks() clears all previously registered -// benchmarks. -// Also test that new benchmarks can be registered and ran afterwards. -void RunTestTwo() { - assert(ExpectedResults.size() != 0 && - "must have at least one registered benchmark"); - ExpectedResults.clear(); - benchmark::ClearRegisteredBenchmarks(); - - TestReporter test_reporter; - size_t num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter); - assert(num_ran == 0); - assert(test_reporter.all_runs_.begin() == test_reporter.all_runs_.end()); - - TestRegistrationAtRuntime(); - num_ran = benchmark::RunSpecifiedBenchmarks(&test_reporter); - assert(num_ran == ExpectedResults.size()); - - typedef benchmark::BenchmarkReporter::Run Run; - auto EB = ExpectedResults.begin(); - - for (Run const& run : test_reporter.all_runs_) { - assert(EB != ExpectedResults.end()); - EB->CheckRun(run); - ++EB; - } - assert(EB == ExpectedResults.end()); -} - -int main(int argc, char* argv[]) { - benchmark::Initialize(&argc, argv); - - RunTestOne(); - RunTestTwo(); -} diff --git a/libcxx/utils/google-benchmark/test/repetitions_test.cc b/libcxx/utils/google-benchmark/test/repetitions_test.cc deleted file mode 100644 index f93de502a35a..000000000000 --- a/libcxx/utils/google-benchmark/test/repetitions_test.cc +++ /dev/null @@ -1,208 +0,0 @@ - -#include "benchmark/benchmark.h" -#include "output_test.h" - -// ========================================================================= // -// ------------------------ Testing Basic Output --------------------------- // -// 
========================================================================= // - -void BM_ExplicitRepetitions(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_ExplicitRepetitions)->Repetitions(2); - -ADD_CASES(TC_ConsoleOut, - {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); -ADD_CASES(TC_ConsoleOut, - {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); -ADD_CASES(TC_ConsoleOut, - {{"^BM_ExplicitRepetitions/repeats:2_mean %console_report$"}}); -ADD_CASES(TC_ConsoleOut, - {{"^BM_ExplicitRepetitions/repeats:2_median %console_report$"}}); -ADD_CASES(TC_ConsoleOut, - {{"^BM_ExplicitRepetitions/repeats:2_stddev %console_report$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_mean\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - 
{"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_median\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_stddev\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_ExplicitRepetitions/repeats:2_mean\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_ExplicitRepetitions/repeats:2_median\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, - 
{{"^\"BM_ExplicitRepetitions/repeats:2_stddev\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Basic Output --------------------------- // -// ========================================================================= // - -void BM_ImplicitRepetitions(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_ImplicitRepetitions); - -ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); -ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); -ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); -ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_mean %console_report$"}}); -ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_median %console_report$"}}); -ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_stddev %console_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", 
MR_Next}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - 
{"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_mean\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_median\",%csv_report$"}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_stddev\",%csv_report$"}}); - -// ========================================================================= // -// --------------------------- TEST CASES END ------------------------------ // -// ========================================================================= // - -int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc b/libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc deleted file mode 100644 index 9646b9be534d..000000000000 --- a/libcxx/utils/google-benchmark/test/report_aggregates_only_test.cc +++ /dev/null @@ -1,39 +0,0 @@ - -#undef NDEBUG -#include -#include - -#include "benchmark/benchmark.h" -#include "output_test.h" - -// Ok this test is super ugly. We want to check what happens with the file -// reporter in the presence of ReportAggregatesOnly(). -// We do not care about console output, the normal tests check that already. 
- -void BM_SummaryRepeat(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); - -int main(int argc, char* argv[]) { - const std::string output = GetFileReporterOutput(argc, argv); - - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != - 1 || - SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find three " - "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" - "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " - "output:\n"; - std::cout << output; - return 1; - } - - return 0; -} diff --git a/libcxx/utils/google-benchmark/test/reporter_output_test.cc b/libcxx/utils/google-benchmark/test/reporter_output_test.cc deleted file mode 100644 index 989eb48ecc81..000000000000 --- a/libcxx/utils/google-benchmark/test/reporter_output_test.cc +++ /dev/null @@ -1,956 +0,0 @@ - -#undef NDEBUG -#include - -#include "benchmark/benchmark.h" -#include "output_test.h" - -// ========================================================================= // -// ---------------------- Testing Prologue Output -------------------------- // -// ========================================================================= // - -ADD_CASES(TC_ConsoleOut, {{"^[-]+$", MR_Next}, - {"^Benchmark %s Time %s CPU %s Iterations$", MR_Next}, - {"^[-]+$", MR_Next}}); -static int AddContextCases() { - AddCases(TC_ConsoleErr, - { - {"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default}, - {"Running .*/reporter_output_test(\\.exe)?$", MR_Next}, - {"Run on \\(%int X %float MHz CPU s?\\)", MR_Next}, - }); - AddCases(TC_JSONOut, - {{"^\\{", MR_Default}, - 
{"\"context\":", MR_Next}, - {"\"date\": \"", MR_Next}, - {"\"host_name\":", MR_Next}, - {"\"executable\": \".*(/|\\\\)reporter_output_test(\\.exe)?\",", - MR_Next}, - {"\"num_cpus\": %int,$", MR_Next}, - {"\"mhz_per_cpu\": %float,$", MR_Next}, - {"\"caches\": \\[$", MR_Default}}); - auto const& Info = benchmark::CPUInfo::Get(); - auto const& Caches = Info.caches; - if (!Caches.empty()) { - AddCases(TC_ConsoleErr, {{"CPU Caches:$", MR_Next}}); - } - for (size_t I = 0; I < Caches.size(); ++I) { - std::string num_caches_str = - Caches[I].num_sharing != 0 ? " \\(x%int\\)$" : "$"; - AddCases(TC_ConsoleErr, - {{"L%int (Data|Instruction|Unified) %int KiB" + num_caches_str, - MR_Next}}); - AddCases(TC_JSONOut, {{"\\{$", MR_Next}, - {"\"type\": \"", MR_Next}, - {"\"level\": %int,$", MR_Next}, - {"\"size\": %int,$", MR_Next}, - {"\"num_sharing\": %int$", MR_Next}, - {"}[,]{0,1}$", MR_Next}}); - } - AddCases(TC_JSONOut, {{"],$"}}); - auto const& LoadAvg = Info.load_avg; - if (!LoadAvg.empty()) { - AddCases(TC_ConsoleErr, - {{"Load Average: (%float, ){0,2}%float$", MR_Next}}); - } - AddCases(TC_JSONOut, {{"\"load_avg\": \\[(%float,?){0,3}],$", MR_Next}}); - return 0; -} -int dummy_register = AddContextCases(); -ADD_CASES(TC_CSVOut, {{"%csv_header"}}); - -// ========================================================================= // -// ------------------------ Testing Basic Output --------------------------- // -// ========================================================================= // - -void BM_basic(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_basic); - -ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_basic\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 
1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Bytes per Second Output ---------------- // -// ========================================================================= // - -void BM_bytes_per_second(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - state.SetBytesProcessed(1); -} -BENCHMARK(BM_bytes_per_second); - -ADD_CASES(TC_ConsoleOut, {{"^BM_bytes_per_second %console_report " - "bytes_per_second=%float[kM]{0,1}/s$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_bytes_per_second\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bytes_per_second\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Items per Second Output ---------------- // -// ========================================================================= // - -void BM_items_per_second(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - 
state.SetItemsProcessed(1); -} -BENCHMARK(BM_items_per_second); - -ADD_CASES(TC_ConsoleOut, {{"^BM_items_per_second %console_report " - "items_per_second=%float[kM]{0,1}/s$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"}, - {"\"family_index\": 2,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_items_per_second\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"items_per_second\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_items_per_second\",%csv_items_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Label Output --------------------------- // -// ========================================================================= // - -void BM_label(benchmark::State& state) { - for (auto _ : state) { - } - state.SetLabel("some label"); -} -BENCHMARK(BM_label); - -ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, - {"\"family_index\": 3,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_label\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"label\": \"some label\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_label\",%csv_label_report_begin\"some " - "label\"%csv_label_report_end$"}}); - -// 
========================================================================= // -// ------------------------ Testing Time Label Output ---------------------- // -// ========================================================================= // - -void BM_time_label_nanosecond(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_time_label_nanosecond)->Unit(benchmark::kNanosecond); - -ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_nanosecond %console_report$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_time_label_nanosecond\",$"}, - {"\"family_index\": 4,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_time_label_nanosecond\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_nanosecond\",%csv_report$"}}); - -void BM_time_label_microsecond(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_time_label_microsecond)->Unit(benchmark::kMicrosecond); - -ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_microsecond %console_us_report$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_time_label_microsecond\",$"}, - {"\"family_index\": 5,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_time_label_microsecond\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"us\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_microsecond\",%csv_us_report$"}}); - -void 
BM_time_label_millisecond(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_time_label_millisecond)->Unit(benchmark::kMillisecond); - -ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_millisecond %console_ms_report$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_time_label_millisecond\",$"}, - {"\"family_index\": 6,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_time_label_millisecond\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ms\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_millisecond\",%csv_ms_report$"}}); - -void BM_time_label_second(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_time_label_second)->Unit(benchmark::kSecond); - -ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_second %console_s_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_second\",$"}, - {"\"family_index\": 7,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_time_label_second\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"s\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_time_label_second\",%csv_s_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Error Output --------------------------- // -// ========================================================================= // - -void BM_error(benchmark::State& 
state) { - state.SkipWithError("message"); - for (auto _ : state) { - } -} -BENCHMARK(BM_error); -ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"}, - {"\"family_index\": 8,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_error\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"error_occurred\": true,$", MR_Next}, - {"\"error_message\": \"message\",$", MR_Next}}); - -ADD_CASES(TC_CSVOut, {{"^\"BM_error\",,,,,,,,true,\"message\"$"}}); - -// ========================================================================= // -// ------------------------ Testing No Arg Name Output ----------------------- -// // -// ========================================================================= // - -void BM_no_arg_name(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_no_arg_name)->Arg(3); -ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"}, - {"\"family_index\": 9,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_no_arg_name/3\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Arg Name Output ----------------------- // -// ========================================================================= // - -void BM_arg_name(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3); -ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}}); 
-ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"}, - {"\"family_index\": 10,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_arg_name/first:3\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Arg Names Output ----------------------- // -// ========================================================================= // - -void BM_arg_names(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_arg_names)->Args({2, 5, 4})->ArgNames({"first", "", "third"}); -ADD_CASES(TC_ConsoleOut, - {{"^BM_arg_names/first:2/5/third:4 %console_report$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"}, - {"\"family_index\": 11,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_arg_names/first:2/5/third:4\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Name Output ---------------------------- // -// ========================================================================= // - -void BM_name(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_name)->Name("BM_custom_name"); - -ADD_CASES(TC_ConsoleOut, {{"^BM_custom_name %console_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_custom_name\",$"}, - {"\"family_index\": 12,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - 
{"\"run_name\": \"BM_custom_name\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_custom_name\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------ Testing Big Args Output ------------------------ // -// ========================================================================= // - -void BM_BigArgs(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_BigArgs)->RangeMultiplier(2)->Range(1U << 30U, 1U << 31U); -ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"}, - {"^BM_BigArgs/2147483648 %console_report$"}}); - -// ========================================================================= // -// ----------------------- Testing Complexity Output ----------------------- // -// ========================================================================= // - -void BM_Complexity_O1(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); -SET_SUBSTITUTIONS({{"%bigOStr", "[ ]* %float \\([0-9]+\\)"}, - {"%RMS", "[ ]*[0-9]+ %"}}); -ADD_CASES(TC_ConsoleOut, {{"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"}, - {"^BM_Complexity_O1_RMS %RMS %RMS[ ]*$"}}); - -// ========================================================================= // -// ----------------------- Testing Aggregate Output ------------------------ // -// ========================================================================= // - -// Test 
that non-aggregate data is printed by default -void BM_Repeat(benchmark::State& state) { - for (auto _ : state) { - } -} -// need two repetitions min to be able to output any aggregate output -BENCHMARK(BM_Repeat)->Repetitions(2); -ADD_CASES(TC_ConsoleOut, - {{"^BM_Repeat/repeats:2 %console_report$"}, - {"^BM_Repeat/repeats:2 %console_report$"}, - {"^BM_Repeat/repeats:2_mean %console_time_only_report [ ]*2$"}, - {"^BM_Repeat/repeats:2_median %console_time_only_report [ ]*2$"}, - {"^BM_Repeat/repeats:2_stddev %console_time_only_report [ ]*2$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, - {"\"family_index\": 15,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:2\"", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:2\",$"}, - {"\"family_index\": 15,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:2_mean\",$"}, - {"\"family_index\": 15,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:2_median\",$"}, - {"\"family_index\": 15,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": 
\"median\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}, - {"\"family_index\": 15,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"}, - {"^\"BM_Repeat/repeats:2\",%csv_report$"}, - {"^\"BM_Repeat/repeats:2_mean\",%csv_report$"}, - {"^\"BM_Repeat/repeats:2_median\",%csv_report$"}, - {"^\"BM_Repeat/repeats:2_stddev\",%csv_report$"}}); -// but for two repetitions, mean and median is the same, so let's repeat.. -BENCHMARK(BM_Repeat)->Repetitions(3); -ADD_CASES(TC_ConsoleOut, - {{"^BM_Repeat/repeats:3 %console_report$"}, - {"^BM_Repeat/repeats:3 %console_report$"}, - {"^BM_Repeat/repeats:3 %console_report$"}, - {"^BM_Repeat/repeats:3_mean %console_time_only_report [ ]*3$"}, - {"^BM_Repeat/repeats:3_median %console_time_only_report [ ]*3$"}, - {"^BM_Repeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, - {"\"family_index\": 16,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:3\",$"}, - {"\"family_index\": 16,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:3\",$"}, - {"\"family_index\": 16,$", 
MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, - {"\"family_index\": 16,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:3_median\",$"}, - {"\"family_index\": 16,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}, - {"\"family_index\": 16,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3_mean\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3_median\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"}}); -// median differs between even/odd number of repetitions, so just to be sure -BENCHMARK(BM_Repeat)->Repetitions(4); -ADD_CASES(TC_ConsoleOut, - {{"^BM_Repeat/repeats:4 %console_report$"}, - {"^BM_Repeat/repeats:4 
%console_report$"}, - {"^BM_Repeat/repeats:4 %console_report$"}, - {"^BM_Repeat/repeats:4 %console_report$"}, - {"^BM_Repeat/repeats:4_mean %console_time_only_report [ ]*4$"}, - {"^BM_Repeat/repeats:4_median %console_time_only_report [ ]*4$"}, - {"^BM_Repeat/repeats:4_stddev %console_time_only_report [ ]*4$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 4,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:4\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 4,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:4\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 4,$", MR_Next}, - {"\"repetition_index\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:4\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 4,$", MR_Next}, - {"\"repetition_index\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:4_mean\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 4,$", 
MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 4,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:4_median\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 4,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 4,$", MR_Next}, - {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}, - {"\"family_index\": 17,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 4,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 4,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"}, - {"^\"BM_Repeat/repeats:4\",%csv_report$"}, - {"^\"BM_Repeat/repeats:4\",%csv_report$"}, - {"^\"BM_Repeat/repeats:4\",%csv_report$"}, - {"^\"BM_Repeat/repeats:4_mean\",%csv_report$"}, - {"^\"BM_Repeat/repeats:4_median\",%csv_report$"}, - {"^\"BM_Repeat/repeats:4_stddev\",%csv_report$"}}); - -// Test that a non-repeated test still prints non-aggregate results even when -// only-aggregate reports have been requested -void BM_RepeatOnce(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly(); -ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}, - {"\"family_index\": 18,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_RepeatOnce/repeats:1\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", 
MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_RepeatOnce/repeats:1\",%csv_report$"}}); - -// Test that non-aggregate data is not reported -void BM_SummaryRepeat(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); -ADD_CASES( - TC_ConsoleOut, - {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, - {"^BM_SummaryRepeat/repeats:3_mean %console_time_only_report [ ]*3$"}, - {"^BM_SummaryRepeat/repeats:3_median %console_time_only_report [ ]*3$"}, - {"^BM_SummaryRepeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); -ADD_CASES(TC_JSONOut, - {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, - {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, - {"\"family_index\": 19,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"}, - {"\"family_index\": 19,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}, - {"\"family_index\": 19,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, - 
{"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, - {"^\"BM_SummaryRepeat/repeats:3_median\",%csv_report$"}, - {"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}}); - -// Test that non-aggregate data is not displayed. -// NOTE: this test is kinda bad. we are only testing the display output. -// But we don't check that the file output still contains everything... -void BM_SummaryDisplay(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_SummaryDisplay)->Repetitions(2)->DisplayAggregatesOnly(); -ADD_CASES( - TC_ConsoleOut, - {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, - {"^BM_SummaryDisplay/repeats:2_mean %console_time_only_report [ ]*2$"}, - {"^BM_SummaryDisplay/repeats:2_median %console_time_only_report [ ]*2$"}, - {"^BM_SummaryDisplay/repeats:2_stddev %console_time_only_report [ ]*2$"}}); -ADD_CASES(TC_JSONOut, - {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, - {"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"}, - {"\"family_index\": 20,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, - {"\"family_index\": 20,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, - {"\"family_index\": 20,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - 
{"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}}); -ADD_CASES(TC_CSVOut, - {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, - {"^\"BM_SummaryDisplay/repeats:2_mean\",%csv_report$"}, - {"^\"BM_SummaryDisplay/repeats:2_median\",%csv_report$"}, - {"^\"BM_SummaryDisplay/repeats:2_stddev\",%csv_report$"}}); - -// Test repeats with custom time unit. -void BM_RepeatTimeUnit(benchmark::State& state) { - for (auto _ : state) { - } -} -BENCHMARK(BM_RepeatTimeUnit) - ->Repetitions(3) - ->ReportAggregatesOnly() - ->Unit(benchmark::kMicrosecond); -ADD_CASES( - TC_ConsoleOut, - {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, - {"^BM_RepeatTimeUnit/repeats:3_mean %console_us_time_only_report [ ]*3$"}, - {"^BM_RepeatTimeUnit/repeats:3_median %console_us_time_only_report [ " - "]*3$"}, - {"^BM_RepeatTimeUnit/repeats:3_stddev %console_us_time_only_report [ " - "]*3$"}}); -ADD_CASES(TC_JSONOut, - {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, - {"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"}, - {"\"family_index\": 21,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"time_unit\": \"us\",?$"}, - {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"}, - {"\"family_index\": 21,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"time_unit\": \"us\",?$"}, - {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, - {"\"family_index\": 
21,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"time_unit\": \"us\",?$"}}); -ADD_CASES(TC_CSVOut, - {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, - {"^\"BM_RepeatTimeUnit/repeats:3_mean\",%csv_us_report$"}, - {"^\"BM_RepeatTimeUnit/repeats:3_median\",%csv_us_report$"}, - {"^\"BM_RepeatTimeUnit/repeats:3_stddev\",%csv_us_report$"}}); - -// ========================================================================= // -// -------------------- Testing user-provided statistics ------------------- // -// ========================================================================= // - -const auto UserStatistics = [](const std::vector& v) { - return v.back(); -}; -void BM_UserStats(benchmark::State& state) { - for (auto _ : state) { - state.SetIterationTime(150 / 10e8); - } -} -// clang-format off -BENCHMARK(BM_UserStats) - ->Repetitions(3) - ->Iterations(5) - ->UseManualTime() - ->ComputeStatistics("", UserStatistics); -// clang-format on - -// check that user-provided stats is calculated, and is after the default-ones -// empty string as name is intentional, it would sort before anything else -ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/iterations:5/repeats:3/manual_time [ " - "]* 150 ns %time [ ]*5$"}, - {"^BM_UserStats/iterations:5/repeats:3/manual_time [ " - "]* 150 ns %time [ ]*5$"}, - {"^BM_UserStats/iterations:5/repeats:3/manual_time [ " - "]* 150 ns %time [ ]*5$"}, - {"^BM_UserStats/iterations:5/repeats:3/" - "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, - {"^BM_UserStats/iterations:5/repeats:3/" - "manual_time_median [ ]* 150 ns %time [ ]*3$"}, - {"^BM_UserStats/iterations:5/repeats:3/" - "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, - 
{"^BM_UserStats/iterations:5/repeats:3/manual_time_ " - "[ ]* 150 ns %time [ ]*3$"}}); -ADD_CASES( - TC_JSONOut, - {{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": 5,$", MR_Next}, - {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, - {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": 5,$", MR_Next}, - {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, - {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"repetition_index\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": 5,$", MR_Next}, - {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, - {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", 
MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, - {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, - {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"}, - {"\"family_index\": 22,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 3,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"\",$", MR_Next}, - {"\"iterations\": 3,$", MR_Next}, - {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}}); -ADD_CASES( - TC_CSVOut, - {{"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, - {"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, - {"^\"BM_UserStats/iterations:5/repeats:3/manual_time\",%csv_report$"}, - {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",%csv_report$"}, - {"^\"BM_UserStats/iterations:5/repeats:3/" - 
"manual_time_median\",%csv_report$"}, - {"^\"BM_UserStats/iterations:5/repeats:3/" - "manual_time_stddev\",%csv_report$"}, - {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}}); - -// ========================================================================= // -// ------------------------- Testing StrEscape JSON ------------------------ // -// ========================================================================= // -#if 0 // enable when csv testing code correctly handles multi-line fields -void BM_JSON_Format(benchmark::State& state) { - state.SkipWithError("val\b\f\n\r\t\\\"with\"es,capes"); - for (auto _ : state) { - } -} -BENCHMARK(BM_JSON_Format); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_JSON_Format\",$"}, - {"\"family_index\": 23,$", MR_Next}, -{"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_JSON_Format\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"error_occurred\": true,$", MR_Next}, - {R"("error_message": "val\\b\\f\\n\\r\\t\\\\\\"with\\"es,capes",$)", MR_Next}}); -#endif -// ========================================================================= // -// -------------------------- Testing CsvEscape ---------------------------- // -// ========================================================================= // - -void BM_CSV_Format(benchmark::State& state) { - state.SkipWithError("\"freedom\""); - for (auto _ : state) { - } -} -BENCHMARK(BM_CSV_Format); -ADD_CASES(TC_CSVOut, {{"^\"BM_CSV_Format\",,,,,,,,true,\"\"\"freedom\"\"\"$"}}); - -// ========================================================================= // -// --------------------------- TEST CASES END ------------------------------ // -// ========================================================================= // - -int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git 
a/libcxx/utils/google-benchmark/test/skip_with_error_test.cc b/libcxx/utils/google-benchmark/test/skip_with_error_test.cc deleted file mode 100644 index 827966e9dfe3..000000000000 --- a/libcxx/utils/google-benchmark/test/skip_with_error_test.cc +++ /dev/null @@ -1,195 +0,0 @@ - -#undef NDEBUG -#include -#include - -#include "../src/check.h" // NOTE: check.h is for internal use only! -#include "benchmark/benchmark.h" - -namespace { - -class TestReporter : public benchmark::ConsoleReporter { - public: - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { - return ConsoleReporter::ReportContext(context); - }; - - virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { - all_runs_.insert(all_runs_.end(), begin(report), end(report)); - ConsoleReporter::ReportRuns(report); - } - - TestReporter() {} - virtual ~TestReporter() {} - - mutable std::vector all_runs_; -}; - -struct TestCase { - std::string name; - bool error_occurred; - std::string error_message; - - typedef benchmark::BenchmarkReporter::Run Run; - - void CheckRun(Run const& run) const { - CHECK(name == run.benchmark_name()) - << "expected " << name << " got " << run.benchmark_name(); - CHECK(error_occurred == run.error_occurred); - CHECK(error_message == run.error_message); - if (error_occurred) { - // CHECK(run.iterations == 0); - } else { - CHECK(run.iterations != 0); - } - } -}; - -std::vector ExpectedResults; - -int AddCases(const char* base_name, std::initializer_list const& v) { - for (auto TC : v) { - TC.name = base_name + TC.name; - ExpectedResults.push_back(std::move(TC)); - } - return 0; -} - -#define CONCAT(x, y) CONCAT2(x, y) -#define CONCAT2(x, y) x##y -#define ADD_CASES(...) 
int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) - -} // end namespace - -void BM_error_no_running(benchmark::State& state) { - state.SkipWithError("error message"); -} -BENCHMARK(BM_error_no_running); -ADD_CASES("BM_error_no_running", {{"", true, "error message"}}); - -void BM_error_before_running(benchmark::State& state) { - state.SkipWithError("error message"); - while (state.KeepRunning()) { - assert(false); - } -} -BENCHMARK(BM_error_before_running); -ADD_CASES("BM_error_before_running", {{"", true, "error message"}}); - -void BM_error_before_running_batch(benchmark::State& state) { - state.SkipWithError("error message"); - while (state.KeepRunningBatch(17)) { - assert(false); - } -} -BENCHMARK(BM_error_before_running_batch); -ADD_CASES("BM_error_before_running_batch", {{"", true, "error message"}}); - -void BM_error_before_running_range_for(benchmark::State& state) { - state.SkipWithError("error message"); - for (auto _ : state) { - assert(false); - } -} -BENCHMARK(BM_error_before_running_range_for); -ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}}); - -void BM_error_during_running(benchmark::State& state) { - int first_iter = true; - while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { - assert(first_iter); - first_iter = false; - state.SkipWithError("error message"); - } else { - state.PauseTiming(); - state.ResumeTiming(); - } - } -} -BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8); -ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"}, - {"/1/threads:2", true, "error message"}, - {"/1/threads:4", true, "error message"}, - {"/1/threads:8", true, "error message"}, - {"/2/threads:1", false, ""}, - {"/2/threads:2", false, ""}, - {"/2/threads:4", false, ""}, - {"/2/threads:8", false, ""}}); - -void BM_error_during_running_ranged_for(benchmark::State& state) { - assert(state.max_iterations > 3 && "test requires at least a few 
iterations"); - int first_iter = true; - // NOTE: Users should not write the for loop explicitly. - for (auto It = state.begin(), End = state.end(); It != End; ++It) { - if (state.range(0) == 1) { - assert(first_iter); - first_iter = false; - state.SkipWithError("error message"); - // Test the unfortunate but documented behavior that the ranged-for loop - // doesn't automatically terminate when SkipWithError is set. - assert(++It != End); - break; // Required behavior - } - } -} -BENCHMARK(BM_error_during_running_ranged_for)->Arg(1)->Arg(2)->Iterations(5); -ADD_CASES("BM_error_during_running_ranged_for", - {{"/1/iterations:5", true, "error message"}, - {"/2/iterations:5", false, ""}}); - -void BM_error_after_running(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); - } - if (state.thread_index <= (state.threads / 2)) - state.SkipWithError("error message"); -} -BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); -ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"}, - {"/threads:2", true, "error message"}, - {"/threads:4", true, "error message"}, - {"/threads:8", true, "error message"}}); - -void BM_error_while_paused(benchmark::State& state) { - bool first_iter = true; - while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { - assert(first_iter); - first_iter = false; - state.PauseTiming(); - state.SkipWithError("error message"); - } else { - state.PauseTiming(); - state.ResumeTiming(); - } - } -} -BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8); -ADD_CASES("BM_error_while_paused", {{"/1/threads:1", true, "error message"}, - {"/1/threads:2", true, "error message"}, - {"/1/threads:4", true, "error message"}, - {"/1/threads:8", true, "error message"}, - {"/2/threads:1", false, ""}, - {"/2/threads:2", false, ""}, - {"/2/threads:4", false, ""}, - {"/2/threads:8", false, ""}}); - -int main(int argc, char* argv[]) { - 
benchmark::Initialize(&argc, argv); - - TestReporter test_reporter; - benchmark::RunSpecifiedBenchmarks(&test_reporter); - - typedef benchmark::BenchmarkReporter::Run Run; - auto EB = ExpectedResults.begin(); - - for (Run const& run : test_reporter.all_runs_) { - assert(EB != ExpectedResults.end()); - EB->CheckRun(run); - ++EB; - } - assert(EB == ExpectedResults.end()); - - return 0; -} diff --git a/libcxx/utils/google-benchmark/test/state_assembly_test.cc b/libcxx/utils/google-benchmark/test/state_assembly_test.cc deleted file mode 100644 index 7ddbb3b2a92c..000000000000 --- a/libcxx/utils/google-benchmark/test/state_assembly_test.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include - -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wreturn-type" -#endif - -// clang-format off -extern "C" { - extern int ExternInt; - benchmark::State& GetState(); - void Fn(); -} -// clang-format on - -using benchmark::State; - -// CHECK-LABEL: test_for_auto_loop: -extern "C" int test_for_auto_loop() { - State& S = GetState(); - int x = 42; - // CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv - // CHECK-NEXT: testq %rbx, %rbx - // CHECK-NEXT: je [[LOOP_END:.*]] - - for (auto _ : S) { - // CHECK: .L[[LOOP_HEAD:[a-zA-Z0-9_]+]]: - // CHECK-GNU-NEXT: subq $1, %rbx - // CHECK-CLANG-NEXT: {{(addq \$1, %rax|incq %rax|addq \$-1, %rbx)}} - // CHECK-NEXT: jne .L[[LOOP_HEAD]] - benchmark::DoNotOptimize(x); - } - // CHECK: [[LOOP_END]]: - // CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv - - // CHECK: movl $101, %eax - // CHECK: ret - return 101; -} - -// CHECK-LABEL: test_while_loop: -extern "C" int test_while_loop() { - State& S = GetState(); - int x = 42; - - // CHECK: j{{(e|mp)}} .L[[LOOP_HEADER:[a-zA-Z0-9_]+]] - // CHECK-NEXT: .L[[LOOP_BODY:[a-zA-Z0-9_]+]]: - while (S.KeepRunning()) { - // CHECK-GNU-NEXT: subq $1, %[[IREG:[a-z]+]] - // CHECK-CLANG-NEXT: {{(addq \$-1,|decq)}} %[[IREG:[a-z]+]] - // CHECK: movq %[[IREG]], [[DEST:.*]] - benchmark::DoNotOptimize(x); - } - 
// CHECK-DAG: movq [[DEST]], %[[IREG]] - // CHECK-DAG: testq %[[IREG]], %[[IREG]] - // CHECK-DAG: jne .L[[LOOP_BODY]] - // CHECK-DAG: .L[[LOOP_HEADER]]: - - // CHECK: cmpb $0 - // CHECK-NEXT: jne .L[[LOOP_END:[a-zA-Z0-9_]+]] - // CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv - - // CHECK: .L[[LOOP_END]]: - // CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv - - // CHECK: movl $101, %eax - // CHECK: ret - return 101; -} diff --git a/libcxx/utils/google-benchmark/test/statistics_gtest.cc b/libcxx/utils/google-benchmark/test/statistics_gtest.cc deleted file mode 100644 index 3ddc72dd7ac6..000000000000 --- a/libcxx/utils/google-benchmark/test/statistics_gtest.cc +++ /dev/null @@ -1,28 +0,0 @@ -//===---------------------------------------------------------------------===// -// statistics_test - Unit tests for src/statistics.cc -//===---------------------------------------------------------------------===// - -#include "../src/statistics.h" -#include "gtest/gtest.h" - -namespace { -TEST(StatisticsTest, Mean) { - EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({42, 42, 42, 42}), 42.0); - EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({1, 2, 3, 4}), 2.5); - EXPECT_DOUBLE_EQ(benchmark::StatisticsMean({1, 2, 5, 10, 10, 14}), 7.0); -} - -TEST(StatisticsTest, Median) { - EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({42, 42, 42, 42}), 42.0); - EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({1, 2, 3, 4}), 2.5); - EXPECT_DOUBLE_EQ(benchmark::StatisticsMedian({1, 2, 5, 10, 10}), 5.0); -} - -TEST(StatisticsTest, StdDev) { - EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({101, 101, 101, 101}), 0.0); - EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({1, 2, 3}), 1.0); - EXPECT_DOUBLE_EQ(benchmark::StatisticsStdDev({2.5, 2.4, 3.3, 4.2, 5.1}), - 1.151086443322134); -} - -} // end namespace diff --git a/libcxx/utils/google-benchmark/test/string_util_gtest.cc b/libcxx/utils/google-benchmark/test/string_util_gtest.cc deleted file mode 100644 index c7061b409e91..000000000000 --- 
a/libcxx/utils/google-benchmark/test/string_util_gtest.cc +++ /dev/null @@ -1,161 +0,0 @@ -//===---------------------------------------------------------------------===// -// statistics_test - Unit tests for src/statistics.cc -//===---------------------------------------------------------------------===// - -#include "../src/string_util.h" -#include "../src/internal_macros.h" -#include "gtest/gtest.h" - -namespace { -TEST(StringUtilTest, stoul) { - { - size_t pos = 0; - EXPECT_EQ(0ul, benchmark::stoul("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(7ul, benchmark::stoul("7", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(135ul, benchmark::stoul("135", &pos)); - EXPECT_EQ(3ul, pos); - } -#if ULONG_MAX == 0xFFFFFFFFul - { - size_t pos = 0; - EXPECT_EQ(0xFFFFFFFFul, benchmark::stoul("4294967295", &pos)); - EXPECT_EQ(10ul, pos); - } -#elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul - { - size_t pos = 0; - EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, benchmark::stoul("18446744073709551615", &pos)); - EXPECT_EQ(20ul, pos); - } -#endif - { - size_t pos = 0; - EXPECT_EQ(10ul, benchmark::stoul("1010", &pos, 2)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(520ul, benchmark::stoul("1010", &pos, 8)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1010ul, benchmark::stoul("1010", &pos, 10)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(4112ul, benchmark::stoul("1010", &pos, 16)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(0xBEEFul, benchmark::stoul("BEEF", &pos, 16)); - EXPECT_EQ(4ul, pos); - } -#ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); - } -#endif -} - -TEST(StringUtilTest, stoi) { - { - size_t pos = 0; - EXPECT_EQ(0, benchmark::stoi("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1357, 
benchmark::stoi("1357", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); - EXPECT_EQ(4ul, pos); - } -#ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); - } -#endif -} - -TEST(StringUtilTest, stod) { - { - size_t pos = 0; - EXPECT_EQ(0.0, benchmark::stod("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - /* Note: exactly representable as double */ - EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); - EXPECT_EQ(8ul, pos); - } -#ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); - } -#endif -} - -TEST(StringUtilTest, StrSplit) { - EXPECT_EQ(benchmark::StrSplit("", ','), std::vector{}); - EXPECT_EQ(benchmark::StrSplit("hello", ','), - std::vector({"hello"})); - EXPECT_EQ(benchmark::StrSplit("hello,there,is,more", ','), - std::vector({"hello", "there", "is", "more"})); -} - -} // end namespace diff --git a/libcxx/utils/google-benchmark/test/templated_fixture_test.cc b/libcxx/utils/google-benchmark/test/templated_fixture_test.cc deleted file mode 100644 index fe9865cc776f..000000000000 --- a/libcxx/utils/google-benchmark/test/templated_fixture_test.cc +++ /dev/null @@ -1,28 
+0,0 @@ - -#include "benchmark/benchmark.h" - -#include -#include - -template -class MyFixture : public ::benchmark::Fixture { - public: - MyFixture() : data(0) {} - - T data; -}; - -BENCHMARK_TEMPLATE_F(MyFixture, Foo, int)(benchmark::State& st) { - for (auto _ : st) { - data += 1; - } -} - -BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, Bar, double)(benchmark::State& st) { - for (auto _ : st) { - data += 1.0; - } -} -BENCHMARK_REGISTER_F(MyFixture, Bar); - -BENCHMARK_MAIN(); diff --git a/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc b/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc deleted file mode 100644 index 421f27b5cb8b..000000000000 --- a/libcxx/utils/google-benchmark/test/user_counters_tabular_test.cc +++ /dev/null @@ -1,500 +0,0 @@ - -#undef NDEBUG - -#include "benchmark/benchmark.h" -#include "output_test.h" - -// @todo: this checks the full output at once; the rule for -// CounterSet1 was failing because it was not matching "^[-]+$". -// @todo: check that the counters are vertically aligned. 
-ADD_CASES(TC_ConsoleOut, - { - // keeping these lines long improves readability, so: - // clang-format off - {"^[-]+$", MR_Next}, - {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next}, - {"^[-]+$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, - 
{"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, - {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, - {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, - {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, - {"^[-]+$", MR_Next}, - {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Baz %s Foo$", MR_Next}, - {"^[-]+$", MR_Next}, - {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet0_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet1_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^[-]+$", MR_Next}, - {"^Benchmark %s Time %s CPU %s Iterations %s Bat %s Baz %s Foo$", MR_Next}, - {"^[-]+$", MR_Next}, - 
{"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"}, - // clang-format on - }); -ADD_CASES(TC_CSVOut, {{"%csv_header," - "\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}}); - -// ========================================================================= // -// ------------------------- Tabular Counters Output ----------------------- // -// ========================================================================= // - -void BM_Counters_Tabular(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters.insert({ - {"Foo", {1, bm::Counter::kAvgThreads}}, - {"Bar", {2, bm::Counter::kAvgThreads}}, - {"Baz", {4, bm::Counter::kAvgThreads}}, - {"Bat", {8, bm::Counter::kAvgThreads}}, - {"Frob", {16, bm::Counter::kAvgThreads}}, - {"Lob", {32, bm::Counter::kAvgThreads}}, - }); -} -BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 2)->Repetitions(2); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", 
MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_mean\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_median\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": 
\"BM_Counters_Tabular/repeats:2/threads:1\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_stddev\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); - -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 1,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 2,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": 
%float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 1,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 2,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_median\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 1,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 2,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_stddev\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 1,$", MR_Next}, - 
{"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", - MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 2,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:1_mean\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:1_median\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:2_mean\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:2_median\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," - 
"%float,%float,%float,%float,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckTabular(Results const& e) { - CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 1); - CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 2); - CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 4); - CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 8); - CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16); - CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:1$", - &CheckTabular); -CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$", - &CheckTabular); - -// ========================================================================= // -// -------------------- Tabular+Rate Counters Output ----------------------- // -// ========================================================================= // - -void BM_CounterRates_Tabular(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters.insert({ - {"Foo", {1, bm::Counter::kAvgThreadsRate}}, - {"Bar", {2, bm::Counter::kAvgThreadsRate}}, - {"Baz", {4, bm::Counter::kAvgThreadsRate}}, - {"Bat", {8, bm::Counter::kAvgThreadsRate}}, - {"Frob", {16, bm::Counter::kAvgThreadsRate}}, - {"Lob", {32, bm::Counter::kAvgThreadsRate}}, - }); -} -BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_CounterRates_Tabular/threads:%int\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": 
%float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float,$", MR_Next}, - {"\"Frob\": %float,$", MR_Next}, - {"\"Lob\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_CounterRates_Tabular/threads:%int\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckTabularRate(Results const& e) { - double t = e.DurationCPUTime(); - CHECK_FLOAT_COUNTER_VALUE(e, "Foo", EQ, 1. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "Bar", EQ, 2. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "Baz", EQ, 4. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "Bat", EQ, 8. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "Frob", EQ, 16. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "Lob", EQ, 32. / t, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_CounterRates_Tabular/threads:%int", - &CheckTabularRate); - -// ========================================================================= // -// ------------------------- Tabular Counters Output ----------------------- // -// ========================================================================= // - -// set only some of the counters -void BM_CounterSet0_Tabular(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters.insert({ - {"Foo", {10, bm::Counter::kAvgThreads}}, - {"Bar", {20, bm::Counter::kAvgThreads}}, - {"Baz", {40, bm::Counter::kAvgThreads}}, - }); -} -BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"}, - {"\"family_index\": 2,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_CounterSet0_Tabular/threads:%int\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - 
{"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet0_Tabular/threads:%int\",%csv_report," - "%float,,%float,%float,,"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckSet0(Results const& e) { - CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10); - CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 20); - CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40); -} -CHECK_BENCHMARK_RESULTS("BM_CounterSet0_Tabular", &CheckSet0); - -// again. -void BM_CounterSet1_Tabular(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters.insert({ - {"Foo", {15, bm::Counter::kAvgThreads}}, - {"Bar", {25, bm::Counter::kAvgThreads}}, - {"Baz", {45, bm::Counter::kAvgThreads}}, - }); -} -BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"}, - {"\"family_index\": 3,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_CounterSet1_Tabular/threads:%int\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bar\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet1_Tabular/threads:%int\",%csv_report," - "%float,,%float,%float,,"}}); -// VS2013 does not allow this function to be 
passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckSet1(Results const& e) { - CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 15); - CHECK_COUNTER_VALUE(e, int, "Bar", EQ, 25); - CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 45); -} -CHECK_BENCHMARK_RESULTS("BM_CounterSet1_Tabular/threads:%int", &CheckSet1); - -// ========================================================================= // -// ------------------------- Tabular Counters Output ----------------------- // -// ========================================================================= // - -// set only some of the counters, different set now. -void BM_CounterSet2_Tabular(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters.insert({ - {"Foo", {10, bm::Counter::kAvgThreads}}, - {"Bat", {30, bm::Counter::kAvgThreads}}, - {"Baz", {40, bm::Counter::kAvgThreads}}, - }); -} -BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"}, - {"\"family_index\": 4,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_CounterSet2_Tabular/threads:%int\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"Bat\": %float,$", MR_Next}, - {"\"Baz\": %float,$", MR_Next}, - {"\"Foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_CounterSet2_Tabular/threads:%int\",%csv_report," - ",%float,%float,%float,,"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckSet2(Results const& e) { - CHECK_COUNTER_VALUE(e, int, "Foo", EQ, 10); - CHECK_COUNTER_VALUE(e, int, "Bat", EQ, 30); - 
CHECK_COUNTER_VALUE(e, int, "Baz", EQ, 40); -} -CHECK_BENCHMARK_RESULTS("BM_CounterSet2_Tabular", &CheckSet2); - -// ========================================================================= // -// --------------------------- TEST CASES END ------------------------------ // -// ========================================================================= // - -int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/user_counters_test.cc b/libcxx/utils/google-benchmark/test/user_counters_test.cc deleted file mode 100644 index 377bb32ca948..000000000000 --- a/libcxx/utils/google-benchmark/test/user_counters_test.cc +++ /dev/null @@ -1,555 +0,0 @@ - -#undef NDEBUG - -#include "benchmark/benchmark.h" -#include "output_test.h" - -// ========================================================================= // -// ---------------------- Testing Prologue Output -------------------------- // -// ========================================================================= // - -// clang-format off - -ADD_CASES(TC_ConsoleOut, - {{"^[-]+$", MR_Next}, - {"^Benchmark %s Time %s CPU %s Iterations UserCounters...$", MR_Next}, - {"^[-]+$", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"%csv_header,\"bar\",\"foo\""}}); - -// clang-format on - -// ========================================================================= // -// ------------------------- Simple Counters Output ------------------------ // -// ========================================================================= // - -void BM_Counters_Simple(benchmark::State& state) { - for (auto _ : state) { - } - state.counters["foo"] = 1; - state.counters["bar"] = 2 * (double)state.iterations(); -} -BENCHMARK(BM_Counters_Simple); -ADD_CASES(TC_ConsoleOut, - {{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - 
{"\"run_name\": \"BM_Counters_Simple\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Simple\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckSimple(Results const& e) { - double its = e.NumIterations(); - CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); - // check that the value of bar is within 0.1% of the expected value - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. * its, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_Simple", &CheckSimple); - -// ========================================================================= // -// --------------------- Counters+Items+Bytes/s Output --------------------- // -// ========================================================================= // - -namespace { -int num_calls1 = 0; -} -void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - state.counters["foo"] = 1; - state.counters["bar"] = ++num_calls1; - state.SetBytesProcessed(364); - state.SetItemsProcessed(150); -} -BENCHMARK(BM_Counters_WithBytesAndItemsPSec); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec %console_report " - "bar=%hrfloat bytes_per_second=%hrfloat/s " - "foo=%hrfloat items_per_second=%hrfloat/s$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"}, - {"\"family_index\": 1,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - 
{"\"run_name\": \"BM_Counters_WithBytesAndItemsPSec\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"bytes_per_second\": %float,$", MR_Next}, - {"\"foo\": %float,$", MR_Next}, - {"\"items_per_second\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_WithBytesAndItemsPSec\"," - "%csv_bytes_items_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckBytesAndItemsPSec(Results const& e) { - double t = e.DurationCPUTime(); // this (and not real time) is the time used - CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); - CHECK_COUNTER_VALUE(e, int, "bar", EQ, num_calls1); - // check that the values are within 0.1% of the expected values - CHECK_FLOAT_RESULT_VALUE(e, "bytes_per_second", EQ, 364. / t, 0.001); - CHECK_FLOAT_RESULT_VALUE(e, "items_per_second", EQ, 150. 
/ t, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec", - &CheckBytesAndItemsPSec); - -// ========================================================================= // -// ------------------------- Rate Counters Output -------------------------- // -// ========================================================================= // - -void BM_Counters_Rate(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate}; - state.counters["bar"] = bm::Counter{2, bm::Counter::kIsRate}; -} -BENCHMARK(BM_Counters_Rate); -ADD_CASES( - TC_ConsoleOut, - {{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate\",$"}, - {"\"family_index\": 2,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Rate\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Rate\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckRate(Results const& e) { - double t = e.DurationCPUTime(); // this (and not real time) is the time used - // check that the values are within 0.1% of the expected values - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
/ t, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate); - -// ========================================================================= // -// ----------------------- Inverted Counters Output ------------------------ // -// ========================================================================= // - -void BM_Invert(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert}; - state.counters["bar"] = bm::Counter{10000, bm::Counter::kInvert}; -} -BENCHMARK(BM_Invert); -ADD_CASES(TC_ConsoleOut, - {{"^BM_Invert %console_report bar=%hrfloatu foo=%hrfloatk$"}}); -ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Invert\",$"}, - {"\"family_index\": 3,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Invert\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Invert\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckInvert(Results const& e) { - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 10000, 0.0001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 0.0001, 0.0001); -} -CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert); - -// ========================================================================= // -// ------------------------- InvertedRate Counters Output -// -------------------------- // -// 
========================================================================= // - -void BM_Counters_InvertedRate(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters["foo"] = - bm::Counter{1, bm::Counter::kIsRate | bm::Counter::kInvert}; - state.counters["bar"] = - bm::Counter{8192, bm::Counter::kIsRate | bm::Counter::kInvert}; -} -BENCHMARK(BM_Counters_InvertedRate); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate %console_report " - "bar=%hrfloats foo=%hrfloats$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_InvertedRate\",$"}, - {"\"family_index\": 4,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_InvertedRate\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_InvertedRate\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckInvertedRate(Results const& e) { - double t = e.DurationCPUTime(); // this (and not real time) is the time used - // check that the values are within 0.1% of the expected values - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, t / 8192.0, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_InvertedRate", &CheckInvertedRate); - -// ========================================================================= // -// ------------------------- Thread Counters Output 
------------------------ // -// ========================================================================= // - -void BM_Counters_Threads(benchmark::State& state) { - for (auto _ : state) { - } - state.counters["foo"] = 1; - state.counters["bar"] = 2; -} -BENCHMARK(BM_Counters_Threads)->ThreadRange(1, 8); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report " - "bar=%hrfloat foo=%hrfloat$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"}, - {"\"family_index\": 5,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES( - TC_CSVOut, - {{"^\"BM_Counters_Threads/threads:%int\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckThreads(Results const& e) { - CHECK_COUNTER_VALUE(e, int, "foo", EQ, e.NumThreads()); - CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2 * e.NumThreads()); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_Threads/threads:%int", &CheckThreads); - -// ========================================================================= // -// ---------------------- ThreadAvg Counters Output ------------------------ // -// ========================================================================= // - -void BM_Counters_AvgThreads(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreads}; - state.counters["bar"] = bm::Counter{2, 
bm::Counter::kAvgThreads}; -} -BENCHMARK(BM_Counters_AvgThreads)->ThreadRange(1, 8); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int " - "%console_report bar=%hrfloat foo=%hrfloat$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"}, - {"\"family_index\": 6,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES( - TC_CSVOut, - {{"^\"BM_Counters_AvgThreads/threads:%int\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckAvgThreads(Results const& e) { - CHECK_COUNTER_VALUE(e, int, "foo", EQ, 1); - CHECK_COUNTER_VALUE(e, int, "bar", EQ, 2); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int", - &CheckAvgThreads); - -// ========================================================================= // -// ---------------------- ThreadAvg Counters Output ------------------------ // -// ========================================================================= // - -void BM_Counters_AvgThreadsRate(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate}; - state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgThreadsRate}; -} -BENCHMARK(BM_Counters_AvgThreadsRate)->ThreadRange(1, 8); -ADD_CASES(TC_ConsoleOut, 
{{"^BM_Counters_AvgThreadsRate/threads:%int " - "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"}, - {"\"family_index\": 7,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_AvgThreadsRate/" - "threads:%int\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckAvgThreadsRate(Results const& e) { - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / e.DurationCPUTime(), 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
/ e.DurationCPUTime(), 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreadsRate/threads:%int", - &CheckAvgThreadsRate); - -// ========================================================================= // -// ------------------- IterationInvariant Counters Output ------------------ // -// ========================================================================= // - -void BM_Counters_IterationInvariant(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{1, bm::Counter::kIsIterationInvariant}; - state.counters["bar"] = bm::Counter{2, bm::Counter::kIsIterationInvariant}; -} -BENCHMARK(BM_Counters_IterationInvariant); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant %console_report " - "bar=%hrfloat foo=%hrfloat$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_IterationInvariant\",$"}, - {"\"family_index\": 8,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_IterationInvariant\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_IterationInvariant\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckIterationInvariant(Results const& e) { - double its = e.NumIterations(); - // check that the values are within 0.1% of the expected value - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. 
* its, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant", - &CheckIterationInvariant); - -// ========================================================================= // -// ----------------- IterationInvariantRate Counters Output ---------------- // -// ========================================================================= // - -void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters["foo"] = - bm::Counter{1, bm::Counter::kIsIterationInvariantRate}; - state.counters["bar"] = - bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kIsIterationInvariant}; -} -BENCHMARK(BM_Counters_kIsIterationInvariantRate); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate " - "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_kIsIterationInvariantRate\",$"}, - {"\"family_index\": 9,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_kIsIterationInvariantRate\",$", - MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_kIsIterationInvariantRate\",%csv_report," - "%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckIsIterationInvariantRate(Results const& e) { - double its = e.NumIterations(); - double t = e.DurationCPUTime(); // this (and not real time) is the time used - 
// check that the values are within 0.1% of the expected values - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, its * 1. / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, its * 2. / t, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_kIsIterationInvariantRate", - &CheckIsIterationInvariantRate); - -// ========================================================================= // -// ------------------- AvgIterations Counters Output ------------------ // -// ========================================================================= // - -void BM_Counters_AvgIterations(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterations}; - state.counters["bar"] = bm::Counter{2, bm::Counter::kAvgIterations}; -} -BENCHMARK(BM_Counters_AvgIterations); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations %console_report " - "bar=%hrfloat foo=%hrfloat$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_AvgIterations\",$"}, - {"\"family_index\": 10,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_AvgIterations\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_AvgIterations\",%csv_report,%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckAvgIterations(Results const& e) { - double its = e.NumIterations(); - // check that the values are within 0.1% of the expected value - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. 
/ its, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / its, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations); - -// ========================================================================= // -// ----------------- AvgIterationsRate Counters Output ---------------- // -// ========================================================================= // - -void BM_Counters_kAvgIterationsRate(benchmark::State& state) { - for (auto _ : state) { - // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); - } - namespace bm = benchmark; - state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate}; - state.counters["bar"] = - bm::Counter{2, bm::Counter::kIsRate | bm::Counter::kAvgIterations}; -} -BENCHMARK(BM_Counters_kAvgIterationsRate); -ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate " - "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_kAvgIterationsRate\",$"}, - {"\"family_index\": 11,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_kAvgIterationsRate\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 1,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"bar\": %float,$", MR_Next}, - {"\"foo\": %float$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_kAvgIterationsRate\",%csv_report," - "%float,%float$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckAvgIterationsRate(Results const& e) { - double its = e.NumIterations(); - double t = e.DurationCPUTime(); // this (and not real time) is the time used - // check that the 
values are within 0.1% of the expected values - CHECK_FLOAT_COUNTER_VALUE(e, "foo", EQ, 1. / its / t, 0.001); - CHECK_FLOAT_COUNTER_VALUE(e, "bar", EQ, 2. / its / t, 0.001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_kAvgIterationsRate", - &CheckAvgIterationsRate); - -// ========================================================================= // -// --------------------------- TEST CASES END ------------------------------ // -// ========================================================================= // - -int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc b/libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc deleted file mode 100644 index bbe194264ed4..000000000000 --- a/libcxx/utils/google-benchmark/test/user_counters_thousands_test.cc +++ /dev/null @@ -1,183 +0,0 @@ - -#undef NDEBUG - -#include "benchmark/benchmark.h" -#include "output_test.h" - -// ========================================================================= // -// ------------------------ Thousands Customisation ------------------------ // -// ========================================================================= // - -void BM_Counters_Thousands(benchmark::State& state) { - for (auto _ : state) { - } - namespace bm = benchmark; - state.counters.insert({ - {"t0_1000000DefaultBase", - bm::Counter(1000 * 1000, bm::Counter::kDefaults)}, - {"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1000)}, - {"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1024)}, - {"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1000)}, - {"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1024)}, - }); -} -BENCHMARK(BM_Counters_Thousands)->Repetitions(2); -ADD_CASES( - TC_ConsoleOut, - { - 
{"^BM_Counters_Thousands/repeats:2 %console_report " - "t0_1000000DefaultBase=1000k " - "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " - "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, - {"^BM_Counters_Thousands/repeats:2 %console_report " - "t0_1000000DefaultBase=1000k " - "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " - "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, - {"^BM_Counters_Thousands/repeats:2_mean %console_report " - "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " - "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " - "t4_1048576Base1024=1024k$"}, - {"^BM_Counters_Thousands/repeats:2_median %console_report " - "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " - "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " - "t4_1048576Base1024=1024k$"}, - {"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ " - "]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 " - "t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"}, - }); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 0,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, - {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": 
\"BM_Counters_Thousands/repeats:2\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, - {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"repetition_index\": 1,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, - {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Thousands/repeats:2_mean\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"mean\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, - {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Thousands/repeats:2_median\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, - 
{"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"median\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"t0_1000000DefaultBase\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t1_1000000Base1000\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t2_1000000Base1024\": 1\\.(0)*e\\+(0)*6,$", MR_Next}, - {"\"t3_1048576Base1000\": 1\\.048576(0)*e\\+(0)*6,$", MR_Next}, - {"\"t4_1048576Base1024\": 1\\.048576(0)*e\\+(0)*6$", MR_Next}, - {"}", MR_Next}}); -ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Thousands/repeats:2_stddev\",$"}, - {"\"family_index\": 0,$", MR_Next}, - {"\"per_family_instance_index\": 0,$", MR_Next}, - {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": 2,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"stddev\",$", MR_Next}, - {"\"iterations\": 2,$", MR_Next}, - {"\"real_time\": %float,$", MR_Next}, - {"\"cpu_time\": %float,$", MR_Next}, - {"\"time_unit\": \"ns\",$", MR_Next}, - {"\"t0_1000000DefaultBase\": 0\\.(0)*e\\+(0)*,$", MR_Next}, - {"\"t1_1000000Base1000\": 0\\.(0)*e\\+(0)*,$", MR_Next}, - {"\"t2_1000000Base1024\": 0\\.(0)*e\\+(0)*,$", MR_Next}, - {"\"t3_1048576Base1000\": 0\\.(0)*e\\+(0)*,$", MR_Next}, - {"\"t4_1048576Base1024\": 0\\.(0)*e\\+(0)*$", MR_Next}, - {"}", MR_Next}}); - -ADD_CASES( - TC_CSVOut, - {{"^\"BM_Counters_Thousands/" - "repeats:2\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\.04858e\\+(" - "0)*6,1\\.04858e\\+(0)*6$"}, - {"^\"BM_Counters_Thousands/" - "repeats:2\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\.04858e\\+(" - "0)*6,1\\.04858e\\+(0)*6$"}, - {"^\"BM_Counters_Thousands/" - "repeats:2_mean\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\." 
- "04858e\\+(0)*6,1\\.04858e\\+(0)*6$"}, - {"^\"BM_Counters_Thousands/" - "repeats:2_median\",%csv_report,1e\\+(0)*6,1e\\+(0)*6,1e\\+(0)*6,1\\." - "04858e\\+(0)*6,1\\.04858e\\+(0)*6$"}, - {"^\"BM_Counters_Thousands/repeats:2_stddev\",%csv_report,0,0,0,0,0$"}}); -// VS2013 does not allow this function to be passed as a lambda argument -// to CHECK_BENCHMARK_RESULTS() -void CheckThousands(Results const& e) { - if (e.name != "BM_Counters_Thousands/repeats:2") - return; // Do not check the aggregates! - - // check that the values are within 0.01% of the expected values - CHECK_FLOAT_COUNTER_VALUE(e, "t0_1000000DefaultBase", EQ, 1000 * 1000, - 0.0001); - CHECK_FLOAT_COUNTER_VALUE(e, "t1_1000000Base1000", EQ, 1000 * 1000, 0.0001); - CHECK_FLOAT_COUNTER_VALUE(e, "t2_1000000Base1024", EQ, 1000 * 1000, 0.0001); - CHECK_FLOAT_COUNTER_VALUE(e, "t3_1048576Base1000", EQ, 1024 * 1024, 0.0001); - CHECK_FLOAT_COUNTER_VALUE(e, "t4_1048576Base1024", EQ, 1024 * 1024, 0.0001); -} -CHECK_BENCHMARK_RESULTS("BM_Counters_Thousands", &CheckThousands); - -// ========================================================================= // -// --------------------------- TEST CASES END ------------------------------ // -// ========================================================================= // - -int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/libcxx/utils/google-benchmark/tools/BUILD.bazel b/libcxx/utils/google-benchmark/tools/BUILD.bazel deleted file mode 100644 index 5895883a2eb3..000000000000 --- a/libcxx/utils/google-benchmark/tools/BUILD.bazel +++ /dev/null @@ -1,19 +0,0 @@ -load("@py_deps//:requirements.bzl", "requirement") - -py_library( - name = "gbench", - srcs = glob(["gbench/*.py"]), - deps = [ - requirement("numpy"), - requirement("scipy"), - ], -) - -py_binary( - name = "compare", - srcs = ["compare.py"], - python_version = "PY2", - deps = [ - ":gbench", - ], -) diff --git a/libcxx/utils/google-benchmark/tools/compare.py 
b/libcxx/utils/google-benchmark/tools/compare.py deleted file mode 100755 index 01d2c89f50fb..000000000000 --- a/libcxx/utils/google-benchmark/tools/compare.py +++ /dev/null @@ -1,429 +0,0 @@ -#!/usr/bin/env python - -import unittest -""" -compare.py - versatile benchmark output compare tool -""" - -import argparse -from argparse import ArgumentParser -import json -import sys -import gbench -from gbench import util, report -from gbench.util import * - - -def check_inputs(in1, in2, flags): - """ - Perform checking on the user provided inputs and diagnose any abnormalities - """ - in1_kind, in1_err = classify_input_file(in1) - in2_kind, in2_err = classify_input_file(in2) - output_file = find_benchmark_flag('--benchmark_out=', flags) - output_type = find_benchmark_flag('--benchmark_out_format=', flags) - if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file: - print(("WARNING: '--benchmark_out=%s' will be passed to both " - "benchmarks causing it to be overwritten") % output_file) - if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0: - print("WARNING: passing optional flags has no effect since both " - "inputs are JSON") - if output_type is not None and output_type != 'json': - print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" - " is not supported.") % output_type) - sys.exit(1) - - -def create_parser(): - parser = ArgumentParser( - description='versatile benchmark output compare tool') - - parser.add_argument( - '-a', - '--display_aggregates_only', - dest='display_aggregates_only', - action="store_true", - help="If there are repetitions, by default, we display everything - the" - " actual runs, and the aggregates computed. Sometimes, it is " - "desirable to only view the aggregates. E.g. when there are a lot " - "of repetitions. Do note that only the display is affected. " - "Internally, all the actual runs are still used, e.g. 
for U test.") - - parser.add_argument( - '--no-color', - dest='color', - default=True, - action="store_false", - help="Do not use colors in the terminal output" - ) - - parser.add_argument( - '-d', - '--dump_to_json', - dest='dump_to_json', - help="Additionally, dump benchmark comparison output to this file in JSON format.") - - utest = parser.add_argument_group() - utest.add_argument( - '--no-utest', - dest='utest', - default=True, - action="store_false", - help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS)) - alpha_default = 0.05 - utest.add_argument( - "--alpha", - dest='utest_alpha', - default=alpha_default, - type=float, - help=("significance level alpha. 
if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") % - alpha_default) - - subparsers = parser.add_subparsers( - help='This tool has multiple modes of operation:', - dest='mode') - - parser_a = subparsers.add_parser( - 'benchmarks', - help='The most simple use-case, compare all the output of these two benchmarks') - baseline = parser_a.add_argument_group( - 'baseline', 'The benchmark baseline') - baseline.add_argument( - 'test_baseline', - metavar='test_baseline', - type=argparse.FileType('r'), - nargs=1, - help='A benchmark executable or JSON output file') - contender = parser_a.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') - contender.add_argument( - 'test_contender', - metavar='test_contender', - type=argparse.FileType('r'), - nargs=1, - help='A benchmark executable or JSON output file') - parser_a.add_argument( - 'benchmark_options', - metavar='benchmark_options', - nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') - - parser_b = subparsers.add_parser( - 'filters', help='Compare filter one with the filter two of benchmark') - baseline = parser_b.add_argument_group( - 'baseline', 'The benchmark baseline') - baseline.add_argument( - 'test', - metavar='test', - type=argparse.FileType('r'), - nargs=1, - help='A benchmark executable or JSON output file') - baseline.add_argument( - 'filter_baseline', - metavar='filter_baseline', - type=str, - nargs=1, - help='The first filter, that will be used as baseline') - contender = parser_b.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') - contender.add_argument( - 'filter_contender', - metavar='filter_contender', - type=str, - nargs=1, - help='The second filter, that will be compared against the baseline') - parser_b.add_argument( - 'benchmark_options', - metavar='benchmark_options', - 
nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') - - parser_c = subparsers.add_parser( - 'benchmarksfiltered', - help='Compare filter one of first benchmark with filter two of the second benchmark') - baseline = parser_c.add_argument_group( - 'baseline', 'The benchmark baseline') - baseline.add_argument( - 'test_baseline', - metavar='test_baseline', - type=argparse.FileType('r'), - nargs=1, - help='A benchmark executable or JSON output file') - baseline.add_argument( - 'filter_baseline', - metavar='filter_baseline', - type=str, - nargs=1, - help='The first filter, that will be used as baseline') - contender = parser_c.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') - contender.add_argument( - 'test_contender', - metavar='test_contender', - type=argparse.FileType('r'), - nargs=1, - help='The second benchmark executable or JSON output file, that will be compared against the baseline') - contender.add_argument( - 'filter_contender', - metavar='filter_contender', - type=str, - nargs=1, - help='The second filter, that will be compared against the baseline') - parser_c.add_argument( - 'benchmark_options', - metavar='benchmark_options', - nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') - - return parser - - -def main(): - # Parse the command line flags - parser = create_parser() - args, unknown_args = parser.parse_known_args() - if args.mode is None: - parser.print_help() - exit(1) - assert not unknown_args - benchmark_options = args.benchmark_options - - if args.mode == 'benchmarks': - test_baseline = args.test_baseline[0].name - test_contender = args.test_contender[0].name - filter_baseline = '' - filter_contender = '' - - # NOTE: if test_baseline == test_contender, you are analyzing the stdev - - description = 'Comparing %s to %s' % (test_baseline, test_contender) - elif args.mode == 'filters': - test_baseline = args.test[0].name - 
test_contender = args.test[0].name - filter_baseline = args.filter_baseline[0] - filter_contender = args.filter_contender[0] - - # NOTE: if filter_baseline == filter_contender, you are analyzing the - # stdev - - description = 'Comparing %s to %s (from %s)' % ( - filter_baseline, filter_contender, args.test[0].name) - elif args.mode == 'benchmarksfiltered': - test_baseline = args.test_baseline[0].name - test_contender = args.test_contender[0].name - filter_baseline = args.filter_baseline[0] - filter_contender = args.filter_contender[0] - - # NOTE: if test_baseline == test_contender and - # filter_baseline == filter_contender, you are analyzing the stdev - - description = 'Comparing %s (from %s) to %s (from %s)' % ( - filter_baseline, test_baseline, filter_contender, test_contender) - else: - # should never happen - print("Unrecognized mode of operation: '%s'" % args.mode) - parser.print_help() - exit(1) - - check_inputs(test_baseline, test_contender, benchmark_options) - - if args.display_aggregates_only: - benchmark_options += ['--benchmark_display_aggregates_only=true'] - - options_baseline = [] - options_contender = [] - - if filter_baseline and filter_contender: - options_baseline = ['--benchmark_filter=%s' % filter_baseline] - options_contender = ['--benchmark_filter=%s' % filter_contender] - - # Run the benchmarks and report the results - json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( - test_baseline, benchmark_options + options_baseline)) - json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( - test_contender, benchmark_options + options_contender)) - - # Now, filter the benchmarks so that the difference report can work - if filter_baseline and filter_contender: - replacement = '[%s vs. 
%s]' % (filter_baseline, filter_contender) - json1 = gbench.report.filter_benchmark( - json1_orig, filter_baseline, replacement) - json2 = gbench.report.filter_benchmark( - json2_orig, filter_contender, replacement) - - diff_report = gbench.report.get_difference_report( - json1, json2, args.utest) - output_lines = gbench.report.print_difference_report( - diff_report, - args.display_aggregates_only, - args.utest, args.utest_alpha, args.color) - print(description) - for ln in output_lines: - print(ln) - - # Optionally, diff and output to JSON - if args.dump_to_json is not None: - with open(args.dump_to_json, 'w') as f_json: - json.dump(diff_report, f_json) - -class TestParser(unittest.TestCase): - def setUp(self): - self.parser = create_parser() - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'gbench', - 'Inputs') - self.testInput0 = os.path.join(testInputs, 'test1_run1.json') - self.testInput1 = os.path.join(testInputs, 'test1_run2.json') - - def test_benchmarks_basic(self): - parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1]) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertFalse(parsed.benchmark_options) - - def test_benchmarks_basic_without_utest(self): - parsed = self.parser.parse_args( - ['--no-utest', 'benchmarks', self.testInput0, self.testInput1]) - self.assertFalse(parsed.display_aggregates_only) - self.assertFalse(parsed.utest) - self.assertEqual(parsed.utest_alpha, 0.05) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertFalse(parsed.benchmark_options) - - def test_benchmarks_basic_display_aggregates_only(self): - parsed = 
self.parser.parse_args( - ['-a', 'benchmarks', self.testInput0, self.testInput1]) - self.assertTrue(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertFalse(parsed.benchmark_options) - - def test_benchmarks_basic_with_utest_alpha(self): - parsed = self.parser.parse_args( - ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertFalse(parsed.benchmark_options) - - def test_benchmarks_basic_without_utest_with_utest_alpha(self): - parsed = self.parser.parse_args( - ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) - self.assertFalse(parsed.display_aggregates_only) - self.assertFalse(parsed.utest) - self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertFalse(parsed.benchmark_options) - - def test_benchmarks_with_remainder(self): - parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1, 'd']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ['d']) - - def test_benchmarks_with_remainder_after_doubleminus(self): - parsed = self.parser.parse_args( - 
['benchmarks', self.testInput0, self.testInput1, '--', 'e']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ['e']) - - def test_filters_basic(self): - parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') - self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertFalse(parsed.benchmark_options) - - def test_filters_with_remainder(self): - parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd', 'e']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') - self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertEqual(parsed.benchmark_options, ['e']) - - def test_filters_with_remainder_after_doubleminus(self): - parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd', '--', 'f']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') - self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertEqual(parsed.benchmark_options, ['f']) - - def test_benchmarksfiltered_basic(self): - parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) 
- self.assertEqual(parsed.mode, 'benchmarksfiltered') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertFalse(parsed.benchmark_options) - - def test_benchmarksfiltered_with_remainder(self): - parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertEqual(parsed.benchmark_options[0], 'f') - - def test_benchmarksfiltered_with_remainder_after_doubleminus(self): - parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g']) - self.assertFalse(parsed.display_aggregates_only) - self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') - self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertEqual(parsed.benchmark_options[0], 'g') - - -if __name__ == '__main__': - # unittest.main() - main() - -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; -# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json deleted file mode 100644 index 601e327aefb5..000000000000 
--- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run1.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "context": { - "date": "2016-08-02 17:44:46", - "num_cpus": 4, - "mhz_per_cpu": 4228, - "cpu_scaling_enabled": false, - "library_build_type": "release" - }, - "benchmarks": [ - { - "name": "BM_SameTimes", - "iterations": 1000, - "real_time": 10, - "cpu_time": 10, - "time_unit": "ns" - }, - { - "name": "BM_2xFaster", - "iterations": 1000, - "real_time": 50, - "cpu_time": 50, - "time_unit": "ns" - }, - { - "name": "BM_2xSlower", - "iterations": 1000, - "real_time": 50, - "cpu_time": 50, - "time_unit": "ns" - }, - { - "name": "BM_1PercentFaster", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_1PercentSlower", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_10PercentFaster", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_10PercentSlower", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_100xSlower", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_100xFaster", - "iterations": 1000, - "real_time": 10000, - "cpu_time": 10000, - "time_unit": "ns" - }, - { - "name": "BM_10PercentCPUToTime", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_ThirdFaster", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "MyComplexityTest_BigO", - "run_name": "MyComplexityTest", - "run_type": "aggregate", - "aggregate_name": "BigO", - "cpu_coefficient": 4.2749856294592886e+00, - "real_coefficient": 6.4789275289789780e+00, - "big_o": "N", - "time_unit": "ns" - }, - { - "name": "MyComplexityTest_RMS", - "run_name": "MyComplexityTest", - "run_type": "aggregate", - "aggregate_name": "RMS", - "rms": 
4.5097802512472874e-03 - }, - { - "name": "BM_NotBadTimeUnit", - "iterations": 1000, - "real_time": 0.4, - "cpu_time": 0.5, - "time_unit": "s" - }, - { - "name": "BM_DifferentTimeUnit", - "iterations": 1, - "real_time": 1, - "cpu_time": 1, - "time_unit": "s" - } - ] -} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json deleted file mode 100644 index 3cbcf39b0c93..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test1_run2.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "context": { - "date": "2016-08-02 17:44:46", - "num_cpus": 4, - "mhz_per_cpu": 4228, - "cpu_scaling_enabled": false, - "library_build_type": "release" - }, - "benchmarks": [ - { - "name": "BM_SameTimes", - "iterations": 1000, - "real_time": 10, - "cpu_time": 10, - "time_unit": "ns" - }, - { - "name": "BM_2xFaster", - "iterations": 1000, - "real_time": 25, - "cpu_time": 25, - "time_unit": "ns" - }, - { - "name": "BM_2xSlower", - "iterations": 20833333, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_1PercentFaster", - "iterations": 1000, - "real_time": 98.9999999, - "cpu_time": 98.9999999, - "time_unit": "ns" - }, - { - "name": "BM_1PercentSlower", - "iterations": 1000, - "real_time": 100.9999999, - "cpu_time": 100.9999999, - "time_unit": "ns" - }, - { - "name": "BM_10PercentFaster", - "iterations": 1000, - "real_time": 90, - "cpu_time": 90, - "time_unit": "ns" - }, - { - "name": "BM_10PercentSlower", - "iterations": 1000, - "real_time": 110, - "cpu_time": 110, - "time_unit": "ns" - }, - { - "name": "BM_100xSlower", - "iterations": 1000, - "real_time": 1.0000e+04, - "cpu_time": 1.0000e+04, - "time_unit": "ns" - }, - { - "name": "BM_100xFaster", - "iterations": 1000, - "real_time": 100, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_10PercentCPUToTime", - "iterations": 1000, - "real_time": 110, - "cpu_time": 90, - "time_unit": "ns" - }, - { - 
"name": "BM_ThirdFaster", - "iterations": 1000, - "real_time": 66.665, - "cpu_time": 66.664, - "time_unit": "ns" - }, - { - "name": "MyComplexityTest_BigO", - "run_name": "MyComplexityTest", - "run_type": "aggregate", - "aggregate_name": "BigO", - "cpu_coefficient": 5.6215779594361486e+00, - "real_coefficient": 5.6288314793554610e+00, - "big_o": "N", - "time_unit": "ns" - }, - { - "name": "MyComplexityTest_RMS", - "run_name": "MyComplexityTest", - "run_type": "aggregate", - "aggregate_name": "RMS", - "rms": 3.3128901852342174e-03 - }, - { - "name": "BM_NotBadTimeUnit", - "iterations": 1000, - "real_time": 0.04, - "cpu_time": 0.6, - "time_unit": "s" - }, - { - "name": "BM_DifferentTimeUnit", - "iterations": 1, - "real_time": 1, - "cpu_time": 1, - "time_unit": "ns" - } - ] -} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json deleted file mode 100644 index 15bc69803049..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test2_run.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "context": { - "date": "2016-08-02 17:44:46", - "num_cpus": 4, - "mhz_per_cpu": 4228, - "cpu_scaling_enabled": false, - "library_build_type": "release" - }, - "benchmarks": [ - { - "name": "BM_Hi", - "iterations": 1234, - "real_time": 42, - "cpu_time": 24, - "time_unit": "ms" - }, - { - "name": "BM_Zero", - "iterations": 1000, - "real_time": 10, - "cpu_time": 10, - "time_unit": "ns" - }, - { - "name": "BM_Zero/4", - "iterations": 4000, - "real_time": 40, - "cpu_time": 40, - "time_unit": "ns" - }, - { - "name": "Prefix/BM_Zero", - "iterations": 2000, - "real_time": 20, - "cpu_time": 20, - "time_unit": "ns" - }, - { - "name": "Prefix/BM_Zero/3", - "iterations": 3000, - "real_time": 30, - "cpu_time": 30, - "time_unit": "ns" - }, - { - "name": "BM_One", - "iterations": 5000, - "real_time": 5, - "cpu_time": 5, - "time_unit": "ns" - }, - { - "name": "BM_One/4", - "iterations": 2000, - "real_time": 
20, - "cpu_time": 20, - "time_unit": "ns" - }, - { - "name": "Prefix/BM_One", - "iterations": 1000, - "real_time": 10, - "cpu_time": 10, - "time_unit": "ns" - }, - { - "name": "Prefix/BM_One/3", - "iterations": 1500, - "real_time": 15, - "cpu_time": 15, - "time_unit": "ns" - }, - { - "name": "BM_Bye", - "iterations": 5321, - "real_time": 11, - "cpu_time": 63, - "time_unit": "ns" - } - ] -} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json deleted file mode 100644 index 49f8b061437f..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run0.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "context": { - "date": "2016-08-02 17:44:46", - "num_cpus": 4, - "mhz_per_cpu": 4228, - "cpu_scaling_enabled": false, - "library_build_type": "release" - }, - "benchmarks": [ - { - "name": "BM_One", - "run_type": "aggregate", - "iterations": 1000, - "real_time": 10, - "cpu_time": 100, - "time_unit": "ns" - }, - { - "name": "BM_Two", - "iterations": 1000, - "real_time": 9, - "cpu_time": 90, - "time_unit": "ns" - }, - { - "name": "BM_Two", - "iterations": 1000, - "real_time": 8, - "cpu_time": 86, - "time_unit": "ns" - }, - { - "name": "short", - "run_type": "aggregate", - "iterations": 1000, - "real_time": 8, - "cpu_time": 80, - "time_unit": "ns" - }, - { - "name": "short", - "run_type": "aggregate", - "iterations": 1000, - "real_time": 8, - "cpu_time": 77, - "time_unit": "ns" - }, - { - "name": "medium", - "run_type": "iteration", - "iterations": 1000, - "real_time": 8, - "cpu_time": 80, - "time_unit": "ns" - }, - { - "name": "medium", - "run_type": "iteration", - "iterations": 1000, - "real_time": 9, - "cpu_time": 82, - "time_unit": "ns" - } - ] -} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json deleted file mode 100644 index acc5ba17aed1..000000000000 --- 
a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test3_run1.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "context": { - "date": "2016-08-02 17:44:46", - "num_cpus": 4, - "mhz_per_cpu": 4228, - "cpu_scaling_enabled": false, - "library_build_type": "release" - }, - "benchmarks": [ - { - "name": "BM_One", - "iterations": 1000, - "real_time": 9, - "cpu_time": 110, - "time_unit": "ns" - }, - { - "name": "BM_Two", - "run_type": "aggregate", - "iterations": 1000, - "real_time": 10, - "cpu_time": 89, - "time_unit": "ns" - }, - { - "name": "BM_Two", - "iterations": 1000, - "real_time": 7, - "cpu_time": 72, - "time_unit": "ns" - }, - { - "name": "short", - "run_type": "aggregate", - "iterations": 1000, - "real_time": 7, - "cpu_time": 75, - "time_unit": "ns" - }, - { - "name": "short", - "run_type": "aggregate", - "iterations": 762, - "real_time": 4.54, - "cpu_time": 66.6, - "time_unit": "ns" - }, - { - "name": "short", - "run_type": "iteration", - "iterations": 1000, - "real_time": 800, - "cpu_time": 1, - "time_unit": "ns" - }, - { - "name": "medium", - "run_type": "iteration", - "iterations": 1200, - "real_time": 5, - "cpu_time": 53, - "time_unit": "ns" - } - ] -} diff --git a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json b/libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json deleted file mode 100644 index eaa005f3a9f4..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/Inputs/test4_run.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "benchmarks": [ - { - "name": "99 family 0 instance 0 repetition 0", - "run_type": "iteration", - "family_index": 0, - "per_family_instance_index": 0, - "repetition_index": 0 - }, - { - "name": "98 family 0 instance 0 repetition 1", - "run_type": "iteration", - "family_index": 0, - "per_family_instance_index": 0, - "repetition_index": 1 - }, - { - "name": "97 family 0 instance 0 aggregate", - "run_type": "aggregate", - "family_index": 0, - "per_family_instance_index": 0, - "aggregate_name": "9 aggregate" - }, - 
- - { - "name": "96 family 0 instance 1 repetition 0", - "run_type": "iteration", - "family_index": 0, - "per_family_instance_index": 1, - "repetition_index": 0 - }, - { - "name": "95 family 0 instance 1 repetition 1", - "run_type": "iteration", - "family_index": 0, - "per_family_instance_index": 1, - "repetition_index": 1 - }, - { - "name": "94 family 0 instance 1 aggregate", - "run_type": "aggregate", - "family_index": 0, - "per_family_instance_index": 1, - "aggregate_name": "9 aggregate" - }, - - - - - { - "name": "93 family 1 instance 0 repetition 0", - "run_type": "iteration", - "family_index": 1, - "per_family_instance_index": 0, - "repetition_index": 0 - }, - { - "name": "92 family 1 instance 0 repetition 1", - "run_type": "iteration", - "family_index": 1, - "per_family_instance_index": 0, - "repetition_index": 1 - }, - { - "name": "91 family 1 instance 0 aggregate", - "run_type": "aggregate", - "family_index": 1, - "per_family_instance_index": 0, - "aggregate_name": "9 aggregate" - }, - - - { - "name": "90 family 1 instance 1 repetition 0", - "run_type": "iteration", - "family_index": 1, - "per_family_instance_index": 1, - "repetition_index": 0 - }, - { - "name": "89 family 1 instance 1 repetition 1", - "run_type": "iteration", - "family_index": 1, - "per_family_instance_index": 1, - "repetition_index": 1 - }, - { - "name": "88 family 1 instance 1 aggregate", - "run_type": "aggregate", - "family_index": 1, - "per_family_instance_index": 1, - "aggregate_name": "9 aggregate" - } - ] -} diff --git a/libcxx/utils/google-benchmark/tools/gbench/__init__.py b/libcxx/utils/google-benchmark/tools/gbench/__init__.py deleted file mode 100644 index fce1a1acfbb3..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Google Benchmark tooling""" - -__author__ = 'Eric Fiselier' -__email__ = 'eric@efcs.ca' -__versioninfo__ = (0, 5, 0) -__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' - -__all__ = [] diff 
--git a/libcxx/utils/google-benchmark/tools/gbench/report.py b/libcxx/utils/google-benchmark/tools/gbench/report.py deleted file mode 100644 index 6bea82f6bf7b..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/report.py +++ /dev/null @@ -1,991 +0,0 @@ -"""report.py - Utilities for reporting statistics about benchmark results -""" - -import unittest -import os -import re -import copy -import random - -from scipy.stats import mannwhitneyu - - -class BenchmarkColor(object): - def __init__(self, name, code): - self.name = name - self.code = code - - def __repr__(self): - return '%s%r' % (self.__class__.__name__, - (self.name, self.code)) - - def __format__(self, format): - return self.code - - -# Benchmark Colors Enumeration -BC_NONE = BenchmarkColor('NONE', '') -BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') -BC_CYAN = BenchmarkColor('CYAN', '\033[96m') -BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') -BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') -BC_HEADER = BenchmarkColor('HEADER', '\033[92m') -BC_WARNING = BenchmarkColor('WARNING', '\033[93m') -BC_WHITE = BenchmarkColor('WHITE', '\033[97m') -BC_FAIL = BenchmarkColor('FAIL', '\033[91m') -BC_ENDC = BenchmarkColor('ENDC', '\033[0m') -BC_BOLD = BenchmarkColor('BOLD', '\033[1m') -BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') - -UTEST_MIN_REPETITIONS = 2 -UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. -UTEST_COL_NAME = "_pvalue" - - -def color_format(use_color, fmt_str, *args, **kwargs): - """ - Return the result of 'fmt_str.format(*args, **kwargs)' after transforming - 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' - is False then all color codes in 'args' and 'kwargs' are replaced with - the empty string. 
- """ - assert use_color is True or use_color is False - if not use_color: - args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for arg in args] - kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for key, arg in kwargs.items()} - return fmt_str.format(*args, **kwargs) - - -def find_longest_name(benchmark_list): - """ - Return the length of the longest benchmark name in a given list of - benchmark JSON objects - """ - longest_name = 1 - for bc in benchmark_list: - if len(bc['name']) > longest_name: - longest_name = len(bc['name']) - return longest_name - - -def calculate_change(old_val, new_val): - """ - Return a float representing the decimal change between old_val and new_val. - """ - if old_val == 0 and new_val == 0: - return 0.0 - if old_val == 0: - return float(new_val - old_val) / (float(old_val + new_val) / 2) - return float(new_val - old_val) / abs(old_val) - - -def filter_benchmark(json_orig, family, replacement=""): - """ - Apply a filter to the json, and only leave the 'family' of benchmarks. - """ - regex = re.compile(family) - filtered = {} - filtered['benchmarks'] = [] - for be in json_orig['benchmarks']: - if not regex.search(be['name']): - continue - filteredbench = copy.deepcopy(be) # Do NOT modify the old name! - filteredbench['name'] = regex.sub(replacement, filteredbench['name']) - filtered['benchmarks'].append(filteredbench) - return filtered - - -def get_unique_benchmark_names(json): - """ - While *keeping* the order, give all the unique 'names' used for benchmarks. - """ - seen = set() - uniqued = [x['name'] for x in json['benchmarks'] - if x['name'] not in seen and - (seen.add(x['name']) or True)] - return uniqued - - -def intersect(list1, list2): - """ - Given two lists, get a new list consisting of the elements only contained - in *both of the input lists*, while preserving the ordering. 
- """ - return [x for x in list1 if x in list2] - - -def is_potentially_comparable_benchmark(x): - return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) - - -def partition_benchmarks(json1, json2): - """ - While preserving the ordering, find benchmarks with the same names in - both of the inputs, and group them. - (i.e. partition/filter into groups with common name) - """ - json1_unique_names = get_unique_benchmark_names(json1) - json2_unique_names = get_unique_benchmark_names(json2) - names = intersect(json1_unique_names, json2_unique_names) - partitions = [] - for name in names: - time_unit = None - # Pick the time unit from the first entry of the lhs benchmark. - # We should be careful not to crash with unexpected input. - for x in json1['benchmarks']: - if (x['name'] == name and is_potentially_comparable_benchmark(x)): - time_unit = x['time_unit'] - break - if time_unit is None: - continue - # Filter by name and time unit. - # All the repetitions are assumed to be comparable. - lhs = [x for x in json1['benchmarks'] if x['name'] == name and - x['time_unit'] == time_unit] - rhs = [x for x in json2['benchmarks'] if x['name'] == name and - x['time_unit'] == time_unit] - partitions.append([lhs, rhs]) - return partitions - - -def extract_field(partition, field_name): - # The count of elements may be different. We want *all* of them. - lhs = [x[field_name] for x in partition[0]] - rhs = [x[field_name] for x in partition[1]] - return [lhs, rhs] - - -def calc_utest(timings_cpu, timings_time): - min_rep_cnt = min(len(timings_time[0]), - len(timings_time[1]), - len(timings_cpu[0]), - len(timings_cpu[1])) - - # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 
- if min_rep_cnt < UTEST_MIN_REPETITIONS: - return False, None, None - - time_pvalue = mannwhitneyu( - timings_time[0], timings_time[1], alternative='two-sided').pvalue - cpu_pvalue = mannwhitneyu( - timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue - - return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue - -def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): - def get_utest_color(pval): - return BC_FAIL if pval >= utest_alpha else BC_OKGREEN - - # Check if we failed miserably with minimum required repetitions for utest - if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: - return [] - - dsc = "U Test, Repetitions: {} vs {}".format( - utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) - dsc_color = BC_OKGREEN - - # We still got some results to show but issue a warning about it. - if not utest['have_optimal_repetitions']: - dsc_color = BC_WARNING - dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( - UTEST_OPTIMAL_REPETITIONS) - - special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" - - return [color_format(use_color, - special_str, - BC_HEADER, - "{}{}".format(bc_name, UTEST_COL_NAME), - first_col_width, - get_utest_color( - utest['time_pvalue']), utest['time_pvalue'], - get_utest_color( - utest['cpu_pvalue']), utest['cpu_pvalue'], - dsc_color, dsc, - endc=BC_ENDC)] - - -def get_difference_report( - json1, - json2, - utest=False): - """ - Calculate and report the difference between each test of two benchmarks - runs specified as 'json1' and 'json2'. Output is another json containing - relevant details for each test run. 
- """ - assert utest is True or utest is False - - diff_report = [] - partitions = partition_benchmarks(json1, json2) - for partition in partitions: - benchmark_name = partition[0][0]['name'] - time_unit = partition[0][0]['time_unit'] - measurements = [] - utest_results = {} - # Careful, we may have different repetition count. - for i in range(min(len(partition[0]), len(partition[1]))): - bn = partition[0][i] - other_bench = partition[1][i] - measurements.append({ - 'real_time': bn['real_time'], - 'cpu_time': bn['cpu_time'], - 'real_time_other': other_bench['real_time'], - 'cpu_time_other': other_bench['cpu_time'], - 'time': calculate_change(bn['real_time'], other_bench['real_time']), - 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) - }) - - # After processing the whole partition, if requested, do the U test. - if utest: - timings_cpu = extract_field(partition, 'cpu_time') - timings_time = extract_field(partition, 'real_time') - have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) - if cpu_pvalue and time_pvalue: - utest_results = { - 'have_optimal_repetitions': have_optimal_repetitions, - 'cpu_pvalue': cpu_pvalue, - 'time_pvalue': time_pvalue, - 'nr_of_repetitions': len(timings_cpu[0]), - 'nr_of_repetitions_other': len(timings_cpu[1]) - } - - # Store only if we had any measurements for given benchmark. - # E.g. partition_benchmarks will filter out the benchmarks having - # time units which are not compatible with other time units in the - # benchmark suite. 
- if measurements: - run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' - aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' - diff_report.append({ - 'name': benchmark_name, - 'measurements': measurements, - 'time_unit': time_unit, - 'run_type': run_type, - 'aggregate_name': aggregate_name, - 'utest': utest_results - }) - - return diff_report - - -def print_difference_report( - json_diff_report, - include_aggregates_only=False, - utest=False, - utest_alpha=0.05, - use_color=True): - """ - Calculate and report the difference between each test of two benchmarks - runs specified as 'json1' and 'json2'. - """ - assert utest is True or utest is False - - def get_color(res): - if res > 0.05: - return BC_FAIL - elif res > -0.07: - return BC_WHITE - else: - return BC_CYAN - - first_col_width = find_longest_name(json_diff_report) - first_col_width = max( - first_col_width, - len('Benchmark')) - first_col_width += len(UTEST_COL_NAME) - first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( - 'Benchmark', 12 + first_col_width) - output_strs = [first_line, '-' * len(first_line)] - - fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" - for benchmark in json_diff_report: - # *If* we were asked to only include aggregates, - # and if it is non-aggregate, then don't print it. 
- if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': - for measurement in benchmark['measurements']: - output_strs += [color_format(use_color, - fmt_str, - BC_HEADER, - benchmark['name'], - first_col_width, - get_color(measurement['time']), - measurement['time'], - get_color(measurement['cpu']), - measurement['cpu'], - measurement['real_time'], - measurement['real_time_other'], - measurement['cpu_time'], - measurement['cpu_time_other'], - endc=BC_ENDC)] - - # After processing the measurements, if requested and - # if applicable (e.g. u-test exists for given benchmark), - # print the U test. - if utest and benchmark['utest']: - output_strs += print_utest(benchmark['name'], - benchmark['utest'], - utest_alpha=utest_alpha, - first_col_width=first_col_width, - use_color=use_color) - - return output_strs - - -############################################################################### -# Unit tests - - -class TestGetUniqueBenchmarkNames(unittest.TestCase): - def load_results(self): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test3_run0.json') - with open(testOutput, 'r') as f: - json = json.load(f) - return json - - def test_basic(self): - expect_lines = [ - 'BM_One', - 'BM_Two', - 'short', # These two are not sorted - 'medium', # These two are not sorted - ] - json = self.load_results() - output_lines = get_unique_benchmark_names(json) - print("\n") - print("\n".join(output_lines)) - self.assertEqual(len(output_lines), len(expect_lines)) - for i in range(0, len(output_lines)): - self.assertEqual(expect_lines[i], output_lines[i]) - - -class TestReportDifference(unittest.TestCase): - @classmethod - def setUpClass(cls): - def load_results(): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test1_run1.json') - testOutput2 
= os.path.join(testInputs, 'test1_run2.json') - with open(testOutput1, 'r') as f: - json1 = json.load(f) - with open(testOutput2, 'r') as f: - json2 = json.load(f) - return json1, json2 - - json1, json2 = load_results() - cls.json_diff_report = get_difference_report(json1, json2) - - def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], - ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], - ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], - ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], - ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], - ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], - ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], - ['BM_100xSlower', '+99.0000', '+99.0000', - '100', '10000', '100', '10000'], - ['BM_100xFaster', '-0.9900', '-0.9900', - '10000', '100', '10000', '100'], - ['BM_10PercentCPUToTime', '+0.1000', - '-0.1000', '100', '110', '100', '90'], - ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], - ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], - ] - output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) - self.assertEqual(len(output_lines), len(expect_lines)) - for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(len(parts), 7) - self.assertEqual(expect_lines[i], parts) - - def test_json_diff_report_output(self): - expected_output = [ - { - 'name': 'BM_SameTimes', - 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_2xFaster', - 'measurements': [{'time': -0.5000, 'cpu': 
-0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_2xSlower', - 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_1PercentFaster', - 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_1PercentSlower', - 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_10PercentFaster', - 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_10PercentSlower', - 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_100xSlower', - 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_100xFaster', - 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_10PercentCPUToTime', - 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_ThirdFaster', - 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}], - 
'time_unit': 'ns', - 'utest': {} - }, - { - 'name': 'BM_NotBadTimeUnit', - 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}], - 'time_unit': 's', - 'utest': {} - }, - ] - self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) - assert_utest(self, out, expected) - assert_measurements(self, out, expected) - - -class TestReportDifferenceBetweenFamilies(unittest.TestCase): - @classmethod - def setUpClass(cls): - def load_result(): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test2_run.json') - with open(testOutput, 'r') as f: - json = json.load(f) - return json - - json = load_result() - json1 = filter_benchmark(json, "BM_Z.ro", ".") - json2 = filter_benchmark(json, "BM_O.e", ".") - cls.json_diff_report = get_difference_report(json1, json2) - - def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], - ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], - ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], - ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], - ] - output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) - self.assertEqual(len(output_lines), len(expect_lines)) - for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(len(parts), 7) - self.assertEqual(expect_lines[i], parts) - - def test_json_diff_report(self): - expected_output = [ - { - 'name': u'.', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 
'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': u'./4', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], - 'time_unit': 'ns', - 'utest': {}, - }, - { - 'name': u'Prefix/.', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': u'Prefix/./3', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], - 'time_unit': 'ns', - 'utest': {} - } - ] - self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) - assert_utest(self, out, expected) - assert_measurements(self, out, expected) - - -class TestReportDifferenceWithUTest(unittest.TestCase): - @classmethod - def setUpClass(cls): - def load_results(): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: - json1 = json.load(f) - with open(testOutput2, 'r') as f: - json2 = json.load(f) - return json1, json2 - - json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) - - def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', - '0.6985', - '0.6985', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 
'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.1489', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], - ] - output_lines_with_header = print_difference_report( - self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) - self.assertEqual(len(output_lines), len(expect_lines)) - for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(expect_lines[i], parts) - - def test_json_diff_report_pretty_printing_aggregates_only(self): - expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two_pvalue', - '0.6985', - '0.6985', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.1489', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ] - output_lines_with_header = print_difference_report( - self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) - self.assertEqual(len(output_lines), len(expect_lines)) - for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(expect_lines[i], parts) - - def 
test_json_diff_report(self): - expected_output = [ - { - 'name': u'BM_One', - 'measurements': [ - {'time': -0.1, - 'cpu': 0.1, - 'real_time': 10, - 'real_time_other': 9, - 'cpu_time': 100, - 'cpu_time_other': 110} - ], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': u'BM_Two', - 'measurements': [ - {'time': 0.1111111111111111, - 'cpu': -0.011111111111111112, - 'real_time': 9, - 'real_time_other': 10, - 'cpu_time': 90, - 'cpu_time_other': 89}, - {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, - 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} - ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 - } - }, - { - 'name': u'short', - 'measurements': [ - {'time': -0.125, - 'cpu': -0.0625, - 'real_time': 8, - 'real_time_other': 7, - 'cpu_time': 80, - 'cpu_time_other': 75}, - {'time': -0.4325, - 'cpu': -0.13506493506493514, - 'real_time': 8, - 'real_time_other': 4.54, - 'cpu_time': 77, - 'cpu_time_other': 66.6} - ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 - } - }, - { - 'name': u'medium', - 'measurements': [ - {'time': -0.375, - 'cpu': -0.3375, - 'real_time': 8, - 'real_time_other': 5, - 'cpu_time': 80, - 'cpu_time_other': 53} - ], - 'time_unit': 'ns', - 'utest': {} - } - ] - self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) - assert_utest(self, out, expected) - assert_measurements(self, out, expected) - - -class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - unittest.TestCase): - @classmethod - def setUpClass(cls): - def load_results(): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - 
testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: - json1 = json.load(f) - with open(testOutput2, 'r') as f: - json2 = json.load(f) - return json1, json2 - - json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) - - def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', - '0.6985', - '0.6985', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.1489', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'] - ] - output_lines_with_header = print_difference_report( - self.json_diff_report, - utest=True, utest_alpha=0.05, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) - self.assertEqual(len(output_lines), len(expect_lines)) - for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(expect_lines[i], parts) - - def test_json_diff_report(self): - expected_output = [ - { - 'name': u'BM_One', - 'measurements': [ - {'time': -0.1, - 'cpu': 0.1, - 'real_time': 10, - 'real_time_other': 9, - 'cpu_time': 100, - 'cpu_time_other': 110} - ], - 'time_unit': 'ns', - 'utest': {} - }, - { - 'name': u'BM_Two', - 'measurements': [ - {'time': 0.1111111111111111, - 'cpu': -0.011111111111111112, - 'real_time': 9, - 
'real_time_other': 10, - 'cpu_time': 90, - 'cpu_time_other': 89}, - {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, - 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} - ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 - } - }, - { - 'name': u'short', - 'measurements': [ - {'time': -0.125, - 'cpu': -0.0625, - 'real_time': 8, - 'real_time_other': 7, - 'cpu_time': 80, - 'cpu_time_other': 75}, - {'time': -0.4325, - 'cpu': -0.13506493506493514, - 'real_time': 8, - 'real_time_other': 4.54, - 'cpu_time': 77, - 'cpu_time_other': 66.6} - ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 - } - }, - { - 'name': u'medium', - 'measurements': [ - {'real_time_other': 5, - 'cpu_time': 80, - 'time': -0.375, - 'real_time': 8, - 'cpu_time_other': 53, - 'cpu': -0.3375 - } - ], - 'utest': {}, - 'time_unit': u'ns', - 'aggregate_name': '' - } - ] - self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) - assert_utest(self, out, expected) - assert_measurements(self, out, expected) - - -class TestReportSorting(unittest.TestCase): - @classmethod - def setUpClass(cls): - def load_result(): - import json - testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test4_run.json') - with open(testOutput, 'r') as f: - json = json.load(f) - return json - - cls.json = load_result() - - def test_json_diff_report_pretty_printing(self): - import util - - expected_names = [ - "99 family 0 instance 0 repetition 0", - "98 family 0 instance 0 repetition 1", - "97 family 0 instance 0 aggregate", - "96 family 0 instance 1 repetition 0", - 
"95 family 0 instance 1 repetition 1", - "94 family 0 instance 1 aggregate", - "93 family 1 instance 0 repetition 0", - "92 family 1 instance 0 repetition 1", - "91 family 1 instance 0 aggregate", - "90 family 1 instance 1 repetition 0", - "89 family 1 instance 1 repetition 1", - "88 family 1 instance 1 aggregate" - ] - - for n in range(len(self.json['benchmarks']) ** 2): - random.shuffle(self.json['benchmarks']) - sorted_benchmarks = util.sort_benchmark_results(self.json)[ - 'benchmarks'] - self.assertEqual(len(expected_names), len(sorted_benchmarks)) - for out, expected in zip(sorted_benchmarks, expected_names): - self.assertEqual(out['name'], expected) - - -def assert_utest(unittest_instance, lhs, rhs): - if lhs['utest']: - unittest_instance.assertAlmostEqual( - lhs['utest']['cpu_pvalue'], - rhs['utest']['cpu_pvalue']) - unittest_instance.assertAlmostEqual( - lhs['utest']['time_pvalue'], - rhs['utest']['time_pvalue']) - unittest_instance.assertEqual( - lhs['utest']['have_optimal_repetitions'], - rhs['utest']['have_optimal_repetitions']) - else: - # lhs is empty. assert if rhs is not. - unittest_instance.assertEqual(lhs['utest'], rhs['utest']) - - -def assert_measurements(unittest_instance, lhs, rhs): - for m1, m2 in zip(lhs['measurements'], rhs['measurements']): - unittest_instance.assertEqual(m1['real_time'], m2['real_time']) - unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) - # m1['time'] and m1['cpu'] hold values which are being calculated, - # and therefore we must use almost-equal pattern. 
- unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) - unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) - - -if __name__ == '__main__': - unittest.main() - -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; -# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/libcxx/utils/google-benchmark/tools/gbench/util.py b/libcxx/utils/google-benchmark/tools/gbench/util.py deleted file mode 100644 index 5d0012c0cb1c..000000000000 --- a/libcxx/utils/google-benchmark/tools/gbench/util.py +++ /dev/null @@ -1,181 +0,0 @@ -"""util.py - General utilities for running, loading, and processing benchmarks -""" -import json -import os -import tempfile -import subprocess -import sys -import functools - -# Input file type enumeration -IT_Invalid = 0 -IT_JSON = 1 -IT_Executable = 2 - -_num_magic_bytes = 2 if sys.platform.startswith('win') else 4 - - -def is_executable_file(filename): - """ - Return 'True' if 'filename' names a valid file which is likely - an executable. A file is considered an executable if it starts with the - magic bytes for a EXE, Mach O, or ELF file. - """ - if not os.path.isfile(filename): - return False - with open(filename, mode='rb') as f: - magic_bytes = f.read(_num_magic_bytes) - if sys.platform == 'darwin': - return magic_bytes in [ - b'\xfe\xed\xfa\xce', # MH_MAGIC - b'\xce\xfa\xed\xfe', # MH_CIGAM - b'\xfe\xed\xfa\xcf', # MH_MAGIC_64 - b'\xcf\xfa\xed\xfe', # MH_CIGAM_64 - b'\xca\xfe\xba\xbe', # FAT_MAGIC - b'\xbe\xba\xfe\xca' # FAT_CIGAM - ] - elif sys.platform.startswith('win'): - return magic_bytes == b'MZ' - else: - return magic_bytes == b'\x7FELF' - - -def is_json_file(filename): - """ - Returns 'True' if 'filename' names a valid JSON output file. - 'False' otherwise. 
- """ - try: - with open(filename, 'r') as f: - json.load(f) - return True - except BaseException: - pass - return False - - -def classify_input_file(filename): - """ - Return a tuple (type, msg) where 'type' specifies the classified type - of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable - string represeting the error. - """ - ftype = IT_Invalid - err_msg = None - if not os.path.exists(filename): - err_msg = "'%s' does not exist" % filename - elif not os.path.isfile(filename): - err_msg = "'%s' does not name a file" % filename - elif is_executable_file(filename): - ftype = IT_Executable - elif is_json_file(filename): - ftype = IT_JSON - else: - err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename - return ftype, err_msg - - -def check_input_file(filename): - """ - Classify the file named by 'filename' and return the classification. - If the file is classified as 'IT_Invalid' print an error message and exit - the program. - """ - ftype, msg = classify_input_file(filename) - if ftype == IT_Invalid: - print("Invalid input file: %s" % msg) - sys.exit(1) - return ftype - - -def find_benchmark_flag(prefix, benchmark_flags): - """ - Search the specified list of flags for a flag matching `` and - if it is found return the arg it specifies. If specified more than once the - last value is returned. If the flag is not found None is returned. - """ - assert prefix.startswith('--') and prefix.endswith('=') - result = None - for f in benchmark_flags: - if f.startswith(prefix): - result = f[len(prefix):] - return result - - -def remove_benchmark_flags(prefix, benchmark_flags): - """ - Return a new list containing the specified benchmark_flags except those - with the specified prefix. - """ - assert prefix.startswith('--') and prefix.endswith('=') - return [f for f in benchmark_flags if not f.startswith(prefix)] - - -def load_benchmark_results(fname): - """ - Read benchmark output from a file and return the JSON object. 
- REQUIRES: 'fname' names a file containing JSON benchmark output. - """ - with open(fname, 'r') as f: - return json.load(f) - - -def sort_benchmark_results(result): - benchmarks = result['benchmarks'] - - # From inner key to the outer key! - benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) - benchmarks = sorted( - benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) - benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) - benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) - - result['benchmarks'] = benchmarks - return result - - -def run_benchmark(exe_name, benchmark_flags): - """ - Run a benchmark specified by 'exe_name' with the specified - 'benchmark_flags'. The benchmark is run directly as a subprocess to preserve - real time console output. - RETURNS: A JSON object representing the benchmark output - """ - output_name = find_benchmark_flag('--benchmark_out=', - benchmark_flags) - is_temp_output = False - if output_name is None: - is_temp_output = True - thandle, output_name = tempfile.mkstemp() - os.close(thandle) - benchmark_flags = list(benchmark_flags) + \ - ['--benchmark_out=%s' % output_name] - - cmd = [exe_name] + benchmark_flags - print("RUNNING: %s" % ' '.join(cmd)) - exitCode = subprocess.call(cmd) - if exitCode != 0: - print('TEST FAILED...') - sys.exit(exitCode) - json_res = load_benchmark_results(output_name) - if is_temp_output: - os.unlink(output_name) - return json_res - - -def run_or_load_benchmark(filename, benchmark_flags): - """ - Get the results for a specified benchmark. If 'filename' specifies - an executable benchmark then the results are generated by running the - benchmark. 
Otherwise 'filename' must name a valid JSON output file, - which is loaded and the result returned. - """ - ftype = check_input_file(filename) - if ftype == IT_JSON: - return load_benchmark_results(filename) - if ftype == IT_Executable: - return run_benchmark(filename, benchmark_flags) - raise ValueError('Unknown file type %s' % ftype) diff --git a/libcxx/utils/google-benchmark/tools/requirements.txt b/libcxx/utils/google-benchmark/tools/requirements.txt deleted file mode 100644 index 3b3331b5af12..000000000000 --- a/libcxx/utils/google-benchmark/tools/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -scipy>=1.5.0 \ No newline at end of file diff --git a/libcxx/utils/google-benchmark/tools/strip_asm.py b/libcxx/utils/google-benchmark/tools/strip_asm.py deleted file mode 100755 index 9030550b43be..000000000000 --- a/libcxx/utils/google-benchmark/tools/strip_asm.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python - -""" -strip_asm.py - Cleanup ASM output for the specified file -""" - -from argparse import ArgumentParser -import sys -import os -import re - -def find_used_labels(asm): - found = set() - label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") - for l in asm.splitlines(): - m = label_re.match(l) - if m: - found.add('.L%s' % m.group(1)) - return found - - -def normalize_labels(asm): - decls = set() - label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") - for l in asm.splitlines(): - m = label_decl.match(l) - if m: - decls.add(m.group(0)) - if len(decls) == 0: - return asm - needs_dot = next(iter(decls))[0] != '.' - if not needs_dot: - return asm - for ld in decls: - asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' 
+ ld, asm) - return asm - - -def transform_labels(asm): - asm = normalize_labels(asm) - used_decls = find_used_labels(asm) - new_asm = '' - label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") - for l in asm.splitlines(): - m = label_decl.match(l) - if not m or m.group(0) in used_decls: - new_asm += l - new_asm += '\n' - return new_asm - - -def is_identifier(tk): - if len(tk) == 0: - return False - first = tk[0] - if not first.isalpha() and first != '_': - return False - for i in range(1, len(tk)): - c = tk[i] - if not c.isalnum() and c != '_': - return False - return True - -def process_identifiers(l): - """ - process_identifiers - process all identifiers and modify them to have - consistent names across all platforms; specifically across ELF and MachO. - For example, MachO inserts an additional understore at the beginning of - names. This function removes that. - """ - parts = re.split(r'([a-zA-Z0-9_]+)', l) - new_line = '' - for tk in parts: - if is_identifier(tk): - if tk.startswith('__Z'): - tk = tk[1:] - elif tk.startswith('_') and len(tk) > 1 and \ - tk[1].isalpha() and tk[1] != 'Z': - tk = tk[1:] - new_line += tk - return new_line - - -def process_asm(asm): - """ - Strip the ASM of unwanted directives and lines - """ - new_contents = '' - asm = transform_labels(asm) - - # TODO: Add more things we want to remove - discard_regexes = [ - re.compile("\s+\..*$"), # directive - re.compile("\s*#(NO_APP|APP)$"), #inline ASM - re.compile("\s*#.*$"), # comment line - re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive - re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"), - ] - keep_regexes = [ - - ] - fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:") - for l in asm.splitlines(): - # Remove Mach-O attribute - l = l.replace('@GOTPCREL', '') - add_line = True - for reg in discard_regexes: - if reg.match(l) is not None: - add_line = False - break - for reg in keep_regexes: - if reg.match(l) is not 
None: - add_line = True - break - if add_line: - if fn_label_def.match(l) and len(new_contents) != 0: - new_contents += '\n' - l = process_identifiers(l) - new_contents += l - new_contents += '\n' - return new_contents - -def main(): - parser = ArgumentParser( - description='generate a stripped assembly file') - parser.add_argument( - 'input', metavar='input', type=str, nargs=1, - help='An input assembly file') - parser.add_argument( - 'out', metavar='output', type=str, nargs=1, - help='The output file') - args, unknown_args = parser.parse_known_args() - input = args.input[0] - output = args.out[0] - if not os.path.isfile(input): - print(("ERROR: input file '%s' does not exist") % input) - sys.exit(1) - contents = None - with open(input, 'r') as f: - contents = f.read() - new_contents = process_asm(contents) - with open(output, 'w') as f: - f.write(new_contents) - - -if __name__ == '__main__': - main() - -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; -# kate: indent-mode python; remove-trailing-spaces modified; diff --git a/lld/include/lld/ReaderWriter/MachOLinkingContext.h b/lld/include/lld/ReaderWriter/MachOLinkingContext.h deleted file mode 100644 index 974f323bc612..000000000000 --- a/lld/include/lld/ReaderWriter/MachOLinkingContext.h +++ /dev/null @@ -1,505 +0,0 @@ -//===- lld/ReaderWriter/MachOLinkingContext.h -----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H -#define LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H - -#include "lld/Core/LinkingContext.h" -#include "lld/Core/Reader.h" -#include "lld/Core/Writer.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/ErrorHandling.h" -#include - -using llvm::MachO::HeaderFileType; - -namespace lld { - -namespace mach_o { -class ArchHandler; -class MachODylibFile; -class MachOFile; -class SectCreateFile; -} - -class MachOLinkingContext : public LinkingContext { -public: - MachOLinkingContext(); - ~MachOLinkingContext() override; - - enum Arch { - arch_unknown, - arch_ppc, - arch_x86, - arch_x86_64, - arch_armv6, - arch_armv7, - arch_armv7s, - arch_arm64, - }; - - enum class OS { - unknown, - macOSX, - iOS, - iOS_simulator - }; - - enum class ExportMode { - globals, // Default, all global symbols exported. - exported, // -exported_symbol[s_list], only listed symbols exported. - unexported // -unexported_symbol[s_list], no listed symbol exported. - }; - - enum class DebugInfoMode { - addDebugMap, // Default - noDebugMap // -S option - }; - - enum class UndefinedMode { - error, - warning, - suppress, - dynamicLookup - }; - - enum ObjCConstraint { - objc_unknown = 0, - objc_supports_gc = 2, - objc_gc_only = 4, - // Image optimized by dyld = 8 - // GC compaction = 16 - objc_retainReleaseForSimulator = 32, - objc_retainRelease - }; - - /// Initializes the context to sane default values given the specified output - /// file type, arch, os, and minimum os version. This should be called before - /// other setXXX() methods. 
- void configure(HeaderFileType type, Arch arch, OS os, uint32_t minOSVersion, - bool exportDynamicSymbols); - - void addPasses(PassManager &pm) override; - bool validateImpl() override; - std::string demangle(StringRef symbolName) const override; - - void createImplicitFiles(std::vector> &) override; - - /// Creates a new file which is owned by the context. Returns a pointer to - /// the new file. - template - typename std::enable_if::value, T *>::type - make_file(Args &&... args) const { - auto file = std::unique_ptr(new T(std::forward(args)...)); - auto *filePtr = file.get(); - auto *ctx = const_cast(this); - ctx->getNodes().push_back(std::make_unique(std::move(file))); - return filePtr; - } - - uint32_t getCPUType() const; - uint32_t getCPUSubType() const; - - bool addEntryPointLoadCommand() const; - bool addUnixThreadLoadCommand() const; - bool outputTypeHasEntry() const; - bool is64Bit() const; - - virtual uint64_t pageZeroSize() const { return _pageZeroSize; } - virtual uint64_t pageSize() const { return _pageSize; } - - mach_o::ArchHandler &archHandler() const; - - HeaderFileType outputMachOType() const { return _outputMachOType; } - - Arch arch() const { return _arch; } - StringRef archName() const { return nameFromArch(_arch); } - OS os() const { return _os; } - - ExportMode exportMode() const { return _exportMode; } - void setExportMode(ExportMode mode) { _exportMode = mode; } - void addExportSymbol(StringRef sym); - bool exportRestrictMode() const { return _exportMode != ExportMode::globals; } - bool exportSymbolNamed(StringRef sym) const; - - DebugInfoMode debugInfoMode() const { return _debugInfoMode; } - void setDebugInfoMode(DebugInfoMode mode) { - _debugInfoMode = mode; - } - - void appendOrderedSymbol(StringRef symbol, StringRef filename); - - bool keepPrivateExterns() const { return _keepPrivateExterns; } - void setKeepPrivateExterns(bool v) { _keepPrivateExterns = v; } - bool demangleSymbols() const { return _demangle; } - void 
setDemangleSymbols(bool d) { _demangle = d; } - bool mergeObjCCategories() const { return _mergeObjCCategories; } - void setMergeObjCCategories(bool v) { _mergeObjCCategories = v; } - /// Create file at specified path which will contain a binary encoding - /// of all input and output file paths. - std::error_code createDependencyFile(StringRef path); - void addInputFileDependency(StringRef path) const; - void addInputFileNotFound(StringRef path) const; - void addOutputFileDependency(StringRef path) const; - - bool minOS(StringRef mac, StringRef iOS) const; - void setDoNothing(bool value) { _doNothing = value; } - bool doNothing() const { return _doNothing; } - bool printAtoms() const { return _printAtoms; } - bool testingFileUsage() const { return _testingFileUsage; } - const StringRefVector &searchDirs() const { return _searchDirs; } - const StringRefVector &frameworkDirs() const { return _frameworkDirs; } - void setSysLibRoots(const StringRefVector &paths); - const StringRefVector &sysLibRoots() const { return _syslibRoots; } - bool PIE() const { return _pie; } - void setPIE(bool pie) { _pie = pie; } - bool generateVersionLoadCommand() const { - return _generateVersionLoadCommand; - } - void setGenerateVersionLoadCommand(bool v) { - _generateVersionLoadCommand = v; - } - - bool generateFunctionStartsLoadCommand() const { - return _generateFunctionStartsLoadCommand; - } - void setGenerateFunctionStartsLoadCommand(bool v) { - _generateFunctionStartsLoadCommand = v; - } - - bool generateDataInCodeLoadCommand() const { - return _generateDataInCodeLoadCommand; - } - void setGenerateDataInCodeLoadCommand(bool v) { - _generateDataInCodeLoadCommand = v; - } - - uint64_t stackSize() const { return _stackSize; } - void setStackSize(uint64_t stackSize) { _stackSize = stackSize; } - - uint64_t baseAddress() const { return _baseAddress; } - void setBaseAddress(uint64_t baseAddress) { _baseAddress = baseAddress; } - - ObjCConstraint objcConstraint() const { return 
_objcConstraint; } - - uint32_t osMinVersion() const { return _osMinVersion; } - - uint32_t sdkVersion() const { return _sdkVersion; } - void setSdkVersion(uint64_t v) { _sdkVersion = v; } - - uint64_t sourceVersion() const { return _sourceVersion; } - void setSourceVersion(uint64_t v) { _sourceVersion = v; } - - uint32_t swiftVersion() const { return _swiftVersion; } - - /// Checks whether a given path on the filesystem exists. - /// - /// When running in -test_file_usage mode, this method consults an - /// internally maintained list of files that exist (provided by -path_exists) - /// instead of the actual filesystem. - bool pathExists(StringRef path) const; - - /// Like pathExists() but only used on files - not directories. - bool fileExists(StringRef path) const; - - /// Adds any library search paths derived from the given base, possibly - /// modified by -syslibroots. - /// - /// The set of paths added consists of approximately all syslibroot-prepended - /// versions of libPath that exist, or the original libPath if there are none - /// for whatever reason. With various edge-cases for compatibility. - void addModifiedSearchDir(StringRef libPath, bool isSystemPath = false); - - /// Determine whether -lFoo can be resolve within the given path, and - /// return the filename if so. - /// - /// The -lFoo option is documented to search for libFoo.dylib and libFoo.a in - /// that order, unless Foo ends in ".o", in which case only the exact file - /// matches (e.g. -lfoo.o would only find foo.o). - llvm::Optional searchDirForLibrary(StringRef path, - StringRef libName) const; - - /// Iterates through all search path entries looking for libName (as - /// specified by -lFoo). - llvm::Optional searchLibrary(StringRef libName) const; - - /// Add a framework search path. Internally, this method may be prepended - /// the path with syslibroot. 
- void addFrameworkSearchDir(StringRef fwPath, bool isSystemPath = false); - - /// Iterates through all framework directories looking for - /// Foo.framework/Foo (when fwName = "Foo"). - llvm::Optional findPathForFramework(StringRef fwName) const; - - /// The dylib's binary compatibility version, in the raw uint32 format. - /// - /// When building a dynamic library, this is the compatibility version that - /// gets embedded into the result. Other Mach-O binaries that link against - /// this library will store the compatibility version in its load command. At - /// runtime, the loader will verify that the binary is compatible with the - /// installed dynamic library. - uint32_t compatibilityVersion() const { return _compatibilityVersion; } - - /// The dylib's current version, in the raw uint32 format. - /// - /// When building a dynamic library, this is the current version that gets - /// embedded into the result. Other Mach-O binaries that link against - /// this library will store the compatibility version in its load command. - uint32_t currentVersion() const { return _currentVersion; } - - /// The dylib's install name. - /// - /// Binaries that link against the dylib will embed this path into the dylib - /// load command. When loading the binaries at runtime, this is the location - /// on disk that the loader will look for the dylib. - StringRef installName() const { return _installName; } - - /// Whether or not the dylib has side effects during initialization. - /// - /// Dylibs marked as being dead strippable provide the guarantee that loading - /// the dylib has no side effects, allowing the linker to strip out the dylib - /// when linking a binary that does not use any of its symbols. - bool deadStrippableDylib() const { return _deadStrippableDylib; } - - /// Whether or not to use flat namespace. 
- /// - /// MachO usually uses a two-level namespace, where each external symbol - /// referenced by the target is associated with the dylib that will provide - /// the symbol's definition at runtime. Using flat namespace overrides this - /// behavior: the linker searches all dylibs on the command line and all - /// dylibs those original dylibs depend on, but does not record which dylib - /// an external symbol came from. At runtime dyld again searches all images - /// and uses the first definition it finds. In addition, any undefines in - /// loaded flat_namespace dylibs must be resolvable at build time. - bool useFlatNamespace() const { return _flatNamespace; } - - /// How to handle undefined symbols. - /// - /// Options are: - /// * error: Report an error and terminate linking. - /// * warning: Report a warning, but continue linking. - /// * suppress: Ignore and continue linking. - /// * dynamic_lookup: For use with -twolevel namespace: Records source dylibs - /// for symbols that are defined in a linked dylib at static link time. - /// Undefined symbols are handled by searching all loaded images at - /// runtime. - UndefinedMode undefinedMode() const { return _undefinedMode; } - - /// The path to the executable that will load the bundle at runtime. - /// - /// When building a Mach-O bundle, this executable will be examined if there - /// are undefined symbols after the main link phase. It is expected that this - /// binary will be loading the bundle at runtime and will provide the symbols - /// at that point. 
- StringRef bundleLoader() const { return _bundleLoader; } - - void setCompatibilityVersion(uint32_t vers) { _compatibilityVersion = vers; } - void setCurrentVersion(uint32_t vers) { _currentVersion = vers; } - void setInstallName(StringRef name) { _installName = name; } - void setDeadStrippableDylib(bool deadStrippable) { - _deadStrippableDylib = deadStrippable; - } - void setUseFlatNamespace(bool flatNamespace) { - _flatNamespace = flatNamespace; - } - - void setUndefinedMode(UndefinedMode undefinedMode) { - _undefinedMode = undefinedMode; - } - - void setBundleLoader(StringRef loader) { _bundleLoader = loader; } - void setPrintAtoms(bool value=true) { _printAtoms = value; } - void setTestingFileUsage(bool value = true) { - _testingFileUsage = value; - } - void addExistingPathForDebug(StringRef path) { - _existingPaths.insert(path); - } - - void addRpath(StringRef rpath); - const StringRefVector &rpaths() const { return _rpaths; } - - /// Add section alignment constraint on final layout. - void addSectionAlignment(StringRef seg, StringRef sect, uint16_t align); - - /// Add a section based on a command-line sectcreate option. - void addSectCreateSection(StringRef seg, StringRef sect, - std::unique_ptr content); - - /// Returns true if specified section had alignment constraints. - bool sectionAligned(StringRef seg, StringRef sect, uint16_t &align) const; - - StringRef dyldPath() const { return "/usr/lib/dyld"; } - - /// Stub creation Pass should be run. - bool needsStubsPass() const; - - // GOT creation Pass should be run. - bool needsGOTPass() const; - - /// Pass to add TLV sections. - bool needsTLVPass() const; - - /// Pass to transform __compact_unwind into __unwind_info should be run. - bool needsCompactUnwindPass() const; - - /// Pass to add shims switching between thumb and arm mode. - bool needsShimPass() const; - - /// Pass to add objc image info and optimized objc data. 
- bool needsObjCPass() const; - - /// Magic symbol name stubs will need to help lazy bind. - StringRef binderSymbolName() const; - - /// Used to keep track of direct and indirect dylibs. - void registerDylib(mach_o::MachODylibFile *dylib, bool upward) const; - - // Reads a file from disk to memory. Returns only a needed chunk - // if a fat binary. - ErrorOr> getMemoryBuffer(StringRef path); - - /// Used to find indirect dylibs. Instantiates a MachODylibFile if one - /// has not already been made for the requested dylib. Uses -L and -F - /// search paths to allow indirect dylibs to be overridden. - mach_o::MachODylibFile* findIndirectDylib(StringRef path); - - uint32_t dylibCurrentVersion(StringRef installName) const; - - uint32_t dylibCompatVersion(StringRef installName) const; - - ArrayRef allDylibs() const { - return _allDylibs; - } - - /// Creates a copy (owned by this MachOLinkingContext) of a string. - StringRef copy(StringRef str) { return str.copy(_allocator); } - - /// If the memoryBuffer is a fat file with a slice for the current arch, - /// this method will return the offset and size of that slice. - bool sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, uint32_t &size); - - /// Returns if a command line option specified dylib is an upward link. - bool isUpwardDylib(StringRef installName) const; - - static bool isThinObjectFile(StringRef path, Arch &arch); - static Arch archFromCpuType(uint32_t cputype, uint32_t cpusubtype); - static Arch archFromName(StringRef archName); - static StringRef nameFromArch(Arch arch); - static uint32_t cpuTypeFromArch(Arch arch); - static uint32_t cpuSubtypeFromArch(Arch arch); - static bool is64Bit(Arch arch); - static bool isHostEndian(Arch arch); - static bool isBigEndian(Arch arch); - - /// Construct 32-bit value from string "X.Y.Z" where - /// bits are xxxx.yy.zz. 
Largest number is 65535.255.255 - static bool parsePackedVersion(StringRef str, uint32_t &result); - - /// Construct 64-bit value from string "A.B.C.D.E" where - /// bits are aaaa.bb.cc.dd.ee. Largest number is 16777215.1023.1023.1023.1023 - static bool parsePackedVersion(StringRef str, uint64_t &result); - - void finalizeInputFiles() override; - - llvm::Error handleLoadedFile(File &file) override; - - bool customAtomOrderer(const DefinedAtom *left, const DefinedAtom *right, - bool &leftBeforeRight) const; - - /// Return the 'flat namespace' file. This is the file that supplies - /// atoms for otherwise undefined symbols when the -flat_namespace or - /// -undefined dynamic_lookup options are used. - File* flatNamespaceFile() const { return _flatNamespaceFile; } - -private: - Writer &writer() const override; - mach_o::MachODylibFile* loadIndirectDylib(StringRef path); - struct ArchInfo { - StringRef archName; - MachOLinkingContext::Arch arch; - bool littleEndian; - uint32_t cputype; - uint32_t cpusubtype; - }; - - struct SectionAlign { - StringRef segmentName; - StringRef sectionName; - uint16_t align; - }; - - struct OrderFileNode { - StringRef fileFilter; - unsigned order; - }; - - static bool findOrderOrdinal(const std::vector &nodes, - const DefinedAtom *atom, unsigned &ordinal); - - static ArchInfo _s_archInfos[]; - - std::set _existingPaths; // For testing only. 
- StringRefVector _searchDirs; - StringRefVector _syslibRoots; - StringRefVector _frameworkDirs; - HeaderFileType _outputMachOType = llvm::MachO::MH_EXECUTE; - bool _outputMachOTypeStatic = false; // Disambiguate static vs dynamic prog - bool _doNothing = false; // for -help and -v which just print info - bool _pie = false; - Arch _arch = arch_unknown; - OS _os = OS::macOSX; - uint32_t _osMinVersion = 0; - uint32_t _sdkVersion = 0; - uint64_t _sourceVersion = 0; - uint64_t _pageZeroSize = 0; - uint64_t _pageSize = 4096; - uint64_t _baseAddress = 0; - uint64_t _stackSize = 0; - uint32_t _compatibilityVersion = 0; - uint32_t _currentVersion = 0; - ObjCConstraint _objcConstraint = objc_unknown; - uint32_t _swiftVersion = 0; - StringRef _installName; - StringRefVector _rpaths; - bool _flatNamespace = false; - UndefinedMode _undefinedMode = UndefinedMode::error; - bool _deadStrippableDylib = false; - bool _printAtoms = false; - bool _testingFileUsage = false; - bool _keepPrivateExterns = false; - bool _demangle = false; - bool _mergeObjCCategories = true; - bool _generateVersionLoadCommand = false; - bool _generateFunctionStartsLoadCommand = false; - bool _generateDataInCodeLoadCommand = false; - StringRef _bundleLoader; - mutable std::unique_ptr _archHandler; - mutable std::unique_ptr _writer; - std::vector _sectAligns; - mutable llvm::StringMap _pathToDylibMap; - mutable std::vector _allDylibs; - mutable std::set _upwardDylibs; - mutable std::vector> _indirectDylibs; - mutable std::mutex _dylibsMutex; - ExportMode _exportMode = ExportMode::globals; - llvm::StringSet<> _exportedSymbols; - DebugInfoMode _debugInfoMode = DebugInfoMode::addDebugMap; - std::unique_ptr _dependencyInfo; - llvm::StringMap> _orderFiles; - unsigned _orderFileEntries = 0; - File *_flatNamespaceFile = nullptr; - mach_o::SectCreateFile *_sectCreateFile = nullptr; -}; - -} // end namespace lld - -#endif // LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H diff --git 
a/lld/include/lld/ReaderWriter/YamlContext.h b/lld/include/lld/ReaderWriter/YamlContext.h deleted file mode 100644 index dc133e3627de..000000000000 --- a/lld/include/lld/ReaderWriter/YamlContext.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- lld/ReaderWriter/YamlContext.h - object used in YAML I/O context ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_YAML_CONTEXT_H -#define LLD_READER_WRITER_YAML_CONTEXT_H - -#include "lld/Common/LLVM.h" -#include -#include -#include - -namespace lld { -class File; -class LinkingContext; -class Registry; -namespace mach_o { -namespace normalized { -struct NormalizedFile; -} -} - -using lld::mach_o::normalized::NormalizedFile; - -/// When YAML I/O is used in lld, the yaml context always holds a YamlContext -/// object. We need to support hetergenous yaml documents which each require -/// different context info. This struct supports all clients. 
-struct YamlContext { - const LinkingContext *_ctx = nullptr; - const Registry *_registry = nullptr; - File *_file = nullptr; - NormalizedFile *_normalizeMachOFile = nullptr; - StringRef _path; -}; - -} // end namespace lld - -#endif // LLD_READER_WRITER_YAML_CONTEXT_H diff --git a/lld/lib/CMakeLists.txt b/lld/lib/CMakeLists.txt deleted file mode 100644 index 8884efcfe9ba..000000000000 --- a/lld/lib/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_subdirectory(Core) -add_subdirectory(Driver) -add_subdirectory(ReaderWriter) diff --git a/lld/lib/Core/CMakeLists.txt b/lld/lib/Core/CMakeLists.txt deleted file mode 100644 index d5e507536b72..000000000000 --- a/lld/lib/Core/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -add_lld_library(lldCore - DefinedAtom.cpp - Error.cpp - File.cpp - LinkingContext.cpp - Reader.cpp - Resolver.cpp - SymbolTable.cpp - Writer.cpp - - ADDITIONAL_HEADER_DIRS - ${LLD_INCLUDE_DIR}/lld/Core - - LINK_COMPONENTS - BinaryFormat - MC - Support - - LINK_LIBS - ${LLVM_PTHREAD_LIB} - - DEPENDS - intrinsics_gen - ) diff --git a/lld/lib/Core/DefinedAtom.cpp b/lld/lib/Core/DefinedAtom.cpp deleted file mode 100644 index 3c1eece16841..000000000000 --- a/lld/lib/Core/DefinedAtom.cpp +++ /dev/null @@ -1,81 +0,0 @@ -//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/ErrorHandling.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" - -namespace lld { - -DefinedAtom::ContentPermissions DefinedAtom::permissions() const { - // By default base permissions on content type. 
- return permissions(this->contentType()); -} - -// Utility function for deriving permissions from content type -DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) { - switch (type) { - case typeCode: - case typeResolver: - case typeBranchIsland: - case typeBranchShim: - case typeStub: - case typeStubHelper: - case typeMachHeader: - return permR_X; - - case typeConstant: - case typeCString: - case typeUTF16String: - case typeCFI: - case typeLSDA: - case typeLiteral4: - case typeLiteral8: - case typeLiteral16: - case typeDTraceDOF: - case typeCompactUnwindInfo: - case typeProcessedUnwindInfo: - case typeObjCImageInfo: - case typeObjCMethodList: - return permR__; - - case typeData: - case typeDataFast: - case typeZeroFill: - case typeZeroFillFast: - case typeObjC1Class: - case typeLazyPointer: - case typeLazyDylibPointer: - case typeNonLazyPointer: - case typeThunkTLV: - return permRW_; - - case typeGOT: - case typeConstData: - case typeCFString: - case typeInitializerPtr: - case typeTerminatorPtr: - case typeCStringPtr: - case typeObjCClassPtr: - case typeObjC2CategoryList: - case typeInterposingTuples: - case typeTLVInitialData: - case typeTLVInitialZeroFill: - case typeTLVInitializerPtr: - return permRW_L; - - case typeUnknown: - case typeTempLTO: - case typeSectCreate: - case typeDSOHandle: - return permUnknown; - } - llvm_unreachable("unknown content type"); -} - -} // namespace diff --git a/lld/lib/Core/Error.cpp b/lld/lib/Core/Error.cpp deleted file mode 100644 index a4f4b1b8af48..000000000000 --- a/lld/lib/Core/Error.cpp +++ /dev/null @@ -1,93 +0,0 @@ -//===- Error.cpp - system_error extensions for lld --------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/Error.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/ErrorHandling.h" -#include -#include -#include - -using namespace lld; - -namespace { -class _YamlReaderErrorCategory : public std::error_category { -public: - const char* name() const noexcept override { - return "lld.yaml.reader"; - } - - std::string message(int ev) const override { - switch (static_cast(ev)) { - case YamlReaderError::unknown_keyword: - return "Unknown keyword found in yaml file"; - case YamlReaderError::illegal_value: - return "Bad value found in yaml file"; - } - llvm_unreachable("An enumerator of YamlReaderError does not have a " - "message defined."); - } -}; -} // end anonymous namespace - -const std::error_category &lld::YamlReaderCategory() { - static _YamlReaderErrorCategory o; - return o; -} - -namespace lld { - -/// Temporary class to enable make_dynamic_error_code() until -/// llvm::ErrorOr<> is updated to work with error encapsulations -/// other than error_code. -class dynamic_error_category : public std::error_category { -public: - ~dynamic_error_category() override = default; - - const char *name() const noexcept override { - return "lld.dynamic_error"; - } - - std::string message(int ev) const override { - assert(ev >= 0); - assert(ev < (int)_messages.size()); - // The value is an index into the string vector. - return _messages[ev]; - } - - int add(std::string msg) { - std::lock_guard lock(_mutex); - // Value zero is always the success value. - if (_messages.empty()) - _messages.push_back("Success"); - _messages.push_back(msg); - // Return the index of the string just appended. 
- return _messages.size() - 1; - } - -private: - std::vector _messages; - std::recursive_mutex _mutex; -}; - -static dynamic_error_category categorySingleton; - -std::error_code make_dynamic_error_code(StringRef msg) { - return std::error_code(categorySingleton.add(std::string(msg)), - categorySingleton); -} - -char GenericError::ID = 0; - -GenericError::GenericError(Twine Msg) : Msg(Msg.str()) { } - -void GenericError::log(raw_ostream &OS) const { - OS << Msg; -} - -} // namespace lld diff --git a/lld/lib/Core/File.cpp b/lld/lib/Core/File.cpp deleted file mode 100644 index ce33923c136e..000000000000 --- a/lld/lib/Core/File.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===- Core/File.cpp - A Container of Atoms -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/File.h" -#include - -namespace lld { - -File::~File() = default; - -File::AtomVector File::_noDefinedAtoms; -File::AtomVector File::_noUndefinedAtoms; -File::AtomVector File::_noSharedLibraryAtoms; -File::AtomVector File::_noAbsoluteAtoms; - -std::error_code File::parse() { - std::lock_guard lock(_parseMutex); - if (!_lastError.hasValue()) - _lastError = doParse(); - return _lastError.getValue(); -} - -} // end namespace lld diff --git a/lld/lib/Core/LinkingContext.cpp b/lld/lib/Core/LinkingContext.cpp deleted file mode 100644 index 911ae606678d..000000000000 --- a/lld/lib/Core/LinkingContext.cpp +++ /dev/null @@ -1,69 +0,0 @@ -//===- lib/Core/LinkingContext.cpp - Linker Context Object Interface ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/LinkingContext.h" -#include "lld/Core/File.h" -#include "lld/Core/Node.h" -#include "lld/Core/Simple.h" -#include "lld/Core/Writer.h" -#include - -namespace lld { - -LinkingContext::LinkingContext() = default; - -LinkingContext::~LinkingContext() = default; - -bool LinkingContext::validate() { - return validateImpl(); -} - -llvm::Error LinkingContext::writeFile(const File &linkedFile) const { - return this->writer().writeFile(linkedFile, _outputPath); -} - -std::unique_ptr LinkingContext::createEntrySymbolFile() const { - return createEntrySymbolFile(""); -} - -std::unique_ptr -LinkingContext::createEntrySymbolFile(StringRef filename) const { - if (entrySymbolName().empty()) - return nullptr; - std::unique_ptr entryFile(new SimpleFile(filename, - File::kindEntryObject)); - entryFile->addAtom( - *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName()))); - return std::move(entryFile); -} - -std::unique_ptr LinkingContext::createUndefinedSymbolFile() const { - return createUndefinedSymbolFile(""); -} - -std::unique_ptr -LinkingContext::createUndefinedSymbolFile(StringRef filename) const { - if (_initialUndefinedSymbols.empty()) - return nullptr; - std::unique_ptr undefinedSymFile( - new SimpleFile(filename, File::kindUndefinedSymsObject)); - for (StringRef undefSym : _initialUndefinedSymbols) - undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom( - *undefinedSymFile, undefSym))); - return std::move(undefinedSymFile); -} - -void LinkingContext::createInternalFiles( - std::vector> &result) const { - if (std::unique_ptr file = createEntrySymbolFile()) - result.push_back(std::move(file)); - if (std::unique_ptr file = createUndefinedSymbolFile()) - result.push_back(std::move(file)); -} - -} // end namespace lld diff --git a/lld/lib/Core/Reader.cpp b/lld/lib/Core/Reader.cpp deleted 
file mode 100644 index 3592d87ce627..000000000000 --- a/lld/lib/Core/Reader.cpp +++ /dev/null @@ -1,113 +0,0 @@ -//===- lib/Core/Reader.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/Reader.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Magic.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include -#include - -using llvm::file_magic; -using llvm::identify_magic; - -namespace lld { - -YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() = default; - -void Registry::add(std::unique_ptr reader) { - _readers.push_back(std::move(reader)); -} - -void Registry::add(std::unique_ptr handler) { - _yamlHandlers.push_back(std::move(handler)); -} - -ErrorOr> -Registry::loadFile(std::unique_ptr mb) const { - // Get file magic. - StringRef content(mb->getBufferStart(), mb->getBufferSize()); - file_magic fileType = identify_magic(content); - - // Ask each registered reader if it can handle this file type or extension. - for (const std::unique_ptr &reader : _readers) { - if (!reader->canParse(fileType, mb->getMemBufferRef())) - continue; - return reader->loadFile(std::move(mb), *this); - } - - // No Reader could parse this file. 
- return make_error_code(llvm::errc::executable_format_error); -} - -static const Registry::KindStrings kindStrings[] = { - {Reference::kindLayoutAfter, "layout-after"}, - {Reference::kindAssociate, "associate"}, - LLD_KIND_STRING_END}; - -Registry::Registry() { - addKindTable(Reference::KindNamespace::all, Reference::KindArch::all, - kindStrings); -} - -bool Registry::handleTaggedDoc(llvm::yaml::IO &io, - const lld::File *&file) const { - for (const std::unique_ptr &h : _yamlHandlers) - if (h->handledDocTag(io, file)) - return true; - return false; -} - -void Registry::addKindTable(Reference::KindNamespace ns, - Reference::KindArch arch, - const KindStrings array[]) { - KindEntry entry = { ns, arch, array }; - _kindEntries.push_back(entry); -} - -bool Registry::referenceKindFromString(StringRef inputStr, - Reference::KindNamespace &ns, - Reference::KindArch &arch, - Reference::KindValue &value) const { - for (const KindEntry &entry : _kindEntries) { - for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { - if (!inputStr.equals(pair->name)) - continue; - ns = entry.ns; - arch = entry.arch; - value = pair->value; - return true; - } - } - return false; -} - -bool Registry::referenceKindToString(Reference::KindNamespace ns, - Reference::KindArch arch, - Reference::KindValue value, - StringRef &str) const { - for (const KindEntry &entry : _kindEntries) { - if (entry.ns != ns) - continue; - if (entry.arch != arch) - continue; - for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { - if (pair->value != value) - continue; - str = pair->name; - return true; - } - } - return false; -} - -} // end namespace lld diff --git a/lld/lib/Core/Resolver.cpp b/lld/lib/Core/Resolver.cpp deleted file mode 100644 index 1ed0b1c6e618..000000000000 --- a/lld/lib/Core/Resolver.cpp +++ /dev/null @@ -1,496 +0,0 @@ -//===- Core/Resolver.cpp - Resolves Atom References -----------------------===// -// -// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/Resolver.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/ArchiveLibraryFile.h" -#include "lld/Core/Atom.h" -#include "lld/Core/File.h" -#include "lld/Core/Instrumentation.h" -#include "lld/Core/LinkingContext.h" -#include "lld/Core/SharedLibraryFile.h" -#include "lld/Core/SymbolTable.h" -#include "lld/Core/UndefinedAtom.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include - -namespace lld { - -llvm::Expected Resolver::handleFile(File &file) { - if (auto ec = _ctx.handleLoadedFile(file)) - return std::move(ec); - bool undefAdded = false; - for (auto &atom : file.defined().owning_ptrs()) - doDefinedAtom(std::move(atom)); - for (auto &atom : file.undefined().owning_ptrs()) { - if (doUndefinedAtom(std::move(atom))) - undefAdded = true; - } - for (auto &atom : file.sharedLibrary().owning_ptrs()) - doSharedLibraryAtom(std::move(atom)); - for (auto &atom : file.absolute().owning_ptrs()) - doAbsoluteAtom(std::move(atom)); - return undefAdded; -} - -llvm::Expected Resolver::forEachUndefines(File &file, - UndefCallback callback) { - size_t i = _undefineIndex[&file]; - bool undefAdded = false; - do { - for (; i < _undefines.size(); ++i) { - StringRef undefName = _undefines[i]; - if (undefName.empty()) - continue; - const Atom *atom = _symbolTable.findByName(undefName); - if (!isa(atom) || _symbolTable.isCoalescedAway(atom)) { - // The symbol was resolved by some other file. Cache the result. 
- _undefines[i] = ""; - continue; - } - auto undefAddedOrError = callback(undefName); - if (auto ec = undefAddedOrError.takeError()) - return std::move(ec); - undefAdded |= undefAddedOrError.get(); - } - } while (i < _undefines.size()); - _undefineIndex[&file] = i; - return undefAdded; -} - -llvm::Expected Resolver::handleArchiveFile(File &file) { - ArchiveLibraryFile *archiveFile = cast(&file); - return forEachUndefines(file, - [&](StringRef undefName) -> llvm::Expected { - if (File *member = archiveFile->find(undefName)) { - member->setOrdinal(_ctx.getNextOrdinalAndIncrement()); - return handleFile(*member); - } - return false; - }); -} - -llvm::Error Resolver::handleSharedLibrary(File &file) { - // Add all the atoms from the shared library - SharedLibraryFile *sharedLibrary = cast(&file); - auto undefAddedOrError = handleFile(*sharedLibrary); - if (auto ec = undefAddedOrError.takeError()) - return ec; - undefAddedOrError = - forEachUndefines(file, [&](StringRef undefName) -> llvm::Expected { - auto atom = sharedLibrary->exports(undefName); - if (atom.get()) - doSharedLibraryAtom(std::move(atom)); - return false; - }); - - if (auto ec = undefAddedOrError.takeError()) - return ec; - return llvm::Error::success(); -} - -bool Resolver::doUndefinedAtom(OwningAtomPtr atom) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() - << " UndefinedAtom: " - << llvm::format("0x%09lX", atom.get()) - << ", name=" << atom.get()->name() << "\n"); - - // tell symbol table - bool newUndefAdded = _symbolTable.add(*atom.get()); - if (newUndefAdded) - _undefines.push_back(atom.get()->name()); - - // add to list of known atoms - _atoms.push_back(OwningAtomPtr(atom.release())); - - return newUndefAdded; -} - -// Called on each atom when a file is added. Returns true if a given -// atom is added to the symbol table. 
-void Resolver::doDefinedAtom(OwningAtomPtr atom) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() - << " DefinedAtom: " - << llvm::format("0x%09lX", atom.get()) - << ", file=#" - << atom.get()->file().ordinal() - << ", atom=#" - << atom.get()->ordinal() - << ", name=" - << atom.get()->name() - << ", type=" - << atom.get()->contentType() - << "\n"); - - // An atom that should never be dead-stripped is a dead-strip root. - if (_ctx.deadStrip() && - atom.get()->deadStrip() == DefinedAtom::deadStripNever) { - _deadStripRoots.insert(atom.get()); - } - - // add to list of known atoms - _symbolTable.add(*atom.get()); - _atoms.push_back(OwningAtomPtr(atom.release())); -} - -void Resolver::doSharedLibraryAtom(OwningAtomPtr atom) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() - << " SharedLibraryAtom: " - << llvm::format("0x%09lX", atom.get()) - << ", name=" - << atom.get()->name() - << "\n"); - - // tell symbol table - _symbolTable.add(*atom.get()); - - // add to list of known atoms - _atoms.push_back(OwningAtomPtr(atom.release())); -} - -void Resolver::doAbsoluteAtom(OwningAtomPtr atom) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() - << " AbsoluteAtom: " - << llvm::format("0x%09lX", atom.get()) - << ", name=" - << atom.get()->name() - << "\n"); - - // tell symbol table - if (atom.get()->scope() != Atom::scopeTranslationUnit) - _symbolTable.add(*atom.get()); - - // add to list of known atoms - _atoms.push_back(OwningAtomPtr(atom.release())); -} - -// Returns true if at least one of N previous files has created an -// undefined symbol. 
-bool Resolver::undefinesAdded(int begin, int end) { - std::vector> &inputs = _ctx.getNodes(); - for (int i = begin; i < end; ++i) - if (FileNode *node = dyn_cast(inputs[i].get())) - if (_newUndefinesAdded[node->getFile()]) - return true; - return false; -} - -File *Resolver::getFile(int &index) { - std::vector> &inputs = _ctx.getNodes(); - if ((size_t)index >= inputs.size()) - return nullptr; - if (GroupEnd *group = dyn_cast(inputs[index].get())) { - // We are at the end of the current group. If one or more new - // undefined atom has been added in the last groupSize files, we - // reiterate over the files. - int size = group->getSize(); - if (undefinesAdded(index - size, index)) { - index -= size; - return getFile(index); - } - ++index; - return getFile(index); - } - return cast(inputs[index++].get())->getFile(); -} - -// Keep adding atoms until _ctx.getNextFile() returns an error. This -// function is where undefined atoms are resolved. -bool Resolver::resolveUndefines() { - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "******** Resolving undefines:\n"); - ScopedTask task(getDefaultDomain(), "resolveUndefines"); - int index = 0; - std::set seen; - for (;;) { - bool undefAdded = false; - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "Loading file #" << index << "\n"); - File *file = getFile(index); - if (!file) - return true; - if (std::error_code ec = file->parse()) { - llvm::errs() << "Cannot open " + file->path() << ": " << ec.message() - << "\n"; - return false; - } - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "Loaded file: " << file->path() << "\n"); - switch (file->kind()) { - case File::kindErrorObject: - case File::kindNormalizedObject: - case File::kindMachObject: - case File::kindCEntryObject: - case File::kindHeaderObject: - case File::kindEntryObject: - case File::kindUndefinedSymsObject: - case File::kindStubHelperObject: - case File::kindResolverMergedObject: - case File::kindSectCreateObject: { - // The same file may be visited more than once 
if the file is - // in --start-group and --end-group. Only library files should - // be processed more than once. - if (seen.count(file)) - break; - seen.insert(file); - assert(!file->hasOrdinal()); - file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); - auto undefAddedOrError = handleFile(*file); - if (auto EC = undefAddedOrError.takeError()) { - // FIXME: This should be passed to logAllUnhandledErrors but it needs - // to be passed a Twine instead of a string. - llvm::errs() << "Error in " + file->path() << ": "; - logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); - return false; - } - undefAdded = undefAddedOrError.get(); - break; - } - case File::kindArchiveLibrary: { - if (!file->hasOrdinal()) - file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); - auto undefAddedOrError = handleArchiveFile(*file); - if (auto EC = undefAddedOrError.takeError()) { - // FIXME: This should be passed to logAllUnhandledErrors but it needs - // to be passed a Twine instead of a string. - llvm::errs() << "Error in " + file->path() << ": "; - logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); - return false; - } - undefAdded = undefAddedOrError.get(); - break; - } - case File::kindSharedLibrary: - if (!file->hasOrdinal()) - file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); - if (auto EC = handleSharedLibrary(*file)) { - // FIXME: This should be passed to logAllUnhandledErrors but it needs - // to be passed a Twine instead of a string. 
- llvm::errs() << "Error in " + file->path() << ": "; - logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); - return false; - } - break; - } - _newUndefinesAdded[file] = undefAdded; - } -} - -// switch all references to undefined or coalesced away atoms -// to the new defined atom -void Resolver::updateReferences() { - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "******** Updating references:\n"); - ScopedTask task(getDefaultDomain(), "updateReferences"); - for (const OwningAtomPtr &atom : _atoms) { - if (const DefinedAtom *defAtom = dyn_cast(atom.get())) { - for (const Reference *ref : *defAtom) { - // A reference of type kindAssociate shouldn't be updated. - // Instead, an atom having such reference will be removed - // if the target atom is coalesced away, so that they will - // go away as a group. - if (ref->kindNamespace() == lld::Reference::KindNamespace::all && - ref->kindValue() == lld::Reference::kindAssociate) { - if (_symbolTable.isCoalescedAway(atom.get())) - _deadAtoms.insert(ref->target()); - continue; - } - const Atom *newTarget = _symbolTable.replacement(ref->target()); - const_cast(ref)->setTarget(newTarget); - } - } - } -} - -// For dead code stripping, recursively mark atoms "live" -void Resolver::markLive(const Atom *atom) { - // Mark the atom is live. If it's already marked live, then stop recursion. 
- auto exists = _liveAtoms.insert(atom); - if (!exists.second) - return; - - // Mark all atoms it references as live - if (const DefinedAtom *defAtom = dyn_cast(atom)) { - for (const Reference *ref : *defAtom) - markLive(ref->target()); - for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) { - const Atom *target = p.second; - markLive(target); - } - } -} - -static bool isBackref(const Reference *ref) { - if (ref->kindNamespace() != lld::Reference::KindNamespace::all) - return false; - return (ref->kindValue() == lld::Reference::kindLayoutAfter); -} - -// remove all atoms not actually used -void Resolver::deadStripOptimize() { - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "******** Dead stripping unused atoms:\n"); - ScopedTask task(getDefaultDomain(), "deadStripOptimize"); - // only do this optimization with -dead_strip - if (!_ctx.deadStrip()) - return; - - // Some type of references prevent referring atoms to be dead-striped. - // Make a reverse map of such references before traversing the graph. - // While traversing the list of atoms, mark AbsoluteAtoms as live - // in order to avoid reclaim. - for (const OwningAtomPtr &atom : _atoms) { - if (const DefinedAtom *defAtom = dyn_cast(atom.get())) - for (const Reference *ref : *defAtom) - if (isBackref(ref)) - _reverseRef.insert(std::make_pair(ref->target(), atom.get())); - if (const AbsoluteAtom *absAtom = dyn_cast(atom.get())) - markLive(absAtom); - } - - // By default, shared libraries are built with all globals as dead strip roots - if (_ctx.globalsAreDeadStripRoots()) - for (const OwningAtomPtr &atom : _atoms) - if (const DefinedAtom *defAtom = dyn_cast(atom.get())) - if (defAtom->scope() == DefinedAtom::scopeGlobal) - _deadStripRoots.insert(defAtom); - - // Or, use list of names that are dead strip roots. 
- for (const StringRef &name : _ctx.deadStripRoots()) { - const Atom *symAtom = _symbolTable.findByName(name); - assert(symAtom); - _deadStripRoots.insert(symAtom); - } - - // mark all roots as live, and recursively all atoms they reference - for (const Atom *dsrAtom : _deadStripRoots) - markLive(dsrAtom); - - // now remove all non-live atoms from _atoms - llvm::erase_if(_atoms, [&](OwningAtomPtr &a) { - return _liveAtoms.count(a.get()) == 0; - }); -} - -// error out if some undefines remain -bool Resolver::checkUndefines() { - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "******** Checking for undefines:\n"); - - // build vector of remaining undefined symbols - std::vector undefinedAtoms = _symbolTable.undefines(); - if (_ctx.deadStrip()) { - // When dead code stripping, we don't care if dead atoms are undefined. - llvm::erase_if(undefinedAtoms, - [&](const Atom *a) { return _liveAtoms.count(a) == 0; }); - } - - if (undefinedAtoms.empty()) - return false; - - // Warn about unresolved symbols. - bool foundUndefines = false; - for (const UndefinedAtom *undef : undefinedAtoms) { - // Skip over a weak symbol. - if (undef->canBeNull() != UndefinedAtom::canBeNullNever) - continue; - - // If this is a library and undefined symbols are allowed on the - // target platform, skip over it. - if (isa(undef->file()) && _ctx.allowShlibUndefines()) - continue; - - // If the undefine is coalesced away, skip over it. - if (_symbolTable.isCoalescedAway(undef)) - continue; - - // Seems like this symbol is undefined. Warn that. - foundUndefines = true; - if (_ctx.printRemainingUndefines()) { - llvm::errs() << "Undefined symbol: " << undef->file().path() << ": " - << _ctx.demangle(undef->name()) << "\n"; - } - } - if (!foundUndefines) - return false; - if (_ctx.printRemainingUndefines()) - llvm::errs() << "symbol(s) not found\n"; - return true; -} - -// Remove from _atoms all coalesced away atoms. 
-void Resolver::removeCoalescedAwayAtoms() { - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "******** Removing coalesced away atoms:\n"); - ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms"); - llvm::erase_if(_atoms, [&](OwningAtomPtr &a) { - return _symbolTable.isCoalescedAway(a.get()) || _deadAtoms.count(a.get()); - }); -} - -bool Resolver::resolve() { - DEBUG_WITH_TYPE("resolver", - llvm::dbgs() << "******** Resolving atom references:\n"); - if (!resolveUndefines()) - return false; - updateReferences(); - deadStripOptimize(); - if (checkUndefines()) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Found undefines... "); - if (!_ctx.allowRemainingUndefines()) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we don't allow\n"); - return false; - } - DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we are ok with\n"); - } - removeCoalescedAwayAtoms(); - _result->addAtoms(_atoms); - DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "******** Finished resolver\n"); - return true; -} - -void Resolver::MergedFile::addAtoms( - llvm::MutableArrayRef> all) { - ScopedTask task(getDefaultDomain(), "addAtoms"); - DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n"); - - for (OwningAtomPtr &atom : all) { -#ifndef NDEBUG - if (auto *definedAtom = dyn_cast(atom.get())) { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() - << llvm::format(" 0x%09lX", definedAtom) - << ", file=#" - << definedAtom->file().ordinal() - << ", atom=#" - << definedAtom->ordinal() - << ", name=" - << definedAtom->name() - << ", type=" - << definedAtom->contentType() - << "\n"); - } else { - DEBUG_WITH_TYPE("resolver", llvm::dbgs() - << llvm::format(" 0x%09lX", atom.get()) - << ", name=" - << atom.get()->name() - << "\n"); - } -#endif - addAtom(*atom.release()); - } -} - -} // namespace lld diff --git a/lld/lib/Core/SymbolTable.cpp b/lld/lib/Core/SymbolTable.cpp deleted file mode 100644 index 3ce9555aa494..000000000000 --- a/lld/lib/Core/SymbolTable.cpp +++ /dev/null @@ 
-1,284 +0,0 @@ -//===- Core/SymbolTable.cpp - Main Symbol Table ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/SymbolTable.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/AbsoluteAtom.h" -#include "lld/Core/Atom.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/LinkingContext.h" -#include "lld/Core/Resolver.h" -#include "lld/Core/SharedLibraryAtom.h" -#include "lld/Core/UndefinedAtom.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include - -namespace lld { -bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); } - -bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); } - -bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); } - -bool SymbolTable::add(const DefinedAtom &atom) { - if (!atom.name().empty() && - atom.scope() != DefinedAtom::scopeTranslationUnit) { - // Named atoms cannot be merged by content. - assert(atom.merge() != DefinedAtom::mergeByContent); - // Track named atoms that are not scoped to file (static). - return addByName(atom); - } - if (atom.merge() == DefinedAtom::mergeByContent) { - // Named atoms cannot be merged by content. - assert(atom.name().empty()); - // Currently only read-only constants can be merged. - if (atom.permissions() == DefinedAtom::permR__) - return addByContent(atom); - // TODO: support mergeByContent of data atoms by comparing content & fixups. 
- } - return false; -} - -enum NameCollisionResolution { - NCR_First, - NCR_Second, - NCR_DupDef, - NCR_DupUndef, - NCR_DupShLib, - NCR_Error -}; - -static NameCollisionResolution cases[4][4] = { - //regular absolute undef sharedLib - { - // first is regular - NCR_DupDef, NCR_Error, NCR_First, NCR_First - }, - { - // first is absolute - NCR_Error, NCR_Error, NCR_First, NCR_First - }, - { - // first is undef - NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second - }, - { - // first is sharedLib - NCR_Second, NCR_Second, NCR_First, NCR_DupShLib - } -}; - -static NameCollisionResolution collide(Atom::Definition first, - Atom::Definition second) { - return cases[first][second]; -} - -enum MergeResolution { - MCR_First, - MCR_Second, - MCR_Largest, - MCR_SameSize, - MCR_Error -}; - -static MergeResolution mergeCases[][6] = { - // no tentative weak weakAddress sameNameAndSize largest - {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no - {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative - {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak - {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress - {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize - {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest -}; - -static MergeResolution mergeSelect(DefinedAtom::Merge first, - DefinedAtom::Merge second) { - assert(first != DefinedAtom::mergeByContent); - assert(second != DefinedAtom::mergeByContent); - return mergeCases[first][second]; -} - -bool SymbolTable::addByName(const Atom &newAtom) { - StringRef name = newAtom.name(); - assert(!name.empty()); - const Atom *existing = findByName(name); - if (existing == nullptr) { - // Name is not in symbol table yet, add it associate with this atom. 
- _nameTable[name] = &newAtom; - return true; - } - - // Do nothing if the same object is added more than once. - if (existing == &newAtom) - return false; - - // Name is already in symbol table and associated with another atom. - bool useNew = true; - switch (collide(existing->definition(), newAtom.definition())) { - case NCR_First: - useNew = false; - break; - case NCR_Second: - useNew = true; - break; - case NCR_DupDef: { - const auto *existingDef = cast(existing); - const auto *newDef = cast(&newAtom); - switch (mergeSelect(existingDef->merge(), newDef->merge())) { - case MCR_First: - useNew = false; - break; - case MCR_Second: - useNew = true; - break; - case MCR_Largest: { - uint64_t existingSize = existingDef->sectionSize(); - uint64_t newSize = newDef->sectionSize(); - useNew = (newSize >= existingSize); - break; - } - case MCR_SameSize: { - uint64_t existingSize = existingDef->sectionSize(); - uint64_t newSize = newDef->sectionSize(); - if (existingSize == newSize) { - useNew = true; - break; - } - llvm::errs() << "Size mismatch: " << existing->name() << " (" - << existingSize << ") " << newAtom.name() << " (" << newSize - << ")\n"; - LLVM_FALLTHROUGH; - } - case MCR_Error: - llvm::errs() << "Duplicate symbols: " << existing->name() << ":" - << existing->file().path() << " and " << newAtom.name() - << ":" << newAtom.file().path() << "\n"; - llvm::report_fatal_error("duplicate symbol error"); - break; - } - break; - } - case NCR_DupUndef: { - const UndefinedAtom* existingUndef = cast(existing); - const UndefinedAtom* newUndef = cast(&newAtom); - - bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull()); - if (sameCanBeNull) - useNew = false; - else - useNew = (newUndef->canBeNull() < existingUndef->canBeNull()); - break; - } - case NCR_DupShLib: { - useNew = false; - break; - } - case NCR_Error: - llvm::errs() << "SymbolTable: error while merging " << name << "\n"; - llvm::report_fatal_error("duplicate symbol error"); - break; - } - - if 
(useNew) { - // Update name table to use new atom. - _nameTable[name] = &newAtom; - // Add existing atom to replacement table. - _replacedAtoms[existing] = &newAtom; - } else { - // New atom is not being used. Add it to replacement table. - _replacedAtoms[&newAtom] = existing; - } - return false; -} - -unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) { - auto content = atom->rawContent(); - return llvm::hash_combine(atom->size(), - atom->contentType(), - llvm::hash_combine_range(content.begin(), - content.end())); -} - -bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, - const DefinedAtom * const r) { - if (l == r) - return true; - if (l == getEmptyKey() || r == getEmptyKey()) - return false; - if (l == getTombstoneKey() || r == getTombstoneKey()) - return false; - if (l->contentType() != r->contentType()) - return false; - if (l->size() != r->size()) - return false; - if (l->sectionChoice() != r->sectionChoice()) - return false; - if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) { - if (!l->customSectionName().equals(r->customSectionName())) - return false; - } - ArrayRef lc = l->rawContent(); - ArrayRef rc = r->rawContent(); - return memcmp(lc.data(), rc.data(), lc.size()) == 0; -} - -bool SymbolTable::addByContent(const DefinedAtom &newAtom) { - AtomContentSet::iterator pos = _contentTable.find(&newAtom); - if (pos == _contentTable.end()) { - _contentTable.insert(&newAtom); - return true; - } - const Atom* existing = *pos; - // New atom is not being used. Add it to replacement table. - _replacedAtoms[&newAtom] = existing; - return false; -} - -const Atom *SymbolTable::findByName(StringRef sym) { - NameToAtom::iterator pos = _nameTable.find(sym); - if (pos == _nameTable.end()) - return nullptr; - return pos->second; -} - -const Atom *SymbolTable::replacement(const Atom *atom) { - // Find the replacement for a given atom. Atoms in _replacedAtoms - // may be chained, so find the last one. 
- for (;;) { - AtomToAtom::iterator pos = _replacedAtoms.find(atom); - if (pos == _replacedAtoms.end()) - return atom; - atom = pos->second; - } -} - -bool SymbolTable::isCoalescedAway(const Atom *atom) { - return _replacedAtoms.count(atom) > 0; -} - -std::vector SymbolTable::undefines() { - std::vector ret; - for (auto it : _nameTable) { - const Atom *atom = it.second; - assert(atom != nullptr); - if (const auto *undef = dyn_cast(atom)) - if (_replacedAtoms.count(undef) == 0) - ret.push_back(undef); - } - return ret; -} - -} // namespace lld diff --git a/lld/lib/Core/Writer.cpp b/lld/lib/Core/Writer.cpp deleted file mode 100644 index 12788b187e11..000000000000 --- a/lld/lib/Core/Writer.cpp +++ /dev/null @@ -1,17 +0,0 @@ -//===- lib/Core/Writer.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/Writer.h" - -namespace lld { - -Writer::Writer() = default; - -Writer::~Writer() = default; - -} // end namespace lld diff --git a/lld/lib/Driver/CMakeLists.txt b/lld/lib/Driver/CMakeLists.txt deleted file mode 100644 index afc0bd1187f8..000000000000 --- a/lld/lib/Driver/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td) -tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs) -add_public_tablegen_target(DriverOptionsTableGen) - -add_lld_library(lldDriver - DarwinLdDriver.cpp - - ADDITIONAL_HEADER_DIRS - ${LLD_INCLUDE_DIR}/lld/Driver - - LINK_COMPONENTS - Option - Support - - LINK_LIBS - lldCommon - lldCore - lldMachOOld - lldReaderWriter - lldYAML - ) - -add_dependencies(lldDriver DriverOptionsTableGen) diff --git a/lld/lib/Driver/DarwinLdDriver.cpp b/lld/lib/Driver/DarwinLdDriver.cpp deleted file mode 100644 
index 21d125726192..000000000000 --- a/lld/lib/Driver/DarwinLdDriver.cpp +++ /dev/null @@ -1,1229 +0,0 @@ -//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// -/// Concrete instance of the Driver for darwin's ld. -/// -//===----------------------------------------------------------------------===// - -#include "lld/Common/Args.h" -#include "lld/Common/ErrorHandler.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/ArchiveLibraryFile.h" -#include "lld/Core/Error.h" -#include "lld/Core/File.h" -#include "lld/Core/Instrumentation.h" -#include "lld/Core/LinkingContext.h" -#include "lld/Core/Node.h" -#include "lld/Core/PassManager.h" -#include "lld/Core/Resolver.h" -#include "lld/Core/SharedLibraryFile.h" -#include "lld/Core/Simple.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include - -using namespace lld; - -namespace { - -// Create enum with 
OPT_xxx values for each option in DarwinLdOptions.td -enum { - OPT_INVALID = 0, -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELP, META, VALUES) \ - OPT_##ID, -#include "DarwinLdOptions.inc" -#undef OPTION -}; - -// Create prefix string literals used in DarwinLdOptions.td -#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; -#include "DarwinLdOptions.inc" -#undef PREFIX - -// Create table mapping all options defined in DarwinLdOptions.td -static const llvm::opt::OptTable::Info InfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ - HELPTEXT, METAVAR, VALUES) \ - {PREFIX, NAME, HELPTEXT, \ - METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ - PARAM, FLAGS, OPT_##GROUP, \ - OPT_##ALIAS, ALIASARGS, VALUES}, -#include "DarwinLdOptions.inc" -#undef OPTION -}; - -// Create OptTable class for parsing actual command line arguments -class DarwinLdOptTable : public llvm::opt::OptTable { -public: - DarwinLdOptTable() : OptTable(InfoTable) {} -}; - -static std::vector> -makeErrorFile(StringRef path, std::error_code ec) { - std::vector> result; - result.push_back(std::make_unique(path, ec)); - return result; -} - -static std::vector> -parseMemberFiles(std::unique_ptr file) { - std::vector> members; - if (auto *archive = dyn_cast(file.get())) { - if (std::error_code ec = archive->parseAllMembers(members)) - return makeErrorFile(file->path(), ec); - } else { - members.push_back(std::move(file)); - } - return members; -} - -std::vector> loadFile(MachOLinkingContext &ctx, - StringRef path, bool wholeArchive, - bool upwardDylib) { - if (ctx.logInputFiles()) - message(path); - - ErrorOr> mbOrErr = ctx.getMemoryBuffer(path); - if (std::error_code ec = mbOrErr.getError()) - return makeErrorFile(path, ec); - ErrorOr> fileOrErr = - ctx.registry().loadFile(std::move(mbOrErr.get())); - if (std::error_code ec = fileOrErr.getError()) - return makeErrorFile(path, ec); - std::unique_ptr &file = 
fileOrErr.get(); - - // If file is a dylib, inform LinkingContext about it. - if (SharedLibraryFile *shl = dyn_cast(file.get())) { - if (std::error_code ec = shl->parse()) - return makeErrorFile(path, ec); - ctx.registerDylib(reinterpret_cast(shl), - upwardDylib); - } - if (wholeArchive) - return parseMemberFiles(std::move(file)); - std::vector> files; - files.push_back(std::move(file)); - return files; -} - -} // end anonymous namespace - -// Test may be running on Windows. Canonicalize the path -// separator to '/' to get consistent outputs for tests. -static std::string canonicalizePath(StringRef path) { - char sep = llvm::sys::path::get_separator().front(); - if (sep != '/') { - std::string fixedPath = std::string(path); - std::replace(fixedPath.begin(), fixedPath.end(), sep, '/'); - return fixedPath; - } else { - return std::string(path); - } -} - -static void addFile(StringRef path, MachOLinkingContext &ctx, - bool loadWholeArchive, bool upwardDylib) { - std::vector> files = - loadFile(ctx, path, loadWholeArchive, upwardDylib); - for (std::unique_ptr &file : files) - ctx.getNodes().push_back(std::make_unique(std::move(file))); -} - -// Export lists are one symbol per line. Blank lines are ignored. -// Trailing comments start with #. -static std::error_code parseExportsList(StringRef exportFilePath, - MachOLinkingContext &ctx) { - // Map in export list file. - ErrorOr> mb = - MemoryBuffer::getFileOrSTDIN(exportFilePath); - if (std::error_code ec = mb.getError()) - return ec; - ctx.addInputFileDependency(exportFilePath); - StringRef buffer = mb->get()->getBuffer(); - while (!buffer.empty()) { - // Split off each line in the file. - std::pair lineAndRest = buffer.split('\n'); - StringRef line = lineAndRest.first; - // Ignore trailing # comments. 
- std::pair symAndComment = line.split('#'); - StringRef sym = symAndComment.first.trim(); - if (!sym.empty()) - ctx.addExportSymbol(sym); - buffer = lineAndRest.second; - } - return std::error_code(); -} - -/// Order files are one symbol per line. Blank lines are ignored. -/// Trailing comments start with #. Symbol names can be prefixed with an -/// architecture name and/or .o leaf name. Examples: -/// _foo -/// bar.o:_bar -/// libfrob.a(bar.o):_bar -/// x86_64:_foo64 -static std::error_code parseOrderFile(StringRef orderFilePath, - MachOLinkingContext &ctx) { - // Map in order file. - ErrorOr> mb = - MemoryBuffer::getFileOrSTDIN(orderFilePath); - if (std::error_code ec = mb.getError()) - return ec; - ctx.addInputFileDependency(orderFilePath); - StringRef buffer = mb->get()->getBuffer(); - while (!buffer.empty()) { - // Split off each line in the file. - std::pair lineAndRest = buffer.split('\n'); - StringRef line = lineAndRest.first; - buffer = lineAndRest.second; - // Ignore trailing # comments. - std::pair symAndComment = line.split('#'); - if (symAndComment.first.empty()) - continue; - StringRef sym = symAndComment.first.trim(); - if (sym.empty()) - continue; - // Check for prefix. - StringRef prefix; - std::pair prefixAndSym = sym.split(':'); - if (!prefixAndSym.second.empty()) { - sym = prefixAndSym.second; - prefix = prefixAndSym.first; - if (!prefix.endswith(".o") && !prefix.endswith(".o)")) { - // If arch name prefix does not match arch being linked, ignore symbol. - if (!ctx.archName().equals(prefix)) - continue; - prefix = ""; - } - } else - sym = prefixAndSym.first; - if (!sym.empty()) { - ctx.appendOrderedSymbol(sym, prefix); - // llvm::errs() << sym << ", prefix=" << prefix << "\n"; - } - } - return std::error_code(); -} - -// -// There are two variants of the -filelist option: -// -// -filelist -// In this variant, the path is to a text file which contains one file path -// per line. There are no comments or trimming of whitespace. 
-// -// -fileList , -// In this variant, the path is to a text file which contains a partial path -// per line. The prefix is prepended to each partial path. -// -static llvm::Error loadFileList(StringRef fileListPath, - MachOLinkingContext &ctx, bool forceLoad) { - // If there is a comma, split off . - std::pair opt = fileListPath.split(','); - StringRef filePath = opt.first; - StringRef dirName = opt.second; - ctx.addInputFileDependency(filePath); - // Map in file list file. - ErrorOr> mb = - MemoryBuffer::getFileOrSTDIN(filePath); - if (std::error_code ec = mb.getError()) - return llvm::errorCodeToError(ec); - StringRef buffer = mb->get()->getBuffer(); - while (!buffer.empty()) { - // Split off each line in the file. - std::pair lineAndRest = buffer.split('\n'); - StringRef line = lineAndRest.first; - StringRef path; - if (!dirName.empty()) { - // If there is a then prepend dir to each line. - SmallString<256> fullPath; - fullPath.assign(dirName); - llvm::sys::path::append(fullPath, Twine(line)); - path = ctx.copy(fullPath.str()); - } else { - // No use whole line as input file path. - path = ctx.copy(line); - } - if (!ctx.pathExists(path)) { - return llvm::make_error(Twine("File not found '") - + path - + "'"); - } - if (ctx.testingFileUsage()) { - message("Found filelist entry " + canonicalizePath(path)); - } - addFile(path, ctx, forceLoad, false); - buffer = lineAndRest.second; - } - return llvm::Error::success(); -} - -/// Parse number assuming it is base 16, but allow 0x prefix. 
-static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) { - if (numStr.startswith_insensitive("0x")) - numStr = numStr.drop_front(2); - return numStr.getAsInteger(16, baseAddress); -} - -static void parseLLVMOptions(const LinkingContext &ctx) { - // Honor -mllvm - if (!ctx.llvmOptions().empty()) { - unsigned numArgs = ctx.llvmOptions().size(); - auto **args = new const char *[numArgs + 2]; - args[0] = "lld (LLVM option parsing)"; - for (unsigned i = 0; i != numArgs; ++i) - args[i + 1] = ctx.llvmOptions()[i]; - args[numArgs + 1] = nullptr; - llvm::cl::ResetAllOptionOccurrences(); - llvm::cl::ParseCommandLineOptions(numArgs + 1, args); - } -} - -namespace lld { -namespace mach_o { - -bool parse(llvm::ArrayRef args, MachOLinkingContext &ctx) { - // Parse command line options using DarwinLdOptions.td - DarwinLdOptTable table; - unsigned missingIndex; - unsigned missingCount; - llvm::opt::InputArgList parsedArgs = - table.ParseArgs(args.slice(1), missingIndex, missingCount); - if (missingCount) { - error("missing arg value for '" + - Twine(parsedArgs.getArgString(missingIndex)) + "' expected " + - Twine(missingCount) + " argument(s)."); - return false; - } - - for (auto unknownArg : parsedArgs.filtered(OPT_UNKNOWN)) { - warn("ignoring unknown argument: " + - Twine(unknownArg->getAsString(parsedArgs))); - } - - errorHandler().verbose = parsedArgs.hasArg(OPT_v); - errorHandler().errorLimit = args::getInteger(parsedArgs, OPT_error_limit, 20); - - // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static ) - llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE; - bool isStaticExecutable = false; - if (llvm::opt::Arg *kind = parsedArgs.getLastArg( - OPT_dylib, OPT_relocatable, OPT_bundle, OPT_static, OPT_preload)) { - switch (kind->getOption().getID()) { - case OPT_dylib: - fileType = llvm::MachO::MH_DYLIB; - break; - case OPT_relocatable: - fileType = llvm::MachO::MH_OBJECT; - break; - case OPT_bundle: - fileType = 
llvm::MachO::MH_BUNDLE; - break; - case OPT_static: - fileType = llvm::MachO::MH_EXECUTE; - isStaticExecutable = true; - break; - case OPT_preload: - fileType = llvm::MachO::MH_PRELOAD; - break; - } - } - - // Handle -arch xxx - MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; - if (llvm::opt::Arg *archStr = parsedArgs.getLastArg(OPT_arch)) { - arch = MachOLinkingContext::archFromName(archStr->getValue()); - if (arch == MachOLinkingContext::arch_unknown) { - error("unknown arch named '" + Twine(archStr->getValue()) + "'"); - return false; - } - } - // If no -arch specified, scan input files to find first non-fat .o file. - if (arch == MachOLinkingContext::arch_unknown) { - for (auto &inFile : parsedArgs.filtered(OPT_INPUT)) { - // This is expensive because it opens and maps the file. But that is - // ok because no -arch is rare. - if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch)) - break; - } - if (arch == MachOLinkingContext::arch_unknown && - !parsedArgs.getLastArg(OPT_test_file_usage)) { - // If no -arch and no options at all, print usage message. 
- if (parsedArgs.size() == 0) { - table.printHelp(llvm::outs(), - (std::string(args[0]) + " [options] file...").c_str(), - "LLVM Linker", false); - } else { - error("-arch not specified and could not be inferred"); - } - return false; - } - } - - // Handle -macosx_version_min or -ios_version_min - MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; - uint32_t minOSVersion = 0; - if (llvm::opt::Arg *minOS = - parsedArgs.getLastArg(OPT_macosx_version_min, OPT_ios_version_min, - OPT_ios_simulator_version_min)) { - switch (minOS->getOption().getID()) { - case OPT_macosx_version_min: - os = MachOLinkingContext::OS::macOSX; - if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), - minOSVersion)) { - error("malformed macosx_version_min value"); - return false; - } - break; - case OPT_ios_version_min: - os = MachOLinkingContext::OS::iOS; - if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), - minOSVersion)) { - error("malformed ios_version_min value"); - return false; - } - break; - case OPT_ios_simulator_version_min: - os = MachOLinkingContext::OS::iOS_simulator; - if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), - minOSVersion)) { - error("malformed ios_simulator_version_min value"); - return false; - } - break; - } - } else { - // No min-os version on command line, check environment variables - } - - // Handle export_dynamic - // FIXME: Should we warn when this applies to something other than a static - // executable or dylib? Those are the only cases where this has an effect. - // Note, this has to come before ctx.configure() so that we get the correct - // value for _globalsAreDeadStripRoots. - bool exportDynamicSymbols = parsedArgs.hasArg(OPT_export_dynamic); - - // Now that there's enough information parsed in, let the linking context - // set up default values. 
- ctx.configure(fileType, arch, os, minOSVersion, exportDynamicSymbols); - - // Handle -e xxx - if (llvm::opt::Arg *entry = parsedArgs.getLastArg(OPT_entry)) - ctx.setEntrySymbolName(entry->getValue()); - - // Handle -o xxx - if (llvm::opt::Arg *outpath = parsedArgs.getLastArg(OPT_output)) - ctx.setOutputPath(outpath->getValue()); - else - ctx.setOutputPath("a.out"); - - // Handle -image_base XXX and -seg1addr XXXX - if (llvm::opt::Arg *imageBase = parsedArgs.getLastArg(OPT_image_base)) { - uint64_t baseAddress; - if (parseNumberBase16(imageBase->getValue(), baseAddress)) { - error("image_base expects a hex number"); - return false; - } else if (baseAddress < ctx.pageZeroSize()) { - error("image_base overlaps with __PAGEZERO"); - return false; - } else if (baseAddress % ctx.pageSize()) { - error("image_base must be a multiple of page size (0x" + - llvm::utohexstr(ctx.pageSize()) + ")"); - return false; - } - - ctx.setBaseAddress(baseAddress); - } - - // Handle -dead_strip - if (parsedArgs.getLastArg(OPT_dead_strip)) - ctx.setDeadStripping(true); - - bool globalWholeArchive = false; - // Handle -all_load - if (parsedArgs.getLastArg(OPT_all_load)) - globalWholeArchive = true; - - // Handle -install_name - if (llvm::opt::Arg *installName = parsedArgs.getLastArg(OPT_install_name)) - ctx.setInstallName(installName->getValue()); - else - ctx.setInstallName(ctx.outputPath()); - - // Handle -mark_dead_strippable_dylib - if (parsedArgs.getLastArg(OPT_mark_dead_strippable_dylib)) - ctx.setDeadStrippableDylib(true); - - // Handle -compatibility_version and -current_version - if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_compatibility_version)) { - if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { - error("-compatibility_version can only be used with -dylib"); - return false; - } - uint32_t parsedVers; - if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { - error("-compatibility_version value is malformed"); - return false; - } - 
ctx.setCompatibilityVersion(parsedVers); - } - - if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_current_version)) { - if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { - error("-current_version can only be used with -dylib"); - return false; - } - uint32_t parsedVers; - if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { - error("-current_version value is malformed"); - return false; - } - ctx.setCurrentVersion(parsedVers); - } - - // Handle -bundle_loader - if (llvm::opt::Arg *loader = parsedArgs.getLastArg(OPT_bundle_loader)) - ctx.setBundleLoader(loader->getValue()); - - // Handle -sectalign segname sectname align - for (auto &alignArg : parsedArgs.filtered(OPT_sectalign)) { - const char* segName = alignArg->getValue(0); - const char* sectName = alignArg->getValue(1); - const char* alignStr = alignArg->getValue(2); - if ((alignStr[0] == '0') && (alignStr[1] == 'x')) - alignStr += 2; - unsigned long long alignValue; - if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) { - error("-sectalign alignment value '" + Twine(alignStr) + - "' not a valid number"); - return false; - } - uint16_t align = 1 << llvm::countTrailingZeros(alignValue); - if (!llvm::isPowerOf2_64(alignValue)) { - std::string Msg; - llvm::raw_string_ostream OS(Msg); - OS << "alignment for '-sectalign " << segName << " " << sectName - << llvm::format(" 0x%llX", alignValue) - << "' is not a power of two, using " << llvm::format("0x%08X", align); - OS.flush(); - warn(Msg); - } - ctx.addSectionAlignment(segName, sectName, align); - } - - // Handle -mllvm - for (auto &llvmArg : parsedArgs.filtered(OPT_mllvm)) { - ctx.appendLLVMOption(llvmArg->getValue()); - } - - // Handle -print_atoms - if (parsedArgs.getLastArg(OPT_print_atoms)) - ctx.setPrintAtoms(); - - // Handle -t (trace) option. - if (parsedArgs.getLastArg(OPT_t)) - ctx.setLogInputFiles(true); - - // Handle -demangle option. 
- if (parsedArgs.getLastArg(OPT_demangle)) - ctx.setDemangleSymbols(true); - - // Handle -keep_private_externs - if (parsedArgs.getLastArg(OPT_keep_private_externs)) { - ctx.setKeepPrivateExterns(true); - if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT) - warn("-keep_private_externs only used in -r mode"); - } - - // Handle -dependency_info used by Xcode. - if (llvm::opt::Arg *depInfo = parsedArgs.getLastArg(OPT_dependency_info)) - if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue())) - warn(ec.message() + ", processing '-dependency_info " + - depInfo->getValue()); - - // In -test_file_usage mode, we'll be given an explicit list of paths that - // exist. We'll also be expected to print out information about how we located - // libraries and so on that the user specified, but not to actually do any - // linking. - if (parsedArgs.getLastArg(OPT_test_file_usage)) { - ctx.setTestingFileUsage(); - - // With paths existing by fiat, linking is not going to end well. - ctx.setDoNothing(true); - - // Only bother looking for an existence override if we're going to use it. - for (auto existingPath : parsedArgs.filtered(OPT_path_exists)) { - ctx.addExistingPathForDebug(existingPath->getValue()); - } - } - - // Register possible input file parsers. - if (!ctx.doNothing()) { - ctx.registry().addSupportMachOObjects(ctx); - ctx.registry().addSupportArchives(ctx.logInputFiles()); - ctx.registry().addSupportYamlFiles(); - } - - // Now construct the set of library search directories, following ld64's - // baroque set of accumulated hacks. Mostly, the algorithm constructs - // { syslibroots } x { libpaths } - // - // Unfortunately, there are numerous exceptions: - // 1. Only absolute paths get modified by syslibroot options. - // 2. If there is just 1 -syslibroot, system paths not found in it are - // skipped. - // 3. If the last -syslibroot is "/", all of them are ignored entirely. - // 4. If { syslibroots } x path == {}, the original path is kept. 
- std::vector sysLibRoots; - for (auto syslibRoot : parsedArgs.filtered(OPT_syslibroot)) { - sysLibRoots.push_back(syslibRoot->getValue()); - } - if (!sysLibRoots.empty()) { - // Ignore all if last -syslibroot is "/". - if (sysLibRoots.back() != "/") - ctx.setSysLibRoots(sysLibRoots); - } - - // Paths specified with -L come first, and are not considered system paths for - // the case where there is precisely 1 -syslibroot. - for (auto libPath : parsedArgs.filtered(OPT_L)) { - ctx.addModifiedSearchDir(libPath->getValue()); - } - - // Process -F directories (where to look for frameworks). - for (auto fwPath : parsedArgs.filtered(OPT_F)) { - ctx.addFrameworkSearchDir(fwPath->getValue()); - } - - // -Z suppresses the standard search paths. - if (!parsedArgs.hasArg(OPT_Z)) { - ctx.addModifiedSearchDir("/usr/lib", true); - ctx.addModifiedSearchDir("/usr/local/lib", true); - ctx.addFrameworkSearchDir("/Library/Frameworks", true); - ctx.addFrameworkSearchDir("/System/Library/Frameworks", true); - } - - // Now that we've constructed the final set of search paths, print out those - // search paths in verbose mode. 
- if (errorHandler().verbose) { - message("Library search paths:"); - for (auto path : ctx.searchDirs()) { - message(" " + path); - } - message("Framework search paths:"); - for (auto path : ctx.frameworkDirs()) { - message(" " + path); - } - } - - // Handle -exported_symbols_list - for (auto expFile : parsedArgs.filtered(OPT_exported_symbols_list)) { - if (ctx.exportMode() == MachOLinkingContext::ExportMode::unexported) { - error("-exported_symbols_list cannot be combined with " - "-unexported_symbol[s_list]"); - return false; - } - ctx.setExportMode(MachOLinkingContext::ExportMode::exported); - if (std::error_code ec = parseExportsList(expFile->getValue(), ctx)) { - error(ec.message() + ", processing '-exported_symbols_list " + - expFile->getValue()); - return false; - } - } - - // Handle -exported_symbol - for (auto symbol : parsedArgs.filtered(OPT_exported_symbol)) { - if (ctx.exportMode() == MachOLinkingContext::ExportMode::unexported) { - error("-exported_symbol cannot be combined with " - "-unexported_symbol[s_list]"); - return false; - } - ctx.setExportMode(MachOLinkingContext::ExportMode::exported); - ctx.addExportSymbol(symbol->getValue()); - } - - // Handle -unexported_symbols_list - for (auto expFile : parsedArgs.filtered(OPT_unexported_symbols_list)) { - if (ctx.exportMode() == MachOLinkingContext::ExportMode::exported) { - error("-unexported_symbols_list cannot be combined with " - "-exported_symbol[s_list]"); - return false; - } - ctx.setExportMode(MachOLinkingContext::ExportMode::unexported); - if (std::error_code ec = parseExportsList(expFile->getValue(), ctx)) { - error(ec.message() + ", processing '-unexported_symbols_list " + - expFile->getValue()); - return false; - } - } - - // Handle -unexported_symbol - for (auto symbol : parsedArgs.filtered(OPT_unexported_symbol)) { - if (ctx.exportMode() == MachOLinkingContext::ExportMode::exported) { - error("-unexported_symbol cannot be combined with " - "-exported_symbol[s_list]"); - return false; - } - 
ctx.setExportMode(MachOLinkingContext::ExportMode::unexported); - ctx.addExportSymbol(symbol->getValue()); - } - - // Handle obosolete -multi_module and -single_module - if (llvm::opt::Arg *mod = - parsedArgs.getLastArg(OPT_multi_module, OPT_single_module)) { - if (mod->getOption().getID() == OPT_multi_module) - warn("-multi_module is obsolete and being ignored"); - else if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) - warn("-single_module being ignored. It is only for use when producing a " - "dylib"); - } - - // Handle obsolete ObjC options: -objc_gc_compaction, -objc_gc, -objc_gc_only - if (parsedArgs.getLastArg(OPT_objc_gc_compaction)) { - error("-objc_gc_compaction is not supported"); - return false; - } - - if (parsedArgs.getLastArg(OPT_objc_gc)) { - error("-objc_gc is not supported"); - return false; - } - - if (parsedArgs.getLastArg(OPT_objc_gc_only)) { - error("-objc_gc_only is not supported"); - return false; - } - - // Handle -pie or -no_pie - if (llvm::opt::Arg *pie = parsedArgs.getLastArg(OPT_pie, OPT_no_pie)) { - switch (ctx.outputMachOType()) { - case llvm::MachO::MH_EXECUTE: - switch (ctx.os()) { - case MachOLinkingContext::OS::macOSX: - if ((minOSVersion < 0x000A0500) && - (pie->getOption().getID() == OPT_pie)) { - error("-pie can only be used when targeting Mac OS X 10.5 or later"); - return false; - } - break; - case MachOLinkingContext::OS::iOS: - if ((minOSVersion < 0x00040200) && - (pie->getOption().getID() == OPT_pie)) { - error("-pie can only be used when targeting iOS 4.2 or later"); - return false; - } - break; - case MachOLinkingContext::OS::iOS_simulator: - if (pie->getOption().getID() == OPT_no_pie) { - error("iOS simulator programs must be built PIE"); - return false; - } - break; - case MachOLinkingContext::OS::unknown: - break; - } - ctx.setPIE(pie->getOption().getID() == OPT_pie); - break; - case llvm::MachO::MH_PRELOAD: - break; - case llvm::MachO::MH_DYLIB: - case llvm::MachO::MH_BUNDLE: - warn(pie->getSpelling() + - " being 
ignored. It is only used when linking main executables"); - break; - default: - error(pie->getSpelling() + - " can only used when linking main executables"); - return false; - } - } - - // Handle -version_load_command or -no_version_load_command - { - bool flagOn = false; - bool flagOff = false; - if (auto *arg = parsedArgs.getLastArg(OPT_version_load_command, - OPT_no_version_load_command)) { - flagOn = arg->getOption().getID() == OPT_version_load_command; - flagOff = arg->getOption().getID() == OPT_no_version_load_command; - } - - // default to adding version load command for dynamic code, - // static code must opt-in - switch (ctx.outputMachOType()) { - case llvm::MachO::MH_OBJECT: - ctx.setGenerateVersionLoadCommand(false); - break; - case llvm::MachO::MH_EXECUTE: - // dynamic executables default to generating a version load command, - // while static executables only generate it if required. - if (isStaticExecutable) { - if (flagOn) - ctx.setGenerateVersionLoadCommand(true); - } else { - if (!flagOff) - ctx.setGenerateVersionLoadCommand(true); - } - break; - case llvm::MachO::MH_PRELOAD: - case llvm::MachO::MH_KEXT_BUNDLE: - if (flagOn) - ctx.setGenerateVersionLoadCommand(true); - break; - case llvm::MachO::MH_DYLINKER: - case llvm::MachO::MH_DYLIB: - case llvm::MachO::MH_BUNDLE: - if (!flagOff) - ctx.setGenerateVersionLoadCommand(true); - break; - case llvm::MachO::MH_FVMLIB: - case llvm::MachO::MH_DYLDLINK: - case llvm::MachO::MH_DYLIB_STUB: - case llvm::MachO::MH_DSYM: - // We don't generate load commands for these file types, even if - // forced on. 
- break; - } - } - - // Handle -function_starts or -no_function_starts - { - bool flagOn = false; - bool flagOff = false; - if (auto *arg = parsedArgs.getLastArg(OPT_function_starts, - OPT_no_function_starts)) { - flagOn = arg->getOption().getID() == OPT_function_starts; - flagOff = arg->getOption().getID() == OPT_no_function_starts; - } - - // default to adding functions start for dynamic code, static code must - // opt-in - switch (ctx.outputMachOType()) { - case llvm::MachO::MH_OBJECT: - ctx.setGenerateFunctionStartsLoadCommand(false); - break; - case llvm::MachO::MH_EXECUTE: - // dynamic executables default to generating a version load command, - // while static executables only generate it if required. - if (isStaticExecutable) { - if (flagOn) - ctx.setGenerateFunctionStartsLoadCommand(true); - } else { - if (!flagOff) - ctx.setGenerateFunctionStartsLoadCommand(true); - } - break; - case llvm::MachO::MH_PRELOAD: - case llvm::MachO::MH_KEXT_BUNDLE: - if (flagOn) - ctx.setGenerateFunctionStartsLoadCommand(true); - break; - case llvm::MachO::MH_DYLINKER: - case llvm::MachO::MH_DYLIB: - case llvm::MachO::MH_BUNDLE: - if (!flagOff) - ctx.setGenerateFunctionStartsLoadCommand(true); - break; - case llvm::MachO::MH_FVMLIB: - case llvm::MachO::MH_DYLDLINK: - case llvm::MachO::MH_DYLIB_STUB: - case llvm::MachO::MH_DSYM: - // We don't generate load commands for these file types, even if - // forced on. 
- break; - } - } - - // Handle -data_in_code_info or -no_data_in_code_info - { - bool flagOn = false; - bool flagOff = false; - if (auto *arg = parsedArgs.getLastArg(OPT_data_in_code_info, - OPT_no_data_in_code_info)) { - flagOn = arg->getOption().getID() == OPT_data_in_code_info; - flagOff = arg->getOption().getID() == OPT_no_data_in_code_info; - } - - // default to adding data in code for dynamic code, static code must - // opt-in - switch (ctx.outputMachOType()) { - case llvm::MachO::MH_OBJECT: - if (!flagOff) - ctx.setGenerateDataInCodeLoadCommand(true); - break; - case llvm::MachO::MH_EXECUTE: - // dynamic executables default to generating a version load command, - // while static executables only generate it if required. - if (isStaticExecutable) { - if (flagOn) - ctx.setGenerateDataInCodeLoadCommand(true); - } else { - if (!flagOff) - ctx.setGenerateDataInCodeLoadCommand(true); - } - break; - case llvm::MachO::MH_PRELOAD: - case llvm::MachO::MH_KEXT_BUNDLE: - if (flagOn) - ctx.setGenerateDataInCodeLoadCommand(true); - break; - case llvm::MachO::MH_DYLINKER: - case llvm::MachO::MH_DYLIB: - case llvm::MachO::MH_BUNDLE: - if (!flagOff) - ctx.setGenerateDataInCodeLoadCommand(true); - break; - case llvm::MachO::MH_FVMLIB: - case llvm::MachO::MH_DYLDLINK: - case llvm::MachO::MH_DYLIB_STUB: - case llvm::MachO::MH_DSYM: - // We don't generate load commands for these file types, even if - // forced on. - break; - } - } - - // Handle sdk_version - if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) { - uint32_t sdkVersion = 0; - if (MachOLinkingContext::parsePackedVersion(arg->getValue(), - sdkVersion)) { - error("malformed sdkVersion value"); - return false; - } - ctx.setSdkVersion(sdkVersion); - } else if (ctx.generateVersionLoadCommand()) { - // If we don't have an sdk version, but were going to emit a load command - // with min_version, then we need to give a warning as we have no sdk - // version to put in that command. 
- // FIXME: We need to decide whether to make this an error. - warn("-sdk_version is required when emitting min version load command. " - "Setting sdk version to match provided min version"); - ctx.setSdkVersion(ctx.osMinVersion()); - } - - // Handle source_version - if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_source_version)) { - uint64_t version = 0; - if (MachOLinkingContext::parsePackedVersion(arg->getValue(), - version)) { - error("malformed source_version value"); - return false; - } - ctx.setSourceVersion(version); - } - - // Handle stack_size - if (llvm::opt::Arg *stackSize = parsedArgs.getLastArg(OPT_stack_size)) { - uint64_t stackSizeVal; - if (parseNumberBase16(stackSize->getValue(), stackSizeVal)) { - error("stack_size expects a hex number"); - return false; - } - if ((stackSizeVal % ctx.pageSize()) != 0) { - error("stack_size must be a multiple of page size (0x" + - llvm::utohexstr(ctx.pageSize()) + ")"); - return false; - } - - ctx.setStackSize(stackSizeVal); - } - - // Handle debug info handling options: -S - if (parsedArgs.hasArg(OPT_S)) - ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap); - - // Handle -order_file - for (auto orderFile : parsedArgs.filtered(OPT_order_file)) { - if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx)) { - error(ec.message() + ", processing '-order_file " + orderFile->getValue() - + "'"); - return false; - } - } - - // Handle -flat_namespace. 
- if (llvm::opt::Arg *ns = - parsedArgs.getLastArg(OPT_flat_namespace, OPT_twolevel_namespace)) { - if (ns->getOption().getID() == OPT_flat_namespace) - ctx.setUseFlatNamespace(true); - } - - // Handle -undefined - if (llvm::opt::Arg *undef = parsedArgs.getLastArg(OPT_undefined)) { - MachOLinkingContext::UndefinedMode UndefMode; - if (StringRef(undef->getValue()).equals("error")) - UndefMode = MachOLinkingContext::UndefinedMode::error; - else if (StringRef(undef->getValue()).equals("warning")) - UndefMode = MachOLinkingContext::UndefinedMode::warning; - else if (StringRef(undef->getValue()).equals("suppress")) - UndefMode = MachOLinkingContext::UndefinedMode::suppress; - else if (StringRef(undef->getValue()).equals("dynamic_lookup")) - UndefMode = MachOLinkingContext::UndefinedMode::dynamicLookup; - else { - error("invalid option to -undefined [ warning | error | suppress | " - "dynamic_lookup ]"); - return false; - } - - if (ctx.useFlatNamespace()) { - // If we're using -flat_namespace then 'warning', 'suppress' and - // 'dynamic_lookup' are all equivalent, so map them to 'suppress'. - if (UndefMode != MachOLinkingContext::UndefinedMode::error) - UndefMode = MachOLinkingContext::UndefinedMode::suppress; - } else { - // If we're using -twolevel_namespace then 'warning' and 'suppress' are - // illegal. Emit a diagnostic if they've been (mis)used. - if (UndefMode == MachOLinkingContext::UndefinedMode::warning || - UndefMode == MachOLinkingContext::UndefinedMode::suppress) { - error("can't use -undefined warning or suppress with " - "-twolevel_namespace"); - return false; - } - } - - ctx.setUndefinedMode(UndefMode); - } - - // Handle -no_objc_category_merging. 
- if (parsedArgs.getLastArg(OPT_no_objc_category_merging)) - ctx.setMergeObjCCategories(false); - - // Handle -rpath - if (parsedArgs.hasArg(OPT_rpath)) { - switch (ctx.outputMachOType()) { - case llvm::MachO::MH_EXECUTE: - case llvm::MachO::MH_DYLIB: - case llvm::MachO::MH_BUNDLE: - if (!ctx.minOS("10.5", "2.0")) { - if (ctx.os() == MachOLinkingContext::OS::macOSX) - error("-rpath can only be used when targeting OS X 10.5 or later"); - else - error("-rpath can only be used when targeting iOS 2.0 or later"); - return false; - } - break; - default: - error("-rpath can only be used when creating a dynamic final linked " - "image"); - return false; - } - - for (auto rPath : parsedArgs.filtered(OPT_rpath)) { - ctx.addRpath(rPath->getValue()); - } - } - - // Parse the LLVM options before we process files in case the file handling - // makes use of things like LLVM_DEBUG(). - parseLLVMOptions(ctx); - - // Handle input files and sectcreate. - for (auto &arg : parsedArgs) { - bool upward; - llvm::Optional resolvedPath; - switch (arg->getOption().getID()) { - default: - continue; - case OPT_INPUT: - addFile(arg->getValue(), ctx, globalWholeArchive, false); - break; - case OPT_upward_library: - addFile(arg->getValue(), ctx, false, true); - break; - case OPT_force_load: - addFile(arg->getValue(), ctx, true, false); - break; - case OPT_l: - case OPT_upward_l: - upward = (arg->getOption().getID() == OPT_upward_l); - resolvedPath = ctx.searchLibrary(arg->getValue()); - if (!resolvedPath) { - error("Unable to find library for " + arg->getSpelling() + - arg->getValue()); - return false; - } else if (ctx.testingFileUsage()) { - message(Twine("Found ") + (upward ? 
"upward " : " ") + "library " + - canonicalizePath(resolvedPath.getValue())); - } - addFile(resolvedPath.getValue(), ctx, globalWholeArchive, upward); - break; - case OPT_framework: - case OPT_upward_framework: - upward = (arg->getOption().getID() == OPT_upward_framework); - resolvedPath = ctx.findPathForFramework(arg->getValue()); - if (!resolvedPath) { - error("Unable to find framework for " + arg->getSpelling() + " " + - arg->getValue()); - return false; - } else if (ctx.testingFileUsage()) { - message(Twine("Found ") + (upward ? "upward " : " ") + "framework " + - canonicalizePath(resolvedPath.getValue())); - } - addFile(resolvedPath.getValue(), ctx, globalWholeArchive, upward); - break; - case OPT_filelist: - if (auto ec = loadFileList(arg->getValue(), ctx, globalWholeArchive)) { - handleAllErrors(std::move(ec), [&](const llvm::ErrorInfoBase &EI) { - error(EI.message() + ", processing '-filelist " + arg->getValue()); - }); - return false; - } - break; - case OPT_sectcreate: { - const char* seg = arg->getValue(0); - const char* sect = arg->getValue(1); - const char* fileName = arg->getValue(2); - - ErrorOr> contentOrErr = - MemoryBuffer::getFile(fileName); - - if (!contentOrErr) { - error("can't open -sectcreate file " + Twine(fileName)); - return false; - } - - ctx.addSectCreateSection(seg, sect, std::move(*contentOrErr)); - } - break; - } - } - - if (ctx.getNodes().empty()) { - error("No input files"); - return false; - } - - // Validate the combination of options used. - return ctx.validate(); -} - -static void createFiles(MachOLinkingContext &ctx, bool Implicit) { - std::vector> Files; - if (Implicit) - ctx.createImplicitFiles(Files); - else - ctx.createInternalFiles(Files); - for (auto i = Files.rbegin(), e = Files.rend(); i != e; ++i) { - auto &members = ctx.getNodes(); - members.insert(members.begin(), std::make_unique(std::move(*i))); - } -} - -/// This is where the link is actually performed. 
-bool link(llvm::ArrayRef args, bool CanExitEarly, - raw_ostream &StdoutOS, raw_ostream &StderrOS) { - lld::stdoutOS = &StdoutOS; - lld::stderrOS = &StderrOS; - - errorHandler().logName = args::getFilenameWithoutExe(args[0]); - errorHandler().errorLimitExceededMsg = - "too many errors emitted, stopping now (use " - "'-error-limit 0' to see all errors)"; - errorHandler().exitEarly = CanExitEarly; - StderrOS.enable_colors(StderrOS.has_colors()); - - MachOLinkingContext ctx; - if (!parse(args, ctx)) - return false; - if (ctx.doNothing()) - return true; - if (ctx.getNodes().empty()) - return false; - - for (std::unique_ptr &ie : ctx.getNodes()) - if (FileNode *node = dyn_cast(ie.get())) - node->getFile()->parse(); - - createFiles(ctx, false /* Implicit */); - - // Give target a chance to add files - createFiles(ctx, true /* Implicit */); - - // Give target a chance to postprocess input files. - // Mach-O uses this chance to move all object files before library files. - ctx.finalizeInputFiles(); - - // Do core linking. - ScopedTask resolveTask(getDefaultDomain(), "Resolve"); - Resolver resolver(ctx); - if (!resolver.resolve()) - return false; - SimpleFile *merged = nullptr; - { - std::unique_ptr mergedFile = resolver.resultFile(); - merged = mergedFile.get(); - auto &members = ctx.getNodes(); - members.insert(members.begin(), - std::make_unique(std::move(mergedFile))); - } - resolveTask.end(); - - // Run passes on linked atoms. - ScopedTask passTask(getDefaultDomain(), "Passes"); - PassManager pm; - ctx.addPasses(pm); - if (auto ec = pm.runOnFile(*merged)) { - // FIXME: This should be passed to logAllUnhandledErrors but it needs - // to be passed a Twine instead of a string. - lld::errs() << "Failed to run passes on file '" << ctx.outputPath() - << "': "; - logAllUnhandledErrors(std::move(ec), lld::errs(), std::string()); - return false; - } - - passTask.end(); - - // Give linked atoms to Writer to generate output file. 
- ScopedTask writeTask(getDefaultDomain(), "Write"); - if (auto ec = ctx.writeFile(*merged)) { - // FIXME: This should be passed to logAllUnhandledErrors but it needs - // to be passed a Twine instead of a string. - lld::errs() << "Failed to write file '" << ctx.outputPath() << "': "; - logAllUnhandledErrors(std::move(ec), lld::errs(), std::string()); - return false; - } - - // Call exit() if we can to avoid calling destructors. - if (CanExitEarly) - exitLld(errorCount() ? 1 : 0); - - - return true; -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/Driver/DarwinLdOptions.td b/lld/lib/Driver/DarwinLdOptions.td deleted file mode 100644 index 3bbde8bf1c1c..000000000000 --- a/lld/lib/Driver/DarwinLdOptions.td +++ /dev/null @@ -1,250 +0,0 @@ -include "llvm/Option/OptParser.td" - - -// output kinds -def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">; -def relocatable : Flag<["-"], "r">, - HelpText<"Create relocatable object file">, Group; -def static : Flag<["-"], "static">, - HelpText<"Create static executable">, Group; -def dynamic : Flag<["-"], "dynamic">, - HelpText<"Create dynamic executable (default)">,Group; -def dylib : Flag<["-"], "dylib">, - HelpText<"Create dynamic library">, Group; -def bundle : Flag<["-"], "bundle">, - HelpText<"Create dynamic bundle">, Group; -def execute : Flag<["-"], "execute">, - HelpText<"Create main executable (default)">, Group; -def preload : Flag<["-"], "preload">, - HelpText<"Create binary for use with embedded systems">, Group; - -// optimizations -def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; -def dead_strip : Flag<["-"], "dead_strip">, - HelpText<"Remove unreference code and data">, Group; -def macosx_version_min : Separate<["-"], "macosx_version_min">, - MetaVarName<"">, - HelpText<"Minimum Mac OS X version">, Group; -def ios_version_min : Separate<["-"], "ios_version_min">, - MetaVarName<"">, - HelpText<"Minimum iOS version">, Group; -def iphoneos_version_min : 
Separate<["-"], "iphoneos_version_min">, - Alias; -def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, - MetaVarName<"">, - HelpText<"Minimum iOS simulator version">, Group; -def sdk_version : Separate<["-"], "sdk_version">, - MetaVarName<"">, - HelpText<"SDK version">, Group; -def source_version : Separate<["-"], "source_version">, - MetaVarName<"">, - HelpText<"Source version">, Group; -def version_load_command : Flag<["-"], "version_load_command">, - HelpText<"Force generation of a version load command">, Group; -def no_version_load_command : Flag<["-"], "no_version_load_command">, - HelpText<"Disable generation of a version load command">, Group; -def function_starts : Flag<["-"], "function_starts">, - HelpText<"Force generation of a function starts load command">, - Group; -def no_function_starts : Flag<["-"], "no_function_starts">, - HelpText<"Disable generation of a function starts load command">, - Group; -def data_in_code_info : Flag<["-"], "data_in_code_info">, - HelpText<"Force generation of a data in code load command">, - Group; -def no_data_in_code_info : Flag<["-"], "no_data_in_code_info">, - HelpText<"Disable generation of a data in code load command">, - Group; -def mllvm : Separate<["-"], "mllvm">, - MetaVarName<"">, - HelpText<"Add directory to library search path">, Group; -def F : JoinedOrSeparate<["-"], "F">, - MetaVarName<"">, - HelpText<"Add directory to framework search path">, Group; -def Z : Flag<["-"], "Z">, - HelpText<"Do not search standard directories for libraries or frameworks">; -def all_load : Flag<["-"], "all_load">, - HelpText<"Forces all members of all static libraries to be loaded">, - Group; -def force_load : Separate<["-"], "force_load">, - MetaVarName<"">, - HelpText<"Forces all members of specified static libraries to be loaded">, - Group; -def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"">, - HelpText<"Add path to SDK to all absolute library search paths">, - Group; - -// Input 
options -def l : Joined<["-"], "l">, - MetaVarName<"">, - HelpText<"Base name of library searched for in -L directories">; -def upward_l : Joined<["-"], "upward-l">, - MetaVarName<"">, - HelpText<"Base name of upward library searched for in -L directories">; -def framework : Separate<["-"], "framework">, - MetaVarName<"">, - HelpText<"Base name of framework searched for in -F directories">; -def upward_framework : Separate<["-"], "upward_framework">, - MetaVarName<"">, - HelpText<"Base name of upward framework searched for in -F directories">; -def upward_library : Separate<["-"], "upward_library">, - MetaVarName<"">, - HelpText<"path to upward dylib to link with">; -def filelist : Separate<["-"], "filelist">, - MetaVarName<"">, - HelpText<"file containing paths to input files">; - - -// test case options -def print_atoms : Flag<["-"], "print_atoms">, - HelpText<"Emit output as yaml atoms">; -def test_file_usage : Flag<["-"], "test_file_usage">, - HelpText<"Only files specified by -file_exists are considered to exist. 
" - "Print which files would be used">; -def path_exists : Separate<["-"], "path_exists">, - MetaVarName<"">, - HelpText<"Used with -test_file_usage to declare a path">; - - -// general options -def output : Separate<["-"], "o">, - MetaVarName<"">, - HelpText<"Output file path">; -def arch : Separate<["-"], "arch">, - MetaVarName<"">, - HelpText<"Architecture to link">; -def sectalign : MultiArg<["-"], "sectalign", 3>, - MetaVarName<" ">, - HelpText<"Alignment for segment/section">; -def sectcreate : MultiArg<["-"], "sectcreate", 3>, - MetaVarName<" ">, - HelpText<"Create section / from contents of ">; -def image_base : Separate<["-"], "image_base">; -def seg1addr : Separate<["-"], "seg1addr">, Alias; -def demangle : Flag<["-"], "demangle">, - HelpText<"Demangles symbol names in errors and warnings">; -def dependency_info : Separate<["-"], "dependency_info">, - MetaVarName<"">, - HelpText<"Write binary list of files used during link">; -def S : Flag<["-"], "S">, - HelpText<"Remove debug information (STABS or DWARF) from the output file">; -def rpath : Separate<["-"], "rpath">, - MetaVarName<"">, - HelpText<"Add path to the runpath search path list for image being created">; - -def t : Flag<["-"], "t">, - HelpText<"Print the names of the input files as ld processes them">; -def v : Flag<["-"], "v">, - HelpText<"Print linker information">; -def error_limit : Separate<["-", "--"], "error-limit">, - MetaVarName<"">, - HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; - -// Ignored options -def lto_library : Separate<["-"], "lto_library">, - MetaVarName<"">, - HelpText<"Ignored for compatibility with other linkers">; - -// Obsolete options -def grp_obsolete : OptionGroup<"obsolete">, HelpText<"OBSOLETE OPTIONS">; -def single_module : Flag<["-"], "single_module">, - HelpText<"Default for dylibs">, Group; -def multi_module : Flag<["-"], "multi_module">, - HelpText<"Unsupported way to build dylibs">, Group; -def objc_gc_compaction : Flag<["-"], 
"objc_gc_compaction">, - HelpText<"Unsupported ObjC GC option">, Group; -def objc_gc : Flag<["-"], "objc_gc">, - HelpText<"Unsupported ObjC GC option">, Group; -def objc_gc_only : Flag<["-"], "objc_gc_only">, - HelpText<"Unsupported ObjC GC option">, Group; diff --git a/lld/lib/ReaderWriter/CMakeLists.txt b/lld/lib/ReaderWriter/CMakeLists.txt deleted file mode 100644 index bedb836d2c1e..000000000000 --- a/lld/lib/ReaderWriter/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -add_subdirectory(MachO) -add_subdirectory(YAML) - -if (MSVC) - add_definitions(-wd4062) # Suppress 'warning C4062: Enumerator has no associated handler in a switch statement.' -endif() - -add_lld_library(lldReaderWriter - FileArchive.cpp - - ADDITIONAL_HEADER_DIRS - ${LLD_INCLUDE_DIR}/lld/ReaderWriter - - LINK_COMPONENTS - Object - Support - - LINK_LIBS - lldCore - ) diff --git a/lld/lib/ReaderWriter/FileArchive.cpp b/lld/lib/ReaderWriter/FileArchive.cpp deleted file mode 100644 index 98f4d06ee210..000000000000 --- a/lld/lib/ReaderWriter/FileArchive.cpp +++ /dev/null @@ -1,227 +0,0 @@ -//===- lib/ReaderWriter/FileArchive.cpp -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Common/LLVM.h" -#include "lld/Core/ArchiveLibraryFile.h" -#include "lld/Core/File.h" -#include "lld/Core/Reader.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Magic.h" -#include "llvm/Object/Archive.h" -#include "llvm/Object/Error.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include - -using llvm::object::Archive; -using llvm::file_magic; -using llvm::identify_magic; - -namespace lld { - -namespace { - -/// The FileArchive class represents an Archive Library file -class FileArchive : public lld::ArchiveLibraryFile { -public: - FileArchive(std::unique_ptr mb, const Registry ®, - StringRef path, bool logLoading) - : ArchiveLibraryFile(path), _mb(std::shared_ptr(mb.release())), - _registry(reg), _logLoading(logLoading) {} - - /// Check if any member of the archive contains an Atom with the - /// specified name and return the File object for that member, or nullptr. - File *find(StringRef name) override { - auto member = _symbolMemberMap.find(name); - if (member == _symbolMemberMap.end()) - return nullptr; - Archive::Child c = member->second; - - // Don't return a member already returned - Expected buf = c.getBuffer(); - if (!buf) { - // TODO: Actually report errors helpfully. 
- consumeError(buf.takeError()); - return nullptr; - } - const char *memberStart = buf->data(); - if (_membersInstantiated.count(memberStart)) - return nullptr; - _membersInstantiated.insert(memberStart); - - std::unique_ptr result; - if (instantiateMember(c, result)) - return nullptr; - - File *file = result.get(); - _filesReturned.push_back(std::move(result)); - - // Give up the file pointer. It was stored and will be destroyed with destruction of FileArchive - return file; - } - - /// parse each member - std::error_code - parseAllMembers(std::vector> &result) override { - if (std::error_code ec = parse()) - return ec; - llvm::Error err = llvm::Error::success(); - for (auto mf = _archive->child_begin(err), me = _archive->child_end(); - mf != me; ++mf) { - std::unique_ptr file; - if (std::error_code ec = instantiateMember(*mf, file)) { - // err is Success (or we wouldn't be in the loop body) but we can't - // return without testing or consuming it. - consumeError(std::move(err)); - return ec; - } - result.push_back(std::move(file)); - } - if (err) - return errorToErrorCode(std::move(err)); - return std::error_code(); - } - - const AtomRange defined() const override { - return _noDefinedAtoms; - } - - const AtomRange undefined() const override { - return _noUndefinedAtoms; - } - - const AtomRange sharedLibrary() const override { - return _noSharedLibraryAtoms; - } - - const AtomRange absolute() const override { - return _noAbsoluteAtoms; - } - - void clearAtoms() override { - _noDefinedAtoms.clear(); - _noUndefinedAtoms.clear(); - _noSharedLibraryAtoms.clear(); - _noAbsoluteAtoms.clear(); - } - -protected: - std::error_code doParse() override { - // Make Archive object which will be owned by FileArchive object. 
- llvm::Error Err = llvm::Error::success(); - _archive.reset(new Archive(_mb->getMemBufferRef(), Err)); - if (Err) - return errorToErrorCode(std::move(Err)); - std::error_code ec; - if ((ec = buildTableOfContents())) - return ec; - return std::error_code(); - } - -private: - std::error_code instantiateMember(Archive::Child member, - std::unique_ptr &result) const { - Expected mbOrErr = member.getMemoryBufferRef(); - if (!mbOrErr) - return errorToErrorCode(mbOrErr.takeError()); - llvm::MemoryBufferRef mb = mbOrErr.get(); - std::string memberPath = (_archive->getFileName() + "(" - + mb.getBufferIdentifier() + ")").str(); - - if (_logLoading) - llvm::errs() << memberPath << "\n"; - - std::unique_ptr memberMB(MemoryBuffer::getMemBuffer( - mb.getBuffer(), mb.getBufferIdentifier(), false)); - - ErrorOr> fileOrErr = - _registry.loadFile(std::move(memberMB)); - if (std::error_code ec = fileOrErr.getError()) - return ec; - result = std::move(fileOrErr.get()); - if (std::error_code ec = result->parse()) - return ec; - result->setArchivePath(_archive->getFileName()); - - // The memory buffer is co-owned by the archive file and the children, - // so that the bufffer is deallocated when all the members are destructed. 
- result->setSharedMemoryBuffer(_mb); - return std::error_code(); - } - - std::error_code buildTableOfContents() { - DEBUG_WITH_TYPE("FileArchive", llvm::dbgs() - << "Table of contents for archive '" - << _archive->getFileName() << "':\n"); - for (const Archive::Symbol &sym : _archive->symbols()) { - StringRef name = sym.getName(); - Expected memberOrErr = sym.getMember(); - if (!memberOrErr) - return errorToErrorCode(memberOrErr.takeError()); - Archive::Child member = memberOrErr.get(); - DEBUG_WITH_TYPE("FileArchive", - llvm::dbgs() - << llvm::format("0x%08llX ", - member.getBuffer()->data()) - << "'" << name << "'\n"); - _symbolMemberMap.insert(std::make_pair(name, member)); - } - return std::error_code(); - } - - typedef std::unordered_map MemberMap; - typedef std::set InstantiatedSet; - - std::shared_ptr _mb; - const Registry &_registry; - std::unique_ptr _archive; - MemberMap _symbolMemberMap; - InstantiatedSet _membersInstantiated; - bool _logLoading; - std::vector> _memberBuffers; - std::vector> _filesReturned; -}; - -class ArchiveReader : public Reader { -public: - ArchiveReader(bool logLoading) : _logLoading(logLoading) {} - - bool canParse(file_magic magic, MemoryBufferRef) const override { - return magic == file_magic::archive; - } - - ErrorOr> loadFile(std::unique_ptr mb, - const Registry ®) const override { - StringRef path = mb->getBufferIdentifier(); - std::unique_ptr ret = - std::make_unique(std::move(mb), reg, path, _logLoading); - return std::move(ret); - } - -private: - bool _logLoading; -}; - -} // anonymous namespace - -void Registry::addSupportArchives(bool logLoading) { - add(std::unique_ptr(new ArchiveReader(logLoading))); -} - -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler.cpp deleted file mode 100644 index c101f3b157bb..000000000000 --- a/lld/lib/ReaderWriter/MachO/ArchHandler.cpp +++ /dev/null @@ -1,171 +0,0 @@ -//===- lib/FileFormat/MachO/ArchHandler.cpp 
-------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - - -#include "ArchHandler.h" -#include "Atoms.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -namespace lld { -namespace mach_o { - - -ArchHandler::ArchHandler() { -} - -ArchHandler::~ArchHandler() { -} - -std::unique_ptr ArchHandler::create( - MachOLinkingContext::Arch arch) { - switch (arch) { - case MachOLinkingContext::arch_x86_64: - return create_x86_64(); - case MachOLinkingContext::arch_x86: - return create_x86(); - case MachOLinkingContext::arch_armv6: - case MachOLinkingContext::arch_armv7: - case MachOLinkingContext::arch_armv7s: - return create_arm(); - case MachOLinkingContext::arch_arm64: - return create_arm64(); - default: - llvm_unreachable("Unknown arch"); - } -} - - -bool ArchHandler::isLazyPointer(const Reference &ref) { - // A lazy bind entry is needed for a lazy pointer. 
- const StubInfo &info = stubInfo(); - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - if (ref.kindArch() != info.lazyPointerReferenceToFinal.arch) - return false; - return (ref.kindValue() == info.lazyPointerReferenceToFinal.kind); -} - - -ArchHandler::RelocPattern ArchHandler::relocPattern(const Relocation &reloc) { - assert((reloc.type & 0xFFF0) == 0); - uint16_t result = reloc.type; - if (reloc.scattered) - result |= rScattered; - if (reloc.pcRel) - result |= rPcRel; - if (reloc.isExtern) - result |= rExtern; - switch(reloc.length) { - case 0: - break; - case 1: - result |= rLength2; - break; - case 2: - result |= rLength4; - break; - case 3: - result |= rLength8; - break; - default: - llvm_unreachable("bad r_length"); - } - return result; -} - -normalized::Relocation -ArchHandler::relocFromPattern(ArchHandler::RelocPattern pattern) { - normalized::Relocation result; - result.offset = 0; - result.scattered = (pattern & rScattered); - result.type = (RelocationInfoType)(pattern & 0xF); - result.pcRel = (pattern & rPcRel); - result.isExtern = (pattern & rExtern); - result.value = 0; - result.symbol = 0; - switch (pattern & 0x300) { - case rLength1: - result.length = 0; - break; - case rLength2: - result.length = 1; - break; - case rLength4: - result.length = 2; - break; - case rLength8: - result.length = 3; - break; - } - return result; -} - -void ArchHandler::appendReloc(normalized::Relocations &relocs, uint32_t offset, - uint32_t symbol, uint32_t value, - RelocPattern pattern) { - normalized::Relocation reloc = relocFromPattern(pattern); - reloc.offset = offset; - reloc.symbol = symbol; - reloc.value = value; - relocs.push_back(reloc); -} - - -int16_t ArchHandler::readS16(const uint8_t *addr, bool isBig) { - return read16(addr, isBig); -} - -int32_t ArchHandler::readS32(const uint8_t *addr, bool isBig) { - return read32(addr, isBig); -} - -uint32_t ArchHandler::readU32(const uint8_t *addr, bool isBig) { - return read32(addr, 
isBig); -} - - int64_t ArchHandler::readS64(const uint8_t *addr, bool isBig) { - return read64(addr, isBig); -} - -bool ArchHandler::isDwarfCIE(bool isBig, const DefinedAtom *atom) { - assert(atom->contentType() == DefinedAtom::typeCFI); - if (atom->rawContent().size() < sizeof(uint32_t)) - return false; - uint32_t size = read32(atom->rawContent().data(), isBig); - - uint32_t idOffset = sizeof(uint32_t); - if (size == 0xffffffffU) - idOffset += sizeof(uint64_t); - - return read32(atom->rawContent().data() + idOffset, isBig) == 0; -} - -const Atom *ArchHandler::fdeTargetFunction(const DefinedAtom *fde) { - for (auto ref : *fde) { - if (ref->kindNamespace() == Reference::KindNamespace::mach_o && - ref->kindValue() == unwindRefToFunctionKind()) { - assert(ref->kindArch() == kindArch() && "unexpected Reference arch"); - return ref->target(); - } - } - - return nullptr; -} - -} // namespace mach_o -} // namespace lld - - - diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler.h b/lld/lib/ReaderWriter/MachO/ArchHandler.h deleted file mode 100644 index 83646c09b1a8..000000000000 --- a/lld/lib/ReaderWriter/MachO/ArchHandler.h +++ /dev/null @@ -1,322 +0,0 @@ -//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_ARCH_HANDLER_H -#define LLD_READER_WRITER_MACHO_ARCH_HANDLER_H - -#include "Atoms.h" -#include "File.h" -#include "MachONormalizedFile.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/Triple.h" - -namespace lld { -namespace mach_o { - -/// -/// The ArchHandler class handles all architecture specific aspects of -/// mach-o linking. -/// -class ArchHandler { -public: - virtual ~ArchHandler(); - - /// There is no public interface to subclasses of ArchHandler, so this - /// is the only way to instantiate an ArchHandler. - static std::unique_ptr create(MachOLinkingContext::Arch arch); - - /// Get (arch specific) kind strings used by Registry. - virtual const Registry::KindStrings *kindStrings() = 0; - - /// Convert mach-o Arch to Reference::KindArch. - virtual Reference::KindArch kindArch() = 0; - - /// Used by StubPass to update References to shared library functions - /// to be references to a stub. - virtual bool isCallSite(const Reference &) = 0; - - /// Used by GOTPass to locate GOT References - virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) { - return false; - } - - /// Used by TLVPass to locate TLV References. - virtual bool isTLVAccess(const Reference &) const { return false; } - - /// Used by the TLVPass to update TLV References. - virtual void updateReferenceToTLV(const Reference *) {} - - /// Used by ShimPass to insert shims in branches that switch mode. 
- virtual bool isNonCallBranch(const Reference &) = 0; - - /// Used by GOTPass to update GOT References - virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {} - - /// Does this architecture make use of __unwind_info sections for exception - /// handling? If so, it will need a separate pass to create them. - virtual bool needsCompactUnwind() = 0; - - /// Returns the kind of reference to use to synthesize a 32-bit image-offset - /// value, used in the __unwind_info section. - virtual Reference::KindValue imageOffsetKind() = 0; - - /// Returns the kind of reference to use to synthesize a 32-bit image-offset - /// indirect value. Used for personality functions in the __unwind_info - /// section. - virtual Reference::KindValue imageOffsetKindIndirect() = 0; - - /// Architecture specific compact unwind type that signals __eh_frame should - /// actually be used. - virtual uint32_t dwarfCompactUnwindType() = 0; - - /// Reference from an __eh_frame CIE atom to its personality function it's - /// describing. Usually pointer-sized and PC-relative, but differs in whether - /// it needs to be in relocatable objects. - virtual Reference::KindValue unwindRefToPersonalityFunctionKind() = 0; - - /// Reference from an __eh_frame FDE to the CIE it's based on. - virtual Reference::KindValue unwindRefToCIEKind() = 0; - - /// Reference from an __eh_frame FDE atom to the function it's - /// describing. Usually pointer-sized and PC-relative, but differs in whether - /// it needs to be in relocatable objects. - virtual Reference::KindValue unwindRefToFunctionKind() = 0; - - /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the - /// required __eh_frame entry. On current architectures, the low 24 bits - /// represent the offset of the function's FDE entry from the start of - /// __eh_frame. - virtual Reference::KindValue unwindRefToEhFrameKind() = 0; - - /// Returns a pointer sized reference kind. 
On 64-bit targets this will - /// likely be something like pointer64, and pointer32 on 32-bit targets. - virtual Reference::KindValue pointerKind() = 0; - - virtual const Atom *fdeTargetFunction(const DefinedAtom *fde); - - /// Used by normalizedFromAtoms() to know where to generated rebasing and - /// binding info in final executables. - virtual bool isPointer(const Reference &) = 0; - - /// Used by normalizedFromAtoms() to know where to generated lazy binding - /// info in final executables. - virtual bool isLazyPointer(const Reference &); - - /// Reference from an __stub_helper entry to the required offset of the - /// lazy bind commands. - virtual Reference::KindValue lazyImmediateLocationKind() = 0; - - /// Returns true if the specified relocation is paired to the next relocation. - virtual bool isPairedReloc(const normalized::Relocation &) = 0; - - /// Prototype for a helper function. Given a sectionIndex and address, - /// finds the atom and offset with that atom of that address. - typedef std::function - FindAtomBySectionAndAddress; - - /// Prototype for a helper function. Given a symbolIndex, finds the atom - /// representing that symbol. - typedef std::function FindAtomBySymbolIndex; - - /// Analyzes a relocation from a .o file and returns the info - /// (kind, target, addend) needed to instantiate a Reference. - /// Two helper functions are passed as parameters to find the target atom - /// given a symbol index or address. - virtual llvm::Error - getReferenceInfo(const normalized::Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBigEndian, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) = 0; - - /// Analyzes a pair of relocations from a .o file and returns the info - /// (kind, target, addend) needed to instantiate a Reference. 
- /// Two helper functions are passed as parameters to find the target atom - /// given a symbol index or address. - virtual llvm::Error - getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBig, bool scatterable, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) = 0; - - /// Prototype for a helper function. Given an atom, finds the symbol table - /// index for it in the output file. - typedef std::function FindSymbolIndexForAtom; - - /// Prototype for a helper function. Given an atom, finds the index - /// of the section that will contain the atom. - typedef std::function FindSectionIndexForAtom; - - /// Prototype for a helper function. Given an atom, finds the address - /// assigned to it in the output file. - typedef std::function FindAddressForAtom; - - /// Some architectures require local symbols on anonymous atoms. - virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) { - return false; - } - - /// Copy raw content then apply all fixup References on an Atom. - virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) = 0; - - /// Used in -r mode to convert a Reference to a mach-o relocation. - virtual void appendSectionRelocations(const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom, - FindSectionIndexForAtom, - FindAddressForAtom, - normalized::Relocations&) = 0; - - /// Add arch-specific References. - virtual void addAdditionalReferences(MachODefinedAtom &atom) { } - - // Add Reference for data-in-code marker. 
- virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff, - uint16_t length, uint16_t kind) { } - - /// Returns true if the specificed Reference value marks the start or end - /// of a data-in-code range in an atom. - virtual bool isDataInCodeTransition(Reference::KindValue refKind) { - return false; - } - - /// Returns the Reference value for a Reference that marks that start of - /// a data-in-code range. - virtual Reference::KindValue dataInCodeTransitionStart( - const MachODefinedAtom &atom) { - return 0; - } - - /// Returns the Reference value for a Reference that marks that end of - /// a data-in-code range. - virtual Reference::KindValue dataInCodeTransitionEnd( - const MachODefinedAtom &atom) { - return 0; - } - - /// Only relevant for 32-bit arm archs. - virtual bool isThumbFunction(const DefinedAtom &atom) { return false; } - - /// Only relevant for 32-bit arm archs. - virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, - const DefinedAtom &) { - llvm_unreachable("shims only support on arm"); - } - - /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE). - static bool isDwarfCIE(bool isBig, const DefinedAtom *atom); - - struct ReferenceInfo { - Reference::KindArch arch; - uint16_t kind; - uint32_t offset; - int32_t addend; - }; - - struct OptionalRefInfo { - bool used; - uint16_t kind; - uint32_t offset; - int32_t addend; - }; - - /// Table of architecture specific information for creating stubs. 
- struct StubInfo { - const char* binderSymbolName; - ReferenceInfo lazyPointerReferenceToHelper; - ReferenceInfo lazyPointerReferenceToFinal; - ReferenceInfo nonLazyPointerReferenceToBinder; - uint8_t codeAlignment; - - uint32_t stubSize; - uint8_t stubBytes[16]; - ReferenceInfo stubReferenceToLP; - OptionalRefInfo optStubReferenceToLP; - - uint32_t stubHelperSize; - uint8_t stubHelperBytes[16]; - ReferenceInfo stubHelperReferenceToImm; - ReferenceInfo stubHelperReferenceToHelperCommon; - - DefinedAtom::ContentType stubHelperImageCacheContentType; - - uint32_t stubHelperCommonSize; - uint8_t stubHelperCommonAlignment; - uint8_t stubHelperCommonBytes[36]; - ReferenceInfo stubHelperCommonReferenceToCache; - OptionalRefInfo optStubHelperCommonReferenceToCache; - ReferenceInfo stubHelperCommonReferenceToBinder; - OptionalRefInfo optStubHelperCommonReferenceToBinder; - }; - - virtual const StubInfo &stubInfo() = 0; - -protected: - ArchHandler(); - - static std::unique_ptr create_x86_64(); - static std::unique_ptr create_x86(); - static std::unique_ptr create_arm(); - static std::unique_ptr create_arm64(); - - // Handy way to pack mach-o r_type and other bit fields into one 16-bit value. - typedef uint16_t RelocPattern; - enum { - rScattered = 0x8000, - rPcRel = 0x4000, - rExtern = 0x2000, - rLength1 = 0x0000, - rLength2 = 0x0100, - rLength4 = 0x0200, - rLength8 = 0x0300, - rLenArmLo = rLength1, - rLenArmHi = rLength2, - rLenThmbLo = rLength4, - rLenThmbHi = rLength8 - }; - /// Extract RelocPattern from normalized mach-o relocation. - static RelocPattern relocPattern(const normalized::Relocation &reloc); - /// Create normalized Relocation initialized from pattern. - static normalized::Relocation relocFromPattern(RelocPattern pattern); - /// One liner to add a relocation. 
- static void appendReloc(normalized::Relocations &relocs, uint32_t offset, - uint32_t symbol, uint32_t value, - RelocPattern pattern); - - - static int16_t readS16(const uint8_t *addr, bool isBig); - static int32_t readS32(const uint8_t *addr, bool isBig); - static uint32_t readU32(const uint8_t *addr, bool isBig); - static int64_t readS64(const uint8_t *addr, bool isBig); -}; - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_ARCH_HANDLER_H diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp deleted file mode 100644 index 06c98ac06fd1..000000000000 --- a/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp +++ /dev/null @@ -1,1522 +0,0 @@ -//===- lib/FileFormat/MachO/ArchHandler_arm.cpp ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "Atoms.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -namespace lld { -namespace mach_o { - -using llvm::support::ulittle32_t; -using llvm::support::little32_t; - - -class ArchHandler_arm : public ArchHandler { -public: - ArchHandler_arm() = default; - ~ArchHandler_arm() override = default; - - const Registry::KindStrings *kindStrings() override { return _sKindStrings; } - - Reference::KindArch kindArch() override { return Reference::KindArch::ARM; } - - const ArchHandler::StubInfo &stubInfo() override; - bool isCallSite(const Reference &) override; - bool isPointer(const Reference &) override; 
- bool isPairedReloc(const normalized::Relocation &) override; - bool isNonCallBranch(const Reference &) override; - - bool needsCompactUnwind() override { - return false; - } - Reference::KindValue imageOffsetKind() override { - return invalid; - } - Reference::KindValue imageOffsetKindIndirect() override { - return invalid; - } - - Reference::KindValue unwindRefToPersonalityFunctionKind() override { - return invalid; - } - - Reference::KindValue unwindRefToCIEKind() override { - return invalid; - } - - Reference::KindValue unwindRefToFunctionKind() override { - return invalid; - } - - Reference::KindValue unwindRefToEhFrameKind() override { - return invalid; - } - - Reference::KindValue lazyImmediateLocationKind() override { - return lazyImmediateLocation; - } - - Reference::KindValue pointerKind() override { - return invalid; - } - - uint32_t dwarfCompactUnwindType() override { - // FIXME - return -1; - } - - llvm::Error getReferenceInfo(const normalized::Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - llvm::Error - getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, bool scatterable, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - - void generateAtomContent(const DefinedAtom &atom, bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) override; - - void appendSectionRelocations(const DefinedAtom &atom, - uint64_t 
atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom, - FindSectionIndexForAtom, - FindAddressForAtom, - normalized::Relocations &) override; - - void addAdditionalReferences(MachODefinedAtom &atom) override; - - bool isDataInCodeTransition(Reference::KindValue refKind) override { - switch (refKind) { - case modeThumbCode: - case modeArmCode: - case modeData: - return true; - default: - return false; - break; - } - } - - Reference::KindValue dataInCodeTransitionStart( - const MachODefinedAtom &atom) override { - return modeData; - } - - Reference::KindValue dataInCodeTransitionEnd( - const MachODefinedAtom &atom) override { - return atom.isThumb() ? modeThumbCode : modeArmCode; - } - - bool isThumbFunction(const DefinedAtom &atom) override; - const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, - const DefinedAtom &) override; - -private: - friend class Thumb2ToArmShimAtom; - friend class ArmToThumbShimAtom; - - static const Registry::KindStrings _sKindStrings[]; - static const StubInfo _sStubInfoArmPIC; - - enum ArmKind : Reference::KindValue { - invalid, /// for error condition - - modeThumbCode, /// Content starting at this offset is thumb. - modeArmCode, /// Content starting at this offset is arm. - modeData, /// Content starting at this offset is data. - - // Kinds found in mach-o .o files: - thumb_bl22, /// ex: bl _foo - thumb_b22, /// ex: b _foo - thumb_movw, /// ex: movw r1, :lower16:_foo - thumb_movt, /// ex: movt r1, :lower16:_foo - thumb_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) - thumb_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) - arm_bl24, /// ex: bl _foo - arm_b24, /// ex: b _foo - arm_movw, /// ex: movw r1, :lower16:_foo - arm_movt, /// ex: movt r1, :lower16:_foo - arm_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) - arm_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) - pointer32, /// ex: .long _foo - delta32, /// ex: .long _foo - . 
- - // Kinds introduced by Passes: - lazyPointer, /// Location contains a lazy pointer. - lazyImmediateLocation, /// Location contains immediate value used in stub. - }; - - // Utility functions for inspecting/updating instructions. - static bool isThumbMovw(uint32_t instruction); - static bool isThumbMovt(uint32_t instruction); - static bool isArmMovw(uint32_t instruction); - static bool isArmMovt(uint32_t instruction); - static int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t); - static int32_t getDisplacementFromArmBranch(uint32_t instruction); - static uint16_t getWordFromThumbMov(uint32_t instruction); - static uint16_t getWordFromArmMov(uint32_t instruction); - static uint32_t clearThumbBit(uint32_t value, const Atom *target); - static uint32_t setDisplacementInArmBranch(uint32_t instr, int32_t disp, - bool targetIsThumb); - static uint32_t setDisplacementInThumbBranch(uint32_t instr, uint32_t ia, - int32_t disp, bool targetThumb); - static uint32_t setWordFromThumbMov(uint32_t instruction, uint16_t word); - static uint32_t setWordFromArmMov(uint32_t instruction, uint16_t word); - - StringRef stubName(const DefinedAtom &); - bool useExternalRelocationTo(const Atom &target); - - void applyFixupFinal(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, uint64_t targetAddress, - uint64_t inAtomAddress, bool &thumbMode, - bool targetIsThumb); - - void applyFixupRelocatable(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress, bool &thumbMode, - bool targetIsThumb); -}; - -//===----------------------------------------------------------------------===// -// ArchHandler_arm -//===----------------------------------------------------------------------===// - -const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = { - LLD_KIND_STRING_ENTRY(invalid), - LLD_KIND_STRING_ENTRY(modeThumbCode), - LLD_KIND_STRING_ENTRY(modeArmCode), - LLD_KIND_STRING_ENTRY(modeData), - 
LLD_KIND_STRING_ENTRY(thumb_bl22), - LLD_KIND_STRING_ENTRY(thumb_b22), - LLD_KIND_STRING_ENTRY(thumb_movw), - LLD_KIND_STRING_ENTRY(thumb_movt), - LLD_KIND_STRING_ENTRY(thumb_movw_funcRel), - LLD_KIND_STRING_ENTRY(thumb_movt_funcRel), - LLD_KIND_STRING_ENTRY(arm_bl24), - LLD_KIND_STRING_ENTRY(arm_b24), - LLD_KIND_STRING_ENTRY(arm_movw), - LLD_KIND_STRING_ENTRY(arm_movt), - LLD_KIND_STRING_ENTRY(arm_movw_funcRel), - LLD_KIND_STRING_ENTRY(arm_movt_funcRel), - LLD_KIND_STRING_ENTRY(pointer32), - LLD_KIND_STRING_ENTRY(delta32), - LLD_KIND_STRING_ENTRY(lazyPointer), - LLD_KIND_STRING_ENTRY(lazyImmediateLocation), - LLD_KIND_STRING_END -}; - -const ArchHandler::StubInfo ArchHandler_arm::_sStubInfoArmPIC = { - "dyld_stub_binder", - - // References in lazy pointer - { Reference::KindArch::ARM, pointer32, 0, 0 }, - { Reference::KindArch::ARM, lazyPointer, 0, 0 }, - - // GOT pointer to dyld_stub_binder - { Reference::KindArch::ARM, pointer32, 0, 0 }, - - // arm code alignment 2^2 - 2, - - // Stub size and code - 16, - { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 12 - 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip - 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] - 0x00, 0x00, 0x00, 0x00 }, // .long L_foo$lazy_ptr - (L1$scv + 8) - { Reference::KindArch::ARM, delta32, 12, 0 }, - { false, 0, 0, 0 }, - - // Stub Helper size and code - 12, - { 0x00, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #0] - 0x00, 0x00, 0x00, 0xEA, // b _helperhelper - 0x00, 0x00, 0x00, 0x00 }, // .long lazy-info-offset - { Reference::KindArch::ARM, lazyImmediateLocation, 8, 0 }, - { Reference::KindArch::ARM, arm_b24, 4, 0 }, - - // Stub helper image cache content type - DefinedAtom::typeGOT, - - // Stub Helper-Common size and code - 36, - // Stub helper alignment - 2, - { // push lazy-info-offset - 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! - // push address of dyld_mageLoaderCache - 0x10, 0xC0, 0x9F, 0xE5, // ldr ip, L1 - 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip - 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! 
- // jump through dyld_stub_binder - 0x08, 0xC0, 0x9F, 0xE5, // ldr ip, L2 - 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip - 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] - 0x00, 0x00, 0x00, 0x00, // L1: .long fFastStubGOTAtom - (helper+16) - 0x00, 0x00, 0x00, 0x00 }, // L2: .long dyld_stub_binder - (helper+28) - { Reference::KindArch::ARM, delta32, 28, 0xC }, - { false, 0, 0, 0 }, - { Reference::KindArch::ARM, delta32, 32, 0x04 }, - { false, 0, 0, 0 } -}; - -const ArchHandler::StubInfo &ArchHandler_arm::stubInfo() { - // If multiple kinds of stubs are supported, select which StubInfo here. - return _sStubInfoArmPIC; -} - -bool ArchHandler_arm::isCallSite(const Reference &ref) { - switch (ref.kindValue()) { - case thumb_b22: - case thumb_bl22: - case arm_b24: - case arm_bl24: - return true; - default: - return false; - } -} - -bool ArchHandler_arm::isPointer(const Reference &ref) { - return (ref.kindValue() == pointer32); -} - -bool ArchHandler_arm::isNonCallBranch(const Reference &ref) { - switch (ref.kindValue()) { - case thumb_b22: - case arm_b24: - return true; - default: - return false; - } -} - -bool ArchHandler_arm::isPairedReloc(const Relocation &reloc) { - switch (reloc.type) { - case ARM_RELOC_SECTDIFF: - case ARM_RELOC_LOCAL_SECTDIFF: - case ARM_RELOC_HALF_SECTDIFF: - case ARM_RELOC_HALF: - return true; - default: - return false; - } -} - -/// Trace references from stub atom to lazy pointer to target and get its name. -StringRef ArchHandler_arm::stubName(const DefinedAtom &stubAtom) { - assert(stubAtom.contentType() == DefinedAtom::typeStub); - for (const Reference *ref : stubAtom) { - if (const DefinedAtom* lp = dyn_cast(ref->target())) { - if (lp->contentType() != DefinedAtom::typeLazyPointer) - continue; - for (const Reference *ref2 : *lp) { - if (ref2->kindValue() != lazyPointer) - continue; - return ref2->target()->name(); - } - } - } - return "stub"; -} - -/// Extract displacement from an ARM b/bl/blx instruction. 
-int32_t ArchHandler_arm::getDisplacementFromArmBranch(uint32_t instruction) { - // Sign-extend imm24 - int32_t displacement = (instruction & 0x00FFFFFF) << 2; - if ((displacement & 0x02000000) != 0) - displacement |= 0xFC000000; - // If this is BLX and H bit set, add 2. - if ((instruction & 0xFF000000) == 0xFB000000) - displacement += 2; - return displacement; -} - -/// Update an ARM b/bl/blx instruction, switching bl <-> blx as needed. -uint32_t ArchHandler_arm::setDisplacementInArmBranch(uint32_t instruction, - int32_t displacement, - bool targetIsThumb) { - assert((displacement <= 33554428) && (displacement > (-33554432)) - && "arm branch out of range"); - bool is_blx = ((instruction & 0xF0000000) == 0xF0000000); - uint32_t newInstruction = (instruction & 0xFF000000); - uint32_t h = 0; - if (targetIsThumb) { - // Force use of BLX. - newInstruction = 0xFA000000; - if (!is_blx) { - assert(((instruction & 0xF0000000) == 0xE0000000) - && "no conditional arm blx"); - assert(((instruction & 0xFF000000) == 0xEB000000) - && "no arm pc-rel BX instruction"); - } - if (displacement & 2) - h = 1; - } - else { - // Force use of B/BL. - if (is_blx) - newInstruction = 0xEB000000; - } - newInstruction |= (h << 24) | ((displacement >> 2) & 0x00FFFFFF); - return newInstruction; -} - -/// Extract displacement from a thumb b/bl/blx instruction. -int32_t ArchHandler_arm::getDisplacementFromThumbBranch(uint32_t instruction, - uint32_t instrAddr) { - bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); - uint32_t s = (instruction >> 10) & 0x1; - uint32_t j1 = (instruction >> 29) & 0x1; - uint32_t j2 = (instruction >> 27) & 0x1; - uint32_t imm10 = instruction & 0x3FF; - uint32_t imm11 = (instruction >> 16) & 0x7FF; - uint32_t i1 = (j1 == s); - uint32_t i2 = (j2 == s); - uint32_t dis = - (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1); - int32_t sdis = dis; - int32_t result = s ? 
(sdis | 0xFE000000) : sdis; - if (is_blx && (instrAddr & 0x2)) { - // The thumb blx instruction always has low bit of imm11 as zero. The way - // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that - // the blx instruction always 4-byte aligns the pc before adding the - // displacement from the blx. We must emulate that when decoding this. - result -= 2; - } - return result; -} - -/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed. -uint32_t ArchHandler_arm::setDisplacementInThumbBranch(uint32_t instruction, - uint32_t instrAddr, - int32_t displacement, - bool targetIsThumb) { - assert((displacement <= 16777214) && (displacement > (-16777216)) - && "thumb branch out of range"); - bool is_bl = ((instruction & 0xD000F800) == 0xD000F000); - bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); - bool is_b = ((instruction & 0xD000F800) == 0x9000F000); - uint32_t newInstruction = (instruction & 0xD000F800); - if (is_bl || is_blx) { - if (targetIsThumb) { - newInstruction = 0xD000F000; // Use bl - } else { - newInstruction = 0xC000F000; // Use blx - // See note in getDisplacementFromThumbBranch() about blx. 
- if (instrAddr & 0x2) - displacement += 2; - } - } else if (is_b) { - assert(targetIsThumb && "no pc-rel thumb branch instruction that " - "switches to arm mode"); - } - else { - llvm_unreachable("thumb branch22 reloc on a non-branch instruction"); - } - uint32_t s = (uint32_t)(displacement >> 24) & 0x1; - uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1; - uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1; - uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF; - uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF; - uint32_t j1 = (i1 == s); - uint32_t j2 = (i2 == s); - uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11; - uint32_t firstDisp = (s << 10) | imm10; - newInstruction |= (nextDisp << 16) | firstDisp; - return newInstruction; -} - -bool ArchHandler_arm::isThumbMovw(uint32_t instruction) { - return (instruction & 0x8000FBF0) == 0x0000F240; -} - -bool ArchHandler_arm::isThumbMovt(uint32_t instruction) { - return (instruction & 0x8000FBF0) == 0x0000F2C0; -} - -bool ArchHandler_arm::isArmMovw(uint32_t instruction) { - return (instruction & 0x0FF00000) == 0x03000000; -} - -bool ArchHandler_arm::isArmMovt(uint32_t instruction) { - return (instruction & 0x0FF00000) == 0x03400000; -} - -uint16_t ArchHandler_arm::getWordFromThumbMov(uint32_t instruction) { - assert(isThumbMovw(instruction) || isThumbMovt(instruction)); - uint32_t i = ((instruction & 0x00000400) >> 10); - uint32_t imm4 = (instruction & 0x0000000F); - uint32_t imm3 = ((instruction & 0x70000000) >> 28); - uint32_t imm8 = ((instruction & 0x00FF0000) >> 16); - return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; -} - -uint16_t ArchHandler_arm::getWordFromArmMov(uint32_t instruction) { - assert(isArmMovw(instruction) || isArmMovt(instruction)); - uint32_t imm4 = ((instruction & 0x000F0000) >> 16); - uint32_t imm12 = (instruction & 0x00000FFF); - return (imm4 << 12) | imm12; -} - -uint32_t ArchHandler_arm::setWordFromThumbMov(uint32_t instr, uint16_t word) { - assert(isThumbMovw(instr) || 
isThumbMovt(instr)); - uint32_t imm4 = (word & 0xF000) >> 12; - uint32_t i = (word & 0x0800) >> 11; - uint32_t imm3 = (word & 0x0700) >> 8; - uint32_t imm8 = word & 0x00FF; - return (instr & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16); -} - -uint32_t ArchHandler_arm::setWordFromArmMov(uint32_t instr, uint16_t word) { - assert(isArmMovw(instr) || isArmMovt(instr)); - uint32_t imm4 = (word & 0xF000) >> 12; - uint32_t imm12 = word & 0x0FFF; - return (instr & 0xFFF0F000) | (imm4 << 16) | imm12; -} - -uint32_t ArchHandler_arm::clearThumbBit(uint32_t value, const Atom *target) { - // The assembler often adds one to the address of a thumb function. - // We need to undo that so it does not look like an addend. - if (value & 1) { - if (isa(target)) { - const MachODefinedAtom *machoTarget = - reinterpret_cast(target); - if (machoTarget->isThumb()) - value &= -2; // mask off thumb-bit - } - } - return value; -} - -llvm::Error ArchHandler_arm::getReferenceInfo( - const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBig, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, - const lld::Atom **target, Reference::Addend *addend) { - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - uint64_t targetAddress; - uint32_t instruction = *(const ulittle32_t *)fixupContent; - int32_t displacement; - switch (relocPattern(reloc)) { - case ARM_THUMB_RELOC_BR22 | rPcRel | rExtern | rLength4: - // ex: bl _foo (and _foo is undefined) - if ((instruction & 0xD000F800) == 0x9000F000) - *kind = thumb_b22; - else - *kind = thumb_bl22; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - // Instruction contains branch to addend. 
- displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); - *addend = fixupAddress + 4 + displacement; - return llvm::Error::success(); - case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4: - // ex: bl _foo (and _foo is defined) - if ((instruction & 0xD000F800) == 0x9000F000) - *kind = thumb_b22; - else - *kind = thumb_bl22; - displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); - targetAddress = fixupAddress + 4 + displacement; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - case ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4: - // ex: bl _foo+4 (and _foo is defined) - if ((instruction & 0xD000F800) == 0x9000F000) - *kind = thumb_b22; - else - *kind = thumb_bl22; - displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); - targetAddress = fixupAddress + 4 + displacement; - if (auto ec = atomFromAddress(0, reloc.value, target, addend)) - return ec; - // reloc.value is target atom's address. Instruction contains branch - // to atom+addend. - *addend += (targetAddress - reloc.value); - return llvm::Error::success(); - case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4: - // ex: bl _foo (and _foo is undefined) - if (((instruction & 0x0F000000) == 0x0A000000) - && ((instruction & 0xF0000000) != 0xF0000000)) - *kind = arm_b24; - else - *kind = arm_bl24; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - // Instruction contains branch to addend. 
- displacement = getDisplacementFromArmBranch(instruction); - *addend = fixupAddress + 8 + displacement; - return llvm::Error::success(); - case ARM_RELOC_BR24 | rPcRel | rLength4: - // ex: bl _foo (and _foo is defined) - if (((instruction & 0x0F000000) == 0x0A000000) - && ((instruction & 0xF0000000) != 0xF0000000)) - *kind = arm_b24; - else - *kind = arm_bl24; - displacement = getDisplacementFromArmBranch(instruction); - targetAddress = fixupAddress + 8 + displacement; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - case ARM_RELOC_BR24 | rScattered | rPcRel | rLength4: - // ex: bl _foo+4 (and _foo is defined) - if (((instruction & 0x0F000000) == 0x0A000000) - && ((instruction & 0xF0000000) != 0xF0000000)) - *kind = arm_b24; - else - *kind = arm_bl24; - displacement = getDisplacementFromArmBranch(instruction); - targetAddress = fixupAddress + 8 + displacement; - if (auto ec = atomFromAddress(0, reloc.value, target, addend)) - return ec; - // reloc.value is target atom's address. Instruction contains branch - // to atom+addend. 
- *addend += (targetAddress - reloc.value); - return llvm::Error::success(); - case ARM_RELOC_VANILLA | rExtern | rLength4: - // ex: .long _foo (and _foo is undefined) - *kind = pointer32; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = instruction; - return llvm::Error::success(); - case ARM_RELOC_VANILLA | rLength4: - // ex: .long _foo (and _foo is defined) - *kind = pointer32; - if (auto ec = atomFromAddress(reloc.symbol, instruction, target, addend)) - return ec; - *addend = clearThumbBit((uint32_t) * addend, *target); - return llvm::Error::success(); - case ARM_RELOC_VANILLA | rScattered | rLength4: - // ex: .long _foo+a (and _foo is defined) - *kind = pointer32; - if (auto ec = atomFromAddress(0, reloc.value, target, addend)) - return ec; - *addend += (clearThumbBit(instruction, *target) - reloc.value); - return llvm::Error::success(); - default: - return llvm::make_error("unsupported arm relocation type"); - } - return llvm::Error::success(); -} - -llvm::Error -ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBig, - bool scatterable, - FindAtomBySectionAndAddress atomFromAddr, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) { - bool pointerDiff = false; - bool funcRel; - bool top; - bool thumbReloc; - switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { - case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo) << 16 | - ARM_RELOC_PAIR | rScattered | rLenThmbLo): - // ex: movw r1, :lower16:(_x-L1) [thumb mode] - *kind = thumb_movw_funcRel; - funcRel = true; - top = false; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi) << 16 | - ARM_RELOC_PAIR | rScattered | rLenThmbHi): - // ex: movt r1, :upper16:(_x-L1) [thumb mode] - *kind = 
thumb_movt_funcRel; - funcRel = true; - top = true; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo) << 16 | - ARM_RELOC_PAIR | rScattered | rLenArmLo): - // ex: movw r1, :lower16:(_x-L1) [arm mode] - *kind = arm_movw_funcRel; - funcRel = true; - top = false; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi) << 16 | - ARM_RELOC_PAIR | rScattered | rLenArmHi): - // ex: movt r1, :upper16:(_x-L1) [arm mode] - *kind = arm_movt_funcRel; - funcRel = true; - top = true; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF | rLenThmbLo) << 16 | - ARM_RELOC_PAIR | rLenThmbLo): - // ex: movw r1, :lower16:_x [thumb mode] - *kind = thumb_movw; - funcRel = false; - top = false; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF | rLenThmbHi) << 16 | - ARM_RELOC_PAIR | rLenThmbHi): - // ex: movt r1, :upper16:_x [thumb mode] - *kind = thumb_movt; - funcRel = false; - top = true; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF | rLenArmLo) << 16 | - ARM_RELOC_PAIR | rLenArmLo): - // ex: movw r1, :lower16:_x [arm mode] - *kind = arm_movw; - funcRel = false; - top = false; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF | rLenArmHi) << 16 | - ARM_RELOC_PAIR | rLenArmHi): - // ex: movt r1, :upper16:_x [arm mode] - *kind = arm_movt; - funcRel = false; - top = true; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF | rScattered | rLenThmbLo) << 16 | - ARM_RELOC_PAIR | rLenThmbLo): - // ex: movw r1, :lower16:_x+a [thumb mode] - *kind = thumb_movw; - funcRel = false; - top = false; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF | rScattered | rLenThmbHi) << 16 | - ARM_RELOC_PAIR | rLenThmbHi): - // ex: movt r1, :upper16:_x+a [thumb mode] - *kind = thumb_movt; - funcRel = false; - top = true; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF | rScattered | rLenArmLo) << 16 | - ARM_RELOC_PAIR | rLenArmLo): - // ex: movw r1, :lower16:_x+a [arm mode] - *kind = arm_movw; - funcRel 
= false; - top = false; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF | rScattered | rLenArmHi) << 16 | - ARM_RELOC_PAIR | rLenArmHi): - // ex: movt r1, :upper16:_x+a [arm mode] - *kind = arm_movt; - funcRel = false; - top = true; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF | rExtern | rLenThmbLo) << 16 | - ARM_RELOC_PAIR | rLenThmbLo): - // ex: movw r1, :lower16:_undef [thumb mode] - *kind = thumb_movw; - funcRel = false; - top = false; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF | rExtern | rLenThmbHi) << 16 | - ARM_RELOC_PAIR | rLenThmbHi): - // ex: movt r1, :upper16:_undef [thumb mode] - *kind = thumb_movt; - funcRel = false; - top = true; - thumbReloc = true; - break; - case ((ARM_RELOC_HALF | rExtern | rLenArmLo) << 16 | - ARM_RELOC_PAIR | rLenArmLo): - // ex: movw r1, :lower16:_undef [arm mode] - *kind = arm_movw; - funcRel = false; - top = false; - thumbReloc = false; - break; - case ((ARM_RELOC_HALF | rExtern | rLenArmHi) << 16 | - ARM_RELOC_PAIR | rLenArmHi): - // ex: movt r1, :upper16:_undef [arm mode] - *kind = arm_movt; - funcRel = false; - top = true; - thumbReloc = false; - break; - case ((ARM_RELOC_SECTDIFF | rScattered | rLength4) << 16 | - ARM_RELOC_PAIR | rScattered | rLength4): - case ((ARM_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | - ARM_RELOC_PAIR | rScattered | rLength4): - // ex: .long _foo - . 
- pointerDiff = true; - break; - default: - return llvm::make_error("unsupported arm relocation pair"); - } - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - uint32_t instruction = *(const ulittle32_t *)fixupContent; - uint32_t value; - uint32_t fromAddress; - uint32_t toAddress; - uint16_t instruction16; - uint16_t other16; - const lld::Atom *fromTarget; - Reference::Addend offsetInTo; - Reference::Addend offsetInFrom; - if (pointerDiff) { - toAddress = reloc1.value; - fromAddress = reloc2.value; - if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) - return ec; - if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) - return ec; - if (scatterable && (fromTarget != inAtom)) - return llvm::make_error( - "SECTDIFF relocation where subtrahend label is not in atom"); - *kind = delta32; - value = clearThumbBit(instruction, *target); - *addend = (int32_t)(value - (toAddress - fixupAddress)); - } else if (funcRel) { - toAddress = reloc1.value; - fromAddress = reloc2.value; - if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) - return ec; - if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) - return ec; - if (fromTarget != inAtom) - return llvm::make_error("ARM_RELOC_HALF_SECTDIFF relocation" - " where subtrahend label is not in atom"); - other16 = (reloc2.offset & 0xFFFF); - if (thumbReloc) { - if (top) { - if (!isThumbMovt(instruction)) - return llvm::make_error("expected movt instruction"); - } - else { - if (!isThumbMovw(instruction)) - return llvm::make_error("expected movw instruction"); - } - instruction16 = getWordFromThumbMov(instruction); - } - else { - if (top) { - if (!isArmMovt(instruction)) - return llvm::make_error("expected movt instruction"); - } - else { - if (!isArmMovw(instruction)) - return llvm::make_error("expected movw instruction"); - } - instruction16 = getWordFromArmMov(instruction); - } - if (top) - value = (instruction16 << 16) | other16; - else - value = 
(other16 << 16) | instruction16; - value = clearThumbBit(value, *target); - int64_t ta = (int64_t) value - (toAddress - fromAddress); - *addend = ta - offsetInFrom; - return llvm::Error::success(); - } else { - uint32_t sectIndex; - if (thumbReloc) { - if (top) { - if (!isThumbMovt(instruction)) - return llvm::make_error("expected movt instruction"); - } - else { - if (!isThumbMovw(instruction)) - return llvm::make_error("expected movw instruction"); - } - instruction16 = getWordFromThumbMov(instruction); - } - else { - if (top) { - if (!isArmMovt(instruction)) - return llvm::make_error("expected movt instruction"); - } - else { - if (!isArmMovw(instruction)) - return llvm::make_error("expected movw instruction"); - } - instruction16 = getWordFromArmMov(instruction); - } - other16 = (reloc2.offset & 0xFFFF); - if (top) - value = (instruction16 << 16) | other16; - else - value = (other16 << 16) | instruction16; - if (reloc1.isExtern) { - if (auto ec = atomFromSymbolIndex(reloc1.symbol, target)) - return ec; - *addend = value; - } else { - if (reloc1.scattered) { - toAddress = reloc1.value; - sectIndex = 0; - } else { - toAddress = value; - sectIndex = reloc1.symbol; - } - if (auto ec = atomFromAddr(sectIndex, toAddress, target, &offsetInTo)) - return ec; - *addend = value - toAddress; - } - } - - return llvm::Error::success(); -} - -void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress, - bool &thumbMode, bool targetIsThumb) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::ARM); - ulittle32_t *loc32 = reinterpret_cast(loc); - int32_t displacement; - uint16_t value16; - uint32_t value32; - switch (static_cast(ref.kindValue())) { - case modeThumbCode: - thumbMode = true; - break; - case modeArmCode: - thumbMode = false; - break; - case modeData: - break; - case thumb_b22: - case thumb_bl22: - 
assert(thumbMode); - displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); - value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, - displacement, targetIsThumb); - *loc32 = value32; - break; - case thumb_movw: - assert(thumbMode); - value16 = (targetAddress + ref.addend()) & 0xFFFF; - if (targetIsThumb) - value16 |= 1; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case thumb_movt: - assert(thumbMode); - value16 = (targetAddress + ref.addend()) >> 16; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case thumb_movw_funcRel: - assert(thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; - if (targetIsThumb) - value16 |= 1; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case thumb_movt_funcRel: - assert(thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case arm_b24: - case arm_bl24: - assert(!thumbMode); - displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); - value32 = setDisplacementInArmBranch(*loc32, displacement, targetIsThumb); - *loc32 = value32; - break; - case arm_movw: - assert(!thumbMode); - value16 = (targetAddress + ref.addend()) & 0xFFFF; - if (targetIsThumb) - value16 |= 1; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case arm_movt: - assert(!thumbMode); - value16 = (targetAddress + ref.addend()) >> 16; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case arm_movw_funcRel: - assert(!thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; - if (targetIsThumb) - value16 |= 1; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case arm_movt_funcRel: - assert(!thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case pointer32: - if (targetIsThumb) - *loc32 = targetAddress + ref.addend() + 1; - else - *loc32 = targetAddress + 
ref.addend(); - break; - case delta32: - if (targetIsThumb) - *loc32 = targetAddress - fixupAddress + ref.addend() + 1; - else - *loc32 = targetAddress - fixupAddress + ref.addend(); - break; - case lazyPointer: - // do nothing - break; - case lazyImmediateLocation: - *loc32 = ref.addend(); - break; - case invalid: - llvm_unreachable("invalid ARM Reference Kind"); - break; - } -} - -void ArchHandler_arm::generateAtomContent(const DefinedAtom &atom, - bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) { - // Copy raw bytes. - std::copy(atom.rawContent().begin(), atom.rawContent().end(), - atomContentBuffer.begin()); - // Apply fix-ups. - bool thumbMode = false; - for (const Reference *ref : atom) { - uint32_t offset = ref->offsetInAtom(); - const Atom *target = ref->target(); - uint64_t targetAddress = 0; - bool targetIsThumb = false; - if (const DefinedAtom *defTarg = dyn_cast(target)) { - targetAddress = findAddress(*target); - targetIsThumb = isThumbFunction(*defTarg); - } - uint64_t atomAddress = findAddress(atom); - uint64_t fixupAddress = atomAddress + offset; - if (relocatable) { - applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, - targetAddress, atomAddress, thumbMode, - targetIsThumb); - } else { - applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, - targetAddress, atomAddress, thumbMode, targetIsThumb); - } - } -} - -bool ArchHandler_arm::useExternalRelocationTo(const Atom &target) { - // Undefined symbols are referenced via external relocations. - if (isa(&target)) - return true; - if (const DefinedAtom *defAtom = dyn_cast(&target)) { - switch (defAtom->merge()) { - case DefinedAtom::mergeAsTentative: - // Tentative definitions are referenced via external relocations. 
- return true; - case DefinedAtom::mergeAsWeak: - case DefinedAtom::mergeAsWeakAndAddressUsed: - // Global weak-defs are referenced via external relocations. - return (defAtom->scope() == DefinedAtom::scopeGlobal); - default: - break; - } - } - // Everything else is reference via an internal relocation. - return false; -} - -void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress, - bool &thumbMode, - bool targetIsThumb) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::ARM); - bool useExternalReloc = useExternalRelocationTo(*ref.target()); - ulittle32_t *loc32 = reinterpret_cast(loc); - int32_t displacement; - uint16_t value16; - uint32_t value32; - bool targetIsUndef = isa(ref.target()); - switch (static_cast(ref.kindValue())) { - case modeThumbCode: - thumbMode = true; - break; - case modeArmCode: - thumbMode = false; - break; - case modeData: - break; - case thumb_b22: - case thumb_bl22: - assert(thumbMode); - if (useExternalReloc) - displacement = (ref.addend() - (fixupAddress + 4)); - else - displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); - value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, - displacement, - targetIsUndef || targetIsThumb); - *loc32 = value32; - break; - case thumb_movw: - assert(thumbMode); - if (useExternalReloc) - value16 = ref.addend() & 0xFFFF; - else - value16 = (targetAddress + ref.addend()) & 0xFFFF; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case thumb_movt: - assert(thumbMode); - if (useExternalReloc) - value16 = ref.addend() >> 16; - else - value16 = (targetAddress + ref.addend()) >> 16; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case thumb_movw_funcRel: - assert(thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; 
- case thumb_movt_funcRel: - assert(thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; - *loc32 = setWordFromThumbMov(*loc32, value16); - break; - case arm_b24: - case arm_bl24: - assert(!thumbMode); - if (useExternalReloc) - displacement = (ref.addend() - (fixupAddress + 8)); - else - displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); - value32 = setDisplacementInArmBranch(*loc32, displacement, - targetIsThumb); - *loc32 = value32; - break; - case arm_movw: - assert(!thumbMode); - if (useExternalReloc) - value16 = ref.addend() & 0xFFFF; - else - value16 = (targetAddress + ref.addend()) & 0xFFFF; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case arm_movt: - assert(!thumbMode); - if (useExternalReloc) - value16 = ref.addend() >> 16; - else - value16 = (targetAddress + ref.addend()) >> 16; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case arm_movw_funcRel: - assert(!thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case arm_movt_funcRel: - assert(!thumbMode); - value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; - *loc32 = setWordFromArmMov(*loc32, value16); - break; - case pointer32: - *loc32 = targetAddress + ref.addend(); - break; - case delta32: - *loc32 = targetAddress - fixupAddress + ref.addend(); - break; - case lazyPointer: - case lazyImmediateLocation: - // do nothing - break; - case invalid: - llvm_unreachable("invalid ARM Reference Kind"); - break; - } -} - -void ArchHandler_arm::appendSectionRelocations( - const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, - normalized::Relocations &relocs) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::ARM); - uint32_t 
sectionOffset = atomSectionOffset + ref.offsetInAtom(); - bool useExternalReloc = useExternalRelocationTo(*ref.target()); - uint32_t targetAtomAddress; - uint32_t fromAtomAddress; - uint16_t other16; - switch (static_cast(ref.kindValue())) { - case modeThumbCode: - case modeArmCode: - case modeData: - // Do nothing. - break; - case thumb_b22: - case thumb_bl22: - if (useExternalReloc) { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_THUMB_RELOC_BR22 | rExtern | rPcRel | rLength4); - } else { - if (ref.addend() != 0) - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4); - else - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_THUMB_RELOC_BR22 | rPcRel | rLength4); - } - break; - case thumb_movw: - if (useExternalReloc) { - other16 = ref.addend() >> 16; - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_RELOC_HALF | rExtern | rLenThmbLo); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenThmbLo); - } else { - targetAtomAddress = addressForAtom(*ref.target()); - if (ref.addend() != 0) { - other16 = (targetAtomAddress + ref.addend()) >> 16; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF | rScattered | rLenThmbLo); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenThmbLo); - } else { - other16 = (targetAtomAddress + ref.addend()) >> 16; - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_RELOC_HALF | rLenThmbLo); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenThmbLo); - } - } - break; - case thumb_movt: - if (useExternalReloc) { - other16 = ref.addend() & 0xFFFF; - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_RELOC_HALF | rExtern | rLenThmbHi); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenThmbHi); - } else { - targetAtomAddress = addressForAtom(*ref.target()); - if 
(ref.addend() != 0) { - other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF | rScattered | rLenThmbHi); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenThmbHi); - } else { - other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_RELOC_HALF | rLenThmbHi); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenThmbHi); - } - } - break; - case thumb_movw_funcRel: - fromAtomAddress = addressForAtom(atom); - targetAtomAddress = addressForAtom(*ref.target()); - other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo); - appendReloc(relocs, other16, 0, fromAtomAddress, - ARM_RELOC_PAIR | rScattered | rLenThmbLo); - break; - case thumb_movt_funcRel: - fromAtomAddress = addressForAtom(atom); - targetAtomAddress = addressForAtom(*ref.target()); - other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi); - appendReloc(relocs, other16, 0, fromAtomAddress, - ARM_RELOC_PAIR | rScattered | rLenThmbHi); - break; - case arm_b24: - case arm_bl24: - if (useExternalReloc) { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_RELOC_BR24 | rExtern | rPcRel | rLength4); - } else { - if (ref.addend() != 0) - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - ARM_RELOC_BR24 | rScattered | rPcRel | rLength4); - else - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_RELOC_BR24 | rPcRel | rLength4); - } - break; - case arm_movw: - if (useExternalReloc) { - other16 = ref.addend() >> 16; - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_RELOC_HALF | rExtern | 
rLenArmLo); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenArmLo); - } else { - targetAtomAddress = addressForAtom(*ref.target()); - if (ref.addend() != 0) { - other16 = (targetAtomAddress + ref.addend()) >> 16; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF | rScattered | rLenArmLo); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenArmLo); - } else { - other16 = (targetAtomAddress + ref.addend()) >> 16; - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_RELOC_HALF | rLenArmLo); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenArmLo); - } - } - break; - case arm_movt: - if (useExternalReloc) { - other16 = ref.addend() & 0xFFFF; - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_RELOC_HALF | rExtern | rLenArmHi); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenArmHi); - } else { - targetAtomAddress = addressForAtom(*ref.target()); - if (ref.addend() != 0) { - other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF | rScattered | rLenArmHi); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenArmHi); - } else { - other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_RELOC_HALF | rLenArmHi); - appendReloc(relocs, other16, 0, 0, - ARM_RELOC_PAIR | rLenArmHi); - } - } - break; - case arm_movw_funcRel: - fromAtomAddress = addressForAtom(atom); - targetAtomAddress = addressForAtom(*ref.target()); - other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo); - appendReloc(relocs, other16, 0, fromAtomAddress, - ARM_RELOC_PAIR | rScattered | rLenArmLo); - break; - case arm_movt_funcRel: - fromAtomAddress = addressForAtom(atom); - targetAtomAddress = 
addressForAtom(*ref.target()); - other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; - appendReloc(relocs, sectionOffset, 0, targetAtomAddress, - ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi); - appendReloc(relocs, other16, 0, fromAtomAddress, - ARM_RELOC_PAIR | rScattered | rLenArmHi); - break; - case pointer32: - if (useExternalReloc) { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM_RELOC_VANILLA | rExtern | rLength4); - } - else { - if (ref.addend() != 0) - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - ARM_RELOC_VANILLA | rScattered | rLength4); - else - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - ARM_RELOC_VANILLA | rLength4); - } - break; - case delta32: - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - ARM_RELOC_SECTDIFF | rScattered | rLength4); - appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + - ref.offsetInAtom(), - ARM_RELOC_PAIR | rScattered | rLength4); - break; - case lazyPointer: - case lazyImmediateLocation: - // do nothing - break; - case invalid: - llvm_unreachable("invalid ARM Reference Kind"); - break; - } -} - -void ArchHandler_arm::addAdditionalReferences(MachODefinedAtom &atom) { - if (atom.isThumb()) { - atom.addReference(Reference::KindNamespace::mach_o, - Reference::KindArch::ARM, modeThumbCode, 0, &atom, 0); - } -} - -bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) { - for (const Reference *ref : atom) { - if (ref->offsetInAtom() != 0) - return false; - if (ref->kindNamespace() != Reference::KindNamespace::mach_o) - continue; - assert(ref->kindArch() == Reference::KindArch::ARM); - if (ref->kindValue() == modeThumbCode) - return true; - } - return false; -} - -class Thumb2ToArmShimAtom : public SimpleDefinedAtom { -public: - Thumb2ToArmShimAtom(MachOFile &file, StringRef targetName, - const DefinedAtom &target) - : SimpleDefinedAtom(file) { - 
addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, - ArchHandler_arm::modeThumbCode, 0, this, 0); - addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, - ArchHandler_arm::delta32, 8, &target, 0); - std::string name = std::string(targetName) + "$shim"; - StringRef tmp(name); - _name = tmp.copy(file.allocator()); - } - - ~Thumb2ToArmShimAtom() override = default; - - StringRef name() const override { - return _name; - } - - ContentType contentType() const override { - return DefinedAtom::typeCode; - } - - Alignment alignment() const override { return 4; } - - uint64_t size() const override { - return 12; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permR_X; - } - - ArrayRef rawContent() const override { - static const uint8_t bytes[] = - { 0xDF, 0xF8, 0x04, 0xC0, // ldr ip, pc + 4 - 0xFF, 0x44, // add ip, pc, ip - 0x60, 0x47, // ldr pc, [ip] - 0x00, 0x00, 0x00, 0x00 }; // .long target - this - assert(sizeof(bytes) == size()); - return llvm::makeArrayRef(bytes, sizeof(bytes)); - } -private: - StringRef _name; -}; - -class ArmToThumbShimAtom : public SimpleDefinedAtom { -public: - ArmToThumbShimAtom(MachOFile &file, StringRef targetName, - const DefinedAtom &target) - : SimpleDefinedAtom(file) { - addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, - ArchHandler_arm::delta32, 12, &target, 0); - std::string name = std::string(targetName) + "$shim"; - StringRef tmp(name); - _name = tmp.copy(file.allocator()); - } - - ~ArmToThumbShimAtom() override = default; - - StringRef name() const override { - return _name; - } - - ContentType contentType() const override { - return DefinedAtom::typeCode; - } - - Alignment alignment() const override { return 4; } - - uint64_t size() const override { - return 16; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permR_X; - } - - ArrayRef rawContent() const override { - static const uint8_t bytes[] = - { 
0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 4 - 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip - 0x1C, 0xFF, 0x2F, 0xE1, // ldr pc, [ip] - 0x00, 0x00, 0x00, 0x00 }; // .long target - this - assert(sizeof(bytes) == size()); - return llvm::makeArrayRef(bytes, sizeof(bytes)); - } -private: - StringRef _name; -}; - -const DefinedAtom *ArchHandler_arm::createShim(MachOFile &file, - bool thumbToArm, - const DefinedAtom &target) { - bool isStub = (target.contentType() == DefinedAtom::typeStub); - StringRef targetName = isStub ? stubName(target) : target.name(); - if (thumbToArm) - return new (file.allocator()) Thumb2ToArmShimAtom(file, targetName, target); - else - return new (file.allocator()) ArmToThumbShimAtom(file, targetName, target); -} - -std::unique_ptr ArchHandler::create_arm() { - return std::unique_ptr(new ArchHandler_arm()); -} - -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp deleted file mode 100644 index bee081aec067..000000000000 --- a/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp +++ /dev/null @@ -1,897 +0,0 @@ -//===- lib/FileFormat/MachO/ArchHandler_arm64.cpp -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "Atoms.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -namespace lld { -namespace mach_o { - -using llvm::support::ulittle32_t; -using llvm::support::ulittle64_t; - -using llvm::support::little32_t; -using llvm::support::little64_t; - -class ArchHandler_arm64 : public ArchHandler { -public: - ArchHandler_arm64() = default; - ~ArchHandler_arm64() override = default; - - const Registry::KindStrings *kindStrings() override { return _sKindStrings; } - - Reference::KindArch kindArch() override { - return Reference::KindArch::AArch64; - } - - /// Used by GOTPass to locate GOT References - bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - assert(ref.kindArch() == Reference::KindArch::AArch64); - switch (ref.kindValue()) { - case gotPage21: - case gotOffset12: - canBypassGOT = true; - return true; - case delta32ToGOT: - case unwindCIEToPersonalityFunction: - case imageOffsetGot: - canBypassGOT = false; - return true; - default: - return false; - } - } - - /// Used by GOTPass to update GOT References. 
- void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { - // If GOT slot was instantiated, transform: - // gotPage21/gotOffset12 -> page21/offset12scale8 - // If GOT slot optimized away, transform: - // gotPage21/gotOffset12 -> page21/addOffset12 - assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); - assert(ref->kindArch() == Reference::KindArch::AArch64); - switch (ref->kindValue()) { - case gotPage21: - const_cast(ref)->setKindValue(page21); - break; - case gotOffset12: - const_cast(ref)->setKindValue(targetNowGOT ? - offset12scale8 : addOffset12); - break; - case delta32ToGOT: - const_cast(ref)->setKindValue(delta32); - break; - case imageOffsetGot: - const_cast(ref)->setKindValue(imageOffset); - break; - default: - llvm_unreachable("Not a GOT reference"); - } - } - - const StubInfo &stubInfo() override { return _sStubInfo; } - - bool isCallSite(const Reference &) override; - bool isNonCallBranch(const Reference &) override { - return false; - } - - bool isPointer(const Reference &) override; - bool isPairedReloc(const normalized::Relocation &) override; - - bool needsCompactUnwind() override { - return true; - } - Reference::KindValue imageOffsetKind() override { - return imageOffset; - } - Reference::KindValue imageOffsetKindIndirect() override { - return imageOffsetGot; - } - - Reference::KindValue unwindRefToPersonalityFunctionKind() override { - return unwindCIEToPersonalityFunction; - } - - Reference::KindValue unwindRefToCIEKind() override { - return negDelta32; - } - - Reference::KindValue unwindRefToFunctionKind() override { - return unwindFDEToFunction; - } - - Reference::KindValue unwindRefToEhFrameKind() override { - return unwindInfoToEhFrame; - } - - Reference::KindValue pointerKind() override { - return pointer64; - } - - Reference::KindValue lazyImmediateLocationKind() override { - return lazyImmediateLocation; - } - - uint32_t dwarfCompactUnwindType() override { - return 0x03000000; - } - - llvm::Error 
getReferenceInfo(const normalized::Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBig, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - llvm::Error - getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBig, bool scatterable, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - - bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { - return (atom->contentType() == DefinedAtom::typeCString); - } - - void generateAtomContent(const DefinedAtom &atom, bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) override; - - void appendSectionRelocations(const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, - normalized::Relocations &relocs) override; - -private: - static const Registry::KindStrings _sKindStrings[]; - static const StubInfo _sStubInfo; - - enum Arm64Kind : Reference::KindValue { - invalid, /// for error condition - - // Kinds found in mach-o .o files: - branch26, /// ex: bl _foo - page21, /// ex: adrp x1, _foo@PAGE - offset12, /// ex: ldrb w0, [x1, _foo@PAGEOFF] - offset12scale2, /// ex: ldrs w0, [x1, _foo@PAGEOFF] - offset12scale4, /// ex: ldr w0, [x1, _foo@PAGEOFF] - offset12scale8, /// ex: ldr x0, [x1, _foo@PAGEOFF] - offset12scale16, /// ex: ldr q0, [x1, _foo@PAGEOFF] - gotPage21, /// ex: adrp x1, _foo@GOTPAGE - 
gotOffset12, /// ex: ldr w0, [x1, _foo@GOTPAGEOFF] - tlvPage21, /// ex: adrp x1, _foo@TLVPAGE - tlvOffset12, /// ex: ldr w0, [x1, _foo@TLVPAGEOFF] - - pointer64, /// ex: .quad _foo - delta64, /// ex: .quad _foo - . - delta32, /// ex: .long _foo - . - negDelta32, /// ex: .long . - _foo - pointer64ToGOT, /// ex: .quad _foo@GOT - delta32ToGOT, /// ex: .long _foo@GOT - . - - // Kinds introduced by Passes: - addOffset12, /// Location contains LDR to change into ADD. - lazyPointer, /// Location contains a lazy pointer. - lazyImmediateLocation, /// Location contains immediate value used in stub. - imageOffset, /// Location contains offset of atom in final image - imageOffsetGot, /// Location contains offset of GOT entry for atom in - /// final image (typically personality function). - unwindCIEToPersonalityFunction, /// Nearly delta32ToGOT, but cannot be - /// rematerialized in relocatable object - /// (yay for implicit contracts!). - unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in - /// relocatable object (yay for implicit contracts!). - unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to - /// refer to __eh_frame entry. - }; - - void applyFixupFinal(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, uint64_t targetAddress, - uint64_t inAtomAddress, uint64_t imageBaseAddress, - FindAddressForAtom findSectionAddress); - - void applyFixupRelocatable(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, uint64_t targetAddress, - uint64_t inAtomAddress, bool targetUnnamed); - - // Utility functions for inspecting/updating instructions. 
- static uint32_t setDisplacementInBranch26(uint32_t instr, int32_t disp); - static uint32_t setDisplacementInADRP(uint32_t instr, int64_t disp); - static Arm64Kind offset12KindFromInstruction(uint32_t instr); - static uint32_t setImm12(uint32_t instr, uint32_t offset); -}; - -const Registry::KindStrings ArchHandler_arm64::_sKindStrings[] = { - LLD_KIND_STRING_ENTRY(invalid), - LLD_KIND_STRING_ENTRY(branch26), - LLD_KIND_STRING_ENTRY(page21), - LLD_KIND_STRING_ENTRY(offset12), - LLD_KIND_STRING_ENTRY(offset12scale2), - LLD_KIND_STRING_ENTRY(offset12scale4), - LLD_KIND_STRING_ENTRY(offset12scale8), - LLD_KIND_STRING_ENTRY(offset12scale16), - LLD_KIND_STRING_ENTRY(gotPage21), - LLD_KIND_STRING_ENTRY(gotOffset12), - LLD_KIND_STRING_ENTRY(tlvPage21), - LLD_KIND_STRING_ENTRY(tlvOffset12), - LLD_KIND_STRING_ENTRY(pointer64), - LLD_KIND_STRING_ENTRY(delta64), - LLD_KIND_STRING_ENTRY(delta32), - LLD_KIND_STRING_ENTRY(negDelta32), - LLD_KIND_STRING_ENTRY(pointer64ToGOT), - LLD_KIND_STRING_ENTRY(delta32ToGOT), - - LLD_KIND_STRING_ENTRY(addOffset12), - LLD_KIND_STRING_ENTRY(lazyPointer), - LLD_KIND_STRING_ENTRY(lazyImmediateLocation), - LLD_KIND_STRING_ENTRY(imageOffset), - LLD_KIND_STRING_ENTRY(imageOffsetGot), - LLD_KIND_STRING_ENTRY(unwindCIEToPersonalityFunction), - LLD_KIND_STRING_ENTRY(unwindFDEToFunction), - LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), - - LLD_KIND_STRING_END -}; - -const ArchHandler::StubInfo ArchHandler_arm64::_sStubInfo = { - "dyld_stub_binder", - - // Lazy pointer references - { Reference::KindArch::AArch64, pointer64, 0, 0 }, - { Reference::KindArch::AArch64, lazyPointer, 0, 0 }, - - // GOT pointer to dyld_stub_binder - { Reference::KindArch::AArch64, pointer64, 0, 0 }, - - // arm64 code alignment 2^1 - 1, - - // Stub size and code - 12, - { 0x10, 0x00, 0x00, 0x90, // ADRP X16, lazy_pointer@page - 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16, lazy_pointer@pageoff] - 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 - { Reference::KindArch::AArch64, page21, 0, 0 }, - 
{ true, offset12scale8, 4, 0 }, - - // Stub Helper size and code - 12, - { 0x50, 0x00, 0x00, 0x18, // LDR W16, L0 - 0x00, 0x00, 0x00, 0x14, // LDR B helperhelper - 0x00, 0x00, 0x00, 0x00 }, // L0: .long 0 - { Reference::KindArch::AArch64, lazyImmediateLocation, 8, 0 }, - { Reference::KindArch::AArch64, branch26, 4, 0 }, - - // Stub helper image cache content type - DefinedAtom::typeGOT, - - // Stub Helper-Common size and code - 24, - // Stub helper alignment - 2, - { 0x11, 0x00, 0x00, 0x90, // ADRP X17, dyld_ImageLoaderCache@page - 0x31, 0x02, 0x00, 0x91, // ADD X17, X17, dyld_ImageLoaderCache@pageoff - 0xF0, 0x47, 0xBF, 0xA9, // STP X16/X17, [SP, #-16]! - 0x10, 0x00, 0x00, 0x90, // ADRP X16, _fast_lazy_bind@page - 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16,_fast_lazy_bind@pageoff] - 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 - { Reference::KindArch::AArch64, page21, 0, 0 }, - { true, offset12, 4, 0 }, - { Reference::KindArch::AArch64, page21, 12, 0 }, - { true, offset12scale8, 16, 0 } -}; - -bool ArchHandler_arm64::isCallSite(const Reference &ref) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - assert(ref.kindArch() == Reference::KindArch::AArch64); - return (ref.kindValue() == branch26); -} - -bool ArchHandler_arm64::isPointer(const Reference &ref) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - assert(ref.kindArch() == Reference::KindArch::AArch64); - Reference::KindValue kind = ref.kindValue(); - return (kind == pointer64); -} - -bool ArchHandler_arm64::isPairedReloc(const Relocation &r) { - return ((r.type == ARM64_RELOC_ADDEND) || (r.type == ARM64_RELOC_SUBTRACTOR)); -} - -uint32_t ArchHandler_arm64::setDisplacementInBranch26(uint32_t instr, - int32_t displacement) { - assert((displacement <= 134217727) && (displacement > (-134217728)) && - "arm64 branch out of range"); - return (instr & 0xFC000000) | ((uint32_t)(displacement >> 2) & 0x03FFFFFF); -} - -uint32_t 
ArchHandler_arm64::setDisplacementInADRP(uint32_t instruction, - int64_t displacement) { - assert((displacement <= 0x100000000LL) && (displacement > (-0x100000000LL)) && - "arm64 ADRP out of range"); - assert(((instruction & 0x9F000000) == 0x90000000) && - "reloc not on ADRP instruction"); - uint32_t immhi = (displacement >> 9) & (0x00FFFFE0); - uint32_t immlo = (displacement << 17) & (0x60000000); - return (instruction & 0x9F00001F) | immlo | immhi; -} - -ArchHandler_arm64::Arm64Kind -ArchHandler_arm64::offset12KindFromInstruction(uint32_t instruction) { - if (instruction & 0x08000000) { - switch ((instruction >> 30) & 0x3) { - case 0: - if ((instruction & 0x04800000) == 0x04800000) - return offset12scale16; - return offset12; - case 1: - return offset12scale2; - case 2: - return offset12scale4; - case 3: - return offset12scale8; - } - } - return offset12; -} - -uint32_t ArchHandler_arm64::setImm12(uint32_t instruction, uint32_t offset) { - assert(((offset & 0xFFFFF000) == 0) && "imm12 offset out of range"); - uint32_t imm12 = offset << 10; - return (instruction & 0xFFC003FF) | imm12; -} - -llvm::Error ArchHandler_arm64::getReferenceInfo( - const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, - uint64_t fixupAddress, bool isBig, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, - const lld::Atom **target, Reference::Addend *addend) { - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - switch (relocPattern(reloc)) { - case ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4: - // ex: bl _foo - *kind = branch26; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4: - // ex: adrp x1, _foo@PAGE - *kind = page21; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case 
ARM64_RELOC_PAGEOFF12 | rExtern | rLength4: - // ex: ldr x0, [x1, _foo@PAGEOFF] - *kind = offset12KindFromInstruction(*(const little32_t *)fixupContent); - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4: - // ex: adrp x1, _foo@GOTPAGE - *kind = gotPage21; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4: - // ex: ldr x0, [x1, _foo@GOTPAGEOFF] - *kind = gotOffset12; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4: - // ex: adrp x1, _foo@TLVPAGE - *kind = tlvPage21; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4: - // ex: ldr x0, [x1, _foo@TLVPAGEOFF] - *kind = tlvOffset12; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_UNSIGNED | rExtern | rLength8: - // ex: .quad _foo + N - *kind = pointer64; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = *(const little64_t *)fixupContent; - return llvm::Error::success(); - case ARM64_RELOC_UNSIGNED | rLength8: - // ex: .quad Lfoo + N - *kind = pointer64; - return atomFromAddress(reloc.symbol, *(const little64_t *)fixupContent, - target, addend); - case ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8: - // ex: .quad _foo@GOT - *kind = pointer64ToGOT; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4: - // ex: .long _foo@GOT - . 
- - // If we are in an .eh_frame section, then the kind of the relocation should - // not be delta32ToGOT. It may instead be unwindCIEToPersonalityFunction. - if (inAtom->contentType() == DefinedAtom::typeCFI) - *kind = unwindCIEToPersonalityFunction; - else - *kind = delta32ToGOT; - - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = 0; - return llvm::Error::success(); - default: - return llvm::make_error("unsupported arm64 relocation type"); - } -} - -llvm::Error ArchHandler_arm64::getPairReferenceInfo( - const normalized::Relocation &reloc1, const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, uint32_t offsetInAtom, uint64_t fixupAddress, - bool swap, bool scatterable, FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, - const lld::Atom **target, Reference::Addend *addend) { - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { - case ((ARM64_RELOC_ADDEND | rLength4) << 16 | - ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4): - // ex: bl _foo+8 - *kind = branch26; - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return ec; - *addend = reloc1.symbol; - return llvm::Error::success(); - case ((ARM64_RELOC_ADDEND | rLength4) << 16 | - ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4): - // ex: adrp x1, _foo@PAGE - *kind = page21; - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return ec; - *addend = reloc1.symbol; - return llvm::Error::success(); - case ((ARM64_RELOC_ADDEND | rLength4) << 16 | - ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): { - // ex: ldr w0, [x1, _foo@PAGEOFF] - uint32_t cont32 = (int32_t)*(const little32_t *)fixupContent; - *kind = offset12KindFromInstruction(cont32); - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return ec; - *addend = reloc1.symbol; - return llvm::Error::success(); - } - case 
((ARM64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | - ARM64_RELOC_UNSIGNED | rExtern | rLength8): - // ex: .quad _foo - . - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return ec; - - // If we are in an .eh_frame section, then the kind of the relocation should - // not be delta64. It may instead be unwindFDEToFunction. - if (inAtom->contentType() == DefinedAtom::typeCFI) - *kind = unwindFDEToFunction; - else - *kind = delta64; - - // The offsets of the 2 relocations must match - if (reloc1.offset != reloc2.offset) - return llvm::make_error( - "paired relocs must have the same offset"); - *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom; - return llvm::Error::success(); - case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | - ARM64_RELOC_UNSIGNED | rExtern | rLength4): - // ex: .quad _foo - . - *kind = delta32; - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return ec; - *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom; - return llvm::Error::success(); - default: - return llvm::make_error("unsupported arm64 relocation pair"); - } -} - -void ArchHandler_arm64::generateAtomContent( - const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) { - // Copy raw bytes. - std::copy(atom.rawContent().begin(), atom.rawContent().end(), - atomContentBuffer.begin()); - // Apply fix-ups. 
-#ifndef NDEBUG - if (atom.begin() != atom.end()) { - DEBUG_WITH_TYPE("atom-content", llvm::dbgs() - << "Applying fixups to atom:\n" - << " address=" - << llvm::format(" 0x%09lX", &atom) - << ", file=#" - << atom.file().ordinal() - << ", atom=#" - << atom.ordinal() - << ", name=" - << atom.name() - << ", type=" - << atom.contentType() - << "\n"); - } -#endif - for (const Reference *ref : atom) { - uint32_t offset = ref->offsetInAtom(); - const Atom *target = ref->target(); - bool targetUnnamed = target->name().empty(); - uint64_t targetAddress = 0; - if (isa(target)) - targetAddress = findAddress(*target); - uint64_t atomAddress = findAddress(atom); - uint64_t fixupAddress = atomAddress + offset; - if (relocatable) { - applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, - targetAddress, atomAddress, targetUnnamed); - } else { - applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, - targetAddress, atomAddress, imageBaseAddress, - findSectionAddress); - } - } -} - -void ArchHandler_arm64::applyFixupFinal(const Reference &ref, uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress, - uint64_t imageBaseAddress, - FindAddressForAtom findSectionAddress) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::AArch64); - ulittle32_t *loc32 = reinterpret_cast(loc); - ulittle64_t *loc64 = reinterpret_cast(loc); - int32_t displacement; - uint32_t instruction; - uint32_t value32; - uint32_t value64; - switch (static_cast(ref.kindValue())) { - case branch26: - displacement = (targetAddress - fixupAddress) + ref.addend(); - *loc32 = setDisplacementInBranch26(*loc32, displacement); - return; - case page21: - case gotPage21: - case tlvPage21: - displacement = - ((targetAddress + ref.addend()) & (-4096)) - (fixupAddress & (-4096)); - *loc32 = setDisplacementInADRP(*loc32, displacement); - return; - case offset12: - case gotOffset12: - case tlvOffset12: 
- displacement = (targetAddress + ref.addend()) & 0x00000FFF; - *loc32 = setImm12(*loc32, displacement); - return; - case offset12scale2: - displacement = (targetAddress + ref.addend()) & 0x00000FFF; - assert(((displacement & 0x1) == 0) && - "scaled imm12 not accessing 2-byte aligneds"); - *loc32 = setImm12(*loc32, displacement >> 1); - return; - case offset12scale4: - displacement = (targetAddress + ref.addend()) & 0x00000FFF; - assert(((displacement & 0x3) == 0) && - "scaled imm12 not accessing 4-byte aligned"); - *loc32 = setImm12(*loc32, displacement >> 2); - return; - case offset12scale8: - displacement = (targetAddress + ref.addend()) & 0x00000FFF; - assert(((displacement & 0x7) == 0) && - "scaled imm12 not accessing 8-byte aligned"); - *loc32 = setImm12(*loc32, displacement >> 3); - return; - case offset12scale16: - displacement = (targetAddress + ref.addend()) & 0x00000FFF; - assert(((displacement & 0xF) == 0) && - "scaled imm12 not accessing 16-byte aligned"); - *loc32 = setImm12(*loc32, displacement >> 4); - return; - case addOffset12: - instruction = *loc32; - assert(((instruction & 0xFFC00000) == 0xF9400000) && - "GOT reloc is not an LDR instruction"); - displacement = (targetAddress + ref.addend()) & 0x00000FFF; - value32 = 0x91000000 | (instruction & 0x000003FF); - instruction = setImm12(value32, displacement); - *loc32 = instruction; - return; - case pointer64: - case pointer64ToGOT: - *loc64 = targetAddress + ref.addend(); - return; - case delta64: - case unwindFDEToFunction: - *loc64 = (targetAddress - fixupAddress) + ref.addend(); - return; - case delta32: - case delta32ToGOT: - case unwindCIEToPersonalityFunction: - *loc32 = (targetAddress - fixupAddress) + ref.addend(); - return; - case negDelta32: - *loc32 = fixupAddress - targetAddress + ref.addend(); - return; - case lazyPointer: - // Do nothing - return; - case lazyImmediateLocation: - *loc32 = ref.addend(); - return; - case imageOffset: - *loc32 = (targetAddress - imageBaseAddress) + 
ref.addend(); - return; - case imageOffsetGot: - llvm_unreachable("imageOffsetGot should have been changed to imageOffset"); - break; - case unwindInfoToEhFrame: - value64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); - assert(value64 < 0xffffffU && "offset in __eh_frame too large"); - *loc32 = (*loc32 & 0xff000000U) | value64; - return; - case invalid: - // Fall into llvm_unreachable(). - break; - } - llvm_unreachable("invalid arm64 Reference Kind"); -} - -void ArchHandler_arm64::applyFixupRelocatable(const Reference &ref, - uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress, - bool targetUnnamed) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::AArch64); - ulittle32_t *loc32 = reinterpret_cast(loc); - ulittle64_t *loc64 = reinterpret_cast(loc); - switch (static_cast(ref.kindValue())) { - case branch26: - *loc32 = setDisplacementInBranch26(*loc32, 0); - return; - case page21: - case gotPage21: - case tlvPage21: - *loc32 = setDisplacementInADRP(*loc32, 0); - return; - case offset12: - case offset12scale2: - case offset12scale4: - case offset12scale8: - case offset12scale16: - case gotOffset12: - case tlvOffset12: - *loc32 = setImm12(*loc32, 0); - return; - case pointer64: - if (targetUnnamed) - *loc64 = targetAddress + ref.addend(); - else - *loc64 = ref.addend(); - return; - case delta64: - *loc64 = ref.addend() + inAtomAddress - fixupAddress; - return; - case unwindFDEToFunction: - // We don't emit unwindFDEToFunction in -r mode as they are implicitly - // generated from the data in the __eh_frame section. So here we need - // to use the targetAddress so that we can generate the full relocation - // when we parse again later. 
- *loc64 = targetAddress - fixupAddress; - return; - case delta32: - *loc32 = ref.addend() + inAtomAddress - fixupAddress; - return; - case negDelta32: - // We don't emit negDelta32 in -r mode as they are implicitly - // generated from the data in the __eh_frame section. So here we need - // to use the targetAddress so that we can generate the full relocation - // when we parse again later. - *loc32 = fixupAddress - targetAddress + ref.addend(); - return; - case pointer64ToGOT: - *loc64 = 0; - return; - case delta32ToGOT: - *loc32 = inAtomAddress - fixupAddress; - return; - case unwindCIEToPersonalityFunction: - // We don't emit unwindCIEToPersonalityFunction in -r mode as they are - // implicitly generated from the data in the __eh_frame section. So here we - // need to use the targetAddress so that we can generate the full relocation - // when we parse again later. - *loc32 = targetAddress - fixupAddress; - return; - case addOffset12: - llvm_unreachable("lazy reference kind implies GOT pass was run"); - case lazyPointer: - case lazyImmediateLocation: - llvm_unreachable("lazy reference kind implies Stubs pass was run"); - case imageOffset: - case imageOffsetGot: - case unwindInfoToEhFrame: - llvm_unreachable("fixup implies __unwind_info"); - return; - case invalid: - // Fall into llvm_unreachable(). 
- break; - } - llvm_unreachable("unknown arm64 Reference Kind"); -} - -void ArchHandler_arm64::appendSectionRelocations( - const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, normalized::Relocations &relocs) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::AArch64); - uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); - switch (static_cast(ref.kindValue())) { - case branch26: - if (ref.addend()) { - appendReloc(relocs, sectionOffset, ref.addend(), 0, - ARM64_RELOC_ADDEND | rLength4); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); - } else { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); - } - return; - case page21: - if (ref.addend()) { - appendReloc(relocs, sectionOffset, ref.addend(), 0, - ARM64_RELOC_ADDEND | rLength4); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); - } else { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); - } - return; - case offset12: - case offset12scale2: - case offset12scale4: - case offset12scale8: - case offset12scale16: - if (ref.addend()) { - appendReloc(relocs, sectionOffset, ref.addend(), 0, - ARM64_RELOC_ADDEND | rLength4); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); - } else { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); - } - return; - case gotPage21: - assert(ref.addend() == 0); - appendReloc(relocs, sectionOffset, 
symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4); - return; - case gotOffset12: - assert(ref.addend() == 0); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4); - return; - case tlvPage21: - assert(ref.addend() == 0); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4); - return; - case tlvOffset12: - assert(ref.addend() == 0); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4); - return; - case pointer64: - if (ref.target()->name().empty()) - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - ARM64_RELOC_UNSIGNED | rLength8); - else - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_UNSIGNED | rExtern | rLength8); - return; - case delta64: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - ARM64_RELOC_SUBTRACTOR | rExtern | rLength8); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_UNSIGNED | rExtern | rLength8); - return; - case delta32: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - ARM64_RELOC_SUBTRACTOR | rExtern | rLength4 ); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_UNSIGNED | rExtern | rLength4 ); - return; - case pointer64ToGOT: - assert(ref.addend() == 0); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8); - return; - case delta32ToGOT: - assert(ref.addend() == 0); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4); - return; - case addOffset12: - llvm_unreachable("lazy reference kind implies GOT pass was run"); - case 
lazyPointer: - case lazyImmediateLocation: - llvm_unreachable("lazy reference kind implies Stubs pass was run"); - case imageOffset: - case imageOffsetGot: - llvm_unreachable("deltas from mach_header can only be in final images"); - case unwindCIEToPersonalityFunction: - case unwindFDEToFunction: - case unwindInfoToEhFrame: - case negDelta32: - // Do nothing. - return; - case invalid: - // Fall into llvm_unreachable(). - break; - } - llvm_unreachable("unknown arm64 Reference Kind"); -} - -std::unique_ptr ArchHandler::create_arm64() { - return std::unique_ptr(new ArchHandler_arm64()); -} - -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp deleted file mode 100644 index 6ea8e8c42e80..000000000000 --- a/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp +++ /dev/null @@ -1,643 +0,0 @@ -//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "Atoms.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -namespace lld { -namespace mach_o { - -using llvm::support::ulittle16_t; -using llvm::support::ulittle32_t; - -using llvm::support::little16_t; -using llvm::support::little32_t; - -class ArchHandler_x86 : public ArchHandler { -public: - ArchHandler_x86() = default; - ~ArchHandler_x86() override = default; - - const Registry::KindStrings *kindStrings() override { return _sKindStrings; } - - Reference::KindArch kindArch() override { return Reference::KindArch::x86; } - - const StubInfo &stubInfo() override { return _sStubInfo; } - bool isCallSite(const Reference &) override; - bool isNonCallBranch(const Reference &) override { - return false; - } - - bool isPointer(const Reference &) override; - bool isPairedReloc(const normalized::Relocation &) override; - - bool needsCompactUnwind() override { - return false; - } - - Reference::KindValue imageOffsetKind() override { - return invalid; - } - - Reference::KindValue imageOffsetKindIndirect() override { - return invalid; - } - - Reference::KindValue unwindRefToPersonalityFunctionKind() override { - return invalid; - } - - Reference::KindValue unwindRefToCIEKind() override { - return negDelta32; - } - - Reference::KindValue unwindRefToFunctionKind() override{ - return delta32; - } - - Reference::KindValue lazyImmediateLocationKind() override { - return lazyImmediateLocation; - } - - Reference::KindValue unwindRefToEhFrameKind() override { - return invalid; - } - - Reference::KindValue pointerKind() override { - return invalid; - } - - 
uint32_t dwarfCompactUnwindType() override { - return 0x04000000U; - } - - llvm::Error getReferenceInfo(const normalized::Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - llvm::Error - getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, bool scatterable, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - - void generateAtomContent(const DefinedAtom &atom, bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) override; - - void appendSectionRelocations(const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, - normalized::Relocations &relocs) override; - - bool isDataInCodeTransition(Reference::KindValue refKind) override { - return refKind == modeCode || refKind == modeData; - } - - Reference::KindValue dataInCodeTransitionStart( - const MachODefinedAtom &atom) override { - return modeData; - } - - Reference::KindValue dataInCodeTransitionEnd( - const MachODefinedAtom &atom) override { - return modeCode; - } - -private: - static const Registry::KindStrings _sKindStrings[]; - static const StubInfo _sStubInfo; - - enum X86Kind : Reference::KindValue { - invalid, /// for error condition - - modeCode, /// Content starting at this offset is code. 
- modeData, /// Content starting at this offset is data. - - // Kinds found in mach-o .o files: - branch32, /// ex: call _foo - branch16, /// ex: callw _foo - abs32, /// ex: movl _foo, %eax - funcRel32, /// ex: movl _foo-L1(%eax), %eax - pointer32, /// ex: .long _foo - delta32, /// ex: .long _foo - . - negDelta32, /// ex: .long . - _foo - - // Kinds introduced by Passes: - lazyPointer, /// Location contains a lazy pointer. - lazyImmediateLocation, /// Location contains immediate value used in stub. - }; - - static bool useExternalRelocationTo(const Atom &target); - - void applyFixupFinal(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, uint64_t targetAddress, - uint64_t inAtomAddress); - - void applyFixupRelocatable(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress); -}; - -//===----------------------------------------------------------------------===// -// ArchHandler_x86 -//===----------------------------------------------------------------------===// - -const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = { - LLD_KIND_STRING_ENTRY(invalid), - LLD_KIND_STRING_ENTRY(modeCode), - LLD_KIND_STRING_ENTRY(modeData), - LLD_KIND_STRING_ENTRY(branch32), - LLD_KIND_STRING_ENTRY(branch16), - LLD_KIND_STRING_ENTRY(abs32), - LLD_KIND_STRING_ENTRY(funcRel32), - LLD_KIND_STRING_ENTRY(pointer32), - LLD_KIND_STRING_ENTRY(delta32), - LLD_KIND_STRING_ENTRY(negDelta32), - LLD_KIND_STRING_ENTRY(lazyPointer), - LLD_KIND_STRING_ENTRY(lazyImmediateLocation), - LLD_KIND_STRING_END -}; - -const ArchHandler::StubInfo ArchHandler_x86::_sStubInfo = { - "dyld_stub_binder", - - // Lazy pointer references - { Reference::KindArch::x86, pointer32, 0, 0 }, - { Reference::KindArch::x86, lazyPointer, 0, 0 }, - - // GOT pointer to dyld_stub_binder - { Reference::KindArch::x86, pointer32, 0, 0 }, - - // x86 code alignment - 1, - - // Stub size and code - 6, - { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp 
*lazyPointer - { Reference::KindArch::x86, abs32, 2, 0 }, - { false, 0, 0, 0 }, - - // Stub Helper size and code - 10, - { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $lazy-info-offset - 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper - { Reference::KindArch::x86, lazyImmediateLocation, 1, 0 }, - { Reference::KindArch::x86, branch32, 6, 0 }, - - // Stub helper image cache content type - DefinedAtom::typeNonLazyPointer, - - // Stub Helper-Common size and code - 12, - // Stub helper alignment - 2, - { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $dyld_ImageLoaderCache - 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *_fast_lazy_bind - 0x90 }, // nop - { Reference::KindArch::x86, abs32, 1, 0 }, - { false, 0, 0, 0 }, - { Reference::KindArch::x86, abs32, 7, 0 }, - { false, 0, 0, 0 } -}; - -bool ArchHandler_x86::isCallSite(const Reference &ref) { - return (ref.kindValue() == branch32); -} - -bool ArchHandler_x86::isPointer(const Reference &ref) { - return (ref.kindValue() == pointer32); -} - -bool ArchHandler_x86::isPairedReloc(const Relocation &reloc) { - if (!reloc.scattered) - return false; - return (reloc.type == GENERIC_RELOC_LOCAL_SECTDIFF) || - (reloc.type == GENERIC_RELOC_SECTDIFF); -} - -llvm::Error -ArchHandler_x86::getReferenceInfo(const Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) { - DefinedAtom::ContentPermissions perms; - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - uint64_t targetAddress; - switch (relocPattern(reloc)) { - case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4: - // ex: call _foo (and _foo undefined) - *kind = branch32; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent; - break; - case 
GENERIC_RELOC_VANILLA | rPcRel | rLength4: - // ex: call _foo (and _foo defined) - *kind = branch32; - targetAddress = - fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - break; - case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4: - // ex: call _foo+n (and _foo defined) - *kind = branch32; - targetAddress = - fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; - if (auto ec = atomFromAddress(0, reloc.value, target, addend)) - return ec; - *addend = targetAddress - reloc.value; - break; - case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2: - // ex: callw _foo (and _foo undefined) - *kind = branch16; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent; - break; - case GENERIC_RELOC_VANILLA | rPcRel | rLength2: - // ex: callw _foo (and _foo defined) - *kind = branch16; - targetAddress = - fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - break; - case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2: - // ex: callw _foo+n (and _foo defined) - *kind = branch16; - targetAddress = - fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; - if (auto ec = atomFromAddress(0, reloc.value, target, addend)) - return ec; - *addend = targetAddress - reloc.value; - break; - case GENERIC_RELOC_VANILLA | rExtern | rLength4: - // ex: movl _foo, %eax (and _foo undefined) - // ex: .long _foo (and _foo undefined) - perms = inAtom->permissions(); - *kind = - ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? 
abs32 - : pointer32; - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = *(const ulittle32_t *)fixupContent; - break; - case GENERIC_RELOC_VANILLA | rLength4: - // ex: movl _foo, %eax (and _foo defined) - // ex: .long _foo (and _foo defined) - perms = inAtom->permissions(); - *kind = - ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 - : pointer32; - targetAddress = *(const ulittle32_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - break; - case GENERIC_RELOC_VANILLA | rScattered | rLength4: - // ex: .long _foo+n (and _foo defined) - perms = inAtom->permissions(); - *kind = - ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 - : pointer32; - if (auto ec = atomFromAddress(0, reloc.value, target, addend)) - return ec; - *addend = *(const ulittle32_t *)fixupContent - reloc.value; - break; - default: - return llvm::make_error("unsupported i386 relocation type"); - } - return llvm::Error::success(); -} - -llvm::Error -ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - bool scatterable, - FindAtomBySectionAndAddress atomFromAddr, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) { - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - DefinedAtom::ContentPermissions perms = inAtom->permissions(); - uint32_t fromAddress; - uint32_t toAddress; - uint32_t value; - const lld::Atom *fromTarget; - Reference::Addend offsetInTo; - Reference::Addend offsetInFrom; - switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { - case ((GENERIC_RELOC_SECTDIFF | rScattered | rLength4) << 16 | - GENERIC_RELOC_PAIR | rScattered | rLength4): - case ((GENERIC_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | - GENERIC_RELOC_PAIR | 
rScattered | rLength4): - toAddress = reloc1.value; - fromAddress = reloc2.value; - value = *(const little32_t *)fixupContent; - if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) - return ec; - if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) - return ec; - if (fromTarget != inAtom) { - if (*target != inAtom) - return llvm::make_error( - "SECTDIFF relocation where neither target is in atom"); - *kind = negDelta32; - *addend = toAddress - value - fromAddress; - *target = fromTarget; - } else { - if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) { - // SECTDIFF relocations are used in i386 codegen where the function - // prolog does a CALL to the next instruction which POPs the return - // address into EBX which becomes the pic-base register. The POP - // instruction is label the used for the subtrahend in expressions. - // The funcRel32 kind represents the 32-bit delta to some symbol from - // the start of the function (atom) containing the funcRel32. - *kind = funcRel32; - uint32_t ta = fromAddress + value - toAddress; - *addend = ta - offsetInFrom; - } else { - *kind = delta32; - *addend = fromAddress + value - toAddress; - } - } - return llvm::Error::success(); - break; - default: - return llvm::make_error("unsupported i386 relocation type"); - } -} - -void ArchHandler_x86::generateAtomContent(const DefinedAtom &atom, - bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) { - // Copy raw bytes. - std::copy(atom.rawContent().begin(), atom.rawContent().end(), - atomContentBuffer.begin()); - // Apply fix-ups. 
- for (const Reference *ref : atom) { - uint32_t offset = ref->offsetInAtom(); - const Atom *target = ref->target(); - uint64_t targetAddress = 0; - if (isa(target)) - targetAddress = findAddress(*target); - uint64_t atomAddress = findAddress(atom); - uint64_t fixupAddress = atomAddress + offset; - if (relocatable) { - applyFixupRelocatable(*ref, &atomContentBuffer[offset], - fixupAddress, targetAddress, - atomAddress); - } else { - applyFixupFinal(*ref, &atomContentBuffer[offset], - fixupAddress, targetAddress, - atomAddress); - } - } -} - -void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::x86); - ulittle32_t *loc32 = reinterpret_cast(loc); - switch (static_cast(ref.kindValue())) { - case branch32: - *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); - break; - case branch16: - *loc32 = (targetAddress - (fixupAddress + 2)) + ref.addend(); - break; - case pointer32: - case abs32: - *loc32 = targetAddress + ref.addend(); - break; - case funcRel32: - *loc32 = targetAddress - inAtomAddress + ref.addend(); - break; - case delta32: - *loc32 = targetAddress - fixupAddress + ref.addend(); - break; - case negDelta32: - *loc32 = fixupAddress - targetAddress + ref.addend(); - break; - case modeCode: - case modeData: - case lazyPointer: - // do nothing - break; - case lazyImmediateLocation: - *loc32 = ref.addend(); - break; - case invalid: - llvm_unreachable("invalid x86 Reference Kind"); - break; - } -} - -void ArchHandler_x86::applyFixupRelocatable(const Reference &ref, - uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::x86); - bool useExternalReloc = 
useExternalRelocationTo(*ref.target()); - ulittle16_t *loc16 = reinterpret_cast(loc); - ulittle32_t *loc32 = reinterpret_cast(loc); - switch (static_cast(ref.kindValue())) { - case branch32: - if (useExternalReloc) - *loc32 = ref.addend() - (fixupAddress + 4); - else - *loc32 =(targetAddress - (fixupAddress+4)) + ref.addend(); - break; - case branch16: - if (useExternalReloc) - *loc16 = ref.addend() - (fixupAddress + 2); - else - *loc16 = (targetAddress - (fixupAddress+2)) + ref.addend(); - break; - case pointer32: - case abs32: - *loc32 = targetAddress + ref.addend(); - break; - case funcRel32: - *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME - break; - case delta32: - *loc32 = targetAddress - fixupAddress + ref.addend(); - break; - case negDelta32: - *loc32 = fixupAddress - targetAddress + ref.addend(); - break; - case modeCode: - case modeData: - case lazyPointer: - case lazyImmediateLocation: - // do nothing - break; - case invalid: - llvm_unreachable("invalid x86 Reference Kind"); - break; - } -} - -bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) { - // Undefined symbols are referenced via external relocations. - if (isa(&target)) - return true; - if (const DefinedAtom *defAtom = dyn_cast(&target)) { - switch (defAtom->merge()) { - case DefinedAtom::mergeAsTentative: - // Tentative definitions are referenced via external relocations. - return true; - case DefinedAtom::mergeAsWeak: - case DefinedAtom::mergeAsWeakAndAddressUsed: - // Global weak-defs are referenced via external relocations. - return (defAtom->scope() == DefinedAtom::scopeGlobal); - default: - break; - } - } - // Everything else is reference via an internal relocation. 
- return false; -} - -void ArchHandler_x86::appendSectionRelocations( - const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, - normalized::Relocations &relocs) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::x86); - uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); - bool useExternalReloc = useExternalRelocationTo(*ref.target()); - switch (static_cast(ref.kindValue())) { - case modeCode: - case modeData: - break; - case branch32: - if (useExternalReloc) { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength4); - } else { - if (ref.addend() != 0) - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4); - else - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - GENERIC_RELOC_VANILLA | rPcRel | rLength4); - } - break; - case branch16: - if (useExternalReloc) { - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength2); - } else { - if (ref.addend() != 0) - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2); - else - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, - GENERIC_RELOC_VANILLA | rPcRel | rLength2); - } - break; - case pointer32: - case abs32: - if (useExternalReloc) - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - GENERIC_RELOC_VANILLA | rExtern | rLength4); - else { - if (ref.addend() != 0) - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - GENERIC_RELOC_VANILLA | rScattered | rLength4); - else - appendReloc(relocs, 
sectionOffset, sectionIndexForAtom(*ref.target()), 0, - GENERIC_RELOC_VANILLA | rLength4); - } - break; - case funcRel32: - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - GENERIC_RELOC_SECTDIFF | rScattered | rLength4); - appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(), - GENERIC_RELOC_PAIR | rScattered | rLength4); - break; - case delta32: - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - GENERIC_RELOC_SECTDIFF | rScattered | rLength4); - appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + - ref.offsetInAtom(), - GENERIC_RELOC_PAIR | rScattered | rLength4); - break; - case negDelta32: - appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + - ref.offsetInAtom(), - GENERIC_RELOC_SECTDIFF | rScattered | rLength4); - appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), - GENERIC_RELOC_PAIR | rScattered | rLength4); - break; - case lazyPointer: - case lazyImmediateLocation: - llvm_unreachable("lazy reference kind implies Stubs pass was run"); - break; - case invalid: - llvm_unreachable("unknown x86 Reference Kind"); - break; - } -} - -std::unique_ptr ArchHandler::create_x86() { - return std::unique_ptr(new ArchHandler_x86()); -} - -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp deleted file mode 100644 index 687407049d4b..000000000000 --- a/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp +++ /dev/null @@ -1,899 +0,0 @@ -//===- lib/FileFormat/MachO/ArchHandler_x86_64.cpp ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "Atoms.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -namespace lld { -namespace mach_o { - -using llvm::support::ulittle32_t; -using llvm::support::ulittle64_t; - -using llvm::support::little32_t; -using llvm::support::little64_t; - -class ArchHandler_x86_64 : public ArchHandler { -public: - ArchHandler_x86_64() = default; - ~ArchHandler_x86_64() override = default; - - const Registry::KindStrings *kindStrings() override { return _sKindStrings; } - - Reference::KindArch kindArch() override { - return Reference::KindArch::x86_64; - } - - /// Used by GOTPass to locate GOT References - bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - assert(ref.kindArch() == Reference::KindArch::x86_64); - switch (ref.kindValue()) { - case ripRel32GotLoad: - canBypassGOT = true; - return true; - case ripRel32Got: - canBypassGOT = false; - return true; - case imageOffsetGot: - canBypassGOT = false; - return true; - default: - return false; - } - } - - bool isTLVAccess(const Reference &ref) const override { - assert(ref.kindNamespace() == Reference::KindNamespace::mach_o); - assert(ref.kindArch() == Reference::KindArch::x86_64); - return ref.kindValue() == ripRel32Tlv; - } - - void updateReferenceToTLV(const Reference *ref) override { - assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); - assert(ref->kindArch() == Reference::KindArch::x86_64); - assert(ref->kindValue() == ripRel32Tlv); - const_cast(ref)->setKindValue(ripRel32); - } - - /// 
Used by GOTPass to update GOT References - void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { - assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); - assert(ref->kindArch() == Reference::KindArch::x86_64); - - switch (ref->kindValue()) { - case ripRel32Got: - assert(targetNowGOT && "target must be GOT"); - LLVM_FALLTHROUGH; - case ripRel32GotLoad: - const_cast(ref) - ->setKindValue(targetNowGOT ? ripRel32 : ripRel32GotLoadNowLea); - break; - case imageOffsetGot: - const_cast(ref)->setKindValue(imageOffset); - break; - default: - llvm_unreachable("unknown GOT reference kind"); - } - } - - bool needsCompactUnwind() override { - return true; - } - - Reference::KindValue imageOffsetKind() override { - return imageOffset; - } - - Reference::KindValue imageOffsetKindIndirect() override { - return imageOffsetGot; - } - - Reference::KindValue unwindRefToPersonalityFunctionKind() override { - return ripRel32Got; - } - - Reference::KindValue unwindRefToCIEKind() override { - return negDelta32; - } - - Reference::KindValue unwindRefToFunctionKind() override{ - return unwindFDEToFunction; - } - - Reference::KindValue lazyImmediateLocationKind() override { - return lazyImmediateLocation; - } - - Reference::KindValue unwindRefToEhFrameKind() override { - return unwindInfoToEhFrame; - } - - Reference::KindValue pointerKind() override { - return pointer64; - } - - uint32_t dwarfCompactUnwindType() override { - return 0x04000000U; - } - - const StubInfo &stubInfo() override { return _sStubInfo; } - - bool isNonCallBranch(const Reference &) override { - return false; - } - - bool isCallSite(const Reference &) override; - bool isPointer(const Reference &) override; - bool isPairedReloc(const normalized::Relocation &) override; - - llvm::Error getReferenceInfo(const normalized::Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - FindAtomBySectionAndAddress atomFromAddress, - 
FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - llvm::Error - getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, bool scatterable, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) override; - - bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { - return (atom->contentType() == DefinedAtom::typeCString); - } - - void generateAtomContent(const DefinedAtom &atom, bool relocatable, - FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, - uint64_t imageBase, - llvm::MutableArrayRef atomContentBuffer) override; - - void appendSectionRelocations(const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, - normalized::Relocations &relocs) override; - - bool isDataInCodeTransition(Reference::KindValue refKind) override { - return refKind == modeCode || refKind == modeData; - } - - Reference::KindValue dataInCodeTransitionStart( - const MachODefinedAtom &atom) override { - return modeData; - } - - Reference::KindValue dataInCodeTransitionEnd( - const MachODefinedAtom &atom) override { - return modeCode; - } - -private: - static const Registry::KindStrings _sKindStrings[]; - static const StubInfo _sStubInfo; - - enum X86_64Kind: Reference::KindValue { - invalid, /// for error condition - - modeCode, /// Content starting at this offset is code. - modeData, /// Content starting at this offset is data. 
- - // Kinds found in mach-o .o files: - branch32, /// ex: call _foo - ripRel32, /// ex: movq _foo(%rip), %rax - ripRel32Minus1, /// ex: movb $0x12, _foo(%rip) - ripRel32Minus2, /// ex: movw $0x1234, _foo(%rip) - ripRel32Minus4, /// ex: movl $0x12345678, _foo(%rip) - ripRel32Anon, /// ex: movq L1(%rip), %rax - ripRel32Minus1Anon, /// ex: movb $0x12, L1(%rip) - ripRel32Minus2Anon, /// ex: movw $0x1234, L1(%rip) - ripRel32Minus4Anon, /// ex: movw $0x12345678, L1(%rip) - ripRel32GotLoad, /// ex: movq _foo@GOTPCREL(%rip), %rax - ripRel32Got, /// ex: pushq _foo@GOTPCREL(%rip) - ripRel32Tlv, /// ex: movq _foo@TLVP(%rip), %rdi - pointer64, /// ex: .quad _foo - pointer64Anon, /// ex: .quad L1 - delta64, /// ex: .quad _foo - . - delta32, /// ex: .long _foo - . - delta64Anon, /// ex: .quad L1 - . - delta32Anon, /// ex: .long L1 - . - negDelta64, /// ex: .quad . - _foo - negDelta32, /// ex: .long . - _foo - - // Kinds introduced by Passes: - ripRel32GotLoadNowLea, /// Target of GOT load is in linkage unit so - /// "movq _foo@GOTPCREL(%rip), %rax" can be changed - /// to "leaq _foo(%rip), %rax - lazyPointer, /// Location contains a lazy pointer. - lazyImmediateLocation, /// Location contains immediate value used in stub. - - imageOffset, /// Location contains offset of atom in final image - imageOffsetGot, /// Location contains offset of GOT entry for atom in - /// final image (typically personality function). - unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in - /// relocatable object (yay for implicit contracts!). - unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to - /// refer to __eh_frame entry. - tlvInitSectionOffset /// Location contains offset tlv init-value atom - /// within the __thread_data section. 
- }; - - Reference::KindValue kindFromReloc(const normalized::Relocation &reloc); - - void applyFixupFinal(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, uint64_t targetAddress, - uint64_t inAtomAddress, uint64_t imageBaseAddress, - FindAddressForAtom findSectionAddress); - - void applyFixupRelocatable(const Reference &ref, uint8_t *location, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress); -}; - -const Registry::KindStrings ArchHandler_x86_64::_sKindStrings[] = { - LLD_KIND_STRING_ENTRY(invalid), - LLD_KIND_STRING_ENTRY(modeCode), - LLD_KIND_STRING_ENTRY(modeData), - LLD_KIND_STRING_ENTRY(branch32), - LLD_KIND_STRING_ENTRY(ripRel32), - LLD_KIND_STRING_ENTRY(ripRel32Minus1), - LLD_KIND_STRING_ENTRY(ripRel32Minus2), - LLD_KIND_STRING_ENTRY(ripRel32Minus4), - LLD_KIND_STRING_ENTRY(ripRel32Anon), - LLD_KIND_STRING_ENTRY(ripRel32Minus1Anon), - LLD_KIND_STRING_ENTRY(ripRel32Minus2Anon), - LLD_KIND_STRING_ENTRY(ripRel32Minus4Anon), - LLD_KIND_STRING_ENTRY(ripRel32GotLoad), - LLD_KIND_STRING_ENTRY(ripRel32GotLoadNowLea), - LLD_KIND_STRING_ENTRY(ripRel32Got), - LLD_KIND_STRING_ENTRY(ripRel32Tlv), - LLD_KIND_STRING_ENTRY(lazyPointer), - LLD_KIND_STRING_ENTRY(lazyImmediateLocation), - LLD_KIND_STRING_ENTRY(pointer64), - LLD_KIND_STRING_ENTRY(pointer64Anon), - LLD_KIND_STRING_ENTRY(delta32), - LLD_KIND_STRING_ENTRY(delta64), - LLD_KIND_STRING_ENTRY(delta32Anon), - LLD_KIND_STRING_ENTRY(delta64Anon), - LLD_KIND_STRING_ENTRY(negDelta64), - LLD_KIND_STRING_ENTRY(negDelta32), - LLD_KIND_STRING_ENTRY(imageOffset), - LLD_KIND_STRING_ENTRY(imageOffsetGot), - LLD_KIND_STRING_ENTRY(unwindFDEToFunction), - LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), - LLD_KIND_STRING_ENTRY(tlvInitSectionOffset), - LLD_KIND_STRING_END -}; - -const ArchHandler::StubInfo ArchHandler_x86_64::_sStubInfo = { - "dyld_stub_binder", - - // Lazy pointer references - { Reference::KindArch::x86_64, pointer64, 0, 0 }, - { Reference::KindArch::x86_64, lazyPointer, 0, 0 
}, - - // GOT pointer to dyld_stub_binder - { Reference::KindArch::x86_64, pointer64, 0, 0 }, - - // x86_64 code alignment 2^1 - 1, - - // Stub size and code - 6, - { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer - { Reference::KindArch::x86_64, ripRel32, 2, 0 }, - { false, 0, 0, 0 }, - - // Stub Helper size and code - 10, - { 0x68, 0x00, 0x00, 0x00, 0x00, // pushq $lazy-info-offset - 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper - { Reference::KindArch::x86_64, lazyImmediateLocation, 1, 0 }, - { Reference::KindArch::x86_64, branch32, 6, 0 }, - - // Stub helper image cache content type - DefinedAtom::typeNonLazyPointer, - - // Stub Helper-Common size and code - 16, - // Stub helper alignment - 2, - { 0x4C, 0x8D, 0x1D, 0x00, 0x00, 0x00, 0x00, // leaq cache(%rip),%r11 - 0x41, 0x53, // push %r11 - 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *binder(%rip) - 0x90 }, // nop - { Reference::KindArch::x86_64, ripRel32, 3, 0 }, - { false, 0, 0, 0 }, - { Reference::KindArch::x86_64, ripRel32, 11, 0 }, - { false, 0, 0, 0 } - -}; - -bool ArchHandler_x86_64::isCallSite(const Reference &ref) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - assert(ref.kindArch() == Reference::KindArch::x86_64); - return (ref.kindValue() == branch32); -} - -bool ArchHandler_x86_64::isPointer(const Reference &ref) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return false; - assert(ref.kindArch() == Reference::KindArch::x86_64); - Reference::KindValue kind = ref.kindValue(); - return (kind == pointer64 || kind == pointer64Anon); -} - -bool ArchHandler_x86_64::isPairedReloc(const Relocation &reloc) { - return (reloc.type == X86_64_RELOC_SUBTRACTOR); -} - -Reference::KindValue -ArchHandler_x86_64::kindFromReloc(const Relocation &reloc) { - switch(relocPattern(reloc)) { - case X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4: - return branch32; - case X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4: - return ripRel32; - case 
X86_64_RELOC_SIGNED | rPcRel | rLength4: - return ripRel32Anon; - case X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4: - return ripRel32Minus1; - case X86_64_RELOC_SIGNED_1 | rPcRel | rLength4: - return ripRel32Minus1Anon; - case X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4: - return ripRel32Minus2; - case X86_64_RELOC_SIGNED_2 | rPcRel | rLength4: - return ripRel32Minus2Anon; - case X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4: - return ripRel32Minus4; - case X86_64_RELOC_SIGNED_4 | rPcRel | rLength4: - return ripRel32Minus4Anon; - case X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4: - return ripRel32GotLoad; - case X86_64_RELOC_GOT | rPcRel | rExtern | rLength4: - return ripRel32Got; - case X86_64_RELOC_TLV | rPcRel | rExtern | rLength4: - return ripRel32Tlv; - case X86_64_RELOC_UNSIGNED | rExtern | rLength8: - return pointer64; - case X86_64_RELOC_UNSIGNED | rLength8: - return pointer64Anon; - default: - return invalid; - } -} - -llvm::Error -ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) { - *kind = kindFromReloc(reloc); - if (*kind == invalid) - return llvm::make_error("unknown type"); - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - uint64_t targetAddress; - switch (*kind) { - case branch32: - case ripRel32: - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = *(const little32_t *)fixupContent; - return llvm::Error::success(); - case ripRel32Minus1: - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = (int32_t)*(const little32_t *)fixupContent + 1; - return llvm::Error::success(); - case ripRel32Minus2: - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; 
- *addend = (int32_t)*(const little32_t *)fixupContent + 2; - return llvm::Error::success(); - case ripRel32Minus4: - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = (int32_t)*(const little32_t *)fixupContent + 4; - return llvm::Error::success(); - case ripRel32Anon: - targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - case ripRel32Minus1Anon: - targetAddress = fixupAddress + 5 + *(const little32_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - case ripRel32Minus2Anon: - targetAddress = fixupAddress + 6 + *(const little32_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - case ripRel32Minus4Anon: - targetAddress = fixupAddress + 8 + *(const little32_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - case ripRel32GotLoad: - case ripRel32Got: - case ripRel32Tlv: - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - *addend = *(const little32_t *)fixupContent; - return llvm::Error::success(); - case tlvInitSectionOffset: - case pointer64: - if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) - return ec; - // If this is the 3rd pointer of a tlv-thunk (i.e. the pointer to the TLV's - // initial value) we need to handle it specially. 
- if (inAtom->contentType() == DefinedAtom::typeThunkTLV && - offsetInAtom == 16) { - *kind = tlvInitSectionOffset; - assert(*addend == 0 && "TLV-init has non-zero addend?"); - } else - *addend = *(const little64_t *)fixupContent; - return llvm::Error::success(); - case pointer64Anon: - targetAddress = *(const little64_t *)fixupContent; - return atomFromAddress(reloc.symbol, targetAddress, target, addend); - default: - llvm_unreachable("bad reloc kind"); - } -} - -llvm::Error -ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, - const normalized::Relocation &reloc2, - const DefinedAtom *inAtom, - uint32_t offsetInAtom, - uint64_t fixupAddress, bool swap, - bool scatterable, - FindAtomBySectionAndAddress atomFromAddress, - FindAtomBySymbolIndex atomFromSymbolIndex, - Reference::KindValue *kind, - const lld::Atom **target, - Reference::Addend *addend) { - const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; - uint64_t targetAddress; - const lld::Atom *fromTarget; - if (auto ec = atomFromSymbolIndex(reloc1.symbol, &fromTarget)) - return ec; - - switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { - case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | - X86_64_RELOC_UNSIGNED | rExtern | rLength8): { - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return ec; - uint64_t encodedAddend = (int64_t)*(const little64_t *)fixupContent; - if (inAtom == fromTarget) { - if (inAtom->contentType() == DefinedAtom::typeCFI) - *kind = unwindFDEToFunction; - else - *kind = delta64; - *addend = encodedAddend + offsetInAtom; - } else if (inAtom == *target) { - *kind = negDelta64; - *addend = encodedAddend - offsetInAtom; - *target = fromTarget; - } else - return llvm::make_error("Invalid pointer diff"); - return llvm::Error::success(); - } - case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | - X86_64_RELOC_UNSIGNED | rExtern | rLength4): { - if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) - return 
ec; - uint32_t encodedAddend = (int32_t)*(const little32_t *)fixupContent; - if (inAtom == fromTarget) { - *kind = delta32; - *addend = encodedAddend + offsetInAtom; - } else if (inAtom == *target) { - *kind = negDelta32; - *addend = encodedAddend - offsetInAtom; - *target = fromTarget; - } else - return llvm::make_error("Invalid pointer diff"); - return llvm::Error::success(); - } - case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | - X86_64_RELOC_UNSIGNED | rLength8): - if (fromTarget != inAtom) - return llvm::make_error("pointer diff not in base atom"); - *kind = delta64Anon; - targetAddress = offsetInAtom + (int64_t)*(const little64_t *)fixupContent; - return atomFromAddress(reloc2.symbol, targetAddress, target, addend); - case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | - X86_64_RELOC_UNSIGNED | rLength4): - if (fromTarget != inAtom) - return llvm::make_error("pointer diff not in base atom"); - *kind = delta32Anon; - targetAddress = offsetInAtom + (int32_t)*(const little32_t *)fixupContent; - return atomFromAddress(reloc2.symbol, targetAddress, target, addend); - default: - return llvm::make_error("unknown pair"); - } -} - -void ArchHandler_x86_64::generateAtomContent( - const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, - FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, - llvm::MutableArrayRef atomContentBuffer) { - // Copy raw bytes. - std::copy(atom.rawContent().begin(), atom.rawContent().end(), - atomContentBuffer.begin()); - // Apply fix-ups. 
- for (const Reference *ref : atom) { - uint32_t offset = ref->offsetInAtom(); - const Atom *target = ref->target(); - uint64_t targetAddress = 0; - if (isa(target)) - targetAddress = findAddress(*target); - uint64_t atomAddress = findAddress(atom); - uint64_t fixupAddress = atomAddress + offset; - if (relocatable) { - applyFixupRelocatable(*ref, &atomContentBuffer[offset], - fixupAddress, targetAddress, - atomAddress); - } else { - applyFixupFinal(*ref, &atomContentBuffer[offset], - fixupAddress, targetAddress, - atomAddress, imageBaseAddress, findSectionAddress); - } - } -} - -void ArchHandler_x86_64::applyFixupFinal( - const Reference &ref, uint8_t *loc, uint64_t fixupAddress, - uint64_t targetAddress, uint64_t inAtomAddress, uint64_t imageBaseAddress, - FindAddressForAtom findSectionAddress) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::x86_64); - ulittle32_t *loc32 = reinterpret_cast(loc); - ulittle64_t *loc64 = reinterpret_cast(loc); - switch (static_cast(ref.kindValue())) { - case branch32: - case ripRel32: - case ripRel32Anon: - case ripRel32Got: - case ripRel32GotLoad: - case ripRel32Tlv: - *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); - return; - case pointer64: - case pointer64Anon: - *loc64 = targetAddress + ref.addend(); - return; - case tlvInitSectionOffset: - *loc64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); - return; - case ripRel32Minus1: - case ripRel32Minus1Anon: - *loc32 = targetAddress - (fixupAddress + 5) + ref.addend(); - return; - case ripRel32Minus2: - case ripRel32Minus2Anon: - *loc32 = targetAddress - (fixupAddress + 6) + ref.addend(); - return; - case ripRel32Minus4: - case ripRel32Minus4Anon: - *loc32 = targetAddress - (fixupAddress + 8) + ref.addend(); - return; - case delta32: - case delta32Anon: - *loc32 = targetAddress - fixupAddress + ref.addend(); - return; - case delta64: - case delta64Anon: - case 
unwindFDEToFunction: - *loc64 = targetAddress - fixupAddress + ref.addend(); - return; - case ripRel32GotLoadNowLea: - // Change MOVQ to LEA - assert(loc[-2] == 0x8B); - loc[-2] = 0x8D; - *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); - return; - case negDelta64: - *loc64 = fixupAddress - targetAddress + ref.addend(); - return; - case negDelta32: - *loc32 = fixupAddress - targetAddress + ref.addend(); - return; - case modeCode: - case modeData: - case lazyPointer: - // Do nothing - return; - case lazyImmediateLocation: - *loc32 = ref.addend(); - return; - case imageOffset: - case imageOffsetGot: - *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); - return; - case unwindInfoToEhFrame: { - uint64_t val = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); - assert(val < 0xffffffU && "offset in __eh_frame too large"); - *loc32 = (*loc32 & 0xff000000U) | val; - return; - } - case invalid: - // Fall into llvm_unreachable(). - break; - } - llvm_unreachable("invalid x86_64 Reference Kind"); -} - -void ArchHandler_x86_64::applyFixupRelocatable(const Reference &ref, - uint8_t *loc, - uint64_t fixupAddress, - uint64_t targetAddress, - uint64_t inAtomAddress) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::x86_64); - ulittle32_t *loc32 = reinterpret_cast(loc); - ulittle64_t *loc64 = reinterpret_cast(loc); - switch (static_cast(ref.kindValue())) { - case branch32: - case ripRel32: - case ripRel32Got: - case ripRel32GotLoad: - case ripRel32Tlv: - *loc32 = ref.addend(); - return; - case ripRel32Anon: - *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); - return; - case tlvInitSectionOffset: - case pointer64: - *loc64 = ref.addend(); - return; - case pointer64Anon: - *loc64 = targetAddress + ref.addend(); - return; - case ripRel32Minus1: - *loc32 = ref.addend() - 1; - return; - case ripRel32Minus1Anon: - *loc32 = (targetAddress - (fixupAddress + 5)) + 
ref.addend(); - return; - case ripRel32Minus2: - *loc32 = ref.addend() - 2; - return; - case ripRel32Minus2Anon: - *loc32 = (targetAddress - (fixupAddress + 6)) + ref.addend(); - return; - case ripRel32Minus4: - *loc32 = ref.addend() - 4; - return; - case ripRel32Minus4Anon: - *loc32 = (targetAddress - (fixupAddress + 8)) + ref.addend(); - return; - case delta32: - *loc32 = ref.addend() + inAtomAddress - fixupAddress; - return; - case delta32Anon: - // The value we write here should be the delta to the target - // after taking in to account the difference from the fixup back to the - // last defined label - // ie, if we have: - // _base: ... - // Lfixup: .quad Ltarget - . - // ... - // Ltarget: - // - // Then we want to encode the value (Ltarget + addend) - (LFixup - _base) - *loc32 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); - return; - case delta64: - *loc64 = ref.addend() + inAtomAddress - fixupAddress; - return; - case delta64Anon: - // The value we write here should be the delta to the target - // after taking in to account the difference from the fixup back to the - // last defined label - // ie, if we have: - // _base: ... - // Lfixup: .quad Ltarget - . - // ... 
- // Ltarget: - // - // Then we want to encode the value (Ltarget + addend) - (LFixup - _base) - *loc64 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); - return; - case negDelta64: - *loc64 = ref.addend() + fixupAddress - inAtomAddress; - return; - case negDelta32: - *loc32 = ref.addend() + fixupAddress - inAtomAddress; - return; - case ripRel32GotLoadNowLea: - llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); - return; - case lazyPointer: - case lazyImmediateLocation: - llvm_unreachable("lazy reference kind implies Stubs pass was run"); - return; - case imageOffset: - case imageOffsetGot: - case unwindInfoToEhFrame: - llvm_unreachable("fixup implies __unwind_info"); - return; - case modeCode: - case modeData: - case unwindFDEToFunction: - // Do nothing for now - return; - case invalid: - // Fall into llvm_unreachable(). - break; - } - llvm_unreachable("unknown x86_64 Reference Kind"); -} - -void ArchHandler_x86_64::appendSectionRelocations( - const DefinedAtom &atom, - uint64_t atomSectionOffset, - const Reference &ref, - FindSymbolIndexForAtom symbolIndexForAtom, - FindSectionIndexForAtom sectionIndexForAtom, - FindAddressForAtom addressForAtom, - normalized::Relocations &relocs) { - if (ref.kindNamespace() != Reference::KindNamespace::mach_o) - return; - assert(ref.kindArch() == Reference::KindArch::x86_64); - uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); - switch (static_cast(ref.kindValue())) { - case modeCode: - case modeData: - return; - case branch32: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4); - return; - case ripRel32: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4 ); - return; - case ripRel32Anon: - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED | rPcRel | rLength4 ); - return; - case 
ripRel32Got: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_GOT | rPcRel | rExtern | rLength4 ); - return; - case ripRel32GotLoad: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4 ); - return; - case ripRel32Tlv: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_TLV | rPcRel | rExtern | rLength4 ); - return; - case tlvInitSectionOffset: - case pointer64: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_UNSIGNED | rExtern | rLength8); - return; - case pointer64Anon: - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_UNSIGNED | rLength8); - return; - case ripRel32Minus1: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4 ); - return; - case ripRel32Minus1Anon: - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED_1 | rPcRel | rLength4 ); - return; - case ripRel32Minus2: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4 ); - return; - case ripRel32Minus2Anon: - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED_2 | rPcRel | rLength4 ); - return; - case ripRel32Minus4: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4 ); - return; - case ripRel32Minus4Anon: - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SIGNED_4 | rPcRel | rLength4 ); - return; - case delta32: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - 
X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); - return; - case delta32Anon: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_UNSIGNED | rLength4 ); - return; - case delta64: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); - return; - case delta64Anon: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); - appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, - X86_64_RELOC_UNSIGNED | rLength8 ); - return; - case unwindFDEToFunction: - case unwindInfoToEhFrame: - return; - case negDelta32: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); - return; - case negDelta64: - appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, - X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); - appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, - X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); - return; - case ripRel32GotLoadNowLea: - llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); - return; - case lazyPointer: - case lazyImmediateLocation: - llvm_unreachable("lazy reference kind implies Stubs pass was run"); - return; - case imageOffset: - case imageOffsetGot: - llvm_unreachable("__unwind_info references should have been resolved"); - return; - case invalid: - // Fall into llvm_unreachable(). 
- break; - } - llvm_unreachable("unknown x86_64 Reference Kind"); -} - -std::unique_ptr ArchHandler::create_x86_64() { - return std::unique_ptr(new ArchHandler_x86_64()); -} - -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/Atoms.h b/lld/lib/ReaderWriter/MachO/Atoms.h deleted file mode 100644 index c61aaa88e8df..000000000000 --- a/lld/lib/ReaderWriter/MachO/Atoms.h +++ /dev/null @@ -1,180 +0,0 @@ -//===- lib/ReaderWriter/MachO/Atoms.h ---------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_ATOMS_H -#define LLD_READER_WRITER_MACHO_ATOMS_H - -#include "lld/Core/Atom.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/SharedLibraryAtom.h" -#include "lld/Core/Simple.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include -#include - -namespace lld { - -class File; - -namespace mach_o { - -class MachODefinedAtom : public SimpleDefinedAtom { -public: - MachODefinedAtom(const File &f, const StringRef name, Scope scope, - ContentType type, Merge merge, bool thumb, bool noDeadStrip, - const ArrayRef content, Alignment align) - : SimpleDefinedAtom(f), _name(name), _content(content), - _align(align), _contentType(type), _scope(scope), _merge(merge), - _thumb(thumb), _noDeadStrip(noDeadStrip) {} - - // Constructor for zero-fill content - MachODefinedAtom(const File &f, const StringRef name, Scope scope, - ContentType type, uint64_t size, bool noDeadStrip, - Alignment align) - : SimpleDefinedAtom(f), _name(name), - _content(ArrayRef(nullptr, size)), _align(align), - _contentType(type), _scope(scope), _merge(mergeNo), _thumb(false), - _noDeadStrip(noDeadStrip) {} - - ~MachODefinedAtom() override = default; 
- - uint64_t size() const override { return _content.size(); } - - ContentType contentType() const override { return _contentType; } - - Alignment alignment() const override { return _align; } - - StringRef name() const override { return _name; } - - Scope scope() const override { return _scope; } - - Merge merge() const override { return _merge; } - - DeadStripKind deadStrip() const override { - if (_contentType == DefinedAtom::typeInitializerPtr) - return deadStripNever; - if (_contentType == DefinedAtom::typeTerminatorPtr) - return deadStripNever; - if (_noDeadStrip) - return deadStripNever; - return deadStripNormal; - } - - ArrayRef rawContent() const override { - // Note: Zerofill atoms have a content pointer which is null. - return _content; - } - - bool isThumb() const { return _thumb; } - -private: - const StringRef _name; - const ArrayRef _content; - const DefinedAtom::Alignment _align; - const ContentType _contentType; - const Scope _scope; - const Merge _merge; - const bool _thumb; - const bool _noDeadStrip; -}; - -class MachODefinedCustomSectionAtom : public MachODefinedAtom { -public: - MachODefinedCustomSectionAtom(const File &f, const StringRef name, - Scope scope, ContentType type, Merge merge, - bool thumb, bool noDeadStrip, - const ArrayRef content, - StringRef sectionName, Alignment align) - : MachODefinedAtom(f, name, scope, type, merge, thumb, noDeadStrip, - content, align), - _sectionName(sectionName) {} - - ~MachODefinedCustomSectionAtom() override = default; - - SectionChoice sectionChoice() const override { - return DefinedAtom::sectionCustomRequired; - } - - StringRef customSectionName() const override { - return _sectionName; - } -private: - StringRef _sectionName; -}; - -class MachOTentativeDefAtom : public SimpleDefinedAtom { -public: - MachOTentativeDefAtom(const File &f, const StringRef name, Scope scope, - uint64_t size, DefinedAtom::Alignment align) - : SimpleDefinedAtom(f), _name(std::string(name)), _scope(scope), - _size(size), 
_align(align) {} - - ~MachOTentativeDefAtom() override = default; - - uint64_t size() const override { return _size; } - - Merge merge() const override { return DefinedAtom::mergeAsTentative; } - - ContentType contentType() const override { return DefinedAtom::typeZeroFill; } - - Alignment alignment() const override { return _align; } - - StringRef name() const override { return _name; } - - Scope scope() const override { return _scope; } - - ArrayRef rawContent() const override { return ArrayRef(); } - -private: - const std::string _name; - const Scope _scope; - const uint64_t _size; - const DefinedAtom::Alignment _align; -}; - -class MachOSharedLibraryAtom : public SharedLibraryAtom { -public: - MachOSharedLibraryAtom(const File &file, StringRef name, - StringRef dylibInstallName, bool weakDef) - : SharedLibraryAtom(), _file(file), _name(name), - _dylibInstallName(dylibInstallName) {} - ~MachOSharedLibraryAtom() override = default; - - StringRef loadName() const override { return _dylibInstallName; } - - bool canBeNullAtRuntime() const override { - // FIXME: this may actually be changeable. For now, all symbols are strongly - // defined though. - return false; - } - - const File &file() const override { return _file; } - - StringRef name() const override { return _name; } - - Type type() const override { - // Unused in MachO (I think). 
- return Type::Unknown; - } - - uint64_t size() const override { - // Unused in MachO (I think) - return 0; - } - -private: - const File &_file; - StringRef _name; - StringRef _dylibInstallName; -}; - -} // end namespace mach_o -} // end namespace lld - -#endif // LLD_READER_WRITER_MACHO_ATOMS_H diff --git a/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/lld/lib/ReaderWriter/MachO/CMakeLists.txt deleted file mode 100644 index c3e2497b8c9e..000000000000 --- a/lld/lib/ReaderWriter/MachO/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -add_lld_library(lldMachOOld - ArchHandler.cpp - ArchHandler_arm.cpp - ArchHandler_arm64.cpp - ArchHandler_x86.cpp - ArchHandler_x86_64.cpp - CompactUnwindPass.cpp - GOTPass.cpp - LayoutPass.cpp - MachOLinkingContext.cpp - MachONormalizedFileBinaryReader.cpp - MachONormalizedFileBinaryWriter.cpp - MachONormalizedFileFromAtoms.cpp - MachONormalizedFileToAtoms.cpp - MachONormalizedFileYAML.cpp - ObjCPass.cpp - ShimPass.cpp - StubsPass.cpp - TLVPass.cpp - WriterMachO.cpp - - LINK_COMPONENTS - DebugInfoDWARF - Demangle - Object - Support - TextAPI - - LINK_LIBS - lldCommon - lldCore - lldYAML - ${LLVM_PTHREAD_LIB} - ) - -include_directories(.) diff --git a/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp deleted file mode 100644 index f3636feb217b..000000000000 --- a/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp +++ /dev/null @@ -1,580 +0,0 @@ -//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file A pass to convert MachO's __compact_unwind sections into the final -/// __unwind_info format used during runtime. 
See -/// mach-o/compact_unwind_encoding.h for more details on the formats involved. -/// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "File.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "MachOPasses.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" - -#define DEBUG_TYPE "macho-compact-unwind" - -namespace lld { -namespace mach_o { - -namespace { -struct CompactUnwindEntry { - const Atom *rangeStart; - const Atom *personalityFunction; - const Atom *lsdaLocation; - const Atom *ehFrame; - - uint32_t rangeLength; - - // There are 3 types of compact unwind entry, distinguished by the encoding - // value: 0 indicates a function with no unwind info; - // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to - // __eh_frame, and that the ehFrame entry will be valid; any other value is a - // real compact unwind entry -- personalityFunction will be set and - // lsdaLocation may be. 
- uint32_t encoding; - - CompactUnwindEntry(const DefinedAtom *function) - : rangeStart(function), personalityFunction(nullptr), - lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), - encoding(0) {} - - CompactUnwindEntry() - : rangeStart(nullptr), personalityFunction(nullptr), - lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} -}; - -struct UnwindInfoPage { - ArrayRef entries; -}; -} - -class UnwindInfoAtom : public SimpleDefinedAtom { -public: - UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, - std::vector &personalities, - std::vector &commonEncodings, - std::vector &pages, uint32_t numLSDAs) - : SimpleDefinedAtom(file), _archHandler(archHandler), - _commonEncodingsOffset(7 * sizeof(uint32_t)), - _personalityArrayOffset(_commonEncodingsOffset + - commonEncodings.size() * sizeof(uint32_t)), - _topLevelIndexOffset(_personalityArrayOffset + - personalities.size() * sizeof(uint32_t)), - _lsdaIndexOffset(_topLevelIndexOffset + - 3 * (pages.size() + 1) * sizeof(uint32_t)), - _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), - _isBig(isBig) { - - addHeader(commonEncodings.size(), personalities.size(), pages.size()); - addCommonEncodings(commonEncodings); - addPersonalityFunctions(personalities); - addTopLevelIndexes(pages); - addLSDAIndexes(pages, numLSDAs); - addSecondLevelPages(pages); - } - - ~UnwindInfoAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeProcessedUnwindInfo; - } - - Alignment alignment() const override { return 4; } - - uint64_t size() const override { return _contents.size(); } - - ContentPermissions permissions() const override { - return DefinedAtom::permR__; - } - - ArrayRef rawContent() const override { return _contents; } - - void addHeader(uint32_t numCommon, uint32_t numPersonalities, - uint32_t numPages) { - using normalized::write32; - - uint32_t headerSize = 7 * sizeof(uint32_t); - 
_contents.resize(headerSize); - - uint8_t *headerEntries = _contents.data(); - // version - write32(headerEntries, 1, _isBig); - // commonEncodingsArraySectionOffset - write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); - // commonEncodingsArrayCount - write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); - // personalityArraySectionOffset - write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, - _isBig); - // personalityArrayCount - write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); - // indexSectionOffset - write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); - // indexCount - write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); - } - - /// Add the list of common encodings to the section; this is simply an array - /// of uint32_t compact values. Size has already been specified in the header. - void addCommonEncodings(std::vector &commonEncodings) { - using normalized::write32; - - _contents.resize(_commonEncodingsOffset + - commonEncodings.size() * sizeof(uint32_t)); - uint8_t *commonEncodingsArea = - reinterpret_cast(_contents.data() + _commonEncodingsOffset); - - for (uint32_t encoding : commonEncodings) { - write32(commonEncodingsArea, encoding, _isBig); - commonEncodingsArea += sizeof(uint32_t); - } - } - - void addPersonalityFunctions(std::vector personalities) { - _contents.resize(_personalityArrayOffset + - personalities.size() * sizeof(uint32_t)); - - for (unsigned i = 0; i < personalities.size(); ++i) - addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), - personalities[i]); - } - - void addTopLevelIndexes(std::vector &pages) { - using normalized::write32; - - uint32_t numIndexes = pages.size() + 1; - _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); - - uint32_t pageLoc = _firstPageOffset; - - // The most difficult job here is calculating the LSDAs; everything else - // follows fairly 
naturally, but we can't state where the first - uint8_t *indexData = &_contents[_topLevelIndexOffset]; - uint32_t numLSDAs = 0; - for (unsigned i = 0; i < pages.size(); ++i) { - // functionOffset - addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), - pages[i].entries[0].rangeStart); - // secondLevelPagesSectionOffset - write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); - write32(indexData + (3 * i + 2) * sizeof(uint32_t), - _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); - - for (auto &entry : pages[i].entries) - if (entry.lsdaLocation) - ++numLSDAs; - } - - // Finally, write out the final sentinel index - auto &finalEntry = pages[pages.size() - 1].entries.back(); - addImageReference(_topLevelIndexOffset + - 3 * pages.size() * sizeof(uint32_t), - finalEntry.rangeStart, finalEntry.rangeLength); - // secondLevelPagesSectionOffset => 0 - write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), - _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); - } - - void addLSDAIndexes(std::vector &pages, uint32_t numLSDAs) { - _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); - - uint32_t curOffset = _lsdaIndexOffset; - for (auto &page : pages) { - for (auto &entry : page.entries) { - if (!entry.lsdaLocation) - continue; - - addImageReference(curOffset, entry.rangeStart); - addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); - curOffset += 2 * sizeof(uint32_t); - } - } - } - - void addSecondLevelPages(std::vector &pages) { - for (auto &page : pages) { - addRegularSecondLevelPage(page); - } - } - - void addRegularSecondLevelPage(const UnwindInfoPage &page) { - uint32_t curPageOffset = _contents.size(); - const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); - uint32_t curPageSize = - headerSize + 2 * page.entries.size() * sizeof(uint32_t); - _contents.resize(curPageOffset + curPageSize); - - using normalized::write32; - using normalized::write16; - // 2 => regular 
page - write32(&_contents[curPageOffset], 2, _isBig); - // offset of 1st entry - write16(&_contents[curPageOffset + 4], headerSize, _isBig); - write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); - - uint32_t pagePos = curPageOffset + headerSize; - for (auto &entry : page.entries) { - addImageReference(pagePos, entry.rangeStart); - - write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, - _isBig); - if ((entry.encoding & 0x0f000000U) == - _archHandler.dwarfCompactUnwindType()) - addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); - - pagePos += 2 * sizeof(uint32_t); - } - } - - void addEhFrameReference(uint32_t offset, const Atom *dest, - Reference::Addend addend = 0) { - addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), - _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); - } - - void addImageReference(uint32_t offset, const Atom *dest, - Reference::Addend addend = 0) { - addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), - _archHandler.imageOffsetKind(), offset, dest, addend); - } - - void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { - addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), - _archHandler.imageOffsetKindIndirect(), offset, dest, 0); - } - -private: - mach_o::ArchHandler &_archHandler; - std::vector _contents; - uint32_t _commonEncodingsOffset; - uint32_t _personalityArrayOffset; - uint32_t _topLevelIndexOffset; - uint32_t _lsdaIndexOffset; - uint32_t _firstPageOffset; - bool _isBig; -}; - -/// Pass for instantiating and optimizing GOT slots. 
-/// -class CompactUnwindPass : public Pass { -public: - CompactUnwindPass(const MachOLinkingContext &context) - : _ctx(context), _archHandler(_ctx.archHandler()), - _file(*_ctx.make_file("")), - _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) { - _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); - } - -private: - llvm::Error perform(SimpleFile &mergedFile) override { - LLVM_DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); - - std::map unwindLocs; - std::map dwarfFrames; - std::vector personalities; - uint32_t numLSDAs = 0; - - // First collect all __compact_unwind and __eh_frame entries, addressable by - // the function referred to. - collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, - numLSDAs); - - collectDwarfFrameEntries(mergedFile, dwarfFrames); - - // Skip rest of pass if no unwind info. - if (unwindLocs.empty() && dwarfFrames.empty()) - return llvm::Error::success(); - - // FIXME: if there are more than 4 personality functions then we need to - // defer to DWARF info for the ones we don't put in the list. They should - // also probably be sorted by frequency. - assert(personalities.size() <= 4); - - // TODO: Find common encodings for use by compressed pages. - std::vector commonEncodings; - - // Now sort the entries by final address and fixup the compact encoding to - // its final form (i.e. set personality function bits & create DWARF - // references where needed). - std::vector unwindInfos = createUnwindInfoEntries( - mergedFile, unwindLocs, personalities, dwarfFrames); - - // Remove any unused eh-frame atoms. - pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames); - - // Finally, we can start creating pages based on these entries. - - LLVM_DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); - // FIXME: we split the entries into pages naively: lots of 4k pages followed - // by a small one. ld64 tried to minimize space and align them to real 4k - // boundaries. 
That might be worth doing, or perhaps we could perform some - // minor balancing for expected number of lookups. - std::vector pages; - auto remainingInfos = llvm::makeArrayRef(unwindInfos); - do { - pages.push_back(UnwindInfoPage()); - - // FIXME: we only create regular pages at the moment. These can hold up to - // 1021 entries according to the documentation. - unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size()); - - pages.back().entries = remainingInfos.slice(0, entriesInPage); - remainingInfos = remainingInfos.slice(entriesInPage); - - LLVM_DEBUG(llvm::dbgs() - << " Page from " - << pages.back().entries[0].rangeStart->name() << " to " - << pages.back().entries.back().rangeStart->name() << " + " - << llvm::format("0x%x", - pages.back().entries.back().rangeLength) - << " has " << entriesInPage << " entries\n"); - } while (!remainingInfos.empty()); - - auto *unwind = new (_file.allocator()) - UnwindInfoAtom(_archHandler, _file, _isBig, personalities, - commonEncodings, pages, numLSDAs); - mergedFile.addAtom(*unwind); - - // Finally, remove all __compact_unwind atoms now that we've processed them. 
- mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) { - return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; - }); - - return llvm::Error::success(); - } - - void collectCompactUnwindEntries( - const SimpleFile &mergedFile, - std::map &unwindLocs, - std::vector &personalities, uint32_t &numLSDAs) { - LLVM_DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); - - for (const DefinedAtom *atom : mergedFile.defined()) { - if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) - continue; - - auto unwindEntry = extractCompactUnwindEntry(atom); - unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); - - LLVM_DEBUG(llvm::dbgs() << " Entry for " - << unwindEntry.rangeStart->name() << ", encoding=" - << llvm::format("0x%08x", unwindEntry.encoding)); - if (unwindEntry.personalityFunction) - LLVM_DEBUG(llvm::dbgs() - << ", personality=" - << unwindEntry.personalityFunction->name() - << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); - LLVM_DEBUG(llvm::dbgs() << '\n'); - - // Count number of LSDAs we see, since we need to know how big the index - // will be while laying out the section. - if (unwindEntry.lsdaLocation) - ++numLSDAs; - - // Gather the personality functions now, so that they're in deterministic - // order (derived from the DefinedAtom order). - if (unwindEntry.personalityFunction && - !llvm::count(personalities, unwindEntry.personalityFunction)) - personalities.push_back(unwindEntry.personalityFunction); - } - } - - CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { - CompactUnwindEntry entry; - - for (const Reference *ref : *atom) { - switch (ref->offsetInAtom()) { - case 0: - // FIXME: there could legitimately be functions with multiple encoding - // entries. However, nothing produces them at the moment. 
- assert(ref->addend() == 0 && "unexpected offset into function"); - entry.rangeStart = ref->target(); - break; - case 0x10: - assert(ref->addend() == 0 && "unexpected offset into personality fn"); - entry.personalityFunction = ref->target(); - break; - case 0x18: - assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); - entry.lsdaLocation = ref->target(); - break; - } - } - - if (atom->rawContent().size() < 4 * sizeof(uint32_t)) - return entry; - - using normalized::read32; - entry.rangeLength = - read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); - entry.encoding = - read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); - return entry; - } - - void - collectDwarfFrameEntries(const SimpleFile &mergedFile, - std::map &dwarfFrames) { - for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) { - if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) - continue; - if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) - continue; - - if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) - dwarfFrames[function] = ehFrameAtom; - } - } - - /// Every atom defined in __TEXT,__text needs an entry in the final - /// __unwind_info section (in order). These comes from two sources: - /// + Input __compact_unwind sections where possible (after adding the - /// personality function offset which is only known now). - /// + A synthesised reference to __eh_frame if there's no __compact_unwind - /// or too many personality functions to be accommodated. - std::vector createUnwindInfoEntries( - const SimpleFile &mergedFile, - const std::map &unwindLocs, - const std::vector &personalities, - const std::map &dwarfFrames) { - std::vector unwindInfos; - - LLVM_DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); - // The final order in the __unwind_info section must be derived from the - // order of typeCode atoms, since that's how they'll be put into the object - // file eventually (yuck!). 
- for (const DefinedAtom *atom : mergedFile.defined()) { - if (atom->contentType() != DefinedAtom::typeCode) - continue; - - unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( - atom, unwindLocs, personalities, dwarfFrames)); - - LLVM_DEBUG(llvm::dbgs() - << " Entry for " << atom->name() << ", final encoding=" - << llvm::format("0x%08x", unwindInfos.back().encoding) - << '\n'); - } - - return unwindInfos; - } - - /// Remove unused EH frames. - /// - /// An EH frame is considered unused if there is a corresponding compact - /// unwind atom that doesn't require the EH frame. - void pruneUnusedEHFrames( - SimpleFile &mergedFile, - const std::vector &unwindInfos, - const std::map &unwindLocs, - const std::map &dwarfFrames) { - - // Worklist of all 'used' FDEs. - std::vector usedDwarfWorklist; - - // We have to check two conditions when building the worklist: - // (1) EH frames used by compact unwind entries. - for (auto &entry : unwindInfos) - if (entry.ehFrame) - usedDwarfWorklist.push_back(cast(entry.ehFrame)); - - // (2) EH frames that reference functions with no corresponding compact - // unwind info. - for (auto &entry : dwarfFrames) - if (!unwindLocs.count(entry.first)) - usedDwarfWorklist.push_back(cast(entry.second)); - - // Add all transitively referenced CFI atoms by processing the worklist. - std::set usedDwarfFrames; - while (!usedDwarfWorklist.empty()) { - const DefinedAtom *cfiAtom = usedDwarfWorklist.back(); - usedDwarfWorklist.pop_back(); - usedDwarfFrames.insert(cfiAtom); - for (const auto *ref : *cfiAtom) { - const DefinedAtom *cfiTarget = dyn_cast(ref->target()); - if (cfiTarget->contentType() == DefinedAtom::typeCFI) - usedDwarfWorklist.push_back(cfiTarget); - } - } - - // Finally, delete all unreferenced CFI atoms. 
- mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) { - if ((atom->contentType() == DefinedAtom::typeCFI) && - !usedDwarfFrames.count(atom)) - return true; - return false; - }); - } - - CompactUnwindEntry finalizeUnwindInfoEntryForAtom( - const DefinedAtom *function, - const std::map &unwindLocs, - const std::vector &personalities, - const std::map &dwarfFrames) { - auto unwindLoc = unwindLocs.find(function); - - CompactUnwindEntry entry; - if (unwindLoc == unwindLocs.end()) { - // Default entry has correct encoding (0 => no unwind), but we need to - // synthesise the function. - entry.rangeStart = function; - entry.rangeLength = function->size(); - } else - entry = unwindLoc->second; - - - // If there's no __compact_unwind entry, or it explicitly says to use - // __eh_frame, we need to try and fill in the correct DWARF atom. - if (entry.encoding == _archHandler.dwarfCompactUnwindType() || - entry.encoding == 0) { - auto dwarfFrame = dwarfFrames.find(function); - if (dwarfFrame != dwarfFrames.end()) { - entry.encoding = _archHandler.dwarfCompactUnwindType(); - entry.ehFrame = dwarfFrame->second; - } - } - - auto personality = llvm::find(personalities, entry.personalityFunction); - uint32_t personalityIdx = personality == personalities.end() - ? 0 - : personality - personalities.begin() + 1; - - // FIXME: We should also use DWARF when there isn't enough room for the - // personality function in the compact encoding. 
- assert(personalityIdx < 4 && "too many personality functions"); - - entry.encoding |= personalityIdx << 28; - - if (entry.lsdaLocation) - entry.encoding |= 1U << 30; - - return entry; - } - - const MachOLinkingContext &_ctx; - mach_o::ArchHandler &_archHandler; - MachOFile &_file; - bool _isBig; -}; - -void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { - assert(ctx.needsCompactUnwindPass()); - pm.add(std::make_unique(ctx)); -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/DebugInfo.h b/lld/lib/ReaderWriter/MachO/DebugInfo.h deleted file mode 100644 index 591dd1ebad86..000000000000 --- a/lld/lib/ReaderWriter/MachO/DebugInfo.h +++ /dev/null @@ -1,106 +0,0 @@ -//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_DEBUGINFO_H -#define LLD_READER_WRITER_MACHO_DEBUGINFO_H - -#include "lld/Core/Atom.h" -#include - -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - - -namespace lld { -namespace mach_o { - -class DebugInfo { -public: - enum class Kind { - Dwarf, - Stabs - }; - - Kind kind() const { return _kind; } - - void setAllocator(std::unique_ptr allocator) { - _allocator = std::move(allocator); - } - -protected: - DebugInfo(Kind kind) : _kind(kind) {} - -private: - std::unique_ptr _allocator; - Kind _kind; -}; - -struct TranslationUnitSource { - StringRef name; - StringRef path; -}; - -class DwarfDebugInfo : public DebugInfo { -public: - DwarfDebugInfo(TranslationUnitSource tu) - : DebugInfo(Kind::Dwarf), _tu(std::move(tu)) {} - - static inline bool classof(const DebugInfo *di) { - 
return di->kind() == Kind::Dwarf; - } - - const TranslationUnitSource &translationUnitSource() const { return _tu; } - -private: - TranslationUnitSource _tu; -}; - -struct Stab { - Stab(const Atom* atom, uint8_t type, uint8_t other, uint16_t desc, - uint32_t value, StringRef str) - : atom(atom), type(type), other(other), desc(desc), value(value), - str(str) {} - - const class Atom* atom; - uint8_t type; - uint8_t other; - uint16_t desc; - uint32_t value; - StringRef str; -}; - -inline raw_ostream& operator<<(raw_ostream &os, Stab &s) { - os << "Stab -- atom: " << llvm::format("%p", s.atom) << ", type: " << (uint32_t)s.type - << ", other: " << (uint32_t)s.other << ", desc: " << s.desc << ", value: " << s.value - << ", str: '" << s.str << "'"; - return os; -} - -class StabsDebugInfo : public DebugInfo { -public: - - typedef std::vector StabsList; - - StabsDebugInfo(StabsList stabs) - : DebugInfo(Kind::Stabs), _stabs(std::move(stabs)) {} - - static inline bool classof(const DebugInfo *di) { - return di->kind() == Kind::Stabs; - } - - const StabsList& stabs() const { return _stabs; } - -public: - StabsList _stabs; -}; - -} // end namespace mach_o -} // end namespace lld - -#endif // LLD_READER_WRITER_MACHO_DEBUGINFO_H diff --git a/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h b/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h deleted file mode 100644 index ce94be457026..000000000000 --- a/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h +++ /dev/null @@ -1,154 +0,0 @@ -//===- lib/ReaderWriter/MachO/ExecutableAtoms.h ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H -#define LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H - -#include "Atoms.h" -#include "File.h" - -#include "llvm/BinaryFormat/MachO.h" - -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/LinkingContext.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "lld/Core/UndefinedAtom.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" - -namespace lld { -namespace mach_o { - - -// -// CEntryFile adds an UndefinedAtom for "_main" so that the Resolving -// phase will fail if "_main" is undefined. -// -class CEntryFile : public SimpleFile { -public: - CEntryFile(const MachOLinkingContext &context) - : SimpleFile("C entry", kindCEntryObject), - _undefMain(*this, context.entrySymbolName()) { - this->addAtom(_undefMain); - } - -private: - SimpleUndefinedAtom _undefMain; -}; - - -// -// StubHelperFile adds an UndefinedAtom for "dyld_stub_binder" so that -// the Resolveing phase will fail if "dyld_stub_binder" is undefined. -// -class StubHelperFile : public SimpleFile { -public: - StubHelperFile(const MachOLinkingContext &context) - : SimpleFile("stub runtime", kindStubHelperObject), - _undefBinder(*this, context.binderSymbolName()) { - this->addAtom(_undefBinder); - } - -private: - SimpleUndefinedAtom _undefBinder; -}; - - -// -// MachHeaderAliasFile lazily instantiates the magic symbols that mark the start -// of the mach_header for final linked images. 
-// -class MachHeaderAliasFile : public SimpleFile { -public: - MachHeaderAliasFile(const MachOLinkingContext &context) - : SimpleFile("mach_header symbols", kindHeaderObject) { - StringRef machHeaderSymbolName; - DefinedAtom::Scope symbolScope = DefinedAtom::scopeLinkageUnit; - StringRef dsoHandleName; - switch (context.outputMachOType()) { - case llvm::MachO::MH_OBJECT: - machHeaderSymbolName = "__mh_object_header"; - break; - case llvm::MachO::MH_EXECUTE: - machHeaderSymbolName = "__mh_execute_header"; - symbolScope = DefinedAtom::scopeGlobal; - dsoHandleName = "___dso_handle"; - break; - case llvm::MachO::MH_FVMLIB: - llvm_unreachable("no mach_header symbol for file type"); - case llvm::MachO::MH_CORE: - llvm_unreachable("no mach_header symbol for file type"); - case llvm::MachO::MH_PRELOAD: - llvm_unreachable("no mach_header symbol for file type"); - case llvm::MachO::MH_DYLIB: - machHeaderSymbolName = "__mh_dylib_header"; - dsoHandleName = "___dso_handle"; - break; - case llvm::MachO::MH_DYLINKER: - machHeaderSymbolName = "__mh_dylinker_header"; - dsoHandleName = "___dso_handle"; - break; - case llvm::MachO::MH_BUNDLE: - machHeaderSymbolName = "__mh_bundle_header"; - dsoHandleName = "___dso_handle"; - break; - case llvm::MachO::MH_DYLIB_STUB: - llvm_unreachable("no mach_header symbol for file type"); - case llvm::MachO::MH_DSYM: - llvm_unreachable("no mach_header symbol for file type"); - case llvm::MachO::MH_KEXT_BUNDLE: - dsoHandleName = "___dso_handle"; - break; - } - if (!machHeaderSymbolName.empty()) - _definedAtoms.push_back(new (allocator()) MachODefinedAtom( - *this, machHeaderSymbolName, symbolScope, - DefinedAtom::typeMachHeader, DefinedAtom::mergeNo, false, - true /* noDeadStrip */, - ArrayRef(), DefinedAtom::Alignment(4096))); - - if (!dsoHandleName.empty()) - _definedAtoms.push_back(new (allocator()) MachODefinedAtom( - *this, dsoHandleName, DefinedAtom::scopeLinkageUnit, - DefinedAtom::typeDSOHandle, DefinedAtom::mergeNo, false, - true /* 
noDeadStrip */, - ArrayRef(), DefinedAtom::Alignment(1))); - } - - const AtomRange defined() const override { - return _definedAtoms; - } - const AtomRange undefined() const override { - return _noUndefinedAtoms; - } - - const AtomRange sharedLibrary() const override { - return _noSharedLibraryAtoms; - } - - const AtomRange absolute() const override { - return _noAbsoluteAtoms; - } - - void clearAtoms() override { - _definedAtoms.clear(); - _noUndefinedAtoms.clear(); - _noSharedLibraryAtoms.clear(); - _noAbsoluteAtoms.clear(); - } - - -private: - mutable AtomVector _definedAtoms; -}; - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H diff --git a/lld/lib/ReaderWriter/MachO/File.h b/lld/lib/ReaderWriter/MachO/File.h deleted file mode 100644 index 77832969c6b3..000000000000 --- a/lld/lib/ReaderWriter/MachO/File.h +++ /dev/null @@ -1,467 +0,0 @@ -//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_FILE_H -#define LLD_READER_WRITER_MACHO_FILE_H - -#include "Atoms.h" -#include "DebugInfo.h" -#include "MachONormalizedFile.h" -#include "lld/Core/SharedLibraryFile.h" -#include "lld/Core/Simple.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/Support/Format.h" -#include "llvm/TextAPI/InterfaceFile.h" -#include "llvm/TextAPI/TextAPIReader.h" -#include - -namespace lld { -namespace mach_o { - -using lld::mach_o::normalized::Section; - -class MachOFile : public SimpleFile { -public: - - /// Real file constructor - for on-disk files. 
- MachOFile(std::unique_ptr mb, MachOLinkingContext *ctx) - : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject), - _mb(std::move(mb)), _ctx(ctx) {} - - /// Dummy file constructor - for virtual files. - MachOFile(StringRef path) - : SimpleFile(path, File::kindMachObject) {} - - void addDefinedAtom(StringRef name, Atom::Scope scope, - DefinedAtom::ContentType type, DefinedAtom::Merge merge, - uint64_t sectionOffset, uint64_t contentSize, bool thumb, - bool noDeadStrip, bool copyRefs, - const Section *inSection) { - assert(sectionOffset+contentSize <= inSection->content.size()); - ArrayRef content = inSection->content.slice(sectionOffset, - contentSize); - if (copyRefs) { - // Make a copy of the atom's name and content that is owned by this file. - name = name.copy(allocator()); - content = content.copy(allocator()); - } - DefinedAtom::Alignment align( - inSection->alignment, - sectionOffset % inSection->alignment); - auto *atom = - new (allocator()) MachODefinedAtom(*this, name, scope, type, merge, - thumb, noDeadStrip, content, align); - addAtomForSection(inSection, atom, sectionOffset); - } - - void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope, - DefinedAtom::ContentType type, DefinedAtom::Merge merge, - bool thumb, bool noDeadStrip, uint64_t sectionOffset, - uint64_t contentSize, StringRef sectionName, - bool copyRefs, const Section *inSection) { - assert(sectionOffset+contentSize <= inSection->content.size()); - ArrayRef content = inSection->content.slice(sectionOffset, - contentSize); - if (copyRefs) { - // Make a copy of the atom's name and content that is owned by this file. 
- name = name.copy(allocator()); - content = content.copy(allocator()); - sectionName = sectionName.copy(allocator()); - } - DefinedAtom::Alignment align( - inSection->alignment, - sectionOffset % inSection->alignment); - auto *atom = - new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type, - merge, thumb, - noDeadStrip, content, - sectionName, align); - addAtomForSection(inSection, atom, sectionOffset); - } - - void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope, - uint64_t sectionOffset, uint64_t size, - bool noDeadStrip, bool copyRefs, - const Section *inSection) { - if (copyRefs) { - // Make a copy of the atom's name and content that is owned by this file. - name = name.copy(allocator()); - } - DefinedAtom::Alignment align( - inSection->alignment, - sectionOffset % inSection->alignment); - - DefinedAtom::ContentType type = DefinedAtom::typeUnknown; - switch (inSection->type) { - case llvm::MachO::S_ZEROFILL: - type = DefinedAtom::typeZeroFill; - break; - case llvm::MachO::S_THREAD_LOCAL_ZEROFILL: - type = DefinedAtom::typeTLVInitialZeroFill; - break; - default: - llvm_unreachable("Unrecognized zero-fill section"); - } - - auto *atom = - new (allocator()) MachODefinedAtom(*this, name, scope, type, size, - noDeadStrip, align); - addAtomForSection(inSection, atom, sectionOffset); - } - - void addUndefinedAtom(StringRef name, bool copyRefs) { - if (copyRefs) { - // Make a copy of the atom's name that is owned by this file. - name = name.copy(allocator()); - } - auto *atom = new (allocator()) SimpleUndefinedAtom(*this, name); - addAtom(*atom); - _undefAtoms[name] = atom; - } - - void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size, - DefinedAtom::Alignment align, bool copyRefs) { - if (copyRefs) { - // Make a copy of the atom's name that is owned by this file. 
- name = name.copy(allocator()); - } - auto *atom = - new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align); - addAtom(*atom); - _undefAtoms[name] = atom; - } - - /// Search this file for the atom from 'section' that covers - /// 'offsetInSect'. Returns nullptr is no atom found. - MachODefinedAtom *findAtomCoveringAddress(const Section §ion, - uint64_t offsetInSect, - uint32_t *foundOffsetAtom=nullptr) { - const auto &pos = _sectionAtoms.find(§ion); - if (pos == _sectionAtoms.end()) - return nullptr; - const auto &vec = pos->second; - assert(offsetInSect < section.content.size()); - // Vector of atoms for section are already sorted, so do binary search. - const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect, - [offsetInSect](const SectionOffsetAndAtom &ao, - uint64_t targetAddr) -> bool { - // Each atom has a start offset of its slice of the - // section's content. This compare function must return true - // iff the atom's range is before the offset being searched for. - uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size(); - return (atomsEndOffset <= offsetInSect); - }); - if (atomPos == vec.end()) - return nullptr; - if (foundOffsetAtom) - *foundOffsetAtom = offsetInSect - atomPos->offset; - return atomPos->atom; - } - - /// Searches this file for an UndefinedAtom named 'name'. Returns - /// nullptr is no such atom found. 
- const lld::Atom *findUndefAtom(StringRef name) { - auto pos = _undefAtoms.find(name); - if (pos == _undefAtoms.end()) - return nullptr; - return pos->second; - } - - typedef std::function DefinedAtomVisitor; - - void eachDefinedAtom(DefinedAtomVisitor vistor) { - for (auto §AndAtoms : _sectionAtoms) { - for (auto &offAndAtom : sectAndAtoms.second) { - vistor(offAndAtom.atom); - } - } - } - - typedef std::function - SectionAtomVisitor; - - void eachAtomInSection(const Section §ion, SectionAtomVisitor visitor) { - auto pos = _sectionAtoms.find(§ion); - if (pos == _sectionAtoms.end()) - return; - auto vec = pos->second; - - for (auto &offAndAtom : vec) - visitor(offAndAtom.atom, offAndAtom.offset); - } - - MachOLinkingContext::Arch arch() const { return _arch; } - void setArch(MachOLinkingContext::Arch arch) { _arch = arch; } - - MachOLinkingContext::OS OS() const { return _os; } - void setOS(MachOLinkingContext::OS os) { _os = os; } - - MachOLinkingContext::ObjCConstraint objcConstraint() const { - return _objcConstraint; - } - void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) { - _objcConstraint = v; - } - - uint32_t minVersion() const { return _minVersion; } - void setMinVersion(uint32_t v) { _minVersion = v; } - - LoadCommandType minVersionLoadCommandKind() const { - return _minVersionLoadCommandKind; - } - void setMinVersionLoadCommandKind(LoadCommandType v) { - _minVersionLoadCommandKind = v; - } - - uint32_t swiftVersion() const { return _swiftVersion; } - void setSwiftVersion(uint32_t v) { _swiftVersion = v; } - - bool subsectionsViaSymbols() const { - return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; - } - void setFlags(normalized::FileFlags v) { _flags = v; } - - /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const File *F) { - return F->kind() == File::kindMachObject; - } - - void setDebugInfo(std::unique_ptr debugInfo) { - _debugInfo = std::move(debugInfo); - } - - DebugInfo* 
debugInfo() const { return _debugInfo.get(); } - std::unique_ptr takeDebugInfo() { return std::move(_debugInfo); } - -protected: - std::error_code doParse() override { - // Convert binary file to normalized mach-o. - auto normFile = normalized::readBinary(_mb, _ctx->arch()); - if (auto ec = normFile.takeError()) - return llvm::errorToErrorCode(std::move(ec)); - // Convert normalized mach-o to atoms. - if (auto ec = normalized::normalizedObjectToAtoms(this, **normFile, false)) - return llvm::errorToErrorCode(std::move(ec)); - return std::error_code(); - } - -private: - struct SectionOffsetAndAtom { uint64_t offset; MachODefinedAtom *atom; }; - - void addAtomForSection(const Section *inSection, MachODefinedAtom* atom, - uint64_t sectionOffset) { - SectionOffsetAndAtom offAndAtom; - offAndAtom.offset = sectionOffset; - offAndAtom.atom = atom; - _sectionAtoms[inSection].push_back(offAndAtom); - addAtom(*atom); - } - - typedef llvm::DenseMap> SectionToAtoms; - typedef llvm::StringMap NameToAtom; - - std::unique_ptr _mb; - MachOLinkingContext *_ctx; - SectionToAtoms _sectionAtoms; - NameToAtom _undefAtoms; - MachOLinkingContext::Arch _arch = MachOLinkingContext::arch_unknown; - MachOLinkingContext::OS _os = MachOLinkingContext::OS::unknown; - uint32_t _minVersion = 0; - LoadCommandType _minVersionLoadCommandKind = (LoadCommandType)0; - MachOLinkingContext::ObjCConstraint _objcConstraint = - MachOLinkingContext::objc_unknown; - uint32_t _swiftVersion = 0; - normalized::FileFlags _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; - std::unique_ptr _debugInfo; -}; - -class MachODylibFile : public SharedLibraryFile { -public: - MachODylibFile(std::unique_ptr mb, MachOLinkingContext *ctx) - : SharedLibraryFile(mb->getBufferIdentifier()), - _mb(std::move(mb)), _ctx(ctx) {} - - MachODylibFile(StringRef path) : SharedLibraryFile(path) {} - - OwningAtomPtr exports(StringRef name) const override { - // Pass down _installName so that if this requested symbol - // is re-exported 
through this dylib, the SharedLibraryAtom's loadName() - // is this dylib installName and not the implementation dylib's. - // NOTE: isData is not needed for dylibs (it matters for static libs). - return exports(name, _installName); - } - - /// Adds symbol name that this dylib exports. The corresponding - /// SharedLibraryAtom is created lazily (since most symbols are not used). - void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) { - if (copyRefs) { - name = name.copy(allocator()); - } - AtomAndFlags info(weakDef); - _nameToAtom[name] = info; - } - - void addReExportedDylib(StringRef dylibPath) { - _reExportedDylibs.emplace_back(dylibPath); - } - - StringRef installName() const { return _installName; } - uint32_t currentVersion() { return _currentVersion; } - uint32_t compatVersion() { return _compatVersion; } - - void setInstallName(StringRef name) { _installName = name; } - void setCompatVersion(uint32_t version) { _compatVersion = version; } - void setCurrentVersion(uint32_t version) { _currentVersion = version; } - - typedef std::function FindDylib; - - void loadReExportedDylibs(FindDylib find) { - for (ReExportedDylib &entry : _reExportedDylibs) { - if (!entry.file) - entry.file = find(entry.path); - } - } - - StringRef getDSOName() const override { return _installName; } - - std::error_code doParse() override { - // Convert binary file to normalized mach-o. - auto normFile = normalized::readBinary(_mb, _ctx->arch()); - if (auto ec = normFile.takeError()) - return llvm::errorToErrorCode(std::move(ec)); - // Convert normalized mach-o to atoms. - if (auto ec = normalized::normalizedDylibToAtoms(this, **normFile, false)) - return llvm::errorToErrorCode(std::move(ec)); - return std::error_code(); - } - -protected: - OwningAtomPtr exports(StringRef name, - StringRef installName) const { - // First, check if requested symbol is directly implemented by this dylib. 
- auto entry = _nameToAtom.find(name); - if (entry != _nameToAtom.end()) { - // FIXME: Make this map a set and only used in assert builds. - // Note, its safe to assert here as the resolver is the only client of - // this API and it only requests exports for undefined symbols. - // If we return from here we are no longer undefined so we should never - // get here again. - assert(!entry->second.atom && "Duplicate shared library export"); - bool weakDef = entry->second.weakDef; - auto *atom = new (allocator()) MachOSharedLibraryAtom(*this, name, - installName, - weakDef); - entry->second.atom = atom; - return atom; - } - - // Next, check if symbol is implemented in some re-exported dylib. - for (const ReExportedDylib &dylib : _reExportedDylibs) { - assert(dylib.file); - auto atom = dylib.file->exports(name, installName); - if (atom.get()) - return atom; - } - - // Symbol not exported or re-exported by this dylib. - return nullptr; - } - - struct ReExportedDylib { - ReExportedDylib(StringRef p) : path(p), file(nullptr) { } - ReExportedDylib(StringRef p, MachODylibFile *file) : path(p), file(file) { } - StringRef path; - MachODylibFile *file; - }; - - struct AtomAndFlags { - AtomAndFlags() : atom(nullptr), weakDef(false) { } - AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { } - const SharedLibraryAtom *atom; - bool weakDef; - }; - - std::unique_ptr _mb; - MachOLinkingContext *_ctx; - StringRef _installName; - uint32_t _currentVersion; - uint32_t _compatVersion; - std::vector _reExportedDylibs; - mutable std::unordered_map _nameToAtom; -}; - -class TAPIFile : public MachODylibFile { -public: - - TAPIFile(std::unique_ptr mb, MachOLinkingContext *ctx) - : MachODylibFile(std::move(mb), ctx) {} - - std::error_code doParse() override { - - llvm::Expected> result = - llvm::MachO::TextAPIReader::get(*_mb); - if (!result) - return std::make_error_code(std::errc::invalid_argument); - - std::unique_ptr interface{std::move(*result)}; - return 
loadFromInterface(*interface); - } - -private: - std::error_code loadFromInterface(llvm::MachO::InterfaceFile &interface) { - llvm::MachO::Architecture arch; - switch(_ctx->arch()) { - case MachOLinkingContext::arch_x86: - arch = llvm::MachO::AK_i386; - break; - case MachOLinkingContext::arch_x86_64: - arch = llvm::MachO::AK_x86_64; - break; - case MachOLinkingContext::arch_arm64: - arch = llvm::MachO::AK_arm64; - break; - default: - return std::make_error_code(std::errc::invalid_argument); - } - - setInstallName(interface.getInstallName().copy(allocator())); - // TODO(compnerd) filter out symbols based on the target platform - for (const auto symbol : interface.symbols()) - if (symbol->getArchitectures().has(arch)) - addExportedSymbol(symbol->getName(), symbol->isWeakDefined(), true); - - for (const llvm::MachO::InterfaceFileRef &reexport : - interface.reexportedLibraries()) - addReExportedDylib(reexport.getInstallName().copy(allocator())); - - for (const auto& document : interface.documents()) { - for (auto& reexport : _reExportedDylibs) { - if (reexport.path != document->getInstallName()) - continue; - assert(!reexport.file); - _ownedFiles.push_back(std::make_unique( - MemoryBuffer::getMemBuffer("", _mb->getBufferIdentifier()), _ctx)); - reexport.file = _ownedFiles.back().get(); - std::error_code err = _ownedFiles.back()->loadFromInterface(*document); - if (err) - return err; - } - } - - return std::error_code(); - } - - std::vector> _ownedFiles; -}; - -} // end namespace mach_o -} // end namespace lld - -#endif // LLD_READER_WRITER_MACHO_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h b/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h deleted file mode 100644 index 1885effef49f..000000000000 --- a/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h +++ /dev/null @@ -1,62 +0,0 @@ -//===- lib/ReaderWriter/MachO/FlatNamespaceFile.h -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H -#define LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H - -#include "Atoms.h" -#include "lld/Core/SharedLibraryFile.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/Support/Debug.h" - -namespace lld { -namespace mach_o { - -// -// A FlateNamespaceFile instance may be added as a resolution source of last -// resort, depending on how -flat_namespace and -undefined are set. -// -class FlatNamespaceFile : public SharedLibraryFile { -public: - FlatNamespaceFile(const MachOLinkingContext &context) - : SharedLibraryFile("flat namespace") { } - - OwningAtomPtr exports(StringRef name) const override { - return new (allocator()) MachOSharedLibraryAtom(*this, name, getDSOName(), - false); - } - - StringRef getDSOName() const override { return "flat-namespace"; } - - const AtomRange defined() const override { - return _noDefinedAtoms; - } - const AtomRange undefined() const override { - return _noUndefinedAtoms; - } - - const AtomRange sharedLibrary() const override { - return _noSharedLibraryAtoms; - } - - const AtomRange absolute() const override { - return _noAbsoluteAtoms; - } - - void clearAtoms() override { - _noDefinedAtoms.clear(); - _noUndefinedAtoms.clear(); - _noSharedLibraryAtoms.clear(); - _noAbsoluteAtoms.clear(); - } -}; - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/lld/lib/ReaderWriter/MachO/GOTPass.cpp deleted file mode 100644 index 10e611c1bd2b..000000000000 --- a/lld/lib/ReaderWriter/MachO/GOTPass.cpp +++ /dev/null @@ -1,183 +0,0 @@ -//===- lib/ReaderWriter/MachO/GOTPass.cpp -----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This linker pass transforms all GOT kind references to real references. -/// That is, in assembly you can write something like: -/// movq foo@GOTPCREL(%rip), %rax -/// which means you want to load a pointer to "foo" out of the GOT (global -/// Offsets Table). In the object file, the Atom containing this instruction -/// has a Reference whose target is an Atom named "foo" and the Reference -/// kind is a GOT load. The linker needs to instantiate a pointer sized -/// GOT entry. This is done be creating a GOT Atom to represent that pointer -/// sized data in this pass, and altering the Atom graph so the Reference now -/// points to the GOT Atom entry (corresponding to "foo") and changing the -/// Reference Kind to reflect it is now pointing to a GOT entry (rather -/// then needing a GOT entry). -/// -/// There is one optimization the linker can do here. If the target of the GOT -/// is in the same linkage unit and does not need to be interposable, and -/// the GOT use is just a load (not some other operation), this pass can -/// transform that load into an LEA (add). This optimizes away one memory load -/// which at runtime that could stall the pipeline. This optimization only -/// works for architectures in which a (GOT) load instruction can be change to -/// an LEA instruction that is the same size. The method isGOTAccess() should -/// only return true for "canBypassGOT" if this optimization is supported. 
-/// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "File.h" -#include "MachOPasses.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" - -namespace lld { -namespace mach_o { - -// -// GOT Entry Atom created by the GOT pass. -// -class GOTEntryAtom : public SimpleDefinedAtom { -public: - GOTEntryAtom(const File &file, bool is64, StringRef name) - : SimpleDefinedAtom(file), _is64(is64), _name(name) { } - - ~GOTEntryAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeGOT; - } - - Alignment alignment() const override { - return _is64 ? 8 : 4; - } - - uint64_t size() const override { - return _is64 ? 8 : 4; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permRW_; - } - - ArrayRef rawContent() const override { - static const uint8_t zeros[] = - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - return llvm::makeArrayRef(zeros, size()); - } - - StringRef slotName() const { - return _name; - } - -private: - const bool _is64; - StringRef _name; -}; - -/// Pass for instantiating and optimizing GOT slots. -/// -class GOTPass : public Pass { -public: - GOTPass(const MachOLinkingContext &context) - : _ctx(context), _archHandler(_ctx.archHandler()), - _file(*_ctx.make_file("")) { - _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); - } - -private: - llvm::Error perform(SimpleFile &mergedFile) override { - // Scan all references in all atoms. - for (const DefinedAtom *atom : mergedFile.defined()) { - for (const Reference *ref : *atom) { - // Look at instructions accessing the GOT. 
- bool canBypassGOT; - if (!_archHandler.isGOTAccess(*ref, canBypassGOT)) - continue; - const Atom *target = ref->target(); - assert(target != nullptr); - - if (!shouldReplaceTargetWithGOTAtom(target, canBypassGOT)) { - // Update reference kind to reflect that target is a direct access. - _archHandler.updateReferenceToGOT(ref, false); - } else { - // Replace the target with a reference to a GOT entry. - const DefinedAtom *gotEntry = makeGOTEntry(target); - const_cast(ref)->setTarget(gotEntry); - // Update reference kind to reflect that target is now a GOT entry. - _archHandler.updateReferenceToGOT(ref, true); - } - } - } - - // Sort and add all created GOT Atoms to master file - std::vector entries; - entries.reserve(_targetToGOT.size()); - for (auto &it : _targetToGOT) - entries.push_back(it.second); - std::sort(entries.begin(), entries.end(), - [](const GOTEntryAtom *left, const GOTEntryAtom *right) { - return (left->slotName().compare(right->slotName()) < 0); - }); - for (const GOTEntryAtom *slot : entries) - mergedFile.addAtom(*slot); - - return llvm::Error::success(); - } - - bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) { - // Accesses to shared library symbols must go through GOT. - if (isa(target)) - return true; - // Accesses to interposable symbols in same linkage unit must also go - // through GOT. - const DefinedAtom *defTarget = dyn_cast(target); - if (defTarget != nullptr && - defTarget->interposable() != DefinedAtom::interposeNo) { - assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); - return true; - } - // Target does not require indirection. So, if instruction allows GOT to be - // by-passed, do that optimization and don't create GOT entry. 
- return !canBypassGOT; - } - - const DefinedAtom *makeGOTEntry(const Atom *target) { - auto pos = _targetToGOT.find(target); - if (pos == _targetToGOT.end()) { - auto *gotEntry = new (_file.allocator()) - GOTEntryAtom(_file, _ctx.is64Bit(), target->name()); - _targetToGOT[target] = gotEntry; - const ArchHandler::ReferenceInfo &nlInfo = _archHandler.stubInfo(). - nonLazyPointerReferenceToBinder; - gotEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, - nlInfo.kind, 0, target, 0); - return gotEntry; - } - return pos->second; - } - - const MachOLinkingContext &_ctx; - mach_o::ArchHandler &_archHandler; - MachOFile &_file; - llvm::DenseMap _targetToGOT; -}; - -void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) { - assert(ctx.needsGOTPass()); - pm.add(std::make_unique(ctx)); -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp deleted file mode 100644 index e92fdf1b4913..000000000000 --- a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp +++ /dev/null @@ -1,490 +0,0 @@ -//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "LayoutPass.h" -#include "lld/Core/Instrumentation.h" -#include "lld/Core/PassManager.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Parallel.h" -#include -#include -#include - -using namespace lld; - -#define DEBUG_TYPE "LayoutPass" - -namespace lld { -namespace mach_o { - -static bool compareAtoms(const LayoutPass::SortKey &, - const LayoutPass::SortKey &, - LayoutPass::SortOverride customSorter); - -#ifndef NDEBUG -// Return "reason (leftval, rightval)" -static std::string formatReason(StringRef reason, int leftVal, int rightVal) { - return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")") - .str(); -} - -// Less-than relationship of two atoms must be transitive, which is, if a < b -// and b < c, a < c must be true. This function checks the transitivity by -// checking the sort results. -static void checkTransitivity(std::vector &vec, - LayoutPass::SortOverride customSorter) { - for (auto i = vec.begin(), e = vec.end(); (i + 1) != e; ++i) { - for (auto j = i + 1; j != e; ++j) { - assert(compareAtoms(*i, *j, customSorter)); - assert(!compareAtoms(*j, *i, customSorter)); - } - } -} - -// Helper functions to check follow-on graph. 
-typedef llvm::DenseMap AtomToAtomT; - -static std::string atomToDebugString(const Atom *atom) { - const DefinedAtom *definedAtom = dyn_cast(atom); - std::string str; - llvm::raw_string_ostream s(str); - if (definedAtom->name().empty()) - s << ""; - else - s << definedAtom->name(); - s << " in "; - if (definedAtom->customSectionName().empty()) - s << ""; - else - s << definedAtom->customSectionName(); - s.flush(); - return str; -} - -static void showCycleDetectedError(const Registry ®istry, - AtomToAtomT &followOnNexts, - const DefinedAtom *atom) { - const DefinedAtom *start = atom; - llvm::dbgs() << "There's a cycle in a follow-on chain!\n"; - do { - llvm::dbgs() << " " << atomToDebugString(atom) << "\n"; - for (const Reference *ref : *atom) { - StringRef kindValStr; - if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(), - ref->kindValue(), kindValStr)) { - kindValStr = ""; - } - llvm::dbgs() << " " << kindValStr - << ": " << atomToDebugString(ref->target()) << "\n"; - } - atom = followOnNexts[atom]; - } while (atom != start); - llvm::report_fatal_error("Cycle detected"); -} - -/// Exit if there's a cycle in a followon chain reachable from the -/// given root atom. Uses the tortoise and hare algorithm to detect a -/// cycle. 
-static void checkNoCycleInFollowonChain(const Registry ®istry, - AtomToAtomT &followOnNexts, - const DefinedAtom *root) { - const DefinedAtom *tortoise = root; - const DefinedAtom *hare = followOnNexts[root]; - while (true) { - if (!tortoise || !hare) - return; - if (tortoise == hare) - showCycleDetectedError(registry, followOnNexts, tortoise); - tortoise = followOnNexts[tortoise]; - hare = followOnNexts[followOnNexts[hare]]; - } -} - -static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots, - const DefinedAtom *atom) { - if (!atom) return; - auto i = followOnRoots.find(atom); - if (i == followOnRoots.end()) { - llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) + - "> has no follow-on root!")) - .str() - .c_str()); - } - const DefinedAtom *ap = i->second; - while (true) { - const DefinedAtom *next = followOnRoots[ap]; - if (!next) { - llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) + - "> is not reachable from its root!")) - .str() - .c_str()); - } - if (next == ap) - return; - ap = next; - } -} - -static void printDefinedAtoms(const File::AtomRange &atomRange) { - for (const DefinedAtom *atom : atomRange) { - llvm::dbgs() << " file=" << atom->file().path() - << ", name=" << atom->name() - << ", size=" << atom->size() - << ", type=" << atom->contentType() - << ", ordinal=" << atom->ordinal() - << "\n"; - } -} - -/// Verify that the followon chain is sane. Should not be called in -/// release binary. -void LayoutPass::checkFollowonChain(const File::AtomRange &range) { - ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain"); - - // Verify that there's no cycle in follow-on chain. - std::set roots; - for (const auto &ai : _followOnRoots) - roots.insert(ai.second); - for (const DefinedAtom *root : roots) - checkNoCycleInFollowonChain(_registry, _followOnNexts, root); - - // Verify that all the atoms in followOnNexts have references to - // their roots. 
- for (const auto &ai : _followOnNexts) { - checkReachabilityFromRoot(_followOnRoots, ai.first); - checkReachabilityFromRoot(_followOnRoots, ai.second); - } -} -#endif // #ifndef NDEBUG - -/// The function compares atoms by sorting atoms in the following order -/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup) -/// b) Sorts atoms by their permissions -/// c) Sorts atoms by their content -/// d) Sorts atoms by custom sorter -/// e) Sorts atoms on how they appear using File Ordinality -/// f) Sorts atoms on how they appear within the File -static bool compareAtomsSub(const LayoutPass::SortKey &lc, - const LayoutPass::SortKey &rc, - LayoutPass::SortOverride customSorter, - std::string &reason) { - const DefinedAtom *left = lc._atom.get(); - const DefinedAtom *right = rc._atom.get(); - if (left == right) { - reason = "same"; - return false; - } - - // Find the root of the chain if it is a part of a follow-on chain. - const DefinedAtom *leftRoot = lc._root; - const DefinedAtom *rightRoot = rc._root; - - // Sort atoms by their ordinal overrides only if they fall in the same - // chain. - if (leftRoot == rightRoot) { - LLVM_DEBUG(reason = formatReason("override", lc._override, rc._override)); - return lc._override < rc._override; - } - - // Sort same permissions together. - DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions(); - DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions(); - - if (leftPerms != rightPerms) { - LLVM_DEBUG( - reason = formatReason("contentPerms", (int)leftPerms, (int)rightPerms)); - return leftPerms < rightPerms; - } - - // Sort same content types together. - DefinedAtom::ContentType leftType = leftRoot->contentType(); - DefinedAtom::ContentType rightType = rightRoot->contentType(); - - if (leftType != rightType) { - LLVM_DEBUG(reason = - formatReason("contentType", (int)leftType, (int)rightType)); - return leftType < rightType; - } - - // Use custom sorter if supplied. 
- if (customSorter) { - bool leftBeforeRight; - if (customSorter(leftRoot, rightRoot, leftBeforeRight)) - return leftBeforeRight; - } - - // Sort by .o order. - const File *leftFile = &leftRoot->file(); - const File *rightFile = &rightRoot->file(); - - if (leftFile != rightFile) { - LLVM_DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(), - (int)rightFile->ordinal())); - return leftFile->ordinal() < rightFile->ordinal(); - } - - // Sort by atom order with .o file. - uint64_t leftOrdinal = leftRoot->ordinal(); - uint64_t rightOrdinal = rightRoot->ordinal(); - - if (leftOrdinal != rightOrdinal) { - LLVM_DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(), - (int)rightRoot->ordinal())); - return leftOrdinal < rightOrdinal; - } - - llvm::errs() << "Unordered: <" << left->name() << "> <" << right->name() - << ">\n"; - llvm_unreachable("Atoms with Same Ordinal!"); -} - -static bool compareAtoms(const LayoutPass::SortKey &lc, - const LayoutPass::SortKey &rc, - LayoutPass::SortOverride customSorter) { - std::string reason; - bool result = compareAtomsSub(lc, rc, customSorter, reason); - LLVM_DEBUG({ - StringRef comp = result ? "<" : ">="; - llvm::dbgs() << "Layout: '" << lc._atom.get()->name() - << "' " << comp << " '" - << rc._atom.get()->name() << "' (" << reason << ")\n"; - }); - return result; -} - -LayoutPass::LayoutPass(const Registry ®istry, SortOverride sorter) - : _registry(registry), _customSorter(std::move(sorter)) {} - -// Returns the atom immediately followed by the given atom in the followon -// chain. -const DefinedAtom *LayoutPass::findAtomFollowedBy( - const DefinedAtom *targetAtom) { - // Start from the beginning of the chain and follow the chain until - // we find the targetChain. - const DefinedAtom *atom = _followOnRoots[targetAtom]; - while (true) { - const DefinedAtom *prevAtom = atom; - AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); - // The target atom must be in the chain of its root. 
- assert(targetFollowOnAtomsIter != _followOnNexts.end()); - atom = targetFollowOnAtomsIter->second; - if (atom == targetAtom) - return prevAtom; - } -} - -// Check if all the atoms followed by the given target atom are of size zero. -// When this method is called, an atom being added is not of size zero and -// will be added to the head of the followon chain. All the atoms between the -// atom and the targetAtom (specified by layout-after) need to be of size zero -// in this case. Otherwise the desired layout is impossible. -bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) { - const DefinedAtom *atom = _followOnRoots[targetAtom]; - while (true) { - if (atom == targetAtom) - return true; - if (atom->size() != 0) - // TODO: print warning that an impossible layout is being desired by the - // user. - return false; - AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); - // The target atom must be in the chain of its root. - assert(targetFollowOnAtomsIter != _followOnNexts.end()); - atom = targetFollowOnAtomsIter->second; - } -} - -// Set the root of all atoms in targetAtom's chain to the given root. -void LayoutPass::setChainRoot(const DefinedAtom *targetAtom, - const DefinedAtom *root) { - // Walk through the followon chain and override each node's root. - while (true) { - _followOnRoots[targetAtom] = root; - AtomToAtomT::iterator targetFollowOnAtomsIter = - _followOnNexts.find(targetAtom); - if (targetFollowOnAtomsIter == _followOnNexts.end()) - return; - targetAtom = targetFollowOnAtomsIter->second; - } -} - -/// This pass builds the followon tables described by two DenseMaps -/// followOnRoots and followonNexts. 
-/// The followOnRoots map contains a mapping of a DefinedAtom to its root -/// The followOnNexts map contains a mapping of what DefinedAtom follows the -/// current Atom -/// The algorithm follows a very simple approach -/// a) If the atom is first seen, then make that as the root atom -/// b) The targetAtom which this Atom contains, has the root thats set to the -/// root of the current atom -/// c) If the targetAtom is part of a different tree and the root of the -/// targetAtom is itself, Chain all the atoms that are contained in the tree -/// to the current Tree -/// d) If the targetAtom is part of a different chain and the root of the -/// targetAtom until the targetAtom has all atoms of size 0, then chain the -/// targetAtoms and its tree to the current chain -void LayoutPass::buildFollowOnTable(const File::AtomRange &range) { - ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable"); - // Set the initial size of the followon and the followonNext hash to the - // number of atoms that we have. - _followOnRoots.reserve(range.size()); - _followOnNexts.reserve(range.size()); - for (const DefinedAtom *ai : range) { - for (const Reference *r : *ai) { - if (r->kindNamespace() != lld::Reference::KindNamespace::all || - r->kindValue() != lld::Reference::kindLayoutAfter) - continue; - const DefinedAtom *targetAtom = dyn_cast(r->target()); - _followOnNexts[ai] = targetAtom; - - // If we find a followon for the first time, let's make that atom as the - // root atom. - if (_followOnRoots.count(ai) == 0) - _followOnRoots[ai] = ai; - - auto iter = _followOnRoots.find(targetAtom); - if (iter == _followOnRoots.end()) { - // If the targetAtom is not a root of any chain, let's make the root of - // the targetAtom to the root of the current chain. - - // The expression m[i] = m[j] where m is a DenseMap and i != j is not - // safe. m[j] returns a reference, which would be invalidated when a - // rehashing occurs. 
If rehashing occurs to make room for m[i], m[j] - // becomes invalid, and that invalid reference would be used as the RHS - // value of the expression. - // Copy the value to workaround. - const DefinedAtom *tmp = _followOnRoots[ai]; - _followOnRoots[targetAtom] = tmp; - continue; - } - if (iter->second == targetAtom) { - // If the targetAtom is the root of a chain, the chain becomes part of - // the current chain. Rewrite the subchain's root to the current - // chain's root. - setChainRoot(targetAtom, _followOnRoots[ai]); - continue; - } - // The targetAtom is already a part of a chain. If the current atom is - // of size zero, we can insert it in the middle of the chain just - // before the target atom, while not breaking other atom's followon - // relationships. If it's not, we can only insert the current atom at - // the beginning of the chain. All the atoms followed by the target - // atom must be of size zero in that case to satisfy the followon - // relationships. - size_t currentAtomSize = ai->size(); - if (currentAtomSize == 0) { - const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom); - _followOnNexts[targetPrevAtom] = ai; - const DefinedAtom *tmp = _followOnRoots[targetPrevAtom]; - _followOnRoots[ai] = tmp; - continue; - } - if (!checkAllPrevAtomsZeroSize(targetAtom)) - break; - _followOnNexts[ai] = _followOnRoots[targetAtom]; - setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]); - } - } -} - -/// Build an ordinal override map by traversing the followon chain, and -/// assigning ordinals to each atom, if the atoms have their ordinals -/// already assigned skip the atom and move to the next. 
This is the -/// main map thats used to sort the atoms while comparing two atoms together -void -LayoutPass::buildOrdinalOverrideMap(const File::AtomRange &range) { - ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap"); - uint64_t index = 0; - for (const DefinedAtom *ai : range) { - const DefinedAtom *atom = ai; - if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end()) - continue; - AtomToAtomT::iterator start = _followOnRoots.find(atom); - if (start == _followOnRoots.end()) - continue; - for (const DefinedAtom *nextAtom = start->second; nextAtom; - nextAtom = _followOnNexts[nextAtom]) { - AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom); - if (pos == _ordinalOverrideMap.end()) - _ordinalOverrideMap[nextAtom] = index++; - } - } -} - -std::vector -LayoutPass::decorate(File::AtomRange &atomRange) const { - std::vector ret; - for (OwningAtomPtr &atom : atomRange.owning_ptrs()) { - auto ri = _followOnRoots.find(atom.get()); - auto oi = _ordinalOverrideMap.find(atom.get()); - const auto *root = (ri == _followOnRoots.end()) ? atom.get() : ri->second; - uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second; - ret.push_back(SortKey(std::move(atom), root, override)); - } - return ret; -} - -void LayoutPass::undecorate(File::AtomRange &atomRange, - std::vector &keys) const { - size_t i = 0; - for (SortKey &k : keys) - atomRange[i++] = std::move(k._atom); -} - -/// Perform the actual pass -llvm::Error LayoutPass::perform(SimpleFile &mergedFile) { - LLVM_DEBUG(llvm::dbgs() << "******** Laying out atoms:\n"); - // sort the atoms - ScopedTask task(getDefaultDomain(), "LayoutPass"); - File::AtomRange atomRange = mergedFile.defined(); - - // Build follow on tables - buildFollowOnTable(atomRange); - - // Check the structure of followon graph if running in debug mode. 
- LLVM_DEBUG(checkFollowonChain(atomRange)); - - // Build override maps - buildOrdinalOverrideMap(atomRange); - - LLVM_DEBUG({ - llvm::dbgs() << "unsorted atoms:\n"; - printDefinedAtoms(atomRange); - }); - - std::vector vec = decorate(atomRange); - llvm::parallelSort( - vec, - [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { - return compareAtoms(l, r, _customSorter); - }); - LLVM_DEBUG(checkTransitivity(vec, _customSorter)); - undecorate(atomRange, vec); - - LLVM_DEBUG({ - llvm::dbgs() << "sorted atoms:\n"; - printDefinedAtoms(atomRange); - }); - - LLVM_DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n"); - return llvm::Error::success(); -} - -void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { - pm.add(std::make_unique( - ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right, - bool & leftBeforeRight) ->bool { - return ctx.customAtomOrderer(left, right, leftBeforeRight); - })); -} - -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.h b/lld/lib/ReaderWriter/MachO/LayoutPass.h deleted file mode 100644 index 904e16b7fb0e..000000000000 --- a/lld/lib/ReaderWriter/MachO/LayoutPass.h +++ /dev/null @@ -1,118 +0,0 @@ -//===------ lib/ReaderWriter/MachO/LayoutPass.h - Handles Layout of atoms -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_LAYOUT_PASS_H -#define LLD_READER_WRITER_MACHO_LAYOUT_PASS_H - -#include "lld/Core/File.h" -#include "lld/Core/Pass.h" -#include "lld/Core/Reader.h" -#include "lld/Core/Simple.h" -#include "llvm/ADT/DenseMap.h" -#include -#include -#include - -namespace lld { -class DefinedAtom; -class SimpleFile; - -namespace mach_o { - -/// This linker pass does the layout of the atoms. The pass is done after the -/// order their .o files were found on the command line, then by order of the -/// atoms (address) in the .o file. But some atoms have a preferred location -/// in their section (such as pinned to the start or end of the section), so -/// the sort must take that into account too. -class LayoutPass : public Pass { -public: - struct SortKey { - SortKey(OwningAtomPtr &&atom, - const DefinedAtom *root, uint64_t override) - : _atom(std::move(atom)), _root(root), _override(override) {} - OwningAtomPtr _atom; - const DefinedAtom *_root; - uint64_t _override; - - // Note, these are only here to appease MSVC bots which didn't like - // the same methods being implemented/deleted in OwningAtomPtr. - SortKey(SortKey &&key) : _atom(std::move(key._atom)), _root(key._root), - _override(key._override) { - key._root = nullptr; - } - - SortKey &operator=(SortKey &&key) { - _atom = std::move(key._atom); - _root = key._root; - key._root = nullptr; - _override = key._override; - return *this; - } - - private: - SortKey(const SortKey &) = delete; - void operator=(const SortKey&) = delete; - }; - - typedef std::function SortOverride; - - LayoutPass(const Registry ®istry, SortOverride sorter); - - /// Sorts atoms in mergedFile by content type then by command line order. 
- llvm::Error perform(SimpleFile &mergedFile) override; - - ~LayoutPass() override = default; - -private: - // Build the followOn atoms chain as specified by the kindLayoutAfter - // reference type - void buildFollowOnTable(const File::AtomRange &range); - - // Build a map of Atoms to ordinals for sorting the atoms - void buildOrdinalOverrideMap(const File::AtomRange &range); - - const Registry &_registry; - SortOverride _customSorter; - - typedef llvm::DenseMap AtomToAtomT; - typedef llvm::DenseMap AtomToOrdinalT; - - // A map to be used to sort atoms. It represents the order of atoms in the - // result; if Atom X is mapped to atom Y in this map, X will be located - // immediately before Y in the output file. Y might be mapped to another - // atom, constructing a follow-on chain. An atom cannot be mapped to more - // than one atom unless all but one atom are of size zero. - AtomToAtomT _followOnNexts; - - // A map to be used to sort atoms. It's a map from an atom to its root of - // follow-on chain. A root atom is mapped to itself. If an atom is not in - // _followOnNexts, the atom is not in this map, and vice versa. - AtomToAtomT _followOnRoots; - - AtomToOrdinalT _ordinalOverrideMap; - - // Helper methods for buildFollowOnTable(). - const DefinedAtom *findAtomFollowedBy(const DefinedAtom *targetAtom); - bool checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom); - - void setChainRoot(const DefinedAtom *targetAtom, const DefinedAtom *root); - - std::vector decorate(File::AtomRange &atomRange) const; - - void undecorate(File::AtomRange &atomRange, - std::vector &keys) const; - - // Check if the follow-on graph is a correct structure. For debugging only. 
- void checkFollowonChain(const File::AtomRange &range); -}; - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_LAYOUT_PASS_H diff --git a/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp deleted file mode 100644 index acd919e4d411..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp +++ /dev/null @@ -1,1104 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachOLinkingContext.cpp ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Common/ErrorHandler.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "ArchHandler.h" -#include "File.h" -#include "FlatNamespaceFile.h" -#include "MachONormalizedFile.h" -#include "MachOPasses.h" -#include "SectCreateFile.h" -#include "lld/Common/Driver.h" -#include "lld/Core/ArchiveLibraryFile.h" -#include "lld/Core/PassManager.h" -#include "lld/Core/Reader.h" -#include "lld/Core/Writer.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Triple.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Demangle/Demangle.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/Path.h" -#include - -using lld::mach_o::ArchHandler; -using lld::mach_o::MachOFile; -using lld::mach_o::MachODylibFile; -using namespace llvm::MachO; - -namespace lld { - -bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) { - result = 0; - - if (str.empty()) - return false; - - SmallVector parts; - llvm::SplitString(str, parts, "."); - - unsigned long long num; - if (llvm::getAsUnsignedInteger(parts[0], 10, num)) - return true; - if (num > 65535) - 
return true; - result = num << 16; - - if (parts.size() > 1) { - if (llvm::getAsUnsignedInteger(parts[1], 10, num)) - return true; - if (num > 255) - return true; - result |= (num << 8); - } - - if (parts.size() > 2) { - if (llvm::getAsUnsignedInteger(parts[2], 10, num)) - return true; - if (num > 255) - return true; - result |= num; - } - - return false; -} - -bool MachOLinkingContext::parsePackedVersion(StringRef str, uint64_t &result) { - result = 0; - - if (str.empty()) - return false; - - SmallVector parts; - llvm::SplitString(str, parts, "."); - - unsigned long long num; - if (llvm::getAsUnsignedInteger(parts[0], 10, num)) - return true; - if (num > 0xFFFFFF) - return true; - result = num << 40; - - unsigned Shift = 30; - for (StringRef str : llvm::makeArrayRef(parts).slice(1)) { - if (llvm::getAsUnsignedInteger(str, 10, num)) - return true; - if (num > 0x3FF) - return true; - result |= (num << Shift); - Shift -= 10; - } - - return false; -} - -MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { - { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL }, - { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL }, - { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL }, - { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 }, - { "armv7", arch_armv7, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }, - { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, - { "arm64", arch_arm64, true, CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL }, - { "", arch_unknown,false, 0, 0 } -}; - -MachOLinkingContext::Arch -MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) { - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) - return info->arch; - } - return arch_unknown; -} - -MachOLinkingContext::Arch -MachOLinkingContext::archFromName(StringRef archName) { - for (ArchInfo *info = _s_archInfos; 
!info->archName.empty(); ++info) { - if (info->archName.equals(archName)) - return info->arch; - } - return arch_unknown; -} - -StringRef MachOLinkingContext::nameFromArch(Arch arch) { - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) - return info->archName; - } - return ""; -} - -uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) { - assert(arch != arch_unknown); - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) - return info->cputype; - } - llvm_unreachable("Unknown arch type"); -} - -uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) { - assert(arch != arch_unknown); - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) - return info->cpusubtype; - } - llvm_unreachable("Unknown arch type"); -} - -bool MachOLinkingContext::isThinObjectFile(StringRef path, Arch &arch) { - return mach_o::normalized::isThinObjectFile(path, arch); -} - -bool MachOLinkingContext::sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, - uint32_t &size) { - return mach_o::normalized::sliceFromFatFile(mb, _arch, offset, size); -} - -MachOLinkingContext::MachOLinkingContext() {} - -MachOLinkingContext::~MachOLinkingContext() { - // Atoms are allocated on BumpPtrAllocator's on File's. - // As we transfer atoms from one file to another, we need to clear all of the - // atoms before we remove any of the BumpPtrAllocator's. - auto &nodes = getNodes(); - for (unsigned i = 0, e = nodes.size(); i != e; ++i) { - FileNode *node = dyn_cast(nodes[i].get()); - if (!node) - continue; - File *file = node->getFile(); - file->clearAtoms(); - } -} - -void MachOLinkingContext::configure(HeaderFileType type, Arch arch, OS os, - uint32_t minOSVersion, - bool exportDynamicSymbols) { - _outputMachOType = type; - _arch = arch; - _os = os; - _osMinVersion = minOSVersion; - - // If min OS not specified on command line, use reasonable defaults. 
- // Note that we only do sensible defaults when emitting something other than - // object and preload. - if (_outputMachOType != llvm::MachO::MH_OBJECT && - _outputMachOType != llvm::MachO::MH_PRELOAD) { - if (minOSVersion == 0) { - switch (_arch) { - case arch_x86_64: - case arch_x86: - parsePackedVersion("10.8", _osMinVersion); - _os = MachOLinkingContext::OS::macOSX; - break; - case arch_armv6: - case arch_armv7: - case arch_armv7s: - case arch_arm64: - parsePackedVersion("7.0", _osMinVersion); - _os = MachOLinkingContext::OS::iOS; - break; - default: - break; - } - } - } - - switch (_outputMachOType) { - case llvm::MachO::MH_EXECUTE: - // If targeting newer OS, use _main - if (minOS("10.8", "6.0")) { - _entrySymbolName = "_main"; - } else { - // If targeting older OS, use start (in crt1.o) - _entrySymbolName = "start"; - } - - // __PAGEZERO defaults to 4GB on 64-bit (except for PP64 which lld does not - // support) and 4KB on 32-bit. - if (is64Bit(_arch)) { - _pageZeroSize = 0x100000000; - } else { - _pageZeroSize = 0x1000; - } - - // Initial base address is __PAGEZERO size. - _baseAddress = _pageZeroSize; - - // Make PIE by default when targetting newer OSs. - switch (os) { - case OS::macOSX: - if (minOSVersion >= 0x000A0700) // MacOSX 10.7 - _pie = true; - break; - case OS::iOS: - if (minOSVersion >= 0x00040300) // iOS 4.3 - _pie = true; - break; - case OS::iOS_simulator: - _pie = true; - break; - case OS::unknown: - break; - } - setGlobalsAreDeadStripRoots(exportDynamicSymbols); - break; - case llvm::MachO::MH_DYLIB: - setGlobalsAreDeadStripRoots(exportDynamicSymbols); - break; - case llvm::MachO::MH_BUNDLE: - break; - case llvm::MachO::MH_OBJECT: - _printRemainingUndefines = false; - _allowRemainingUndefines = true; - break; - default: - break; - } - - // Set default segment page sizes based on arch. 
- if (arch == arch_arm64) - _pageSize = 4*4096; -} - -uint32_t MachOLinkingContext::getCPUType() const { - return cpuTypeFromArch(_arch); -} - -uint32_t MachOLinkingContext::getCPUSubType() const { - return cpuSubtypeFromArch(_arch); -} - -bool MachOLinkingContext::is64Bit(Arch arch) { - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) { - return (info->cputype & CPU_ARCH_ABI64); - } - } - // unknown archs are not 64-bit. - return false; -} - -bool MachOLinkingContext::isHostEndian(Arch arch) { - assert(arch != arch_unknown); - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) { - return (info->littleEndian == llvm::sys::IsLittleEndianHost); - } - } - llvm_unreachable("Unknown arch type"); -} - -bool MachOLinkingContext::isBigEndian(Arch arch) { - assert(arch != arch_unknown); - for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) { - return ! info->littleEndian; - } - } - llvm_unreachable("Unknown arch type"); -} - -bool MachOLinkingContext::is64Bit() const { - return is64Bit(_arch); -} - -bool MachOLinkingContext::outputTypeHasEntry() const { - switch (_outputMachOType) { - case MH_EXECUTE: - case MH_DYLINKER: - case MH_PRELOAD: - return true; - default: - return false; - } -} - -bool MachOLinkingContext::needsStubsPass() const { - switch (_outputMachOType) { - case MH_EXECUTE: - return !_outputMachOTypeStatic; - case MH_DYLIB: - case MH_BUNDLE: - return true; - default: - return false; - } -} - -bool MachOLinkingContext::needsGOTPass() const { - // GOT pass not used in -r mode. - if (_outputMachOType == MH_OBJECT) - return false; - // Only some arches use GOT pass. 
- switch (_arch) { - case arch_x86_64: - case arch_arm64: - return true; - default: - return false; - } -} - -bool MachOLinkingContext::needsCompactUnwindPass() const { - switch (_outputMachOType) { - case MH_EXECUTE: - case MH_DYLIB: - case MH_BUNDLE: - return archHandler().needsCompactUnwind(); - default: - return false; - } -} - -bool MachOLinkingContext::needsObjCPass() const { - // ObjC pass is only needed if any of the inputs were ObjC. - return _objcConstraint != objc_unknown; -} - -bool MachOLinkingContext::needsShimPass() const { - // Shim pass only used in final executables. - if (_outputMachOType == MH_OBJECT) - return false; - // Only 32-bit arm arches use Shim pass. - switch (_arch) { - case arch_armv6: - case arch_armv7: - case arch_armv7s: - return true; - default: - return false; - } -} - -bool MachOLinkingContext::needsTLVPass() const { - switch (_outputMachOType) { - case MH_BUNDLE: - case MH_EXECUTE: - case MH_DYLIB: - return true; - default: - return false; - } -} - -StringRef MachOLinkingContext::binderSymbolName() const { - return archHandler().stubInfo().binderSymbolName; -} - -bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { - uint32_t parsedVersion; - switch (_os) { - case OS::macOSX: - if (parsePackedVersion(mac, parsedVersion)) - return false; - return _osMinVersion >= parsedVersion; - case OS::iOS: - case OS::iOS_simulator: - if (parsePackedVersion(iOS, parsedVersion)) - return false; - return _osMinVersion >= parsedVersion; - case OS::unknown: - // If we don't know the target, then assume that we don't meet the min OS. 
- // This matches the ld64 behaviour - return false; - } - llvm_unreachable("invalid OS enum"); -} - -bool MachOLinkingContext::addEntryPointLoadCommand() const { - if ((_outputMachOType == MH_EXECUTE) && !_outputMachOTypeStatic) { - return minOS("10.8", "6.0"); - } - return false; -} - -bool MachOLinkingContext::addUnixThreadLoadCommand() const { - switch (_outputMachOType) { - case MH_EXECUTE: - if (_outputMachOTypeStatic) - return true; - else - return !minOS("10.8", "6.0"); - break; - case MH_DYLINKER: - case MH_PRELOAD: - return true; - default: - return false; - } -} - -bool MachOLinkingContext::pathExists(StringRef path) const { - if (!_testingFileUsage) - return llvm::sys::fs::exists(path.str()); - - // Otherwise, we're in test mode: only files explicitly provided on the - // command-line exist. - std::string key = path.str(); - std::replace(key.begin(), key.end(), '\\', '/'); - return _existingPaths.find(key) != _existingPaths.end(); -} - -bool MachOLinkingContext::fileExists(StringRef path) const { - bool found = pathExists(path); - // Log search misses. - if (!found) - addInputFileNotFound(path); - - // When testing, file is never opened, so logging is done here. - if (_testingFileUsage && found) - addInputFileDependency(path); - - return found; -} - -void MachOLinkingContext::setSysLibRoots(const StringRefVector &paths) { - _syslibRoots = paths; -} - -void MachOLinkingContext::addRpath(StringRef rpath) { - _rpaths.push_back(rpath); -} - -void MachOLinkingContext::addModifiedSearchDir(StringRef libPath, - bool isSystemPath) { - bool addedModifiedPath = false; - - // -syslibroot only applies to absolute paths. 
- if (libPath.startswith("/")) { - for (auto syslibRoot : _syslibRoots) { - SmallString<256> path(syslibRoot); - llvm::sys::path::append(path, libPath); - if (pathExists(path)) { - _searchDirs.push_back(path.str().copy(_allocator)); - addedModifiedPath = true; - } - } - } - - if (addedModifiedPath) - return; - - // Finally, if only one -syslibroot is given, system paths which aren't in it - // get suppressed. - if (_syslibRoots.size() != 1 || !isSystemPath) { - if (pathExists(libPath)) { - _searchDirs.push_back(libPath); - } - } -} - -void MachOLinkingContext::addFrameworkSearchDir(StringRef fwPath, - bool isSystemPath) { - bool pathAdded = false; - - // -syslibroot only used with to absolute framework search paths. - if (fwPath.startswith("/")) { - for (auto syslibRoot : _syslibRoots) { - SmallString<256> path(syslibRoot); - llvm::sys::path::append(path, fwPath); - if (pathExists(path)) { - _frameworkDirs.push_back(path.str().copy(_allocator)); - pathAdded = true; - } - } - } - // If fwPath found in any -syslibroot, then done. - if (pathAdded) - return; - - // If only one -syslibroot, system paths not in that SDK are suppressed. - if (isSystemPath && (_syslibRoots.size() == 1)) - return; - - // Only use raw fwPath if that directory exists. - if (pathExists(fwPath)) - _frameworkDirs.push_back(fwPath); -} - -llvm::Optional -MachOLinkingContext::searchDirForLibrary(StringRef path, - StringRef libName) const { - SmallString<256> fullPath; - if (libName.endswith(".o")) { - // A request ending in .o is special: just search for the file directly. 
- fullPath.assign(path); - llvm::sys::path::append(fullPath, libName); - if (fileExists(fullPath)) - return fullPath.str().copy(_allocator); - return llvm::None; - } - - // Search for stub library - fullPath.assign(path); - llvm::sys::path::append(fullPath, Twine("lib") + libName + ".tbd"); - if (fileExists(fullPath)) - return fullPath.str().copy(_allocator); - - // Search for dynamic library - fullPath.assign(path); - llvm::sys::path::append(fullPath, Twine("lib") + libName + ".dylib"); - if (fileExists(fullPath)) - return fullPath.str().copy(_allocator); - - // If not, try for a static library - fullPath.assign(path); - llvm::sys::path::append(fullPath, Twine("lib") + libName + ".a"); - if (fileExists(fullPath)) - return fullPath.str().copy(_allocator); - - return llvm::None; -} - -llvm::Optional -MachOLinkingContext::searchLibrary(StringRef libName) const { - SmallString<256> path; - for (StringRef dir : searchDirs()) { - llvm::Optional searchDir = searchDirForLibrary(dir, libName); - if (searchDir) - return searchDir; - } - - return llvm::None; -} - -llvm::Optional -MachOLinkingContext::findPathForFramework(StringRef fwName) const{ - SmallString<256> fullPath; - for (StringRef dir : frameworkDirs()) { - fullPath.assign(dir); - llvm::sys::path::append(fullPath, Twine(fwName) + ".framework", fwName); - if (fileExists(fullPath)) - return fullPath.str().copy(_allocator); - } - - return llvm::None; -} - -bool MachOLinkingContext::validateImpl() { - // TODO: if -arch not specified, look at arch of first .o file. 
- - if (_currentVersion && _outputMachOType != MH_DYLIB) { - error("-current_version can only be used with dylibs"); - return false; - } - - if (_compatibilityVersion && _outputMachOType != MH_DYLIB) { - error("-compatibility_version can only be used with dylibs"); - return false; - } - - if (_deadStrippableDylib && _outputMachOType != MH_DYLIB) { - error("-mark_dead_strippable_dylib can only be used with dylibs"); - return false; - } - - if (!_bundleLoader.empty() && outputMachOType() != MH_BUNDLE) { - error("-bundle_loader can only be used with Mach-O bundles"); - return false; - } - - // If -exported_symbols_list used, all exported symbols must be defined. - if (_exportMode == ExportMode::exported) { - for (const auto &symbol : _exportedSymbols) - addInitialUndefinedSymbol(symbol.getKey()); - } - - // If -dead_strip, set up initial live symbols. - if (deadStrip()) { - // Entry point is live. - if (outputTypeHasEntry()) - addDeadStripRoot(entrySymbolName()); - // Lazy binding helper is live. - if (needsStubsPass()) - addDeadStripRoot(binderSymbolName()); - // If using -exported_symbols_list, make all exported symbols live. - if (_exportMode == ExportMode::exported) { - setGlobalsAreDeadStripRoots(false); - for (const auto &symbol : _exportedSymbols) - addDeadStripRoot(symbol.getKey()); - } - } - - addOutputFileDependency(outputPath()); - - return true; -} - -void MachOLinkingContext::addPasses(PassManager &pm) { - // objc pass should be before layout pass. Otherwise test cases may contain - // no atoms which confuses the layout pass. - if (needsObjCPass()) - mach_o::addObjCPass(pm, *this); - mach_o::addLayoutPass(pm, *this); - if (needsStubsPass()) - mach_o::addStubsPass(pm, *this); - if (needsCompactUnwindPass()) - mach_o::addCompactUnwindPass(pm, *this); - if (needsGOTPass()) - mach_o::addGOTPass(pm, *this); - if (needsTLVPass()) - mach_o::addTLVPass(pm, *this); - if (needsShimPass()) - mach_o::addShimPass(pm, *this); // Shim pass must run after stubs pass. 
-} - -Writer &MachOLinkingContext::writer() const { - if (!_writer) - _writer = createWriterMachO(*this); - return *_writer; -} - -ErrorOr> -MachOLinkingContext::getMemoryBuffer(StringRef path) { - addInputFileDependency(path); - - ErrorOr> mbOrErr = - MemoryBuffer::getFileOrSTDIN(path); - if (std::error_code ec = mbOrErr.getError()) - return ec; - std::unique_ptr mb = std::move(mbOrErr.get()); - - // If buffer contains a fat file, find required arch in fat buffer - // and switch buffer to point to just that required slice. - uint32_t offset; - uint32_t size; - if (sliceFromFatFile(mb->getMemBufferRef(), offset, size)) - return MemoryBuffer::getFileSlice(path, size, offset); - return std::move(mb); -} - -MachODylibFile* MachOLinkingContext::loadIndirectDylib(StringRef path) { - ErrorOr> mbOrErr = getMemoryBuffer(path); - if (mbOrErr.getError()) - return nullptr; - - ErrorOr> fileOrErr = - registry().loadFile(std::move(mbOrErr.get())); - if (!fileOrErr) - return nullptr; - std::unique_ptr &file = fileOrErr.get(); - file->parse(); - MachODylibFile *result = reinterpret_cast(file.get()); - // Node object now owned by _indirectDylibs vector. - _indirectDylibs.push_back(std::move(file)); - return result; -} - -MachODylibFile* MachOLinkingContext::findIndirectDylib(StringRef path) { - // See if already loaded. - auto pos = _pathToDylibMap.find(path); - if (pos != _pathToDylibMap.end()) - return pos->second; - - // Search -L paths if of the form "libXXX.dylib" - std::pair split = path.rsplit('/'); - StringRef leafName = split.second; - if (leafName.startswith("lib") && leafName.endswith(".dylib")) { - // FIXME: Need to enhance searchLibrary() to only look for .dylib - auto libPath = searchLibrary(leafName); - if (libPath) - return loadIndirectDylib(libPath.getValue()); - } - - // Try full path with sysroot. 
- for (StringRef sysPath : _syslibRoots) { - SmallString<256> fullPath; - fullPath.assign(sysPath); - llvm::sys::path::append(fullPath, path); - if (pathExists(fullPath)) - return loadIndirectDylib(fullPath); - } - - // Try full path. - if (pathExists(path)) { - return loadIndirectDylib(path); - } - - return nullptr; -} - -uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const { - auto pos = _pathToDylibMap.find(installName); - if (pos != _pathToDylibMap.end()) - return pos->second->currentVersion(); - else - return 0x10000; // 1.0 -} - -uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { - auto pos = _pathToDylibMap.find(installName); - if (pos != _pathToDylibMap.end()) - return pos->second->compatVersion(); - else - return 0x10000; // 1.0 -} - -void MachOLinkingContext::createImplicitFiles( - std::vector > &result) { - // Add indirect dylibs by asking each linked dylib to add its indirects. - // Iterate until no more dylibs get loaded. - size_t dylibCount = 0; - while (dylibCount != _allDylibs.size()) { - dylibCount = _allDylibs.size(); - for (MachODylibFile *dylib : _allDylibs) { - dylib->loadReExportedDylibs([this] (StringRef path) -> MachODylibFile* { - return findIndirectDylib(path); }); - } - } - - // Let writer add output type specific extras. - writer().createImplicitFiles(result); - - // If undefinedMode is != error, add a FlatNamespaceFile instance. This will - // provide a SharedLibraryAtom for symbols that aren't defined elsewhere. - if (undefinedMode() != UndefinedMode::error) { - result.emplace_back(new mach_o::FlatNamespaceFile(*this)); - _flatNamespaceFile = result.back().get(); - } -} - -void MachOLinkingContext::registerDylib(MachODylibFile *dylib, - bool upward) const { - std::lock_guard lock(_dylibsMutex); - - if (!llvm::count(_allDylibs, dylib)) - _allDylibs.push_back(dylib); - _pathToDylibMap[dylib->installName()] = dylib; - // If path is different than install name, register path too. 
- if (!dylib->path().equals(dylib->installName())) - _pathToDylibMap[dylib->path()] = dylib; - if (upward) - _upwardDylibs.insert(dylib); -} - -bool MachOLinkingContext::isUpwardDylib(StringRef installName) const { - for (MachODylibFile *dylib : _upwardDylibs) { - if (dylib->installName().equals(installName)) - return true; - } - return false; -} - -ArchHandler &MachOLinkingContext::archHandler() const { - if (!_archHandler) - _archHandler = ArchHandler::create(_arch); - return *_archHandler; -} - -void MachOLinkingContext::addSectionAlignment(StringRef seg, StringRef sect, - uint16_t align) { - SectionAlign entry = { seg, sect, align }; - _sectAligns.push_back(entry); -} - -void MachOLinkingContext::addSectCreateSection( - StringRef seg, StringRef sect, - std::unique_ptr content) { - - if (!_sectCreateFile) { - auto sectCreateFile = std::make_unique(); - _sectCreateFile = sectCreateFile.get(); - getNodes().push_back(std::make_unique(std::move(sectCreateFile))); - } - - assert(_sectCreateFile && "sectcreate file does not exist."); - _sectCreateFile->addSection(seg, sect, std::move(content)); -} - -bool MachOLinkingContext::sectionAligned(StringRef seg, StringRef sect, - uint16_t &align) const { - for (const SectionAlign &entry : _sectAligns) { - if (seg.equals(entry.segmentName) && sect.equals(entry.sectionName)) { - align = entry.align; - return true; - } - } - return false; -} - -void MachOLinkingContext::addExportSymbol(StringRef sym) { - // Support old crufty export lists with bogus entries. - if (sym.endswith(".eh") || sym.startswith(".objc_category_name_")) { - llvm::errs() << "warning: ignoring " << sym << " in export list\n"; - return; - } - // Only i386 MacOSX uses old ABI, so don't change those. - if ((_os != OS::macOSX) || (_arch != arch_x86)) { - // ObjC has two different ABIs. Be nice and allow one export list work for - // both ABIs by renaming symbols. 
- if (sym.startswith(".objc_class_name_")) { - std::string abi2className("_OBJC_CLASS_$_"); - abi2className += sym.substr(17); - _exportedSymbols.insert(copy(abi2className)); - std::string abi2metaclassName("_OBJC_METACLASS_$_"); - abi2metaclassName += sym.substr(17); - _exportedSymbols.insert(copy(abi2metaclassName)); - return; - } - } - - // FIXME: Support wildcards. - _exportedSymbols.insert(sym); -} - -bool MachOLinkingContext::exportSymbolNamed(StringRef sym) const { - switch (_exportMode) { - case ExportMode::globals: - llvm_unreachable("exportSymbolNamed() should not be called in this mode"); - break; - case ExportMode::exported: - return _exportedSymbols.count(sym); - case ExportMode::unexported: - return !_exportedSymbols.count(sym); - } - llvm_unreachable("_exportMode unknown enum value"); -} - -std::string MachOLinkingContext::demangle(StringRef symbolName) const { - // Only try to demangle symbols if -demangle on command line - if (!demangleSymbols()) - return std::string(symbolName); - - // Only try to demangle symbols that look like C++ symbols - if (!symbolName.startswith("__Z")) - return std::string(symbolName); - - SmallString<256> symBuff; - StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); - // Mach-O has extra leading underscore that needs to be removed. - const char *cstr = nullTermSym.data() + 1; - int status; - char *demangled = llvm::itaniumDemangle(cstr, nullptr, nullptr, &status); - if (demangled) { - std::string result(demangled); - // __cxa_demangle() always uses a malloc'ed buffer to return the result. 
- free(demangled); - return result; - } - - return std::string(symbolName); -} - -static void addDependencyInfoHelper(llvm::raw_fd_ostream *DepInfo, - char Opcode, StringRef Path) { - if (!DepInfo) - return; - - *DepInfo << Opcode; - *DepInfo << Path; - *DepInfo << '\0'; -} - -std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { - std::error_code ec; - _dependencyInfo = std::unique_ptr( - new llvm::raw_fd_ostream(path, ec, llvm::sys::fs::OF_None)); - if (ec) { - _dependencyInfo.reset(); - return ec; - } - - addDependencyInfoHelper(_dependencyInfo.get(), 0x00, "lld" /*FIXME*/); - return std::error_code(); -} - -void MachOLinkingContext::addInputFileDependency(StringRef path) const { - addDependencyInfoHelper(_dependencyInfo.get(), 0x10, path); -} - -void MachOLinkingContext::addInputFileNotFound(StringRef path) const { - addDependencyInfoHelper(_dependencyInfo.get(), 0x11, path); -} - -void MachOLinkingContext::addOutputFileDependency(StringRef path) const { - addDependencyInfoHelper(_dependencyInfo.get(), 0x40, path); -} - -void MachOLinkingContext::appendOrderedSymbol(StringRef symbol, - StringRef filename) { - // To support sorting static functions which may have the same name in - // multiple .o files, _orderFiles maps the symbol name to a vector - // of OrderFileNode each of which can specify a file prefix. - OrderFileNode info; - if (!filename.empty()) - info.fileFilter = copy(filename); - info.order = _orderFileEntries++; - _orderFiles[symbol].push_back(info); -} - -bool -MachOLinkingContext::findOrderOrdinal(const std::vector &nodes, - const DefinedAtom *atom, - unsigned &ordinal) { - const File *objFile = &atom->file(); - assert(objFile); - StringRef objName = objFile->path(); - std::pair dirAndLeaf = objName.rsplit('/'); - if (!dirAndLeaf.second.empty()) - objName = dirAndLeaf.second; - for (const OrderFileNode &info : nodes) { - if (info.fileFilter.empty()) { - // Have unprefixed symbol name in order file that matches this atom. 
- ordinal = info.order; - return true; - } - if (info.fileFilter.equals(objName)) { - // Have prefixed symbol name in order file that matches atom's path. - ordinal = info.order; - return true; - } - } - return false; -} - -bool MachOLinkingContext::customAtomOrderer(const DefinedAtom *left, - const DefinedAtom *right, - bool &leftBeforeRight) const { - // No custom sorting if no order file entries. - if (!_orderFileEntries) - return false; - - // Order files can only order named atoms. - StringRef leftName = left->name(); - StringRef rightName = right->name(); - if (leftName.empty() || rightName.empty()) - return false; - - // If neither is in order file list, no custom sorter. - auto leftPos = _orderFiles.find(leftName); - auto rightPos = _orderFiles.find(rightName); - bool leftIsOrdered = (leftPos != _orderFiles.end()); - bool rightIsOrdered = (rightPos != _orderFiles.end()); - if (!leftIsOrdered && !rightIsOrdered) - return false; - - // There could be multiple symbols with same name but different file prefixes. - unsigned leftOrder; - unsigned rightOrder; - bool foundLeft = - leftIsOrdered && findOrderOrdinal(leftPos->getValue(), left, leftOrder); - bool foundRight = rightIsOrdered && - findOrderOrdinal(rightPos->getValue(), right, rightOrder); - if (!foundLeft && !foundRight) - return false; - - // If only one is in order file list, ordered one goes first. - if (foundLeft != foundRight) - leftBeforeRight = foundLeft; - else - leftBeforeRight = (leftOrder < rightOrder); - - return true; -} - -static bool isLibrary(const std::unique_ptr &elem) { - if (FileNode *node = dyn_cast(const_cast(elem.get()))) { - File *file = node->getFile(); - return isa(file) || isa(file); - } - return false; -} - -// The darwin linker processes input files in two phases. The first phase -// links in all object (.o) files in command line order. The second phase -// links in libraries in command line order. 
-// In this function we reorder the input files so that all the object files -// comes before any library file. We also make a group for the library files -// so that the Resolver will reiterate over the libraries as long as we find -// new undefines from libraries. -void MachOLinkingContext::finalizeInputFiles() { - std::vector> &elements = getNodes(); - llvm::stable_sort(elements, [](const std::unique_ptr &a, - const std::unique_ptr &b) { - return !isLibrary(a) && isLibrary(b); - }); - size_t numLibs = std::count_if(elements.begin(), elements.end(), isLibrary); - elements.push_back(std::make_unique(numLibs)); -} - -llvm::Error MachOLinkingContext::handleLoadedFile(File &file) { - auto *machoFile = dyn_cast(&file); - if (!machoFile) - return llvm::Error::success(); - - // Check that the arch of the context matches that of the file. - // Also set the arch of the context if it didn't have one. - if (_arch == arch_unknown) { - _arch = machoFile->arch(); - } else if (machoFile->arch() != arch_unknown && machoFile->arch() != _arch) { - // Archs are different. - return llvm::make_error(file.path() + - Twine(" cannot be linked due to incompatible architecture")); - } - - // Check that the OS of the context matches that of the file. - // Also set the OS of the context if it didn't have one. - if (_os == OS::unknown) { - _os = machoFile->OS(); - } else if (machoFile->OS() != OS::unknown && machoFile->OS() != _os) { - // OSes are different. - return llvm::make_error(file.path() + - Twine(" cannot be linked due to incompatible operating systems")); - } - - // Check that if the objc info exists, that it is compatible with the target - // OS. - switch (machoFile->objcConstraint()) { - case objc_unknown: - // The file is not compiled with objc, so skip the checks. 
- break; - case objc_gc_only: - case objc_supports_gc: - llvm_unreachable("GC support should already have thrown an error"); - case objc_retainReleaseForSimulator: - // The file is built with simulator objc, so make sure that the context - // is also building with simulator support. - if (_os != OS::iOS_simulator) - return llvm::make_error(file.path() + - Twine(" cannot be linked. It contains ObjC built for the simulator" - " while we are linking a non-simulator target")); - assert((_objcConstraint == objc_unknown || - _objcConstraint == objc_retainReleaseForSimulator) && - "Must be linking with retain/release for the simulator"); - _objcConstraint = objc_retainReleaseForSimulator; - break; - case objc_retainRelease: - // The file is built without simulator objc, so make sure that the - // context is also building without simulator support. - if (_os == OS::iOS_simulator) - return llvm::make_error(file.path() + - Twine(" cannot be linked. It contains ObjC built for a non-simulator" - " target while we are linking a simulator target")); - assert((_objcConstraint == objc_unknown || - _objcConstraint == objc_retainRelease) && - "Must be linking with retain/release for a non-simulator target"); - _objcConstraint = objc_retainRelease; - break; - } - - // Check that the swift version of the context matches that of the file. - // Also set the swift version of the context if it didn't have one. - if (!_swiftVersion) { - _swiftVersion = machoFile->swiftVersion(); - } else if (machoFile->swiftVersion() && - machoFile->swiftVersion() != _swiftVersion) { - // Swift versions are different. 
- return llvm::make_error("different swift versions"); - } - - return llvm::Error::success(); -} - -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h deleted file mode 100644 index 3ef2949addab..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h +++ /dev/null @@ -1,336 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// -/// \file These data structures comprise the "normalized" view of -/// mach-o object files. The normalized view is an in-memory only data structure -/// which is always in native endianness and pointer size. -/// -/// The normalized view easily converts to and from YAML using YAML I/O. -/// -/// The normalized view converts to and from binary mach-o object files using -/// the writeBinary() and readBinary() functions. -/// -/// The normalized view converts to and from lld::Atoms using the -/// normalizedToAtoms() and normalizedFromAtoms(). 
-/// -/// Overall, the conversion paths available look like: -/// -/// +---------------+ -/// | binary mach-o | -/// +---------------+ -/// ^ -/// | -/// v -/// +------------+ +------+ -/// | normalized | <-> | yaml | -/// +------------+ +------+ -/// ^ -/// | -/// v -/// +-------+ -/// | Atoms | -/// +-------+ -/// - -#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H -#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H - -#include "DebugInfo.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/YAMLTraits.h" - -using llvm::BumpPtrAllocator; -using llvm::yaml::Hex64; -using llvm::yaml::Hex32; -using llvm::yaml::Hex16; -using llvm::yaml::Hex8; -using llvm::yaml::SequenceTraits; -using llvm::MachO::HeaderFileType; -using llvm::MachO::BindType; -using llvm::MachO::RebaseType; -using llvm::MachO::NListType; -using llvm::MachO::RelocationInfoType; -using llvm::MachO::SectionType; -using llvm::MachO::LoadCommandType; -using llvm::MachO::ExportSymbolKind; -using llvm::MachO::DataRegionType; - -namespace lld { -namespace mach_o { -namespace normalized { - - -/// The real mach-o relocation record is 8-bytes on disk and is -/// encoded in one of two different bit-field patterns. This -/// normalized form has the union of all possible fields. -struct Relocation { - Relocation() : offset(0), scattered(false), - type(llvm::MachO::GENERIC_RELOC_VANILLA), - length(0), pcRel(false), isExtern(false), value(0), - symbol(0) { } - - Hex32 offset; - bool scattered; - RelocationInfoType type; - uint8_t length; - bool pcRel; - bool isExtern; - Hex32 value; - uint32_t symbol; -}; - -/// A typedef so that YAML I/O can treat this vector as a sequence. 
-typedef std::vector Relocations; - -/// A typedef so that YAML I/O can process the raw bytes in a section. -typedef std::vector ContentBytes; - -/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence. -typedef std::vector IndirectSymbols; - -/// A typedef so that YAML I/O can encode/decode section attributes. -LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr) - -/// A typedef so that YAML I/O can encode/decode section alignment. -LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment) - -/// Mach-O has a 32-bit and 64-bit section record. This normalized form -/// can support either kind. -struct Section { - Section() : type(llvm::MachO::S_REGULAR), - attributes(0), alignment(1), address(0) { } - - StringRef segmentName; - StringRef sectionName; - SectionType type; - SectionAttr attributes; - SectionAlignment alignment; - Hex64 address; - ArrayRef content; - Relocations relocations; - IndirectSymbols indirectSymbols; -}; - - -/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist. -LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope) - -/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist. -LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc) - -/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol -/// type and scope and mixed in the same n_type field. This normalized form -/// works for any pointer size and separates out the type and scope. -struct Symbol { - Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { } - - StringRef name; - NListType type; - SymbolScope scope; - uint8_t sect; - SymbolDesc desc; - Hex64 value; -}; - -/// Check whether the given section type indicates a zero-filled section. -// FIXME: Utility functions of this kind should probably be moved into -// llvm/Support. 
-inline bool isZeroFillSection(SectionType T) { - return (T == llvm::MachO::S_ZEROFILL || - T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL); -} - -/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment. -LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect) - -/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz -LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion) - -/// Segments are only used in normalized final linked images (not in relocatable -/// object files). They specify how a range of the file is loaded. -struct Segment { - StringRef name; - Hex64 address; - Hex64 size; - VMProtect init_access; - VMProtect max_access; -}; - -/// Only used in normalized final linked images to specify on which dylibs -/// it depends. -struct DependentDylib { - StringRef path; - LoadCommandType kind; - PackedVersion compatVersion; - PackedVersion currentVersion; -}; - -/// A normalized rebasing entry. Only used in normalized final linked images. -struct RebaseLocation { - Hex32 segOffset; - uint8_t segIndex; - RebaseType kind; -}; - -/// A normalized binding entry. Only used in normalized final linked images. -struct BindLocation { - Hex32 segOffset; - uint8_t segIndex; - BindType kind; - bool canBeNull; - int ordinal; - StringRef symbolName; - Hex64 addend; -}; - -/// A typedef so that YAML I/O can encode/decode export flags. -LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags) - -/// A normalized export entry. Only used in normalized final linked images. -struct Export { - StringRef name; - Hex64 offset; - ExportSymbolKind kind; - ExportFlags flags; - Hex32 otherOffset; - StringRef otherName; -}; - -/// A normalized data-in-code entry. -struct DataInCode { - Hex32 offset; - Hex16 length; - DataRegionType kind; -}; - -/// A typedef so that YAML I/O can encode/decode mach_header.flags. 
-LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags) - -/// -struct NormalizedFile { - MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; - HeaderFileType fileType = llvm::MachO::MH_OBJECT; - FileFlags flags = 0; - std::vector segments; // Not used in object files. - std::vector
sections; - - // Symbols sorted by kind. - std::vector localSymbols; - std::vector globalSymbols; - std::vector undefinedSymbols; - std::vector stabsSymbols; - - // Maps to load commands with no LINKEDIT content (final linked images only). - std::vector dependentDylibs; - StringRef installName; // dylibs only - PackedVersion compatVersion = 0; // dylibs only - PackedVersion currentVersion = 0; // dylibs only - bool hasUUID = false; - bool hasMinVersionLoadCommand = false; - bool generateDataInCodeLoadCommand = false; - std::vector rpaths; - Hex64 entryAddress = 0; - Hex64 stackSize = 0; - MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; - Hex64 sourceVersion = 0; - PackedVersion minOSverson = 0; - PackedVersion sdkVersion = 0; - LoadCommandType minOSVersionKind = (LoadCommandType)0; - - // Maps to load commands with LINKEDIT content (final linked images only). - Hex32 pageSize = 0; - std::vector rebasingInfo; - std::vector bindingInfo; - std::vector weakBindingInfo; - std::vector lazyBindingInfo; - std::vector exportInfo; - std::vector functionStarts; - std::vector dataInCode; - - // TODO: - // code-signature - // split-seg-info - // function-starts - - // For any allocations in this struct which need to be owned by this struct. - BumpPtrAllocator ownedAllocations; -}; - -/// Tests if a file is a non-fat mach-o object file. -bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch); - -/// If the buffer is a fat file with the request arch, then this function -/// returns true with 'offset' and 'size' set to location of the arch slice -/// within the buffer. Otherwise returns false; -bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, - uint32_t &offset, uint32_t &size); - -/// Reads a mach-o file and produces an in-memory normalized view. -llvm::Expected> -readBinary(std::unique_ptr &mb, - const MachOLinkingContext::Arch arch); - -/// Takes in-memory normalized view and writes a mach-o object file. 
-llvm::Error writeBinary(const NormalizedFile &file, StringRef path); - -size_t headerAndLoadCommandsSize(const NormalizedFile &file, - bool includeFunctionStarts); - - -/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. -llvm::Expected> -readYaml(std::unique_ptr &mb); - -/// Writes a yaml encoded mach-o files given an in-memory normalized view. -std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out); - -llvm::Error -normalizedObjectToAtoms(MachOFile *file, - const NormalizedFile &normalizedFile, - bool copyRefs); - -llvm::Error -normalizedDylibToAtoms(MachODylibFile *file, - const NormalizedFile &normalizedFile, - bool copyRefs); - -/// Takes in-memory normalized dylib or object and parses it into lld::File -llvm::Expected> -normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs); - -/// Takes atoms and generates a normalized macho-o view. -llvm::Expected> -normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt); - - -} // namespace normalized - -/// Class for interfacing mach-o yaml files into generic yaml parsing -class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { -public: - MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch) - : _arch(arch) { } - bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override; -private: - const MachOLinkingContext::Arch _arch; -}; - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp deleted file mode 100644 index 87601ca1be8b..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp +++ /dev/null @@ -1,614 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// -/// \file For mach-o object files, this implementation converts from -/// mach-o on-disk binary format to in-memory normalized mach-o. -/// -/// +---------------+ -/// | binary mach-o | -/// +---------------+ -/// | -/// | -/// v -/// +------------+ -/// | normalized | -/// +------------+ - -#include "ArchHandler.h" -#include "MachONormalizedFile.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "lld/Core/SharedLibraryFile.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/BinaryFormat/Magic.h" -#include "llvm/Object/MachO.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include - -using namespace llvm::MachO; -using llvm::object::ExportEntry; -using llvm::file_magic; -using llvm::object::MachOObjectFile; - -namespace lld { -namespace mach_o { -namespace normalized { - -// Utility to call a lambda expression on each load command. 
-static llvm::Error forEachLoadCommand( - StringRef lcRange, unsigned lcCount, bool isBig, bool is64, - std::function func) { - const char* p = lcRange.begin(); - for (unsigned i=0; i < lcCount; ++i) { - const load_command *lc = reinterpret_cast(p); - load_command lcCopy; - const load_command *slc = lc; - if (isBig != llvm::sys::IsBigEndianHost) { - memcpy(&lcCopy, lc, sizeof(load_command)); - swapStruct(lcCopy); - slc = &lcCopy; - } - if ( (p + slc->cmdsize) > lcRange.end() ) - return llvm::make_error("Load command exceeds range"); - - if (func(slc->cmd, slc->cmdsize, p)) - return llvm::Error::success(); - - p += slc->cmdsize; - } - - return llvm::Error::success(); -} - -static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, - bool bigEndian, - uint32_t reloff, uint32_t nreloc) { - if ((reloff + nreloc*8) > buffer.size()) - return make_error_code(llvm::errc::executable_format_error); - const any_relocation_info* relocsArray = - reinterpret_cast(buffer.begin()+reloff); - - for(uint32_t i=0; i < nreloc; ++i) { - relocs.push_back(unpackRelocation(relocsArray[i], bigEndian)); - } - return std::error_code(); -} - -static std::error_code -appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig, - uint32_t istOffset, uint32_t istCount, - uint32_t startIndex, uint32_t count) { - if ((istOffset + istCount*4) > buffer.size()) - return make_error_code(llvm::errc::executable_format_error); - if (startIndex+count > istCount) - return make_error_code(llvm::errc::executable_format_error); - const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data(); - - for(uint32_t i=0; i < count; ++i) { - isyms.push_back(read32( - indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig)); - } - return std::error_code(); -} - - -template static T readBigEndian(T t) { - if (llvm::sys::IsLittleEndianHost) - llvm::sys::swapByteOrder(t); - return t; -} - - -static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) { - 
switch (read32(&mh->magic, false)) { - case llvm::MachO::MH_MAGIC: - is64 = false; - isBig = false; - return true; - case llvm::MachO::MH_MAGIC_64: - is64 = true; - isBig = false; - return true; - case llvm::MachO::MH_CIGAM: - is64 = false; - isBig = true; - return true; - case llvm::MachO::MH_CIGAM_64: - is64 = true; - isBig = true; - return true; - default: - return false; - } -} - - -bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) { - // Try opening and mapping file at path. - ErrorOr> b = MemoryBuffer::getFileOrSTDIN(path); - if (b.getError()) - return false; - - // If file length < 32 it is too small to be mach-o object file. - StringRef fileBuffer = b->get()->getBuffer(); - if (fileBuffer.size() < 32) - return false; - - // If file buffer does not start with MH_MAGIC (and variants), not obj file. - const mach_header *mh = reinterpret_cast( - fileBuffer.begin()); - bool is64, isBig; - if (!isMachOHeader(mh, is64, isBig)) - return false; - - // If not MH_OBJECT, not object file. - if (read32(&mh->filetype, isBig) != MH_OBJECT) - return false; - - // Lookup up arch from cpu/subtype pair. 
- arch = MachOLinkingContext::archFromCpuType( - read32(&mh->cputype, isBig), - read32(&mh->cpusubtype, isBig)); - return true; -} - -bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, - uint32_t &offset, uint32_t &size) { - const char *start = mb.getBufferStart(); - const llvm::MachO::fat_header *fh = - reinterpret_cast(start); - if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC) - return false; - uint32_t nfat_arch = readBigEndian(fh->nfat_arch); - const fat_arch *fstart = - reinterpret_cast(start + sizeof(fat_header)); - const fat_arch *fend = - reinterpret_cast(start + sizeof(fat_header) + - sizeof(fat_arch) * nfat_arch); - const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch); - const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch); - for (const fat_arch *fa = fstart; fa < fend; ++fa) { - if ((readBigEndian(fa->cputype) == reqCpuType) && - (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) { - offset = readBigEndian(fa->offset); - size = readBigEndian(fa->size); - if ((offset + size) > mb.getBufferSize()) - return false; - return true; - } - } - return false; -} - -/// Reads a mach-o file and produces an in-memory normalized view. -llvm::Expected> -readBinary(std::unique_ptr &mb, - const MachOLinkingContext::Arch arch) { - // Make empty NormalizedFile. - std::unique_ptr f(new NormalizedFile()); - - const char *start = mb->getBufferStart(); - size_t objSize = mb->getBufferSize(); - const mach_header *mh = reinterpret_cast(start); - - uint32_t sliceOffset; - uint32_t sliceSize; - if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) { - start = &start[sliceOffset]; - objSize = sliceSize; - mh = reinterpret_cast(start); - } - - // Determine endianness and pointer size for mach-o file. - bool is64, isBig; - if (!isMachOHeader(mh, is64, isBig)) - return llvm::make_error("File is not a mach-o"); - - // Endian swap header, if needed. 
- mach_header headerCopy; - const mach_header *smh = mh; - if (isBig != llvm::sys::IsBigEndianHost) { - memcpy(&headerCopy, mh, sizeof(mach_header)); - swapStruct(headerCopy); - smh = &headerCopy; - } - - // Validate head and load commands fit in buffer. - const uint32_t lcCount = smh->ncmds; - const char *lcStart = - start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)); - StringRef lcRange(lcStart, smh->sizeofcmds); - if (lcRange.end() > (start + objSize)) - return llvm::make_error("Load commands exceed file size"); - - // Get architecture from mach_header. - f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); - if (f->arch != arch) { - return llvm::make_error( - Twine("file is wrong architecture. Expected " - "(" + MachOLinkingContext::nameFromArch(arch) - + ") found (" - + MachOLinkingContext::nameFromArch(f->arch) - + ")" )); - } - // Copy file type and flags - f->fileType = HeaderFileType(smh->filetype); - f->flags = smh->flags; - - - // Pre-scan load commands looking for indirect symbol table. - uint32_t indirectSymbolTableOffset = 0; - uint32_t indirectSymbolTableCount = 0; - auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, - [&](uint32_t cmd, uint32_t size, - const char *lc) -> bool { - if (cmd == LC_DYSYMTAB) { - const dysymtab_command *d = reinterpret_cast(lc); - indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig); - indirectSymbolTableCount = read32(&d->nindirectsyms, isBig); - return true; - } - return false; - }); - if (ec) - return std::move(ec); - - // Walk load commands looking for segments/sections and the symbol table. 
- const data_in_code_entry *dataInCode = nullptr; - const dyld_info_command *dyldInfo = nullptr; - uint32_t dataInCodeSize = 0; - ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, - [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { - switch(cmd) { - case LC_SEGMENT_64: - if (is64) { - const segment_command_64 *seg = - reinterpret_cast(lc); - const unsigned sectionCount = read32(&seg->nsects, isBig); - const section_64 *sects = reinterpret_cast - (lc + sizeof(segment_command_64)); - const unsigned lcSize = sizeof(segment_command_64) - + sectionCount*sizeof(section_64); - // Verify sections don't extend beyond end of segment load command. - if (lcSize > size) - return true; - for (unsigned i=0; i < sectionCount; ++i) { - const section_64 *sect = §s[i]; - Section section; - section.segmentName = getString16(sect->segname); - section.sectionName = getString16(sect->sectname); - section.type = (SectionType)(read32(§->flags, isBig) & - SECTION_TYPE); - section.attributes = read32(§->flags, isBig) & SECTION_ATTRIBUTES; - section.alignment = 1 << read32(§->align, isBig); - section.address = read64(§->addr, isBig); - const uint8_t *content = - (const uint8_t *)start + read32(§->offset, isBig); - size_t contentSize = read64(§->size, isBig); - // Note: this assign() is copying the content bytes. Ideally, - // we can use a custom allocator for vector to avoid the copy. 
- section.content = llvm::makeArrayRef(content, contentSize); - appendRelocations(section.relocations, mb->getBuffer(), isBig, - read32(§->reloff, isBig), - read32(§->nreloc, isBig)); - if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { - appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(), - isBig, - indirectSymbolTableOffset, - indirectSymbolTableCount, - read32(§->reserved1, isBig), - contentSize/4); - } - f->sections.push_back(section); - } - } - break; - case LC_SEGMENT: - if (!is64) { - const segment_command *seg = - reinterpret_cast(lc); - const unsigned sectionCount = read32(&seg->nsects, isBig); - const section *sects = reinterpret_cast - (lc + sizeof(segment_command)); - const unsigned lcSize = sizeof(segment_command) - + sectionCount*sizeof(section); - // Verify sections don't extend beyond end of segment load command. - if (lcSize > size) - return true; - for (unsigned i=0; i < sectionCount; ++i) { - const section *sect = §s[i]; - Section section; - section.segmentName = getString16(sect->segname); - section.sectionName = getString16(sect->sectname); - section.type = (SectionType)(read32(§->flags, isBig) & - SECTION_TYPE); - section.attributes = - read32((const uint8_t *)§->flags, isBig) & SECTION_ATTRIBUTES; - section.alignment = 1 << read32(§->align, isBig); - section.address = read32(§->addr, isBig); - const uint8_t *content = - (const uint8_t *)start + read32(§->offset, isBig); - size_t contentSize = read32(§->size, isBig); - // Note: this assign() is copying the content bytes. Ideally, - // we can use a custom allocator for vector to avoid the copy. 
- section.content = llvm::makeArrayRef(content, contentSize); - appendRelocations(section.relocations, mb->getBuffer(), isBig, - read32(§->reloff, isBig), - read32(§->nreloc, isBig)); - if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { - appendIndirectSymbols( - section.indirectSymbols, mb->getBuffer(), isBig, - indirectSymbolTableOffset, indirectSymbolTableCount, - read32(§->reserved1, isBig), contentSize / 4); - } - f->sections.push_back(section); - } - } - break; - case LC_SYMTAB: { - const symtab_command *st = reinterpret_cast(lc); - const char *strings = start + read32(&st->stroff, isBig); - const uint32_t strSize = read32(&st->strsize, isBig); - // Validate string pool and symbol table all in buffer. - if (read32((const uint8_t *)&st->stroff, isBig) + - read32((const uint8_t *)&st->strsize, isBig) > - objSize) - return true; - if (is64) { - const uint32_t symOffset = read32(&st->symoff, isBig); - const uint32_t symCount = read32(&st->nsyms, isBig); - if ( symOffset+(symCount*sizeof(nlist_64)) > objSize) - return true; - const nlist_64 *symbols = - reinterpret_cast(start + symOffset); - // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 
- for(uint32_t i=0; i < symCount; ++i) { - nlist_64 tempSym; - memcpy(&tempSym, &symbols[i], sizeof(nlist_64)); - const nlist_64 *sin = &tempSym; - if (isBig != llvm::sys::IsBigEndianHost) - swapStruct(tempSym); - Symbol sout; - if (sin->n_strx > strSize) - return true; - sout.name = &strings[sin->n_strx]; - sout.type = static_cast(sin->n_type & (N_STAB|N_TYPE)); - sout.scope = (sin->n_type & (N_PEXT|N_EXT)); - sout.sect = sin->n_sect; - sout.desc = sin->n_desc; - sout.value = sin->n_value; - if (sin->n_type & N_STAB) - f->stabsSymbols.push_back(sout); - else if (sout.type == N_UNDF) - f->undefinedSymbols.push_back(sout); - else if (sin->n_type & N_EXT) - f->globalSymbols.push_back(sout); - else - f->localSymbols.push_back(sout); - } - } else { - const uint32_t symOffset = read32(&st->symoff, isBig); - const uint32_t symCount = read32(&st->nsyms, isBig); - if ( symOffset+(symCount*sizeof(nlist)) > objSize) - return true; - const nlist *symbols = - reinterpret_cast(start + symOffset); - // Convert each nlist to a lld::mach_o::normalized::Symbol. 
- for(uint32_t i=0; i < symCount; ++i) { - const nlist *sin = &symbols[i]; - nlist tempSym; - if (isBig != llvm::sys::IsBigEndianHost) { - tempSym = *sin; swapStruct(tempSym); sin = &tempSym; - } - Symbol sout; - if (sin->n_strx > strSize) - return true; - sout.name = &strings[sin->n_strx]; - sout.type = (NListType)(sin->n_type & N_TYPE); - sout.scope = (sin->n_type & (N_PEXT|N_EXT)); - sout.sect = sin->n_sect; - sout.desc = sin->n_desc; - sout.value = sin->n_value; - if (sout.type == N_UNDF) - f->undefinedSymbols.push_back(sout); - else if (sout.scope == (SymbolScope)N_EXT) - f->globalSymbols.push_back(sout); - else if (sin->n_type & N_STAB) - f->stabsSymbols.push_back(sout); - else - f->localSymbols.push_back(sout); - } - } - } - break; - case LC_ID_DYLIB: { - const dylib_command *dl = reinterpret_cast(lc); - f->installName = lc + read32(&dl->dylib.name, isBig); - f->currentVersion = read32(&dl->dylib.current_version, isBig); - f->compatVersion = read32(&dl->dylib.compatibility_version, isBig); - } - break; - case LC_DATA_IN_CODE: { - const linkedit_data_command *ldc = - reinterpret_cast(lc); - dataInCode = reinterpret_cast( - start + read32(&ldc->dataoff, isBig)); - dataInCodeSize = read32(&ldc->datasize, isBig); - } - break; - case LC_LOAD_DYLIB: - case LC_LOAD_WEAK_DYLIB: - case LC_REEXPORT_DYLIB: - case LC_LOAD_UPWARD_DYLIB: { - const dylib_command *dl = reinterpret_cast(lc); - DependentDylib entry; - entry.path = lc + read32(&dl->dylib.name, isBig); - entry.kind = LoadCommandType(cmd); - entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig); - entry.currentVersion = read32(&dl->dylib.current_version, isBig); - f->dependentDylibs.push_back(entry); - } - break; - case LC_RPATH: { - const rpath_command *rpc = reinterpret_cast(lc); - f->rpaths.push_back(lc + read32(&rpc->path, isBig)); - } - break; - case LC_DYLD_INFO: - case LC_DYLD_INFO_ONLY: - dyldInfo = reinterpret_cast(lc); - break; - case LC_VERSION_MIN_MACOSX: - case 
LC_VERSION_MIN_IPHONEOS: - case LC_VERSION_MIN_WATCHOS: - case LC_VERSION_MIN_TVOS: - // If we are emitting an object file, then we may take the load command - // kind from these commands and pass it on to the output - // file. - f->minOSVersionKind = (LoadCommandType)cmd; - break; - } - return false; - }); - if (ec) - return std::move(ec); - - if (dataInCode) { - // Convert on-disk data_in_code_entry array to DataInCode vector. - for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { - DataInCode entry; - entry.offset = read32(&dataInCode[i].offset, isBig); - entry.length = read16(&dataInCode[i].length, isBig); - entry.kind = - (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig); - f->dataInCode.push_back(entry); - } - } - - if (dyldInfo) { - // If any exports, extract and add to normalized exportInfo vector. - if (dyldInfo->export_size) { - const uint8_t *trieStart = reinterpret_cast( - start + read32(&dyldInfo->export_off, isBig)); - ArrayRef trie(trieStart, read32(&dyldInfo->export_size, isBig)); - Error Err = Error::success(); - for (const ExportEntry &trieExport : MachOObjectFile::exports(Err, trie)) { - Export normExport; - normExport.name = trieExport.name().copy(f->ownedAllocations); - normExport.offset = trieExport.address(); - normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK); - normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK; - normExport.otherOffset = trieExport.other(); - if (!trieExport.otherName().empty()) - normExport.otherName = trieExport.otherName().copy(f->ownedAllocations); - f->exportInfo.push_back(normExport); - } - if (Err) - return std::move(Err); - } - } - - return std::move(f); -} - -class MachOObjectReader : public Reader { -public: - MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {} - - bool canParse(file_magic magic, MemoryBufferRef mb) const override { - return (magic == file_magic::macho_object && mb.getBufferSize() > 32); - } - - 
ErrorOr> - loadFile(std::unique_ptr mb, - const Registry ®istry) const override { - std::unique_ptr ret = - std::make_unique(std::move(mb), &_ctx); - return std::move(ret); - } - -private: - MachOLinkingContext &_ctx; -}; - -class MachODylibReader : public Reader { -public: - MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {} - - bool canParse(file_magic magic, MemoryBufferRef mb) const override { - switch (magic) { - case file_magic::macho_dynamically_linked_shared_lib: - case file_magic::macho_dynamically_linked_shared_lib_stub: - return mb.getBufferSize() > 32; - default: - return false; - } - } - - ErrorOr> - loadFile(std::unique_ptr mb, - const Registry ®istry) const override { - std::unique_ptr ret = - std::make_unique(std::move(mb), &_ctx); - return std::move(ret); - } - -private: - MachOLinkingContext &_ctx; -}; - -class MachOTAPIReader : public Reader { -public: - MachOTAPIReader(MachOLinkingContext &ctx) : _ctx(ctx) {} - - bool canParse(file_magic magic, MemoryBufferRef mb) const override { - return magic == file_magic::tapi_file; - } - - ErrorOr> - loadFile(std::unique_ptr mb, - const Registry ®istry) const override { - std::unique_ptr ret = - std::make_unique(std::move(mb), &_ctx); - return std::move(ret); - } - -private: - MachOLinkingContext &_ctx; -}; - -} // namespace normalized -} // namespace mach_o - -void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { - MachOLinkingContext::Arch arch = ctx.arch(); - add(std::unique_ptr(new mach_o::normalized::MachOObjectReader(ctx))); - add(std::unique_ptr(new mach_o::normalized::MachODylibReader(ctx))); - add(std::unique_ptr(new mach_o::normalized::MachOTAPIReader(ctx))); - addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(), - ctx.archHandler().kindStrings()); - add(std::unique_ptr( - new mach_o::MachOYamlIOTaggedDocumentHandler(arch))); -} - - -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h 
b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h deleted file mode 100644 index aeb04ef4508a..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h +++ /dev/null @@ -1,213 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H -#define LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H - -#include "MachONormalizedFile.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/LEB128.h" -#include - -namespace lld { -namespace mach_o { -namespace normalized { - -class ByteBuffer { -public: - ByteBuffer() : _ostream(_bytes) { } - - void append_byte(uint8_t b) { - _ostream << b; - } - void append_uleb128(uint64_t value) { - llvm::encodeULEB128(value, _ostream); - } - void append_uleb128Fixed(uint64_t value, unsigned byteCount) { - unsigned min = llvm::getULEB128Size(value); - assert(min <= byteCount); - unsigned pad = byteCount - min; - llvm::encodeULEB128(value, _ostream, pad); - } - void append_sleb128(int64_t value) { - llvm::encodeSLEB128(value, _ostream); - } - void append_string(StringRef str) { - _ostream << str; - append_byte(0); - } - void align(unsigned alignment) { - while ( (_ostream.tell() % alignment) != 0 ) - append_byte(0); - } - size_t size() { - return _ostream.tell(); - } - const uint8_t *bytes() { - return reinterpret_cast(_ostream.str().data()); - } - -private: - SmallVector _bytes; - 
// Stream ivar must be after SmallVector ivar to construct properly. - llvm::raw_svector_ostream _ostream; -}; - -using namespace llvm::support::endian; -using llvm::sys::getSwappedBytes; - -template -static inline uint16_t read16(const T *loc, bool isBig) { - assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); - return isBig ? read16be(loc) : read16le(loc); -} - -template -static inline uint32_t read32(const T *loc, bool isBig) { - assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); - return isBig ? read32be(loc) : read32le(loc); -} - -template -static inline uint64_t read64(const T *loc, bool isBig) { - assert((uint64_t)loc % alignof(T) == 0 && "invalid pointer alignment"); - return isBig ? read64be(loc) : read64le(loc); -} - -inline void write16(uint8_t *loc, uint16_t value, bool isBig) { - if (isBig) - write16be(loc, value); - else - write16le(loc, value); -} - -inline void write32(uint8_t *loc, uint32_t value, bool isBig) { - if (isBig) - write32be(loc, value); - else - write32le(loc, value); -} - -inline void write64(uint8_t *loc, uint64_t value, bool isBig) { - if (isBig) - write64be(loc, value); - else - write64le(loc, value); -} - -inline uint32_t -bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit, - uint8_t bitCount) { - const uint32_t mask = ((1<> shift) & mask; -} - -inline void -bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits, - uint8_t firstBit, uint8_t bitCount) { - const uint32_t mask = ((1< 16) ? 16: str.size()); -} - -// Implemented in normalizedToAtoms() and used by normalizedFromAtoms() so -// that the same table can be used to map mach-o sections to and from -// DefinedAtom::ContentType. 
-void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, - StringRef &segmentName, - StringRef §ionName, - SectionType §ionType, - SectionAttr §ionAttrs, - bool &relocsToDefinedCanBeImplicit); - -} // namespace normalized -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp deleted file mode 100644 index 17b45b9ca827..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp +++ /dev/null @@ -1,1560 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// -/// \file For mach-o object files, this implementation converts normalized -/// mach-o in memory to mach-o binary on disk. 
-/// -/// +---------------+ -/// | binary mach-o | -/// +---------------+ -/// ^ -/// | -/// | -/// +------------+ -/// | normalized | -/// +------------+ - -#include "MachONormalizedFile.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include - -using namespace llvm::MachO; - -namespace lld { -namespace mach_o { -namespace normalized { - -struct TrieNode; // Forward declaration. 
- -struct TrieEdge : public llvm::ilist_node { - TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} - - StringRef _subString; - struct TrieNode *_child; -}; - -} // namespace normalized -} // namespace mach_o -} // namespace lld - - -namespace llvm { -using lld::mach_o::normalized::TrieEdge; -template <> -struct ilist_alloc_traits : ilist_noalloc_traits {}; -} // namespace llvm - - -namespace lld { -namespace mach_o { -namespace normalized { - -struct TrieNode { - typedef llvm::ilist TrieEdgeList; - - TrieNode(StringRef s) - : _cummulativeString(s), _address(0), _flags(0), _other(0), - _trieOffset(0), _hasExportInfo(false) {} - ~TrieNode() = default; - - void addSymbol(const Export &entry, BumpPtrAllocator &allocator, - std::vector &allNodes); - - void addOrderedNodes(const Export &entry, - std::vector &allNodes); - bool updateOffset(uint32_t &offset); - void appendToByteBuffer(ByteBuffer &out); - -private: - StringRef _cummulativeString; - TrieEdgeList _children; - uint64_t _address; - uint64_t _flags; - uint64_t _other; - StringRef _importedName; - uint32_t _trieOffset; - bool _hasExportInfo; - bool _ordered = false; -}; - -/// Utility class for writing a mach-o binary file given an in-memory -/// normalized file. -class MachOFileLayout { -public: - /// All layout computation is done in the constructor. - MachOFileLayout(const NormalizedFile &file, bool alwaysIncludeFunctionStarts); - - /// Returns the final file size as computed in the constructor. - size_t size() const; - - // Returns size of the mach_header and load commands. - size_t headerAndLoadCommandsSize() const; - - /// Writes the normalized file as a binary mach-o file to the specified - /// path. This does not have a stream interface because the generated - /// file may need the 'x' bit set. 
- llvm::Error writeBinary(StringRef path); - -private: - uint32_t loadCommandsSize(uint32_t &count, - bool alwaysIncludeFunctionStarts); - void buildFileOffsets(); - void writeMachHeader(); - llvm::Error writeLoadCommands(); - void writeSectionContent(); - void writeRelocations(); - void writeSymbolTable(); - void writeRebaseInfo(); - void writeBindingInfo(); - void writeLazyBindingInfo(); - void writeExportInfo(); - void writeFunctionStartsInfo(); - void writeDataInCodeInfo(); - void writeLinkEditContent(); - void buildLinkEditInfo(); - void buildRebaseInfo(); - void buildBindInfo(); - void buildLazyBindInfo(); - void buildExportTrie(); - void computeFunctionStartsSize(); - void computeDataInCodeSize(); - void computeSymbolTableSizes(); - void buildSectionRelocations(); - void appendSymbols(const std::vector &symbols, - uint32_t &symOffset, uint32_t &strOffset); - uint32_t indirectSymbolIndex(const Section §, uint32_t &index); - uint32_t indirectSymbolElementSize(const Section §); - - // For use as template parameter to load command methods. - struct MachO64Trait { - typedef llvm::MachO::segment_command_64 command; - typedef llvm::MachO::section_64 section; - enum { LC = llvm::MachO::LC_SEGMENT_64 }; - }; - - // For use as template parameter to load command methods. 
- struct MachO32Trait { - typedef llvm::MachO::segment_command command; - typedef llvm::MachO::section section; - enum { LC = llvm::MachO::LC_SEGMENT }; - }; - - template - llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); - template llvm::Error writeSegmentLoadCommands(uint8_t *&lc); - - uint32_t pointerAlign(uint32_t value); - static StringRef dyldPath(); - - struct SegExtraInfo { - uint32_t fileOffset; - uint32_t fileSize; - std::vector sections; - }; - typedef std::map SegMap; - struct SectionExtraInfo { - uint32_t fileOffset; - }; - typedef std::map SectionMap; - - const NormalizedFile &_file; - std::error_code _ec; - uint8_t *_buffer; - const bool _is64; - const bool _swap; - const bool _bigEndianArch; - uint64_t _seg1addr; - uint32_t _startOfLoadCommands; - uint32_t _countOfLoadCommands; - uint32_t _endOfLoadCommands; - uint32_t _startOfRelocations; - uint32_t _startOfFunctionStarts; - uint32_t _startOfDataInCode; - uint32_t _startOfSymbols; - uint32_t _startOfIndirectSymbols; - uint32_t _startOfSymbolStrings; - uint32_t _endOfSymbolStrings; - uint32_t _symbolTableLocalsStartIndex; - uint32_t _symbolTableGlobalsStartIndex; - uint32_t _symbolTableUndefinesStartIndex; - uint32_t _symbolStringPoolSize; - uint32_t _symbolTableSize; - uint32_t _functionStartsSize; - uint32_t _dataInCodeSize; - uint32_t _indirectSymbolTableCount; - // Used in object file creation only - uint32_t _startOfSectionsContent; - uint32_t _endOfSectionsContent; - // Used in final linked image only - uint32_t _startOfLinkEdit; - uint32_t _startOfRebaseInfo; - uint32_t _endOfRebaseInfo; - uint32_t _startOfBindingInfo; - uint32_t _endOfBindingInfo; - uint32_t _startOfLazyBindingInfo; - uint32_t _endOfLazyBindingInfo; - uint32_t _startOfExportTrie; - uint32_t _endOfExportTrie; - uint32_t _endOfLinkEdit; - uint64_t _addressOfLinkEdit; - SegMap _segInfo; - SectionMap _sectInfo; - ByteBuffer _rebaseInfo; - ByteBuffer _bindingInfo; - ByteBuffer _lazyBindingInfo; - ByteBuffer 
_weakBindingInfo; - ByteBuffer _exportTrie; -}; - -size_t headerAndLoadCommandsSize(const NormalizedFile &file, - bool includeFunctionStarts) { - MachOFileLayout layout(file, includeFunctionStarts); - return layout.headerAndLoadCommandsSize(); -} - -StringRef MachOFileLayout::dyldPath() { - return "/usr/lib/dyld"; -} - -uint32_t MachOFileLayout::pointerAlign(uint32_t value) { - return llvm::alignTo(value, _is64 ? 8 : 4); -} - - -size_t MachOFileLayout::headerAndLoadCommandsSize() const { - return _endOfLoadCommands; -} - -MachOFileLayout::MachOFileLayout(const NormalizedFile &file, - bool alwaysIncludeFunctionStarts) - : _file(file), - _is64(MachOLinkingContext::is64Bit(file.arch)), - _swap(!MachOLinkingContext::isHostEndian(file.arch)), - _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), - _seg1addr(INT64_MAX) { - _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); - const size_t segCommandBaseSize = - (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); - const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); - if (file.fileType == llvm::MachO::MH_OBJECT) { - // object files have just one segment load command containing all sections - _endOfLoadCommands = _startOfLoadCommands - + segCommandBaseSize - + file.sections.size() * sectsSize - + sizeof(symtab_command); - _countOfLoadCommands = 2; - if (file.hasMinVersionLoadCommand) { - _endOfLoadCommands += sizeof(version_min_command); - _countOfLoadCommands++; - } - if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) { - _endOfLoadCommands += sizeof(linkedit_data_command); - _countOfLoadCommands++; - } - if (_file.generateDataInCodeLoadCommand) { - _endOfLoadCommands += sizeof(linkedit_data_command); - _countOfLoadCommands++; - } - // Assign file offsets to each section. 
- _startOfSectionsContent = _endOfLoadCommands; - unsigned relocCount = 0; - uint64_t offset = _startOfSectionsContent; - for (const Section § : file.sections) { - if (isZeroFillSection(sect.type)) - _sectInfo[§].fileOffset = 0; - else { - offset = llvm::alignTo(offset, sect.alignment); - _sectInfo[§].fileOffset = offset; - offset += sect.content.size(); - } - relocCount += sect.relocations.size(); - } - _endOfSectionsContent = offset; - - computeSymbolTableSizes(); - computeFunctionStartsSize(); - computeDataInCodeSize(); - - // Align start of relocations. - _startOfRelocations = pointerAlign(_endOfSectionsContent); - _startOfFunctionStarts = _startOfRelocations + relocCount * 8; - _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; - _startOfSymbols = _startOfDataInCode + _dataInCodeSize; - // Add Indirect symbol table. - _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; - // Align start of symbol table and symbol strings. - _startOfSymbolStrings = _startOfIndirectSymbols - + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); - _endOfSymbolStrings = _startOfSymbolStrings - + pointerAlign(_symbolStringPoolSize); - _endOfLinkEdit = _endOfSymbolStrings; - DEBUG_WITH_TYPE("MachOFileLayout", - llvm::dbgs() << "MachOFileLayout()\n" - << " startOfLoadCommands=" << _startOfLoadCommands << "\n" - << " countOfLoadCommands=" << _countOfLoadCommands << "\n" - << " endOfLoadCommands=" << _endOfLoadCommands << "\n" - << " startOfRelocations=" << _startOfRelocations << "\n" - << " startOfSymbols=" << _startOfSymbols << "\n" - << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" - << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" - << " startOfSectionsContent=" << _startOfSectionsContent << "\n" - << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); - } else { - // Final linked images have one load command per segment. 
- _endOfLoadCommands = _startOfLoadCommands - + loadCommandsSize(_countOfLoadCommands, - alwaysIncludeFunctionStarts); - - // Assign section file offsets. - buildFileOffsets(); - buildLinkEditInfo(); - - // LINKEDIT of final linked images has in order: - // rebase info, binding info, lazy binding info, weak binding info, - // data-in-code, symbol table, indirect symbol table, symbol table strings. - _startOfRebaseInfo = _startOfLinkEdit; - _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); - _startOfBindingInfo = _endOfRebaseInfo; - _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); - _startOfLazyBindingInfo = _endOfBindingInfo; - _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); - _startOfExportTrie = _endOfLazyBindingInfo; - _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); - _startOfFunctionStarts = _endOfExportTrie; - _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; - _startOfSymbols = _startOfDataInCode + _dataInCodeSize; - _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; - _startOfSymbolStrings = _startOfIndirectSymbols - + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); - _endOfSymbolStrings = _startOfSymbolStrings - + pointerAlign(_symbolStringPoolSize); - _endOfLinkEdit = _endOfSymbolStrings; - DEBUG_WITH_TYPE("MachOFileLayout", - llvm::dbgs() << "MachOFileLayout()\n" - << " startOfLoadCommands=" << _startOfLoadCommands << "\n" - << " countOfLoadCommands=" << _countOfLoadCommands << "\n" - << " endOfLoadCommands=" << _endOfLoadCommands << "\n" - << " startOfLinkEdit=" << _startOfLinkEdit << "\n" - << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" - << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" - << " startOfBindingInfo=" << _startOfBindingInfo << "\n" - << " endOfBindingInfo=" << _endOfBindingInfo << "\n" - << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" - << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" - << " 
startOfExportTrie=" << _startOfExportTrie << "\n" - << " endOfExportTrie=" << _endOfExportTrie << "\n" - << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" - << " startOfDataInCode=" << _startOfDataInCode << "\n" - << " startOfSymbols=" << _startOfSymbols << "\n" - << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" - << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" - << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); - } -} - -uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count, - bool alwaysIncludeFunctionStarts) { - uint32_t size = 0; - count = 0; - - const size_t segCommandSize = - (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); - const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); - - // Add LC_SEGMENT for each segment. - size += _file.segments.size() * segCommandSize; - count += _file.segments.size(); - // Add section record for each section. - size += _file.sections.size() * sectionSize; - - // If creating a dylib, add LC_ID_DYLIB. 
- if (_file.fileType == llvm::MachO::MH_DYLIB) { - size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); - ++count; - } - - // Add LC_DYLD_INFO - size += sizeof(dyld_info_command); - ++count; - - // Add LC_SYMTAB - size += sizeof(symtab_command); - ++count; - - // Add LC_DYSYMTAB - if (_file.fileType != llvm::MachO::MH_PRELOAD) { - size += sizeof(dysymtab_command); - ++count; - } - - // If main executable add LC_LOAD_DYLINKER - if (_file.fileType == llvm::MachO::MH_EXECUTE) { - size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); - ++count; - } - - // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, - // LC_VERSION_MIN_TVOS - if (_file.hasMinVersionLoadCommand) { - size += sizeof(version_min_command); - ++count; - } - - // Add LC_SOURCE_VERSION - size += sizeof(source_version_command); - ++count; - - // If main executable add LC_MAIN - if (_file.fileType == llvm::MachO::MH_EXECUTE) { - size += sizeof(entry_point_command); - ++count; - } - - // Add LC_LOAD_DYLIB for each dependent dylib. - for (const DependentDylib &dep : _file.dependentDylibs) { - size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); - ++count; - } - - // Add LC_RPATH - for (const StringRef &path : _file.rpaths) { - size += pointerAlign(sizeof(rpath_command) + path.size() + 1); - ++count; - } - - // Add LC_FUNCTION_STARTS if needed - if (!_file.functionStarts.empty() || alwaysIncludeFunctionStarts) { - size += sizeof(linkedit_data_command); - ++count; - } - - // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. - // FIXME: Zero length entries is only to match ld64. Should we change this? 
- if (_file.generateDataInCodeLoadCommand) { - size += sizeof(linkedit_data_command); - ++count; - } - - return size; -} - -static bool overlaps(const Segment &s1, const Segment &s2) { - if (s2.address >= s1.address+s1.size) - return false; - if (s1.address >= s2.address+s2.size) - return false; - return true; -} - -static bool overlaps(const Section &s1, const Section &s2) { - if (s2.address >= s1.address+s1.content.size()) - return false; - if (s1.address >= s2.address+s2.content.size()) - return false; - return true; -} - -void MachOFileLayout::buildFileOffsets() { - // Verify no segments overlap - for (const Segment &sg1 : _file.segments) { - for (const Segment &sg2 : _file.segments) { - if (&sg1 == &sg2) - continue; - if (overlaps(sg1,sg2)) { - _ec = make_error_code(llvm::errc::executable_format_error); - return; - } - } - } - - // Verify no sections overlap - for (const Section &s1 : _file.sections) { - for (const Section &s2 : _file.sections) { - if (&s1 == &s2) - continue; - if (overlaps(s1,s2)) { - _ec = make_error_code(llvm::errc::executable_format_error); - return; - } - } - } - - // Build side table of extra info about segments and sections. - SegExtraInfo t; - t.fileOffset = 0; - for (const Segment &sg : _file.segments) { - _segInfo[&sg] = t; - } - SectionExtraInfo t2; - t2.fileOffset = 0; - // Assign sections to segments. - for (const Section &s : _file.sections) { - _sectInfo[&s] = t2; - bool foundSegment = false; - for (const Segment &sg : _file.segments) { - if (sg.name.equals(s.segmentName)) { - if ((s.address >= sg.address) - && (s.address+s.content.size() <= sg.address+sg.size)) { - _segInfo[&sg].sections.push_back(&s); - foundSegment = true; - break; - } - } - } - if (!foundSegment) { - _ec = make_error_code(llvm::errc::executable_format_error); - return; - } - } - - // Assign file offsets. 
- uint32_t fileOffset = 0; - DEBUG_WITH_TYPE("MachOFileLayout", - llvm::dbgs() << "buildFileOffsets()\n"); - for (const Segment &sg : _file.segments) { - _segInfo[&sg].fileOffset = fileOffset; - if ((_seg1addr == INT64_MAX) && sg.init_access) - _seg1addr = sg.address; - DEBUG_WITH_TYPE("MachOFileLayout", - llvm::dbgs() << " segment=" << sg.name - << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); - - uint32_t segFileSize = 0; - // A segment that is not zero-fill must use a least one page of disk space. - if (sg.init_access) - segFileSize = _file.pageSize; - for (const Section *s : _segInfo[&sg].sections) { - uint32_t sectOffset = s->address - sg.address; - uint32_t sectFileSize = - isZeroFillSection(s->type) ? 0 : s->content.size(); - segFileSize = std::max(segFileSize, sectOffset + sectFileSize); - - _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; - DEBUG_WITH_TYPE("MachOFileLayout", - llvm::dbgs() << " section=" << s->sectionName - << ", fileOffset=" << fileOffset << "\n"); - } - - // round up all segments to page aligned, except __LINKEDIT - if (!sg.name.equals("__LINKEDIT")) { - _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); - fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); - } - _addressOfLinkEdit = sg.address + sg.size; - } - _startOfLinkEdit = fileOffset; -} - -size_t MachOFileLayout::size() const { - return _endOfSymbolStrings; -} - -void MachOFileLayout::writeMachHeader() { - auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); - // dynamic x86 executables on newer OS version should also set the - // CPU_SUBTYPE_LIB64 mask in the CPU subtype. - // FIXME: Check that this is a dynamic executable, not a static one. 
- if (_file.fileType == llvm::MachO::MH_EXECUTE && - cpusubtype == CPU_SUBTYPE_X86_64_ALL && - _file.os == MachOLinkingContext::OS::macOSX) { - uint32_t version; - bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); - if (!failed && _file.minOSverson >= version) - cpusubtype |= CPU_SUBTYPE_LIB64; - } - - mach_header *mh = reinterpret_cast(_buffer); - mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; - mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); - mh->cpusubtype = cpusubtype; - mh->filetype = _file.fileType; - mh->ncmds = _countOfLoadCommands; - mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; - mh->flags = _file.flags; - if (_swap) - swapStruct(*mh); -} - -uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, - uint32_t &index) { - if (sect.indirectSymbols.empty()) - return 0; - uint32_t result = index; - index += sect.indirectSymbols.size(); - return result; -} - -uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { - if (sect.indirectSymbols.empty()) - return 0; - if (sect.type != S_SYMBOL_STUBS) - return 0; - return sect.content.size() / sect.indirectSymbols.size(); -} - -template -llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { - typename T::command* seg = reinterpret_cast(lc); - seg->cmd = T::LC; - seg->cmdsize = sizeof(typename T::command) - + _file.sections.size() * sizeof(typename T::section); - uint8_t *next = lc + seg->cmdsize; - memset(seg->segname, 0, 16); - seg->flags = 0; - seg->vmaddr = 0; - seg->fileoff = _endOfLoadCommands; - seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; - seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; - seg->nsects = _file.sections.size(); - if (seg->nsects) { - seg->vmsize = _file.sections.back().address - + _file.sections.back().content.size(); - seg->filesize = _sectInfo[&_file.sections.back()].fileOffset + - _file.sections.back().content.size() - - 
_sectInfo[&_file.sections.front()].fileOffset; - } - if (_swap) - swapStruct(*seg); - typename T::section *sout = reinterpret_cast - (lc+sizeof(typename T::command)); - uint32_t relOffset = _startOfRelocations; - uint32_t indirectSymRunningIndex = 0; - for (const Section &sin : _file.sections) { - setString16(sin.sectionName, sout->sectname); - setString16(sin.segmentName, sout->segname); - sout->addr = sin.address; - sout->size = sin.content.size(); - sout->offset = _sectInfo[&sin].fileOffset; - sout->align = llvm::Log2_32(sin.alignment); - sout->reloff = sin.relocations.empty() ? 0 : relOffset; - sout->nreloc = sin.relocations.size(); - sout->flags = sin.type | sin.attributes; - sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); - sout->reserved2 = indirectSymbolElementSize(sin); - relOffset += sin.relocations.size() * sizeof(any_relocation_info); - if (_swap) - swapStruct(*sout); - ++sout; - } - lc = next; - return llvm::Error::success(); -} - -template -llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { - uint32_t indirectSymRunningIndex = 0; - for (const Segment &seg : _file.segments) { - // Link edit has no sections and a custom range of address, so handle it - // specially. - SegExtraInfo &segInfo = _segInfo[&seg]; - if (seg.name.equals("__LINKEDIT")) { - size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; - typename T::command* cmd = reinterpret_cast(lc); - cmd->cmd = T::LC; - cmd->cmdsize = sizeof(typename T::command); - uint8_t *next = lc + cmd->cmdsize; - setString16("__LINKEDIT", cmd->segname); - cmd->vmaddr = _addressOfLinkEdit; - cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); - cmd->fileoff = _startOfLinkEdit; - cmd->filesize = linkeditSize; - cmd->initprot = seg.init_access; - cmd->maxprot = seg.max_access; - cmd->nsects = 0; - cmd->flags = 0; - if (_swap) - swapStruct(*cmd); - lc = next; - continue; - } - // Write segment command with trailing sections. 
- typename T::command* cmd = reinterpret_cast(lc); - cmd->cmd = T::LC; - cmd->cmdsize = sizeof(typename T::command) - + segInfo.sections.size() * sizeof(typename T::section); - uint8_t *next = lc + cmd->cmdsize; - setString16(seg.name, cmd->segname); - cmd->vmaddr = seg.address; - cmd->vmsize = seg.size; - cmd->fileoff = segInfo.fileOffset; - cmd->filesize = segInfo.fileSize; - cmd->initprot = seg.init_access; - cmd->maxprot = seg.max_access; - cmd->nsects = segInfo.sections.size(); - cmd->flags = 0; - if (_swap) - swapStruct(*cmd); - typename T::section *sect = reinterpret_cast - (lc+sizeof(typename T::command)); - for (const Section *section : segInfo.sections) { - setString16(section->sectionName, sect->sectname); - setString16(section->segmentName, sect->segname); - sect->addr = section->address; - sect->size = section->content.size(); - if (isZeroFillSection(section->type)) - sect->offset = 0; - else - sect->offset = section->address - seg.address + segInfo.fileOffset; - sect->align = llvm::Log2_32(section->alignment); - sect->reloff = 0; - sect->nreloc = 0; - sect->flags = section->type | section->attributes; - sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); - sect->reserved2 = indirectSymbolElementSize(*section); - if (_swap) - swapStruct(*sect); - ++sect; - } - lc = reinterpret_cast(next); - } - return llvm::Error::success(); -} - -static void writeVersionMinLoadCommand(const NormalizedFile &_file, - bool _swap, - uint8_t *&lc) { - if (!_file.hasMinVersionLoadCommand) - return; - version_min_command *vm = reinterpret_cast(lc); - switch (_file.os) { - case MachOLinkingContext::OS::unknown: - vm->cmd = _file.minOSVersionKind; - vm->cmdsize = sizeof(version_min_command); - vm->version = _file.minOSverson; - vm->sdk = 0; - break; - case MachOLinkingContext::OS::macOSX: - vm->cmd = LC_VERSION_MIN_MACOSX; - vm->cmdsize = sizeof(version_min_command); - vm->version = _file.minOSverson; - vm->sdk = _file.sdkVersion; - break; - case 
MachOLinkingContext::OS::iOS: - case MachOLinkingContext::OS::iOS_simulator: - vm->cmd = LC_VERSION_MIN_IPHONEOS; - vm->cmdsize = sizeof(version_min_command); - vm->version = _file.minOSverson; - vm->sdk = _file.sdkVersion; - break; - } - if (_swap) - swapStruct(*vm); - lc += sizeof(version_min_command); -} - -llvm::Error MachOFileLayout::writeLoadCommands() { - uint8_t *lc = &_buffer[_startOfLoadCommands]; - if (_file.fileType == llvm::MachO::MH_OBJECT) { - // Object files have one unnamed segment which holds all sections. - if (_is64) { - if (auto ec = writeSingleSegmentLoadCommand(lc)) - return ec; - } else { - if (auto ec = writeSingleSegmentLoadCommand(lc)) - return ec; - } - // Add LC_SYMTAB with symbol table info - symtab_command* st = reinterpret_cast(lc); - st->cmd = LC_SYMTAB; - st->cmdsize = sizeof(symtab_command); - st->symoff = _startOfSymbols; - st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + - _file.globalSymbols.size() + _file.undefinedSymbols.size(); - st->stroff = _startOfSymbolStrings; - st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; - if (_swap) - swapStruct(*st); - lc += sizeof(symtab_command); - - // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, - // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS - writeVersionMinLoadCommand(_file, _swap, lc); - - // Add LC_FUNCTION_STARTS if needed. - if (_functionStartsSize != 0) { - linkedit_data_command* dl = reinterpret_cast(lc); - dl->cmd = LC_FUNCTION_STARTS; - dl->cmdsize = sizeof(linkedit_data_command); - dl->dataoff = _startOfFunctionStarts; - dl->datasize = _functionStartsSize; - if (_swap) - swapStruct(*dl); - lc += sizeof(linkedit_data_command); - } - - // Add LC_DATA_IN_CODE if requested. 
- if (_file.generateDataInCodeLoadCommand) { - linkedit_data_command* dl = reinterpret_cast(lc); - dl->cmd = LC_DATA_IN_CODE; - dl->cmdsize = sizeof(linkedit_data_command); - dl->dataoff = _startOfDataInCode; - dl->datasize = _dataInCodeSize; - if (_swap) - swapStruct(*dl); - lc += sizeof(linkedit_data_command); - } - } else { - // Final linked images have sections under segments. - if (_is64) { - if (auto ec = writeSegmentLoadCommands(lc)) - return ec; - } else { - if (auto ec = writeSegmentLoadCommands(lc)) - return ec; - } - - // Add LC_ID_DYLIB command for dynamic libraries. - if (_file.fileType == llvm::MachO::MH_DYLIB) { - dylib_command *dc = reinterpret_cast(lc); - StringRef path = _file.installName; - uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); - dc->cmd = LC_ID_DYLIB; - dc->cmdsize = size; - dc->dylib.name = sizeof(dylib_command); // offset - // needs to be some constant value different than the one in LC_LOAD_DYLIB - dc->dylib.timestamp = 1; - dc->dylib.current_version = _file.currentVersion; - dc->dylib.compatibility_version = _file.compatVersion; - if (_swap) - swapStruct(*dc); - memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); - lc[sizeof(dylib_command) + path.size()] = '\0'; - lc += size; - } - - // Add LC_DYLD_INFO_ONLY. - dyld_info_command* di = reinterpret_cast(lc); - di->cmd = LC_DYLD_INFO_ONLY; - di->cmdsize = sizeof(dyld_info_command); - di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; - di->rebase_size = _rebaseInfo.size(); - di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; - di->bind_size = _bindingInfo.size(); - di->weak_bind_off = 0; - di->weak_bind_size = 0; - di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; - di->lazy_bind_size = _lazyBindingInfo.size(); - di->export_off = _exportTrie.size() ? 
_startOfExportTrie : 0; - di->export_size = _exportTrie.size(); - if (_swap) - swapStruct(*di); - lc += sizeof(dyld_info_command); - - // Add LC_SYMTAB with symbol table info. - symtab_command* st = reinterpret_cast(lc); - st->cmd = LC_SYMTAB; - st->cmdsize = sizeof(symtab_command); - st->symoff = _startOfSymbols; - st->nsyms = _file.stabsSymbols.size() + _file.localSymbols.size() + - _file.globalSymbols.size() + _file.undefinedSymbols.size(); - st->stroff = _startOfSymbolStrings; - st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; - if (_swap) - swapStruct(*st); - lc += sizeof(symtab_command); - - // Add LC_DYSYMTAB - if (_file.fileType != llvm::MachO::MH_PRELOAD) { - dysymtab_command* dst = reinterpret_cast(lc); - dst->cmd = LC_DYSYMTAB; - dst->cmdsize = sizeof(dysymtab_command); - dst->ilocalsym = _symbolTableLocalsStartIndex; - dst->nlocalsym = _file.stabsSymbols.size() + - _file.localSymbols.size(); - dst->iextdefsym = _symbolTableGlobalsStartIndex; - dst->nextdefsym = _file.globalSymbols.size(); - dst->iundefsym = _symbolTableUndefinesStartIndex; - dst->nundefsym = _file.undefinedSymbols.size(); - dst->tocoff = 0; - dst->ntoc = 0; - dst->modtaboff = 0; - dst->nmodtab = 0; - dst->extrefsymoff = 0; - dst->nextrefsyms = 0; - dst->indirectsymoff = _startOfIndirectSymbols; - dst->nindirectsyms = _indirectSymbolTableCount; - dst->extreloff = 0; - dst->nextrel = 0; - dst->locreloff = 0; - dst->nlocrel = 0; - if (_swap) - swapStruct(*dst); - lc += sizeof(dysymtab_command); - } - - // If main executable, add LC_LOAD_DYLINKER - if (_file.fileType == llvm::MachO::MH_EXECUTE) { - // Build LC_LOAD_DYLINKER load command. 
- uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); - dylinker_command* dl = reinterpret_cast(lc); - dl->cmd = LC_LOAD_DYLINKER; - dl->cmdsize = size; - dl->name = sizeof(dylinker_command); // offset - if (_swap) - swapStruct(*dl); - memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); - lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; - lc += size; - } - - // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, - // LC_VERSION_MIN_TVOS - writeVersionMinLoadCommand(_file, _swap, lc); - - // Add LC_SOURCE_VERSION - { - // Note, using a temporary here to appease UB as we may not be aligned - // enough for a struct containing a uint64_t when emitting a 32-bit binary - source_version_command sv; - sv.cmd = LC_SOURCE_VERSION; - sv.cmdsize = sizeof(source_version_command); - sv.version = _file.sourceVersion; - if (_swap) - swapStruct(sv); - memcpy(lc, &sv, sizeof(source_version_command)); - lc += sizeof(source_version_command); - } - - // If main executable, add LC_MAIN. - if (_file.fileType == llvm::MachO::MH_EXECUTE) { - // Build LC_MAIN load command. 
- // Note, using a temporary here to appease UB as we may not be aligned - // enough for a struct containing a uint64_t when emitting a 32-bit binary - entry_point_command ep; - ep.cmd = LC_MAIN; - ep.cmdsize = sizeof(entry_point_command); - ep.entryoff = _file.entryAddress - _seg1addr; - ep.stacksize = _file.stackSize; - if (_swap) - swapStruct(ep); - memcpy(lc, &ep, sizeof(entry_point_command)); - lc += sizeof(entry_point_command); - } - - // Add LC_LOAD_DYLIB commands - for (const DependentDylib &dep : _file.dependentDylibs) { - dylib_command* dc = reinterpret_cast(lc); - uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); - dc->cmd = dep.kind; - dc->cmdsize = size; - dc->dylib.name = sizeof(dylib_command); // offset - // needs to be some constant value different than the one in LC_ID_DYLIB - dc->dylib.timestamp = 2; - dc->dylib.current_version = dep.currentVersion; - dc->dylib.compatibility_version = dep.compatVersion; - if (_swap) - swapStruct(*dc); - memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); - lc[sizeof(dylib_command)+dep.path.size()] = '\0'; - lc += size; - } - - // Add LC_RPATH - for (const StringRef &path : _file.rpaths) { - rpath_command *rpc = reinterpret_cast(lc); - uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); - rpc->cmd = LC_RPATH; - rpc->cmdsize = size; - rpc->path = sizeof(rpath_command); // offset - if (_swap) - swapStruct(*rpc); - memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); - lc[sizeof(rpath_command)+path.size()] = '\0'; - lc += size; - } - - // Add LC_FUNCTION_STARTS if needed. - if (_functionStartsSize != 0) { - linkedit_data_command* dl = reinterpret_cast(lc); - dl->cmd = LC_FUNCTION_STARTS; - dl->cmdsize = sizeof(linkedit_data_command); - dl->dataoff = _startOfFunctionStarts; - dl->datasize = _functionStartsSize; - if (_swap) - swapStruct(*dl); - lc += sizeof(linkedit_data_command); - } - - // Add LC_DATA_IN_CODE if requested. 
- if (_file.generateDataInCodeLoadCommand) { - linkedit_data_command* dl = reinterpret_cast(lc); - dl->cmd = LC_DATA_IN_CODE; - dl->cmdsize = sizeof(linkedit_data_command); - dl->dataoff = _startOfDataInCode; - dl->datasize = _dataInCodeSize; - if (_swap) - swapStruct(*dl); - lc += sizeof(linkedit_data_command); - } - } - assert(lc == &_buffer[_endOfLoadCommands]); - return llvm::Error::success(); -} - -void MachOFileLayout::writeSectionContent() { - for (const Section &s : _file.sections) { - // Copy all section content to output buffer. - if (isZeroFillSection(s.type)) - continue; - if (s.content.empty()) - continue; - uint32_t offset = _sectInfo[&s].fileOffset; - assert(offset >= _endOfLoadCommands); - uint8_t *p = &_buffer[offset]; - memcpy(p, &s.content[0], s.content.size()); - p += s.content.size(); - } -} - -void MachOFileLayout::writeRelocations() { - uint32_t relOffset = _startOfRelocations; - for (Section sect : _file.sections) { - for (Relocation r : sect.relocations) { - any_relocation_info* rb = reinterpret_cast( - &_buffer[relOffset]); - *rb = packRelocation(r, _swap, _bigEndianArch); - relOffset += sizeof(any_relocation_info); - } - } -} - -void MachOFileLayout::appendSymbols(const std::vector &symbols, - uint32_t &symOffset, uint32_t &strOffset) { - for (const Symbol &sym : symbols) { - if (_is64) { - nlist_64* nb = reinterpret_cast(&_buffer[symOffset]); - nb->n_strx = strOffset - _startOfSymbolStrings; - nb->n_type = sym.type | sym.scope; - nb->n_sect = sym.sect; - nb->n_desc = sym.desc; - nb->n_value = sym.value; - if (_swap) - swapStruct(*nb); - symOffset += sizeof(nlist_64); - } else { - nlist* nb = reinterpret_cast(&_buffer[symOffset]); - nb->n_strx = strOffset - _startOfSymbolStrings; - nb->n_type = sym.type | sym.scope; - nb->n_sect = sym.sect; - nb->n_desc = sym.desc; - nb->n_value = sym.value; - if (_swap) - swapStruct(*nb); - symOffset += sizeof(nlist); - } - memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); - strOffset += 
sym.name.size(); - _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. - } -} - -void MachOFileLayout::writeFunctionStartsInfo() { - if (!_functionStartsSize) - return; - memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), - _functionStartsSize); -} - -void MachOFileLayout::writeDataInCodeInfo() { - uint32_t offset = _startOfDataInCode; - for (const DataInCode &entry : _file.dataInCode) { - data_in_code_entry *dst = reinterpret_cast( - &_buffer[offset]); - dst->offset = entry.offset; - dst->length = entry.length; - dst->kind = entry.kind; - if (_swap) - swapStruct(*dst); - offset += sizeof(data_in_code_entry); - } -} - -void MachOFileLayout::writeSymbolTable() { - // Write symbol table and symbol strings in parallel. - uint32_t symOffset = _startOfSymbols; - uint32_t strOffset = _startOfSymbolStrings; - // Reserve n_strx offset of zero to mean no name. - _buffer[strOffset++] = ' '; - _buffer[strOffset++] = '\0'; - appendSymbols(_file.stabsSymbols, symOffset, strOffset); - appendSymbols(_file.localSymbols, symOffset, strOffset); - appendSymbols(_file.globalSymbols, symOffset, strOffset); - appendSymbols(_file.undefinedSymbols, symOffset, strOffset); - // Write indirect symbol table array. - uint32_t *indirects = reinterpret_cast - (&_buffer[_startOfIndirectSymbols]); - if (_file.fileType == llvm::MachO::MH_OBJECT) { - // Object files have sections in same order as input normalized file. - for (const Section §ion : _file.sections) { - for (uint32_t index : section.indirectSymbols) { - if (_swap) - *indirects++ = llvm::sys::getSwappedBytes(index); - else - *indirects++ = index; - } - } - } else { - // Final linked images must sort sections from normalized file. 
- for (const Segment &seg : _file.segments) { - SegExtraInfo &segInfo = _segInfo[&seg]; - for (const Section *section : segInfo.sections) { - for (uint32_t index : section->indirectSymbols) { - if (_swap) - *indirects++ = llvm::sys::getSwappedBytes(index); - else - *indirects++ = index; - } - } - } - } -} - -void MachOFileLayout::writeRebaseInfo() { - memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); -} - -void MachOFileLayout::writeBindingInfo() { - memcpy(&_buffer[_startOfBindingInfo], - _bindingInfo.bytes(), _bindingInfo.size()); -} - -void MachOFileLayout::writeLazyBindingInfo() { - memcpy(&_buffer[_startOfLazyBindingInfo], - _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); -} - -void MachOFileLayout::writeExportInfo() { - memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); -} - -void MachOFileLayout::buildLinkEditInfo() { - buildRebaseInfo(); - buildBindInfo(); - buildLazyBindInfo(); - buildExportTrie(); - computeSymbolTableSizes(); - computeFunctionStartsSize(); - computeDataInCodeSize(); -} - -void MachOFileLayout::buildSectionRelocations() { - -} - -void MachOFileLayout::buildRebaseInfo() { - // TODO: compress rebasing info. - for (const RebaseLocation& entry : _file.rebasingInfo) { - _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); - _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB - | entry.segIndex); - _rebaseInfo.append_uleb128(entry.segOffset); - _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); - } - _rebaseInfo.append_byte(REBASE_OPCODE_DONE); - _rebaseInfo.align(_is64 ? 8 : 4); -} - -void MachOFileLayout::buildBindInfo() { - // TODO: compress bind info. 
- uint64_t lastAddend = 0; - int lastOrdinal = 0x80000000; - StringRef lastSymbolName; - BindType lastType = (BindType)0; - Hex32 lastSegOffset = ~0U; - uint8_t lastSegIndex = (uint8_t)~0U; - for (const BindLocation& entry : _file.bindingInfo) { - if (entry.ordinal != lastOrdinal) { - if (entry.ordinal <= 0) - _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | - (entry.ordinal & BIND_IMMEDIATE_MASK)); - else if (entry.ordinal <= BIND_IMMEDIATE_MASK) - _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - entry.ordinal); - else { - _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - _bindingInfo.append_uleb128(entry.ordinal); - } - lastOrdinal = entry.ordinal; - } - - if (lastSymbolName != entry.symbolName) { - _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); - _bindingInfo.append_string(entry.symbolName); - lastSymbolName = entry.symbolName; - } - - if (lastType != entry.kind) { - _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); - lastType = entry.kind; - } - - if (lastSegIndex != entry.segIndex || lastSegOffset != entry.segOffset) { - _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB - | entry.segIndex); - _bindingInfo.append_uleb128(entry.segOffset); - lastSegIndex = entry.segIndex; - lastSegOffset = entry.segOffset; - } - if (entry.addend != lastAddend) { - _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); - _bindingInfo.append_sleb128(entry.addend); - lastAddend = entry.addend; - } - _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); - } - _bindingInfo.append_byte(BIND_OPCODE_DONE); - _bindingInfo.align(_is64 ? 
8 : 4); -} - -void MachOFileLayout::buildLazyBindInfo() { - for (const BindLocation& entry : _file.lazyBindingInfo) { - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB - | entry.segIndex); - _lazyBindingInfo.append_uleb128(entry.segOffset); - if (entry.ordinal <= 0) - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | - (entry.ordinal & BIND_IMMEDIATE_MASK)); - else if (entry.ordinal <= BIND_IMMEDIATE_MASK) - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | - entry.ordinal); - else { - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - _lazyBindingInfo.append_uleb128(entry.ordinal); - } - // FIXME: We need to | the opcode here with flags. - _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); - _lazyBindingInfo.append_string(entry.symbolName); - _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); - _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); - } - _lazyBindingInfo.align(_is64 ? 8 : 4); -} - -void TrieNode::addSymbol(const Export& entry, - BumpPtrAllocator &allocator, - std::vector &allNodes) { - StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); - for (TrieEdge &edge : _children) { - StringRef edgeStr = edge._subString; - if (partialStr.startswith(edgeStr)) { - // Already have matching edge, go down that path. - edge._child->addSymbol(entry, allocator, allNodes); - return; - } - // See if string has common prefix with existing edge. 
- for (int n=edgeStr.size()-1; n > 0; --n) { - if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { - // Splice in new node: was A -> C, now A -> B -> C - StringRef bNodeStr = edge._child->_cummulativeString; - bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); - auto *bNode = new (allocator) TrieNode(bNodeStr); - allNodes.push_back(bNode); - TrieNode* cNode = edge._child; - StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); - StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); - DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() - << "splice in TrieNode('" << bNodeStr - << "') between edge '" - << abEdgeStr << "' and edge='" - << bcEdgeStr<< "'\n"); - TrieEdge& abEdge = edge; - abEdge._subString = abEdgeStr; - abEdge._child = bNode; - auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); - bNode->_children.insert(bNode->_children.end(), bcEdge); - bNode->addSymbol(entry, allocator, allNodes); - return; - } - } - } - if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { - assert(entry.otherOffset != 0); - } - if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { - assert(entry.otherOffset != 0); - } - // No commonality with any existing child, make a new edge. 
- auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); - auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); - _children.insert(_children.end(), newEdge); - DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() - << "new TrieNode('" << entry.name << "') with edge '" - << partialStr << "' from node='" - << _cummulativeString << "'\n"); - newNode->_address = entry.offset; - newNode->_flags = entry.flags | entry.kind; - newNode->_other = entry.otherOffset; - if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) - newNode->_importedName = entry.otherName.copy(allocator); - newNode->_hasExportInfo = true; - allNodes.push_back(newNode); -} - -void TrieNode::addOrderedNodes(const Export& entry, - std::vector &orderedNodes) { - if (!_ordered) { - orderedNodes.push_back(this); - _ordered = true; - } - - StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); - for (TrieEdge &edge : _children) { - StringRef edgeStr = edge._subString; - if (partialStr.startswith(edgeStr)) { - // Already have matching edge, go down that path. - edge._child->addOrderedNodes(entry, orderedNodes); - return; - } - } -} - -bool TrieNode::updateOffset(uint32_t& offset) { - uint32_t nodeSize = 1; // Length when no export info - if (_hasExportInfo) { - if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { - nodeSize = llvm::getULEB128Size(_flags); - nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. - nodeSize += _importedName.size(); - ++nodeSize; // Trailing zero in imported name. - } else { - nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); - if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) - nodeSize += llvm::getULEB128Size(_other); - } - // Overall node size so far is uleb128 of export info + actual export info. - nodeSize += llvm::getULEB128Size(nodeSize); - } - // Compute size of all child edges. - ++nodeSize; // Byte for number of children. 
- for (TrieEdge &edge : _children) { - nodeSize += edge._subString.size() + 1 // String length. - + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. - } - // On input, 'offset' is new prefered location for this node. - bool result = (_trieOffset != offset); - // Store new location in node object for use by parents. - _trieOffset = offset; - // Update offset for next iteration. - offset += nodeSize; - // Return true if _trieOffset was changed. - return result; -} - -void TrieNode::appendToByteBuffer(ByteBuffer &out) { - if (_hasExportInfo) { - if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { - if (!_importedName.empty()) { - // nodes with re-export info: size, flags, ordinal, import-name - uint32_t nodeSize = llvm::getULEB128Size(_flags) - + llvm::getULEB128Size(_other) - + _importedName.size() + 1; - assert(nodeSize < 256); - out.append_byte(nodeSize); - out.append_uleb128(_flags); - out.append_uleb128(_other); - out.append_string(_importedName); - } else { - // nodes without re-export info: size, flags, ordinal, empty-string - uint32_t nodeSize = llvm::getULEB128Size(_flags) - + llvm::getULEB128Size(_other) + 1; - assert(nodeSize < 256); - out.append_byte(nodeSize); - out.append_uleb128(_flags); - out.append_uleb128(_other); - out.append_byte(0); - } - } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { - // Nodes with export info: size, flags, address, other - uint32_t nodeSize = llvm::getULEB128Size(_flags) - + llvm::getULEB128Size(_address) - + llvm::getULEB128Size(_other); - assert(nodeSize < 256); - out.append_byte(nodeSize); - out.append_uleb128(_flags); - out.append_uleb128(_address); - out.append_uleb128(_other); - } else { - // Nodes with export info: size, flags, address - uint32_t nodeSize = llvm::getULEB128Size(_flags) - + llvm::getULEB128Size(_address); - assert(nodeSize < 256); - out.append_byte(nodeSize); - out.append_uleb128(_flags); - out.append_uleb128(_address); - } - } else { - // Node with no export info. 
- uint32_t nodeSize = 0; - out.append_byte(nodeSize); - } - // Add number of children. - assert(_children.size() < 256); - out.append_byte(_children.size()); - // Append each child edge substring and node offset. - for (TrieEdge &edge : _children) { - out.append_string(edge._subString); - out.append_uleb128(edge._child->_trieOffset); - } -} - -void MachOFileLayout::buildExportTrie() { - if (_file.exportInfo.empty()) - return; - - // For all temporary strings and objects used building trie. - BumpPtrAllocator allocator; - - // Build trie of all exported symbols. - auto *rootNode = new (allocator) TrieNode(StringRef()); - std::vector allNodes; - allNodes.reserve(_file.exportInfo.size()*2); - allNodes.push_back(rootNode); - for (const Export& entry : _file.exportInfo) { - rootNode->addSymbol(entry, allocator, allNodes); - } - - std::vector orderedNodes; - orderedNodes.reserve(allNodes.size()); - - for (const Export& entry : _file.exportInfo) - rootNode->addOrderedNodes(entry, orderedNodes); - - // Assign each node in the vector an offset in the trie stream, iterating - // until all uleb128 sizes have stabilized. - bool more; - do { - uint32_t offset = 0; - more = false; - for (TrieNode* node : orderedNodes) { - if (node->updateOffset(offset)) - more = true; - } - } while (more); - - // Serialize trie to ByteBuffer. - for (TrieNode* node : orderedNodes) { - node->appendToByteBuffer(_exportTrie); - } - _exportTrie.align(_is64 ? 8 : 4); -} - -void MachOFileLayout::computeSymbolTableSizes() { - // MachO symbol tables have three ranges: locals, globals, and undefines - const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); - _symbolTableSize = nlistSize * (_file.stabsSymbols.size() - + _file.localSymbols.size() - + _file.globalSymbols.size() - + _file.undefinedSymbols.size()); - // Always reserve 1-byte for the empty string and 1-byte for its terminator. 
- _symbolStringPoolSize = 2; - for (const Symbol &sym : _file.stabsSymbols) { - _symbolStringPoolSize += (sym.name.size()+1); - } - for (const Symbol &sym : _file.localSymbols) { - _symbolStringPoolSize += (sym.name.size()+1); - } - for (const Symbol &sym : _file.globalSymbols) { - _symbolStringPoolSize += (sym.name.size()+1); - } - for (const Symbol &sym : _file.undefinedSymbols) { - _symbolStringPoolSize += (sym.name.size()+1); - } - _symbolTableLocalsStartIndex = 0; - _symbolTableGlobalsStartIndex = _file.stabsSymbols.size() + - _file.localSymbols.size(); - _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex - + _file.globalSymbols.size(); - - _indirectSymbolTableCount = 0; - for (const Section § : _file.sections) { - _indirectSymbolTableCount += sect.indirectSymbols.size(); - } -} - -void MachOFileLayout::computeFunctionStartsSize() { - _functionStartsSize = _file.functionStarts.size(); -} - -void MachOFileLayout::computeDataInCodeSize() { - _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); -} - -void MachOFileLayout::writeLinkEditContent() { - if (_file.fileType == llvm::MachO::MH_OBJECT) { - writeRelocations(); - writeFunctionStartsInfo(); - writeDataInCodeInfo(); - writeSymbolTable(); - } else { - writeRebaseInfo(); - writeBindingInfo(); - writeLazyBindingInfo(); - // TODO: add weak binding info - writeExportInfo(); - writeFunctionStartsInfo(); - writeDataInCodeInfo(); - writeSymbolTable(); - } -} - -llvm::Error MachOFileLayout::writeBinary(StringRef path) { - // Check for pending error from constructor. - if (_ec) - return llvm::errorCodeToError(_ec); - // Create FileOutputBuffer with calculated size. - unsigned flags = 0; - if (_file.fileType != llvm::MachO::MH_OBJECT) - flags = llvm::FileOutputBuffer::F_executable; - Expected> fobOrErr = - llvm::FileOutputBuffer::create(path, size(), flags); - if (Error E = fobOrErr.takeError()) - return E; - std::unique_ptr &fob = *fobOrErr; - // Write content. 
- _buffer = fob->getBufferStart(); - writeMachHeader(); - if (auto ec = writeLoadCommands()) - return ec; - writeSectionContent(); - writeLinkEditContent(); - if (Error E = fob->commit()) - return E; - - return llvm::Error::success(); -} - -/// Takes in-memory normalized view and writes a mach-o object file. -llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { - MachOFileLayout layout(file, false); - return layout.writeBinary(path); -} - -} // namespace normalized -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp deleted file mode 100644 index ddfd1764f7e1..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp +++ /dev/null @@ -1,1657 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// -/// \file Converts from in-memory Atoms to in-memory normalized mach-o. 
-/// -/// +------------+ -/// | normalized | -/// +------------+ -/// ^ -/// | -/// | -/// +-------+ -/// | Atoms | -/// +-------+ - -#include "ArchHandler.h" -#include "DebugInfo.h" -#include "MachONormalizedFile.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include -#include -#include - -using llvm::StringRef; -using llvm::isa; -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; -using namespace lld; - -namespace { - -struct AtomInfo { - const DefinedAtom *atom; - uint64_t offsetInSection; -}; - -struct SectionInfo { - SectionInfo(StringRef seg, StringRef sect, SectionType type, - const MachOLinkingContext &ctxt, uint32_t attr, - bool relocsToDefinedCanBeImplicit); - - StringRef segmentName; - StringRef sectionName; - SectionType type; - uint32_t attributes; - uint64_t address; - uint64_t size; - uint16_t alignment; - - /// If this is set, the any relocs in this section which point to defined - /// addresses can be implicitly generated. This is the case for the - /// __eh_frame section where references to the function can be implicit if the - /// function is defined. 
- bool relocsToDefinedCanBeImplicit; - - - std::vector atomsAndOffsets; - uint32_t normalizedSectionIndex; - uint32_t finalSectionIndex; -}; - -SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, - const MachOLinkingContext &ctxt, uint32_t attrs, - bool relocsToDefinedCanBeImplicit) - : segmentName(sg), sectionName(sct), type(t), attributes(attrs), - address(0), size(0), alignment(1), - relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit), - normalizedSectionIndex(0), finalSectionIndex(0) { - uint16_t align = 1; - if (ctxt.sectionAligned(segmentName, sectionName, align)) { - alignment = align; - } -} - -struct SegmentInfo { - SegmentInfo(StringRef name); - - StringRef name; - uint64_t address; - uint64_t size; - uint32_t init_access; - uint32_t max_access; - std::vector sections; - uint32_t normalizedSegmentIndex; -}; - -SegmentInfo::SegmentInfo(StringRef n) - : name(n), address(0), size(0), init_access(0), max_access(0), - normalizedSegmentIndex(0) { -} - -class Util { -public: - Util(const MachOLinkingContext &ctxt) - : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr), - _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {} - ~Util(); - - void processDefinedAtoms(const lld::File &atomFile); - void processAtomAttributes(const DefinedAtom *atom); - void assignAtomToSection(const DefinedAtom *atom); - void organizeSections(); - void assignAddressesToSections(const NormalizedFile &file); - uint32_t fileFlags(); - void copySegmentInfo(NormalizedFile &file); - void copySectionInfo(NormalizedFile &file); - void updateSectionInfo(NormalizedFile &file); - void buildAtomToAddressMap(); - llvm::Error synthesizeDebugNotes(NormalizedFile &file); - llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file); - void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); - void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); - void addExportInfo(const lld::File &, NormalizedFile &file); - 
void addSectionRelocs(const lld::File &, NormalizedFile &file); - void addFunctionStarts(const lld::File &, NormalizedFile &file); - void buildDataInCodeArray(const lld::File &, NormalizedFile &file); - void addDependentDylibs(const lld::File &, NormalizedFile &file); - void copyEntryPointAddress(NormalizedFile &file); - void copySectionContent(NormalizedFile &file); - - bool allSourceFilesHaveMinVersions() const { - return _allSourceFilesHaveMinVersions; - } - - uint32_t minVersion() const { - return _minVersion; - } - - LoadCommandType minVersionCommandType() const { - return _minVersionCommandType; - } - -private: - typedef std::map TypeToSection; - typedef llvm::DenseMap AtomToAddress; - - struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; - typedef llvm::StringMap DylibPathToInfo; - - SectionInfo *sectionForAtom(const DefinedAtom*); - SectionInfo *getRelocatableSection(DefinedAtom::ContentType type); - SectionInfo *getFinalSection(DefinedAtom::ContentType type); - void appendAtom(SectionInfo *sect, const DefinedAtom *atom); - SegmentInfo *segmentForName(StringRef segName); - void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); - void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &); - void copySectionContent(SectionInfo *si, ContentBytes &content); - uint16_t descBits(const DefinedAtom* atom); - int dylibOrdinal(const SharedLibraryAtom *sa); - void segIndexForSection(const SectionInfo *sect, - uint8_t &segmentIndex, uint64_t &segmentStartAddr); - const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); - const Atom *targetOfStub(const DefinedAtom *stubAtom); - llvm::Error getSymbolTableRegion(const DefinedAtom* atom, - bool &inGlobalsRegion, - SymbolScope &symbolScope); - void appendSection(SectionInfo *si, NormalizedFile &file); - uint32_t sectionIndexForAtom(const Atom *atom); - void fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset, - NormalizedFile &file); - - typedef llvm::DenseMap AtomToIndex; - 
struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; }; - struct AtomSorter { - bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); - }; - struct SegmentSorter { - bool operator()(const SegmentInfo *left, const SegmentInfo *right); - static unsigned weight(const SegmentInfo *); - }; - struct TextSectionSorter { - bool operator()(const SectionInfo *left, const SectionInfo *right); - static unsigned weight(const SectionInfo *); - }; - - const MachOLinkingContext &_ctx; - mach_o::ArchHandler &_archHandler; - llvm::BumpPtrAllocator _allocator; - std::vector _sectionInfos; - std::vector _segmentInfos; - TypeToSection _sectionMap; - std::vector _customSections; - AtomToAddress _atomToAddress; - DylibPathToInfo _dylibInfo; - const DefinedAtom *_entryAtom; - AtomToIndex _atomToSymbolIndex; - std::vector _machHeaderAliasAtoms; - bool _hasTLVDescriptors; - bool _subsectionsViaSymbols; - bool _allSourceFilesHaveMinVersions = true; - LoadCommandType _minVersionCommandType = (LoadCommandType)0; - uint32_t _minVersion = 0; - std::vector _stabs; -}; - -Util::~Util() { - // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs - // to be deleted. - for (SectionInfo *si : _sectionInfos) { - // clear() destroys vector elements, but does not deallocate. - // Instead use swap() to deallocate vector buffer. - std::vector empty; - si->atomsAndOffsets.swap(empty); - } - // The SegmentInfo structs are BumpPtr allocated, but sections needs - // to be deleted. - for (SegmentInfo *sgi : _segmentInfos) { - std::vector empty2; - sgi->sections.swap(empty2); - } -} - -SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) { - StringRef segmentName; - StringRef sectionName; - SectionType sectionType; - SectionAttr sectionAttrs; - bool relocsToDefinedCanBeImplicit; - - // Use same table used by when parsing .o files. 
- relocatableSectionInfoForContentType(type, segmentName, sectionName, - sectionType, sectionAttrs, - relocsToDefinedCanBeImplicit); - // If we already have a SectionInfo with this name, re-use it. - // This can happen if two ContentType map to the same mach-o section. - for (auto sect : _sectionMap) { - if (sect.second->sectionName.equals(sectionName) && - sect.second->segmentName.equals(segmentName)) { - return sect.second; - } - } - // Otherwise allocate new SectionInfo object. - auto *sect = new (_allocator) - SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs, - relocsToDefinedCanBeImplicit); - _sectionInfos.push_back(sect); - _sectionMap[type] = sect; - return sect; -} - -#define ENTRY(seg, sect, type, atomType) \ - {seg, sect, type, DefinedAtom::atomType } - -struct MachOFinalSectionFromAtomType { - StringRef segmentName; - StringRef sectionName; - SectionType sectionType; - DefinedAtom::ContentType atomType; -}; - -const MachOFinalSectionFromAtomType sectsToAtomType[] = { - ENTRY("__TEXT", "__text", S_REGULAR, typeCode), - ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader), - ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), - ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), - ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), - ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4), - ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8), - ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16), - ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub), - ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper), - ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), - ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), - ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo), - ENTRY("__DATA", "__data", S_REGULAR, typeData), - ENTRY("__DATA", "__const", S_REGULAR, typeConstData), - ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), - ENTRY("__DATA", "__la_symbol_ptr", 
S_LAZY_SYMBOL_POINTERS, - typeLazyPointer), - ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, - typeInitializerPtr), - ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, - typeTerminatorPtr), - ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, - typeGOT), - ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS, - typeNonLazyPointer), - ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, - typeThunkTLV), - ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, - typeTLVInitialData), - ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS, - typeTLVInitializerPtr), - ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, - typeTLVInitialZeroFill), - ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), - ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), -}; -#undef ENTRY - -SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) { - for (auto &p : sectsToAtomType) { - if (p.atomType != atomType) - continue; - SectionAttr sectionAttrs = 0; - switch (atomType) { - case DefinedAtom::typeMachHeader: - case DefinedAtom::typeCode: - case DefinedAtom::typeStub: - case DefinedAtom::typeStubHelper: - sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; - break; - case DefinedAtom::typeThunkTLV: - _hasTLVDescriptors = true; - break; - default: - break; - } - // If we already have a SectionInfo with this name, re-use it. - // This can happen if two ContentType map to the same mach-o section. - for (auto sect : _sectionMap) { - if (sect.second->sectionName.equals(p.sectionName) && - sect.second->segmentName.equals(p.segmentName)) { - return sect.second; - } - } - // Otherwise allocate new SectionInfo object. 
- auto *sect = new (_allocator) SectionInfo( - p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs, - /* relocsToDefinedCanBeImplicit */ false); - _sectionInfos.push_back(sect); - _sectionMap[atomType] = sect; - return sect; - } - llvm_unreachable("content type not yet supported"); -} - -SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { - if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { - // Section for this atom is derived from content type. - DefinedAtom::ContentType type = atom->contentType(); - auto pos = _sectionMap.find(type); - if ( pos != _sectionMap.end() ) - return pos->second; - bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); - return rMode ? getRelocatableSection(type) : getFinalSection(type); - } else { - // This atom needs to be in a custom section. - StringRef customName = atom->customSectionName(); - // Look to see if we have already allocated the needed custom section. - for(SectionInfo *sect : _customSections) { - const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom; - if (firstAtom->customSectionName().equals(customName)) { - return sect; - } - } - // Not found, so need to create a new custom section. - size_t seperatorIndex = customName.find('/'); - assert(seperatorIndex != StringRef::npos); - StringRef segName = customName.slice(0, seperatorIndex); - StringRef sectName = customName.drop_front(seperatorIndex + 1); - auto *sect = - new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx, - 0, /* relocsToDefinedCanBeImplicit */ false); - _customSections.push_back(sect); - _sectionInfos.push_back(sect); - return sect; - } -} - -void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { - // Figure out offset for atom in this section given alignment constraints. 
- uint64_t offset = sect->size; - DefinedAtom::Alignment atomAlign = atom->alignment(); - uint64_t align = atomAlign.value; - uint64_t requiredModulus = atomAlign.modulus; - uint64_t currentModulus = (offset % align); - if ( currentModulus != requiredModulus ) { - if ( requiredModulus > currentModulus ) - offset += requiredModulus-currentModulus; - else - offset += align+requiredModulus-currentModulus; - } - // Record max alignment of any atom in this section. - if (align > sect->alignment) - sect->alignment = atomAlign.value; - // Assign atom to this section with this offset. - AtomInfo ai = {atom, offset}; - sect->atomsAndOffsets.push_back(ai); - // Update section size to include this atom. - sect->size = offset + atom->size(); -} - -void Util::processDefinedAtoms(const lld::File &atomFile) { - for (const DefinedAtom *atom : atomFile.defined()) { - processAtomAttributes(atom); - assignAtomToSection(atom); - } -} - -void Util::processAtomAttributes(const DefinedAtom *atom) { - if (auto *machoFile = dyn_cast(&atom->file())) { - // If the file doesn't use subsections via symbols, then make sure we don't - // add that flag to the final output file if we have a relocatable file. - if (!machoFile->subsectionsViaSymbols()) - _subsectionsViaSymbols = false; - - // All the source files must have min versions for us to output an object - // file with a min version. - if (auto v = machoFile->minVersion()) - _minVersion = std::max(_minVersion, v); - else - _allSourceFilesHaveMinVersions = false; - - // If we don't have a platform load command, but one of the source files - // does, then take the one from the file. - if (!_minVersionCommandType) - if (auto v = machoFile->minVersionLoadCommandKind()) - _minVersionCommandType = v; - } -} - -void Util::assignAtomToSection(const DefinedAtom *atom) { - if (atom->contentType() == DefinedAtom::typeMachHeader) { - _machHeaderAliasAtoms.push_back(atom); - // Assign atom to this section with this offset. 
- AtomInfo ai = {atom, 0}; - sectionForAtom(atom)->atomsAndOffsets.push_back(ai); - } else if (atom->contentType() == DefinedAtom::typeDSOHandle) - _machHeaderAliasAtoms.push_back(atom); - else - appendAtom(sectionForAtom(atom), atom); -} - -SegmentInfo *Util::segmentForName(StringRef segName) { - for (SegmentInfo *si : _segmentInfos) { - if ( si->name.equals(segName) ) - return si; - } - auto *info = new (_allocator) SegmentInfo(segName); - - // Set the initial segment protection. - if (segName.equals("__TEXT")) - info->init_access = VM_PROT_READ | VM_PROT_EXECUTE; - else if (segName.equals("__PAGEZERO")) - info->init_access = 0; - else if (segName.equals("__LINKEDIT")) - info->init_access = VM_PROT_READ; - else { - // All others default to read-write - info->init_access = VM_PROT_READ | VM_PROT_WRITE; - } - - // Set max segment protection - // Note, its overkill to use a switch statement here, but makes it so much - // easier to use switch coverage to catch new cases. - switch (_ctx.os()) { - case lld::MachOLinkingContext::OS::unknown: - case lld::MachOLinkingContext::OS::macOSX: - case lld::MachOLinkingContext::OS::iOS_simulator: - if (segName.equals("__PAGEZERO")) { - info->max_access = 0; - break; - } - // All others default to all - info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; - break; - case lld::MachOLinkingContext::OS::iOS: - // iPhoneOS always uses same protection for max and initial - info->max_access = info->init_access; - break; - } - _segmentInfos.push_back(info); - return info; -} - -unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { - return llvm::StringSwitch(seg->name) - .Case("__PAGEZERO", 1) - .Case("__TEXT", 2) - .Case("__DATA", 3) - .Default(100); -} - -bool Util::SegmentSorter::operator()(const SegmentInfo *left, - const SegmentInfo *right) { - return (weight(left) < weight(right)); -} - -unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { - return llvm::StringSwitch(sect->sectionName) - 
.Case("__text", 1) - .Case("__stubs", 2) - .Case("__stub_helper", 3) - .Case("__const", 4) - .Case("__cstring", 5) - .Case("__unwind_info", 98) - .Case("__eh_frame", 99) - .Default(10); -} - -bool Util::TextSectionSorter::operator()(const SectionInfo *left, - const SectionInfo *right) { - return (weight(left) < weight(right)); -} - -void Util::organizeSections() { - // NOTE!: Keep this in sync with assignAddressesToSections. - switch (_ctx.outputMachOType()) { - case llvm::MachO::MH_EXECUTE: - // Main executables, need a zero-page segment - segmentForName("__PAGEZERO"); - // Fall into next case. - LLVM_FALLTHROUGH; - case llvm::MachO::MH_DYLIB: - case llvm::MachO::MH_BUNDLE: - // All dynamic code needs TEXT segment to hold the load commands. - segmentForName("__TEXT"); - break; - default: - break; - } - segmentForName("__LINKEDIT"); - - // Group sections into segments. - for (SectionInfo *si : _sectionInfos) { - SegmentInfo *seg = segmentForName(si->segmentName); - seg->sections.push_back(si); - } - // Sort segments. - std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter()); - - // Sort sections within segments. - for (SegmentInfo *seg : _segmentInfos) { - if (seg->name.equals("__TEXT")) { - std::sort(seg->sections.begin(), seg->sections.end(), - TextSectionSorter()); - } - } - - // Record final section indexes. - uint32_t segmentIndex = 0; - uint32_t sectionIndex = 1; - for (SegmentInfo *seg : _segmentInfos) { - seg->normalizedSegmentIndex = segmentIndex++; - for (SectionInfo *sect : seg->sections) - sect->finalSectionIndex = sectionIndex++; - } -} - -void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) { - seg->address = addr; - for (SectionInfo *sect : seg->sections) { - sect->address = llvm::alignTo(addr, sect->alignment); - addr = sect->address + sect->size; - } - seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); -} - -// __TEXT segment lays out backwards so padding is at front after load commands. 
-void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg, - uint64_t &addr) { - seg->address = addr; - // Walks sections starting at end to calculate padding for start. - int64_t taddr = 0; - for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) { - SectionInfo *sect = *it; - taddr -= sect->size; - taddr = taddr & (0 - sect->alignment); - } - int64_t padding = taddr - hlcSize; - while (padding < 0) - padding += _ctx.pageSize(); - // Start assigning section address starting at padded offset. - addr += (padding + hlcSize); - for (SectionInfo *sect : seg->sections) { - sect->address = llvm::alignTo(addr, sect->alignment); - addr = sect->address + sect->size; - } - seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); -} - -void Util::assignAddressesToSections(const NormalizedFile &file) { - // NOTE!: Keep this in sync with organizeSections. - size_t hlcSize = headerAndLoadCommandsSize(file, - _ctx.generateFunctionStartsLoadCommand()); - uint64_t address = 0; - for (SegmentInfo *seg : _segmentInfos) { - if (seg->name.equals("__PAGEZERO")) { - seg->size = _ctx.pageZeroSize(); - address += seg->size; - } - else if (seg->name.equals("__TEXT")) { - // _ctx.baseAddress() == 0 implies it was either unspecified or - // pageZeroSize is also 0. In either case resetting address is safe. - address = _ctx.baseAddress() ? 
_ctx.baseAddress() : address; - layoutSectionsInTextSegment(hlcSize, seg, address); - } else - layoutSectionsInSegment(seg, address); - - address = llvm::alignTo(address, _ctx.pageSize()); - } - DEBUG_WITH_TYPE("WriterMachO-norm", - llvm::dbgs() << "assignAddressesToSections()\n"; - for (SegmentInfo *sgi : _segmentInfos) { - llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address) - << ", size=" << llvm::format("0x%08llX", sgi->size) - << ", segment-name='" << sgi->name - << "'\n"; - for (SectionInfo *si : sgi->sections) { - llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address) - << ", size=" << llvm::format("0x%08llX", si->size) - << ", section-name='" << si->sectionName - << "\n"; - } - } - ); -} - -void Util::copySegmentInfo(NormalizedFile &file) { - for (SegmentInfo *sgi : _segmentInfos) { - Segment seg; - seg.name = sgi->name; - seg.address = sgi->address; - seg.size = sgi->size; - seg.init_access = sgi->init_access; - seg.max_access = sgi->max_access; - file.segments.push_back(seg); - } -} - -void Util::appendSection(SectionInfo *si, NormalizedFile &file) { - // Add new empty section to end of file.sections. - Section temp; - file.sections.push_back(std::move(temp)); - Section* normSect = &file.sections.back(); - // Copy fields to normalized section. - normSect->segmentName = si->segmentName; - normSect->sectionName = si->sectionName; - normSect->type = si->type; - normSect->attributes = si->attributes; - normSect->address = si->address; - normSect->alignment = si->alignment; - // Record where normalized section is. - si->normalizedSectionIndex = file.sections.size()-1; -} - -void Util::copySectionContent(NormalizedFile &file) { - const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); - - // Utility function for ArchHandler to find address of atom in output file. 
- auto addrForAtom = [&] (const Atom &atom) -> uint64_t { - auto pos = _atomToAddress.find(&atom); - assert(pos != _atomToAddress.end()); - return pos->second; - }; - - auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t { - for (const SectionInfo *sectInfo : _sectionInfos) - for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) - if (atomInfo.atom == &atom) - return sectInfo->address; - llvm_unreachable("atom not assigned to section"); - }; - - for (SectionInfo *si : _sectionInfos) { - Section *normSect = &file.sections[si->normalizedSectionIndex]; - if (isZeroFillSection(si->type)) { - const uint8_t *empty = nullptr; - normSect->content = llvm::makeArrayRef(empty, si->size); - continue; - } - // Copy content from atoms to content buffer for section. - llvm::MutableArrayRef sectionContent; - if (si->size) { - uint8_t *sectContent = file.ownedAllocations.Allocate(si->size); - sectionContent = llvm::MutableArrayRef(sectContent, si->size); - normSect->content = sectionContent; - } - for (AtomInfo &ai : si->atomsAndOffsets) { - if (!ai.atom->size()) { - assert(ai.atom->begin() == ai.atom->end() && - "Cannot have references without content"); - continue; - } - auto atomContent = sectionContent.slice(ai.offsetInSection, - ai.atom->size()); - _archHandler.generateAtomContent(*ai.atom, r, addrForAtom, - sectionAddrForAtom, _ctx.baseAddress(), - atomContent); - } - } -} - -void Util::copySectionInfo(NormalizedFile &file) { - file.sections.reserve(_sectionInfos.size()); - // Write sections grouped by segment. - for (SegmentInfo *sgi : _segmentInfos) { - for (SectionInfo *si : sgi->sections) { - appendSection(si, file); - } - } -} - -void Util::updateSectionInfo(NormalizedFile &file) { - file.sections.reserve(_sectionInfos.size()); - // sections grouped by segment. 
- for (SegmentInfo *sgi : _segmentInfos) { - Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex]; - normSeg->address = sgi->address; - normSeg->size = sgi->size; - for (SectionInfo *si : sgi->sections) { - Section *normSect = &file.sections[si->normalizedSectionIndex]; - normSect->address = si->address; - } - } -} - -void Util::copyEntryPointAddress(NormalizedFile &nFile) { - if (!_entryAtom) { - nFile.entryAddress = 0; - return; - } - - if (_ctx.outputTypeHasEntry()) { - if (_archHandler.isThumbFunction(*_entryAtom)) - nFile.entryAddress = (_atomToAddress[_entryAtom] | 1); - else - nFile.entryAddress = _atomToAddress[_entryAtom]; - } -} - -void Util::buildAtomToAddressMap() { - DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() - << "assign atom addresses:\n"); - const bool lookForEntry = _ctx.outputTypeHasEntry(); - for (SectionInfo *sect : _sectionInfos) { - for (const AtomInfo &info : sect->atomsAndOffsets) { - _atomToAddress[info.atom] = sect->address + info.offsetInSection; - if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && - (info.atom->size() != 0) && - info.atom->name() == _ctx.entrySymbolName()) { - _entryAtom = info.atom; - } - DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() - << " address=" - << llvm::format("0x%016X", _atomToAddress[info.atom]) - << llvm::format(" 0x%09lX", info.atom) - << ", file=#" - << info.atom->file().ordinal() - << ", atom=#" - << info.atom->ordinal() - << ", name=" - << info.atom->name() - << ", type=" - << info.atom->contentType() - << "\n"); - } - } - DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() - << "assign header alias atom addresses:\n"); - for (const Atom *atom : _machHeaderAliasAtoms) { - _atomToAddress[atom] = _ctx.baseAddress(); -#ifndef NDEBUG - if (auto *definedAtom = dyn_cast(atom)) { - DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() - << " address=" - << llvm::format("0x%016X", _atomToAddress[atom]) - << llvm::format(" 0x%09lX", atom) - << ", file=#" - << 
definedAtom->file().ordinal() - << ", atom=#" - << definedAtom->ordinal() - << ", name=" - << definedAtom->name() - << ", type=" - << definedAtom->contentType() - << "\n"); - } else { - DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() - << " address=" - << llvm::format("0x%016X", _atomToAddress[atom]) - << " atom=" << atom - << " name=" << atom->name() << "\n"); - } -#endif - } -} - -llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) { - - // Bail out early if we don't need to generate a debug map. - if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap) - return llvm::Error::success(); - - std::vector atomsNeedingDebugNotes; - std::set filesWithStabs; - bool objFileHasDwarf = false; - const File *objFile = nullptr; - - for (SectionInfo *sect : _sectionInfos) { - for (const AtomInfo &info : sect->atomsAndOffsets) { - if (const DefinedAtom *atom = dyn_cast(info.atom)) { - - // FIXME: No stabs/debug-notes for symbols that wouldn't be in the - // symbol table. - // FIXME: No stabs/debug-notes for kernel dtrace probes. - - if (atom->contentType() == DefinedAtom::typeCFI || - atom->contentType() == DefinedAtom::typeCString) - continue; - - // Whenever we encounter a new file, update the 'objfileHasDwarf' flag. - if (&info.atom->file() != objFile) { - objFileHasDwarf = false; - if (const mach_o::MachOFile *atomFile = - dyn_cast(&info.atom->file())) { - if (atomFile->debugInfo()) { - if (isa(atomFile->debugInfo())) - objFileHasDwarf = true; - else if (isa(atomFile->debugInfo())) - filesWithStabs.insert(atomFile); - } - } - } - - // If this atom is from a file that needs dwarf, add it to the list. - if (objFileHasDwarf) - atomsNeedingDebugNotes.push_back(info.atom); - } - } - } - - // Sort atoms needing debug notes by file ordinal, then atom ordinal. 
- std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(), - [](const DefinedAtom *lhs, const DefinedAtom *rhs) { - if (lhs->file().ordinal() != rhs->file().ordinal()) - return (lhs->file().ordinal() < rhs->file().ordinal()); - return (lhs->ordinal() < rhs->ordinal()); - }); - - // FIXME: Handle : Add -add_ast_path option to \ - // linker which add N_AST stab entry to output - // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64. - - StringRef oldFileName = ""; - StringRef oldDirPath = ""; - bool wroteStartSO = false; - std::unordered_set seenFiles; - for (const DefinedAtom *atom : atomsNeedingDebugNotes) { - const auto &atomFile = cast(atom->file()); - assert(dyn_cast_or_null(atomFile.debugInfo()) - && "file for atom needing debug notes does not contain dwarf"); - auto &dwarf = cast(*atomFile.debugInfo()); - - auto &tu = dwarf.translationUnitSource(); - StringRef newFileName = tu.name; - StringRef newDirPath = tu.path; - - // Add an SO whenever the TU source file changes. - if (newFileName != oldFileName || newDirPath != oldDirPath) { - // Translation unit change, emit ending SO - if (oldFileName != "") - _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); - - oldFileName = newFileName; - oldDirPath = newDirPath; - - // If newDirPath doesn't end with a '/' we need to add one: - if (newDirPath.back() != '/') { - char *p = - file.ownedAllocations.Allocate(newDirPath.size() + 2); - memcpy(p, newDirPath.data(), newDirPath.size()); - p[newDirPath.size()] = '/'; - p[newDirPath.size() + 1] = '\0'; - newDirPath = p; - } - - // New translation unit, emit start SOs: - _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath)); - _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName)); - - // Synthesize OSO for start of file. 
- char *fullPath = nullptr; - { - SmallString<1024> pathBuf(atomFile.path()); - if (auto EC = llvm::sys::fs::make_absolute(pathBuf)) - return llvm::errorCodeToError(EC); - fullPath = file.ownedAllocations.Allocate(pathBuf.size() + 1); - memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1); - } - - // Get mod time. - uint32_t modTime = 0; - llvm::sys::fs::file_status stat; - if (!llvm::sys::fs::status(fullPath, stat)) - if (llvm::sys::fs::exists(stat)) - modTime = llvm::sys::toTimeT(stat.getLastModificationTime()); - - _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1, - modTime, fullPath)); - // linker should put cpusubtype in n_sect field - // of nlist entry for N_OSO debug note entries. - wroteStartSO = true; - } - - if (atom->contentType() == DefinedAtom::typeCode) { - // Synthesize BNSYM and start FUN stabs. - _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, "")); - _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name())); - // Synthesize any SOL stabs needed - // FIXME: add SOL stabs. - _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0, - atom->rawContent().size(), "")); - _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0, - atom->rawContent().size(), "")); - } else { - if (atom->scope() == Atom::scopeTranslationUnit) - _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name())); - else - _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name())); - } - } - - // Emit ending SO if necessary. - if (wroteStartSO) - _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); - - // Copy any stabs from .o file. - for (const auto *objFile : filesWithStabs) { - const auto &stabsList = - cast(objFile->debugInfo())->stabs(); - for (auto &stab : stabsList) { - // FIXME: Drop stabs whose atoms have been dead-stripped. 
- _stabs.push_back(stab); - } - } - - return llvm::Error::success(); -} - -uint16_t Util::descBits(const DefinedAtom* atom) { - uint16_t desc = 0; - switch (atom->merge()) { - case lld::DefinedAtom::mergeNo: - case lld::DefinedAtom::mergeAsTentative: - break; - case lld::DefinedAtom::mergeAsWeak: - case lld::DefinedAtom::mergeAsWeakAndAddressUsed: - desc |= N_WEAK_DEF; - break; - case lld::DefinedAtom::mergeSameNameAndSize: - case lld::DefinedAtom::mergeByLargestSection: - case lld::DefinedAtom::mergeByContent: - llvm_unreachable("Unsupported DefinedAtom::merge()"); - break; - } - if (atom->contentType() == lld::DefinedAtom::typeResolver) - desc |= N_SYMBOL_RESOLVER; - if (atom->contentType() == lld::DefinedAtom::typeMachHeader) - desc |= REFERENCED_DYNAMICALLY; - if (_archHandler.isThumbFunction(*atom)) - desc |= N_ARM_THUMB_DEF; - if (atom->deadStrip() == DefinedAtom::deadStripNever && - _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) { - if ((atom->contentType() != DefinedAtom::typeInitializerPtr) - && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) - desc |= N_NO_DEAD_STRIP; - } - return desc; -} - -bool Util::AtomSorter::operator()(const AtomAndIndex &left, - const AtomAndIndex &right) { - return (left.atom->name().compare(right.atom->name()) < 0); -} - -llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, - bool &inGlobalsRegion, - SymbolScope &scope) { - bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); - switch (atom->scope()) { - case Atom::scopeTranslationUnit: - scope = 0; - inGlobalsRegion = false; - return llvm::Error::success(); - case Atom::scopeLinkageUnit: - if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::exported) && - _ctx.exportSymbolNamed(atom->name())) { - return llvm::make_error( - Twine("cannot export hidden symbol ") + atom->name()); - } - if (rMode) { - if (_ctx.keepPrivateExterns()) { - // -keep_private_externs means keep in globals region as N_PEXT. 
- scope = N_PEXT | N_EXT; - inGlobalsRegion = true; - return llvm::Error::success(); - } - } - // scopeLinkageUnit symbols are no longer global once linked. - scope = N_PEXT; - inGlobalsRegion = false; - return llvm::Error::success(); - case Atom::scopeGlobal: - if (_ctx.exportRestrictMode()) { - if (_ctx.exportSymbolNamed(atom->name())) { - scope = N_EXT; - inGlobalsRegion = true; - return llvm::Error::success(); - } else { - scope = N_PEXT; - inGlobalsRegion = false; - return llvm::Error::success(); - } - } else { - scope = N_EXT; - inGlobalsRegion = true; - return llvm::Error::success(); - } - break; - } - llvm_unreachable("atom->scope() unknown enum value"); -} - - - -llvm::Error Util::addSymbols(const lld::File &atomFile, - NormalizedFile &file) { - bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); - // Mach-O symbol table has four regions: stabs, locals, globals, undefs. - - // Add all stabs. - for (auto &stab : _stabs) { - lld::mach_o::normalized::Symbol sym; - sym.type = static_cast(stab.type); - sym.scope = 0; - sym.sect = stab.other; - sym.desc = stab.desc; - if (stab.atom) - sym.value = _atomToAddress[stab.atom]; - else - sym.value = stab.value; - sym.name = stab.str; - file.stabsSymbols.push_back(sym); - } - - // Add all local (non-global) symbols in address order - std::vector globals; - globals.reserve(512); - for (SectionInfo *sect : _sectionInfos) { - for (const AtomInfo &info : sect->atomsAndOffsets) { - const DefinedAtom *atom = info.atom; - if (!atom->name().empty()) { - SymbolScope symbolScope; - bool inGlobalsRegion; - if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){ - return ec; - } - if (inGlobalsRegion) { - AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope }; - globals.push_back(ai); - } else { - lld::mach_o::normalized::Symbol sym; - sym.name = atom->name(); - sym.type = N_SECT; - sym.scope = symbolScope; - sym.sect = sect->finalSectionIndex; - sym.desc = descBits(atom); - sym.value = 
_atomToAddress[atom]; - _atomToSymbolIndex[atom] = file.localSymbols.size(); - file.localSymbols.push_back(sym); - } - } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){ - // Create 'Lxxx' labels for anonymous atoms if archHandler says so. - static unsigned tempNum = 1; - char tmpName[16]; - sprintf(tmpName, "L%04u", tempNum++); - StringRef tempRef(tmpName); - lld::mach_o::normalized::Symbol sym; - sym.name = tempRef.copy(file.ownedAllocations); - sym.type = N_SECT; - sym.scope = 0; - sym.sect = sect->finalSectionIndex; - sym.desc = 0; - sym.value = _atomToAddress[atom]; - _atomToSymbolIndex[atom] = file.localSymbols.size(); - file.localSymbols.push_back(sym); - } - } - } - - // Sort global symbol alphabetically, then add to symbol table. - std::sort(globals.begin(), globals.end(), AtomSorter()); - const uint32_t globalStartIndex = file.localSymbols.size(); - for (AtomAndIndex &ai : globals) { - lld::mach_o::normalized::Symbol sym; - sym.name = ai.atom->name(); - sym.type = N_SECT; - sym.scope = ai.scope; - sym.sect = ai.index; - sym.desc = descBits(static_cast(ai.atom)); - sym.value = _atomToAddress[ai.atom]; - _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size(); - file.globalSymbols.push_back(sym); - } - - // Sort undefined symbol alphabetically, then add to symbol table. 
- std::vector undefs; - undefs.reserve(128); - for (const UndefinedAtom *atom : atomFile.undefined()) { - AtomAndIndex ai = { atom, 0, N_EXT }; - undefs.push_back(ai); - } - for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) { - AtomAndIndex ai = { atom, 0, N_EXT }; - undefs.push_back(ai); - } - std::sort(undefs.begin(), undefs.end(), AtomSorter()); - const uint32_t start = file.globalSymbols.size() + file.localSymbols.size(); - for (AtomAndIndex &ai : undefs) { - lld::mach_o::normalized::Symbol sym; - uint16_t desc = 0; - if (!rMode) { - uint8_t ordinal = 0; - if (!_ctx.useFlatNamespace()) - ordinal = dylibOrdinal(dyn_cast(ai.atom)); - llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal); - } - sym.name = ai.atom->name(); - sym.type = N_UNDF; - sym.scope = ai.scope; - sym.sect = 0; - sym.desc = desc; - sym.value = 0; - _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start; - file.undefinedSymbols.push_back(sym); - } - - return llvm::Error::success(); -} - -const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { - for (const Reference *ref : *lpAtom) { - if (_archHandler.isLazyPointer(*ref)) { - return ref->target(); - } - } - return nullptr; -} - -const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) { - for (const Reference *ref : *stubAtom) { - if (const Atom *ta = ref->target()) { - if (const DefinedAtom *lpAtom = dyn_cast(ta)) { - const Atom *target = targetOfLazyPointer(lpAtom); - if (target) - return target; - } - } - } - return nullptr; -} - -void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { - for (SectionInfo *si : _sectionInfos) { - Section &normSect = file.sections[si->normalizedSectionIndex]; - switch (si->type) { - case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS: - for (const AtomInfo &info : si->atomsAndOffsets) { - bool foundTarget = false; - for (const Reference *ref : *info.atom) { - const Atom *target = ref->target(); - if (target) { - if (isa(target)) { - uint32_t index = 
_atomToSymbolIndex[target]; - normSect.indirectSymbols.push_back(index); - foundTarget = true; - } else { - normSect.indirectSymbols.push_back( - llvm::MachO::INDIRECT_SYMBOL_LOCAL); - } - } - } - if (!foundTarget) { - normSect.indirectSymbols.push_back( - llvm::MachO::INDIRECT_SYMBOL_ABS); - } - } - break; - case llvm::MachO::S_LAZY_SYMBOL_POINTERS: - for (const AtomInfo &info : si->atomsAndOffsets) { - const Atom *target = targetOfLazyPointer(info.atom); - if (target) { - uint32_t index = _atomToSymbolIndex[target]; - normSect.indirectSymbols.push_back(index); - } - } - break; - case llvm::MachO::S_SYMBOL_STUBS: - for (const AtomInfo &info : si->atomsAndOffsets) { - const Atom *target = targetOfStub(info.atom); - if (target) { - uint32_t index = _atomToSymbolIndex[target]; - normSect.indirectSymbols.push_back(index); - } - } - break; - default: - break; - } - } -} - -void Util::addDependentDylibs(const lld::File &atomFile, - NormalizedFile &nFile) { - // Scan all imported symbols and build up list of dylibs they are from. - int ordinal = 1; - for (const auto *dylib : _ctx.allDylibs()) { - DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName()); - if (pos == _dylibInfo.end()) { - DylibInfo info; - bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile(); - - // If we're in -flat_namespace mode (or this atom came from the flat - // namespace file under -undefined dynamic_lookup) then use the flat - // lookup ordinal. - if (flatNamespaceAtom || _ctx.useFlatNamespace()) - info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP; - else - info.ordinal = ordinal++; - info.hasWeak = false; - info.hasNonWeak = !info.hasWeak; - _dylibInfo[dylib->installName()] = info; - - // Unless this was a flat_namespace atom, record the source dylib. 
- if (!flatNamespaceAtom) { - DependentDylib depInfo; - depInfo.path = dylib->installName(); - depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; - depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path()); - depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path()); - nFile.dependentDylibs.push_back(depInfo); - } - } else { - pos->second.hasWeak = false; - pos->second.hasNonWeak = !pos->second.hasWeak; - } - } - // Automatically weak link dylib in which all symbols are weak (canBeNull). - for (DependentDylib &dep : nFile.dependentDylibs) { - DylibInfo &info = _dylibInfo[dep.path]; - if (info.hasWeak && !info.hasNonWeak) - dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB; - else if (_ctx.isUpwardDylib(dep.path)) - dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB; - } -} - -int Util::dylibOrdinal(const SharedLibraryAtom *sa) { - return _dylibInfo[sa->loadName()].ordinal; -} - -void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex, - uint64_t &segmentStartAddr) { - segmentIndex = 0; - for (const SegmentInfo *seg : _segmentInfos) { - if ((seg->address <= sect->address) - && (seg->address+seg->size >= sect->address+sect->size)) { - segmentStartAddr = seg->address; - return; - } - ++segmentIndex; - } - llvm_unreachable("section not in any segment"); -} - -uint32_t Util::sectionIndexForAtom(const Atom *atom) { - uint64_t address = _atomToAddress[atom]; - for (const SectionInfo *si : _sectionInfos) { - if ((si->address <= address) && (address < si->address+si->size)) - return si->finalSectionIndex; - } - llvm_unreachable("atom not in any section"); -} - -void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { - if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) - return; - - // Utility function for ArchHandler to find symbol index for an atom. 
- auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t { - auto pos = _atomToSymbolIndex.find(&atom); - assert(pos != _atomToSymbolIndex.end()); - return pos->second; - }; - - // Utility function for ArchHandler to find section index for an atom. - auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t { - return sectionIndexForAtom(&atom); - }; - - // Utility function for ArchHandler to find address of atom in output file. - auto addressForAtom = [&] (const Atom &atom) -> uint64_t { - auto pos = _atomToAddress.find(&atom); - assert(pos != _atomToAddress.end()); - return pos->second; - }; - - for (SectionInfo *si : _sectionInfos) { - Section &normSect = file.sections[si->normalizedSectionIndex]; - for (const AtomInfo &info : si->atomsAndOffsets) { - const DefinedAtom *atom = info.atom; - for (const Reference *ref : *atom) { - // Skip emitting relocs for sections which are always able to be - // implicitly regenerated and where the relocation targets an address - // which is defined. - if (si->relocsToDefinedCanBeImplicit && isa(ref->target())) - continue; - _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref, - symIndexForAtom, - sectIndexForAtom, - addressForAtom, - normSect.relocations); - } - } - } -} - -void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) { - if (!_ctx.generateFunctionStartsLoadCommand()) - return; - file.functionStarts.reserve(8192); - // Delta compress function starts, starting with the mach header symbol. - const uint64_t badAddress = ~0ULL; - uint64_t addr = badAddress; - for (SectionInfo *si : _sectionInfos) { - for (const AtomInfo &info : si->atomsAndOffsets) { - auto type = info.atom->contentType(); - if (type == DefinedAtom::typeMachHeader) { - addr = _atomToAddress[info.atom]; - continue; - } - if (type != DefinedAtom::typeCode) - continue; - assert(addr != badAddress && "Missing mach header symbol"); - // Skip atoms which have 0 size. 
This is so that LC_FUNCTION_STARTS - // can't spill in to the next section. - if (!info.atom->size()) - continue; - uint64_t nextAddr = _atomToAddress[info.atom]; - if (_archHandler.isThumbFunction(*info.atom)) - nextAddr |= 1; - uint64_t delta = nextAddr - addr; - if (delta) { - ByteBuffer buffer; - buffer.append_uleb128(delta); - file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(), - buffer.bytes() + buffer.size()); - } - addr = nextAddr; - } - } - - // Null terminate, and pad to pointer size for this arch. - file.functionStarts.push_back(0); - - auto size = file.functionStarts.size(); - for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4); - i != e; ++i) - file.functionStarts.push_back(0); -} - -void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { - if (!_ctx.generateDataInCodeLoadCommand()) - return; - for (SectionInfo *si : _sectionInfos) { - for (const AtomInfo &info : si->atomsAndOffsets) { - // Atoms that contain data-in-code have "transition" references - // which mark a point where the embedded data starts of ends. - // This needs to be converted to the mach-o format which is an array - // of data-in-code ranges. - uint32_t startOffset = 0; - DataRegionType mode = DataRegionType(0); - for (const Reference *ref : *info.atom) { - if (ref->kindNamespace() != Reference::KindNamespace::mach_o) - continue; - if (_archHandler.isDataInCodeTransition(ref->kindValue())) { - DataRegionType nextMode = (DataRegionType)ref->addend(); - if (mode != nextMode) { - if (mode != 0) { - // Found end data range, so make range entry. - DataInCode entry; - entry.offset = si->address + info.offsetInSection + startOffset; - entry.length = ref->offsetInAtom() - startOffset; - entry.kind = mode; - file.dataInCode.push_back(entry); - } - } - mode = nextMode; - startOffset = ref->offsetInAtom(); - } - } - if (mode != 0) { - // Function ends with data (no end transition). 
- DataInCode entry; - entry.offset = si->address + info.offsetInSection + startOffset; - entry.length = info.atom->size() - startOffset; - entry.kind = mode; - file.dataInCode.push_back(entry); - } - } - } -} - -void Util::addRebaseAndBindingInfo(const lld::File &atomFile, - NormalizedFile &nFile) { - if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) - return; - - uint8_t segmentIndex; - uint64_t segmentStartAddr; - uint32_t offsetInBindInfo = 0; - - for (SectionInfo *sect : _sectionInfos) { - segIndexForSection(sect, segmentIndex, segmentStartAddr); - for (const AtomInfo &info : sect->atomsAndOffsets) { - const DefinedAtom *atom = info.atom; - for (const Reference *ref : *atom) { - uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom() - - segmentStartAddr; - const Atom* targ = ref->target(); - if (_archHandler.isPointer(*ref)) { - // A pointer to a DefinedAtom requires rebasing. - if (isa(targ)) { - RebaseLocation rebase; - rebase.segIndex = segmentIndex; - rebase.segOffset = segmentOffset; - rebase.kind = llvm::MachO::REBASE_TYPE_POINTER; - nFile.rebasingInfo.push_back(rebase); - } - // A pointer to an SharedLibraryAtom requires binding. 
- if (const SharedLibraryAtom *sa = dyn_cast(targ)) { - BindLocation bind; - bind.segIndex = segmentIndex; - bind.segOffset = segmentOffset; - bind.kind = llvm::MachO::BIND_TYPE_POINTER; - bind.canBeNull = sa->canBeNullAtRuntime(); - bind.ordinal = dylibOrdinal(sa); - bind.symbolName = targ->name(); - bind.addend = ref->addend(); - nFile.bindingInfo.push_back(bind); - } - } - else if (_archHandler.isLazyPointer(*ref)) { - BindLocation bind; - if (const SharedLibraryAtom *sa = dyn_cast(targ)) { - bind.ordinal = dylibOrdinal(sa); - } else { - bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF; - } - bind.segIndex = segmentIndex; - bind.segOffset = segmentOffset; - bind.kind = llvm::MachO::BIND_TYPE_POINTER; - bind.canBeNull = false; //sa->canBeNullAtRuntime(); - bind.symbolName = targ->name(); - bind.addend = ref->addend(); - nFile.lazyBindingInfo.push_back(bind); - - // Now that we know the segmentOffset and the ordinal attribute, - // we can fix the helper's code - - fixLazyReferenceImm(atom, offsetInBindInfo, nFile); - - // 5 bytes for opcodes + variable sizes (target name + \0 and offset - // encode's size) - offsetInBindInfo += - 6 + targ->name().size() + llvm::getULEB128Size(bind.segOffset); - if (bind.ordinal > BIND_IMMEDIATE_MASK) - offsetInBindInfo += llvm::getULEB128Size(bind.ordinal); - } - } - } - } -} - -void Util::fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset, - NormalizedFile &file) { - for (const Reference *ref : *atom) { - const DefinedAtom *da = dyn_cast(ref->target()); - if (da == nullptr) - return; - - const Reference *helperRef = nullptr; - for (const Reference *hr : *da) { - if (hr->kindValue() == _archHandler.lazyImmediateLocationKind()) { - helperRef = hr; - break; - } - } - if (helperRef == nullptr) - continue; - - // TODO: maybe get the fixed atom content from _archHandler ? 
- for (SectionInfo *sectInfo : _sectionInfos) { - for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) { - if (atomInfo.atom == helperRef->target()) { - auto sectionContent = - file.sections[sectInfo->normalizedSectionIndex].content; - uint8_t *rawb = - file.ownedAllocations.Allocate(sectionContent.size()); - llvm::MutableArrayRef newContent{rawb, - sectionContent.size()}; - std::copy(sectionContent.begin(), sectionContent.end(), - newContent.begin()); - llvm::support::ulittle32_t *loc = - reinterpret_cast( - &newContent[atomInfo.offsetInSection + - helperRef->offsetInAtom()]); - *loc = offset; - file.sections[sectInfo->normalizedSectionIndex].content = newContent; - } - } - } - } -} - -void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) { - if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) - return; - - for (SectionInfo *sect : _sectionInfos) { - for (const AtomInfo &info : sect->atomsAndOffsets) { - const DefinedAtom *atom = info.atom; - if (atom->scope() != Atom::scopeGlobal) - continue; - if (_ctx.exportRestrictMode()) { - if (!_ctx.exportSymbolNamed(atom->name())) - continue; - } - Export exprt; - exprt.name = atom->name(); - exprt.offset = _atomToAddress[atom] - _ctx.baseAddress(); - exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR; - if (atom->merge() == DefinedAtom::mergeAsWeak) - exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; - else - exprt.flags = 0; - exprt.otherOffset = 0; - exprt.otherName = StringRef(); - nFile.exportInfo.push_back(exprt); - } - } -} - -uint32_t Util::fileFlags() { - // FIXME: these need to determined at runtime. - if (_ctx.outputMachOType() == MH_OBJECT) { - return _subsectionsViaSymbols ? 
(uint32_t)MH_SUBSECTIONS_VIA_SYMBOLS : 0; - } else { - uint32_t flags = MH_DYLDLINK; - if (!_ctx.useFlatNamespace()) - flags |= MH_TWOLEVEL | MH_NOUNDEFS; - if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE()) - flags |= MH_PIE; - if (_hasTLVDescriptors) - flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS); - return flags; - } -} - -} // end anonymous namespace - -namespace lld { -namespace mach_o { -namespace normalized { - -/// Convert a set of Atoms into a normalized mach-o file. -llvm::Expected> -normalizedFromAtoms(const lld::File &atomFile, - const MachOLinkingContext &context) { - // The util object buffers info until the normalized file can be made. - Util util(context); - util.processDefinedAtoms(atomFile); - util.organizeSections(); - - std::unique_ptr f(new NormalizedFile()); - NormalizedFile &normFile = *f.get(); - normFile.arch = context.arch(); - normFile.fileType = context.outputMachOType(); - normFile.flags = util.fileFlags(); - normFile.stackSize = context.stackSize(); - normFile.installName = context.installName(); - normFile.currentVersion = context.currentVersion(); - normFile.compatVersion = context.compatibilityVersion(); - normFile.os = context.os(); - - // If we are emitting an object file, then the min version is the maximum - // of the min's of all the source files and the cmdline. 
- if (normFile.fileType == llvm::MachO::MH_OBJECT) - normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion()); - else - normFile.minOSverson = context.osMinVersion(); - - normFile.minOSVersionKind = util.minVersionCommandType(); - - normFile.sdkVersion = context.sdkVersion(); - normFile.sourceVersion = context.sourceVersion(); - - if (context.generateVersionLoadCommand() && - context.os() != MachOLinkingContext::OS::unknown) - normFile.hasMinVersionLoadCommand = true; - else if (normFile.fileType == llvm::MachO::MH_OBJECT && - util.allSourceFilesHaveMinVersions() && - ((normFile.os != MachOLinkingContext::OS::unknown) || - util.minVersionCommandType())) { - // If we emit an object file, then it should contain a min version load - // command if all of the source files also contained min version commands. - // Also, we either need to have a platform, or found a platform from the - // source object files. - normFile.hasMinVersionLoadCommand = true; - } - normFile.generateDataInCodeLoadCommand = - context.generateDataInCodeLoadCommand(); - normFile.pageSize = context.pageSize(); - normFile.rpaths = context.rpaths(); - util.addDependentDylibs(atomFile, normFile); - util.copySegmentInfo(normFile); - util.copySectionInfo(normFile); - util.assignAddressesToSections(normFile); - util.buildAtomToAddressMap(); - if (auto err = util.synthesizeDebugNotes(normFile)) - return std::move(err); - util.updateSectionInfo(normFile); - util.copySectionContent(normFile); - if (auto ec = util.addSymbols(atomFile, normFile)) { - return std::move(ec); - } - util.addIndirectSymbols(atomFile, normFile); - util.addRebaseAndBindingInfo(atomFile, normFile); - util.addExportInfo(atomFile, normFile); - util.addSectionRelocs(atomFile, normFile); - util.addFunctionStarts(atomFile, normFile); - util.buildDataInCodeArray(atomFile, normFile); - util.copyEntryPointAddress(normFile); - - return std::move(f); -} - -} // namespace normalized -} // namespace mach_o -} // namespace lld diff 
--git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp deleted file mode 100644 index 164a283b972b..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp +++ /dev/null @@ -1,1635 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// -/// \file Converts from in-memory normalized mach-o to in-memory Atoms. -/// -/// +------------+ -/// | normalized | -/// +------------+ -/// | -/// | -/// v -/// +-------+ -/// | Atoms | -/// +-------+ - -#include "ArchHandler.h" -#include "Atoms.h" -#include "File.h" -#include "MachONormalizedFile.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -#define DEBUG_TYPE "normalized-file-to-atoms" - -namespace lld { -namespace mach_o { - - -namespace { // anonymous - - -#define ENTRY(seg, sect, type, atomType) \ - {seg, sect, type, DefinedAtom::atomType } - -struct MachORelocatableSectionToAtomType { - StringRef segmentName; - StringRef sectionName; - SectionType sectionType; - DefinedAtom::ContentType atomType; -}; - -const MachORelocatableSectionToAtomType sectsToAtomType[] = { - ENTRY("__TEXT", "__text", S_REGULAR, typeCode), - ENTRY("__TEXT", 
"__text", S_REGULAR, typeResolver), - ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), - ENTRY("", "", S_CSTRING_LITERALS, typeCString), - ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), - ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), - ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), - ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), - ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), - ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), - ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), - ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), - ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), - ENTRY("__DATA", "__data", S_REGULAR, typeData), - ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), - ENTRY("__DATA", "__const", S_REGULAR, typeConstData), - ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), - ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, - typeInitializerPtr), - ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, - typeTerminatorPtr), - ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, - typeGOT), - ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), - ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, - typeGOT), - ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), - ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, - typeThunkTLV), - ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), - ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, - typeTLVInitialZeroFill), - ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), - ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), - ENTRY("", "", S_INTERPOSING, typeInterposingTuples), - ENTRY("__LD", "__compact_unwind", S_REGULAR, - typeCompactUnwindInfo), - ENTRY("", "", S_REGULAR, typeUnknown) -}; -#undef ENTRY - - -/// Figures out ContentType of a mach-o section. 
-DefinedAtom::ContentType atomTypeFromSection(const Section §ion, - bool &customSectionName) { - // First look for match of name and type. Empty names in table are wildcards. - customSectionName = false; - for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; - p->atomType != DefinedAtom::typeUnknown; ++p) { - if (p->sectionType != section.type) - continue; - if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) - continue; - if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) - continue; - customSectionName = p->segmentName.empty() && p->sectionName.empty(); - return p->atomType; - } - // Look for code denoted by section attributes - if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) - return DefinedAtom::typeCode; - - return DefinedAtom::typeUnknown; -} - -enum AtomizeModel { - atomizeAtSymbols, - atomizeFixedSize, - atomizePointerSize, - atomizeUTF8, - atomizeUTF16, - atomizeCFI, - atomizeCU, - atomizeCFString -}; - -/// Returns info on how to atomize a section of the specified ContentType. 
-void sectionParseInfo(DefinedAtom::ContentType atomType, - unsigned int &sizeMultiple, - DefinedAtom::Scope &scope, - DefinedAtom::Merge &merge, - AtomizeModel &atomizeModel) { - struct ParseInfo { - DefinedAtom::ContentType atomType; - unsigned int sizeMultiple; - DefinedAtom::Scope scope; - DefinedAtom::Merge merge; - AtomizeModel atomizeModel; - }; - - #define ENTRY(type, size, scope, merge, model) \ - {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } - - static const ParseInfo parseInfo[] = { - ENTRY(typeCode, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeData, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeConstData, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeConstant, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, - atomizeUTF8), - ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, - atomizeUTF16), - ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, - atomizeCFI), - ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, - atomizeFixedSize), - ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, - atomizeFixedSize), - ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, - atomizeFixedSize), - ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, - atomizeCFString), - ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, - atomizePointerSize), - ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, - atomizePointerSize), - ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, - atomizeCU), - ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, - atomizePointerSize), - ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, - atomizePointerSize), - ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, - atomizeAtSymbols) - }; - #undef ENTRY - const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); - for (int i=0; i < tableLen; 
++i) { - if (parseInfo[i].atomType == atomType) { - sizeMultiple = parseInfo[i].sizeMultiple; - scope = parseInfo[i].scope; - merge = parseInfo[i].merge; - atomizeModel = parseInfo[i].atomizeModel; - return; - } - } - - // Unknown type is atomized by symbols. - sizeMultiple = 1; - scope = DefinedAtom::scopeGlobal; - merge = DefinedAtom::mergeNo; - atomizeModel = atomizeAtSymbols; -} - - -Atom::Scope atomScope(uint8_t scope) { - switch (scope) { - case N_EXT: - return Atom::scopeGlobal; - case N_PEXT: - case N_PEXT | N_EXT: - return Atom::scopeLinkageUnit; - case 0: - return Atom::scopeTranslationUnit; - } - llvm_unreachable("unknown scope value!"); -} - -void appendSymbolsInSection( - const std::vector &inSymbols, - uint32_t sectionIndex, - SmallVector &outSyms) { - for (const lld::mach_o::normalized::Symbol &sym : inSymbols) { - // Only look at definition symbols. - if ((sym.type & N_TYPE) != N_SECT) - continue; - if (sym.sect != sectionIndex) - continue; - outSyms.push_back(&sym); - } -} - -void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, - MachOFile &file, uint64_t symbolAddr, StringRef symbolName, - uint16_t symbolDescFlags, Atom::Scope symbolScope, - uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { - // Mach-O symbol table does have size in it. Instead the size is the - // difference between this and the next symbol. - uint64_t size = nextSymbolAddr - symbolAddr; - uint64_t offset = symbolAddr - section.address; - bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; - if (isZeroFillSection(section.type)) { - file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, - noDeadStrip, copyRefs, §ion); - } else { - DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) - ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; - bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); - if (atomType == DefinedAtom::typeUnknown) { - // Mach-O needs a segment and section name. 
Concatenate those two - // with a / separator (e.g. "seg/sect") to fit into the lld model - // of just a section name. - std::string segSectName = section.segmentName.str() - + "/" + section.sectionName.str(); - file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, - merge, thumb, noDeadStrip, offset, - size, segSectName, true, §ion); - } else { - if ((atomType == lld::DefinedAtom::typeCode) && - (symbolDescFlags & N_SYMBOL_RESOLVER)) { - atomType = lld::DefinedAtom::typeResolver; - } - file.addDefinedAtom(symbolName, symbolScope, atomType, merge, - offset, size, thumb, noDeadStrip, copyRefs, §ion); - } - } -} - -llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, - const Section §ion, - const NormalizedFile &normalizedFile, - MachOFile &file, bool scatterable, - bool copyRefs) { - // Find section's index. - uint32_t sectIndex = 1; - for (auto § : normalizedFile.sections) { - if (§ == §ion) - break; - ++sectIndex; - } - - // Find all symbols in this section. - SmallVector symbols; - appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); - appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); - - // Sort symbols. - std::sort(symbols.begin(), symbols.end(), - [](const lld::mach_o::normalized::Symbol *lhs, - const lld::mach_o::normalized::Symbol *rhs) -> bool { - if (lhs == rhs) - return false; - // First by address. - uint64_t lhsAddr = lhs->value; - uint64_t rhsAddr = rhs->value; - if (lhsAddr != rhsAddr) - return lhsAddr < rhsAddr; - // If same address, one is an alias so sort by scope. - Atom::Scope lScope = atomScope(lhs->scope); - Atom::Scope rScope = atomScope(rhs->scope); - if (lScope != rScope) - return lScope < rScope; - // If same address and scope, see if one might be better as - // the alias. - bool lPrivate = (lhs->name.front() == 'l'); - bool rPrivate = (rhs->name.front() == 'l'); - if (lPrivate != rPrivate) - return lPrivate; - // If same address and scope, sort by name. 
- return lhs->name < rhs->name; - }); - - // Debug logging of symbols. - // for (const Symbol *sym : symbols) - // llvm::errs() << " sym: " - // << llvm::format("0x%08llx ", (uint64_t)sym->value) - // << ", " << sym->name << "\n"; - - // If section has no symbols and no content, there are no atoms. - if (symbols.empty() && section.content.empty()) - return llvm::Error::success(); - - if (symbols.empty()) { - // Section has no symbols, put all content in one anonymous atom. - atomFromSymbol(atomType, section, file, section.address, StringRef(), - 0, Atom::scopeTranslationUnit, - section.address + section.content.size(), - scatterable, copyRefs); - } - else if (symbols.front()->value != section.address) { - // Section has anonymous content before first symbol. - atomFromSymbol(atomType, section, file, section.address, StringRef(), - 0, Atom::scopeTranslationUnit, symbols.front()->value, - scatterable, copyRefs); - } - - const lld::mach_o::normalized::Symbol *lastSym = nullptr; - for (const lld::mach_o::normalized::Symbol *sym : symbols) { - if (lastSym != nullptr) { - // Ignore any assembler added "ltmpNNN" symbol at start of section - // if there is another symbol at the start. - if ((lastSym->value != sym->value) - || lastSym->value != section.address - || !lastSym->name.startswith("ltmp")) { - atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, - lastSym->desc, atomScope(lastSym->scope), sym->value, - scatterable, copyRefs); - } - } - lastSym = sym; - } - if (lastSym != nullptr) { - atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, - lastSym->desc, atomScope(lastSym->scope), - section.address + section.content.size(), - scatterable, copyRefs); - } - - // If object built without .subsections_via_symbols, add reference chain. 
- if (!scatterable) { - MachODefinedAtom *prevAtom = nullptr; - file.eachAtomInSection(section, - [&](MachODefinedAtom *atom, uint64_t offset)->void { - if (prevAtom) - prevAtom->addReference(Reference::KindNamespace::all, - Reference::KindArch::all, - Reference::kindLayoutAfter, 0, atom, 0); - prevAtom = atom; - }); - } - - return llvm::Error::success(); -} - -llvm::Error processSection(DefinedAtom::ContentType atomType, - const Section §ion, - bool customSectionName, - const NormalizedFile &normalizedFile, - MachOFile &file, bool scatterable, - bool copyRefs) { - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - - // Get info on how to atomize section. - unsigned int sizeMultiple; - DefinedAtom::Scope scope; - DefinedAtom::Merge merge; - AtomizeModel atomizeModel; - sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); - - // Validate section size. - if ((section.content.size() % sizeMultiple) != 0) - return llvm::make_error(Twine("Section ") - + section.segmentName - + "/" + section.sectionName - + " has size (" - + Twine(section.content.size()) - + ") which is not a multiple of " - + Twine(sizeMultiple)); - - if (atomizeModel == atomizeAtSymbols) { - // Break section up into atoms each with a fixed size. - return processSymboledSection(atomType, section, normalizedFile, file, - scatterable, copyRefs); - } else { - unsigned int size; - for (unsigned int offset = 0, e = section.content.size(); offset != e;) { - switch (atomizeModel) { - case atomizeFixedSize: - // Break section up into atoms each with a fixed size. - size = sizeMultiple; - break; - case atomizePointerSize: - // Break section up into atoms each the size of a pointer. - size = is64 ? 8 : 4; - break; - case atomizeUTF8: - // Break section up into zero terminated c-strings. 
- size = 0; - for (unsigned int i = offset; i < e; ++i) { - if (section.content[i] == 0) { - size = i + 1 - offset; - break; - } - } - break; - case atomizeUTF16: - // Break section up into zero terminated UTF16 strings. - size = 0; - for (unsigned int i = offset; i < e; i += 2) { - if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { - size = i + 2 - offset; - break; - } - } - break; - case atomizeCFI: - // Break section up into dwarf unwind CFIs (FDE or CIE). - size = read32(§ion.content[offset], isBig) + 4; - if (offset+size > section.content.size()) { - return llvm::make_error(Twine("Section ") - + section.segmentName - + "/" + section.sectionName - + " is malformed. Size of CFI " - "starting at offset (" - + Twine(offset) - + ") is past end of section."); - } - break; - case atomizeCU: - // Break section up into compact unwind entries. - size = is64 ? 32 : 20; - break; - case atomizeCFString: - // Break section up into NS/CFString objects. - size = is64 ? 32 : 16; - break; - case atomizeAtSymbols: - break; - } - if (size == 0) { - return llvm::make_error(Twine("Section ") - + section.segmentName - + "/" + section.sectionName - + " is malformed. The last atom " - "is not zero terminated."); - } - if (customSectionName) { - // Mach-O needs a segment and section name. Concatenate those two - // with a / separator (e.g. "seg/sect") to fit into the lld model - // of just a section name. 
- std::string segSectName = section.segmentName.str() - + "/" + section.sectionName.str(); - file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, - merge, false, false, offset, - size, segSectName, true, §ion); - } else { - file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, - false, false, copyRefs, §ion); - } - offset += size; - } - } - return llvm::Error::success(); -} - -const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, - uint64_t address) { - for (const Section &s : normalizedFile.sections) { - uint64_t sAddr = s.address; - if ((sAddr <= address) && (address < sAddr+s.content.size())) { - return &s; - } - } - return nullptr; -} - -const MachODefinedAtom * -findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, - uint64_t addr, Reference::Addend &addend) { - const Section *sect = nullptr; - sect = findSectionCoveringAddress(normalizedFile, addr); - if (!sect) - return nullptr; - - uint32_t offsetInTarget; - uint64_t offsetInSect = addr - sect->address; - auto atom = - file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); - addend = offsetInTarget; - return atom; -} - -// Walks all relocations for a section in a normalized .o file and -// creates corresponding lld::Reference objects. -llvm::Error convertRelocs(const Section §ion, - const NormalizedFile &normalizedFile, - bool scatterable, - MachOFile &file, - ArchHandler &handler) { - // Utility function for ArchHandler to find atom by its address. 
- auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, - const lld::Atom **atom, Reference::Addend *addend) - -> llvm::Error { - if (sectIndex > normalizedFile.sections.size()) - return llvm::make_error(Twine("out of range section " - "index (") + Twine(sectIndex) + ")"); - const Section *sect = nullptr; - if (sectIndex == 0) { - sect = findSectionCoveringAddress(normalizedFile, addr); - if (!sect) - return llvm::make_error(Twine("address (" + Twine(addr) - + ") is not in any section")); - } else { - sect = &normalizedFile.sections[sectIndex-1]; - } - uint32_t offsetInTarget; - uint64_t offsetInSect = addr - sect->address; - *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); - *addend = offsetInTarget; - return llvm::Error::success(); - }; - - // Utility function for ArchHandler to find atom by its symbol index. - auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) - -> llvm::Error { - // Find symbol from index. - const lld::mach_o::normalized::Symbol *sym = nullptr; - uint32_t numStabs = normalizedFile.stabsSymbols.size(); - uint32_t numLocal = normalizedFile.localSymbols.size(); - uint32_t numGlobal = normalizedFile.globalSymbols.size(); - uint32_t numUndef = normalizedFile.undefinedSymbols.size(); - assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); - if (symbolIndex < numStabs+numLocal) { - sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; - } else if (symbolIndex < numStabs+numLocal+numGlobal) { - sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; - } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { - sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- - numGlobal]; - } else { - return llvm::make_error(Twine("symbol index (") - + Twine(symbolIndex) + ") out of range"); - } - - // Find atom from symbol. 
- if ((sym->type & N_TYPE) == N_SECT) { - if (sym->sect > normalizedFile.sections.size()) - return llvm::make_error(Twine("symbol section index (") - + Twine(sym->sect) + ") out of range "); - const Section &symSection = normalizedFile.sections[sym->sect-1]; - uint64_t targetOffsetInSect = sym->value - symSection.address; - MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, - targetOffsetInSect); - if (target) { - *result = target; - return llvm::Error::success(); - } - return llvm::make_error("no atom found for defined symbol"); - } else if ((sym->type & N_TYPE) == N_UNDF) { - const lld::Atom *target = file.findUndefAtom(sym->name); - if (target) { - *result = target; - return llvm::Error::success(); - } - return llvm::make_error("no undefined atom found for sym"); - } else { - // Search undefs - return llvm::make_error("no atom found for symbol"); - } - }; - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - // Use old-school iterator so that paired relocations can be grouped. - for (auto it=section.relocations.begin(), e=section.relocations.end(); - it != e; ++it) { - const Relocation &reloc = *it; - // Find atom this relocation is in. - if (reloc.offset > section.content.size()) - return llvm::make_error( - Twine("r_address (") + Twine(reloc.offset) - + ") is larger than section size (" - + Twine(section.content.size()) + ")"); - uint32_t offsetInAtom; - MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, - reloc.offset, - &offsetInAtom); - assert(inAtom && "r_address in range, should have found atom"); - uint64_t fixupAddress = section.address + reloc.offset; - - const lld::Atom *target = nullptr; - Reference::Addend addend = 0; - Reference::KindValue kind; - if (handler.isPairedReloc(reloc)) { - // Handle paired relocations together. 
- const Relocation &reloc2 = *++it; - auto relocErr = handler.getPairReferenceInfo( - reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, - atomByAddr, atomBySymbol, &kind, &target, &addend); - if (relocErr) { - return handleErrors(std::move(relocErr), - [&](std::unique_ptr GE) { - return llvm::make_error( - Twine("bad relocation (") + GE->getMessage() - + ") in section " - + section.segmentName + "/" + section.sectionName - + " (r1_address=" + Twine::utohexstr(reloc.offset) - + ", r1_type=" + Twine(reloc.type) - + ", r1_extern=" + Twine(reloc.isExtern) - + ", r1_length=" + Twine((int)reloc.length) - + ", r1_pcrel=" + Twine(reloc.pcRel) - + (!reloc.scattered ? (Twine(", r1_symbolnum=") - + Twine(reloc.symbol)) - : (Twine(", r1_scattered=1, r1_value=") - + Twine(reloc.value))) - + ")" - + ", (r2_address=" + Twine::utohexstr(reloc2.offset) - + ", r2_type=" + Twine(reloc2.type) - + ", r2_extern=" + Twine(reloc2.isExtern) - + ", r2_length=" + Twine((int)reloc2.length) - + ", r2_pcrel=" + Twine(reloc2.pcRel) - + (!reloc2.scattered ? (Twine(", r2_symbolnum=") - + Twine(reloc2.symbol)) - : (Twine(", r2_scattered=1, r2_value=") - + Twine(reloc2.value))) - + ")" ); - }); - } - } - else { - // Use ArchHandler to convert relocation record into information - // needed to instantiate an lld::Reference object. - auto relocErr = handler.getReferenceInfo( - reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, - atomBySymbol, &kind, &target, &addend); - if (relocErr) { - return handleErrors(std::move(relocErr), - [&](std::unique_ptr GE) { - return llvm::make_error( - Twine("bad relocation (") + GE->getMessage() - + ") in section " - + section.segmentName + "/" + section.sectionName - + " (r_address=" + Twine::utohexstr(reloc.offset) - + ", r_type=" + Twine(reloc.type) - + ", r_extern=" + Twine(reloc.isExtern) - + ", r_length=" + Twine((int)reloc.length) - + ", r_pcrel=" + Twine(reloc.pcRel) - + (!reloc.scattered ? 
(Twine(", r_symbolnum=") + Twine(reloc.symbol)) - : (Twine(", r_scattered=1, r_value=") - + Twine(reloc.value))) - + ")" ); - }); - } - } - // Instantiate an lld::Reference object and add to its atom. - inAtom->addReference(Reference::KindNamespace::mach_o, - handler.kindArch(), - kind, offsetInAtom, target, addend); - } - - return llvm::Error::success(); -} - -bool isDebugInfoSection(const Section §ion) { - if ((section.attributes & S_ATTR_DEBUG) == 0) - return false; - return section.segmentName.equals("__DWARF"); -} - -static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { - std::string strName = name.str(); - for (auto *atom : file.defined()) - if (atom->name() == strName) - return atom; - return nullptr; -} - -static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { - char *strCopy = alloc.Allocate(str.size() + 1); - memcpy(strCopy, str.data(), str.size()); - strCopy[str.size()] = '\0'; - return strCopy; -} - -llvm::Error parseStabs(MachOFile &file, - const NormalizedFile &normalizedFile, - bool copyRefs) { - - if (normalizedFile.stabsSymbols.empty()) - return llvm::Error::success(); - - // FIXME: Kill this off when we can move to sane yaml parsing. 
- std::unique_ptr allocator; - if (copyRefs) - allocator = std::make_unique(); - - enum { start, inBeginEnd } state = start; - - const Atom *currentAtom = nullptr; - uint64_t currentAtomAddress = 0; - StabsDebugInfo::StabsList stabsList; - for (const auto &stabSym : normalizedFile.stabsSymbols) { - Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, - stabSym.value, stabSym.name); - switch (state) { - case start: - switch (static_cast(stabSym.type)) { - case N_BNSYM: - state = inBeginEnd; - currentAtomAddress = stabSym.value; - Reference::Addend addend; - currentAtom = findAtomCoveringAddress(normalizedFile, file, - currentAtomAddress, addend); - if (addend != 0) - return llvm::make_error( - "Non-zero addend for BNSYM '" + stabSym.name + "' in " + - file.path()); - if (currentAtom) - stab.atom = currentAtom; - else { - // FIXME: ld64 just issues a warning here - should we match that? - return llvm::make_error( - "can't find atom for stabs BNSYM at " + - Twine::utohexstr(stabSym.value) + " in " + file.path()); - } - break; - case N_SO: - case N_OSO: - // Not associated with an atom, just copy. 
- if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - break; - case N_GSYM: { - auto colonIdx = stabSym.name.find(':'); - if (colonIdx != StringRef::npos) { - StringRef name = stabSym.name.substr(0, colonIdx); - currentAtom = findDefinedAtomByName(file, "_" + name); - stab.atom = currentAtom; - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - } else { - currentAtom = findDefinedAtomByName(file, stabSym.name); - stab.atom = currentAtom; - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - } - if (stab.atom == nullptr) - return llvm::make_error( - "can't find atom for N_GSYM stabs" + stabSym.name + - " in " + file.path()); - break; - } - case N_FUN: - return llvm::make_error( - "old-style N_FUN stab '" + stabSym.name + "' unsupported"); - default: - return llvm::make_error( - "unrecognized stab symbol '" + stabSym.name + "'"); - } - break; - case inBeginEnd: - stab.atom = currentAtom; - switch (static_cast(stabSym.type)) { - case N_ENSYM: - state = start; - currentAtom = nullptr; - break; - case N_FUN: - // Just copy the string. - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - break; - default: - return llvm::make_error( - "unrecognized stab symbol '" + stabSym.name + "'"); - } - } - llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; - stabsList.push_back(stab); - } - - file.setDebugInfo(std::make_unique(std::move(stabsList))); - - // FIXME: Kill this off when we fix YAML memory ownership. 
- file.debugInfo()->setAllocator(std::move(allocator)); - - return llvm::Error::success(); -} - -static llvm::DataExtractor -dataExtractorFromSection(const NormalizedFile &normalizedFile, - const Section &S) { - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - StringRef SecData(reinterpret_cast(S.content.data()), - S.content.size()); - return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); -} - -// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE -// inspection" code if possible. -static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, - uint64_t abbrCode) { - uint64_t offset = 0; - while (abbrevData.getULEB128(&offset) != abbrCode) { - // Tag - abbrevData.getULEB128(&offset); - // DW_CHILDREN - abbrevData.getU8(&offset); - // Attributes - while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) - ; - } - return offset; -} - -// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE -// inspection" code if possible. -static Expected -getIndexedString(const NormalizedFile &normalizedFile, - llvm::dwarf::Form form, llvm::DataExtractor infoData, - uint64_t &infoOffset, const Section &stringsSection) { - if (form == llvm::dwarf::DW_FORM_string) - return infoData.getCStr(&infoOffset); - if (form != llvm::dwarf::DW_FORM_strp) - return llvm::make_error( - "string field encoded without DW_FORM_strp"); - uint64_t stringOffset = infoData.getU32(&infoOffset); - llvm::DataExtractor stringsData = - dataExtractorFromSection(normalizedFile, stringsSection); - return stringsData.getCStr(&stringOffset); -} - -// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE -// inspection" code if possible. 
-static llvm::Expected -readCompUnit(const NormalizedFile &normalizedFile, - const Section &info, - const Section &abbrev, - const Section &strings, - StringRef path) { - // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE - // inspection" code if possible. - uint64_t offset = 0; - llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; - auto infoData = dataExtractorFromSection(normalizedFile, info); - uint32_t length = infoData.getU32(&offset); - if (length == llvm::dwarf::DW_LENGTH_DWARF64) { - Format = llvm::dwarf::DwarfFormat::DWARF64; - infoData.getU64(&offset); - } - else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved) - return llvm::make_error("Malformed DWARF in " + path); - - uint16_t version = infoData.getU16(&offset); - - if (version < 2 || version > 4) - return llvm::make_error("Unsupported DWARF version in " + - path); - - infoData.getU32(&offset); // Abbrev offset (should be zero) - uint8_t addrSize = infoData.getU8(&offset); - - uint32_t abbrCode = infoData.getULEB128(&offset); - auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); - uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); - uint64_t tag = abbrevData.getULEB128(&abbrevOffset); - if (tag != llvm::dwarf::DW_TAG_compile_unit) - return llvm::make_error("top level DIE is not a compile unit"); - // DW_CHILDREN - abbrevData.getU8(&abbrevOffset); - uint32_t name; - llvm::dwarf::Form form; - llvm::dwarf::FormParams formParams = {version, addrSize, Format}; - TranslationUnitSource tu; - while ((name = abbrevData.getULEB128(&abbrevOffset)) | - (form = static_cast( - abbrevData.getULEB128(&abbrevOffset))) && - (name != 0 || form != 0)) { - switch (name) { - case llvm::dwarf::DW_AT_name: { - if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, - strings)) - tu.name = *eName; - else - return eName.takeError(); - break; - } - case llvm::dwarf::DW_AT_comp_dir: { - if (auto eName = getIndexedString(normalizedFile, form, 
infoData, offset, - strings)) - tu.path = *eName; - else - return eName.takeError(); - break; - } - default: - llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); - } - } - return tu; -} - -llvm::Error parseDebugInfo(MachOFile &file, - const NormalizedFile &normalizedFile, bool copyRefs) { - - // Find the interesting debug info sections. - const Section *debugInfo = nullptr; - const Section *debugAbbrev = nullptr; - const Section *debugStrings = nullptr; - - for (auto &s : normalizedFile.sections) { - if (s.segmentName == "__DWARF") { - if (s.sectionName == "__debug_info") - debugInfo = &s; - else if (s.sectionName == "__debug_abbrev") - debugAbbrev = &s; - else if (s.sectionName == "__debug_str") - debugStrings = &s; - } - } - - if (!debugInfo) - return parseStabs(file, normalizedFile, copyRefs); - - if (debugInfo->content.size() == 0) - return llvm::Error::success(); - - if (debugInfo->content.size() < 12) - return llvm::make_error("Malformed __debug_info section in " + - file.path() + ": too small"); - - if (!debugAbbrev) - return llvm::make_error("Missing __dwarf_abbrev section in " + - file.path()); - - if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, - *debugStrings, file.path())) { - // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML - // memory ownership. 
- std::unique_ptr allocator; - if (copyRefs) { - allocator = std::make_unique(); - tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); - tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); - } - file.setDebugInfo(std::make_unique(std::move(*tuOrErr))); - if (copyRefs) - file.debugInfo()->setAllocator(std::move(allocator)); - } else - return tuOrErr.takeError(); - - return llvm::Error::success(); -} - -static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { - if (is64) - return read64(addr, isBig); - - int32_t res = read32(addr, isBig); - return res; -} - -/// --- Augmentation String Processing --- - -struct CIEInfo { - bool _augmentationDataPresent = false; - bool _mayHaveEH = false; - uint32_t _offsetOfLSDA = ~0U; - uint32_t _offsetOfPersonality = ~0U; - uint32_t _offsetOfFDEPointerEncoding = ~0U; - uint32_t _augmentationDataLength = ~0U; -}; - -typedef llvm::DenseMap CIEInfoMap; - -static llvm::Error processAugmentationString(const uint8_t *augStr, - CIEInfo &cieInfo, - unsigned &len) { - - if (augStr[0] == '\0') { - len = 1; - return llvm::Error::success(); - } - - if (augStr[0] != 'z') - return llvm::make_error("expected 'z' at start of " - "augmentation string"); - - cieInfo._augmentationDataPresent = true; - uint64_t idx = 1; - - uint32_t offsetInAugmentationData = 0; - while (augStr[idx] != '\0') { - if (augStr[idx] == 'L') { - cieInfo._offsetOfLSDA = offsetInAugmentationData; - // This adds a single byte to the augmentation data. - ++offsetInAugmentationData; - ++idx; - continue; - } - if (augStr[idx] == 'P') { - cieInfo._offsetOfPersonality = offsetInAugmentationData; - // This adds a single byte to the augmentation data for the encoding, - // then a number of bytes for the pointer data. - // FIXME: We are assuming 4 is correct here for the pointer size as we - // always currently use delta32ToGOT. 
- offsetInAugmentationData += 5; - ++idx; - continue; - } - if (augStr[idx] == 'R') { - cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; - // This adds a single byte to the augmentation data. - ++offsetInAugmentationData; - ++idx; - continue; - } - if (augStr[idx] == 'e') { - if (augStr[idx + 1] != 'h') - return llvm::make_error("expected 'eh' in " - "augmentation string"); - cieInfo._mayHaveEH = true; - idx += 2; - continue; - } - ++idx; - } - - cieInfo._augmentationDataLength = offsetInAugmentationData; - - len = idx + 1; - return llvm::Error::success(); -} - -static llvm::Error processCIE(const NormalizedFile &normalizedFile, - MachOFile &file, - mach_o::ArchHandler &handler, - const Section *ehFrameSection, - MachODefinedAtom *atom, - uint64_t offset, - CIEInfoMap &cieInfos) { - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - const uint8_t *frameData = atom->rawContent().data(); - - CIEInfo cieInfo; - - uint32_t size = read32(frameData, isBig); - uint64_t cieIDField = size == 0xffffffffU - ? sizeof(uint32_t) + sizeof(uint64_t) - : sizeof(uint32_t); - uint64_t versionField = cieIDField + sizeof(uint32_t); - uint64_t augmentationStringField = versionField + sizeof(uint8_t); - - unsigned augmentationStringLength = 0; - if (auto err = processAugmentationString(frameData + augmentationStringField, - cieInfo, augmentationStringLength)) - return err; - - if (cieInfo._offsetOfPersonality != ~0U) { - // If we have augmentation data for the personality function, then we may - // need to implicitly generate its relocation. - - // Parse the EH Data field which is pointer sized. - uint64_t EHDataField = augmentationStringField + augmentationStringLength; - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); - - // Parse Code Align Factor which is a ULEB128. 
- uint64_t CodeAlignField = EHDataField + EHDataFieldSize; - unsigned lengthFieldSize = 0; - llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); - - // Parse Data Align Factor which is a SLEB128. - uint64_t DataAlignField = CodeAlignField + lengthFieldSize; - llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); - - // Parse Return Address Register which is a byte. - uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; - - // Parse the augmentation length which is a ULEB128. - uint64_t AugmentationLengthField = ReturnAddressField + 1; - uint64_t AugmentationLength = - llvm::decodeULEB128(frameData + AugmentationLengthField, - &lengthFieldSize); - - if (AugmentationLength != cieInfo._augmentationDataLength) - return llvm::make_error("CIE augmentation data length " - "mismatch"); - - // Get the start address of the augmentation data. - uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; - - // Parse the personality function from the augmentation data. - uint64_t PersonalityField = - AugmentationDataField + cieInfo._offsetOfPersonality; - - // Parse the personality encoding. - // FIXME: Verify that this is a 32-bit pcrel offset. - uint64_t PersonalityFunctionField = PersonalityField + 1; - - if (atom->begin() != atom->end()) { - // If we have an explicit relocation, then make sure it matches this - // offset as this is where we'd expect it to be applied to. - DefinedAtom::reference_iterator CurrentRef = atom->begin(); - if (CurrentRef->offsetInAtom() != PersonalityFunctionField) - return llvm::make_error("CIE personality reloc at " - "wrong offset"); - - if (++CurrentRef != atom->end()) - return llvm::make_error("CIE contains too many relocs"); - } else { - // Implicitly generate the personality function reloc. It's assumed to - // be a delta32 offset to a GOT entry. - // FIXME: Parse the encoding and check this. 
- int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); - uint64_t funcAddress = ehFrameSection->address + offset + - PersonalityFunctionField; - funcAddress += funcDelta; - - const MachODefinedAtom *func = nullptr; - Reference::Addend addend; - func = findAtomCoveringAddress(normalizedFile, file, funcAddress, - addend); - atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), - handler.unwindRefToPersonalityFunctionKind(), - PersonalityFunctionField, func, addend); - } - } else if (atom->begin() != atom->end()) { - // Otherwise, we expect there to be no relocations in this atom as the only - // relocation would have been to the personality function. - return llvm::make_error("unexpected relocation in CIE"); - } - - - cieInfos[atom] = std::move(cieInfo); - - return llvm::Error::success(); -} - -static llvm::Error processFDE(const NormalizedFile &normalizedFile, - MachOFile &file, - mach_o::ArchHandler &handler, - const Section *ehFrameSection, - MachODefinedAtom *atom, - uint64_t offset, - const CIEInfoMap &cieInfos) { - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - - // Compiler wasn't lazy and actually told us what it meant. - // Unfortunately, the compiler may not have generated references for all of - // [cie, func, lsda] and so we still need to parse the FDE and add references - // for any the compiler didn't generate. - if (atom->begin() != atom->end()) - atom->sortReferences(); - - DefinedAtom::reference_iterator CurrentRef = atom->begin(); - - // This helper returns the reference (if one exists) at the offset we are - // currently processing. It automatically increments the ref iterator if we - // do return a ref, and throws an error if we pass over a ref without - // comsuming it. 
- auto currentRefGetter = [&CurrentRef, - &atom](uint64_t Offset)->const Reference* { - // If there are no more refs found, then we are done. - if (CurrentRef == atom->end()) - return nullptr; - - const Reference *Ref = *CurrentRef; - - // If we haven't reached the offset for this reference, then return that - // we don't yet have a reference to process. - if (Offset < Ref->offsetInAtom()) - return nullptr; - - // If the offset is equal, then we want to process this ref. - if (Offset == Ref->offsetInAtom()) { - ++CurrentRef; - return Ref; - } - - // The current ref is at an offset which is earlier than the current - // offset, then we failed to consume it when we should have. In this case - // throw an error. - llvm::report_fatal_error("Skipped reference when processing FDE"); - }; - - // Helper to either get the reference at this current location, and verify - // that it is of the expected type, or add a reference of that type. - // Returns the reference target. - auto verifyOrAddReference = [&](uint64_t targetAddress, - Reference::KindValue refKind, - uint64_t refAddress, - bool allowsAddend)->const Atom* { - if (auto *ref = currentRefGetter(refAddress)) { - // The compiler already emitted a relocation for the CIE ref. This should - // have been converted to the correct type of reference in - // get[Pair]ReferenceInfo(). - assert(ref->kindValue() == refKind && - "Incorrect EHFrame reference kind"); - return ref->target(); - } - Reference::Addend addend; - auto *target = findAtomCoveringAddress(normalizedFile, file, - targetAddress, addend); - atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), - refKind, refAddress, target, addend); - - if (!allowsAddend) - assert(!addend && "EHFrame reference cannot have addend"); - return target; - }; - - const uint8_t *startFrameData = atom->rawContent().data(); - const uint8_t *frameData = startFrameData; - - uint32_t size = read32(frameData, isBig); - uint64_t cieFieldInFDE = size == 0xffffffffU - ? 
sizeof(uint32_t) + sizeof(uint64_t) - : sizeof(uint32_t); - - // Linker needs to fixup a reference from the FDE to its parent CIE (a - // 32-bit byte offset backwards in the __eh_frame section). - uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); - uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; - cieAddress -= cieDelta; - - auto *cieRefTarget = verifyOrAddReference(cieAddress, - handler.unwindRefToCIEKind(), - cieFieldInFDE, false); - const MachODefinedAtom *cie = dyn_cast(cieRefTarget); - assert(cie && cie->contentType() == DefinedAtom::typeCFI && - "FDE's CIE field does not point at the start of a CIE."); - - const CIEInfo &cieInfo = cieInfos.find(cie)->second; - - // Linker needs to fixup reference from the FDE to the function it's - // describing. FIXME: there are actually different ways to do this, and the - // particular method used is specified in the CIE's augmentation fields - // (hopefully) - uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); - - int64_t functionFromFDE = readSPtr(is64, isBig, - frameData + rangeFieldInFDE); - uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; - rangeStart += functionFromFDE; - - verifyOrAddReference(rangeStart, - handler.unwindRefToFunctionKind(), - rangeFieldInFDE, true); - - // Handle the augmentation data if there is any. - if (cieInfo._augmentationDataPresent) { - // First process the augmentation data length field. - uint64_t augmentationDataLengthFieldInFDE = - rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); - unsigned lengthFieldSize = 0; - uint64_t augmentationDataLength = - llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, - &lengthFieldSize); - - if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { - - // Look at the augmentation data field. 
- uint64_t augmentationDataFieldInFDE = - augmentationDataLengthFieldInFDE + lengthFieldSize; - - int64_t lsdaFromFDE = readSPtr(is64, isBig, - frameData + augmentationDataFieldInFDE); - uint64_t lsdaStart = - ehFrameSection->address + offset + augmentationDataFieldInFDE + - lsdaFromFDE; - - verifyOrAddReference(lsdaStart, - handler.unwindRefToFunctionKind(), - augmentationDataFieldInFDE, true); - } - } - - return llvm::Error::success(); -} - -llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, - MachOFile &file, - mach_o::ArchHandler &handler) { - - const Section *ehFrameSection = nullptr; - for (auto §ion : normalizedFile.sections) - if (section.segmentName == "__TEXT" && - section.sectionName == "__eh_frame") { - ehFrameSection = §ion; - break; - } - - // No __eh_frame so nothing to do. - if (!ehFrameSection) - return llvm::Error::success(); - - llvm::Error ehFrameErr = llvm::Error::success(); - CIEInfoMap cieInfos; - - file.eachAtomInSection(*ehFrameSection, - [&](MachODefinedAtom *atom, uint64_t offset) -> void { - assert(atom->contentType() == DefinedAtom::typeCFI); - - // Bail out if we've encountered an error. 
- if (ehFrameErr) - return; - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - if (ArchHandler::isDwarfCIE(isBig, atom)) - ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, - atom, offset, cieInfos); - else - ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, - atom, offset, cieInfos); - }); - - return ehFrameErr; -} - -llvm::Error parseObjCImageInfo(const Section §, - const NormalizedFile &normalizedFile, - MachOFile &file) { - - // struct objc_image_info { - // uint32_t version; // initially 0 - // uint32_t flags; - // }; - - ArrayRef content = sect.content; - if (content.size() != 8) - return llvm::make_error(sect.segmentName + "/" + - sect.sectionName + - " in file " + file.path() + - " should be 8 bytes in size"); - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - uint32_t version = read32(content.data(), isBig); - if (version) - return llvm::make_error(sect.segmentName + "/" + - sect.sectionName + - " in file " + file.path() + - " should have version=0"); - - uint32_t flags = read32(content.data() + 4, isBig); - if (flags & (MachOLinkingContext::objc_supports_gc | - MachOLinkingContext::objc_gc_only)) - return llvm::make_error(sect.segmentName + "/" + - sect.sectionName + - " in file " + file.path() + - " uses GC. This is not supported"); - - if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) - file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); - else - file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); - - file.setSwiftVersion((flags >> 8) & 0xFF); - - return llvm::Error::success(); -} - -/// Converts normalized mach-o file into an lld::File and lld::Atoms. 
-llvm::Expected> -objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs) { - auto file = std::make_unique(path); - if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) - return std::move(ec); - return std::unique_ptr(std::move(file)); -} - -llvm::Expected> -dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs) { - // Instantiate SharedLibraryFile object. - auto file = std::make_unique(path); - if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) - return std::move(ec); - return std::unique_ptr(std::move(file)); -} - -} // anonymous namespace - -namespace normalized { - -static bool isObjCImageInfo(const Section §) { - return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || - (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); -} - -llvm::Error -normalizedObjectToAtoms(MachOFile *file, - const NormalizedFile &normalizedFile, - bool copyRefs) { - LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " - << file->path() << "\n"); - bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); - - // Create atoms from each section. - for (auto § : normalizedFile.sections) { - - // If this is a debug-info section parse it specially. - if (isDebugInfoSection(sect)) - continue; - - // If the file contains an objc_image_info struct, then we should parse the - // ObjC flags and Swift version. - if (isObjCImageInfo(sect)) { - if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) - return ec; - // We then skip adding atoms for this section as we use the ObjCPass to - // re-emit this data after it has been aggregated for all files. 
- continue; - } - - bool customSectionName; - DefinedAtom::ContentType atomType = atomTypeFromSection(sect, - customSectionName); - if (auto ec = processSection(atomType, sect, customSectionName, - normalizedFile, *file, scatterable, copyRefs)) - return ec; - } - // Create atoms from undefined symbols. - for (auto &sym : normalizedFile.undefinedSymbols) { - // Undefined symbols with n_value != 0 are actually tentative definitions. - if (sym.value == Hex64(0)) { - file->addUndefinedAtom(sym.name, copyRefs); - } else { - file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, - DefinedAtom::Alignment(1 << (sym.desc >> 8)), - copyRefs); - } - } - - // Convert mach-o relocations to References - std::unique_ptr handler - = ArchHandler::create(normalizedFile.arch); - for (auto § : normalizedFile.sections) { - if (isDebugInfoSection(sect)) - continue; - if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, - *file, *handler)) - return ec; - } - - // Add additional arch-specific References - file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { - handler->addAdditionalReferences(*atom); - }); - - // Each __eh_frame section needs references to both __text (the function we're - // providing unwind info for) and itself (FDE -> CIE). These aren't - // represented in the relocations on some architectures, so we have to add - // them back in manually there. - if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) - return ec; - - // Process mach-o data-in-code regions array. That information is encoded in - // atoms as References at each transition point. 
- unsigned nextIndex = 0; - for (const DataInCode &entry : normalizedFile.dataInCode) { - ++nextIndex; - const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); - if (!s) { - return llvm::make_error(Twine("LC_DATA_IN_CODE address (" - + Twine(entry.offset) - + ") is not in any section")); - } - uint64_t offsetInSect = entry.offset - s->address; - uint32_t offsetInAtom; - MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, - &offsetInAtom); - if (offsetInAtom + entry.length > atom->size()) { - return llvm::make_error(Twine("LC_DATA_IN_CODE entry " - "(offset=" - + Twine(entry.offset) - + ", length=" - + Twine(entry.length) - + ") crosses atom boundary.")); - } - // Add reference that marks start of data-in-code. - atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), - handler->dataInCodeTransitionStart(*atom), - offsetInAtom, atom, entry.kind); - - // Peek at next entry, if it starts where this one ends, skip ending ref. - if (nextIndex < normalizedFile.dataInCode.size()) { - const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; - if (nextEntry.offset == (entry.offset + entry.length)) - continue; - } - - // If data goes to end of function, skip ending ref. - if ((offsetInAtom + entry.length) == atom->size()) - continue; - - // Add reference that marks end of data-in-code. - atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), - handler->dataInCodeTransitionEnd(*atom), - offsetInAtom+entry.length, atom, 0); - } - - // Cache some attributes on the file for use later. - file->setFlags(normalizedFile.flags); - file->setArch(normalizedFile.arch); - file->setOS(normalizedFile.os); - file->setMinVersion(normalizedFile.minOSverson); - file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); - - // Sort references in each atom to their canonical order. 
- for (const DefinedAtom* defAtom : file->defined()) { - reinterpret_cast(defAtom)->sortReferences(); - } - - if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) - return err; - - return llvm::Error::success(); -} - -llvm::Error -normalizedDylibToAtoms(MachODylibFile *file, - const NormalizedFile &normalizedFile, - bool copyRefs) { - file->setInstallName(normalizedFile.installName); - file->setCompatVersion(normalizedFile.compatVersion); - file->setCurrentVersion(normalizedFile.currentVersion); - - // Tell MachODylibFile object about all symbols it exports. - if (!normalizedFile.exportInfo.empty()) { - // If exports trie exists, use it instead of traditional symbol table. - for (const Export &exp : normalizedFile.exportInfo) { - bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); - // StringRefs from export iterator are ephemeral, so force copy. - file->addExportedSymbol(exp.name, weakDef, true); - } - } else { - for (auto &sym : normalizedFile.globalSymbols) { - assert((sym.scope & N_EXT) && "only expect external symbols here"); - bool weakDef = (sym.desc & N_WEAK_DEF); - file->addExportedSymbol(sym.name, weakDef, copyRefs); - } - } - // Tell MachODylibFile object about all dylibs it re-exports. - for (const DependentDylib &dep : normalizedFile.dependentDylibs) { - if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) - file->addReExportedDylib(dep.path); - } - return llvm::Error::success(); -} - -void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, - StringRef &segmentName, - StringRef §ionName, - SectionType §ionType, - SectionAttr §ionAttrs, - bool &relocsToDefinedCanBeImplicit) { - - for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; - p->atomType != DefinedAtom::typeUnknown; ++p) { - if (p->atomType != atomType) - continue; - // Wild carded entries are ignored for reverse lookups. 
- if (p->segmentName.empty() || p->sectionName.empty()) - continue; - segmentName = p->segmentName; - sectionName = p->sectionName; - sectionType = p->sectionType; - sectionAttrs = 0; - relocsToDefinedCanBeImplicit = false; - if (atomType == DefinedAtom::typeCode) - sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; - if (atomType == DefinedAtom::typeCFI) - relocsToDefinedCanBeImplicit = true; - return; - } - llvm_unreachable("content type not yet supported"); -} - -llvm::Expected> -normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs) { - switch (normalizedFile.fileType) { - case MH_DYLIB: - case MH_DYLIB_STUB: - return dylibToAtoms(normalizedFile, path, copyRefs); - case MH_OBJECT: - return objectToAtoms(normalizedFile, path, copyRefs); - default: - llvm_unreachable("unhandled MachO file type!"); - } -} - -} // namespace normalized -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp deleted file mode 100644 index 3826e97d62b9..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp +++ /dev/null @@ -1,840 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// -/// \file For mach-o object files, this implementation uses YAML I/O to -/// provide the convert between YAML and the normalized mach-o (NM). 
-/// -/// +------------+ +------+ -/// | normalized | <-> | yaml | -/// +------------+ +------+ - -#include "MachONormalizedFile.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "lld/ReaderWriter/YamlContext.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" -#include - -using llvm::StringRef; -using namespace llvm::yaml; -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; -using lld::YamlContext; - -LLVM_YAML_IS_SEQUENCE_VECTOR(Segment) -LLVM_YAML_IS_SEQUENCE_VECTOR(DependentDylib) -LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation) -LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation) -LLVM_YAML_IS_SEQUENCE_VECTOR(Export) -LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode) - - -// for compatibility with gcc-4.7 in C++11 mode, add extra namespace -namespace llvm { -namespace yaml { - -// A vector of Sections is a sequence. -template<> -struct SequenceTraits< std::vector
> { - static size_t size(IO &io, std::vector
&seq) { - return seq.size(); - } - static Section& element(IO &io, std::vector
&seq, size_t index) { - if ( index >= seq.size() ) - seq.resize(index+1); - return seq[index]; - } -}; - -template<> -struct SequenceTraits< std::vector > { - static size_t size(IO &io, std::vector &seq) { - return seq.size(); - } - static Symbol& element(IO &io, std::vector &seq, size_t index) { - if ( index >= seq.size() ) - seq.resize(index+1); - return seq[index]; - } -}; - -// A vector of Relocations is a sequence. -template<> -struct SequenceTraits< Relocations > { - static size_t size(IO &io, Relocations &seq) { - return seq.size(); - } - static Relocation& element(IO &io, Relocations &seq, size_t index) { - if ( index >= seq.size() ) - seq.resize(index+1); - return seq[index]; - } -}; - -// The content for a section is represented as a flow sequence of hex bytes. -template<> -struct SequenceTraits< ContentBytes > { - static size_t size(IO &io, ContentBytes &seq) { - return seq.size(); - } - static Hex8& element(IO &io, ContentBytes &seq, size_t index) { - if ( index >= seq.size() ) - seq.resize(index+1); - return seq[index]; - } - static const bool flow = true; -}; - -// The indirect symbols for a section is represented as a flow sequence -// of numbers (symbol table indexes). 
-template<> -struct SequenceTraits< IndirectSymbols > { - static size_t size(IO &io, IndirectSymbols &seq) { - return seq.size(); - } - static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) { - if ( index >= seq.size() ) - seq.resize(index+1); - return seq[index]; - } - static const bool flow = true; -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) { - io.enumCase(value, "unknown",lld::MachOLinkingContext::arch_unknown); - io.enumCase(value, "ppc", lld::MachOLinkingContext::arch_ppc); - io.enumCase(value, "x86", lld::MachOLinkingContext::arch_x86); - io.enumCase(value, "x86_64", lld::MachOLinkingContext::arch_x86_64); - io.enumCase(value, "armv6", lld::MachOLinkingContext::arch_armv6); - io.enumCase(value, "armv7", lld::MachOLinkingContext::arch_armv7); - io.enumCase(value, "armv7s", lld::MachOLinkingContext::arch_armv7s); - io.enumCase(value, "arm64", lld::MachOLinkingContext::arch_arm64); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) { - io.enumCase(value, "unknown", - lld::MachOLinkingContext::OS::unknown); - io.enumCase(value, "Mac OS X", - lld::MachOLinkingContext::OS::macOSX); - io.enumCase(value, "iOS", - lld::MachOLinkingContext::OS::iOS); - io.enumCase(value, "iOS Simulator", - lld::MachOLinkingContext::OS::iOS_simulator); - } -}; - - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, HeaderFileType &value) { - io.enumCase(value, "MH_OBJECT", llvm::MachO::MH_OBJECT); - io.enumCase(value, "MH_DYLIB", llvm::MachO::MH_DYLIB); - io.enumCase(value, "MH_EXECUTE", llvm::MachO::MH_EXECUTE); - io.enumCase(value, "MH_BUNDLE", llvm::MachO::MH_BUNDLE); - } -}; - - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &io, FileFlags &value) { - io.bitSetCase(value, "MH_TWOLEVEL", - llvm::MachO::MH_TWOLEVEL); - io.bitSetCase(value, 
"MH_SUBSECTIONS_VIA_SYMBOLS", - llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - } -}; - - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, SectionType &value) { - io.enumCase(value, "S_REGULAR", - llvm::MachO::S_REGULAR); - io.enumCase(value, "S_ZEROFILL", - llvm::MachO::S_ZEROFILL); - io.enumCase(value, "S_CSTRING_LITERALS", - llvm::MachO::S_CSTRING_LITERALS); - io.enumCase(value, "S_4BYTE_LITERALS", - llvm::MachO::S_4BYTE_LITERALS); - io.enumCase(value, "S_8BYTE_LITERALS", - llvm::MachO::S_8BYTE_LITERALS); - io.enumCase(value, "S_LITERAL_POINTERS", - llvm::MachO::S_LITERAL_POINTERS); - io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS", - llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS); - io.enumCase(value, "S_LAZY_SYMBOL_POINTERS", - llvm::MachO::S_LAZY_SYMBOL_POINTERS); - io.enumCase(value, "S_SYMBOL_STUBS", - llvm::MachO::S_SYMBOL_STUBS); - io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS", - llvm::MachO::S_MOD_INIT_FUNC_POINTERS); - io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS", - llvm::MachO::S_MOD_TERM_FUNC_POINTERS); - io.enumCase(value, "S_COALESCED", - llvm::MachO::S_COALESCED); - io.enumCase(value, "S_GB_ZEROFILL", - llvm::MachO::S_GB_ZEROFILL); - io.enumCase(value, "S_INTERPOSING", - llvm::MachO::S_INTERPOSING); - io.enumCase(value, "S_16BYTE_LITERALS", - llvm::MachO::S_16BYTE_LITERALS); - io.enumCase(value, "S_DTRACE_DOF", - llvm::MachO::S_DTRACE_DOF); - io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS", - llvm::MachO::S_LAZY_DYLIB_SYMBOL_POINTERS); - io.enumCase(value, "S_THREAD_LOCAL_REGULAR", - llvm::MachO::S_THREAD_LOCAL_REGULAR); - io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL", - llvm::MachO::S_THREAD_LOCAL_ZEROFILL); - io.enumCase(value, "S_THREAD_LOCAL_VARIABLES", - llvm::MachO::S_THREAD_LOCAL_VARIABLES); - io.enumCase(value, "S_THREAD_LOCAL_VARIABLE_POINTERS", - llvm::MachO::S_THREAD_LOCAL_VARIABLE_POINTERS); - io.enumCase(value, "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS", - llvm::MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); - } 
-}; - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &io, SectionAttr &value) { - io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS", - llvm::MachO::S_ATTR_PURE_INSTRUCTIONS); - io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS", - llvm::MachO::S_ATTR_SOME_INSTRUCTIONS); - io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP", - llvm::MachO::S_ATTR_NO_DEAD_STRIP); - io.bitSetCase(value, "S_ATTR_EXT_RELOC", - llvm::MachO::S_ATTR_EXT_RELOC); - io.bitSetCase(value, "S_ATTR_LOC_RELOC", - llvm::MachO::S_ATTR_LOC_RELOC); - io.bitSetCase(value, "S_ATTR_DEBUG", - llvm::MachO::S_ATTR_DEBUG); - } -}; - -/// This is a custom formatter for SectionAlignment. Values are -/// the power to raise by, ie, the n in 2^n. -template <> struct ScalarTraits { - static void output(const SectionAlignment &value, void *ctxt, - raw_ostream &out) { - out << llvm::format("%d", (uint32_t)value); - } - - static StringRef input(StringRef scalar, void *ctxt, - SectionAlignment &value) { - uint32_t alignment; - if (scalar.getAsInteger(0, alignment)) { - return "malformed alignment value"; - } - if (!llvm::isPowerOf2_32(alignment)) - return "alignment must be a power of 2"; - value = alignment; - return StringRef(); // returning empty string means success - } - - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, NListType &value) { - io.enumCase(value, "N_UNDF", llvm::MachO::N_UNDF); - io.enumCase(value, "N_ABS", llvm::MachO::N_ABS); - io.enumCase(value, "N_SECT", llvm::MachO::N_SECT); - io.enumCase(value, "N_PBUD", llvm::MachO::N_PBUD); - io.enumCase(value, "N_INDR", llvm::MachO::N_INDR); - } -}; - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &io, SymbolScope &value) { - io.bitSetCase(value, "N_EXT", llvm::MachO::N_EXT); - io.bitSetCase(value, "N_PEXT", llvm::MachO::N_PEXT); - } -}; - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &io, SymbolDesc 
&value) { - io.bitSetCase(value, "N_NO_DEAD_STRIP", llvm::MachO::N_NO_DEAD_STRIP); - io.bitSetCase(value, "N_WEAK_REF", llvm::MachO::N_WEAK_REF); - io.bitSetCase(value, "N_WEAK_DEF", llvm::MachO::N_WEAK_DEF); - io.bitSetCase(value, "N_ARM_THUMB_DEF", llvm::MachO::N_ARM_THUMB_DEF); - io.bitSetCase(value, "N_SYMBOL_RESOLVER", llvm::MachO::N_SYMBOL_RESOLVER); - } -}; - - -template <> -struct MappingTraits
{ - struct NormalizedContentBytes; - static void mapping(IO &io, Section §) { - io.mapRequired("segment", sect.segmentName); - io.mapRequired("section", sect.sectionName); - io.mapRequired("type", sect.type); - io.mapOptional("attributes", sect.attributes); - io.mapOptional("alignment", sect.alignment, (SectionAlignment)1); - io.mapRequired("address", sect.address); - if (isZeroFillSection(sect.type)) { - // S_ZEROFILL sections use "size:" instead of "content:" - uint64_t size = sect.content.size(); - io.mapOptional("size", size); - if (!io.outputting()) { - uint8_t *bytes = nullptr; - sect.content = makeArrayRef(bytes, size); - } - } else { - MappingNormalization> content( - io, sect.content); - io.mapOptional("content", content->_normalizedContent); - } - io.mapOptional("relocations", sect.relocations); - io.mapOptional("indirect-syms", sect.indirectSymbols); - } - - struct NormalizedContent { - NormalizedContent(IO &io) : _io(io) {} - NormalizedContent(IO &io, ArrayRef content) : _io(io) { - // When writing yaml, copy content byte array to Hex8 vector. - for (auto &c : content) { - _normalizedContent.push_back(c); - } - } - ArrayRef denormalize(IO &io) { - // When reading yaml, allocate byte array owned by NormalizedFile and - // copy Hex8 vector to byte array. 
- YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - NormalizedFile *file = info->_normalizeMachOFile; - assert(file != nullptr); - size_t size = _normalizedContent.size(); - if (!size) - return None; - uint8_t *bytes = file->ownedAllocations.Allocate(size); - std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes); - return makeArrayRef(bytes, size); - } - - IO &_io; - ContentBytes _normalizedContent; - }; -}; - - -template <> -struct MappingTraits { - static void mapping(IO &io, Relocation &reloc) { - io.mapRequired("offset", reloc.offset); - io.mapOptional("scattered", reloc.scattered, false); - io.mapRequired("type", reloc.type); - io.mapRequired("length", reloc.length); - io.mapRequired("pc-rel", reloc.pcRel); - if ( !reloc.scattered ) - io.mapRequired("extern", reloc.isExtern); - if ( reloc.scattered ) - io.mapRequired("value", reloc.value); - if ( !reloc.scattered ) - io.mapRequired("symbol", reloc.symbol); - } -}; - - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, RelocationInfoType &value) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - NormalizedFile *file = info->_normalizeMachOFile; - assert(file != nullptr); - switch (file->arch) { - case lld::MachOLinkingContext::arch_x86_64: - io.enumCase(value, "X86_64_RELOC_UNSIGNED", - llvm::MachO::X86_64_RELOC_UNSIGNED); - io.enumCase(value, "X86_64_RELOC_SIGNED", - llvm::MachO::X86_64_RELOC_SIGNED); - io.enumCase(value, "X86_64_RELOC_BRANCH", - llvm::MachO::X86_64_RELOC_BRANCH); - io.enumCase(value, "X86_64_RELOC_GOT_LOAD", - llvm::MachO::X86_64_RELOC_GOT_LOAD); - io.enumCase(value, "X86_64_RELOC_GOT", - llvm::MachO::X86_64_RELOC_GOT); - io.enumCase(value, "X86_64_RELOC_SUBTRACTOR", - llvm::MachO::X86_64_RELOC_SUBTRACTOR); - io.enumCase(value, "X86_64_RELOC_SIGNED_1", - llvm::MachO::X86_64_RELOC_SIGNED_1); - io.enumCase(value, "X86_64_RELOC_SIGNED_2", - llvm::MachO::X86_64_RELOC_SIGNED_2); - 
io.enumCase(value, "X86_64_RELOC_SIGNED_4", - llvm::MachO::X86_64_RELOC_SIGNED_4); - io.enumCase(value, "X86_64_RELOC_TLV", - llvm::MachO::X86_64_RELOC_TLV); - break; - case lld::MachOLinkingContext::arch_x86: - io.enumCase(value, "GENERIC_RELOC_VANILLA", - llvm::MachO::GENERIC_RELOC_VANILLA); - io.enumCase(value, "GENERIC_RELOC_PAIR", - llvm::MachO::GENERIC_RELOC_PAIR); - io.enumCase(value, "GENERIC_RELOC_SECTDIFF", - llvm::MachO::GENERIC_RELOC_SECTDIFF); - io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF", - llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); - io.enumCase(value, "GENERIC_RELOC_TLV", - llvm::MachO::GENERIC_RELOC_TLV); - break; - case lld::MachOLinkingContext::arch_armv6: - case lld::MachOLinkingContext::arch_armv7: - case lld::MachOLinkingContext::arch_armv7s: - io.enumCase(value, "ARM_RELOC_VANILLA", - llvm::MachO::ARM_RELOC_VANILLA); - io.enumCase(value, "ARM_RELOC_PAIR", - llvm::MachO::ARM_RELOC_PAIR); - io.enumCase(value, "ARM_RELOC_SECTDIFF", - llvm::MachO::ARM_RELOC_SECTDIFF); - io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF", - llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF); - io.enumCase(value, "ARM_RELOC_BR24", - llvm::MachO::ARM_RELOC_BR24); - io.enumCase(value, "ARM_THUMB_RELOC_BR22", - llvm::MachO::ARM_THUMB_RELOC_BR22); - io.enumCase(value, "ARM_RELOC_HALF", - llvm::MachO::ARM_RELOC_HALF); - io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF", - llvm::MachO::ARM_RELOC_HALF_SECTDIFF); - break; - case lld::MachOLinkingContext::arch_arm64: - io.enumCase(value, "ARM64_RELOC_UNSIGNED", - llvm::MachO::ARM64_RELOC_UNSIGNED); - io.enumCase(value, "ARM64_RELOC_SUBTRACTOR", - llvm::MachO::ARM64_RELOC_SUBTRACTOR); - io.enumCase(value, "ARM64_RELOC_BRANCH26", - llvm::MachO::ARM64_RELOC_BRANCH26); - io.enumCase(value, "ARM64_RELOC_PAGE21", - llvm::MachO::ARM64_RELOC_PAGE21); - io.enumCase(value, "ARM64_RELOC_PAGEOFF12", - llvm::MachO::ARM64_RELOC_PAGEOFF12); - io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGE21", - llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21); - 
io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12", - llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); - io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT", - llvm::MachO::ARM64_RELOC_POINTER_TO_GOT); - io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGE21", - llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); - io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", - llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - io.enumCase(value, "ARM64_RELOC_ADDEND", - llvm::MachO::ARM64_RELOC_ADDEND); - break; - default: - llvm_unreachable("unknown architecture"); - } - } -}; - - -template <> -struct MappingTraits { - static void mapping(IO &io, Symbol& sym) { - io.mapRequired("name", sym.name); - io.mapRequired("type", sym.type); - io.mapOptional("scope", sym.scope, SymbolScope(0)); - io.mapOptional("sect", sym.sect, (uint8_t)0); - if (sym.type == llvm::MachO::N_UNDF) { - // In undef symbols, desc field contains alignment/ordinal info - // which is better represented as a hex vaule. - uint16_t t1 = sym.desc; - Hex16 t2 = t1; - io.mapOptional("desc", t2, Hex16(0)); - sym.desc = t2; - } else { - // In defined symbols, desc fit is a set of option bits. - io.mapOptional("desc", sym.desc, SymbolDesc(0)); - } - io.mapRequired("value", sym.value); - } -}; - -// Custom mapping for VMProtect (e.g. "r-x"). -template <> -struct ScalarTraits { - static void output(const VMProtect &value, void*, raw_ostream &out) { - out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-'); - out << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-'); - out << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-'); - } - static StringRef input(StringRef scalar, void*, VMProtect &value) { - value = 0; - if (scalar.size() != 3) - return "segment access protection must be three chars (e.g. 
\"r-x\")"; - switch (scalar[0]) { - case 'r': - value = llvm::MachO::VM_PROT_READ; - break; - case '-': - break; - default: - return "segment access protection first char must be 'r' or '-'"; - } - switch (scalar[1]) { - case 'w': - value = value | llvm::MachO::VM_PROT_WRITE; - break; - case '-': - break; - default: - return "segment access protection second char must be 'w' or '-'"; - } - switch (scalar[2]) { - case 'x': - value = value | llvm::MachO::VM_PROT_EXECUTE; - break; - case '-': - break; - default: - return "segment access protection third char must be 'x' or '-'"; - } - // Return the empty string on success, - return StringRef(); - } - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - - -template <> -struct MappingTraits { - static void mapping(IO &io, Segment& seg) { - io.mapRequired("name", seg.name); - io.mapRequired("address", seg.address); - io.mapRequired("size", seg.size); - io.mapRequired("init-access", seg.init_access); - io.mapRequired("max-access", seg.max_access); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, LoadCommandType &value) { - io.enumCase(value, "LC_LOAD_DYLIB", - llvm::MachO::LC_LOAD_DYLIB); - io.enumCase(value, "LC_LOAD_WEAK_DYLIB", - llvm::MachO::LC_LOAD_WEAK_DYLIB); - io.enumCase(value, "LC_REEXPORT_DYLIB", - llvm::MachO::LC_REEXPORT_DYLIB); - io.enumCase(value, "LC_LOAD_UPWARD_DYLIB", - llvm::MachO::LC_LOAD_UPWARD_DYLIB); - io.enumCase(value, "LC_LAZY_LOAD_DYLIB", - llvm::MachO::LC_LAZY_LOAD_DYLIB); - io.enumCase(value, "LC_VERSION_MIN_MACOSX", - llvm::MachO::LC_VERSION_MIN_MACOSX); - io.enumCase(value, "LC_VERSION_MIN_IPHONEOS", - llvm::MachO::LC_VERSION_MIN_IPHONEOS); - io.enumCase(value, "LC_VERSION_MIN_TVOS", - llvm::MachO::LC_VERSION_MIN_TVOS); - io.enumCase(value, "LC_VERSION_MIN_WATCHOS", - llvm::MachO::LC_VERSION_MIN_WATCHOS); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &io, DependentDylib& dylib) { - 
io.mapRequired("path", dylib.path); - io.mapOptional("kind", dylib.kind, - llvm::MachO::LC_LOAD_DYLIB); - io.mapOptional("compat-version", dylib.compatVersion, - PackedVersion(0x10000)); - io.mapOptional("current-version", dylib.currentVersion, - PackedVersion(0x10000)); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, RebaseType &value) { - io.enumCase(value, "REBASE_TYPE_POINTER", - llvm::MachO::REBASE_TYPE_POINTER); - io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32", - llvm::MachO::REBASE_TYPE_TEXT_PCREL32); - io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32", - llvm::MachO::REBASE_TYPE_TEXT_ABSOLUTE32); - } -}; - - -template <> -struct MappingTraits { - static void mapping(IO &io, RebaseLocation& rebase) { - io.mapRequired("segment-index", rebase.segIndex); - io.mapRequired("segment-offset", rebase.segOffset); - io.mapOptional("kind", rebase.kind, - llvm::MachO::REBASE_TYPE_POINTER); - } -}; - - - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, BindType &value) { - io.enumCase(value, "BIND_TYPE_POINTER", - llvm::MachO::BIND_TYPE_POINTER); - io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32", - llvm::MachO::BIND_TYPE_TEXT_ABSOLUTE32); - io.enumCase(value, "BIND_TYPE_TEXT_PCREL32", - llvm::MachO::BIND_TYPE_TEXT_PCREL32); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &io, BindLocation &bind) { - io.mapRequired("segment-index", bind.segIndex); - io.mapRequired("segment-offset", bind.segOffset); - io.mapOptional("kind", bind.kind, - llvm::MachO::BIND_TYPE_POINTER); - io.mapOptional("can-be-null", bind.canBeNull, false); - io.mapRequired("ordinal", bind.ordinal); - io.mapRequired("symbol-name", bind.symbolName); - io.mapOptional("addend", bind.addend, Hex64(0)); - } -}; - - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, ExportSymbolKind &value) { - io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_REGULAR", - 
llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); - io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL", - llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); - io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE", - llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); - } -}; - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &io, ExportFlags &value) { - io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION", - llvm::MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); - io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT", - llvm::MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); - io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER", - llvm::MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); - } -}; - - -template <> -struct MappingTraits { - static void mapping(IO &io, Export &exp) { - io.mapRequired("name", exp.name); - io.mapOptional("offset", exp.offset); - io.mapOptional("kind", exp.kind, - llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); - if (!io.outputting() || exp.flags) - io.mapOptional("flags", exp.flags); - io.mapOptional("other", exp.otherOffset, Hex32(0)); - io.mapOptional("other-name", exp.otherName, StringRef()); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, DataRegionType &value) { - io.enumCase(value, "DICE_KIND_DATA", - llvm::MachO::DICE_KIND_DATA); - io.enumCase(value, "DICE_KIND_JUMP_TABLE8", - llvm::MachO::DICE_KIND_JUMP_TABLE8); - io.enumCase(value, "DICE_KIND_JUMP_TABLE16", - llvm::MachO::DICE_KIND_JUMP_TABLE16); - io.enumCase(value, "DICE_KIND_JUMP_TABLE32", - llvm::MachO::DICE_KIND_JUMP_TABLE32); - io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32", - llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &io, DataInCode &entry) { - io.mapRequired("offset", entry.offset); - io.mapRequired("length", entry.length); - io.mapRequired("kind", entry.kind); - } -}; - -template <> -struct ScalarTraits { - static void output(const 
PackedVersion &value, void*, raw_ostream &out) { - out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF); - if (value & 0xFF) { - out << llvm::format(".%d", (value & 0xFF)); - } - } - static StringRef input(StringRef scalar, void*, PackedVersion &result) { - uint32_t value; - if (lld::MachOLinkingContext::parsePackedVersion(scalar, value)) - return "malformed version number"; - result = value; - // Return the empty string on success, - return StringRef(); - } - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -template <> -struct MappingTraits { - static void mapping(IO &io, NormalizedFile &file) { - io.mapRequired("arch", file.arch); - io.mapRequired("file-type", file.fileType); - io.mapOptional("flags", file.flags); - io.mapOptional("dependents", file.dependentDylibs); - io.mapOptional("install-name", file.installName, StringRef()); - io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000)); - io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000)); - io.mapOptional("has-UUID", file.hasUUID, true); - io.mapOptional("rpaths", file.rpaths); - io.mapOptional("entry-point", file.entryAddress, Hex64(0)); - io.mapOptional("stack-size", file.stackSize, Hex64(0)); - io.mapOptional("source-version", file.sourceVersion, Hex64(0)); - io.mapOptional("OS", file.os); - io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0)); - io.mapOptional("min-os-version-kind", file.minOSVersionKind, (LoadCommandType)0); - io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0)); - io.mapOptional("segments", file.segments); - io.mapOptional("sections", file.sections); - io.mapOptional("local-symbols", file.localSymbols); - io.mapOptional("global-symbols", file.globalSymbols); - io.mapOptional("undefined-symbols",file.undefinedSymbols); - io.mapOptional("page-size", file.pageSize, Hex32(4096)); - io.mapOptional("rebasings", file.rebasingInfo); - io.mapOptional("bindings", file.bindingInfo); 
- io.mapOptional("weak-bindings", file.weakBindingInfo); - io.mapOptional("lazy-bindings", file.lazyBindingInfo); - io.mapOptional("exports", file.exportInfo); - io.mapOptional("dataInCode", file.dataInCode); - } - static std::string validate(IO &io, NormalizedFile &file) { return {}; } -}; - -} // namespace llvm -} // namespace yaml - - -namespace lld { -namespace mach_o { - -/// Handles !mach-o tagged yaml documents. -bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io, - const lld::File *&file) const { - if (!io.mapTag("!mach-o")) - return false; - // Step 1: parse yaml into normalized mach-o struct. - NormalizedFile nf; - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - assert(info->_normalizeMachOFile == nullptr); - info->_normalizeMachOFile = &nf; - MappingTraits::mapping(io, nf); - // Step 2: parse normalized mach-o struct into atoms. - auto fileOrError = normalizedToAtoms(nf, info->_path, true); - - // Check that we parsed successfully. - if (!fileOrError) { - std::string buffer; - llvm::raw_string_ostream stream(buffer); - handleAllErrors(fileOrError.takeError(), - [&](const llvm::ErrorInfoBase &EI) { - EI.log(stream); - stream << "\n"; - }); - io.setError(stream.str()); - return false; - } - - if (nf.arch != _arch) { - io.setError(Twine("file is wrong architecture. Expected (" - + MachOLinkingContext::nameFromArch(_arch) - + ") found (" - + MachOLinkingContext::nameFromArch(nf.arch) - + ")")); - return false; - } - info->_normalizeMachOFile = nullptr; - file = fileOrError->release(); - return true; -} - - - -namespace normalized { - -/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. -llvm::Expected> -readYaml(std::unique_ptr &mb) { - // Make empty NormalizedFile. - std::unique_ptr f(new NormalizedFile()); - - // Create YAML Input parser. 
- YamlContext yamlContext; - yamlContext._normalizeMachOFile = f.get(); - llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); - - // Fill NormalizedFile by parsing yaml. - yin >> *f; - - // Return error if there were parsing problems. - if (auto ec = yin.error()) - return llvm::make_error(Twine("YAML parsing error: ") - + ec.message()); - - // Hand ownership of instantiated NormalizedFile to caller. - return std::move(f); -} - - -/// Writes a yaml encoded mach-o files from an in-memory normalized view. -std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) { - // YAML I/O is not const aware, so need to cast away ;-( - NormalizedFile *f = const_cast(&file); - - // Create yaml Output writer, using yaml options for context. - YamlContext yamlContext; - yamlContext._normalizeMachOFile = f; - llvm::yaml::Output yout(out, &yamlContext); - - // Stream out yaml. - yout << *f; - - return std::error_code(); -} - -} // namespace normalized -} // namespace mach_o -} // namespace lld diff --git a/lld/lib/ReaderWriter/MachO/MachOPasses.h b/lld/lib/ReaderWriter/MachO/MachOPasses.h deleted file mode 100644 index 93cd3e4df281..000000000000 --- a/lld/lib/ReaderWriter/MachO/MachOPasses.h +++ /dev/null @@ -1,29 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachOPasses.h -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_PASSES_H -#define LLD_READER_WRITER_MACHO_PASSES_H - -#include "lld/Core/PassManager.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" - -namespace lld { -namespace mach_o { - -void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx); -void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx); -void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx); -void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx); -void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx); -void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx); -void addShimPass(PassManager &pm, const MachOLinkingContext &ctx); - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_PASSES_H diff --git a/lld/lib/ReaderWriter/MachO/ObjCPass.cpp b/lld/lib/ReaderWriter/MachO/ObjCPass.cpp deleted file mode 100644 index 02a95b5aa0c0..000000000000 --- a/lld/lib/ReaderWriter/MachO/ObjCPass.cpp +++ /dev/null @@ -1,131 +0,0 @@ -//===- lib/ReaderWriter/MachO/ObjCPass.cpp -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "File.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "MachOPasses.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" - -namespace lld { -namespace mach_o { - -/// -/// ObjC Image Info Atom created by the ObjC pass. -/// -class ObjCImageInfoAtom : public SimpleDefinedAtom { -public: - ObjCImageInfoAtom(const File &file, bool isBig, - MachOLinkingContext::ObjCConstraint objCConstraint, - uint32_t swiftVersion) - : SimpleDefinedAtom(file) { - - Data.info.version = 0; - - switch (objCConstraint) { - case MachOLinkingContext::objc_unknown: - llvm_unreachable("Shouldn't run the objc pass without a constraint"); - case MachOLinkingContext::objc_supports_gc: - case MachOLinkingContext::objc_gc_only: - llvm_unreachable("GC is not supported"); - case MachOLinkingContext::objc_retainReleaseForSimulator: - // The retain/release for simulator flag is already the correct - // encoded value for the data so just set it here. - Data.info.flags = (uint32_t)objCConstraint; - break; - case MachOLinkingContext::objc_retainRelease: - // We don't need to encode this flag, so just leave the flags as 0. 
- Data.info.flags = 0; - break; - } - - Data.info.flags |= (swiftVersion << 8); - - normalized::write32(Data.bytes + 4, Data.info.flags, isBig); - } - - ~ObjCImageInfoAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeObjCImageInfo; - } - - Alignment alignment() const override { - return 4; - } - - uint64_t size() const override { - return 8; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permR__; - } - - ArrayRef rawContent() const override { - return llvm::makeArrayRef(Data.bytes, size()); - } - -private: - - struct objc_image_info { - uint32_t version; - uint32_t flags; - }; - - union { - objc_image_info info; - uint8_t bytes[8]; - } Data; -}; - -class ObjCPass : public Pass { -public: - ObjCPass(const MachOLinkingContext &context) - : _ctx(context), - _file(*_ctx.make_file("")) { - _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); - } - - llvm::Error perform(SimpleFile &mergedFile) override { - // Add the image info. - mergedFile.addAtom(*getImageInfo()); - - return llvm::Error::success(); - } - -private: - - const DefinedAtom* getImageInfo() { - bool IsBig = MachOLinkingContext::isBigEndian(_ctx.arch()); - return new (_file.allocator()) ObjCImageInfoAtom(_file, IsBig, - _ctx.objcConstraint(), - _ctx.swiftVersion()); - } - - const MachOLinkingContext &_ctx; - MachOFile &_file; -}; - - - -void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx) { - pm.add(std::make_unique(ctx)); -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/SectCreateFile.h b/lld/lib/ReaderWriter/MachO/SectCreateFile.h deleted file mode 100644 index 7bb98e16695c..000000000000 --- a/lld/lib/ReaderWriter/MachO/SectCreateFile.h +++ /dev/null @@ -1,101 +0,0 @@ -//===---- lib/ReaderWriter/MachO/SectCreateFile.h ---------------*- c++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H -#define LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H - -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/Simple.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" - -namespace lld { -namespace mach_o { - -// -// A FlateNamespaceFile instance may be added as a resolution source of last -// resort, depending on how -flat_namespace and -undefined are set. -// -class SectCreateFile : public File { -public: - class SectCreateAtom : public SimpleDefinedAtom { - public: - SectCreateAtom(const File &file, StringRef segName, StringRef sectName, - std::unique_ptr content) - : SimpleDefinedAtom(file), - _combinedName((segName + "/" + sectName).str()), - _content(std::move(content)) {} - - ~SectCreateAtom() override = default; - - uint64_t size() const override { return _content->getBufferSize(); } - - Scope scope() const override { return scopeGlobal; } - - ContentType contentType() const override { return typeSectCreate; } - - SectionChoice sectionChoice() const override { return sectionCustomRequired; } - - StringRef customSectionName() const override { return _combinedName; } - - DeadStripKind deadStrip() const override { return deadStripNever; } - - ArrayRef rawContent() const override { - const uint8_t *data = - reinterpret_cast(_content->getBufferStart()); - return ArrayRef(data, _content->getBufferSize()); - } - - StringRef segmentName() const { return _segName; } - StringRef sectionName() const { return _sectName; } - - private: - std::string _combinedName; - StringRef _segName; - StringRef _sectName; - std::unique_ptr _content; - }; - - SectCreateFile() : File("sectcreate", kindSectCreateObject) {} - - void addSection(StringRef seg, StringRef sect, - std::unique_ptr content) { - _definedAtoms.push_back( - 
new (allocator()) SectCreateAtom(*this, seg, sect, std::move(content))); - } - - const AtomRange defined() const override { - return _definedAtoms; - } - - const AtomRange undefined() const override { - return _noUndefinedAtoms; - } - - const AtomRange sharedLibrary() const override { - return _noSharedLibraryAtoms; - } - - const AtomRange absolute() const override { - return _noAbsoluteAtoms; - } - - void clearAtoms() override { - _definedAtoms.clear(); - _noUndefinedAtoms.clear(); - _noSharedLibraryAtoms.clear(); - _noAbsoluteAtoms.clear(); - } - -private: - AtomVector _definedAtoms; -}; - -} // namespace mach_o -} // namespace lld - -#endif // LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H diff --git a/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/lld/lib/ReaderWriter/MachO/ShimPass.cpp deleted file mode 100644 index a5b34cfe8de6..000000000000 --- a/lld/lib/ReaderWriter/MachO/ShimPass.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//===- lib/ReaderWriter/MachO/ShimPass.cpp -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This linker pass updates branch-sites whose target is a different mode -// (thumb vs arm). -// -// Arm code has two instruction encodings thumb and arm. When branching from -// one code encoding to another, you need to use an instruction that switches -// the instruction mode. Usually the transition only happens at call sites, and -// the linker can transform a BL instruction in BLX (or vice versa). But if the -// compiler did a tail call optimization and a function ends with a branch (not -// branch and link), there is no pc-rel BX instruction. -// -// The ShimPass looks for pc-rel B instructions that will need to switch mode. 
-// For those cases it synthesizes a shim which does the transition, then -// modifies the original atom with the B instruction to target to the shim atom. -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "File.h" -#include "MachOPasses.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" - -namespace lld { -namespace mach_o { - -class ShimPass : public Pass { -public: - ShimPass(const MachOLinkingContext &context) - : _ctx(context), _archHandler(_ctx.archHandler()), - _stubInfo(_archHandler.stubInfo()), - _file(*_ctx.make_file("")) { - _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); - } - - llvm::Error perform(SimpleFile &mergedFile) override { - // Scan all references in all atoms. - for (const DefinedAtom *atom : mergedFile.defined()) { - for (const Reference *ref : *atom) { - // Look at non-call branches. - if (!_archHandler.isNonCallBranch(*ref)) - continue; - const Atom *target = ref->target(); - assert(target != nullptr); - if (const lld::DefinedAtom *daTarget = dyn_cast(target)) { - bool atomIsThumb = _archHandler.isThumbFunction(*atom); - bool targetIsThumb = _archHandler.isThumbFunction(*daTarget); - if (atomIsThumb != targetIsThumb) - updateBranchToUseShim(atomIsThumb, *daTarget, ref); - } - } - } - // Exit early if no shims needed. - if (_targetToShim.empty()) - return llvm::Error::success(); - - // Sort shim atoms so the layout order is stable. - std::vector shims; - shims.reserve(_targetToShim.size()); - for (auto element : _targetToShim) { - shims.push_back(element.second); - } - std::sort(shims.begin(), shims.end(), - [](const DefinedAtom *l, const DefinedAtom *r) { - return (l->name() < r->name()); - }); - - // Add all shims to master file. 
- for (const DefinedAtom *shim : shims) - mergedFile.addAtom(*shim); - - return llvm::Error::success(); - } - -private: - - void updateBranchToUseShim(bool thumbToArm, const DefinedAtom& target, - const Reference *ref) { - // Make file-format specific stub and other support atoms. - const DefinedAtom *shim = this->getShim(thumbToArm, target); - assert(shim != nullptr); - // Switch branch site to target shim atom. - const_cast(ref)->setTarget(shim); - } - - const DefinedAtom* getShim(bool thumbToArm, const DefinedAtom& target) { - auto pos = _targetToShim.find(&target); - if ( pos != _targetToShim.end() ) { - // Reuse an existing shim. - assert(pos->second != nullptr); - return pos->second; - } else { - // There is no existing shim, so create a new one. - const DefinedAtom *shim = _archHandler.createShim(_file, thumbToArm, - target); - _targetToShim[&target] = shim; - return shim; - } - } - - const MachOLinkingContext &_ctx; - mach_o::ArchHandler &_archHandler; - const ArchHandler::StubInfo &_stubInfo; - MachOFile &_file; - llvm::DenseMap _targetToShim; -}; - - - -void addShimPass(PassManager &pm, const MachOLinkingContext &ctx) { - pm.add(std::make_unique(ctx)); -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/StubsPass.cpp b/lld/lib/ReaderWriter/MachO/StubsPass.cpp deleted file mode 100644 index fbbd8b2c7584..000000000000 --- a/lld/lib/ReaderWriter/MachO/StubsPass.cpp +++ /dev/null @@ -1,377 +0,0 @@ -//===- lib/ReaderWriter/MachO/StubsPass.cpp ---------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This linker pass updates call-sites which have references to shared library -// atoms to instead have a reference to a stub (PLT entry) for the specified -// symbol. Each file format defines a subclass of StubsPass which implements -// the abstract methods for creating the file format specific StubAtoms. -// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "File.h" -#include "MachOPasses.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "lld/Core/Simple.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" - -namespace lld { -namespace mach_o { - -// -// Lazy Pointer Atom created by the stubs pass. -// -class LazyPointerAtom : public SimpleDefinedAtom { -public: - LazyPointerAtom(const File &file, bool is64) - : SimpleDefinedAtom(file), _is64(is64) { } - - ~LazyPointerAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeLazyPointer; - } - - Alignment alignment() const override { - return _is64 ? 8 : 4; - } - - uint64_t size() const override { - return _is64 ? 8 : 4; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permRW_; - } - - ArrayRef rawContent() const override { - static const uint8_t zeros[] = - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - return llvm::makeArrayRef(zeros, size()); - } - -private: - const bool _is64; -}; - -// -// NonLazyPointer (GOT) Atom created by the stubs pass. 
-// -class NonLazyPointerAtom : public SimpleDefinedAtom { -public: - NonLazyPointerAtom(const File &file, bool is64, ContentType contentType) - : SimpleDefinedAtom(file), _is64(is64), _contentType(contentType) { } - - ~NonLazyPointerAtom() override = default; - - ContentType contentType() const override { - return _contentType; - } - - Alignment alignment() const override { - return _is64 ? 8 : 4; - } - - uint64_t size() const override { - return _is64 ? 8 : 4; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permRW_; - } - - ArrayRef rawContent() const override { - static const uint8_t zeros[] = - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - return llvm::makeArrayRef(zeros, size()); - } - -private: - const bool _is64; - const ContentType _contentType; -}; - -// -// Stub Atom created by the stubs pass. -// -class StubAtom : public SimpleDefinedAtom { -public: - StubAtom(const File &file, const ArchHandler::StubInfo &stubInfo) - : SimpleDefinedAtom(file), _stubInfo(stubInfo){ } - - ~StubAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeStub; - } - - Alignment alignment() const override { - return 1 << _stubInfo.codeAlignment; - } - - uint64_t size() const override { - return _stubInfo.stubSize; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permR_X; - } - - ArrayRef rawContent() const override { - return llvm::makeArrayRef(_stubInfo.stubBytes, _stubInfo.stubSize); - } - -private: - const ArchHandler::StubInfo &_stubInfo; -}; - -// -// Stub Helper Atom created by the stubs pass. 
-// -class StubHelperAtom : public SimpleDefinedAtom { -public: - StubHelperAtom(const File &file, const ArchHandler::StubInfo &stubInfo) - : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } - - ~StubHelperAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeStubHelper; - } - - Alignment alignment() const override { - return 1 << _stubInfo.codeAlignment; - } - - uint64_t size() const override { - return _stubInfo.stubHelperSize; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permR_X; - } - - ArrayRef rawContent() const override { - return llvm::makeArrayRef(_stubInfo.stubHelperBytes, - _stubInfo.stubHelperSize); - } - -private: - const ArchHandler::StubInfo &_stubInfo; -}; - -// -// Stub Helper Common Atom created by the stubs pass. -// -class StubHelperCommonAtom : public SimpleDefinedAtom { -public: - StubHelperCommonAtom(const File &file, const ArchHandler::StubInfo &stubInfo) - : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } - - ~StubHelperCommonAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeStubHelper; - } - - Alignment alignment() const override { - return 1 << _stubInfo.stubHelperCommonAlignment; - } - - uint64_t size() const override { - return _stubInfo.stubHelperCommonSize; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permR_X; - } - - ArrayRef rawContent() const override { - return llvm::makeArrayRef(_stubInfo.stubHelperCommonBytes, - _stubInfo.stubHelperCommonSize); - } - -private: - const ArchHandler::StubInfo &_stubInfo; -}; - -class StubsPass : public Pass { -public: - StubsPass(const MachOLinkingContext &context) - : _ctx(context), _archHandler(_ctx.archHandler()), - _stubInfo(_archHandler.stubInfo()), - _file(*_ctx.make_file("")) { - _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); - } - - llvm::Error perform(SimpleFile &mergedFile) override { - // Skip this pass if output 
format uses text relocations instead of stubs. - if (!this->noTextRelocs()) - return llvm::Error::success(); - - // Scan all references in all atoms. - for (const DefinedAtom *atom : mergedFile.defined()) { - for (const Reference *ref : *atom) { - // Look at call-sites. - if (!this->isCallSite(*ref)) - continue; - const Atom *target = ref->target(); - assert(target != nullptr); - if (isa(target)) { - // Calls to shared libraries go through stubs. - _targetToUses[target].push_back(ref); - continue; - } - const DefinedAtom *defTarget = dyn_cast(target); - if (defTarget && defTarget->interposable() != DefinedAtom::interposeNo){ - // Calls to interposable functions in same linkage unit must also go - // through a stub. - assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); - _targetToUses[target].push_back(ref); - } - } - } - - // Exit early if no stubs needed. - if (_targetToUses.empty()) - return llvm::Error::success(); - - // First add help-common and GOT slots used by lazy binding. 
- SimpleDefinedAtom *helperCommonAtom = - new (_file.allocator()) StubHelperCommonAtom(_file, _stubInfo); - SimpleDefinedAtom *helperCacheNLPAtom = - new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), - _stubInfo.stubHelperImageCacheContentType); - SimpleDefinedAtom *helperBinderNLPAtom = - new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), - _stubInfo.stubHelperImageCacheContentType); - addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, - helperCacheNLPAtom); - addOptReference( - helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, - _stubInfo.optStubHelperCommonReferenceToCache, helperCacheNLPAtom); - addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, - helperBinderNLPAtom); - addOptReference( - helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, - _stubInfo.optStubHelperCommonReferenceToBinder, helperBinderNLPAtom); - mergedFile.addAtom(*helperCommonAtom); - mergedFile.addAtom(*helperBinderNLPAtom); - mergedFile.addAtom(*helperCacheNLPAtom); - - // Add reference to dyld_stub_binder in libSystem.dylib - auto I = llvm::find_if( - mergedFile.sharedLibrary(), [&](const SharedLibraryAtom *atom) { - return atom->name().equals(_stubInfo.binderSymbolName); - }); - assert(I != mergedFile.sharedLibrary().end() && - "dyld_stub_binder not found"); - addReference(helperBinderNLPAtom, _stubInfo.nonLazyPointerReferenceToBinder, *I); - - // Sort targets by name, so stubs and lazy pointers are consistent - std::vector targetsNeedingStubs; - for (auto it : _targetToUses) - targetsNeedingStubs.push_back(it.first); - std::sort(targetsNeedingStubs.begin(), targetsNeedingStubs.end(), - [](const Atom * left, const Atom * right) { - return (left->name().compare(right->name()) < 0); - }); - - // Make and append stubs, lazy pointers, and helpers in alphabetical order. 
- unsigned lazyOffset = 0; - for (const Atom *target : targetsNeedingStubs) { - auto *stub = new (_file.allocator()) StubAtom(_file, _stubInfo); - auto *lp = - new (_file.allocator()) LazyPointerAtom(_file, _ctx.is64Bit()); - auto *helper = new (_file.allocator()) StubHelperAtom(_file, _stubInfo); - - addReference(stub, _stubInfo.stubReferenceToLP, lp); - addOptReference(stub, _stubInfo.stubReferenceToLP, - _stubInfo.optStubReferenceToLP, lp); - addReference(lp, _stubInfo.lazyPointerReferenceToHelper, helper); - addReference(lp, _stubInfo.lazyPointerReferenceToFinal, target); - addReference(helper, _stubInfo.stubHelperReferenceToImm, helper); - addReferenceAddend(helper, _stubInfo.stubHelperReferenceToImm, helper, - lazyOffset); - addReference(helper, _stubInfo.stubHelperReferenceToHelperCommon, - helperCommonAtom); - - mergedFile.addAtom(*stub); - mergedFile.addAtom(*lp); - mergedFile.addAtom(*helper); - - // Update each reference to use stub. - for (const Reference *ref : _targetToUses[target]) { - assert(ref->target() == target); - // Switch call site to reference stub atom instead. 
- const_cast(ref)->setTarget(stub); - } - - // Calculate new offset - lazyOffset += target->name().size() + 12; - } - - return llvm::Error::success(); - } - -private: - bool noTextRelocs() { - return true; - } - - bool isCallSite(const Reference &ref) { - return _archHandler.isCallSite(ref); - } - - void addReference(SimpleDefinedAtom* atom, - const ArchHandler::ReferenceInfo &refInfo, - const lld::Atom* target) { - atom->addReference(Reference::KindNamespace::mach_o, - refInfo.arch, refInfo.kind, refInfo.offset, - target, refInfo.addend); - } - - void addReferenceAddend(SimpleDefinedAtom *atom, - const ArchHandler::ReferenceInfo &refInfo, - const lld::Atom *target, uint64_t addend) { - atom->addReference(Reference::KindNamespace::mach_o, refInfo.arch, - refInfo.kind, refInfo.offset, target, addend); - } - - void addOptReference(SimpleDefinedAtom* atom, - const ArchHandler::ReferenceInfo &refInfo, - const ArchHandler::OptionalRefInfo &optRef, - const lld::Atom* target) { - if (!optRef.used) - return; - atom->addReference(Reference::KindNamespace::mach_o, - refInfo.arch, optRef.kind, optRef.offset, - target, optRef.addend); - } - - typedef llvm::DenseMap> TargetToUses; - - const MachOLinkingContext &_ctx; - mach_o::ArchHandler &_archHandler; - const ArchHandler::StubInfo &_stubInfo; - MachOFile &_file; - TargetToUses _targetToUses; -}; - -void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx) { - pm.add(std::unique_ptr(new StubsPass(ctx))); -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/lld/lib/ReaderWriter/MachO/TLVPass.cpp deleted file mode 100644 index e0a031cfb07b..000000000000 --- a/lld/lib/ReaderWriter/MachO/TLVPass.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//===- lib/ReaderWriter/MachO/TLVPass.cpp -----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This linker pass transforms all TLV references to real references. -/// -//===----------------------------------------------------------------------===// - -#include "ArchHandler.h" -#include "File.h" -#include "MachOPasses.h" -#include "lld/Core/Simple.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" - -namespace lld { -namespace mach_o { - -// -// TLVP Entry Atom created by the TLV pass. -// -class TLVPEntryAtom : public SimpleDefinedAtom { -public: - TLVPEntryAtom(const File &file, bool is64, StringRef name) - : SimpleDefinedAtom(file), _is64(is64), _name(name) {} - - ~TLVPEntryAtom() override = default; - - ContentType contentType() const override { - return DefinedAtom::typeTLVInitializerPtr; - } - - Alignment alignment() const override { - return _is64 ? 8 : 4; - } - - uint64_t size() const override { - return _is64 ? 
8 : 4; - } - - ContentPermissions permissions() const override { - return DefinedAtom::permRW_; - } - - ArrayRef rawContent() const override { - static const uint8_t zeros[] = - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - return llvm::makeArrayRef(zeros, size()); - } - - StringRef slotName() const { - return _name; - } - -private: - const bool _is64; - StringRef _name; -}; - -class TLVPass : public Pass { -public: - TLVPass(const MachOLinkingContext &context) - : _ctx(context), _archHandler(_ctx.archHandler()), - _file(*_ctx.make_file("")) { - _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); - } - -private: - llvm::Error perform(SimpleFile &mergedFile) override { - bool allowTLV = _ctx.minOS("10.7", "1.0"); - - for (const DefinedAtom *atom : mergedFile.defined()) { - for (const Reference *ref : *atom) { - if (!_archHandler.isTLVAccess(*ref)) - continue; - - if (!allowTLV) - return llvm::make_error( - "targeted OS version does not support use of thread local " - "variables in " + atom->name() + " for architecture " + - _ctx.archName()); - - const Atom *target = ref->target(); - assert(target != nullptr); - - const DefinedAtom *tlvpEntry = makeTLVPEntry(target); - const_cast(ref)->setTarget(tlvpEntry); - _archHandler.updateReferenceToTLV(ref); - } - } - - std::vector entries; - entries.reserve(_targetToTLVP.size()); - for (auto &it : _targetToTLVP) - entries.push_back(it.second); - std::sort(entries.begin(), entries.end(), - [](const TLVPEntryAtom *lhs, const TLVPEntryAtom *rhs) { - return (lhs->slotName().compare(rhs->slotName()) < 0); - }); - - for (const TLVPEntryAtom *slot : entries) - mergedFile.addAtom(*slot); - - return llvm::Error::success(); - } - - const DefinedAtom *makeTLVPEntry(const Atom *target) { - auto pos = _targetToTLVP.find(target); - - if (pos != _targetToTLVP.end()) - return pos->second; - - auto *tlvpEntry = new (_file.allocator()) - TLVPEntryAtom(_file, _ctx.is64Bit(), target->name()); - _targetToTLVP[target] = tlvpEntry; - const 
ArchHandler::ReferenceInfo &nlInfo = - _archHandler.stubInfo().nonLazyPointerReferenceToBinder; - tlvpEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, - nlInfo.kind, 0, target, 0); - return tlvpEntry; - } - - const MachOLinkingContext &_ctx; - mach_o::ArchHandler &_archHandler; - MachOFile &_file; - llvm::DenseMap _targetToTLVP; -}; - -void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx) { - assert(ctx.needsTLVPass()); - pm.add(std::make_unique(ctx)); -} - -} // end namespace mach_o -} // end namespace lld diff --git a/lld/lib/ReaderWriter/MachO/WriterMachO.cpp b/lld/lib/ReaderWriter/MachO/WriterMachO.cpp deleted file mode 100644 index 60e0e9dd9a81..000000000000 --- a/lld/lib/ReaderWriter/MachO/WriterMachO.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===- lib/ReaderWriter/MachO/WriterMachO.cpp -----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ExecutableAtoms.h" -#include "MachONormalizedFile.h" -#include "lld/Core/File.h" -#include "lld/Core/Writer.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include - -using lld::mach_o::normalized::NormalizedFile; - -namespace lld { -namespace mach_o { - -class MachOWriter : public Writer { -public: - MachOWriter(const MachOLinkingContext &ctxt) : _ctx(ctxt) {} - - llvm::Error writeFile(const lld::File &file, StringRef path) override { - // Construct empty normalized file from atoms. 
- llvm::Expected> nFile = - normalized::normalizedFromAtoms(file, _ctx); - if (auto ec = nFile.takeError()) - return ec; - - // For testing, write out yaml form of normalized file. - if (_ctx.printAtoms()) { - std::unique_ptr yamlWriter = createWriterYAML(_ctx); - if (auto ec = yamlWriter->writeFile(file, "-")) - return ec; - } - - // Write normalized file as mach-o binary. - return writeBinary(*nFile->get(), path); - } - - void createImplicitFiles(std::vector> &r) override { - // When building main executables, add _main as required entry point. - if (_ctx.outputTypeHasEntry()) - r.emplace_back(new CEntryFile(_ctx)); - // If this can link with dylibs, need helper function (dyld_stub_binder). - if (_ctx.needsStubsPass()) - r.emplace_back(new StubHelperFile(_ctx)); - // Final linked images can access a symbol for their mach_header. - if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) - r.emplace_back(new MachHeaderAliasFile(_ctx)); - } -private: - const MachOLinkingContext &_ctx; - }; - - -} // namespace mach_o - -std::unique_ptr createWriterMachO(const MachOLinkingContext &context) { - return std::unique_ptr(new lld::mach_o::MachOWriter(context)); -} - -} // namespace lld diff --git a/lld/lib/ReaderWriter/YAML/CMakeLists.txt b/lld/lib/ReaderWriter/YAML/CMakeLists.txt deleted file mode 100644 index 0e63574a63d2..000000000000 --- a/lld/lib/ReaderWriter/YAML/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_lld_library(lldYAML - ReaderWriterYAML.cpp - - LINK_COMPONENTS - Support - - LINK_LIBS - lldCore - ) diff --git a/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp deleted file mode 100644 index c0e6e0334fa6..000000000000 --- a/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp +++ /dev/null @@ -1,1403 +0,0 @@ -//===- lib/ReaderWriter/YAML/ReaderWriterYAML.cpp -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/AbsoluteAtom.h" -#include "lld/Core/ArchiveLibraryFile.h" -#include "lld/Core/Atom.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/Error.h" -#include "lld/Core/File.h" -#include "lld/Core/LinkingContext.h" -#include "lld/Core/Reader.h" -#include "lld/Core/Reference.h" -#include "lld/Core/SharedLibraryAtom.h" -#include "lld/Core/Simple.h" -#include "lld/Core/UndefinedAtom.h" -#include "lld/Core/Writer.h" -#include "lld/ReaderWriter/YamlContext.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/Magic.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include - -using llvm::file_magic; -using llvm::yaml::MappingTraits; -using llvm::yaml::ScalarEnumerationTraits; -using llvm::yaml::ScalarTraits; -using llvm::yaml::IO; -using llvm::yaml::SequenceTraits; -using llvm::yaml::DocumentListTraits; - -using namespace lld; - -/// The conversion of Atoms to and from YAML uses LLVM's YAML I/O. This -/// file just defines template specializations on the lld types which control -/// how the mapping is done to and from YAML. - -namespace { - -/// Used when writing yaml files. -/// In most cases, atoms names are unambiguous, so references can just -/// use the atom name as the target (e.g. target: foo). But in a few -/// cases that does not work, so ref-names are added. 
These are labels -/// used only in yaml. The labels do not exist in the Atom model. -/// -/// One need for ref-names are when atoms have no user supplied name -/// (e.g. c-string literal). Another case is when two object files with -/// identically named static functions are merged (ld -r) into one object file. -/// In that case referencing the function by name is ambiguous, so a unique -/// ref-name is added. -class RefNameBuilder { -public: - RefNameBuilder(const lld::File &file) - : _collisionCount(0), _unnamedCounter(0) { - // visit all atoms - for (const lld::DefinedAtom *atom : file.defined()) { - // Build map of atoms names to detect duplicates - if (!atom->name().empty()) - buildDuplicateNameMap(*atom); - - // Find references to unnamed atoms and create ref-names for them. - for (const lld::Reference *ref : *atom) { - // create refname for any unnamed reference target - const lld::Atom *target = ref->target(); - if ((target != nullptr) && target->name().empty()) { - std::string storage; - llvm::raw_string_ostream buffer(storage); - buffer << llvm::format("L%03d", _unnamedCounter++); - StringRef newName = copyString(buffer.str()); - _refNames[target] = std::string(newName); - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "unnamed atom: creating ref-name: '" - << newName << "' (" - << (const void *)newName.data() << ", " - << newName.size() << ")\n"); - } - } - } - for (const lld::UndefinedAtom *undefAtom : file.undefined()) { - buildDuplicateNameMap(*undefAtom); - } - for (const lld::SharedLibraryAtom *shlibAtom : file.sharedLibrary()) { - buildDuplicateNameMap(*shlibAtom); - } - for (const lld::AbsoluteAtom *absAtom : file.absolute()) { - if (!absAtom->name().empty()) - buildDuplicateNameMap(*absAtom); - } - } - - void buildDuplicateNameMap(const lld::Atom &atom) { - assert(!atom.name().empty()); - NameToAtom::iterator pos = _nameMap.find(atom.name()); - if (pos != _nameMap.end()) { - // Found name collision, give each a unique ref-name. 
- std::string Storage; - llvm::raw_string_ostream buffer(Storage); - buffer << atom.name() << llvm::format(".%03d", ++_collisionCount); - StringRef newName = copyString(buffer.str()); - _refNames[&atom] = std::string(newName); - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "name collision: creating ref-name: '" - << newName << "' (" - << (const void *)newName.data() - << ", " << newName.size() << ")\n"); - const lld::Atom *prevAtom = pos->second; - AtomToRefName::iterator pos2 = _refNames.find(prevAtom); - if (pos2 == _refNames.end()) { - // Only create ref-name for previous if none already created. - std::string Storage2; - llvm::raw_string_ostream buffer2(Storage2); - buffer2 << prevAtom->name() << llvm::format(".%03d", ++_collisionCount); - StringRef newName2 = copyString(buffer2.str()); - _refNames[prevAtom] = std::string(newName2); - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "name collision: creating ref-name: '" - << newName2 << "' (" - << (const void *)newName2.data() << ", " - << newName2.size() << ")\n"); - } - } else { - // First time we've seen this name, just add it to map. - _nameMap[atom.name()] = &atom; - DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() - << "atom name seen for first time: '" - << atom.name() << "' (" - << (const void *)atom.name().data() - << ", " << atom.name().size() << ")\n"); - } - } - - bool hasRefName(const lld::Atom *atom) { return _refNames.count(atom); } - - StringRef refName(const lld::Atom *atom) { - return _refNames.find(atom)->second; - } - -private: - typedef llvm::StringMap NameToAtom; - typedef llvm::DenseMap AtomToRefName; - - // Allocate a new copy of this string in _storage, so the strings - // can be freed when RefNameBuilder is destroyed. 
- StringRef copyString(StringRef str) { - char *s = _storage.Allocate(str.size()); - memcpy(s, str.data(), str.size()); - return StringRef(s, str.size()); - } - - unsigned int _collisionCount; - unsigned int _unnamedCounter; - NameToAtom _nameMap; - AtomToRefName _refNames; - llvm::BumpPtrAllocator _storage; -}; - -/// Used when reading yaml files to find the target of a reference -/// that could be a name or ref-name. -class RefNameResolver { -public: - RefNameResolver(const lld::File *file, IO &io); - - const lld::Atom *lookup(StringRef name) const { - NameToAtom::const_iterator pos = _nameMap.find(name); - if (pos != _nameMap.end()) - return pos->second; - _io.setError(Twine("no such atom name: ") + name); - return nullptr; - } - -private: - typedef llvm::StringMap NameToAtom; - - void add(StringRef name, const lld::Atom *atom) { - if (_nameMap.count(name)) { - _io.setError(Twine("duplicate atom name: ") + name); - } else { - _nameMap[name] = atom; - } - } - - IO &_io; - NameToAtom _nameMap; -}; - -/// Mapping of Atoms. -template class AtomList { - using Ty = std::vector>; - -public: - typename Ty::iterator begin() { return _atoms.begin(); } - typename Ty::iterator end() { return _atoms.end(); } - Ty _atoms; -}; - -/// Mapping of kind: field in yaml files. -enum FileKinds { - fileKindObjectAtoms, // atom based object file encoded in yaml - fileKindArchive, // static archive library encoded in yaml - fileKindObjectMachO // mach-o object files encoded in yaml -}; - -struct ArchMember { - FileKinds _kind; - StringRef _name; - const lld::File *_content; -}; - -// The content bytes in a DefinedAtom are just uint8_t but we want -// special formatting, so define a strong type. -LLVM_YAML_STRONG_TYPEDEF(uint8_t, ImplicitHex8) - -// SharedLibraryAtoms have a bool canBeNull() method which we'd like to be -// more readable than just true/false. -LLVM_YAML_STRONG_TYPEDEF(bool, ShlibCanBeNull) - -// lld::Reference::Kind is a tuple of . 
-// For yaml, we just want one string that encapsulates the tuple. -struct RefKind { - Reference::KindNamespace ns; - Reference::KindArch arch; - Reference::KindValue value; -}; - -} // end anonymous namespace - -LLVM_YAML_IS_SEQUENCE_VECTOR(ArchMember) -LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *) -// Always write DefinedAtoms content bytes as a flow sequence. -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(ImplicitHex8) - -// for compatibility with gcc-4.7 in C++11 mode, add extra namespace -namespace llvm { -namespace yaml { - -// This is a custom formatter for RefKind -template <> struct ScalarTraits { - static void output(const RefKind &kind, void *ctxt, raw_ostream &out) { - assert(ctxt != nullptr); - YamlContext *info = reinterpret_cast(ctxt); - assert(info->_registry); - StringRef str; - if (info->_registry->referenceKindToString(kind.ns, kind.arch, kind.value, - str)) - out << str; - else - out << (int)(kind.ns) << "-" << (int)(kind.arch) << "-" << kind.value; - } - - static StringRef input(StringRef scalar, void *ctxt, RefKind &kind) { - assert(ctxt != nullptr); - YamlContext *info = reinterpret_cast(ctxt); - assert(info->_registry); - if (info->_registry->referenceKindFromString(scalar, kind.ns, kind.arch, - kind.value)) - return StringRef(); - return StringRef("unknown reference kind"); - } - - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::File::Kind &value) { - io.enumCase(value, "error-object", lld::File::kindErrorObject); - io.enumCase(value, "object", lld::File::kindMachObject); - io.enumCase(value, "shared-library", lld::File::kindSharedLibrary); - io.enumCase(value, "static-library", lld::File::kindArchiveLibrary); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::Atom::Scope &value) { - io.enumCase(value, "global", lld::Atom::scopeGlobal); - io.enumCase(value, "hidden", 
lld::Atom::scopeLinkageUnit); - io.enumCase(value, "static", lld::Atom::scopeTranslationUnit); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::SectionChoice &value) { - io.enumCase(value, "content", lld::DefinedAtom::sectionBasedOnContent); - io.enumCase(value, "custom", lld::DefinedAtom::sectionCustomPreferred); - io.enumCase(value, "custom-required", - lld::DefinedAtom::sectionCustomRequired); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::Interposable &value) { - io.enumCase(value, "no", DefinedAtom::interposeNo); - io.enumCase(value, "yes", DefinedAtom::interposeYes); - io.enumCase(value, "yes-and-weak", DefinedAtom::interposeYesAndRuntimeWeak); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::Merge &value) { - io.enumCase(value, "no", lld::DefinedAtom::mergeNo); - io.enumCase(value, "as-tentative", lld::DefinedAtom::mergeAsTentative); - io.enumCase(value, "as-weak", lld::DefinedAtom::mergeAsWeak); - io.enumCase(value, "as-addressed-weak", - lld::DefinedAtom::mergeAsWeakAndAddressUsed); - io.enumCase(value, "by-content", lld::DefinedAtom::mergeByContent); - io.enumCase(value, "same-name-and-size", - lld::DefinedAtom::mergeSameNameAndSize); - io.enumCase(value, "largest", lld::DefinedAtom::mergeByLargestSection); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::DeadStripKind &value) { - io.enumCase(value, "normal", lld::DefinedAtom::deadStripNormal); - io.enumCase(value, "never", lld::DefinedAtom::deadStripNever); - io.enumCase(value, "always", lld::DefinedAtom::deadStripAlways); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::DynamicExport &value) { - io.enumCase(value, "normal", lld::DefinedAtom::dynamicExportNormal); - io.enumCase(value, "always", 
lld::DefinedAtom::dynamicExportAlways); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::CodeModel &value) { - io.enumCase(value, "none", lld::DefinedAtom::codeNA); - io.enumCase(value, "mips-pic", lld::DefinedAtom::codeMipsPIC); - io.enumCase(value, "mips-micro", lld::DefinedAtom::codeMipsMicro); - io.enumCase(value, "mips-micro-pic", lld::DefinedAtom::codeMipsMicroPIC); - io.enumCase(value, "mips-16", lld::DefinedAtom::codeMips16); - io.enumCase(value, "arm-thumb", lld::DefinedAtom::codeARMThumb); - io.enumCase(value, "arm-a", lld::DefinedAtom::codeARM_a); - io.enumCase(value, "arm-d", lld::DefinedAtom::codeARM_d); - io.enumCase(value, "arm-t", lld::DefinedAtom::codeARM_t); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::ContentPermissions &value) { - io.enumCase(value, "---", lld::DefinedAtom::perm___); - io.enumCase(value, "r--", lld::DefinedAtom::permR__); - io.enumCase(value, "r-x", lld::DefinedAtom::permR_X); - io.enumCase(value, "rw-", lld::DefinedAtom::permRW_); - io.enumCase(value, "rwx", lld::DefinedAtom::permRWX); - io.enumCase(value, "rw-l", lld::DefinedAtom::permRW_L); - io.enumCase(value, "unknown", lld::DefinedAtom::permUnknown); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::DefinedAtom::ContentType &value) { - io.enumCase(value, "unknown", DefinedAtom::typeUnknown); - io.enumCase(value, "code", DefinedAtom::typeCode); - io.enumCase(value, "stub", DefinedAtom::typeStub); - io.enumCase(value, "constant", DefinedAtom::typeConstant); - io.enumCase(value, "data", DefinedAtom::typeData); - io.enumCase(value, "quick-data", DefinedAtom::typeDataFast); - io.enumCase(value, "zero-fill", DefinedAtom::typeZeroFill); - io.enumCase(value, "zero-fill-quick", DefinedAtom::typeZeroFillFast); - io.enumCase(value, "const-data", DefinedAtom::typeConstData); - io.enumCase(value, "got", 
DefinedAtom::typeGOT); - io.enumCase(value, "resolver", DefinedAtom::typeResolver); - io.enumCase(value, "branch-island", DefinedAtom::typeBranchIsland); - io.enumCase(value, "branch-shim", DefinedAtom::typeBranchShim); - io.enumCase(value, "stub-helper", DefinedAtom::typeStubHelper); - io.enumCase(value, "c-string", DefinedAtom::typeCString); - io.enumCase(value, "utf16-string", DefinedAtom::typeUTF16String); - io.enumCase(value, "unwind-cfi", DefinedAtom::typeCFI); - io.enumCase(value, "unwind-lsda", DefinedAtom::typeLSDA); - io.enumCase(value, "const-4-byte", DefinedAtom::typeLiteral4); - io.enumCase(value, "const-8-byte", DefinedAtom::typeLiteral8); - io.enumCase(value, "const-16-byte", DefinedAtom::typeLiteral16); - io.enumCase(value, "lazy-pointer", DefinedAtom::typeLazyPointer); - io.enumCase(value, "lazy-dylib-pointer", - DefinedAtom::typeLazyDylibPointer); - io.enumCase(value, "cfstring", DefinedAtom::typeCFString); - io.enumCase(value, "initializer-pointer", - DefinedAtom::typeInitializerPtr); - io.enumCase(value, "terminator-pointer", - DefinedAtom::typeTerminatorPtr); - io.enumCase(value, "c-string-pointer",DefinedAtom::typeCStringPtr); - io.enumCase(value, "objc-class-pointer", - DefinedAtom::typeObjCClassPtr); - io.enumCase(value, "objc-category-list", - DefinedAtom::typeObjC2CategoryList); - io.enumCase(value, "objc-image-info", - DefinedAtom::typeObjCImageInfo); - io.enumCase(value, "objc-method-list", - DefinedAtom::typeObjCMethodList); - io.enumCase(value, "objc-class1", DefinedAtom::typeObjC1Class); - io.enumCase(value, "dtraceDOF", DefinedAtom::typeDTraceDOF); - io.enumCase(value, "interposing-tuples", - DefinedAtom::typeInterposingTuples); - io.enumCase(value, "lto-temp", DefinedAtom::typeTempLTO); - io.enumCase(value, "compact-unwind", DefinedAtom::typeCompactUnwindInfo); - io.enumCase(value, "unwind-info", DefinedAtom::typeProcessedUnwindInfo); - io.enumCase(value, "tlv-thunk", DefinedAtom::typeThunkTLV); - io.enumCase(value, "tlv-data", 
DefinedAtom::typeTLVInitialData); - io.enumCase(value, "tlv-zero-fill", DefinedAtom::typeTLVInitialZeroFill); - io.enumCase(value, "tlv-initializer-ptr", - DefinedAtom::typeTLVInitializerPtr); - io.enumCase(value, "mach_header", DefinedAtom::typeMachHeader); - io.enumCase(value, "dso_handle", DefinedAtom::typeDSOHandle); - io.enumCase(value, "sectcreate", DefinedAtom::typeSectCreate); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::UndefinedAtom::CanBeNull &value) { - io.enumCase(value, "never", lld::UndefinedAtom::canBeNullNever); - io.enumCase(value, "at-runtime", lld::UndefinedAtom::canBeNullAtRuntime); - io.enumCase(value, "at-buildtime",lld::UndefinedAtom::canBeNullAtBuildtime); - } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, ShlibCanBeNull &value) { - io.enumCase(value, "never", false); - io.enumCase(value, "at-runtime", true); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &io, lld::SharedLibraryAtom::Type &value) { - io.enumCase(value, "code", lld::SharedLibraryAtom::Type::Code); - io.enumCase(value, "data", lld::SharedLibraryAtom::Type::Data); - io.enumCase(value, "unknown", lld::SharedLibraryAtom::Type::Unknown); - } -}; - -/// This is a custom formatter for lld::DefinedAtom::Alignment. 
Values look -/// like: -/// 8 # 8-byte aligned -/// 7 mod 16 # 16-byte aligned plus 7 bytes -template <> struct ScalarTraits { - static void output(const lld::DefinedAtom::Alignment &value, void *ctxt, - raw_ostream &out) { - if (value.modulus == 0) { - out << llvm::format("%d", value.value); - } else { - out << llvm::format("%d mod %d", value.modulus, value.value); - } - } - - static StringRef input(StringRef scalar, void *ctxt, - lld::DefinedAtom::Alignment &value) { - value.modulus = 0; - size_t modStart = scalar.find("mod"); - if (modStart != StringRef::npos) { - StringRef modStr = scalar.slice(0, modStart); - modStr = modStr.rtrim(); - unsigned int modulus; - if (modStr.getAsInteger(0, modulus)) { - return "malformed alignment modulus"; - } - value.modulus = modulus; - scalar = scalar.drop_front(modStart + 3); - scalar = scalar.ltrim(); - } - unsigned int power; - if (scalar.getAsInteger(0, power)) { - return "malformed alignment power"; - } - value.value = power; - if (value.modulus >= power) { - return "malformed alignment, modulus too large for power"; - } - return StringRef(); // returning empty string means success - } - - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &io, FileKinds &value) { - io.enumCase(value, "object", fileKindObjectAtoms); - io.enumCase(value, "archive", fileKindArchive); - io.enumCase(value, "object-mach-o", fileKindObjectMachO); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &io, ArchMember &member) { - io.mapOptional("kind", member._kind, fileKindObjectAtoms); - io.mapOptional("name", member._name); - io.mapRequired("content", member._content); - } -}; - -// Declare that an AtomList is a yaml sequence. 
-template struct SequenceTraits > { - static size_t size(IO &io, AtomList &seq) { return seq._atoms.size(); } - static T *&element(IO &io, AtomList &seq, size_t index) { - if (index >= seq._atoms.size()) - seq._atoms.resize(index + 1); - return seq._atoms[index].get(); - } -}; - -// Declare that an AtomRange is a yaml sequence. -template struct SequenceTraits > { - static size_t size(IO &io, File::AtomRange &seq) { return seq.size(); } - static T *&element(IO &io, File::AtomRange &seq, size_t index) { - assert(io.outputting() && "AtomRange only used when outputting"); - assert(index < seq.size() && "Out of range access"); - return seq[index].get(); - } -}; - -// Used to allow DefinedAtom content bytes to be a flow sequence of -// two-digit hex numbers without the leading 0x (e.g. FF, 04, 0A) -template <> struct ScalarTraits { - static void output(const ImplicitHex8 &val, void *, raw_ostream &out) { - uint8_t num = val; - out << llvm::format("%02X", num); - } - - static StringRef input(StringRef str, void *, ImplicitHex8 &val) { - unsigned long long n; - if (getAsUnsignedInteger(str, 16, n)) - return "invalid two-digit-hex number"; - if (n > 0xFF) - return "out of range two-digit-hex number"; - val = n; - return StringRef(); // returning empty string means success - } - - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -// YAML conversion for std::vector -template <> struct DocumentListTraits > { - static size_t size(IO &io, std::vector &seq) { - return seq.size(); - } - static const lld::File *&element(IO &io, std::vector &seq, - size_t index) { - if (index >= seq.size()) - seq.resize(index + 1); - return seq[index]; - } -}; - -// YAML conversion for const lld::File* -template <> struct MappingTraits { - class NormArchiveFile : public lld::ArchiveLibraryFile { - public: - NormArchiveFile(IO &io) : ArchiveLibraryFile("") {} - - NormArchiveFile(IO &io, const lld::File *file) - : ArchiveLibraryFile(file->path()), _path(file->path()) { - // 
If we want to support writing archives, this constructor would - // need to populate _members. - } - - const lld::File *denormalize(IO &io) { return this; } - - const AtomRange defined() const override { - return _noDefinedAtoms; - } - - const AtomRange undefined() const override { - return _noUndefinedAtoms; - } - - const AtomRange sharedLibrary() const override { - return _noSharedLibraryAtoms; - } - - const AtomRange absolute() const override { - return _noAbsoluteAtoms; - } - - void clearAtoms() override { - _noDefinedAtoms.clear(); - _noUndefinedAtoms.clear(); - _noSharedLibraryAtoms.clear(); - _noAbsoluteAtoms.clear(); - } - - File *find(StringRef name) override { - for (const ArchMember &member : _members) - for (const lld::DefinedAtom *atom : member._content->defined()) - if (name == atom->name()) - return const_cast(member._content); - return nullptr; - } - - std::error_code - parseAllMembers(std::vector> &result) override { - return std::error_code(); - } - - StringRef _path; - std::vector _members; - }; - - class NormalizedFile : public lld::File { - public: - NormalizedFile(IO &io) - : File("", kindNormalizedObject), _io(io), _rnb(nullptr), - _definedAtomsRef(_definedAtoms._atoms), - _undefinedAtomsRef(_undefinedAtoms._atoms), - _sharedLibraryAtomsRef(_sharedLibraryAtoms._atoms), - _absoluteAtomsRef(_absoluteAtoms._atoms) {} - - NormalizedFile(IO &io, const lld::File *file) - : File(file->path(), kindNormalizedObject), _io(io), - _rnb(new RefNameBuilder(*file)), _path(file->path()), - _definedAtomsRef(file->defined()), - _undefinedAtomsRef(file->undefined()), - _sharedLibraryAtomsRef(file->sharedLibrary()), - _absoluteAtomsRef(file->absolute()) { - } - - ~NormalizedFile() override { - } - - const lld::File *denormalize(IO &io); - - const AtomRange defined() const override { - return _definedAtomsRef; - } - - const AtomRange undefined() const override { - return _undefinedAtomsRef; - } - - const AtomRange sharedLibrary() const override { - return 
_sharedLibraryAtomsRef; - } - - const AtomRange absolute() const override { - return _absoluteAtomsRef; - } - - void clearAtoms() override { - _definedAtoms._atoms.clear(); - _undefinedAtoms._atoms.clear(); - _sharedLibraryAtoms._atoms.clear(); - _absoluteAtoms._atoms.clear(); - } - - // Allocate a new copy of this string in _storage, so the strings - // can be freed when File is destroyed. - StringRef copyString(StringRef str) { - char *s = _storage.Allocate(str.size()); - memcpy(s, str.data(), str.size()); - return StringRef(s, str.size()); - } - - IO &_io; - std::unique_ptr _rnb; - StringRef _path; - AtomList _definedAtoms; - AtomList _undefinedAtoms; - AtomList _sharedLibraryAtoms; - AtomList _absoluteAtoms; - AtomRange _definedAtomsRef; - AtomRange _undefinedAtomsRef; - AtomRange _sharedLibraryAtomsRef; - AtomRange _absoluteAtomsRef; - llvm::BumpPtrAllocator _storage; - }; - - static void mapping(IO &io, const lld::File *&file) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - // Let any register tag handler process this. - if (info->_registry && info->_registry->handleTaggedDoc(io, file)) - return; - // If no registered handler claims this tag and there is no tag, - // grandfather in as "!native". 
- if (io.mapTag("!native", true) || io.mapTag("tag:yaml.org,2002:map")) - mappingAtoms(io, file); - } - - static void mappingAtoms(IO &io, const lld::File *&file) { - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap - keys(io, file, nullptr); - assert(info != nullptr); - info->_file = keys.operator->(); - - io.mapOptional("path", keys->_path); - - if (io.outputting()) { - io.mapOptional("defined-atoms", keys->_definedAtomsRef); - io.mapOptional("undefined-atoms", keys->_undefinedAtomsRef); - io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtomsRef); - io.mapOptional("absolute-atoms", keys->_absoluteAtomsRef); - } else { - io.mapOptional("defined-atoms", keys->_definedAtoms); - io.mapOptional("undefined-atoms", keys->_undefinedAtoms); - io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtoms); - io.mapOptional("absolute-atoms", keys->_absoluteAtoms); - } - } - - static void mappingArchive(IO &io, const lld::File *&file) { - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap - keys(io, file, &info->_file->allocator()); - - io.mapOptional("path", keys->_path); - io.mapOptional("members", keys->_members); - } -}; - -// YAML conversion for const lld::Reference* -template <> struct MappingTraits { - class NormalizedReference : public lld::Reference { - public: - NormalizedReference(IO &io) - : lld::Reference(lld::Reference::KindNamespace::all, - lld::Reference::KindArch::all, 0), - _target(nullptr), _offset(0), _addend(0), _tag(0) {} - - NormalizedReference(IO &io, const lld::Reference *ref) - : lld::Reference(ref->kindNamespace(), ref->kindArch(), - ref->kindValue()), - _target(nullptr), _targetName(targetName(io, ref)), - _offset(ref->offsetInAtom()), _addend(ref->addend()), - _tag(ref->tag()) { - _mappedKind.ns = ref->kindNamespace(); - _mappedKind.arch = ref->kindArch(); - _mappedKind.value = ref->kindValue(); - } - - const lld::Reference *denormalize(IO &io) { - YamlContext *info = 
reinterpret_cast(io.getContext()); - assert(info != nullptr); - typedef MappingTraits::NormalizedFile NormalizedFile; - NormalizedFile *f = reinterpret_cast(info->_file); - if (!_targetName.empty()) - _targetName = f->copyString(_targetName); - DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() - << "created Reference to name: '" - << _targetName << "' (" - << (const void *)_targetName.data() - << ", " << _targetName.size() << ")\n"); - setKindNamespace(_mappedKind.ns); - setKindArch(_mappedKind.arch); - setKindValue(_mappedKind.value); - return this; - } - - void bind(const RefNameResolver &); - static StringRef targetName(IO &io, const lld::Reference *ref); - - uint64_t offsetInAtom() const override { return _offset; } - const lld::Atom *target() const override { return _target; } - Addend addend() const override { return _addend; } - void setAddend(Addend a) override { _addend = a; } - void setTarget(const lld::Atom *a) override { _target = a; } - - const lld::Atom *_target; - StringRef _targetName; - uint32_t _offset; - Addend _addend; - RefKind _mappedKind; - uint32_t _tag; - }; - - static void mapping(IO &io, const lld::Reference *&ref) { - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap keys( - io, ref, &info->_file->allocator()); - - io.mapRequired("kind", keys->_mappedKind); - io.mapOptional("offset", keys->_offset); - io.mapOptional("target", keys->_targetName); - io.mapOptional("addend", keys->_addend, (lld::Reference::Addend)0); - io.mapOptional("tag", keys->_tag, 0u); - } -}; - -// YAML conversion for const lld::DefinedAtom* -template <> struct MappingTraits { - - class NormalizedAtom : public lld::DefinedAtom { - public: - NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _contentType(), _alignment(1) { - static uint32_t ordinalCounter = 1; - _ordinal = ordinalCounter++; - } - - NormalizedAtom(IO &io, const lld::DefinedAtom *atom) - : _file(fileFromContext(io)), _name(atom->name()), - _scope(atom->scope()), 
_interpose(atom->interposable()), - _merge(atom->merge()), _contentType(atom->contentType()), - _alignment(atom->alignment()), _sectionChoice(atom->sectionChoice()), - _deadStrip(atom->deadStrip()), _dynamicExport(atom->dynamicExport()), - _codeModel(atom->codeModel()), - _permissions(atom->permissions()), _size(atom->size()), - _sectionName(atom->customSectionName()), - _sectionSize(atom->sectionSize()) { - for (const lld::Reference *r : *atom) - _references.push_back(r); - if (!atom->occupiesDiskSpace()) - return; - ArrayRef cont = atom->rawContent(); - _content.reserve(cont.size()); - for (uint8_t x : cont) - _content.push_back(x); - } - - ~NormalizedAtom() override = default; - - const lld::DefinedAtom *denormalize(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - typedef MappingTraits::NormalizedFile NormalizedFile; - NormalizedFile *f = reinterpret_cast(info->_file); - if (!_name.empty()) - _name = f->copyString(_name); - if (!_refName.empty()) - _refName = f->copyString(_refName); - if (!_sectionName.empty()) - _sectionName = f->copyString(_sectionName); - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "created DefinedAtom named: '" << _name - << "' (" << (const void *)_name.data() - << ", " << _name.size() << ")\n"); - return this; - } - - void bind(const RefNameResolver &); - - // Extract current File object from YAML I/O parsing context - const lld::File &fileFromContext(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - assert(info->_file != nullptr); - return *info->_file; - } - - const lld::File &file() const override { return _file; } - StringRef name() const override { return _name; } - uint64_t size() const override { return _size; } - Scope scope() const override { return _scope; } - Interposable interposable() const override { return _interpose; } - Merge merge() const override { return _merge; } - ContentType contentType() const override { return 
_contentType; } - Alignment alignment() const override { return _alignment; } - SectionChoice sectionChoice() const override { return _sectionChoice; } - StringRef customSectionName() const override { return _sectionName; } - uint64_t sectionSize() const override { return _sectionSize; } - DeadStripKind deadStrip() const override { return _deadStrip; } - DynamicExport dynamicExport() const override { return _dynamicExport; } - CodeModel codeModel() const override { return _codeModel; } - ContentPermissions permissions() const override { return _permissions; } - ArrayRef rawContent() const override { - if (!occupiesDiskSpace()) - return ArrayRef(); - return ArrayRef( - reinterpret_cast(_content.data()), _content.size()); - } - - uint64_t ordinal() const override { return _ordinal; } - - reference_iterator begin() const override { - uintptr_t index = 0; - const void *it = reinterpret_cast(index); - return reference_iterator(*this, it); - } - reference_iterator end() const override { - uintptr_t index = _references.size(); - const void *it = reinterpret_cast(index); - return reference_iterator(*this, it); - } - const lld::Reference *derefIterator(const void *it) const override { - uintptr_t index = reinterpret_cast(it); - assert(index < _references.size()); - return _references[index]; - } - void incrementIterator(const void *&it) const override { - uintptr_t index = reinterpret_cast(it); - ++index; - it = reinterpret_cast(index); - } - - void addReference(Reference::KindNamespace ns, - Reference::KindArch arch, - Reference::KindValue kindValue, uint64_t off, - const Atom *target, Reference::Addend a) override { - assert(target && "trying to create reference to nothing"); - auto node = new (file().allocator()) SimpleReference(ns, arch, kindValue, - off, target, a); - _references.push_back(node); - } - - const lld::File &_file; - StringRef _name; - StringRef _refName; - Scope _scope; - Interposable _interpose; - Merge _merge; - ContentType _contentType; - Alignment 
_alignment; - SectionChoice _sectionChoice; - DeadStripKind _deadStrip; - DynamicExport _dynamicExport; - CodeModel _codeModel; - ContentPermissions _permissions; - uint32_t _ordinal; - std::vector _content; - uint64_t _size; - StringRef _sectionName; - uint64_t _sectionSize; - std::vector _references; - }; - - static void mapping(IO &io, const lld::DefinedAtom *&atom) { - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap keys( - io, atom, &info->_file->allocator()); - if (io.outputting()) { - // If writing YAML, check if atom needs a ref-name. - typedef MappingTraits::NormalizedFile NormalizedFile; - assert(info != nullptr); - NormalizedFile *f = reinterpret_cast(info->_file); - assert(f); - assert(f->_rnb); - if (f->_rnb->hasRefName(atom)) { - keys->_refName = f->_rnb->refName(atom); - } - } - - io.mapOptional("name", keys->_name, StringRef()); - io.mapOptional("ref-name", keys->_refName, StringRef()); - io.mapOptional("scope", keys->_scope, - DefinedAtom::scopeTranslationUnit); - io.mapOptional("type", keys->_contentType, - DefinedAtom::typeCode); - io.mapOptional("content", keys->_content); - io.mapOptional("size", keys->_size, (uint64_t)keys->_content.size()); - io.mapOptional("interposable", keys->_interpose, - DefinedAtom::interposeNo); - io.mapOptional("merge", keys->_merge, DefinedAtom::mergeNo); - io.mapOptional("alignment", keys->_alignment, - DefinedAtom::Alignment(1)); - io.mapOptional("section-choice", keys->_sectionChoice, - DefinedAtom::sectionBasedOnContent); - io.mapOptional("section-name", keys->_sectionName, StringRef()); - io.mapOptional("section-size", keys->_sectionSize, (uint64_t)0); - io.mapOptional("dead-strip", keys->_deadStrip, - DefinedAtom::deadStripNormal); - io.mapOptional("dynamic-export", keys->_dynamicExport, - DefinedAtom::dynamicExportNormal); - io.mapOptional("code-model", keys->_codeModel, DefinedAtom::codeNA); - // default permissions based on content type - io.mapOptional("permissions", 
keys->_permissions, - DefinedAtom::permissions( - keys->_contentType)); - io.mapOptional("references", keys->_references); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &io, lld::DefinedAtom *&atom) { - const lld::DefinedAtom *atomPtr = atom; - MappingTraits::mapping(io, atomPtr); - atom = const_cast(atomPtr); - } -}; - -// YAML conversion for const lld::UndefinedAtom* -template <> struct MappingTraits { - class NormalizedAtom : public lld::UndefinedAtom { - public: - NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _canBeNull(canBeNullNever) {} - - NormalizedAtom(IO &io, const lld::UndefinedAtom *atom) - : _file(fileFromContext(io)), _name(atom->name()), - _canBeNull(atom->canBeNull()) {} - - ~NormalizedAtom() override = default; - - const lld::UndefinedAtom *denormalize(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - typedef MappingTraits::NormalizedFile NormalizedFile; - NormalizedFile *f = reinterpret_cast(info->_file); - if (!_name.empty()) - _name = f->copyString(_name); - - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "created UndefinedAtom named: '" << _name - << "' (" << (const void *)_name.data() << ", " - << _name.size() << ")\n"); - return this; - } - - // Extract current File object from YAML I/O parsing context - const lld::File &fileFromContext(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - assert(info->_file != nullptr); - return *info->_file; - } - - const lld::File &file() const override { return _file; } - StringRef name() const override { return _name; } - CanBeNull canBeNull() const override { return _canBeNull; } - - const lld::File &_file; - StringRef _name; - CanBeNull _canBeNull; - }; - - static void mapping(IO &io, const lld::UndefinedAtom *&atom) { - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap keys( - io, atom, &info->_file->allocator()); - - io.mapRequired("name", 
keys->_name); - io.mapOptional("can-be-null", keys->_canBeNull, - lld::UndefinedAtom::canBeNullNever); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &io, lld::UndefinedAtom *&atom) { - const lld::UndefinedAtom *atomPtr = atom; - MappingTraits::mapping(io, atomPtr); - atom = const_cast(atomPtr); - } -}; - -// YAML conversion for const lld::SharedLibraryAtom* -template <> struct MappingTraits { - class NormalizedAtom : public lld::SharedLibraryAtom { - public: - NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _canBeNull(false), - _type(Type::Unknown), _size(0) {} - - NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom) - : _file(fileFromContext(io)), _name(atom->name()), - _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()), - _type(atom->type()), _size(atom->size()) {} - - ~NormalizedAtom() override = default; - - const lld::SharedLibraryAtom *denormalize(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - typedef MappingTraits::NormalizedFile NormalizedFile; - NormalizedFile *f = reinterpret_cast(info->_file); - if (!_name.empty()) - _name = f->copyString(_name); - if (!_loadName.empty()) - _loadName = f->copyString(_loadName); - - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "created SharedLibraryAtom named: '" - << _name << "' (" - << (const void *)_name.data() - << ", " << _name.size() << ")\n"); - return this; - } - - // Extract current File object from YAML I/O parsing context - const lld::File &fileFromContext(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - assert(info->_file != nullptr); - return *info->_file; - } - - const lld::File &file() const override { return _file; } - StringRef name() const override { return _name; } - StringRef loadName() const override { return _loadName; } - bool canBeNullAtRuntime() const override { return _canBeNull; } - Type type() const override { return _type; } - uint64_t size() 
const override { return _size; } - - const lld::File &_file; - StringRef _name; - StringRef _loadName; - ShlibCanBeNull _canBeNull; - Type _type; - uint64_t _size; - }; - - static void mapping(IO &io, const lld::SharedLibraryAtom *&atom) { - - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap - keys(io, atom, &info->_file->allocator()); - - io.mapRequired("name", keys->_name); - io.mapOptional("load-name", keys->_loadName); - io.mapOptional("can-be-null", keys->_canBeNull, (ShlibCanBeNull) false); - io.mapOptional("type", keys->_type, SharedLibraryAtom::Type::Code); - io.mapOptional("size", keys->_size, uint64_t(0)); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &io, lld::SharedLibraryAtom *&atom) { - const lld::SharedLibraryAtom *atomPtr = atom; - MappingTraits::mapping(io, atomPtr); - atom = const_cast(atomPtr); - } -}; - -// YAML conversion for const lld::AbsoluteAtom* -template <> struct MappingTraits { - class NormalizedAtom : public lld::AbsoluteAtom { - public: - NormalizedAtom(IO &io) - : _file(fileFromContext(io)), _scope(), _value(0) {} - - NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom) - : _file(fileFromContext(io)), _name(atom->name()), - _scope(atom->scope()), _value(atom->value()) {} - - ~NormalizedAtom() override = default; - - const lld::AbsoluteAtom *denormalize(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - typedef MappingTraits::NormalizedFile NormalizedFile; - NormalizedFile *f = reinterpret_cast(info->_file); - if (!_name.empty()) - _name = f->copyString(_name); - - DEBUG_WITH_TYPE("WriterYAML", - llvm::dbgs() << "created AbsoluteAtom named: '" << _name - << "' (" << (const void *)_name.data() - << ", " << _name.size() << ")\n"); - return this; - } - - // Extract current File object from YAML I/O parsing context - const lld::File &fileFromContext(IO &io) { - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != 
nullptr); - assert(info->_file != nullptr); - return *info->_file; - } - - const lld::File &file() const override { return _file; } - StringRef name() const override { return _name; } - uint64_t value() const override { return _value; } - Scope scope() const override { return _scope; } - - const lld::File &_file; - StringRef _name; - StringRef _refName; - Scope _scope; - Hex64 _value; - }; - - static void mapping(IO &io, const lld::AbsoluteAtom *&atom) { - YamlContext *info = reinterpret_cast(io.getContext()); - MappingNormalizationHeap keys( - io, atom, &info->_file->allocator()); - - if (io.outputting()) { - typedef MappingTraits::NormalizedFile NormalizedFile; - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - NormalizedFile *f = reinterpret_cast(info->_file); - assert(f); - assert(f->_rnb); - if (f->_rnb->hasRefName(atom)) { - keys->_refName = f->_rnb->refName(atom); - } - } - - io.mapRequired("name", keys->_name); - io.mapOptional("ref-name", keys->_refName, StringRef()); - io.mapOptional("scope", keys->_scope); - io.mapRequired("value", keys->_value); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &io, lld::AbsoluteAtom *&atom) { - const lld::AbsoluteAtom *atomPtr = atom; - MappingTraits::mapping(io, atomPtr); - atom = const_cast(atomPtr); - } -}; - -} // end namespace llvm -} // end namespace yaml - -RefNameResolver::RefNameResolver(const lld::File *file, IO &io) : _io(io) { - typedef MappingTraits::NormalizedAtom - NormalizedAtom; - for (const lld::DefinedAtom *a : file->defined()) { - const auto *na = (const NormalizedAtom *)a; - if (!na->_refName.empty()) - add(na->_refName, a); - else if (!na->_name.empty()) - add(na->_name, a); - } - - for (const lld::UndefinedAtom *a : file->undefined()) - add(a->name(), a); - - for (const lld::SharedLibraryAtom *a : file->sharedLibrary()) - add(a->name(), a); - - typedef MappingTraits::NormalizedAtom NormAbsAtom; - for (const lld::AbsoluteAtom *a : 
file->absolute()) { - const auto *na = (const NormAbsAtom *)a; - if (na->_refName.empty()) - add(na->_name, a); - else - add(na->_refName, a); - } -} - -inline const lld::File * -MappingTraits::NormalizedFile::denormalize(IO &io) { - typedef MappingTraits::NormalizedAtom - NormalizedAtom; - - RefNameResolver nameResolver(this, io); - // Now that all atoms are parsed, references can be bound. - for (const lld::DefinedAtom *a : this->defined()) { - auto *normAtom = (NormalizedAtom *)const_cast(a); - normAtom->bind(nameResolver); - } - - return this; -} - -inline void MappingTraits::NormalizedAtom::bind( - const RefNameResolver &resolver) { - typedef MappingTraits::NormalizedReference - NormalizedReference; - for (const lld::Reference *ref : _references) { - auto *normRef = (NormalizedReference *)const_cast(ref); - normRef->bind(resolver); - } -} - -inline void MappingTraits::NormalizedReference::bind( - const RefNameResolver &resolver) { - _target = resolver.lookup(_targetName); -} - -inline StringRef -MappingTraits::NormalizedReference::targetName( - IO &io, const lld::Reference *ref) { - if (ref->target() == nullptr) - return StringRef(); - YamlContext *info = reinterpret_cast(io.getContext()); - assert(info != nullptr); - typedef MappingTraits::NormalizedFile NormalizedFile; - NormalizedFile *f = reinterpret_cast(info->_file); - RefNameBuilder &rnb = *f->_rnb; - if (rnb.hasRefName(ref->target())) - return rnb.refName(ref->target()); - return ref->target()->name(); -} - -namespace lld { -namespace yaml { - -class Writer : public lld::Writer { -public: - Writer(const LinkingContext &context) : _ctx(context) {} - - llvm::Error writeFile(const lld::File &file, StringRef outPath) override { - // Create stream to path. - std::error_code ec; - llvm::raw_fd_ostream out(outPath, ec, llvm::sys::fs::OF_TextWithCRLF); - if (ec) - return llvm::errorCodeToError(ec); - - // Create yaml Output writer, using yaml options for context. 
- YamlContext yamlContext; - yamlContext._ctx = &_ctx; - yamlContext._registry = &_ctx.registry(); - llvm::yaml::Output yout(out, &yamlContext); - - // Write yaml output. - const lld::File *fileRef = &file; - yout << fileRef; - - return llvm::Error::success(); - } - -private: - const LinkingContext &_ctx; -}; - -} // end namespace yaml - -namespace { - -/// Handles !native tagged yaml documents. -class NativeYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { - bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { - if (io.mapTag("!native")) { - MappingTraits::mappingAtoms(io, file); - return true; - } - return false; - } -}; - -/// Handles !archive tagged yaml documents. -class ArchiveYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { - bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { - if (io.mapTag("!archive")) { - MappingTraits::mappingArchive(io, file); - return true; - } - return false; - } -}; - -class YAMLReader : public Reader { -public: - YAMLReader(const Registry ®istry) : _registry(registry) {} - - bool canParse(file_magic magic, MemoryBufferRef mb) const override { - StringRef name = mb.getBufferIdentifier(); - return name.endswith(".objtxt") || name.endswith(".yaml"); - } - - ErrorOr> - loadFile(std::unique_ptr mb, - const class Registry &) const override { - // Create YAML Input Reader. - YamlContext yamlContext; - yamlContext._registry = &_registry; - yamlContext._path = mb->getBufferIdentifier(); - llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); - - // Fill vector with File objects created by parsing yaml. - std::vector createdFiles; - yin >> createdFiles; - assert(createdFiles.size() == 1); - - // Error out now if there were parsing errors. 
- if (yin.error()) - return make_error_code(lld::YamlReaderError::illegal_value); - - std::shared_ptr smb(mb.release()); - const File *file = createdFiles[0]; - // Note: loadFile() should return vector of *const* File - File *f = const_cast(file); - f->setLastError(std::error_code()); - f->setSharedMemoryBuffer(smb); - return std::unique_ptr(f); - } - -private: - const Registry &_registry; -}; - -} // end anonymous namespace - -void Registry::addSupportYamlFiles() { - add(std::unique_ptr(new YAMLReader(*this))); - add(std::unique_ptr( - new NativeYamlIOTaggedDocumentHandler())); - add(std::unique_ptr( - new ArchiveYamlIOTaggedDocumentHandler())); -} - -std::unique_ptr createWriterYAML(const LinkingContext &context) { - return std::unique_ptr(new lld::yaml::Writer(context)); -} - -} // end namespace lld diff --git a/lld/test/ELF/Inputs/copy-rel.s b/lld/test/ELF/Inputs/copy-rel.s deleted file mode 100644 index bcfc7a58a33f..000000000000 --- a/lld/test/ELF/Inputs/copy-rel.s +++ /dev/null @@ -1,11 +0,0 @@ -.globl foo -.type foo, @object -.size foo, 4 -foo: -.long 1 - -.weak bar -.type bar, @object -.size bar, 4 -bar: -.long 2 diff --git a/lld/test/ELF/copy-rel.s b/lld/test/ELF/copy-rel.s deleted file mode 100644 index 7a957d2c290c..000000000000 --- a/lld/test/ELF/copy-rel.s +++ /dev/null @@ -1,25 +0,0 @@ -# REQUIRES: x86 - -## Test copy relocations can be created for -no-pie and -pie. 
- -# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/copy-rel.s -o %t1.o -# RUN: ld.lld %t1.o -o %t1.so -shared -soname=so - -# RUN: ld.lld %t.o %t1.so -o %t -# RUN: llvm-readobj -r %t | FileCheck %s - -# RUN: ld.lld %t.o %t1.so -o %t -pie -# RUN: llvm-readobj -r %t | FileCheck %s - -# CHECK: Relocations [ -# CHECK-NEXT: .rela.dyn { -# CHECK-NEXT: R_X86_64_COPY foo 0x0 -# CHECK-NEXT: R_X86_64_COPY bar 0x0 -# CHECK-NEXT: } -# CHECK-NEXT: ] - -.global _start -_start: - mov $foo - ., %eax - movabs $bar, %rax diff --git a/lld/test/ELF/relocation-copy-alias.s b/lld/test/ELF/relocation-copy-alias.s deleted file mode 100644 index f2251bbeefc2..000000000000 --- a/lld/test/ELF/relocation-copy-alias.s +++ /dev/null @@ -1,69 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/relocation-copy-alias.s -o %t2.o -// RUN: ld.lld --hash-style=sysv -shared %t2.o -o %t.so -// RUN: ld.lld --hash-style=sysv %t.o %t.so -o %t3 -// RUN: llvm-readobj --dyn-symbols -r --expand-relocs %t3 | FileCheck %s -// RUN: ld.lld --hash-style=sysv --gc-sections %t.o %t.so -o %t3 -// RUN: llvm-readobj --dyn-symbols -r --expand-relocs %t3 | FileCheck %s - -.global _start -_start: -movl $5, a1 -movl $5, b1 -movl $5, b2 - -// CHECK: .rela.dyn { -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: -// CHECK-NEXT: Type: R_X86_64_COPY -// CHECK-NEXT: Symbol: a1 -// CHECK-NEXT: Addend: 0x0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: -// CHECK-NEXT: Type: R_X86_64_COPY -// CHECK-NEXT: Symbol: b1 -// CHECK-NEXT: Addend: 0x0 -// CHECK-NEXT: } -// CHECK-NEXT: } - -// CHECK: Name: a1 -// CHECK-NEXT: Value: [[A:.*]] -// CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Global (0x1) -// CHECK-NEXT: Type: Object (0x1) -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: .bss (0x7) - -// CHECK: Name: b1 -// CHECK-NEXT: Value: [[B:.*]] -// 
CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Global -// CHECK-NEXT: Type: Object (0x1) -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: .bss - -// CHECK: Name: b2 -// CHECK-NEXT: Value: [[B]] -// CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Global -// CHECK-NEXT: Type: Object (0x1) -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: .bss - -// CHECK: Name: a2 -// CHECK-NEXT: Value: [[A]] -// CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Weak -// CHECK-NEXT: Type: Object (0x1) -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: .bss - -// CHECK: Name: b3 -// CHECK-NEXT: Value: [[B]] -// CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Global -// CHECK-NEXT: Type: Object (0x1) -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: .bss diff --git a/lld/test/darwin/Inputs/native-and-mach-o.objtxt b/lld/test/darwin/Inputs/native-and-mach-o.objtxt deleted file mode 100644 index 58124eb83321..000000000000 --- a/lld/test/darwin/Inputs/native-and-mach-o.objtxt +++ /dev/null @@ -1,17 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - address: 0 - content: [ 0xC3 ] -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ ] - value: 0 diff --git a/lld/test/darwin/Inputs/native-and-mach-o2.objtxt b/lld/test/darwin/Inputs/native-and-mach-o2.objtxt deleted file mode 100644 index 344c9bc0b0d2..000000000000 --- a/lld/test/darwin/Inputs/native-and-mach-o2.objtxt +++ /dev/null @@ -1,19 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -flags: [ ] -install-name: /usr/lib/libSystem.B.dylib -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55 ] - -global-symbols: - - name: dyld_stub_binder - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 diff --git a/lld/test/darwin/cmdline-lto_library.objtxt 
b/lld/test/darwin/cmdline-lto_library.objtxt deleted file mode 100644 index 48226ec81361..000000000000 --- a/lld/test/darwin/cmdline-lto_library.objtxt +++ /dev/null @@ -1,11 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -lto_library %t -print-atoms -r %s 2>&1 | FileCheck %s -# -# Test that the -lto_library option does not result in an error. -# - -# CHECK-NOT: -lto_library - ---- !native -defined-atoms: - - name: _foo -... diff --git a/lld/test/darwin/cmdline-objc_gc.objtxt b/lld/test/darwin/cmdline-objc_gc.objtxt deleted file mode 100644 index f6db8e06ef9a..000000000000 --- a/lld/test/darwin/cmdline-objc_gc.objtxt +++ /dev/null @@ -1,15 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -objc_gc %s 2>&1 | FileCheck %s -# -# Test that the -objc_gc is rejected. -# - -# CHECK: error: -objc_gc is not supported - ---- !native -defined-atoms: - - name: _main - type: code - scope: global - content: [ 0x90 ] - -... diff --git a/lld/test/darwin/cmdline-objc_gc_compaction.objtxt b/lld/test/darwin/cmdline-objc_gc_compaction.objtxt deleted file mode 100644 index 47620b9b39a1..000000000000 --- a/lld/test/darwin/cmdline-objc_gc_compaction.objtxt +++ /dev/null @@ -1,15 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -objc_gc_compaction %s 2>&1 | FileCheck %s -# -# Test that the -objc_gc_compaction is rejected. -# - -# CHECK: error: -objc_gc_compaction is not supported - ---- !native -defined-atoms: - - name: _main - type: code - scope: global - content: [ 0x90 ] - -... diff --git a/lld/test/darwin/cmdline-objc_gc_only.objtxt b/lld/test/darwin/cmdline-objc_gc_only.objtxt deleted file mode 100644 index 4c5f1b338bd6..000000000000 --- a/lld/test/darwin/cmdline-objc_gc_only.objtxt +++ /dev/null @@ -1,15 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -objc_gc_only %s 2>&1 | FileCheck %s -# -# Test that the -objc_gc_only is rejected. 
-# - -# CHECK: error: -objc_gc_only is not supported - ---- !native -defined-atoms: - - name: _main - type: code - scope: global - content: [ 0x90 ] - -... diff --git a/lld/test/darwin/native-and-mach-o.objtxt b/lld/test/darwin/native-and-mach-o.objtxt deleted file mode 100644 index 41a9ef64dddb..000000000000 --- a/lld/test/darwin/native-and-mach-o.objtxt +++ /dev/null @@ -1,27 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/native-and-mach-o.objtxt \ -# RUN: %p/Inputs/native-and-mach-o2.objtxt -o %t && \ -# RUN: llvm-nm %t | FileCheck %s -# -# Test a mix of atoms and mach-o both encoded in yaml -# - ---- !native -defined-atoms: - - name: _main - type: code - scope: global - content: [ 55, 48, 89, E5, 30, C0, E8, 00, - 00, 00, 00, 31, C0, 5D, C3 ] - references: - - offset: 7 - kind: branch32 - target: _foo - -undefined-atoms: - - name: _foo - -... - -# CHECK: {{[0-9a-f]+}} T _foo -# CHECK: {{[0-9a-f]+}} T _main diff --git a/lld/test/mach-o/Inputs/DependencyDump.py b/lld/test/mach-o/Inputs/DependencyDump.py deleted file mode 100755 index 0f4d49d6fb9a..000000000000 --- a/lld/test/mach-o/Inputs/DependencyDump.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- Python -*- - - -# -# Dump out Xcode binary dependency file. 
-# - -import sys - -f = open(sys.argv[1], "rb") -byte = f.read(1) -while byte != b'': - if byte == b'\000': - sys.stdout.write("linker-vers: ") - elif byte == b'\020': - sys.stdout.write("input-file: ") - elif byte == b'\021': - sys.stdout.write("not-found: ") - elif byte == b'\100': - sys.stdout.write("output-file: ") - byte = f.read(1) - while byte != b'\000': - if byte != b'\012': - sys.stdout.write(byte.decode("ascii")) - byte = f.read(1) - sys.stdout.write("\n") - byte = f.read(1) - -f.close() - diff --git a/lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd b/lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd deleted file mode 100644 index fddd192630d1..000000000000 --- a/lld/test/mach-o/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd +++ /dev/null @@ -1,42 +0,0 @@ ---- !tapi-tbd-v3 -archs: [ x86_64 ] -uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000000' ] -platform: macosx -install-name: '/usr/lib/libSystem.B.dylib' -current-version: 0001.001.1 -exports: - - archs: [ 'x86_64' ] - re-exports: [ '/usr/lib/system/libdyld.dylib', - '/usr/lib/system/libsystem_c.dylib', - '/usr/lib/system/libsystem_m.dylib' ] ---- !tapi-tbd-v3 -archs: [ x86_64 ] -uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000001' ] -platform: macosx -install-name: '/usr/lib/system/libdyld.dylib' -current-version: 0001.001.1 -parent-umbrella: System -exports: - - archs: [ 'x86_64' ] - symbols: [ dyld_stub_binder ] ---- !tapi-tbd-v3 -archs: [ x86_64 ] -uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000002' ] -platform: macosx -install-name: '/usr/lib/system/libsystem_c.dylib' -current-version: 0001.001.1 -parent-umbrella: System -exports: - - archs: [ 'x86_64' ] - symbols: [ ] ---- !tapi-tbd-v3 -archs: [ x86_64 ] -uuids: [ 'x86_64: 00000000-0000-0000-0000-000000000003' ] -platform: macosx -install-name: '/usr/lib/system/libsystem_m.dylib' -current-version: 0001.001.1 -parent-umbrella: System -exports: - - archs: [ 'x86_64' ] - symbols: [ ___nan ] -... 
diff --git a/lld/test/mach-o/Inputs/PIE.yaml b/lld/test/mach-o/Inputs/PIE.yaml deleted file mode 100644 index 0463154fcf28..000000000000 --- a/lld/test/mach-o/Inputs/PIE.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/arm-interworking.yaml b/lld/test/mach-o/Inputs/arm-interworking.yaml deleted file mode 100644 index d78a2997fe33..000000000000 --- a/lld/test/mach-o/Inputs/arm-interworking.yaml +++ /dev/null @@ -1,83 +0,0 @@ ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0xFE, 0xFF, 0xFF, 0xEB, 0x02, 0x00, 0x00, 0xFA, - 0xFC, 0xFF, 0xFF, 0xEB, 0xFB, 0xFF, 0xFF, 0xFA, - 0x1E, 0xFF, 0x2F, 0xE1, 0x1E, 0xFF, 0x2F, 0xE1 ] - relocations: - - offset: 0x0000000C - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000008 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000004 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x0000000000000018 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000004 - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 3 -local-symbols: - - name: _d2 - type: N_SECT - sect: 2 - value: 0x0000000000000018 -global-symbols: - - name: _a1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 
- - name: _a2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000014 -undefined-symbols: - - name: _t1 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _t2 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/arm-shims.yaml b/lld/test/mach-o/Inputs/arm-shims.yaml deleted file mode 100644 index 8baebef17d86..000000000000 --- a/lld/test/mach-o/Inputs/arm-shims.yaml +++ /dev/null @@ -1,60 +0,0 @@ ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x00, 0xBF, 0xFF, 0xF7, 0xFE, 0xEF, 0xFF, 0xF7, - 0xFB, 0xBF, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3, - 0xFA, 0xFF, 0xFF, 0xFA, 0xF9, 0xFF, 0xFF, 0xEA ] - relocations: - - offset: 0x00000014 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000010 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000006 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x00000002 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 2 -global-symbols: - - name: _a2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000C - - name: _t2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000000 -undefined-symbols: - - name: _a1 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _t1 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/arm64/libSystem.yaml b/lld/test/mach-o/Inputs/arm64/libSystem.yaml deleted file mode 100644 index 76cba1bc3255..000000000000 --- a/lld/test/mach-o/Inputs/arm64/libSystem.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# -# For use by test cases 
that create dynamic output types which may needs stubs -# and therefore will need a dylib definition of dyld_stub_binder. -# - ---- !mach-o -arch: arm64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: dyld_stub_binder - -... diff --git a/lld/test/mach-o/Inputs/armv7/libSystem.yaml b/lld/test/mach-o/Inputs/armv7/libSystem.yaml deleted file mode 100644 index 2539f9003540..000000000000 --- a/lld/test/mach-o/Inputs/armv7/libSystem.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# -# For use by test cases that create dynamic output types which may needs stubs -# and therefore will need a dylib definition of dyld_stub_binder. -# - ---- !mach-o -arch: armv7 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: dyld_stub_binder - -... diff --git a/lld/test/mach-o/Inputs/bar.yaml b/lld/test/mach-o/Inputs/bar.yaml deleted file mode 100644 index 5605e67e7c35..000000000000 --- a/lld/test/mach-o/Inputs/bar.yaml +++ /dev/null @@ -1,18 +0,0 @@ - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3 ] -global-symbols: - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/cstring-sections.yaml b/lld/test/mach-o/Inputs/cstring-sections.yaml deleted file mode 100644 index eb227f29f8f5..000000000000 --- a/lld/test/mach-o/Inputs/cstring-sections.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __objc_methname - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000000 - content: [ 0x61, 0x62, 0x63, 0x00 ] - - segment: __TEXT - section: __objc_classname - type: S_CSTRING_LITERALS - attributes: 
[ ] - address: 0x0000000000000006 - content: [ 0x61, 0x62, 0x63, 0x00 ] - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x000000000000000A - content: [ 0x61, 0x62, 0x63, 0x00 ] diff --git a/lld/test/mach-o/Inputs/exported_symbols_list.exp b/lld/test/mach-o/Inputs/exported_symbols_list.exp deleted file mode 100644 index ff6653342472..000000000000 --- a/lld/test/mach-o/Inputs/exported_symbols_list.exp +++ /dev/null @@ -1,6 +0,0 @@ -# -# For use with exported_symbols_list.yaml -# -_foo -_b - diff --git a/lld/test/mach-o/Inputs/full.filelist b/lld/test/mach-o/Inputs/full.filelist deleted file mode 100644 index abf98b633377..000000000000 --- a/lld/test/mach-o/Inputs/full.filelist +++ /dev/null @@ -1,3 +0,0 @@ -/foo/bar/a.o -/foo/bar/b.o -/foo/x.a diff --git a/lld/test/mach-o/Inputs/got-order.yaml b/lld/test/mach-o/Inputs/got-order.yaml deleted file mode 100644 index d256e9d7d463..000000000000 --- a/lld/test/mach-o/Inputs/got-order.yaml +++ /dev/null @@ -1,53 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x0D, 0x00, - 0x00, 0x00, 0x00, 0x48, 0x8B, 0x05, 0x00, 0x00, - 0x00, 0x00, 0x8B, 0x00, 0x03, 0x01, 0x48, 0x8B, - 0x0D, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000019 - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x0000000E - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000007 - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 3 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: 
[ N_EXT ] - value: 0x0000000000000000 - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _zazzle - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/got-order2.yaml b/lld/test/mach-o/Inputs/got-order2.yaml deleted file mode 100644 index faddeda924db..000000000000 --- a/lld/test/mach-o/Inputs/got-order2.yaml +++ /dev/null @@ -1,11 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libfoobar.dylib -exports: - - name: _bar - - name: _zazzle - - name: _foo - - name: _aaa - - name: _fff - - name: _zzz diff --git a/lld/test/mach-o/Inputs/hello-world-arm64.yaml b/lld/test/mach-o/Inputs/hello-world-arm64.yaml deleted file mode 100644 index 31de71ef4941..000000000000 --- a/lld/test/mach-o/Inputs/hello-world-arm64.yaml +++ /dev/null @@ -1,8 +0,0 @@ ---- !mach-o -arch: arm64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: _fprintf - - name: ___stdoutp - - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-armv6.yaml b/lld/test/mach-o/Inputs/hello-world-armv6.yaml deleted file mode 100644 index 0b29f65ab126..000000000000 --- a/lld/test/mach-o/Inputs/hello-world-armv6.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- !mach-o -arch: armv6 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: _printf - - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-armv7.yaml b/lld/test/mach-o/Inputs/hello-world-armv7.yaml deleted file mode 100644 index 4e26120fe216..000000000000 --- a/lld/test/mach-o/Inputs/hello-world-armv7.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- !mach-o -arch: armv7 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: _printf - - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-x86.yaml b/lld/test/mach-o/Inputs/hello-world-x86.yaml deleted file mode 100644 index dbec62b77f3b..000000000000 --- 
a/lld/test/mach-o/Inputs/hello-world-x86.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- !mach-o -arch: x86 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: _printf - - name: dyld_stub_binder diff --git a/lld/test/mach-o/Inputs/hello-world-x86_64.yaml b/lld/test/mach-o/Inputs/hello-world-x86_64.yaml deleted file mode 100644 index 7840d5c1932e..000000000000 --- a/lld/test/mach-o/Inputs/hello-world-x86_64.yaml +++ /dev/null @@ -1,8 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: _fprintf - - name: dyld_stub_binder - - name: ___stdoutp diff --git a/lld/test/mach-o/Inputs/hw.raw_bytes b/lld/test/mach-o/Inputs/hw.raw_bytes deleted file mode 100644 index ce013625030b..000000000000 --- a/lld/test/mach-o/Inputs/hw.raw_bytes +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/lld/test/mach-o/Inputs/interposing-section.yaml b/lld/test/mach-o/Inputs/interposing-section.yaml deleted file mode 100644 index 45966b6870cc..000000000000 --- a/lld/test/mach-o/Inputs/interposing-section.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: _open diff --git a/lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml b/lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml deleted file mode 100644 index 50a97bc9c09b..000000000000 --- a/lld/test/mach-o/Inputs/lazy-bind-x86_64-2.yaml +++ /dev/null @@ -1,8 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libfoo.dylib -compat-version: 2.0 -current-version: 3.4 -exports: - - name: _foo diff --git a/lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml b/lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml deleted file mode 100644 index 2f61cc0cda1a..000000000000 --- a/lld/test/mach-o/Inputs/lazy-bind-x86_64-3.yaml +++ /dev/null @@ -1,8 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libbaz.dylib -compat-version: 3.0 
-current-version: 4.5 -exports: - - name: _baz diff --git a/lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml b/lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml deleted file mode 100644 index 7e6cd9007bf3..000000000000 --- a/lld/test/mach-o/Inputs/lazy-bind-x86_64.yaml +++ /dev/null @@ -1,8 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libbar.dylib -compat-version: 1.0 -current-version: 2.3 -exports: - - name: _bar diff --git a/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmyshared.dylib b/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmyshared.dylib deleted file mode 100755 index 71185fbdf73600ff2b795605d217a3c609752edd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20628 zcmeI4J!n%=6vt26`av5_tqv6nhAN^KS_icq99n6yMG-$@=_1$Gq!=)*Bq`dhi$lSo zgMzqqbL^xd_yNVmK?Qel5eE^RI~4!_ym!+_n$SRTko*svoO|wj_rCLfna({Y`MCc2 zt(i5&%yz*xo3+CvpW`DA4d(_iY=0ci!AL|v1VlgtL_h>YKm zey=0Rk0J55ozT6|?@eY8OXmP)2jM@3I+q%mNMTSZJT8bz==0!U2NdTDC2_8pc~A@; zm`(fD*r&Xr#TLqA3^w8#9!d>GuK0aw*ziCj5?6Y%IJvRjx$671_*<|q@YEE>H41g^ z(%9vTBg12!Su$MW;|hHqv``qAXGMPx^c1uc8lAtJoyyG@?o8%0>E84_JY1)An=SDE zEhQQ3$#>x1x8QTU0DBy^4SEF{6+Yi730^ekU}dF0ud@x|M{AHMWw_aDVv z2}NtG8T6$0VJSR(8h~;w<1>4MuOMc9bA@~#UYmM-zEI5Mde28e9K)U5w=-$B8}>x# zC*f0ipuA3h5LO}rA|L`HAOa#F0wN#+A|L{dLLkVk8s%;Eo+2OuA|L`HAOa#F0wN#+ zA|L`H(4YijZaP1kbH1o-9nGa$u&1&1EzUCjIVelOlmnQfWx1GhY#ZP)LR+LH3zxqo z2reXewN&gi+lL_*`MoS@70*XeJM9X^xhXf5ok?f%+(aS*A|L`HAOa#F0wN#+A|L`H zAOa$=Edrl{{{K?4&P1Ml=PBgt?Ehzy{vNuiS08gepQrV`{=S!Bt6u+=MDW`H+Hzk# z_l)%TJ2y9T9|r&~J2O3tOO-La!rD>H)B5Lfi>NSu_KsQgodYQ6V7++go{ z`Je*h%C`INdVB8iEZ@gYI{}|D--Bfz{J-kC|Jw2A-RjT&U(08&U3m0lV7#85`_qWW z$68O%eYxKr@51lW_uF^Dr}%z*`aDFEh=2%)fCz|y2#A0Ph=2%)fC$t_;Kt&Qo2yGp zOUz5V!X;75ypnk}^Em1Yj42$mARk9Y8)WvJg;c5kuN=l6(CyUz8Ak#^GL~q~zn*&k EU$)S+n*aa+ diff --git a/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmystatic.a b/lld/test/mach-o/Inputs/lib-search-paths/usr/lib/libmystatic.a deleted file mode 100644 index 
b12062941f376d739ba0bee9161c165f25c6a2a0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 556 zcmY$iNi0gvu;bEKKm~>-1}5evX6B}b3JL~bDP&SX!N>%rK7osYfq@Z-W8%|_@^j;J zD~n4KOEQxg7+@^De2D2VGfgZN6if|(1}hkW4K*|{Ff##~2ecAuXad*yUvK}h1I=ZI z8V;fZK(;6VF%U=s@e42uJHY}KE&%BP0vI1?CIbjFfdqjdKE5Qiq6ET*@Izc9LJ)j( z7R)>(@dhL|(7cqyl0<|^e7uWeh$DgzW5L`5G7sh!m>e<dAI2c(xD*)eDl3IOc4H2MGl diff --git a/lld/test/mach-o/Inputs/lib-search-paths/usr/local/lib/file.o b/lld/test/mach-o/Inputs/lib-search-paths/usr/local/lib/file.o deleted file mode 100644 index f9a923d37db381942611b306f0c87ed7266b2723..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 404 zcmX^A>+L^w1_nlE1|R{%AUXiVPyk{ekOblvU>0_Q1u9$s(gOrAKF~}C5M}}i0zrIy zNoqw2gbm?`xJHB^_~0we^0G&c|j05LKE$%8OCKnth|q?aGrG4W|d`ML3FnK`NXATt4O85SG> diff --git a/lld/test/mach-o/Inputs/libbar.a b/lld/test/mach-o/Inputs/libbar.a deleted file mode 100644 index 64cae6c749eee95e83a7273ad80f991a93b74ff3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 824 zcma)4yG{Z@6rBYFxPoqBp^3#9ZN**O1x$>=2!=u<0Zg8pwZqn*PP9iEvIl?XDjPv(%!a|j%|Cc zbR7N)opxO*nLcVJQL=J&lrJ2}%5k|`$O|CG3(yBy(?xv%lS3aTrBh*bpHxQo@u$#z z(w>g4nJAI$p$Oyo^W!Tjgb{`|z!`ANLW00Dm}UPD@LY@+6{CIB6ilwmAR0DDLtp!T zwQyRU`dTHF;CehuF67Sz=T674C2I6K{d!|a*WH_|?j_OT+QqOKiK$up%TLt1`~blQyoNzRt$tt34>fswW*=4NQH>OB(eEZjA9Up z&4AHAAhFmCV!-d$cke?&B4Y4M&OPUzd+zzT=e&3Ma(X6&2!WbFb)cva);)DrpbJHC z@)UBy^Q0vCShbZJSXiD(r6=XW z?0hDjf?pBl0&IXNt3ss$c8)l1%v=RGapLgq$MLf;apG zz+;ODf-p`W7QROdXb$udJSY7Ii-&hB7r4m#9ma`%bQ}2ZeNF^Zv1)(p^R?e+i-XLv z`0mFPBnapNB~86G&F7`4w>R3x>`m|Km-tEVY1haFpug{Cfagl(Dy5BVuBx`m+w0{u zrKVHV8*l~)SzCp;g+Jz!#K7oOIXVH9$`@5(Gh530Dy2TpX1}%^)BH*M(_7A)DZQrp zZcfK^{5f#u_~&yvr3Y0frr%SSY_b)iQ;T)G^TOJ3ET^?8d;}}z-3>Z<^&1 | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - 
segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ] -... diff --git a/lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml b/lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml deleted file mode 100644 index 5f7ae50717ba..000000000000 --- a/lld/test/mach-o/Inputs/unwind-info-simple-arm64.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- !mach-o -arch: arm64 -file-type: MH_DYLIB -install-name: /usr/lib/libc++.dylib -exports: - - name: __Unwind_Resume - - name: __ZTIl - - name: __ZTIi - - name: ___cxa_end_catch - - name: ___cxa_begin_catch - - name: ___cxa_allocate_exception - - name: ___cxa_throw - - name: ___gxx_personality_v0 diff --git a/lld/test/mach-o/Inputs/use-dylib-install-names.yaml b/lld/test/mach-o/Inputs/use-dylib-install-names.yaml deleted file mode 100644 index cec2559f2435..000000000000 --- a/lld/test/mach-o/Inputs/use-dylib-install-names.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, - 0xE8, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, - 0x00, 0x00, 0x00 ] -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _myGlobal - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/use-simple-dylib.yaml b/lld/test/mach-o/Inputs/use-simple-dylib.yaml deleted file mode 100644 index 9081bcf76932..000000000000 --- a/lld/test/mach-o/Inputs/use-simple-dylib.yaml +++ /dev/null @@ -1,58 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB 
-flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, - 0x90, 0x90, 0xC3, 0x90, 0x90, 0x90, 0x90, 0xC3, - 0x31, 0xC0, 0xC3 ] -local-symbols: - - name: _myStatic - type: N_SECT - sect: 1 - value: 0x000000000000000B - - name: _myVariablePreviouslyKnownAsPrivateExtern - type: N_SECT - scope: [ N_PEXT ] - sect: 1 - desc: [ N_SYMBOL_RESOLVER ] - value: 0x0000000000000011 -global-symbols: - - name: _myGlobal - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myGlobalWeak - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_WEAK_DEF ] - value: 0x0000000000000002 - - name: _myHidden - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000004 - - name: _myHiddenWeak - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - desc: [ N_WEAK_DEF ] - value: 0x0000000000000007 - - name: _myResolver - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_SYMBOL_RESOLVER ] - value: 0x0000000000000010 - -install-name: libspecial.dylib diff --git a/lld/test/mach-o/Inputs/write-final-sections.yaml b/lld/test/mach-o/Inputs/write-final-sections.yaml deleted file mode 100644 index ed434917f2cf..000000000000 --- a/lld/test/mach-o/Inputs/write-final-sections.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -flags: [ ] -install-name: /usr/lib/libSystem.B.dylib -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55 ] - -global-symbols: - - name: dyld_stub_binder - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - diff --git a/lld/test/mach-o/Inputs/wrong-arch-error.yaml 
b/lld/test/mach-o/Inputs/wrong-arch-error.yaml deleted file mode 100644 index 714ce9f1631d..000000000000 --- a/lld/test/mach-o/Inputs/wrong-arch-error.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2> %t.err -# RUN: FileCheck %s < %t.err - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xC3 ] - -global-symbols: - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 diff --git a/lld/test/mach-o/Inputs/x86/libSystem.yaml b/lld/test/mach-o/Inputs/x86/libSystem.yaml deleted file mode 100644 index 87a4895c9f4a..000000000000 --- a/lld/test/mach-o/Inputs/x86/libSystem.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# -# For use by test cases that create dynamic output types which may needs stubs -# and therefore will need a dylib definition of dyld_stub_binder. -# - ---- !mach-o -arch: x86 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: dyld_stub_binder - -... diff --git a/lld/test/mach-o/Inputs/x86_64/libSystem.yaml b/lld/test/mach-o/Inputs/x86_64/libSystem.yaml deleted file mode 100644 index fbbf794f3264..000000000000 --- a/lld/test/mach-o/Inputs/x86_64/libSystem.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# -# For use by test cases that create dynamic output types which may needs stubs -# and therefore will need a dylib definition of dyld_stub_binder. -# - ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: dyld_stub_binder - -... 
diff --git a/lld/test/mach-o/PIE.yaml b/lld/test/mach-o/PIE.yaml deleted file mode 100644 index 0195f5059cdf..000000000000 --- a/lld/test/mach-o/PIE.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/PIE.yaml -o %t && \ -# RUN: llvm-objdump --macho --private-headers %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/PIE.yaml -pie -o %t\ -# RUN: && llvm-objdump --macho --private-headers %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/PIE.yaml -no_pie -o %t\ -# RUN: && llvm-objdump --macho --private-headers %t \ -# RUN: | FileCheck --check-prefix=CHECK_NO_PIE %s -# -# Test various PIE options. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - -... 
- -# CHECK: MH_MAGIC_64 {{[0-9a-zA-Z _]+}} TWOLEVEL PIE -# CHECK_NO_PIE-NOT: MH_MAGIC_64 {{[0-9a-zA-Z _]+}} TWOLEVEL PIE diff --git a/lld/test/mach-o/align_text.yaml b/lld/test/mach-o/align_text.yaml deleted file mode 100644 index 6278ee251636..000000000000 --- a/lld/test/mach-o/align_text.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -r %t -o %t2 -print_atoms | FileCheck %s -# -# Test that alignment info round trips through -r -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000000 - content: [ 0x90, 0x90, 0x90, 0xC3, 0xC3, 0xC3 ] -local-symbols: - - name: _f1 - type: N_SECT - sect: 1 - value: 0x0000000000000003 - - name: _f2 - type: N_SECT - sect: 1 - value: 0x0000000000000004 - - name: _f3 - type: N_SECT - sect: 1 - value: 0x0000000000000005 -... 
- -# CHECK: defined-atoms: -# CHECK: - content: [ 90, 90, 90 ] -# CHECK: alignment: 16 -# CHECK: - name: _f1 -# CHECK: content: [ C3 ] -# CHECK: alignment: 3 mod 16 -# CHECK: - name: _f2 -# CHECK: content: [ C3 ] -# CHECK: alignment: 4 mod 16 -# CHECK: - name: _f3 -# CHECK: content: [ C3 ] -# CHECK: alignment: 5 mod 16 diff --git a/lld/test/mach-o/arm-interworking-movw.yaml b/lld/test/mach-o/arm-interworking-movw.yaml deleted file mode 100644 index 3f61fafd1943..000000000000 --- a/lld/test/mach-o/arm-interworking-movw.yaml +++ /dev/null @@ -1,393 +0,0 @@ -# REQUIRES: arm -# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch armv7 -dylib -print_atoms %t -o %t2 \ -# RUN: %p/Inputs/armv7/libSystem.yaml -sectalign __TEXT __text 0x1000 | FileCheck %s -# RUN: llvm-objdump -d --macho --no-symbolic-operands %t2 | FileCheck --check-prefix=CODE %s -# -# Test thumb and arm branches round trip through -r. -# Test movw/movt pairs have low bit set properly for thumb vs arm. 
-# -# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x40, 0xF2, 0x25, 0x00, 0xC0, 0xF2, 0x00, 0x00, - 0x40, 0xF2, 0x01, 0x01, 0xC0, 0xF2, 0x00, 0x01, - 0x40, 0xF2, 0x4E, 0x02, 0xC0, 0xF2, 0x00, 0x02, - 0x40, 0xF2, 0x2A, 0x03, 0xC0, 0xF2, 0x00, 0x03, - 0x78, 0x44, 0x70, 0x47, 0x70, 0x47, 0x25, 0x00, - 0x00, 0xE3, 0x00, 0x00, 0x40, 0xE3, 0xD7, 0x1F, - 0x0F, 0xE3, 0xFF, 0x1F, 0x4F, 0xE3, 0x4E, 0x20, - 0x00, 0xE3, 0x00, 0x20, 0x40, 0xE3, 0x00, 0x30, - 0x00, 0xE3, 0x00, 0x30, 0x40, 0xE3, 0x0F, 0x00, - 0x80, 0xE0, 0x1E, 0xFF, 0x2F, 0xE1, 0x1E, 0xFF, - 0x2F, 0xE1 ] - relocations: - - offset: 0x00000042 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 1 - pc-rel: false - value: 0x0000004E - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - value: 0x00000046 - - offset: 0x0000003E - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 0 - pc-rel: false - value: 0x0000004E - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - value: 0x00000046 - - offset: 0x0000003A - type: ARM_RELOC_HALF - length: 1 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x0000004E - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000036 - type: ARM_RELOC_HALF - length: 0 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000032 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 1 - pc-rel: false - value: 0x00000024 - - offset: 0x0000FFD6 - scattered: true - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - value: 0x00000046 - - offset: 0x0000002E - scattered: true - type: 
ARM_RELOC_HALF_SECTDIFF - length: 0 - pc-rel: false - value: 0x00000024 - - offset: 0x0000FFFF - scattered: true - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - value: 0x00000046 - - offset: 0x0000002A - type: ARM_RELOC_HALF - length: 1 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000025 - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000026 - type: ARM_RELOC_HALF - length: 0 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000001C - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 3 - pc-rel: false - value: 0x0000004E - - offset: 0x0000002A - scattered: true - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - value: 0x00000020 - - offset: 0x00000018 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 2 - pc-rel: false - value: 0x0000004E - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000020 - - offset: 0x00000014 - type: ARM_RELOC_HALF - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x0000004E - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000010 - type: ARM_RELOC_HALF - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000000C - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 3 - pc-rel: false - value: 0x00000024 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - value: 0x00000020 - - offset: 0x00000008 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000024 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000020 - - offset: 0x00000004 - type: 
ARM_RELOC_HALF - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000025 - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000000 - type: ARM_RELOC_HALF - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 -local-symbols: - - name: _t1 - type: N_SECT - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000000 - - name: _t2 - type: N_SECT - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000024 - - name: _a2 - type: N_SECT - sect: 1 - value: 0x000000000000004E - - name: _a1 - type: N_SECT - sect: 1 - value: 0x0000000000000026 -... - -# CHECK: defined-atoms: -# CHECK: - name: _t1 -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: target: _t1 -# CHECK: - kind: thumb_movw -# CHECK: offset: 0 -# CHECK: target: _t2 -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movt -# CHECK: offset: 4 -# CHECK: target: _t2 -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movw_funcRel -# CHECK: offset: 8 -# CHECK: target: _t2 -# CHECK: addend: -36 -# CHECK: - kind: thumb_movt_funcRel -# CHECK: offset: 12 -# CHECK: target: _t2 -# CHECK: addend: -36 -# CHECK: - kind: thumb_movw -# CHECK: offset: 16 -# CHECK: target: _a2 -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movt -# CHECK: offset: 20 -# CHECK: target: _a2 -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movw_funcRel -# CHECK: offset: 24 -# CHECK: target: _a2 -# CHECK: addend: -36 -# CHECK: - kind: thumb_movt_funcRel -# CHECK: offset: 28 -# CHECK: target: _a2 -# CHECK: addend: -36 -# CHECK: - name: _t2 -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: target: _t2 -# CHECK: - name: _a1 -# CHECK: references: -# CHECK: - kind: arm_movw -# CHECK: offset: 0 -# CHECK: target: _t2 -# CHECK-NOT: addend: -# CHECK: - kind: arm_movt -# CHECK: offset: 4 -# CHECK: target: _t2 -# CHECK-NOT: 
addend: -# CHECK: - kind: arm_movw_funcRel -# CHECK: offset: 8 -# CHECK: target: _t2 -# CHECK: addend: -40 -# CHECK: - kind: arm_movt_funcRel -# CHECK: offset: 12 -# CHECK: target: _t2 -# CHECK: addend: -40 -# CHECK: - kind: arm_movw -# CHECK: offset: 16 -# CHECK: target: _a2 -# CHECK-NOT: addend: -# CHECK: - kind: arm_movt -# CHECK: offset: 20 -# CHECK: target: _a2 -# CHECK-NOT: addend: -# CHECK: - kind: arm_movw_funcRel -# CHECK: offset: 24 -# CHECK: target: _a2 -# CHECK: addend: -40 -# CHECK: - kind: arm_movt_funcRel -# CHECK: offset: 28 -# CHECK: target: _a2 -# CHECK: addend: -40 -# CHECK: - name: _a2 - - -# CODE: _t1: -# CODE-NEXT: movw r0, #4133 -# CODE-NEXT: movt r0, #0 -# CODE-NEXT: movw r1, #1 -# CODE-NEXT: movt r1, #0 -# CODE-NEXT: movw r2, #4174 -# CODE-NEXT: movt r2, #0 -# CODE-NEXT: movw r3, #42 -# CODE-NEXT: movt r3, #0 - - -# CODE: _a1: -# CODE-NEXT: movw r0, #4133 -# CODE-NEXT: movt r0, #0 -# CODE-NEXT: movw r1, #65495 -# CODE-NEXT: movt r1, #65535 -# CODE-NEXT: movw r2, #4174 -# CODE-NEXT: movt r2, #0 -# CODE-NEXT: movw r3, #0 -# CODE-NEXT: movt r3, #0 - - - -# .syntax unified -# .align 2 -# -# .code 16 -# .thumb_func _t1 -#_t1: -# movw r0, :lower16:(_t2) -# movt r0, :upper16:(_t2) -# movw r1, :lower16:(_t2-(L0+4)) -# movt r1, :upper16:(_t2-(L0+4)) -# movw r2, :lower16:(_a2) -# movt r2, :upper16:(_a2) -# movw r3, :lower16:(_a2-(L0+4)) -# movt r3, :upper16:(_a2-(L0+4)) -#L0: -# add r0, pc -# bx lr -# -# -# .code 16 -# .thumb_func _t2 -#_t2: -# bx lr -# -# -# -# .code 32 -#_a1: -# movw r0, :lower16:(_t2) -# movt r0, :upper16:(_t2) -# movw r1, :lower16:(_t2-(L1+8)) -# movt r1, :upper16:(_t2-(L1+8)) -# movw r2, :lower16:(_a2) -# movt r2, :upper16:(_a2) -# movw r3, :lower16:(_a2-(L1+8)) -# movt r3, :upper16:(_a2-(L1+8)) -#L1: -# add r0, pc -# bx lr -# -#_a2: -# bx lr - diff --git a/lld/test/mach-o/arm-interworking.yaml b/lld/test/mach-o/arm-interworking.yaml deleted file mode 100644 index 4196c12d0943..000000000000 --- 
a/lld/test/mach-o/arm-interworking.yaml +++ /dev/null @@ -1,288 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s \ -# RUN: %p/Inputs/arm-interworking.yaml -o %t | FileCheck %s \ -# RUN: && ld64.lld.darwinold -arch armv7 -dylib -print_atoms \ -# RUN: %p/Inputs/armv7/libSystem.yaml %t -o %t2 | FileCheck %s \ -# RUN: && llvm-readobj -S --section-data %t2 | FileCheck -check-prefix=CODE %s -# -# Test thumb and arm branches round trip through -r. -# Test bl/blx instructions are fixed up properly. -# -# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0xFF, 0xF7, 0xFE, 0xFF, 0xC0, 0x46, 0xFF, 0xF7, - 0xFC, 0xEF, 0xC0, 0x46, 0xFF, 0xF7, 0xF8, 0xEF, - 0xFF, 0xF7, 0xF6, 0xFF, 0xC0, 0x46, 0xFF, 0xF7, - 0xF3, 0xFF, 0xC0, 0x46, 0x00, 0xF0, 0x06, 0xE8, - 0xC0, 0x46, 0x00, 0xF0, 0x03, 0xF8, 0x00, 0xF0, - 0x02, 0xF8, 0x70, 0x47, 0x70, 0x47, 0x70, 0x47 ] - relocations: - - offset: 0x00000026 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000022 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x0000001C - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000016 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000010 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x0000000C - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x00000006 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x00000000 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - segment: __DATA - section: 
__data - type: S_REGULAR - attributes: [ ] - address: 0x0000000000000030 - content: [ 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000004 - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 -local-symbols: - - name: _t3 - type: N_SECT - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x000000000000002E - - name: _d1 - type: N_SECT - sect: 2 - value: 0x0000000000000030 -global-symbols: - - name: _t1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000000 - - name: _t2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x000000000000002C -undefined-symbols: - - name: _a1 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _a2 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - - -# CHECK: defined-atoms: -# CHECK: - name: _d1 -# CHECK: type: data -# CHECK: references: -# CHECK: - kind: pointer32 -# CHECK: offset: 0 -# CHECK: target: _t2 -# CHECK: - kind: pointer32 -# CHECK: offset: 4 -# CHECK: target: _a1 -# CHECK: - name: _d2 -# CHECK: type: data -# CHECK: references: -# CHECK: - kind: pointer32 -# CHECK: offset: 0 -# CHECK: target: _t1 -# CHECK: - kind: pointer32 -# CHECK: offset: 4 -# CHECK: target: _a1 -# CHECK: - name: _t1 -# CHECK: scope: global -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: target: _t1 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 0 -# CHECK: target: _a1 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 6 -# CHECK: target: _a2 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 12 -# CHECK: target: _a2 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 16 -# CHECK: target: _t1 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 22 -# CHECK: target: _t1 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 28 -# CHECK: target: _t2 -# CHECK: - kind: 
thumb_bl22 -# CHECK: offset: 34 -# CHECK: target: _t2 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 38 -# CHECK: target: _t3 -# CHECK: - name: _t2 -# CHECK: scope: global -# CHECK: content: [ 70, 47 ] -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: target: _t2 -# CHECK: - name: _t3 -# CHECK: content: [ 70, 47 ] -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: target: _t3 -# CHECK: - name: _a1 -# CHECK: scope: global -# CHECK: references: -# CHECK: - kind: arm_bl24 -# CHECK: offset: 0 -# CHECK: target: _a1 -# CHECK: - kind: arm_bl24 -# CHECK: offset: 4 -# CHECK: target: _a2 -# CHECK: - kind: arm_bl24 -# CHECK: offset: 8 -# CHECK: target: _t1 -# CHECK: - kind: arm_bl24 -# CHECK: offset: 12 -# CHECK: target: _t2 -# CHECK: - name: _a2 -# CHECK: scope: global - -# CODE: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00) -# CODE: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00) -# CODE: SectionData ( -# CODE: 0000: 00F016E8 C04600F0 1EE8C046 00F01AE8 -# CODE: 0010: FFF7F6FF C046FFF7 F3FFC046 00F006F8 -# CODE: 0020: C04600F0 03F800F0 02F87047 70477047 -# CODE: 0030: FEFFFFEB 020000EB F0FFFFFA FAFFFFFA -# CODE: 0040: 1EFF2FE1 1EFF2FE1 -# CODE: ) - -# CODE: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00) -# CODE: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) -# CODE: SectionData ( -# CODE: 0000: E50F0000 E80F0000 B90F0000 E80F0000 -# CODE: ) - -# When we get a good mach-o disassembler the above __text section content check can be change to be symbolic. -# Verify the low (thumb) bit is set on the first and third pointers but not the second and fourth. 
- - - -# Input file one: -# -# .align 2 -# .code 16 -# .globl _t1 -# .thumb_func _t1 -#_t1: -# bl _a1 -# nop -# blx _a2 -# nop -# blx _a2 -# bl _t1 -# nop -# bl _t1 -# nop -# blx _t2 -# nop -# blx _t2 -# bx lr -# -# .globl _t2 -# .thumb_func _t2 -#_t2: -# bx lr -# -# .data -#_d1: .long _t2 -# .long _a1 - - - -# Input file two: -# -# .align 2 -# .code 32 -# .globl _a1 -#_a1: -# bl _a1 -# blx _a2 -# bl _t1 -# blx _t2 -# bx lr -# -# .globl _a2 -#_a2: -# bx lr -# -# .data -#_d2: .long _t1 -# .long _a1 - - - - diff --git a/lld/test/mach-o/arm-shims.yaml b/lld/test/mach-o/arm-shims.yaml deleted file mode 100644 index 7c4f04677f1c..000000000000 --- a/lld/test/mach-o/arm-shims.yaml +++ /dev/null @@ -1,126 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv7 %s %p/Inputs/arm-shims.yaml \ -# RUN: -dylib %p/Inputs/armv7/libSystem.yaml -o %t -# RUN: llvm-readobj -S --section-data %t | FileCheck %s -# -# Test b from arm to thumb or vice versa has shims added.s -# -# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x00, 0xBF, 0xFF, 0xF7, 0xFE, 0xEF, 0xFF, 0xF7, - 0xFB, 0xBF, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3, - 0xFA, 0xFF, 0xFF, 0xFA, 0xF9, 0xFF, 0xFF, 0xEA ] - relocations: - - offset: 0x00000014 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000010 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000006 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x00000002 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 2 -global-symbols: - - name: _a1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000C - - name: _t1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ 
N_ARM_THUMB_DEF ] - value: 0x0000000000000000 -undefined-symbols: - - name: _a2 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _t2 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - -# CHECK: Section { -# CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00) -# CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00) -# CHECK: SectionData ( -# CHECK: 0000: 00BF00F0 10E800F0 19B80000 00F020E3 -# CHECK: 0010: 000000FA 0F0000EA 00BFFFF7 F8EF00F0 -# CHECK: 0020: 07B80000 00F020E3 F4FFFFFA 050000EA -# CHECK: 0030: DFF804C0 FF446047 D4FFFFFF DFF804C0 -# CHECK: 0040: FF446047 E0FFFFFF 04C09FE5 0CC08FE0 -# CHECK: 0050: 1CFF2FE1 ADFFFFFF 04C09FE5 0CC08FE0 -# CHECK: 0060: 1CFF2FE1 B5FFFFFF -# CHECK: ) - -# When we get a good mach-o disassembler the above __text section content check can be change to be symbolic. - - -# Input file one: -# -# .align 2 -# .code 16 -# .globl _t1 -# .thumb_func _t1 -#_t1: -# nop -# blx _a2 -# b _a2 -# -# .code 32 -# .align 2 -# .globl _a1 -#_a1: -# nop -# blx _t2 -# b _t2 - - - -# Input file two: -# -# .align 2 -# .code 16 -# .globl _t2 -# .thumb_func _t2 -#_t2: -# nop -# blx _a1 -# b _a1 -# -# .code 32 -# .align 2 -# .globl _a2 -#_a2: -# nop -# blx _t1 -# b _t1 diff --git a/lld/test/mach-o/arm-subsections-via-symbols.yaml b/lld/test/mach-o/arm-subsections-via-symbols.yaml deleted file mode 100644 index 96346e845c9a..000000000000 --- a/lld/test/mach-o/arm-subsections-via-symbols.yaml +++ /dev/null @@ -1,60 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv7 %s -r -print_atoms -o %t | FileCheck %s -# -# Test that assembly written without .subsections_via_symbols is parsed so -# that atoms are non-dead-strip and there is a layout-after references -# chaining atoms together. 
-# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x04, 0x10, 0x9F, 0xE5, 0x04, 0x20, 0x9F, 0xE5, - 0x1E, 0xFF, 0x2F, 0xE1, 0x78, 0x56, 0x34, 0x12, - 0x21, 0x43, 0x65, 0x87 ] -local-symbols: - - name: constants1 - type: N_SECT - sect: 1 - value: 0x000000000000000C - - name: constants2 - type: N_SECT - sect: 1 - value: 0x0000000000000010 -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... - - -# CHECK:defined-atoms: -# CHECK: - name: _foo -# CHECK: scope: global -# CHECK: content: [ 04, 10, 9F, E5, 04, 20, 9F, E5, 1E, FF, 2F, E1 ] -# CHECK: dead-strip: never -# CHECK: references: -# CHECK: - kind: layout-after -# CHECK: offset: 0 -# CHECK: target: constants1 -# CHECK: - name: constants1 -# CHECK: content: [ 78, 56, 34, 12 ] -# CHECK: dead-strip: never -# CHECK: references: -# CHECK: - kind: layout-after -# CHECK: offset: 0 -# CHECK: target: constants2 -# CHECK: - name: constants2 -# CHECK: content: [ 21, 43, 65, 87 ] -# CHECK: dead-strip: never diff --git a/lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml b/lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml deleted file mode 100644 index 02200908d7ca..000000000000 --- a/lld/test/mach-o/arm64-reloc-negDelta32-fixup.yaml +++ /dev/null @@ -1,124 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -r %s -o %t -# RUN: ld64.lld.darwinold -arch arm64 -r %t -o %t2 -# RUN: llvm-objdump -s --section="__eh_frame" %t | FileCheck %s -# RUN: llvm-objdump -s --section="__eh_frame" %t2 | FileCheck %s - -# The reference from FDE->CIE is implicitly created as a negDelta32. -# We don't emit these in to the binary as relocations, so we need to -# make sure that the offset in the FDE to the CIE is the correct value. 
-# CHECK: {{[0-9abcdef]*}} 10000000 00000000 017a5200 01781e01 -# CHECK: {{[0-9abcdef]*}} 100c1f00 20000000 18000000 b8ffffff -# Note, this one that matters ^~~~~~~~ -# It needs to be 0x18 as that is the offset back to 0 where the CIE is. -# CHECK: {{[0-9abcdef]*}} ffffffff 20000000 00000000 00480e10 -# CHECK: {{[0-9abcdef]*}} 9e019d02 00000000 - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x91, - 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x80, 0x52, - 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6 ] - relocations: - - offset: 0x00000010 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 6 - - offset: 0x0000000C - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000008 - type: ARM64_RELOC_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000020 - content: [ 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, - 0x72, 0x6C, 0x64, 0x00 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000030 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - 
address: 0x0000000000000050 - content: [ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x1E, 0x01, - 0x10, 0x0C, 0x1F, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x94, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x0E, 0x10, - 0x9E, 0x01, 0x9D, 0x02, 0x00, 0x00, 0x00, 0x00 ] -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: L_str - type: N_SECT - sect: 2 - value: 0x0000000000000020 - - name: ltmp1 - type: N_SECT - sect: 2 - value: 0x0000000000000020 - - name: ltmp2 - type: N_SECT - sect: 3 - value: 0x0000000000000030 - - name: ltmp3 - type: N_SECT - sect: 4 - value: 0x0000000000000050 -global-symbols: - - name: __Z3fooi - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _puts - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -page-size: 0x00000000 -... diff --git a/lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml b/lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml deleted file mode 100644 index d0f7389a4cbe..000000000000 --- a/lld/test/mach-o/arm64-relocs-errors-delta64-offset.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch arm64 %s -r \ -# RUN: 2> %t.err -# RUN: FileCheck %s < %t.err - - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xFF, 0x83, 0x00, 0xD1, 0xE0, 0x0B, 0x00, 0xF9, - 0x08, 0x00, 0x40, 0xB9, 0x08, 0x0D, 0x00, 0x71, - 0x08, 0x09, 0x00, 0x71, 0xE8, 0x0F, 0x00, 0xB9, - 0xC8, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x14, - 0xE8, 0x03, 0x00, 0x32, 0x08, 0x01, 0x00, 0x12, - 0xE8, 0x7F, 0x00, 0x39, 0x02, 0x00, 0x00, 0x14 ] - - segment: __DATA - section: __data - type: 
S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x000000000001C348 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - -# Make sure that the offsets of the subtractor and unsigned both match. -# CHECK: bad relocation (paired relocs must have the same offset) in section __DATA/__data (r1_address=1, r1_type=1, r1_extern=1, r1_length=3, r1_pcrel=0, r1_symbolnum=1), (r2_address=0, r2_type=0, r2_extern=1, r2_length=3, r2_pcrel=0, r2_symbolnum=1) - - offset: 0x00000001 - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000000 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 1 -global-symbols: - - name: _f1 - type: N_SECT - sect: 2 - value: 0x000000000001C348 - - name: _f2 - type: N_SECT - sect: 1 - value: 0x0000000000000010 - - name: _f3 - type: N_SECT - sect: 1 - value: 0x0000000000000020 diff --git a/lld/test/mach-o/arm64-section-order.yaml b/lld/test/mach-o/arm64-section-order.yaml deleted file mode 100644 index e4174b64f67c..000000000000 --- a/lld/test/mach-o/arm64-section-order.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 -# RUN: llvm-objdump --section-headers %t | FileCheck %s -# RUN: llvm-objdump --section-headers %t2 | FileCheck %s - -# Make sure that the sections are sorted. 
Currently we want this order: -# __text, __unwind_info - -# CHECK: Sections: -# CHECK: 0 __text {{.*}} TEXT -# CHECK: 1 __compact_unwind {{.*}} - - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 8 - address: 0x0000000000000000 - content: [ 0xC0, 0x03, 0x5F, 0xD6, 0xC0, 0x03, 0x5F, 0xD6 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000008 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000020 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 -global-symbols: - - name: __Z3fooi - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: __Z4foo2i - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000004 diff --git a/lld/test/mach-o/bind-opcodes.yaml b/lld/test/mach-o/bind-opcodes.yaml deleted file mode 100644 index 4a33b54cb4e1..000000000000 --- a/lld/test/mach-o/bind-opcodes.yaml +++ /dev/null @@ -1,140 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 %s %p/Inputs/hello-world-arm64.yaml -o %t -# RUN: obj2yaml %t | FileCheck %s -# - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: 
S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xF9, - 0x00, 0x01, 0x40, 0xF9, 0x01, 0x00, 0x00, 0x90, - 0x21, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x94, - 0x00, 0x00, 0x80, 0x52, 0xFD, 0x7B, 0xC1, 0xA8, - 0xC0, 0x03, 0x5F, 0xD6 ] - relocations: - - offset: 0x0000001C - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x00000018 - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000014 - type: ARM64_RELOC_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000000C - type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000008 - type: ARM64_RELOC_GOT_LOAD_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x000000000000002C - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: l_.str - type: N_SECT - sect: 2 - value: 0x000000000000002C - - name: ltmp1 - type: N_SECT - sect: 2 - value: 0x000000000000002C -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: ___stdoutp - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _fprintf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- - -# CHECK: BindOpcodes: -# CHECK: - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM -# CHECK: Imm: 1 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM -# CHECK: Imm: 0 -# CHECK: Symbol: dyld_stub_binder -# CHECK: - Opcode: BIND_OPCODE_SET_TYPE_IMM -# CHECK: Imm: 1 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB -# CHECK: Imm: 2 -# CHECK: ULEBExtraData: [ 0x0 ] -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_DO_BIND -# CHECK: Imm: 0 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM -# CHECK: Imm: 0 -# CHECK: Symbol: ___stdoutp -# CHECK: - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB -# CHECK: Imm: 2 -# CHECK: ULEBExtraData: [ 0x10 ] -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_DO_BIND -# CHECK: Imm: 0 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_DONE -# CHECK: Imm: 0 -# CHECK: Symbol: '' - -# CHECK: LazyBindOpcodes: -# CHECK: - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB -# CHECK: Imm: 2 -# CHECK: ULEBExtraData: [ 0x18 ] -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM -# CHECK: Imm: 1 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM -# CHECK: Imm: 0 -# CHECK: Symbol: _fprintf -# CHECK: - Opcode: BIND_OPCODE_DO_BIND -# CHECK: Imm: 0 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_DONE -# CHECK: Imm: 0 -# CHECK: Symbol: '' -# CHECK: - Opcode: BIND_OPCODE_DONE -# CHECK: Imm: 0 -# CHECK: Symbol: '' \ No newline at end of file diff --git a/lld/test/mach-o/cstring-sections.yaml b/lld/test/mach-o/cstring-sections.yaml deleted file mode 100644 index 251df8e3587e..000000000000 --- a/lld/test/mach-o/cstring-sections.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s -# -# Test -keep_private_externs in -r mode. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __objc_methname - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000000 - content: [ 0x61, 0x62, 0x63, 0x00, 0x64, 0x65, 0x66, 0x00 ] - - segment: __TEXT - section: __objc_classname - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000006 - content: [ 0x61, 0x62, 0x63, 0x00, 0x67, 0x68, 0x69, 0x00 ] - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x000000000000000A - content: [ 0x61, 0x62, 0x63, 0x00, 0x6A, 0x6B, 0x6C, 0x00 ] - - -... - -# CHECK: defined-atoms: -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 61, 62, 63, 00 ] -# CHECK: merge: by-content -# CHECK: section-choice: custom-required -# CHECK: section-name: '__TEXT/__objc_methname' -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 64, 65, 66, 00 ] -# CHECK: merge: by-content -# CHECK: section-choice: custom-required -# CHECK: section-name: '__TEXT/__objc_methname' -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 61, 62, 63, 00 ] -# CHECK: merge: by-content -# CHECK: section-choice: custom-required -# CHECK: section-name: '__TEXT/__objc_classname' -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 67, 68, 69, 00 ] -# CHECK: merge: by-content -# CHECK: section-choice: custom-required -# CHECK: section-name: '__TEXT/__objc_classname' -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 61, 62, 63, 00 ] -# CHECK: merge: by-content -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 6A, 6B, 6C, 00 ] -# CHECK: merge: by-content diff --git a/lld/test/mach-o/data-in-code-load-command.yaml b/lld/test/mach-o/data-in-code-load-command.yaml deleted file mode 100644 index e2131783619a..000000000000 --- a/lld/test/mach-o/data-in-code-load-command.yaml +++ /dev/null @@ -1,35 
+0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -no_data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -data_in_code_info -no_data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -r && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -r -data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -r -no_data_in_code_info && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_DATA_IN_CODE_INFO - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 
- value: 0x0000000000000000 -... - -# CHECK: Load command {{[0-9]*}} -# CHECK: cmd LC_DATA_IN_CODE -# CHECK: cmdsize 16 -# CHECK: dataoff -# CHECK: datasize - -# NO_DATA_IN_CODE_INFO-NOT: LC_DATA_IN_CODE diff --git a/lld/test/mach-o/data-only-dylib.yaml b/lld/test/mach-o/data-only-dylib.yaml deleted file mode 100644 index f865755e3c52..000000000000 --- a/lld/test/mach-o/data-only-dylib.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-nm %t | FileCheck %s -# -# Test that a data-only dylib can be built. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _myData - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... - -# CHECK: _myData diff --git a/lld/test/mach-o/dead-strip-globals.yaml b/lld/test/mach-o/dead-strip-globals.yaml deleted file mode 100644 index cacc44f4b93c..000000000000 --- a/lld/test/mach-o/dead-strip-globals.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -dead_strip -export_dynamic %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t.dylib -print_atoms | FileCheck -check-prefix=CHECK1 %s -# RUN: ld64.lld.darwinold -arch x86_64 -export_dynamic -dead_strip %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t.dylib -print_atoms | FileCheck -check-prefix=CHECK1 %s -# RUN: ld64.lld.darwinold -arch x86_64 -dead_strip %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t2.dylib -print_atoms | FileCheck -check-prefix=CHECK2 %s - -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -dylib %p/Inputs/x86_64/libSystem.yaml -o %t3.o -# RUN: llvm-nm -m %t3.o | FileCheck -check-prefix=RELOCATABLE_SYMBOLS %s - -# -# Test that -export_dynamic -dead-strip from removing globals. 
-# - ---- -defined-atoms: - - name: def - scope: global - dead-strip: never - - name: dead - scope: global -shared-library-atoms: - - name: dyld_stub_binder - load-name: /usr/lib/libSystem.B.dylib - type: unknown -... - -# CHECK1: name: def -# CHECK1: name: dead - -# CHECK2: name: def -# CHECK2-NOT: name: dead - -# RELOCATABLE_SYMBOLS: external def diff --git a/lld/test/mach-o/debug-syms.yaml b/lld/test/mach-o/debug-syms.yaml deleted file mode 100644 index 901c2528fc72..000000000000 --- a/lld/test/mach-o/debug-syms.yaml +++ /dev/null @@ -1,249 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -o %t %s -dylib %p/Inputs/x86_64/libSystem.yaml && \ -# RUN: llvm-nm --no-sort --debug-syms %t | FileCheck %s - -# CHECK: 0000000000000000 - 00 0000 SO /Users/lhames/Projects/lld/lld-svn-tot/scratch/ -# CHECK-NEXT: 0000000000000000 - 00 0000 SO hw.c -# CHECK-NEXT: {{[0-9a-f]+}} - 03 0001 OSO {{.*}}{{/|\\}}test{{/|\\}}mach-o{{/|\\}}debug-syms.yaml -# CHECK-NEXT: 0000000000000fa0 - 01 0000 BNSYM -# CHECK-NEXT: 0000000000000fa0 - 01 0000 FUN _main -# CHECK-NEXT: 0000000000000016 - 00 0000 FUN -# CHECK-NEXT: 0000000000000016 - 01 0000 ENSYM -# CHECK-NEXT: 0000000000000000 - 01 0000 SO - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -min-os-version-kind: LC_VERSION_MIN_MACOSX -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0xC7, 0x45, - 0xFC, 0x00, 0x00, 0x00, 0x00, 0x89, 0x7D, 0xF8, - 0x48, 0x89, 0x75, 0xF0, 0x5D, 0xC3 ] - - segment: __DWARF - section: __debug_str - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x0000000000000016 - content: [ 0x41, 0x70, 0x70, 0x6C, 0x65, 0x20, 0x4C, 0x4C, - 0x56, 0x4D, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, - 0x6F, 0x6E, 0x20, 0x38, 0x2E, 0x30, 
0x2E, 0x30, - 0x20, 0x28, 0x63, 0x6C, 0x61, 0x6E, 0x67, 0x2D, - 0x38, 0x30, 0x30, 0x2E, 0x30, 0x2E, 0x32, 0x34, - 0x2E, 0x31, 0x29, 0x00, 0x68, 0x77, 0x2E, 0x63, - 0x00, 0x2F, 0x55, 0x73, 0x65, 0x72, 0x73, 0x2F, - 0x6C, 0x68, 0x61, 0x6D, 0x65, 0x73, 0x2F, 0x50, - 0x72, 0x6F, 0x6A, 0x65, 0x63, 0x74, 0x73, 0x2F, - 0x6C, 0x6C, 0x64, 0x2F, 0x6C, 0x6C, 0x64, 0x2D, - 0x73, 0x76, 0x6E, 0x2D, 0x74, 0x6F, 0x74, 0x2F, - 0x73, 0x63, 0x72, 0x61, 0x74, 0x63, 0x68, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x69, 0x6E, 0x74, - 0x00, 0x61, 0x72, 0x67, 0x63, 0x00, 0x61, 0x72, - 0x67, 0x76, 0x00, 0x63, 0x68, 0x61, 0x72, 0x00 ] - - segment: __DWARF - section: __debug_loc - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x000000000000008E - - segment: __DWARF - section: __debug_abbrev - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x000000000000008E - content: [ 0x01, 0x11, 0x01, 0x25, 0x0E, 0x13, 0x05, 0x03, - 0x0E, 0x10, 0x06, 0x1B, 0x0E, 0x11, 0x01, 0x12, - 0x01, 0x00, 0x00, 0x02, 0x2E, 0x01, 0x11, 0x01, - 0x12, 0x01, 0x40, 0x0A, 0x03, 0x0E, 0x3A, 0x0B, - 0x3B, 0x0B, 0x27, 0x0C, 0x49, 0x13, 0x3F, 0x0C, - 0x00, 0x00, 0x03, 0x05, 0x00, 0x02, 0x0A, 0x03, - 0x0E, 0x3A, 0x0B, 0x3B, 0x0B, 0x49, 0x13, 0x00, - 0x00, 0x04, 0x24, 0x00, 0x03, 0x0E, 0x3E, 0x0B, - 0x0B, 0x0B, 0x00, 0x00, 0x05, 0x0F, 0x00, 0x49, - 0x13, 0x00, 0x00, 0x00 ] - - segment: __DWARF - section: __debug_info - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x00000000000000DA - content: [ 0x7F, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x56, 0x60, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, - 0x6A, 0x00, 0x00, 0x00, 0x01, 0x03, 0x02, 0x91, - 0x78, 0x69, 
0x00, 0x00, 0x00, 0x01, 0x01, 0x6A, - 0x00, 0x00, 0x00, 0x03, 0x02, 0x91, 0x70, 0x6E, - 0x00, 0x00, 0x00, 0x01, 0x01, 0x71, 0x00, 0x00, - 0x00, 0x00, 0x04, 0x65, 0x00, 0x00, 0x00, 0x05, - 0x04, 0x05, 0x76, 0x00, 0x00, 0x00, 0x05, 0x7B, - 0x00, 0x00, 0x00, 0x04, 0x73, 0x00, 0x00, 0x00, - 0x06, 0x01, 0x00 ] - relocations: - - offset: 0x00000037 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x0000002F - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000026 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x0000001E - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - segment: __DWARF - section: __debug_ranges - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x000000000000015D - - segment: __DWARF - section: __debug_macinfo - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x000000000000015D - content: [ 0x00 ] - - segment: __DWARF - section: __apple_names - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x000000000000015E - content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x6A, 0x7F, 0x9A, 0x7C, - 0x2C, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 ] - - segment: __DWARF - section: __apple_objc - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x000000000000019A - content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF ] - - segment: __DWARF - section: __apple_namespac - type: S_REGULAR - attributes: [ 
S_ATTR_DEBUG ] - address: 0x00000000000001BE - content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF ] - - segment: __DWARF - section: __apple_types - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x00000000000001E2 - content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, - 0x03, 0x00, 0x05, 0x00, 0x04, 0x00, 0x0B, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x30, 0x80, 0x88, 0x0B, 0x63, 0x20, 0x95, 0x7C, - 0x40, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, - 0x65, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x6A, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x24, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - - segment: __DWARF - section: __apple_exttypes - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x0000000000000248 - content: [ 0x48, 0x53, 0x41, 0x48, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x06, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - alignment: 8 - address: 0x0000000000000270 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - 
address: 0x0000000000000290 - content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, - 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x50, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - - segment: __DWARF - section: __debug_line - type: S_REGULAR - attributes: [ S_ATTR_DEBUG ] - address: 0x00000000000002D0 - content: [ 0x37, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1B, 0x00, - 0x00, 0x00, 0x01, 0x01, 0xFB, 0x0E, 0x0D, 0x00, - 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x01, 0x00, 0x68, 0x77, 0x2E, 0x63, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x02, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x05, 0x03, 0x0A, 0x08, 0x3D, 0x02, 0x02, - 0x00, 0x01, 0x01 ] - relocations: - - offset: 0x00000028 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -page-size: 0x00000000 -... diff --git a/lld/test/mach-o/demangle.yaml b/lld/test/mach-o/demangle.yaml deleted file mode 100644 index 2f1cba527f26..000000000000 --- a/lld/test/mach-o/demangle.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# REQUIRES: system-linker-mach-o -# -# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: -dylib -o %t %p/Inputs/x86_64/libSystem.yaml 2> %t.err -# RUN: FileCheck %s < %t.err -# -# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: -dylib -o %t %p/Inputs/x86_64/libSystem.yaml -demangle 2> %t.err2 -# RUN: FileCheck %s --check-prefix=DCHECK < %t.err2 -# -# Test -demangle option works on undefined symbol errors. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, - 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x0000000B - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x00000006 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000001 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: __Z1xv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: __Znam - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: __Znotcpp - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... 
- -# CHECK: __Znotcpp -# CHECK: __Znam -# CHECK: _foo - -# DCHECK: __Znotcpp -# DCHECK: operator new[](unsigned long) -# DCHECK: _foo - diff --git a/lld/test/mach-o/dependency_info.yaml b/lld/test/mach-o/dependency_info.yaml deleted file mode 100644 index 1195c9e91967..000000000000 --- a/lld/test/mach-o/dependency_info.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Test -dependency_info option -# -# RUN: ld64.lld.darwinold -arch x86_64 -test_file_usage \ -# RUN: -dependency_info %t.info \ -# RUN: -path_exists /System/Library/Frameworks \ -# RUN: -path_exists /System/Library/Frameworks/Foo.framework/Foo \ -# RUN: -path_exists /Custom/Frameworks \ -# RUN: -path_exists /Custom/Frameworks/Bar.framework/Bar \ -# RUN: -F/Custom/Frameworks \ -# RUN: -framework Bar \ -# RUN: -framework Foo -# RUN: %python %p/Inputs/DependencyDump.py %t.info | FileCheck %s - - -# CHECK: linker-vers: lld -# CHECK: input-file: /Custom/Frameworks{{[/\\]}}Bar.framework{{[/\\]}}Bar -# CHECK: not-found: /Custom/Frameworks{{[/\\]}}Foo.framework{{[/\\]}}Foo -# CHECK: input-file: /System/Library/Frameworks{{[/\\]}}Foo.framework{{[/\\]}}Foo -# CHECK: output-file: a.out diff --git a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml deleted file mode 100644 index 0cb3655d49a1..000000000000 --- a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml +++ /dev/null @@ -1,208 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s -# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t | FileCheck --check-prefix=CODE %s -# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t2 | FileCheck --check-prefix=CODE %s - - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - 
attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x91, - 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x80, 0x52, - 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6 ] - relocations: - - offset: 0x00000010 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 9 - - offset: 0x0000000C - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000008 - type: ARM64_RELOC_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000020 - content: [ 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, - 0x72, 0x6C, 0x64, 0x00 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000030 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - address: 0x0000000000000050 - content: [ 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, - 0x1E, 0x07, 0x00, 0x9D, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x00, 0x10, 0x0C, 0x1F, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x88, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x48, 0x0E, 0x10, 0x9E, 0x01, 0x9D, 0x02 ] - - segment: __TEXT - section: __gcc_except_tab - type: S_REGULAR - attributes: [ ] - 
address: 0x00000000000000A0 - content: [ 0x00, 0x00, 0x00, 0x00 ] -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: L_str - type: N_SECT - sect: 2 - value: 0x0000000000000020 - - name: ltmp1 - type: N_SECT - sect: 2 - value: 0x0000000000000020 - - name: ltmp2 - type: N_SECT - sect: 3 - value: 0x0000000000000030 - - name: ltmp3 - type: N_SECT - sect: 4 - value: 0x0000000000000050 - - name: ltmp4 - type: N_SECT - sect: 4 - value: 0x0000000000000070 -global-symbols: - - name: __Z3fooi - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: __gxx_personality_v0 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _puts - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -page-size: 0x00000000 - -# CHECK: defined-atoms: -# CHECK: - ref-name: L{{[0-9]*}} -# CHECK: scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 48, 65, 6C, 6C, 6F, 20, 77, 6F, 72, 6C, 64, 00 ] -# CHECK: merge: by-content -# CHECK: - ref-name: L{{[0-9]*}} -# CHECK: type: unwind-cfi -# CHECK: content: [ 1C, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 50, 4C, -# CHECK: 52, 00, 01, 78, 1E, 07, 00, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, -# CHECK: {{..}}, {{..}}, {{..}}, 00, 10, 0C, 1F, 00 ] -# CHECK: - type: unwind-cfi -# CHECK: content: [ 24, 00, 00, 00, 24, 00, 00, 00, {{..}}, {{..}}, {{..}}, {{..}}, -# CHECK: {{..}}, {{..}}, {{..}}, {{..}}, 20, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 08, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, 48, 0E, 10, -# CHECK: 9E, 01, 9D, 02 ] -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: L{{[0-9]*}} -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 8 -# CHECK: target: __Z3fooi -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 25 -# CHECK: target: L{{[0-9]*}} -# CHECK: - ref-name: L{{[0-9]*}} -# CHECK: type: 
unwind-lsda -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: - type: compact-unwind -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 20, 00, 00, 00, -# CHECK: 00, 00, 00, 03, 00, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: alignment: 8 -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: __Z3fooi -# CHECK: - name: __Z3fooi -# CHECK: scope: global -# CHECK: content: [ FD, 7B, BF, A9, FD, 03, 00, 91, 00, 00, 00, 90, -# CHECK: 00, 00, 00, 91, 00, 00, 00, 94, 00, 00, 80, 52, -# CHECK: FD, 7B, C1, A8, C0, 03, 5F, D6 ] -# CHECK: alignment: 4 -# CHECK: references: -# CHECK: - kind: page21 -# CHECK: offset: 8 -# CHECK: target: L{{[0-9]*}} -# CHECK: - kind: offset12 -# CHECK: offset: 12 -# CHECK: target: L{{[0-9]*}} -# CHECK: - kind: branch26 -# CHECK: offset: 16 -# CHECK: target: _puts - -# Make sure we don't have any relocations in the __eh_frame section -# CODE-NOT: RELOCATION RECORDS FOR [__eh_frame] - -# Also make sure the reloc for the FDE->function is the correct offset -# It should be the offset from the fixup location back to the address -# of the function we are referencing -# CODE: Contents of section __TEXT,__eh_frame: -# This is the CIE: -# CODE-NEXT: {{[0-9abcdef]*}} 1c000000 00000000 017a504c 52000178 -# CODE-NEXT: {{[0-9abcdef]*}} 1e0700bd ffffffff ffffff00 100c1f00 -# This is the FDE: -# CODE-NEXT: {{[0-9abcdef]*}} 24000000 24000000 a8ffffff ffffffff -# This is the important offset for FDE->func ^~~~~~~~ ~~~~~~~~ - -# CODE-NEXT: {{[0-9abcdef]*}} 20000000 00000000 08c3ffff ffffffff -# And this is the offset for FDE->lsda ^~~~~~~~ ~~~~~~ -# CODE-NEXT: {{[0-9abcdef]*}} ff480e10 9e019d02 -# And this byte ^~ diff --git a/lld/test/mach-o/dso_handle.yaml b/lld/test/mach-o/dso_handle.yaml deleted file mode 100644 index f35f8b1cd6f8..000000000000 --- a/lld/test/mach-o/dso_handle.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -o 
%t1 -# RUN: llvm-nm -m -n %t1 | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -dead_strip -o %t2 -# RUN: llvm-nm -m -n %t2 | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -dylib -o %t3 -# RUN: llvm-nm -m -n %t3 | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml -bundle -o %t4 -# RUN: llvm-nm -m -n %t4 | FileCheck %s -# -# Test that ___dso_handle symbol is available for executables, bundles, and dylibs -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000008 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 -global-symbols: - - name: _d - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000008 - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: ___dso_handle - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - -... 
- -# CHECK_NOT: ___dso_handle -# CHECK: _main diff --git a/lld/test/mach-o/dylib-install-names.yaml b/lld/test/mach-o/dylib-install-names.yaml deleted file mode 100644 index 869b19bdab17..000000000000 --- a/lld/test/mach-o/dylib-install-names.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# Check we accept -install_name correctly: -# RUN: ld64.lld.darwinold -arch x86_64 -install_name libwibble.dylib -dylib \ -# RUN: -compatibility_version 2.0 -current_version 5.3 \ -# RUN: %p/Inputs/x86_64/libSystem.yaml %s -o %t.dylib -# RUN: llvm-objdump --private-headers %t.dylib | FileCheck %s --check-prefix=CHECK-BINARY-WRITE - -# Check we read LC_ID_DYLIB correctly: -# RUN: ld64.lld.darwinold -arch x86_64 %p/Inputs/use-dylib-install-names.yaml \ -# RUN: %p/Inputs/x86_64/libSystem.yaml %t.dylib -dylib -o %t2.dylib -# RUN: llvm-objdump --private-headers %t2.dylib | FileCheck %s --check-prefix=CHECK-BINARY-READ - -# Check we default the install-name to the output file: -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -o libwibble.dylib \ -# RUN: -compatibility_version 2.0 -current_version 5.3 \ -# RUN: %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --private-headers libwibble.dylib | FileCheck %s --check-prefix=CHECK-BINARY-WRITE -# RUN: rm -f libwibble.dylib - -# Check -single_module does nothing -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -install_name libwibble.dylib \ -# RUN: -compatibility_version 2.0 -current_version 5.3 \ -# RUN: -single_module -o %t2.dylib %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --private-headers %t2.dylib | FileCheck %s --check-prefix=CHECK-BINARY-WRITE - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, - 0x90, 0x90, 0xC3, 0x90, 
0x90, 0x90, 0x90, 0xC3, - 0x31, 0xC0, 0xC3 ] -local-symbols: - - name: _myStatic - type: N_SECT - sect: 1 - value: 0x000000000000000B -global-symbols: - - name: _myGlobal - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 -... - - -# CHECK-BINARY-WRITE: cmd LC_ID_DYLIB -# CHECK-BINARY-WRITE-NEXT: cmdsize 40 -# CHECK-BINARY-WRITE-NEXT: name libwibble.dylib (offset 24) -# CHECK-BINARY-WRITE-NEXT: time stamp 1 -# CHECK-BINARY-WRITE-NEXT: current version 5.3.0 -# CHECK-BINARY-WRITE-NEXT: compatibility version 2.0.0 - -# CHECK-BINARY-READ: cmd LC_LOAD_DYLIB -# CHECK-BINARY-READ-NEXT: cmdsize 56 -# CHECK-BINARY-READ-NEXT: name /usr/lib/libSystem.B.dylib (offset 24) -# CHECK-BINARY-READ-NEXT: time stamp 2 -# CHECK-BINARY-READ-NEXT: current version 1.0.0 -# CHECK-BINARY-READ-NEXT: compatibility version 1.0.0 - -# CHECK-BINARY-READ: cmd LC_LOAD_DYLIB -# CHECK-BINARY-READ-NEXT: cmdsize 40 -# CHECK-BINARY-READ-NEXT: name libwibble.dylib (offset 24) -# CHECK-BINARY-READ-NEXT: time stamp 2 -# CHECK-BINARY-READ-NEXT: current version 5.3.0 -# CHECK-BINARY-READ-NEXT: compatibility version 2.0.0 diff --git a/lld/test/mach-o/eh-frame-relocs-arm64.yaml b/lld/test/mach-o/eh-frame-relocs-arm64.yaml deleted file mode 100644 index 3d7245e5d114..000000000000 --- a/lld/test/mach-o/eh-frame-relocs-arm64.yaml +++ /dev/null @@ -1,318 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s -# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t | FileCheck --check-prefix=CODE %s -# RUN: llvm-objdump -r -s --section="__eh_frame" --macho %t2 | FileCheck --check-prefix=CODE %s - - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, 
S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xC0, 0x03, 0x5F, 0xD6, 0xC0, 0x03, 0x5F, 0xD6, - 0xC0, 0x03, 0x5F, 0xD6 ] - - segment: __TEXT - section: __gcc_except_tab - type: S_REGULAR - attributes: [ ] - address: 0x0000000000000014 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x000000000000001C - content: [ 0x00, 0x00, 0x00, 0x00 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000020 - content: [ 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000020 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - address: 0x0000000000000060 - content: [ 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, - 0x1E, 0x07, 0x9B, 0xED, 0xFF, 0xFF, 0xFF, 0x10, - 0x10, 0x0C, 0x1F, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0xDC, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x08, 0xCB, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0E, 0x10, 0x9E, - 0x01, 0x9D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, - 0x1E, 0x07, 
0x9B, 0xA9, 0xFF, 0xFF, 0xFF, 0x10, - 0x10, 0x0C, 0x1F, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x94, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x08, 0x83, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0E, 0x10, 0x9E, - 0x01, 0x9D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x0000007D - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 6 - - offset: 0x0000007D - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 3 - - offset: 0x0000006C - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 6 - - offset: 0x0000006C - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 8 - - offset: 0x0000005B - type: ARM64_RELOC_POINTER_TO_GOT - length: 2 - pc-rel: true - extern: true - symbol: 10 - - offset: 0x00000035 - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 6 - - offset: 0x00000035 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000024 - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 6 - - offset: 0x00000024 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 7 - - offset: 0x00000013 - type: ARM64_RELOC_POINTER_TO_GOT - length: 2 - pc-rel: true - extern: true - symbol: 9 -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: ltmp1 - type: N_SECT - sect: 2 - value: 0x0000000000000014 - - name: _bar1 - type: N_SECT - sect: 2 - value: 0x0000000000000014 - - name: _bar2 - type: N_SECT - sect: 2 - value: 0x0000000000000018 - - name: ltmp12 - type: N_SECT - sect: 3 - value: 0x000000000000001C - - name: ltmp13 - type: N_SECT - sect: 4 - value: 0x0000000000000020 - - name: ltmp16 - type: N_SECT - sect: 5 - value: 0x0000000000000060 -global-symbols: - - name: __Z3fooi - type: N_SECT - 
scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000008 - - name: __Z4foo2i - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000C - - name: __gxx_personality_v0 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: __gxx_personality_v1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000004 - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000010 - - name: _someData - type: N_SECT - scope: [ N_EXT ] - sect: 3 - value: 0x000000000000001C -page-size: 0x00000000 -... - -# CHECK: --- !native -# CHECK: path: '' -# CHECK: defined-atoms: -# CHECK: - ref-name: L000 -# CHECK: type: unwind-cfi -# CHECK: content: [ 18, 00, 00, 00, 00, 00, 00, 00, 03, 7A, 50, 4C, -# CHECK: 52, 00, 01, 78, 1E, 07, 9B, {{..}}, {{..}}, {{..}}, {{..}}, 10, -# CHECK: 10, 0C, 1F, 00 ] -# CHECK: alignment: 8 -# CHECK: references: -# CHECK: - kind: unwindCIEToPersonalityFunction -# CHECK: offset: 19 -# CHECK: target: __gxx_personality_v0 -# CHECK: - type: unwind-cfi -# CHECK: content: [ 28, 00, 00, 00, 20, 00, 00, 00, {{..}}, {{..}}, {{..}}, {{..}}, -# CHECK: {{..}}, {{..}}, {{..}}, {{..}}, 04, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 08, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, 0E, 10, 9E, -# CHECK: 01, 9D, 02, 00, 00, 00, 00, 00 ] -# CHECK: alignment: 4 mod 8 -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: L000 -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 8 -# CHECK: target: __Z3fooi -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 25 -# CHECK: target: _bar1 -# CHECK: - ref-name: L001 -# CHECK: type: unwind-cfi -# CHECK: content: [ 18, 00, 00, 00, 00, 00, 00, 00, 03, 7A, 50, 4C, -# CHECK: 52, 00, 01, 78, 1E, 07, 9B, {{..}}, {{..}}, {{..}}, {{..}}, 10, -# CHECK: 10, 0C, 1F, 00 ] -# CHECK: alignment: 8 -# CHECK: references: -# CHECK: - kind: unwindCIEToPersonalityFunction -# CHECK: offset: 19 -# CHECK: target: __gxx_personality_v1 -# 
CHECK: - type: unwind-cfi -# CHECK: content: [ 28, 00, 00, 00, 20, 00, 00, 00, {{..}}, {{..}}, {{..}}, {{..}}, -# CHECK: {{..}}, {{..}}, {{..}}, {{..}}, 04, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 08, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, 0E, 10, 9E, -# CHECK: 01, 9D, 02, 00, 00, 00, 00, 00 ] -# CHECK: alignment: 4 mod 8 -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: L001 -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 8 -# CHECK: target: __Z4foo2i -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 25 -# CHECK: target: _bar2 -# CHECK: - name: _bar1 -# CHECK: type: unwind-lsda -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: - name: _bar2 -# CHECK: type: unwind-lsda -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: - name: _someData -# CHECK: scope: global -# CHECK: type: data -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: - name: __gxx_personality_v0 -# CHECK: scope: global -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: alignment: 4 -# CHECK: - name: __gxx_personality_v1 -# CHECK: scope: global -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: alignment: 4 -# CHECK: - name: __Z3fooi -# CHECK: scope: global -# CHECK: content: [ C0, 03, 5F, D6 ] -# CHECK: alignment: 4 -# CHECK: - name: __Z4foo2i -# CHECK: scope: global -# CHECK: content: [ C0, 03, 5F, D6 ] -# CHECK: alignment: 4 -# CHECK: - name: _main -# CHECK: scope: global -# CHECK: content: [ C0, 03, 5F, D6 ] -# CHECK: alignment: 4 -# CHECK: ... 
- -# # Make sure we don't have any relocations in the __eh_frame section -# CODE-NOT: RELOCATION RECORDS FOR [__eh_frame] - -# Also make sure the reloc for the CIE->personality function is the -# correct offset -# It should be the offset from the fixup location back to the address -# of the function we are referencing -# CODE: Contents of section __TEXT,__eh_frame: -# This is the CIE: -# CODE-NEXT: {{[0-9abcdef]*}} 18000000 00000000 037a504c 52000178 -# CODE-NEXT: {{[0-9abcdef]*}} 1e079bd1 ffffff10 100c1f00 28000000 -# This is the important offset for CIE->pfunc -# ^~~~~~~~~ -# Then we have an FDE starting from 28000000 above -# CODE-NEXT: {{[0-9abcdef]*}} 20000000 c8ffffff ffffffff 04000000 -# CODE-NEXT: {{[0-9abcdef]*}} 00000000 08c3ffff ffffffff ff0e109e -# And a new CIE starts at this 00000018 right below here -# CODE-NEXT: {{[0-9abcdef]*}} 019d0200 00000000 18000000 00000000 -# CODE-NEXT: {{[0-9abcdef]*}} 037a504c 52000178 1e079b8d ffffff10 -# This is the important offset for its CIE->pfunc ^~~~~~~~~ diff --git a/lld/test/mach-o/empty-sections.yaml b/lld/test/mach-o/empty-sections.yaml deleted file mode 100644 index 83cd97aeac39..000000000000 --- a/lld/test/mach-o/empty-sections.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -# -# Test that writing empty mach-o sections does not segfault the linker. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -... 
diff --git a/lld/test/mach-o/error-simulator-vs-macosx.yaml b/lld/test/mach-o/error-simulator-vs-macosx.yaml deleted file mode 100644 index 94b73d6c5334..000000000000 --- a/lld/test/mach-o/error-simulator-vs-macosx.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -macosx_version_min 10.8 %s %p/Inputs/hello-world-x86.yaml -o %t && llvm-nm -m %t | FileCheck %s -# RUN: not ld64.lld.darwinold -arch i386 -ios_simulator_version_min 5.0 %s %p/Inputs/hello-world-x86.yaml -o %t 2>&1 | FileCheck %s --check-prefix=ERROR -# -# Test that i386 can link with a macos version but gives an error with a simulator version. -# - ---- !mach-o -arch: x86 -OS: Mac OS X -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x90 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... 
- -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: (undefined) external dyld_stub_binder (from libSystem) - -# ERROR: cannot be linked due to incompatible operating systems diff --git a/lld/test/mach-o/exe-offsets.yaml b/lld/test/mach-o/exe-offsets.yaml deleted file mode 100644 index 65025febf4e4..000000000000 --- a/lld/test/mach-o/exe-offsets.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -e start %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-readobj --sections %t | FileCheck %s - -# Make sure data gets put at offset - ---- !native -defined-atoms: - - name: start - scope: global - content: [ 90 ] - - - name: _s1 - type: data - content: [ 31, 32, 33, 34 ] - - - name: _s2 - type: zero-fill - size: 8192 - - - name: _s3 - type: zero-fill - size: 100 - - - name: _s4 - type: data - content: [ 01 ] - - -# CHECK-LABEL: Section { -# CHECK: Name: __text -# CHECK: Segment: __TEXT -# CHECK: Size: 0x1 -# CHECK: Offset: 0 - -# CHECK-LABEL: Section { -# CHECK: Name: __data -# CHECK: Segment: __DATA -# CHECK: Size: 0x5 -# CHECK: Offset: 4096 - -# CHECK-LABEL: Section { -# CHECK: Name: __bss -# CHECK: Segment: __DATA -# CHECK: Size: 0x2064 -# CHECK: Offset: 0 diff --git a/lld/test/mach-o/exe-segment-overlap.yaml b/lld/test/mach-o/exe-segment-overlap.yaml deleted file mode 100644 index f1bf67bd0908..000000000000 --- a/lld/test/mach-o/exe-segment-overlap.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-readobj --sections --section-data %t | FileCheck %s - ---- !native -defined-atoms: - - name: _main - scope: global - content: [ 90 ] - - - name: _s2 - type: data - content: [ 31, 32, 33, 34 ] - - - name: _kustom - scope: global - type: unknown - content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] - section-choice: custom-required - section-name: __CUST/__custom - - -# CHECK-LABEL: Section { -# CHECK: Name: __text -# CHECK: Segment: __TEXT -# CHECK: Size: 0x1 -# 
CHECK: Offset: 4095 - -# CHECK-LABEL: Section { -# CHECK: Name: __data -# CHECK: Segment: __DATA -# CHECK: Size: 0x4 -# CHECK: Offset: 4096 -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 31323334 -# CHECK-NEXT: ) - -# CHECK-LABEL: Section { -# CHECK: Name: __custom{{ }} -# CHECK: Segment: __CUST{{ }} -# CHECK: Size: 0x8 -# CHECK: Offset: 8192 -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 01020304 05060708 -# CHECK-NEXT: ) diff --git a/lld/test/mach-o/executable-exports.yaml b/lld/test/mach-o/executable-exports.yaml deleted file mode 100644 index 8f0f3146e421..000000000000 --- a/lld/test/mach-o/executable-exports.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 \ -# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t && \ -# RUN: llvm-objdump --macho --exports-trie %t | FileCheck %s -# -# -# Tests that exports trie builds properly. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3, 0xC3, 0xC3, 0xC3 ] -global-symbols: - - name: _myHidden - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000000 - - name: _myRegular - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myWeak - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_WEAK_DEF ] - value: 0x0000000000000002 - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000003 -... 
- -# CHECK-NOT: _myHidden -# CHECK: 0x100000FFD _myRegular -# CHECK: 0x100000FFE _myWeak [weak_def] diff --git a/lld/test/mach-o/export-trie-order.yaml b/lld/test/mach-o/export-trie-order.yaml deleted file mode 100644 index e8819e00b400..000000000000 --- a/lld/test/mach-o/export-trie-order.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 %s %p/Inputs/hello-world-x86.yaml -o %t -# RUN: llvm-objdump --macho --exports-trie %t | FileCheck %s -# -# Test that the export trie is emitted in order. -# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0x58, 0x8D, 0x80, 0x16, 0x00, - 0x00, 0x00, 0x89, 0x04, 0x24, 0xE8, 0xE6, 0xFF, - 0xFF, 0xFF, 0x31, 0xC0, 0x83, 0xC4, 0x08, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000016 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000000E - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000021 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000B - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000021 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _printf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: Exports trie: -# CHECK-NEXT: __mh_execute_header -# CHECK-NEXT: _main diff --git a/lld/test/mach-o/exported_symbols_list-dylib.yaml b/lld/test/mach-o/exported_symbols_list-dylib.yaml deleted file mode 100644 index 1c417d860341..000000000000 --- a/lld/test/mach-o/exported_symbols_list-dylib.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ -# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t \ -# RUN: -exported_symbols_list %p/Inputs/exported_symbols_list.exp && \ -# RUN: llvm-nm -m %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ -# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t2 \ -# RUN: -exported_symbol _foo -exported_symbol _b && \ -# RUN: llvm-nm -m %t2 | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ -# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t3 \ -# RUN: -unexported_symbol _bar -unexported_symbol _a && \ -# RUN: llvm-nm -m %t3 | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ -# RUN: %s %p/Inputs/x86_64/libSystem.yaml -dead_strip -o %t \ -# RUN: -exported_symbols_list %p/Inputs/exported_symbols_list.exp && \ -# RUN: llvm-nm -m %t | FileCheck -check-prefix=CHECK_DEAD %s -# -# Test -exported_symbols_list and -exported_symbol properly changes visibility. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, - 0x89, 0xE5, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x000000000000000C - content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] - -global-symbols: - - name: _a - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x000000000000000C - - name: _b - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000010 - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000006 - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - -... - -# CHECK: (__DATA,__data) non-external (was a private external) _a -# CHECK: (__DATA,__data) external _b -# CHECK: (__TEXT,__text) non-external (was a private external) _bar -# CHECK: (__TEXT,__text) external _foo - -# CHECK_DEAD-NOT: (__DATA,__data) non-external (was a private external) _a -# CHECK_DEAD: (__DATA,__data) external _b -# CHECK_DEAD-NOT: (__TEXT,__text) non-external (was a private external) _bar -# CHECK_DEAD: (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/exported_symbols_list-obj.yaml b/lld/test/mach-o/exported_symbols_list-obj.yaml deleted file mode 100644 index 420b9cc60317..000000000000 --- a/lld/test/mach-o/exported_symbols_list-obj.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -exported_symbol _bar \ -# RUN: && llvm-nm -m %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t2 -keep_private_externs \ -# RUN: -exported_symbol _bar && \ -# RUN: llvm-nm -m %t2 | FileCheck -check-prefix=CHECK_KPE %s -# -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s -o %t3 \ -# RUN: -exported_symbol _foo 2> 
%t4 - -# Test -exported_symbols_list properly changes visibility in -r mode. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, - 0x89, 0xE5, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x000000000000000C - content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] - -global-symbols: - - name: _a - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x000000000000000C - - name: _b - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 2 - value: 0x0000000000000010 - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000006 - - name: _foo - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000000 - - -... - -# CHECK: (__DATA,__data) non-external (was a private external) _a -# CHECK: (__DATA,__data) non-external (was a private external) _b -# CHECK: (__TEXT,__text) external _bar -# CHECK: (__TEXT,__text) non-external (was a private external) _foo - -# CHECK_KPE: (__DATA,__data) non-external (was a private external) _a -# CHECK_KPE: (__DATA,__data) private external _b -# CHECK_KPE: (__TEXT,__text) external _bar -# CHECK_KPE: (__TEXT,__text) private external _foo diff --git a/lld/test/mach-o/exported_symbols_list-undef.yaml b/lld/test/mach-o/exported_symbols_list-undef.yaml deleted file mode 100644 index 85480af7fdae..000000000000 --- a/lld/test/mach-o/exported_symbols_list-undef.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -dylib \ -# RUN: %s %p/Inputs/x86_64/libSystem.yaml -o %t -exported_symbol _foobar 2> %t2 -# -# Test -exported_symbol fails if exported symbol not found. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, - 0x89, 0xE5, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x000000000000000C - content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] - -global-symbols: - - name: _a - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x000000000000000C - - name: _b - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000010 - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000006 - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - -... - -# CHECK: (__DATA,__data) private external _a -# CHECK: (__DATA,__data) external _b -# CHECK: (__TEXT,__text) private external _bar -# CHECK: (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/fat-archive.yaml b/lld/test/mach-o/fat-archive.yaml deleted file mode 100644 index 33631ed3b1f3..000000000000 --- a/lld/test/mach-o/fat-archive.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t \ -# RUN: -L %p/Inputs -lfoo %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# Test that fat archives are handled. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, - 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, - 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, - 0x48, 0x83, 0xC4, 0x10, 0x5D, 0xC3 ] - relocations: - - offset: 0x00000012 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/filelist.yaml b/lld/test/mach-o/filelist.yaml deleted file mode 100644 index e2ffa9fce7fe..000000000000 --- a/lld/test/mach-o/filelist.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# RUN: ld64.lld.darwinold -test_file_usage \ -# RUN: -filelist %p/Inputs/full.filelist \ -# RUN: -path_exists /foo/bar/a.o \ -# RUN: -path_exists /foo/bar/b.o \ -# RUN: -path_exists /foo/x.a \ -# RUN: 2>&1 | FileCheck %s -# -# RUN: ld64.lld.darwinold -test_file_usage -t \ -# RUN: -filelist %p/Inputs/partial.filelist,/foo \ -# RUN: -path_exists /foo/bar/a.o \ -# RUN: -path_exists /foo/bar/b.o \ -# RUN: -path_exists /foo/x.a \ -# RUN: 2>&1 | FileCheck %s - - -# CHECK: Found filelist entry /foo/bar/a.o -# CHECK: Found filelist entry /foo/bar/b.o -# CHECK: Found filelist entry /foo/x.a diff --git a/lld/test/mach-o/flat_namespace_undef_error.yaml b/lld/test/mach-o/flat_namespace_undef_error.yaml deleted file mode 100644 index 004ab3b8add3..000000000000 --- a/lld/test/mach-o/flat_namespace_undef_error.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch 
x86_64 -macosx_version_min 10.9 -flat_namespace -undefined error %s -o %t %p/Inputs/x86_64/libSystem.yaml 2>&1 | FileCheck %s - ---- !native -defined-atoms: - - name: _main - scope: global - content: [ E9, 00, 00, 00, 00 ] - alignment: 16 - references: - - kind: branch32 - offset: 1 - target: _bar -undefined-atoms: - - name: _bar - -# Make sure we error out for -flat_namespace -undefined error. -# CHECK: Undefined symbol: : _bar diff --git a/lld/test/mach-o/flat_namespace_undef_suppress.yaml b/lld/test/mach-o/flat_namespace_undef_suppress.yaml deleted file mode 100644 index 9ad0db86332d..000000000000 --- a/lld/test/mach-o/flat_namespace_undef_suppress.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -flat_namespace -undefined suppress %s -o %t %p/Inputs/x86_64/libSystem.yaml -# -# Sanity check '-flat_namespace -undefined suppress'. -# This should pass without error, even though '_bar' is undefined. - ---- !native -defined-atoms: - - name: _main - scope: global - content: [ E9, 00, 00, 00, 00 ] - alignment: 16 - references: - - kind: branch32 - offset: 1 - target: _bar -undefined-atoms: - - name: _bar diff --git a/lld/test/mach-o/force_load-dylib.yaml b/lld/test/mach-o/force_load-dylib.yaml deleted file mode 100644 index fb40aa9b67b5..000000000000 --- a/lld/test/mach-o/force_load-dylib.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %p/Inputs/bar.yaml \ -# RUN: -install_name /usr/lib/libbar.dylib %p/Inputs/x86_64/libSystem.yaml -o %t1.dylib -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -all_load %t1.dylib \ -# RUN: -install_name /usr/lib/libfoo.dylib %p/Inputs/x86_64/libSystem.yaml -o %t -# RUN: llvm-nm -m %t | FileCheck %s -# -# -# Test -all_load does not break linking with dylibs -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ 
S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, - 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000008 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - - -# CHECK: (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/force_load-x86_64.yaml b/lld/test/mach-o/force_load-x86_64.yaml deleted file mode 100644 index 295217c8b3c1..000000000000 --- a/lld/test/mach-o/force_load-x86_64.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml \ -# RUN: %p/Inputs/libfoo.a %p/Inputs/libbar.a -o %t1 -# RUN: llvm-nm -m -n %t1 | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml \ -# RUN: -force_load %p/Inputs/libfoo.a %p/Inputs/libbar.a -o %t2 -# RUN: llvm-nm -m -n %t2 | FileCheck --check-prefix=CHECKF %s -# -# Test that -force_load causes members of static library to be loaded. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... 
- -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK-NOT: {{[0-9a-f]+}} (__TEXT,__text) external _main - -# CHECKF: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECKF: {{[0-9a-f]+}} (__TEXT,__text) external _foo -# CHECKF-NOT: {{[0-9a-f]+}} (__TEXT,__text) external _bar diff --git a/lld/test/mach-o/framework-user-paths.yaml b/lld/test/mach-o/framework-user-paths.yaml deleted file mode 100644 index a96cfed7ab75..000000000000 --- a/lld/test/mach-o/framework-user-paths.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# -# Test framework and SDK search paths. -# myFrameworks is not an absolute path, so it should not by found in SDK -# /Custom/Frameworks should be found in SDK -# /opt/Frameworks should not be found in SDK -# /System/Library/Frameworks is implicit and should be in SDK -# -# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ -# RUN: -path_exists myFrameworks \ -# RUN: -path_exists myFrameworks/my.framework/my \ -# RUN: -path_exists /opt/Frameworks \ -# RUN: -path_exists /opt/Frameworks/other.framework/other \ -# RUN: -path_exists /Custom/Frameworks \ -# RUN: -path_exists /Custom/Frameworks/Bar.framework/Bar \ -# RUN: -path_exists /System/Library/Frameworks \ -# RUN: -path_exists /System/Library/Frameworks/Foo.framework/Foo \ -# RUN: -path_exists /SDK/myFrameworks \ -# RUN: -path_exists /SDK/myFrameworks/my.framework/my \ -# RUN: -path_exists /SDK/Custom/Frameworks \ -# RUN: -path_exists /SDK/Custom/Frameworks/Bar.framework/Bar \ -# RUN: -path_exists /SDK/System/Library/Frameworks \ -# RUN: -path_exists /SDK/System/Library/Frameworks/Foo.framework/Foo \ -# RUN: -syslibroot /SDK \ -# RUN: -FmyFrameworks \ -# RUN: -F/Custom/Frameworks \ -# RUN: -F/opt/Frameworks \ -# RUN: -framework my \ -# RUN: -framework Bar \ -# RUN: -framework Foo \ -# RUN: -framework other \ -# RUN: 2>&1 | FileCheck %s - -# CHECK: Framework search paths: -# CHECK-NEXT: myFrameworks -# CHECK-NEXT: /SDK/Custom/Frameworks -# CHECK-NEXT: /opt/Frameworks -# CHECK-NEXT: 
/SDK/System/Library/Frameworks -# CHECK: Found framework myFrameworks/my.framework/my -# CHECK: Found framework /SDK/Custom/Frameworks/Bar.framework/Bar -# CHECK: Found framework /SDK/System/Library/Frameworks/Foo.framework/Foo -# CHECK: Found framework /opt/Frameworks/other.framework/other diff --git a/lld/test/mach-o/function-starts-load-command.yaml b/lld/test/mach-o/function-starts-load-command.yaml deleted file mode 100644 index cb558ad688e2..000000000000 --- a/lld/test/mach-o/function-starts-load-command.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -function_starts && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -no_function_starts && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -function_starts -no_function_starts && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_FUNCTION_STARTS - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... 
- -# CHECK: Load command {{[0-9]*}} -# CHECK: cmd LC_FUNCTION_STARTS -# CHECK: cmdsize 16 -# CHECK: dataoff -# CHECK: datasize - -# NO_FUNCTION_STARTS-NOT: LC_FUNCTION_STARTS diff --git a/lld/test/mach-o/gcc_except_tab-got-arm64.yaml b/lld/test/mach-o/gcc_except_tab-got-arm64.yaml deleted file mode 100644 index caccf4f2fe14..000000000000 --- a/lld/test/mach-o/gcc_except_tab-got-arm64.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 %s \ -# RUN: -dylib %p/Inputs/arm64/libSystem.yaml -o %t -# RUN: llvm-objdump --section-headers %t | FileCheck %s - -# Make sure that the GOT relocation from gcc_except_tab to the data -# is not removed. - ---- !native -defined-atoms: - - name: _main - scope: global - content: [ FD, 7B, BF, A9, FD, 03, 00, 91, FF, 43, 00, D1, - BF, C3, 1F, B8, 00, 00, 00, 94, BF, 03, 00, 91, - FD, 7B, C1, A8, C0, 03, 5F, D6 ] - alignment: 4 - - name: __ZTSP1A - scope: hidden - type: constant - content: [ 50, 31, 41, 00 ] - merge: as-weak - - name: GCC_except_table0 - type: unwind-lsda - content: [ FF, 9B, E7, 80, 00, 03, 5B, 00, 00, 00, 00, 1C, - 00, 00, 00, 00, 00, 00, 00, 00, 1C, 00, 00, 00, - 18, 00, 00, 00, 84, 00, 00, 00, 03, 40, 00, 00, - 00, 10, 00, 00, 00, 94, 00, 00, 00, 03, 60, 00, - 00, 00, 20, 00, 00, 00, B4, 00, 00, 00, 05, 80, - 00, 00, 00, 68, 00, 00, 00, 00, 00, 00, 00, 00, - E8, 00, 00, 00, 08, 00, 00, 00, 28, 01, 00, 00, - 00, F0, 00, 00, 00, 74, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 01, 7D, 01, 00, A8, FF, FF, FF ] - alignment: 4 - references: - - kind: delta32ToGOT - offset: 104 - target: __ZTIP1A - - name: __ZTIP1A - scope: hidden - type: data - content: [ 10, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 80, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00 ] - merge: as-weak - alignment: 16 -shared-library-atoms: - - name: dyld_stub_binder - load-name: /usr/lib/libSystem.B.dylib - type: unknown -... - -# Make sure we have a GOT relocation. 
-# This could only have come from __gcc_except_tab to __ZTIP1A -# CHECK: __got \ No newline at end of file diff --git a/lld/test/mach-o/got-order.yaml b/lld/test/mach-o/got-order.yaml deleted file mode 100644 index 5ebb86042245..000000000000 --- a/lld/test/mach-o/got-order.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/got-order.yaml \ -# RUN: %p/Inputs/got-order2.yaml -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --macho --bind %t | FileCheck %s -# -# Test that GOT slots are sorted by name -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x0D, 0x00, - 0x00, 0x00, 0x00, 0x48, 0x8B, 0x05, 0x00, 0x00, - 0x00, 0x00, 0x8B, 0x00, 0x03, 0x01, 0x48, 0x8B, - 0x0D, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000019 - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x0000000E - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000007 - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 3 -global-symbols: - - name: _func - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _aaa - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _fff - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _zzz - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- - -# CHECK: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _aaa -# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _bar -# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _fff -# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _foo -# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _zazzle -# CHECK-NEXT: __DATA __got {{[0-9a-zA-Z _]+}} pointer 0 libfoobar _zzz diff --git a/lld/test/mach-o/hello-world-arm64.yaml b/lld/test/mach-o/hello-world-arm64.yaml deleted file mode 100644 index c2e232233dc6..000000000000 --- a/lld/test/mach-o/hello-world-arm64.yaml +++ /dev/null @@ -1,102 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 %s %p/Inputs/hello-world-arm64.yaml -o %t -# RUN: llvm-nm -m -n %t | FileCheck %s -# RUN: llvm-objdump --private-headers %t | FileCheck %s --check-prefix=CHECK-PRIVATE-HEADER -# -# Test that arm64 hello-world can be linked into a mach-o executable -# - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xF9, - 0x00, 0x01, 0x40, 0xF9, 0x01, 0x00, 0x00, 0x90, - 0x21, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x94, - 0x00, 0x00, 0x80, 0x52, 0xFD, 0x7B, 0xC1, 0xA8, - 0xC0, 0x03, 0x5F, 0xD6 ] - relocations: - - offset: 0x0000001C - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x00000018 - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000014 - type: ARM64_RELOC_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000000C - type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000008 - type: 
ARM64_RELOC_GOT_LOAD_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x000000000000002C - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: l_.str - type: N_SECT - sect: 2 - value: 0x000000000000002C - - name: ltmp1 - type: N_SECT - sect: 2 - value: 0x000000000000002C -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: ___stdoutp - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _fprintf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - -# CHECK: (undefined) external ___stdoutp (from libSystem) -# CHECK: (undefined) external _fprintf (from libSystem) -# CHECK: (undefined) external dyld_stub_binder (from libSystem) -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main - -# CHECK-PRIVATE-HEADER: sectname __stubs -# CHECK-PRIVATE-HEADER-NEXT: segname __TEXT -# CHECK-PRIVATE-HEADER-NEXT: addr -# CHECK-PRIVATE-HEADER-NEXT: size -# CHECK-PRIVATE-HEADER-NEXT: offset -# CHECK-PRIVATE-HEADER-NEXT: align 2^1 (2) diff --git a/lld/test/mach-o/hello-world-armv6.yaml b/lld/test/mach-o/hello-world-armv6.yaml deleted file mode 100644 index 4004c963da89..000000000000 --- a/lld/test/mach-o/hello-world-armv6.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv6 %s %p/Inputs/hello-world-armv6.yaml -o %t -# RUN: llvm-nm -m %t | FileCheck %s -# -# Test that armv6 (arm) hello-world can be linked into a mach-o executable -# - ---- !mach-o -arch: armv6 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x80, 
0x40, 0x2D, 0xE9, 0x10, 0x00, 0x9F, 0xE5, - 0x0D, 0x70, 0xA0, 0xE1, 0x00, 0x00, 0x8F, 0xE0, - 0xFA, 0xFF, 0xFF, 0xEB, 0x00, 0x00, 0xA0, 0xE3, - 0x80, 0x80, 0xBD, 0xE8, 0x0C, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x0000001C - scattered: true - type: ARM_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000020 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000C - - offset: 0x00000010 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000020 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _printf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: (undefined) external _printf (from libSystem) -# CHECK: (undefined) external dyld_stub_binder (from libSystem) diff --git a/lld/test/mach-o/hello-world-armv7.yaml b/lld/test/mach-o/hello-world-armv7.yaml deleted file mode 100644 index 0407e924aa5b..000000000000 --- a/lld/test/mach-o/hello-world-armv7.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv7 %s %p/Inputs/hello-world-armv7.yaml -o %t -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# Test that armv7 (thumb) hello-world can be linked into a mach-o executable -# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x80, 0xB5, 0x40, 0xF2, 0x06, 0x00, 0x6F, 0x46, - 0xC0, 0xF2, 0x00, 0x00, 0x78, 0x44, 0xFF, 0xF7, - 0xF8, 0xEF, 0x00, 0x20, 0x80, 0xBD ] - relocations: - - 
offset: 0x0000000E - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000008 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 3 - pc-rel: false - value: 0x00000016 - - offset: 0x00000006 - scattered: true - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - value: 0x0000000C - - offset: 0x00000002 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000016 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000C - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000016 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000000 -undefined-symbols: - - name: _printf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - -# CHECK: (undefined) external _printf (from libSystem) -# CHECK: (undefined) external dyld_stub_binder (from libSystem) -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external [Thumb] _main diff --git a/lld/test/mach-o/hello-world-x86.yaml b/lld/test/mach-o/hello-world-x86.yaml deleted file mode 100644 index 5c3bc6731cd4..000000000000 --- a/lld/test/mach-o/hello-world-x86.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 %s %p/Inputs/hello-world-x86.yaml -o %t -# RUN: llvm-nm -m %t | FileCheck %s -# -# Test that i386 hello-world can be linked into a mach-o executable -# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0x58, 0x8D, 0x80, 0x16, 0x00, - 0x00, 0x00, 0x89, 0x04, 0x24, 0xE8, 0xE6, 0xFF, - 0xFF, 
0xFF, 0x31, 0xC0, 0x83, 0xC4, 0x08, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000016 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000000E - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000021 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000B - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000021 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _printf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: (undefined) external _printf (from libSystem) -# CHECK: (undefined) external dyld_stub_binder (from libSystem) diff --git a/lld/test/mach-o/hello-world-x86_64.yaml b/lld/test/mach-o/hello-world-x86_64.yaml deleted file mode 100644 index c49565244a19..000000000000 --- a/lld/test/mach-o/hello-world-x86_64.yaml +++ /dev/null @@ -1,120 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/hello-world-x86_64.yaml \ -# RUN: -o %t -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/hello-world-x86_64.yaml \ -# RUN: -dead_strip -o %t2 -# RUN: llvm-nm -m -n %t2 | FileCheck %s -# -# Test that x86_64 hello-world can be linked into a mach-o executable -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x05, 0x00, - 0x00, 0x00, 0x00, 0x48, 0x8B, 0x38, 0x48, 0x8D, - 0x35, 0x00, 0x00, 0x00, 0x00, 
0x31, 0xC0, 0xE8, - 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, 0x5D, 0xC3 ] - relocations: - - offset: 0x00000018 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x00000011 - type: X86_64_RELOC_SIGNED - length: 2 - pc-rel: true - extern: true - symbol: 0 - - offset: 0x00000007 - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 4 - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000020 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000028 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - address: 0x0000000000000048 - content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, - 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x98, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] -local-symbols: - - name: L1 - type: N_SECT - sect: 2 - value: 0x0000000000000020 - - name: EH_frame0 - type: N_SECT - sect: 4 - value: 0x0000000000000048 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _main.eh - type: N_SECT - scope: [ N_EXT ] - sect: 4 - value: 0x0000000000000060 -undefined-symbols: - - name: ___stdoutp - type: N_UNDF - scope: [ N_EXT ] 
- value: 0x0000000000000000 - - name: _fprintf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - -# CHECK: (undefined) external ___stdoutp (from libSystem) -# CHECK: (undefined) external _fprintf (from libSystem) -# CHECK: (undefined) external dyld_stub_binder (from libSystem) -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) [referenced dynamically] external __mh_execute_header -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main diff --git a/lld/test/mach-o/image-base.yaml b/lld/test/mach-o/image-base.yaml deleted file mode 100644 index c56eed199e5d..000000000000 --- a/lld/test/mach-o/image-base.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 %s -o %t -image_base 31415926000 %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-readobj --macho-segment %t | FileCheck %s -# RUN: not ld64.lld.darwinold -arch x86_64 -image_base 0x31415926530 %s >/dev/null 2> %t -# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-MISPAGED -# RUN: not ld64.lld.darwinold -arch x86_64 -image_base 1000 %s >/dev/null 2> %t -# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-OVERLAP -# RUN: not ld64.lld.darwinold -arch x86_64 -image_base hithere %s >/dev/null 2> %t -# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-NOTHEX - ---- !native -defined-atoms: - - name: _main - scope: global - content: [] - -# CHECK: Segment { -# CHECK: Cmd: LC_SEGMENT_64 -# CHECK: Name: __TEXT -# CHECK-NEXT: Size: 152 -# CHECK-NEXT: vmaddr: 0x31415926000 -# CHECK-NEXT: vmsize: 0x1000 - - -# CHECK-ERROR-MISPAGED: error: image_base must be a multiple of page size (0x1000) - -# CHECK-ERROR-OVERLAP: error: image_base overlaps with __PAGEZERO - -# CHECK-ERROR-NOTHEX: error: image_base expects a hex number diff --git a/lld/test/mach-o/infer-arch.yaml b/lld/test/mach-o/infer-arch.yaml deleted file mode 100644 index a66d17bc58df..000000000000 --- a/lld/test/mach-o/infer-arch.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 
-macosx_version_min 10.8 %s -r -o %t \ -# RUN: && ld64.lld.darwinold -r %t -o %t2 -print_atoms | FileCheck %s -# -# Test linker can detect architecture without -arch option. -# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3 ] -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - -... - - -# CHECK: defined-atoms: -# CHECK: - name: _foo diff --git a/lld/test/mach-o/interposing-section.yaml b/lld/test/mach-o/interposing-section.yaml deleted file mode 100644 index 340ea8cc4b71..000000000000 --- a/lld/test/mach-o/interposing-section.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/interposing-section.yaml \ -# RUN: -dylib -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --private-headers %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 %s -r -o %t1 -# RUN: llvm-objdump --private-headers %t1 | FileCheck %s -# -# Test that interposing section is preserved by linker. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, - 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000008 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 2 - - segment: __DATA - section: __interpose - type: S_INTERPOSING - attributes: [ ] - alignment: 8 - address: 0x0000000000000010 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000008 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 -local-symbols: - - name: _my_open - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: __interpose_open - type: N_SECT - sect: 2 - desc: [ N_NO_DEAD_STRIP ] - value: 0x0000000000000010 -undefined-symbols: - - name: _open - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - - -# CHECK: sectname __interposing -# CHECK: segname __DATA -# CHECK: type S_INTERPOSING - diff --git a/lld/test/mach-o/keep_private_externs.yaml b/lld/test/mach-o/keep_private_externs.yaml deleted file mode 100644 index b8f0e4f7a065..000000000000 --- a/lld/test/mach-o/keep_private_externs.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t \ -# RUN: && llvm-nm -m %t | FileCheck %s -# -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t2 -keep_private_externs \ -# RUN: && llvm-nm -m %t2 | FileCheck -check-prefix=CHECK_KPE %s -# -# Test -keep_private_externs in -r mode. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, - 0x89, 0xE5, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x000000000000000C - content: [ 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00 ] - -global-symbols: - - name: _a - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x000000000000000C - - name: _b - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 2 - value: 0x0000000000000010 - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000006 - - name: _foo - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000000 - - -... - -# CHECK: (__DATA,__data) external _a -# CHECK: (__DATA,__data) non-external (was a private external) _b -# CHECK: (__TEXT,__text) external _bar -# CHECK: (__TEXT,__text) non-external (was a private external) _foo - -# CHECK_KPE: (__DATA,__data) external _a -# CHECK_KPE: (__DATA,__data) private external _b -# CHECK_KPE: (__TEXT,__text) external _bar -# CHECK_KPE: (__TEXT,__text) private external _foo diff --git a/lld/test/mach-o/lazy-bind-x86_64.yaml b/lld/test/mach-o/lazy-bind-x86_64.yaml deleted file mode 100644 index 3bad4c2ad83c..000000000000 --- a/lld/test/mach-o/lazy-bind-x86_64.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# REQUIRES: x86 - -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/lazy-bind-x86_64.yaml %p/Inputs/lazy-bind-x86_64-2.yaml \ -# RUN: %p/Inputs/lazy-bind-x86_64-3.yaml -o %t \ -# RUN: %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --macho --lazy-bind %t | FileCheck %s -# RUN: llvm-nm -m %t | FileCheck --check-prefix=CHECK-NM %s -# RUN: llvm-objdump --disassemble %t | FileCheck 
--check-prefix=CHECK-HELPERS %s -# RUN: llvm-objdump --private-headers %t | FileCheck --check-prefix=CHECK-DYLIBS %s -# -# Test that correct two-level namespace ordinals are used for lazy bindings. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0x31, 0xC0, 0xE8, 0x00, 0x00, - 0x00, 0x00, 0x31, 0xC0, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0x31, 0xC0, 0x5D, 0xC3 ] - relocations: - - offset: 0x00000015 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x0000000E - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x00000007 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _baz - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... 
- - -# CHECK: libbar _bar -# CHECK: libbaz _baz -# CHECK: libfoo _foo - - -# CHECK-NM: (undefined) external _bar (from libbar) -# CHECK-NM: (undefined) external _baz (from libbaz) -# CHECK-NM: (undefined) external _foo (from libfoo) - - -# CHECK-HELPERS:Disassembly of section __TEXT,__stub_helper: -# CHECK-HELPERS: 68 00 00 00 00 pushq $0 -# CHECK-HELPERS: 68 0b 00 00 00 pushq $11 -# CHECK-HELPERS: 68 16 00 00 00 pushq $22 - -# Make sure the stub helper is correctly aligned -# CHECK-DYLIBS: sectname __stub_helper -# CHECK-DYLIBS-NEXT: segname __TEXT -# CHECK-DYLIBS-NEXT: addr -# CHECK-DYLIBS-NEXT: size -# CHECK-DYLIBS-NEXT: offset -# CHECK-DYLIBS-NEXT: align 2^2 (4) - -# Make sure the __nl_symbol_ptr section is used instea of __got as this is x86_64 -# CHECK-DYLIBS: sectname __nl_symbol_ptr -# CHECK-DYLIBS-NEXT: segname __DATA - -# CHECK-DYLIBS: cmd LC_LOAD_DYLIB -# CHECK-DYLIBS: name /usr/lib/libbar.dylib (offset 24) -# CHECK-DYLIBS: current version 2.3.0 -# CHECK-DYLIBS: compatibility version 1.0.0 -# CHECK-DYLIBS: cmd LC_LOAD_DYLIB -# CHECK-DYLIBS: name /usr/lib/libfoo.dylib (offset 24) -# CHECK-DYLIBS: current version 3.4.0 -# CHECK-DYLIBS: compatibility version 2.0.0 -# CHECK-DYLIBS: cmd LC_LOAD_DYLIB -# CHECK-DYLIBS: name /usr/lib/libbaz.dylib (offset 24) -# CHECK-DYLIBS: current version 4.5.0 -# CHECK-DYLIBS: compatibility version 3.0.0 - - diff --git a/lld/test/mach-o/lc_segment_filesize.yaml b/lld/test/mach-o/lc_segment_filesize.yaml deleted file mode 100644 index fea5008adbc5..000000000000 --- a/lld/test/mach-o/lc_segment_filesize.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -o %t %s && llvm-objdump --private-headers %t | FileCheck %s - -# CHECK: filesize 19 - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000000 - content: [ 0x00, 
0x00, 0x00 ] - - segment: __TEXT - section: __alt - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000010 - content: [ 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -page-size: 0x00000000 -... diff --git a/lld/test/mach-o/lib-search-paths.yaml b/lld/test/mach-o/lib-search-paths.yaml deleted file mode 100644 index 29c5e62ce429..000000000000 --- a/lld/test/mach-o/lib-search-paths.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -syslibroot %p/Inputs/lib-search-paths -lmyshared -lmystatic -lfile.o -r -print_atoms 2>&1 | FileCheck %s - ---- !native -undefined-atoms: - - name: _from_myshared - - name: _from_mystatic - - name: _from_fileo - -# CHECK: defined-atoms: -# CHECK: - name: _from_fileo -# CHECK: content: [ 2A, 00, 00, 00 ] -# CHECK: - name: _from_mystatic -# CHECK: content: [ 02, 00, 00, 00 ] -# CHECK: shared-library-atoms: -# CHECK: - name: _from_myshared -# CHECK: load-name: libmyshared.dylib diff --git a/lld/test/mach-o/library-order.yaml b/lld/test/mach-o/library-order.yaml deleted file mode 100644 index 02d31c578a4b..000000000000 --- a/lld/test/mach-o/library-order.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %p/Inputs/libfoo.a %s -o %t \ -# RUN: %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# Test that if library is before object file on command line, it still is used. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, - 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, - 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, - 0x48, 0x83, 0xC4, 0x10, 0x5D, 0xC3 ] - relocations: - - offset: 0x00000012 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/library-rescan.yaml b/lld/test/mach-o/library-rescan.yaml deleted file mode 100644 index 138a696fe5ff..000000000000 --- a/lld/test/mach-o/library-rescan.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %p/Inputs/libfoo.a %p/Inputs/libbar.a \ -# RUN: %s -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# Test that static libraries are automatically rescanned (bar needs foo). 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, - 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, - 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x31, 0xC0, - 0x48, 0x83, 0xC4, 0x10, 0x5D, 0xC3 ] - relocations: - - offset: 0x00000012 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _bar -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo diff --git a/lld/test/mach-o/libresolve-bizarre-root-override.yaml b/lld/test/mach-o/libresolve-bizarre-root-override.yaml deleted file mode 100644 index 0fda09a9b8d8..000000000000 --- a/lld/test/mach-o/libresolve-bizarre-root-override.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# RUN: not ld64.lld.darwinold -test_file_usage -v \ -# RUN: -path_exists /usr/lib \ -# RUN: -path_exists /Applications/MySDK/usr/local/lib \ -# RUN: -path_exists /Applications/MySDK/usr/lib \ -# RUN: -path_exists /Applications/MySDK/usr/lib/libSystem.dylib \ -# RUN: -syslibroot /Applications/MySDK \ -# RUN: -syslibroot / \ -# RUN: -lSystem \ -# RUN: 2>&1 | FileCheck %s - -# When the last -syslibroot is simply "/", all of them get discarded. So in this -# case, only /usr/lib should show up. 
- -# CHECK: Library search paths: -# CHECK: /usr/lib -# CHECK-NOT: /usr/local/lib -# CHECK: Unable to find library for -lSystem diff --git a/lld/test/mach-o/libresolve-multiple-syslibroots.yaml b/lld/test/mach-o/libresolve-multiple-syslibroots.yaml deleted file mode 100644 index 66627056afd3..000000000000 --- a/lld/test/mach-o/libresolve-multiple-syslibroots.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# RUN: ld64.lld.darwinold -test_file_usage -v \ -# RUN: -path_exists /usr/lib \ -# RUN: -path_exists /Applications/MyFirstSDK/usr/local/lib \ -# RUN: -path_exists /Applications/MySecondSDK/usr/local/lib \ -# RUN: -path_exists /Applications/MyFirstSDK/usr/local/lib/libSystem.a \ -# RUN: -path_exists /Applications/MySecondSDK/usr/local/lib/libSystem.a \ -# RUN: -syslibroot /Applications/MyFirstSDK \ -# RUN: -syslibroot /Applications/MySecondSDK \ -# RUN: -lSystem \ -# RUN: 2>&1 | FileCheck %s - - -# CHECK: Library search paths: -# CHECK: /usr/lib -# CHECK: /Applications/MyFirstSDK/usr/local/lib -# CHECK: /Applications/MySecondSDK/usr/local/lib -# CHECK: Found library /Applications/MyFirstSDK/usr/local/lib/libSystem.a diff --git a/lld/test/mach-o/libresolve-one-syslibroot.yaml b/lld/test/mach-o/libresolve-one-syslibroot.yaml deleted file mode 100644 index 7ca2670a7277..000000000000 --- a/lld/test/mach-o/libresolve-one-syslibroot.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# RUN: ld64.lld.darwinold -test_file_usage -v \ -# RUN: -path_exists /usr/lib \ -# RUN: -path_exists /Applications/MySDK/usr/local/lib \ -# RUN: -path_exists /Applications/MySDK/usr/local/lib/libSystem.a \ -# RUN: -path_exists /hasFoo \ -# RUN: -path_exists /hasFoo/foo.o \ -# RUN: -syslibroot /Applications/MySDK \ -# RUN: -L/hasFoo \ -# RUN: -lSystem -lfoo.o \ -# RUN: 2>&1 | FileCheck %s - -# When just one -syslibroot is specified, we apparently want to skip *system* -# paths that aren't found. User ones should still get added. 
In this case -# /usr/lib exists, but not the equivalent in the -syslibroot, so there should be -# no mention of /usr/lib. - -# CHECK: Library search paths: -# CHECK: /hasFoo -# CHECK-NOT: /usr/lib -# CHECK-NOT: /usr/local/lib -# CHECK: /Applications/MySDK/usr/local/lib -# CHECK-NOT: /usr/lib -# CHECK-NOT: /usr/local/lib -# CHECK: Found library /Applications/MySDK/usr/local/lib/libSystem.a -# CHECK: Found library /hasFoo/foo.o diff --git a/lld/test/mach-o/libresolve-simple.yaml b/lld/test/mach-o/libresolve-simple.yaml deleted file mode 100644 index 00e35734ebde..000000000000 --- a/lld/test/mach-o/libresolve-simple.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ -# RUN: -path_exists /usr/lib \ -# RUN: -path_exists /usr/local/lib \ -# RUN: -path_exists /usr/lib/libSystem.dylib \ -# RUN: -path_exists hasFoo \ -# RUN: -path_exists hasFoo/libFoo.dylib \ -# RUN: -path_exists /hasBar \ -# RUN: -path_exists /hasBar/libBar.dylib \ -# RUN: -L hasFoo \ -# RUN: -L /hasBar \ -# RUN: -lSystem -lFoo -lBar \ -# RUN: 2>&1 | FileCheck %s - -# CHECK: Library search paths: -# CHECK: hasFoo -# CHECK: /hasBar -# CHECK: /usr/lib -# CHECK: /usr/local/lib -# CHECK: Found library /usr/lib/libSystem.dylib -# CHECK: Found library hasFoo/libFoo.dylib -# CHECK: Found library /hasBar/libBar.dylib diff --git a/lld/test/mach-o/libresolve-user-paths.yaml b/lld/test/mach-o/libresolve-user-paths.yaml deleted file mode 100644 index 3fbb205eba5f..000000000000 --- a/lld/test/mach-o/libresolve-user-paths.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ -# RUN: -path_exists hasFoo \ -# RUN: -path_exists hasFoo/libFoo.dylib \ -# RUN: -path_exists /hasBar \ -# RUN: -path_exists /hasBar/libBar.dylib \ -# RUN: -path_exists /SDK/hasFoo \ -# RUN: -path_exists /SDK/hasFoo/libFoo.dylib \ -# RUN: -path_exists /SDK/hasBar \ -# RUN: -path_exists /SDK/hasBar/libBar.dylib \ -# RUN: -syslibroot /SDK \ -# RUN: -L 
hasFoo \ -# RUN: -L /hasBar \ -# RUN: -lFoo -lBar \ -# RUN: 2>&1 | FileCheck %s - -# CHECK: Library search paths: -# CHECK: hasFoo -# CHECK: /SDK/hasBar -# CHECK: Found library hasFoo/libFoo.dylib -# CHECK: Found library /SDK/hasBar/libBar.dylib diff --git a/lld/test/mach-o/libresolve-z.yaml b/lld/test/mach-o/libresolve-z.yaml deleted file mode 100644 index aaf98ebec46b..000000000000 --- a/lld/test/mach-o/libresolve-z.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ -# RUN: -path_exists /usr/lib \ -# RUN: -path_exists /usr/local/lib \ -# RUN: -path_exists /usr/lib/libSystem.dylib \ -# RUN: -path_exists hasFoo \ -# RUN: -path_exists hasFoo/libFoo.dylib \ -# RUN: -path_exists /hasBar \ -# RUN: -path_exists /hasBar/libBar.dylib \ -# RUN: -L hasFoo \ -# RUN: -L /hasBar \ -# RUN: -Z \ -# RUN: -lFoo -lBar \ -# RUN: 2>&1 | FileCheck %s - -# CHECK: Library search paths: -# CHECK: hasFoo -# CHECK: /hasBar -# CHECK-NOT: /usr/lib -# CHECK-NOT: /usr/local/lib -# CHECK: Found library hasFoo/libFoo.dylib -# CHECK: Found library /hasBar/libBar.dylib diff --git a/lld/test/mach-o/lit.local.cfg b/lld/test/mach-o/lit.local.cfg deleted file mode 100644 index ccbf4e12fbf0..000000000000 --- a/lld/test/mach-o/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ - -# mach-o test cases encode input files in yaml and use .yaml extension -config.suffixes = ['.yaml'] -config.excludes = ['Inputs'] diff --git a/lld/test/mach-o/load-commands-size.yaml b/lld/test/mach-o/load-commands-size.yaml deleted file mode 100644 index 25314d7eb4ba..000000000000 --- a/lld/test/mach-o/load-commands-size.yaml +++ /dev/null @@ -1,305 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -dylib \ -# RUN: -macosx_version_min 10.10 -sdk_version 10.10 \ -# RUN: -install_name /usr/lib/foo.dylib \ -# RUN: %p/Inputs/x86_64/libSystem.yaml && \ -# RUN: llvm-readobj %t - -# (Tests that lld doesn't crash or produce an invalid file.) 
- ---- !native -path: '' -defined-atoms: - - name: _foo - scope: global - type: unknown - content: [ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 
- 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00 ] - alignment: 16 - section-choice: custom-required - section-name: '__TEXT/__foo' diff --git a/lld/test/mach-o/mach_header-cpusubtype.yaml b/lld/test/mach-o/mach_header-cpusubtype.yaml deleted file mode 100644 index 7c97e4063e2d..000000000000 --- a/lld/test/mach-o/mach_header-cpusubtype.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.4 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_LIB64 -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.5 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=LIB64 -# RUN: ld64.lld.darwinold -arch x86_64 -dylib -macosx_version_min 10.5 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=DYLIB - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: start - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - -... 
- -# NO_LIB64: MH_MAGIC_64 X86_64 ALL 0x00 EXECUTE -# LIB64: MH_MAGIC_64 X86_64 ALL LIB64 EXECUTE -# DYLIB: MH_MAGIC_64 X86_64 ALL 0x00 DYLIB diff --git a/lld/test/mach-o/mh_bundle_header.yaml b/lld/test/mach-o/mh_bundle_header.yaml deleted file mode 100644 index 3db78b103eab..000000000000 --- a/lld/test/mach-o/mh_bundle_header.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -bundle -o %t %p/Inputs/x86_64/libSystem.yaml && llvm-nm -m -n %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 %s -bundle -dead_strip -o %t %p/Inputs/x86_64/libSystem.yaml && llvm-nm -m -n %t | FileCheck %s -# -# Test that __mh_bundle_header symbol is available for bundles -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000008 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 -global-symbols: - - name: _d - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000008 - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_NO_DEAD_STRIP ] - value: 0x0000000000000000 -undefined-symbols: - - name: __mh_bundle_header - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - -... 
- -# CHECK: __mh_bundle_header -# CHECK: _foo diff --git a/lld/test/mach-o/mh_dylib_header.yaml b/lld/test/mach-o/mh_dylib_header.yaml deleted file mode 100644 index ce03d3b3c487..000000000000 --- a/lld/test/mach-o/mh_dylib_header.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -dylib -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# Test that __mh_dylib_header symbol is available for dylibs -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000008 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 -global-symbols: - - name: _d - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000008 - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: __mh_dylib_header - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - -... 
- -# CHECK_NOT: __mh_dylib_header -# CHECK: _foo diff --git a/lld/test/mach-o/objc-category-list-atom.yaml b/lld/test/mach-o/objc-category-list-atom.yaml deleted file mode 100644 index cf4d4966903a..000000000000 --- a/lld/test/mach-o/objc-category-list-atom.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %t -o %t2 | FileCheck %s - - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_catlist - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - alignment: 8 - address: 0x00000000000003F8 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000008 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 1 -undefined-symbols: - - name: __category1 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: __category2 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -page-size: 0x00000000 -... - -# Make sure we atomize the category list section by pointer sized atoms. 
- -# CHECK: path: '' -# CHECK: defined-atoms: -# CHECK: - type: objc-category-list -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: merge: by-content -# CHECK: alignment: 8 -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: __category2 -# CHECK: - type: objc-category-list -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: merge: by-content -# CHECK: alignment: 8 -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: __category1 -# CHECK: undefined-atoms: -# CHECK: - name: __category1 -# CHECK: - name: __category2 -# CHECK: ... diff --git a/lld/test/mach-o/objc-image-info-host-vs-simulator.yaml b/lld/test/mach-o/objc-image-info-host-vs-simulator.yaml deleted file mode 100644 index 06913009936b..000000000000 --- a/lld/test/mach-o/objc-image-info-host-vs-simulator.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s - -# The file is built for the host, but the objc image info flags are for -# the simulator. - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00 ] -... - -# CHECK: {{.*}} cannot be linked. 
It contains ObjC built for the simulator while we are linking a non-simulator target \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-invalid-size.yaml b/lld/test/mach-o/objc-image-info-invalid-size.yaml deleted file mode 100644 index ea00bfae077d..000000000000 --- a/lld/test/mach-o/objc-image-info-invalid-size.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] -... - -# CHECK: error: __DATA/__objc_imageinfo in file {{.*}} should be 8 bytes in size \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-invalid-version.yaml b/lld/test/mach-o/objc-image-info-invalid-version.yaml deleted file mode 100644 index c64206c436e8..000000000000 --- a/lld/test/mach-o/objc-image-info-invalid-version.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00 ] -... 
- -# CHECK: error: __DATA/__objc_imageinfo in file {{.*}} should have version=0 \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml b/lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml deleted file mode 100644 index 35539ca21cac..000000000000 --- a/lld/test/mach-o/objc-image-info-mismatched-swift-version.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s %p/Inputs/swift-version-1.yaml 2>&1 | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00 ] -... - -# CHECK: different swift versions \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-pass-output.yaml b/lld/test/mach-o/objc-image-info-pass-output.yaml deleted file mode 100644 index 7f7953e3c892..000000000000 --- a/lld/test/mach-o/objc-image-info-pass-output.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# RUN: ld64.lld.darwinold -ios_simulator_version_min 5.0 -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s - -# Make sure that we have an objc image info in the output. It should have -# been generated by the objc pass. - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00 ] -... 
- -# CHECK: --- !native -# CHECK: path: '' -# CHECK: defined-atoms: -# CHECK: - scope: hidden -# CHECK: type: objc-image-info -# CHECK: content: [ 00, 00, 00, 00, 20, 02, 00, 00 ] -# CHECK: alignment: 4 -# CHECK: ... \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-simulator-vs-host.yaml b/lld/test/mach-o/objc-image-info-simulator-vs-host.yaml deleted file mode 100644 index 37e5f7489d80..000000000000 --- a/lld/test/mach-o/objc-image-info-simulator-vs-host.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# RUN: not ld64.lld.darwinold -ios_simulator_version_min 5.0 -arch x86_64 -r %s 2>&1 | FileCheck %s - -# The file is built for the simulator, but the objc image info flags are for -# the host. - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] -... - -# CHECK: {{.*}} cannot be linked. It contains ObjC built for a non-simulator target while we are linking a simulator target \ No newline at end of file diff --git a/lld/test/mach-o/objc-image-info-unsupported-gc.yaml b/lld/test/mach-o/objc-image-info-unsupported-gc.yaml deleted file mode 100644 index 4615e7ebed49..000000000000 --- a/lld/test/mach-o/objc-image-info-unsupported-gc.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s 2>&1 | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 ] -... 
- -# CHECK: error: __DATA/__objc_imageinfo in file {{.*}} uses GC. This is not supported \ No newline at end of file diff --git a/lld/test/mach-o/objc_export_list.yaml b/lld/test/mach-o/objc_export_list.yaml deleted file mode 100644 index 1629c1880130..000000000000 --- a/lld/test/mach-o/objc_export_list.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -o %t \ -# RUN: -exported_symbol .objc_class_name_Foo %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-nm -m %t | FileCheck %s -# -# Test that exported objc classes can be specificed using old naming -# (.e.g .objc_class_name_Foo instead of _OBJC_CLASS_$_Foo) -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __DATA - section: __objc_data - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000030 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000028 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 1 -global-symbols: - - name: '_OBJC_CLASS_$_Foo' - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: '_OBJC_METACLASS_$_Foo' - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000028 -... 
- -# CHECK: (__DATA,__objc_data) external _OBJC_CLASS_$_Foo -# CHECK: (__DATA,__objc_data) external _OBJC_METACLASS_$_Foo diff --git a/lld/test/mach-o/order_file-basic.yaml b/lld/test/mach-o/order_file-basic.yaml deleted file mode 100644 index 9dc1c009921f..000000000000 --- a/lld/test/mach-o/order_file-basic.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/x86_64/libSystem.yaml \ -# RUN: -order_file %p/Inputs/order_file-basic.order \ -# RUN: -force_load %p/Inputs/libfoo.a -o %t -# RUN: llvm-nm -m -n %t | FileCheck %s -# -# Test -order_file -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3, 0xC3, 0xC3, 0xC3 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000014 - content: [ 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _data1 - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000014 - - name: _data2 - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000018 - - name: _data3 - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x000000000000001C - - name: _func1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _func2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _func3 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000002 - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000003 -... 
- - -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _func2 -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _foo -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _func1 -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _func3 -# CHECK: {{[0-9a-f]+}} (__TEXT,__text) external _main -# CHECK: {{[0-9a-f]+}} (__DATA,__data) external _data3 -# CHECK: {{[0-9a-f]+}} (__DATA,__data) external _data1 -# CHECK: {{[0-9a-f]+}} (__DATA,__data) external _data2 - diff --git a/lld/test/mach-o/parse-aliases.yaml b/lld/test/mach-o/parse-aliases.yaml deleted file mode 100644 index 59dcb546c7c6..000000000000 --- a/lld/test/mach-o/parse-aliases.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test multiple labels to same address parse into aliases. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xCC, 0xC3 ] -local-symbols: - - name: _pad - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _myStaticAlias1 - type: N_SECT - sect: 1 - value: 0x0000000000000001 - - name: _myStaticAlias3 - type: N_SECT - sect: 1 - value: 0x0000000000000001 - - name: _myStaticAlias2 - type: N_SECT - sect: 1 - value: 0x0000000000000001 -global-symbols: - - name: _myGlobalFunc1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myGlobalFunc2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myGlobalFunc3 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myHiddenAlias1 - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myHiddenAlias2 - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000001 
- - name: _myHiddenAlias3 - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000001 -... - -# CHECK: defined-atoms: -# CHECK: - name: _pad -# CHECK: scope: global -# CHECK: content: [ CC ] -# CHECK: - name: _myStaticAlias1 -# CHECK: - name: _myStaticAlias2 -# CHECK: - name: _myStaticAlias3 -# CHECK: - name: _myHiddenAlias1 -# CHECK: scope: hidden -# CHECK: - name: _myHiddenAlias2 -# CHECK: scope: hidden -# CHECK: - name: _myHiddenAlias3 -# CHECK: scope: hidden -# CHECK: - name: _myGlobalFunc1 -# CHECK: scope: global -# CHECK: - name: _myGlobalFunc2 -# CHECK: scope: global -# CHECK: - name: _myGlobalFunc3 -# CHECK: scope: global -# CHECK: content: [ C3 ] diff --git a/lld/test/mach-o/parse-arm-relocs.yaml b/lld/test/mach-o/parse-arm-relocs.yaml deleted file mode 100644 index 26e1dcf9dae7..000000000000 --- a/lld/test/mach-o/parse-arm-relocs.yaml +++ /dev/null @@ -1,818 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing of armv7 relocations. 
-# -# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x00, 0xF0, 0x4E, 0xF8, 0x00, 0xF0, 0x4E, 0xF8, - 0xFF, 0xF7, 0xFA, 0xFF, 0xFF, 0xF7, 0xFA, 0xFF, - 0xFF, 0xF7, 0xF6, 0xBF, 0x40, 0xF2, 0x72, 0x01, - 0xC0, 0xF2, 0x00, 0x01, 0x40, 0xF2, 0x7A, 0x02, - 0xC0, 0xF2, 0x00, 0x02, 0x40, 0xF2, 0x29, 0x01, - 0xC0, 0xF2, 0x00, 0x01, 0x79, 0x44, 0x40, 0xF2, - 0xA0, 0x03, 0xC0, 0xF2, 0x00, 0x03, 0x40, 0xF2, - 0xA8, 0x04, 0xC0, 0xF2, 0x00, 0x04, 0x40, 0xF2, - 0x57, 0x03, 0xC0, 0xF2, 0x00, 0x03, 0x40, 0xF2, - 0x00, 0x05, 0xC0, 0xF2, 0x00, 0x05, 0x40, 0xF2, - 0x08, 0x06, 0xC0, 0xF2, 0x00, 0x06, 0xC0, 0x46, - 0x10, 0x00, 0x00, 0xEB, 0x10, 0x00, 0x00, 0xEB, - 0xE6, 0xFF, 0xFF, 0xEB, 0xE6, 0xFF, 0xFF, 0xEB, - 0xE4, 0xFF, 0xFF, 0xEA, 0x20, 0x10, 0x00, 0xE3, - 0x00, 0x10, 0x40, 0xE3, 0x28, 0x20, 0x00, 0xE3, - 0x00, 0x20, 0x40, 0xE3, 0x0F, 0x10, 0x81, 0xE0, - 0xA0, 0x30, 0x00, 0xE3, 0x00, 0x30, 0x40, 0xE3, - 0xA8, 0x40, 0x00, 0xE3, 0x00, 0x40, 0x40, 0xE3, - 0x00, 0x50, 0x00, 0xE3, 0x00, 0x50, 0x40, 0xE3, - 0x08, 0x60, 0x00, 0xE3, 0x00, 0x60, 0x40, 0xE3 ] - relocations: - - offset: 0x0000009C - type: ARM_RELOC_HALF - length: 1 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000008 - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000098 - type: ARM_RELOC_HALF - length: 0 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000094 - type: ARM_RELOC_HALF - length: 1 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000090 - type: ARM_RELOC_HALF - length: 0 - 
pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000008C - scattered: true - type: ARM_RELOC_HALF - length: 1 - pc-rel: false - value: 0x000000A0 - - offset: 0x000000A8 - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000088 - scattered: true - type: ARM_RELOC_HALF - length: 0 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000084 - type: ARM_RELOC_HALF - length: 1 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x000000A0 - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000080 - type: ARM_RELOC_HALF - length: 0 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000078 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 1 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000028 - scattered: true - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - value: 0x00000080 - - offset: 0x00000074 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 0 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - value: 0x00000080 - - offset: 0x00000070 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 1 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000020 - scattered: true - type: ARM_RELOC_PAIR - length: 1 - pc-rel: false - value: 0x00000080 - - offset: 0x0000006C - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 0 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 0 - pc-rel: false - value: 0x00000080 - - offset: 0x00000068 - type: ARM_RELOC_BR24 - 
length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000064 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000060 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x0000005C - scattered: true - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - value: 0x000000A0 - - offset: 0x00000058 - type: ARM_RELOC_BR24 - length: 2 - pc-rel: true - extern: false - symbol: 2 - - offset: 0x00000052 - type: ARM_RELOC_HALF - length: 3 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000008 - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000004E - type: ARM_RELOC_HALF - length: 2 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000004A - type: ARM_RELOC_HALF - length: 3 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000046 - type: ARM_RELOC_HALF - length: 2 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000042 - type: ARM_RELOC_HALF - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000057 - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000003E - type: ARM_RELOC_HALF - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000003A - scattered: true - type: ARM_RELOC_HALF - length: 3 - pc-rel: false - value: 0x000000A0 - - offset: 0x000000A8 - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000036 - scattered: true - type: 
ARM_RELOC_HALF - length: 2 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000032 - type: ARM_RELOC_HALF - length: 3 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x000000A0 - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x0000002E - type: ARM_RELOC_HALF - length: 2 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000000 - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - extern: false - symbol: 16777215 - - offset: 0x00000028 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 3 - pc-rel: false - value: 0x00000056 - - offset: 0x00000028 - scattered: true - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - value: 0x0000002E - - offset: 0x00000024 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000056 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000002E - - offset: 0x00000020 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 3 - pc-rel: false - value: 0x000000A0 - - offset: 0x0000007A - scattered: true - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - value: 0x0000002E - - offset: 0x0000001C - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 2 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000002E - - offset: 0x00000018 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 3 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000072 - scattered: true - type: ARM_RELOC_PAIR - length: 3 - pc-rel: false - value: 0x0000002E - - offset: 0x00000014 - scattered: true - type: ARM_RELOC_HALF_SECTDIFF - length: 2 - pc-rel: false - value: 0x000000A0 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000002E - - 
offset: 0x00000010 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x0000000C - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000008 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000004 - scattered: true - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - value: 0x000000A0 - - offset: 0x00000000 - type: ARM_THUMB_RELOC_BR22 - length: 2 - pc-rel: true - extern: false - symbol: 2 - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x00000000000000A0 - content: [ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0xA4, 0xFF, 0xFF, 0xFF, - 0xA4, 0xFF, 0xFF, 0xFF, 0x45, 0xFF, 0xFF, 0xFF, - 0x45, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000020 - scattered: true - type: ARM_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000000 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x000000C0 - - offset: 0x0000001C - scattered: true - type: ARM_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000000 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x000000BC - - offset: 0x00000018 - scattered: true - type: ARM_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000058 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x000000B8 - - offset: 0x00000014 - scattered: true - type: ARM_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000058 - - offset: 0x00000000 - scattered: true - type: ARM_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x000000B4 - - offset: 0x00000010 - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 4 - - offset: 0x0000000C - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: 
true - symbol: 4 - - offset: 0x00000008 - scattered: true - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - value: 0x00000000 - - offset: 0x00000004 - type: ARM_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 -local-symbols: - - name: _foo_thumb - type: N_SECT - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000000 - - name: _x - type: N_SECT - sect: 2 - value: 0x00000000000000A0 - - name: _t1 - type: N_SECT - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000056 - - name: _foo_arm - type: N_SECT - sect: 1 - value: 0x0000000000000058 -undefined-symbols: - - name: _undef - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - -# CHECK: defined-atoms: -# CHECK: - name: _x -# CHECK: type: data -# CHECK: references: -# CHECK: - kind: pointer32 -# CHECK: offset: 4 -# CHECK: target: _foo_thumb -# CHECK-NOT: addend: -# CHECK: - kind: pointer32 -# CHECK: offset: 8 -# CHECK: target: _foo_thumb -# CHECK: addend: 4 -# CHECK: - kind: pointer32 -# CHECK: offset: 12 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: pointer32 -# CHECK: offset: 16 -# CHECK: target: _undef -# CHECK: addend: 4 -# CHECK: - kind: delta32 -# CHECK: offset: 20 -# CHECK: target: _foo_arm -# CHECK-NOT: addend: -# CHECK: - kind: delta32 -# CHECK: offset: 24 -# CHECK: target: _foo_arm -# CHECK: addend: 4 -# CHECK: - kind: delta32 -# CHECK: offset: 28 -# CHECK: target: _foo_thumb -# CHECK-NOT: addend: -# CHECK: - kind: delta32 -# CHECK: offset: 32 -# CHECK: target: _foo_thumb -# CHECK: addend: 4 -# CHECK: - name: _foo_thumb -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 0 -# CHECK: target: _x -# CHECK-NOT: addend: -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 4 -# CHECK: target: _x -# CHECK: addend: 4 -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 8 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: thumb_bl22 -# CHECK: offset: 12 -# CHECK: 
target: _undef -# CHECK: addend: 4 -# CHECK: - kind: thumb_b22 -# CHECK: offset: 16 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movw_funcRel -# CHECK: offset: 20 -# CHECK: target: _x -# CHECK: addend: -46 -# CHECK: - kind: thumb_movt_funcRel -# CHECK: offset: 24 -# CHECK: target: _x -# CHECK: addend: -46 -# CHECK: - kind: thumb_movw_funcRel -# CHECK: offset: 28 -# CHECK: target: _x -# CHECK: addend: -38 -# CHECK: - kind: thumb_movt_funcRel -# CHECK: offset: 32 -# CHECK: target: _x -# CHECK: addend: -38 -# CHECK: - kind: thumb_movw_funcRel -# CHECK: offset: 36 -# CHECK: target: _t1 -# CHECK: addend: -46 -# CHECK: - kind: thumb_movt_funcRel -# CHECK: offset: 40 -# CHECK: target: _t1 -# CHECK: addend: -46 -# CHECK: - kind: thumb_movw -# CHECK: offset: 46 -# CHECK: target: _x -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movt -# CHECK: offset: 50 -# CHECK: target: _x -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movw -# CHECK: offset: 54 -# CHECK: target: _x -# CHECK: addend: 8 -# CHECK: - kind: thumb_movt -# CHECK: offset: 58 -# CHECK: target: _x -# CHECK: addend: 8 -# CHECK: - kind: thumb_movw -# CHECK: offset: 62 -# CHECK: target: _t1 -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movt -# CHECK: offset: 66 -# CHECK: target: _t1 -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movw -# CHECK: offset: 70 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movt -# CHECK: offset: 74 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: thumb_movw -# CHECK: offset: 78 -# CHECK: target: _undef -# CHECK: addend: 8 -# CHECK: - kind: thumb_movt -# CHECK: offset: 82 -# CHECK: target: _undef -# CHECK: addend: 8 -# CHECK: - name: _t1 -# CHECK: content: [ C0, 46 ] -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: - name: _foo_arm -# CHECK: references: -# CHECK-NOT: - kind: modeThumbCode -# CHECK: - kind: arm_bl24 -# CHECK: offset: 0 -# CHECK: target: _x -# CHECK-NOT: addend: -# CHECK: - kind: 
arm_bl24 -# CHECK: offset: 4 -# CHECK: target: _x -# CHECK: addend: 4 -# CHECK: - kind: arm_bl24 -# CHECK: offset: 8 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: arm_bl24 -# CHECK: offset: 12 -# CHECK: target: _undef -# CHECK: addend: 4 -# CHECK: - kind: arm_b24 -# CHECK: offset: 16 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: arm_movw_funcRel -# CHECK: offset: 20 -# CHECK: target: _x -# CHECK: addend: -40 -# CHECK: - kind: arm_movt_funcRel -# CHECK: offset: 24 -# CHECK: target: _x -# CHECK: addend: -40 -# CHECK: - kind: arm_movw_funcRel -# CHECK: offset: 28 -# CHECK: target: _x -# CHECK: addend: -32 -# CHECK: - kind: arm_movt_funcRel -# CHECK: offset: 32 -# CHECK: target: _x -# CHECK: addend: -32 -# CHECK: - kind: arm_movw -# CHECK: offset: 40 -# CHECK: target: _x -# CHECK-NOT: addend: -# CHECK: - kind: arm_movt -# CHECK: offset: 44 -# CHECK: target: _x -# CHECK-NOT: addend: -# CHECK: - kind: arm_movw -# CHECK: offset: 48 -# CHECK: target: _x -# CHECK: addend: 8 -# CHECK: - kind: arm_movt -# CHECK: offset: 52 -# CHECK: target: _x -# CHECK: addend: 8 -# CHECK: - kind: arm_movw -# CHECK: offset: 56 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: arm_movt -# CHECK: offset: 60 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: arm_movw -# CHECK: offset: 64 -# CHECK: target: _undef -# CHECK: addend: 8 -# CHECK: - kind: arm_movt -# CHECK: offset: 68 -# CHECK: target: _undef -# CHECK: addend: 8 -# CHECK: undefined-atoms: -# CHECK: - name: _undef - - - - -# .align 2 -# .code 16 -# .thumb_func _foo_thumb -#_foo_thumb: -# bl _x -# bl _x+4 -# bl _undef -# bl _undef+4 -# b _undef -# movw r1, :lower16:(_x-L1) -# movt r1, :upper16:(_x-L1) -# movw r2, :lower16:(_x+8-L1) -# movt r2, :upper16:(_x+8-L1) -# movw r1, :lower16:(_t1-L1) -# movt r1, :upper16:(_t1-L1) -# add r1, pc -#L1: -# movw r3, :lower16:_x -# movt r3, :upper16:_x -# movw r4, :lower16:_x+8 -# movt r4, :upper16:_x+8 -# movw r3, :lower16:_t1 -# movt 
r3, :upper16:_t1 -# movw r5, :lower16:_undef -# movt r5, :upper16:_undef -# movw r6, :lower16:_undef+8 -# movt r6, :upper16:_undef+8 -# -# .thumb_func _t1 -#_t1: -# nop -# -# -# .code 32 -# .align 2 -#_foo_arm: -# bl _x -# bl _x+4 -# bl _undef -# bl _undef+4 -# b _undef -# movw r1, :lower16:(_x-L2) -# movt r1, :upper16:(_x-L2) -# movw r2, :lower16:(_x+8-L2) -# movt r2, :upper16:(_x+8-L2) -# add r1, pc -#L2: -# movw r3, :lower16:_x -# movt r3, :upper16:_x -# movw r4, :lower16:_x+8 -# movt r4, :upper16:_x+8 -# movw r5, :lower16:_undef -# movt r5, :upper16:_undef -# movw r6, :lower16:_undef+8 -# movt r6, :upper16:_undef+8 -# -# -# .data -#_x: .long 0 -# .long _foo_thumb -# .long _foo_thumb+4 -# .long _undef -# .long _undef+4 -# .long _foo_arm - . -# .long _foo_arm+4- . -# .long _foo_thumb - . -# .long _foo_thumb+4 - . -# diff --git a/lld/test/mach-o/parse-cfstring32.yaml b/lld/test/mach-o/parse-cfstring32.yaml deleted file mode 100644 index aee244ab931b..000000000000 --- a/lld/test/mach-o/parse-cfstring32.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of mach-o functions. 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000000 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, - 0x65, 0x72, 0x65, 0x00 ] - - segment: __DATA - section: __cfstring - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000010 - content: [ 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000018 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000010 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000008 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 0 -undefined-symbols: - - name: ___CFConstantStringClassReference - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - ref-name: [[STR1:L[L0-9]+]] -# CHECK: scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] -# CHECK: merge: by-content -# CHECK: - ref-name: [[STR2:L[L0-9]+]] -# CHECK: scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] -# CHECK: merge: by-content -# CHECK: - scope: hidden -# CHECK: type: cfstring -# CHECK: merge: by-content -# CHECK: references: -# CHECK: - kind: pointer32 -# CHECK: offset: 0 -# CHECK: target: ___CFConstantStringClassReference -# CHECK: - kind: pointer32 -# CHECK: offset: 8 -# CHECK: target: [[STR1]] -# CHECK: - scope: hidden -# CHECK: type: cfstring -# CHECK: merge: by-content -# CHECK: references: -# CHECK: - kind: pointer32 -# CHECK: offset: 0 -# CHECK: target: ___CFConstantStringClassReference -# CHECK: - kind: pointer32 -# CHECK: offset: 8 -# CHECK: target: [[STR2]] -# CHECK:undefined-atoms: -# CHECK: - name: ___CFConstantStringClassReference diff --git a/lld/test/mach-o/parse-cfstring64.yaml b/lld/test/mach-o/parse-cfstring64.yaml deleted file mode 100644 index 2725047a5b6f..000000000000 --- a/lld/test/mach-o/parse-cfstring64.yaml +++ /dev/null @@ -1,108 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of CFString constants. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000000 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, - 0x65, 0x72, 0x65, 0x00 ] - - segment: __DATA - section: __cfstring - type: S_REGULAR - attributes: [ ] - alignment: 4 - address: 0x0000000000000010 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000030 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 1 - - offset: 0x00000020 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000010 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 -local-symbols: - - name: Lstr1 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: Lstr2 - type: N_SECT - sect: 1 - value: 0x0000000000000006 -undefined-symbols: - - name: ___CFConstantStringClassReference - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK:defined-atoms: -# CHECK: - ref-name: L000 -# CHECK: scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] -# CHECK: merge: by-content -# CHECK: - ref-name: L001 -# CHECK: scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] -# CHECK: merge: by-content -# CHECK: - scope: hidden -# CHECK: type: cfstring -# CHECK: merge: by-content -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: ___CFConstantStringClassReference -# CHECK: - kind: pointer64 -# CHECK: offset: 16 -# CHECK: target: L000 -# CHECK: - scope: hidden -# CHECK: type: cfstring -# CHECK: merge: by-content -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: ___CFConstantStringClassReference -# CHECK: - kind: pointer64 -# CHECK: offset: 16 -# CHECK: target: L001 -# CHECK:undefined-atoms: -# CHECK: - name: ___CFConstantStringClassReference - diff --git a/lld/test/mach-o/parse-compact-unwind32.yaml b/lld/test/mach-o/parse-compact-unwind32.yaml deleted file mode 100644 index 3b0edff5cfca..000000000000 --- a/lld/test/mach-o/parse-compact-unwind32.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of __LD/__compact_unwind (compact unwind) section. 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0xB8, 0x0A, 0x00, 0x00, 0x00, - 0x5D, 0xC3, 0x55, 0x89, 0xE5, 0xB8, 0x0A, 0x00, - 0x00, 0x00, 0x5D, 0xC3 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x000000000000001C - content: [ 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000014 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 -global-symbols: - - name: __Z3barv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000A - - name: __Z3foov - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - type: compact-unwind -# CHECK: content: [ 00, 00, 00, 00, 0A, 00, 00, 00, 00, 00, 00, 01, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: - type: compact-unwind -# CHECK: content: [ 10, 00, 00, 00, 0A, 00, 00, 00, 00, 00, 00, 01, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: - name: __Z3foov -# CHECK: scope: global -# CHECK: content: [ 55, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] -# CHECK: - name: __Z3barv -# CHECK: scope: global -# CHECK: content: [ 55, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] - diff --git a/lld/test/mach-o/parse-compact-unwind64.yaml b/lld/test/mach-o/parse-compact-unwind64.yaml deleted file mode 100644 index a3d2cdfcaf3c..000000000000 --- a/lld/test/mach-o/parse-compact-unwind64.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of __LD/__compact_unwind (compact unwind) section. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0xB8, 0x0A, 0x00, 0x00, - 0x00, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, 0xB8, - 0x0A, 0x00, 0x00, 0x00, 0x5D, 0xC3 ] - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000020 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000020 - type: X86_64_RELOC_UNSIGNED - 
length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 -global-symbols: - - name: __Z3barv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: __Z3foov - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000B -... - -# CHECK: defined-atoms: -# CHECK: - type: compact-unwind -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 0B, 00, 00, 00, -# CHECK: 00, 00, 00, 01, 00, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: - type: compact-unwind -# CHECK: content: [ 10, 00, 00, 00, 00, 00, 00, 00, 0B, 00, 00, 00, -# CHECK: 00, 00, 00, 01, 00, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: - name: __Z3barv -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] -# CHECK: - name: __Z3foov -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] diff --git a/lld/test/mach-o/parse-data-in-code-armv7.yaml b/lld/test/mach-o/parse-data-in-code-armv7.yaml deleted file mode 100644 index 6f8ffa914025..000000000000 --- a/lld/test/mach-o/parse-data-in-code-armv7.yaml +++ /dev/null @@ -1,157 +0,0 @@ -# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch armv7 -r -print_atoms %t -o %t2 | FileCheck %s -# RUN: ld64.lld.darwinold -arch armv7 -dylib %s -o %t3.dylib %p/Inputs/armv7/libSystem.yaml \ -# RUN: && llvm-objdump --macho --private-headers %t3.dylib | FileCheck --check-prefix=CHECK2 %s -# -# Test parsing LC_DATA_IN_CODE -# -# - ---- !mach-o -arch: armv7 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0x00, 0xBF, 0x00, 0xBF, 0x00, 0x00, 0x00, 
0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x00, 0xBF, 0x00, 0xBF, - 0x00, 0xF0, 0x20, 0xE3, 0x0A, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3 ] -local-symbols: - - name: _foo_thumb - type: N_SECT - sect: 1 - desc: [ N_ARM_THUMB_DEF ] - value: 0x0000000000000000 - - name: _foo_arm - type: N_SECT - sect: 1 - value: 0x0000000000000018 -dataInCode: - - offset: 0x00000004 - length: 0x0004 - kind: DICE_KIND_DATA - - offset: 0x00000008 - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE32 - - offset: 0x0000000C - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE16 - - offset: 0x00000010 - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE8 - - offset: 0x0000001C - length: 0x0004 - kind: DICE_KIND_DATA - - offset: 0x00000020 - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE32 - - offset: 0x00000024 - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE16 - - offset: 0x00000028 - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE8 -... 
- - - -# CHECK: defined-atoms: -# CHECK: - name: _foo_thumb -# CHECK: references: -# CHECK: - kind: modeThumbCode -# CHECK: offset: 0 -# CHECK: - kind: modeData -# CHECK: offset: 4 -# CHECK: addend: 1 -# CHECK: - kind: modeData -# CHECK: offset: 8 -# CHECK: addend: 4 -# CHECK: - kind: modeData -# CHECK: offset: 12 -# CHECK: addend: 3 -# CHECK: - kind: modeData -# CHECK: offset: 16 -# CHECK: addend: 2 -# CHECK: - kind: modeThumbCode -# CHECK: offset: 20 -# CHECK: - name: _foo_arm -# CHECK: references: -# CHECK: - kind: modeData -# CHECK: offset: 4 -# CHECK: addend: 1 -# CHECK: - kind: modeData -# CHECK: offset: 8 -# CHECK: addend: 4 -# CHECK: - kind: modeData -# CHECK: offset: 12 -# CHECK: addend: 3 -# CHECK: - kind: modeData -# CHECK: offset: 16 -# CHECK: addend: 2 -# CHECK: - kind: modeArmCode -# CHECK: offset: 20 - - -# CHECK2: cmd LC_DATA_IN_CODE -# CHECK2: cmdsize 16 -# CHECK2: datasize 64 - - -# .code 16 -# .thumb_func _foo_thumb -#_foo_thumb: -# nop -# nop -# -# .data_region -# .long 0 -# .end_data_region -# -# .data_region jt32 -# .long 1 -# .end_data_region -# -# .data_region jt16 -# .long 2 -# .end_data_region -# -# .data_region jt8 -# .long 3 -# .end_data_region -# -# nop -# nop -# -# -# -# .code 32 -# .align 2 -#_foo_arm: -# nop -# -# .data_region -# .long 10 -# .end_data_region -# -# .data_region jt32 -# .long 11 -# .end_data_region -# -# .data_region jt16 -# .long 12 -# .end_data_region -# -# .data_region jt8 -# .long 13 -# .end_data_region -# -# nop -# diff --git a/lld/test/mach-o/parse-data-in-code-x86.yaml b/lld/test/mach-o/parse-data-in-code-x86.yaml deleted file mode 100644 index 34de2d6f9439..000000000000 --- a/lld/test/mach-o/parse-data-in-code-x86.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s \ -# RUN: && ld64.lld.darwinold -arch i386 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing LC_DATA_IN_CODE -# -# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ 
MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x90, 0x90, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x03, 0x00, - 0x00, 0x00 ] -local-symbols: - - name: _func1 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: _func2 - type: N_SECT - sect: 1 - value: 0x000000000000000B -dataInCode: - - offset: 0x00000002 - length: 0x0008 - kind: DICE_KIND_JUMP_TABLE32 - - offset: 0x0000000E - length: 0x0004 - kind: DICE_KIND_JUMP_TABLE32 -... - - - -# CHECK: defined-atoms: -# CHECK: - name: _func1 -# CHECK: references: -# CHECK: - kind: modeData -# CHECK: offset: 2 -# CHECK: addend: 4 -# CHECK: - kind: modeCode -# CHECK: offset: 10 -# CHECK: - name: _func2 -# CHECK: references: -# CHECK: - kind: modeData -# CHECK: offset: 3 -# CHECK: addend: 4 -# CHECK-NOT: - kind: modeData - - - - -# -#_func1: -# nop -# nop -# .data_region jt32 -# .long 1 -# .long 2 -# .end_data_region -# nop -# -# -# _func2: -# nop -# nop -# nop -# .data_region jt32 -# .long 3 -# .end_data_region -# diff --git a/lld/test/mach-o/parse-data-relocs-arm64.yaml b/lld/test/mach-o/parse-data-relocs-arm64.yaml deleted file mode 100644 index 504d69f99712..000000000000 --- a/lld/test/mach-o/parse-data-relocs-arm64.yaml +++ /dev/null @@ -1,244 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing and writing of arm64 data relocations. -# -# The first step tests if the supplied mach-o file is parsed into the correct -# set of references. The second step verifies relocations can be round-tripped -# by writing to a new .o file, then parsing that file which should result in -# the same references. 
-# -#_test: - - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xC0, 0x03, 0x5F, 0xD6 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x0000000000000004 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xDC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xC0, 0xFF, 0xFF, 0xFF, 0xBE, 0xFF, 0xFF, 0xFF, - 0xB0, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000050 - type: ARM64_RELOC_POINTER_TO_GOT - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x0000004C - type: ARM64_RELOC_SUBTRACTOR - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x0000004C - type: ARM64_RELOC_UNSIGNED - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000048 - type: ARM64_RELOC_SUBTRACTOR - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000048 - type: ARM64_RELOC_UNSIGNED - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000040 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000038 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000030 - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000030 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000028 - type: 
ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000028 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000020 - type: ARM64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000020 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000018 - type: ARM64_RELOC_POINTER_TO_GOT - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000010 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000008 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 -local-symbols: - - name: _v1 - type: N_SECT - sect: 2 - value: 0x000000000000000C -global-symbols: - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -page-size: 0x00000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - ref-name: L000 -# CHECK: type: data -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: - name: _v1 -# CHECK: type: data -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 08, 00, 00, 00, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 00, 00, 00, 00, 00, 00, 00, E0, FF, FF, FF, -# CHECK: FF, FF, FF, FF, DC, FF, FF, FF, FF, FF, FF, FF, -# CHECK: {{..}}, {{..}}, 00, 00, 00, 00, 00, 00, 04, 00, 00, 00, -# CHECK: 00, 00, 00, 00, C0, FF, FF, FF, BE, FF, FF, FF, -# CHECK: {{B0|B8}}, {{..}}, FF, FF ] -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: pointer64 -# CHECK: offset: 8 -# CHECK: target: _foo -# CHECK: addend: 8 -# CHECK: - kind: pointer64ToGOT -# CHECK: offset: 16 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: delta64 -# CHECK: offset: 24 -# CHECK: target: _foo -# CHECK: addend: 24 -# CHECK: - kind: delta64 -# CHECK: offset: 32 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: delta64 -# CHECK: offset: 40 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: pointer64 -# CHECK: offset: 48 -# CHECK: target: L000 -# CHECK-NOT: addend: -# CHECK: - kind: pointer64 -# CHECK: offset: 56 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: delta32 -# CHECK: offset: 64 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: delta32 -# CHECK: offset: 68 -# CHECK: target: _foo -# CHECK: addend: 2 -# CHECK: - kind: delta32ToGOT -# CHECK: offset: 72 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - name: _bar -# CHECK: scope: global -# CHECK: content: [ C0, 03, 5F, D6 ] -# CHECK: alignment: 4 -# CHECK: undefined-atoms: -# CHECK: - name: _foo - -# .subsections_via_symbols -# .text -# .globl_foo -# .align2 -# _foo: -# ret -# .data -#Lanon: -# .quad 0 -#_v1: -# .quad _foo -# .quad _foo + 8 -# .quad _foo@GOT -# .quad _foo + 24 - . -# .quad _foo - . 
-# .quad _foo + 4 - . -# .quad Lanon -# .quad Lanon + 4 -# .long _foo - . -# .long _foo +2 - . -# .long _foo@GOT - . - diff --git a/lld/test/mach-o/parse-data-relocs-x86_64.yaml b/lld/test/mach-o/parse-data-relocs-x86_64.yaml deleted file mode 100644 index 8aea7cc88778..000000000000 --- a/lld/test/mach-o/parse-data-relocs-x86_64.yaml +++ /dev/null @@ -1,372 +0,0 @@ - -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -print_atoms | FileCheck %s \ -# RUN: && ld64.lld.darwinold -arch x86_64 %t -r -print_atoms -o %t2 | FileCheck %s -# -# Test parsing and writing of x86_64 data relocations. -# -# The first step tests if the supplied mach-o file is parsed into the correct -# set of references. The second step verifies relocations can be round-tripped -# by writing to a new .o file, then parsing that file which should result in -# the same references. -# -#_foo: -# ret -# -#_bar: -# ret -# -# .section __DATA,__custom -#L1: -# .quad 0 -# -# .data -#_d: -# .quad _foo -# .quad _foo+4 -# .quad _foo - . -# .quad L1 -# .quad L1 + 2 -# .quad _foo - . -# .quad _foo + 4 - . -# .quad L1 - . -# .long _foo - . -# .long _foo + 4 - . -# .long L1 - . 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3, 0xC3 ] - - segment: __DATA - section: __custom - type: S_REGULAR - attributes: [ ] - address: 0x0000000000000002 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x000000000000000A - content: [ -# .quad _foo -# No addend is needed here as we are referencing _foo directly and that is -# encoded entirely in the X86_64_RELOC_UNSIGNED - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -# .quad _foo+4 -# Addend of 4 is needed here as we are referencing _foo from the -# X86_64_RELOC_UNSIGNED, then the addend gives us 4 more. - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -# .quad _foo - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -16 because that is the offset from here back -# to _d. - 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, -# .quad . - _foo -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -16 because that is the offset from here back -# to _d. - 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -# .quad L1 -# This is a X86_64_RELOC_UNSIGNED without extern set. -# In this case, we encode the section number for L1 in the relocation, and -# the addend here is the absolute address of the location in that section -# we want to reference. 
- 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -# .quad L1 + 2 -# This is a X86_64_RELOC_UNSIGNED without extern set. -# In this case, we encode the section number for L1 in the relocation, and -# the addend here is the absolute address of the location in that section -# we want to reference. We have a 4 because the section is at address 2 -# and we want an offset of 2 from there. - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -# .quad _foo - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -40 because that is the offset from here back -# to _d. - 0xD0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, -# .quad _foo + 4 - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -52. It would have been -56 because that -# would take us from the address of this relocation back to _d. But as -# we also add 4 for the offset, we get -52. - 0xCC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, -# .quad L1 - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned does not have extern set, so the relocation -# number is the section number for L1. -# Note the addend here is -62. Of that, -64 would be the offset from -# this location from _d. The remaining 2 is the absolute address -# of L1. - 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, -# .long _foo - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -72 because that is the offset from here back -# to _d. 
- 0xB8, 0xFF, 0xFF, 0xFF, -# .long . - _foo -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -76 because that is the offset from here back -# to _d. - 0xB4, 0xFF, 0xFF, 0xFF, -# .long _foo + 4 - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned references _foo. -# Note the addend here is -76. It would have been -80 because that -# would take us from the address of this relocation back to _d. But as -# we also add 4 for the offset, we get -76. - 0xB4, 0xFF, 0xFF, 0xFF, -# .long L1 - . -# This is the pair X86_64_RELOC_SUBTRACTOR and X86_64_RELOC_UNSIGNED. -# The subtractor references _d which is the first nonlocal label in this -# section. The unsigned does not have extern set, so the relocation -# number is the section number for L1. -# Note the addend here is -82. Of that, -84 would be the offset from -# this location from _d. The remaining 2 is the absolute address -# of L1. 
- 0xAE, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000054 - type: X86_64_RELOC_SUBTRACTOR - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000054 - type: X86_64_RELOC_UNSIGNED - length: 2 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000050 - type: X86_64_RELOC_SUBTRACTOR - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000050 - type: X86_64_RELOC_UNSIGNED - length: 2 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x0000004C - type: X86_64_RELOC_SUBTRACTOR - length: 2 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x0000004C - type: X86_64_RELOC_UNSIGNED - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000048 - type: X86_64_RELOC_SUBTRACTOR - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000048 - type: X86_64_RELOC_UNSIGNED - length: 2 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000040 - type: X86_64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000040 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000038 - type: X86_64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000038 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000030 - type: X86_64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000030 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000028 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000020 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000018 - type: X86_64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000018 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - 
symbol: 2 - - offset: 0x00000010 - type: X86_64_RELOC_SUBTRACTOR - length: 3 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000010 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000008 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 -local-symbols: - - name: _foo - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: _bar - type: N_SECT - sect: 1 - value: 0x0000000000000001 - - name: _d - type: N_SECT - sect: 3 - value: 0x000000000000000A -page-size: 0x00000000 -... - - -# CHECK:defined-atoms: -# CHECK: - name: _d -# CHECK: type: data -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 04, 00, 00, 00, -# CHECK: 00, 00, 00, 00, F0, FF, FF, FF, FF, FF, FF, FF, -# CHECK: 18, 00, 00, 00, 00, 00, 00, 00, {{..}}, {{..}}, 00, 00, -# CHECK: 00, 00, 00, 00, {{..}}, {{..}}, 00, 00, 00, 00, 00, 00, -# CHECK: D0, FF, FF, FF, FF, FF, FF, FF, CC, FF, FF, FF, -# CHECK: FF, FF, FF, FF, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, {{..}}, -# CHECK: B8, FF, FF, FF, B4, FF, FF, FF, B4, FF, FF, FF, -# CHECK: {{..}}, {{..}}, {{..}}, {{..}} ] -# CHECK: dead-strip: never -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: _foo -# CHECK: - kind: pointer64 -# CHECK: offset: 8 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: delta64 -# CHECK: offset: 16 -# CHECK: target: _foo -# CHECK: - kind: negDelta64 -# CHECK: offset: 24 -# CHECK: target: _foo -# CHECK: - kind: pointer64Anon -# CHECK: offset: 32 -# CHECK: target: L003 -# CHECK: - kind: pointer64Anon -# CHECK: offset: 40 -# CHECK: target: L003 -# CHECK: addend: 2 -# CHECK: - kind: delta64 -# CHECK: offset: 48 -# CHECK: target: _foo -# CHECK: - kind: delta64 -# CHECK: offset: 56 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: delta64Anon -# CHECK: 
offset: 64 -# CHECK: target: L003 -# CHECK: - kind: delta32 -# CHECK: offset: 72 -# CHECK: target: _foo -# CHECK: - kind: negDelta32 -# CHECK: offset: 76 -# CHECK: target: _foo -# CHECK: - kind: delta32 -# CHECK: offset: 80 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: delta32Anon -# CHECK: offset: 84 -# CHECK: target: L003 -# CHECK: - name: _foo -# CHECK: content: [ C3 ] -# CHECK: dead-strip: never -# CHECK: - name: _bar -# CHECK: content: [ C3 ] -# CHECK: dead-strip: never -# CHECK: - ref-name: L003 -# CHECK: type: unknown -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: section-choice: custom-required -# CHECK: section-name: '__DATA/__custom' -# CHECK: dead-strip: never - diff --git a/lld/test/mach-o/parse-data.yaml b/lld/test/mach-o/parse-data.yaml deleted file mode 100644 index ec8a2ad439c4..000000000000 --- a/lld/test/mach-o/parse-data.yaml +++ /dev/null @@ -1,119 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of mach-o data symbols. 
-# -# long a = 0x0807060504030201; -# int b = 0x14131211; -# int c = 0x24232221; -# static int s1; -# static int s2 = 0x34333231; -# -# - - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000000 - content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x11, 0x12, 0x13, 0x14, 0x21, 0x22, 0x23, 0x24, - 0x31, 0x32, 0x33, 0x34, 0x41, 0x42, 0x43, 0x44 ] - - segment: __CUST - section: __custom - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x0000000000000018 - content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 ] - - segment: __DATA - section: __bss - type: S_ZEROFILL - attributes: [ ] - alignment: 2 - address: 0x0000000000000020 - size: 4 -local-symbols: - - name: _s1 - type: N_SECT - sect: 3 - value: 0x0000000000000020 - - name: _s2 - type: N_SECT - sect: 1 - value: 0x0000000000000010 -global-symbols: - - name: _a - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _b - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000008 - - name: _c - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000C - - name: _cWeak - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_WEAK_DEF ] - value: 0x0000000000000014 - - name: _kustom - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000018 -... 
- -# CHECK: defined-atoms: - -# CHECK: - name: _a -# CHECK: scope: global -# CHECK: type: data -# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] - -# CHECK: - name: _b -# CHECK: scope: global -# CHECK: type: data -# CHECK: content: [ 11, 12, 13, 14 ] - -# CHECK: - name: _c -# CHECK: scope: global -# CHECK: type: data -# CHECK: content: [ 21, 22, 23, 24 ] - -# CHECK: - name: _s2 -# CHECK: type: data -# CHECK: content: [ 31, 32, 33, 34 ] - -# CHECK: - name: _cWeak -# CHECK: scope: global -# CHECK: type: data -# CHECK: content: [ 41, 42, 43, 44 ] -# CHECK: merge: as-weak - -# CHECK: - name: _s1 -# CHECK: type: zero-fill -# CHECK: size: 4 - -# CHECK: - name: _kustom -# CHECK: scope: global -# CHECK: type: unknown -# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] -# CHECK: section-choice: custom-required -# CHECK: section-name: '__CUST/__custom' - diff --git a/lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml b/lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml deleted file mode 100644 index c82c85f0ec0c..000000000000 --- a/lld/test/mach-o/parse-eh-frame-relocs-x86_64.yaml +++ /dev/null @@ -1,176 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of x86_64 __eh_frame (dwarf unwind) relocations. 
- ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0x5D, 0xC3, 0x48, 0x89, 0xC7, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, 0x00, 0x00, - 0x00, 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x66, 0x2E, - 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0x5D, 0xC3, 0x48, 0x89, 0xC7, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, 0x00, 0x00, - 0x00 ] - - segment: __TEXT - section: __gcc_except_tab - type: S_REGULAR - attributes: [ ] - alignment: 4 - address: 0x000000000000004C - content: [ 0xFF, 0x9B, 0xA2, 0x80, 0x80, 0x00, 0x03, 0x1A, - 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x09, 0x00, 0x00, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFF, 0x9B, 0xA2, 0x80, 0x80, 0x00, 0x03, 0x1A, - 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x09, 0x00, 0x00, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 ] - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - address: 0x0000000000000100 - content: [ 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x7A, 0x50, 0x4C, 0x52, 0x00, 0x01, 0x78, - 0x10, 0x07, 0x9B, 0x04, 0x00, 0x00, 0x00, 0x10, - 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0xD8, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 
0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, - 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0xB0, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, - 0x98, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0xCB, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000013 - type: X86_64_RELOC_GOT - length: 2 - pc-rel: true - extern: true - symbol: 8 -local-symbols: - - name: GCC_except_table0 - type: N_SECT - sect: 2 - value: 0x000000000000004C - - name: GCC_except_table2 - type: N_SECT - sect: 2 - value: 0x0000000000000074 -global-symbols: - - name: _catchMyException1 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _catchMyException2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000030 - - name: _bar - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000020 -undefined-symbols: - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_begin_catch - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_end_catch - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___gxx_personality_v0 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -page-size: 0x00000000 -... - -# Check that LSDA fields are fixed up correctly, even when there are multiple -# CIEs involved. -# -# (1) Check that we can relocate an LSDA at all. 
Requires correct interpretation -# of augmentation data strings in CIEs and augmentation data fields of FDEs. -# -# CHECK: - type: unwind-cfi -# CHECK-NOT: - type: -# CHECK: references: -# CHECK-NEXT: - kind: negDelta32 -# CHECK-NEXT: offset: 4 -# CHECK-NEXT: target: L002 -# CHECK-NEXT: - kind: unwindFDEToFunction -# CHECK-NEXT: offset: 8 -# CHECK-NEXT: target: _catchMyException1 -# CHECK-NEXT: - kind: unwindFDEToFunction -# CHECK-NEXT: offset: 25 -# CHECK-NEXT: target: GCC_except_table0 -# -# (2) Check that we have an intervening FDE with a different CIE. -# If the test fails here then test (3) probably isn't testing what it -# should, and this test-case should be updated. -# -# CHECK: - type: unwind-cfi -# CHECK-NOT: - type: -# CHECK: references: -# CHECK-NEXT: - kind: negDelta32 -# CHECK-NEXT: offset: 4 -# CHECK-NEXT: target: L001 -# CHECK-NEXT: - kind: unwindFDEToFunction -# CHECK-NEXT: offset: 8 -# CHECK-NEXT: target: _bar -# -# (3) Check that we can relocate the LSDA on a second FDE that references the -# original CIE from (1). Requires us to match this FDE up with the correct -# CIE. -# -# CHECK-NEXT: - type: unwind-cfi -# CHECK-NOT: - type: -# CHECK: references: -# CHECK-NEXT: - kind: negDelta32 -# CHECK-NEXT: offset: 4 -# CHECK-NEXT: target: L002 -# CHECK-NEXT: - kind: unwindFDEToFunction -# CHECK-NEXT: offset: 8 -# CHECK-NEXT: target: _catchMyException2 -# CHECK-NEXT: - kind: unwindFDEToFunction -# CHECK-NEXT: offset: 25 -# CHECK-NEXT: target: GCC_except_table2 diff --git a/lld/test/mach-o/parse-eh-frame-x86-anon.yaml b/lld/test/mach-o/parse-eh-frame-x86-anon.yaml deleted file mode 100644 index feb914b581b9..000000000000 --- a/lld/test/mach-o/parse-eh-frame-x86-anon.yaml +++ /dev/null @@ -1,129 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of new __eh_frame (dwarf unwind) section that has no .eh labels -# and no relocations. 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0x56, 0x83, 0xEC, 0x14, 0xE8, - 0x00, 0x00, 0x00, 0x00, 0x5E, 0xC7, 0x04, 0x24, - 0x04, 0x00, 0x00, 0x00, 0xE8, 0xE7, 0xFF, 0xFF, - 0xFF, 0xC7, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x8B, - 0x8E, 0x38, 0x00, 0x00, 0x00, 0x89, 0x4C, 0x24, - 0x04, 0x89, 0x04, 0x24, 0xC7, 0x44, 0x24, 0x08, - 0x00, 0x00, 0x00, 0x00, 0xE8, 0xC7, 0xFF, 0xFF, - 0xFF, 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, - 0xBC, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000040 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000035 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000021 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000044 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000C - - offset: 0x00000015 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 3 - - segment: __IMPORT - section: __pointers - type: S_NON_LAZY_SYMBOL_POINTERS - attributes: [ ] - address: 0x0000000000000044 - content: [ 0x00, 0x00, 0x00, 0x00 ] - indirect-syms: [ 5 ] - - segment: __TEXT - section: __eh_frame - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000048 - content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x52, 0x00, 0x01, 0x7C, 0x08, 0x01, - 0x10, 0x0C, 0x05, 0x04, 0x88, 0x01, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x98, 0xFF, 0xFF, 0xFF, 0x39, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x08, 0x84, 0x02, 0x42, 0x0D, - 0x04, 0x44, 0x86, 0x03, 0x18, 0x00, 0x00, 0x00, - 0x38, 0x00, 0x00, 0x00, 0xB5, 0xFF, 
0xFF, 0xFF, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x41, 0x0E, 0x08, - 0x84, 0x02, 0x42, 0x0D, 0x04, 0x00, 0x00, 0x00 ] -global-symbols: - - name: __Z3barv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000039 - - name: __Z3foov - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: __ZTIi - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_allocate_exception - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_throw - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - -# CHECK: defined-atoms: -# CHECK: - ref-name: [[CIE:L[L0-9]+]] -# CHECK: type: unwind-cfi -# CHECK: content: -# CHECK: - type: unwind-cfi -# CHECK: content: -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: [[CIE]] -# CHECK: - kind: delta32 -# CHECK: offset: 8 -# CHECK: target: __Z3foov -# CHECK: - type: unwind-cfi -# CHECK: content: -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: [[CIE]] -# CHECK: - kind: delta32 -# CHECK: offset: 8 -# CHECK: target: __Z3barv - diff --git a/lld/test/mach-o/parse-eh-frame-x86-labeled.yaml b/lld/test/mach-o/parse-eh-frame-x86-labeled.yaml deleted file mode 100644 index b1853ae9ac46..000000000000 --- a/lld/test/mach-o/parse-eh-frame-x86-labeled.yaml +++ /dev/null @@ -1,193 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of old __eh_frame (dwarf unwind) section that has .eh labels -# and relocations. 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0x56, 0x83, 0xEC, 0x14, 0xE8, - 0x00, 0x00, 0x00, 0x00, 0x5E, 0xC7, 0x04, 0x24, - 0x04, 0x00, 0x00, 0x00, 0xE8, 0xE7, 0xFF, 0xFF, - 0xFF, 0xC7, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x8B, - 0x8E, 0x38, 0x00, 0x00, 0x00, 0x89, 0x4C, 0x24, - 0x04, 0x89, 0x04, 0x24, 0xC7, 0x44, 0x24, 0x08, - 0x00, 0x00, 0x00, 0x00, 0xE8, 0xC7, 0xFF, 0xFF, - 0xFF, 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, - 0xBC, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000040 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000035 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 7 - - offset: 0x00000021 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000044 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000C - - offset: 0x00000015 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 6 - - segment: __IMPORT - section: __pointers - type: S_NON_LAZY_SYMBOL_POINTERS - attributes: [ ] - address: 0x0000000000000044 - content: [ 0x00, 0x00, 0x00, 0x00 ] - indirect-syms: [ 5 ] - - segment: __TEXT - section: __eh_frame - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000048 - content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x52, 0x00, 0x01, 0x7C, 0x08, 0x01, - 0x10, 0x0C, 0x05, 0x04, 0x88, 0x01, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x98, 0xFF, 0xFF, 0xFF, 0x39, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x08, 0x84, 0x02, 0x42, 0x0D, - 0x04, 0x44, 0x86, 0x03, 0x18, 0x00, 0x00, 0x00, - 0x38, 0x00, 0x00, 0x00, 0xB5, 0xFF, 
0xFF, 0xFF, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x41, 0x0E, 0x08, - 0x84, 0x02, 0x42, 0x0D, 0x04, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x0000001C - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000064 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000048 - - offset: 0x00000020 - scattered: true - type: GENERIC_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000000 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000068 - - offset: 0x00000038 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000080 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000048 - - offset: 0x0000003C - scattered: true - type: GENERIC_RELOC_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000039 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000084 -local-symbols: - - name: EH_frame0 - type: N_SECT - sect: 3 - value: 0x0000000000000048 -global-symbols: - - name: __Z3barv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000039 - - name: __Z3barv.eh - type: N_SECT - scope: [ N_EXT ] - sect: 3 - value: 0x000000000000007C - - name: __Z3foov - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: __Z3foov.eh - type: N_SECT - scope: [ N_EXT ] - sect: 3 - value: 0x0000000000000060 -undefined-symbols: - - name: __ZTIi - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_allocate_exception - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_throw - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - ref-name: [[CIE:L[L0-9]+]] -# CHECK: type: unwind-cfi -# CHECK: content: -# CHECK: - type: unwind-cfi -# CHECK: content: -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: [[CIE]] -# CHECK: - kind: delta32 -# CHECK: offset: 8 -# CHECK: target: __Z3foov -# CHECK: - type: unwind-cfi -# CHECK: content: -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: [[CIE]] -# CHECK: - kind: delta32 -# CHECK: offset: 8 -# CHECK: target: __Z3barv - diff --git a/lld/test/mach-o/parse-eh-frame.yaml b/lld/test/mach-o/parse-eh-frame.yaml deleted file mode 100644 index 014e1f21c005..000000000000 --- a/lld/test/mach-o/parse-eh-frame.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of __eh_frame (dwarf unwind) section. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0xB8, 0x09, 0x00, 0x00, - 0x00, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, 0xB8, - 0x0A, 0x00, 0x00, 0x00, 0x5D, 0xC3 ] - - segment: __TEXT - section: __eh_frame - type: S_COALESCED - attributes: [ ] - alignment: 8 - address: 0x0000000000000058 - content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, - 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x88, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x6B, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0B, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: __Z3barv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: __Z3foov - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000B -... - -# CHECK: defined-atoms: -# CHECK: - ref-name: [[CIE:L[0-9]+]] -# CHECK: type: unwind-cfi -# CHECK: content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00, -# CHECK: 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ] -# CHECK: - type: unwind-cfi -# CHECK: content: [ 24, 00, 00, 00, 1C, 00, 00, 00, 88, FF, FF, FF, -# CHECK: FF, FF, FF, FF, 0B, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, -# CHECK: 00, 00, 00, 00 ] -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: [[CIE]] -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 8 -# CHECK: target: __Z3barv -# CHECK: - type: unwind-cfi -# CHECK: content: [ 24, 00, 00, 00, 44, 00, 00, 00, 6B, FF, FF, FF, -# CHECK: FF, FF, FF, FF, 0B, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, -# CHECK: 00, 00, 00, 00 ] -# CHECK: references: -# CHECK: - kind: negDelta32 -# CHECK: offset: 4 -# CHECK: target: [[CIE]] -# CHECK: - kind: unwindFDEToFunction -# CHECK: offset: 8 -# CHECK: target: __Z3foov -# CHECK: - name: __Z3barv -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, B8, 09, 00, 00, 00, 5D, C3 ] -# CHECK: - name: __Z3foov -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ] - diff --git a/lld/test/mach-o/parse-function.yaml b/lld/test/mach-o/parse-function.yaml deleted file mode 100644 index b6d24fee6ff6..000000000000 --- a/lld/test/mach-o/parse-function.yaml +++ /dev/null @@ -1,100 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r %s -o %t -# RUN: ld64.lld.darwinold -arch x86_64 -r %t -print_atoms -o %t2 | FileCheck %s -# -# Test 
parsing of mach-o functions. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, - 0x90, 0x90, 0xC3, 0x90, 0x90, 0x90, 0x90, 0xC3, - 0xCC, 0x31, 0xC0, 0xC3 ] -local-symbols: - - name: _myStatic - type: N_SECT - sect: 1 - value: 0x000000000000000B -global-symbols: - - name: _myGlobal - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - - name: _myGlobalWeak - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_WEAK_DEF ] - value: 0x0000000000000002 - - name: _myHidden - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - value: 0x0000000000000004 - - name: _myHiddenWeak - type: N_SECT - scope: [ N_EXT, N_PEXT ] - sect: 1 - desc: [ N_WEAK_DEF ] - value: 0x0000000000000007 - - name: _myStripNot - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_NO_DEAD_STRIP ] - value: 0x0000000000000010 - - name: _myResolver - type: N_SECT - scope: [ N_EXT ] - sect: 1 - desc: [ N_SYMBOL_RESOLVER ] - value: 0x0000000000000011 -... 
- -# CHECK-NOT: name: -# CHECK: content: [ CC ] - -# CHECK: name: _myGlobal -# CHECK: scope: global -# CHECK: content: [ C3 ] - -# CHECK: name: _myGlobalWeak -# CHECK: scope: global -# CHECK: content: [ 90, C3 ] -# CHECK: merge: as-weak - -# CHECK: name: _myHidden -# CHECK: scope: hidden -# CHECK: content: [ 90, 90, C3 ] - -# CHECK: name: _myHiddenWeak -# CHECK: scope: hidden -# CHECK: content: [ 90, 90, 90, C3 ] -# CHECK: merge: as-weak - -# CHECK: name: _myStatic -# CHECK-NOT: scope: global -# CHECK-NOT: scope: hidden -# CHECK: content: [ 90, 90, 90, 90, C3 ] - -# CHECK: name: _myStripNot -# CHECK: scope: global -# CHECK: content: [ CC ] -# CHECK: dead-strip: never - -# CHECK: name: _myResolver -# CHECK: scope: global -# CHECK: type: resolver -# CHECK: content: [ 31, C0, C3 ] - diff --git a/lld/test/mach-o/parse-initializers32.yaml b/lld/test/mach-o/parse-initializers32.yaml deleted file mode 100644 index 5f243198050e..000000000000 --- a/lld/test/mach-o/parse-initializers32.yaml +++ /dev/null @@ -1,84 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of literal sections. 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x89, 0xE5, - 0x5D, 0xC3, 0x55, 0x89, 0xE5, 0x5D, 0xC3 ] - - segment: __DATA - section: __mod_init_func - type: S_MOD_INIT_FUNC_POINTERS - attributes: [ ] - alignment: 2 - address: 0x0000000000000044 - content: [ 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000004 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - segment: __DATA - section: __mod_term_func - type: S_MOD_TERM_FUNC_POINTERS - attributes: [ ] - alignment: 2 - address: 0x0000000000000104 - content: [ 0x0A, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _init - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _init2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000005 - - name: _term - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000A -... 
- - -# CHECK:defined-atoms: -# CHECK: - type: initializer-pointer -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: dead-strip: never -# CHECK: - type: initializer-pointer -# CHECK: content: [ 05, 00, 00, 00 ] -# CHECK: dead-strip: never -# CHECK: - type: terminator-pointer -# CHECK: content: [ 0A, 00, 00, 00 ] -# CHECK: dead-strip: never -# CHECK: - name: _init -# CHECK: scope: global -# CHECK: content: [ 55, 89, E5, 5D, C3 ] -# CHECK: - name: _init2 -# CHECK: scope: global -# CHECK: content: [ 55, 89, E5, 5D, C3 ] -# CHECK: - name: _term -# CHECK: scope: global -# CHECK: content: [ 55, 89, E5, 5D, C3 ] diff --git a/lld/test/mach-o/parse-initializers64.yaml b/lld/test/mach-o/parse-initializers64.yaml deleted file mode 100644 index 8d1503ba6665..000000000000 --- a/lld/test/mach-o/parse-initializers64.yaml +++ /dev/null @@ -1,105 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of literal sections. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, - 0x89, 0xE5, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, - 0x5D, 0xC3 ] - - segment: __DATA - section: __mod_init_func - type: S_MOD_INIT_FUNC_POINTERS - attributes: [ ] - alignment: 1 - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000008 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 1 - - segment: __DATA - section: __mod_term_func - type: S_MOD_TERM_FUNC_POINTERS - attributes: [ ] - alignment: 8 - address: 
0x0000000000000108 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 2 -global-symbols: - - name: _init - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _init2 - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000006 - - name: _term - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x000000000000000C -... - - -# CHECK:defined-atoms: -# CHECK: - type: initializer-pointer -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: dead-strip: never -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: _init -# CHECK: - type: initializer-pointer -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: dead-strip: never -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: _init2 -# CHECK: - type: terminator-pointer -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: dead-strip: never -# CHECK: references: -# CHECK: - kind: pointer64 -# CHECK: offset: 0 -# CHECK: target: _term -# CHECK: - name: _init -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, 5D, C3 ] -# CHECK: - name: _init2 -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, 5D, C3 ] -# CHECK: - name: _term -# CHECK: scope: global -# CHECK: content: [ 55, 48, 89, E5, 5D, C3 ] diff --git a/lld/test/mach-o/parse-literals-error.yaml b/lld/test/mach-o/parse-literals-error.yaml deleted file mode 100644 index b426c819422f..000000000000 --- a/lld/test/mach-o/parse-literals-error.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t 2> %t.err -# RUN: FileCheck %s < %t.err -# -# Test for error if literal section is not correct size multiple. 
-# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __literal8 - type: S_8BYTE_LITERALS - attributes: [ ] - alignment: 0 - address: 0x0000000000000120 - content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D ] -... - -# CHECK: error: - diff --git a/lld/test/mach-o/parse-literals.yaml b/lld/test/mach-o/parse-literals.yaml deleted file mode 100644 index 8792e9524206..000000000000 --- a/lld/test/mach-o/parse-literals.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of literal sections. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - alignment: 1 - address: 0x0000000000000100 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, - 0x65, 0x72, 0x65, 0x00, 0x77, 0x6F, 0x72, 0x6C, - 0x00 ] - - segment: __TEXT - section: __literal4 - type: S_4BYTE_LITERALS - attributes: [ ] - alignment: 1 - address: 0x0000000000000114 - content: [ 0x01, 0x02, 0x03, 0x04, 0x11, 0x12, 0x13, 0x14, - 0x28, 0x29, 0x2A, 0x2B ] - - segment: __TEXT - section: __literal8 - type: S_8BYTE_LITERALS - attributes: [ ] - alignment: 1 - address: 0x0000000000000120 - content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F ] - - segment: __TEXT - section: __literal16 - type: S_16BYTE_LITERALS - attributes: [ ] - alignment: 1 - address: 0x0000000000000130 - content: [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00 ] - - segment: __TEXT - section: __ustring - type: S_REGULAR - attributes: [ ] - alignment: 1 - address: 0x0000000000000100 - content: [ 0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 
- 0x6F, 0x00, 0x00, 0x00, 0x74, 0x00, 0x68, 0x00, - 0x65, 0x00, 0x72, 0x00, 0x00, 0x00 ] -... - - -# CHECK:defined-atoms: -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] -# CHECK: - scope: hidden -# CHECK: type: c-string -# CHECK: content: [ 77, 6F, 72, 6C, 00 ] -# CHECK: - scope: hidden -# CHECK: type: utf16-string -# CHECK: content: [ 68, 00, 65, 00, 6C, 00, 6C, 00, 6F, 00, 00, 00 ] -# CHECK: - scope: hidden -# CHECK: type: utf16-string -# CHECK: content: [ 74, 00, 68, 00, 65, 00, 72, 00, 00, 00 ] -# CHECK: - scope: hidden -# CHECK: type: const-4-byte -# CHECK: content: [ 01, 02, 03, 04 ] -# CHECK: - scope: hidden -# CHECK: type: const-4-byte -# CHECK: content: [ 11, 12, 13, 14 ] -# CHECK: - scope: hidden -# CHECK: type: const-4-byte -# CHECK: content: [ 28, 29, 2A, 2B ] -# CHECK: - scope: hidden -# CHECK: type: const-8-byte -# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08 ] -# CHECK: - scope: hidden -# CHECK: type: const-8-byte -# CHECK: content: [ 28, 29, 2A, 2B, 2C, 2D, 2E, 2F ] -# CHECK: - scope: hidden -# CHECK: type: const-16-byte -# CHECK: content: [ 01, 02, 03, 04, 05, 06, 07, 08, 09, 0A, 0B, 0C, -# CHECK: 0D, 0E, 0F, 00 ] - diff --git a/lld/test/mach-o/parse-non-lazy-pointers.yaml b/lld/test/mach-o/parse-non-lazy-pointers.yaml deleted file mode 100644 index 591c116e00c6..000000000000 --- a/lld/test/mach-o/parse-non-lazy-pointers.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of non-lazy-pointer sections. 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, 0x00, - 0x59, 0x8D, 0x81, 0x14, 0x00, 0x00, 0x00, 0x8D, - 0x81, 0x18, 0x00, 0x00, 0x00, 0x5D, 0xC3, 0x55, - 0x89, 0xE5, 0x5D, 0xC3 ] - relocations: - - offset: 0x00000011 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000020 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000008 - - offset: 0x0000000B - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x0000001C - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000008 - - segment: __IMPORT - section: __pointers - type: S_NON_LAZY_SYMBOL_POINTERS - attributes: [ ] - address: 0x000000000000001C - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - indirect-syms: [ 2, 2147483648 ] -local-symbols: - - name: _foo - type: N_SECT - sect: 1 - value: 0x0000000000000017 -global-symbols: - - name: _get - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- - -# CHECK:defined-atoms: -# CHECK: - ref-name: [[GOT1:L[L0-9]+]] -# CHECK: scope: hidden -# CHECK: type: got -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: merge: by-content -# CHECK: - ref-name: [[GOT2:L[L0-9]+]] -# CHECK: scope: hidden -# CHECK: type: got -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: merge: by-content -# CHECK: - name: _get -# CHECK: scope: global -# CHECK: content: [ 55, 89, E5, E8, 00, 00, 00, 00, 59, 8D, 81, 14, -# CHECK: 00, 00, 00, 8D, 81, 18, 00, 00, 00, 5D, C3 ] -# CHECK: references: -# CHECK: - kind: funcRel32 -# CHECK: offset: 11 -# CHECK: target: [[GOT1]] -# CHECK: - kind: funcRel32 -# CHECK: offset: 17 -# CHECK: target: [[GOT2]] -# CHECK: - name: _foo -# CHECK: content: [ 55, 89, E5, 5D, C3 ] - - diff --git a/lld/test/mach-o/parse-relocs-x86.yaml b/lld/test/mach-o/parse-relocs-x86.yaml deleted file mode 100644 index b1bd6199ff0a..000000000000 --- a/lld/test/mach-o/parse-relocs-x86.yaml +++ /dev/null @@ -1,296 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 -r -print_atoms %s -o %t | FileCheck %s \ -# RUN: && ld64.lld.darwinold -arch i386 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing and writing of x86 relocations. -# -# The first step tests if the supplied mach-o file is parsed into the correct -# set of references. The second step verifies relocations can be round-tripped -# by writing to a new .o file, then parsing that file which should result in -# the same references. -# -# .text -#_test: -# call _undef -# call _undef+2 -# call _foo -# call _foo+2 -# callw _undef -# callw _foo -# callw _foo+2 -#L1: -# movl _undef, %eax -# movl _x, %eax -# movl _x+4, %eax -# movl _x-L1(%eax), %eax -# movl _x+4-L1(%eax), %eax -# -#_foo: -# ret -# -# .data -#_x: -# .long _undef -# .long _undef+7 -# .long _foo -# .long _foo+3 -# .long _test - . -# .long _test+3 - . 
-# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xE8, 0xFB, 0xFF, 0xFF, 0xFF, 0xE8, 0xF8, 0xFF, - 0xFF, 0xFF, 0xE8, 0x2C, 0x00, 0x00, 0x00, 0xE8, - 0x29, 0x00, 0x00, 0x00, 0x66, 0xE8, 0xE8, 0xFF, - 0x66, 0xE8, 0x1F, 0x00, 0x66, 0xE8, 0x1D, 0x00, - 0xA1, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x3C, 0x00, - 0x00, 0x00, 0xA1, 0x40, 0x00, 0x00, 0x00, 0x8B, - 0x80, 0x1C, 0x00, 0x00, 0x00, 0x8B, 0x80, 0x20, - 0x00, 0x00, 0x00, 0xC3 ] - relocations: - - offset: 0x00000037 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x0000003C - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000020 - - offset: 0x00000031 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x0000003C - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000020 - - offset: 0x0000002B - scattered: true - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - value: 0x0000003C - - offset: 0x00000026 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000021 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 3 - - offset: 0x0000001E - scattered: true - type: GENERIC_RELOC_VANILLA - length: 1 - pc-rel: true - value: 0x0000003B - - offset: 0x0000001A - type: GENERIC_RELOC_VANILLA - length: 1 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000016 - type: GENERIC_RELOC_VANILLA - length: 1 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000010 - scattered: true - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - value: 0x0000003B - - offset: 0x0000000B - type: GENERIC_RELOC_VANILLA - 
length: 2 - pc-rel: true - extern: false - symbol: 1 - - offset: 0x00000006 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 3 - - offset: 0x00000001 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 3 - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x000000000000003C - content: [ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0xB4, 0xFF, 0xFF, 0xFF, 0xB3, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000014 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000000 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x00000050 - - offset: 0x00000010 - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000000 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000004C - - offset: 0x0000000C - scattered: true - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - value: 0x0000003B - - offset: 0x00000008 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000004 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 3 - - offset: 0x00000000 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: false - extern: true - symbol: 3 -local-symbols: - - name: _test - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: _foo - type: N_SECT - sect: 1 - value: 0x000000000000003B - - name: _x - type: N_SECT - sect: 2 - value: 0x000000000000003C -undefined-symbols: - - name: _undef - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - name: _x -# CHECK: type: data -# CHECK: references: -# CHECK: - kind: pointer32 -# CHECK: offset: 0 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: pointer32 -# CHECK: offset: 4 -# CHECK: target: _undef -# CHECK: addend: 7 -# CHECK: - kind: pointer32 -# CHECK: offset: 8 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: pointer32 -# CHECK: offset: 12 -# CHECK: target: _foo -# CHECK: addend: 3 -# CHECK: - kind: delta32 -# CHECK: offset: 16 -# CHECK: target: _test -# CHECK: - kind: delta32 -# CHECK: offset: 20 -# CHECK: target: _test -# CHECK: addend: 3 -# CHECK: - name: _test -# CHECK: references: -# CHECK: - kind: branch32 -# CHECK: offset: 1 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: branch32 -# CHECK: offset: 6 -# CHECK: target: _undef -# CHECK: addend: 2 -# CHECK: - kind: branch32 -# CHECK: offset: 11 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: branch32 -# CHECK: offset: 16 -# CHECK: target: _foo -# CHECK: addend: 2 -# CHECK: - kind: branch16 -# CHECK: offset: 22 -# CHECK: target: _undef -# CHECK-NOT: addend: -# CHECK: - kind: branch16 -# CHECK: offset: 26 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: branch16 -# CHECK: offset: 30 -# CHECK: target: _foo -# CHECK: addend: 2 -# CHECK: - kind: abs32 -# CHECK: offset: 33 -# CHECK: target: _undef -# CHECK: - kind: abs32 -# CHECK: offset: 38 -# CHECK: target: _x -# CHECK: - kind: abs32 -# CHECK: offset: 43 -# CHECK: target: _x -# CHECK: addend: 4 -# CHECK: - kind: funcRel32 -# CHECK: offset: 49 -# CHECK: target: _x -# CHECK: addend: -32 -# CHECK: - kind: funcRel32 -# CHECK: offset: 55 -# CHECK: target: _x -# CHECK: addend: -28 - diff --git a/lld/test/mach-o/parse-section-no-symbol.yaml b/lld/test/mach-o/parse-section-no-symbol.yaml deleted file mode 100644 index a1747e97c126..000000000000 --- a/lld/test/mach-o/parse-section-no-symbol.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# RUN: 
ld64.lld.darwinold -arch x86_64 -r %s -print_atoms -o %t2 | FileCheck %s -# -# Test parsing of mach-o functions with no symbols at all. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xCC ] -... - -# CHECK-NOT: name: -# CHECK: content: [ CC ] diff --git a/lld/test/mach-o/parse-tentative-defs.yaml b/lld/test/mach-o/parse-tentative-defs.yaml deleted file mode 100644 index 345038b00859..000000000000 --- a/lld/test/mach-o/parse-tentative-defs.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s -# -# Test parsing of tentative definitions, including size, scope, and alignment. -# -# -# int tent4; -# long tent8; -# __attribute__((visibility("hidden"))) int tentHidden; -# __attribute__((aligned(16))) int tent4_16; -# __attribute__((aligned(32))) long tent64_32[8]; -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __tex - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - address: 0x0000000000000000 -undefined-symbols: - - name: _tent4 - type: N_UNDF - scope: [ N_EXT ] - desc: 0x0200 - value: 0x0000000000000004 - - name: _tent4_16 - type: N_UNDF - scope: [ N_EXT ] - desc: 0x0400 - value: 0x0000000000000004 - - name: _tent64_32 - type: N_UNDF - scope: [ N_EXT ] - desc: 0x0500 - value: 0x0000000000000040 - - name: _tent8 - type: N_UNDF - scope: [ N_EXT ] - desc: 0x0300 - value: 0x0000000000000008 - - name: _tentHidden - type: N_UNDF - scope: [ N_EXT, N_PEXT ] - desc: 0x0200 - value: 0x0000000000000004 -... 
- - -# CHECK: defined-atoms: -# CHECK: name: _tent4 -# CHECK: scope: global -# CHECK: type: zero-fill -# CHECK: size: 4 -# CHECK: merge: as-tentative -# CHECK: alignment: 4 - -# CHECK: name: _tent4_16 -# CHECK: scope: global -# CHECK: type: zero-fill -# CHECK: size: 4 -# CHECK: merge: as-tentative -# CHECK: alignment: 16 - -# CHECK: name: _tent64_32 -# CHECK: scope: global -# CHECK: type: zero-fill -# CHECK: size: 64 -# CHECK: merge: as-tentative -# CHECK: alignment: 32 - -# CHECK: name: _tent8 -# CHECK: scope: global -# CHECK: type: zero-fill -# CHECK: size: 8 -# CHECK: merge: as-tentative -# CHECK: alignment: 8 - -# CHECK: name: _tentHidden -# CHECK: scope: hidden -# CHECK: type: zero-fill -# CHECK: size: 4 -# CHECK: merge: as-tentative -# CHECK: alignment: 4 diff --git a/lld/test/mach-o/parse-text-relocs-arm64.yaml b/lld/test/mach-o/parse-text-relocs-arm64.yaml deleted file mode 100644 index 1c0922bd29f9..000000000000 --- a/lld/test/mach-o/parse-text-relocs-arm64.yaml +++ /dev/null @@ -1,237 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -r -print_atoms %s -o %t | FileCheck %s \ -# RUN: && ld64.lld.darwinold -arch arm64 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing and writing of arm64 text relocations. -# -# The first step tests if the supplied mach-o file is parsed into the correct -# set of references. The second step verifies relocations can be round-tripped -# by writing to a new .o file, then parsing that file which should result in -# the same references. 
-# -#_test: - - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x94, - 0x01, 0x00, 0x00, 0x90, 0x20, 0x00, 0x40, 0x39, - 0x20, 0x00, 0x40, 0x79, 0x20, 0x00, 0x40, 0xB9, - 0x20, 0x00, 0x40, 0xF9, 0x20, 0x00, 0xC0, 0x3D, - 0x01, 0x00, 0x00, 0x90, 0x20, 0x00, 0x40, 0xB9, - 0x01, 0x00, 0x00, 0x90, 0x20, 0x00, 0x40, 0xF9, - 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x40, 0xF9 ] - relocations: - - offset: 0x00000034 - type: ARM64_RELOC_TLVP_LOAD_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 5 - - offset: 0x00000030 - type: ARM64_RELOC_TLVP_LOAD_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x0000002C - type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 6 - - offset: 0x00000028 - type: ARM64_RELOC_GOT_LOAD_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 6 - - offset: 0x00000024 - type: ARM64_RELOC_ADDEND - length: 2 - pc-rel: false - extern: false - symbol: 16 - - offset: 0x00000024 - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000020 - type: ARM64_RELOC_ADDEND - length: 2 - pc-rel: false - extern: false - symbol: 16 - - offset: 0x00000020 - type: ARM64_RELOC_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x0000001C - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000018 - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000014 - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000010 - type: ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x0000000C - type: 
ARM64_RELOC_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 2 - - offset: 0x00000008 - type: ARM64_RELOC_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 2 - - offset: 0x00000004 - type: ARM64_RELOC_ADDEND - length: 2 - pc-rel: false - extern: false - symbol: 8 - - offset: 0x00000004 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - offset: 0x00000000 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 4 - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000038 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: _func - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: _v1 - type: N_SECT - sect: 2 - value: 0x0000000000000038 - - name: ltmp1 - type: N_SECT - sect: 2 - value: 0x0000000000000038 -undefined-symbols: - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _tlv - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _v2 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - name: _v1 -# CHECK: type: data -# CHECK: content: [ 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, -# CHECK: 00, 00, 00, 00 ] -# CHECK: - name: _func -# CHECK: content: [ 00, 00, 00, 94, 00, 00, 00, 94, 01, 00, 00, 90, -# CHECK: 20, 00, 40, 39, 20, 00, 40, 79, 20, 00, 40, B9, -# CHECK: 20, 00, 40, F9, 20, 00, C0, 3D, 01, 00, 00, 90, -# CHECK: 20, 00, 40, B9, 01, 00, 00, 90, 20, 00, 40, F9, -# CHECK: 00, 00, 00, 90, 00, 00, 40, F9 ] -# CHECK: references: -# CHECK: - kind: branch26 -# CHECK: offset: 0 -# CHECK: target: _foo -# CHECK: - kind: branch26 -# CHECK: offset: 4 -# CHECK: target: _foo -# CHECK: addend: 8 -# CHECK: - kind: page21 -# CHECK: offset: 8 -# CHECK: target: _v1 -# CHECK: - kind: offset12 -# CHECK: offset: 12 -# CHECK: target: _v1 -# CHECK: - kind: offset12scale2 -# CHECK: offset: 16 -# CHECK: target: _v1 -# CHECK: - kind: offset12scale4 -# CHECK: offset: 20 -# CHECK: target: _v1 -# CHECK: - kind: offset12scale8 -# CHECK: offset: 24 -# CHECK: target: _v1 -# CHECK: - kind: offset12scale16 -# CHECK: offset: 28 -# CHECK: target: _v1 -# CHECK: - kind: page21 -# CHECK: offset: 32 -# CHECK: target: _v1 -# CHECK: addend: 16 -# CHECK: - kind: offset12scale4 -# CHECK: offset: 36 -# CHECK: target: _v1 -# CHECK: addend: 16 -# CHECK: - kind: gotPage21 -# CHECK: offset: 40 -# CHECK: target: _v2 -# CHECK: - kind: gotOffset12 -# CHECK: offset: 44 -# CHECK: target: _v2 -# CHECK: - kind: tlvPage21 -# CHECK: offset: 48 -# CHECK: target: _tlv -# CHECK: - kind: tlvOffset12 -# CHECK: offset: 52 -# CHECK: target: _tlv -# CHECK: undefined-atoms: -# CHECK: - name: _foo -# CHECK: - name: _tlv -# CHECK: - name: _v2 - diff --git a/lld/test/mach-o/parse-text-relocs-x86_64.yaml b/lld/test/mach-o/parse-text-relocs-x86_64.yaml deleted file mode 100644 index d5ce60dca2e9..000000000000 --- a/lld/test/mach-o/parse-text-relocs-x86_64.yaml +++ /dev/null @@ -1,204 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck 
%s \ -# RUN: && ld64.lld.darwinold -arch x86_64 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing and writing of x86_64 text relocations. -# -# The first step tests if the supplied mach-o file is parsed into the correct -# set of references. The second step verifies relocations can be round-tripped -# by writing to a new .o file, then parsing that file which should result in -# the same references. -# -#_test: -# call _foo -# call _foo+4 -# movq _foo@GOTPCREL(%rip), %rax -# pushq _foo@GOTPCREL(%rip) -# movl _foo(%rip), %eax -# movl _foo+4(%rip), %eax -# movb $0x12, _foo(%rip) -# movw $0x1234, _foo(%rip) -# movl $0x12345678, _foo(%rip) -# movl L2(%rip), %eax -# movb $0x12, L2(%rip) -# movw $0x1234, L2(%rip) -# movl $0x12345678, L2(%rip) -# -# .data -#L2: .long 0 - - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x04, 0x00, - 0x00, 0x00, 0x48, 0x8B, 0x05, 0x04, 0x00, 0x00, - 0x00, 0xFF, 0x35, 0x04, 0x00, 0x00, 0x00, 0x8B, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x8B, 0x05, 0x04, - 0x00, 0x00, 0x00, 0xC6, 0x05, 0xFF, 0xFF, 0xFF, - 0xFF, 0x12, 0x66, 0xC7, 0x05, 0xFE, 0xFF, 0xFF, - 0xFF, 0x34, 0x12, 0xC7, 0x05, 0xFC, 0xFF, 0xFF, - 0xFF, 0x78, 0x56, 0x34, 0x12, 0x8B, 0x05, 0x1A, - 0x00, 0x00, 0x00, 0xc6, 0x05, 0x13, 0x00, 0x00, - 0x00, 0x12, 0x66, 0xc7, 0x05, 0x0a, 0x00, 0x00, - 0x00, 0x34, 0x12, 0xc7, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x78, 0x56, 0x34, 0x12 ] - relocations: - - offset: 0x00000055 - type: X86_64_RELOC_SIGNED_4 - length: 2 - pc-rel: true - extern: false - symbol: 2 - - offset: 0x0000004d - type: X86_64_RELOC_SIGNED_2 - length: 2 - pc-rel: true - extern: false - symbol: 2 - - offset: 0x00000045 - type: X86_64_RELOC_SIGNED_1 - length: 2 - pc-rel: true - extern: false - symbol: 2 - - offset: 0x0000003F - type: X86_64_RELOC_SIGNED - 
length: 2 - pc-rel: true - extern: false - symbol: 2 - - offset: 0x00000035 - type: X86_64_RELOC_SIGNED_4 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000002D - type: X86_64_RELOC_SIGNED_2 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000025 - type: X86_64_RELOC_SIGNED_1 - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000001F - type: X86_64_RELOC_SIGNED - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000019 - type: X86_64_RELOC_SIGNED - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000013 - type: X86_64_RELOC_GOT - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000000D - type: X86_64_RELOC_GOT_LOAD - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000006 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000001 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - address: 0x000000000000005D - content: [ 0x00, 0x00, 0x00, 0x00 ] -local-symbols: - - name: _test - type: N_SECT - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _foo - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: defined-atoms: -# CHECK: - ref-name: [[LABEL:L[0-9]+]] -# CHECK: type: data -# CHECK: content: [ 00, 00, 00, 00 ] -# CHECK: - name: _test -# CHECK: references: -# CHECK: - kind: branch32 -# CHECK: offset: 1 -# CHECK: target: _foo -# CHECK: - kind: branch32 -# CHECK: offset: 6 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: ripRel32GotLoad -# CHECK: offset: 13 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: ripRel32Got -# CHECK: offset: 19 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: ripRel32 -# CHECK: offset: 25 -# CHECK: target: _foo -# CHECK: - kind: ripRel32 -# CHECK: offset: 31 -# CHECK: target: _foo -# CHECK: addend: 4 -# CHECK: - kind: ripRel32Minus1 -# CHECK: offset: 37 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: ripRel32Minus2 -# CHECK: offset: 45 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: ripRel32Minus4 -# CHECK: offset: 53 -# CHECK: target: _foo -# CHECK-NOT: addend: -# CHECK: - kind: ripRel32Anon -# CHECK: offset: 63 -# CHECK: target: [[LABEL]] -# CHECK-NOT: addend: -# CHECK: - kind: ripRel32Minus1Anon -# CHECK: offset: 69 -# CHECK: target: [[LABEL]] -# CHECK-NOT: addend: -# CHECK: - kind: ripRel32Minus2Anon -# CHECK: offset: 77 -# CHECK: target: [[LABEL]] -# CHECK-NOT: addend: -# CHECK: - kind: ripRel32Minus4Anon -# CHECK: offset: 85 -# CHECK: target: [[LABEL]] -# CHECK-NOT: addend: diff --git a/lld/test/mach-o/parse-tlv-relocs-x86-64.yaml b/lld/test/mach-o/parse-tlv-relocs-x86-64.yaml deleted file mode 100644 index 7c968dc094a7..000000000000 --- a/lld/test/mach-o/parse-tlv-relocs-x86-64.yaml +++ /dev/null @@ -1,100 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s \ -# RUN: && ld64.lld.darwinold -arch x86_64 -r -print_atoms %t -o %t2 | FileCheck %s -# -# Test parsing of x86_64 tlv relocations. 
- ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x3D, 0x00, - 0x00, 0x00, 0x00, 0xFF, 0x17, 0x8B, 0x00, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000007 - type: X86_64_RELOC_TLV - length: 2 - pc-rel: true - extern: true - symbol: 2 - - segment: __DATA - section: __thread_data - type: S_THREAD_LOCAL_REGULAR - attributes: [ ] - alignment: 4 - address: 0x0000000000000014 - content: [ 0x07, 0x00, 0x00, 0x00 ] - - segment: __DATA - section: __thread_vars - type: S_THREAD_LOCAL_VARIABLES - attributes: [ ] - address: 0x0000000000000018 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000010 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 3 -local-symbols: - - name: '_x$tlv$init' - type: N_SECT - sect: 2 - value: 0x0000000000000014 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _x - type: N_SECT - scope: [ N_EXT ] - sect: 3 - value: 0x0000000000000018 -undefined-symbols: - - name: __tlv_bootstrap - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -page-size: 0x00000000 -... 
- -# CHECK: - name: _x -# CHECK-NEXT: scope: global -# CHECK-NEXT: type: tlv-thunk -# CHECK-NOT: - name: -# CHECK: references: -# CHECK-NEXT: - kind: pointer64 -# CHECK-NEXT: offset: 0 -# CHECK-NEXT: target: __tlv_bootstrap -# CHECK-NEXT: - kind: tlvInitSectionOffset -# CHECK-NEXT: offset: 16 -# CHECK-NEXT: target: '_x$tlv$init' -# CHECK: - name: _main -# CHECK-NOT: - name: -# CHECK-NEXT: scope: global -# CHECK: references: -# CHECK-NEXT: - kind: ripRel32Tlv -# CHECK-NEXT: offset: 7 -# CHECK-NEXT: target: _x diff --git a/lld/test/mach-o/re-exported-dylib-ordinal.yaml b/lld/test/mach-o/re-exported-dylib-ordinal.yaml deleted file mode 100644 index 8fb5ac8b9967..000000000000 --- a/lld/test/mach-o/re-exported-dylib-ordinal.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/re-exported-dylib-ordinal.yaml \ -# RUN: %p/Inputs/re-exported-dylib-ordinal2.yaml \ -# RUN: %p/Inputs/re-exported-dylib-ordinal3.yaml -dylib -o %t \ -# RUN: && llvm-nm -m %t | FileCheck %s -# -# Test that when one dylib A re-exports dylib B that using a symbol from B -# gets recorded as coming from A. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, - 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000008 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _test - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: (undefined) external _bar (from libfoo) -# CHECK: (undefined) external dyld_stub_binder (from libSystem) diff --git a/lld/test/mach-o/rpath.yaml b/lld/test/mach-o/rpath.yaml deleted file mode 100644 index 604eafefb119..000000000000 --- a/lld/test/mach-o/rpath.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Check we handle -rpath correctly: -# RUN: ld64.lld.darwinold -arch x86_64 -rpath @loader_path/../Frameworks \ -# RUN: %p/Inputs/x86_64/libSystem.yaml %s -o %t -# RUN: llvm-objdump --private-headers %t | FileCheck %s --check-prefix=CHECK-BINARY-WRITE - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 4 - address: 0x0000000000000000 - content: [ 0xCC, 0xC3, 0x90, 0xC3, 0x90, 0x90, 0xC3, 0x90, - 0x90, 0x90, 0xC3, 0x90, 0x90, 0x90, 0x90, 0xC3, - 0x31, 0xC0, 0xC3 ] -local-symbols: - - name: _myStatic - type: N_SECT - sect: 1 - value: 0x000000000000000B -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 -... - - -# CHECK-BINARY-WRITE: cmd LC_RPATH -# CHECK-BINARY-WRITE-NEXT: cmdsize 40 -# CHECK-BINARY-WRITE-NEXT: path @loader_path/../Frameworks (offset 12) diff --git a/lld/test/mach-o/run-tlv-pass-x86-64.yaml b/lld/test/mach-o/run-tlv-pass-x86-64.yaml deleted file mode 100644 index 73b8c37f10af..000000000000 --- a/lld/test/mach-o/run-tlv-pass-x86-64.yaml +++ /dev/null @@ -1,144 +0,0 @@ -# RUN: ld64.lld.darwinold -macosx_version_min 10.7 -arch x86_64 -print_atoms %s -o %t | FileCheck %s -# RUN: not ld64.lld.darwinold -macosx_version_min 10.6 -arch x86_64 -o %t %s 2> %t2 -# RUN: FileCheck < %t2 %s --check-prefix=CHECK-ERROR -# RUN: llvm-objdump --macho --private-headers %t | FileCheck %s --check-prefix=CHECK-LOADCMDS -# -# Test parsing of x86_64 tlv relocations. 
- ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 16 - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x48, 0x8B, 0x3D, 0x00, - 0x00, 0x00, 0x00, 0xFF, 0x17, 0x8B, 0x00, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000007 - type: X86_64_RELOC_TLV - length: 2 - pc-rel: true - extern: true - symbol: 2 - - segment: __DATA - section: __thread_bss - type: S_THREAD_LOCAL_ZEROFILL - attributes: [ ] - alignment: 4 - address: 0x0000000000000014 - size: 4 - - segment: __DATA - section: __thread_vars - type: S_THREAD_LOCAL_VARIABLES - attributes: [ ] - address: 0x0000000000000018 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000010 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 0 - - offset: 0x00000000 - type: X86_64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 3 - - segment: __DATA - section: __dummy - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x00000000000000C0 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] -local-symbols: - - name: '_x$tlv$init' - type: N_SECT - sect: 2 - value: 0x0000000000000014 -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _x - type: N_SECT - scope: [ N_EXT ] - sect: 3 - value: 0x0000000000000018 - - name: '__tlv_bootstrap' - type: N_SECT - scope: [ N_EXT ] - sect: 4 - value: 0x00000000000000C0 - - name: 'dyld_stub_binder' - type: N_SECT - scope: [ N_EXT ] - sect: 4 - value: 
0x00000000000000C8 - - name: 'start' - type: N_SECT - scope: [ N_EXT ] - sect: 4 - value: 0x00000000000000D0 -page-size: 0x00000000 -... - -# CHECK: - name: _x -# CHECK-NEXT: scope: global -# CHECK-NEXT: type: tlv-thunk -# CHECK-NOT: - name: -# CHECK: references: -# CHECK-NEXT: - kind: pointer64 -# CHECK-NEXT: offset: 0 -# CHECK-NEXT: target: __tlv_bootstrap -# CHECK-NEXT: - kind: tlvInitSectionOffset -# CHECK-NEXT: offset: 16 -# CHECK-NEXT: target: '_x$tlv$init' -# CHECK: - name: '_x$tlv$init' -# CHECK-NEXT: type: tlv-zero-fill -# CHECK: - name: _main -# CHECK-NOT: - name: -# CHECK: references: -# CHECK-NEXT: - kind: ripRel32 -# CHECK-NEXT: offset: 7 -# CHECK-NEXT: target: L[[ID:[0-9]+]] -# CHECK: - ref-name: L[[ID]] -# CHECK-NEXT: scope: hidden -# CHECK-NEXT: type: tlv-initializer-ptr -# CHECK-NEXT: content: [ 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK-NEXT: alignment: 8 -# CHECK-NEXT: permissions: rw- -# CHECK-NEXT: references: -# CHECK-NEXT: - kind: pointer64 -# CHECK-NEXT: offset: 0 -# CHECK-NEXT: target: _x - -# CHECK-ERROR: targeted OS version does not support use of thread local variables in _main for architecture x86_64 - -# CHECK-LOADCMDS: sectname __thread_bss -# CHECK-LOADCMDS: segname __DATA -# CHECK-LOADCMDS: addr 0x{{[0-9A-F]*}} -# CHECK-LOADCMDS: size 0x0000000000000004 -# CHECK-LOADCMDS: offset 0 -# CHECK-LOADCMDS: align 2^2 (4) -# CHECK-LOADCMDS: reloff 0 -# CHECK-LOADCMDS: nreloc 0 -# CHECK-LOADCMDS: type S_THREAD_LOCAL_ZEROFILL diff --git a/lld/test/mach-o/sdk-version-error.yaml b/lld/test/mach-o/sdk-version-error.yaml deleted file mode 100644 index c3c497022d84..000000000000 --- a/lld/test/mach-o/sdk-version-error.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -sdk_version 10.blah %s -o %t 2>&1 | FileCheck %s --check-prefix=ERROR - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ 
S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... - -# ERROR: malformed sdkVersion value \ No newline at end of file diff --git a/lld/test/mach-o/sectalign.yaml b/lld/test/mach-o/sectalign.yaml deleted file mode 100644 index 2270faff04e4..000000000000 --- a/lld/test/mach-o/sectalign.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -dylib \ -# RUN: -sectalign __DATA __custom 0x800 -sectalign __TEXT __text 0x400 \ -# RUN: %p/Inputs/x86_64/libSystem.yaml -o %t \ -# RUN: && llvm-readobj --sections %t | FileCheck %s -# -# Test -sectalign option on __text and a custom section. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x8B, 0x05, 0x00, 0x00, - 0x00, 0x00, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00, - 0x5D, 0xC3 ] - relocations: - - offset: 0x0000000C - type: X86_64_RELOC_SIGNED - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x00000006 - type: X86_64_RELOC_SIGNED - length: 2 - pc-rel: true - extern: true - symbol: 2 - - segment: __DATA - section: __data - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000014 - content: [ 0x0A, 0x00, 0x00, 0x00 ] - - segment: __DATA - section: __custom - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000018 - content: [ 0x0A, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _a - type: N_SECT - scope: [ N_EXT ] - sect: 2 - value: 0x0000000000000014 - - name: _b - type: N_SECT - scope: [ N_EXT ] - sect: 3 - value: 0x0000000000000018 - - name: _get - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 
0x0000000000000000 - -... - - -# CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00) -# CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00) -# CHECK: Address: 0xC00 - -# CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00) -# CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) -# CHECK: Address: 0x1000 - -# CHECK: Name: __custom (5F 5F 63 75 73 74 6F 6D 00 00 00 00 00 00 00 00) -# CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) -# CHECK: Address: 0x1800 - diff --git a/lld/test/mach-o/sectattrs.yaml b/lld/test/mach-o/sectattrs.yaml deleted file mode 100644 index b2a71720c5eb..000000000000 --- a/lld/test/mach-o/sectattrs.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -dylib \ -# RUN: %p/Inputs/x86_64/libSystem.yaml -o %t \ -# RUN: && llvm-objdump --private-headers %t | FileCheck %s -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x8B, 0x05, 0x00, 0x00, - 0x00, 0x00, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00, - 0x5D, 0xC3 ] -global-symbols: - - name: _get - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - -... 
- - -# CHECK: PURE_INSTRUCTIONS SOME_INSTRUCTIONS - diff --git a/lld/test/mach-o/sectcreate.yaml b/lld/test/mach-o/sectcreate.yaml deleted file mode 100644 index 1252b492dda5..000000000000 --- a/lld/test/mach-o/sectcreate.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# RUN: ld64.lld.darwinold -r -arch x86_64 -o %t -sectcreate __DATA __data \ -# RUN: %p/Inputs/hw.raw_bytes -print_atoms | FileCheck %s - -# CHECK: --- !native -# CHECK: path: '' -# CHECK: defined-atoms: -# CHECK: - scope: global -# CHECK: type: sectcreate -# CHECK: content: [ 68, 65, 6C, 6C, 6F, 0A ] -# CHECK: section-choice: custom-required -# CHECK: section-name: '__DATA/__data' -# CHECK: dead-strip: never diff --git a/lld/test/mach-o/seg-protection-arm64.yaml b/lld/test/mach-o/seg-protection-arm64.yaml deleted file mode 100644 index 0a17574ef32b..000000000000 --- a/lld/test/mach-o/seg-protection-arm64.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 %s %p/Inputs/hello-world-arm64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: start - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - -... 
- -# CHECK: Load command 0 -# CHECK: cmd LC_SEGMENT_64 -# CHECK: cmdsize 72 -# CHECK: segname __PAGEZERO -# CHECK: vmaddr -# CHECK: vmsize -# CHECK: fileoff -# CHECK: filesize -# CHECK: maxprot --- -# CHECK: initprot --- -# CHECK: nsects 0 -# CHECK: flags (none) -# CHECK: Load command 1 -# CHECK: cmd LC_SEGMENT_64 -# CHECK: cmdsize 152 -# CHECK: segname __TEXT -# CHECK: vmaddr -# CHECK: vmsize -# CHECK: fileoff -# CHECK: filesize -# CHECK: maxprot r-x -# CHECK: initprot r-x -# CHECK: nsects 1 -# CHECK: flags (none) -# CHECK: Section -# CHECK: sectname __text -# CHECK: segname __TEXT -# CHECK: addr -# CHECK: size -# CHECK: offset -# CHECK: align 2^0 (1) -# CHECK: reloff 0 -# CHECK: nreloc 0 -# CHECK: type S_REGULAR -# CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS -# CHECK: reserved1 0 -# CHECK: reserved2 0 -# CHECK: Load command 2 -# CHECK: cmd LC_SEGMENT_64 -# CHECK: cmdsize 72 -# CHECK: segname __LINKEDIT -# CHECK: vmaddr -# CHECK: vmsize -# CHECK: fileoff -# CHECK: filesize -# CHECK: maxprot r-- -# CHECK: initprot r-- -# CHECK: nsects 0 -# CHECK: flags (none) diff --git a/lld/test/mach-o/seg-protection-x86_64.yaml b/lld/test/mach-o/seg-protection-x86_64.yaml deleted file mode 100644 index b68929d69e61..000000000000 --- a/lld/test/mach-o/seg-protection-x86_64.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/hello-world-x86_64.yaml -o %t && llvm-objdump --private-headers %t | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: start - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000001 - -... 
- -# CHECK: Load command 0 -# CHECK: cmd LC_SEGMENT_64 -# CHECK: cmdsize 72 -# CHECK: segname __PAGEZERO -# CHECK: vmaddr -# CHECK: vmsize -# CHECK: fileoff -# CHECK: filesize -# CHECK: maxprot --- -# CHECK: initprot --- -# CHECK: nsects 0 -# CHECK: flags (none) -# CHECK: Load command 1 -# CHECK: cmd LC_SEGMENT_64 -# CHECK: cmdsize 152 -# CHECK: segname __TEXT -# CHECK: vmaddr -# CHECK: vmsize -# CHECK: fileoff -# CHECK: filesize -# CHECK: maxprot rwx -# CHECK: initprot r-x -# CHECK: nsects 1 -# CHECK: flags (none) -# CHECK: Section -# CHECK: sectname __text -# CHECK: segname __TEXT -# CHECK: addr -# CHECK: size -# CHECK: offset -# CHECK: align 2^0 (1) -# CHECK: reloff 0 -# CHECK: nreloc 0 -# CHECK: type S_REGULAR -# CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS -# CHECK: reserved1 0 -# CHECK: reserved2 0 -# CHECK: Load command 2 -# CHECK: cmd LC_SEGMENT_64 -# CHECK: cmdsize 72 -# CHECK: segname __LINKEDIT -# CHECK: vmaddr -# CHECK: vmsize -# CHECK: fileoff -# CHECK: filesize -# CHECK: maxprot rwx -# CHECK: initprot r-- -# CHECK: nsects 0 -# CHECK: flags (none) diff --git a/lld/test/mach-o/source-version.yaml b/lld/test/mach-o/source-version.yaml deleted file mode 100644 index 6a19ec6fa0b0..000000000000 --- a/lld/test/mach-o/source-version.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -source_version 10.blah %s -o %t 2>&1 | FileCheck %s --check-prefix=ERROR -# RUN: ld64.lld.darwinold -arch x86_64 -source_version 10.1.2.3.4 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... 
- -# ERROR: malformed source_version value - -# CHECK: Load command {{[0-9]*}} -# CHECK: cmd LC_SOURCE_VERSION -# CHECK: cmdsize 16 -# CHECK: version 10.1.2.3.4 \ No newline at end of file diff --git a/lld/test/mach-o/stack-size.yaml b/lld/test/mach-o/stack-size.yaml deleted file mode 100644 index 35a1700b7dd5..000000000000 --- a/lld/test/mach-o/stack-size.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 %s -o %t %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --private-headers %t | FileCheck --check-prefix=CHECK-DEFAULT %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 %s -o %t -stack_size 31415926000 %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --private-headers %t | FileCheck --check-prefix=CHECK-EXPLICIT %s -# RUN: not ld64.lld.darwinold -arch x86_64 -stack_size 0x31415926530 %s >/dev/null 2> %t -# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-MISPAGED -# RUN: not ld64.lld.darwinold -arch x86_64 -stack_size hithere %s >/dev/null 2> %t -# RUN: FileCheck < %t %s --check-prefix=CHECK-ERROR-NOTHEX - ---- !native -defined-atoms: - - name: _main - scope: global - content: [] - -# CHECK-DEFAULT: cmd LC_MAIN -# CHECK-DEFAULT: stacksize 0 - -# CHECK-EXPLICIT: cmd LC_MAIN -# CHECK-EXPLICIT: stacksize 3384796143616 - -# CHECK-ERROR-MISPAGED: error: stack_size must be a multiple of page size (0x1000) - -# CHECK-ERROR-NOTHEX: error: stack_size expects a hex number diff --git a/lld/test/mach-o/string-table.yaml b/lld/test/mach-o/string-table.yaml deleted file mode 100644 index d22c13945b42..000000000000 --- a/lld/test/mach-o/string-table.yaml +++ /dev/null @@ -1,66 +0,0 @@ -# RUN: ld64.lld.darwinold -arch i386 %s %p/Inputs/hello-world-x86.yaml -o %t -# RUN: obj2yaml %t | FileCheck %s -# -# Test that the string table contains a ' ' as its first symbol -# - ---- !mach-o -arch: x86 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - 
type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x89, 0xE5, 0x83, 0xEC, 0x08, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0x58, 0x8D, 0x80, 0x16, 0x00, - 0x00, 0x00, 0x89, 0x04, 0x24, 0xE8, 0xE6, 0xFF, - 0xFF, 0xFF, 0x31, 0xC0, 0x83, 0xC4, 0x08, 0x5D, - 0xC3 ] - relocations: - - offset: 0x00000016 - type: GENERIC_RELOC_VANILLA - length: 2 - pc-rel: true - extern: true - symbol: 1 - - offset: 0x0000000E - scattered: true - type: GENERIC_RELOC_LOCAL_SECTDIFF - length: 2 - pc-rel: false - value: 0x00000021 - - offset: 0x00000000 - scattered: true - type: GENERIC_RELOC_PAIR - length: 2 - pc-rel: false - value: 0x0000000B - - segment: __TEXT - section: __cstring - type: S_CSTRING_LITERALS - attributes: [ ] - address: 0x0000000000000021 - content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _printf - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... 
- -# CHECK: StringTable: -# CHECK-NEXT: - ' ' -# CHECK-NEXT: - __mh_execute_header -# CHECK-NEXT: - _main -# CHECK-NEXT: - _printf -# CHECK-NEXT: - dyld_stub_binder -# CHECK-NEXT: - '' diff --git a/lld/test/mach-o/stub-link.s b/lld/test/mach-o/stub-link.s deleted file mode 100644 index e0b053605582..000000000000 --- a/lld/test/mach-o/stub-link.s +++ /dev/null @@ -1,21 +0,0 @@ -# REQUIRES: x86 - -# RUN: mkdir -p %t -# -# RUN: llvm-mc -filetype obj -triple x86_64-apple-darwin %s -o %t/test.o -# RUN: ld64.lld.darwinold -o %t/test -Z -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem %t/test.o -# -# RUN: llvm-objdump --bind --no-show-raw-insn -d -r %t/test | FileCheck %s - -# CHECK: Disassembly of section __TEXT,__text: -# CHECK: movq {{.*}} # [[ADDR:[0-9a-f]+]] - -# CHECK: Bind table: -# CHECK: __DATA __got 0x[[ADDR]] pointer 0 libSystem ___nan - -.section __TEXT,__text -.global _main - -_main: - movq ___nan@GOTPCREL(%rip), %rax - ret diff --git a/lld/test/mach-o/subsections-via-symbols-default.yaml b/lld/test/mach-o/subsections-via-symbols-default.yaml deleted file mode 100644 index 44e11bcc6df7..000000000000 --- a/lld/test/mach-o/subsections-via-symbols-default.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# RUN: ld64.lld.darwinold -ios_simulator_version_min 5.0 -arch x86_64 -r %s -o %t -# RUN: llvm-readobj --file-headers %t | FileCheck %s - -# Make sure that we have an objc image info in the output. It should have -# been generated by the objc pass. - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -compat-version: 0.0 -current-version: 0.0 -has-UUID: false -OS: unknown -sections: - - segment: __DATA - section: __objc_imageinfo - type: S_REGULAR - attributes: [ S_ATTR_NO_DEAD_STRIP ] - address: 0x0000000000000100 - content: [ 0x00, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00 ] -... - -# The ObjC pass creates a new image info in a new MachoFile internal to the pass. 
-# Make sure that we still have MH_SUBSECTIONS_VIA_SYMBOLS in the output file, even -# though that file in the ObjCPass didn't get it set from being parsed. - -# CHECK: MH_SUBSECTIONS_VIA_SYMBOLS \ No newline at end of file diff --git a/lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml b/lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml deleted file mode 100644 index cc0e61b7b7d9..000000000000 --- a/lld/test/mach-o/twolevel_namespace_undef_dynamic_lookup.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -twolevel_namespace -undefined dynamic_lookup %s -o %t %p/Inputs/x86_64/libSystem.yaml -# -# Sanity check '-twolevel_namespace -undefined dynamic_lookup'. -# This should pass without error, even though '_bar' is undefined. - ---- !native -defined-atoms: - - name: _main - scope: global - content: [ E9, 00, 00, 00, 00 ] - alignment: 16 - references: - - kind: branch32 - offset: 1 - target: _bar -undefined-atoms: - - name: _bar diff --git a/lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml b/lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml deleted file mode 100644 index 1195c090f86a..000000000000 --- a/lld/test/mach-o/twolevel_namespace_undef_warning_suppress.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -twolevel_namespace -undefined warning %s -o %t %p/Inputs/x86_64/libSystem.yaml 2>&1 | \ -# RUN: FileCheck --check-prefix=CHECK-WARNING %s -# RUN: not ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.9 -twolevel_namespace -undefined suppress %s -o %t %p/Inputs/x86_64/libSystem.yaml 2>&1 | \ -# RUN: FileCheck --check-prefix=CHECK-SUPPRESS %s - ---- !native -defined-atoms: - - name: _main - scope: global - content: [ E9, 00, 00, 00, 00 ] - alignment: 16 - references: - - kind: branch32 - offset: 1 - target: _bar -undefined-atoms: - - name: _bar - -# Make sure that the driver issues an error diagnostic about 
this combination -# being invalid. -# -# CHECK-WARNING: can't use -undefined warning or suppress with -twolevel_namespace -# CHECK-SUPPRESS: can't use -undefined warning or suppress with -twolevel_namespace \ No newline at end of file diff --git a/lld/test/mach-o/unwind-info-simple-arm64.yaml b/lld/test/mach-o/unwind-info-simple-arm64.yaml deleted file mode 100644 index 75d817a57989..000000000000 --- a/lld/test/mach-o/unwind-info-simple-arm64.yaml +++ /dev/null @@ -1,267 +0,0 @@ -# RUN: ld64.lld.darwinold -arch arm64 -o %t %s \ -# RUN: %p/Inputs/unwind-info-simple-arm64.yaml -e _main %p/Inputs/arm64/libSystem.yaml -# RUN: llvm-objdump --unwind-info %t | FileCheck %s - ---- !mach-o -arch: arm64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - alignment: 2 - address: 0x0000000000000000 - content: [ 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0xE0, 0x03, 0x1E, 0x32, 0x00, 0x00, 0x00, 0x94, - 0x48, 0x01, 0x80, 0x52, 0x08, 0x00, 0x00, 0xB9, - 0x02, 0x00, 0x80, 0xD2, 0x01, 0x00, 0x00, 0x90, - 0x21, 0x00, 0x40, 0xF9, 0x00, 0x00, 0x00, 0x94, - 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0xE0, 0x03, 0x1E, 0x32, 0x00, 0x00, 0x00, 0x94, - 0x48, 0x01, 0x80, 0x52, 0x08, 0x00, 0x00, 0xB9, - 0x02, 0x00, 0x80, 0xD2, 0x01, 0x00, 0x00, 0x90, - 0x21, 0x00, 0x40, 0xF9, 0x00, 0x00, 0x00, 0x94, - 0x3F, 0x04, 0x00, 0x71, 0x81, 0x00, 0x00, 0x54, - 0x00, 0x00, 0x00, 0x94, 0xFD, 0x7B, 0xC1, 0xA8, - 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x94, - 0xFD, 0x7B, 0xBF, 0xA9, 0xFD, 0x03, 0x00, 0x91, - 0x00, 0x00, 0x00, 0x94 ] - relocations: - - offset: 0x00000070 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 5 - - offset: 0x00000064 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 7 - - offset: 0x00000060 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: 
true - symbol: 12 - - offset: 0x00000058 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 11 - - offset: 0x0000004C - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 13 - - offset: 0x00000048 - type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 8 - - offset: 0x00000044 - type: ARM64_RELOC_GOT_LOAD_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 8 - - offset: 0x00000034 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 10 - - offset: 0x00000024 - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 13 - - offset: 0x00000020 - type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 - length: 2 - pc-rel: false - extern: true - symbol: 8 - - offset: 0x0000001C - type: ARM64_RELOC_GOT_LOAD_PAGE21 - length: 2 - pc-rel: true - extern: true - symbol: 8 - - offset: 0x0000000C - type: ARM64_RELOC_BRANCH26 - length: 2 - pc-rel: true - extern: true - symbol: 10 - - segment: __TEXT - section: __gcc_except_tab - type: S_REGULAR - attributes: [ ] - alignment: 2 - address: 0x0000000000000074 - content: [ 0xFF, 0x9B, 0xAF, 0x80, 0x00, 0x03, 0x27, 0x00, - 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x01, 0x28, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, - 0xD0, 0xFF, 0xFF, 0xFF ] - relocations: - - offset: 0x00000030 - type: ARM64_RELOC_POINTER_TO_GOT - length: 2 - pc-rel: true - extern: true - symbol: 9 - - segment: __LD - section: __compact_unwind - type: S_REGULAR - attributes: [ ] - alignment: 8 - address: 0x00000000000000A8 - content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 
0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000040 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000038 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 2 - - offset: 0x00000030 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: true - symbol: 14 - - offset: 0x00000020 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 - - offset: 0x00000000 - type: ARM64_RELOC_UNSIGNED - length: 3 - pc-rel: false - extern: false - symbol: 1 -local-symbols: - - name: ltmp0 - type: N_SECT - sect: 1 - value: 0x0000000000000000 - - name: ltmp14 - type: N_SECT - sect: 2 - value: 0x0000000000000074 - - name: GCC_except_table1 - type: N_SECT - sect: 2 - value: 0x0000000000000074 - - name: ltmp21 - type: N_SECT - sect: 3 - value: 0x00000000000000A8 -global-symbols: - - name: __Z3barv - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000028 - - name: __Z3foov - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000068 -undefined-symbols: - - name: __Unwind_Resume - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: __ZTIi - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: __ZTIl - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_allocate_exception - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_begin_catch - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_end_catch - type: N_UNDF - 
scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___cxa_throw - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: ___gxx_personality_v0 - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... - - -# CHECK: Contents of __unwind_info section: -# CHECK: Version: 0x1 -# CHECK: Common encodings array section offset: 0x1c -# CHECK: Number of common encodings in array: 0x0 -# CHECK: Personality function array section offset: 0x1c -# CHECK: Number of personality functions in array: 0x1 -# CHECK: Index array section offset: 0x20 -# CHECK: Number of indices in array: 0x2 -# CHECK: Common encodings: (count = 0) -# CHECK: Personality functions: (count = 1) -# CHECK: personality[1]: 0x00004020 -# CHECK: Top level indices: (count = 2) -# CHECK: [0]: function offset=0x00003e68, 2nd level page offset=0x00000040, LSDA offset=0x00000038 -# CHECK: [1]: function offset=0x00003edc, 2nd level page offset=0x00000000, LSDA offset=0x00000040 -# CHECK: LSDA descriptors: -# CHECK: [0]: function offset=0x00003e90, LSDA offset=0x00003f6c -# CHECK: Second level indices: -# CHECK: Second level index[0]: offset in section=0x00000040, base function offset=0x00003e68 -# CHECK: [0]: function offset=0x00003e68, encoding=0x04000000 -# CHECK: [1]: function offset=0x00003e90, encoding=0x54000000 -# CHECK: [2]: function offset=0x00003ed0, encoding=0x04000000 -# CHECK-NOT: Contents of __compact_unwind section - - - diff --git a/lld/test/mach-o/unwind-info-simple-x86_64.yaml b/lld/test/mach-o/unwind-info-simple-x86_64.yaml deleted file mode 100644 index a711f4457c2a..000000000000 --- a/lld/test/mach-o/unwind-info-simple-x86_64.yaml +++ /dev/null @@ -1,133 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -e _main %p/Inputs/x86_64/libSystem.yaml -# RUN: llvm-objdump --unwind-info %t | FileCheck %s - -# CHECK: Contents of __unwind_info section: -# CHECK: Version: 0x1 -# CHECK: Common encodings array section offset: 0x1c -# CHECK: Number of common encodings in 
array: 0x0 -# CHECK: Personality function array section offset: 0x1c -# CHECK: Number of personality functions in array: 0x1 -# CHECK: Index array section offset: 0x20 -# CHECK: Number of indices in array: 0x2 -# CHECK: Common encodings: (count = 0) -# CHECK: Personality functions: (count = 1) -# CHECK: personality[1]: 0x00001000 -# CHECK: Top level indices: (count = 2) -# CHECK: [0]: function offset=0x00000efb, 2nd level page offset=0x00000040, LSDA offset=0x00000038 -# CHECK: [1]: function offset=0x00000f00, 2nd level page offset=0x00000000, LSDA offset=0x00000040 -# CHECK: LSDA descriptors: -# CHECK: [0]: function offset=0x00000efb, LSDA offset=0x00000f00 -# CHECK: Second level indices: -# CHECK: Second level index[0]: offset in section=0x00000040, base function offset=0x00000efb -# CHECK: [0]: function offset=0x00000efb, encoding=0x51000000 -# CHECK: [1]: function offset=0x00000efc, encoding=0x01000000 -# CHECK: [2]: function offset=0x00000efd, encoding=0x04000018 -# CHECK: [3]: function offset=0x00000efe, encoding=0x04000040 -# CHECK: [4]: function offset=0x00000eff, encoding=0x00000000 -# CHECK-NOT: Contents of __compact_unwind section - ---- !native -path: '' -defined-atoms: - - name: GCC_except_table1 - type: unwind-lsda - content: [ FF, 9B, A2, 80, 80, 00, 03, 1A, 08, 00, 00, 00, - 05, 00, 00, 00, 1A, 00, 00, 00, 01, 0D, 00, 00, - 00, 64, 00, 00, 00, 00, 00, 00, 00, 00, 01, 00, - 04, 00, 00, 00 ] - - type: compact-unwind - content: [ 40, 00, 00, 00, 00, 00, 00, 00, 01, 00, 00, 00, - 00, 00, 00, 41, 00, 00, 00, 00, 00, 00, 00, 00, - E0, 00, 00, 00, 00, 00, 00, 00 ] - references: - - kind: pointer64Anon - offset: 0 - target: __Z3barv - - kind: pointer64 - offset: 16 - target: ___gxx_personality_v0 - - kind: pointer64Anon - offset: 24 - target: GCC_except_table1 - - type: compact-unwind - content: [ C0, 00, 00, 00, 00, 00, 00, 00, 01, 00, 00, 00, - 00, 00, 00, 01, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00 ] - references: - - kind: 
pointer64Anon - offset: 0 - target: _main - - type: compact-unwind - content: [ C1, 00, 00, 00, 00, 00, 00, 00, 01, 00, 00, 00, - 00, 00, 00, 04, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00 ] - references: - - kind: pointer64Anon - offset: 0 - target: _needsDwarfButNoCompactUnwind - -# Generic x86_64 CIE: - - name: LCIE - type: unwind-cfi - content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00, - 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ] - - - type: unwind-cfi - content: [ 24, 00, 00, 00, 1C, 00, 00, 00, C8, FE, FF, FF, - FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, - 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, - 00, 00, 00, 00 ] - references: - - kind: unwindFDEToFunction - offset: 8 - target: _needsDwarfButNoCompactUnwind - - kind: negDelta32 - offset: 4 - target: LCIE - - - type: unwind-cfi - content: [ 24, 00, 00, 00, 44, 00, 00, 00, C8, FE, FF, FF, - FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, - 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, - 00, 00, 00, 00 ] - references: - - kind: unwindFDEToFunction - offset: 8 - target: _needsDwarfSaysCompactUnwind - - kind: negDelta32 - offset: 4 - target: LCIE - - - type: unwind-cfi - content: [ 24, 00, 00, 00, 6C, 00, 00, 00, C8, FE, FF, FF, - FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, - 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, - 00, 00, 00, 00 ] - references: - - kind: unwindFDEToFunction - offset: 8 - target: _main - - kind: negDelta32 - offset: 4 - target: LCIE - - - name: __Z3barv - scope: global - content: [ C3 ] - - name: _main - scope: global - content: [ C3 ] - - name: _needsDwarfButNoCompactUnwind - scope: global - content: [ C3 ] - - name: _needsDwarfSaysCompactUnwind - scope: global - content: [ C3 ] - - name: _noUnwindData - scope: global - content: [ C3 ] - -shared-library-atoms: - - name: ___gxx_personality_v0 - load-name: '/usr/lib/libc++abi.dylib' - type: unknown diff --git a/lld/test/mach-o/upward-dylib-load-command.yaml 
b/lld/test/mach-o/upward-dylib-load-command.yaml deleted file mode 100644 index 6dbeb44895a4..000000000000 --- a/lld/test/mach-o/upward-dylib-load-command.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %p/Inputs/bar.yaml \ -# RUN: -install_name /usr/lib/libbar.dylib %p/Inputs/x86_64/libSystem.yaml -o %t1.dylib -# RUN: ld64.lld.darwinold -arch x86_64 -dylib %s -upward_library %t1.dylib \ -# RUN: -install_name /usr/lib/libfoo.dylib %p/Inputs/x86_64/libSystem.yaml -o %t -# RUN: llvm-objdump --private-headers %t | FileCheck %s -# -# -# Test upward linking: 1) build libbar.dylib, 2) build libfoo.dylib and upward -# like with libbar.dylib, 3) dump load commands of libfoo and verify upward link. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x5D, 0xE9, - 0x00, 0x00, 0x00, 0x00 ] - relocations: - - offset: 0x00000008 - type: X86_64_RELOC_BRANCH - length: 2 - pc-rel: true - extern: true - symbol: 1 -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _bar - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - -... 
- - -# CHECK: cmd LC_LOAD_UPWARD_DYLIB -# CHECK-NEXT: cmdsize 48 -# CHECK-NEXT: name /usr/lib/libbar.dylib (offset 24) diff --git a/lld/test/mach-o/upward-dylib-paths.yaml b/lld/test/mach-o/upward-dylib-paths.yaml deleted file mode 100644 index 509edd3d5ca8..000000000000 --- a/lld/test/mach-o/upward-dylib-paths.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# -# -# RUN: ld64.lld.darwinold -arch x86_64 -r -test_file_usage -v \ -# RUN: -path_exists /Custom/Frameworks \ -# RUN: -path_exists /Custom/Frameworks/Bar.framework/Bar \ -# RUN: -path_exists /usr/lib \ -# RUN: -path_exists /usr/lib/libfoo.dylib \ -# RUN: -path_exists /opt/stuff/libstuff.dylib \ -# RUN: -F/Custom/Frameworks \ -# RUN: -upward_framework Bar \ -# RUN: -upward-lfoo \ -# RUN: -upward_library /opt/stuff/libstuff.dylib \ -# RUN: 2>&1 | FileCheck %s - -# CHECK: Found upward framework /Custom/Frameworks/Bar.framework/Bar -# CHECK: Found upward library /usr/lib/libfoo.dylib - - diff --git a/lld/test/mach-o/usage.yaml b/lld/test/mach-o/usage.yaml deleted file mode 100644 index efae4d0d3144..000000000000 --- a/lld/test/mach-o/usage.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# RUN: not ld64.lld.darwinold | FileCheck %s -# -# Test that running darwin linker with no option prints out usage message. -# - - -# CHECK: USAGE: -# CHECK: -arch diff --git a/lld/test/mach-o/use-dylib.yaml b/lld/test/mach-o/use-dylib.yaml deleted file mode 100644 index 5717a9316fb7..000000000000 --- a/lld/test/mach-o/use-dylib.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s \ -# RUN: %p/Inputs/use-simple-dylib.yaml %p/Inputs/x86_64/libSystem.yaml -dylib -o %t.dylib -# RUN: llvm-objdump --private-headers %t.dylib | FileCheck %s - -# This test ensures that we have a LC_LOAD_DYLIB for libspecial.dylib even though we don't -# use any atoms from it. This matches the ld64 behaviour. 
---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, - 0xE8, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, - 0x00, 0x00, 0x00 ] -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 - - -# CHECK: cmd LC_LOAD_DYLIB -# CHECK: name libspecial.dylib (offset 24) -# CHECK: current version 1.0.0 -# CHECK: compatibility version 1.0.0 -# CHECK: cmd LC_LOAD_DYLIB -# CHECK: name /usr/lib/libSystem.B.dylib (offset 24) -# CHECK: current version 1.0.0 -# CHECK: compatibility version 1.0.0 diff --git a/lld/test/mach-o/use-simple-dylib.yaml b/lld/test/mach-o/use-simple-dylib.yaml deleted file mode 100644 index 62fb18f1f089..000000000000 --- a/lld/test/mach-o/use-simple-dylib.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -print_atoms -r %s \ -# RUN: %p/Inputs/use-simple-dylib.yaml -o %t | FileCheck %s - - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x55, 0x48, 0x89, 0xE5, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, - 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x00, - 0xE8, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xE9, 0x00, - 0x00, 0x00, 0x00 ] -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -undefined-symbols: - - name: _myGlobal - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _myGlobalWeak - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _myHidden - type: 
N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _myHiddenWeak - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _myResolver - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _myStatic - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 - - name: _myVariablePreviouslyKnownAsPrivateExtern - type: N_UNDF - scope: [ N_EXT ] - value: 0x0000000000000000 -... - - -# CHECK: undefined-atoms: -# CHECK: - name: _myStatic -# CHECK: - name: _myVariablePreviouslyKnownAsPrivateExtern -# CHECK: shared-library-atoms: -# CHECK: - name: _myGlobal -# CHECK: load-name: libspecial.dylib -# CHECK: - name: _myGlobalWeak -# CHECK: load-name: libspecial.dylib -# CHECK: - name: _myHidden -# CHECK: load-name: libspecial.dylib -# CHECK: - name: _myHiddenWeak -# CHECK: load-name: libspecial.dylib -# CHECK: - name: _myResolver -# CHECK: load-name: libspecial.dylib diff --git a/lld/test/mach-o/version-min-load-command-object.yaml b/lld/test/mach-o/version-min-load-command-object.yaml deleted file mode 100644 index 8d9089bc6c01..000000000000 --- a/lld/test/mach-o/version-min-load-command-object.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -r -macosx_version_min 10.8 && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -r && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 %s -o %t -r %p/Inputs/no-version-min-load-command-object.yaml && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN - -# If we are emitting an object file, then we only emit a min version load command if the source object file(s) all have -# version(s) and either known platforms or contain min version load commands themselves. 
- ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -min-os-version-kind: LC_VERSION_MIN_MACOSX -min-os-version: 10.8 -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... - -# CHECK: Load command {{[0-9]*}} -# CHECK: cmd LC_VERSION_MIN_MACOSX -# CHECK: cmdsize 16 -# CHECK: version 10.8 -# CHECK: sdk n/a - -# NO_VERSION_MIN-NOT: LC_VERSION_MIN_MACOSX \ No newline at end of file diff --git a/lld/test/mach-o/version-min-load-command.yaml b/lld/test/mach-o/version-min-load-command.yaml deleted file mode 100644 index e31319dfb790..000000000000 --- a/lld/test/mach-o/version-min-load-command.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml 2>&1 | FileCheck %s --check-prefix=WARNING -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -version_load_command && llvm-objdump --private-headers %t | FileCheck %s -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -no_version_load_command && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml -static -version_load_command -no_version_load_command && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 %s -o %t -dylib 
%p/Inputs/x86_64/libSystem.yaml -static && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=NO_VERSION_MIN - -# RUN: ld64.lld.darwinold -arch x86_64 -macosx_version_min 10.8 -sdk_version 10.9 %s -o %t -dylib %p/Inputs/x86_64/libSystem.yaml && llvm-objdump --private-headers %t | FileCheck %s --check-prefix=SDK_VERSION - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0x00, 0x00, 0x00, 0x00 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... - -# CHECK: Load command {{[0-9]*}} -# CHECK: cmd LC_VERSION_MIN_MACOSX -# CHECK: cmdsize 16 -# CHECK: version 10.8 -# CHECK: sdk 10.8 - -# SDK_VERSION: Load command {{[0-9]*}} -# SDK_VERSION: cmd LC_VERSION_MIN_MACOSX -# SDK_VERSION: cmdsize 16 -# SDK_VERSION: version 10.8 -# SDK_VERSION: sdk 10.9 - -# WARNING: warning: -sdk_version is required when emitting min version load command. 
Setting sdk version to match provided min version - -# NO_VERSION_MIN-NOT: LC_VERSION_MIN_MACOSX diff --git a/lld/test/mach-o/write-final-sections.yaml b/lld/test/mach-o/write-final-sections.yaml deleted file mode 100644 index d0c0f3c8b777..000000000000 --- a/lld/test/mach-o/write-final-sections.yaml +++ /dev/null @@ -1,165 +0,0 @@ -# RUN: ld64.lld.darwinold -arch x86_64 %s %p/Inputs/write-final-sections.yaml \ -# RUN: -o %t -e _foo -# RUN: llvm-readobj --sections --section-data %t | FileCheck %s - ---- !native -defined-atoms: -# For __TEXT, __text (with typeCode) - - name: _foo - scope: global - content: [ 55 ] -# CHECK: Name: __text -# CHECK: Segment: __TEXT -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 55 -# CHECK-NEXT: ) - -# For __TEXT, __const (with typeConstant), - - type: constant - content: [ 01, 00, 00, 00 ] -# From __TEXT, __literal4, (with typeLiteral4) - - scope: hidden - type: const-4-byte - content: [ 02, 00, 00, 00 ] -# From __TEXT, __literal8, (with typeLiteral8) - - scope: hidden - type: const-8-byte - content: [ 03, 00, 00, 00, 00, 00, 00, 00 ] -# From __TEXT, __literal16, (with typeLiteral16) - - scope: hidden - type: const-16-byte - content: [ 04, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: Name: __const -# CHECK: Segment: __TEXT -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 01000000 02000000 03000000 00000000 -# CHECK-NEXT: 0010: 04000000 00000000 00000000 00000000 -# CHECK-NEXT: ) - -# For __TEXT, __cstring (with typeCString) - - scope: hidden - type: c-string - content: [ 57, 69, 62, 62, 6C, 65, 00 ] - merge: by-content -# CHECK: Name: __cstring -# CHECK: Segment: __TEXT -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 57696262 6C6500 -# CHECK-NEXT: ) - -# For __TEXT, __ustring (with typeUTF16String) - - scope: hidden - type: utf16-string - content: [ 05, 00 ] - merge: by-content -# CHECK: Name: __ustring -# CHECK: Segment: __TEXT -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 0500 -# CHECK-NEXT: ) - -# For __TEXT, 
__gcc_except_tab, (with typeLSDA) - - name: GCC_except_table0 - type: unwind-lsda - content: [ 06, 00 ] -# CHECK: Name: __gcc_except_tab -# CHECK: Segment: __TEXT -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 0600 -# CHECK-NEXT: ) - -# For __TEXT, __eh_frame, (with typeCFI) - - name: LCIE - type: unwind-cfi - content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00, - 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ] - - - type: unwind-cfi - content: [ 24, 00, 00, 00, 1C, 00, 00, 00, C8, FE, FF, FF, - FF, FF, FF, FF, 01, 00, 00, 00, 00, 00, 00, 00, - 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00, - 00, 00, 00, 00 ] - references: - - kind: unwindFDEToFunction - offset: 8 - target: _foo - - kind: negDelta32 - offset: 4 - target: LCIE - -# CHECK: Name: __eh_frame -# CHECK: Segment: __TEXT -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 -# CHECK-NEXT: 0010: 100C0708 90010000 24000000 1C000000 -# CHECK-NEXT: 0020: 70FFFFFF FFFFFFFF 01000000 00000000 -# CHECK-NEXT: 0030: 00410E10 8602430D 06000000 00000000 -# CHECK-NEXT: ) - -# For __DATA, __data, (with typeData) - - name: var - type: data - content: [ 08 ] -# CHECK: Name: __data -# CHECK: Segment: __DATA -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 08 -# CHECK-NEXT: ) - -# For __DATA, __bss (with typeZeroFill) -# FIXME: Attributes & tags of __bss are mostly broken. Should be at end of -# __DATA, should have size, should have S_ZEROFILL flag. 
- - type: zero-fill - size: 8 -# CHECK: Name: __bss -# CHECK: Segment: __DATA - -# For __DATA, __const, (with typeConstData) - - type: const-data - content: [ 09, 00, 00, 00 ] -# CHECK: Name: __const -# CHECK: Segment: __DATA -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 09000000 -# CHECK-NEXT: ) - -# For __DATA, __cfstring, (with typeCFString) - - type: cfstring - content: [ 0A, 00 ] -# CHECK: Name: __cfstring -# CHECK: Segment: __DATA -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 0A00 -# CHECK-NEXT: ) - -# For __DATA, __got (with typeGOT) - - type: got - content: [ 0B, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: Name: __got -# CHECK: Segment: __DATA -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 0B000000 00000000 -# CHECK-NEXT: ) - - -# For __DATA, __mod_init_func (with typeInitializerPtr) - - type: initializer-pointer - content: [ 0C, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: Name: __mod_init_func -# CHECK: Segment: __DATA -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 0C000000 00000000 -# CHECK-NEXT: ) - -# For __DATA, __mod_term_func (with typeTerminatorPointer) - - type: terminator-pointer - content: [ 0D, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK: Name: __mod_term_func -# CHECK: Segment: __DATA -# CHECK: SectionData ( -# CHECK-NEXT: 0000: 0D000000 00000000 -# CHECK-NEXT: ) - - - type: compact-unwind - content: [ 0E, 00, 00, 00, 00, 00, 00, 00 ] -# CHECK-NOT: Name: __compact_unwind diff --git a/lld/test/mach-o/wrong-arch-error.yaml b/lld/test/mach-o/wrong-arch-error.yaml deleted file mode 100644 index 17bd024bc2b4..000000000000 --- a/lld/test/mach-o/wrong-arch-error.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# RUN: not ld64.lld.darwinold -arch x86_64 -r %s \ -# RUN: %p/Inputs/wrong-arch-error.yaml 2> %t.err -# RUN: FileCheck %s < %t.err - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xCC 
] - -global-symbols: - - name: _foo - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... - - -# CHECK: wrong architecture diff --git a/lld/unittests/CMakeLists.txt b/lld/unittests/CMakeLists.txt deleted file mode 100644 index 84d35d43f4e8..000000000000 --- a/lld/unittests/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -add_custom_target(LLDUnitTests) -set_target_properties(LLDUnitTests PROPERTIES FOLDER "lld tests") - -set(CMAKE_BUILD_WITH_INSTALL_RPATH OFF) - -# add_lld_unittest(test_dirname file1.cpp file2.cpp) -# -# Will compile the list of files together and link against lld -# Produces a binary named 'basename(test_dirname)'. -function(add_lld_unittest test_dirname) - add_unittest(LLDUnitTests ${test_dirname} ${ARGN}) - target_link_libraries(${test_dirname} ${LLVM_COMMON_LIBS}) -endfunction() - -add_subdirectory(DriverTests) -add_subdirectory(MachOTests) diff --git a/lld/unittests/DriverTests/CMakeLists.txt b/lld/unittests/DriverTests/CMakeLists.txt deleted file mode 100644 index e750bf6b069b..000000000000 --- a/lld/unittests/DriverTests/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_lld_unittest(DriverTests - DarwinLdDriverTest.cpp - ) - -target_link_libraries(DriverTests - PRIVATE - lldDriver - lldMachOOld - ) diff --git a/lld/unittests/DriverTests/DarwinLdDriverTest.cpp b/lld/unittests/DriverTests/DarwinLdDriverTest.cpp deleted file mode 100644 index af0fbbeef2a5..000000000000 --- a/lld/unittests/DriverTests/DarwinLdDriverTest.cpp +++ /dev/null @@ -1,263 +0,0 @@ -//===- lld/unittest/DarwinLdDriverTest.cpp --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Darwin's ld driver tests. 
-/// -//===----------------------------------------------------------------------===// - -#include "lld/Common/Driver.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/raw_ostream.h" -#include "gtest/gtest.h" - -using namespace llvm; -using namespace lld; - -namespace lld { -namespace mach_o { -bool parse(llvm::ArrayRef args, MachOLinkingContext &ctx); -} -} - -namespace { -class DarwinLdParserTest : public testing::Test { -protected: - int inputFileCount() { return _ctx.getNodes().size(); } - - StringRef inputFile(int index) { - Node &node = *_ctx.getNodes()[index]; - if (node.kind() == Node::Kind::File) - return cast(&node)->getFile()->path(); - llvm_unreachable("not handling other types of input files"); - } - - bool parse(std::vector args) { - args.insert(args.begin(), "ld"); - return mach_o::parse(args, _ctx); - } - - MachOLinkingContext _ctx; -}; -} - -TEST_F(DarwinLdParserTest, Basic) { - EXPECT_TRUE(parse({"foo.o", "bar.o", "-arch", "i386"})); - EXPECT_FALSE(_ctx.allowRemainingUndefines()); - EXPECT_FALSE(_ctx.deadStrip()); - EXPECT_EQ(2, inputFileCount()); - EXPECT_EQ("foo.o", inputFile(0)); - EXPECT_EQ("bar.o", inputFile(1)); -} - -TEST_F(DarwinLdParserTest, Output) { - EXPECT_TRUE(parse({"-o", "my.out", "foo.o", "-arch", "i386"})); - EXPECT_EQ("my.out", _ctx.outputPath()); -} - -TEST_F(DarwinLdParserTest, Dylib) { - EXPECT_TRUE(parse({"-dylib", "foo.o", "-arch", "i386"})); - EXPECT_EQ(llvm::MachO::MH_DYLIB, _ctx.outputMachOType()); -} - -TEST_F(DarwinLdParserTest, Relocatable) { - EXPECT_TRUE(parse({"-r", "foo.o", "-arch", "i386"})); - EXPECT_EQ(llvm::MachO::MH_OBJECT, _ctx.outputMachOType()); -} - -TEST_F(DarwinLdParserTest, Bundle) { - EXPECT_TRUE(parse({"-bundle", "foo.o", "-arch", "i386"})); - EXPECT_EQ(llvm::MachO::MH_BUNDLE, _ctx.outputMachOType()); -} - -TEST_F(DarwinLdParserTest, Preload) { - EXPECT_TRUE(parse({"-preload", "foo.o", "-arch", "i386"})); - 
EXPECT_EQ(llvm::MachO::MH_PRELOAD, _ctx.outputMachOType()); -} - -TEST_F(DarwinLdParserTest, Static) { - EXPECT_TRUE(parse({"-static", "foo.o", "-arch", "i386"})); - EXPECT_EQ(llvm::MachO::MH_EXECUTE, _ctx.outputMachOType()); -} - -TEST_F(DarwinLdParserTest, Entry) { - EXPECT_TRUE(parse({"-e", "entryFunc", "foo.o", "-arch", "i386"})); - EXPECT_EQ("entryFunc", _ctx.entrySymbolName()); -} - -TEST_F(DarwinLdParserTest, DeadStrip) { - EXPECT_TRUE(parse({"-arch", "x86_64", "-dead_strip", "foo.o"})); - EXPECT_TRUE(_ctx.deadStrip()); -} - -TEST_F(DarwinLdParserTest, DeadStripRootsExe) { - EXPECT_TRUE(parse({"-arch", "x86_64", "-dead_strip", "foo.o"})); - EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); -} - -TEST_F(DarwinLdParserTest, DeadStripRootsDylib) { - EXPECT_TRUE(parse({"-arch", "x86_64", "-dylib", "-dead_strip", "foo.o"})); - EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); -} - -TEST_F(DarwinLdParserTest, DeadStripRootsRelocatable) { - EXPECT_TRUE(parse({"-arch", "x86_64", "-r", "-dead_strip", "foo.o"})); - EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); -} - -TEST_F(DarwinLdParserTest, DeadStripRootsExportDynamicExe) { - EXPECT_TRUE( - parse({"-arch", "x86_64", "-dead_strip", "-export_dynamic", "foo.o"})); - EXPECT_TRUE(_ctx.globalsAreDeadStripRoots()); -} - -TEST_F(DarwinLdParserTest, DeadStripRootsExportDynamicDylib) { - EXPECT_TRUE(parse({"-arch", "x86_64", "-dylib", "-dead_strip", - "-export_dynamic", "foo.o"})); - EXPECT_TRUE(_ctx.globalsAreDeadStripRoots()); -} - -TEST_F(DarwinLdParserTest, DeadStripRootsExportDynamicRelocatable) { - EXPECT_TRUE(parse( - {"-arch", "x86_64", "-r", "-dead_strip", "-export_dynamic", "foo.o"})); - EXPECT_FALSE(_ctx.globalsAreDeadStripRoots()); -} - -TEST_F(DarwinLdParserTest, Arch) { - EXPECT_TRUE(parse({"-arch", "x86_64", "foo.o"})); - EXPECT_EQ(MachOLinkingContext::arch_x86_64, _ctx.arch()); - EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_X86_64, _ctx.getCPUType()); - EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_X86_64_ALL, 
_ctx.getCPUSubType()); -} - -TEST_F(DarwinLdParserTest, Arch_x86) { - EXPECT_TRUE(parse({"-arch", "i386", "foo.o"})); - EXPECT_EQ(MachOLinkingContext::arch_x86, _ctx.arch()); - EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_I386, _ctx.getCPUType()); - EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_X86_ALL, _ctx.getCPUSubType()); -} - -TEST_F(DarwinLdParserTest, Arch_armv6) { - EXPECT_TRUE(parse({"-arch", "armv6", "foo.o"})); - EXPECT_EQ(MachOLinkingContext::arch_armv6, _ctx.arch()); - EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_ARM, _ctx.getCPUType()); - EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_ARM_V6, _ctx.getCPUSubType()); -} - -TEST_F(DarwinLdParserTest, Arch_armv7) { - EXPECT_TRUE(parse({"-arch", "armv7", "foo.o"})); - EXPECT_EQ(MachOLinkingContext::arch_armv7, _ctx.arch()); - EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_ARM, _ctx.getCPUType()); - EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_ARM_V7, _ctx.getCPUSubType()); -} - -TEST_F(DarwinLdParserTest, Arch_armv7s) { - EXPECT_TRUE(parse({"-arch", "armv7s", "foo.o"})); - EXPECT_EQ(MachOLinkingContext::arch_armv7s, _ctx.arch()); - EXPECT_EQ((uint32_t)llvm::MachO::CPU_TYPE_ARM, _ctx.getCPUType()); - EXPECT_EQ(llvm::MachO::CPU_SUBTYPE_ARM_V7S, _ctx.getCPUSubType()); -} - -TEST_F(DarwinLdParserTest, MinMacOSX10_7) { - EXPECT_TRUE( - parse({"-macosx_version_min", "10.7", "foo.o", "-arch", "x86_64"})); - EXPECT_EQ(MachOLinkingContext::OS::macOSX, _ctx.os()); - EXPECT_TRUE(_ctx.minOS("10.7", "")); - EXPECT_FALSE(_ctx.minOS("10.8", "")); -} - -TEST_F(DarwinLdParserTest, MinMacOSX10_8) { - EXPECT_TRUE( - parse({"-macosx_version_min", "10.8.3", "foo.o", "-arch", "x86_64"})); - EXPECT_EQ(MachOLinkingContext::OS::macOSX, _ctx.os()); - EXPECT_TRUE(_ctx.minOS("10.7", "")); - EXPECT_TRUE(_ctx.minOS("10.8", "")); -} - -TEST_F(DarwinLdParserTest, iOS5) { - EXPECT_TRUE(parse({"-ios_version_min", "5.0", "foo.o", "-arch", "armv7"})); - EXPECT_EQ(MachOLinkingContext::OS::iOS, _ctx.os()); - EXPECT_TRUE(_ctx.minOS("", "5.0")); - EXPECT_FALSE(_ctx.minOS("", "6.0")); -} - 
-TEST_F(DarwinLdParserTest, iOS6) { - EXPECT_TRUE(parse({"-ios_version_min", "6.0", "foo.o", "-arch", "armv7"})); - EXPECT_EQ(MachOLinkingContext::OS::iOS, _ctx.os()); - EXPECT_TRUE(_ctx.minOS("", "5.0")); - EXPECT_TRUE(_ctx.minOS("", "6.0")); -} - -TEST_F(DarwinLdParserTest, iOS_Simulator5) { - EXPECT_TRUE( - parse({"-ios_simulator_version_min", "5.0", "a.o", "-arch", "i386"})); - EXPECT_EQ(MachOLinkingContext::OS::iOS_simulator, _ctx.os()); - EXPECT_TRUE(_ctx.minOS("", "5.0")); - EXPECT_FALSE(_ctx.minOS("", "6.0")); -} - -TEST_F(DarwinLdParserTest, iOS_Simulator6) { - EXPECT_TRUE( - parse({"-ios_simulator_version_min", "6.0", "a.o", "-arch", "i386"})); - EXPECT_EQ(MachOLinkingContext::OS::iOS_simulator, _ctx.os()); - EXPECT_TRUE(_ctx.minOS("", "5.0")); - EXPECT_TRUE(_ctx.minOS("", "6.0")); -} - -TEST_F(DarwinLdParserTest, compatibilityVersion) { - EXPECT_TRUE(parse( - {"-dylib", "-compatibility_version", "1.2.3", "a.o", "-arch", "i386"})); - EXPECT_EQ(_ctx.compatibilityVersion(), 0x10203U); -} - -TEST_F(DarwinLdParserTest, compatibilityVersionInvalidType) { - EXPECT_FALSE(parse( - {"-bundle", "-compatibility_version", "1.2.3", "a.o", "-arch", "i386"})); -} - -TEST_F(DarwinLdParserTest, compatibilityVersionInvalidValue) { - EXPECT_FALSE(parse( - {"-bundle", "-compatibility_version", "1,2,3", "a.o", "-arch", "i386"})); -} - -TEST_F(DarwinLdParserTest, currentVersion) { - EXPECT_TRUE( - parse({"-dylib", "-current_version", "1.2.3", "a.o", "-arch", "i386"})); - EXPECT_EQ(_ctx.currentVersion(), 0x10203U); -} - -TEST_F(DarwinLdParserTest, currentVersionInvalidType) { - EXPECT_FALSE( - parse({"-bundle", "-current_version", "1.2.3", "a.o", "-arch", "i386"})); -} - -TEST_F(DarwinLdParserTest, currentVersionInvalidValue) { - EXPECT_FALSE( - parse({"-bundle", "-current_version", "1,2,3", "a.o", "-arch", "i386"})); -} - -TEST_F(DarwinLdParserTest, bundleLoader) { - EXPECT_TRUE( - parse({"-bundle", "-bundle_loader", "/bin/ls", "a.o", "-arch", "i386"})); - 
EXPECT_EQ(_ctx.bundleLoader(), "/bin/ls"); -} - -TEST_F(DarwinLdParserTest, bundleLoaderInvalidType) { - EXPECT_FALSE(parse({"-bundle_loader", "/bin/ls", "a.o", "-arch", "i386"})); -} - -TEST_F(DarwinLdParserTest, deadStrippableDylib) { - EXPECT_TRUE( - parse({"-dylib", "-mark_dead_strippable_dylib", "a.o", "-arch", "i386"})); - EXPECT_EQ(true, _ctx.deadStrippableDylib()); -} - -TEST_F(DarwinLdParserTest, deadStrippableDylibInvalidType) { - EXPECT_FALSE(parse({"-mark_dead_strippable_dylib", "a.o", "-arch", "i386"})); -} diff --git a/lld/unittests/MachOTests/CMakeLists.txt b/lld/unittests/MachOTests/CMakeLists.txt deleted file mode 100644 index 7cc71380cd62..000000000000 --- a/lld/unittests/MachOTests/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ - -add_lld_unittest(lldMachOOldTests - MachONormalizedFileBinaryReaderTests.cpp - MachONormalizedFileBinaryWriterTests.cpp - MachONormalizedFileToAtomsTests.cpp - MachONormalizedFileYAMLTests.cpp - ) - -target_link_libraries(lldMachOOldTests - PRIVATE - lldDriver - lldMachOOld - lldYAML - ) diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp deleted file mode 100644 index fbf18a8d9e00..000000000000 --- a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp +++ /dev/null @@ -1,753 +0,0 @@ -//===- lld/unittest/MachOTests/MachONormalizedFileBinaryReaderTests.cpp ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/YAMLTraits.h" -#include "gtest/gtest.h" -#include -#include - -using llvm::SmallString; -using llvm::StringRef; -using llvm::MemoryBuffer; -using llvm::Twine; - -using namespace lld::mach_o::normalized; -using namespace llvm::MachO; - -static std::unique_ptr -fromBinary(const uint8_t bytes[], unsigned length, StringRef archStr) { - StringRef sr((const char*)bytes, length); - std::unique_ptr mb(MemoryBuffer::getMemBuffer(sr, "", false)); - llvm::Expected> r = - lld::mach_o::normalized::readBinary( - mb, lld::MachOLinkingContext::archFromName(archStr)); - EXPECT_FALSE(!r); - return std::move(*r); -} - -// The Mach-O object reader uses functions such as read32 or read64 -// which don't allow unaligned access. Our in-memory object file -// needs to be aligned to a larger boundary than uint8_t's. 
-#if _MSC_VER -#define FILEBYTES __declspec(align(64)) const uint8_t fileBytes[] -#else -#define FILEBYTES const uint8_t fileBytes[] __attribute__((aligned(64))) -#endif - -TEST(BinaryReaderTest, empty_obj_x86_64) { - FILEBYTES = { - 0xcf, 0xfa, 0xed, 0xfe, 0x07, 0x00, 0x00, 0x01, - 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(BinaryReaderTest, empty_obj_x86) { - FILEBYTES = { - 0xce, 0xfa, 0xed, 0xfe, 0x07, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, - 0x00, 0x20, 0x00, 0x00, 0x01, 
0x00, 0x00, 0x00, - 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, 0x65, - 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, - 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "i386"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(BinaryReaderTest, empty_obj_ppc) { - FILEBYTES = { - 0xfe, 0xed, 0xfa, 0xce, 0x00, 0x00, 0x00, 0x12, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, - 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, 0x65, - 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, - 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, - 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "ppc"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(BinaryReaderTest, empty_obj_armv7) { - FILEBYTES = { - 0xce, 0xfa, 0xed, 0xfe, 0x0c, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, - 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, 0x65, - 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, - 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "armv7"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { - FILEBYTES = { -#include 
"empty_obj_x86_armv7.txt" - }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); - - std::unique_ptr f2 = - fromBinary(fileBytes, sizeof(fileBytes), "armv7"); - EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_armv7); - EXPECT_EQ((int)(f2->fileType), MH_OBJECT); - EXPECT_EQ((int)(f2->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f2->localSymbols.empty()); - EXPECT_TRUE(f2->globalSymbols.empty()); - EXPECT_TRUE(f2->undefinedSymbols.empty()); -} - -TEST(BinaryReaderTest, hello_obj_x86_64) { - FILEBYTES = { - 0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01, - 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, - 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x70, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0xA4, 0x01, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 
0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, - 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x9D, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0xB4, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xE4, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x10, - 0x48, 0x8D, 0x3D, 0x00, 0x00, 0x00, 0x00, 0xC7, - 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0xB0, 0x00, - 0xE8, 0x00, 0x00, 0x00, 0x00, 0xB9, 0x00, 0x00, - 0x00, 0x00, 0x89, 0x45, 0xF8, 0x89, 0xC8, 0x48, - 0x83, 0xC4, 0x10, 0x5D, 0xC3, 0x68, 0x65, 0x6C, - 0x6C, 0x6F, 0x0A, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x2D, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x1D, 0x0F, 0x00, 0x00, 0x00, - 0x0E, 0x02, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x6D, 0x61, - 0x69, 0x6E, 0x00, 0x5F, 0x70, 0x72, 0x69, 0x6E, - 0x74, 0x66, 0x00, 0x4C, 0x5F, 0x2E, 0x73, 0x74, - 0x72, 
0x00, 0x00, 0x00 }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); - - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - const Section& text = f->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(text.type, S_REGULAR); - EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 16U); - EXPECT_EQ(text.address, Hex64(0x0)); - EXPECT_EQ(text.content.size(), 45UL); - EXPECT_EQ((int)(text.content[0]), 0x55); - EXPECT_EQ((int)(text.content[1]), 0x48); - EXPECT_TRUE(text.indirectSymbols.empty()); - EXPECT_EQ(text.relocations.size(), 2UL); - const Relocation& call = text.relocations[0]; - EXPECT_EQ(call.offset, Hex32(0x19)); - EXPECT_EQ(call.type, X86_64_RELOC_BRANCH); - EXPECT_EQ(call.length, 2); - EXPECT_EQ(call.isExtern, true); - EXPECT_EQ(call.symbol, 2U); - const Relocation& str = text.relocations[1]; - EXPECT_EQ(str.offset, Hex32(0xB)); - EXPECT_EQ(str.type, X86_64_RELOC_SIGNED); - EXPECT_EQ(str.length, 2); - EXPECT_EQ(str.isExtern, true); - EXPECT_EQ(str.symbol, 0U); - - const Section& cstring = f->sections[1]; - EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); - EXPECT_TRUE(cstring.sectionName.equals("__cstring")); - EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); - EXPECT_EQ(cstring.attributes, SectionAttr(0)); - EXPECT_EQ((uint16_t)cstring.alignment, 1U); - EXPECT_EQ(cstring.address, Hex64(0x02D)); - EXPECT_EQ(cstring.content.size(), 7UL); - EXPECT_EQ((int)(cstring.content[0]), 0x68); - EXPECT_EQ((int)(cstring.content[1]), 0x65); - EXPECT_EQ((int)(cstring.content[2]), 0x6c); - EXPECT_TRUE(cstring.indirectSymbols.empty()); - EXPECT_TRUE(cstring.relocations.empty()); - - EXPECT_EQ(f->localSymbols.size(), 1UL); - const Symbol& strLabel = 
f->localSymbols[0]; - EXPECT_EQ(strLabel.type, N_SECT); - EXPECT_EQ(strLabel.sect, 2); - EXPECT_EQ(strLabel.value, Hex64(0x2D)); - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& mainLabel = f->globalSymbols[0]; - EXPECT_TRUE(mainLabel.name.equals("_main")); - EXPECT_EQ(mainLabel.type, N_SECT); - EXPECT_EQ(mainLabel.sect, 1); - EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); - EXPECT_EQ(mainLabel.value, Hex64(0x0)); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& printfLabel = f->undefinedSymbols[0]; - EXPECT_TRUE(printfLabel.name.equals("_printf")); - EXPECT_EQ(printfLabel.type, N_UNDF); - EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); -} - -TEST(BinaryReaderTest, hello_obj_x86) { - FILEBYTES = { - 0xCE, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x28, 0x01, 0x00, 0x00, - 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x74, 0x65, - 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x54, 0x45, - 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x7C, 0x01, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x80, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, - 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x74, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xAC, 0x01, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x55, 0x89, 0xE5, 0x83, - 0xEC, 0x18, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x58, - 0x8D, 0x80, 0x25, 0x00, 0x00, 0x00, 0xC7, 0x45, - 0xFC, 0x00, 0x00, 0x00, 0x00, 0x89, 0x04, 0x24, - 0xE8, 0xDF, 0xFF, 0xFF, 0xFF, 0xB9, 0x00, 0x00, - 0x00, 0x00, 0x89, 0x45, 0xF8, 0x89, 0xC8, 0x83, - 0xC4, 0x18, 0x5D, 0xC3, 0x68, 0x65, 0x6C, 0x6C, - 0x6F, 0x0A, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x0D, 0x0E, 0x00, 0x00, 0xA4, - 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x6D, 0x61, - 0x69, 0x6E, 0x00, 0x5F, 0x70, 0x72, 0x69, 0x6E, - 0x74, 0x66, 0x00, 0x00 - }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "i386"); - - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - const Section& text = f->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(text.type, S_REGULAR); - 
EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 16U); - EXPECT_EQ(text.address, Hex64(0x0)); - EXPECT_EQ(text.content.size(), 48UL); - EXPECT_EQ((int)(text.content[0]), 0x55); - EXPECT_EQ((int)(text.content[1]), 0x89); - EXPECT_TRUE(text.indirectSymbols.empty()); - EXPECT_EQ(text.relocations.size(), 3UL); - const Relocation& call = text.relocations[0]; - EXPECT_EQ(call.offset, Hex32(0x1D)); - EXPECT_EQ(call.scattered, false); - EXPECT_EQ(call.type, GENERIC_RELOC_VANILLA); - EXPECT_EQ(call.pcRel, true); - EXPECT_EQ(call.length, 2); - EXPECT_EQ(call.isExtern, true); - EXPECT_EQ(call.symbol, 1U); - const Relocation& sectDiff = text.relocations[1]; - EXPECT_EQ(sectDiff.offset, Hex32(0xE)); - EXPECT_EQ(sectDiff.scattered, true); - EXPECT_EQ(sectDiff.type, GENERIC_RELOC_LOCAL_SECTDIFF); - EXPECT_EQ(sectDiff.pcRel, false); - EXPECT_EQ(sectDiff.length, 2); - EXPECT_EQ(sectDiff.value, 0x30U); - const Relocation& pair = text.relocations[2]; - EXPECT_EQ(pair.offset, Hex32(0x0)); - EXPECT_EQ(pair.scattered, true); - EXPECT_EQ(pair.type, GENERIC_RELOC_PAIR); - EXPECT_EQ(pair.pcRel, false); - EXPECT_EQ(pair.length, 2); - EXPECT_EQ(pair.value, 0x0BU); - - const Section& cstring = f->sections[1]; - EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); - EXPECT_TRUE(cstring.sectionName.equals("__cstring")); - EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); - EXPECT_EQ(cstring.attributes, SectionAttr(0)); - EXPECT_EQ((uint16_t)cstring.alignment, 1U); - EXPECT_EQ(cstring.address, Hex64(0x030)); - EXPECT_EQ(cstring.content.size(), 7UL); - EXPECT_EQ((int)(cstring.content[0]), 0x68); - EXPECT_EQ((int)(cstring.content[1]), 0x65); - EXPECT_EQ((int)(cstring.content[2]), 0x6c); - EXPECT_TRUE(cstring.indirectSymbols.empty()); - EXPECT_TRUE(cstring.relocations.empty()); - - EXPECT_EQ(f->localSymbols.size(), 0UL); - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& mainLabel = f->globalSymbols[0]; - 
EXPECT_TRUE(mainLabel.name.equals("_main")); - EXPECT_EQ(mainLabel.type, N_SECT); - EXPECT_EQ(mainLabel.sect, 1); - EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); - EXPECT_EQ(mainLabel.value, Hex64(0x0)); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& printfLabel = f->undefinedSymbols[0]; - EXPECT_TRUE(printfLabel.name.equals("_printf")); - EXPECT_EQ(printfLabel.type, N_UNDF); - EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); -} - -TEST(BinaryReaderTest, hello_obj_armv7) { - FILEBYTES = { - 0xCE, 0xFA, 0xED, 0xFE, 0x0C, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x28, 0x01, 0x00, 0x00, - 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x74, 0x65, - 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x54, 0x45, - 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x80, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, - 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x6E, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0xA0, 0x01, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xB8, 0x01, 
0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x80, 0xB5, 0x6F, 0x46, - 0x82, 0xB0, 0x40, 0xF2, 0x18, 0x00, 0xC0, 0xF2, - 0x00, 0x00, 0x78, 0x44, 0x00, 0x21, 0xC0, 0xF2, - 0x00, 0x01, 0x01, 0x91, 0xFF, 0xF7, 0xF2, 0xFF, - 0x00, 0x21, 0xC0, 0xF2, 0x00, 0x01, 0x00, 0x90, - 0x08, 0x46, 0x02, 0xB0, 0x80, 0xBD, 0x68, 0x65, - 0x6C, 0x6C, 0x6F, 0x0A, 0x00, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x6D, - 0x0A, 0x00, 0x00, 0xB9, 0x2A, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0xB1, 0x0E, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0xA9, 0x2A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xA1, 0x0E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x01, 0x08, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x5F, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x5F, - 0x70, 0x72, 0x69, 0x6E, 0x74, 0x66, 0x00, 0x00 - }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "armv7"); - - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - const Section& text = f->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(text.type, S_REGULAR); - EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 4U); - EXPECT_EQ(text.address, 
Hex64(0x0)); - EXPECT_EQ(text.content.size(), 42UL); - EXPECT_EQ((int)(text.content[0]), 0x80); - EXPECT_EQ((int)(text.content[1]), 0xB5); - EXPECT_TRUE(text.indirectSymbols.empty()); - EXPECT_EQ(text.relocations.size(), 5UL); - const Relocation& call = text.relocations[0]; - EXPECT_EQ(call.offset, Hex32(0x18)); - EXPECT_EQ(call.scattered, false); - EXPECT_EQ(call.type, ARM_THUMB_RELOC_BR22); - EXPECT_EQ(call.length, 2); - EXPECT_EQ(call.isExtern, true); - EXPECT_EQ(call.symbol, 1U); - const Relocation& movt = text.relocations[1]; - EXPECT_EQ(movt.offset, Hex32(0xA)); - EXPECT_EQ(movt.scattered, true); - EXPECT_EQ(movt.type, ARM_RELOC_HALF_SECTDIFF); - EXPECT_EQ(movt.length, 3); - EXPECT_EQ(movt.value, Hex32(0x2A)); - const Relocation& movtPair = text.relocations[2]; - EXPECT_EQ(movtPair.offset, Hex32(0x18)); - EXPECT_EQ(movtPair.scattered, true); - EXPECT_EQ(movtPair.type, ARM_RELOC_PAIR); - EXPECT_EQ(movtPair.length, 3); - EXPECT_EQ(movtPair.value, Hex32(0xE)); - const Relocation& movw = text.relocations[3]; - EXPECT_EQ(movw.offset, Hex32(0x6)); - EXPECT_EQ(movw.scattered, true); - EXPECT_EQ(movw.type, ARM_RELOC_HALF_SECTDIFF); - EXPECT_EQ(movw.length, 2); - EXPECT_EQ(movw.value, Hex32(0x2A)); - const Relocation& movwPair = text.relocations[4]; - EXPECT_EQ(movwPair.offset, Hex32(0x0)); - EXPECT_EQ(movwPair.scattered, true); - EXPECT_EQ(movwPair.type, ARM_RELOC_PAIR); - EXPECT_EQ(movwPair.length, 2); - EXPECT_EQ(movwPair.value, Hex32(0xE)); - - const Section& cstring = f->sections[1]; - EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); - EXPECT_TRUE(cstring.sectionName.equals("__cstring")); - EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); - EXPECT_EQ(cstring.attributes, SectionAttr(0)); - EXPECT_EQ((uint16_t)cstring.alignment, 1U); - EXPECT_EQ(cstring.address, Hex64(0x02A)); - EXPECT_EQ(cstring.content.size(), 7UL); - EXPECT_EQ((int)(cstring.content[0]), 0x68); - EXPECT_EQ((int)(cstring.content[1]), 0x65); - EXPECT_EQ((int)(cstring.content[2]), 0x6c); - 
EXPECT_TRUE(cstring.indirectSymbols.empty()); - EXPECT_TRUE(cstring.relocations.empty()); - - EXPECT_EQ(f->localSymbols.size(), 0UL); - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& mainLabel = f->globalSymbols[0]; - EXPECT_TRUE(mainLabel.name.equals("_main")); - EXPECT_EQ(mainLabel.type, N_SECT); - EXPECT_EQ(mainLabel.sect, 1); - EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); - EXPECT_EQ(mainLabel.value, Hex64(0x0)); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& printfLabel = f->undefinedSymbols[0]; - EXPECT_TRUE(printfLabel.name.equals("_printf")); - EXPECT_EQ(printfLabel.type, N_UNDF); - EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); -} - -TEST(BinaryReaderTest, hello_obj_ppc) { - FILEBYTES = { - 0xFE, 0xED, 0xFA, 0xCE, 0x00, 0x00, 0x00, 0x12, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x01, 0x28, - 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x01, 0x44, - 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x07, - 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x74, 0x65, - 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x5F, 0x5F, 0x54, 0x45, - 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x01, 0x44, - 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, 0x90, - 0x00, 0x00, 0x00, 0x05, 0x80, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x63, 0x73, 0x74, 0x72, 0x69, 0x6E, - 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x5F, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x07, - 0x00, 0x00, 0x01, 0x88, 0x00, 0x00, 0x00, 0x02, - 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x01, 0xB8, - 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, 0xD0, - 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0B, - 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7C, 0x08, 0x02, 0xA6, - 0xBF, 0xC1, 0xFF, 0xF8, 0x90, 0x01, 0x00, 0x08, - 0x94, 0x21, 0xFF, 0xB0, 0x7C, 0x3E, 0x0B, 0x78, - 0x42, 0x9F, 0x00, 0x05, 0x7F, 0xE8, 0x02, 0xA6, - 0x3C, 0x5F, 0x00, 0x00, 0x38, 0x62, 0x00, 0x2C, - 0x4B, 0xFF, 0xFF, 0xDD, 0x38, 0x00, 0x00, 0x00, - 0x7C, 0x03, 0x03, 0x78, 0x80, 0x21, 0x00, 0x00, - 0x80, 0x01, 0x00, 0x08, 0x7C, 0x08, 0x03, 0xA6, - 0xBB, 0xC1, 0xFF, 0xF8, 0x4E, 0x80, 0x00, 0x20, - 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x01, 0xD3, - 0xAB, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x44, - 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, - 0xAC, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x44, - 0xA1, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x18, - 0x00, 0x00, 0x00, 0x01, 0x0F, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x5F, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x5F, - 0x70, 0x72, 0x69, 0x6E, 0x74, 0x66, 0x00, 0x00 - }; - std::unique_ptr f = - fromBinary(fileBytes, sizeof(fileBytes), "ppc"); - - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); - EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); - 
EXPECT_EQ(f->sections.size(), 2UL); - const Section& text = f->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(text.type, S_REGULAR); - EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 4U); - EXPECT_EQ(text.address, Hex64(0x0)); - EXPECT_EQ(text.content.size(), 68UL); - EXPECT_EQ((int)(text.content[0]), 0x7C); - EXPECT_EQ((int)(text.content[1]), 0x08); - EXPECT_TRUE(text.indirectSymbols.empty()); - EXPECT_EQ(text.relocations.size(), 5UL); - const Relocation& bl = text.relocations[0]; - EXPECT_EQ(bl.offset, Hex32(0x24)); - EXPECT_EQ(bl.type, PPC_RELOC_BR24); - EXPECT_EQ(bl.length, 2); - EXPECT_EQ(bl.isExtern, true); - EXPECT_EQ(bl.symbol, 1U); - const Relocation& lo = text.relocations[1]; - EXPECT_EQ(lo.offset, Hex32(0x20)); - EXPECT_EQ(lo.scattered, true); - EXPECT_EQ(lo.type, PPC_RELOC_LO16_SECTDIFF); - EXPECT_EQ(lo.length, 2); - EXPECT_EQ(lo.value, Hex32(0x44)); - const Relocation& loPair = text.relocations[2]; - EXPECT_EQ(loPair.offset, Hex32(0x0)); - EXPECT_EQ(loPair.scattered, true); - EXPECT_EQ(loPair.type, PPC_RELOC_PAIR); - EXPECT_EQ(loPair.length, 2); - EXPECT_EQ(loPair.value, Hex32(0x18)); - const Relocation& ha = text.relocations[3]; - EXPECT_EQ(ha.offset, Hex32(0x1C)); - EXPECT_EQ(ha.scattered, true); - EXPECT_EQ(ha.type, PPC_RELOC_HA16_SECTDIFF); - EXPECT_EQ(ha.length, 2); - EXPECT_EQ(ha.value, Hex32(0x44)); - const Relocation& haPair = text.relocations[4]; - EXPECT_EQ(haPair.offset, Hex32(0x2c)); - EXPECT_EQ(haPair.scattered, true); - EXPECT_EQ(haPair.type, PPC_RELOC_PAIR); - EXPECT_EQ(haPair.length, 2); - EXPECT_EQ(haPair.value, Hex32(0x18)); - - const Section& cstring = f->sections[1]; - EXPECT_TRUE(cstring.segmentName.equals("__TEXT")); - EXPECT_TRUE(cstring.sectionName.equals("__cstring")); - EXPECT_EQ(cstring.type, S_CSTRING_LITERALS); - EXPECT_EQ(cstring.attributes, SectionAttr(0)); - 
EXPECT_EQ((uint16_t)cstring.alignment, 4U); - EXPECT_EQ(cstring.address, Hex64(0x044)); - EXPECT_EQ(cstring.content.size(), 7UL); - EXPECT_EQ((int)(cstring.content[0]), 0x68); - EXPECT_EQ((int)(cstring.content[1]), 0x65); - EXPECT_EQ((int)(cstring.content[2]), 0x6c); - EXPECT_TRUE(cstring.indirectSymbols.empty()); - EXPECT_TRUE(cstring.relocations.empty()); - - EXPECT_EQ(f->localSymbols.size(), 0UL); - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& mainLabel = f->globalSymbols[0]; - EXPECT_TRUE(mainLabel.name.equals("_main")); - EXPECT_EQ(mainLabel.type, N_SECT); - EXPECT_EQ(mainLabel.sect, 1); - EXPECT_EQ(mainLabel.scope, SymbolScope(N_EXT)); - EXPECT_EQ(mainLabel.value, Hex64(0x0)); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& printfLabel = f->undefinedSymbols[0]; - EXPECT_TRUE(printfLabel.name.equals("_printf")); - EXPECT_EQ(printfLabel.type, N_UNDF); - EXPECT_EQ(printfLabel.scope, SymbolScope(N_EXT)); - - SmallString<128> tmpFl; - std::error_code ec = - llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); - EXPECT_FALSE(ec); - llvm::Error ec2 = writeBinary(*f, tmpFl); - EXPECT_FALSE(ec2); - llvm::sys::fs::remove(tmpFl); -} diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp deleted file mode 100644 index f2314da28a4f..000000000000 --- a/lld/unittests/MachOTests/MachONormalizedFileBinaryWriterTests.cpp +++ /dev/null @@ -1,695 +0,0 @@ -//===- lld/unittest/MachOTests/MachONormalizedFileBinaryWriterTests.cpp ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/FileSystem.h" -#include "gtest/gtest.h" -#include -#include -#include -#include - -using llvm::StringRef; -using llvm::MemoryBuffer; -using llvm::SmallString; -using llvm::Twine; -using llvm::ErrorOr; -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -// Parses binary mach-o file at specified path and returns -// ownership of buffer to mb parameter and ownership of -// Normalized file to nf parameter. -static void fromBinary(StringRef path, std::unique_ptr &mb, - std::unique_ptr &nf, StringRef archStr) { - ErrorOr> mbOrErr = MemoryBuffer::getFile(path); - std::error_code ec = mbOrErr.getError(); - EXPECT_FALSE(ec); - mb = std::move(mbOrErr.get()); - - llvm::Expected> r = - lld::mach_o::normalized::readBinary( - mb, lld::MachOLinkingContext::archFromName(archStr)); - EXPECT_FALSE(!r); - nf.reset(r->release()); -} - -static Relocation -makeReloc(unsigned addr, bool rel, bool ext, RelocationInfoType type, - unsigned sym) { - Relocation result; - result.offset = addr; - result.scattered = false; - result.type = type; - result.length = 2; - result.pcRel = rel; - result.isExtern = ext; - result.value = 0; - result.symbol = sym; - return result; -} - -static Relocation -makeScatReloc(unsigned addr, RelocationInfoType type, unsigned value) { - Relocation result; - result.offset = addr; - result.scattered = true; - result.type = type; - result.length = 2; - result.pcRel = false; - result.isExtern = true; - result.value = value; - result.symbol = 0; - return result; -} - -static Symbol -makeUndefSymbol(StringRef name) { - Symbol sym; - sym.name = name; - sym.type = N_UNDF; - sym.scope = N_EXT; - sym.sect = NO_SECT; - sym.desc = 0; - sym.value = 0; - 
return sym; -} - - -static Symbol -makeSymbol(StringRef name, unsigned addr) { - Symbol sym; - sym.name = name; - sym.type = N_SECT; - sym.scope = N_EXT; - sym.sect = 1; - sym.desc = 0; - sym.value = addr; - return sym; -} - -static Symbol -makeThumbSymbol(StringRef name, unsigned addr) { - Symbol sym; - sym.name = name; - sym.type = N_SECT; - sym.scope = N_EXT; - sym.sect = 1; - sym.desc = N_ARM_THUMB_DEF; - sym.value = addr; - return sym; -} - -TEST(BinaryWriterTest, obj_relocs_x86_64) { - SmallString<128> tmpFl; - { - NormalizedFile f; - f.arch = lld::MachOLinkingContext::arch_x86_64; - f.fileType = MH_OBJECT; - f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; - f.os = lld::MachOLinkingContext::OS::macOSX; - f.sections.resize(1); - Section& text = f.sections.front(); - text.segmentName = "__TEXT"; - text.sectionName = "__text"; - text.type = S_REGULAR; - text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS); - text.alignment = 16; - text.address = 0; - const uint8_t textBytes[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x05, - 0x00, 0x00, 0x00, 0x00, 0xff, 0x35, 0x00, 0x00, - 0x00, 0x00, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, - 0xc6, 0x05, 0xff, 0xff, 0xff, 0xff, 0x12, 0xc7, - 0x05, 0xfc, 0xff, 0xff, 0xff, 0x78, 0x56, 0x34, - 0x12, 0x48, 0x8b, 0x3d, 0x00, 0x00, 0x00, 0x00 }; - - text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); - text.relocations.push_back(makeReloc(0x01, false, true, X86_64_RELOC_BRANCH, 1)); - text.relocations.push_back(makeReloc(0x08, false, true, X86_64_RELOC_GOT_LOAD, 1)); - text.relocations.push_back(makeReloc(0x0E, false, true, X86_64_RELOC_GOT, 1)); - text.relocations.push_back(makeReloc(0x14, false, true, X86_64_RELOC_SIGNED, 1)); - text.relocations.push_back(makeReloc(0x1A, false, true, X86_64_RELOC_SIGNED_1, 1)); - text.relocations.push_back(makeReloc(0x21, false, true, X86_64_RELOC_SIGNED_4, 1)); - text.relocations.push_back(makeReloc(0x2C, false, true, X86_64_RELOC_TLV, 2)); - - 
f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); - f.undefinedSymbols.push_back(makeUndefSymbol("_tbar")); - - std::error_code ec = - llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); - EXPECT_FALSE(ec); - llvm::Error ec2 = writeBinary(f, tmpFl); - EXPECT_FALSE(ec2); - } - - std::unique_ptr bufferOwner; - std::unique_ptr f2; - fromBinary(tmpFl, bufferOwner, f2, "x86_64"); - - EXPECT_EQ(lld::MachOLinkingContext::arch_x86_64, f2->arch); - EXPECT_EQ(MH_OBJECT, f2->fileType); - EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); - - EXPECT_TRUE(f2->localSymbols.empty()); - EXPECT_TRUE(f2->globalSymbols.empty()); - EXPECT_EQ(2UL, f2->undefinedSymbols.size()); - const Symbol& barUndef = f2->undefinedSymbols[0]; - EXPECT_TRUE(barUndef.name.equals("_bar")); - EXPECT_EQ(N_UNDF, barUndef.type); - EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); - const Symbol& tbarUndef = f2->undefinedSymbols[1]; - EXPECT_TRUE(tbarUndef.name.equals("_tbar")); - EXPECT_EQ(N_UNDF, tbarUndef.type); - EXPECT_EQ(SymbolScope(N_EXT), tbarUndef.scope); - - EXPECT_EQ(1UL, f2->sections.size()); - const Section& text = f2->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(S_REGULAR, text.type); - EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 16U); - EXPECT_EQ(text.address, Hex64(0x0)); - EXPECT_EQ(48UL, text.content.size()); - const Relocation& call = text.relocations[0]; - EXPECT_EQ(call.offset, Hex32(0x1)); - EXPECT_EQ(call.type, X86_64_RELOC_BRANCH); - EXPECT_EQ(call.length, 2); - EXPECT_EQ(call.isExtern, true); - EXPECT_EQ(call.symbol, 1U); - const Relocation& gotLoad = text.relocations[1]; - EXPECT_EQ(gotLoad.offset, Hex32(0x8)); - EXPECT_EQ(gotLoad.type, X86_64_RELOC_GOT_LOAD); - EXPECT_EQ(gotLoad.length, 2); - EXPECT_EQ(gotLoad.isExtern, true); - EXPECT_EQ(gotLoad.symbol, 1U); - const Relocation& gotUse = 
text.relocations[2]; - EXPECT_EQ(gotUse.offset, Hex32(0xE)); - EXPECT_EQ(gotUse.type, X86_64_RELOC_GOT); - EXPECT_EQ(gotUse.length, 2); - EXPECT_EQ(gotUse.isExtern, true); - EXPECT_EQ(gotUse.symbol, 1U); - const Relocation& signed0 = text.relocations[3]; - EXPECT_EQ(signed0.offset, Hex32(0x14)); - EXPECT_EQ(signed0.type, X86_64_RELOC_SIGNED); - EXPECT_EQ(signed0.length, 2); - EXPECT_EQ(signed0.isExtern, true); - EXPECT_EQ(signed0.symbol, 1U); - const Relocation& signed1 = text.relocations[4]; - EXPECT_EQ(signed1.offset, Hex32(0x1A)); - EXPECT_EQ(signed1.type, X86_64_RELOC_SIGNED_1); - EXPECT_EQ(signed1.length, 2); - EXPECT_EQ(signed1.isExtern, true); - EXPECT_EQ(signed1.symbol, 1U); - const Relocation& signed4 = text.relocations[5]; - EXPECT_EQ(signed4.offset, Hex32(0x21)); - EXPECT_EQ(signed4.type, X86_64_RELOC_SIGNED_4); - EXPECT_EQ(signed4.length, 2); - EXPECT_EQ(signed4.isExtern, true); - EXPECT_EQ(signed4.symbol, 1U); - - bufferOwner.reset(nullptr); - std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); - EXPECT_FALSE(ec); -} - - - -TEST(BinaryWriterTest, obj_relocs_x86) { - SmallString<128> tmpFl; - { - NormalizedFile f; - f.arch = lld::MachOLinkingContext::arch_x86; - f.fileType = MH_OBJECT; - f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; - f.os = lld::MachOLinkingContext::OS::macOSX; - f.sections.resize(1); - Section& text = f.sections.front(); - text.segmentName = "__TEXT"; - text.sectionName = "__text"; - text.type = S_REGULAR; - text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS); - text.alignment = 16; - text.address = 0; - const uint8_t textBytes[] = { - 0xe8, 0xfb, 0xff, 0xff, 0xff, 0xa1, 0x00, 0x00, - 0x00, 0x00, 0x8b, 0xb0, 0xfb, 0xff, 0xff, 0xff, - 0x8b, 0x80, 0x11, 0x00, 0x00, 0x00 }; - - text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); - text.relocations.push_back(makeReloc(0x01, true, true, GENERIC_RELOC_VANILLA, 0)); - text.relocations.push_back(makeReloc(0x06, false, true, GENERIC_RELOC_VANILLA, 
0)); - text.relocations.push_back(makeScatReloc(0x0c, GENERIC_RELOC_LOCAL_SECTDIFF, 0)); - text.relocations.push_back(makeScatReloc(0x0, GENERIC_RELOC_PAIR, 5)); - text.relocations.push_back(makeReloc(0x12, true, true, GENERIC_RELOC_TLV, 1)); - - f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); - f.undefinedSymbols.push_back(makeUndefSymbol("_tbar")); - - std::error_code ec = - llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); - EXPECT_FALSE(ec); - llvm::Error ec2 = writeBinary(f, tmpFl); - EXPECT_FALSE(ec2); - } - std::unique_ptr bufferOwner; - std::unique_ptr f2; - fromBinary(tmpFl, bufferOwner, f2, "i386"); - - EXPECT_EQ(lld::MachOLinkingContext::arch_x86, f2->arch); - EXPECT_EQ(MH_OBJECT, f2->fileType); - EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); - - EXPECT_TRUE(f2->localSymbols.empty()); - EXPECT_TRUE(f2->globalSymbols.empty()); - EXPECT_EQ(2UL, f2->undefinedSymbols.size()); - const Symbol& barUndef = f2->undefinedSymbols[0]; - EXPECT_TRUE(barUndef.name.equals("_bar")); - EXPECT_EQ(N_UNDF, barUndef.type); - EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); - const Symbol& tbarUndef = f2->undefinedSymbols[1]; - EXPECT_TRUE(tbarUndef.name.equals("_tbar")); - EXPECT_EQ(N_UNDF, tbarUndef.type); - EXPECT_EQ(SymbolScope(N_EXT), tbarUndef.scope); - - EXPECT_EQ(1UL, f2->sections.size()); - const Section& text = f2->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(S_REGULAR, text.type); - EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 16U); - EXPECT_EQ(text.address, Hex64(0x0)); - EXPECT_EQ(22UL, text.content.size()); - const Relocation& call = text.relocations[0]; - EXPECT_EQ(call.offset, Hex32(0x1)); - EXPECT_EQ(call.scattered, false); - EXPECT_EQ(call.type, GENERIC_RELOC_VANILLA); - EXPECT_EQ(call.pcRel, true); - EXPECT_EQ(call.length, 2); - EXPECT_EQ(call.isExtern, true); - 
EXPECT_EQ(call.symbol, 0U); - const Relocation& absLoad = text.relocations[1]; - EXPECT_EQ(absLoad.offset, Hex32(0x6)); - EXPECT_EQ(absLoad.scattered, false); - EXPECT_EQ(absLoad.type, GENERIC_RELOC_VANILLA); - EXPECT_EQ(absLoad.pcRel, false); - EXPECT_EQ(absLoad.length, 2); - EXPECT_EQ(absLoad.isExtern, true); - EXPECT_EQ(absLoad.symbol,0U); - const Relocation& pic1 = text.relocations[2]; - EXPECT_EQ(pic1.offset, Hex32(0xc)); - EXPECT_EQ(pic1.scattered, true); - EXPECT_EQ(pic1.type, GENERIC_RELOC_LOCAL_SECTDIFF); - EXPECT_EQ(pic1.length, 2); - EXPECT_EQ(pic1.value, 0U); - const Relocation& pic2 = text.relocations[3]; - EXPECT_EQ(pic2.offset, Hex32(0x0)); - EXPECT_EQ(pic1.scattered, true); - EXPECT_EQ(pic2.type, GENERIC_RELOC_PAIR); - EXPECT_EQ(pic2.length, 2); - EXPECT_EQ(pic2.value, 5U); - const Relocation& tlv = text.relocations[4]; - EXPECT_EQ(tlv.offset, Hex32(0x12)); - EXPECT_EQ(tlv.type, GENERIC_RELOC_TLV); - EXPECT_EQ(tlv.length, 2); - EXPECT_EQ(tlv.isExtern, true); - EXPECT_EQ(tlv.symbol, 1U); - - // lld::errs() << "temp = " << tmpFl << "\n"; - bufferOwner.reset(nullptr); - std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); - EXPECT_FALSE(ec); -} - - - -TEST(BinaryWriterTest, obj_relocs_armv7) { - SmallString<128> tmpFl; - { - NormalizedFile f; - f.arch = lld::MachOLinkingContext::arch_armv7; - f.fileType = MH_OBJECT; - f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; - f.os = lld::MachOLinkingContext::OS::macOSX; - f.sections.resize(1); - Section& text = f.sections.front(); - text.segmentName = "__TEXT"; - text.sectionName = "__text"; - text.type = S_REGULAR; - text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS); - text.alignment = 4; - text.address = 0; - const uint8_t textBytes[] = { - 0xff, 0xf7, 0xfe, 0xef, 0x40, 0xf2, 0x05, 0x01, - 0xc0, 0xf2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0xbf }; - - text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); - text.relocations.push_back(makeReloc(0x00, true, true, - 
ARM_THUMB_RELOC_BR22, 2)); - text.relocations.push_back(makeScatReloc(0x04, - ARM_RELOC_HALF_SECTDIFF, 0x10)); - text.relocations.push_back(makeScatReloc(0x00, - ARM_RELOC_PAIR, 0xC)); - text.relocations.push_back(makeScatReloc(0x08, - ARM_RELOC_HALF_SECTDIFF, 0x10)); - text.relocations.push_back(makeScatReloc(0x00, - ARM_RELOC_PAIR, 0xC)); - text.relocations.push_back(makeReloc(0x0C, false, true, - ARM_RELOC_VANILLA, 2)); - - f.globalSymbols.push_back(makeThumbSymbol("_foo", 0x00)); - f.globalSymbols.push_back(makeThumbSymbol("_foo2", 0x10)); - f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); - - std::error_code ec = - llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); - EXPECT_FALSE(ec); - llvm::Error ec2 = writeBinary(f, tmpFl); - EXPECT_FALSE(ec2); - } - std::unique_ptr bufferOwner; - std::unique_ptr f2; - fromBinary(tmpFl, bufferOwner, f2, "armv7"); - - EXPECT_EQ(lld::MachOLinkingContext::arch_armv7, f2->arch); - EXPECT_EQ(MH_OBJECT, f2->fileType); - EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); - - EXPECT_TRUE(f2->localSymbols.empty()); - EXPECT_EQ(2UL, f2->globalSymbols.size()); - const Symbol& fooDef = f2->globalSymbols[0]; - EXPECT_TRUE(fooDef.name.equals("_foo")); - EXPECT_EQ(N_SECT, fooDef.type); - EXPECT_EQ(1, fooDef.sect); - EXPECT_EQ(SymbolScope(N_EXT), fooDef.scope); - const Symbol& foo2Def = f2->globalSymbols[1]; - EXPECT_TRUE(foo2Def.name.equals("_foo2")); - EXPECT_EQ(N_SECT, foo2Def.type); - EXPECT_EQ(1, foo2Def.sect); - EXPECT_EQ(SymbolScope(N_EXT), foo2Def.scope); - - EXPECT_EQ(1UL, f2->undefinedSymbols.size()); - const Symbol& barUndef = f2->undefinedSymbols[0]; - EXPECT_TRUE(barUndef.name.equals("_bar")); - EXPECT_EQ(N_UNDF, barUndef.type); - EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); - - EXPECT_EQ(1UL, f2->sections.size()); - const Section& text = f2->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(S_REGULAR, text.type); - 
EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 4U); - EXPECT_EQ(text.address, Hex64(0x0)); - EXPECT_EQ(18UL, text.content.size()); - const Relocation& blx = text.relocations[0]; - EXPECT_EQ(blx.offset, Hex32(0x0)); - EXPECT_EQ(blx.scattered, false); - EXPECT_EQ(blx.type, ARM_THUMB_RELOC_BR22); - EXPECT_EQ(blx.pcRel, true); - EXPECT_EQ(blx.length, 2); - EXPECT_EQ(blx.isExtern, true); - EXPECT_EQ(blx.symbol, 2U); - const Relocation& movw1 = text.relocations[1]; - EXPECT_EQ(movw1.offset, Hex32(0x4)); - EXPECT_EQ(movw1.scattered, true); - EXPECT_EQ(movw1.type, ARM_RELOC_HALF_SECTDIFF); - EXPECT_EQ(movw1.length, 2); - EXPECT_EQ(movw1.value, 0x10U); - const Relocation& movw2 = text.relocations[2]; - EXPECT_EQ(movw2.offset, Hex32(0x0)); - EXPECT_EQ(movw2.scattered, true); - EXPECT_EQ(movw2.type, ARM_RELOC_PAIR); - EXPECT_EQ(movw2.length, 2); - EXPECT_EQ(movw2.value, Hex32(0xC)); - const Relocation& movt1 = text.relocations[3]; - EXPECT_EQ(movt1.offset, Hex32(0x8)); - EXPECT_EQ(movt1.scattered, true); - EXPECT_EQ(movt1.type, ARM_RELOC_HALF_SECTDIFF); - EXPECT_EQ(movt1.length, 2); - EXPECT_EQ(movt1.value, Hex32(0x10)); - const Relocation& movt2 = text.relocations[4]; - EXPECT_EQ(movt2.offset, Hex32(0x0)); - EXPECT_EQ(movt2.scattered, true); - EXPECT_EQ(movt2.type, ARM_RELOC_PAIR); - EXPECT_EQ(movt2.length, 2); - EXPECT_EQ(movt2.value, Hex32(0xC)); - const Relocation& absPointer = text.relocations[5]; - EXPECT_EQ(absPointer.offset, Hex32(0xC)); - EXPECT_EQ(absPointer.type, ARM_RELOC_VANILLA); - EXPECT_EQ(absPointer.length, 2); - EXPECT_EQ(absPointer.isExtern, true); - EXPECT_EQ(absPointer.symbol, 2U); - - // lld::errs() << "temp = " << tmpFl << "\n"; - bufferOwner.reset(nullptr); - std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); - EXPECT_FALSE(ec); -} - - - -TEST(BinaryWriterTest, obj_relocs_ppc) { - SmallString<128> tmpFl; - { - NormalizedFile f; - f.arch = 
lld::MachOLinkingContext::arch_ppc; - f.fileType = MH_OBJECT; - f.flags = MH_SUBSECTIONS_VIA_SYMBOLS; - f.os = lld::MachOLinkingContext::OS::macOSX; - f.sections.resize(1); - Section& text = f.sections.front(); - text.segmentName = "__TEXT"; - text.sectionName = "__text"; - text.type = S_REGULAR; - text.attributes = SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS); - text.alignment = 4; - text.address = 0; - const uint8_t textBytes[] = { - 0x48, 0x00, 0x00, 0x01, 0x40, 0x82, 0xff, 0xfc, - 0x3c, 0x62, 0x00, 0x00, 0x3c, 0x62, 0x00, 0x00, - 0x80, 0x63, 0x00, 0x24, 0x80, 0x63, 0x00, 0x24, - 0x3c, 0x40, 0x00, 0x00, 0x3c, 0x60, 0x00, 0x00, - 0x80, 0x42, 0x00, 0x28, 0x80, 0x63, 0x00, 0x28, - 0x60, 0x00, 0x00, 0x00 }; - - text.content = llvm::makeArrayRef(textBytes, sizeof(textBytes)); - text.relocations.push_back(makeReloc(0x00, true, true, - PPC_RELOC_BR24, 2)); - text.relocations.push_back(makeReloc(0x04, true, true, - PPC_RELOC_BR14, 2)); - text.relocations.push_back(makeScatReloc(0x08, - PPC_RELOC_HI16_SECTDIFF, 0x28)); - text.relocations.push_back(makeScatReloc(0x24, - PPC_RELOC_PAIR, 0x4)); - text.relocations.push_back(makeScatReloc(0x0C, - PPC_RELOC_HA16_SECTDIFF, 0x28)); - text.relocations.push_back(makeScatReloc(0x24, - PPC_RELOC_PAIR, 0x4)); - text.relocations.push_back(makeScatReloc(0x10, - PPC_RELOC_LO16_SECTDIFF, 0x28)); - text.relocations.push_back(makeScatReloc(0x00, - PPC_RELOC_PAIR, 0x4)); - text.relocations.push_back(makeScatReloc(0x14, - PPC_RELOC_LO14_SECTDIFF, 0x28)); - text.relocations.push_back(makeScatReloc(0x00, - PPC_RELOC_PAIR, 0x4)); - text.relocations.push_back(makeReloc(0x18, false, false, - PPC_RELOC_HI16, 1)); - text.relocations.push_back(makeReloc(0x28, false, false, - PPC_RELOC_PAIR, 0)); - text.relocations.push_back(makeReloc(0x1C, false, false, - PPC_RELOC_HA16, 1)); - text.relocations.push_back(makeReloc(0x28, false, false, - PPC_RELOC_PAIR, 0)); - text.relocations.push_back(makeReloc(0x20, false, false, - 
PPC_RELOC_LO16, 1)); - text.relocations.push_back(makeReloc(0x00, false, false, - PPC_RELOC_PAIR, 0)); - text.relocations.push_back(makeReloc(0x24, false, false, - PPC_RELOC_LO14, 1)); - text.relocations.push_back(makeReloc(0x00, false, false, - PPC_RELOC_PAIR, 0)); - - f.globalSymbols.push_back(makeSymbol("_foo", 0x00)); - f.globalSymbols.push_back(makeSymbol("_foo2", 0x28)); - f.undefinedSymbols.push_back(makeUndefSymbol("_bar")); - - std::error_code ec = - llvm::sys::fs::createTemporaryFile(Twine("xx"), "o", tmpFl); - EXPECT_FALSE(ec); - llvm::Error ec2 = writeBinary(f, tmpFl); - EXPECT_FALSE(ec2); - } - std::unique_ptr bufferOwner; - std::unique_ptr f2; - fromBinary(tmpFl, bufferOwner, f2, "ppc"); - - EXPECT_EQ(lld::MachOLinkingContext::arch_ppc, f2->arch); - EXPECT_EQ(MH_OBJECT, f2->fileType); - EXPECT_EQ(FileFlags(MH_SUBSECTIONS_VIA_SYMBOLS), f2->flags); - - EXPECT_TRUE(f2->localSymbols.empty()); - EXPECT_EQ(2UL, f2->globalSymbols.size()); - const Symbol& fooDef = f2->globalSymbols[0]; - EXPECT_TRUE(fooDef.name.equals("_foo")); - EXPECT_EQ(N_SECT, fooDef.type); - EXPECT_EQ(1, fooDef.sect); - EXPECT_EQ(SymbolScope(N_EXT), fooDef.scope); - const Symbol& foo2Def = f2->globalSymbols[1]; - EXPECT_TRUE(foo2Def.name.equals("_foo2")); - EXPECT_EQ(N_SECT, foo2Def.type); - EXPECT_EQ(1, foo2Def.sect); - EXPECT_EQ(SymbolScope(N_EXT), foo2Def.scope); - - EXPECT_EQ(1UL, f2->undefinedSymbols.size()); - const Symbol& barUndef = f2->undefinedSymbols[0]; - EXPECT_TRUE(barUndef.name.equals("_bar")); - EXPECT_EQ(N_UNDF, barUndef.type); - EXPECT_EQ(SymbolScope(N_EXT), barUndef.scope); - - EXPECT_EQ(1UL, f2->sections.size()); - const Section& text = f2->sections[0]; - EXPECT_TRUE(text.segmentName.equals("__TEXT")); - EXPECT_TRUE(text.sectionName.equals("__text")); - EXPECT_EQ(S_REGULAR, text.type); - EXPECT_EQ(text.attributes,SectionAttr(S_ATTR_PURE_INSTRUCTIONS - | S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)text.alignment, 4U); - EXPECT_EQ(text.address, Hex64(0x0)); - 
EXPECT_EQ(44UL, text.content.size()); - const Relocation& br24 = text.relocations[0]; - EXPECT_EQ(br24.offset, Hex32(0x0)); - EXPECT_EQ(br24.scattered, false); - EXPECT_EQ(br24.type, PPC_RELOC_BR24); - EXPECT_EQ(br24.pcRel, true); - EXPECT_EQ(br24.length, 2); - EXPECT_EQ(br24.isExtern, true); - EXPECT_EQ(br24.symbol, 2U); - const Relocation& br14 = text.relocations[1]; - EXPECT_EQ(br14.offset, Hex32(0x4)); - EXPECT_EQ(br14.scattered, false); - EXPECT_EQ(br14.type, PPC_RELOC_BR14); - EXPECT_EQ(br14.pcRel, true); - EXPECT_EQ(br14.length, 2); - EXPECT_EQ(br14.isExtern, true); - EXPECT_EQ(br14.symbol, 2U); - const Relocation& pichi1 = text.relocations[2]; - EXPECT_EQ(pichi1.offset, Hex32(0x8)); - EXPECT_EQ(pichi1.scattered, true); - EXPECT_EQ(pichi1.type, PPC_RELOC_HI16_SECTDIFF); - EXPECT_EQ(pichi1.length, 2); - EXPECT_EQ(pichi1.value, 0x28U); - const Relocation& pichi2 = text.relocations[3]; - EXPECT_EQ(pichi2.offset, Hex32(0x24)); - EXPECT_EQ(pichi2.scattered, true); - EXPECT_EQ(pichi2.type, PPC_RELOC_PAIR); - EXPECT_EQ(pichi2.length, 2); - EXPECT_EQ(pichi2.value, 0x4U); - const Relocation& picha1 = text.relocations[4]; - EXPECT_EQ(picha1.offset, Hex32(0xC)); - EXPECT_EQ(picha1.scattered, true); - EXPECT_EQ(picha1.type, PPC_RELOC_HA16_SECTDIFF); - EXPECT_EQ(picha1.length, 2); - EXPECT_EQ(picha1.value, 0x28U); - const Relocation& picha2 = text.relocations[5]; - EXPECT_EQ(picha2.offset, Hex32(0x24)); - EXPECT_EQ(picha2.scattered, true); - EXPECT_EQ(picha2.type, PPC_RELOC_PAIR); - EXPECT_EQ(picha2.length, 2); - EXPECT_EQ(picha2.value, 0x4U); - const Relocation& piclo1 = text.relocations[6]; - EXPECT_EQ(piclo1.offset, Hex32(0x10)); - EXPECT_EQ(piclo1.scattered, true); - EXPECT_EQ(piclo1.type, PPC_RELOC_LO16_SECTDIFF); - EXPECT_EQ(piclo1.length, 2); - EXPECT_EQ(piclo1.value, 0x28U); - const Relocation& piclo2 = text.relocations[7]; - EXPECT_EQ(piclo2.offset, Hex32(0x0)); - EXPECT_EQ(piclo2.scattered, true); - EXPECT_EQ(piclo2.type, PPC_RELOC_PAIR); - 
EXPECT_EQ(piclo2.length, 2); - EXPECT_EQ(piclo2.value, 0x4U); - const Relocation& picloa1 = text.relocations[8]; - EXPECT_EQ(picloa1.offset, Hex32(0x14)); - EXPECT_EQ(picloa1.scattered, true); - EXPECT_EQ(picloa1.type, PPC_RELOC_LO14_SECTDIFF); - EXPECT_EQ(picloa1.length, 2); - EXPECT_EQ(picloa1.value, 0x28U); - const Relocation& picloa2 = text.relocations[9]; - EXPECT_EQ(picloa2.offset, Hex32(0x0)); - EXPECT_EQ(picloa2.scattered, true); - EXPECT_EQ(picloa2.type, PPC_RELOC_PAIR); - EXPECT_EQ(picloa2.length, 2); - EXPECT_EQ(picloa2.value, 0x4U); - const Relocation& abshi1 = text.relocations[10]; - EXPECT_EQ(abshi1.offset, Hex32(0x18)); - EXPECT_EQ(abshi1.scattered, false); - EXPECT_EQ(abshi1.type, PPC_RELOC_HI16); - EXPECT_EQ(abshi1.length, 2); - EXPECT_EQ(abshi1.symbol, 1U); - const Relocation& abshi2 = text.relocations[11]; - EXPECT_EQ(abshi2.offset, Hex32(0x28)); - EXPECT_EQ(abshi2.scattered, false); - EXPECT_EQ(abshi2.type, PPC_RELOC_PAIR); - EXPECT_EQ(abshi2.length, 2); - EXPECT_EQ(abshi2.symbol, 0U); - const Relocation& absha1 = text.relocations[12]; - EXPECT_EQ(absha1.offset, Hex32(0x1C)); - EXPECT_EQ(absha1.scattered, false); - EXPECT_EQ(absha1.type, PPC_RELOC_HA16); - EXPECT_EQ(absha1.length, 2); - EXPECT_EQ(absha1.symbol, 1U); - const Relocation& absha2 = text.relocations[13]; - EXPECT_EQ(absha2.offset, Hex32(0x28)); - EXPECT_EQ(absha2.scattered, false); - EXPECT_EQ(absha2.type, PPC_RELOC_PAIR); - EXPECT_EQ(absha2.length, 2); - EXPECT_EQ(absha2.symbol, 0U); - const Relocation& abslo1 = text.relocations[14]; - EXPECT_EQ(abslo1.offset, Hex32(0x20)); - EXPECT_EQ(abslo1.scattered, false); - EXPECT_EQ(abslo1.type, PPC_RELOC_LO16); - EXPECT_EQ(abslo1.length, 2); - EXPECT_EQ(abslo1.symbol, 1U); - const Relocation& abslo2 = text.relocations[15]; - EXPECT_EQ(abslo2.offset, Hex32(0x00)); - EXPECT_EQ(abslo2.scattered, false); - EXPECT_EQ(abslo2.type, PPC_RELOC_PAIR); - EXPECT_EQ(abslo2.length, 2); - EXPECT_EQ(abslo2.symbol, 0U); - const Relocation& absloa1 = 
text.relocations[16]; - EXPECT_EQ(absloa1.offset, Hex32(0x24)); - EXPECT_EQ(absloa1.scattered, false); - EXPECT_EQ(absloa1.type, PPC_RELOC_LO14); - EXPECT_EQ(absloa1.length, 2); - EXPECT_EQ(absloa1.symbol, 1U); - const Relocation& absloa2 = text.relocations[17]; - EXPECT_EQ(absloa2.offset, Hex32(0x00)); - EXPECT_EQ(absloa2.scattered, false); - EXPECT_EQ(absloa2.type, PPC_RELOC_PAIR); - EXPECT_EQ(absloa2.length, 2); - EXPECT_EQ(absloa2.symbol, 0U); - - bufferOwner.reset(nullptr); - std::error_code ec = llvm::sys::fs::remove(Twine(tmpFl)); - EXPECT_FALSE(ec); -} diff --git a/lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp deleted file mode 100644 index 19534eadaf5b..000000000000 --- a/lld/unittests/MachOTests/MachONormalizedFileToAtomsTests.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//===- lld/unittest/MachOTests/MachONormalizedFileToAtomsTests.cpp --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" -#include "lld/Core/Atom.h" -#include "lld/Core/DefinedAtom.h" -#include "lld/Core/File.h" -#include "lld/Core/UndefinedAtom.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/YAMLTraits.h" -#include "gtest/gtest.h" -#include -#include - -using namespace lld::mach_o::normalized; -using namespace llvm::MachO; - -TEST(ToAtomsTest, empty_obj_x86_64) { - NormalizedFile f; - f.arch = lld::MachOLinkingContext::arch_x86_64; - llvm::Expected> atom_f = - normalizedToAtoms(f, "", false); - EXPECT_FALSE(!atom_f); - EXPECT_EQ(0U, (*atom_f)->defined().size()); -} - -TEST(ToAtomsTest, basic_obj_x86_64) { - NormalizedFile f; - f.arch = lld::MachOLinkingContext::arch_x86_64; - Section textSection; - static const uint8_t contentBytes[] = { 0x90, 0xC3, 0xC3, 0xC4 }; - const unsigned contentSize = sizeof(contentBytes) / sizeof(contentBytes[0]); - textSection.content = llvm::makeArrayRef(contentBytes, contentSize); - f.sections.push_back(textSection); - Symbol fooSymbol; - fooSymbol.name = "_foo"; - fooSymbol.type = N_SECT; - fooSymbol.scope = N_EXT; - fooSymbol.sect = 1; - fooSymbol.value = 0; - f.globalSymbols.push_back(fooSymbol); - Symbol barSymbol; - barSymbol.name = "_bar"; - barSymbol.type = N_SECT; - barSymbol.scope = N_EXT; - barSymbol.sect = 1; - barSymbol.value = 2; - f.globalSymbols.push_back(barSymbol); - Symbol undefSym; - undefSym.name = "_undef"; - undefSym.type = N_UNDF; - f.undefinedSymbols.push_back(undefSym); - Symbol bazSymbol; - bazSymbol.name = "_baz"; - bazSymbol.type = N_SECT; - bazSymbol.scope = N_EXT | N_PEXT; - bazSymbol.sect = 1; - bazSymbol.value = 3; - f.localSymbols.push_back(bazSymbol); - - llvm::Expected> atom_f = - 
normalizedToAtoms(f, "", false); - EXPECT_FALSE(!atom_f); - const lld::File &file = **atom_f; - EXPECT_EQ(3U, file.defined().size()); - auto it = file.defined().begin(); - const lld::DefinedAtom *atom1 = *it; - ++it; - const lld::DefinedAtom *atom2 = *it; - ++it; - const lld::DefinedAtom *atom3 = *it; - const lld::UndefinedAtom *atom4 = *file.undefined().begin(); - EXPECT_TRUE(atom1->name().equals("_foo")); - EXPECT_EQ(2U, atom1->rawContent().size()); - EXPECT_EQ(0x90, atom1->rawContent()[0]); - EXPECT_EQ(0xC3, atom1->rawContent()[1]); - EXPECT_EQ(lld::Atom::scopeGlobal, atom1->scope()); - - EXPECT_TRUE(atom2->name().equals("_bar")); - EXPECT_EQ(1U, atom2->rawContent().size()); - EXPECT_EQ(0xC3, atom2->rawContent()[0]); - EXPECT_EQ(lld::Atom::scopeGlobal, atom2->scope()); - - EXPECT_TRUE(atom3->name().equals("_baz")); - EXPECT_EQ(1U, atom3->rawContent().size()); - EXPECT_EQ(0xC4, atom3->rawContent()[0]); - EXPECT_EQ(lld::Atom::scopeLinkageUnit, atom3->scope()); - - EXPECT_TRUE(atom4->name().equals("_undef")); - EXPECT_EQ(lld::Atom::definitionUndefined, atom4->definition()); -} - -TEST(ToAtomsTest, reservedUnitLength) { - static const uint8_t debugInfoWithReservedLengthContent[12] = { - 0xf0, 0xff, 0xff, 0xff // Reserved length value - }; - static const uint8_t debugInfoWithValidBigLengthContent[12] = { - 0xef, 0xff, 0xff, 0xff, // The maximum valid length value for DWARF32 - 0x00, 0x00 // Wrong version - }; - static const uint8_t dummyContent[] = {0x00}; - - NormalizedFile fReservedLength, fValidBigLength; - fReservedLength.arch = lld::MachOLinkingContext::arch_x86; - fValidBigLength.arch = lld::MachOLinkingContext::arch_x86; - Section section; - section.segmentName = "__DWARF"; - section.sectionName = "__debug_info"; - section.content = llvm::makeArrayRef(debugInfoWithReservedLengthContent); - fReservedLength.sections.push_back(section); - section.content = llvm::makeArrayRef(debugInfoWithValidBigLengthContent); - fValidBigLength.sections.push_back(section); - 
section.sectionName = "__debug_abbrev"; - section.content = llvm::makeArrayRef(dummyContent); - fReservedLength.sections.push_back(section); - fValidBigLength.sections.push_back(section); - section.sectionName = "__debug_str"; - fReservedLength.sections.push_back(section); - fValidBigLength.sections.push_back(section); - - auto resultReservedLength = normalizedToAtoms(fReservedLength, "foo", false); - auto resultValidBigLength = normalizedToAtoms(fValidBigLength, "foo", false); - - // Both cases should return errors, but different. - ASSERT_FALSE(resultReservedLength); - ASSERT_FALSE(resultValidBigLength); - - EXPECT_STREQ("Malformed DWARF in foo", - toString(resultReservedLength.takeError()).c_str()); - EXPECT_STREQ("Unsupported DWARF version in foo", - toString(resultValidBigLength.takeError()).c_str()); -} diff --git a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp deleted file mode 100644 index dbfe3a051811..000000000000 --- a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp +++ /dev/null @@ -1,762 +0,0 @@ -//===- lld/unittest/MachOTests/MachONormalizedFileYAMLTests.cpp -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../../lib/ReaderWriter/MachO/MachONormalizedFile.h" -#include "lld/ReaderWriter/MachOLinkingContext.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include "gtest/gtest.h" -#include -#include -#include -#include - -using llvm::StringRef; -using llvm::MemoryBuffer; -using lld::mach_o::normalized::NormalizedFile; -using lld::mach_o::normalized::Symbol; -using lld::mach_o::normalized::Section; -using lld::mach_o::normalized::Relocation; - -static std::unique_ptr fromYAML(StringRef str) { - std::unique_ptr mb(MemoryBuffer::getMemBuffer(str)); - llvm::Expected> r - = lld::mach_o::normalized::readYaml(mb); - EXPECT_FALSE(!r); - return std::move(*r); -} - -static void toYAML(const NormalizedFile &f, std::string &out) { - llvm::raw_string_ostream ostr(out); - std::error_code ec = lld::mach_o::normalized::writeYaml(f, ostr); - EXPECT_TRUE(!ec); -} - -// ppc is no longer supported, but it is here to test endianness handling. 
-TEST(ObjectFileYAML, empty_ppc) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: ppc\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->sections.empty()); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(ObjectFileYAML, empty_x86_64) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: x86_64\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->sections.empty()); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(ObjectFileYAML, empty_x86) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: x86\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->sections.empty()); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(ObjectFileYAML, empty_armv6) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: armv6\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->sections.empty()); 
- EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(ObjectFileYAML, empty_armv7) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: armv7\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->sections.empty()); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(ObjectFileYAML, empty_armv7s) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: armv7s\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7s); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f->sections.empty()); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); -} - -TEST(ObjectFileYAML, roundTrip) { - std::string intermediate; - { - NormalizedFile f; - f.arch = lld::MachOLinkingContext::arch_x86_64; - f.fileType = llvm::MachO::MH_OBJECT; - f.flags = (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; - f.os = lld::MachOLinkingContext::OS::macOSX; - toYAML(f, intermediate); - } - { - std::unique_ptr f2 = fromYAML(intermediate); - EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ((int)(f2->fileType), llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f2->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_TRUE(f2->sections.empty()); - EXPECT_TRUE(f2->localSymbols.empty()); - EXPECT_TRUE(f2->globalSymbols.empty()); - EXPECT_TRUE(f2->undefinedSymbols.empty()); - } -} - -TEST(ObjectFileYAML, oneSymbol) { - std::unique_ptr f = fromYAML( - 
"---\n" - "arch: x86_64\n" - "file-type: MH_OBJECT\n" - "global-symbols:\n" - " - name: _main\n" - " type: N_SECT\n" - " scope: [ N_EXT ]\n" - " sect: 1\n" - " desc: [ ]\n" - " value: 0x100\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_TRUE(f->sections.empty()); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& sym = f->globalSymbols[0]; - EXPECT_TRUE(sym.name.equals("_main")); - EXPECT_EQ((int)(sym.type), llvm::MachO::N_SECT); - EXPECT_EQ((int)(sym.scope), llvm::MachO::N_EXT); - EXPECT_EQ(sym.sect, 1); - EXPECT_EQ((int)(sym.desc), 0); - EXPECT_EQ((uint64_t)sym.value, 0x100ULL); -} - -TEST(ObjectFileYAML, oneSection) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: x86_64\n" - "file-type: MH_OBJECT\n" - "sections:\n" - " - segment: __TEXT\n" - " section: __text\n" - " type: S_REGULAR\n" - " attributes: [ S_ATTR_PURE_INSTRUCTIONS ]\n" - " alignment: 2\n" - " address: 0x12345678\n" - " content: [ 0x90, 0x90 ]\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_TRUE(f->localSymbols.empty()); - EXPECT_TRUE(f->globalSymbols.empty()); - EXPECT_TRUE(f->undefinedSymbols.empty()); - EXPECT_EQ(f->sections.size(), 1UL); - const Section& sect = f->sections[0]; - EXPECT_TRUE(sect.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect.sectionName.equals("__text")); - EXPECT_EQ((uint32_t)(sect.type), (uint32_t)(llvm::MachO::S_REGULAR)); - EXPECT_EQ((uint32_t)(sect.attributes), - (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)sect.alignment, 2U); - EXPECT_EQ((uint64_t)sect.address, 0x12345678ULL); - EXPECT_EQ(sect.content.size(), 2UL); - EXPECT_EQ((int)(sect.content[0]), 0x90); - EXPECT_EQ((int)(sect.content[1]), 0x90); -} - -TEST(ObjectFileYAML, hello_x86_64) { - std::unique_ptr f = fromYAML( - 
"---\n" - "arch: x86_64\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "sections:\n" - " - segment: __TEXT\n" - " section: __text\n" - " type: S_REGULAR\n" - " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" - " alignment: 1\n" - " address: 0x0000\n" - " content: [ 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8d, 0x3d, 0x00,\n" - " 0x00, 0x00, 0x00, 0x30, 0xc0, 0xe8, 0x00, 0x00,\n" - " 0x00, 0x00, 0x31, 0xc0, 0x5d, 0xc3 ]\n" - " relocations:\n" - " - offset: 0x0e\n" - " type: X86_64_RELOC_BRANCH\n" - " length: 2\n" - " pc-rel: true\n" - " extern: true\n" - " symbol: 2\n" - " - offset: 0x07\n" - " type: X86_64_RELOC_SIGNED\n" - " length: 2\n" - " pc-rel: true\n" - " extern: true\n" - " symbol: 1\n" - " - segment: __TEXT\n" - " section: __cstring\n" - " type: S_CSTRING_LITERALS\n" - " attributes: [ ]\n" - " alignment: 1\n" - " address: 0x0016\n" - " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" - "global-symbols:\n" - " - name: _main\n" - " type: N_SECT\n" - " scope: [ N_EXT ]\n" - " sect: 1\n" - " value: 0x0\n" - "local-symbols:\n" - " - name: L_.str\n" - " type: N_SECT\n" - " scope: [ ]\n" - " sect: 2\n" - " value: 0x16\n" - "undefined-symbols:\n" - " - name: _printf\n" - " type: N_UNDF\n" - " value: 0x0\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - - const Section& sect1 = f->sections[0]; - EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect1.sectionName.equals("__text")); - EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); - EXPECT_EQ((uint32_t)(sect1.attributes), - (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS - | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)sect1.alignment, 1U); - EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); - EXPECT_EQ(sect1.content.size(), 22UL); 
- EXPECT_EQ((int)(sect1.content[0]), 0x55); - EXPECT_EQ((int)(sect1.content[1]), 0x48); - EXPECT_EQ(sect1.relocations.size(), 2UL); - const Relocation& reloc1 = sect1.relocations[0]; - EXPECT_EQ(reloc1.offset, 0x0eU); - EXPECT_FALSE(reloc1.scattered); - EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::X86_64_RELOC_BRANCH); - EXPECT_EQ(reloc1.length, 2); - EXPECT_TRUE(reloc1.pcRel); - EXPECT_TRUE(reloc1.isExtern); - EXPECT_EQ(reloc1.symbol, 2U); - EXPECT_EQ((int)(reloc1.value), 0); - const Relocation& reloc2 = sect1.relocations[1]; - EXPECT_EQ(reloc2.offset, 0x07U); - EXPECT_FALSE(reloc2.scattered); - EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::X86_64_RELOC_SIGNED); - EXPECT_EQ(reloc2.length, 2); - EXPECT_TRUE(reloc2.pcRel); - EXPECT_TRUE(reloc2.isExtern); - EXPECT_EQ(reloc2.symbol, 1U); - EXPECT_EQ((int)(reloc2.value), 0); - - const Section& sect2 = f->sections[1]; - EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect2.sectionName.equals("__cstring")); - EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); - EXPECT_EQ((uint32_t)(sect2.attributes), 0U); - EXPECT_EQ((uint16_t)sect2.alignment, 1U); - EXPECT_EQ((uint64_t)sect2.address, 0x016ULL); - EXPECT_EQ(sect2.content.size(), 7UL); - EXPECT_EQ((int)(sect2.content[0]), 0x68); - EXPECT_EQ((int)(sect2.content[1]), 0x65); - EXPECT_EQ((int)(sect2.content[2]), 0x6c); - - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& sym1 = f->globalSymbols[0]; - EXPECT_TRUE(sym1.name.equals("_main")); - EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); - EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); - EXPECT_EQ(sym1.sect, 1); - EXPECT_EQ((int)(sym1.desc), 0); - EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); - EXPECT_EQ(f->localSymbols.size(), 1UL); - const Symbol& sym2 = f->localSymbols[0]; - EXPECT_TRUE(sym2.name.equals("L_.str")); - EXPECT_EQ((int)(sym2.type), llvm::MachO::N_SECT); - EXPECT_EQ((int)(sym2.scope), 0); - EXPECT_EQ(sym2.sect, 2); - EXPECT_EQ((int)(sym2.desc), 0); - 
EXPECT_EQ((uint64_t)sym2.value, 0x16ULL); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& sym3 = f->undefinedSymbols[0]; - EXPECT_TRUE(sym3.name.equals("_printf")); - EXPECT_EQ((int)(sym3.type), llvm::MachO::N_UNDF); - EXPECT_EQ((int)(sym3.scope), 0); - EXPECT_EQ(sym3.sect, 0); - EXPECT_EQ((int)(sym3.desc), 0); - EXPECT_EQ((uint64_t)sym3.value, 0x0ULL); -} - -TEST(ObjectFileYAML, hello_x86) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: x86\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "sections:\n" - " - segment: __TEXT\n" - " section: __text\n" - " type: S_REGULAR\n" - " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" - " alignment: 1\n" - " address: 0x0000\n" - " content: [ 0x55, 0x89, 0xe5, 0x83, 0xec, 0x08, 0xe8, 0x00,\n" - " 0x00, 0x00, 0x00, 0x58, 0x8d, 0x80, 0x16, 0x00,\n" - " 0x00, 0x00, 0x89, 0x04, 0x24, 0xe8, 0xe6, 0xff,\n" - " 0xff, 0xff, 0x31, 0xc0, 0x83, 0xc4, 0x08, 0x5d,\n" - " 0xc3 ]\n" - " relocations:\n" - " - offset: 0x16\n" - " type: GENERIC_RELOC_VANILLA\n" - " length: 2\n" - " pc-rel: true\n" - " extern: true\n" - " symbol: 1\n" - " - offset: 0x0e\n" - " scattered: true\n" - " type: GENERIC_RELOC_LOCAL_SECTDIFF\n" - " length: 2\n" - " pc-rel: false\n" - " value: 0x21\n" - " - offset: 0x0\n" - " scattered: true\n" - " type: GENERIC_RELOC_PAIR\n" - " length: 2\n" - " pc-rel: false\n" - " value: 0xb\n" - " - segment: __TEXT\n" - " section: __cstring\n" - " type: S_CSTRING_LITERALS\n" - " attributes: [ ]\n" - " alignment: 1\n" - " address: 0x0021\n" - " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" - "global-symbols:\n" - " - name: _main\n" - " type: N_SECT\n" - " scope: [ N_EXT ]\n" - " sect: 1\n" - " value: 0x0\n" - "undefined-symbols:\n" - " - name: _printf\n" - " type: N_UNDF\n" - " value: 0x0\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), 
(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - - const Section& sect1 = f->sections[0]; - EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect1.sectionName.equals("__text")); - EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); - EXPECT_EQ((uint32_t)(sect1.attributes), - (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS - | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)sect1.alignment, 1U); - EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); - EXPECT_EQ(sect1.content.size(), 33UL); - EXPECT_EQ((int)(sect1.content[0]), 0x55); - EXPECT_EQ((int)(sect1.content[1]), 0x89); - EXPECT_EQ(sect1.relocations.size(), 3UL); - const Relocation& reloc1 = sect1.relocations[0]; - EXPECT_EQ(reloc1.offset, 0x16U); - EXPECT_FALSE(reloc1.scattered); - EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::GENERIC_RELOC_VANILLA); - EXPECT_EQ(reloc1.length, 2); - EXPECT_TRUE(reloc1.pcRel); - EXPECT_TRUE(reloc1.isExtern); - EXPECT_EQ(reloc1.symbol, 1U); - EXPECT_EQ((int)(reloc1.value), 0); - const Relocation& reloc2 = sect1.relocations[1]; - EXPECT_EQ(reloc2.offset, 0x0eU); - EXPECT_TRUE(reloc2.scattered); - EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); - EXPECT_EQ(reloc2.length, 2); - EXPECT_FALSE(reloc2.pcRel); - EXPECT_EQ(reloc2.symbol, 0U); - EXPECT_EQ((int)(reloc2.value), 0x21); - const Relocation& reloc3 = sect1.relocations[2]; - EXPECT_EQ(reloc3.offset, 0U); - EXPECT_TRUE(reloc3.scattered); - EXPECT_EQ((int)reloc3.type, (int)llvm::MachO::GENERIC_RELOC_PAIR); - EXPECT_EQ(reloc3.length, 2); - EXPECT_FALSE(reloc3.pcRel); - EXPECT_EQ(reloc3.symbol, 0U); - EXPECT_EQ((int)(reloc3.value), 0xb); - - const Section& sect2 = f->sections[1]; - EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect2.sectionName.equals("__cstring")); - EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); - EXPECT_EQ((uint32_t)(sect2.attributes), 0U); - 
EXPECT_EQ((uint16_t)sect2.alignment, 1U); - EXPECT_EQ((uint64_t)sect2.address, 0x021ULL); - EXPECT_EQ(sect2.content.size(), 7UL); - EXPECT_EQ((int)(sect2.content[0]), 0x68); - EXPECT_EQ((int)(sect2.content[1]), 0x65); - EXPECT_EQ((int)(sect2.content[2]), 0x6c); - - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& sym1 = f->globalSymbols[0]; - EXPECT_TRUE(sym1.name.equals("_main")); - EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); - EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); - EXPECT_EQ(sym1.sect, 1); - EXPECT_EQ((int)(sym1.desc), 0); - EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& sym2 = f->undefinedSymbols[0]; - EXPECT_TRUE(sym2.name.equals("_printf")); - EXPECT_EQ((int)(sym2.type), llvm::MachO::N_UNDF); - EXPECT_EQ((int)(sym2.scope), 0); - EXPECT_EQ(sym2.sect, 0); - EXPECT_EQ((int)(sym2.desc), 0); - EXPECT_EQ((uint64_t)sym2.value, 0x0ULL); -} - -TEST(ObjectFileYAML, hello_armv6) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: armv6\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "sections:\n" - " - segment: __TEXT\n" - " section: __text\n" - " type: S_REGULAR\n" - " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" - " alignment: 4\n" - " address: 0x0000\n" - " content: [ 0x80, 0x40, 0x2d, 0xe9, 0x10, 0x00, 0x9f, 0xe5,\n" - " 0x0d, 0x70, 0xa0, 0xe1, 0x00, 0x00, 0x8f, 0xe0,\n" - " 0xfa, 0xff, 0xff, 0xeb, 0x00, 0x00, 0xa0, 0xe3,\n" - " 0x80, 0x80, 0xbd, 0xe8, 0x0c, 0x00, 0x00, 0x00 ]\n" - " relocations:\n" - " - offset: 0x1c\n" - " scattered: true\n" - " type: ARM_RELOC_SECTDIFF\n" - " length: 2\n" - " pc-rel: false\n" - " value: 0x20\n" - " - offset: 0x0\n" - " scattered: true\n" - " type: ARM_RELOC_PAIR\n" - " length: 2\n" - " pc-rel: false\n" - " value: 0xc\n" - " - offset: 0x10\n" - " type: ARM_RELOC_BR24\n" - " length: 2\n" - " pc-rel: true\n" - " extern: true\n" - " symbol: 1\n" - " - segment: __TEXT\n" - " section: __cstring\n" - " 
type: S_CSTRING_LITERALS\n" - " attributes: [ ]\n" - " alignment: 1\n" - " address: 0x0020\n" - " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" - "global-symbols:\n" - " - name: _main\n" - " type: N_SECT\n" - " scope: [ N_EXT ]\n" - " sect: 1\n" - " value: 0x0\n" - "undefined-symbols:\n" - " - name: _printf\n" - " type: N_UNDF\n" - " value: 0x0\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - - const Section& sect1 = f->sections[0]; - EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect1.sectionName.equals("__text")); - EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); - EXPECT_EQ((uint32_t)(sect1.attributes), - (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS - | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)sect1.alignment, 4U); - EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); - EXPECT_EQ(sect1.content.size(), 32UL); - EXPECT_EQ((int)(sect1.content[0]), 0x80); - EXPECT_EQ((int)(sect1.content[1]), 0x40); - EXPECT_EQ(sect1.relocations.size(), 3UL); - const Relocation& reloc1 = sect1.relocations[0]; - EXPECT_EQ(reloc1.offset, 0x1cU); - EXPECT_TRUE(reloc1.scattered); - EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::ARM_RELOC_SECTDIFF); - EXPECT_EQ(reloc1.length, 2); - EXPECT_FALSE(reloc1.pcRel); - EXPECT_EQ(reloc1.symbol, 0U); - EXPECT_EQ((int)(reloc1.value), 0x20); - const Relocation& reloc2 = sect1.relocations[1]; - EXPECT_EQ(reloc2.offset, 0x0U); - EXPECT_TRUE(reloc2.scattered); - EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::ARM_RELOC_PAIR); - EXPECT_EQ(reloc2.length, 2); - EXPECT_FALSE(reloc2.pcRel); - EXPECT_EQ(reloc2.symbol, 0U); - EXPECT_EQ((int)(reloc2.value), 0xc); - const Relocation& reloc3 = sect1.relocations[2]; - EXPECT_EQ(reloc3.offset, 0x10U); - EXPECT_FALSE(reloc3.scattered); - 
EXPECT_EQ((int)reloc3.type, (int)llvm::MachO::ARM_RELOC_BR24); - EXPECT_EQ(reloc3.length, 2); - EXPECT_TRUE(reloc3.pcRel); - EXPECT_TRUE(reloc3.isExtern); - EXPECT_EQ(reloc3.symbol, 1U); - EXPECT_EQ((int)(reloc3.value), 0); - - const Section& sect2 = f->sections[1]; - EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect2.sectionName.equals("__cstring")); - EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); - EXPECT_EQ((uint32_t)(sect2.attributes), 0U); - EXPECT_EQ((uint16_t)sect2.alignment, 1U); - EXPECT_EQ((uint64_t)sect2.address, 0x020ULL); - EXPECT_EQ(sect2.content.size(), 7UL); - EXPECT_EQ((int)(sect2.content[0]), 0x68); - EXPECT_EQ((int)(sect2.content[1]), 0x65); - EXPECT_EQ((int)(sect2.content[2]), 0x6c); - - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& sym1 = f->globalSymbols[0]; - EXPECT_TRUE(sym1.name.equals("_main")); - EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); - EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); - EXPECT_EQ(sym1.sect, 1); - EXPECT_EQ((int)(sym1.desc), 0); - EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& sym2 = f->undefinedSymbols[0]; - EXPECT_TRUE(sym2.name.equals("_printf")); - EXPECT_EQ((int)(sym2.type), llvm::MachO::N_UNDF); - EXPECT_EQ((int)(sym2.scope), 0); - EXPECT_EQ(sym2.sect, 0); - EXPECT_EQ((int)(sym2.desc), 0); - EXPECT_EQ((uint64_t)sym2.value, 0x0ULL); -} - -TEST(ObjectFileYAML, hello_armv7) { - std::unique_ptr f = fromYAML( - "---\n" - "arch: armv7\n" - "file-type: MH_OBJECT\n" - "flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]\n" - "sections:\n" - " - segment: __TEXT\n" - " section: __text\n" - " type: S_REGULAR\n" - " attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS]\n" - " alignment: 2\n" - " address: 0x0000\n" - " content: [ 0x80, 0xb5, 0x40, 0xf2, 0x06, 0x00, 0x6f, 0x46,\n" - " 0xc0, 0xf2, 0x00, 0x00, 0x78, 0x44, 0xff, 0xf7,\n" - " 0xf8, 0xef, 0x00, 0x20, 0x80, 0xbd ]\n" - " relocations:\n" - " - 
offset: 0x0e\n" - " type: ARM_THUMB_RELOC_BR22\n" - " length: 2\n" - " pc-rel: true\n" - " extern: true\n" - " symbol: 1\n" - " - offset: 0x08\n" - " scattered: true\n" - " type: ARM_RELOC_HALF_SECTDIFF\n" - " length: 3\n" - " pc-rel: false\n" - " value: 0x16\n" - " - offset: 0x06\n" - " scattered: true\n" - " type: ARM_RELOC_PAIR\n" - " length: 3\n" - " pc-rel: false\n" - " value: 0xc\n" - " - offset: 0x02\n" - " scattered: true\n" - " type: ARM_RELOC_HALF_SECTDIFF\n" - " length: 2\n" - " pc-rel: false\n" - " value: 0x16\n" - " - offset: 0x0\n" - " scattered: true\n" - " type: ARM_RELOC_PAIR\n" - " length: 2\n" - " pc-rel: false\n" - " value: 0xc\n" - " - segment: __TEXT\n" - " section: __cstring\n" - " type: S_CSTRING_LITERALS\n" - " attributes: [ ]\n" - " alignment: 1\n" - " address: 0x0016\n" - " content: [ 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x00 ]\n" - "global-symbols:\n" - " - name: _main\n" - " type: N_SECT\n" - " scope: [ N_EXT ]\n" - " sect: 1\n" - " desc: [ N_ARM_THUMB_DEF ]\n" - " value: 0x0\n" - "undefined-symbols:\n" - " - name: _printf\n" - " type: N_UNDF\n" - " value: 0x0\n" - "...\n"); - EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); - EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); - EXPECT_EQ(f->sections.size(), 2UL); - - const Section& sect1 = f->sections[0]; - EXPECT_TRUE(sect1.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect1.sectionName.equals("__text")); - EXPECT_EQ((uint32_t)(sect1.type), (uint32_t)(llvm::MachO::S_REGULAR)); - EXPECT_EQ((uint32_t)(sect1.attributes), - (uint32_t)(llvm::MachO::S_ATTR_PURE_INSTRUCTIONS - | llvm::MachO::S_ATTR_SOME_INSTRUCTIONS)); - EXPECT_EQ((uint16_t)sect1.alignment, 2U); - EXPECT_EQ((uint64_t)sect1.address, 0x0ULL); - EXPECT_EQ(sect1.content.size(), 22UL); - EXPECT_EQ((int)(sect1.content[0]), 0x80); - EXPECT_EQ((int)(sect1.content[1]), 0xb5); - EXPECT_EQ(sect1.relocations.size(), 5UL); - const Relocation& reloc1 = 
sect1.relocations[0]; - EXPECT_EQ(reloc1.offset, 0x0eU); - EXPECT_FALSE(reloc1.scattered); - EXPECT_EQ((int)reloc1.type, (int)llvm::MachO::ARM_THUMB_RELOC_BR22); - EXPECT_EQ(reloc1.length, 2); - EXPECT_TRUE(reloc1.pcRel); - EXPECT_TRUE(reloc1.isExtern); - EXPECT_EQ(reloc1.symbol, 1U); - EXPECT_EQ((int)(reloc1.value), 0); - const Relocation& reloc2 = sect1.relocations[1]; - EXPECT_EQ(reloc2.offset, 0x8U); - EXPECT_TRUE(reloc2.scattered); - EXPECT_EQ((int)reloc2.type, (int)llvm::MachO::ARM_RELOC_HALF_SECTDIFF); - EXPECT_EQ(reloc2.length, 3); - EXPECT_FALSE(reloc2.pcRel); - EXPECT_EQ(reloc2.symbol, 0U); - EXPECT_EQ((int)(reloc2.value), 0x16); - const Relocation& reloc3 = sect1.relocations[2]; - EXPECT_EQ(reloc3.offset, 0x6U); - EXPECT_TRUE(reloc3.scattered); - EXPECT_EQ((int)reloc3.type, (int)llvm::MachO::ARM_RELOC_PAIR); - EXPECT_EQ(reloc3.length, 3); - EXPECT_FALSE(reloc3.pcRel); - EXPECT_EQ(reloc3.symbol, 0U); - EXPECT_EQ((int)(reloc3.value), 0xc); - const Relocation& reloc4 = sect1.relocations[3]; - EXPECT_EQ(reloc4.offset, 0x2U); - EXPECT_TRUE(reloc4.scattered); - EXPECT_EQ((int)reloc4.type, (int)llvm::MachO::ARM_RELOC_HALF_SECTDIFF); - EXPECT_EQ(reloc4.length, 2); - EXPECT_FALSE(reloc4.pcRel); - EXPECT_EQ(reloc4.symbol, 0U); - EXPECT_EQ((int)(reloc4.value), 0x16); - const Relocation& reloc5 = sect1.relocations[4]; - EXPECT_EQ(reloc5.offset, 0x0U); - EXPECT_TRUE(reloc5.scattered); - EXPECT_EQ((int)reloc5.type, (int)llvm::MachO::ARM_RELOC_PAIR); - EXPECT_EQ(reloc5.length, 2); - EXPECT_FALSE(reloc5.pcRel); - EXPECT_EQ(reloc5.symbol, 0U); - EXPECT_EQ((int)(reloc5.value), 0xc); - - const Section& sect2 = f->sections[1]; - EXPECT_TRUE(sect2.segmentName.equals("__TEXT")); - EXPECT_TRUE(sect2.sectionName.equals("__cstring")); - EXPECT_EQ((uint32_t)(sect2.type), (uint32_t)(llvm::MachO::S_CSTRING_LITERALS)); - EXPECT_EQ((uint32_t)(sect2.attributes), 0U); - EXPECT_EQ((uint16_t)sect2.alignment, 1U); - EXPECT_EQ((uint64_t)sect2.address, 0x016ULL); - 
EXPECT_EQ(sect2.content.size(), 7UL); - EXPECT_EQ((int)(sect2.content[0]), 0x68); - EXPECT_EQ((int)(sect2.content[1]), 0x65); - EXPECT_EQ((int)(sect2.content[2]), 0x6c); - - EXPECT_EQ(f->globalSymbols.size(), 1UL); - const Symbol& sym1 = f->globalSymbols[0]; - EXPECT_TRUE(sym1.name.equals("_main")); - EXPECT_EQ((int)(sym1.type), llvm::MachO::N_SECT); - EXPECT_EQ((int)(sym1.scope), llvm::MachO::N_EXT); - EXPECT_EQ(sym1.sect, 1); - EXPECT_EQ((int)(sym1.desc), (int)(llvm::MachO::N_ARM_THUMB_DEF)); - EXPECT_EQ((uint64_t)sym1.value, 0x0ULL); - EXPECT_EQ(f->undefinedSymbols.size(), 1UL); - const Symbol& sym2 = f->undefinedSymbols[0]; - EXPECT_TRUE(sym2.name.equals("_printf")); - EXPECT_EQ((int)(sym2.type), llvm::MachO::N_UNDF); - EXPECT_EQ((int)(sym2.scope), 0); - EXPECT_EQ(sym2.sect, 0); - EXPECT_EQ((int)(sym2.desc), 0); - EXPECT_EQ((uint64_t)sym2.value, 0x0ULL); -} diff --git a/lld/unittests/MachOTests/empty_obj_x86_armv7.txt b/lld/unittests/MachOTests/empty_obj_x86_armv7.txt deleted file mode 100644 index 9d340cb7132e..000000000000 --- a/lld/unittests/MachOTests/empty_obj_x86_armv7.txt +++ /dev/null @@ -1,1272 +0,0 @@ -0xca, 0xfe, 0xba, 0xbe, 0x00, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x07, 0x00, -0x00, 0x00, 0x03, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, -0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x40, -0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xcf, 0xfa, 0xed, 0xfe, 0x07, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00, -0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x00, -0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x98, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, -0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0xce, 0xfa, 0xed, 0xfe, 0x0c, 0x00, 0x00, 0x00, 0x09, -0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, -0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, -0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x74, -0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp deleted file mode 100644 index c0c819632851..000000000000 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//===-- LibCxxOptional.cpp 
------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "LibCxx.h" -#include "lldb/DataFormatters/FormattersHelpers.h" - -using namespace lldb; -using namespace lldb_private; - -namespace { - -class OptionalFrontEnd : public SyntheticChildrenFrontEnd { -public: - OptionalFrontEnd(ValueObject &valobj) : SyntheticChildrenFrontEnd(valobj) { - Update(); - } - - size_t GetIndexOfChildWithName(ConstString name) override { - return formatters::ExtractIndexFromString(name.GetCString()); - } - - bool MightHaveChildren() override { return true; } - bool Update() override; - size_t CalculateNumChildren() override { return m_has_value ? 1U : 0U; } - ValueObjectSP GetChildAtIndex(size_t idx) override; - -private: - /// True iff the option contains a value. - bool m_has_value = false; -}; -} // namespace - -bool OptionalFrontEnd::Update() { - ValueObjectSP engaged_sp( - m_backend.GetChildMemberWithName(ConstString("__engaged_"), true)); - - if (!engaged_sp) - return false; - - // __engaged_ is a bool flag and is true if the optional contains a value. - // Converting it to unsigned gives us a size of 1 if it contains a value - // and 0 if not. - m_has_value = engaged_sp->GetValueAsUnsigned(0) == 1; - - return false; -} - -ValueObjectSP OptionalFrontEnd::GetChildAtIndex(size_t idx) { - if (!m_has_value) - return ValueObjectSP(); - - // __val_ contains the underlying value of an optional if it has one. - // Currently because it is part of an anonymous union GetChildMemberWithName() - // does not peer through and find it unless we are at the parent itself. - // We can obtain the parent through __engaged_. 
- ValueObjectSP val_sp( - m_backend.GetChildMemberWithName(ConstString("__engaged_"), true) - ->GetParent() - ->GetChildAtIndex(0, true) - ->GetChildMemberWithName(ConstString("__val_"), true)); - - if (!val_sp) - return ValueObjectSP(); - - CompilerType holder_type = val_sp->GetCompilerType(); - - if (!holder_type) - return ValueObjectSP(); - - return val_sp->Clone(ConstString("Value")); -} - -SyntheticChildrenFrontEnd * -formatters::LibcxxOptionalFrontEndCreator(CXXSyntheticChildren *, - lldb::ValueObjectSP valobj_sp) { - if (valobj_sp) - return new OptionalFrontEnd(*valobj_sp); - return nullptr; -} diff --git a/lldb/source/lldb.cpp b/lldb/source/lldb.cpp deleted file mode 100644 index 371902f6c1b5..000000000000 --- a/lldb/source/lldb.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//===-- lldb.cpp ----------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "VCSVersion.inc" -#include "lldb/lldb-private.h" -#include "clang/Basic/Version.h" - -using namespace lldb; -using namespace lldb_private; - -// LLDB_VERSION_STRING is set through a define so unlike the other defines -// expanded with CMake, it lacks the double quotes. 
-#define QUOTE(str) #str -#define EXPAND_AND_QUOTE(str) QUOTE(str) - -static const char *GetLLDBVersion() { -#ifdef LLDB_VERSION_STRING - return EXPAND_AND_QUOTE(LLDB_VERSION_STRING); -#else - return "lldb version " CLANG_VERSION_STRING; -#endif -} - -static const char *GetLLDBRevision() { -#ifdef LLDB_REVISION - return LLDB_REVISION; -#else - return NULL; -#endif -} - -static const char *GetLLDBRepository() { -#ifdef LLDB_REPOSITORY - return LLDB_REPOSITORY; -#else - return NULL; -#endif -} - -const char *lldb_private::GetVersion() { - static std::string g_version_str; - if (g_version_str.empty()) { - const char *lldb_version = GetLLDBVersion(); - const char *lldb_repo = GetLLDBRepository(); - const char *lldb_rev = GetLLDBRevision(); - g_version_str += lldb_version; - if (lldb_repo || lldb_rev) { - g_version_str += " ("; - if (lldb_repo) - g_version_str += lldb_repo; - if (lldb_repo && lldb_rev) - g_version_str += " "; - if (lldb_rev) { - g_version_str += "revision "; - g_version_str += lldb_rev; - } - g_version_str += ")"; - } - - std::string clang_rev(clang::getClangRevision()); - if (clang_rev.length() > 0) { - g_version_str += "\n clang revision "; - g_version_str += clang_rev; - } - std::string llvm_rev(clang::getLLVMRevision()); - if (llvm_rev.length() > 0) { - g_version_str += "\n llvm revision "; - g_version_str += llvm_rev; - } - } - return g_version_str.c_str(); -} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile deleted file mode 100644 index c5df567e01a2..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -CXX_SOURCES := main.cpp - -USE_LIBCPP := 1 - -include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py 
b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py deleted file mode 100644 index b9949288c989..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/TestDataFormatterLibcxxDeque.py +++ /dev/null @@ -1,25 +0,0 @@ -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class LibcxxDequeDataFormatterTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - @add_test_categories(["libc++"]) - def test(self): - self.build() - lldbutil.run_to_source_breakpoint(self, "break here", - lldb.SBFileSpec("main.cpp")) - - self.expect_expr("empty", result_children=[]) - self.expect_expr("deque_1", result_children=[ - ValueCheck(name="[0]", value="1"), - ]) - self.expect_expr("deque_3", result_children=[ - ValueCheck(name="[0]", value="3"), - ValueCheck(name="[1]", value="1"), - ValueCheck(name="[2]", value="2") - ]) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp deleted file mode 100644 index 43c3f374a0f9..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/deque/main.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int main() { - std::deque empty; - std::deque deque_1 = {1}; - std::deque deque_3 = {3, 1, 2}; - return empty.size() + deque_1.front() + deque_3.front(); // break here -} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile deleted file mode 100644 index 564cbada74e0..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -CXX_SOURCES := main.cpp - -USE_LIBCPP := 1 - -CXXFLAGS_EXTRAS := -O0 -include 
Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py deleted file mode 100644 index 8de749d74f03..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/TestDataFormatterLibcxxList.py +++ /dev/null @@ -1,218 +0,0 @@ -""" -Test lldb data formatter subsystem. -""" - - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class LibcxxListDataFormatterTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break at. - self.line = line_number('main.cpp', '// Set break point at this line.') - self.line2 = line_number('main.cpp', - '// Set second break point at this line.') - self.line3 = line_number('main.cpp', - '// Set third break point at this line.') - self.line4 = line_number('main.cpp', - '// Set fourth break point at this line.') - - @add_test_categories(["libc++"]) - def test_with_run_command(self): - """Test that that file and class static variables display correctly.""" - self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1) - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line2, num_expected_locations=-1) - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line3, num_expected_locations=-1) - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line4, num_expected_locations=-1) - - self.runCmd("run", RUN_SUCCEEDED) - - lldbutil.skip_if_library_missing( - self, self.target(), lldbutil.PrintableRegex("libc\+\+")) - - # The stop reason of the thread should be 
breakpoint. - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - # This is the function to remove the custom formats in order to have a - # clean slate for the next test case. - def cleanup(): - self.runCmd('type format clear', check=False) - self.runCmd('type summary clear', check=False) - self.runCmd('type filter clear', check=False) - self.runCmd('type synth clear', check=False) - self.runCmd( - "settings set target.max-children-count 256", - check=False) - - # Execute the cleanup function during test case tear down. - self.addTearDownHook(cleanup) - - self.runCmd("frame variable numbers_list --show-types") - self.runCmd( - "type summary add std::int_list std::string_list int_list string_list --summary-string \"list has ${svar%#} items\" -e") - self.runCmd("type format add -f hex int") - - self.expect("frame variable numbers_list --raw", matching=False, - substrs=['list has 0 items', - '{}']) - - self.expect("frame variable numbers_list", - substrs=['list has 0 items', - '{}']) - - self.expect("p numbers_list", - substrs=['list has 0 items', - '{}']) - - self.runCmd("n") # This gets up past the printf - self.runCmd("n") # Now advance over the first push_back. 
- - self.expect("frame variable numbers_list", - substrs=['list has 1 items', - '[0] = ', - '0x12345678']) - - self.runCmd("n") - self.runCmd("n") - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['list has 4 items', - '[0] = ', - '0x12345678', - '[1] =', - '0x11223344', - '[2] =', - '0xbeeffeed', - '[3] =', - '0x00abba00']) - - self.runCmd("n") - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['list has 6 items', - '[0] = ', - '0x12345678', - '0x11223344', - '0xbeeffeed', - '0x00abba00', - '[4] =', - '0x0abcdef0', - '[5] =', - '0x0cab0cab']) - - self.expect("p numbers_list", - substrs=['list has 6 items', - '[0] = ', - '0x12345678', - '0x11223344', - '0xbeeffeed', - '0x00abba00', - '[4] =', - '0x0abcdef0', - '[5] =', - '0x0cab0cab']) - - # check access-by-index - self.expect("frame variable numbers_list[0]", - substrs=['0x12345678']) - self.expect("frame variable numbers_list[1]", - substrs=['0x11223344']) - - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['list has 0 items', - '{}']) - - self.runCmd("n") - self.runCmd("n") - self.runCmd("n") - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['list has 4 items', - '[0] = ', '1', - '[1] = ', '2', - '[2] = ', '3', - '[3] = ', '4']) - - ListPtr = self.frame().FindVariable("list_ptr") - self.assertTrue(ListPtr.GetChildAtIndex( - 0).GetValueAsUnsigned(0) == 1, "[0] = 1") - - # check that MightHaveChildren() gets it right - self.assertTrue( - self.frame().FindVariable("numbers_list").MightHaveChildren(), - "numbers_list.MightHaveChildren() says False for non empty!") - - self.runCmd("type format delete int") - - self.runCmd("c") - - self.expect("frame variable text_list", - substrs=['list has 3 items', - '[0]', 'goofy', - '[1]', 'is', - '[2]', 'smart']) - - # check that MightHaveChildren() gets it right - self.assertTrue( - self.frame().FindVariable("text_list").MightHaveChildren(), - 
"text_list.MightHaveChildren() says False for non empty!") - - self.expect("p text_list", - substrs=['list has 3 items', - '\"goofy\"', - '\"is\"', - '\"smart\"']) - - self.runCmd("n") # This gets us past the printf - self.runCmd("n") - self.runCmd("n") - - # check access-by-index - self.expect("frame variable text_list[0]", - substrs=['goofy']) - self.expect("frame variable text_list[3]", - substrs=['!!!']) - - self.runCmd("continue") - - # check that the list provider correctly updates if elements move - countingList = self.frame().FindVariable("countingList") - countingList.SetPreferDynamicValue(True) - countingList.SetPreferSyntheticValue(True) - - self.assertTrue(countingList.GetChildAtIndex( - 0).GetValueAsUnsigned(0) == 3141, "list[0] == 3141") - self.assertTrue(countingList.GetChildAtIndex( - 1).GetValueAsUnsigned(0) == 3141, "list[1] == 3141") - - self.runCmd("continue") - - self.assertEqual( - countingList.GetChildAtIndex(0).GetValueAsUnsigned(0), 3141, - "uniqued list[0] == 3141") - self.assertEqual( - countingList.GetChildAtIndex(1).GetValueAsUnsigned(0), 3142, - "uniqued list[1] == 3142") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile deleted file mode 100644 index 564cbada74e0..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -CXX_SOURCES := main.cpp - -USE_LIBCPP := 1 - -CXXFLAGS_EXTRAS := -O0 -include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py deleted file mode 100644 index 1678c513e50b..000000000000 --- 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/TestDataFormatterLibcxxListLoop.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Test that the debugger handles loops in std::list (which can appear as a result of e.g. memory -corruption). -""" - - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class LibcxxListDataFormatterTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - NO_DEBUG_INFO_TESTCASE = True - - @add_test_categories(["libc++"]) - @expectedFailureAndroid(bugnumber="llvm.org/pr32592") - def test_with_run_command(self): - self.build() - exe = self.getBuildArtifact("a.out") - target = self.dbg.CreateTarget(exe) - self.assertTrue(target and target.IsValid(), "Target is valid") - - file_spec = lldb.SBFileSpec("main.cpp", False) - breakpoint1 = target.BreakpointCreateBySourceRegex( - '// Set break point at this line.', file_spec) - self.assertTrue(breakpoint1 and breakpoint1.IsValid()) - breakpoint2 = target.BreakpointCreateBySourceRegex( - '// Set second break point at this line.', file_spec) - self.assertTrue(breakpoint2 and breakpoint2.IsValid()) - - # Run the program, it should stop at breakpoint 1. - process = target.LaunchSimple( - None, None, self.get_process_working_directory()) - self.assertTrue(process and process.IsValid(), PROCESS_IS_VALID) - self.assertEqual( - len(lldbutil.get_threads_stopped_at_breakpoint(process, breakpoint1)), 1) - - # verify our list is displayed correctly - self.expect( - "frame variable *numbers_list", - substrs=[ - '[0] = 1', - '[1] = 2', - '[2] = 3', - '[3] = 4', - '[5] = 6']) - - # Continue to breakpoint 2. - process.Continue() - self.assertTrue(process and process.IsValid(), PROCESS_IS_VALID) - self.assertEqual( - len(lldbutil.get_threads_stopped_at_breakpoint(process, breakpoint2)), 1) - - # The list is now inconsistent. 
However, we should be able to get the first three - # elements at least (and most importantly, not crash). - self.expect( - "frame variable *numbers_list", - substrs=[ - '[0] = 1', - '[1] = 2', - '[2] = 3']) - - # Run to completion. - process.Continue() - self.assertEqual(process.GetState(), lldb.eStateExited, PROCESS_EXITED) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp deleted file mode 100644 index e07e93838b9f..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/loop/main.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Evil hack: To simulate memory corruption, we want to fiddle with some internals of std::list. -// Make those accessible to us. -#define private public -#define protected public - -#include -#include -#include - -typedef std::list int_list; - -int main() -{ -#ifdef LLDB_USING_LIBCPP - int_list *numbers_list = new int_list{1,2,3,4,5,6,7,8,9,10}; - - printf("// Set break point at this line."); - -#if _LIBCPP_VERSION >= 3800 - auto *third_elem = numbers_list->__end_.__next_->__next_->__next_; - assert(third_elem->__as_node()->__value_ == 3); - auto *fifth_elem = third_elem->__next_->__next_; - assert(fifth_elem->__as_node()->__value_ == 5); -#else - auto *third_elem = numbers_list->__end_.__next_->__next_->__next_; - assert(third_elem->__value_ == 3); - auto *fifth_elem = third_elem->__next_->__next_; - assert(fifth_elem->__value_ == 5); -#endif - fifth_elem->__next_ = third_elem; -#endif - - // Any attempt to free the list will probably crash the program. Let's just leak it. - return 0; // Set second break point at this line. 
-} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp deleted file mode 100644 index a3ef06b18e74..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/list/main.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include -#include -#include - -typedef std::list int_list; -typedef std::list string_list; - -int main() -{ - int_list numbers_list; - std::list* list_ptr = &numbers_list; - - printf("// Set break point at this line."); - (numbers_list.push_back(0x12345678)); - (numbers_list.push_back(0x11223344)); - (numbers_list.push_back(0xBEEFFEED)); - (numbers_list.push_back(0x00ABBA00)); - (numbers_list.push_back(0x0ABCDEF0)); - (numbers_list.push_back(0x0CAB0CAB)); - - numbers_list.clear(); - - (numbers_list.push_back(1)); - (numbers_list.push_back(2)); - (numbers_list.push_back(3)); - (numbers_list.push_back(4)); - - string_list text_list; - (text_list.push_back(std::string("goofy"))); - (text_list.push_back(std::string("is"))); - (text_list.push_back(std::string("smart"))); - - printf("// Set second break point at this line."); - (text_list.push_back(std::string("!!!"))); - - std::list countingList = {3141, 3142, 3142,3142,3142, 3142, 3142, 3141}; - countingList.sort(); - printf("// Set third break point at this line."); - countingList.unique(); - printf("// Set fourth break point at this line."); - countingList.size(); - - return 0; -} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile deleted file mode 100644 index 23496eb20657..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -CXX_SOURCES := main.cpp - -USE_LIBCPP := 1 - -CXXFLAGS_EXTRAS := -std=c++17 -fno-exceptions -include 
Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py deleted file mode 100644 index 27c8d7f474ed..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/TestDataFormatterLibcxxOptional.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Test lldb data formatter subsystem. -""" - - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class LibcxxOptionalDataFormatterTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - @add_test_categories(["libc++"]) - ## Clang 7.0 is the oldest Clang that can reliably parse newer libc++ versions - ## with -std=c++17. - @skipIf(oslist=no_match(["macosx"]), compiler="clang", compiler_version=['<', '7.0']) - ## We are skipping gcc version less that 5.1 since this test requires -std=c++17 - @skipIf(compiler="gcc", compiler_version=['<', '5.1']) - - def test_with_run_command(self): - """Test that that file and class static variables display correctly.""" - self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - bkpt = self.target().FindBreakpointByID( - lldbutil.run_break_set_by_source_regexp( - self, "break here")) - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. 
- self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - self.runCmd( "frame variable has_optional" ) - - output = self.res.GetOutput() - - ## The variable has_optional tells us if the test program - ## detected we have a sufficient libc++ version to support optional - ## false means we do not and therefore should skip the test - if output.find("(bool) has_optional = false") != -1 : - self.skipTest( "Optional not supported" ) - - lldbutil.continue_to_breakpoint(self.process(), bkpt) - - self.expect("frame variable number_not_engaged", - substrs=['Has Value=false']) - - self.expect("frame variable number_engaged", - substrs=['Has Value=true', - 'Value = 42', - '}']) - - self.expect("frame var numbers", - substrs=['(optional_int_vect) numbers = Has Value=true {', - 'Value = size=4 {', - '[0] = 1', - '[1] = 2', - '[2] = 3', - '[3] = 4', - '}', - '}']) - - self.expect("frame var ostring", - substrs=['(optional_string) ostring = Has Value=true {', - 'Value = "hello"', - '}']) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp deleted file mode 100644 index 16bb98c61056..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/optional/main.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#include - -// If we have libc++ 4.0 or greater we should have -// According to libc++ C++1z status page https://libcxx.llvm.org/cxx1z_status.html -#if _LIBCPP_VERSION >= 4000 -#include -#define HAVE_OPTIONAL 1 -#else -#define HAVE_OPTIONAL 0 -#endif - - -int main() -{ - bool has_optional = HAVE_OPTIONAL ; - - printf( "%d\n", has_optional ) ; // break here - -#if HAVE_OPTIONAL == 1 - using int_vect = std::vector ; - using optional_int = std::optional ; - using optional_int_vect = std::optional ; - using optional_string = std::optional ; - - optional_int 
number_not_engaged ; - optional_int number_engaged = 42 ; - - printf( "%d\n", *number_engaged) ; - - optional_int_vect numbers{{1,2,3,4}} ; - - printf( "%d %d\n", numbers.value()[0], numbers.value()[1] ) ; - - optional_string ostring = "hello" ; - - printf( "%s\n", ostring->c_str() ) ; -#endif - - return 0; // break here -} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile deleted file mode 100644 index 913a52fb191c..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -CXX_SOURCES := main.cpp - -# Work around "exception specification in declaration does not match previous -# declaration" errors present in older libc++ releases. This error was fixed in -# the 3.8 release. -CFLAGS_EXTRAS := -fno-exceptions - -USE_LIBCPP := 1 -include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py deleted file mode 100644 index 3519daec6ec4..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/TestDataFormatterUnordered.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Test lldb data formatter subsystem. 
-""" - - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class LibcxxUnorderedDataFormatterTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - def setUp(self): - TestBase.setUp(self) - self.namespace = 'std' - - @add_test_categories(["libc++"]) - def test_with_run_command(self): - self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - lldbutil.run_break_set_by_source_regexp( - self, "Set break point at this line.") - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - # This is the function to remove the custom formats in order to have a - # clean slate for the next test case. - def cleanup(): - self.runCmd('type format clear', check=False) - self.runCmd('type summary clear', check=False) - self.runCmd('type filter clear', check=False) - self.runCmd('type synth clear', check=False) - self.runCmd( - "settings set target.max-children-count 256", - check=False) - - # Execute the cleanup function during test case tear down. 
- self.addTearDownHook(cleanup) - - ns = self.namespace - self.look_for_content_and_continue( - "map", ['%s::unordered_map' % - ns, 'size=5 {', 'hello', 'world', 'this', 'is', 'me']) - - self.look_for_content_and_continue( - "mmap", ['%s::unordered_multimap' % ns, 'size=6 {', 'first = 3', 'second = "this"', - 'first = 2', 'second = "hello"']) - - self.look_for_content_and_continue( - "iset", ['%s::unordered_set' % - ns, 'size=5 {', '\[\d\] = 5', '\[\d\] = 3', '\[\d\] = 2']) - - self.look_for_content_and_continue( - "sset", ['%s::unordered_set' % ns, 'size=5 {', '\[\d\] = "is"', '\[\d\] = "world"', - '\[\d\] = "hello"']) - - self.look_for_content_and_continue( - "imset", ['%s::unordered_multiset' % ns, 'size=6 {', '(\[\d\] = 3(\\n|.)+){3}', - '\[\d\] = 2', '\[\d\] = 1']) - - self.look_for_content_and_continue( - "smset", ['%s::unordered_multiset' % ns, 'size=5 {', '(\[\d\] = "is"(\\n|.)+){2}', - '(\[\d\] = "world"(\\n|.)+){2}']) - - def look_for_content_and_continue(self, var_name, patterns): - self.expect(("frame variable %s" % var_name), patterns=patterns) - self.expect(("frame variable %s" % var_name), patterns=patterns) - self.runCmd("continue") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp deleted file mode 100644 index 81a5763559d3..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/unordered/main.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include -#include - -using std::string; - -#define intstr_map std::unordered_map -#define intstr_mmap std::unordered_multimap - -#define int_set std::unordered_set -#define str_set std::unordered_set -#define int_mset std::unordered_multiset -#define str_mset std::unordered_multiset - -int g_the_foo = 0; - -int thefoo_rw(int arg = 1) -{ - if (arg < 0) - arg = 0; - if (!arg) - arg = 1; - g_the_foo += arg; - return g_the_foo; -} - -int 
main() -{ - intstr_map map; - map.emplace(1,"hello"); - map.emplace(2,"world"); - map.emplace(3,"this"); - map.emplace(4,"is"); - map.emplace(5,"me"); - thefoo_rw(); // Set break point at this line. - - intstr_mmap mmap; - mmap.emplace(1,"hello"); - mmap.emplace(2,"hello"); - mmap.emplace(2,"world"); - mmap.emplace(3,"this"); - mmap.emplace(3,"this"); - mmap.emplace(3,"this"); - thefoo_rw(); // Set break point at this line. - - int_set iset; - iset.emplace(1); - iset.emplace(2); - iset.emplace(3); - iset.emplace(4); - iset.emplace(5); - thefoo_rw(); // Set break point at this line. - - str_set sset; - sset.emplace("hello"); - sset.emplace("world"); - sset.emplace("this"); - sset.emplace("is"); - sset.emplace("me"); - thefoo_rw(); // Set break point at this line. - - int_mset imset; - imset.emplace(1); - imset.emplace(2); - imset.emplace(2); - imset.emplace(3); - imset.emplace(3); - imset.emplace(3); - thefoo_rw(); // Set break point at this line. - - str_mset smset; - smset.emplace("hello"); - smset.emplace("world"); - smset.emplace("world"); - smset.emplace("is"); - smset.emplace("is"); - thefoo_rw(); // Set break point at this line. 
- - return 0; -} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile deleted file mode 100644 index c825977b1a5d..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -CXX_SOURCES := main.cpp - -CFLAGS_EXTRAS := -O0 -USE_LIBSTDCPP := 1 - -include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py deleted file mode 100644 index 03131ccfde2f..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/TestDataFormatterStdList.py +++ /dev/null @@ -1,207 +0,0 @@ -""" -Test lldb data formatter subsystem. -""" - - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class StdListDataFormatterTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line numbers to break at for the different tests. 
- self.line = line_number('main.cpp', '// Set break point at this line.') - self.optional_line = line_number( - 'main.cpp', '// Optional break point at this line.') - self.final_line = line_number( - 'main.cpp', '// Set final break point at this line.') - - @add_test_categories(["libstdcxx"]) - @expectedFailureAll(bugnumber="llvm.org/pr50861", compiler="gcc") - def test_with_run_command(self): - """Test that that file and class static variables display correctly.""" - self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1) - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - # This is the function to remove the custom formats in order to have a - # clean slate for the next test case. - def cleanup(): - self.runCmd('type format clear', check=False) - self.runCmd('type summary clear', check=False) - self.runCmd('type filter clear', check=False) - self.runCmd('type synth clear', check=False) - self.runCmd( - "settings set target.max-children-count 256", - check=False) - - # Execute the cleanup function during test case tear down. 
- self.addTearDownHook(cleanup) - - self.runCmd("frame variable numbers_list --show-types") - - self.runCmd("type format add -f hex int") - - self.expect("frame variable numbers_list --raw", matching=False, - substrs=['size=0', - '{}']) - self.expect( - "frame variable &numbers_list._M_impl._M_node --raw", - matching=False, - substrs=[ - 'size=0', - '{}']) - - self.expect("frame variable numbers_list", - substrs=['size=0', - '{}']) - - self.expect("p numbers_list", - substrs=['size=0', - '{}']) - - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['size=1', - '[0] = ', - '0x12345678']) - - self.runCmd("n") - self.runCmd("n") - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['size=4', - '[0] = ', - '0x12345678', - '[1] =', - '0x11223344', - '[2] =', - '0xbeeffeed', - '[3] =', - '0x00abba00']) - - self.runCmd("n") - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['size=6', - '[0] = ', - '0x12345678', - '0x11223344', - '0xbeeffeed', - '0x00abba00', - '[4] =', - '0x0abcdef0', - '[5] =', - '0x0cab0cab']) - - self.expect("p numbers_list", - substrs=['size=6', - '[0] = ', - '0x12345678', - '0x11223344', - '0xbeeffeed', - '0x00abba00', - '[4] =', - '0x0abcdef0', - '[5] =', - '0x0cab0cab']) - - # check access-by-index - self.expect("frame variable numbers_list[0]", - substrs=['0x12345678']) - self.expect("frame variable numbers_list[1]", - substrs=['0x11223344']) - - # but check that expression does not rely on us - self.expect("expression numbers_list[0]", matching=False, error=True, - substrs=['0x12345678']) - - # check that MightHaveChildren() gets it right - self.assertTrue( - self.frame().FindVariable("numbers_list").MightHaveChildren(), - "numbers_list.MightHaveChildren() says False for non empty!") - - self.runCmd("n") - - self.expect("frame variable numbers_list", - substrs=['size=0', - '{}']) - - self.runCmd("n") - self.runCmd("n") - self.runCmd("n") - self.runCmd("n") - - 
self.expect("frame variable numbers_list", - substrs=['size=4', - '[0] = ', '1', - '[1] = ', '2', - '[2] = ', '3', - '[3] = ', '4']) - - self.runCmd("type format delete int") - - self.runCmd("n") - - self.expect("frame variable text_list", - substrs=['size=0', - '{}']) - - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.final_line, num_expected_locations=-1) - - self.runCmd("c", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - self.expect("frame variable text_list", - substrs=['size=4', - '[0]', 'goofy', - '[1]', 'is', - '[2]', 'smart', - '[3]', '!!!']) - - self.expect("p text_list", - substrs=['size=4', - '\"goofy\"', - '\"is\"', - '\"smart\"', - '\"!!!\"']) - - # check access-by-index - self.expect("frame variable text_list[0]", - substrs=['goofy']) - self.expect("frame variable text_list[3]", - substrs=['!!!']) - - # but check that expression does not rely on us - self.expect("expression text_list[0]", matching=False, error=True, - substrs=['goofy']) - - # check that MightHaveChildren() gets it right - self.assertTrue( - self.frame().FindVariable("text_list").MightHaveChildren(), - "text_list.MightHaveChildren() says False for non empty!") diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp deleted file mode 100644 index 191acdcc97be..000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/list/main.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include -#include - -typedef std::list int_list; -typedef std::list string_list; - -int main() -{ - int_list numbers_list; - - numbers_list.push_back(0x12345678); // Set break point at this line. 
- numbers_list.push_back(0x11223344); - numbers_list.push_back(0xBEEFFEED); - numbers_list.push_back(0x00ABBA00); - numbers_list.push_back(0x0ABCDEF0); - numbers_list.push_back(0x0CAB0CAB); - - numbers_list.clear(); - - numbers_list.push_back(1); - numbers_list.push_back(2); - numbers_list.push_back(3); - numbers_list.push_back(4); - - string_list text_list; - text_list.push_back(std::string("goofy")); // Optional break point at this line. - text_list.push_back(std::string("is")); - text_list.push_back(std::string("smart")); - - text_list.push_back(std::string("!!!")); - - return 0; // Set final break point at this line. -} - diff --git a/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py b/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py deleted file mode 100644 index a1ab7ab052e2..000000000000 --- a/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py +++ /dev/null @@ -1,717 +0,0 @@ -import ctypes -import errno -import io -import os -import os.path -import threading -import socket -import lldb -import binascii -import traceback -from lldbsuite.support import seven -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbtest_config - - -def checksum(message): - """ - Calculate the GDB server protocol checksum of the message. - - The GDB server protocol uses a simple modulo 256 sum. - """ - check = 0 - for c in message: - check += ord(c) - return check % 256 - - -def frame_packet(message): - """ - Create a framed packet that's ready to send over the GDB connection - channel. - - Framing includes surrounding the message between $ and #, and appending - a two character hex checksum. - """ - return "$%s#%02x" % (message, checksum(message)) - - -def escape_binary(message): - """ - Escape the binary message using the process described in the GDB server - protocol documentation. - - Most bytes are sent through as-is, but $, #, and { are escaped by writing - a { followed by the original byte mod 0x20. 
- """ - out = "" - for c in message: - d = ord(c) - if d in (0x23, 0x24, 0x7d): - out += chr(0x7d) - out += chr(d ^ 0x20) - else: - out += c - return out - - -def hex_encode_bytes(message): - """ - Encode the binary message by converting each byte into a two-character - hex string. - """ - out = "" - for c in message: - out += "%02x" % ord(c) - return out - - -def hex_decode_bytes(hex_bytes): - """ - Decode the hex string into a binary message by converting each two-character - hex string into a single output byte. - """ - out = "" - hex_len = len(hex_bytes) - while i < hex_len - 1: - out += chr(int(hex_bytes[i:i + 2]), 16) - i += 2 - return out - - -class MockGDBServerResponder: - """ - A base class for handling client packets and issuing server responses for - GDB tests. - - This handles many typical situations, while still allowing subclasses to - completely customize their responses. - - Most subclasses will be interested in overriding the other() method, which - handles any packet not recognized in the common packet handling code. - """ - - registerCount = 40 - packetLog = None - - def __init__(self): - self.packetLog = [] - - def respond(self, packet): - """ - Return the unframed packet data that the server should issue in response - to the given packet received from the client. 
- """ - self.packetLog.append(packet) - if packet is MockGDBServer.PACKET_INTERRUPT: - return self.interrupt() - if packet == "c": - return self.cont() - if packet.startswith("vCont;c"): - return self.vCont(packet) - if packet[0] == "A": - return self.A(packet) - if packet[0] == "D": - return self.D(packet) - if packet[0] == "g": - return self.readRegisters() - if packet[0] == "G": - # Gxxxxxxxxxxx - # Gxxxxxxxxxxx;thread:1234; - return self.writeRegisters(packet[1:].split(';')[0]) - if packet[0] == "p": - regnum = packet[1:].split(';')[0] - return self.readRegister(int(regnum, 16)) - if packet[0] == "P": - register, value = packet[1:].split("=") - return self.writeRegister(int(register, 16), value) - if packet[0] == "m": - addr, length = [int(x, 16) for x in packet[1:].split(',')] - return self.readMemory(addr, length) - if packet[0] == "M": - location, encoded_data = packet[1:].split(":") - addr, length = [int(x, 16) for x in location.split(',')] - return self.writeMemory(addr, encoded_data) - if packet[0:7] == "qSymbol": - return self.qSymbol(packet[8:]) - if packet[0:10] == "qSupported": - return self.qSupported(packet[11:].split(";")) - if packet == "qfThreadInfo": - return self.qfThreadInfo() - if packet == "qsThreadInfo": - return self.qsThreadInfo() - if packet == "qC": - return self.qC() - if packet == "QEnableErrorStrings": - return self.QEnableErrorStrings() - if packet == "?": - return self.haltReason() - if packet == "s": - return self.haltReason() - if packet[0] == "H": - tid = packet[2:] - if "." in tid: - assert tid.startswith("p") - # TODO: do we want to do anything with PID? 
- tid = tid.split(".", 1)[1] - return self.selectThread(packet[1], int(tid, 16)) - if packet[0:6] == "qXfer:": - obj, read, annex, location = packet[6:].split(":") - offset, length = [int(x, 16) for x in location.split(',')] - data, has_more = self.qXferRead(obj, annex, offset, length) - if data is not None: - return self._qXferResponse(data, has_more) - return "" - if packet.startswith("vAttach;"): - pid = packet.partition(';')[2] - return self.vAttach(int(pid, 16)) - if packet[0] == "Z": - return self.setBreakpoint(packet) - if packet.startswith("qThreadStopInfo"): - threadnum = int (packet[15:], 16) - return self.threadStopInfo(threadnum) - if packet == "QThreadSuffixSupported": - return self.QThreadSuffixSupported() - if packet == "QListThreadsInStopReply": - return self.QListThreadsInStopReply() - if packet.startswith("qMemoryRegionInfo:"): - return self.qMemoryRegionInfo(int(packet.split(':')[1], 16)) - if packet == "qQueryGDBServer": - return self.qQueryGDBServer() - if packet == "qHostInfo": - return self.qHostInfo() - if packet == "qGetWorkingDir": - return self.qGetWorkingDir() - if packet == "qOffsets": - return self.qOffsets(); - if packet == "qsProcessInfo": - return self.qsProcessInfo() - if packet.startswith("qfProcessInfo"): - return self.qfProcessInfo(packet) - if packet.startswith("qPathComplete:"): - return self.qPathComplete() - if packet.startswith("vFile:"): - return self.vFile(packet) - if packet.startswith("vRun;"): - return self.vRun(packet) - if packet.startswith("qLaunchSuccess"): - return self.qLaunchSuccess() - if packet.startswith("QEnvironment:"): - return self.QEnvironment(packet) - if packet.startswith("QEnvironmentHexEncoded:"): - return self.QEnvironmentHexEncoded(packet) - if packet.startswith("qRegisterInfo"): - regnum = int(packet[len("qRegisterInfo"):], 16) - return self.qRegisterInfo(regnum) - if packet == "k": - return self.k() - - return self.other(packet) - - def qsProcessInfo(self): - return "E04" - - def 
qfProcessInfo(self, packet): - return "E04" - - def qGetWorkingDir(self): - return "2f" - - def qOffsets(self): - return "" - - def qHostInfo(self): - return "ptrsize:8;endian:little;" - - def qQueryGDBServer(self): - return "E04" - - def interrupt(self): - raise self.UnexpectedPacketException() - - def cont(self): - raise self.UnexpectedPacketException() - - def vCont(self, packet): - raise self.UnexpectedPacketException() - - def A(self, packet): - return "" - - def D(self, packet): - return "OK" - - def readRegisters(self): - return "00000000" * self.registerCount - - def readRegister(self, register): - return "00000000" - - def writeRegisters(self, registers_hex): - return "OK" - - def writeRegister(self, register, value_hex): - return "OK" - - def readMemory(self, addr, length): - return "00" * length - - def writeMemory(self, addr, data_hex): - return "OK" - - def qSymbol(self, symbol_args): - return "OK" - - def qSupported(self, client_supported): - return "qXfer:features:read+;PacketSize=3fff;QStartNoAckMode+" - - def qfThreadInfo(self): - return "l" - - def qsThreadInfo(self): - return "l" - - def qC(self): - return "QC0" - - def QEnableErrorStrings(self): - return "OK" - - def haltReason(self): - # SIGINT is 2, return type is 2 digit hex string - return "S02" - - def qXferRead(self, obj, annex, offset, length): - return None, False - - def _qXferResponse(self, data, has_more): - return "%s%s" % ("m" if has_more else "l", escape_binary(data)) - - def vAttach(self, pid): - raise self.UnexpectedPacketException() - - def selectThread(self, op, thread_id): - return "OK" - - def setBreakpoint(self, packet): - raise self.UnexpectedPacketException() - - def threadStopInfo(self, threadnum): - return "" - - def other(self, packet): - # empty string means unsupported - return "" - - def QThreadSuffixSupported(self): - return "" - - def QListThreadsInStopReply(self): - return "" - - def qMemoryRegionInfo(self, addr): - return "" - - def qPathComplete(self): - return 
"" - - def vFile(self, packet): - return "" - - def vRun(self, packet): - return "" - - def qLaunchSuccess(self): - return "" - - def QEnvironment(self, packet): - return "OK" - - def QEnvironmentHexEncoded(self, packet): - return "OK" - - def qRegisterInfo(self, num): - return "" - - def k(self): - return "" - - """ - Raised when we receive a packet for which there is no default action. - Override the responder class to implement behavior suitable for the test at - hand. - """ - class UnexpectedPacketException(Exception): - pass - - -class ServerSocket: - """ - A wrapper class for TCP or pty-based server. - """ - - def get_connect_address(self): - """Get address for the client to connect to.""" - - def get_connect_url(self): - """Get URL suitable for process connect command.""" - - def close_server(self): - """Close all resources used by the server.""" - - def accept(self): - """Accept a single client connection to the server.""" - - def close_connection(self): - """Close all resources used by the accepted connection.""" - - def recv(self): - """Receive a data packet from the connected client.""" - - def sendall(self, data): - """Send the data to the connected client.""" - - -class TCPServerSocket(ServerSocket): - def __init__(self): - family, type, proto, _, addr = socket.getaddrinfo( - "localhost", 0, proto=socket.IPPROTO_TCP)[0] - self._server_socket = socket.socket(family, type, proto) - self._connection = None - - self._server_socket.bind(addr) - self._server_socket.listen(1) - - def get_connect_address(self): - return "[{}]:{}".format(*self._server_socket.getsockname()) - - def get_connect_url(self): - return "connect://" + self.get_connect_address() - - def close_server(self): - self._server_socket.close() - - def accept(self): - assert self._connection is None - # accept() is stubborn and won't fail even when the socket is - # shutdown, so we'll use a timeout - self._server_socket.settimeout(30.0) - client, client_addr = self._server_socket.accept() - # 
The connected client inherits its timeout from self._socket, - # but we'll use a blocking socket for the client - client.settimeout(None) - self._connection = client - - def close_connection(self): - assert self._connection is not None - self._connection.close() - self._connection = None - - def recv(self): - assert self._connection is not None - return self._connection.recv(4096) - - def sendall(self, data): - assert self._connection is not None - return self._connection.sendall(data) - - -class PtyServerSocket(ServerSocket): - def __init__(self): - import pty - import tty - master, slave = pty.openpty() - tty.setraw(master) - self._master = io.FileIO(master, 'r+b') - self._slave = io.FileIO(slave, 'r+b') - - def get_connect_address(self): - libc = ctypes.CDLL(None) - libc.ptsname.argtypes = (ctypes.c_int,) - libc.ptsname.restype = ctypes.c_char_p - return libc.ptsname(self._master.fileno()).decode() - - def get_connect_url(self): - return "serial://" + self.get_connect_address() - - def close_server(self): - self._slave.close() - self._master.close() - - def recv(self): - try: - return self._master.read(4096) - except OSError as e: - # closing the pty results in EIO on Linux, convert it to EOF - if e.errno == errno.EIO: - return b'' - raise - - def sendall(self, data): - return self._master.write(data) - - -class MockGDBServer: - """ - A simple TCP-based GDB server that can test client behavior by receiving - commands and issuing custom-tailored responses. - - Responses are generated via the .responder property, which should be an - instance of a class based on MockGDBServerResponder. - """ - - responder = None - _socket = None - _thread = None - _receivedData = None - _receivedDataOffset = None - _shouldSendAck = True - - def __init__(self, socket_class): - self._socket_class = socket_class - self.responder = MockGDBServerResponder() - - def start(self): - self._socket = self._socket_class() - # Start a thread that waits for a client connection. 
- self._thread = threading.Thread(target=self._run) - self._thread.start() - - def stop(self): - self._socket.close_server() - self._thread.join() - self._thread = None - - def get_connect_address(self): - return self._socket.get_connect_address() - - def get_connect_url(self): - return self._socket.get_connect_url() - - def _run(self): - # For testing purposes, we only need to worry about one client - # connecting just one time. - try: - self._socket.accept() - except: - return - self._shouldSendAck = True - self._receivedData = "" - self._receivedDataOffset = 0 - data = None - while True: - try: - data = seven.bitcast_to_string(self._socket.recv()) - if data is None or len(data) == 0: - break - self._receive(data) - except Exception as e: - print("An exception happened when receiving the response from the gdb server. Closing the client...") - traceback.print_exc() - self._socket.close_connection() - break - - def _receive(self, data): - """ - Collects data, parses and responds to as many packets as exist. - Any leftover data is kept for parsing the next time around. - """ - self._receivedData += data - try: - packet = self._parsePacket() - while packet is not None: - self._handlePacket(packet) - packet = self._parsePacket() - except self.InvalidPacketException: - self._socket.close_connection() - - def _parsePacket(self): - """ - Reads bytes from self._receivedData, returning: - - a packet's contents if a valid packet is found - - the PACKET_ACK unique object if we got an ack - - None if we only have a partial packet - - Raises an InvalidPacketException if unexpected data is received - or if checksums fail. - - Once a complete packet is found at the front of self._receivedData, - its data is removed form self._receivedData. 
- """ - data = self._receivedData - i = self._receivedDataOffset - data_len = len(data) - if data_len == 0: - return None - if i == 0: - # If we're looking at the start of the received data, that means - # we're looking for the start of a new packet, denoted by a $. - # It's also possible we'll see an ACK here, denoted by a + - if data[0] == '+': - self._receivedData = data[1:] - return self.PACKET_ACK - if ord(data[0]) == 3: - self._receivedData = data[1:] - return self.PACKET_INTERRUPT - if data[0] == '$': - i += 1 - else: - raise self.InvalidPacketException( - "Unexpected leading byte: %s" % data[0]) - - # If we're looking beyond the start of the received data, then we're - # looking for the end of the packet content, denoted by a #. - # Note that we pick up searching from where we left off last time - while i < data_len and data[i] != '#': - i += 1 - - # If there isn't enough data left for a checksum, just remember where - # we left off so we can pick up there the next time around - if i > data_len - 3: - self._receivedDataOffset = i - return None - - # If we have enough data remaining for the checksum, extract it and - # compare to the packet contents - packet = data[1:i] - i += 1 - try: - check = int(data[i:i + 2], 16) - except ValueError: - raise self.InvalidPacketException("Checksum is not valid hex") - i += 2 - if check != checksum(packet): - raise self.InvalidPacketException( - "Checksum %02x does not match content %02x" % - (check, checksum(packet))) - # remove parsed bytes from _receivedData and reset offset so parsing - # can start on the next packet the next time around - self._receivedData = data[i:] - self._receivedDataOffset = 0 - return packet - - def _handlePacket(self, packet): - if packet is self.PACKET_ACK: - # Ignore ACKs from the client. For the future, we can consider - # adding validation code to make sure the client only sends ACKs - # when it's supposed to. 
- return - response = "" - # We'll handle the ack stuff here since it's not something any of the - # tests will be concerned about, and it'll get turned off quickly anyway. - if self._shouldSendAck: - self._socket.sendall(seven.bitcast_to_bytes('+')) - if packet == "QStartNoAckMode": - self._shouldSendAck = False - response = "OK" - elif self.responder is not None: - # Delegate everything else to our responder - response = self.responder.respond(packet) - # Handle packet framing since we don't want to bother tests with it. - if response is not None: - framed = frame_packet(response) - self._socket.sendall(seven.bitcast_to_bytes(framed)) - - PACKET_ACK = object() - PACKET_INTERRUPT = object() - - class InvalidPacketException(Exception): - pass - - -class GDBRemoteTestBase(TestBase): - """ - Base class for GDB client tests. - - This class will setup and start a mock GDB server for the test to use. - It also provides assertPacketLogContains, which simplifies the checking - of packets sent by the client. - """ - - NO_DEBUG_INFO_TESTCASE = True - mydir = TestBase.compute_mydir(__file__) - server = None - server_socket_class = TCPServerSocket - - def setUp(self): - TestBase.setUp(self) - self.server = MockGDBServer(socket_class=self.server_socket_class) - self.server.start() - - def tearDown(self): - # TestBase.tearDown will kill the process, but we need to kill it early - # so its client connection closes and we can stop the server before - # finally calling the base tearDown. - if self.process() is not None: - self.process().Kill() - self.server.stop() - TestBase.tearDown(self) - - def createTarget(self, yaml_path): - """ - Create a target by auto-generating the object based on the given yaml - instructions. - - This will track the generated object so it can be automatically removed - during tearDown. 
- """ - yaml_base, ext = os.path.splitext(yaml_path) - obj_path = self.getBuildArtifact(yaml_base) - self.yaml2obj(yaml_path, obj_path) - return self.dbg.CreateTarget(obj_path) - - def connect(self, target): - """ - Create a process by connecting to the mock GDB server. - - Includes assertions that the process was successfully created. - """ - listener = self.dbg.GetListener() - error = lldb.SBError() - process = target.ConnectRemote(listener, - self.server.get_connect_url(), "gdb-remote", error) - self.assertTrue(error.Success(), error.description) - self.assertTrue(process, PROCESS_IS_VALID) - return process - - def assertPacketLogContains(self, packets): - """ - Assert that the mock server's packet log contains the given packets. - - The packet log includes all packets sent by the client and received - by the server. This fuction makes it easy to verify that the client - sent the expected packets to the server. - - The check does not require that the packets be consecutive, but does - require that they are ordered in the log as they ordered in the arg. - """ - i = 0 - j = 0 - log = self.server.responder.packetLog - - while i < len(packets) and j < len(log): - if log[j] == packets[i]: - i += 1 - j += 1 - if i < len(packets): - self.fail(u"Did not receive: %s\nLast 10 packets:\n\t%s" % - (packets[i], u'\n\t'.join(log))) - - -class GDBPlatformClientTestBase(GDBRemoteTestBase): - """ - Base class for platform server clients. - - This class extends GDBRemoteTestBase by automatically connecting - via "platform connect" in the setUp() method. 
- """ - - def setUp(self): - super().setUp() - self.runCmd("platform select remote-gdb-server") - self.runCmd("platform connect " + self.server.get_connect_url()) - self.assertTrue(self.dbg.GetSelectedPlatform().IsConnected()) - - def tearDown(self): - self.dbg.GetSelectedPlatform().DisconnectRemote() - super().tearDown() diff --git a/lldb/test/API/functionalities/memory/read/Makefile b/lldb/test/API/functionalities/memory/read/Makefile deleted file mode 100644 index 99998b20bcb0..000000000000 --- a/lldb/test/API/functionalities/memory/read/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -CXX_SOURCES := main.cpp - -include Makefile.rules diff --git a/lldb/test/API/functionalities/memory/read/TestMemoryRead.py b/lldb/test/API/functionalities/memory/read/TestMemoryRead.py deleted file mode 100644 index ceea4ab2f067..000000000000 --- a/lldb/test/API/functionalities/memory/read/TestMemoryRead.py +++ /dev/null @@ -1,177 +0,0 @@ -""" -Test the 'memory read' command. -""" - -import lldb -import lldbsuite.test.lldbutil as lldbutil - -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * - - -class MemoryReadTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break inside main(). - self.line = line_number('main.cpp', '// Set break point at this line.') - - def build_run_stop(self): - self.build() - exe = self.getBuildArtifact("a.out") - self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - - # Break in main() after the variables are assigned values. - lldbutil.run_break_set_by_file_and_line(self, - "main.cpp", - self.line, - num_expected_locations=1, - loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect("thread list", - STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', 'stop reason = breakpoint']) - - # The breakpoint should have a hit count of 1. 
- self.expect("breakpoint list -f", - BREAKPOINT_HIT_ONCE, - substrs=[' resolved, hit count = 1']) - - @no_debug_info_test - def test_memory_read(self): - """Test the 'memory read' command with plain and vector formats.""" - self.build_run_stop() - - # (lldb) memory read -f d -c 1 `&argc` - # 0x7fff5fbff9a0: 1 - self.runCmd("memory read -f d -c 1 `&argc`") - - # Find the starting address for variable 'argc' to verify later that the - # '--format uint32_t[] --size 4 --count 4' option increments the address - # correctly. - line = self.res.GetOutput().splitlines()[0] - items = line.split(':') - address = int(items[0], 0) - argc = int(items[1], 0) - self.assertGreater(address, 0) - self.assertEquals(argc, 1) - - # (lldb) memory read --format uint32_t[] --size 4 --count 4 `&argc` - # 0x7fff5fbff9a0: {0x00000001} - # 0x7fff5fbff9a4: {0x00000000} - # 0x7fff5fbff9a8: {0x0ec0bf27} - # 0x7fff5fbff9ac: {0x215db505} - self.runCmd( - "memory read --format uint32_t[] --size 4 --count 4 `&argc`") - lines = self.res.GetOutput().splitlines() - for i in range(4): - if i == 0: - # Verify that the printout for argc is correct. - self.assertEqual( - argc, int(lines[i].split(':')[1].strip(' {}'), 0)) - addr = int(lines[i].split(':')[0], 0) - # Verify that the printout for addr is incremented correctly. 
- self.assertEqual(addr, (address + i * 4)) - - # (lldb) memory read --format char[] --size 7 --count 1 `&my_string` - # 0x7fff5fbff990: {abcdefg} - self.expect( - "memory read --format char[] --size 7 --count 1 `&my_string`", - substrs=['abcdefg']) - - # (lldb) memory read --format 'hex float' --size 16 `&argc` - # 0x7fff5fbff5b0: error: unsupported byte size (16) for hex float - # format - self.expect( - "memory read --format 'hex float' --size 16 `&argc`", - substrs=['unsupported byte size (16) for hex float format']) - - self.expect( - "memory read --format 'float' --count 1 --size 8 `&my_double`", - substrs=['1234.']) - - # (lldb) memory read --format 'float' --count 1 --size 20 `&my_double` - # 0x7fff5fbff598: error: unsupported byte size (20) for float format - self.expect( - "memory read --format 'float' --count 1 --size 20 `&my_double`", - substrs=['unsupported byte size (20) for float format']) - - self.expect('memory read --type int --count 5 `&my_ints[0]`', - substrs=['(int) 0x', '2', '4', '6', '8', '10']) - - self.expect( - 'memory read --type int --count 5 --format hex `&my_ints[0]`', - substrs=[ - '(int) 0x', - '0x', - '0a']) - - self.expect( - 'memory read --type int --count 5 --offset 5 `&my_ints[0]`', - substrs=[ - '(int) 0x', - '12', - '14', - '16', - '18', - '20']) - - # the gdb format specifier and the size in characters for - # the returned values including the 0x prefix. 
- variations = [['b', 4], ['h', 6], ['w', 10], ['g', 18]] - for v in variations: - formatter = v[0] - expected_object_length = v[1] - self.runCmd( - "memory read --gdb-format 4%s &my_uint64s" % formatter) - lines = self.res.GetOutput().splitlines() - objects_read = [] - for l in lines: - objects_read.extend(l.split(':')[1].split()) - # Check that we got back 4 0x0000 etc bytes - for o in objects_read: - self.assertEqual(len(o), expected_object_length) - self.assertEquals(len(objects_read), 4) - - @no_debug_info_test - def test_memory_read_file(self): - self.build_run_stop() - res = lldb.SBCommandReturnObject() - self.ci.HandleCommand("memory read -f d -c 1 `&argc`", res) - self.assertTrue(res.Succeeded(), "memory read failed:" + res.GetError()) - - # Record golden output. - golden_output = res.GetOutput() - - memory_read_file = self.getBuildArtifact("memory-read-output") - - def check_file_content(expected): - with open(memory_read_file) as f: - lines = f.readlines() - lines = [s.strip() for s in lines] - expected = [s.strip() for s in expected] - self.assertEqual(lines, expected) - - # Sanity check. - self.runCmd("memory read -f d -c 1 -o '{}' `&argc`".format(memory_read_file)) - check_file_content([golden_output]) - - # Write some garbage to the file. - with open(memory_read_file, 'w') as f: - f.write("some garbage") - - # Make sure the file is truncated when we run the command again. - self.runCmd("memory read -f d -c 1 -o '{}' `&argc`".format(memory_read_file)) - check_file_content([golden_output]) - - # Make sure the file is appended when we run the command with --append-outfile. 
- self.runCmd( - "memory read -f d -c 1 -o '{}' --append-outfile `&argc`".format( - memory_read_file)) - check_file_content([golden_output, golden_output]) diff --git a/lldb/test/API/functionalities/memory/read/main.cpp b/lldb/test/API/functionalities/memory/read/main.cpp deleted file mode 100644 index 5a33ac1343c2..000000000000 --- a/lldb/test/API/functionalities/memory/read/main.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -int main (int argc, char const *argv[]) -{ - char my_string[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 0}; - double my_double = 1234.5678; - int my_ints[] = {2,4,6,8,10,12,14,16,18,20,22}; - uint64_t my_uint64s[] = {0, 1, 2, 3, 4, 5, 6, 7}; - printf("my_string=%s\n", my_string); // Set break point at this line. - printf("my_double=%g\n", my_double); - return 0; -} diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile b/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile deleted file mode 100644 index 10495940055b..000000000000 --- a/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := main.c - -include Makefile.rules diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py deleted file mode 100644 index b175f6234b10..000000000000 --- a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Test that "memory region" lookup uses the ABI plugin to remove -non address bits from addresses before lookup. 
-""" - - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class AArch64LinuxTaggedMemoryRegionTestCase(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - NO_DEBUG_INFO_TESTCASE = True - - # AArch64 Linux always enables the top byte ignore feature - @skipUnlessArch("aarch64") - @skipUnlessPlatform(["linux"]) - def test_mte_regions(self): - self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - lldbutil.run_break_set_by_file_and_line(self, "main.c", - line_number('main.c', '// Set break point at this line.'), - num_expected_locations=1) - - self.runCmd("run", RUN_SUCCEEDED) - - if self.process().GetState() == lldb.eStateExited: - self.fail("Test program failed to run.") - - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - # Despite the non address bits we should find a region - self.expect("memory region the_page", patterns=[ - "\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"]) diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/main.c b/lldb/test/API/linux/aarch64/tagged_memory_region/main.c deleted file mode 100644 index 29f99d73e12d..000000000000 --- a/lldb/test/API/linux/aarch64/tagged_memory_region/main.c +++ /dev/null @@ -1,17 +0,0 @@ -#include -#include -#include -#include - -int main(int argc, char const *argv[]) { - void *the_page = mmap(0, sysconf(_SC_PAGESIZE), PROT_READ | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (the_page == MAP_FAILED) - return 1; - - // Put something in the top byte (AArch64 Linux always enables top byte - // ignore) - the_page = (void *)((size_t)the_page | ((size_t)0x34 << 56)); - - return 0; // Set break point at this line. 
-} diff --git a/llvm/lib/Analysis/ReleaseModeModelRunner.cpp b/llvm/lib/Analysis/ReleaseModeModelRunner.cpp deleted file mode 100644 index d2bf95388066..000000000000 --- a/llvm/lib/Analysis/ReleaseModeModelRunner.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===- ReleaseModeModelRunner.cpp - Fast, precompiled model runner -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a model runner wrapping an AOT compiled ML model. -// Only inference is supported. -// -//===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_AOT) - -#include "llvm/Analysis/InlineModelFeatureMaps.h" -#include "llvm/Analysis/MLInlineAdvisor.h" - -// codegen-ed file -#include "InlinerSizeModel.h" // NOLINT - -#include -#include - -using namespace llvm; -namespace { - -const char FeedPrefix[] = "feed_"; -const char FetchPrefix[] = "fetch_"; - -/// MLModelRunner - production mode implementation. It uses a AOT-compiled -/// SavedModel for efficient execution. 
-class ReleaseModeModelRunner final : public MLModelRunner { -public: - ReleaseModeModelRunner(LLVMContext &Ctx); - virtual ~ReleaseModeModelRunner() = default; - - bool run() override; - - void setFeature(FeatureIndex Index, int64_t Value) override; - int64_t getFeature(int Index) const override; - -private: - std::vector FeatureIndices; - int32_t ResultIndex = -1; - std::unique_ptr CompiledModel; -}; -} // namespace - -ReleaseModeModelRunner::ReleaseModeModelRunner(LLVMContext &Ctx) - : MLModelRunner(Ctx), - CompiledModel(std::make_unique()) { - assert(CompiledModel && "The CompiledModel should be valid"); - - FeatureIndices.resize(NumberOfFeatures); - - for (size_t I = 0; I < NumberOfFeatures; ++I) { - const int Index = - CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]); - assert(Index >= 0 && "Cannot find Feature in inlining model"); - FeatureIndices[I] = Index; - } - - ResultIndex = - CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName); - assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model"); -} - -int64_t ReleaseModeModelRunner::getFeature(int Index) const { - return *static_cast( - CompiledModel->arg_data(FeatureIndices[Index])); -} - -void ReleaseModeModelRunner::setFeature(FeatureIndex Index, int64_t Value) { - *static_cast(CompiledModel->arg_data( - FeatureIndices[static_cast(Index)])) = Value; -} - -bool ReleaseModeModelRunner::run() { - CompiledModel->Run(); - return static_cast( - *static_cast(CompiledModel->result_data(ResultIndex))); -} - -std::unique_ptr -llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { - auto AOTRunner = std::make_unique(M.getContext()); - return std::make_unique(M, MAM, std::move(AOTRunner)); -} -#endif // defined(LLVM_HAVE_TF_AOT) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ddb77bf00e42..9f138136e6e9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8413,6 +8413,16 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Cond = Op.getOperand(0); + if (Subtarget->hasScalarCompareEq64() && Op->getOperand(0)->hasOneUse() && + !Op->isDivergent()) { + if (VT == MVT::i64) + return Op; + SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(1)); + SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(2)); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS)); + } + SDValue Zero = DAG.getConstant(0, DL, MVT::i32); SDValue One = DAG.getConstant(1, DL, MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4b7f06996ed6..1755b93538ce 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6122,8 +6122,11 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; case AMDGPU::S_CSELECT_B32: + lowerSelect32(Worklist, Inst, MDT); + Inst.eraseFromParent(); + continue; case AMDGPU::S_CSELECT_B64: - lowerSelect(Worklist, Inst, MDT); + splitSelect64(Worklist, Inst, MDT); Inst.eraseFromParent(); continue; case AMDGPU::S_CMP_EQ_I32: @@ -6301,8 +6304,8 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, return std::make_pair(false, nullptr); } -void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, - MachineDominatorTree *MDT) const { +void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -6377,6 +6380,95 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } +void SIInstrInfo::splitSelect64(SetVectorType &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT) const { 
+ // Split S_CSELECT_B64 into a pair of S_CSELECT_B32 and lower them + // further. + const DebugLoc &DL = Inst.getDebugLoc(); + MachineBasicBlock::iterator MII = Inst; + MachineBasicBlock &MBB = *Inst.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + // Get the original operands. + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src0 = Inst.getOperand(1); + MachineOperand &Src1 = Inst.getOperand(2); + MachineOperand &Cond = Inst.getOperand(3); + + Register SCCSource = Cond.getReg(); + bool IsSCC = (SCCSource == AMDGPU::SCC); + + // If this is a trivial select where the condition is effectively not SCC + // (SCCSource is a source of copy to SCC), then the select is semantically + // equivalent to copying SCCSource. Hence, there is no need to create + // V_CNDMASK, we can just use that and bail out. + if (!IsSCC && (Src0.isImm() && Src0.getImm() == -1) && + (Src1.isImm() && Src1.getImm() == 0)) { + MRI.replaceRegWith(Dest.getReg(), SCCSource); + return; + } + + // Prepare the split destination. + Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + // Split the source operands. + const TargetRegisterClass *Src0RC = nullptr; + const TargetRegisterClass *Src0SubRC = nullptr; + if (Src0.isReg()) { + Src0RC = MRI.getRegClass(Src0.getReg()); + Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); + } + const TargetRegisterClass *Src1RC = nullptr; + const TargetRegisterClass *Src1SubRC = nullptr; + if (Src1.isReg()) { + Src1RC = MRI.getRegClass(Src1.getReg()); + Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); + } + // Split lo. + MachineOperand SrcReg0Sub0 = + buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); + MachineOperand SrcReg1Sub0 = + buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); + // Split hi. 
+ MachineOperand SrcReg0Sub1 = + buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); + MachineOperand SrcReg1Sub1 = + buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); + // Select the lo part. + MachineInstr *LoHalf = + BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub0) + .add(SrcReg0Sub0) + .add(SrcReg1Sub0); + // Replace the condition operand with the original one. + LoHalf->getOperand(3).setReg(SCCSource); + Worklist.insert(LoHalf); + // Select the hi part. + MachineInstr *HiHalf = + BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub1) + .add(SrcReg0Sub1) + .add(SrcReg1Sub1); + // Replace the condition operand with the original one. + HiHalf->getOperand(3).setReg(SCCSource); + Worklist.insert(HiHalf); + // Merge them back to the original 64-bit one. + BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) + .addReg(DestSub0) + .addImm(AMDGPU::sub0) + .addReg(DestSub1) + .addImm(AMDGPU::sub1); + MRI.replaceRegWith(Dest.getReg(), FullDestReg); + + // Try to legalize the operands in case we need to swap the order to keep + // it valid. + legalizeOperands(*LoHalf, MDT); + legalizeOperands(*HiHalf, MDT); + + // Move all users of this moved value. 
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); +} + void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 70a48cd58e38..dd9ea2b53ca2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -78,8 +78,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, MachineDominatorTree *MDT = nullptr) const; - void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, - MachineDominatorTree *MDT = nullptr) const; + void lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT = nullptr) const; + + void splitSelect64(SetVectorType &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT = nullptr) const; void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 96438e9247a2..1713586dcf5b 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -518,9 +518,10 @@ let Uses = [SCC] in { def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32", [(set i32:$sdst, (SelectPat i64:$src0, i64:$src1))] + >; } - - def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">; } // End Uses = [SCC] let Defs = [SCC] in { diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp deleted file mode 100644 index 9cd959012e6f..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===-- M68kCallLowering.cpp - Call lowering -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This file implements the lowering of LLVM calls to machine code calls for -/// GlobalISel. -// -//===----------------------------------------------------------------------===// - -#include "M68kCallLowering.h" -#include "M68kISelLowering.h" -#include "M68kInstrInfo.h" -#include "M68kSubtarget.h" -#include "M68kTargetMachine.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/TargetCallingConv.h" - -using namespace llvm; - -M68kCallLowering::M68kCallLowering(const M68kTargetLowering &TLI) - : CallLowering(&TLI) {} - -struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { - OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstrBuilder MIB) - : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {} - - void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign VA) override { - MIB.addUse(PhysReg, RegState::Implicit); - Register ExtReg = extendRegister(ValVReg, VA); - MIRBuilder.buildCopy(PhysReg, ExtReg); - } - - void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, - MachinePointerInfo &MPO, CCValAssign &VA) override { - llvm_unreachable("unimplemented"); - } - - Register getStackAddress(uint64_t Size, int64_t Offset, - MachinePointerInfo &MPO, - ISD::ArgFlagsTy Flags) override { - llvm_unreachable("unimplemented"); - } - - MachineInstrBuilder MIB; -}; -bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, - const Value *Val, ArrayRef VRegs, - FunctionLoweringInfo &FLI, - Register SwiftErrorVReg) const { - - auto MIB = MIRBuilder.buildInstrNoInsert(M68k::RTS); - bool Success = true; - MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = MF.getFunction(); - MachineRegisterInfo &MRI = 
MF.getRegInfo(); - const M68kTargetLowering &TLI = *getTLI(); - CCAssignFn *AssignFn = - TLI.getCCAssignFn(F.getCallingConv(), true, F.isVarArg()); - auto &DL = F.getParent()->getDataLayout(); - if (!VRegs.empty()) { - SmallVector SplitArgs; - ArgInfo OrigArg{VRegs, Val->getType(), 0}; - setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); - OutgoingValueAssigner ArgAssigner(AssignFn); - OutgoingArgHandler ArgHandler(MIRBuilder, MRI, MIB); - Success = determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs, - MIRBuilder, F.getCallingConv(), - F.isVarArg()); - } - MIRBuilder.insertInstr(MIB); - return Success; -} - -bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef> VRegs, - FunctionLoweringInfo &FLI) const { - MachineFunction &MF = MIRBuilder.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &DL = F.getParent()->getDataLayout(); - auto &TLI = *getTLI(); - - SmallVector SplitArgs; - unsigned I = 0; - for (const auto &Arg : F.args()) { - ArgInfo OrigArg{VRegs[I], Arg.getType(), I}; - setArgFlags(OrigArg, I + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); - ++I; - } - - CCAssignFn *AssignFn = - TLI.getCCAssignFn(F.getCallingConv(), false, F.isVarArg()); - IncomingValueAssigner ArgAssigner(AssignFn); - FormalArgHandler ArgHandler(MIRBuilder, MRI); - return determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs, - MIRBuilder, F.getCallingConv(), - F.isVarArg()); -} - -void M68kIncomingValueHandler::assignValueToReg(Register ValVReg, - Register PhysReg, - CCValAssign VA) { - MIRBuilder.getMRI()->addLiveIn(PhysReg); - MIRBuilder.getMBB().addLiveIn(PhysReg); - IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); -} - -void M68kIncomingValueHandler::assignValueToAddress(Register ValVReg, - Register Addr, - LLT MemTy, - MachinePointerInfo &MPO, - 
CCValAssign &VA) { - MachineFunction &MF = MIRBuilder.getMF(); - auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy, - inferAlignFromPtrInfo(MF, MPO)); - MIRBuilder.buildLoad(ValVReg, Addr, *MMO); -} - -Register M68kIncomingValueHandler::getStackAddress(uint64_t Size, - int64_t Offset, - MachinePointerInfo &MPO, - ISD::ArgFlagsTy Flags) { - auto &MFI = MIRBuilder.getMF().getFrameInfo(); - const bool IsImmutable = !Flags.isByVal(); - int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable); - MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); - - // Build Frame Index - llvm::LLT FramePtr = LLT::pointer( - 0, MIRBuilder.getMF().getDataLayout().getPointerSizeInBits()); - MachineInstrBuilder AddrReg = MIRBuilder.buildFrameIndex(FramePtr, FI); - StackUsed = std::max(StackUsed, Size + Offset); - return AddrReg.getReg(0); -} - -bool M68kCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallLoweringInfo &Info) const { - return false; -} - -bool M68kCallLowering::enableBigEndian() const { return true; } diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h deleted file mode 100644 index 47cdefdba100..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- M68kCallLowering.h - Call lowering -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This file implements the lowering of LLVM calls to machine code calls for -/// GlobalISel. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H -#define LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H - -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/ValueTypes.h" - -namespace llvm { - -class M68kTargetLowering; - -class M68kCallLowering : public CallLowering { - // TODO: We are only supporting return instruction with no value at this time - // point - -public: - M68kCallLowering(const M68kTargetLowering &TLI); - - bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef VRegs, FunctionLoweringInfo &FLI, - Register SwiftErrorVReg) const override; - - bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef> VRegs, - FunctionLoweringInfo &FLI) const override; - - bool lowerCall(MachineIRBuilder &MIRBuilder, - CallLoweringInfo &Info) const override; - - bool enableBigEndian() const override; -}; -struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler { - M68kIncomingValueHandler(MachineIRBuilder &MIRBuilder, - MachineRegisterInfo &MRI) - : CallLowering::IncomingValueHandler(MIRBuilder, MRI) {} - - uint64_t StackUsed; - -private: - void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign VA) override; - - void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, - MachinePointerInfo &MPO, CCValAssign &VA) override; - - Register getStackAddress(uint64_t Size, int64_t Offset, - MachinePointerInfo &MPO, - ISD::ArgFlagsTy Flags) override; -}; - -struct FormalArgHandler : public M68kIncomingValueHandler { - FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) - : M68kIncomingValueHandler(MIRBuilder, MRI) {} -}; - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H diff --git a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp 
b/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp deleted file mode 100644 index 9ac4ab9a5ba1..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===- M68kInstructionSelector.cpp ------------------------------*- C++ -*-===// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the InstructionSelector class for -/// M68k. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#include "M68kRegisterBankInfo.h" -#include "M68kSubtarget.h" -#include "M68kTargetMachine.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "m68k-isel" - -using namespace llvm; - -#define GET_GLOBALISEL_PREDICATE_BITSET -#include "M68kGenGlobalISel.inc" -#undef GET_GLOBALISEL_PREDICATE_BITSET - -namespace { - -class M68kInstructionSelector : public InstructionSelector { -public: - M68kInstructionSelector(const M68kTargetMachine &TM, const M68kSubtarget &STI, - const M68kRegisterBankInfo &RBI); - - bool select(MachineInstr &I) override; - static const char *getName() { return DEBUG_TYPE; } - -private: - bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; - - const M68kTargetMachine &TM; - const M68kInstrInfo &TII; - const M68kRegisterInfo &TRI; - const M68kRegisterBankInfo &RBI; - -#define GET_GLOBALISEL_PREDICATES_DECL -#include "M68kGenGlobalISel.inc" -#undef GET_GLOBALISEL_PREDICATES_DECL - -#define GET_GLOBALISEL_TEMPORARIES_DECL -#include "M68kGenGlobalISel.inc" -#undef GET_GLOBALISEL_TEMPORARIES_DECL -}; - -} // end anonymous namespace - -#define GET_GLOBALISEL_IMPL -#include "M68kGenGlobalISel.inc" -#undef GET_GLOBALISEL_IMPL - -M68kInstructionSelector::M68kInstructionSelector( - const M68kTargetMachine &TM, const 
M68kSubtarget &STI, - const M68kRegisterBankInfo &RBI) - : InstructionSelector(), TM(TM), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), - -#define GET_GLOBALISEL_PREDICATES_INIT -#include "M68kGenGlobalISel.inc" -#undef GET_GLOBALISEL_PREDICATES_INIT -#define GET_GLOBALISEL_TEMPORARIES_INIT -#include "M68kGenGlobalISel.inc" -#undef GET_GLOBALISEL_TEMPORARIES_INIT -{ -} - -bool M68kInstructionSelector::select(MachineInstr &I) { - // Certain non-generic instructions also need some special handling. - if (!isPreISelGenericOpcode(I.getOpcode())) - return true; - - if (selectImpl(I, *CoverageInfo)) - return true; - - return false; -} - -namespace llvm { -InstructionSelector * -createM68kInstructionSelector(const M68kTargetMachine &TM, - const M68kSubtarget &Subtarget, - const M68kRegisterBankInfo &RBI) { - return new M68kInstructionSelector(TM, Subtarget, RBI); -} -} // end namespace llvm diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp deleted file mode 100644 index bcbe62816beb..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===-- M68kLegalizerInfo.cpp ----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the Machinelegalizer class for M68k. 
-//===----------------------------------------------------------------------===// - -#include "M68kLegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Type.h" - -using namespace llvm; - -M68kLegalizerInfo::M68kLegalizerInfo(const M68kSubtarget &ST) { - using namespace TargetOpcode; - const LLT S32 = LLT::scalar(32); - const LLT P0 = LLT::pointer(0, 32); - getActionDefinitionsBuilder(G_LOAD).legalFor({S32}); - getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({P0}); - getActionDefinitionsBuilder(G_ADD).legalFor({S32}); - getActionDefinitionsBuilder(G_SUB).legalFor({S32}); - getActionDefinitionsBuilder(G_MUL).legalFor({S32}); - getActionDefinitionsBuilder(G_UDIV).legalFor({S32}); - getLegacyLegalizerInfo().computeTables(); -} diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h deleted file mode 100644 index 205aa81aedcc..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h +++ /dev/null @@ -1,29 +0,0 @@ -//===- M68kLegalizerInfo --------------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the MachineLegalizer class for -/// M68k. -/// \todo This should be generated by TableGen. 
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KLEGALIZERINFO_H -#define LLVM_LIB_TARGET_M68K_GLSEL_M68KLEGALIZERINFO_H - -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" - -namespace llvm { - -class M68kSubtarget; - -/// This struct provides the information for the target register banks. -struct M68kLegalizerInfo : public LegalizerInfo { -public: - M68kLegalizerInfo(const M68kSubtarget &ST); -}; -} // end namespace llvm -#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KLEGALIZERINFO_H diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp deleted file mode 100644 index 5c0f5dae8e37..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp +++ /dev/null @@ -1,105 +0,0 @@ -//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements the targeting of the RegisterBankInfo class for M68k. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#include "M68kRegisterBankInfo.h" -#include "M68kInstrInfo.h" // For the register classes -#include "M68kSubtarget.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" - -#define GET_TARGET_REGBANK_IMPL -#include "M68kGenRegisterBank.inc" - -using namespace llvm; - -// FIXME: TableGen this. 
-// If it grows too much and TableGen still isn't ready to do the job, extract it -// into an M68kGenRegisterBankInfo.def (similar to AArch64). -namespace llvm { -namespace M68k { -enum PartialMappingIdx { - PMI_GPR, - PMI_Min = PMI_GPR, -}; - -RegisterBankInfo::PartialMapping PartMappings[]{ - // GPR Partial Mapping - {0, 32, GPRRegBank}, -}; - -enum ValueMappingIdx { - InvalidIdx = 0, - GPR3OpsIdx = 1, -}; - -RegisterBankInfo::ValueMapping ValueMappings[] = { - // invalid - {nullptr, 0}, - // 3 operands in GPRs - {&PartMappings[PMI_GPR - PMI_Min], 1}, - {&PartMappings[PMI_GPR - PMI_Min], 1}, - {&PartMappings[PMI_GPR - PMI_Min], 1}, - -}; -} // end namespace M68k -} // end namespace llvm - -M68kRegisterBankInfo::M68kRegisterBankInfo(const TargetRegisterInfo &TRI) - : M68kGenRegisterBankInfo() {} - -const RegisterBank & -M68kRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, - LLT) const { - return getRegBank(M68k::GPRRegBankID); -} - -const RegisterBankInfo::InstructionMapping & -M68kRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - auto Opc = MI.getOpcode(); - - if (!isPreISelGenericOpcode(Opc)) { - const InstructionMapping &Mapping = getInstrMappingImpl(MI); - if (Mapping.isValid()) - return Mapping; - } - - using namespace TargetOpcode; - - unsigned NumOperands = MI.getNumOperands(); - const ValueMapping *OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx]; - - switch (Opc) { - case G_ADD: - case G_SUB: - case G_MUL: - case G_SDIV: - case G_UDIV: - case G_LOAD: - case G_STORE: { - OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx]; - break; - } - - case G_CONSTANT: - case G_FRAME_INDEX: - OperandsMapping = - getOperandsMapping({&M68k::ValueMappings[M68k::GPR3OpsIdx], nullptr}); - break; - default: - return getInvalidInstructionMapping(); - } - - return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping, - NumOperands); -} diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h 
b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h deleted file mode 100644 index 853c75df2bb3..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h +++ /dev/null @@ -1,45 +0,0 @@ -//===-- M68kRegisterBankInfo.h ---------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// This file declares the targeting of the RegisterBankInfo class for M68k. -/// \todo This should be generated by TableGen. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H -#define LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H - -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" - -#define GET_REGBANK_DECLARATIONS -#include "M68kGenRegisterBank.inc" -#undef GET_REGBANK_DECLARATIONS - -namespace llvm { - -class TargetRegisterInfo; - -class M68kGenRegisterBankInfo : public RegisterBankInfo { -protected: -#define GET_TARGET_REGBANK_CLASS -#include "M68kGenRegisterBank.inc" -#undef GET_TARGET_REGBANK_CLASS -}; - -/// This class provides the information for the target register banks. 
-class M68kRegisterBankInfo final : public M68kGenRegisterBankInfo { -public: - M68kRegisterBankInfo(const TargetRegisterInfo &TRI); - - const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC, - LLT) const override; - - const InstructionMapping & - getInstrMapping(const MachineInstr &MI) const override; -}; -} // end namespace llvm -#endif diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td b/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td deleted file mode 100644 index 942677a60e6c..000000000000 --- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td +++ /dev/null @@ -1,15 +0,0 @@ -//===-- M68kRegisterBanks.td - Describe the M68k Banks -------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Define the M68k register banks used for GlobalISel. -/// -//===----------------------------------------------------------------------===// - -/// General Purpose Registers. 
Here we define a register bank with name AnyGPR -def GPRRegBank : RegisterBank<"AnyGPR", [XR32]>; diff --git a/llvm/test/Analysis/BasicAA/memset_pattern.ll b/llvm/test/Analysis/BasicAA/memset_pattern.ll deleted file mode 100644 index 1096d3896fda..000000000000 --- a/llvm/test/Analysis/BasicAA/memset_pattern.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt -S -basic-aa -gvn < %s | FileCheck %s -; PR10872 -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7" - -@z = internal global i32 0, align 4 -@y = internal global i32 0, align 4 -@x = internal constant i32 0, align 4 - -; CHECK: @test -define i32 @test() nounwind uwtable ssp { -entry: - store i32 1, i32* @z - tail call void @memset_pattern16(i8* bitcast (i32* @y to i8*), i8* bitcast (i32* @x to i8*), i64 4) nounwind -; CHECK-NOT: load - %l = load i32, i32* @z -; CHECK: ret i32 1 - ret i32 %l -} - -declare void @memset_pattern16(i8*, i8* readonly, i64) argmemonly diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll deleted file mode 100644 index 5b7a7bdbadfe..000000000000 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll +++ /dev/null @@ -1,204 +0,0 @@ -; RUN: opt -loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED -; RUN: opt -loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED -; REQUIRES: asserts - -target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; (1) Interleave-group with factor 4, storing only 2 members out of the 4. -; Check that when we allow masked-memops to support interleave-group with gaps, -; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask. -; Check that when we don't allow masked-memops to support interleave-group with gaps, -; the store is scalarized. -; The input IR was generated from this source: -; for(i=0;i<1024;i++){ -; points[i*4] = x[i]; -; points[i*4 + 1] = y[i]; -; } -; (relates to the testcase in PR50566) - -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1" -; -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 - -; 
ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1" -; -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 36 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 73 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 - -define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv - %0 = load i16, i16* %arrayidx, align 2 - %1 = shl nuw nsw i64 %indvars.iv, 2 - %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1 - store i16 %0, i16* %arrayidx2, align 2 - %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv - %2 = load i16, i16* %arrayidx4, align 2 - %3 = or i64 %1, 1 - %arrayidx7 = getelementptr inbounds i16, i16* 
%points, i64 %3 - store i16 %2, i16* %arrayidx7, align 2 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 - br i1 %exitcond.not, label %for.end, label %for.body - -for.end: - ret void -} - -; (2) Same as above, but this time the gaps mask of the store is also And-ed with the -; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is -; not allowed, the store is scalarized and predicated. -; The input IR was generated from this source: -; for(i=0;i 0). -; If using masked memops to vectorize interleaved-group with gaps is -; not allowed, the store is scalarized and predicated. -; Here the Interleave-group is with factor 3, storing only 1 member out of the 3. -; The input IR was generated from this source: -; for(i=0;i<1024;i++){ -; if (x[i] > 0) -; points[i*3] = x[i]; -; } - -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test" -; -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 - -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test" -; -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; 
ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 - -define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { -entry: - br label %for.body - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] - %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv - %0 = load i16, i16* %arrayidx, align 2 - %cmp1 = icmp sgt i16 %0, 0 - br i1 %cmp1, label %if.then, label %for.inc - -if.then: - %1 = mul nuw nsw i64 %indvars.iv, 3 - %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1 - store i16 %0, i16* %arrayidx6, align 2 - br label %for.inc - -for.inc: - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 - br i1 %exitcond.not, label %for.end, label %for.body - -for.end: - ret void -} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll b/llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll deleted file mode 100644 index 35e4935d3ee2..000000000000 --- a/llvm/test/Analysis/LoopAccessAnalysis/gep-induction-operand-typesize-warning.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt -loop-load-elim -mtriple=aarch64--linux-gnu -mattr=+sve < %s - -; This regression test is verifying that a GEP instruction performed on a -; scalable vector does not produce a 'assumption that TypeSize is not scalable' -; warning in the llvm::getGEPInductionOperand function. 
- -define void @get_gep_induction_operand_typesize_warning(i64 %n, * %a) { -entry: - br label %loop.body - -loop.body: - %0 = phi i64 [ 0, %entry ], [ %1, %loop.body ] - %idx = getelementptr , * %a, i64 %0 - store zeroinitializer, * %idx - %1 = add i64 %0, 1 - %2 = icmp eq i64 %1, %n - br i1 %2, label %loop.end, label %loop.body - -loop.end: - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index 355158783b52..27215568482b 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -17,17 +17,15 @@ ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} +; GFX9-DAG: s_load_dword s[[PTR:[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16) -; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16 -; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]] +; GFX9-DAG: s_lshl_b32 s[[SSRC_SHARED_BASE:[0-9]+]], [[SSRC_SHARED]], 16 ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base -; GFX9: s_cmp_lg_u32 [[PTR]], -1 -; GFX9: s_cselect_b64 vcc, -1, 0 -; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc -; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; GFX9: s_cmp_lg_u32 s[[PTR]], -1 +; GFX9: s_cselect_b64 s{{\[}}[[SEL_LO:[0-9]+]]:[[SEL_HI:[0-9]+]]{{\]}}, s{{\[}}[[PTR]]:[[SSRC_SHARED_BASE]]{{\]}}, 0 +; GFX9-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[SEL_LO]] +; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[SEL_HI]] ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] @@ -84,19 +82,17 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] -; GFX9-DAG: s_load_dword 
[[PTR:s[0-9]+]], s[4:5], 0x0{{$}} +; GFX9-DAG: s_load_dword s[[PTR:[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16) -; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16 -; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]] +; GFX9-DAG: s_lshl_b32 s[[SSRC_PRIVATE_BASE:[0-9]+]], [[SSRC_PRIVATE]], 16 ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 -; GFX9: s_cmp_lg_u32 [[PTR]], -1 -; GFX9: s_cselect_b64 vcc, -1, 0 -; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc -; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; GFX9: s_cmp_lg_u32 s[[PTR]], -1 +; GFX9: s_cselect_b64 s{{\[}}[[SEL_LO:[0-9]+]]:[[SEL_HI:[0-9]+]]{{\]}}, s{{\[}}[[PTR]]:[[SSRC_PRIVATE_BASE]]{{\]}}, 0 +; GFX9-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[SEL_LO]] +; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[SEL_HI]] ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 7cd0add30ccc..a8cefd4e50cf 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -2,6 +2,7 @@ ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare -amdgpu-bypass-slow-div=0 %s | FileCheck %s ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx90a -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GFX90A %s define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; CHECK-LABEL: @udiv_i32( @@ -94,6 +95,34 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, 
i32 %x, i32 %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: udiv_i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_sub_i32 s4, 0, s3 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, s4, v0 +; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v2, s2, v2 +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = udiv i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -184,6 +213,32 @@ define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: urem_i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_sub_i32 s4, 0, s3 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, s4, v0 
+; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = urem i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -307,6 +362,43 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: sdiv_i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 +; GFX90A-NEXT: s_add_i32 s3, s3, s4 +; GFX90A-NEXT: s_xor_b32 s3, s3, s4 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_ashr_i32 s5, s2, 31 +; GFX90A-NEXT: s_add_i32 s2, s2, s5 +; GFX90A-NEXT: s_xor_b32 s4, s5, s4 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_xor_b32 s2, s2, s5 +; GFX90A-NEXT: s_sub_i32 s5, 0, s3 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v0 +; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v2, s2, v2 +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 
+; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v0, s4, v0 +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = sdiv i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -421,6 +513,40 @@ define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: srem_i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 +; GFX90A-NEXT: s_add_i32 s3, s3, s4 +; GFX90A-NEXT: s_xor_b32 s3, s3, s4 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_sub_i32 s5, 0, s3 +; GFX90A-NEXT: s_ashr_i32 s4, s2, 31 +; GFX90A-NEXT: s_add_i32 s2, s2, s4 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_xor_b32 s2, s2, s4 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v0 +; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v0, s4, v0 +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = srem i32 %x, %y store i32 %r, i32 addrspace(1)* %out ret void @@ -488,6 +614,26 @@ define 
amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc ; GFX9-NEXT: global_store_short v3, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: udiv_i16: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshr_b32 s3, s2, 16 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_and_b32 s2, s2, 0xffff +; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; GFX90A-NEXT: v_mul_f32_e32 v2, v1, v2 +; GFX90A-NEXT: v_trunc_f32_e32 v2, v2 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX90A-NEXT: v_mad_f32 v1, -v2, v0, v1 +; GFX90A-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc +; GFX90A-NEXT: global_store_short v3, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = udiv i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -562,6 +708,28 @@ define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_short v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: urem_i16: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshr_b32 s3, s2, 16 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_and_b32 s4, s2, 0xffff +; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s4 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; GFX90A-NEXT: v_mul_f32_e32 v2, v1, v2 +; GFX90A-NEXT: v_trunc_f32_e32 v2, v2 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v4, v2 +; GFX90A-NEXT: v_mad_f32 v1, -v2, v0, v1 +; GFX90A-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc +; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 +; 
GFX90A-NEXT: global_store_short v3, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = urem i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -643,7 +811,6 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX9-NEXT: global_store_short v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i16: ; GFX90A: ; %bb.0: @@ -669,8 +836,6 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX90A-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX90A-NEXT: global_store_short v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -759,7 +924,6 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_short v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i16: ; GFX90A: ; %bb.0: @@ -787,8 +951,6 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX90A-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX90A-NEXT: global_store_short v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem i16 %x, %y store i16 %r, i16 addrspace(1)* %out ret void @@ -852,6 +1014,24 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc ; GFX9-NEXT: global_store_byte v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: udiv_i8: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_ubyte1_e32 v0, s2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v0 +; GFX90A-NEXT: v_cvt_f32_ubyte0_e32 v3, s2 +; GFX90A-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX90A-NEXT: v_trunc_f32_e32 v1, v1 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v4, v1 +; GFX90A-NEXT: v_mad_f32 v1, -v1, v0, v3 +; GFX90A-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v4, vcc +; GFX90A-NEXT: global_store_byte v2, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %r = udiv i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -924,7 +1104,6 @@ define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i8: ; GFX90A: ; %bb.0: @@ -946,8 +1125,6 @@ define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX90A-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX90A-NEXT: global_store_byte v2, v0, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1029,7 +1206,6 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX9-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i8: ; GFX90A: ; %bb.0: @@ -1055,8 +1231,6 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX90A-NEXT: v_add_u32_e32 v0, s0, v3 ; GFX90A-NEXT: global_store_byte v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1146,7 +1320,6 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX9-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i8: ; GFX90A: ; %bb.0: @@ -1175,8 +1348,6 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX90A-NEXT: v_sub_u32_e32 v1, s4, v1 ; GFX90A-NEXT: global_store_byte v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem i8 %x, %y store i8 %r, i8 addrspace(1)* %out ret void @@ -1487,7 +1658,6 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v4i32: ; GFX90A: ; %bb.0: @@ -1574,8 +1744,6 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = udiv <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -1862,6 +2030,84 @@ define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: urem_v4i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 +; GFX90A-NEXT: s_mov_b32 s12, 0x4f7ffffe +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s8 +; GFX90A-NEXT: s_sub_i32 s2, 0, s8 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX90A-NEXT: s_sub_i32 s3, 0, s9 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_f32_e32 v0, s12, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_f32_e32 v1, s12, v1 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_lo_u32 v2, s2, v0 +; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s8 +; GFX90A-NEXT: v_sub_u32_e32 v0, s4, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v2, s8, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v2, s8, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_cvt_f32_u32_e32 v2, s10 +; GFX90A-NEXT: v_mul_lo_u32 v3, s3, v1 +; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX90A-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX90A-NEXT: v_mul_lo_u32 v1, v1, s9 +; GFX90A-NEXT: v_sub_u32_e32 v1, s5, v1 +; GFX90A-NEXT: v_mul_f32_e32 v2, s12, v2 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX90A-NEXT: v_subrev_u32_e32 v3, s9, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 
vcc, s9, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s9, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX90A-NEXT: s_sub_i32 s2, 0, s10 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: v_mul_lo_u32 v3, s2, v2 +; GFX90A-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX90A-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v3, s11 +; GFX90A-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX90A-NEXT: v_mul_lo_u32 v2, v2, s10 +; GFX90A-NEXT: v_sub_u32_e32 v2, s6, v2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX90A-NEXT: v_subrev_u32_e32 v5, s10, v2 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX90A-NEXT: v_mul_f32_e32 v3, s12, v3 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX90A-NEXT: v_subrev_u32_e32 v5, s10, v2 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX90A-NEXT: s_sub_i32 s2, 0, s11 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX90A-NEXT: v_mul_lo_u32 v5, s2, v3 +; GFX90A-NEXT: v_mul_hi_u32 v5, v3, v5 +; GFX90A-NEXT: v_add_u32_e32 v3, v3, v5 +; GFX90A-NEXT: v_mul_hi_u32 v3, s7, v3 +; GFX90A-NEXT: v_mul_lo_u32 v3, v3, s11 +; GFX90A-NEXT: v_sub_u32_e32 v3, s7, v3 +; GFX90A-NEXT: v_subrev_u32_e32 v5, s11, v3 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v5, s11, v3 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX90A-NEXT: s_endpgm %r = urem <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -2280,6 +2526,128 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_subrev_u32_e32 v3, s2, v3 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: sdiv_v4i32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 +; 
GFX90A-NEXT: s_mov_b32 s13, 0x4f7ffffe +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_ashr_i32 s2, s8, 31 +; GFX90A-NEXT: s_add_i32 s3, s8, s2 +; GFX90A-NEXT: s_xor_b32 s3, s3, s2 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_ashr_i32 s8, s4, 31 +; GFX90A-NEXT: s_add_i32 s4, s4, s8 +; GFX90A-NEXT: s_xor_b32 s2, s8, s2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_xor_b32 s4, s4, s8 +; GFX90A-NEXT: s_sub_i32 s8, 0, s3 +; GFX90A-NEXT: s_ashr_i32 s12, s9, 31 +; GFX90A-NEXT: v_mul_f32_e32 v0, s13, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v1, s8, v0 +; GFX90A-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX90A-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX90A-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX90A-NEXT: s_add_i32 s4, s9, s12 +; GFX90A-NEXT: s_xor_b32 s4, s4, s12 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v3, s4 +; GFX90A-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v3 +; GFX90A-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX90A-NEXT: v_mul_f32_e32 v1, s13, v1 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX90A-NEXT: v_subrev_u32_e32 v0, s2, v0 +; GFX90A-NEXT: s_ashr_i32 s2, s5, 31 +; GFX90A-NEXT: s_add_i32 s5, s5, s2 +; GFX90A-NEXT: s_xor_b32 s3, s2, s12 +; GFX90A-NEXT: s_xor_b32 s2, s5, s2 +; GFX90A-NEXT: s_sub_i32 s5, 0, s4 +; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v1 +; GFX90A-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX90A-NEXT: v_add_u32_e32 v1, v1, v2 +; GFX90A-NEXT: v_mul_hi_u32 v1, s2, v1 +; GFX90A-NEXT: v_mul_lo_u32 v2, v1, s4 +; 
GFX90A-NEXT: v_sub_u32_e32 v2, s2, v2 +; GFX90A-NEXT: s_ashr_i32 s2, s10, 31 +; GFX90A-NEXT: s_add_i32 s5, s10, s2 +; GFX90A-NEXT: s_xor_b32 s5, s5, s2 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v5, s5 +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s4, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v5 +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: v_xor_b32_e32 v1, s3, v1 +; GFX90A-NEXT: v_mul_f32_e32 v2, s13, v2 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX90A-NEXT: v_subrev_u32_e32 v1, s3, v1 +; GFX90A-NEXT: s_ashr_i32 s3, s6, 31 +; GFX90A-NEXT: s_add_i32 s4, s6, s3 +; GFX90A-NEXT: s_xor_b32 s2, s3, s2 +; GFX90A-NEXT: s_xor_b32 s3, s4, s3 +; GFX90A-NEXT: s_sub_i32 s4, 0, s5 +; GFX90A-NEXT: v_mul_lo_u32 v3, s4, v2 +; GFX90A-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX90A-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX90A-NEXT: v_mul_hi_u32 v2, s3, v2 +; GFX90A-NEXT: v_mul_lo_u32 v3, v2, s5 +; GFX90A-NEXT: v_sub_u32_e32 v3, s3, v3 +; GFX90A-NEXT: s_ashr_i32 s3, s11, 31 +; GFX90A-NEXT: s_add_i32 s4, s11, s3 +; GFX90A-NEXT: s_xor_b32 s4, s4, s3 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v6, s4 +; GFX90A-NEXT: v_add_u32_e32 v5, 1, v2 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v5, s5, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v3, v6 +; GFX90A-NEXT: v_add_u32_e32 v5, 1, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX90A-NEXT: v_xor_b32_e32 v2, s2, v2 +; GFX90A-NEXT: v_mul_f32_e32 v3, s13, v3 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX90A-NEXT: v_subrev_u32_e32 v2, s2, v2 +; GFX90A-NEXT: s_ashr_i32 s2, s7, 31 +; GFX90A-NEXT: s_add_i32 s5, s7, s2 
+; GFX90A-NEXT: s_xor_b32 s3, s2, s3 +; GFX90A-NEXT: s_xor_b32 s2, s5, s2 +; GFX90A-NEXT: s_sub_i32 s5, 0, s4 +; GFX90A-NEXT: v_mul_lo_u32 v5, s5, v3 +; GFX90A-NEXT: v_mul_hi_u32 v5, v3, v5 +; GFX90A-NEXT: v_add_u32_e32 v3, v3, v5 +; GFX90A-NEXT: v_mul_hi_u32 v3, s2, v3 +; GFX90A-NEXT: v_mul_lo_u32 v5, v3, s4 +; GFX90A-NEXT: v_sub_u32_e32 v5, s2, v5 +; GFX90A-NEXT: v_add_u32_e32 v6, 1, v3 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v5 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v6, s4, v5 +; GFX90A-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX90A-NEXT: v_add_u32_e32 v6, 1, v3 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s4, v5 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX90A-NEXT: v_xor_b32_e32 v3, s3, v3 +; GFX90A-NEXT: v_subrev_u32_e32 v3, s3, v3 +; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX90A-NEXT: s_endpgm %r = sdiv <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -2662,7 +3030,6 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v3 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v4i32: ; GFX90A: ; %bb.0: @@ -2773,8 +3140,6 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v3 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem <4 x i32> %x, %y store <4 x i32> %r, <4 x i32> addrspace(1)* %out ret void @@ -2984,7 +3349,6 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v4i16: ; GFX90A: ; %bb.0: @@ -3044,8 +3408,6 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -3279,7 +3641,6 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v4i16: ; GFX90A: ; %bb.0: @@ -3347,8 +3708,6 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v3, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -3613,7 +3972,6 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v4i16: ; GFX90A: ; %bb.0: @@ -3692,8 +4050,6 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -3982,7 +4338,6 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v4i16: ; GFX90A: ; %bb.0: @@ -4069,8 +4424,6 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX90A-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <4 x i16> %x, %y store <4 x i16> %r, <4 x i16> addrspace(1)* %out ret void @@ -4140,7 +4493,6 @@ define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: global_store_byte v2, v0, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i3: ; GFX90A: ; %bb.0: @@ -4162,8 +4514,6 @@ define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX90A-NEXT: global_store_byte v2, v0, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4242,7 +4592,6 @@ define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i3: ; GFX90A: ; %bb.0: @@ -4267,8 +4616,6 @@ define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX90A-NEXT: global_store_byte v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = urem i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4352,7 +4699,6 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i3: ; GFX90A: ; %bb.0: @@ -4379,8 +4725,6 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX90A-NEXT: global_store_byte v1, v0, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4473,7 +4817,6 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i3: ; GFX90A: ; %bb.0: @@ -4503,8 +4846,6 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX90A-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX90A-NEXT: global_store_byte v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem i3 %x, %y store i3 %r, i3 addrspace(1)* %out ret void @@ -4670,7 +5011,6 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v1, v3, s[4:5] offset:4 ; GFX9-NEXT: global_store_dword v1, v0, s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v3i16: ; GFX90A: ; %bb.0: @@ -4718,8 +5058,6 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v3, s[4:5] offset:4 ; GFX90A-NEXT: global_store_dword v1, v0, s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -4907,7 +5245,6 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v3, v2, s[6:7] offset:4 ; GFX9-NEXT: global_store_dword v3, v0, s[6:7] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v3i16: ; GFX90A: ; %bb.0: @@ -4961,8 +5298,6 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v3, s[4:5] offset:4 ; GFX90A-NEXT: global_store_dword v1, v0, s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -5168,7 +5503,6 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v1, v0, s[6:7] offset:4 ; GFX9-NEXT: global_store_dword v1, v2, s[6:7] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v3i16: ; GFX90A: ; %bb.0: @@ -5230,8 +5564,6 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v0, s[6:7] offset:4 ; GFX90A-NEXT: global_store_dword v1, v2, s[6:7] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -5459,7 +5791,6 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX9-NEXT: global_store_short v3, v2, s[0:1] offset:4 ; GFX9-NEXT: global_store_dword v3, v0, s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v3i16: ; GFX90A: ; %bb.0: @@ -5527,8 +5858,6 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX90A-NEXT: global_store_short v1, v3, s[6:7] offset:4 ; GFX90A-NEXT: global_store_dword v1, v0, s[6:7] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem <3 x i16> %x, %y store <3 x i16> %r, <3 x i16> addrspace(1)* %out ret void @@ -5712,7 +6041,6 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v3i15: ; GFX90A: ; %bb.0: @@ -5769,8 +6097,6 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -5976,7 +6302,6 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v3i15: ; GFX90A: ; %bb.0: @@ -6041,8 +6366,6 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -6266,7 +6589,6 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v2, v0, s[6:7] offset:4 ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v3i15: ; GFX90A: ; %bb.0: @@ -6337,8 +6659,6 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[6:7] offset:4 ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -6596,7 +6916,6 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX9-NEXT: global_store_short v4, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v3i15: ; GFX90A: ; %bb.0: @@ -6681,8 +7000,6 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX90A-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX90A-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out ret void @@ -6724,7 +7041,6 @@ define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -6740,8 +7056,6 @@ define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -6775,7 +7089,6 @@ define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -6787,8 +7100,6 @@ define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -6825,7 +7136,6 @@ define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i32_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -6838,8 +7148,6 @@ define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl i32 4096, %y %r = udiv i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -6883,7 +7191,6 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -6897,8 +7204,6 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -6949,7 +7254,6 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i32_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -6967,8 +7271,6 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -7150,6 +7452,58 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: udiv_v2i32_pow2_shl_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX90A-NEXT: s_movk_i32 s8, 0x1000 +; GFX90A-NEXT: s_mov_b32 s9, 0x4f7ffffe +; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshl_b32 s2, s8, s2 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GFX90A-NEXT: s_lshl_b32 s0, s8, s3 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s0 +; GFX90A-NEXT: s_sub_i32 s1, 0, s2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_f32_e32 v0, s9, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_f32_e32 v1, s9, v1 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v0 +; GFX90A-NEXT: v_mul_hi_u32 v3, v0, v3 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v3 +; GFX90A-NEXT: 
v_mul_hi_u32 v0, s6, v0 +; GFX90A-NEXT: v_mul_lo_u32 v3, v0, s2 +; GFX90A-NEXT: v_sub_u32_e32 v3, s6, v3 +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v4, s2, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX90A-NEXT: s_sub_i32 s1, 0, s0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 +; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v1 +; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX90A-NEXT: v_mul_hi_u32 v1, s7, v1 +; GFX90A-NEXT: v_mul_lo_u32 v3, v1, s0 +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v0 +; GFX90A-NEXT: v_sub_u32_e32 v3, s7, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v4, s0, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX90A-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = udiv <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -7197,7 +7551,6 @@ define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -7215,8 +7568,6 @@ define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -7250,7 +7601,6 @@ define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7262,8 +7612,6 @@ define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = urem i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -7302,7 +7650,6 @@ define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i32_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -7316,8 +7663,6 @@ define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl i32 4096, %y %r = urem i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -7363,7 +7708,6 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7378,8 +7722,6 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -7549,13 +7891,61 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm - %shl.y = shl <2 x i32> , %y - %r = urem <2 x i32> %x, %shl.y - store <2 x i32> %r, <2 x i32> addrspace(1)* %out - ret void -} - -define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { +; +; GFX90A-LABEL: urem_v2i32_pow2_shl_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX90A-NEXT: s_movk_i32 s8, 0x1000 +; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshl_b32 s2, s8, s2 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GFX90A-NEXT: s_lshl_b32 s0, s8, s3 +; GFX90A-NEXT: s_mov_b32 s3, 0x4f7ffffe +; GFX90A-NEXT: v_cvt_f32_u32_e32 v1, s0 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_sub_i32 s1, 0, s2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_f32_e32 v0, s3, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_f32_e32 v1, s3, v1 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v0 +; GFX90A-NEXT: v_mul_hi_u32 v3, v0, v3 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v3 +; GFX90A-NEXT: v_mul_hi_u32 v0, s6, v0 +; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s2 +; GFX90A-NEXT: v_sub_u32_e32 v0, s6, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 +; GFX90A-NEXT: s_sub_i32 s1, 0, s0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_mul_lo_u32 v3, s1, v1 +; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX90A-NEXT: 
v_add_u32_e32 v1, v1, v3 +; GFX90A-NEXT: v_mul_hi_u32 v1, s7, v1 +; GFX90A-NEXT: v_mul_lo_u32 v1, v1, s0 +; GFX90A-NEXT: v_sub_u32_e32 v1, s7, v1 +; GFX90A-NEXT: v_subrev_u32_e32 v3, s0, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s0, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX90A-NEXT: s_endpgm + %shl.y = shl <2 x i32> , %y + %r = urem <2 x i32> %x, %shl.y + store <2 x i32> %r, <2 x i32> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; CHECK-LABEL: @sdiv_i32_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[X:%.*]], 1235195 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]], align 4 @@ -7591,7 +7981,6 @@ define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -7607,8 +7996,6 @@ define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -7648,7 +8035,6 @@ define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7663,8 +8049,6 @@ define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -7754,6 +8138,44 @@ define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: v_subrev_u32_e32 v0, s2, v0 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: sdiv_i32_pow2_shl_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshl_b32 s3, 0x1000, s3 +; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 +; GFX90A-NEXT: s_add_i32 s3, s3, s4 +; GFX90A-NEXT: s_xor_b32 s3, s3, s4 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_sub_i32 s6, 0, s3 +; GFX90A-NEXT: s_ashr_i32 s5, s2, 31 +; GFX90A-NEXT: s_add_i32 s2, s2, s5 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_xor_b32 s2, s2, s5 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, s6, v0 +; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v3, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v3, s2, v3 +; GFX90A-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 +; GFX90A-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX90A-NEXT: s_xor_b32 s2, s5, s4 +; GFX90A-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v0, s2, v0 +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %shl.y = shl i32 4096, %y %r = sdiv i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -7809,7 +8231,6 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7829,8 +8250,6 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -7887,7 +8306,6 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: ssdiv_v2i32_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -7908,8 +8326,6 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -8143,6 +8559,76 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_subrev_u32_e32 v1, s1, v1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: sdiv_v2i32_pow2_shl_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX90A-NEXT: s_movk_i32 s8, 0x1000 +; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c +; GFX90A-NEXT: s_mov_b32 s10, 0x4f7ffffe +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshl_b32 s2, s8, s2 +; GFX90A-NEXT: s_ashr_i32 s9, s2, 31 +; GFX90A-NEXT: s_add_i32 s2, s2, s9 +; GFX90A-NEXT: s_xor_b32 s2, s2, s9 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GFX90A-NEXT: s_ashr_i32 s1, s6, 31 +; GFX90A-NEXT: s_lshl_b32 s0, s8, s3 +; GFX90A-NEXT: s_add_i32 s3, s6, s1 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_xor_b32 s6, s1, s9 +; GFX90A-NEXT: s_xor_b32 s1, s3, s1 +; GFX90A-NEXT: s_sub_i32 s3, 0, s2 +; GFX90A-NEXT: v_mul_f32_e32 v0, s10, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX90A-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX90A-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX90A-NEXT: v_mul_lo_u32 v1, v0, s2 +; GFX90A-NEXT: v_sub_u32_e32 v1, s1, v1 +; GFX90A-NEXT: s_ashr_i32 s1, s0, 31 +; GFX90A-NEXT: s_add_i32 s0, s0, s1 +; GFX90A-NEXT: s_xor_b32 s0, s0, s1 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v4, s0 +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v3, s2, v1 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v1, v4 +; GFX90A-NEXT: s_ashr_i32 s2, s7, 31 +; GFX90A-NEXT: s_add_i32 s3, 
s7, s2 +; GFX90A-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX90A-NEXT: v_mul_f32_e32 v1, s10, v1 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX90A-NEXT: s_xor_b32 s1, s2, s1 +; GFX90A-NEXT: s_xor_b32 s2, s3, s2 +; GFX90A-NEXT: s_sub_i32 s3, 0, s0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX90A-NEXT: v_mul_lo_u32 v3, s3, v1 +; GFX90A-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX90A-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX90A-NEXT: v_mul_hi_u32 v1, s2, v1 +; GFX90A-NEXT: v_mul_lo_u32 v3, v1, s0 +; GFX90A-NEXT: v_sub_u32_e32 v3, s2, v3 +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v4, s0, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX90A-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 +; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX90A-NEXT: v_xor_b32_e32 v0, s6, v0 +; GFX90A-NEXT: v_xor_b32_e32 v1, s1, v1 +; GFX90A-NEXT: v_subrev_u32_e32 v0, s6, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v1, s1, v1 +; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX90A-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = sdiv <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -8190,7 +8676,6 @@ define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i32_oddk_denom: ; GFX90A: ; %bb.0: @@ -8208,8 +8693,6 @@ define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = srem i32 %x, 1235195 store i32 %r, i32 addrspace(1)* %out ret void @@ -8251,7 +8734,6 @@ define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8267,8 +8749,6 @@ define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem i32 %x, 4096 store i32 %r, i32 addrspace(1)* %out ret void @@ -8352,6 +8832,41 @@ define amdgpu_kernel void @srem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: srem_i32_pow2_shl_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshl_b32 s3, 0x1000, s3 +; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 +; GFX90A-NEXT: s_add_i32 s3, s3, s4 +; GFX90A-NEXT: s_xor_b32 s3, s3, s4 +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX90A-NEXT: s_sub_i32 s5, 0, s3 +; GFX90A-NEXT: s_ashr_i32 s4, s2, 31 +; GFX90A-NEXT: s_add_i32 s2, s2, s4 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: s_xor_b32 s2, s2, s4 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: v_mul_lo_u32 v2, s5, v0 +; GFX90A-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v0, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v0, s2, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX90A-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v2, s3, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX90A-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX90A-NEXT: v_subrev_u32_e32 v0, s4, v0 +; GFX90A-NEXT: global_store_dword v1, v0, s[0:1] +; GFX90A-NEXT: s_endpgm %shl.y = shl i32 4096, %y %r = srem i32 %x, %shl.y store i32 %r, i32 addrspace(1)* %out @@ -8413,7 +8928,6 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i32_pow2k_denom: ; GFX90A: ; %bb.0: @@ -8436,8 +8950,6 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <2 x i32> %x, store <2 x i32> %r, <2 x i32> addrspace(1)* %out ret void @@ -8654,7 +9166,6 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX9-NEXT: v_subrev_u32_e32 v1, s6, v1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i32_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -8719,8 +9230,6 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX90A-NEXT: v_subrev_u32_e32 v1, s1, v1 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl <2 x i32> , %y %r = srem <2 x i32> %x, %shl.y store <2 x i32> %r, <2 x i32> addrspace(1)* %out @@ -8951,28 +9460,20 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s6, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v4 -<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v5, s[0:1] -======= -; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v6, s[0:1], 2, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v8, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v7, s[0:1] ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_mov_b32_e32 v7, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v7, v2, vcc ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s3, v2 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 1, 2, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2 +; GFX9-NEXT: v_add_co_u32_e64 v4, s[0:1], v0, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v5, s[0:1], 0, v1, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] @@ -9092,13 +9593,6 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= -; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v6, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v0, 
v0, v2, vcc -; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[4:5] -; GFX9-NEXT: s_endpgm ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv i64 %x, 1235195949943 store i64 %r, i64 addrspace(1)* %out ret void @@ -9134,6 +9628,16 @@ define amdgpu_kernel void @udiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: udiv_i64_pow2k_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshr_b64 s[2:3], s[2:3], 12 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90A-NEXT: s_endpgm %r = udiv i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -9174,7 +9678,6 @@ define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -9187,8 +9690,6 @@ define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl i64 4096, %y %r = udiv i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -9236,7 +9737,6 @@ define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -9252,8 +9752,6 @@ define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -9393,17 +9891,11 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -<<<<<<< HEAD ; GFX9-NEXT: s_movk_i32 s8, 0xfff ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s2 ; GFX9-NEXT: v_mul_lo_u32 v4, v1, s2 ; GFX9-NEXT: v_mul_lo_u32 v3, v0, s2 -======= -; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4 -; GFX9-NEXT: v_mul_lo_u32 v4, v1, s4 -; GFX9-NEXT: v_mul_lo_u32 v3, v0, s4 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 ; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 ; GFX9-NEXT: v_mul_hi_u32 v6, v0, v3 @@ -9419,37 +9911,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc -======= -; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] -; GFX9-NEXT: v_mul_hi_u32 v4, v0, s4 -; GFX9-NEXT: v_mul_lo_u32 v6, v2, s4 -; GFX9-NEXT: v_mul_lo_u32 v8, v0, s4 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v0 -; GFX9-NEXT: v_add_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_mul_lo_u32 v6, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v9, v0, v8 -; GFX9-NEXT: v_mul_hi_u32 v10, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v11, v2, v4 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v9, v6 -; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v7, v10, vcc -; GFX9-NEXT: v_mul_lo_u32 v10, v2, v8 -; GFX9-NEXT: v_mul_hi_u32 v8, v2, v8 -; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 -; GFX9-NEXT: s_movk_i32 s0, 0xfff -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v10 -; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v9, v8, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v11, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v4, s[2:3] ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s2 @@ -9485,15 +9947,10 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc ; GFX9-NEXT: v_mul_lo_u32 v4, s7, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 -<<<<<<< HEAD -======= -; GFX9-NEXT: s_lshr_b64 s[2:3], s[4:5], 12 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 @@ -9519,42 +9976,10 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v6, vcc -======= -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 -; GFX9-NEXT: v_mul_lo_u32 v4, v1, s0 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, s0 -; GFX9-NEXT: v_mul_lo_u32 v9, v0, s0 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 -; GFX9-NEXT: v_mov_b32_e32 v6, s7 -; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s6, v9 -; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v6, v4, vcc -; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s0, v9 -; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v4, vcc -; GFX9-NEXT: s_movk_i32 s0, 0xffe -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 -; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc -; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: global_store_dwordx4 v5, v[0:3], s[8:9] +; GFX9-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i64_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -9659,8 +10084,6 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = udiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -9716,7 +10139,6 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: udiv_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -9735,8 +10157,6 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl <2 x i64> , %y %r = udiv <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -9971,7 +10391,6 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[2:3] ; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 -<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v4, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc @@ -10104,23 +10523,6 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= -; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v6, s7 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s8, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[4:5] -; GFX9-NEXT: s_endpgm ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = urem i64 %x, 1235195393993 store i64 %r, i64 addrspace(1)* %out ret void @@ -10155,6 +10557,16 @@ define amdgpu_kernel void @urem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: urem_i64_pow2k_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_and_b32 s2, s2, 0xfff +; GFX90A-NEXT: v_mov_b32_e32 v0, s2 +; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX90A-NEXT: s_endpgm %r = urem i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -10201,7 +10613,6 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -10217,8 +10628,6 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl i64 4096, %y %r = urem i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -10267,7 +10676,6 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: global_store_dwordx4 v1, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -10283,8 +10691,6 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 ; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. %r = urem <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -10350,7 +10756,6 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: urem_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -10374,8 +10779,6 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl <2 x i64> , %y %r = urem <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -10518,17 +10921,10 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s4 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s4 ; GFX9-NEXT: v_mul_lo_u32 v4, v0, s4 -======= -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 -; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 -; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, v4 @@ -10544,7 +10940,6 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v4, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -10585,55 +10980,10 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_mul_lo_u32 v4, s3, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s3, v0 ; GFX9-NEXT: s_mov_b32 s5, 0x12d8fb -======= -; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] -; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8 -; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v0 -; GFX9-NEXT: v_mul_lo_u32 v10, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v11, v0, v8 -; GFX9-NEXT: v_mul_hi_u32 v12, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v9, v2, v8 -; GFX9-NEXT: v_mul_lo_u32 v8, v2, v8 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 -; GFX9-NEXT: v_mul_hi_u32 v6, v2, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v7, v12, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v4, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i32 s2, s7, 31 -; GFX9-NEXT: s_add_u32 s0, s6, s2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: 
s_mov_b32 s3, s2 -; GFX9-NEXT: s_addc_u32 s1, s7, s2 -; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v3, s0, v0 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v6, s1, v1 -; GFX9-NEXT: v_mul_lo_u32 v1, s1, v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s1, v0 -; GFX9-NEXT: v_mul_hi_u32 v0, s1, v0 -; GFX9-NEXT: s_mov_b32 s3, 0x12d8fb ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s5 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s5 @@ -10664,45 +11014,9 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 -======= -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 -; GFX9-NEXT: v_mul_lo_u32 v4, v1, s3 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, s3 -; GFX9-NEXT: v_mul_lo_u32 v9, v0, s3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 -; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s0, v9 -; GFX9-NEXT: v_mov_b32_e32 v6, s1 -; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v6, v4, vcc -; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s3, v9 -; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v4, vcc -; GFX9-NEXT: s_mov_b32 s0, 0x12d8fa -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v10 -; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc -; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] -; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s2, v0 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i64_oddk_denom: ; GFX90A: ; %bb.0: @@ -10813,8 +11127,6 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv i64 %x, 1235195 store i64 %r, i64 addrspace(1)* %out ret void @@ -10858,6 +11170,20 @@ define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: sdiv_i64_pow2k_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 +; GFX90A-NEXT: s_lshr_b32 s4, s4, 20 +; GFX90A-NEXT: s_add_u32 s2, s2, s4 +; GFX90A-NEXT: s_addc_u32 s3, s3, 0 +; GFX90A-NEXT: s_ashr_i64 s[2:3], s[2:3], 12 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90A-NEXT: s_endpgm %r = sdiv i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -11115,15 +11441,8 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v6, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v6, s[0:1], 2, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v8, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v9, v7, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v7, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v7, v3, vcc -<<<<<<< HEAD ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc @@ -11132,25 +11451,12 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s9, v3 ; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v0, v5 -======= -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 -; GFX9-NEXT: 
v_cndmask_b32_e64 v7, 0, -1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s11, v3 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], 0, v1, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GFX9-NEXT: s_xor_b64 s[0:1], s[10:11], s[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc -======= -; GFX9-NEXT: v_cndmask_b32_e64 v3, v8, v6, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[8:9] -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 ; GFX9-NEXT: v_xor_b32_e32 v1, s1, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 @@ -11158,7 +11464,6 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -11290,8 +11595,6 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl i64 4096, %y %r = sdiv i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -11355,7 +11658,6 @@ define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -11379,8 +11681,6 @@ define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = sdiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -11555,48 +11855,13 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v7, vcc ; GFX9-NEXT: v_mul_lo_u32 v7, v1, v5 ; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 -<<<<<<< HEAD ; GFX9-NEXT: s_ashr_i64 s[2:3], s[2:3], 12 ; GFX9-NEXT: s_ashr_i32 s4, s7, 31 -======= -; GFX9-NEXT: s_ashr_i64 s[4:5], s[4:5], 12 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v8, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc -======= -; GFX9-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] -; GFX9-NEXT: v_mul_lo_u32 v5, v2, s8 -; GFX9-NEXT: v_mul_hi_u32 v7, v0, s8 -; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 -; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 -; GFX9-NEXT: v_sub_u32_e32 v5, v5, v0 -; GFX9-NEXT: v_mul_lo_u32 v10, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v11, v0, v8 -; GFX9-NEXT: v_mul_hi_u32 v12, v0, v5 -; GFX9-NEXT: v_mul_hi_u32 v9, v2, v8 -; GFX9-NEXT: v_mul_lo_u32 v8, v2, v8 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 -; GFX9-NEXT: v_mul_hi_u32 v7, v2, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v12, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, v2, v5 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v4, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v5, s[2:3] -; GFX9-NEXT: s_ashr_i32 s2, s7, 31 -; GFX9-NEXT: s_add_u32 s6, s6, s2 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 @@ -11633,17 +11898,12 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc ; GFX9-NEXT: v_mul_lo_u32 v5, s7, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 -<<<<<<< HEAD ; GFX9-NEXT: s_movk_i32 s5, 0xfff ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -======= -; GFX9-NEXT: s_movk_i32 s0, 0xfff ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s5 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s5 @@ -11674,48 +11934,12 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, s4 ; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s4, v0 -======= -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v2, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 -; GFX9-NEXT: v_mul_lo_u32 v5, v1, s0 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, s0 -; GFX9-NEXT: v_mul_lo_u32 v9, v0, s0 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc -; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 -; GFX9-NEXT: v_mov_b32_e32 v6, s7 -; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s6, v9 -; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc -; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s0, v9 -; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v5, vcc -; GFX9-NEXT: s_movk_i32 s0, 0xffe -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 -; GFX9-NEXT: 
v_cndmask_b32_e32 v6, -1, v6, vcc -; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, -1, v6, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] -; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 -; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, s2 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] +; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: ssdiv_v2i64_mixed_pow2k_denom: ; GFX90A: ; %bb.0: @@ -11835,8 +12059,6 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%r = sdiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -12229,7 +12451,6 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_subbrev_co_u32_e64 v5, s[0:1], 0, v5, s[0:1] ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] -<<<<<<< HEAD ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v5 @@ -12342,123 +12563,6 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v7, vcc ; GFX9-NEXT: v_add_u32_e32 v5, v5, v8 ; GFX9-NEXT: v_sub_u32_e32 v7, s7, v5 -======= -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v7, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v7, s[0:1], 2, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v8, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], 1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v10, s[0:1], 0, v1, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v8, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v8, s5 -; GFX9-NEXT: s_xor_b64 s[4:5], s[14:15], s[12:13] -; GFX9-NEXT: s_ashr_i32 s12, s9, 31 -; GFX9-NEXT: s_add_u32 s8, s8, s12 -; GFX9-NEXT: s_mov_b32 s13, s12 -; GFX9-NEXT: s_addc_u32 s9, s9, s12 -; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] -; GFX9-NEXT: v_cvt_f32_u32_e32 v10, s8 -; GFX9-NEXT: v_cvt_f32_u32_e32 v11, s9 -; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v8, v2, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s11, v2 -; GFX9-NEXT: v_mac_f32_e32 v10, s16, v11 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; GFX9-NEXT: v_rcp_f32_e32 v3, v10 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, 
vcc -; GFX9-NEXT: s_sub_u32 s10, 0, s8 -; GFX9-NEXT: v_mul_f32_e32 v3, s17, v3 -; GFX9-NEXT: v_mul_f32_e32 v4, s18, v3 -; GFX9-NEXT: v_trunc_f32_e32 v4, v4 -; GFX9-NEXT: v_mac_f32_e32 v3, s19, v4 -; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, v7, s[0:1] -; GFX9-NEXT: s_subb_u32 s11, 0, s9 -; GFX9-NEXT: v_mul_lo_u32 v8, s10, v4 -; GFX9-NEXT: v_mul_hi_u32 v7, s10, v3 -; GFX9-NEXT: v_mul_lo_u32 v9, s11, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s10, v3 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v3, v7 -; GFX9-NEXT: v_mul_hi_u32 v9, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v10, v3, v7 -; GFX9-NEXT: v_mul_hi_u32 v11, v4, v7 -; GFX9-NEXT: v_mul_lo_u32 v7, v4, v7 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v9, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v10, vcc -; GFX9-NEXT: v_mul_lo_u32 v10, v4, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v4, v2 -; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v10 -; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v9, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 -; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v3, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v5, v8, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v3, vcc, v4, v7, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v8, s10, v3 -; GFX9-NEXT: v_mul_hi_u32 v9, s10, v2 -; GFX9-NEXT: v_mul_lo_u32 v10, s11, v2 -; GFX9-NEXT: v_mul_lo_u32 v11, s10, v2 -; GFX9-NEXT: s_ashr_i32 s10, s7, 31 -; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 -; GFX9-NEXT: v_add_u32_e32 v8, v8, v10 -; GFX9-NEXT: v_mul_lo_u32 v12, v2, v8 -; GFX9-NEXT: v_mul_hi_u32 v13, v2, v11 -; GFX9-NEXT: v_mul_hi_u32 v14, v2, v8 -; GFX9-NEXT: v_mul_hi_u32 v10, v3, v11 -; GFX9-NEXT: v_mul_lo_u32 v11, v3, v11 -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v13, v12 -; GFX9-NEXT: v_mul_hi_u32 
v9, v3, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v14, vcc -; GFX9-NEXT: v_mul_lo_u32 v3, v3, v8 -; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v12, v11 -; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v13, v10, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v9, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v10, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v5, v8, vcc -; GFX9-NEXT: v_add_u32_e32 v4, v4, v7 -; GFX9-NEXT: v_addc_co_u32_e64 v4, vcc, v4, v8, s[0:1] -; GFX9-NEXT: s_add_u32 s0, s6, s10 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: s_mov_b32 s11, s10 -; GFX9-NEXT: s_addc_u32 s1, s7, s10 -; GFX9-NEXT: s_xor_b64 s[6:7], s[0:1], s[10:11] -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s6, v3 -; GFX9-NEXT: v_mul_hi_u32 v7, s6, v2 -; GFX9-NEXT: v_mul_hi_u32 v9, s6, v3 -; GFX9-NEXT: v_mul_hi_u32 v10, s7, v3 -; GFX9-NEXT: v_mul_lo_u32 v3, s7, v3 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v7, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v9, vcc -; GFX9-NEXT: v_mul_lo_u32 v9, s7, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, s7, v2 -; GFX9-NEXT: v_mov_b32_e32 v8, s5 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v7, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v10, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v4, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s8, v3 -; GFX9-NEXT: v_mul_hi_u32 v5, s8, v2 -; GFX9-NEXT: v_mul_lo_u32 v7, s9, v2 -; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v8, vcc -; GFX9-NEXT: v_add_u32_e32 v4, v5, v4 -; GFX9-NEXT: v_mul_lo_u32 v5, s8, v2 -; GFX9-NEXT: v_add_u32_e32 v4, v4, v7 -; GFX9-NEXT: v_sub_u32_e32 v7, s7, v4 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_mov_b32_e32 v8, s9 ; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, s6, v6 ; GFX9-NEXT: v_subb_co_u32_e64 v7, s[0:1], v7, v8, vcc @@ -12470,14 +12574,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v8, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v8, s[0:1], 2, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v9, s[0:1], 0, v3, s[0:1] -; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v9, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v9, s7 -<<<<<<< HEAD ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v9, v5, vcc ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v5 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 @@ -12498,29 +12595,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mov_b32_e32 v5, s1 ; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s0, v3 ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v5, vcc -======= -; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v9, v4, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s9, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v8, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: s_xor_b64 s[0:1], s[10:11], s[12:13] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; GFX9-NEXT: v_xor_b32_e32 v2, s0, v2 -; GFX9-NEXT: v_xor_b32_e32 v3, s1, v3 -; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s0, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: sdiv_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -12777,8 +12854,6 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v5, vcc ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl <2 x i64> , %y %r = sdiv <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -12919,17 +12994,10 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_lo_u32 v2, v1, s4 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, s4 ; GFX9-NEXT: v_mul_lo_u32 v4, v0, s4 -======= -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 -; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 -; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 ; GFX9-NEXT: v_mul_hi_u32 v3, v0, v4 @@ -12945,7 +13013,6 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v4, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v8, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -12986,91 +13053,30 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_mul_lo_u32 v4, s3, v0 ; GFX9-NEXT: v_mul_hi_u32 v0, s3, v0 ; GFX9-NEXT: s_mov_b32 s5, 0x12d8fb -======= -; GFX9-NEXT: v_add_co_u32_e64 v0, s[2:3], v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v2, vcc, v1, v3, s[2:3] -; GFX9-NEXT: v_mul_lo_u32 v4, v2, s8 -; GFX9-NEXT: v_mul_hi_u32 v6, v0, s8 -; GFX9-NEXT: v_mul_lo_u32 v8, v0, s8 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_add_u32_e32 v4, v6, v4 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v0 -; GFX9-NEXT: v_mul_lo_u32 v10, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v11, v0, v8 -; GFX9-NEXT: v_mul_hi_u32 v12, v0, v4 -; GFX9-NEXT: v_mul_hi_u32 v9, v2, v8 -; GFX9-NEXT: v_mul_lo_u32 v8, v2, v8 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v11, v10 -; GFX9-NEXT: v_mul_hi_u32 v6, v2, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v7, v12, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v10, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v9, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v4, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, vcc, v1, v4, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i32 s2, s7, 31 -; GFX9-NEXT: s_add_u32 s0, s6, s2 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: 
s_mov_b32 s3, s2 -; GFX9-NEXT: s_addc_u32 s1, s7, s2 -; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v3, s0, v0 -; GFX9-NEXT: v_mul_hi_u32 v4, s0, v1 -; GFX9-NEXT: v_mul_hi_u32 v6, s1, v1 -; GFX9-NEXT: v_mul_lo_u32 v1, s1, v1 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v7, v4, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, s1, v0 -; GFX9-NEXT: v_mul_hi_u32 v0, s1, v0 -; GFX9-NEXT: s_mov_b32 s3, 0x12d8fb ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v6, v5, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc ; GFX9-NEXT: v_mul_lo_u32 v1, v1, s5 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, s5 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s5 -======= -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v7, v2, vcc -; GFX9-NEXT: v_mul_hi_u32 v2, v0, s3 -; GFX9-NEXT: v_mul_lo_u32 v1, v1, s3 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, s3 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s2, v0 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc -<<<<<<< HEAD ; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s5, v0 ; GFX9-NEXT: v_subbrev_co_u32_e32 v3, vcc, 0, v1, vcc ; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s5, v2 ; GFX9-NEXT: v_subbrev_co_u32_e32 v6, vcc, 0, v3, vcc ; GFX9-NEXT: s_mov_b32 s2, 0x12d8fa ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s2, v2 -======= -; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s3, v0 -; GFX9-NEXT: v_subbrev_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s3, v2 -; GFX9-NEXT: v_subbrev_co_u32_e32 v6, vcc, 0, v3, vcc -; GFX9-NEXT: s_mov_b32 s0, 0x12d8fa -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v2 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -<<<<<<< HEAD ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s2, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -13082,23 +13088,9 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 -======= -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] -; GFX9-NEXT: v_xor_b32_e32 v0, 
s2, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s2, v0 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i64_oddk_denom: ; GFX90A: ; %bb.0: @@ -13210,8 +13202,6 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem i64 %x, 1235195 store i64 %r, i64 addrspace(1)* %out ret void @@ -13259,6 +13249,22 @@ define amdgpu_kernel void @srem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm +; +; GFX90A-LABEL: srem_i64_pow2k_denom: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_ashr_i32 s4, s3, 31 +; GFX90A-NEXT: s_lshr_b32 s4, s4, 20 +; GFX90A-NEXT: s_add_u32 s4, s2, s4 +; GFX90A-NEXT: s_addc_u32 s5, s3, 0 +; GFX90A-NEXT: s_and_b32 s4, s4, 0xfffff000 +; GFX90A-NEXT: s_sub_u32 s2, s2, s4 +; GFX90A-NEXT: s_subb_u32 s3, s3, s5 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90A-NEXT: s_endpgm %r = srem i64 %x, 4096 store i64 %r, i64 addrspace(1)* %out ret void @@ -13518,19 +13524,19 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[2:3] ; GFX9-NEXT: v_subbrev_co_u32_e64 v3, s[0:1], 0, v3, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, 
v3, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v6, s7 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v4, v5, v4, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v5, s7 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v1, vcc ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v4, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, s10, v0 ; GFX9-NEXT: v_xor_b32_e32 v1, s10, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, s10 @@ -13538,7 +13544,6 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -13671,8 +13676,6 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%shl.y = shl i64 4096, %y %r = srem i64 %x, %shl.y store i64 %r, i64 addrspace(1)* %out @@ -13746,7 +13749,6 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX9-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i64_pow2k_denom: ; GFX90A: ; %bb.0: @@ -13775,8 +13777,6 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX90A-NEXT: v_mov_b32_e32 v3, s5 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %r = srem <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void @@ -14155,7 +14155,6 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_mul_hi_u32 v3, s12, v1 ; GFX9-NEXT: v_mul_lo_u32 v4, s13, v1 ; GFX9-NEXT: v_mul_lo_u32 v1, s12, v1 -<<<<<<< HEAD ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 ; GFX9-NEXT: v_sub_u32_e32 v3, s15, v2 @@ -14219,74 +14218,6 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v9, vcc ; GFX9-NEXT: v_mul_lo_u32 v9, v5, v3 ; GFX9-NEXT: v_mul_hi_u32 v3, v5, v3 -======= -; GFX9-NEXT: v_mul_hi_u32 v2, s12, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s13, v0 -; GFX9-NEXT: v_mul_lo_u32 v0, s12, v0 -; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_sub_u32_e32 v2, s15, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, s13 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s14, v0 -; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v2, v3, vcc -; GFX9-NEXT: v_subrev_co_u32_e64 v4, s[0:1], s12, v0 -; GFX9-NEXT: v_subbrev_co_u32_e64 v7, s[2:3], 0, v2, s[0:1] -; GFX9-NEXT: v_cmp_le_u32_e64 s[2:3], s13, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[2:3] -; GFX9-NEXT: v_cmp_le_u32_e64 s[2:3], s12, v4 -; 
GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[2:3] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], s13, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[2:3] -; GFX9-NEXT: s_ashr_i32 s2, s11, 31 -; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v2, v3, s[0:1] -; GFX9-NEXT: s_add_u32 s10, s10, s2 -; GFX9-NEXT: v_subrev_co_u32_e64 v3, s[0:1], s12, v4 -; GFX9-NEXT: s_mov_b32 s3, s2 -; GFX9-NEXT: s_addc_u32 s11, s11, s2 -; GFX9-NEXT: s_xor_b64 s[10:11], s[10:11], s[2:3] -; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[0:1], 0, v2, s[0:1] -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 -; GFX9-NEXT: v_cvt_f32_u32_e32 v8, s10 -; GFX9-NEXT: v_cvt_f32_u32_e32 v9, s11 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v7, s15 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v7, v1, vcc -; GFX9-NEXT: v_mac_f32_e32 v8, s16, v9 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 -; GFX9-NEXT: v_rcp_f32_e32 v8, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s13, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] -; GFX9-NEXT: v_mul_f32_e32 v3, s17, v8 -; GFX9-NEXT: v_mul_f32_e32 v4, s18, v3 -; GFX9-NEXT: v_trunc_f32_e32 v4, v4 -; GFX9-NEXT: v_mac_f32_e32 v3, s19, v4 -; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GFX9-NEXT: s_sub_u32 s2, 0, s10 -; GFX9-NEXT: s_subb_u32 s3, 0, s11 -; GFX9-NEXT: v_mul_hi_u32 v7, s2, v3 -; GFX9-NEXT: v_mul_lo_u32 v8, s2, v4 -; GFX9-NEXT: v_mul_lo_u32 v9, s3, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s2, v3 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 -; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v3, v7 -; GFX9-NEXT: v_mul_hi_u32 v9, v3, v2 -; GFX9-NEXT: v_mul_hi_u32 v10, v3, v7 -; GFX9-NEXT: v_mul_hi_u32 
v11, v4, v7 -; GFX9-NEXT: v_mul_lo_u32 v7, v4, v7 -; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v9, v8 -; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v10, vcc -; GFX9-NEXT: v_mul_lo_u32 v10, v4, v2 -; GFX9-NEXT: v_mul_hi_u32 v2, v4, v2 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: s_ashr_i32 s12, s7, 31 ; GFX9-NEXT: s_mov_b32 s13, s12 ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9 @@ -14362,7 +14293,6 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[2:3] ; GFX9-NEXT: v_subbrev_co_u32_e64 v5, s[0:1], 0, v5, s[0:1] ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 -<<<<<<< HEAD ; GFX9-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v7, s7 ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v7, v4, vcc @@ -14376,22 +14306,6 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -======= -; GFX9-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v8, s7 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v8, v3, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s11, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v5, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: v_xor_b32_e32 v2, s12, v2 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; GFX9-NEXT: v_xor_b32_e32 v3, s12, v3 ; GFX9-NEXT: v_xor_b32_e32 v4, s12, v4 ; GFX9-NEXT: v_mov_b32_e32 v5, s12 @@ -14400,7 +14314,6 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v0, v[1:4], s[4:5] ; GFX9-NEXT: s_endpgm -<<<<<<< HEAD ; ; GFX90A-LABEL: srem_v2i64_pow2_shl_denom: ; GFX90A: ; %bb.0: @@ -14659,8 +14572,6 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v5, vcc ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %shl.y = shl <2 x i64> , %y %r = srem <2 x i64> %x, %shl.y store <2 x i64> %r, <2 x i64> addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll deleted file mode 100644 index a6ba6a16223f..000000000000 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ /dev/null @@ -1,615 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s - -; GCN-LABEL: {{^}}use_dispatch_ptr: -; GCN: s_load_dword s{{[0-9]+}}, s[4:5] -define hidden void @use_dispatch_ptr() #1 { - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %header_ptr - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr: -; GCN: enable_sgpr_dispatch_ptr = 1 -; GCN-NOT: 
s[4:5] -; GCN-NOT: s4 -; GCN-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 { - call void @use_dispatch_ptr() - ret void -} - -; GCN-LABEL: {{^}}use_queue_ptr: -; GCN: s_load_dword s{{[0-9]+}}, s[4:5] -define hidden void @use_queue_ptr() #1 { - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %header_ptr - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr: -; GCN: enable_sgpr_queue_ptr = 1 -; GCN-NOT: s[4:5] -; GCN-NOT: s4 -; GCN-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 { - call void @use_queue_ptr() - ret void -} - -; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast: -; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x10 -; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]] -; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16 -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]] -; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}} -; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}} -define hidden void @use_queue_ptr_addrspacecast() #1 { - %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32* - store volatile i32 0, i32* %asc - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast: -; CIVI: enable_sgpr_queue_ptr = 1 -; CIVI-NOT: s[4:5] -; CIVI-NOT: s4 -; CIVI-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 { - call void @use_queue_ptr_addrspacecast() - ret void -} - -; Not really supported in callable functions. 
-; GCN-LABEL: {{^}}use_kernarg_segment_ptr: -; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}} -; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}} -define hidden void @use_kernarg_segment_ptr() #1 { - %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %header_ptr - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr: -; GCN: enable_sgpr_kernarg_segment_ptr = 1 -define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 { - call void @use_kernarg_segment_ptr() - ret void -} - -; GCN-LABEL: {{^}}use_dispatch_id: -; GCN: ; use s[4:5] -define hidden void @use_dispatch_id() #1 { - %id = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %id) - ret void -} - -; No kernarg segment so that there is a mov to check. With kernarg -; pointer enabled, it happens to end up in the right place anyway. 
- -; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id: -; GCN: enable_sgpr_dispatch_id = 1 -; GCN-NOT: s[4:5] -; GCN-NOT: s4 -; GCN-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 { - call void @use_dispatch_id() - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_x: -; GCN: s_waitcnt -; GCN: ; use s4 -define hidden void @use_workgroup_id_x() #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_stack_workgroup_id_x: -; GCN: s_waitcnt -; GCN-NOT: s32 -; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}} -; GCN: ; use s4 -; GCN: s_setpc_b64 -define hidden void @use_stack_workgroup_id_x() #1 { - %alloca = alloca i32, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca - %val = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_y: -; GCN: s_waitcnt -; GCN: ; use s4 -define hidden void @use_workgroup_id_y() #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_z: -; GCN: s_waitcnt -; GCN: ; use s4 -define hidden void @use_workgroup_id_z() #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_xy: -; GCN: ; use s4 -; GCN: ; use s5 -define hidden void @use_workgroup_id_xy() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.x() - %val1 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_xyz: -; GCN: ; use s4 -; GCN: ; use s5 -; GCN: ; use s6 -define hidden void @use_workgroup_id_xyz() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.x() - %val1 = call i32 @llvm.amdgcn.workgroup.id.y() - %val2 = call i32 
@llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - call void asm sideeffect "; use $0", "s"(i32 %val2) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_xz: -; GCN: ; use s4 -; GCN: ; use s5 -define hidden void @use_workgroup_id_xz() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.x() - %val1 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_yz: -; GCN: ; use s4 -; GCN: ; use s5 -define hidden void @use_workgroup_id_yz() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.y() - %val1 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN-NOT: s6 -; GCN: s_mov_b32 s4, s6 -; GCN: s_mov_b32 s32, 0 -; GCN: s_getpc_b64 s[6:7] -; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+12 -; GCN: s_swappc_b64 -; GCN-NEXT: s_endpgm -define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 { - call void @use_workgroup_id_x() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN: s_mov_b32 s4, s7 -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { - call void @use_workgroup_id_y() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s4, 
s7 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { - call void @use_workgroup_id_z() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN: s_mov_b32 s5, s7 -; GCN: s_mov_b32 s4, s6 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 { - call void @use_workgroup_id_xy() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s5, s7 -; GCN: s_mov_b32 s4, s6 -; GCN: s_mov_b32 s6, s8 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 { - call void @use_workgroup_id_xyz() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s5, s7 -; GCN: s_mov_b32 s4, s6 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 { - call void @use_workgroup_id_xz() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s5, s8 -; GCN: s_mov_b32 s4, s7 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { - call void @use_workgroup_id_yz() - ret void -} - -; Argument is in right place already -; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 -define hidden void @func_indirect_use_workgroup_id_x() #1 { - call void @use_workgroup_id_x() - 
ret void -} - -; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 -define hidden void @func_indirect_use_workgroup_id_y() #1 { - call void @use_workgroup_id_y() - ret void -} - -; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 -define hidden void @func_indirect_use_workgroup_id_z() #1 { - call void @use_workgroup_id_z() - ret void -} - -; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x: -; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN: ; use s4 -define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %arg0, i32 addrspace(1)* undef - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y: -; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN: ; use s4 -define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %arg0, i32 addrspace(1)* undef - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z: -; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN: ; use s4 -define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %arg0, i32 addrspace(1)* undef - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN-DAG: s_mov_b32 s4, s6 - -; GCN-DAG: s_mov_b32 s32, 0 -; GCN-NOT: s4 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 { - call void @other_arg_use_workgroup_id_x(i32 555) - ret void -} - -; 
GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN-DAG: s_mov_b32 s4, s7 - -; GCN-DAG: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 { - call void @other_arg_use_workgroup_id_y(i32 555) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN-DAG: v_mov_b32_e32 v0, 0x22b - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 { - call void @other_arg_use_workgroup_id_z(i32 555) - ret void -} - -; GCN-LABEL: {{^}}use_every_sgpr_input: -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} -; GCN: s_load_dword s{{[0-9]+}}, s[4:5] -; GCN: s_load_dword s{{[0-9]+}}, s[6:7] -; GCN: s_load_dword s{{[0-9]+}}, s[8:9] - -; GCN: ; use s[10:11] -; GCN: ; use s12 -; GCN: ; use s13 -; GCN: ; use s14 -define hidden void @use_every_sgpr_input() #1 { - %alloca = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca - - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc - - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc - - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 - %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc - - %val3 = call 
i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %val3) - - %val4 = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val4) - - %val5 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val5) - - %val6 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val6) - - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 -; GCN: enable_sgpr_workgroup_info = 0 - -; GCN: enable_sgpr_private_segment_buffer = 1 -; GCN: enable_sgpr_dispatch_ptr = 1 -; GCN: enable_sgpr_queue_ptr = 1 -; GCN: enable_sgpr_kernarg_segment_ptr = 1 -; GCN: enable_sgpr_dispatch_id = 1 -; GCN: enable_sgpr_flat_scratch_init = 1 - -; GCN: s_mov_b32 s13, s15 -; GCN: s_mov_b32 s12, s14 -; GCN: s_mov_b32 s14, s16 -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 { - call void @use_every_sgpr_input() - ret void -} - -; We have to pass the kernarg segment, but there are no kernel -; arguments so null is passed. 
-; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input_no_kernargs: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 -; GCN: enable_sgpr_workgroup_info = 0 - -; GCN: enable_sgpr_private_segment_buffer = 1 -; GCN: enable_sgpr_dispatch_ptr = 1 -; GCN: enable_sgpr_queue_ptr = 1 -; GCN: enable_sgpr_kernarg_segment_ptr = 0 -; GCN: enable_sgpr_dispatch_id = 1 -; GCN: enable_sgpr_flat_scratch_init = 1 - -; GCN: s_mov_b64 s[10:11], s[8:9] -; GCN: s_mov_b64 s[8:9], 0{{$}} -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #1 { - call void @use_every_sgpr_input() - ret void -} - -; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input: -; GCN-NOT: s6 -; GCN-NOT: s7 -; GCN-NOT: s8 -; GCN-NOT: s9 -; GCN-NOT: s10 -; GCN-NOT: s11 -; GCN-NOT: s12 -; GCN-NOT: s13 -; GCN-NOT: s[6:7] -; GCN-NOT: s[8:9] -; GCN-NOT: s[10:11] -; GCN-NOT: s[12:13] -; GCN: s_or_saveexec_b64 s[16:17], -1 -define hidden void @func_indirect_use_every_sgpr_input() #1 { - call void @use_every_sgpr_input() - ret void -} - -; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz: -; GCN: s_mov_b32 s4, s12 -; GCN: s_mov_b32 s5, s13 -; GCN: s_mov_b32 s6, s14 -; GCN: ; use s[10:11] -; GCN: ; use s12 -; GCN: ; use s13 -; GCN: ; use s14 - -; GCN: s_swappc_b64 -define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 { - %alloca = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca - - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc - - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc - - 
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 - %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc - - %val3 = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %val3) - - %val4 = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val4) - - %val5 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val5) - - %val6 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val6) - - call void @use_workgroup_id_xyz() - ret void -} - -; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill: -; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_addk_i32 s32, 0x400 -; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5] -; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7] - -; GCN: s_mov_b32 s4, s12 -; GCN: s_mov_b32 s5, s13 -; GCN: s_mov_b32 s6, s14 - -; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14 -; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13 -; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12 -; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9] - -; GCN: s_swappc_b64 - -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} -; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_X]]:[[HI_X]]{{\]}}, 0x0 -; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Y]]:[[HI_Y]]{{\]}}, 0x0 -; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Z]]:[[HI_Z]]{{\]}}, 0x0 -; GCN: ; use -; GCN: ; use [[SAVE_X]] -; GCN: ; use [[SAVE_Y]] -; GCN: ; use [[SAVE_Z]] -define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 { - %alloca = alloca i32, align 4, addrspace(5) - call void @use_workgroup_id_xyz() - - store volatile i32 0, i32 addrspace(5)* %alloca - - %dispatch_ptr = call noalias i8 
addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc - - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc - - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 - %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc - - %val3 = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %val3) - - %val4 = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val4) - - %val5 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val5) - - %val6 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val6) - - ret void -} - -declare i32 @llvm.amdgcn.workgroup.id.x() #0 -declare i32 @llvm.amdgcn.workgroup.id.y() #0 -declare i32 @llvm.amdgcn.workgroup.id.z() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 -declare i64 @llvm.amdgcn.dispatch.id() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - -attributes #0 = { nounwind readnone speculatable } -attributes #1 = { nounwind noinline } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll index d10d0dd74741..0be2e867e3cf 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll @@ -159,7 +159,7 @@ define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i } ; GCN-LABEL: 
{{^}}sdiv_constant_sel_constants_i64: -; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5 +; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 0, 5 define amdgpu_kernel void @sdiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 121, i64 23 %bo = sdiv i64 120, %sel @@ -177,7 +177,7 @@ define amdgpu_kernel void @sdiv_constant_sel_constants_i32(i32 addrspace(1)* %p, } ; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64: -; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5 +; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 0, 5 define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 -4, i64 23 %bo = udiv i64 120, %sel @@ -186,7 +186,7 @@ define amdgpu_kernel void @udiv_constant_sel_constants_i64(i64 addrspace(1)* %p, } ; GCN-LABEL: {{^}}srem_constant_sel_constants: -; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3 +; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 33, 3 define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 34, i64 15 %bo = srem i64 33, %sel @@ -195,7 +195,7 @@ define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 } ; GCN-LABEL: {{^}}urem_constant_sel_constants: -; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3 +; GCN: s_cselect_b64 s[{{[0-9]+}}:{{[0-9]+}}], 33, 3 define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, i64 34, i64 15 %bo = urem i64 33, %sel diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll index ad255818c9fe..b66ab4e577aa 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -38,16 +38,23 @@ entry: ; GCN-LABEL: {{^}}double4_extelt: ; GCN-NOT: buffer_ +; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b +; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 +; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 +; GCN-DAG: 
s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 -; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 -; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} +; GCN-DAG: s_mov_b32 s[[L2LO:[0-9]+]], 0xe147ae14 +; GCN-DAG: s_mov_b32 s[[L2HI:[0-9]+]], 0x4000147a +; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 +; GCN: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}, s{{\[}}[[L2LO]]:[[L2HI]]{{\]}} +; GCN-DAG: s_mov_b32 s[[L3LO:[0-9]+]], 0x70a3d70a +; GCN-DAG: s_mov_b32 s[[L3HI:[0-9]+]], 0x40100a3d +; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 +; GCN: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L3HI]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] +; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] +; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <4 x double> , i32 %sel @@ -57,19 +64,27 @@ entry: ; GCN-LABEL: {{^}}double5_extelt: ; GCN-NOT: buffer_ +; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b +; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 +; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 +; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 -; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 
0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 -; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 4 -; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C4]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} +; GCN-DAG: s_mov_b32 s[[L2LO:[0-9]+]], 0xe147ae14 +; GCN-DAG: s_mov_b32 s[[L2HI:[0-9]+]], 0x4000147a +; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2 +; GCN: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}, s{{\[}}[[L2LO]]:[[L2HI]]{{\]}} +; GCN-DAG: s_mov_b32 s[[L3LO:[0-9]+]], 0x70a3d70a +; GCN-DAG: s_mov_b32 s[[L3HI:[0-9]+]], 0x40100a3d +; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3 +; GCN: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L3HI]]{{\]}} +; Double literals 5.01 and 4.01 share the same low 32 bits. 
+; GCN-DAG: s_mov_b32 s[[L4HI:[0-9]+]], 0x40140a3d +; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4 +; GCN: s_cselect_b64 s{{\[}}[[T3LO:[0-9]+]]:[[T3HI:[0-9]+]]{{\]}}, s{{\[}}[[T2LO]]:[[T2HI]]{{\]}}, s{{\[}}[[L3LO]]:[[L4HI]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T3LO]] +; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T3HI]] +; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <5 x double> , i32 %sel @@ -107,11 +122,15 @@ entry: ; GCN-LABEL: {{^}}double2_extelt: ; GCN-NOT: buffer_ +; GCN-DAG: s_mov_b32 s[[L0LO:[0-9]+]], 0x47ae147b +; GCN-DAG: s_mov_b32 s[[L0HI:[0-9]+]], 0x3f847ae1 +; GCN-DAG: s_mov_b32 s[[L1LO:[0-9]+]], 0xc28f5c29 +; GCN-DAG: s_mov_b32 s[[L1HI:[0-9]+]], 0x3ff028f5 ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; GCN: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s{{\[}}[[L1LO]]:[[L1HI]]{{\]}}, s{{\[}}[[L0LO]]:[[L0HI]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T0LO]] +; GCN-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T0HI]] +; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <2 x double> , i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll index 094ae27b5c57..35b2d4d8306d 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- 
-mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s ; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2: ; GCN: buffer_load_dwordx4 @@ -14,15 +14,22 @@ define amdgpu_kernel void @extract_vector_elt_v3f64_2(double addrspace(1)* %out, ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3f64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 -; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 +; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI: store_dwordx2 v[{{[0-9:]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 +; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 
s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T1LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T1HI]] +; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(double addrspace(1)* %out, <3 x double> %foo, i32 %elt) #0 { %dynelt = extractelement <3 x double> %foo, i32 %elt store volatile double %dynelt, double addrspace(1)* %out @@ -31,19 +38,28 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(double addrspace(1)* %ou ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4f64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 -; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 -; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 +; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; SI-DAG: s_cmp_eq_u32 [[IDX]], 3 +; SI-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI-DAG: v_cndmask_b32_e{{32|64}} 
v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; SI: store_dwordx2 v[{{[0-9:]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 +; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 3 +; VI: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T1LO]]:[[T1HI]]{{\]}} +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] +; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @dyn_extract_vector_elt_v4f64(double addrspace(1)* %out, <4 x double> %foo, i32 %elt) #0 { %dynelt = extractelement <4 x double> %foo, i32 %elt store volatile double %dynelt, double addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll index 248f5fc985ee..985490592487 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s ; How the replacement of i64 stores with v2i32 stores resulted in ; breaking other users of the bitcast if they already existed @@ -32,10 +32,14 @@ define amdgpu_kernel void 
@extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64: ; GCN-NOT: buffer_load ; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI: store_dwordx2 v[{{[0-9:]+}}] +; VI: s_cselect_b64 s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]] +; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <2 x i64> %foo, i32 %elt store volatile i64 %dynelt, i64 addrspace(1)* %out @@ -59,16 +63,23 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64: -; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 -; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; SI-NOT: buffer_load +; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; SI-DAG: s_cmp_eq_u32 
[[IDX]], 2 +; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI: store_dwordx2 v[{{[0-9:]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 +; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T1LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T1HI]] +; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, <3 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <3 x i64> %foo, i32 %elt store volatile i64 %dynelt, i64 addrspace(1)* %out @@ -77,19 +88,28 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 -; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 -; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 
v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] -; GCN: store_dwordx2 v[{{[0-9:]+}}] +; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; SI-DAG: s_cmp_eq_u32 [[IDX]], 2 +; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; SI-DAG: s_cmp_eq_u32 [[IDX]], 3 +; SI-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; SI: store_dwordx2 v[{{[0-9:]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2 +; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}} +; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 3 +; VI: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T1LO]]:[[T1HI]]{{\]}} +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]] +; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(i64 addrspace(1)* %out, <4 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <4 x i64> %foo, i32 %elt store volatile i64 %dynelt, i64 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll index 80d0b3499c70..86fd814d95fa 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -5,7 +5,6 @@ declare void @extern_func() #0 define 
amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { -<<<<<<< HEAD ; FLAT_SCR_OPT-LABEL: stack_object_addrspacecast_in_kernel_no_calls: ; FLAT_SCR_OPT: ; %bb.0: ; FLAT_SCR_OPT-NEXT: s_add_u32 s0, s0, s3 @@ -35,24 +34,6 @@ define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { ; FLAT_SCR_ARCH-NEXT: flat_store_dword v[0:1], v2 ; FLAT_SCR_ARCH-NEXT: s_waitcnt_vscnt null, 0x0 ; FLAT_SCR_ARCH-NEXT: s_endpgm -======= -; GCN-LABEL: stack_object_addrspacecast_in_kernel_no_calls: -; GCN: ; %bb.0: -; GCN-NEXT: s_add_u32 s0, s0, s3 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GCN-NEXT: v_mov_b32_e32 v0, 4 -; GCN-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16) -; GCN-NEXT: v_mov_b32_e32 v2, 0 -; GCN-NEXT: s_lshl_b32 s0, s0, 16 -; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0 -; GCN-NEXT: v_cndmask_b32_e64 v1, 0, s0, vcc_lo -; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo -; GCN-NEXT: flat_store_dword v[0:1], v2 -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: s_endpgm ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%alloca = alloca i32, addrspace(5) %cast = addrspacecast i32 addrspace(5)* %alloca to i32* store volatile i32 0, i32* %cast diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index 3a4923693470..2ea25fe7a31c 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -285,16 +285,18 @@ entry: } ; GCN-LABEL: {{^}}double2_inselt: +; GCN: s_load_dwordx4 s{{\[}}[[FIRST:[0-9]+]]:[[LAST:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}] ; GCN-NOT: v_movrel ; GCN-NOT: buffer_ -; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 -; GCN-DAG: s_cselect_b64 [[CC1:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]] -; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC1]] -; GCN-DAG: s_cmp_eq_u32 [[IDX]], 0 -; GCN-DAG: s_cselect_b64 [[CC2:[^,]+]], -1, 0 -; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC2]] -; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 0, [[CC2]] +; GCN: s_cmp_lg_u32 [[IDX:s[0-9]+]], 1 +; GCN: s_cselect_b64 s{{\[}}[[P0_LO:[0-9]+]]:[[P0_HI:[0-9]+]]{{\]}}, s{{\[}}{{[0-9]+}}:[[LAST]]{{\]}}, 1.0 +; GCN: s_cmp_lg_u32 [[IDX]], 0 +; GCN: s_cselect_b64 s{{\[}}[[P1_LO:[0-9]+]]:[[P1_HI:[0-9]+]]{{\]}}, s{{\[}}[[FIRST]]:{{[0-9]+}}{{\]}}, 1.0 +; GCN: v_mov_b32_e32 v[[V_FIRST:[0-9]+]], s[[P1_LO]] +; GCN: v_mov_b32_e32 v[[V_SECOND:[0-9]+]], s[[P1_HI]] +; GCN: v_mov_b32_e32 v[[V_THIRD:[0-9]+]], s[[P0_LO]] +; GCN: v_mov_b32_e32 v[[V_LAST:[0-9]+]], s[[P0_HI]] +; GCN: flat_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_FIRST]]:[[V_LAST]]{{\]}} define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x double> %vec, i32 %sel) { entry: %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel @@ -305,7 +307,7 @@ entry: ; GCN-LABEL: {{^}}double5_inselt: ; GCN-NOT: v_movrel ; GCN-NOT: buffer_ -; GCN-COUNT-10: v_cndmask_b32 +; GCN-COUNT-5: s_cselect_b64 define amdgpu_kernel 
void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x double> %vec, i32 %sel) { entry: %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index c269811008c5..bbdff9c4e897 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1627,7 +1627,6 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) ; ; VI-LABEL: dynamic_insertelement_v2f64: ; VI: ; %bb.0: -<<<<<<< HEAD ; VI-NEXT: s_load_dword s10, s[4:5], 0x60 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x30 @@ -1639,27 +1638,11 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) ; VI-NEXT: s_cselect_b64 s[6:7], s[6:7], s[8:9] ; VI-NEXT: s_cmp_lg_u32 s10, 0 ; VI-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] -======= -; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x30 -; VI-NEXT: s_load_dword s4, s[4:5], 0x60 -; VI-NEXT: v_mov_b32_e32 v1, 0x40200000 -; VI-NEXT: s_mov_b32 s3, 0x1100f000 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v0, s11 -; VI-NEXT: s_cmp_eq_u32 s4, 1 -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v0, s10 -; VI-NEXT: s_cmp_eq_u32 s4, 0 -; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc -; VI-NEXT: v_mov_b32_e32 v0, s9 -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <2 x double> %a, double 8.0, i32 %b @@ -1699,7 +1682,6 @@ define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* % ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -<<<<<<< HEAD ; VI-NEXT: s_cmp_lg_u32 s8, 1 ; VI-NEXT: s_cselect_b64 s[2:3], s[2:3], 5 ; VI-NEXT: s_cmp_lg_u32 s8, 0 @@ -1709,21 +1691,6 @@ define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* % ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -======= -; VI-NEXT: s_cmp_eq_u32 s6, 1 -; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 -; VI-NEXT: v_mov_b32_e32 v0, s11 -; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] -; VI-NEXT: v_mov_b32_e32 v0, s10 -; VI-NEXT: s_cmp_eq_u32 s6, 0 -; VI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[4:5] -; VI-NEXT: v_mov_b32_e32 v0, s9 -; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 -; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] -; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 5, s[4:5] -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; VI-NEXT: s_endpgm %vecins = insertelement <2 x i64> %a, i64 5, i32 %b store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8 @@ -1770,7 +1737,6 @@ define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* % ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x30 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -<<<<<<< HEAD ; VI-NEXT: s_cmp_lg_u32 s12, 1 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_cselect_b64 s[6:7], s[10:11], 5 @@ -1785,27 +1751,6 @@ define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* % ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 -======= -; VI-NEXT: v_mov_b32_e32 v0, s11 -; VI-NEXT: s_cmp_eq_u32 s12, 1 -; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 -; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] -; VI-NEXT: v_mov_b32_e32 v0, s10 -; VI-NEXT: s_cmp_eq_u32 s12, 0 -; VI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[4:5] -; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 -; VI-NEXT: v_mov_b32_e32 v0, s9 -; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] -; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_cmp_eq_u32 s12, 2 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 5, s[4:5] -; VI-NEXT: v_mov_b32_e32 v4, s7 -; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 -; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[4:5] -; VI-NEXT: v_mov_b32_e32 v4, s6 -; VI-NEXT: v_cndmask_b32_e64 v4, v4, 5, s[4:5] -; VI-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:16 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <3 x i64> %a, i64 5, i32 %b @@ -1853,46 +1798,32 @@ define amdgpu_kernel void @dynamic_insertelement_v4f64(<4 x double> addrspace(1) ; ; VI-LABEL: dynamic_insertelement_v4f64: ; VI: ; %bb.0: -<<<<<<< HEAD ; VI-NEXT: s_load_dword s16, s[4:5], 0x40 ; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_mov_b32 s4, 0 ; VI-NEXT: s_mov_b32 s5, 0x40200000 -======= -; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 -; VI-NEXT: s_load_dword s4, s[4:5], 0x40 -; VI-NEXT: v_mov_b32_e32 v4, 0x40200000 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; VI-NEXT: s_mov_b32 s3, 0x1100f000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v0, s11 -; VI-NEXT: s_cmp_eq_u32 s4, 1 -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc -; VI-NEXT: v_mov_b32_e32 v0, s10 -; VI-NEXT: s_cmp_eq_u32 s4, 0 -; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_mov_b32_e32 v0, s9 -; VI-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc +; VI-NEXT: s_cmp_lg_u32 s16, 1 +; VI-NEXT: s_cselect_b64 s[6:7], s[10:11], s[4:5] +; VI-NEXT: s_cmp_lg_u32 s16, 0 +; VI-NEXT: s_cselect_b64 s[8:9], s[8:9], s[4:5] +; VI-NEXT: s_cmp_lg_u32 s16, 3 +; VI-NEXT: s_cselect_b64 s[10:11], s[14:15], s[4:5] +; VI-NEXT: s_cmp_lg_u32 s16, 2 +; VI-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s10 +; VI-NEXT: v_mov_b32_e32 v3, s11 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 +; VI-NEXT: s_nop 0 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_cmp_eq_u32 s4, 3 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_mov_b32_e32 v5, 
s15 -; VI-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc -; VI-NEXT: v_mov_b32_e32 v5, s14 -; VI-NEXT: s_cmp_eq_u32 s4, 2 -; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc -; VI-NEXT: v_mov_b32_e32 v5, s13 -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc -; VI-NEXT: v_mov_b32_e32 v4, s12 -; VI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc -; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; VI-NEXT: v_mov_b32_e32 v1, s9 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <4 x double> %a, double 8.0, i32 %b diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll index af2100705794..618236dd8645 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -345,7 +345,6 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -<<<<<<< HEAD ; GFX9-NEXT: s_mul_i32 s9, s0, s3 ; GFX9-NEXT: s_mul_hi_u32 s10, s0, s2 ; GFX9-NEXT: s_mul_hi_u32 s5, s0, s3 @@ -362,48 +361,23 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX9-NEXT: s_addc_u32 s5, 0, s5 ; GFX9-NEXT: s_sub_u32 s6, s4, s2 ; GFX9-NEXT: s_subb_u32 s7, s5, 0 -======= -; GFX9-NEXT: s_mul_i32 s7, s0, s3 -; GFX9-NEXT: s_mul_hi_u32 s8, s0, s2 -; GFX9-NEXT: s_mul_hi_u32 s6, s0, s3 -; GFX9-NEXT: s_add_u32 s9, s8, s7 -; GFX9-NEXT: s_mul_i32 s5, s1, s2 -; GFX9-NEXT: s_addc_u32 s6, 0, s6 -; GFX9-NEXT: s_add_u32 s9, s9, s5 -; GFX9-NEXT: s_mul_hi_u32 s4, s1, s2 -; GFX9-NEXT: s_mul_hi_i32 s10, s1, s3 -; GFX9-NEXT: s_addc_u32 s4, s6, s4 -; GFX9-NEXT: s_addc_u32 s6, s10, 0 -; GFX9-NEXT: s_mul_i32 s9, s1, s3 -; GFX9-NEXT: s_add_u32 s4, s4, s9 -; GFX9-NEXT: s_addc_u32 s6, 0, s6 -; GFX9-NEXT: s_sub_u32 s9, s4, s2 -; GFX9-NEXT: s_subb_u32 s10, s6, 0 ->>>>>>> parent of 640beb38e771... 
[amdgpu] Enable selection of `s_cselect_b64`. ; GFX9-NEXT: s_cmp_lt_i32 s1, 0 -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: v_mov_b32_e32 v2, s9 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc -; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s0, v2 -; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v0, vcc +; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_sub_u32 s6, s4, s0 +; GFX9-NEXT: s_subb_u32 s7, s5, 0 ; GFX9-NEXT: s_cmp_lt_i32 s3, 0 -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: s_add_i32 s1, s8, s7 -; GFX9-NEXT: s_add_i32 s1, s1, s5 -; GFX9-NEXT: s_ashr_i32 s4, s1, 31 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_mul_i32 s0, s0, s2 -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_add_i32 s1, s10, s9 +; GFX9-NEXT: s_add_i32 s1, s1, s8 +; GFX9-NEXT: s_ashr_i32 s6, s1, 31 +; GFX9-NEXT: s_mov_b32 s7, s6 +; GFX9-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] +; GFX9-NEXT: s_mul_i32 s2, s0, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_endpgm ; @@ -411,7 +385,6 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -<<<<<<< HEAD ; GFX10-NEXT: s_mul_i32 s9, s0, s3 ; GFX10-NEXT: s_mul_hi_u32 s10, s0, s2 ; GFX10-NEXT: s_mul_hi_u32 s5, s0, 
s3 @@ -428,44 +401,21 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX10-NEXT: s_addc_u32 s5, 0, s5 ; GFX10-NEXT: s_sub_u32 s6, s4, s2 ; GFX10-NEXT: s_subb_u32 s7, s5, 0 -======= -; GFX10-NEXT: s_mul_i32 s7, s0, s3 -; GFX10-NEXT: s_mul_hi_u32 s8, s0, s2 -; GFX10-NEXT: s_mul_hi_u32 s6, s0, s3 -; GFX10-NEXT: s_add_u32 s11, s8, s7 -; GFX10-NEXT: s_mul_i32 s5, s1, s2 -; GFX10-NEXT: s_addc_u32 s6, 0, s6 -; GFX10-NEXT: s_mul_hi_u32 s4, s1, s2 -; GFX10-NEXT: s_add_u32 s11, s11, s5 -; GFX10-NEXT: s_mul_hi_i32 s9, s1, s3 -; GFX10-NEXT: s_addc_u32 s4, s6, s4 -; GFX10-NEXT: s_mul_i32 s10, s1, s3 -; GFX10-NEXT: s_addc_u32 s6, s9, 0 -; GFX10-NEXT: s_add_u32 s4, s4, s10 -; GFX10-NEXT: s_addc_u32 s6, 0, s6 -; GFX10-NEXT: s_sub_u32 s9, s4, s2 -; GFX10-NEXT: s_subb_u32 s10, s6, 0 -; GFX10-NEXT: v_mov_b32_e32 v1, s9 ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. ; GFX10-NEXT: s_cmp_lt_i32 s1, 0 -; GFX10-NEXT: v_mov_b32_e32 v0, s10 -; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX10-NEXT: s_sub_u32 s6, s4, s0 +; GFX10-NEXT: s_subb_u32 s7, s5, 0 ; GFX10-NEXT: s_cmp_lt_i32 s3, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v0, s6, v0, vcc_lo -; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v2, s0 ; GFX10-NEXT: s_mul_i32 s0, s0, s2 -; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 -; GFX10-NEXT: s_add_i32 s1, s8, s7 -; GFX10-NEXT: s_add_i32 s1, s1, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX10-NEXT: s_ashr_i32 s4, s1, 31 -; GFX10-NEXT: s_mov_b32 s5, s4 -; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v1, s1, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, s0, 0, vcc_lo +; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX10-NEXT: s_add_i32 s1, s10, s9 +; GFX10-NEXT: s_add_i32 s1, s1, s8 
+; GFX10-NEXT: s_ashr_i32 s6, s1, 31 +; GFX10-NEXT: s_mov_b32 s7, s6 +; GFX10-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] +; GFX10-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s1, 0, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v0, s0, 0, s2 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_endpgm bb: diff --git a/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll b/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll index 407a4e5f1b76..3634cedfb0bb 100644 --- a/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll @@ -9,8 +9,7 @@ ; GCN: s_load_dwordx2 ; GCN: s_cmp_eq_u32 -; GCN: v_cndmask_b32 -; GCN: v_cndmask_b32 +; GCN: s_cselect_b64 ; GCN-NOT: load_dword ; GCN: flat_load_dwordx2 @@ -35,8 +34,7 @@ define amdgpu_kernel void @select_ptr_crash_i64_flat(i32 %tmp, [8 x i32], i64* % ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} -; GCN: v_cndmask_b32 -; GCN: v_cndmask_b32 +; GCN: s_cselect_b64 ; GCN: flat_store_dwordx2 define amdgpu_kernel void @select_ptr_crash_i64_global(i32 %tmp, [8 x i32], i64 addrspace(1)* %ptr0, [8 x i32], i64 addrspace(1)* %ptr1, [8 x i32], i64 addrspace(1)* %ptr2) { %tmp2 = icmp eq i32 %tmp, 0 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll deleted file mode 100644 index 3ea52f9309f6..000000000000 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -;. -; Kernel LDS lowering. -;. -; @lds.1: is part of @llvm.used list, and also it is used within kernel, hence it is lowered. -; @lds.2: is part of @llvm.compiler.used list, and also it is used within kernel, hence it is lowered. 
-; @lds.3: is used as initializer to @gptr.3, hence @lds.3 is not lowered, though it is used within kernel. -; @lds.4: is used as initializer to @gptr.4, hence @lds.4 is not lowered, though it is used within kernel, -; irrespective of the uses of @gptr.4 itself ( @gptr.4 is part of llvm.compiler.used list ). -; @lds.5: is part of @llvm.used list, but is not used within kernel, hence it is not lowered. -; @lds.6: is part of @llvm.compiler.used list, but is not used within kernel, hence it is not lowered. -;. - -; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { i32, i16 } - -; CHECK-NOT: @lds.1 -; CHECK-NOT: @lds.2 -; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8 -; CHECK: @lds.4 = addrspace(3) global float undef, align 4 -; CHECK: @lds.5 = addrspace(3) global i16 undef, align 2 -; CHECK: @lds.6 = addrspace(3) global i32 undef, align 4 -@lds.1 = addrspace(3) global i16 undef, align 2 -@lds.2 = addrspace(3) global i32 undef, align 4 -@lds.3 = addrspace(3) global i64 undef, align 8 -@lds.4 = addrspace(3) global float undef, align 4 -@lds.5 = addrspace(3) global i16 undef, align 2 -@lds.6 = addrspace(3) global i32 undef, align 4 - -; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8 -@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8 - -; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 4 - -; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (i64* 
addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.compiler.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" - -; CHECK-LABEL: @k0() -; CHECK: %ld.lds.1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 4 -; CHECK: %ld.lds.2 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0), align 4 -; CHECK: %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3, align 4 -; CHECK: %ld.lds.4 = load float, float addrspace(3)* @lds.4, align 4 -; CHECK: ret void -define amdgpu_kernel void @k0() { - %ld.lds.1 = load i16, i16 addrspace(3)* @lds.1 - %ld.lds.2 = load i32, i32 addrspace(3)* @lds.2 - %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3 - %ld.lds.4 = load float, float addrspace(3)* @lds.4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll deleted file mode 100644 index 104c87774a72..000000000000 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll +++ /dev/null @@ -1,93 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | 
FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -;. -; @lds.1: is aliased with @alias.to.lds.1, and @alias.to.lds.1 is used within kernel @k0. -; Hence, @lds.1 is lowered. -; @lds.2: is aliased with @alias.to.lds.2, and @alias.to.lds.2 is used within non-kernel @f0, -; Hence, @lds.2 is lowered. -; @lds.3: is used as initializer to global @gptr.3, and @gptr.3 is aliased with @alias.to.gptr.3, -; and @alias.to.gptr.3 is used within kernel @k1. Hence, @lds.3 is lowered. -; @lds.4: is used as initializer to global @gptr.4, and @gptr.4 is aliased with @alias.to.gptr.4, -; and @alias.to.gptr.4 is used within non-kernel @f1. Hence, @lds.4 is lowered. -; @lds.5: is aliased with @alias.to.lds.5, but neither @lds.5 nor @alias.to.lds.5 is used anywhere. -; Hence, @lds.5 is not lowered. -; @lds.6: is used as initializer to global @gptr.6, and @gptr.6 is aliased with @alias.to.gptr.6. -; But none of them are used anywhere. Hence, @lds.6 is not lowered. -;. 
- -; CHECK: %llvm.amdgcn.module.lds.t = type { [4 x i8], [3 x i8], [1 x i8], [2 x i8] } - -; CHECK-NOT: @lds.1 -; CHECK-NOT: @lds.2 -; CHECK-NOT: @lds.3 -; CHECK-NOT: @lds.4 -; CHECK: @lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8 -; CHECK: @lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8 -@lds.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1 -@lds.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2 -@lds.3 = internal unnamed_addr addrspace(3) global [3 x i8] undef, align 4 -@lds.4 = internal unnamed_addr addrspace(3) global [4 x i8] undef, align 4 -@lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8 -@lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8 - -; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([3 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([6 x i8] addrspace(3)* @lds.6 to i64 addrspace(3)*) to i64*), align 8 -@gptr.3 = addrspace(1) global i64* addrspacecast ([3 x i8] addrspace(3)* @lds.3 to i64*), align 8 -@gptr.4 = addrspace(1) global i64* addrspacecast ([4 x i8] addrspace(3)* @lds.4 to i64*), align 8 -@gptr.6 = addrspace(1) global i64* addrspacecast ([6 x i8] addrspace(3)* @lds.6 to i64*), align 8 - -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4 -; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, 
%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata" - -; CHECK: @alias.to.lds.1 = alias [1 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) -; CHECK: @alias.to.lds.2 = alias [2 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3) -; CHECK: @alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3 -; CHECK: @alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4 -; CHECK: @alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5 -; CHECK: @alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6 -@alias.to.lds.1 = alias [1 x i8], [1 x i8] addrspace(3)* @lds.1 -@alias.to.lds.2 = alias [2 x i8], [2 x i8] addrspace(3)* @lds.2 -@alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3 -@alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4 -@alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5 -@alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6 - -; CHECK-LABEL: @f1 -; CHECK: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4, align 8 -; CHECK: ret void -define void @f1() { - %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4 - ret void -} - -; CHECK-LABEL: @f0 -; CHECK: %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)* -; CHECK: store i8 1, i8 addrspace(3)* %bc, align 2 -; CHECK: ret void -define void @f0() { - %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %bc, align 2 - ret void -} - -; CHECK-LABEL: @k1 -; CHECK-LABEL: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK-LABEL: %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3, align 8 -; CHECK-LABEL: ret void -define amdgpu_kernel void @k1() { - %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3 - 
ret void -} - -; CHECK-LABEL: @k0 -; CHECK-LABEL: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK-LABEL: %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)* -; CHECK-LABEL: store i8 1, i8 addrspace(3)* %bc, align 1 -; CHECK-LABEL: ret void -define amdgpu_kernel void @k0() { - %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %bc, align 1 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll deleted file mode 100644 index 77fcefa7944d..000000000000 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -;. -; @lds.1: is part of @llvm.used list, and is no-where used. Hence it is not lowered. -; @lds.2: is part of @llvm.compiler.used list, and is no-where used. Hence it is not lowered. -; @lds.3: is used as initializer to @gptr.3, and is no-where used. @gptr.3 itself is also not -; used anywhere else, hence @lds.3 is not lowered. -; @lds.4: is used as initializer to @gptr.4, and is no-where used. @gptr.4 is part of -; @llvm.compiler.used list, but is no-where else used. hence @lds.4 is not lowered. -; -; @lds.5: is used as initializer to @gptr.5, and is no-where used. @gptr.5 is part of -; @llvm.compiler.used list, but is also used within kernel @k0. Hence @lds.5 is lowered. -; @lds.6: is used as initializer to @gptr.6, and is no-where used. @gptr.6 is part of -; @llvm.compiler.used list, but is also used within non-kernel function @f0. Hence @lds.6 is lowered. -; @lds.7: is used as initializer to @gptr.7, and is no-where used. @gptr.7 is used as initializer to @gptr.8, -; and @gptr.8 is used within non-kernel function @f1. 
Hence @lds.7 is lowered. -;. - -; CHECK: %llvm.amdgcn.module.lds.t = type { [3 x float], [1 x float], [2 x float] } - -; CHECK: @lds.1 = addrspace(3) global i16 undef, align 2 -; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4 -; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8 -; CHECK: @lds.4 = addrspace(3) global float undef, align 4 -; CHECK-NOT: @lds.5 -; CHECK-NOT: @lds.6 -; CHECK-NOT: @lds.7 -@lds.1 = addrspace(3) global i16 undef, align 2 -@lds.2 = addrspace(3) global i32 undef, align 4 -@lds.3 = addrspace(3) global i64 undef, align 8 -@lds.4 = addrspace(3) global float undef, align 4 -@lds.5 = addrspace(3) global [1 x float] undef, align 4 -@lds.6 = addrspace(3) global [2 x float] undef, align 8 -@lds.7 = addrspace(3) global [3 x float] undef, align 16 - -; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8 -; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.5 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([1 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([2 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.7 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8 -; CHECK: @gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8 -@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 
8 -@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8 -@gptr.5 = addrspace(1) global i64* addrspacecast ([1 x float] addrspace(3)* @lds.5 to i64*), align 8 -@gptr.6 = addrspace(1) global i64* addrspacecast ([2 x float] addrspace(3)* @lds.6 to i64*), align 8 -@gptr.7 = addrspace(1) global i64* addrspacecast ([3 x float] addrspace(3)* @lds.7 to i64*), align 8 -@gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8 - -; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16 -; CHECK: @llvm.compiler.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata" -@llvm.compiler.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* 
addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata" - -; CHECK-LABEL: @f1() -; CHECK: %ld = load i64**, i64** addrspace(1)* @gptr.8, align 8 -; CHECK: ret void -define void @f1() { - %ld = load i64**, i64** addrspace(1)* @gptr.8 - ret void -} - -; CHECK-LABEL: @f0() -; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 -; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 -; CHECK: ret void -define void @f0() { - %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 - ret void -} - -; CHECK-LABEL: @k0() -; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ] -; CHECK: %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 -; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 -; CHECK: ret void -define amdgpu_kernel void @k0() { - %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4 - ret void -} - -; CHECK-LABEL: @k1() -; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* 
@llvm.amdgcn.module.lds) ] -; CHECK: ret void -define amdgpu_kernel void @k1() { - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll deleted file mode 100644 index c3fd1c0f9e82..000000000000 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s - -; CHECK: %llvm.amdgcn.module.lds.t = type { double, float } - -; CHECK: @function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to float*), align 8 - -; CHECK: @kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to double*), align 8 - -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8 - -@function_target = addrspace(3) global float undef, align 4 -@function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* @function_target to float*), align 8 - -@kernel_target = addrspace(3) global double undef, align 8 -@kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* @kernel_target to double*), align 8 - -; CHECK-LABEL: @function(float %x) -; CHECK: %0 = load float*, float* addrspace(1)* @function_indirect, align 8 -define void @function(float %x) local_unnamed_addr #5 { -entry: - %0 = load float*, float* addrspace(1)* @function_indirect, align 8 - store float %x, float* %0, align 4 - ret void -} - -; CHECK-LABEL: @kernel(double %x) -; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* 
@llvm.amdgcn.module.lds) ] -; CHECK: %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8 -define amdgpu_kernel void @kernel(double %x) local_unnamed_addr #5 { -entry: - %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8 - store double %x, double* %0, align 8 - ret void -} - - - - diff --git a/llvm/test/CodeGen/AMDGPU/select64.ll b/llvm/test/CodeGen/AMDGPU/select64.ll index ad4a5a6d1cb4..5a8b83c52370 100644 --- a/llvm/test/CodeGen/AMDGPU/select64.ll +++ b/llvm/test/CodeGen/AMDGPU/select64.ll @@ -1,14 +1,9 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=SI,GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=VI,GCN %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=VI %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefix=GFX90A %s -; GCN-LABEL: {{^}}select0: -; i64 select should be split into two i32 selects, and we shouldn't need -; to use a shfit to extract the hi dword of the input. -; GCN-NOT: s_lshr_b64 -; GCN: v_cndmask -; GCN: v_cndmask define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) { -<<<<<<< HEAD ; SI-LABEL: select0: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s6, s[0:1], 0xb @@ -53,8 +48,6 @@ define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) { ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
entry: %0 = icmp ugt i32 %cond, 5 %1 = select i1 %0, i64 0, i64 %in @@ -62,13 +55,7 @@ entry: ret void } -; GCN-LABEL: {{^}}select_trunc_i64: -; VI: s_cselect_b32 -; VI-NOT: s_cselect_b32 -; SI: v_cndmask_b32 -; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind { -<<<<<<< HEAD ; SI-LABEL: select_trunc_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -110,8 +97,6 @@ define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %cmp = icmp ugt i32 %cond, 5 %sel = select i1 %cmp, i64 0, i64 %in %trunc = trunc i64 %sel to i32 @@ -119,13 +104,7 @@ define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i ret void } -; GCN-LABEL: {{^}}select_trunc_i64_2: -; VI: s_cselect_b32 -; VI-NOT: s_cselect_b32 -; SI: v_cndmask_b32 -; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind { -<<<<<<< HEAD ; SI-LABEL: select_trunc_i64_2: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s8, s[0:1], 0xb @@ -168,8 +147,6 @@ define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%cmp = icmp ugt i32 %cond, 5 %sel = select i1 %cmp, i64 %a, i64 %b %trunc = trunc i64 %sel to i32 @@ -177,13 +154,7 @@ define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, ret void } -; GCN-LABEL: {{^}}v_select_trunc_i64_2: -; VI: s_cselect_b32 -; VI-NOT: s_cselect_b32 -; SI: v_cndmask_b32 -; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { -<<<<<<< HEAD ; SI-LABEL: v_select_trunc_i64_2: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -235,8 +206,6 @@ define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %con ; GFX90A-NEXT: v_mov_b32_e32 v1, s0 ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. %cmp = icmp ugt i32 %cond, 5 %a = load i64, i64 addrspace(1)* %aptr, align 8 %b = load i64, i64 addrspace(1)* %bptr, align 8 @@ -246,12 +215,7 @@ define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %con ret void } -; GCN-LABEL: {{^}}v_select_i64_split_imm: -; GCN-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} -; GCN-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}} -; GCN: s_endpgm define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { -<<<<<<< HEAD ; SI-LABEL: v_select_i64_split_imm: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -306,8 +270,6 @@ define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %c ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX90A-NEXT: s_endpgm -======= ->>>>>>> parent of 640beb38e771... [amdgpu] Enable selection of `s_cselect_b64`. 
%cmp = icmp ugt i32 %cond, 5 %a = load i64, i64 addrspace(1)* %aptr, align 8 %b = load i64, i64 addrspace(1)* %bptr, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/selectcc.ll b/llvm/test/CodeGen/AMDGPU/selectcc.ll index 54a26a4cf676..48127d493fbc 100644 --- a/llvm/test/CodeGen/AMDGPU/selectcc.ll +++ b/llvm/test/CodeGen/AMDGPU/selectcc.ll @@ -1,6 +1,6 @@ ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}selectcc_i64: ; EG: XOR_INT @@ -9,9 +9,10 @@ ; EG: CNDE_INT ; EG: CNDE_INT ; SI: v_cmp_eq_u64 +; SI: v_cndmask +; SI: v_cndmask ; VI: s_cmp_eq_u64 -; GCN: v_cndmask -; GCN: v_cndmask +; VI: s_cselect_b64 define amdgpu_kernel void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) { entry: %0 = icmp eq i64 %lhs, %rhs diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index 62ae206572b6..651567fe602a 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -16,10 +16,10 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}sint_to_fp_i1_f64: ; VI-DAG: s_cmp_eq_u32 -; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0xbff00000, 0 -; VI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] -; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}} +; VI-DAG: s_cselect_b64 
s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, -1.0, 0 +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]] +; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} ; VI: s_endpgm ; SI-DAG: s_cmp_eq_u32 diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index 1f26cd39c4b8..d35af1510218 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -76,13 +76,15 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1) ; GCN-LABEL: {{^}}uint_to_fp_i1_to_f64: ; VI-DAG: s_cmp_eq_u32 -; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0x3ff00000, 0 -; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] +; VI-DAG: s_cselect_b64 s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, 1.0, 0 +; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] +; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]] +; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}} ; SI-DAG: s_cmp_eq_u32 ; SI-DAG: s_cselect_b64 vcc, -1, 0 ; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, {{v[0-9]+}}, vcc -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}} +; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; SI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}} ; GCN: s_endpgm define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { %cmp = icmp eq i32 %in, 0 diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll deleted file mode 100644 index 2530cf6576b8..000000000000 --- a/llvm/test/CodeGen/X86/peep-test-5.ll +++ /dev/null @@ -1,56 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s -; Example of a decref operation with "immortal" objects. 
-; void decref(long* refcount) { -; long count = *refcount; -; if (count == 1) { free_object() } -; else if (count > 1) { *refcount = count - 1; } -; else { /* immortal */ } -; } -; Resulting assembly should share flags from single CMP instruction for both -; conditions! -define void @decref(i32* %p) { -; CHECK-LABEL: decref: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb_free -; CHECK-NEXT: callq free_object@PLT -; CHECK-NEXT: .LBB0_4: # %end -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_2: # %bb2 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: jle .LBB0_4 -; CHECK-NEXT: # %bb.3: # %bb_dec -; CHECK-NEXT: decl %eax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - %count = load i32, i32* %p, align 4 - %cmp0 = icmp eq i32 %count, 1 - br i1 %cmp0, label %bb_free, label %bb2 - -bb2: - %cmp1 = icmp sgt i32 %count, 1 - br i1 %cmp1, label %bb_dec, label %end - -bb_dec: - %dec = add nsw i32 %count, -1 - store i32 %dec, i32* %p, align 4 - br label %end - -bb_free: - call void @free_object() - br label %end - -end: - ret void -} - -declare void @free_object() diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s deleted file mode 100644 index b25bb8a3079b..000000000000 --- a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_weak_defs_extra.s +++ /dev/null @@ -1,19 +0,0 @@ -# Supplies a weak def, WeakDef, and a pointer holding its address, -# WeakDefAddrInExtraFile. 
- - .section __TEXT,__text,regular,pure_instructions - .build_version macos, 10, 14 sdk_version 10, 14 - .section __DATA,__data - .globl WeakDef - .weak_definition WeakDef - .p2align 2 -WeakDef: - .long 2 - - .globl WeakDefAddrInExtraFile - .p2align 3 -WeakDefAddrInExtraFile: - .quad WeakDef - - -.subsections_via_symbols diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s deleted file mode 100644 index 4d43ade6f3b7..000000000000 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_skip_debug_sections.s +++ /dev/null @@ -1,21 +0,0 @@ -# REQUIRES: asserts -# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %s -# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s -# -# Check that debug sections are not emitted, and consequently that we don't -# error out due to buggy past-the-end anonymous relocations in __debug_ranges. -# -# CHECK: __debug_ranges is a debug section: No graph section will be created. 
- .section __TEXT,__text,regular,pure_instructions - .macosx_version_min 10, 15 - .globl _main - .p2align 4, 0x90 -_main: - retq -Lpast_the_end: - - .section __DWARF,__debug_ranges - .p2align 4 - .quad Lpast_the_end - -.subsections_via_symbols diff --git a/llvm/test/Transforms/LICM/no-hoist-prof.ll b/llvm/test/Transforms/LICM/no-hoist-prof.ll deleted file mode 100644 index 1775ecc21c4d..000000000000 --- a/llvm/test/Transforms/LICM/no-hoist-prof.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt -passes='sample-profile,function(loop-mssa(licm))' -aa-pipeline=basic-aa -S -sample-profile-file='%S/Inputs/no-hoist-prof.prof' < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM -; RUN: opt -passes=licm -S < %s | FileCheck %s --check-prefix=CHECK-LICM - -; Original source code: -; -; int bar(int); -; int foo(int iter, int explode) { -; int base = bar(explode); -; for (int i = 0; i != iter; ++i) -; if (i == explode) -; iter = (base * base) + bar(iter); -; return iter; -; } - -; We need debug information in this .ll in order to leverage the pgo file, so: -; .ll generated by running `clang++ -O3 -g -S -emit-llvm`, then: -; - move hoisted mul back into cold section -; - give labels names -; - reindex variables -; - remove metadata calls, attributes, module header -; - remove unnecessary metadata - -; CHECK-LICM: .l.check.preheader:{{.*}} -; CHECK-LICM-NEXT: {{.*}} = mul {{.*}} -; CHECK-LICM-NEXT: br{{.*}} - -; CHECK-BFI-LICM: .l.cold:{{.*}} -; CHECK-BFI-LICM-NEXT: {{.*}} = mul {{.*}} - -define dso_local i32 @_Z3fooii(i32, i32) local_unnamed_addr #0 !dbg !7 { - %3 = tail call i32 @_Z3bari(i32 %1), !dbg !19 - %4 = icmp eq i32 %0, 0, !dbg !22 - br i1 %4, label %.l.ret, label %.l.check.preheader, !dbg !24 - -.l.check.preheader: - br label %.l.check, !dbg !24 - -.l.ret: - %5 = phi i32 [ 0, %2 ], [ %12, %.l.iterate ] - ret i32 %5, !dbg !25 - -.l.check: - %6 = phi i32 [ 0, %.l.check.preheader ], [ %13, %.l.iterate ] - %7 = phi i32 [ %0, %.l.check.preheader ], [ %12, %.l.iterate ] - %8 = 
icmp eq i32 %6, %1, !dbg !26 - br i1 %8, label %.l.cold, label %.l.iterate, !dbg !28 - -.l.cold: - %9 = mul nsw i32 %3, %3 - %10 = tail call i32 @_Z3bari(i32 %7), !dbg !29 - %11 = add nsw i32 %10, %9, !dbg !30 - br label %.l.iterate, !dbg !31 - -.l.iterate: - %12 = phi i32 [ %11, %.l.cold ], [ %7, %.l.check ] - %13 = add nuw nsw i32 %6, 1, !dbg !32 - %14 = icmp eq i32 %13, %12, !dbg !22 - br i1 %14, label %.l.ret, label %.l.check, !dbg !24, !llvm.loop !33 -} - -attributes #0 = { "use-sample-profile" } - -declare dso_local i32 @_Z3bari(i32) local_unnamed_addr #1 - -!llvm.module.flags = !{!4} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: None) -!1 = !DIFile(filename: "foo.cpp", directory: "/tmp/gather_pgo") -!4 = !{i32 2, !"Debug Info Version", i32 3} -!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooii", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !9) -!9 = !{!10, !10, !10} -!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3) -!19 = !DILocation(line: 3, column: 14, scope: !7) -!22 = !DILocation(line: 4, column: 21, scope: !23) -!23 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 3) -!24 = !DILocation(line: 4, column: 3, scope: !16) -!25 = !DILocation(line: 7, column: 3, scope: !7) -!26 = !DILocation(line: 5, column: 11, scope: !27) -!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 5, column: 9) -!28 = !DILocation(line: 5, column: 9, scope: !23) -!29 = !DILocation(line: 6, column: 30, scope: !27) -!30 = !DILocation(line: 6, column: 28, scope: !27) -!31 = !DILocation(line: 6, column: 7, scope: !27) -!32 = !DILocation(line: 4, column: 30, scope: !23) -!33 = distinct 
!{!33, !24, !34} -!34 = !DILocation(line: 6, column: 38, scope: !16) diff --git a/llvm/test/Transforms/LICM/sink.ll b/llvm/test/Transforms/LICM/sink.ll deleted file mode 100644 index d82168b147cc..000000000000 --- a/llvm/test/Transforms/LICM/sink.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: opt -S -licm -licm-coldness-threshold=0 < %s | FileCheck %s --check-prefix=CHECK-LICM -; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK -; RUN: opt -S < %s -passes='require,loop-mssa(licm),loop-sink' \ -; RUN: | FileCheck %s --check-prefix=CHECK-SINK -; RUN: opt -S -licm -licm-coldness-threshold=0 -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM -; RUN: opt -S -licm -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-BFI-LICM - -; Original source code: -; int g; -; int foo(int p, int x) { -; for (int i = 0; i != x; i++) -; if (__builtin_expect(i == p, 0)) { -; x += g; x *= g; -; } -; return x; -; } -; -; Load of global value g should not be hoisted to preheader. 
- -@g = global i32 0, align 4 - -define i32 @foo(i32, i32) #0 !prof !2 { - %3 = icmp eq i32 %1, 0 - br i1 %3, label %._crit_edge, label %.lr.ph.preheader - -.lr.ph.preheader: - br label %.lr.ph - -; CHECK-LICM: .lr.ph.preheader: -; CHECK-LICM: load i32, i32* @g -; CHECK-LICM: br label %.lr.ph - -; CHECK-BFI-LICM: .lr.ph.preheader: -; CHECK-BFI-LICM-NOT: load i32, i32* @g -; CHECK-BFI-LICM: br label %.lr.ph - -.lr.ph: - %.03 = phi i32 [ %8, %.combine ], [ 0, %.lr.ph.preheader ] - %.012 = phi i32 [ %.1, %.combine ], [ %1, %.lr.ph.preheader ] - %4 = icmp eq i32 %.03, %0 - br i1 %4, label %.then, label %.combine, !prof !1 - -.then: - %5 = load i32, i32* @g, align 4 - %6 = add nsw i32 %5, %.012 - %7 = mul nsw i32 %6, %5 - br label %.combine - -; CHECK-SINK: .then: -; CHECK-SINK: load i32, i32* @g -; CHECK-SINK: br label %.combine - -.combine: - %.1 = phi i32 [ %7, %.then ], [ %.012, %.lr.ph ] - %8 = add nuw nsw i32 %.03, 1 - %9 = icmp eq i32 %8, %.1 - br i1 %9, label %._crit_edge.loopexit, label %.lr.ph - -._crit_edge.loopexit: - %.1.lcssa = phi i32 [ %.1, %.combine ] - br label %._crit_edge - -._crit_edge: - %.01.lcssa = phi i32 [ 0, %2 ], [ %.1.lcssa, %._crit_edge.loopexit ] - ret i32 %.01.lcssa -} - -!1 = !{!"branch_weights", i32 1, i32 2000} -!2 = !{!"function_entry_count", i64 1} diff --git a/llvm/test/Verifier/dbg-invalid-enum-as-scope.ll b/llvm/test/Verifier/dbg-invalid-enum-as-scope.ll deleted file mode 100644 index 4053d4aede2e..000000000000 --- a/llvm/test/Verifier/dbg-invalid-enum-as-scope.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llvm-as -disable-output <%s 2>&1 | FileCheck %s -; CHECK: enum type is not a scope; check enum type ODR violation -; CHECK: warning: ignoring invalid debug info - -!llvm.module.flags = !{!0} -!0 = !{i32 2, !"Debug Info Version", i32 3} -!llvm.dbg.cu = !{!1} -!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !13, enums: !3) 
-!2 = !DIFile(filename: "file.c", directory: "dir") -!3 = !{!4} -!4 = distinct !DICompositeType(tag: DW_TAG_enumeration_type, name: "Stage", file: !2, line: 3, baseType: !10, size: 32, elements: !11, identifier: "_ZTS5Stage") -!6 = !DIDerivedType(tag: DW_TAG_member, name: "Var", scope: !4, file: !2, line: 5, baseType: !10) -!10 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) -!11 = !{!12} -!12 = !DIEnumerator(name: "A1", value: 0, isUnsigned: true) -!13 = !{!6} diff --git a/llvm/test/tools/llvm-nm/lit.local.cfg b/llvm/test/tools/llvm-nm/lit.local.cfg deleted file mode 100644 index c8625f4d9d24..000000000000 --- a/llvm/test/tools/llvm-nm/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'X86' in config.root.targets: - config.unsupported = True From 954aaf7c1449e4a3a55345532da98fe3ec8710fa Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 5 Jan 2022 10:24:29 -0800 Subject: [PATCH 713/992] [ELF] Demote all lazy symbols. NFC This complements D111365. D111365 did not demote isUsedInRegularObj lazy symbols just to work around a --symbol-ordering-file diagnostic quirk. The quirk was dropped by 00dd2d15a40b0fe9916bb55a48f264498d8fe910, so we can demote all lazy symbols now, not just the isUsedInRegularObj ones. 
--- lld/ELF/Driver.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 505602f7da62..e93f83906d52 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1850,8 +1850,7 @@ static void demoteSharedSymbols() { llvm::TimeTraceScope timeScope("Demote shared symbols"); for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast(sym); - if (!((s && !s->getFile().isNeeded) || - (sym->isLazy() && sym->isUsedInRegularObj))) + if (!(s && !s->getFile().isNeeded) && !sym->isLazy()) continue; bool used = sym->used; From dd48c6aff99233ff46ab08009e26baf3b2ed6f14 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 5 Jan 2022 10:30:30 -0800 Subject: [PATCH 714/992] github: Add action for automated issue notification This adds a github action that will mention a team called issue-subscribers-$LABEL whenever a label is added to a bug. Mentioning the team will automatically subscribe all team members to the bug. Differential Revision: https://reviews.llvm.org/D114412 --- .github/workflows/issue-subscriber.yml | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/issue-subscriber.yml diff --git a/.github/workflows/issue-subscriber.yml b/.github/workflows/issue-subscriber.yml new file mode 100644 index 000000000000..51c55fa362ad --- /dev/null +++ b/.github/workflows/issue-subscriber.yml @@ -0,0 +1,35 @@ +name: Issue Subscriber + +on: + issues: + types: + - labeled + +jobs: + auto-subscribe: + runs-on: ubuntu-latest + if: github.repository == 'llvm/llvm-project' + steps: + - name: Update watchers + uses: actions/github-script@v5 + with: + github-token: ${{ secrets.ISSUE_MENTION_SECRET }} + script: | + const teamname = "issue-subscribers-" + context.payload.label.name.replace(/ /g, "-").replace(":","-").replace("/","-"); + const comment = "@llvm/" + teamname; + try { + // This will throw an exception if the team does not exist and no + // comment will be created. 
+ team = await github.rest.teams.getByName({ + org: context.repo.owner, + team_slug: teamname + }); + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: comment + }); + } catch (e){ + console.log(e); + } From 58a0e449e175e9ae48632b4bda1df1fc87f81323 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Jan 2022 08:53:33 -0800 Subject: [PATCH 715/992] [instcombine] Allow sinking of calls with known writes to uses If we have a call whose only side effect is a write to a location which is known to be dead, we can sink said call to the users of the call's result value. This is analogous to the recent changes to delete said calls if unused, but framed as a sinking transform instead. Differential Revision: https://reviews.llvm.org/D116200 --- .../InstCombine/InstructionCombining.cpp | 54 +++++++++++++++++-- .../sink_sideeffecting_instruction.ll | 22 ++++---- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c66b39fc7927..2d87f26ae39f 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3727,12 +3727,13 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { /// beginning of DestBlock, which can only happen if it's safe to move the /// instruction past all of the instructions between it and the end of its /// block. -static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { +static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock, + TargetLibraryInfo &TLI) { assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!"); BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. 
- if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() || + if (isa(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || I->isTerminator()) return false; @@ -3752,6 +3753,51 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { if (CI->isConvergent()) return false; } + + // Unless we can prove that the memory write isn't visibile except on the + // path we're sinking to, we must bail. + if (I->mayWriteToMemory()) { + // Check for case where the call writes to an otherwise dead alloca. This + // shows up for unused out-params in idiomatic C/C++ code. + auto *CB = cast(I); + if (!CB) + // TODO: handle e.g. store to alloca here - only worth doing if we extend + // to allow reload along used path as described below. Otherwise, this + // is simply a store to a dead allocation which will be removed. + return false; + Optional Dest = MemoryLocation::getForDest(CB, TLI); + if (!Dest) + return false; + auto *AI = dyn_cast(getUnderlyingObject(Dest->Ptr)); + if (!AI) + // TODO: allow malloc? + return false; + // TODO: allow memory access dominated by move point? Note that since AI + // could have a reference to itself captured by the call, we would need to + // account for cycles in doing so. + SmallVector AllocaUsers; + SmallPtrSet Visited; + auto pushUsers = [&](const Instruction &I) { + for (const User *U : I.users()) { + if (Visited.insert(U).second) + AllocaUsers.push_back(U); + } + }; + pushUsers(*AI); + while (!AllocaUsers.empty()) { + auto *UserI = cast(AllocaUsers.pop_back_val()); + if (isa(UserI) || isa(UserI) || + isa(UserI)) { + pushUsers(*UserI); + continue; + } + if (UserI == CB) + continue; + // TODO: support lifetime.start/end here + return false; + } + } + // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { @@ -3760,7 +3806,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { // successor block. 
if (DestBlock->getUniquePredecessor() != I->getParent()) return false; - for (BasicBlock::iterator Scan = I->getIterator(), + for (BasicBlock::iterator Scan = std::next(I->getIterator()), E = I->getParent()->end(); Scan != E; ++Scan) if (Scan->mayWriteToMemory()) @@ -3936,7 +3982,7 @@ bool InstCombinerImpl::run() { if (OptBB) { auto *UserParent = *OptBB; // Okay, the CFG is simple enough, try to sink this instruction. - if (TryToSinkInstruction(I, UserParent)) { + if (TryToSinkInstruction(I, UserParent, TLI)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); MadeIRChange = true; // We'll add uses of the sunk instruction below, but since diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index 49f9e5ec2cb4..8d0259df0392 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -117,11 +117,11 @@ define i32 @sink_write_to_use(i1 %c) { ; CHECK-LABEL: @sink_write_to_use( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -140,11 +140,11 @@ define i32 @sink_readwrite_to_use(i1 %c) { ; CHECK-LABEL: @sink_readwrite_to_use( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull 
[[VAR]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -163,12 +163,12 @@ define i32 @sink_bitcast(i1 %c) { ; CHECK-LABEL: @sink_bitcast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i8, align 8 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i8* [[VAR]] to i32* -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[BITCAST]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i8* [[VAR]] to i32* +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[BITCAST]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -189,12 +189,12 @@ define i32 @sink_gep1(i1 %c) { ; CHECK-LABEL: @sink_gep1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR1:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 1 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[GEP]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 1 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[GEP]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -215,12 +215,12 @@ define i32 @sink_gep2(i1 %c) { ; CHECK-LABEL: @sink_gep2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR1:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[VAR1_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 0 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR1_SUB]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR1_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 0 +; 
CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR1_SUB]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -240,12 +240,12 @@ define i32 @sink_addrspacecast(i1 %c) { ; CHECK-LABEL: @sink_addrspacecast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 8 -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast i32* [[VAR]] to i32 addrspace(2)* -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown.as2(i32 addrspace(2)* [[CAST]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast i32* [[VAR]] to i32 addrspace(2)* +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown.as2(i32 addrspace(2)* [[CAST]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -394,11 +394,11 @@ define i32 @sink_lifetime3(i1 %c) { ; CHECK-LABEL: @sink_lifetime3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: From d5b2921faf5123b609e506efb40b7ec031679077 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 17 Dec 2021 10:04:30 -0500 Subject: [PATCH 716/992] [lld/tests] Stop setting the "asserts" and "debug" features The last use of `REQUIRES: debug` was removed in 2013 in 72c5d3d7c in favor of `REQUIRES: asserts`. The last use of `REQUIRES: asserts` was removed in 2015 in 251b0e268 when the old COFF linker was removed. lld's test suite currently has no behavior difference with respect to assertions or debug builds (and hasn't had it for 6 years). 
Let's keep it that way :) Differential Revision: https://reviews.llvm.org/D115941 --- lld/test/lit.cfg.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 236c46187002..22012b5863f9 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -63,9 +63,7 @@ config.available_features.add('demangler') llvm_config.feature_config( - [('--build-mode', {'DEBUG': 'debug'}), - ('--assertion-mode', {'ON': 'asserts'}), - ('--targets-built', {'AArch64': 'aarch64', + [('--targets-built', {'AArch64': 'aarch64', 'AMDGPU': 'amdgpu', 'ARM': 'arm', 'AVR': 'avr', From 356ada9df419ac44e82b1078ccac819f352cd18d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Jan 2022 11:00:01 -0800 Subject: [PATCH 717/992] Fix accidental usage of cast<> instead of dyn_cast<> in 58a0e44 --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 2d87f26ae39f..ca73ab913c34 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3759,7 +3759,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock, if (I->mayWriteToMemory()) { // Check for case where the call writes to an otherwise dead alloca. This // shows up for unused out-params in idiomatic C/C++ code. - auto *CB = cast(I); + auto *CB = dyn_cast(I); if (!CB) // TODO: handle e.g. store to alloca here - only worth doing if we extend // to allow reload along used path as described below. Otherwise, this From 34435fd1053783c325ca2f57258d715045531168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 5 Jan 2022 18:48:41 +0000 Subject: [PATCH 718/992] [llvm] Add support for DW_TAG_immutable_type Added documentation about DW_TAG_immutable_type too. 
Reviewed By: probinson Differential Revision: https://reviews.llvm.org/D113633 --- llvm/docs/LangRef.rst | 5 +-- llvm/docs/SourceLevelDebugging.rst | 1 + .../CodeGen/AsmPrinter/DebugHandlerBase.cpp | 6 ++-- llvm/lib/DWARFLinker/DWARFLinker.cpp | 1 + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 1 + llvm/lib/IR/Verifier.cpp | 1 + llvm/test/DebugInfo/dwarfdump-immutable.ll | 34 +++++++++++++++++++ 7 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/dwarfdump-immutable.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 389c90937bb0..d8fd7da7ce77 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5328,6 +5328,7 @@ The following ``tag:`` values are valid: DW_TAG_volatile_type = 53 DW_TAG_restrict_type = 55 DW_TAG_atomic_type = 71 + DW_TAG_immutable_type = 75 .. _DIDerivedTypeMember: @@ -5344,8 +5345,8 @@ friends. ``DW_TAG_typedef`` is used to provide a name for the ``baseType:``. ``DW_TAG_pointer_type``, ``DW_TAG_reference_type``, ``DW_TAG_const_type``, -``DW_TAG_volatile_type``, ``DW_TAG_restrict_type`` and ``DW_TAG_atomic_type`` -are used to qualify the ``baseType:``. +``DW_TAG_volatile_type``, ``DW_TAG_restrict_type``, ``DW_TAG_atomic_type`` and +``DW_TAG_immutable_type`` are used to qualify the ``baseType:``. Note that the ``void *`` type is expressed as a type derived from NULL. 
diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index b3647efe2f14..e4a529d0e242 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -1881,6 +1881,7 @@ tag is one of: * DW_TAG_subrange_type * DW_TAG_base_type * DW_TAG_const_type +* DW_TAG_immutable_type * DW_TAG_file_type * DW_TAG_namelist * DW_TAG_packed_type diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 4df34d2c9402..18fc46c74eb4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -155,7 +155,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type) + Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type && + Tag != dwarf::DW_TAG_immutable_type) return DDTy->getSizeInBits(); DIType *BaseType = DDTy->getBaseType(); @@ -210,7 +211,8 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); + T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type || + T == dwarf::DW_TAG_immutable_type); assert(DTy->getBaseType() && "Expected valid base type"); return isUnsignedDIType(DTy->getBaseType()); } diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index 4cc146e086f9..30ff1e7fdaf3 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -124,6 +124,7 @@ static bool isTypeTag(uint16_t Tag) { case dwarf::DW_TAG_interface_type: case dwarf::DW_TAG_unspecified_type: case dwarf::DW_TAG_shared_type: + 
case dwarf::DW_TAG_immutable_type: return true; default: break; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 95135c95e8d2..34c42025109d 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1098,6 +1098,7 @@ static Optional getTypeSize(DWARFDie Type, uint64_t PointerSize) { return PointerSize; } case DW_TAG_const_type: + case DW_TAG_immutable_type: case DW_TAG_volatile_type: case DW_TAG_restrict_type: case DW_TAG_typedef: { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 46da9cfbc6b5..254d7b2fdff4 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1059,6 +1059,7 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) { N.getTag() == dwarf::DW_TAG_reference_type || N.getTag() == dwarf::DW_TAG_rvalue_reference_type || N.getTag() == dwarf::DW_TAG_const_type || + N.getTag() == dwarf::DW_TAG_immutable_type || N.getTag() == dwarf::DW_TAG_volatile_type || N.getTag() == dwarf::DW_TAG_restrict_type || N.getTag() == dwarf::DW_TAG_atomic_type || diff --git a/llvm/test/DebugInfo/dwarfdump-immutable.ll b/llvm/test/DebugInfo/dwarfdump-immutable.ll new file mode 100644 index 000000000000..e9cd8015e1e0 --- /dev/null +++ b/llvm/test/DebugInfo/dwarfdump-immutable.ll @@ -0,0 +1,34 @@ +;; This test checks whether DWARF tag DW_TAG_immutable_type +;; is accepted and processed. + +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; Test whether DW_TAG_immutable_type is accepted. 
+ +; CHECK: DW_TAG_immutable_type + +; ModuleID = 'immutable.d' +source_filename = "immutable.d" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +@_D9immutable1aya = constant i8 97, align 1, !dbg !0 ; [#uses = 0] + +!llvm.module.flags = !{!5} +!llvm.dbg.cu = !{!6} +!llvm.ident = !{!13} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "a", linkageName: "_D9immutable1aya", scope: !2, file: !3, line: 1, type: !4, isLocal: false, isDefinition: true) +!2 = !DIModule(scope: !3, name: "immutable") +!3 = !DIFile(filename: "immutable.d", directory: "/home/luis/Temp") +!4 = !DIDerivedType(tag: DW_TAG_immutable_type, baseType: !14) +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DICompileUnit(language: DW_LANG_D, file: !3, producer: "LDC 1.28.0 (LLVM 13.0.0)", isOptimized: false, runtimeVersion: 1, emissionKind: FullDebug, enums: !7, globals: !8, imports: !9) +!7 = !{} +!8 = !{!0} +!9 = !{!10} +!10 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !2, entity: !11, file: !3) +!11 = !DIModule(scope: !12, name: "object") +!12 = !DIFile(filename: "usr/include/dlang/ldc/object.d", directory: "/") +!13 = !{!"ldc version 1.28.0"} +!14 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_UTF) From 1a97138a1c98501d2f0c5e38426dc3544bd49394 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Jan 2022 11:16:03 -0800 Subject: [PATCH 719/992] Add test case from 356ada9 --- .../sink_sideeffecting_instruction.ll | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index 8d0259df0392..d218d7871cbe 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -446,6 +446,33 @@ use_block: ret i32 %var3 } +; Mostly 
checking that trying to sink a non-call doesn't crash (i.e. prior bug) +define i32 @sink_atomicrmw_to_use(i1 %c) { +; CHECK-LABEL: @sink_atomicrmw_to_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 0, i32* [[VAR]], align 4 +; CHECK-NEXT: [[VAR3:%.*]] = atomicrmw add i32* [[VAR]], i32 1 seq_cst, align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + store i32 0, i32* %var + %var3 = atomicrmw add i32* %var, i32 1 seq_cst, align 4 + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + + declare i32 @bar() declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) From fffd663c871d91c431bb3f0ccda48567f0d9aca5 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 5 Jan 2022 19:34:27 +0000 Subject: [PATCH 720/992] [CodeGen] Initialize MaxBytesForAlignment in TargetLoweringBase::TargetLoweringBase. This appears to be missing from D114590, causing sanitizer errors. --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index f4cfe4f341cb..ab574232e367 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -715,6 +715,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { SchedPreferenceInfo = Sched::ILP; GatherAllAliasesMaxDepth = 18; IsStrictFPEnabled = DisableStrictNodeMutation; + MaxBytesForAlignment = 0; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. 
MaxAtomicSizeInBitsSupported = 1024; From 96e36048fddf7f9ce3e59c117fdd4d307f9165c5 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 7 Dec 2021 11:53:38 -0800 Subject: [PATCH 721/992] [CMake] Move the AIX archiver settings to a module This allows their reuse across projects. The name of the module is intentionally generic because we would like to move more platform checks there. Differential Revision: https://reviews.llvm.org/D115276 --- cmake/Modules/SetPlatformToolchainTools.cmake | 9 +++++++++ compiler-rt/CMakeLists.txt | 10 +--------- compiler-rt/lib/builtins/CMakeLists.txt | 10 +--------- 3 files changed, 11 insertions(+), 18 deletions(-) create mode 100644 cmake/Modules/SetPlatformToolchainTools.cmake diff --git a/cmake/Modules/SetPlatformToolchainTools.cmake b/cmake/Modules/SetPlatformToolchainTools.cmake new file mode 100644 index 000000000000..ab2abe934473 --- /dev/null +++ b/cmake/Modules/SetPlatformToolchainTools.cmake @@ -0,0 +1,9 @@ +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) + +if(CMAKE_SYSTEM_NAME MATCHES "AIX") + foreach(lang IN LISTS languages) + set(CMAKE_${lang}_ARCHIVE_CREATE " -X32_64 qc ") + set(CMAKE_${lang}_ARCHIVE_APPEND " -X32_64 q ") + set(CMAKE_${lang}_ARCHIVE_FINISH " -X32_64 ") + endforeach() +endif() diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 1e721a046a2f..c5003b5efa1d 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -28,6 +28,7 @@ else() set(CMAKE_CFG_RESOLVED_INTDIR "") endif() +include(SetPlatformToolchainTools) include(base-config-ix) include(CompilerRTUtils) @@ -574,15 +575,6 @@ if (CMAKE_LINKER MATCHES "link.exe$") set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /IGNORE:4221") endif() -if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") - set(CMAKE_C_ARCHIVE_CREATE " -X32_64 qc ") - set(CMAKE_CXX_ARCHIVE_CREATE " -X32_64 qc ") - set(CMAKE_C_ARCHIVE_APPEND " -X32_64 q ") - set(CMAKE_CXX_ARCHIVE_APPEND " -X32_64 q ") - set(CMAKE_C_ARCHIVE_FINISH " -X32_64 
") - set(CMAKE_CXX_ARCHIVE_FINISH " -X32_64 ") -endif() - add_subdirectory(include) option(COMPILER_RT_USE_LIBCXX diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index e2489f1a3ed0..0b965d90a5b5 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -28,6 +28,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) load_llvm_config() construct_compiler_rt_default_triple() + include(SetPlatformToolchainTools) if(APPLE) include(CompilerRTDarwinUtils) endif() @@ -35,15 +36,6 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(UseLibtool) endif() include(AddCompilerRT) - - if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") - set(CMAKE_C_ARCHIVE_CREATE " -X32_64 qc ") - set(CMAKE_CXX_ARCHIVE_CREATE " -X32_64 qc ") - set(CMAKE_C_ARCHIVE_APPEND " -X32_64 q ") - set(CMAKE_CXX_ARCHIVE_APPEND " -X32_64 q ") - set(CMAKE_C_ARCHIVE_FINISH " -X32_64 ") - set(CMAKE_CXX_ARCHIVE_FINISH " -X32_64 ") - endif() endif() if (COMPILER_RT_STANDALONE_BUILD) From 68ac7b17016e9449942d9c56f38f0dadca278117 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 20 Dec 2021 20:50:55 -0800 Subject: [PATCH 722/992] [NFC][mlgo] Add feature declarations for the ML regalloc advisor This just adds feature declarations and some boilerplate. 
Differential Revision: https://reviews.llvm.org/D116076 --- llvm/include/llvm/Analysis/MLModelRunner.h | 9 +- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 173 ++++++++++++++++++++ 3 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h index 3a6fa99347fb..669c02af0b3b 100644 --- a/llvm/include/llvm/Analysis/MLModelRunner.h +++ b/llvm/include/llvm/Analysis/MLModelRunner.h @@ -41,6 +41,11 @@ class MLModelRunner { getTensorUntyped(static_cast(FeatureID))); } + virtual void *getTensorUntyped(size_t Index) = 0; + const void *getTensorUntyped(size_t Index) const { + return (const_cast(this))->getTensorUntyped(Index); + } + enum class Kind : int { Unknown, Release, Development, NoOp }; Kind getKind() const { return Type; } @@ -49,10 +54,6 @@ class MLModelRunner { assert(Type != Kind::Unknown); } virtual void *evaluateUntyped() = 0; - virtual void *getTensorUntyped(size_t Index) = 0; - const void *getTensorUntyped(size_t Index) const { - return (const_cast(this))->getTensorUntyped(Index); - } LLVMContext &Ctx; const Kind Type; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 8ef99257bb40..db5a6ffd826b 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -113,6 +113,7 @@ add_llvm_component_library(LLVMCodeGen MIRFSDiscriminator.cpp MIRSampleProfile.cpp MIRYamlMapping.cpp + MLRegallocEvictAdvisor.cpp ModuloSchedule.cpp MultiHazardRecognizer.cpp PatchableFunction.cpp diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp new file mode 100644 index 000000000000..2839cd955b05 --- /dev/null +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -0,0 +1,173 @@ +//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the ML eviction advisor and reward injection pass +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" + +#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) + +#include "RegAllocEvictionAdvisor.h" +#include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/ModelUnderTrainingRunner.h" +#include "llvm/Analysis/NoInferenceModelRunner.h" +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" + +#include + +using namespace llvm; + +#define DEBUG_TYPE "ml-regalloc" + +namespace { +// This is the maximum number of interfererring ranges. That's the number of +// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize. +// For X86, that's 32. +// TODO: find a way to get this, statically, in a programmatic way. +static const int64_t MaxInterferences = 32; + +// Logically, we can think of the feature set given to the evaluator as a 2D +// matrix. The rows are the features (see next). The columns correspond to the +// interferences. We treat the candidate virt reg as an 'interference', too, as +// its feature set is the same as that of the interferring ranges. 
So we'll have +// MaxInterferences + 1 columns and by convention, we will use the last column +// for the virt reg seeking allocation. +static const int64_t CandidateVirtRegPos = MaxInterferences; +static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; + +// Most features are as described above, so we'll reuse this vector in defining +// them. +static const std::vector PerLiveRangeShape{1, NumberOfInterferences}; + +// -------------- +// Features table +// -------------- +// For each interfering live range (incl. the candidate) we collect a number of +// features. However, because the features are of different types (and because +// of ML best practices), we organize the tensors per feature, not per +// candidate. Each such tensor has a scalar value corresponding to the +// interferring live range at that position, in the order in AllocationOrder. +// The last position corresponds to the virt reg seeking allocation. +// Exception to all that is the progression feature, which is just a scalar (see +// its documentation for details). +// Note on naming: the "_by_max" are normalized using the largest value of that +// tensor, as observed in the current decision making stage (i.e. for the +// current call to the advisor's tryFindEvictionCandidate) +// +// The feature list format: type, name, shape, documentation. +// Note: we can really just use int64 and float, hence the modeling of some +// bools as int64 values. +#define RA_EVICT_FEATURES_LIST(M) \ + M(int64_t, mask, PerLiveRangeShape, \ + "boolean values, 0 for unavailable candidates (i.e. if a position is 0, " \ + "it " \ + "can't be evicted)") \ + M(int64_t, is_free, PerLiveRangeShape, \ + "boolean values, 1 if this phys reg is actually free (no interferences)") \ + M(float, nr_urgent, PerLiveRangeShape, \ + "number of 'urgent' intervals, normalized. 
Urgent are those that are OK " \ + "to break cascades") \ + M(float, nr_broken_hints, PerLiveRangeShape, \ + "if this position were evicted, how many broken hints would there be") \ + M(int64_t, is_hint, PerLiveRangeShape, \ + "is this a preferred phys reg for the candidate") \ + M(int64_t, is_local, PerLiveRangeShape, \ + "is this live range local to a basic block") \ + M(float, nr_rematerializable, PerLiveRangeShape, \ + "nr rematerializable ranges") \ + M(float, nr_defs_and_uses, PerLiveRangeShape, \ + "bb freq - weighed nr defs and uses") \ + M(float, weighed_reads_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of reads, normalized") \ + M(float, weighed_writes_by_max, PerLiveRangeShape, \ + "bb feq - weighed nr of writes, normalized") \ + M(float, weighed_read_writes_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of uses that are both read and writes, normalized") \ + M(float, weighed_indvars_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of uses that are indvars, normalized") \ + M(float, hint_weights_by_max, PerLiveRangeShape, \ + "bb freq - weighed nr of uses that are hints, normalized") \ + M(float, start_bb_freq_by_max, PerLiveRangeShape, \ + "the freq in the start block, normalized") \ + M(float, end_bb_freq_by_max, PerLiveRangeShape, \ + "freq of end block, normalized") \ + M(float, hottest_bb_freq_by_max, PerLiveRangeShape, \ + "hottest BB freq, normalized") \ + M(float, liverange_size, PerLiveRangeShape, \ + "size (instr index diff) of the LR") \ + M(float, use_def_density, PerLiveRangeShape, \ + "the max weight, as computed by the manual heuristic") \ + M(int64_t, max_stage, PerLiveRangeShape, \ + "largest stage of an interval in this LR") \ + M(int64_t, min_stage, PerLiveRangeShape, \ + "lowest stage of an interval in this LR") \ + M(float, progress, {1}, "ratio of current queue size to initial size") + +// The model learns to pick one of the mask == 1 interferences. This is the name +// of the output tensor. 
+// The contract with the model is that the output will be guaranteed to be to a +// mask == 1 position. +const char *const DecisionName = "index_to_evict"; + +// Named features index. +enum FeatureIDs { +#define _FEATURE_IDX(_, name, __, ___) name, + RA_EVICT_FEATURES_LIST(_FEATURE_IDX) +#undef _FEATURE_IDX + FeatureCount +}; + +// The ML advisor will typically have a sparse input to the evaluator, because +// various phys regs won't be available. It's easier (maintenance-wise) to +// bulk-reset the state of the evaluator each time we are about to use it again. +template size_t getTotalSize(const std::vector &Shape) { + size_t Ret = sizeof(T); + for (const auto V : Shape) + Ret *= V; + return Ret; +} + +void resetInputs(MLModelRunner &Runner) { +#define _RESET(TYPE, NAME, SHAPE, __) \ + std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \ + getTotalSize(SHAPE)); + RA_EVICT_FEATURES_LIST(_RESET) +#undef _RESET +} + +// Development mode-specifics +#ifdef LLVM_HAVE_TF_API +#define _DECL_FEATURES(type, name, shape, _) \ + TensorSpec::createSpec(#name, shape), + +static const std::vector InputFeatures{ + {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}}; +#undef _DECL_FEATURES +static const TensorSpec Output = + TensorSpec::createSpec(DecisionName, {1}); +const char *const RewardName = "reward"; +static const TensorSpec Reward = + TensorSpec::createSpec(RewardName, {1}); + +#endif //#ifdef LLVM_HAVE_TF_API +} // namespace +#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) \ No newline at end of file From 4016d440fec4edfc62543b1896a23ff70d08492d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Jan 2022 11:53:08 -0800 Subject: [PATCH 723/992] Precommit test for D116683 --- .../sink_sideeffecting_instruction.ll | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index 
d218d7871cbe..0a13742e6347 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -417,8 +417,8 @@ use_block: ret i32 %var3 } -define i32 @sink_lifetime4(i1 %c) { -; CHECK-LABEL: @sink_lifetime4( +define i32 @sink_lifetime4a(i1 %c) { +; CHECK-LABEL: @sink_lifetime4a( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[VAR]] to i8* @@ -446,6 +446,36 @@ use_block: ret i32 %var3 } +; Version which only writes to var, and thus can't rely on may-read scan for +; clobbers to prevent the transform +define i32 @sink_lifetime4b(i1 %c) { +; CHECK-LABEL: @sink_lifetime4b( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[VAR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %bitcast = bitcast i32* %var to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + %var3 = call i32 @unknown(i32* writeonly %var) argmemonly nounwind willreturn + call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} ; Mostly checking that trying to sink a non-call doesn't crash (i.e. 
prior bug) define i32 @sink_atomicrmw_to_use(i1 %c) { ; CHECK-LABEL: @sink_atomicrmw_to_use( From 2353e1c87b09c20e75f0f3ceb05fa4a4261fe3dd Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 5 Jan 2022 22:58:35 +0300 Subject: [PATCH 724/992] [NFC][SimplifyCFG] Extract `performBlockTailMerging()` out of `tailMergeBlocksWithSimilarFunctionTerminators()` --- .../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 142 +++++++++--------- 1 file changed, 75 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 3799d2dd1cf2..b830c07715ae 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -78,6 +78,79 @@ static cl::opt UserSinkCommonInsts( STATISTIC(NumSimpl, "Number of blocks simplified"); +static bool +performBlockTailMerging(Function &F, ArrayRef BBs, + std::vector *Updates) { + SmallVector NewOps; + + // We don't want to change IR just because we can. + // Only do that if there are at least two blocks we'll tail-merge. + if (BBs.size() < 2) + return false; + + if (Updates) + Updates->reserve(Updates->size() + BBs.size()); + + BasicBlock *CanonicalBB; + Instruction *CanonicalTerm; + { + auto *Term = BBs[0]->getTerminator(); + + // Create a canonical block for this function terminator type now, + // placing it *before* the first block that will branch to it. + CanonicalBB = BasicBlock::Create( + F.getContext(), Twine("common.") + Term->getOpcodeName(), &F, BBs[0]); + // We'll also need a PHI node per each operand of the terminator. + NewOps.resize(Term->getNumOperands()); + for (auto I : zip(Term->operands(), NewOps)) { + std::get<1>(I) = PHINode::Create(std::get<0>(I)->getType(), + /*NumReservedValues=*/BBs.size(), + CanonicalBB->getName() + ".op"); + CanonicalBB->getInstList().push_back(std::get<1>(I)); + } + // Make it so that this canonical block actually has the right + // terminator. 
+ CanonicalTerm = Term->clone(); + CanonicalBB->getInstList().push_back(CanonicalTerm); + // If the canonical terminator has operands, rewrite it to take PHI's. + for (auto I : zip(NewOps, CanonicalTerm->operands())) + std::get<1>(I) = std::get<0>(I); + } + + // Now, go through each block (with the current terminator type) + // we've recorded, and rewrite it to branch to the new common block. + const DILocation *CommonDebugLoc = nullptr; + for (BasicBlock *BB : BBs) { + auto *Term = BB->getTerminator(); + assert(Term->getOpcode() == CanonicalTerm->getOpcode() && + "All blocks to be tail-merged must be the same " + "(function-terminating) terminator type."); + + // Aha, found a new non-canonical function terminator. If it has operands, + // forward them to the PHI nodes in the canonical block. + for (auto I : zip(Term->operands(), NewOps)) + std::get<1>(I)->addIncoming(std::get<0>(I), BB); + + // Compute the debug location common to all the original terminators. + if (!CommonDebugLoc) + CommonDebugLoc = Term->getDebugLoc(); + else + CommonDebugLoc = + DILocation::getMergedLocation(CommonDebugLoc, Term->getDebugLoc()); + + // And turn BB into a block that just unconditionally branches + // to the canonical block. + Term->eraseFromParent(); + BranchInst::Create(CanonicalBB, BB); + if (Updates) + Updates->push_back({DominatorTree::Insert, BB, CanonicalBB}); + } + + CanonicalTerm->setDebugLoc(CommonDebugLoc); + + return true; +} + static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F, DomTreeUpdater *DTU) { SmallMapVector, 4> @@ -133,73 +206,8 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F, std::vector Updates; - for (ArrayRef BBs : make_second_range(Structure)) { - SmallVector NewOps; - - // We don't want to change IR just because we can. - // Only do that if there are at least two blocks we'll tail-merge. 
- if (BBs.size() < 2) - continue; - - Changed = true; - - if (DTU) - Updates.reserve(Updates.size() + BBs.size()); - - BasicBlock *CanonicalBB; - Instruction *CanonicalTerm; - { - auto *Term = BBs[0]->getTerminator(); - - // Create a canonical block for this function terminator type now, - // placing it *before* the first block that will branch to it. - CanonicalBB = BasicBlock::Create( - F.getContext(), Twine("common.") + Term->getOpcodeName(), &F, BBs[0]); - // We'll also need a PHI node per each operand of the terminator. - NewOps.resize(Term->getNumOperands()); - for (auto I : zip(Term->operands(), NewOps)) { - std::get<1>(I) = PHINode::Create(std::get<0>(I)->getType(), - /*NumReservedValues=*/BBs.size(), - CanonicalBB->getName() + ".op"); - CanonicalBB->getInstList().push_back(std::get<1>(I)); - } - // Make it so that this canonical block actually has the right - // terminator. - CanonicalTerm = Term->clone(); - CanonicalBB->getInstList().push_back(CanonicalTerm); - // If the canonical terminator has operands, rewrite it to take PHI's. - for (auto I : zip(NewOps, CanonicalTerm->operands())) - std::get<1>(I) = std::get<0>(I); - } - - // Now, go through each block (with the current terminator type) - // we've recorded, and rewrite it to branch to the new common block. - const DILocation *CommonDebugLoc = nullptr; - for (BasicBlock *BB : BBs) { - auto *Term = BB->getTerminator(); - - // Aha, found a new non-canonical function terminator. If it has operands, - // forward them to the PHI nodes in the canonical block. - for (auto I : zip(Term->operands(), NewOps)) - std::get<1>(I)->addIncoming(std::get<0>(I), BB); - - // Compute the debug location common to all the original terminators. - if (!CommonDebugLoc) - CommonDebugLoc = Term->getDebugLoc(); - else - CommonDebugLoc = - DILocation::getMergedLocation(CommonDebugLoc, Term->getDebugLoc()); - - // And turn BB into a block that just unconditionally branches - // to the canonical block. 
- Term->eraseFromParent(); - BranchInst::Create(CanonicalBB, BB); - if (DTU) - Updates.push_back({DominatorTree::Insert, BB, CanonicalBB}); - } - - CanonicalTerm->setDebugLoc(CommonDebugLoc); - } + for (ArrayRef BBs : make_second_range(Structure)) + Changed |= performBlockTailMerging(F, BBs, DTU ? &Updates : nullptr); if (DTU) DTU->applyUpdates(Updates); From ca7ffe09dc6e525109e3cd570cc5182ce568be13 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 5 Jan 2022 20:02:39 +0000 Subject: [PATCH 725/992] [AArch64] Rename CPY to DUP. NFC These instructions have nothing to do with the new MOP CPY instructions, and are better named DUP to avoid confusion. Differential Revision: https://reviews.llvm.org/D116655 --- .../lib/Target/AArch64/AArch64InstrFormats.td | 24 ++--- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 18 ++-- llvm/lib/Target/AArch64/AArch64SchedA57.td | 2 +- llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 10 +- .../Target/AArch64/AArch64SchedExynosM3.td | 2 +- .../Target/AArch64/AArch64SchedExynosM4.td | 2 +- .../Target/AArch64/AArch64SchedExynosM5.td | 2 +- .../AArch64/AArch64SchedFalkorDetails.td | 2 +- .../AArch64/AArch64SchedThunderX2T99.td | 2 +- .../AArch64/AArch64SchedThunderX3T110.td | 2 +- .../GISel/AArch64InstructionSelector.cpp | 8 +- .../AArch64/GlobalISel/contract-store.mir | 6 +- .../GlobalISel/select-extract-vector-elt.mir | 32 +++--- .../AArch64/GlobalISel/select-extract.mir | 4 +- .../GlobalISel/select-frint-nofp16.mir | 40 +++---- .../AArch64/GlobalISel/select-unmerge.mir | 100 +++++++++--------- 16 files changed, 128 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index fce3126db21e..ec8c3a851133 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7700,10 +7700,10 @@ multiclass SIMDTableLookupTied { //---------------------------------------------------------------------------- -// 
AdvSIMD scalar CPY +// AdvSIMD scalar DUP //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDScalarCPY : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm, "{\t$dst, $src" # kind # "$idx" # @@ -7717,30 +7717,30 @@ class BaseSIMDScalarCPY : InstAlias; -multiclass SIMDScalarCPY { - def i8 : BaseSIMDScalarCPY { +multiclass SIMDScalarDUP { + def i8 : BaseSIMDScalarDUP { bits<4> idx; let Inst{20-17} = idx; let Inst{16} = 1; } - def i16 : BaseSIMDScalarCPY { + def i16 : BaseSIMDScalarDUP { bits<3> idx; let Inst{20-18} = idx; let Inst{17-16} = 0b10; } - def i32 : BaseSIMDScalarCPY { + def i32 : BaseSIMDScalarDUP { bits<2> idx; let Inst{20-19} = idx; let Inst{18-16} = 0b100; } - def i64 : BaseSIMDScalarCPY { + def i64 : BaseSIMDScalarDUP { bits<1> idx; let Inst{20} = idx; let Inst{19-16} = 0b1000; @@ -7751,16 +7751,16 @@ multiclass SIMDScalarCPY { (!cast(NAME # i64) V128:$src, VectorIndexD:$idx)>; // 'DUP' mnemonic aliases. 
- def : SIMDScalarCPYAlias<"dup", ".b", + def : SIMDScalarDUPAlias<"dup", ".b", !cast(NAME#"i8"), FPR8, V128, VectorIndexB>; - def : SIMDScalarCPYAlias<"dup", ".h", + def : SIMDScalarDUPAlias<"dup", ".h", !cast(NAME#"i16"), FPR16, V128, VectorIndexH>; - def : SIMDScalarCPYAlias<"dup", ".s", + def : SIMDScalarDUPAlias<"dup", ".s", !cast(NAME#"i32"), FPR32, V128, VectorIndexS>; - def : SIMDScalarCPYAlias<"dup", ".d", + def : SIMDScalarDUPAlias<"dup", ".d", !cast(NAME#"i64"), FPR64, V128, VectorIndexD>; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index af944ce104a5..27c700977df4 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5374,10 +5374,10 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), //---------------------------------------------------------------------------- -// AdvSIMD scalar CPY instruction +// AdvSIMD scalar DUP instruction //---------------------------------------------------------------------------- -defm CPY : SIMDScalarCPY<"mov">; +defm DUP : SIMDScalarDUP<"mov">; //---------------------------------------------------------------------------- // AdvSIMD scalar pairwise instructions @@ -5788,7 +5788,7 @@ defm : Neon_INS_elt_pattern; // Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, or a MOV (aka CPY here, alias for DUP) if +// subregister extractions, or a MOV (aka DUP here) if // the lane number is anything other than zero. 
def : Pat<(vector_extract (v2f64 V128:$Rn), 0), (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; @@ -5801,13 +5801,13 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; + (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>; def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; + (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), - (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; + (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), - (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; + (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be @@ -8104,7 +8104,7 @@ class NTStore128Pat : Pat<(nontemporalstore (VT FPR128:$Rt), (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), - (CPYi64 FPR128:$Rt, (i64 1)), + (DUPi64 FPR128:$Rt, (i64 1)), GPR64sp:$Rn, simm7s8:$offset)>; def : NTStore128Pat; @@ -8116,7 +8116,7 @@ class NTStore64Pat : Pat<(nontemporalstore (VT FPR64:$Rt), (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), - (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), + (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), GPR64sp:$Rn, simm7s4:$offset)>; // FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td index 61538cb9b206..a860aa907fd1 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -526,7 +526,7 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>; // ASIMD duplicate, gen reg, D-form and Q-form -def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY[^PMEF]")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>; // ASIMD move, saturating diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index 6d1ef662146b..fa10d056b7f7 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -1891,7 +1891,7 @@ def : InstRW<[A64FXWrite_4Cyc_GI0], // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY[^PMEF]")>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>; // ASIMD extract @@ -2512,16 +2512,16 @@ def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>; def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>; // [72] "cpy $Zd, $Pg/m, $Rn"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>; // [73] "cpy $Zd, $Pg/m, $Vn"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>; // [74] "cpy $Zd, $Pg/m, $imm"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, 
CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>; // [75] "cpy $Zd, $Pg/z, $imm"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>; // [76] "ctermeq $Rn, $Rm"; def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index c834ed9fd8f9..d66efb82fccc 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -669,7 +669,7 @@ def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>; def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>; def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>; def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>; -def : InstRW<[M3WriteNSHF1], (instregex "^CPY[^PMEF]")>; +def : InstRW<[M3WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>; def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>; def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index 65439dc943db..94e70793e855 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -810,7 +810,7 @@ def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>; def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>; def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>; def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>; -def : InstRW<[M4WriteNSHF1], (instregex "^CPY[^PMEF]")>; +def : InstRW<[M4WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>; def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>; def : 
InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index 6e14dbe2d6af..1db5f5322a64 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -848,7 +848,7 @@ def : InstRW<[M5WriteNALU2], (instregex "^RBITv")>; def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL|BSP)v")>; def : InstRW<[M5WriteNALU2], (instregex "^CL[STZ]v")>; def : InstRW<[M5WriteNEONB], (instregex "^DUPv.+gpr")>; -def : InstRW<[M5WriteNSHF2], (instregex "^CPY[^PMEF]")>; +def : InstRW<[M5WriteNSHF2], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[M5WriteNSHF2], (instregex "^DUPv.+lane")>; def : InstRW<[M5WriteNSHF2], (instregex "^EXTv")>; def : InstRW<[M5WriteNSHT4A], (instregex "^XTNv")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td index f2cd83caffa2..a3a038f869fb 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -908,7 +908,7 @@ def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 41dd1ca3768b..ffa0a5e7d91a 100644 --- 
a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -1499,7 +1499,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY[^PMEF]")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; // ASIMD extract diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td index f27a431bd001..46a1c217f984 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -1608,7 +1608,7 @@ def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv")>; -def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^CPY[^PMEF]")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUP(i8|i16|i32|i64)$")>; def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv.+gpr")>; // ASIMD extract diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 3d9a626d3ac3..14c9bbd4222c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -3937,19 +3937,19 @@ static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, // vector's elements. 
switch (EltSize) { case 8: - CopyOpc = AArch64::CPYi8; + CopyOpc = AArch64::DUPi8; ExtractSubReg = AArch64::bsub; break; case 16: - CopyOpc = AArch64::CPYi16; + CopyOpc = AArch64::DUPi16; ExtractSubReg = AArch64::hsub; break; case 32: - CopyOpc = AArch64::CPYi32; + CopyOpc = AArch64::DUPi32; ExtractSubReg = AArch64::ssub; break; case 64: - CopyOpc = AArch64::CPYi64; + CopyOpc = AArch64::DUPi64; ExtractSubReg = AArch64::dsub; break; default: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir index 5414fd05d45c..3265d560ef77 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir @@ -92,7 +92,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>)) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1 + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[LDRQui]], 1 ; CHECK-NEXT: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64)) %0:gpr(p0) = COPY $x0 %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>)) @@ -112,8 +112,8 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>)) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1 - ; CHECK-NEXT: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64)) + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[LDRQui]], 1 + ; CHECK-NEXT: STRDui [[DUPi64_]], [[COPY]], 0 :: (store (s64)) %0:gpr(p0) = COPY $x0 %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>)) %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir index b9ad0ecd5ce6..375506f300de 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir @@ -22,8 +22,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub - ; CHECK-NEXT: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1 - ; CHECK-NEXT: $s0 = COPY [[CPYi32_]] + ; CHECK-NEXT: [[DUPi32_:%[0-9]+]]:fpr32 = DUPi32 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: $s0 = COPY [[DUPi32_]] ; CHECK-NEXT: RET_ReallyLR implicit $s0 %0:fpr(<2 x s32>) = COPY $d0 %2:gpr(s64) = G_CONSTANT i64 1 @@ -76,8 +76,8 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 2 - ; CHECK-NEXT: $d0 = COPY [[CPYi64_]] + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[COPY]], 2 + ; CHECK-NEXT: $d0 = COPY [[DUPi64_]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<2 x s64>) = COPY $q0 %2:gpr(s64) = G_CONSTANT i64 2 @@ -108,8 +108,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1 - ; CHECK-NEXT: $h0 = COPY [[CPYi16_]] + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: $h0 = COPY [[DUPi16_]] ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:fpr(<4 x s16>) = COPY $d0 %2:gpr(s64) = G_CONSTANT i64 1 @@ -132,8 +132,8 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 1 - ; CHECK-NEXT: $h0 = COPY [[CPYi16_]] + ; 
CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 1 + ; CHECK-NEXT: $h0 = COPY [[DUPi16_]] ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:fpr(<8 x s16>) = COPY $q0 %2:gpr(s64) = G_CONSTANT i64 1 @@ -156,8 +156,8 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 1 - ; CHECK-NEXT: $h0 = COPY [[CPYi16_]] + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 1 + ; CHECK-NEXT: $h0 = COPY [[DUPi16_]] ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:fpr(<8 x s16>) = COPY $q0 %1:gpr(s32) = G_CONSTANT i32 1 @@ -181,8 +181,8 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 1 - ; CHECK-NEXT: $h0 = COPY [[CPYi16_]] + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 1 + ; CHECK-NEXT: $h0 = COPY [[DUPi16_]] ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:fpr(<8 x s16>) = COPY $q0 %1:gpr(s32) = G_CONSTANT i32 1 @@ -206,8 +206,8 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 1 - ; CHECK-NEXT: $h0 = COPY [[CPYi16_]] + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 1 + ; CHECK-NEXT: $h0 = COPY [[DUPi16_]] ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:fpr(<8 x s16>) = COPY $q0 %1:gpr(s64) = G_CONSTANT i64 1 @@ -290,8 +290,8 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 - ; CHECK-NEXT: $d0 = COPY [[CPYi64_]] + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[COPY]], 1 + ; CHECK-NEXT: $d0 = COPY [[DUPi64_]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<2 x p0>) = COPY $q0 %2:gpr(s64) = G_CONSTANT i64 1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract.mir index c5b48848cbbd..d3795e36bd04 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract.mir @@ -15,9 +15,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[COPY]], 1 ; CHECK-NEXT: $d3 = COPY [[COPY1]] - ; CHECK-NEXT: $d4 = COPY [[CPYi64_]] + ; CHECK-NEXT: $d4 = COPY [[DUPi64_]] ; CHECK-NEXT: RET_ReallyLR implicit $d3 %0:fpr(s128) = COPY $q0 %2:fpr(s64) = G_EXTRACT %0(s128), 0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir index 95c31434bb99..0b010ffcf61c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir @@ -52,19 +52,19 @@ body: | ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY [[INSERT_SUBREG]].hsub - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1 - ; CHECK-NEXT: [[CPYi16_1:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG1]], 2 - ; CHECK-NEXT: [[CPYi16_2:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG2]], 3 + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: [[DUPi16_1:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG1]], 2 + ; CHECK-NEXT: [[DUPi16_2:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG2]], 3 ; CHECK-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = FCVTSHr [[COPY1]] ; CHECK-NEXT: [[FRINTXSr:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr]] ; CHECK-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr]] - ; CHECK-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_]] + ; CHECK-NEXT: 
[[FCVTSHr1:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_]] ; CHECK-NEXT: [[FRINTXSr1:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr1]] ; CHECK-NEXT: [[FCVTHSr1:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr1]] - ; CHECK-NEXT: [[FCVTSHr2:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_1]] + ; CHECK-NEXT: [[FCVTSHr2:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_1]] ; CHECK-NEXT: [[FRINTXSr2:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr2]] ; CHECK-NEXT: [[FCVTHSr2:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr2]] - ; CHECK-NEXT: [[FCVTSHr3:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_2]] + ; CHECK-NEXT: [[FCVTSHr3:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_2]] ; CHECK-NEXT: [[FRINTXSr3:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr3]] ; CHECK-NEXT: [[FCVTHSr3:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr3]] ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF @@ -116,35 +116,35 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 1 - ; CHECK-NEXT: [[CPYi16_1:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 2 - ; CHECK-NEXT: [[CPYi16_2:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 3 - ; CHECK-NEXT: [[CPYi16_3:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 4 - ; CHECK-NEXT: [[CPYi16_4:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 5 - ; CHECK-NEXT: [[CPYi16_5:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 6 - ; CHECK-NEXT: [[CPYi16_6:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 7 + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi16_1:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 2 + ; CHECK-NEXT: [[DUPi16_2:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 3 + ; CHECK-NEXT: [[DUPi16_3:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 4 + ; CHECK-NEXT: [[DUPi16_4:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 5 + ; CHECK-NEXT: [[DUPi16_5:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 6 + ; CHECK-NEXT: [[DUPi16_6:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 7 ; CHECK-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = FCVTSHr [[COPY1]] ; CHECK-NEXT: [[FRINTXSr:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr]] ; CHECK-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr]] 
- ; CHECK-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_]] + ; CHECK-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_]] ; CHECK-NEXT: [[FRINTXSr1:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr1]] ; CHECK-NEXT: [[FCVTHSr1:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr1]] - ; CHECK-NEXT: [[FCVTSHr2:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_1]] + ; CHECK-NEXT: [[FCVTSHr2:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_1]] ; CHECK-NEXT: [[FRINTXSr2:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr2]] ; CHECK-NEXT: [[FCVTHSr2:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr2]] - ; CHECK-NEXT: [[FCVTSHr3:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_2]] + ; CHECK-NEXT: [[FCVTSHr3:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_2]] ; CHECK-NEXT: [[FRINTXSr3:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr3]] ; CHECK-NEXT: [[FCVTHSr3:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr3]] - ; CHECK-NEXT: [[FCVTSHr4:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_3]] + ; CHECK-NEXT: [[FCVTSHr4:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_3]] ; CHECK-NEXT: [[FRINTXSr4:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr4]] ; CHECK-NEXT: [[FCVTHSr4:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr4]] - ; CHECK-NEXT: [[FCVTSHr5:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_4]] + ; CHECK-NEXT: [[FCVTSHr5:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_4]] ; CHECK-NEXT: [[FRINTXSr5:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr5]] ; CHECK-NEXT: [[FCVTHSr5:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr5]] - ; CHECK-NEXT: [[FCVTSHr6:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_5]] + ; CHECK-NEXT: [[FCVTSHr6:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_5]] ; CHECK-NEXT: [[FRINTXSr6:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr6]] ; CHECK-NEXT: [[FCVTHSr6:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr6]] - ; CHECK-NEXT: [[FCVTSHr7:%[0-9]+]]:fpr32 = FCVTSHr [[CPYi16_6]] + ; CHECK-NEXT: [[FCVTSHr7:%[0-9]+]]:fpr32 = FCVTSHr [[DUPi16_6]] ; CHECK-NEXT: [[FRINTXSr7:%[0-9]+]]:fpr32 = FRINTXSr [[FCVTSHr7]] ; CHECK-NEXT: [[FCVTHSr7:%[0-9]+]]:fpr16 = FCVTHSr [[FRINTXSr7]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir index ec6fa8bddc21..4347cfe79ffe 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-unmerge.mir @@ -22,11 +22,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[COPY]], 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.dsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[CPYi64_]], %subreg.dsub + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DUPi64_]], %subreg.dsub ; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0 ; CHECK-NEXT: $q0 = COPY [[INSvi64lane]] ; CHECK-NEXT: RET_ReallyLR implicit $q0 @@ -60,19 +60,19 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub - ; CHECK-NEXT: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[COPY]], 1 - ; CHECK-NEXT: [[CPYi32_1:%[0-9]+]]:fpr32 = CPYi32 [[COPY]], 2 - ; CHECK-NEXT: [[CPYi32_2:%[0-9]+]]:fpr32 = CPYi32 [[COPY]], 3 + ; CHECK-NEXT: [[DUPi32_:%[0-9]+]]:fpr32 = DUPi32 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi32_1:%[0-9]+]]:fpr32 = DUPi32 [[COPY]], 2 + ; CHECK-NEXT: [[DUPi32_2:%[0-9]+]]:fpr32 = DUPi32 [[COPY]], 3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.ssub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[CPYi32_]], %subreg.ssub + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = 
INSERT_SUBREG [[DEF1]], [[DUPi32_]], %subreg.ssub ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[CPYi32_1]], %subreg.ssub + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[DUPi32_1]], %subreg.ssub ; CHECK-NEXT: [[INSvi32lane1:%[0-9]+]]:fpr128 = INSvi32lane [[INSvi32lane]], 2, [[INSERT_SUBREG2]], 0 ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[CPYi32_2]], %subreg.ssub + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[DUPi32_2]], %subreg.ssub ; CHECK-NEXT: [[INSvi32lane2:%[0-9]+]]:fpr128 = INSvi32lane [[INSvi32lane1]], 3, [[INSERT_SUBREG3]], 0 ; CHECK-NEXT: $q0 = COPY [[INSvi32lane2]] ; CHECK-NEXT: RET_ReallyLR implicit $q0 @@ -108,11 +108,11 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY [[INSERT_SUBREG]].hsub - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG]], 1 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[CPYi16_]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[DUPi16_]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane:%[0-9]+]]:fpr128 = INSvi16lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG2]], 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[INSvi16lane]].ssub ; CHECK-NEXT: $s0 = COPY [[COPY2]] @@ -157,19 +157,19 @@ body: | ; 
CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY [[INSERT_SUBREG]].hsub - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1 - ; CHECK-NEXT: [[CPYi16_1:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG1]], 2 - ; CHECK-NEXT: [[CPYi16_2:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG2]], 3 + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: [[DUPi16_1:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG1]], 2 + ; CHECK-NEXT: [[DUPi16_2:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG2]], 3 ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[COPY1]], %subreg.hsub ; CHECK-NEXT: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[CPYi16_]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[DUPi16_]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane:%[0-9]+]]:fpr128 = INSvi16lane [[INSERT_SUBREG3]], 1, [[INSERT_SUBREG4]], 0 ; CHECK-NEXT: [[DEF5:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[CPYi16_1]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[DUPi16_1]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane1:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane]], 2, [[INSERT_SUBREG5]], 0 ; CHECK-NEXT: [[DEF6:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF6]], [[CPYi16_2]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF6]], [[DUPi16_2]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane2:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane1]], 3, [[INSERT_SUBREG6]], 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi16lane2]].dsub ; CHECK-NEXT: $d0 = COPY [[COPY2]] @@ 
-210,35 +210,35 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub - ; CHECK-NEXT: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 1 - ; CHECK-NEXT: [[CPYi16_1:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 2 - ; CHECK-NEXT: [[CPYi16_2:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 3 - ; CHECK-NEXT: [[CPYi16_3:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 4 - ; CHECK-NEXT: [[CPYi16_4:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 5 - ; CHECK-NEXT: [[CPYi16_5:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 6 - ; CHECK-NEXT: [[CPYi16_6:%[0-9]+]]:fpr16 = CPYi16 [[COPY]], 7 + ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi16_1:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 2 + ; CHECK-NEXT: [[DUPi16_2:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 3 + ; CHECK-NEXT: [[DUPi16_3:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 4 + ; CHECK-NEXT: [[DUPi16_4:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 5 + ; CHECK-NEXT: [[DUPi16_5:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 6 + ; CHECK-NEXT: [[DUPi16_6:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[CPYi16_]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DUPi16_]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane:%[0-9]+]]:fpr128 = INSvi16lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[CPYi16_1]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[DUPi16_1]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane1:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane]], 2, [[INSERT_SUBREG2]], 0 ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[CPYi16_2]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[DUPi16_2]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane2:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane1]], 3, [[INSERT_SUBREG3]], 0 ; CHECK-NEXT: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[CPYi16_3]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[DUPi16_3]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane3:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane2]], 4, [[INSERT_SUBREG4]], 0 ; CHECK-NEXT: [[DEF5:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[CPYi16_4]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[DUPi16_4]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane4:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane3]], 5, [[INSERT_SUBREG5]], 0 ; CHECK-NEXT: [[DEF6:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF6]], [[CPYi16_5]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF6]], [[DUPi16_5]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane5:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane4]], 6, [[INSERT_SUBREG6]], 0 ; CHECK-NEXT: [[DEF7:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT_SUBREG7:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF7]], [[CPYi16_6]], %subreg.hsub + ; CHECK-NEXT: [[INSERT_SUBREG7:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF7]], [[DUPi16_6]], %subreg.hsub ; CHECK-NEXT: [[INSvi16lane6:%[0-9]+]]:fpr128 = INSvi16lane [[INSvi16lane5]], 7, [[INSERT_SUBREG7]], 0 ; CHECK-NEXT: $q0 = COPY [[INSvi16lane6]] ; CHECK-NEXT: RET_ReallyLR implicit $q0 @@ -279,21 +279,21 @@ body: | ; CHECK-NEXT: [[DEF6:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF6]], [[COPY]], 
%subreg.dsub ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr8 = COPY [[INSERT_SUBREG]].bsub - ; CHECK-NEXT: [[CPYi8_:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG]], 1 - ; CHECK-NEXT: [[CPYi8_1:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG1]], 2 - ; CHECK-NEXT: [[CPYi8_2:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG2]], 3 - ; CHECK-NEXT: [[CPYi8_3:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG3]], 4 - ; CHECK-NEXT: [[CPYi8_4:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG4]], 5 - ; CHECK-NEXT: [[CPYi8_5:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG5]], 6 - ; CHECK-NEXT: [[CPYi8_6:%[0-9]+]]:fpr8 = CPYi8 [[INSERT_SUBREG6]], 7 + ; CHECK-NEXT: [[DUPi8_:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: [[DUPi8_1:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG1]], 2 + ; CHECK-NEXT: [[DUPi8_2:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG2]], 3 + ; CHECK-NEXT: [[DUPi8_3:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG3]], 4 + ; CHECK-NEXT: [[DUPi8_4:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG4]], 5 + ; CHECK-NEXT: [[DUPi8_5:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG5]], 6 + ; CHECK-NEXT: [[DUPi8_6:%[0-9]+]]:fpr8 = DUPi8 [[INSERT_SUBREG6]], 7 ; CHECK-NEXT: $b0 = COPY [[COPY1]] - ; CHECK-NEXT: $b1 = COPY [[CPYi8_]] - ; CHECK-NEXT: $b2 = COPY [[CPYi8_1]] - ; CHECK-NEXT: $b3 = COPY [[CPYi8_2]] - ; CHECK-NEXT: $b4 = COPY [[CPYi8_3]] - ; CHECK-NEXT: $b5 = COPY [[CPYi8_4]] - ; CHECK-NEXT: $b6 = COPY [[CPYi8_5]] - ; CHECK-NEXT: $b7 = COPY [[CPYi8_6]] + ; CHECK-NEXT: $b1 = COPY [[DUPi8_]] + ; CHECK-NEXT: $b2 = COPY [[DUPi8_1]] + ; CHECK-NEXT: $b3 = COPY [[DUPi8_2]] + ; CHECK-NEXT: $b4 = COPY [[DUPi8_3]] + ; CHECK-NEXT: $b5 = COPY [[DUPi8_4]] + ; CHECK-NEXT: $b6 = COPY [[DUPi8_5]] + ; CHECK-NEXT: $b7 = COPY [[DUPi8_6]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<8 x s8>) = COPY $d0 %2:fpr(s8), %3:fpr(s8), %4:fpr(s8), %5:fpr(s8), %6:fpr(s8), %7:fpr(s8), %8:fpr(s8), %9:fpr(s8) = G_UNMERGE_VALUES %0(<8 x s8>) @@ -321,9 +321,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY 
[[COPY]].dsub - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[COPY]], 1 ; CHECK-NEXT: $d0 = COPY [[COPY1]] - ; CHECK-NEXT: $d1 = COPY [[CPYi64_]] + ; CHECK-NEXT: $d1 = COPY [[DUPi64_]] ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:fpr(<4 x s32>) = COPY $q0 %1:fpr(<2 x s32>), %2:fpr(<2 x s32>) = G_UNMERGE_VALUES %0(<4 x s32>) @@ -347,9 +347,9 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub - ; CHECK-NEXT: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1 + ; CHECK-NEXT: [[DUPi32_:%[0-9]+]]:fpr32 = DUPi32 [[INSERT_SUBREG]], 1 ; CHECK-NEXT: $s0 = COPY [[COPY1]] - ; CHECK-NEXT: $s1 = COPY [[CPYi32_]] + ; CHECK-NEXT: $s1 = COPY [[DUPi32_]] ; CHECK-NEXT: RET_ReallyLR implicit $s0 %0:fpr(<4 x s16>) = COPY $d0 %1:fpr(<2 x s16>), %2:fpr(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>) @@ -371,9 +371,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub - ; CHECK-NEXT: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 + ; CHECK-NEXT: [[DUPi64_:%[0-9]+]]:fpr64 = DUPi64 [[COPY]], 1 ; CHECK-NEXT: $d0 = COPY [[COPY1]] - ; CHECK-NEXT: $d1 = COPY [[CPYi64_]] + ; CHECK-NEXT: $d1 = COPY [[DUPi64_]] ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 %0:fpr(s128) = COPY $q0 %1:fpr(s64), %2:fpr(s64) = G_UNMERGE_VALUES %0(s128) From a881215821e9cf27dbf4ccb89c047ecccd0c30e6 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Jan 2022 20:03:02 +0000 Subject: [PATCH 726/992] [gn build] Port 68ac7b17016e --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index e32b4b4e1182..45aac8741f6b 100644 --- 
a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -99,6 +99,7 @@ static_library("CodeGen") { "MIRSampleProfile.cpp", "MIRVRegNamerUtils.cpp", "MIRYamlMapping.cpp", + "MLRegallocEvictAdvisor.cpp", "MachineBasicBlock.cpp", "MachineBlockFrequencyInfo.cpp", "MachineBlockPlacement.cpp", From dd83befe33a055c9053f16ff44f4639c6ea15f4a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 5 Jan 2022 12:30:13 -0800 Subject: [PATCH 727/992] [SLP][NFC]Improved isAltShuffle by comparing instructions instead of opcodes, NFC. NFC part of D115955. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1dd31dcae253..803dabc5e07b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -435,7 +435,7 @@ struct InstructionsState { } /// Some of the instructions in the list have alternate opcodes. - bool isAltShuffle() const { return getOpcode() != getAltOpcode(); } + bool isAltShuffle() const { return AltOp != MainOp; } bool isOpcodeOrAlt(Instruction *I) const { unsigned CheckedOpcode = I->getOpcode(); @@ -2019,9 +2019,7 @@ class BoUpSLP { } /// Some of the instructions in the list have alternate opcodes. - bool isAltShuffle() const { - return getOpcode() != getAltOpcode(); - } + bool isAltShuffle() const { return MainOp != AltOp; } bool isOpcodeOrAlt(Instruction *I) const { unsigned CheckedOpcode = I->getOpcode(); From 4ca5e95c6f4e8f48cd8315d801c186f4e7cae69c Mon Sep 17 00:00:00 2001 From: Mogball Date: Mon, 20 Dec 2021 16:48:00 +0000 Subject: [PATCH 728/992] [mlir] Symbol DCE ignores unknown symbols Instead of failing when it encounters a reference to an unknown symbol, Symbol DCE should ignore them. 
References to unknown symbols do not affect the overall function of Symbol DCE, so it should not need to fail when it encounters one. In general, requiring that symbol references always be valid rather than only when necessary can be overly conservative. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D116047 --- mlir/lib/Transforms/SymbolDCE.cpp | 8 +++----- mlir/test/Transforms/test-symbol-dce.mlir | 12 +++++++++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Transforms/SymbolDCE.cpp b/mlir/lib/Transforms/SymbolDCE.cpp index 0319cb95d77d..116923909959 100644 --- a/mlir/lib/Transforms/SymbolDCE.cpp +++ b/mlir/lib/Transforms/SymbolDCE.cpp @@ -124,11 +124,9 @@ LogicalResult SymbolDCE::computeLiveness(Operation *symbolTableOp, // Lookup the symbols referenced by this use. resolvedSymbols.clear(); if (failed(symbolTable.lookupSymbolIn( - op->getParentOp(), use.getSymbolRef(), resolvedSymbols))) { - return use.getUser()->emitError() - << "unable to resolve reference to symbol " - << use.getSymbolRef(); - } + op->getParentOp(), use.getSymbolRef(), resolvedSymbols))) + // Ignore references to unknown symbols. + continue; // Mark each of the resolved symbols as live. for (Operation *resolvedSymbol : resolvedSymbols) diff --git a/mlir/test/Transforms/test-symbol-dce.mlir b/mlir/test/Transforms/test-symbol-dce.mlir index 181352234bdd..a342cdcb6d1c 100644 --- a/mlir/test/Transforms/test-symbol-dce.mlir +++ b/mlir/test/Transforms/test-symbol-dce.mlir @@ -84,7 +84,17 @@ module { // ----- +// Check that unknown symbol references are OK. 
module { - // expected-error@+1 {{unable to resolve reference to symbol}} + // CHECK-NOT: func private @dead_private_function + func private @dead_private_function() + + // CHECK: func private @live_private_function + func private @live_private_function() + + // CHECK: "live.user"() {uses = [@live_private_function]} : () -> () + "live.user"() {uses = [@live_private_function]} : () -> () + + // CHECK: "live.user"() {uses = [@unknown_symbol]} : () -> () "live.user"() {uses = [@unknown_symbol]} : () -> () } From 7e08a1208889756bb7c44121f63a1b32f6c87ea5 Mon Sep 17 00:00:00 2001 From: Collin Baker Date: Wed, 5 Jan 2022 15:58:53 -0500 Subject: [PATCH 729/992] [clang] Fall back on Android triple w/o API level for runtimes search Clang searches for runtimes (e.g. libclang_rt*) first in a subdirectory named for the target triple (corresponding to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON), then if it's not found uses .../lib//libclang_rt* with a suffix corresponding to the arch and environment name. Android triples optionally include an API level indicating the minimum Android version to be run on (e.g. aarch64-unknown-linux-android21). When compiler-rt is built with LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON this API level is part of the output path. Linking code built for a later API level against a runtime built for an earlier one is safe. In projects with several API level targets this is desireable to avoid re-building the same runtimes many times. This is difficult with the current runtime search method: if the API levels don't exactly match Clang gives up on the per-target runtime directory path. To enable this more simply, this change tries target triple without the API level before falling back on the old layout. Another option would be to try every API level in the triple, e.g. check aarch-64-unknown-linux-android21, then ...20, then ...19, etc. 
Differential Revision: https://reviews.llvm.org/D115049 --- clang/include/clang/Driver/ToolChain.h | 8 +-- clang/lib/Driver/Driver.cpp | 13 +++-- clang/lib/Driver/ToolChain.cpp | 50 +++++++++++++------ clang/lib/Driver/ToolChains/Fuchsia.cpp | 8 +-- clang/lib/Driver/ToolChains/VEToolchain.cpp | 3 +- .../libclang_rt.builtins.a | 0 .../libclang_rt.builtins.a | 0 .../Driver/linux-per-target-runtime-dir.c | 18 +++++++ 8 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-unknown-linux-android/libclang_rt.builtins.a create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-unknown-linux-android21/libclang_rt.builtins.a diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 4afc9bf36b5f..eb95806a2f75 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -452,11 +452,11 @@ class ToolChain { StringRef Component, FileType Type = ToolChain::FT_Static) const; - // Returns target specific runtime path if it exists. - virtual std::string getRuntimePath() const; + // Returns target specific runtime paths. + path_list getRuntimePaths() const; - // Returns target specific standard library path if it exists. - virtual std::string getStdlibPath() const; + // Returns target specific standard library paths. + path_list getStdlibPaths() const; // Returns /lib//. This is used by runtimes (such // as OpenMP) to find arch-specific libraries. 
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 4ac48cc28016..bb7ccf7dd97e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1869,9 +1869,16 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { } if (C.getArgs().hasArg(options::OPT_print_runtime_dir)) { - std::string CandidateRuntimePath = TC.getRuntimePath(); - if (getVFS().exists(CandidateRuntimePath)) - llvm::outs() << CandidateRuntimePath << '\n'; + std::string RuntimePath; + // Get the first existing path, if any. + for (auto Path : TC.getRuntimePaths()) { + if (getVFS().exists(Path)) { + RuntimePath = Path; + break; + } + } + if (!RuntimePath.empty()) + llvm::outs() << RuntimePath << '\n'; else llvm::outs() << TC.getCompilerRTPath() << '\n'; return false; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 50c89aaadc18..7551ee4aeb79 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -75,17 +75,16 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, const ArgList &Args) : D(D), Triple(T), Args(Args), CachedRTTIArg(GetRTTIArgument(Args)), CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)) { - std::string RuntimePath = getRuntimePath(); - if (getVFS().exists(RuntimePath)) - getLibraryPaths().push_back(RuntimePath); - - std::string StdlibPath = getStdlibPath(); - if (getVFS().exists(StdlibPath)) - getFilePaths().push_back(StdlibPath); + auto addIfExists = [this](path_list &List, const std::string &Path) { + if (getVFS().exists(Path)) + List.push_back(Path); + }; - std::string CandidateLibPath = getArchSpecificLibPath(); - if (getVFS().exists(CandidateLibPath)) - getFilePaths().push_back(CandidateLibPath); + for (const auto &Path : getRuntimePaths()) + addIfExists(getLibraryPaths(), Path); + for (const auto &Path : getStdlibPaths()) + addIfExists(getFilePaths(), Path); + addIfExists(getFilePaths(), getArchSpecificLibPath()); } void 
ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) { @@ -485,16 +484,35 @@ const char *ToolChain::getCompilerRTArgString(const llvm::opt::ArgList &Args, return Args.MakeArgString(getCompilerRT(Args, Component, Type)); } -std::string ToolChain::getRuntimePath() const { - SmallString<128> P(D.ResourceDir); - llvm::sys::path::append(P, "lib", getTripleString()); - return std::string(P.str()); +ToolChain::path_list ToolChain::getRuntimePaths() const { + path_list Paths; + auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "lib", Triple.str()); + Paths.push_back(std::string(P.str())); + }; + + addPathForTriple(getTriple()); + + // Android targets may include an API level at the end. We still want to fall + // back on a path without the API level. + if (getTriple().isAndroid() && + getTriple().getEnvironmentName() != "android") { + llvm::Triple TripleWithoutLevel = getTriple(); + TripleWithoutLevel.setEnvironmentName("android"); + addPathForTriple(TripleWithoutLevel); + } + + return Paths; } -std::string ToolChain::getStdlibPath() const { +ToolChain::path_list ToolChain::getStdlibPaths() const { + path_list Paths; SmallString<128> P(D.Dir); llvm::sys::path::append(P, "..", "lib", getTripleString()); - return std::string(P.str()); + Paths.push_back(std::string(P.str())); + + return Paths; } std::string ToolChain::getArchSpecificLibPath() const { diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index a7afec6963a1..0dbe97966801 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -191,9 +191,11 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, auto FilePaths = [&](const Multilib &M) -> std::vector { std::vector FP; - SmallString<128> P(getStdlibPath()); - llvm::sys::path::append(P, M.gccSuffix()); - FP.push_back(std::string(P.str())); + for (const std::string &Path : 
getStdlibPaths()) { + SmallString<128> P(Path); + llvm::sys::path::append(P, M.gccSuffix()); + FP.push_back(std::string(P.str())); + } return FP; }; diff --git a/clang/lib/Driver/ToolChains/VEToolchain.cpp b/clang/lib/Driver/ToolChains/VEToolchain.cpp index 4cdeec7f9d8a..1e43796be1ff 100644 --- a/clang/lib/Driver/ToolChains/VEToolchain.cpp +++ b/clang/lib/Driver/ToolChains/VEToolchain.cpp @@ -48,7 +48,8 @@ VEToolChain::VEToolChain(const Driver &D, const llvm::Triple &Triple, // ${BINPATH}/../lib/ve-unknown-linux-gnu, (== getStdlibPath) // ${RESOURCEDIR}/lib/linux/ve, (== getArchSpecificLibPath) // ${SYSROOT}/opt/nec/ve/lib, - getFilePaths().push_back(getStdlibPath()); + for (auto &Path : getStdlibPaths()) + getFilePaths().push_back(std::move(Path)); getFilePaths().push_back(getArchSpecificLibPath()); getFilePaths().push_back(computeSysRoot() + "/opt/nec/ve/lib"); } diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-unknown-linux-android/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-unknown-linux-android/libclang_rt.builtins.a new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-unknown-linux-android21/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-unknown-linux-android21/libclang_rt.builtins.a new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/linux-per-target-runtime-dir.c b/clang/test/Driver/linux-per-target-runtime-dir.c index 5fbc7163132a..9b23774c5bb6 100644 --- a/clang/test/Driver/linux-per-target-runtime-dir.c +++ b/clang/test/Driver/linux-per-target-runtime-dir.c @@ -25,3 +25,21 @@ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: | FileCheck --check-prefix=CHECK-FILE-NAME-X8664 %s // CHECK-FILE-NAME-X8664: lib{{/|\\}}x86_64-unknown-linux-gnu{{/|\\}}libclang_rt.builtins.a + 
+// RUN: %clang -rtlib=compiler-rt -print-file-name=libclang_rt.builtins.a 2>&1 \ +// RUN: --target=aarch64-unknown-linux-android21 \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: | FileCheck --check-prefix=CHECK-FILE-NAME-ANDROID21 %s +// CHECK-FILE-NAME-ANDROID21: lib{{/|\\}}aarch64-unknown-linux-android21{{/|\\}}libclang_rt.builtins.a + +// RUN: %clang -rtlib=compiler-rt -print-file-name=libclang_rt.builtins.a 2>&1 \ +// RUN: --target=aarch64-unknown-linux-android23 \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: | FileCheck --check-prefix=CHECK-FILE-NAME-ANDROID23 %s +// CHECK-FILE-NAME-ANDROID23: lib{{/|\\}}aarch64-unknown-linux-android{{/|\\}}libclang_rt.builtins.a + +// RUN: %clang -rtlib=compiler-rt -print-file-name=libclang_rt.builtins.a 2>&1 \ +// RUN: --target=aarch64-unknown-linux-android \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: | FileCheck --check-prefix=CHECK-FILE-NAME-ANDROID %s +// CHECK-FILE-NAME-ANDROID: lib{{/|\\}}aarch64-unknown-linux-android{{/|\\}}libclang_rt.builtins.a From 4d06565bd81c7f3ce11f975b765b045a3f84696c Mon Sep 17 00:00:00 2001 From: Kevin Athey Date: Thu, 30 Dec 2021 16:40:00 -0800 Subject: [PATCH 730/992] Initialize SaveInfo in methods Output::preflightKey and Output::preflightElement. When enabling MSAN eager mode with noundef analysis these variables were found to not be initialized in unit tests. 
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116428 --- llvm/lib/Support/YAMLTraits.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index aa6163a76161..10ff020ad972 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -527,8 +527,9 @@ std::vector Output::keys() { } bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault, - bool &UseDefault, void *&) { + bool &UseDefault, void *&SaveInfo) { UseDefault = false; + SaveInfo = nullptr; if (Required || !SameAsDefault || WriteDefaultValues) { auto State = StateStack.back(); if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) { @@ -599,7 +600,8 @@ void Output::endSequence() { StateStack.pop_back(); } -bool Output::preflightElement(unsigned, void *&) { +bool Output::preflightElement(unsigned, void *&SaveInfo) { + SaveInfo = nullptr; return true; } From 04496201e081884a3633fd5f7a517cb408124717 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Tue, 4 Jan 2022 13:29:59 -0600 Subject: [PATCH 731/992] [PowerPC] Add support for ROP protection for 32 bit. Add support for Return Oriented Programming (ROP) protection for 32 bit. This patch also adds a testing for AIX on both 64 and 32 bit. 
Reviewed By: amyk Differential Revision: https://reviews.llvm.org/D111362 --- llvm/lib/Target/PowerPC/P10InstrResources.td | 8 +- llvm/lib/Target/PowerPC/P9InstrResources.td | 2 +- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 6 +- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 25 +- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 24 + .../PowerPC/ppc64-rop-protection-aix.ll | 2843 +++++++++++++++++ .../CodeGen/PowerPC/ppc64-rop-protection.ll | 684 ++++ 7 files changed, 3573 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index f3ae0010ad8e..68a1c225cb05 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -409,8 +409,8 @@ def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read], // 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY], (instrs - HASHST, - HASHSTP + HASHST, HASHST8, + HASHSTP, HASHSTP8 )>; // 24 Cycles Decimal Floating Point operations, 1 input operands @@ -1336,8 +1336,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read], // 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY], (instrs - HASHCHK, - HASHCHKP + HASHCHK, HASHCHK8, + HASHCHKP, HASHCHKP8 )>; // Single crack instructions diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index f7c049951c54..c088d7847ce4 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -1415,7 +1415,7 @@ def : InstRW<[], (instregex "NOP_GT_PWR(6|7)$"), (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"), (instregex "WRTEE(I)?$"), - (instregex 
"HASH(ST|STP|CHK|CHKP)$"), + (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"), ATTN, CLRBHRB, MFBHRBE, diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 3ca563fee970..1d195528c4c0 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -674,7 +674,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, : PPC::MFCR); const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); const MCInstrDesc &HashST = - TII.get(HasPrivileged ? PPC::HASHSTP : PPC::HASHST); + TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8) + : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST)); // Regarding this assert: Even though LR is saved in the caller's frame (i.e., // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no @@ -1590,7 +1591,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 : PPC::MTOCRF); const MCInstrDesc &HashChk = - TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK); + TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8) + : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK)); int64_t LROffset = getReturnSaveOffset(); int64_t FPOffset = 0; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 58af8037f59c..eae8e36e475e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1760,26 +1760,27 @@ defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), // These instructions store a hash computed from the value of the link register // and the value of the stack pointer. 
-let mayStore = 1 in { -def HASHST : XForm_XD6_RA5_RB5<31, 722, (outs), - (ins g8rc:$RB, memrihash:$D_RA_XD), - "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>; -def HASHSTP : XForm_XD6_RA5_RB5<31, 658, (outs), +let mayStore = 1, Interpretation64Bit = 1, isCodeGenOnly = 1 in { +def HASHST8 : XForm_XD6_RA5_RB5<31, 722, (outs), (ins g8rc:$RB, memrihash:$D_RA_XD), - "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>; + "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>; +def HASHSTP8 : XForm_XD6_RA5_RB5<31, 658, (outs), + (ins g8rc:$RB, memrihash:$D_RA_XD), + "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>; } // These instructions check a hash computed from the value of the link register // and the value of the stack pointer. The hasSideEffects flag is needed as the // instruction may TRAP if the hash does not match the hash stored at the // specified address. -let mayLoad = 1, hasSideEffects = 1 in { -def HASHCHK : XForm_XD6_RA5_RB5<31, 754, (outs), - (ins g8rc:$RB, memrihash:$D_RA_XD), - "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>; -def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs), +let mayLoad = 1, hasSideEffects = 1, + Interpretation64Bit = 1, isCodeGenOnly = 1 in { +def HASHCHK8 : XForm_XD6_RA5_RB5<31, 754, (outs), (ins g8rc:$RB, memrihash:$D_RA_XD), - "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>; + "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>; +def HASHCHKP8 : XForm_XD6_RA5_RB5<31, 690, (outs), + (ins g8rc:$RB, memrihash:$D_RA_XD), + "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>; } let Interpretation64Bit = 1, isCodeGenOnly = 1, hasSideEffects = 1 in diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 2340be5b5915..c26b4f6ceb7d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -5530,6 +5530,30 @@ def DWBytes3210 { (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), Word, sub_32)); } +// These instructions store a hash computed from the value of the link register +// and the value of the 
stack pointer. +let mayStore = 1 in { +def HASHST : XForm_XD6_RA5_RB5<31, 722, (outs), + (ins gprc:$RB, memrihash:$D_RA_XD), + "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>; +def HASHSTP : XForm_XD6_RA5_RB5<31, 658, (outs), + (ins gprc:$RB, memrihash:$D_RA_XD), + "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>; +} + +// These instructions check a hash computed from the value of the link register +// and the value of the stack pointer. The hasSideEffects flag is needed as the +// instruction may TRAP if the hash does not match the hash stored at the +// specified address. +let mayLoad = 1, hasSideEffects = 1 in { +def HASHCHK : XForm_XD6_RA5_RB5<31, 754, (outs), + (ins gprc:$RB, memrihash:$D_RA_XD), + "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>; +def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs), + (ins gprc:$RB, memrihash:$D_RA_XD), + "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>; +} + // Now both high word and low word are reversed, next // swap the high word and low word. def : Pat<(i64 (bitreverse i64:$A)), diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll new file mode 100644 index 000000000000..c00d5bcb6208 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll @@ -0,0 +1,2843 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-P10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-P9 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s 
--check-prefix BE-P8 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-32BIT-P10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-32BIT-P9 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-32BIT-P8 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-P10-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-P9-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-P8-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-32BIT-P10-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-32BIT-P9-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names 
-ppc-vsr-nums-as-vr -vec-extabi \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-32BIT-P8-PRIV + + + +;; This test checks that the ROP protect instructions have been correctly +;; added when the ROP protect option has been specified. The hashst +;; instruction should be added to the prologue and the hashchk should be added +;; to the epilogue. + +define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0 { +; BE-P10-LABEL: caller: +; BE-P10: # %bb.0: # %entry +; BE-P10-NEXT: mflr r0 +; BE-P10-NEXT: std r0, 16(r1) +; BE-P10-NEXT: hashst r0, -16(r1) +; BE-P10-NEXT: stdu r1, -128(r1) +; BE-P10-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; BE-P10-NEXT: mr r31, r4 +; BE-P10-NEXT: bl .callee[PR] +; BE-P10-NEXT: nop +; BE-P10-NEXT: add r3, r3, r31 +; BE-P10-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; BE-P10-NEXT: clrldi r3, r3, 32 +; BE-P10-NEXT: addi r1, r1, 128 +; BE-P10-NEXT: ld r0, 16(r1) +; BE-P10-NEXT: hashchk r0, -16(r1) +; BE-P10-NEXT: mtlr r0 +; BE-P10-NEXT: blr +; +; BE-P9-LABEL: caller: +; BE-P9: # %bb.0: # %entry +; BE-P9-NEXT: mflr r0 +; BE-P9-NEXT: std r0, 16(r1) +; BE-P9-NEXT: hashst r0, -16(r1) +; BE-P9-NEXT: stdu r1, -128(r1) +; BE-P9-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; BE-P9-NEXT: mr r31, r4 +; BE-P9-NEXT: bl .callee[PR] +; BE-P9-NEXT: nop +; BE-P9-NEXT: add r3, r3, r31 +; BE-P9-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; BE-P9-NEXT: clrldi r3, r3, 32 +; BE-P9-NEXT: addi r1, r1, 128 +; BE-P9-NEXT: ld r0, 16(r1) +; BE-P9-NEXT: mtlr r0 +; BE-P9-NEXT: hashchk r0, -16(r1) +; BE-P9-NEXT: blr +; +; BE-P8-LABEL: caller: +; BE-P8: # %bb.0: # %entry +; BE-P8-NEXT: mflr r0 +; BE-P8-NEXT: std r0, 16(r1) +; BE-P8-NEXT: hashst r0, -16(r1) +; BE-P8-NEXT: stdu r1, -128(r1) +; BE-P8-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; BE-P8-NEXT: mr r31, r4 +; BE-P8-NEXT: bl .callee[PR] +; BE-P8-NEXT: nop +; BE-P8-NEXT: add r3, r3, r31 +; BE-P8-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; 
BE-P8-NEXT: clrldi r3, r3, 32 +; BE-P8-NEXT: addi r1, r1, 128 +; BE-P8-NEXT: ld r0, 16(r1) +; BE-P8-NEXT: hashchk r0, -16(r1) +; BE-P8-NEXT: mtlr r0 +; BE-P8-NEXT: blr +; +; BE-32BIT-P10-LABEL: caller: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-NEXT: hashst r0, -16(r1) +; BE-32BIT-P10-NEXT: stwu r1, -80(r1) +; BE-32BIT-P10-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: mr r31, r4 +; BE-32BIT-P10-NEXT: bl .callee[PR] +; BE-32BIT-P10-NEXT: nop +; BE-32BIT-P10-NEXT: add r3, r3, r31 +; BE-32BIT-P10-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: addi r1, r1, 80 +; BE-32BIT-P10-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: caller: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-NEXT: hashst r0, -16(r1) +; BE-32BIT-P9-NEXT: stwu r1, -80(r1) +; BE-32BIT-P9-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: mr r31, r4 +; BE-32BIT-P9-NEXT: bl .callee[PR] +; BE-32BIT-P9-NEXT: nop +; BE-32BIT-P9-NEXT: add r3, r3, r31 +; BE-32BIT-P9-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: addi r1, r1, 80 +; BE-32BIT-P9-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: caller: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-NEXT: hashst r0, -16(r1) +; BE-32BIT-P8-NEXT: stwu r1, -80(r1) +; BE-32BIT-P8-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: mr r31, r4 +; BE-32BIT-P8-NEXT: bl .callee[PR] +; BE-32BIT-P8-NEXT: nop +; BE-32BIT-P8-NEXT: add r3, r3, r31 +; BE-32BIT-P8-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: addi r1, r1, 80 +; BE-32BIT-P8-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-NEXT: hashchk r0, 
-16(r1) +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: blr +; +; BE-P10-PRIV-LABEL: caller: +; BE-P10-PRIV: # %bb.0: # %entry +; BE-P10-PRIV-NEXT: mflr r0 +; BE-P10-PRIV-NEXT: std r0, 16(r1) +; BE-P10-PRIV-NEXT: hashstp r0, -16(r1) +; BE-P10-PRIV-NEXT: stdu r1, -128(r1) +; BE-P10-PRIV-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: mr r31, r4 +; BE-P10-PRIV-NEXT: bl .callee[PR] +; BE-P10-PRIV-NEXT: nop +; BE-P10-PRIV-NEXT: add r3, r3, r31 +; BE-P10-PRIV-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P10-PRIV-NEXT: addi r1, r1, 128 +; BE-P10-PRIV-NEXT: ld r0, 16(r1) +; BE-P10-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-P10-PRIV-NEXT: mtlr r0 +; BE-P10-PRIV-NEXT: blr +; +; BE-P9-PRIV-LABEL: caller: +; BE-P9-PRIV: # %bb.0: # %entry +; BE-P9-PRIV-NEXT: mflr r0 +; BE-P9-PRIV-NEXT: std r0, 16(r1) +; BE-P9-PRIV-NEXT: hashstp r0, -16(r1) +; BE-P9-PRIV-NEXT: stdu r1, -128(r1) +; BE-P9-PRIV-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: mr r31, r4 +; BE-P9-PRIV-NEXT: bl .callee[PR] +; BE-P9-PRIV-NEXT: nop +; BE-P9-PRIV-NEXT: add r3, r3, r31 +; BE-P9-PRIV-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P9-PRIV-NEXT: addi r1, r1, 128 +; BE-P9-PRIV-NEXT: ld r0, 16(r1) +; BE-P9-PRIV-NEXT: mtlr r0 +; BE-P9-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-P9-PRIV-NEXT: blr +; +; BE-P8-PRIV-LABEL: caller: +; BE-P8-PRIV: # %bb.0: # %entry +; BE-P8-PRIV-NEXT: mflr r0 +; BE-P8-PRIV-NEXT: std r0, 16(r1) +; BE-P8-PRIV-NEXT: hashstp r0, -16(r1) +; BE-P8-PRIV-NEXT: stdu r1, -128(r1) +; BE-P8-PRIV-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: mr r31, r4 +; BE-P8-PRIV-NEXT: bl .callee[PR] +; BE-P8-PRIV-NEXT: nop +; BE-P8-PRIV-NEXT: add r3, r3, r31 +; BE-P8-PRIV-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P8-PRIV-NEXT: addi r1, r1, 128 +; BE-P8-PRIV-NEXT: ld r0, 16(r1) +; BE-P8-PRIV-NEXT: hashchkp r0, -16(r1) +; 
BE-P8-PRIV-NEXT: mtlr r0 +; BE-P8-PRIV-NEXT: blr +; +; BE-32BIT-P10-PRIV-LABEL: caller: +; BE-32BIT-P10-PRIV: # %bb.0: # %entry +; BE-32BIT-P10-PRIV-NEXT: mflr r0 +; BE-32BIT-P10-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P10-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P10-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: mr r31, r4 +; BE-32BIT-P10-PRIV-NEXT: bl .callee[PR] +; BE-32BIT-P10-PRIV-NEXT: nop +; BE-32BIT-P10-PRIV-NEXT: add r3, r3, r31 +; BE-32BIT-P10-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: addi r1, r1, 80 +; BE-32BIT-P10-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P10-PRIV-NEXT: mtlr r0 +; BE-32BIT-P10-PRIV-NEXT: blr +; +; BE-32BIT-P9-PRIV-LABEL: caller: +; BE-32BIT-P9-PRIV: # %bb.0: # %entry +; BE-32BIT-P9-PRIV-NEXT: mflr r0 +; BE-32BIT-P9-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P9-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P9-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: mr r31, r4 +; BE-32BIT-P9-PRIV-NEXT: bl .callee[PR] +; BE-32BIT-P9-PRIV-NEXT: nop +; BE-32BIT-P9-PRIV-NEXT: add r3, r3, r31 +; BE-32BIT-P9-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: addi r1, r1, 80 +; BE-32BIT-P9-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: mtlr r0 +; BE-32BIT-P9-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P9-PRIV-NEXT: blr +; +; BE-32BIT-P8-PRIV-LABEL: caller: +; BE-32BIT-P8-PRIV: # %bb.0: # %entry +; BE-32BIT-P8-PRIV-NEXT: mflr r0 +; BE-32BIT-P8-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P8-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P8-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: mr r31, r4 +; BE-32BIT-P8-PRIV-NEXT: bl .callee[PR] +; BE-32BIT-P8-PRIV-NEXT: nop +; BE-32BIT-P8-PRIV-NEXT: add r3, r3, r31 +; BE-32BIT-P8-PRIV-NEXT: lwz r31, 76(r1) # 4-byte 
Folded Reload +; BE-32BIT-P8-PRIV-NEXT: addi r1, r1, 80 +; BE-32BIT-P8-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P8-PRIV-NEXT: mtlr r0 +; BE-32BIT-P8-PRIV-NEXT: blr +entry: + %call = tail call zeroext i32 @callee(i32 zeroext %in) + %add = add i32 %call, %add_after + ret i32 %add +} + +;; TODO: Remove this comment once the new version of the ABI is available. +;; NOTE: This test is technically a violation of the ABI. The hash is saved +;; outside of the initial 288 byte volatile program storage region in the +;; Protected Zone. However, this restriction will be removed in an upcoming +;; revision of the ABI. +define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 { +; BE-P10-LABEL: spill: +; BE-P10: # %bb.0: # %entry +; BE-P10-NEXT: mflr r0 +; BE-P10-NEXT: mfcr r12 +; BE-P10-NEXT: std r0, 16(r1) +; BE-P10-NEXT: hashst r0, -488(r1) +; BE-P10-NEXT: stw r12, 8(r1) +; BE-P10-NEXT: stdu r1, -624(r1) +; BE-P10-NEXT: lwz r4, 12(r3) +; BE-P10-NEXT: std r14, 336(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r15, 344(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r16, 352(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r17, 360(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r18, 368(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r19, 376(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r20, 384(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r21, 392(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r22, 400(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r23, 408(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r24, 416(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r25, 424(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r26, 432(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r27, 440(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r28, 448(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r29, 456(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r30, 464(r1) # 8-byte Folded Spill +; BE-P10-NEXT: std r31, 472(r1) # 8-byte Folded Spill +; BE-P10-NEXT: 
stfd f14, 480(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f15, 488(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f16, 496(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f17, 504(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f18, 512(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f19, 520(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f20, 528(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f21, 536(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f22, 544(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f23, 552(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f24, 560(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f25, 568(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f26, 576(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f27, 584(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f28, 592(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill +; BE-P10-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill +; BE-P10-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-P10-NEXT: stw r4, 132(r1) +; BE-P10-NEXT: #APP +; BE-P10-NEXT: nop +; BE-P10-NEXT: #NO_APP +; BE-P10-NEXT: addi r3, r1, 132 +; BE-P10-NEXT: bl .callee2[PR] +; BE-P10-NEXT: nop +; BE-P10-NEXT: ld r4, 120(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lxv v31, 320(r1) 
# 16-byte Folded Reload +; BE-P10-NEXT: lxv v30, 304(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v29, 288(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v28, 272(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v27, 256(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v26, 240(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v25, 224(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v24, 208(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v23, 192(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v22, 176(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v21, 160(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lxv v20, 144(r1) # 16-byte Folded Reload +; BE-P10-NEXT: lfd f31, 616(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f30, 608(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r31, 472(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r30, 464(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r29, 456(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r28, 448(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r27, 440(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r26, 432(r1) # 8-byte Folded Reload +; 
BE-P10-NEXT: ld r25, 424(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r24, 416(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r23, 408(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r22, 400(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r21, 392(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r20, 384(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r19, 376(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r18, 368(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r17, 360(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r16, 352(r1) # 8-byte Folded Reload +; BE-P10-NEXT: ld r15, 344(r1) # 8-byte Folded Reload +; BE-P10-NEXT: lwz r4, 16(r4) +; BE-P10-NEXT: add r3, r4, r3 +; BE-P10-NEXT: clrldi r3, r3, 32 +; BE-P10-NEXT: ld r14, 336(r1) # 8-byte Folded Reload +; BE-P10-NEXT: addi r1, r1, 624 +; BE-P10-NEXT: ld r0, 16(r1) +; BE-P10-NEXT: lwz r12, 8(r1) +; BE-P10-NEXT: mtlr r0 +; BE-P10-NEXT: hashchk r0, -488(r1) +; BE-P10-NEXT: mtocrf 32, r12 +; BE-P10-NEXT: mtocrf 16, r12 +; BE-P10-NEXT: mtocrf 8, r12 +; BE-P10-NEXT: blr +; +; BE-P9-LABEL: spill: +; BE-P9: # %bb.0: # %entry +; BE-P9-NEXT: mflr r0 +; BE-P9-NEXT: mfcr r12 +; BE-P9-NEXT: std r0, 16(r1) +; BE-P9-NEXT: hashst r0, -488(r1) +; BE-P9-NEXT: stw r12, 8(r1) +; BE-P9-NEXT: stdu r1, -624(r1) +; BE-P9-NEXT: lwz r4, 12(r3) +; BE-P9-NEXT: std r14, 336(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r15, 344(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill +; BE-P9-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill +; BE-P9-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r16, 352(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r17, 360(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r18, 368(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r19, 376(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r20, 384(r1) # 8-byte Folded Spill +; 
BE-P9-NEXT: std r21, 392(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r22, 400(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r23, 408(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r24, 416(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r25, 424(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r26, 432(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r27, 440(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill +; BE-P9-NEXT: std r28, 448(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r29, 456(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r30, 464(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r31, 472(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f14, 480(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f15, 488(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f16, 496(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f17, 504(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f18, 512(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f19, 520(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f20, 528(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f21, 536(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f22, 544(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f23, 552(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f24, 560(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f25, 568(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f26, 576(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f27, 584(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f28, 592(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P9-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-P9-NEXT: stw r4, 
132(r1) +; BE-P9-NEXT: #APP +; BE-P9-NEXT: nop +; BE-P9-NEXT: #NO_APP +; BE-P9-NEXT: addi r3, r1, 132 +; BE-P9-NEXT: bl .callee2[PR] +; BE-P9-NEXT: nop +; BE-P9-NEXT: ld r4, 120(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lxv v31, 320(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v30, 304(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v29, 288(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v28, 272(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v27, 256(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v26, 240(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v25, 224(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v24, 208(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v23, 192(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v22, 176(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v21, 160(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lxv v20, 144(r1) # 16-byte Folded Reload +; BE-P9-NEXT: lfd f31, 616(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f30, 608(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r31, 472(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r30, 464(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r29, 456(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r28, 448(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r27, 440(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r26, 432(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r25, 424(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r24, 416(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r23, 408(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r22, 400(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r21, 392(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lwz r4, 16(r4) +; BE-P9-NEXT: add r3, r4, r3 +; BE-P9-NEXT: lfd f23, 
552(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r20, 384(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r19, 376(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r18, 368(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r17, 360(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r16, 352(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r15, 344(r1) # 8-byte Folded Reload +; BE-P9-NEXT: ld r14, 336(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P9-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P9-NEXT: clrldi r3, r3, 32 +; BE-P9-NEXT: addi r1, r1, 624 +; BE-P9-NEXT: ld r0, 16(r1) +; BE-P9-NEXT: lwz r12, 8(r1) +; BE-P9-NEXT: mtlr r0 +; BE-P9-NEXT: hashchk r0, -488(r1) +; BE-P9-NEXT: mtocrf 32, r12 +; BE-P9-NEXT: mtocrf 16, r12 +; BE-P9-NEXT: mtocrf 8, r12 +; BE-P9-NEXT: blr +; +; BE-P8-LABEL: spill: +; BE-P8: # %bb.0: # %entry +; BE-P8-NEXT: mfcr r12 +; BE-P8-NEXT: mflr r0 +; BE-P8-NEXT: std r0, 16(r1) +; BE-P8-NEXT: hashst r0, -488(r1) +; BE-P8-NEXT: stw r12, 8(r1) +; BE-P8-NEXT: stdu r1, -624(r1) +; BE-P8-NEXT: li r4, 144 +; BE-P8-NEXT: std r14, 336(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r15, 344(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r16, 352(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r17, 360(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 160 +; BE-P8-NEXT: std r18, 368(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r19, 376(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r20, 384(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r21, 392(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v21, r1, 
r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 176 +; BE-P8-NEXT: std r22, 400(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r23, 408(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r24, 416(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r25, 424(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r26, 432(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r27, 440(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r28, 448(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r29, 456(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r30, 464(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r31, 472(r1) # 8-byte Folded Spill +; BE-P8-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 192 +; BE-P8-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 208 +; BE-P8-NEXT: stfd f14, 480(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 224 +; BE-P8-NEXT: stfd f15, 488(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 240 +; BE-P8-NEXT: stfd f16, 496(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 256 +; BE-P8-NEXT: stfd f17, 504(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 272 +; BE-P8-NEXT: stfd f18, 512(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 288 +; BE-P8-NEXT: stfd f19, 520(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 304 +; BE-P8-NEXT: stfd f20, 528(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: li r4, 320 +; BE-P8-NEXT: stfd f21, 536(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill +; BE-P8-NEXT: lwz r4, 12(r3) +; BE-P8-NEXT: stfd f22, 544(r1) # 8-byte Folded Spill +; BE-P8-NEXT: 
stfd f23, 552(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f24, 560(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f25, 568(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f26, 576(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f27, 584(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f28, 592(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P8-NEXT: stw r4, 132(r1) +; BE-P8-NEXT: #APP +; BE-P8-NEXT: nop +; BE-P8-NEXT: #NO_APP +; BE-P8-NEXT: addi r3, r1, 132 +; BE-P8-NEXT: bl .callee2[PR] +; BE-P8-NEXT: nop +; BE-P8-NEXT: ld r4, 120(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f31, 616(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f30, 608(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r31, 472(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r30, 464(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r29, 456(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lwz r4, 16(r4) +; BE-P8-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r28, 448(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r27, 440(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r26, 432(r1) # 8-byte Folded Reload +; BE-P8-NEXT: add r3, r4, r3 +; BE-P8-NEXT: li r4, 320 +; BE-P8-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r25, 424(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r24, 416(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 304 +; BE-P8-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r23, 408(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r22, 400(r1) 
# 8-byte Folded Reload +; BE-P8-NEXT: clrldi r3, r3, 32 +; BE-P8-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 288 +; BE-P8-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r21, 392(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r20, 384(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v29, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 272 +; BE-P8-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r19, 376(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r18, 368(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v28, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 256 +; BE-P8-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r17, 360(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r16, 352(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v27, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 240 +; BE-P8-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r15, 344(r1) # 8-byte Folded Reload +; BE-P8-NEXT: ld r14, 336(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v26, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 224 +; BE-P8-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v25, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 208 +; BE-P8-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v24, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 192 +; BE-P8-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P8-NEXT: lxvd2x v23, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 176 +; BE-P8-NEXT: lxvd2x v22, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 160 +; BE-P8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: li r4, 144 +; BE-P8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload +; BE-P8-NEXT: addi r1, r1, 624 +; BE-P8-NEXT: ld r0, 16(r1) +; BE-P8-NEXT: lwz r12, 8(r1) +; BE-P8-NEXT: mtocrf 32, r12 +; BE-P8-NEXT: hashchk r0, -488(r1) +; BE-P8-NEXT: mtlr r0 +; BE-P8-NEXT: mtocrf 16, r12 +; BE-P8-NEXT: 
mtocrf 8, r12 +; BE-P8-NEXT: blr +; +; BE-32BIT-P10-LABEL: spill: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: mfcr r12 +; BE-32BIT-P10-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-NEXT: hashst r0, -424(r1) +; BE-32BIT-P10-NEXT: stw r12, 4(r1) +; BE-32BIT-P10-NEXT: stwu r1, -496(r1) +; BE-32BIT-P10-NEXT: lwz r4, 12(r3) +; BE-32BIT-P10-NEXT: stw r13, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r14, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r15, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r16, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r17, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r18, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r19, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r20, 304(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r21, 308(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r22, 312(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r23, 316(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r24, 320(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r25, 324(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r26, 328(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r27, 332(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r28, 336(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r29, 340(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r30, 344(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r31, 348(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f14, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f15, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f16, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f21, 408(r1) # 8-byte 
Folded Spill +; BE-32BIT-P10-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r3, 64(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r4, 68(r1) +; BE-32BIT-P10-NEXT: #APP +; BE-32BIT-P10-NEXT: nop +; BE-32BIT-P10-NEXT: #NO_APP +; BE-32BIT-P10-NEXT: addi r3, r1, 68 +; BE-32BIT-P10-NEXT: bl .callee2[PR] +; BE-32BIT-P10-NEXT: nop +; BE-32BIT-P10-NEXT: lwz r4, 64(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v31, 256(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v30, 240(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v29, 224(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v28, 208(r1) # 16-byte Folded Reload +; 
BE-32BIT-P10-NEXT: lxv v27, 192(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v26, 176(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v25, 160(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v24, 144(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v23, 128(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v22, 112(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v21, 96(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v20, 80(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f16, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f15, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f14, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r31, 348(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r30, 344(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r29, 340(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r28, 336(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r27, 332(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r26, 328(r1) # 4-byte Folded Reload +; 
BE-32BIT-P10-NEXT: lwz r25, 324(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r24, 320(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r23, 316(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r22, 312(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r21, 308(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r20, 304(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r19, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r18, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r17, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r16, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r15, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r4, 16(r4) +; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: lwz r14, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r13, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: addi r1, r1, 496 +; BE-32BIT-P10-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-NEXT: lwz r12, 4(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: hashchk r0, -424(r1) +; BE-32BIT-P10-NEXT: mtocrf 32, r12 +; BE-32BIT-P10-NEXT: mtocrf 16, r12 +; BE-32BIT-P10-NEXT: mtocrf 8, r12 +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: spill: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: mfcr r12 +; BE-32BIT-P9-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-NEXT: hashst r0, -424(r1) +; BE-32BIT-P9-NEXT: stw r12, 4(r1) +; BE-32BIT-P9-NEXT: stwu r1, -496(r1) +; BE-32BIT-P9-NEXT: lwz r4, 12(r3) +; BE-32BIT-P9-NEXT: stw r13, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r14, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r15, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r16, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill +; 
BE-32BIT-P9-NEXT: stw r17, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r18, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r19, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r20, 304(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r21, 308(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r22, 312(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r23, 316(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r24, 320(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r25, 324(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r26, 328(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r27, 332(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r28, 336(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r29, 340(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r30, 344(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r31, 348(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f14, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f15, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f16, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f23, 424(r1) # 8-byte Folded 
Spill +; BE-32BIT-P9-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r3, 64(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r4, 68(r1) +; BE-32BIT-P9-NEXT: #APP +; BE-32BIT-P9-NEXT: nop +; BE-32BIT-P9-NEXT: #NO_APP +; BE-32BIT-P9-NEXT: addi r3, r1, 68 +; BE-32BIT-P9-NEXT: bl .callee2[PR] +; BE-32BIT-P9-NEXT: nop +; BE-32BIT-P9-NEXT: lwz r4, 64(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v31, 256(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v30, 240(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v29, 224(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v28, 208(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v27, 192(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v26, 176(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v25, 160(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v24, 144(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v23, 128(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v22, 112(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v21, 96(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v20, 80(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r31, 348(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r30, 344(r1) # 4-byte Folded Reload +; 
BE-32BIT-P9-NEXT: lwz r29, 340(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r28, 336(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r27, 332(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r26, 328(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r25, 324(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r24, 320(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r23, 316(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r22, 312(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r21, 308(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r4, 16(r4) +; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r20, 304(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r19, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r18, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r17, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r16, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r15, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r14, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r13, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f16, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f15, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f14, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: addi r1, r1, 496 +; BE-32BIT-P9-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-NEXT: lwz 
r12, 4(r1) +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -424(r1) +; BE-32BIT-P9-NEXT: mtocrf 32, r12 +; BE-32BIT-P9-NEXT: mtocrf 16, r12 +; BE-32BIT-P9-NEXT: mtocrf 8, r12 +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: spill: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mfcr r12 +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-NEXT: hashst r0, -424(r1) +; BE-32BIT-P8-NEXT: stw r12, 4(r1) +; BE-32BIT-P8-NEXT: stwu r1, -496(r1) +; BE-32BIT-P8-NEXT: li r4, 80 +; BE-32BIT-P8-NEXT: stw r13, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r14, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 96 +; BE-32BIT-P8-NEXT: stw r15, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 112 +; BE-32BIT-P8-NEXT: stw r16, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 128 +; BE-32BIT-P8-NEXT: stw r17, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 144 +; BE-32BIT-P8-NEXT: stw r18, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 160 +; BE-32BIT-P8-NEXT: stw r19, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 176 +; BE-32BIT-P8-NEXT: stw r20, 304(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 192 +; BE-32BIT-P8-NEXT: stw r21, 308(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 208 +; BE-32BIT-P8-NEXT: stw r22, 312(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 224 +; 
BE-32BIT-P8-NEXT: stw r23, 316(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 240 +; BE-32BIT-P8-NEXT: stw r24, 320(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 256 +; BE-32BIT-P8-NEXT: stw r25, 324(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: lwz r4, 12(r3) +; BE-32BIT-P8-NEXT: stw r26, 328(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r27, 332(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r28, 336(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r29, 340(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r30, 344(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r31, 348(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f14, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f15, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f16, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r3, 64(r1) # 4-byte Folded Spill +; 
BE-32BIT-P8-NEXT: stw r4, 68(r1) +; BE-32BIT-P8-NEXT: #APP +; BE-32BIT-P8-NEXT: nop +; BE-32BIT-P8-NEXT: #NO_APP +; BE-32BIT-P8-NEXT: addi r3, r1, 68 +; BE-32BIT-P8-NEXT: bl .callee2[PR] +; BE-32BIT-P8-NEXT: nop +; BE-32BIT-P8-NEXT: lwz r4, 64(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r31, 348(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r30, 344(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r29, 340(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r4, 16(r4) +; BE-32BIT-P8-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r28, 336(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r27, 332(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r26, 328(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: li r4, 256 +; BE-32BIT-P8-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r25, 324(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r24, 320(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 240 +; BE-32BIT-P8-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r23, 316(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r22, 312(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 224 +; BE-32BIT-P8-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r21, 308(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r20, 
304(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v29, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 208 +; BE-32BIT-P8-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r19, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r18, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v28, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 192 +; BE-32BIT-P8-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r17, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r16, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v27, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 176 +; BE-32BIT-P8-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r15, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r14, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v26, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 160 +; BE-32BIT-P8-NEXT: lfd f16, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r13, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v25, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 144 +; BE-32BIT-P8-NEXT: lfd f15, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v24, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 128 +; BE-32BIT-P8-NEXT: lfd f14, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v23, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 112 +; BE-32BIT-P8-NEXT: lxvd2x v22, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 96 +; BE-32BIT-P8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 80 +; BE-32BIT-P8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: addi r1, r1, 496 +; BE-32BIT-P8-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-NEXT: lwz r12, 4(r1) +; BE-32BIT-P8-NEXT: mtocrf 32, r12 +; BE-32BIT-P8-NEXT: hashchk r0, -424(r1) +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: mtocrf 16, 
r12 +; BE-32BIT-P8-NEXT: mtocrf 8, r12 +; BE-32BIT-P8-NEXT: blr +; +; BE-P10-PRIV-LABEL: spill: +; BE-P10-PRIV: # %bb.0: # %entry +; BE-P10-PRIV-NEXT: mflr r0 +; BE-P10-PRIV-NEXT: mfcr r12 +; BE-P10-PRIV-NEXT: std r0, 16(r1) +; BE-P10-PRIV-NEXT: hashstp r0, -488(r1) +; BE-P10-PRIV-NEXT: stw r12, 8(r1) +; BE-P10-PRIV-NEXT: stdu r1, -624(r1) +; BE-P10-PRIV-NEXT: lwz r4, 12(r3) +; BE-P10-PRIV-NEXT: std r14, 336(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r15, 344(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r16, 352(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r17, 360(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r18, 368(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r19, 376(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r20, 384(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r21, 392(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r22, 400(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r23, 408(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r24, 416(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r25, 424(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r26, 432(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r27, 440(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r28, 448(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r29, 456(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r30, 464(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: std r31, 472(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f14, 480(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f15, 488(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f16, 496(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f17, 504(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f18, 512(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f19, 520(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f20, 528(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f21, 536(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f22, 544(r1) # 8-byte 
Folded Spill +; BE-P10-PRIV-NEXT: stfd f23, 552(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f24, 560(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f25, 568(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f26, 576(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f27, 584(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f28, 592(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill +; BE-P10-PRIV-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: stw r4, 132(r1) +; BE-P10-PRIV-NEXT: #APP +; BE-P10-PRIV-NEXT: nop +; BE-P10-PRIV-NEXT: #NO_APP +; BE-P10-PRIV-NEXT: addi r3, r1, 132 +; BE-P10-PRIV-NEXT: bl .callee2[PR] +; BE-P10-PRIV-NEXT: nop +; BE-P10-PRIV-NEXT: ld r4, 120(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v31, 320(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v30, 304(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v29, 288(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v28, 272(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v27, 256(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v26, 240(r1) # 16-byte 
Folded Reload +; BE-P10-PRIV-NEXT: lxv v25, 224(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v24, 208(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v23, 192(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v22, 176(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v21, 160(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lxv v20, 144(r1) # 16-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f31, 616(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f30, 608(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r31, 472(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r30, 464(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r29, 456(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r28, 448(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r27, 440(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r26, 432(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r25, 424(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r24, 416(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r23, 
408(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r22, 400(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r21, 392(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r20, 384(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r19, 376(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r18, 368(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r17, 360(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r16, 352(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: ld r15, 344(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: lwz r4, 16(r4) +; BE-P10-PRIV-NEXT: add r3, r4, r3 +; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P10-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: addi r1, r1, 624 +; BE-P10-PRIV-NEXT: ld r0, 16(r1) +; BE-P10-PRIV-NEXT: lwz r12, 8(r1) +; BE-P10-PRIV-NEXT: mtlr r0 +; BE-P10-PRIV-NEXT: hashchkp r0, -488(r1) +; BE-P10-PRIV-NEXT: mtocrf 32, r12 +; BE-P10-PRIV-NEXT: mtocrf 16, r12 +; BE-P10-PRIV-NEXT: mtocrf 8, r12 +; BE-P10-PRIV-NEXT: blr +; +; BE-P9-PRIV-LABEL: spill: +; BE-P9-PRIV: # %bb.0: # %entry +; BE-P9-PRIV-NEXT: mflr r0 +; BE-P9-PRIV-NEXT: mfcr r12 +; BE-P9-PRIV-NEXT: std r0, 16(r1) +; BE-P9-PRIV-NEXT: hashstp r0, -488(r1) +; BE-P9-PRIV-NEXT: stw r12, 8(r1) +; BE-P9-PRIV-NEXT: stdu r1, -624(r1) +; BE-P9-PRIV-NEXT: lwz r4, 12(r3) +; BE-P9-PRIV-NEXT: std r14, 336(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r15, 344(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r16, 352(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r17, 360(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r18, 368(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r19, 376(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v25, 224(r1) # 
16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r20, 384(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r21, 392(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r22, 400(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r23, 408(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r24, 416(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r25, 424(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r26, 432(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r27, 440(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill +; BE-P9-PRIV-NEXT: std r28, 448(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r29, 456(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r30, 464(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r31, 472(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f14, 480(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f15, 488(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f16, 496(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f17, 504(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f18, 512(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f19, 520(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f20, 528(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f21, 536(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f22, 544(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f23, 552(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f24, 560(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f25, 568(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f26, 576(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f27, 584(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f28, 592(r1) # 8-byte Folded Spill +; 
BE-P9-PRIV-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: stw r4, 132(r1) +; BE-P9-PRIV-NEXT: #APP +; BE-P9-PRIV-NEXT: nop +; BE-P9-PRIV-NEXT: #NO_APP +; BE-P9-PRIV-NEXT: addi r3, r1, 132 +; BE-P9-PRIV-NEXT: bl .callee2[PR] +; BE-P9-PRIV-NEXT: nop +; BE-P9-PRIV-NEXT: ld r4, 120(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v31, 320(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v30, 304(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v29, 288(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v28, 272(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v27, 256(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v26, 240(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v25, 224(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v24, 208(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v23, 192(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v22, 176(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v21, 160(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lxv v20, 144(r1) # 16-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f31, 616(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f30, 608(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r31, 472(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r30, 464(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r29, 456(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r28, 448(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r27, 440(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r26, 432(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f25, 568(r1) # 8-byte 
Folded Reload +; BE-P9-PRIV-NEXT: ld r25, 424(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r24, 416(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r23, 408(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r22, 400(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r21, 392(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lwz r4, 16(r4) +; BE-P9-PRIV-NEXT: add r3, r4, r3 +; BE-P9-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r20, 384(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r19, 376(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r18, 368(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r17, 360(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r16, 352(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r15, 344(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P9-PRIV-NEXT: addi r1, r1, 624 +; BE-P9-PRIV-NEXT: ld r0, 16(r1) +; BE-P9-PRIV-NEXT: lwz r12, 8(r1) +; BE-P9-PRIV-NEXT: mtlr r0 +; BE-P9-PRIV-NEXT: hashchkp r0, -488(r1) +; BE-P9-PRIV-NEXT: mtocrf 32, r12 +; BE-P9-PRIV-NEXT: mtocrf 16, r12 +; BE-P9-PRIV-NEXT: mtocrf 8, r12 +; BE-P9-PRIV-NEXT: blr +; +; BE-P8-PRIV-LABEL: spill: +; BE-P8-PRIV: # %bb.0: # %entry +; BE-P8-PRIV-NEXT: mfcr r12 +; BE-P8-PRIV-NEXT: mflr r0 +; BE-P8-PRIV-NEXT: std r0, 16(r1) +; BE-P8-PRIV-NEXT: hashstp r0, -488(r1) +; 
BE-P8-PRIV-NEXT: stw r12, 8(r1) +; BE-P8-PRIV-NEXT: stdu r1, -624(r1) +; BE-P8-PRIV-NEXT: li r4, 144 +; BE-P8-PRIV-NEXT: std r14, 336(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r15, 344(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r16, 352(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r17, 360(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 160 +; BE-P8-PRIV-NEXT: std r18, 368(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r19, 376(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r20, 384(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r21, 392(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 176 +; BE-P8-PRIV-NEXT: std r22, 400(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r23, 408(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r24, 416(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r25, 424(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r26, 432(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r27, 440(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r28, 448(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r29, 456(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r30, 464(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r31, 472(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 192 +; BE-P8-PRIV-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 208 +; BE-P8-PRIV-NEXT: stfd f14, 480(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 224 +; BE-P8-PRIV-NEXT: stfd f15, 488(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 240 +; BE-P8-PRIV-NEXT: stfd f16, 496(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v26, 
r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 256 +; BE-P8-PRIV-NEXT: stfd f17, 504(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 272 +; BE-P8-PRIV-NEXT: stfd f18, 512(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 288 +; BE-P8-PRIV-NEXT: stfd f19, 520(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 304 +; BE-P8-PRIV-NEXT: stfd f20, 528(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: li r4, 320 +; BE-P8-PRIV-NEXT: stfd f21, 536(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill +; BE-P8-PRIV-NEXT: lwz r4, 12(r3) +; BE-P8-PRIV-NEXT: stfd f22, 544(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f23, 552(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f24, 560(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f25, 568(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f26, 576(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f27, 584(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f28, 592(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: stw r4, 132(r1) +; BE-P8-PRIV-NEXT: #APP +; BE-P8-PRIV-NEXT: nop +; BE-P8-PRIV-NEXT: #NO_APP +; BE-P8-PRIV-NEXT: addi r3, r1, 132 +; BE-P8-PRIV-NEXT: bl .callee2[PR] +; BE-P8-PRIV-NEXT: nop +; BE-P8-PRIV-NEXT: ld r4, 120(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f31, 616(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f30, 608(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r31, 472(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f28, 592(r1) # 8-byte Folded 
Reload +; BE-P8-PRIV-NEXT: ld r30, 464(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r29, 456(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lwz r4, 16(r4) +; BE-P8-PRIV-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r28, 448(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r27, 440(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r26, 432(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: add r3, r4, r3 +; BE-P8-PRIV-NEXT: li r4, 320 +; BE-P8-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r25, 424(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r24, 416(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 304 +; BE-P8-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r23, 408(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r22, 400(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P8-PRIV-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 288 +; BE-P8-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r21, 392(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r20, 384(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v29, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 272 +; BE-P8-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r19, 376(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r18, 368(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v28, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 256 +; BE-P8-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r17, 360(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r16, 352(r1) # 8-byte Folded 
Reload +; BE-P8-PRIV-NEXT: lxvd2x v27, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 240 +; BE-P8-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r15, 344(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v26, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 224 +; BE-P8-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v25, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 208 +; BE-P8-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v24, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 192 +; BE-P8-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: lxvd2x v23, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 176 +; BE-P8-PRIV-NEXT: lxvd2x v22, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 160 +; BE-P8-PRIV-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: li r4, 144 +; BE-P8-PRIV-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload +; BE-P8-PRIV-NEXT: addi r1, r1, 624 +; BE-P8-PRIV-NEXT: ld r0, 16(r1) +; BE-P8-PRIV-NEXT: lwz r12, 8(r1) +; BE-P8-PRIV-NEXT: mtocrf 32, r12 +; BE-P8-PRIV-NEXT: hashchkp r0, -488(r1) +; BE-P8-PRIV-NEXT: mtlr r0 +; BE-P8-PRIV-NEXT: mtocrf 16, r12 +; BE-P8-PRIV-NEXT: mtocrf 8, r12 +; BE-P8-PRIV-NEXT: blr +; +; BE-32BIT-P10-PRIV-LABEL: spill: +; BE-32BIT-P10-PRIV: # %bb.0: # %entry +; BE-32BIT-P10-PRIV-NEXT: mflr r0 +; BE-32BIT-P10-PRIV-NEXT: mfcr r12 +; BE-32BIT-P10-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: hashstp r0, -424(r1) +; BE-32BIT-P10-PRIV-NEXT: stw r12, 4(r1) +; BE-32BIT-P10-PRIV-NEXT: stwu r1, -496(r1) +; BE-32BIT-P10-PRIV-NEXT: lwz r4, 12(r3) +; BE-32BIT-P10-PRIV-NEXT: stw r13, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r14, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r15, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r16, 288(r1) 
# 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r17, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r18, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r19, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r20, 304(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r21, 308(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r22, 312(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r23, 316(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r24, 320(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r25, 324(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r26, 328(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r27, 332(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r28, 336(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r29, 340(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r30, 344(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r31, 348(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f14, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f15, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f16, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f28, 464(r1) # 
8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r3, 64(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: stw r4, 68(r1) +; BE-32BIT-P10-PRIV-NEXT: #APP +; BE-32BIT-P10-PRIV-NEXT: nop +; BE-32BIT-P10-PRIV-NEXT: #NO_APP +; BE-32BIT-P10-PRIV-NEXT: addi r3, r1, 68 +; BE-32BIT-P10-PRIV-NEXT: bl .callee2[PR] +; BE-32BIT-P10-PRIV-NEXT: nop +; BE-32BIT-P10-PRIV-NEXT: lwz r4, 64(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v31, 256(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v30, 240(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v29, 224(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v28, 208(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v27, 192(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v26, 176(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v25, 160(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v24, 144(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv 
v23, 128(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v22, 112(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v21, 96(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lxv v20, 80(r1) # 16-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f16, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f15, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lfd f14, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r31, 348(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r30, 344(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r29, 340(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r28, 336(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r27, 332(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r26, 328(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r25, 324(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r24, 320(r1) # 4-byte Folded Reload +; 
BE-32BIT-P10-PRIV-NEXT: lwz r23, 316(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r22, 312(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r21, 308(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r20, 304(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r19, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r18, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r17, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r16, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r15, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r4, 16(r4) +; BE-32BIT-P10-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P10-PRIV-NEXT: lwz r14, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: lwz r13, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: addi r1, r1, 496 +; BE-32BIT-P10-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: lwz r12, 4(r1) +; BE-32BIT-P10-PRIV-NEXT: mtlr r0 +; BE-32BIT-P10-PRIV-NEXT: hashchkp r0, -424(r1) +; BE-32BIT-P10-PRIV-NEXT: mtocrf 32, r12 +; BE-32BIT-P10-PRIV-NEXT: mtocrf 16, r12 +; BE-32BIT-P10-PRIV-NEXT: mtocrf 8, r12 +; BE-32BIT-P10-PRIV-NEXT: blr +; +; BE-32BIT-P9-PRIV-LABEL: spill: +; BE-32BIT-P9-PRIV: # %bb.0: # %entry +; BE-32BIT-P9-PRIV-NEXT: mflr r0 +; BE-32BIT-P9-PRIV-NEXT: mfcr r12 +; BE-32BIT-P9-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: hashstp r0, -424(r1) +; BE-32BIT-P9-PRIV-NEXT: stw r12, 4(r1) +; BE-32BIT-P9-PRIV-NEXT: stwu r1, -496(r1) +; BE-32BIT-P9-PRIV-NEXT: lwz r4, 12(r3) +; BE-32BIT-P9-PRIV-NEXT: stw r13, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r14, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r15, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r16, 288(r1) # 4-byte Folded 
Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r17, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r18, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r19, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r20, 304(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r21, 308(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r22, 312(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r23, 316(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r24, 320(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r25, 324(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r26, 328(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r27, 332(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r28, 336(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r29, 340(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r30, 344(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r31, 348(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f14, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f15, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f16, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; 
BE-32BIT-P9-PRIV-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r3, 64(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: stw r4, 68(r1) +; BE-32BIT-P9-PRIV-NEXT: #APP +; BE-32BIT-P9-PRIV-NEXT: nop +; BE-32BIT-P9-PRIV-NEXT: #NO_APP +; BE-32BIT-P9-PRIV-NEXT: addi r3, r1, 68 +; BE-32BIT-P9-PRIV-NEXT: bl .callee2[PR] +; BE-32BIT-P9-PRIV-NEXT: nop +; BE-32BIT-P9-PRIV-NEXT: lwz r4, 64(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v31, 256(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v30, 240(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v29, 224(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v28, 208(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v27, 192(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v26, 176(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v25, 160(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v24, 144(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v23, 128(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v22, 112(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v21, 96(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lxv v20, 80(r1) # 16-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd 
f31, 488(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r31, 348(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r30, 344(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r29, 340(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r28, 336(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r27, 332(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r26, 328(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r25, 324(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r24, 320(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r23, 316(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r22, 312(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r21, 308(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r4, 16(r4) +; BE-32BIT-P9-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P9-PRIV-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r20, 304(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r19, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r18, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r17, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r16, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r15, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r14, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lwz r13, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd 
f21, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f16, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f15, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: lfd f14, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: addi r1, r1, 496 +; BE-32BIT-P9-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: lwz r12, 4(r1) +; BE-32BIT-P9-PRIV-NEXT: mtlr r0 +; BE-32BIT-P9-PRIV-NEXT: hashchkp r0, -424(r1) +; BE-32BIT-P9-PRIV-NEXT: mtocrf 32, r12 +; BE-32BIT-P9-PRIV-NEXT: mtocrf 16, r12 +; BE-32BIT-P9-PRIV-NEXT: mtocrf 8, r12 +; BE-32BIT-P9-PRIV-NEXT: blr +; +; BE-32BIT-P8-PRIV-LABEL: spill: +; BE-32BIT-P8-PRIV: # %bb.0: # %entry +; BE-32BIT-P8-PRIV-NEXT: mfcr r12 +; BE-32BIT-P8-PRIV-NEXT: mflr r0 +; BE-32BIT-P8-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashstp r0, -424(r1) +; BE-32BIT-P8-PRIV-NEXT: stw r12, 4(r1) +; BE-32BIT-P8-PRIV-NEXT: stwu r1, -496(r1) +; BE-32BIT-P8-PRIV-NEXT: li r4, 80 +; BE-32BIT-P8-PRIV-NEXT: stw r13, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r14, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 96 +; BE-32BIT-P8-PRIV-NEXT: stw r15, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 112 +; BE-32BIT-P8-PRIV-NEXT: stw r16, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 128 +; BE-32BIT-P8-PRIV-NEXT: stw r17, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 144 +; BE-32BIT-P8-PRIV-NEXT: stw r18, 296(r1) 
# 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 160 +; BE-32BIT-P8-PRIV-NEXT: stw r19, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 176 +; BE-32BIT-P8-PRIV-NEXT: stw r20, 304(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 192 +; BE-32BIT-P8-PRIV-NEXT: stw r21, 308(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 208 +; BE-32BIT-P8-PRIV-NEXT: stw r22, 312(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 224 +; BE-32BIT-P8-PRIV-NEXT: stw r23, 316(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 240 +; BE-32BIT-P8-PRIV-NEXT: stw r24, 320(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: li r4, 256 +; BE-32BIT-P8-PRIV-NEXT: stw r25, 324(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 12(r3) +; BE-32BIT-P8-PRIV-NEXT: stw r26, 328(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r27, 332(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r28, 336(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r29, 340(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r30, 344(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r31, 348(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f14, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f15, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f16, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; 
BE-32BIT-P8-PRIV-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r3, 64(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: stw r4, 68(r1) +; BE-32BIT-P8-PRIV-NEXT: #APP +; BE-32BIT-P8-PRIV-NEXT: nop +; BE-32BIT-P8-PRIV-NEXT: #NO_APP +; BE-32BIT-P8-PRIV-NEXT: addi r3, r1, 68 +; BE-32BIT-P8-PRIV-NEXT: bl .callee2[PR] +; BE-32BIT-P8-PRIV-NEXT: nop +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 64(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r31, 348(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r30, 344(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r29, 340(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 16(r4) +; BE-32BIT-P8-PRIV-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r28, 336(r1) # 4-byte Folded Reload 
+; BE-32BIT-P8-PRIV-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r27, 332(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r26, 328(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P8-PRIV-NEXT: li r4, 256 +; BE-32BIT-P8-PRIV-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r25, 324(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r24, 320(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 240 +; BE-32BIT-P8-PRIV-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r23, 316(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r22, 312(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 224 +; BE-32BIT-P8-PRIV-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r21, 308(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r20, 304(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v29, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 208 +; BE-32BIT-P8-PRIV-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r19, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r18, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v28, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 192 +; BE-32BIT-P8-PRIV-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r17, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r16, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v27, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 176 +; BE-32BIT-P8-PRIV-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; 
BE-32BIT-P8-PRIV-NEXT: lwz r15, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r14, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v26, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 160 +; BE-32BIT-P8-PRIV-NEXT: lfd f16, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lwz r13, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v25, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 144 +; BE-32BIT-P8-PRIV-NEXT: lfd f15, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v24, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 128 +; BE-32BIT-P8-PRIV-NEXT: lfd f14, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v23, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 112 +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v22, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 96 +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: li r4, 80 +; BE-32BIT-P8-PRIV-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: addi r1, r1, 496 +; BE-32BIT-P8-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: lwz r12, 4(r1) +; BE-32BIT-P8-PRIV-NEXT: mtocrf 32, r12 +; BE-32BIT-P8-PRIV-NEXT: hashchkp r0, -424(r1) +; BE-32BIT-P8-PRIV-NEXT: mtlr r0 +; BE-32BIT-P8-PRIV-NEXT: mtocrf 16, r12 +; BE-32BIT-P8-PRIV-NEXT: mtocrf 8, r12 +; BE-32BIT-P8-PRIV-NEXT: blr +entry: + %local = alloca i32, align 4 + %0 = bitcast i32* %local to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %arrayidx = getelementptr inbounds i32, i32* %in, i64 3 + %1 = load i32, i32* %arrayidx, align 4 + store i32 %1, i32* %local, align 4 + tail call void asm sideeffect "nop", 
"~{cr2},~{cr3},~{cr4},~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %call = call zeroext i32 @callee2(i32* nonnull %local) + %arrayidx1 = getelementptr inbounds i32, i32* %in, i64 4 + %2 = load i32, i32* %arrayidx1, align 4 + %add = add i32 %2, %call + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret i32 %add +} + +define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 { +; BE-P10-LABEL: shrinkwrap: +; BE-P10: # %bb.0: # %entry +; BE-P10-NEXT: cmpldi r3, 0 +; BE-P10-NEXT: beq cr0, L..BB2_2 +; BE-P10-NEXT: # %bb.1: # %if.end +; BE-P10-NEXT: mflr r0 +; BE-P10-NEXT: std r0, 16(r1) +; BE-P10-NEXT: hashst r0, -16(r1) +; BE-P10-NEXT: stdu r1, -144(r1) +; BE-P10-NEXT: std r31, 136(r1) # 8-byte Folded Spill +; BE-P10-NEXT: mr r31, r3 +; BE-P10-NEXT: lwz r3, 12(r3) +; BE-P10-NEXT: stw r3, 124(r1) +; BE-P10-NEXT: addi r3, r1, 124 +; BE-P10-NEXT: bl .callee2[PR] +; BE-P10-NEXT: nop +; BE-P10-NEXT: lwz r4, 16(r31) +; BE-P10-NEXT: ld r31, 136(r1) # 8-byte Folded Reload +; BE-P10-NEXT: add r3, r4, r3 +; BE-P10-NEXT: addi r1, r1, 144 +; BE-P10-NEXT: ld r0, 16(r1) +; BE-P10-NEXT: clrldi r3, r3, 32 +; BE-P10-NEXT: hashchk r0, -16(r1) +; BE-P10-NEXT: mtlr r0 +; BE-P10-NEXT: blr +; BE-P10-NEXT: L..BB2_2: +; BE-P10-NEXT: li r3, 0 +; BE-P10-NEXT: blr +; +; BE-P9-LABEL: shrinkwrap: +; BE-P9: # %bb.0: # %entry +; BE-P9-NEXT: cmpldi r3, 0 +; BE-P9-NEXT: beq cr0, L..BB2_2 +; BE-P9-NEXT: # %bb.1: # %if.end +; BE-P9-NEXT: mflr r0 +; BE-P9-NEXT: std r0, 16(r1) +; BE-P9-NEXT: hashst r0, -16(r1) +; BE-P9-NEXT: stdu r1, -144(r1) +; BE-P9-NEXT: std r31, 136(r1) # 8-byte Folded Spill +; BE-P9-NEXT: 
mr r31, r3 +; BE-P9-NEXT: lwz r3, 12(r3) +; BE-P9-NEXT: stw r3, 124(r1) +; BE-P9-NEXT: addi r3, r1, 124 +; BE-P9-NEXT: bl .callee2[PR] +; BE-P9-NEXT: nop +; BE-P9-NEXT: lwz r4, 16(r31) +; BE-P9-NEXT: ld r31, 136(r1) # 8-byte Folded Reload +; BE-P9-NEXT: add r3, r4, r3 +; BE-P9-NEXT: addi r1, r1, 144 +; BE-P9-NEXT: ld r0, 16(r1) +; BE-P9-NEXT: clrldi r3, r3, 32 +; BE-P9-NEXT: mtlr r0 +; BE-P9-NEXT: hashchk r0, -16(r1) +; BE-P9-NEXT: blr +; BE-P9-NEXT: L..BB2_2: +; BE-P9-NEXT: li r3, 0 +; BE-P9-NEXT: blr +; +; BE-P8-LABEL: shrinkwrap: +; BE-P8: # %bb.0: # %entry +; BE-P8-NEXT: cmpldi r3, 0 +; BE-P8-NEXT: beq cr0, L..BB2_2 +; BE-P8-NEXT: # %bb.1: # %if.end +; BE-P8-NEXT: mflr r0 +; BE-P8-NEXT: std r0, 16(r1) +; BE-P8-NEXT: hashst r0, -16(r1) +; BE-P8-NEXT: stdu r1, -144(r1) +; BE-P8-NEXT: std r31, 136(r1) # 8-byte Folded Spill +; BE-P8-NEXT: mr r31, r3 +; BE-P8-NEXT: lwz r3, 12(r3) +; BE-P8-NEXT: stw r3, 124(r1) +; BE-P8-NEXT: addi r3, r1, 124 +; BE-P8-NEXT: bl .callee2[PR] +; BE-P8-NEXT: nop +; BE-P8-NEXT: lwz r4, 16(r31) +; BE-P8-NEXT: ld r31, 136(r1) # 8-byte Folded Reload +; BE-P8-NEXT: add r3, r4, r3 +; BE-P8-NEXT: addi r1, r1, 144 +; BE-P8-NEXT: ld r0, 16(r1) +; BE-P8-NEXT: clrldi r3, r3, 32 +; BE-P8-NEXT: hashchk r0, -16(r1) +; BE-P8-NEXT: mtlr r0 +; BE-P8-NEXT: blr +; BE-P8-NEXT: L..BB2_2: +; BE-P8-NEXT: li r3, 0 +; BE-P8-NEXT: blr +; +; BE-32BIT-P10-LABEL: shrinkwrap: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: cmplwi r3, 0 +; BE-32BIT-P10-NEXT: beq cr0, L..BB2_2 +; BE-32BIT-P10-NEXT: # %bb.1: # %if.end +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-NEXT: hashst r0, -16(r1) +; BE-32BIT-P10-NEXT: stwu r1, -80(r1) +; BE-32BIT-P10-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: mr r31, r3 +; BE-32BIT-P10-NEXT: lwz r3, 12(r3) +; BE-32BIT-P10-NEXT: stw r3, 60(r1) +; BE-32BIT-P10-NEXT: addi r3, r1, 60 +; BE-32BIT-P10-NEXT: bl .callee2[PR] +; BE-32BIT-P10-NEXT: nop +; BE-32BIT-P10-NEXT: lwz r4, 
16(r31) +; BE-32BIT-P10-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: addi r1, r1, 80 +; BE-32BIT-P10-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: blr +; BE-32BIT-P10-NEXT: L..BB2_2: +; BE-32BIT-P10-NEXT: li r3, 0 +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: shrinkwrap: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: cmplwi r3, 0 +; BE-32BIT-P9-NEXT: beq cr0, L..BB2_2 +; BE-32BIT-P9-NEXT: # %bb.1: # %if.end +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-NEXT: hashst r0, -16(r1) +; BE-32BIT-P9-NEXT: stwu r1, -80(r1) +; BE-32BIT-P9-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: mr r31, r3 +; BE-32BIT-P9-NEXT: lwz r3, 12(r3) +; BE-32BIT-P9-NEXT: stw r3, 60(r1) +; BE-32BIT-P9-NEXT: addi r3, r1, 60 +; BE-32BIT-P9-NEXT: bl .callee2[PR] +; BE-32BIT-P9-NEXT: nop +; BE-32BIT-P9-NEXT: lwz r4, 16(r31) +; BE-32BIT-P9-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: addi r1, r1, 80 +; BE-32BIT-P9-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P9-NEXT: blr +; BE-32BIT-P9-NEXT: L..BB2_2: +; BE-32BIT-P9-NEXT: li r3, 0 +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: shrinkwrap: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: cmplwi r3, 0 +; BE-32BIT-P8-NEXT: beq cr0, L..BB2_2 +; BE-32BIT-P8-NEXT: # %bb.1: # %if.end +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-NEXT: hashst r0, -16(r1) +; BE-32BIT-P8-NEXT: stwu r1, -80(r1) +; BE-32BIT-P8-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: mr r31, r3 +; BE-32BIT-P8-NEXT: lwz r3, 12(r3) +; BE-32BIT-P8-NEXT: stw r3, 60(r1) +; BE-32BIT-P8-NEXT: addi r3, r1, 60 +; BE-32BIT-P8-NEXT: bl .callee2[PR] +; BE-32BIT-P8-NEXT: nop +; BE-32BIT-P8-NEXT: lwz r4, 16(r31) +; BE-32BIT-P8-NEXT: lwz r31, 
76(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: addi r1, r1, 80 +; BE-32BIT-P8-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: blr +; BE-32BIT-P8-NEXT: L..BB2_2: +; BE-32BIT-P8-NEXT: li r3, 0 +; BE-32BIT-P8-NEXT: blr +; +; BE-P10-PRIV-LABEL: shrinkwrap: +; BE-P10-PRIV: # %bb.0: # %entry +; BE-P10-PRIV-NEXT: cmpldi r3, 0 +; BE-P10-PRIV-NEXT: beq cr0, L..BB2_2 +; BE-P10-PRIV-NEXT: # %bb.1: # %if.end +; BE-P10-PRIV-NEXT: mflr r0 +; BE-P10-PRIV-NEXT: std r0, 16(r1) +; BE-P10-PRIV-NEXT: hashstp r0, -16(r1) +; BE-P10-PRIV-NEXT: stdu r1, -144(r1) +; BE-P10-PRIV-NEXT: std r31, 136(r1) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: mr r31, r3 +; BE-P10-PRIV-NEXT: lwz r3, 12(r3) +; BE-P10-PRIV-NEXT: stw r3, 124(r1) +; BE-P10-PRIV-NEXT: addi r3, r1, 124 +; BE-P10-PRIV-NEXT: bl .callee2[PR] +; BE-P10-PRIV-NEXT: nop +; BE-P10-PRIV-NEXT: lwz r4, 16(r31) +; BE-P10-PRIV-NEXT: ld r31, 136(r1) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: add r3, r4, r3 +; BE-P10-PRIV-NEXT: addi r1, r1, 144 +; BE-P10-PRIV-NEXT: ld r0, 16(r1) +; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P10-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-P10-PRIV-NEXT: mtlr r0 +; BE-P10-PRIV-NEXT: blr +; BE-P10-PRIV-NEXT: L..BB2_2: +; BE-P10-PRIV-NEXT: li r3, 0 +; BE-P10-PRIV-NEXT: blr +; +; BE-P9-PRIV-LABEL: shrinkwrap: +; BE-P9-PRIV: # %bb.0: # %entry +; BE-P9-PRIV-NEXT: cmpldi r3, 0 +; BE-P9-PRIV-NEXT: beq cr0, L..BB2_2 +; BE-P9-PRIV-NEXT: # %bb.1: # %if.end +; BE-P9-PRIV-NEXT: mflr r0 +; BE-P9-PRIV-NEXT: std r0, 16(r1) +; BE-P9-PRIV-NEXT: hashstp r0, -16(r1) +; BE-P9-PRIV-NEXT: stdu r1, -144(r1) +; BE-P9-PRIV-NEXT: std r31, 136(r1) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: mr r31, r3 +; BE-P9-PRIV-NEXT: lwz r3, 12(r3) +; BE-P9-PRIV-NEXT: stw r3, 124(r1) +; BE-P9-PRIV-NEXT: addi r3, r1, 124 +; BE-P9-PRIV-NEXT: bl .callee2[PR] +; BE-P9-PRIV-NEXT: nop +; BE-P9-PRIV-NEXT: lwz r4, 16(r31) +; BE-P9-PRIV-NEXT: ld r31, 136(r1) # 8-byte 
Folded Reload +; BE-P9-PRIV-NEXT: add r3, r4, r3 +; BE-P9-PRIV-NEXT: addi r1, r1, 144 +; BE-P9-PRIV-NEXT: ld r0, 16(r1) +; BE-P9-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P9-PRIV-NEXT: mtlr r0 +; BE-P9-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-P9-PRIV-NEXT: blr +; BE-P9-PRIV-NEXT: L..BB2_2: +; BE-P9-PRIV-NEXT: li r3, 0 +; BE-P9-PRIV-NEXT: blr +; +; BE-P8-PRIV-LABEL: shrinkwrap: +; BE-P8-PRIV: # %bb.0: # %entry +; BE-P8-PRIV-NEXT: cmpldi r3, 0 +; BE-P8-PRIV-NEXT: beq cr0, L..BB2_2 +; BE-P8-PRIV-NEXT: # %bb.1: # %if.end +; BE-P8-PRIV-NEXT: mflr r0 +; BE-P8-PRIV-NEXT: std r0, 16(r1) +; BE-P8-PRIV-NEXT: hashstp r0, -16(r1) +; BE-P8-PRIV-NEXT: stdu r1, -144(r1) +; BE-P8-PRIV-NEXT: std r31, 136(r1) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: mr r31, r3 +; BE-P8-PRIV-NEXT: lwz r3, 12(r3) +; BE-P8-PRIV-NEXT: stw r3, 124(r1) +; BE-P8-PRIV-NEXT: addi r3, r1, 124 +; BE-P8-PRIV-NEXT: bl .callee2[PR] +; BE-P8-PRIV-NEXT: nop +; BE-P8-PRIV-NEXT: lwz r4, 16(r31) +; BE-P8-PRIV-NEXT: ld r31, 136(r1) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: add r3, r4, r3 +; BE-P8-PRIV-NEXT: addi r1, r1, 144 +; BE-P8-PRIV-NEXT: ld r0, 16(r1) +; BE-P8-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P8-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-P8-PRIV-NEXT: mtlr r0 +; BE-P8-PRIV-NEXT: blr +; BE-P8-PRIV-NEXT: L..BB2_2: +; BE-P8-PRIV-NEXT: li r3, 0 +; BE-P8-PRIV-NEXT: blr +; +; BE-32BIT-P10-PRIV-LABEL: shrinkwrap: +; BE-32BIT-P10-PRIV: # %bb.0: # %entry +; BE-32BIT-P10-PRIV-NEXT: cmplwi r3, 0 +; BE-32BIT-P10-PRIV-NEXT: beq cr0, L..BB2_2 +; BE-32BIT-P10-PRIV-NEXT: # %bb.1: # %if.end +; BE-32BIT-P10-PRIV-NEXT: mflr r0 +; BE-32BIT-P10-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P10-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P10-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: mr r31, r3 +; BE-32BIT-P10-PRIV-NEXT: lwz r3, 12(r3) +; BE-32BIT-P10-PRIV-NEXT: stw r3, 60(r1) +; BE-32BIT-P10-PRIV-NEXT: addi r3, r1, 60 +; BE-32BIT-P10-PRIV-NEXT: bl .callee2[PR] +; 
BE-32BIT-P10-PRIV-NEXT: nop +; BE-32BIT-P10-PRIV-NEXT: lwz r4, 16(r31) +; BE-32BIT-P10-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P10-PRIV-NEXT: addi r1, r1, 80 +; BE-32BIT-P10-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P10-PRIV-NEXT: mtlr r0 +; BE-32BIT-P10-PRIV-NEXT: blr +; BE-32BIT-P10-PRIV-NEXT: L..BB2_2: +; BE-32BIT-P10-PRIV-NEXT: li r3, 0 +; BE-32BIT-P10-PRIV-NEXT: blr +; +; BE-32BIT-P9-PRIV-LABEL: shrinkwrap: +; BE-32BIT-P9-PRIV: # %bb.0: # %entry +; BE-32BIT-P9-PRIV-NEXT: cmplwi r3, 0 +; BE-32BIT-P9-PRIV-NEXT: beq cr0, L..BB2_2 +; BE-32BIT-P9-PRIV-NEXT: # %bb.1: # %if.end +; BE-32BIT-P9-PRIV-NEXT: mflr r0 +; BE-32BIT-P9-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P9-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P9-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: mr r31, r3 +; BE-32BIT-P9-PRIV-NEXT: lwz r3, 12(r3) +; BE-32BIT-P9-PRIV-NEXT: stw r3, 60(r1) +; BE-32BIT-P9-PRIV-NEXT: addi r3, r1, 60 +; BE-32BIT-P9-PRIV-NEXT: bl .callee2[PR] +; BE-32BIT-P9-PRIV-NEXT: nop +; BE-32BIT-P9-PRIV-NEXT: lwz r4, 16(r31) +; BE-32BIT-P9-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P9-PRIV-NEXT: addi r1, r1, 80 +; BE-32BIT-P9-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: mtlr r0 +; BE-32BIT-P9-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P9-PRIV-NEXT: blr +; BE-32BIT-P9-PRIV-NEXT: L..BB2_2: +; BE-32BIT-P9-PRIV-NEXT: li r3, 0 +; BE-32BIT-P9-PRIV-NEXT: blr +; +; BE-32BIT-P8-PRIV-LABEL: shrinkwrap: +; BE-32BIT-P8-PRIV: # %bb.0: # %entry +; BE-32BIT-P8-PRIV-NEXT: cmplwi r3, 0 +; BE-32BIT-P8-PRIV-NEXT: beq cr0, L..BB2_2 +; BE-32BIT-P8-PRIV-NEXT: # %bb.1: # %if.end +; BE-32BIT-P8-PRIV-NEXT: mflr r0 +; BE-32BIT-P8-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P8-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P8-PRIV-NEXT: stw r31, 
76(r1) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: mr r31, r3 +; BE-32BIT-P8-PRIV-NEXT: lwz r3, 12(r3) +; BE-32BIT-P8-PRIV-NEXT: stw r3, 60(r1) +; BE-32BIT-P8-PRIV-NEXT: addi r3, r1, 60 +; BE-32BIT-P8-PRIV-NEXT: bl .callee2[PR] +; BE-32BIT-P8-PRIV-NEXT: nop +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 16(r31) +; BE-32BIT-P8-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P8-PRIV-NEXT: addi r1, r1, 80 +; BE-32BIT-P8-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P8-PRIV-NEXT: mtlr r0 +; BE-32BIT-P8-PRIV-NEXT: blr +; BE-32BIT-P8-PRIV-NEXT: L..BB2_2: +; BE-32BIT-P8-PRIV-NEXT: li r3, 0 +; BE-32BIT-P8-PRIV-NEXT: blr +entry: + %local = alloca i32, align 4 + %tobool.not = icmp eq i32* %in, null + br i1 %tobool.not, label %return, label %if.end + +if.end: ; preds = %entry + %0 = bitcast i32* %local to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %arrayidx = getelementptr inbounds i32, i32* %in, i64 3 + %1 = load i32, i32* %arrayidx, align 4 + store i32 %1, i32* %local, align 4 + %call = call zeroext i32 @callee2(i32* nonnull %local) + %arrayidx1 = getelementptr inbounds i32, i32* %in, i64 4 + %2 = load i32, i32* %arrayidx1, align 4 + %add = add i32 %2, %call + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add, %if.end ], [ 0, %entry ] + ret i32 %retval.0 +} + +define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 { +; BE-P10-LABEL: aligned: +; BE-P10: # %bb.0: # %entry +; BE-P10-NEXT: mflr r0 +; BE-P10-NEXT: lis r12, -1 +; BE-P10-NEXT: std r30, -16(r1) +; BE-P10-NEXT: mr r30, r1 +; BE-P10-NEXT: std r0, 16(r1) +; BE-P10-NEXT: hashst r0, -24(r1) +; BE-P10-NEXT: clrldi r0, r1, 49 +; BE-P10-NEXT: ori r12, r12, 0 +; BE-P10-NEXT: subc r0, r12, r0 +; BE-P10-NEXT: stdux r1, r1, r0 +; BE-P10-NEXT: std r31, -8(r30) # 8-byte Folded Spill +; BE-P10-NEXT: mr r31, r3 +; 
BE-P10-NEXT: lwz r3, 4(r3) +; BE-P10-NEXT: lis r4, 0 +; BE-P10-NEXT: addi r5, r1, 32764 +; BE-P10-NEXT: ori r4, r4, 65508 +; BE-P10-NEXT: stwx r3, r1, r4 +; BE-P10-NEXT: lwz r3, 12(r31) +; BE-P10-NEXT: lis r4, 0 +; BE-P10-NEXT: ori r4, r4, 32768 +; BE-P10-NEXT: stwx r3, r1, r4 +; BE-P10-NEXT: lwz r3, 20(r31) +; BE-P10-NEXT: lis r4, 0 +; BE-P10-NEXT: ori r4, r4, 65508 +; BE-P10-NEXT: add r4, r1, r4 +; BE-P10-NEXT: stw r3, 32764(r1) +; BE-P10-NEXT: lis r3, 0 +; BE-P10-NEXT: ori r3, r3, 32768 +; BE-P10-NEXT: add r3, r1, r3 +; BE-P10-NEXT: bl .callee3[PR] +; BE-P10-NEXT: nop +; BE-P10-NEXT: lwz r4, 16(r31) +; BE-P10-NEXT: ld r31, -8(r30) # 8-byte Folded Reload +; BE-P10-NEXT: add r3, r4, r3 +; BE-P10-NEXT: clrldi r3, r3, 32 +; BE-P10-NEXT: mr r1, r30 +; BE-P10-NEXT: ld r0, 16(r1) +; BE-P10-NEXT: ld r30, -16(r1) +; BE-P10-NEXT: mtlr r0 +; BE-P10-NEXT: hashchk r0, -24(r1) +; BE-P10-NEXT: blr +; +; BE-P9-LABEL: aligned: +; BE-P9: # %bb.0: # %entry +; BE-P9-NEXT: mflr r0 +; BE-P9-NEXT: lis r12, -1 +; BE-P9-NEXT: std r30, -16(r1) +; BE-P9-NEXT: mr r30, r1 +; BE-P9-NEXT: ori r12, r12, 0 +; BE-P9-NEXT: std r0, 16(r1) +; BE-P9-NEXT: hashst r0, -24(r1) +; BE-P9-NEXT: clrldi r0, r1, 49 +; BE-P9-NEXT: subc r0, r12, r0 +; BE-P9-NEXT: stdux r1, r1, r0 +; BE-P9-NEXT: std r31, -8(r30) # 8-byte Folded Spill +; BE-P9-NEXT: mr r31, r3 +; BE-P9-NEXT: lwz r3, 4(r3) +; BE-P9-NEXT: lis r4, 0 +; BE-P9-NEXT: addi r5, r1, 32764 +; BE-P9-NEXT: ori r4, r4, 65508 +; BE-P9-NEXT: stwx r3, r1, r4 +; BE-P9-NEXT: lwz r3, 12(r31) +; BE-P9-NEXT: lis r4, 0 +; BE-P9-NEXT: ori r4, r4, 32768 +; BE-P9-NEXT: stwx r3, r1, r4 +; BE-P9-NEXT: lwz r3, 20(r31) +; BE-P9-NEXT: lis r4, 0 +; BE-P9-NEXT: ori r4, r4, 65508 +; BE-P9-NEXT: stw r3, 32764(r1) +; BE-P9-NEXT: lis r3, 0 +; BE-P9-NEXT: add r4, r1, r4 +; BE-P9-NEXT: ori r3, r3, 32768 +; BE-P9-NEXT: add r3, r1, r3 +; BE-P9-NEXT: bl .callee3[PR] +; BE-P9-NEXT: nop +; BE-P9-NEXT: lwz r4, 16(r31) +; BE-P9-NEXT: ld r31, -8(r30) # 8-byte Folded Reload +; BE-P9-NEXT: 
add r3, r4, r3 +; BE-P9-NEXT: clrldi r3, r3, 32 +; BE-P9-NEXT: mr r1, r30 +; BE-P9-NEXT: ld r0, 16(r1) +; BE-P9-NEXT: ld r30, -16(r1) +; BE-P9-NEXT: mtlr r0 +; BE-P9-NEXT: hashchk r0, -24(r1) +; BE-P9-NEXT: blr +; +; BE-P8-LABEL: aligned: +; BE-P8: # %bb.0: # %entry +; BE-P8-NEXT: mflr r0 +; BE-P8-NEXT: lis r12, -1 +; BE-P8-NEXT: std r30, -16(r1) +; BE-P8-NEXT: mr r30, r1 +; BE-P8-NEXT: std r0, 16(r1) +; BE-P8-NEXT: hashst r0, -24(r1) +; BE-P8-NEXT: clrldi r0, r1, 49 +; BE-P8-NEXT: ori r12, r12, 0 +; BE-P8-NEXT: subc r0, r12, r0 +; BE-P8-NEXT: stdux r1, r1, r0 +; BE-P8-NEXT: std r31, -8(r30) # 8-byte Folded Spill +; BE-P8-NEXT: mr r31, r3 +; BE-P8-NEXT: lwz r3, 4(r3) +; BE-P8-NEXT: lis r6, 0 +; BE-P8-NEXT: ori r6, r6, 65508 +; BE-P8-NEXT: lwz r4, 12(r31) +; BE-P8-NEXT: lwz r5, 20(r31) +; BE-P8-NEXT: stwx r3, r1, r6 +; BE-P8-NEXT: lis r3, 0 +; BE-P8-NEXT: ori r3, r3, 32768 +; BE-P8-NEXT: stw r5, 32764(r1) +; BE-P8-NEXT: addi r5, r1, 32764 +; BE-P8-NEXT: stwx r4, r1, r3 +; BE-P8-NEXT: lis r3, 0 +; BE-P8-NEXT: lis r4, 0 +; BE-P8-NEXT: ori r3, r3, 32768 +; BE-P8-NEXT: ori r4, r4, 65508 +; BE-P8-NEXT: add r3, r1, r3 +; BE-P8-NEXT: add r4, r1, r4 +; BE-P8-NEXT: bl .callee3[PR] +; BE-P8-NEXT: nop +; BE-P8-NEXT: lwz r4, 16(r31) +; BE-P8-NEXT: ld r31, -8(r30) # 8-byte Folded Reload +; BE-P8-NEXT: add r3, r4, r3 +; BE-P8-NEXT: clrldi r3, r3, 32 +; BE-P8-NEXT: mr r1, r30 +; BE-P8-NEXT: ld r0, 16(r1) +; BE-P8-NEXT: ld r30, -16(r1) +; BE-P8-NEXT: hashchk r0, -24(r1) +; BE-P8-NEXT: mtlr r0 +; BE-P8-NEXT: blr +; +; BE-32BIT-P10-LABEL: aligned: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: lis r12, -1 +; BE-32BIT-P10-NEXT: stw r30, -8(r1) +; BE-32BIT-P10-NEXT: mr r30, r1 +; BE-32BIT-P10-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-NEXT: hashst r0, -16(r1) +; BE-32BIT-P10-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P10-NEXT: ori r12, r12, 0 +; BE-32BIT-P10-NEXT: subc r0, r12, r0 +; BE-32BIT-P10-NEXT: stwux r1, r1, r0 +; BE-32BIT-P10-NEXT: stw r31, -4(r30) # 
4-byte Folded Spill +; BE-32BIT-P10-NEXT: mr r31, r3 +; BE-32BIT-P10-NEXT: lwz r3, 4(r3) +; BE-32BIT-P10-NEXT: lis r4, 0 +; BE-32BIT-P10-NEXT: addi r5, r1, 32764 +; BE-32BIT-P10-NEXT: ori r4, r4, 65516 +; BE-32BIT-P10-NEXT: stwx r3, r1, r4 +; BE-32BIT-P10-NEXT: lwz r3, 12(r31) +; BE-32BIT-P10-NEXT: lis r4, 0 +; BE-32BIT-P10-NEXT: ori r4, r4, 32768 +; BE-32BIT-P10-NEXT: stwx r3, r1, r4 +; BE-32BIT-P10-NEXT: lwz r3, 20(r31) +; BE-32BIT-P10-NEXT: lis r4, 0 +; BE-32BIT-P10-NEXT: ori r4, r4, 65516 +; BE-32BIT-P10-NEXT: add r4, r1, r4 +; BE-32BIT-P10-NEXT: stw r3, 32764(r1) +; BE-32BIT-P10-NEXT: lis r3, 0 +; BE-32BIT-P10-NEXT: ori r3, r3, 32768 +; BE-32BIT-P10-NEXT: add r3, r1, r3 +; BE-32BIT-P10-NEXT: bl .callee3[PR] +; BE-32BIT-P10-NEXT: nop +; BE-32BIT-P10-NEXT: lwz r4, 16(r31) +; BE-32BIT-P10-NEXT: lwz r31, -4(r30) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: mr r1, r30 +; BE-32BIT-P10-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-NEXT: lwz r30, -8(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: aligned: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: lis r12, -1 +; BE-32BIT-P9-NEXT: stw r30, -8(r1) +; BE-32BIT-P9-NEXT: mr r30, r1 +; BE-32BIT-P9-NEXT: ori r12, r12, 0 +; BE-32BIT-P9-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-NEXT: hashst r0, -16(r1) +; BE-32BIT-P9-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P9-NEXT: subc r0, r12, r0 +; BE-32BIT-P9-NEXT: stwux r1, r1, r0 +; BE-32BIT-P9-NEXT: stw r31, -4(r30) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: mr r31, r3 +; BE-32BIT-P9-NEXT: lwz r3, 4(r3) +; BE-32BIT-P9-NEXT: lis r4, 0 +; BE-32BIT-P9-NEXT: addi r5, r1, 32764 +; BE-32BIT-P9-NEXT: ori r4, r4, 65516 +; BE-32BIT-P9-NEXT: stwx r3, r1, r4 +; BE-32BIT-P9-NEXT: lwz r3, 12(r31) +; BE-32BIT-P9-NEXT: lis r4, 0 +; BE-32BIT-P9-NEXT: ori r4, r4, 32768 +; BE-32BIT-P9-NEXT: stwx r3, r1, r4 +; BE-32BIT-P9-NEXT: lwz r3, 20(r31) +; BE-32BIT-P9-NEXT: lis r4, 
0 +; BE-32BIT-P9-NEXT: ori r4, r4, 65516 +; BE-32BIT-P9-NEXT: stw r3, 32764(r1) +; BE-32BIT-P9-NEXT: lis r3, 0 +; BE-32BIT-P9-NEXT: add r4, r1, r4 +; BE-32BIT-P9-NEXT: ori r3, r3, 32768 +; BE-32BIT-P9-NEXT: add r3, r1, r3 +; BE-32BIT-P9-NEXT: bl .callee3[PR] +; BE-32BIT-P9-NEXT: nop +; BE-32BIT-P9-NEXT: lwz r4, 16(r31) +; BE-32BIT-P9-NEXT: lwz r31, -4(r30) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: mr r1, r30 +; BE-32BIT-P9-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-NEXT: lwz r30, -8(r1) +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: aligned: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: lis r12, -1 +; BE-32BIT-P8-NEXT: stw r30, -8(r1) +; BE-32BIT-P8-NEXT: mr r30, r1 +; BE-32BIT-P8-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-NEXT: hashst r0, -16(r1) +; BE-32BIT-P8-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P8-NEXT: ori r12, r12, 0 +; BE-32BIT-P8-NEXT: subc r0, r12, r0 +; BE-32BIT-P8-NEXT: stwux r1, r1, r0 +; BE-32BIT-P8-NEXT: stw r31, -4(r30) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: mr r31, r3 +; BE-32BIT-P8-NEXT: lwz r3, 4(r3) +; BE-32BIT-P8-NEXT: lis r6, 0 +; BE-32BIT-P8-NEXT: ori r6, r6, 65516 +; BE-32BIT-P8-NEXT: lwz r4, 12(r31) +; BE-32BIT-P8-NEXT: lwz r5, 20(r31) +; BE-32BIT-P8-NEXT: stwx r3, r1, r6 +; BE-32BIT-P8-NEXT: lis r3, 0 +; BE-32BIT-P8-NEXT: ori r3, r3, 32768 +; BE-32BIT-P8-NEXT: stw r5, 32764(r1) +; BE-32BIT-P8-NEXT: addi r5, r1, 32764 +; BE-32BIT-P8-NEXT: stwx r4, r1, r3 +; BE-32BIT-P8-NEXT: lis r3, 0 +; BE-32BIT-P8-NEXT: lis r4, 0 +; BE-32BIT-P8-NEXT: ori r3, r3, 32768 +; BE-32BIT-P8-NEXT: ori r4, r4, 65516 +; BE-32BIT-P8-NEXT: add r3, r1, r3 +; BE-32BIT-P8-NEXT: add r4, r1, r4 +; BE-32BIT-P8-NEXT: bl .callee3[PR] +; BE-32BIT-P8-NEXT: nop +; BE-32BIT-P8-NEXT: lwz r4, 16(r31) +; BE-32BIT-P8-NEXT: lwz r31, -4(r30) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: mr r1, r30 +; BE-32BIT-P8-NEXT: 
lwz r0, 8(r1) +; BE-32BIT-P8-NEXT: lwz r30, -8(r1) +; BE-32BIT-P8-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: blr +; +; BE-P10-PRIV-LABEL: aligned: +; BE-P10-PRIV: # %bb.0: # %entry +; BE-P10-PRIV-NEXT: mflr r0 +; BE-P10-PRIV-NEXT: lis r12, -1 +; BE-P10-PRIV-NEXT: std r30, -16(r1) +; BE-P10-PRIV-NEXT: mr r30, r1 +; BE-P10-PRIV-NEXT: std r0, 16(r1) +; BE-P10-PRIV-NEXT: hashstp r0, -24(r1) +; BE-P10-PRIV-NEXT: clrldi r0, r1, 49 +; BE-P10-PRIV-NEXT: ori r12, r12, 0 +; BE-P10-PRIV-NEXT: subc r0, r12, r0 +; BE-P10-PRIV-NEXT: stdux r1, r1, r0 +; BE-P10-PRIV-NEXT: std r31, -8(r30) # 8-byte Folded Spill +; BE-P10-PRIV-NEXT: mr r31, r3 +; BE-P10-PRIV-NEXT: lwz r3, 4(r3) +; BE-P10-PRIV-NEXT: lis r4, 0 +; BE-P10-PRIV-NEXT: addi r5, r1, 32764 +; BE-P10-PRIV-NEXT: ori r4, r4, 65508 +; BE-P10-PRIV-NEXT: stwx r3, r1, r4 +; BE-P10-PRIV-NEXT: lwz r3, 12(r31) +; BE-P10-PRIV-NEXT: lis r4, 0 +; BE-P10-PRIV-NEXT: ori r4, r4, 32768 +; BE-P10-PRIV-NEXT: stwx r3, r1, r4 +; BE-P10-PRIV-NEXT: lwz r3, 20(r31) +; BE-P10-PRIV-NEXT: lis r4, 0 +; BE-P10-PRIV-NEXT: ori r4, r4, 65508 +; BE-P10-PRIV-NEXT: add r4, r1, r4 +; BE-P10-PRIV-NEXT: stw r3, 32764(r1) +; BE-P10-PRIV-NEXT: lis r3, 0 +; BE-P10-PRIV-NEXT: ori r3, r3, 32768 +; BE-P10-PRIV-NEXT: add r3, r1, r3 +; BE-P10-PRIV-NEXT: bl .callee3[PR] +; BE-P10-PRIV-NEXT: nop +; BE-P10-PRIV-NEXT: lwz r4, 16(r31) +; BE-P10-PRIV-NEXT: ld r31, -8(r30) # 8-byte Folded Reload +; BE-P10-PRIV-NEXT: add r3, r4, r3 +; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P10-PRIV-NEXT: mr r1, r30 +; BE-P10-PRIV-NEXT: ld r0, 16(r1) +; BE-P10-PRIV-NEXT: ld r30, -16(r1) +; BE-P10-PRIV-NEXT: mtlr r0 +; BE-P10-PRIV-NEXT: hashchkp r0, -24(r1) +; BE-P10-PRIV-NEXT: blr +; +; BE-P9-PRIV-LABEL: aligned: +; BE-P9-PRIV: # %bb.0: # %entry +; BE-P9-PRIV-NEXT: mflr r0 +; BE-P9-PRIV-NEXT: lis r12, -1 +; BE-P9-PRIV-NEXT: std r30, -16(r1) +; BE-P9-PRIV-NEXT: mr r30, r1 +; BE-P9-PRIV-NEXT: ori r12, r12, 0 +; BE-P9-PRIV-NEXT: std r0, 16(r1) +; BE-P9-PRIV-NEXT: 
hashstp r0, -24(r1) +; BE-P9-PRIV-NEXT: clrldi r0, r1, 49 +; BE-P9-PRIV-NEXT: subc r0, r12, r0 +; BE-P9-PRIV-NEXT: stdux r1, r1, r0 +; BE-P9-PRIV-NEXT: std r31, -8(r30) # 8-byte Folded Spill +; BE-P9-PRIV-NEXT: mr r31, r3 +; BE-P9-PRIV-NEXT: lwz r3, 4(r3) +; BE-P9-PRIV-NEXT: lis r4, 0 +; BE-P9-PRIV-NEXT: addi r5, r1, 32764 +; BE-P9-PRIV-NEXT: ori r4, r4, 65508 +; BE-P9-PRIV-NEXT: stwx r3, r1, r4 +; BE-P9-PRIV-NEXT: lwz r3, 12(r31) +; BE-P9-PRIV-NEXT: lis r4, 0 +; BE-P9-PRIV-NEXT: ori r4, r4, 32768 +; BE-P9-PRIV-NEXT: stwx r3, r1, r4 +; BE-P9-PRIV-NEXT: lwz r3, 20(r31) +; BE-P9-PRIV-NEXT: lis r4, 0 +; BE-P9-PRIV-NEXT: ori r4, r4, 65508 +; BE-P9-PRIV-NEXT: stw r3, 32764(r1) +; BE-P9-PRIV-NEXT: lis r3, 0 +; BE-P9-PRIV-NEXT: add r4, r1, r4 +; BE-P9-PRIV-NEXT: ori r3, r3, 32768 +; BE-P9-PRIV-NEXT: add r3, r1, r3 +; BE-P9-PRIV-NEXT: bl .callee3[PR] +; BE-P9-PRIV-NEXT: nop +; BE-P9-PRIV-NEXT: lwz r4, 16(r31) +; BE-P9-PRIV-NEXT: ld r31, -8(r30) # 8-byte Folded Reload +; BE-P9-PRIV-NEXT: add r3, r4, r3 +; BE-P9-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P9-PRIV-NEXT: mr r1, r30 +; BE-P9-PRIV-NEXT: ld r0, 16(r1) +; BE-P9-PRIV-NEXT: ld r30, -16(r1) +; BE-P9-PRIV-NEXT: mtlr r0 +; BE-P9-PRIV-NEXT: hashchkp r0, -24(r1) +; BE-P9-PRIV-NEXT: blr +; +; BE-P8-PRIV-LABEL: aligned: +; BE-P8-PRIV: # %bb.0: # %entry +; BE-P8-PRIV-NEXT: mflr r0 +; BE-P8-PRIV-NEXT: lis r12, -1 +; BE-P8-PRIV-NEXT: std r30, -16(r1) +; BE-P8-PRIV-NEXT: mr r30, r1 +; BE-P8-PRIV-NEXT: std r0, 16(r1) +; BE-P8-PRIV-NEXT: hashstp r0, -24(r1) +; BE-P8-PRIV-NEXT: clrldi r0, r1, 49 +; BE-P8-PRIV-NEXT: ori r12, r12, 0 +; BE-P8-PRIV-NEXT: subc r0, r12, r0 +; BE-P8-PRIV-NEXT: stdux r1, r1, r0 +; BE-P8-PRIV-NEXT: std r31, -8(r30) # 8-byte Folded Spill +; BE-P8-PRIV-NEXT: mr r31, r3 +; BE-P8-PRIV-NEXT: lwz r3, 4(r3) +; BE-P8-PRIV-NEXT: lis r6, 0 +; BE-P8-PRIV-NEXT: ori r6, r6, 65508 +; BE-P8-PRIV-NEXT: lwz r4, 12(r31) +; BE-P8-PRIV-NEXT: lwz r5, 20(r31) +; BE-P8-PRIV-NEXT: stwx r3, r1, r6 +; BE-P8-PRIV-NEXT: lis r3, 0 +; 
BE-P8-PRIV-NEXT: ori r3, r3, 32768 +; BE-P8-PRIV-NEXT: stw r5, 32764(r1) +; BE-P8-PRIV-NEXT: addi r5, r1, 32764 +; BE-P8-PRIV-NEXT: stwx r4, r1, r3 +; BE-P8-PRIV-NEXT: lis r3, 0 +; BE-P8-PRIV-NEXT: lis r4, 0 +; BE-P8-PRIV-NEXT: ori r3, r3, 32768 +; BE-P8-PRIV-NEXT: ori r4, r4, 65508 +; BE-P8-PRIV-NEXT: add r3, r1, r3 +; BE-P8-PRIV-NEXT: add r4, r1, r4 +; BE-P8-PRIV-NEXT: bl .callee3[PR] +; BE-P8-PRIV-NEXT: nop +; BE-P8-PRIV-NEXT: lwz r4, 16(r31) +; BE-P8-PRIV-NEXT: ld r31, -8(r30) # 8-byte Folded Reload +; BE-P8-PRIV-NEXT: add r3, r4, r3 +; BE-P8-PRIV-NEXT: clrldi r3, r3, 32 +; BE-P8-PRIV-NEXT: mr r1, r30 +; BE-P8-PRIV-NEXT: ld r0, 16(r1) +; BE-P8-PRIV-NEXT: ld r30, -16(r1) +; BE-P8-PRIV-NEXT: hashchkp r0, -24(r1) +; BE-P8-PRIV-NEXT: mtlr r0 +; BE-P8-PRIV-NEXT: blr +; +; BE-32BIT-P10-PRIV-LABEL: aligned: +; BE-32BIT-P10-PRIV: # %bb.0: # %entry +; BE-32BIT-P10-PRIV-NEXT: mflr r0 +; BE-32BIT-P10-PRIV-NEXT: lis r12, -1 +; BE-32BIT-P10-PRIV-NEXT: stw r30, -8(r1) +; BE-32BIT-P10-PRIV-NEXT: mr r30, r1 +; BE-32BIT-P10-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P10-PRIV-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P10-PRIV-NEXT: ori r12, r12, 0 +; BE-32BIT-P10-PRIV-NEXT: subc r0, r12, r0 +; BE-32BIT-P10-PRIV-NEXT: stwux r1, r1, r0 +; BE-32BIT-P10-PRIV-NEXT: stw r31, -4(r30) # 4-byte Folded Spill +; BE-32BIT-P10-PRIV-NEXT: mr r31, r3 +; BE-32BIT-P10-PRIV-NEXT: lwz r3, 4(r3) +; BE-32BIT-P10-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P10-PRIV-NEXT: addi r5, r1, 32764 +; BE-32BIT-P10-PRIV-NEXT: ori r4, r4, 65516 +; BE-32BIT-P10-PRIV-NEXT: stwx r3, r1, r4 +; BE-32BIT-P10-PRIV-NEXT: lwz r3, 12(r31) +; BE-32BIT-P10-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P10-PRIV-NEXT: ori r4, r4, 32768 +; BE-32BIT-P10-PRIV-NEXT: stwx r3, r1, r4 +; BE-32BIT-P10-PRIV-NEXT: lwz r3, 20(r31) +; BE-32BIT-P10-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P10-PRIV-NEXT: ori r4, r4, 65516 +; BE-32BIT-P10-PRIV-NEXT: add r4, r1, r4 +; BE-32BIT-P10-PRIV-NEXT: stw r3, 32764(r1) +; BE-32BIT-P10-PRIV-NEXT: 
lis r3, 0 +; BE-32BIT-P10-PRIV-NEXT: ori r3, r3, 32768 +; BE-32BIT-P10-PRIV-NEXT: add r3, r1, r3 +; BE-32BIT-P10-PRIV-NEXT: bl .callee3[PR] +; BE-32BIT-P10-PRIV-NEXT: nop +; BE-32BIT-P10-PRIV-NEXT: lwz r4, 16(r31) +; BE-32BIT-P10-PRIV-NEXT: lwz r31, -4(r30) # 4-byte Folded Reload +; BE-32BIT-P10-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P10-PRIV-NEXT: mr r1, r30 +; BE-32BIT-P10-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P10-PRIV-NEXT: lwz r30, -8(r1) +; BE-32BIT-P10-PRIV-NEXT: mtlr r0 +; BE-32BIT-P10-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P10-PRIV-NEXT: blr +; +; BE-32BIT-P9-PRIV-LABEL: aligned: +; BE-32BIT-P9-PRIV: # %bb.0: # %entry +; BE-32BIT-P9-PRIV-NEXT: mflr r0 +; BE-32BIT-P9-PRIV-NEXT: lis r12, -1 +; BE-32BIT-P9-PRIV-NEXT: stw r30, -8(r1) +; BE-32BIT-P9-PRIV-NEXT: mr r30, r1 +; BE-32BIT-P9-PRIV-NEXT: ori r12, r12, 0 +; BE-32BIT-P9-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P9-PRIV-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P9-PRIV-NEXT: subc r0, r12, r0 +; BE-32BIT-P9-PRIV-NEXT: stwux r1, r1, r0 +; BE-32BIT-P9-PRIV-NEXT: stw r31, -4(r30) # 4-byte Folded Spill +; BE-32BIT-P9-PRIV-NEXT: mr r31, r3 +; BE-32BIT-P9-PRIV-NEXT: lwz r3, 4(r3) +; BE-32BIT-P9-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P9-PRIV-NEXT: addi r5, r1, 32764 +; BE-32BIT-P9-PRIV-NEXT: ori r4, r4, 65516 +; BE-32BIT-P9-PRIV-NEXT: stwx r3, r1, r4 +; BE-32BIT-P9-PRIV-NEXT: lwz r3, 12(r31) +; BE-32BIT-P9-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P9-PRIV-NEXT: ori r4, r4, 32768 +; BE-32BIT-P9-PRIV-NEXT: stwx r3, r1, r4 +; BE-32BIT-P9-PRIV-NEXT: lwz r3, 20(r31) +; BE-32BIT-P9-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P9-PRIV-NEXT: ori r4, r4, 65516 +; BE-32BIT-P9-PRIV-NEXT: stw r3, 32764(r1) +; BE-32BIT-P9-PRIV-NEXT: lis r3, 0 +; BE-32BIT-P9-PRIV-NEXT: add r4, r1, r4 +; BE-32BIT-P9-PRIV-NEXT: ori r3, r3, 32768 +; BE-32BIT-P9-PRIV-NEXT: add r3, r1, r3 +; BE-32BIT-P9-PRIV-NEXT: bl .callee3[PR] +; BE-32BIT-P9-PRIV-NEXT: nop +; BE-32BIT-P9-PRIV-NEXT: lwz r4, 16(r31) +; BE-32BIT-P9-PRIV-NEXT: lwz r31, 
-4(r30) # 4-byte Folded Reload +; BE-32BIT-P9-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P9-PRIV-NEXT: mr r1, r30 +; BE-32BIT-P9-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P9-PRIV-NEXT: lwz r30, -8(r1) +; BE-32BIT-P9-PRIV-NEXT: mtlr r0 +; BE-32BIT-P9-PRIV-NEXT: hashchkp r0, -16(r1) +; BE-32BIT-P9-PRIV-NEXT: blr +; +; BE-32BIT-P8-PRIV-LABEL: aligned: +; BE-32BIT-P8-PRIV: # %bb.0: # %entry +; BE-32BIT-P8-PRIV-NEXT: mflr r0 +; BE-32BIT-P8-PRIV-NEXT: lis r12, -1 +; BE-32BIT-P8-PRIV-NEXT: stw r30, -8(r1) +; BE-32BIT-P8-PRIV-NEXT: mr r30, r1 +; BE-32BIT-P8-PRIV-NEXT: stw r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashstp r0, -16(r1) +; BE-32BIT-P8-PRIV-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P8-PRIV-NEXT: ori r12, r12, 0 +; BE-32BIT-P8-PRIV-NEXT: subc r0, r12, r0 +; BE-32BIT-P8-PRIV-NEXT: stwux r1, r1, r0 +; BE-32BIT-P8-PRIV-NEXT: stw r31, -4(r30) # 4-byte Folded Spill +; BE-32BIT-P8-PRIV-NEXT: mr r31, r3 +; BE-32BIT-P8-PRIV-NEXT: lwz r3, 4(r3) +; BE-32BIT-P8-PRIV-NEXT: lis r6, 0 +; BE-32BIT-P8-PRIV-NEXT: ori r6, r6, 65516 +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 12(r31) +; BE-32BIT-P8-PRIV-NEXT: lwz r5, 20(r31) +; BE-32BIT-P8-PRIV-NEXT: stwx r3, r1, r6 +; BE-32BIT-P8-PRIV-NEXT: lis r3, 0 +; BE-32BIT-P8-PRIV-NEXT: ori r3, r3, 32768 +; BE-32BIT-P8-PRIV-NEXT: stw r5, 32764(r1) +; BE-32BIT-P8-PRIV-NEXT: addi r5, r1, 32764 +; BE-32BIT-P8-PRIV-NEXT: stwx r4, r1, r3 +; BE-32BIT-P8-PRIV-NEXT: lis r3, 0 +; BE-32BIT-P8-PRIV-NEXT: lis r4, 0 +; BE-32BIT-P8-PRIV-NEXT: ori r3, r3, 32768 +; BE-32BIT-P8-PRIV-NEXT: ori r4, r4, 65516 +; BE-32BIT-P8-PRIV-NEXT: add r3, r1, r3 +; BE-32BIT-P8-PRIV-NEXT: add r4, r1, r4 +; BE-32BIT-P8-PRIV-NEXT: bl .callee3[PR] +; BE-32BIT-P8-PRIV-NEXT: nop +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 16(r31) +; BE-32BIT-P8-PRIV-NEXT: lwz r31, -4(r30) # 4-byte Folded Reload +; BE-32BIT-P8-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P8-PRIV-NEXT: mr r1, r30 +; BE-32BIT-P8-PRIV-NEXT: lwz r0, 8(r1) +; BE-32BIT-P8-PRIV-NEXT: lwz r30, -8(r1) +; BE-32BIT-P8-PRIV-NEXT: hashchkp r0, -16(r1) +; 
BE-32BIT-P8-PRIV-NEXT: mtlr r0 +; BE-32BIT-P8-PRIV-NEXT: blr +entry: + %beforeLocal = alloca i32, align 4 + %local = alloca i32, align 32768 + %afterLocal = alloca i32, align 4 + %0 = bitcast i32* %beforeLocal to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %arrayidx = getelementptr inbounds i32, i32* %in, i64 1 + %1 = load i32, i32* %arrayidx, align 4 + store i32 %1, i32* %beforeLocal, align 4 + %2 = bitcast i32* %local to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) + %arrayidx1 = getelementptr inbounds i32, i32* %in, i64 3 + %3 = load i32, i32* %arrayidx1, align 4 + store i32 %3, i32* %local, align 32768 + %4 = bitcast i32* %afterLocal to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4) + %arrayidx2 = getelementptr inbounds i32, i32* %in, i64 5 + %5 = load i32, i32* %arrayidx2, align 4 + store i32 %5, i32* %afterLocal, align 4 + %call = call zeroext i32 @callee3(i32* nonnull %local, i32* nonnull %beforeLocal, i32* nonnull %afterLocal) + %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 4 + %6 = load i32, i32* %arrayidx3, align 4 + %add = add i32 %6, %call + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret i32 %add +} + +declare zeroext i32 @callee(i32 zeroext) local_unnamed_addr +declare zeroext i32 @callee2(i32*) local_unnamed_addr +declare zeroext i32 @callee3(i32*, i32*, i32*) local_unnamed_addr +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll index 98791c6f2316..59f342f3cd2a 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -26,6 +26,15 @@ ; RUN: llc 
-verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-P8 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-32BIT-P10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-32BIT-P9 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE-32BIT-P8 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix LE-P10-PRIV @@ -231,6 +240,57 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0 ; BE-P8-NEXT: mtlr r0 ; BE-P8-NEXT: blr ; +; BE-32BIT-P10-LABEL: caller: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: stw r0, 4(r1) +; BE-32BIT-P10-NEXT: hashst r0, -16(r1) +; BE-32BIT-P10-NEXT: stwu r1, -32(r1) +; BE-32BIT-P10-NEXT: stw r30, 24(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: mr r30, r4 +; BE-32BIT-P10-NEXT: bl callee +; BE-32BIT-P10-NEXT: add r3, r3, r30 +; BE-32BIT-P10-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r0, 36(r1) +; BE-32BIT-P10-NEXT: addi r1, r1, 32 +; BE-32BIT-P10-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: caller: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: stw r0, 4(r1) +; BE-32BIT-P9-NEXT: hashst r0, -16(r1) +; 
BE-32BIT-P9-NEXT: stwu r1, -32(r1) +; BE-32BIT-P9-NEXT: stw r30, 24(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: mr r30, r4 +; BE-32BIT-P9-NEXT: bl callee +; BE-32BIT-P9-NEXT: add r3, r3, r30 +; BE-32BIT-P9-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r0, 36(r1) +; BE-32BIT-P9-NEXT: addi r1, r1, 32 +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: caller: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: stw r0, 4(r1) +; BE-32BIT-P8-NEXT: hashst r0, -16(r1) +; BE-32BIT-P8-NEXT: stwu r1, -32(r1) +; BE-32BIT-P8-NEXT: stw r30, 24(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: mr r30, r4 +; BE-32BIT-P8-NEXT: bl callee +; BE-32BIT-P8-NEXT: add r3, r3, r30 +; BE-32BIT-P8-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r0, 36(r1) +; BE-32BIT-P8-NEXT: addi r1, r1, 32 +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P8-NEXT: blr +; ; LE-P10-PRIV-LABEL: caller: ; LE-P10-PRIV: # %bb.0: # %entry ; LE-P10-PRIV-NEXT: mflr r0 @@ -1571,6 +1631,405 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 { ; BE-P8-NEXT: mtocrf 8, r12 ; BE-P8-NEXT: blr ; +; BE-32BIT-P10-LABEL: spill: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: stw r0, 4(r1) +; BE-32BIT-P10-NEXT: hashst r0, -424(r1) +; BE-32BIT-P10-NEXT: stwu r1, -448(r1) +; BE-32BIT-P10-NEXT: mfcr r12 +; BE-32BIT-P10-NEXT: stw r14, 232(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r15, 236(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r16, 240(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r17, 244(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r18, 248(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r19, 252(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r20, 256(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r21, 260(r1) # 4-byte Folded Spill +; 
BE-32BIT-P10-NEXT: stw r22, 264(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r23, 268(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r24, 272(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r25, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r26, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r27, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r28, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r29, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r30, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r31, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r12, 228(r1) +; BE-32BIT-P10-NEXT: lwz r4, 12(r3) +; BE-32BIT-P10-NEXT: stfd f14, 304(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f15, 312(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f16, 320(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f17, 328(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f18, 336(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f19, 344(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f20, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f21, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f22, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f23, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f24, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f25, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f26, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f27, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f28, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f29, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f30, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stfd f31, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v20, 32(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v21, 48(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v22, 64(r1) # 16-byte Folded Spill +; 
BE-32BIT-P10-NEXT: stxv v23, 80(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v24, 96(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v25, 112(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v26, 128(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v27, 144(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v28, 160(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v29, 176(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v30, 192(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stxv v31, 208(r1) # 16-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r3, 16(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: stw r4, 20(r1) +; BE-32BIT-P10-NEXT: #APP +; BE-32BIT-P10-NEXT: nop +; BE-32BIT-P10-NEXT: #NO_APP +; BE-32BIT-P10-NEXT: addi r3, r1, 20 +; BE-32BIT-P10-NEXT: bl callee2 +; BE-32BIT-P10-NEXT: lwz r4, 16(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v31, 208(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v30, 192(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v29, 176(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v28, 160(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v27, 144(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v26, 128(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v25, 112(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v24, 96(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v23, 80(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v22, 64(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v21, 48(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lxv v20, 32(r1) # 16-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f31, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f30, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f29, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f28, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f27, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f26, 400(r1) # 8-byte Folded Reload +; 
BE-32BIT-P10-NEXT: lfd f25, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f24, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f23, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f22, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f21, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f20, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f19, 344(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f18, 336(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f17, 328(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f16, 320(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lfd f15, 312(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r4, 16(r4) +; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: lwz r12, 228(r1) +; BE-32BIT-P10-NEXT: mtocrf 32, r12 +; BE-32BIT-P10-NEXT: mtocrf 16, r12 +; BE-32BIT-P10-NEXT: mtocrf 8, r12 +; BE-32BIT-P10-NEXT: lfd f14, 304(r1) # 8-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r31, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r30, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r29, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r28, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r27, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r26, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r25, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r24, 272(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r23, 268(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r22, 264(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r21, 260(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r20, 256(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r19, 252(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r18, 248(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r17, 244(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r16, 240(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r15, 236(r1) # 
4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r14, 232(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r0, 452(r1) +; BE-32BIT-P10-NEXT: addi r1, r1, 448 +; BE-32BIT-P10-NEXT: hashchk r0, -424(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: spill: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: stw r0, 4(r1) +; BE-32BIT-P9-NEXT: hashst r0, -424(r1) +; BE-32BIT-P9-NEXT: stwu r1, -448(r1) +; BE-32BIT-P9-NEXT: mfcr r12 +; BE-32BIT-P9-NEXT: stw r14, 232(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r15, 236(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r16, 240(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r17, 244(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r18, 248(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r19, 252(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r20, 256(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r21, 260(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r22, 264(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r23, 268(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r24, 272(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r25, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r26, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r27, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r28, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r29, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r30, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r31, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r12, 228(r1) +; BE-32BIT-P9-NEXT: stxv v20, 32(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v21, 48(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v22, 64(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v23, 80(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v24, 96(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v25, 112(r1) # 16-byte Folded Spill +; 
BE-32BIT-P9-NEXT: stxv v26, 128(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v27, 144(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v28, 160(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v29, 176(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v30, 192(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: stxv v31, 208(r1) # 16-byte Folded Spill +; BE-32BIT-P9-NEXT: lwz r4, 12(r3) +; BE-32BIT-P9-NEXT: stfd f14, 304(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f15, 312(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f16, 320(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f17, 328(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f18, 336(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f19, 344(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f20, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f21, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f22, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f23, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f24, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f25, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f26, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f27, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f28, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f29, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f30, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stfd f31, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r3, 16(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: stw r4, 20(r1) +; BE-32BIT-P9-NEXT: #APP +; BE-32BIT-P9-NEXT: nop +; BE-32BIT-P9-NEXT: #NO_APP +; BE-32BIT-P9-NEXT: addi r3, r1, 20 +; BE-32BIT-P9-NEXT: bl callee2 +; BE-32BIT-P9-NEXT: lwz r4, 16(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v31, 208(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v30, 192(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v29, 176(r1) # 16-byte Folded Reload +; 
BE-32BIT-P9-NEXT: lxv v28, 160(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v27, 144(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v26, 128(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v25, 112(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v24, 96(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v23, 80(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v22, 64(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v21, 48(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lxv v20, 32(r1) # 16-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f31, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f30, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f29, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f28, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f27, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r4, 16(r4) +; BE-32BIT-P9-NEXT: lfd f26, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: lfd f25, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f24, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f23, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f22, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f21, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f20, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f19, 344(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f18, 336(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f17, 328(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f16, 320(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f15, 312(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r12, 228(r1) +; BE-32BIT-P9-NEXT: lwz r31, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r30, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r29, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r28, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r27, 284(r1) # 4-byte Folded 
Reload +; BE-32BIT-P9-NEXT: lwz r26, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r25, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r24, 272(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r23, 268(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r22, 264(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r21, 260(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r20, 256(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r19, 252(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r18, 248(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r17, 244(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r16, 240(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r15, 236(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r14, 232(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lfd f14, 304(r1) # 8-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r0, 452(r1) +; BE-32BIT-P9-NEXT: mtocrf 32, r12 +; BE-32BIT-P9-NEXT: mtocrf 16, r12 +; BE-32BIT-P9-NEXT: mtocrf 8, r12 +; BE-32BIT-P9-NEXT: addi r1, r1, 448 +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -424(r1) +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: spill: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: stw r0, 4(r1) +; BE-32BIT-P8-NEXT: hashst r0, -424(r1) +; BE-32BIT-P8-NEXT: stwu r1, -448(r1) +; BE-32BIT-P8-NEXT: mfcr r12 +; BE-32BIT-P8-NEXT: li r4, 32 +; BE-32BIT-P8-NEXT: stw r14, 232(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r15, 236(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r16, 240(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r17, 244(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r18, 248(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r19, 252(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r20, 256(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r21, 260(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r22, 264(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r23, 268(r1) # 4-byte 
Folded Spill +; BE-32BIT-P8-NEXT: stw r24, 272(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r25, 276(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r26, 280(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r27, 284(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r28, 288(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r29, 292(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r30, 296(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r31, 300(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r12, 228(r1) +; BE-32BIT-P8-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 48 +; BE-32BIT-P8-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 64 +; BE-32BIT-P8-NEXT: stfd f14, 304(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 80 +; BE-32BIT-P8-NEXT: stfd f15, 312(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 96 +; BE-32BIT-P8-NEXT: stfd f16, 320(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 112 +; BE-32BIT-P8-NEXT: stfd f17, 328(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 128 +; BE-32BIT-P8-NEXT: stfd f18, 336(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 144 +; BE-32BIT-P8-NEXT: stfd f19, 344(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 160 +; BE-32BIT-P8-NEXT: stfd f20, 352(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 176 +; BE-32BIT-P8-NEXT: stfd f21, 360(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 192 +; BE-32BIT-P8-NEXT: 
stfd f22, 368(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: li r4, 208 +; BE-32BIT-P8-NEXT: stfd f23, 376(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill +; BE-32BIT-P8-NEXT: lwz r4, 12(r3) +; BE-32BIT-P8-NEXT: stfd f24, 384(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f25, 392(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f26, 400(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f27, 408(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f28, 416(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f29, 424(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f30, 432(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stfd f31, 440(r1) # 8-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r3, 16(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: stw r4, 20(r1) +; BE-32BIT-P8-NEXT: #APP +; BE-32BIT-P8-NEXT: nop +; BE-32BIT-P8-NEXT: #NO_APP +; BE-32BIT-P8-NEXT: addi r3, r1, 20 +; BE-32BIT-P8-NEXT: bl callee2 +; BE-32BIT-P8-NEXT: lwz r4, 16(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f31, 440(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f30, 432(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f29, 424(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f28, 416(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r4, 16(r4) +; BE-32BIT-P8-NEXT: lfd f27, 408(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f26, 400(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f25, 392(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f24, 384(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: li r4, 208 +; BE-32BIT-P8-NEXT: lfd f23, 376(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lfd f22, 368(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 192 +; BE-32BIT-P8-NEXT: lfd f21, 360(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v30, r1, r4 # 
16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 176 +; BE-32BIT-P8-NEXT: lfd f20, 352(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v29, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 160 +; BE-32BIT-P8-NEXT: lfd f19, 344(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v28, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 144 +; BE-32BIT-P8-NEXT: lfd f18, 336(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v27, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 128 +; BE-32BIT-P8-NEXT: lfd f17, 328(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v26, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 112 +; BE-32BIT-P8-NEXT: lfd f16, 320(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v25, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 96 +; BE-32BIT-P8-NEXT: lfd f15, 312(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lxvd2x v24, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 80 +; BE-32BIT-P8-NEXT: lxvd2x v23, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 64 +; BE-32BIT-P8-NEXT: lxvd2x v22, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 48 +; BE-32BIT-P8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: li r4, 32 +; BE-32BIT-P8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r12, 228(r1) +; BE-32BIT-P8-NEXT: lfd f14, 304(r1) # 8-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r31, 300(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r30, 296(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r29, 292(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r28, 288(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r27, 284(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r26, 280(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r25, 276(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r24, 272(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r23, 268(r1) # 4-byte Folded Reload +; 
BE-32BIT-P8-NEXT: mtocrf 32, r12 +; BE-32BIT-P8-NEXT: lwz r22, 264(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r21, 260(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r20, 256(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r19, 252(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: mtocrf 16, r12 +; BE-32BIT-P8-NEXT: lwz r18, 248(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r17, 244(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r16, 240(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r15, 236(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: mtocrf 8, r12 +; BE-32BIT-P8-NEXT: lwz r14, 232(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r0, 452(r1) +; BE-32BIT-P8-NEXT: addi r1, r1, 448 +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: hashchk r0, -424(r1) +; BE-32BIT-P8-NEXT: blr +; ; LE-P10-PRIV-LABEL: spill: ; LE-P10-PRIV: # %bb.0: # %entry ; LE-P10-PRIV-NEXT: mflr r0 @@ -2668,6 +3127,90 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 { ; BE-P8-NEXT: li r3, 0 ; BE-P8-NEXT: blr ; +; BE-32BIT-P10-LABEL: shrinkwrap: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: stw r0, 4(r1) +; BE-32BIT-P10-NEXT: hashst r0, -16(r1) +; BE-32BIT-P10-NEXT: stwu r1, -32(r1) +; BE-32BIT-P10-NEXT: cmplwi r3, 0 +; BE-32BIT-P10-NEXT: stw r30, 24(r1) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: beq cr0, .LBB2_2 +; BE-32BIT-P10-NEXT: # %bb.1: # %if.end +; BE-32BIT-P10-NEXT: mr r30, r3 +; BE-32BIT-P10-NEXT: lwz r3, 12(r3) +; BE-32BIT-P10-NEXT: stw r3, 12(r1) +; BE-32BIT-P10-NEXT: addi r3, r1, 12 +; BE-32BIT-P10-NEXT: bl callee2 +; BE-32BIT-P10-NEXT: lwz r4, 16(r30) +; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: b .LBB2_3 +; BE-32BIT-P10-NEXT: .LBB2_2: +; BE-32BIT-P10-NEXT: li r3, 0 +; BE-32BIT-P10-NEXT: .LBB2_3: # %return +; BE-32BIT-P10-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: lwz r0, 36(r1) +; BE-32BIT-P10-NEXT: addi r1, r1, 32 +; BE-32BIT-P10-NEXT: hashchk r0, 
-16(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: shrinkwrap: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: stw r0, 4(r1) +; BE-32BIT-P9-NEXT: hashst r0, -16(r1) +; BE-32BIT-P9-NEXT: stwu r1, -32(r1) +; BE-32BIT-P9-NEXT: cmplwi r3, 0 +; BE-32BIT-P9-NEXT: stw r30, 24(r1) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: beq cr0, .LBB2_2 +; BE-32BIT-P9-NEXT: # %bb.1: # %if.end +; BE-32BIT-P9-NEXT: mr r30, r3 +; BE-32BIT-P9-NEXT: lwz r3, 12(r3) +; BE-32BIT-P9-NEXT: stw r3, 12(r1) +; BE-32BIT-P9-NEXT: addi r3, r1, 12 +; BE-32BIT-P9-NEXT: bl callee2 +; BE-32BIT-P9-NEXT: lwz r4, 16(r30) +; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: b .LBB2_3 +; BE-32BIT-P9-NEXT: .LBB2_2: +; BE-32BIT-P9-NEXT: li r3, 0 +; BE-32BIT-P9-NEXT: .LBB2_3: # %return +; BE-32BIT-P9-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: lwz r0, 36(r1) +; BE-32BIT-P9-NEXT: addi r1, r1, 32 +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: shrinkwrap: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: stw r0, 4(r1) +; BE-32BIT-P8-NEXT: hashst r0, -16(r1) +; BE-32BIT-P8-NEXT: stwu r1, -32(r1) +; BE-32BIT-P8-NEXT: cmplwi r3, 0 +; BE-32BIT-P8-NEXT: stw r30, 24(r1) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: beq cr0, .LBB2_2 +; BE-32BIT-P8-NEXT: # %bb.1: # %if.end +; BE-32BIT-P8-NEXT: mr r30, r3 +; BE-32BIT-P8-NEXT: lwz r3, 12(r3) +; BE-32BIT-P8-NEXT: stw r3, 12(r1) +; BE-32BIT-P8-NEXT: addi r3, r1, 12 +; BE-32BIT-P8-NEXT: bl callee2 +; BE-32BIT-P8-NEXT: lwz r4, 16(r30) +; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: b .LBB2_3 +; BE-32BIT-P8-NEXT: .LBB2_2: +; BE-32BIT-P8-NEXT: li r3, 0 +; BE-32BIT-P8-NEXT: .LBB2_3: # %return +; BE-32BIT-P8-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: lwz r0, 36(r1) +; BE-32BIT-P8-NEXT: addi r1, r1, 32 +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: 
hashchk r0, -16(r1) +; BE-32BIT-P8-NEXT: blr +; ; LE-P10-PRIV-LABEL: shrinkwrap: ; LE-P10-PRIV: # %bb.0: # %entry ; LE-P10-PRIV-NEXT: cmpldi r3, 0 @@ -3259,6 +3802,147 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 { ; BE-P8-NEXT: mtlr r0 ; BE-P8-NEXT: blr ; +; BE-32BIT-P10-LABEL: aligned: +; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: mflr r0 +; BE-32BIT-P10-NEXT: lis r12, -1 +; BE-32BIT-P10-NEXT: stw r0, 4(r1) +; BE-32BIT-P10-NEXT: hashst r0, -24(r1) +; BE-32BIT-P10-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P10-NEXT: ori r12, r12, 0 +; BE-32BIT-P10-NEXT: subc r0, r12, r0 +; BE-32BIT-P10-NEXT: stwux r1, r1, r0 +; BE-32BIT-P10-NEXT: sub r0, r1, r0 +; BE-32BIT-P10-NEXT: lis r4, 0 +; BE-32BIT-P10-NEXT: addi r5, r1, 32764 +; BE-32BIT-P10-NEXT: addic r0, r0, -8 +; BE-32BIT-P10-NEXT: ori r4, r4, 65508 +; BE-32BIT-P10-NEXT: stwx r30, 0, r0 +; BE-32BIT-P10-NEXT: addic r30, r0, 8 +; BE-32BIT-P10-NEXT: stw r29, -12(r30) # 4-byte Folded Spill +; BE-32BIT-P10-NEXT: mr r29, r3 +; BE-32BIT-P10-NEXT: lwz r3, 4(r3) +; BE-32BIT-P10-NEXT: stwx r3, r1, r4 +; BE-32BIT-P10-NEXT: lwz r3, 12(r29) +; BE-32BIT-P10-NEXT: lis r4, 0 +; BE-32BIT-P10-NEXT: ori r4, r4, 32768 +; BE-32BIT-P10-NEXT: stwx r3, r1, r4 +; BE-32BIT-P10-NEXT: lwz r3, 20(r29) +; BE-32BIT-P10-NEXT: lis r4, 0 +; BE-32BIT-P10-NEXT: ori r4, r4, 65508 +; BE-32BIT-P10-NEXT: add r4, r1, r4 +; BE-32BIT-P10-NEXT: stw r3, 32764(r1) +; BE-32BIT-P10-NEXT: lis r3, 0 +; BE-32BIT-P10-NEXT: ori r3, r3, 32768 +; BE-32BIT-P10-NEXT: add r3, r1, r3 +; BE-32BIT-P10-NEXT: bl callee3 +; BE-32BIT-P10-NEXT: lwz r4, 16(r29) +; BE-32BIT-P10-NEXT: lwz r29, -12(r30) # 4-byte Folded Reload +; BE-32BIT-P10-NEXT: mr r0, r31 +; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: lwz r31, 0(r1) +; BE-32BIT-P10-NEXT: lwz r30, -8(r31) +; BE-32BIT-P10-NEXT: mr r1, r31 +; BE-32BIT-P10-NEXT: mr r31, r0 +; BE-32BIT-P10-NEXT: lwz r0, 4(r1) +; BE-32BIT-P10-NEXT: hashchk r0, -24(r1) +; BE-32BIT-P10-NEXT: mtlr r0 +; 
BE-32BIT-P10-NEXT: blr +; +; BE-32BIT-P9-LABEL: aligned: +; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: mflr r0 +; BE-32BIT-P9-NEXT: lis r12, -1 +; BE-32BIT-P9-NEXT: ori r12, r12, 0 +; BE-32BIT-P9-NEXT: stw r0, 4(r1) +; BE-32BIT-P9-NEXT: hashst r0, -24(r1) +; BE-32BIT-P9-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P9-NEXT: subc r0, r12, r0 +; BE-32BIT-P9-NEXT: stwux r1, r1, r0 +; BE-32BIT-P9-NEXT: sub r0, r1, r0 +; BE-32BIT-P9-NEXT: lis r4, 0 +; BE-32BIT-P9-NEXT: addi r5, r1, 32764 +; BE-32BIT-P9-NEXT: addic r0, r0, -8 +; BE-32BIT-P9-NEXT: ori r4, r4, 65508 +; BE-32BIT-P9-NEXT: stwx r30, 0, r0 +; BE-32BIT-P9-NEXT: addic r30, r0, 8 +; BE-32BIT-P9-NEXT: stw r29, -12(r30) # 4-byte Folded Spill +; BE-32BIT-P9-NEXT: mr r29, r3 +; BE-32BIT-P9-NEXT: lwz r3, 4(r3) +; BE-32BIT-P9-NEXT: stwx r3, r1, r4 +; BE-32BIT-P9-NEXT: lwz r3, 12(r29) +; BE-32BIT-P9-NEXT: lis r4, 0 +; BE-32BIT-P9-NEXT: ori r4, r4, 32768 +; BE-32BIT-P9-NEXT: stwx r3, r1, r4 +; BE-32BIT-P9-NEXT: lwz r3, 20(r29) +; BE-32BIT-P9-NEXT: lis r4, 0 +; BE-32BIT-P9-NEXT: ori r4, r4, 65508 +; BE-32BIT-P9-NEXT: stw r3, 32764(r1) +; BE-32BIT-P9-NEXT: lis r3, 0 +; BE-32BIT-P9-NEXT: add r4, r1, r4 +; BE-32BIT-P9-NEXT: ori r3, r3, 32768 +; BE-32BIT-P9-NEXT: add r3, r1, r3 +; BE-32BIT-P9-NEXT: bl callee3 +; BE-32BIT-P9-NEXT: lwz r4, 16(r29) +; BE-32BIT-P9-NEXT: lwz r29, -12(r30) # 4-byte Folded Reload +; BE-32BIT-P9-NEXT: mr r0, r31 +; BE-32BIT-P9-NEXT: lwz r31, 0(r1) +; BE-32BIT-P9-NEXT: lwz r30, -8(r31) +; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: mr r1, r31 +; BE-32BIT-P9-NEXT: mr r31, r0 +; BE-32BIT-P9-NEXT: lwz r0, 4(r1) +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: hashchk r0, -24(r1) +; BE-32BIT-P9-NEXT: blr +; +; BE-32BIT-P8-LABEL: aligned: +; BE-32BIT-P8: # %bb.0: # %entry +; BE-32BIT-P8-NEXT: mflr r0 +; BE-32BIT-P8-NEXT: lis r12, -1 +; BE-32BIT-P8-NEXT: stw r0, 4(r1) +; BE-32BIT-P8-NEXT: hashst r0, -24(r1) +; BE-32BIT-P8-NEXT: clrlwi r0, r1, 17 +; BE-32BIT-P8-NEXT: ori r12, r12, 0 +; 
BE-32BIT-P8-NEXT: subc r0, r12, r0 +; BE-32BIT-P8-NEXT: stwux r1, r1, r0 +; BE-32BIT-P8-NEXT: sub r0, r1, r0 +; BE-32BIT-P8-NEXT: lis r6, 0 +; BE-32BIT-P8-NEXT: addic r0, r0, -8 +; BE-32BIT-P8-NEXT: ori r6, r6, 65508 +; BE-32BIT-P8-NEXT: stwx r30, 0, r0 +; BE-32BIT-P8-NEXT: addic r30, r0, 8 +; BE-32BIT-P8-NEXT: stw r29, -12(r30) # 4-byte Folded Spill +; BE-32BIT-P8-NEXT: mr r29, r3 +; BE-32BIT-P8-NEXT: lwz r3, 4(r3) +; BE-32BIT-P8-NEXT: lwz r4, 12(r29) +; BE-32BIT-P8-NEXT: lwz r5, 20(r29) +; BE-32BIT-P8-NEXT: stwx r3, r1, r6 +; BE-32BIT-P8-NEXT: lis r3, 0 +; BE-32BIT-P8-NEXT: ori r3, r3, 32768 +; BE-32BIT-P8-NEXT: stw r5, 32764(r1) +; BE-32BIT-P8-NEXT: addi r5, r1, 32764 +; BE-32BIT-P8-NEXT: stwx r4, r1, r3 +; BE-32BIT-P8-NEXT: lis r3, 0 +; BE-32BIT-P8-NEXT: lis r4, 0 +; BE-32BIT-P8-NEXT: ori r3, r3, 32768 +; BE-32BIT-P8-NEXT: ori r4, r4, 65508 +; BE-32BIT-P8-NEXT: add r3, r1, r3 +; BE-32BIT-P8-NEXT: add r4, r1, r4 +; BE-32BIT-P8-NEXT: bl callee3 +; BE-32BIT-P8-NEXT: lwz r4, 16(r29) +; BE-32BIT-P8-NEXT: lwz r29, -12(r30) # 4-byte Folded Reload +; BE-32BIT-P8-NEXT: mr r0, r31 +; BE-32BIT-P8-NEXT: lwz r31, 0(r1) +; BE-32BIT-P8-NEXT: lwz r30, -8(r31) +; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: mr r1, r31 +; BE-32BIT-P8-NEXT: mr r31, r0 +; BE-32BIT-P8-NEXT: lwz r0, 4(r1) +; BE-32BIT-P8-NEXT: hashchk r0, -24(r1) +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: blr +; ; LE-P10-PRIV-LABEL: aligned: ; LE-P10-PRIV: # %bb.0: # %entry ; LE-P10-PRIV-NEXT: mflr r0 From 69deb3c8297b157fffaac6043583e4c7cd0d7ee7 Mon Sep 17 00:00:00 2001 From: Sumanth Gundapaneni Date: Fri, 12 Jul 2019 15:20:15 -0500 Subject: [PATCH 732/992] [Hexagon] Update instruction info for missing .cur post-increment cases --- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index a36be6b02caa..ca395c063479 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -3564,6 +3564,10 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_nt_cur_pi; case Hexagon::V6_vL32b_nt_ai: return Hexagon::V6_vL32b_nt_cur_ai; + case Hexagon::V6_vL32b_ppu: + return Hexagon::V6_vL32b_cur_ppu; + case Hexagon::V6_vL32b_nt_ppu: + return Hexagon::V6_vL32b_nt_cur_ppu; } return 0; } @@ -3580,6 +3584,10 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_nt_pi; case Hexagon::V6_vL32b_nt_cur_ai: return Hexagon::V6_vL32b_nt_ai; + case Hexagon::V6_vL32b_cur_ppu: + return Hexagon::V6_vL32b_ppu; + case Hexagon::V6_vL32b_nt_cur_ppu: + return Hexagon::V6_vL32b_nt_ppu; } return 0; } From 9d0f5c1f8ef1c7689abd52fc0f00a2e075d276d9 Mon Sep 17 00:00:00 2001 From: Sumanth Gundapaneni Date: Wed, 28 Oct 2020 12:06:38 -0500 Subject: [PATCH 733/992] [Hexagon] Add missing memop instructions to HexagonInstrInfo This patch updated HexagonInstrInfo API to deal with missing immediate memop instructions that checks for the validity of the offset. 
--- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index ca395c063479..5152060aa921 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2836,6 +2836,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L4_isub_memopw_io: case Hexagon::L4_add_memopw_io: case Hexagon::L4_sub_memopw_io: + case Hexagon::L4_iand_memopw_io: + case Hexagon::L4_ior_memopw_io: case Hexagon::L4_and_memopw_io: case Hexagon::L4_or_memopw_io: return (0 <= Offset && Offset <= 255); @@ -2844,6 +2846,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L4_isub_memoph_io: case Hexagon::L4_add_memoph_io: case Hexagon::L4_sub_memoph_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_ior_memoph_io: case Hexagon::L4_and_memoph_io: case Hexagon::L4_or_memoph_io: return (0 <= Offset && Offset <= 127); @@ -2852,6 +2856,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L4_isub_memopb_io: case Hexagon::L4_add_memopb_io: case Hexagon::L4_sub_memopb_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_ior_memopb_io: case Hexagon::L4_and_memopb_io: case Hexagon::L4_or_memopb_io: return (0 <= Offset && Offset <= 63); From f6309db719a4bb78a22a80451d1c2998b5dcda01 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 4 Mar 2021 10:08:11 -0800 Subject: [PATCH 734/992] [Hexagon] Handle L2_loadb[sz]w[24]_io in HII::isValidOffset --- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 10 ++++++++++ llvm/lib/Target/Hexagon/HexagonPatterns.td | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 5152060aa921..57dc2f5585b4 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ 
b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2902,8 +2902,18 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::S2_pstorerdt_io: case Hexagon::S2_pstorerdf_io: return isShiftedUInt<6,3>(Offset); + + case Hexagon::L2_loadbsw2_io: + case Hexagon::L2_loadbzw2_io: + return isShiftedInt<11,1>(Offset); + + case Hexagon::L2_loadbsw4_io: + case Hexagon::L2_loadbzw4_io: + return isShiftedInt<11,2>(Offset); } // switch + dbgs() << "Failed Opcode is : " << Opcode << " (" << getName(Opcode) + << ")\n"; llvm_unreachable("No offset range is defined for this opcode. " "Please define it in the above switch statement!"); } diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index cab59626a600..3abbd896c519 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -2080,7 +2080,7 @@ let AddedComplexity = 20 in { defm: Loadxi_pat; defm: Loadxi_pat; defm: Loadxi_pat; - defm: Loadxi_pat; + defm: Loadxi_pat; defm: Loadxi_pat; defm: Loadxi_pat; defm: Loadxi_pat; @@ -2132,7 +2132,7 @@ let AddedComplexity = 60 in { def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; - def: Loadxu_pat; + def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; From 3892baaa711ab00e0abcbf9f813bfe0b61110f31 Mon Sep 17 00:00:00 2001 From: Ikhlas Ajbar Date: Wed, 5 Jan 2022 13:15:14 -0800 Subject: [PATCH 735/992] [Hexagon] Replace isImmValidForOpcode() with isExtendable flag --- llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp | 16 +++------------- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 5 +++++ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 338fda57c53a..43afae441457 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -1587,16 +1587,6 @@ void 
HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { MO.setReg(NewR); } -static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { - // These two instructions are not extendable. - if (CmpOpc == Hexagon::A4_cmpbeqi) - return isUInt<8>(Imm); - if (CmpOpc == Hexagon::A4_cmpbgti) - return isInt<8>(Imm); - // The rest of the comparison-with-immediate instructions are extendable. - return true; -} - bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *Header = L->getHeader(); MachineBasicBlock *Latch = L->getLoopLatch(); @@ -1812,9 +1802,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { // Most comparisons of register against an immediate value allow // the immediate to be constant-extended. There are some exceptions // though. Make sure the new combination will work. - if (CmpImmOp->isImm()) - if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) - return false; + if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) && + !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false)) + return false; // Make sure that the compare happens after the bump. Otherwise, // after the fixup, the compare would use a yet-undefined register. diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 57dc2f5585b4..dadedff4038e 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2799,6 +2799,11 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::S4_storeirit_io: case Hexagon::S4_storeirif_io: return isShiftedUInt<6,2>(Offset); + // Handle these two compare instructions that are not extendable. 
+ case Hexagon::A4_cmpbeqi: + return isUInt<8>(Offset); + case Hexagon::A4_cmpbgti: + return isInt<8>(Offset); } if (Extend) From 8cc52ca73491635aa15ce24821a37cb7185a36e8 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Jan 2022 13:30:19 -0800 Subject: [PATCH 736/992] [instcombine] Add test coverage for (x >>u y) pred x --- llvm/test/Transforms/InstCombine/lshr.ll | 64 ++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index a158f5ecba92..ce2e3293aff1 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -719,3 +719,67 @@ define <3 x i14> @lshr_sext_i1_to_i14_splat_vec_use1(<3 x i1> %a) { %lshr = lshr <3 x i14> %sext, ret <3 x i14> %lshr } + +define i1 @icmp_ule(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_ule( +; CHECK-NEXT: ret i1 true +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp ule i32 %x.shifted, %x + ret i1 %cmp +} + +define i1 @icmp_ult(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_ult( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp ult i32 %x.shifted, %x + ret i1 %cmp +} + +define i1 @icmp_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_eq( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp eq i32 %x.shifted, %x + ret i1 %cmp +} + +define i1 @icmp_ne(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_ne( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp ne i32 %x.shifted, %x + ret i1 %cmp +} + +define i1 @icmp_ugt(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_ugt( +; 
CHECK-NEXT: ret i1 false +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp ugt i32 %x.shifted, %x + ret i1 %cmp +} + +define i1 @icmp_uge(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_uge( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp uge i32 %x.shifted, %x + ret i1 %cmp +} + + From cfcd7af8deb8a02c3832e211905dcb30dd04dc1d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Jan 2022 13:37:17 -0800 Subject: [PATCH 737/992] [instcombine] Add test coverage for (x >>u y) pred x [part 2] --- llvm/test/Transforms/InstCombine/lshr.ll | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index ce2e3293aff1..1862975975e6 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -782,4 +782,46 @@ define i1 @icmp_uge(i32 %x, i32 %y) { ret i1 %cmp } +define i1 @icmp_sle(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_sle( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp sle i32 %x.shifted, %x + ret i1 %cmp +} + +define i1 @icmp_slt(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_slt( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp slt i32 %x.shifted, %x + ret i1 %cmp +} +define i1 @icmp_sgt(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_sgt( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp sgt i32 %x.shifted, %x + ret i1 %cmp +} + 
+define i1 @icmp_sge(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_sge( +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X_SHIFTED]], [[X]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.shifted = lshr i32 %x, %y + %cmp = icmp sge i32 %x.shifted, %x + ret i1 %cmp +} From cdbad62c526c5b7e13f634b9d6bc54f2a01aabc0 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 5 Jan 2022 13:48:50 -0800 Subject: [PATCH 738/992] [ADCE][NFC] Batch DT updates together This patch delayed the updates of the dominator tree to the very end of the pass instead of doing that in small increments after each basic block. This improves the runtime of the pass in particular in pathological cases because now the updater sees the full extend of the updates and can decide whether it is faster to apply the changes incrementally or just recompute the full tree from scratch. Put differently, thanks to this patch, we can take advantage of the improvements that Chijun Sima made in the dominator tree updater a while ago with commit 32fd196cbf4d: "Teach the DominatorTree fallback to recalculation when applying updates to speedup JT (PR37929)". This change is NFC but can improve the runtime of the compiler dramatically in some pathological cases (where the pass was pushing a lot (several thousands) of small updates (less than 6)). For instance on the motivating example we went from 300+ sec to less than a second. Differential Revision: https://reviews.llvm.org/D116610 --- llvm/lib/Transforms/Scalar/ADCE.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp index b693acceb3f6..1cda206a7e14 100644 --- a/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -579,6 +579,7 @@ bool AggressiveDeadCodeElimination::updateDeadRegions() { // Don't compute the post ordering unless we needed it. 
bool HavePostOrder = false; bool Changed = false; + SmallVector DeletedEdges; for (auto *BB : BlocksWithDeadTerminators) { auto &Info = BlockInfo[BB]; @@ -617,7 +618,6 @@ bool AggressiveDeadCodeElimination::updateDeadRegions() { makeUnconditional(BB, PreferredSucc->BB); // Inform the dominators about the deleted CFG edges. - SmallVector DeletedEdges; for (auto *Succ : RemovedSuccessors) { // It might have happened that the same successor appeared multiple times // and the CFG edge wasn't really removed. @@ -629,13 +629,14 @@ bool AggressiveDeadCodeElimination::updateDeadRegions() { } } - DomTreeUpdater(DT, &PDT, DomTreeUpdater::UpdateStrategy::Eager) - .applyUpdates(DeletedEdges); - NumBranchesRemoved += 1; Changed = true; } + if (!DeletedEdges.empty()) + DomTreeUpdater(DT, &PDT, DomTreeUpdater::UpdateStrategy::Eager) + .applyUpdates(DeletedEdges); + return Changed; } From 2819e5de42e03f100c9a90c1328465e99d28ee5d Mon Sep 17 00:00:00 2001 From: Ikhlas Ajbar Date: Thu, 16 Apr 2020 15:56:56 -0500 Subject: [PATCH 739/992] [Hexagon] Handle instruction selection for select(I1,Q,Q) Lower select(I1,Q,Q) by converting vector predicate Q to vector register V, doing select(I1,V,V), and then converting the resulting V back to Q. Also, try to avoid creating such situations in the first place. 
--- .../Target/Hexagon/HexagonISelDAGToDAG.cpp | 3 ++ llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 6 ++++ .../CodeGen/Hexagon/select-vector-pred.ll | 30 +++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/select-vector-pred.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 2679e399852f..0225cc5f506a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1176,6 +1176,9 @@ void HexagonDAGToDAGISel::ppHoistZextI1(std::vector &&Nodes) { EVT UVT = U->getValueType(0); if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1) continue; + // Do not generate select for all i1 vector type. + if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1) + continue; if (isMemOPCandidate(N, U)) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 2e739d6e06f8..0a3dff057ccd 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -736,6 +736,12 @@ let Predicates = [UseHVX] in { def: HvxSel_pat; } +def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>; + +let Predicates = [UseHVX] in + def: Pat<(select I1:$Pu, VecI1:$Qs, VecI1:$Qt), + (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; + let Predicates = [UseHVX] in { def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>; def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>; diff --git a/llvm/test/CodeGen/Hexagon/select-vector-pred.ll b/llvm/test/CodeGen/Hexagon/select-vector-pred.ll new file mode 100644 index 000000000000..58a052cc3701 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/select-vector-pred.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b < %s | FileCheck %s + +; Do not generate selectI1,Q,Q. 
+; CHECK: q[[Q:[0-9]+]] = vsetq(r{{[0-9]+}}) +; CHECK: q{{[0-9]+}} = and(q{{[0-9]+}},q[[Q]]) +; CHECK-NOT: v{{[0-9]+}} = vand(q{{[0-9]+}},r{{[0-9]+}}) + +target triple = "hexagon" + +declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0 +declare <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1 +declare <128 x i1> @llvm.hexagon.V6.pred.and.128B(<128 x i1>, <128 x i1>) #1 + +define void @libjit_convertFromD32_sm_hf_wrap_3_specialized(i16* %0) local_unnamed_addr #2 { +entry: + %arrayidx55.i.i = getelementptr inbounds i16, i16* %0, i32 undef + %1 = ptrtoint i16* %arrayidx55.i.i to i32 + %and.i5.i.i = and i32 %1, 127 + %2 = icmp eq i32 %and.i5.i.i, 127 + %.sroa.speculated.i13.i.i = zext i1 %2 to i32 + %3 = tail call <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %.sroa.speculated.i13.i.i) #3 + %4 = tail call <128 x i1> @llvm.hexagon.V6.pred.and.128B(<128 x i1> undef, <128 x i1> %3) #3 + tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1> %4, i8* nonnull undef, <32 x i32> undef) #3 + unreachable + } + +attributes #0 = { nounwind writeonly } +attributes #1 = { nounwind readnone } +attributes #2 = { "use-soft-float"="false" } +attributes #3 = { nounwind } From 46a28a954e5e813f64ff458891b2d0891963c7d8 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 5 Jan 2022 14:42:21 -0800 Subject: [PATCH 740/992] [lldb] Create a property to store the REPL language Until the introduction of the C++ REPL, there was always a single REPL language. Several places relied on this assumption through repl_languages.GetSingularLanguage. Now that this is no longer the case, we need a way to specify a selected/preferred REPL language. This patch does that with the help of a debugger property, taking inspiration from how we store the scripting language. 
Differential revision: https://reviews.llvm.org/D116697 --- lldb/include/lldb/API/SBDebugger.h | 4 ++++ lldb/include/lldb/Core/Debugger.h | 4 ++++ .../lldb/Interpreter/OptionValueProperties.h | 3 +++ lldb/source/API/SBDebugger.cpp | 19 ++++++++++++++++ lldb/source/Core/CoreProperties.td | 4 ++++ lldb/source/Core/Debugger.cpp | 22 +++++++++++++++++-- .../source/Interpreter/CommandInterpreter.cpp | 18 ++++++++------- .../Interpreter/OptionValueProperties.cpp | 11 ++++++++++ lldb/source/Target/Target.cpp | 7 ++++-- lldb/tools/driver/Driver.cpp | 1 + 10 files changed, 81 insertions(+), 12 deletions(-) diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 1c771330cddc..a82c147053eb 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -306,6 +306,10 @@ class LLDB_API SBDebugger { void SetScriptLanguage(lldb::ScriptLanguage script_lang); + lldb::LanguageType GetREPLLanguage() const; + + void SetREPLLanguage(lldb::LanguageType repl_lang); + bool GetCloseInputOnEOF() const; void SetCloseInputOnEOF(bool b); diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h index 1ab21bec54c9..f9a1f1eea54f 100644 --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -306,6 +306,10 @@ class Debugger : public std::enable_shared_from_this, bool SetScriptLanguage(lldb::ScriptLanguage script_lang); + lldb::LanguageType GetREPLLanguage() const; + + bool SetREPLLanguage(lldb::LanguageType repl_lang); + uint32_t GetTerminalWidth() const; bool SetTerminalWidth(uint32_t term_width); diff --git a/lldb/include/lldb/Interpreter/OptionValueProperties.h b/lldb/include/lldb/Interpreter/OptionValueProperties.h index 6fa5403ac142..3e5685b7f0bf 100644 --- a/lldb/include/lldb/Interpreter/OptionValueProperties.h +++ b/lldb/include/lldb/Interpreter/OptionValueProperties.h @@ -114,6 +114,9 @@ class OptionValueProperties GetPropertyAtIndexAsOptionValueLanguage(const 
ExecutionContext *exe_ctx, uint32_t idx) const; + bool SetPropertyAtIndexAsLanguage(const ExecutionContext *exe_ctx, + uint32_t idx, lldb::LanguageType lang); + bool GetPropertyAtIndexAsArgs(const ExecutionContext *exe_ctx, uint32_t idx, Args &args) const; diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index fa5dcb57de7e..c8522b79ad89 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -1425,6 +1425,22 @@ void SBDebugger::SetScriptLanguage(ScriptLanguage script_lang) { } } +LanguageType SBDebugger::GetREPLLanguage() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::LanguageType, SBDebugger, + GetREPLLanguage); + + return (m_opaque_sp ? m_opaque_sp->GetREPLLanguage() : eLanguageTypeUnknown); +} + +void SBDebugger::SetREPLLanguage(LanguageType repl_lang) { + LLDB_RECORD_METHOD(void, SBDebugger, SetREPLLanguage, (lldb::LanguageType), + repl_lang); + + if (m_opaque_sp) { + m_opaque_sp->SetREPLLanguage(repl_lang); + } +} + bool SBDebugger::SetUseExternalEditor(bool value) { LLDB_RECORD_METHOD(bool, SBDebugger, SetUseExternalEditor, (bool), value); @@ -1870,6 +1886,9 @@ template <> void RegisterMethods(Registry &R) { GetScriptLanguage, ()); LLDB_REGISTER_METHOD(void, SBDebugger, SetScriptLanguage, (lldb::ScriptLanguage)); + LLDB_REGISTER_METHOD_CONST(lldb::LanguageType, SBDebugger, GetREPLLanguage, + ()); + LLDB_REGISTER_METHOD(void, SBDebugger, SetREPLLanguage, (lldb::LanguageType)); LLDB_REGISTER_METHOD(bool, SBDebugger, SetUseExternalEditor, (bool)); LLDB_REGISTER_METHOD(bool, SBDebugger, GetUseExternalEditor, ()); LLDB_REGISTER_METHOD(bool, SBDebugger, SetUseColor, (bool)); diff --git a/lldb/source/Core/CoreProperties.td b/lldb/source/Core/CoreProperties.td index 038ed00905f1..399407075a7e 100644 --- a/lldb/source/Core/CoreProperties.td +++ b/lldb/source/Core/CoreProperties.td @@ -62,6 +62,10 @@ let Definition = "debugger" in { DefaultEnumValue<"eScriptLanguagePython">, 
EnumValues<"OptionEnumValues(g_language_enumerators)">, Desc<"The script language to be used for evaluating user-written scripts.">; + def REPLLanguage: Property<"repl-lang", "Language">, + Global, + DefaultEnumValue<"eLanguageTypeUnknown">, + Desc<"The language to use for the REPL.">; def StopDisassemblyCount: Property<"stop-disassembly-count", "SInt64">, Global, DefaultUnsignedValue<4>, diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index ae454fae3322..49cc21b65951 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -25,6 +25,7 @@ #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionValue.h" +#include "lldb/Interpreter/OptionValueLanguage.h" #include "lldb/Interpreter/OptionValueProperties.h" #include "lldb/Interpreter/OptionValueSInt64.h" #include "lldb/Interpreter/OptionValueString.h" @@ -324,6 +325,20 @@ bool Debugger::SetScriptLanguage(lldb::ScriptLanguage script_lang) { script_lang); } +lldb::LanguageType Debugger::GetREPLLanguage() const { + const uint32_t idx = ePropertyREPLLanguage; + OptionValueLanguage *value = + m_collection_sp->GetPropertyAtIndexAsOptionValueLanguage(nullptr, idx); + if (value) + return value->GetCurrentValue(); + return LanguageType(); +} + +bool Debugger::SetREPLLanguage(lldb::LanguageType repl_lang) { + const uint32_t idx = ePropertyREPLLanguage; + return m_collection_sp->SetPropertyAtIndexAsLanguage(nullptr, idx, repl_lang); +} + uint32_t Debugger::GetTerminalWidth() const { const uint32_t idx = ePropertyTerminalWidth; return m_collection_sp->GetPropertyAtIndexAsSInt64( @@ -1753,17 +1768,20 @@ Status Debugger::RunREPL(LanguageType language, const char *repl_options) { Status err; FileSpec repl_executable; + if (language == eLanguageTypeUnknown) + language = GetREPLLanguage(); + if (language == eLanguageTypeUnknown) { LanguageSet repl_languages = Language::GetLanguagesSupportingREPLs(); if (auto 
single_lang = repl_languages.GetSingularLanguage()) { language = *single_lang; } else if (repl_languages.Empty()) { - err.SetErrorStringWithFormat( + err.SetErrorString( "LLDB isn't configured with REPL support for any languages."); return err; } else { - err.SetErrorStringWithFormat( + err.SetErrorString( "Multiple possible REPL languages. Please specify a language."); return err; } diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index bd03f18b47c0..085b06bce0ea 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2259,13 +2259,15 @@ static void GetHomeInitFile(llvm::SmallVectorImpl &init_file, FileSystem::Instance().Resolve(init_file); } -static void GetHomeREPLInitFile(llvm::SmallVectorImpl &init_file) { - LanguageSet repl_languages = Language::GetLanguagesSupportingREPLs(); - LanguageType language = eLanguageTypeUnknown; - if (auto main_repl_language = repl_languages.GetSingularLanguage()) - language = *main_repl_language; - else - return; +static void GetHomeREPLInitFile(llvm::SmallVectorImpl &init_file, + LanguageType language) { + if (language == eLanguageTypeUnknown) { + LanguageSet repl_languages = Language::GetLanguagesSupportingREPLs(); + if (auto main_repl_language = repl_languages.GetSingularLanguage()) + language = *main_repl_language; + else + return; + } std::string init_file_name = (llvm::Twine(".lldbinit-") + @@ -2355,7 +2357,7 @@ void CommandInterpreter::SourceInitFileHome(CommandReturnObject &result, llvm::SmallString<128> init_file; if (is_repl) - GetHomeREPLInitFile(init_file); + GetHomeREPLInitFile(init_file, GetDebugger().GetREPLLanguage()); if (init_file.empty()) GetHomeInitFile(init_file); diff --git a/lldb/source/Interpreter/OptionValueProperties.cpp b/lldb/source/Interpreter/OptionValueProperties.cpp index 1a8f2f0ab180..6e6580574edf 100644 --- a/lldb/source/Interpreter/OptionValueProperties.cpp +++ 
b/lldb/source/Interpreter/OptionValueProperties.cpp @@ -226,6 +226,17 @@ OptionValueProperties::GetPropertyAtIndexAsOptionValueLanguage( return nullptr; } +bool OptionValueProperties::SetPropertyAtIndexAsLanguage( + const ExecutionContext *exe_ctx, uint32_t idx, const LanguageType lang) { + const Property *property = GetPropertyAtIndex(exe_ctx, true, idx); + if (property) { + OptionValue *value = property->GetValue().get(); + if (value) + return value->SetLanguageValue(lang); + } + return false; +} + bool OptionValueProperties::GetPropertyAtIndexAsArgs( const ExecutionContext *exe_ctx, uint32_t idx, Args &args) const { const Property *property = GetPropertyAtIndex(exe_ctx, false, idx); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index fa860399aca7..01e51c0577aa 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -212,17 +212,20 @@ const lldb::ProcessSP &Target::GetProcessSP() const { return m_process_sp; } lldb::REPLSP Target::GetREPL(Status &err, lldb::LanguageType language, const char *repl_options, bool can_create) { + if (language == eLanguageTypeUnknown) + language = m_debugger.GetREPLLanguage(); + if (language == eLanguageTypeUnknown) { LanguageSet repl_languages = Language::GetLanguagesSupportingREPLs(); if (auto single_lang = repl_languages.GetSingularLanguage()) { language = *single_lang; } else if (repl_languages.Empty()) { - err.SetErrorStringWithFormat( + err.SetErrorString( "LLDB isn't configured with REPL support for any languages."); return REPLSP(); } else { - err.SetErrorStringWithFormat( + err.SetErrorString( "Multiple possible REPL languages. 
Please specify a language."); return REPLSP(); } diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index 2ed9958e51da..233e0dd977d3 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -296,6 +296,7 @@ SBError Driver::ProcessArgs(const opt::InputArgList &args, bool &exiting) { arg_value); return error; } + m_debugger.SetREPLLanguage(m_option_data.m_repl_lang); } if (args.hasArg(OPT_repl)) { From 4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b Mon Sep 17 00:00:00 2001 From: Andrew Browne Date: Wed, 22 Dec 2021 23:54:26 -0800 Subject: [PATCH 741/992] [DFSan] Add option for conditional callbacks. This allows DFSan to find tainted values used to control program behavior. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D116207 --- .../include/sanitizer/dfsan_interface.h | 17 +++ compiler-rt/lib/dfsan/dfsan.cpp | 55 +++++++++ compiler-rt/lib/dfsan/done_abilist.txt | 4 + .../test/dfsan/Inputs/flags_abilist.txt | 6 + .../test/dfsan/conditional_callbacks.c | 110 ++++++++++++++++++ .../test/dfsan/conditional_callbacks_sig.c | 98 ++++++++++++++++ .../Instrumentation/DataFlowSanitizer.cpp | 64 ++++++++++ 7 files changed, 354 insertions(+) create mode 100644 compiler-rt/test/dfsan/conditional_callbacks.c create mode 100644 compiler-rt/test/dfsan/conditional_callbacks_sig.c diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h index bc0652c99a14..8e581a67572d 100644 --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -27,6 +27,10 @@ typedef uint32_t dfsan_origin; /// Signature of the callback argument to dfsan_set_write_callback(). typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count); +/// Signature of the callback argument to dfsan_set_conditional_callback(). 
+typedef void (*dfsan_conditional_callback_t)(dfsan_label label, + dfsan_origin origin); + /// Computes the union of \c l1 and \c l2, resulting in a union label. dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2); @@ -74,6 +78,19 @@ void dfsan_flush(void); /// callback executes. Pass in NULL to remove any callback. void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback); +/// Sets a callback to be invoked on any conditional expressions which have a +/// taint label set. This can be used to find where tainted data influences +/// the behavior of the program. +/// These callbacks will only be added when -dfsan-conditional-callbacks=true. +void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); + +/// Conditional expressions occur during signal handlers. +/// Making callbacks that handle signals well is tricky, so when +/// -dfsan-conditional-callbacks=true, conditional expressions used in signal +/// handlers will add the labels they see into a global (bitwise-or together). +/// This function returns all label bits seen in signal handler conditions. +dfsan_label dfsan_get_labels_in_signal_conditional(); + /// Interceptor hooks. /// Whenever a dfsan's custom function is called the corresponding /// hook is called it non-zero. The hooks should be defined by the user. diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index ee7221c7b9a8..c8a3bdca06f8 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -600,6 +600,60 @@ dfsan_has_label(dfsan_label label, dfsan_label elem) { return (label & elem) == elem; } +namespace __dfsan { + +typedef void (*dfsan_conditional_callback_t)(dfsan_label label, + dfsan_origin origin); +static dfsan_conditional_callback_t conditional_callback = nullptr; +static dfsan_label labels_in_signal_conditional = 0; + +static void ConditionalCallback(dfsan_label label, dfsan_origin origin) { + // Programs have many branches. 
For efficiency the conditional sink callback + // handler needs to ignore as many as possible as early as possible. + if (label == 0) { + return; + } + if (conditional_callback == nullptr) { + return; + } + + // This initial ConditionalCallback handler needs to be in here in dfsan + // runtime (rather than being an entirely user implemented hook) so that it + // has access to dfsan thread information. + DFsanThread *t = GetCurrentThread(); + // A callback operation which does useful work (like record the flow) will + // likely be too long executed in a signal handler. + if (t && t->InSignalHandler()) { + // Record set of labels used in signal handler for completeness. + labels_in_signal_conditional |= label; + return; + } + + conditional_callback(label, origin); +} + +} // namespace __dfsan + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__dfsan_conditional_callback_origin(dfsan_label label, dfsan_origin origin) { + __dfsan::ConditionalCallback(label, origin); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_conditional_callback( + dfsan_label label) { + __dfsan::ConditionalCallback(label, 0); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_conditional_callback( + __dfsan::dfsan_conditional_callback_t callback) { + __dfsan::conditional_callback = callback; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label +dfsan_get_labels_in_signal_conditional() { + return __dfsan::labels_in_signal_conditional; +} + class Decorator : public __sanitizer::SanitizerCommonDecorator { public: Decorator() : SanitizerCommonDecorator() {} @@ -898,6 +952,7 @@ extern "C" void dfsan_flush() { Die(); } } + __dfsan::labels_in_signal_conditional = 0; } // TODO: CheckMemoryLayoutSanity is based on msan. 
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt index fc2dd02ccf5f..e8fcd83d13bf 100644 --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -46,6 +46,10 @@ fun:dfsan_get_init_origin=uninstrumented fun:dfsan_get_init_origin=discard fun:dfsan_get_track_origins=uninstrumented fun:dfsan_get_track_origins=discard +fun:dfsan_set_conditional_callback=uninstrumented +fun:dfsan_set_conditional_callback=discard +fun:dfsan_get_labels_in_signal_conditional=uninstrumented +fun:dfsan_get_labels_in_signal_conditional=discard ############################################################################### # glibc diff --git a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt index ce827dd7a642..6245a419792b 100644 --- a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt +++ b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt @@ -7,3 +7,9 @@ fun:main=discard fun:dfsan_set_label=uninstrumented fun:dfsan_set_label=discard + +fun:my_dfsan_conditional_callback=uninstrumented +fun:my_dfsan_conditional_callback=discard + +fun:dfsan_set_conditional_callback=uninstrumented +fun:dfsan_set_conditional_callback=discard diff --git a/compiler-rt/test/dfsan/conditional_callbacks.c b/compiler-rt/test/dfsan/conditional_callbacks.c new file mode 100644 index 000000000000..53d9f288e842 --- /dev/null +++ b/compiler-rt/test/dfsan/conditional_callbacks.c @@ -0,0 +1,110 @@ +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t +// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s +// +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -DORIGINS -c %s -o %t-callbacks-orig.o +// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks -mllvm 
-dfsan-track-origins=1 -DORIGINS %s %t-callbacks-orig.o -o %t-orig +// RUN: %run %t-orig FooBarBaz 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +// Tests that callbacks are inserted for conditionals when +// -dfsan-conditional-callbacks is specified. + +#include +#include +#include +#include + +#ifdef CALLBACKS +// Compile this code without DFSan to avoid recursive instrumentation. + +extern dfsan_label LabelI; +extern dfsan_label LabelJ; +extern dfsan_label LabelIJ; + +void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) { + assert(Label != 0); +#ifdef ORIGINS + assert(Origin != 0); +#else + assert(Origin == 0); +#endif + + static int Count = 0; + switch (Count++) { + case 0: + assert(Label == LabelI); + break; + case 1: + assert(Label == LabelJ); + break; + case 2: + assert(Label == LabelIJ); + break; + default: + break; + } + + fprintf(stderr, "Label %u used as condition\n", Label); +} + +#else +// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the +// callbacks. + +dfsan_label LabelI; +dfsan_label LabelJ; +dfsan_label LabelIJ; + +extern void my_dfsan_conditional_callback(dfsan_label Label, + dfsan_origin Origin); + +int main(int Argc, char *Argv[]) { + assert(Argc == 2); + + dfsan_set_conditional_callback(my_dfsan_conditional_callback); + + int result = 0; + // Make these not look like constants, otherwise the branch we're expecting + // may be optimized out. + int DataI = (Argv[0][0] != 0) ? 1 : 0; + int DataJ = (Argv[1][0] != 0) ? 
2 : 0; + LabelI = 1; + dfsan_set_label(LabelI, &DataI, sizeof(DataI)); + LabelJ = 2; + dfsan_set_label(LabelJ, &DataJ, sizeof(DataJ)); + LabelIJ = dfsan_union(LabelI, LabelJ); + + assert(dfsan_get_label(DataI) == LabelI); + + // CHECK: Label 1 used as condition + if (DataI) { + result = 42; + } + + assert(dfsan_get_label(DataJ) == LabelJ); + + // CHECK: Label 2 used as condition + switch (DataJ) { + case 1: + result += 10000; + break; + case 2: + result += 4200; + break; + default: + break; + } + + int tainted_cond = ((DataI * DataJ) != 1); + assert(dfsan_get_label(tainted_cond) == LabelIJ); + + // CHECK: Label 3 used as condition + result = tainted_cond ? result + 420000 : 9; + + assert(result == 424242); + return 0; +} + +#endif // #ifdef CALLBACKS diff --git a/compiler-rt/test/dfsan/conditional_callbacks_sig.c b/compiler-rt/test/dfsan/conditional_callbacks_sig.c new file mode 100644 index 000000000000..174f2fe442a7 --- /dev/null +++ b/compiler-rt/test/dfsan/conditional_callbacks_sig.c @@ -0,0 +1,98 @@ +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t +// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CALLBACKS +// Compile this code without DFSan to avoid recursive instrumentation. + +void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) { + assert(Label != 0); + assert(Origin == 0); + + static int Count = 0; + switch (Count++) { + case 0: + assert(Label == 1); + break; + case 1: + assert(Label == 4); + break; + default: + break; + } + + fprintf(stderr, "Label %u used as condition\n", Label); +} + +#else +// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the +// callbacks. 
+ +extern void my_dfsan_conditional_callback(dfsan_label Label, + dfsan_origin Origin); + +volatile int x = 0; +volatile int y = 1; +volatile int z = 0; + +void SignalHandler(int signo) { + assert(dfsan_get_label(x) == 0); + assert(dfsan_get_label(y) != 0); + assert(dfsan_get_label(z) != 0); + // Running the conditional callback from a signal handler is risky, + // because the code must be written with signal handler context in mind. + // Instead dfsan_get_labels_in_signal_conditional() will indicate labels + // used in conditions inside signal handlers. + // CHECK-NOT: Label 8 used as condition + if (z != 0) { + x = y; + } +} + +int main(int Argc, char *Argv[]) { + assert(Argc >= 1); + int unknown = (Argv[0][0] != 0) ? 1 : 0; + dfsan_set_label(1, &unknown, sizeof(unknown)); + + dfsan_set_conditional_callback(my_dfsan_conditional_callback); + + // CHECK: Label 1 used as condition + if (unknown) { + z = 42; + } + + assert(dfsan_get_labels_in_signal_conditional() == 0); + dfsan_set_label(4, (void *)&y, sizeof(y)); + dfsan_set_label(8, (void *)&z, sizeof(z)); + + struct sigaction sa = {}; + sa.sa_handler = SignalHandler; + int r = sigaction(SIGHUP, &sa, NULL); + assert(dfsan_get_label(r) == 0); + + kill(getpid(), SIGHUP); + signal(SIGHUP, SIG_DFL); + + assert(dfsan_get_labels_in_signal_conditional() == 8); + assert(x == 1); + // CHECK: Label 4 used as condition + if (x != 0) { + z = 123; + } + // Flush should clear the conditional signals seen. 
+ dfsan_flush(); + assert(dfsan_get_labels_in_signal_conditional() == 0); + return 0; +} + +#endif // #ifdef CALLBACKS diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 1e5688828d30..7c7d3ee13de7 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -208,6 +208,14 @@ static cl::opt ClEventCallbacks( cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false)); +// Experimental feature that inserts callbacks for conditionals, including: +// conditional branch, switch, select. +// This must be true for dfsan_set_conditional_callback() to have effect. +static cl::opt ClConditionalCallbacks( + "dfsan-conditional-callbacks", + cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, + cl::init(false)); + // Controls whether the pass tracks the control flow of select instructions. 
static cl::opt ClTrackSelectControlFlow( "dfsan-track-select-control-flow", @@ -428,6 +436,8 @@ class DataFlowSanitizer { FunctionType *DFSanSetLabelFnTy; FunctionType *DFSanNonzeroLabelFnTy; FunctionType *DFSanVarargWrapperFnTy; + FunctionType *DFSanConditionalCallbackFnTy; + FunctionType *DFSanConditionalCallbackOriginFnTy; FunctionType *DFSanCmpCallbackFnTy; FunctionType *DFSanLoadStoreCallbackFnTy; FunctionType *DFSanMemTransferCallbackFnTy; @@ -444,6 +454,8 @@ class DataFlowSanitizer { FunctionCallee DFSanLoadCallbackFn; FunctionCallee DFSanStoreCallbackFn; FunctionCallee DFSanMemTransferCallbackFn; + FunctionCallee DFSanConditionalCallbackFn; + FunctionCallee DFSanConditionalCallbackOriginFn; FunctionCallee DFSanCmpCallbackFn; FunctionCallee DFSanChainOriginFn; FunctionCallee DFSanChainOriginIfTaintedFn; @@ -642,6 +654,10 @@ struct DFSanFunction { Align getShadowAlign(Align InstAlignment); + // If ClConditionalCallbacks is enabled, insert a callback after a given + // branch instruction using the given conditional expression. + void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition); + private: /// Collapses the shadow with aggregate type into a single primitive shadow /// value. 
@@ -748,6 +764,8 @@ class DFSanVisitor : public InstVisitor { void visitSelectInst(SelectInst &I); void visitMemSetInst(MemSetInst &I); void visitMemTransferInst(MemTransferInst &I); + void visitBranchInst(BranchInst &BR); + void visitSwitchInst(SwitchInst &SW); private: void visitCASOrRMW(Align InstAlignment, Instruction &I); @@ -971,6 +989,22 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow, return PrimitiveShadow; } +void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I, + Value *Condition) { + if (!ClConditionalCallbacks) { + return; + } + IRBuilder<> IRB(&I); + Value *CondShadow = getShadow(Condition); + if (DFS.shouldTrackOrigins()) { + Value *CondOrigin = getOrigin(Condition); + IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn, + {CondShadow, CondOrigin}); + } else { + IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow}); + } +} + Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) { if (!OrigTy->isSized()) return PrimitiveShadowTy; @@ -1032,6 +1066,13 @@ bool DataFlowSanitizer::initializeModule(Module &M) { FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); DFSanVarargWrapperFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + DFSanConditionalCallbackFnTy = + FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, + /*isVarArg=*/false); + Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy}; + DFSanConditionalCallbackOriginFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs, + /*isVarArg=*/false); DFSanCmpCallbackFnTy = FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, /*isVarArg=*/false); @@ -1270,6 +1311,10 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { DFSanStoreCallbackFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( + 
DFSanConditionalCallbackFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( + DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( @@ -1292,6 +1337,12 @@ void DataFlowSanitizer::initializeCallbackFunctions(Module &M) { "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy); DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy); + + DFSanConditionalCallbackFn = Mod->getOrInsertFunction( + "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy); + DFSanConditionalCallbackOriginFn = + Mod->getOrInsertFunction("__dfsan_conditional_callback_origin", + DFSanConditionalCallbackOriginFnTy); } void DataFlowSanitizer::injectMetadataGlobals(Module &M) { @@ -2593,6 +2644,8 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) { Value *FalseOrigin = ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr; + DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition()); + if (isa(I.getCondition()->getType())) { ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow, FalseShadow, &I); @@ -2683,6 +2736,17 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { } } +void DFSanVisitor::visitBranchInst(BranchInst &BR) { + if (!BR.isConditional()) + return; + + DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition()); +} + +void DFSanVisitor::visitSwitchInst(SwitchInst &SW) { + DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition()); +} + static bool isAMustTailRetVal(Value *RetVal) { // Tail call may have a bitcast between return. 
if (auto *I = dyn_cast(RetVal)) { From d7b6574c3bf671d70acd751a8c85d3a062dcc7c6 Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 3 Jan 2022 10:16:38 -0700 Subject: [PATCH 742/992] [clang-tidy] Recognize transformer checks as providing fixits - Recognize older checks that might not end with Check.cpp - Update list of checks based on improvements to add_new_check - Fix spelling error in TransformerClangTidyCheck.h Fixes #52962 Differential Revision: https://reviews.llvm.org/D116550 --- clang-tools-extra/clang-tidy/add_new_check.py | 24 ++++++++++++------- .../utils/TransformerClangTidyCheck.h | 2 +- .../docs/clang-tidy/checks/list.rst | 16 ++++++------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py index a3554b095975..1e26b07121c6 100755 --- a/clang-tools-extra/clang-tidy/add_new_check.py +++ b/clang-tools-extra/clang-tidy/add_new_check.py @@ -324,16 +324,20 @@ def has_auto_fix(check_name): dirname, _, check_name = check_name.partition("-") checker_code = get_actual_filename(os.path.join(clang_tidy_path, dirname), - get_camel_name(check_name) + '.cpp') - + get_camel_check_name(check_name) + '.cpp') if not os.path.isfile(checker_code): - return "" + # Some older checks don't end with 'Check.cpp' + checker_code = get_actual_filename(os.path.join(clang_tidy_path, dirname), + get_camel_name(check_name) + '.cpp') + if not os.path.isfile(checker_code): + return '' with io.open(checker_code, encoding='utf8') as f: code = f.read() - if 'FixItHint' in code or "ReplacementText" in code or "fixit" in code: - # Some simple heuristics to figure out if a checker has an autofix or not. - return ' "Yes"' + for needle in ['FixItHint', 'ReplacementText', 'fixit', 'TransformerClangTidyCheck']: + if needle in code: + # Some simple heuristics to figure out if a checker has an autofix or not. 
+ return ' "Yes"' return "" def process_doc(doc_file): @@ -416,7 +420,11 @@ def write_docs(module_path, module, check_name): def get_camel_name(check_name): return ''.join(map(lambda elem: elem.capitalize(), - check_name.split('-'))) + 'Check' + check_name.split('-'))) + + +def get_camel_check_name(check_name): + return get_camel_name(check_name) + 'Check' def main(): @@ -458,7 +466,7 @@ def main(): module = args.module check_name = args.check - check_name_camel = get_camel_name(check_name) + check_name_camel = get_camel_check_name(check_name) if check_name.startswith(module): print('Check name "%s" must not start with the module "%s". Exiting.' % ( check_name, module)) diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h index 9736e64e7c31..d26737935b1a 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h @@ -54,7 +54,7 @@ class TransformerClangTidyCheck : public ClangTidyCheck { StringRef Name, ClangTidyContext *Context); /// Convenience overload of the constructor when the rule doesn't have any - /// dependies. + /// dependencies. 
TransformerClangTidyCheck(transformer::RewriteRule R, StringRef Name, ClangTidyContext *Context); diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 1e6936f9cbdf..8d0a568cff88 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -116,13 +116,12 @@ Clang-Tidy Checks `cert-dcl50-cpp `_, `cert-dcl58-cpp `_, `cert-env33-c `_, + `cert-err33-c `_, `cert-err34-c `_, `cert-err52-cpp `_, `cert-err58-cpp `_, `cert-err60-cpp `_, - `cert-exp42-c `_, `cert-flp30-c `_, - `cert-flp37-c `_, `cert-mem57-cpp `_, `cert-msc50-cpp `_, `cert-msc51-cpp `_, @@ -213,7 +212,7 @@ Clang-Tidy Checks `llvmlibc-implementation-in-namespace `_, `llvmlibc-restrict-system-libc-headers `_, "Yes" `misc-definitions-in-headers `_, "Yes" - `misc-misleading-identifier `_, + `misc-misleading-identifier `_, `misc-misplaced-const `_, `misc-new-delete-overloads `_, `misc-no-recursion `_, @@ -260,8 +259,8 @@ Clang-Tidy Checks `modernize-use-using `_, "Yes" `mpi-buffer-deref `_, "Yes" `mpi-type-mismatch `_, "Yes" - `objc-avoid-nserror-init `_, `objc-assert-equals `_, "Yes" + `objc-avoid-nserror-init `_, `objc-dealloc-in-category `_, `objc-forbidden-subclassing `_, `objc-missing-hash `_, @@ -283,16 +282,16 @@ Clang-Tidy Checks `performance-noexcept-move-constructor `_, "Yes" `performance-trivially-destructible `_, "Yes" `performance-type-promotion-in-math-fn `_, "Yes" - `performance-unnecessary-copy-initialization `_, + `performance-unnecessary-copy-initialization `_, "Yes" `performance-unnecessary-value-param `_, "Yes" `portability-restrict-system-includes `_, "Yes" `portability-simd-intrinsics `_, - `readability-avoid-const-params-in-decls `_, + `readability-avoid-const-params-in-decls `_, "Yes" `readability-braces-around-statements `_, "Yes" `readability-const-return-type `_, "Yes" `readability-container-data-pointer `_, "Yes" `readability-container-size-empty `_, "Yes" - 
`readability-convert-member-functions-to-static `_, + `readability-convert-member-functions-to-static `_, "Yes" `readability-delete-null-pointer `_, "Yes" `readability-else-after-return `_, "Yes" `readability-function-cognitive-complexity `_, @@ -338,13 +337,14 @@ Clang-Tidy Checks `cert-dcl03-c `_, `misc-static-assert `_, "Yes" `cert-dcl16-c `_, `readability-uppercase-literal-suffix `_, "Yes" `cert-dcl37-c `_, `bugprone-reserved-identifier `_, "Yes" - `cert-err33-c `_, `bugprone-unused-return-value `_, `cert-dcl51-cpp `_, `bugprone-reserved-identifier `_, "Yes" `cert-dcl54-cpp `_, `misc-new-delete-overloads `_, `cert-dcl59-cpp `_, `google-build-namespaces `_, `cert-err09-cpp `_, `misc-throw-by-value-catch-by-reference `_, `cert-err61-cpp `_, `misc-throw-by-value-catch-by-reference `_, + `cert-exp42-c `_, `bugprone-suspicious-memory-comparison `_, `cert-fio38-c `_, `misc-non-copyable-objects `_, + `cert-flp37-c `_, `bugprone-suspicious-memory-comparison `_, `cert-msc30-c `_, `cert-msc50-cpp `_, `cert-msc32-c `_, `cert-msc51-cpp `_, `cert-oop11-cpp `_, `performance-move-constructor-init `_, From 524abc68f231101996e8142aadb3f382fe40d20b Mon Sep 17 00:00:00 2001 From: Daniil Suchkov Date: Tue, 4 Jan 2022 02:38:56 +0000 Subject: [PATCH 743/992] Introduce NewPM .dot printers for DomTree This patch adds a couple of NewPM function passes (dot-dom and dot-dom-only) that dump DomTree into .dot files. 
Reviewed-By: aeubanks Differential Revision: https://reviews.llvm.org/D116629 --- .../llvm/Analysis/DOTGraphTraitsPass.h | 19 +++++ llvm/include/llvm/Analysis/DomPrinter.h | 14 ++++ llvm/lib/Analysis/DomPrinter.cpp | 13 ++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 2 + .../test/Analysis/Dominators/print-dot-dom.ll | 71 +++++++++++++++++++ 6 files changed, 120 insertions(+) create mode 100644 llvm/test/Analysis/Dominators/print-dot-dom.ll diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index 59737744f576..d8021907b5b2 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -181,6 +181,25 @@ class DOTGraphTraitsModulePrinter : public ModulePass { std::string Name; }; +template +void WriteDOTGraphToFile(Function &F, GraphT &&Graph, + std::string FileNamePrefix, bool IsSimple) { + std::string Filename = FileNamePrefix + "." + F.getName().str() + ".dot"; + std::error_code EC; + + errs() << "Writing '" << Filename << "'..."; + + raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF); + std::string GraphName = DOTGraphTraits::getGraphName(Graph); + std::string Title = GraphName + " for '" + F.getName().str() + "' function"; + + if (!EC) + WriteGraph(File, Graph, IsSimple, Title); + else + errs() << " error opening file for writing!"; + errs() << "\n"; +} + } // end namespace llvm #endif diff --git a/llvm/include/llvm/Analysis/DomPrinter.h b/llvm/include/llvm/Analysis/DomPrinter.h index a177f877b295..e6df12d88072 100644 --- a/llvm/include/llvm/Analysis/DomPrinter.h +++ b/llvm/include/llvm/Analysis/DomPrinter.h @@ -14,6 +14,20 @@ #ifndef LLVM_ANALYSIS_DOMPRINTER_H #define LLVM_ANALYSIS_DOMPRINTER_H +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DomTreePrinterPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class DomTreeOnlyPrinterPass : 
public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + namespace llvm { class FunctionPass; FunctionPass *createDomPrinterPass(); diff --git a/llvm/lib/Analysis/DomPrinter.cpp b/llvm/lib/Analysis/DomPrinter.cpp index ebbe0d3e2c5f..6088de53028d 100644 --- a/llvm/lib/Analysis/DomPrinter.cpp +++ b/llvm/lib/Analysis/DomPrinter.cpp @@ -80,6 +80,19 @@ struct DOTGraphTraits }; } +PreservedAnalyses DomTreePrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + WriteDOTGraphToFile(F, &AM.getResult(F), "dom", false); + return PreservedAnalyses::all(); +} + +PreservedAnalyses DomTreeOnlyPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + WriteDOTGraphToFile(F, &AM.getResult(F), "domonly", + true); + return PreservedAnalyses::all(); +} + void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) { #ifndef NDEBUG ViewGraph(this, Name, false, Title); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d7615ef4e9bf..6fdddff86403 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 74613a7fcce0..2fc4fa25897f 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -254,6 +254,8 @@ FUNCTION_PASS("div-rem-pairs", DivRemPairsPass()) FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("dot-cfg", CFGPrinterPass()) FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass()) +FUNCTION_PASS("dot-dom", DomTreePrinterPass()) +FUNCTION_PASS("dot-dom-only", DomTreeOnlyPrinterPass()) 
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flattencfg", FlattenCFGPass()) FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass()) diff --git a/llvm/test/Analysis/Dominators/print-dot-dom.ll b/llvm/test/Analysis/Dominators/print-dot-dom.ll new file mode 100644 index 000000000000..faad5a6a51db --- /dev/null +++ b/llvm/test/Analysis/Dominators/print-dot-dom.ll @@ -0,0 +1,71 @@ +; RUN: opt %s -passes=dot-dom -disable-output +; RUN: FileCheck %s -input-file=dom.test1.dot -check-prefix=TEST1 +; RUN: FileCheck %s -input-file=dom.test2.dot -check-prefix=TEST2 + +define void @test1() { +; TEST1: digraph "Dominator tree for 'test1' function" +; TEST1-NEXT: label="Dominator tree for 'test1' function" +; TEST1: Node0x[[EntryID:.*]] [shape=record,label="{entry: +; TEST1-NEXT: Node0x[[EntryID]] -> Node0x[[A_ID:.*]]; +; TEST1-NEXT: Node0x[[EntryID]] -> Node0x[[C_ID:.*]]; +; TEST1-NEXT: Node0x[[EntryID]] -> Node0x[[B_ID:.*]]; +; TEST1-NEXT: Node0x[[A_ID]] [shape=record,label="{a: +; TEST1-NEXT: Node0x[[C_ID]] [shape=record,label="{c: +; TEST1-NEXT: Node0x[[C_ID]] -> Node0x[[D_ID:.*]]; +; TEST1-NEXT: Node0x[[C_ID]] -> Node0x[[E_ID:.*]]; +; TEST1-NEXT: Node0x[[D_ID]] [shape=record,label="{d: +; TEST1-NEXT: Node0x[[E_ID]] [shape=record,label="{e: +; TEST1-NEXT: Node0x[[B_ID]] [shape=record,label="{b: + +entry: + br i1 undef, label %a, label %b + +a: + br label %c + +b: + br label %c + +c: + br i1 undef, label %d, label %e + +d: + ret void + +e: + ret void +} + +define void @test2() { +; TEST2: digraph "Dominator tree for 'test2' function" +; TEST2-NEXT: label="Dominator tree for 'test2' function" +; TEST2: Node0x[[EntryID:.*]] [shape=record,label="{entry: +; TEST2-NEXT: Node0x[[EntryID]] -> Node0x[[A_ID:.*]]; +; TEST2-NEXT: Node0x[[A_ID]] [shape=record,label="{a: +; TEST2-NEXT: Node0x[[A_ID]] -> Node0x[[B_ID:.*]]; +; TEST2-NEXT: Node0x[[B_ID]] [shape=record,label="{b: +; TEST2-NEXT: Node0x[[B_ID]] -> Node0x[[C_ID:.*]]; +; TEST2-NEXT: Node0x[[C_ID]] 
[shape=record,label="{c: +; TEST2-NEXT: Node0x[[C_ID]] -> Node0x[[D_ID:.*]]; +; TEST2-NEXT: Node0x[[C_ID]] -> Node0x[[E_ID:.*]]; +; TEST2-NEXT: Node0x[[D_ID]] [shape=record,label="{d: +; TEST2-NEXT: Node0x[[E_ID]] [shape=record,label="{e: + +entry: + br label %a + +a: + br label %b + +b: + br i1 undef, label %a, label %c + +c: + br i1 undef, label %d, label %e + +d: + br i1 undef, label %a, label %e + +e: + ret void +} From c03fd1e61fee892aa0ead68d0f9271dd680b655f Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Wed, 5 Jan 2022 13:13:04 -0800 Subject: [PATCH 744/992] [mlir][sparse] Marking cursor parameters const These parameters aren't modified, so we make that invariant explicit. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D116693 --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 3681ca17674b..3bd405738eeb 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -187,12 +187,12 @@ class SparseTensorStorageBase { virtual void getValues(std::vector **) { fatal("vali8"); } /// Element-wise insertion in lexicographic index order. 
- virtual void lexInsert(uint64_t *, double) { fatal("insf64"); } - virtual void lexInsert(uint64_t *, float) { fatal("insf32"); } - virtual void lexInsert(uint64_t *, int64_t) { fatal("insi64"); } - virtual void lexInsert(uint64_t *, int32_t) { fatal("insi32"); } - virtual void lexInsert(uint64_t *, int16_t) { fatal("ins16"); } - virtual void lexInsert(uint64_t *, int8_t) { fatal("insi8"); } + virtual void lexInsert(const uint64_t *, double) { fatal("insf64"); } + virtual void lexInsert(const uint64_t *, float) { fatal("insf32"); } + virtual void lexInsert(const uint64_t *, int64_t) { fatal("insi64"); } + virtual void lexInsert(const uint64_t *, int32_t) { fatal("insi32"); } + virtual void lexInsert(const uint64_t *, int16_t) { fatal("ins16"); } + virtual void lexInsert(const uint64_t *, int8_t) { fatal("insi8"); } /// Expanded insertion. virtual void expInsert(uint64_t *, double *, bool *, uint64_t *, uint64_t) { @@ -300,7 +300,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { void getValues(std::vector **out) override { *out = &values; } /// Partially specialize lexicographical insertions based on template types. - void lexInsert(uint64_t *cursor, V val) override { + void lexInsert(const uint64_t *cursor, V val) override { // First, wrap up pending insertion path. uint64_t diff = 0; uint64_t top = 0; @@ -499,7 +499,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { } /// Continues a single insertion path, outer to inner. - void insPath(uint64_t *cursor, uint64_t diff, uint64_t top, V val) { + void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) { uint64_t rank = getRank(); assert(diff < rank); for (uint64_t d = diff; d < rank; d++) { @@ -517,7 +517,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { } /// Finds the lexicographic differing dimension. 
- uint64_t lexDiff(uint64_t *cursor) { + uint64_t lexDiff(const uint64_t *cursor) { for (uint64_t r = 0, rank = getRank(); r < rank; r++) if (cursor[r] > idx[r]) return r; From ceda1ae9a7b061a98b1309949c73c9958e2a94d7 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Wed, 5 Jan 2022 13:46:15 -0800 Subject: [PATCH 745/992] [mlir][sparse] Strengthening first arguments of fromCOO/toCOO Better capturing of invariants Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D116700 --- mlir/lib/ExecutionEngine/SparseTensorUtils.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 3bd405738eeb..281016f785af 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -269,9 +269,10 @@ class SparseTensorStorage : public SparseTensorStorageBase { pointers[r].push_back(0); // Then assign contents from coordinate scheme tensor if provided. if (tensor) { - uint64_t nnz = tensor->getElements().size(); + const std::vector> &elements = tensor->getElements(); + uint64_t nnz = elements.size(); values.reserve(nnz); - fromCOO(tensor, 0, nnz, 0); + fromCOO(elements, 0, nnz, 0); } else if (allDense) { values.resize(sz, 0); } @@ -367,7 +368,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { std::vector reord(rank); for (uint64_t r = 0; r < rank; r++) reord[r] = perm[rev[r]]; - toCOO(tensor, reord, 0, 0); + toCOO(*tensor, reord, 0, 0); assert(tensor->getElements().size() == values.size()); return tensor; } @@ -402,9 +403,8 @@ class SparseTensorStorage : public SparseTensorStorageBase { /// Initializes sparse tensor storage scheme from a memory-resident sparse /// tensor in coordinate scheme. This method prepares the pointers and /// indices arrays under the given per-dimension dense/sparse annotations. 
- void fromCOO(SparseTensorCOO *tensor, uint64_t lo, uint64_t hi, - uint64_t d) { - const std::vector> &elements = tensor->getElements(); + void fromCOO(const std::vector> &elements, uint64_t lo, + uint64_t hi, uint64_t d) { // Once dimensions are exhausted, insert the numerical values. assert(d <= getRank()); if (d == getRank()) { @@ -432,7 +432,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { endDim(d + 1); full++; } - fromCOO(tensor, lo, seg, d + 1); + fromCOO(elements, lo, seg, d + 1); // And move on to next segment in interval. lo = seg; } @@ -449,12 +449,12 @@ class SparseTensorStorage : public SparseTensorStorageBase { /// Stores the sparse tensor storage scheme into a memory-resident sparse /// tensor in coordinate scheme. - void toCOO(SparseTensorCOO *tensor, std::vector &reord, + void toCOO(SparseTensorCOO &tensor, std::vector &reord, uint64_t pos, uint64_t d) { assert(d <= getRank()); if (d == getRank()) { assert(pos < values.size()); - tensor->add(idx, values[pos]); + tensor.add(idx, values[pos]); } else if (isCompressedDim(d)) { // Sparse dimension. for (uint64_t ii = pointers[d][pos]; ii < pointers[d][pos + 1]; ii++) { From 809c6a5a1d2f4366ab0e602c9d963b73f380b74e Mon Sep 17 00:00:00 2001 From: Egor Zhdan Date: Tue, 4 Jan 2022 19:58:10 +0000 Subject: [PATCH 746/992] [Clang] Extract availability mapping from VersionMap for watchOS/tvOS This change makes it possible to extract iOS-to-another-platform version mappings from `VersionMap` in the `SDKSettings.json` file in Darwin SDKs, for example, `iOS_watchOS` and `iOS_tvOS`. This code was originally authored by Alex Lorenz. 
rdar://81491680 Differential Revision: https://reviews.llvm.org/D116615 --- clang/lib/Basic/DarwinSDKInfo.cpp | 19 ++++ clang/unittests/Basic/DarwinSDKInfoTest.cpp | 108 +++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/clang/lib/Basic/DarwinSDKInfo.cpp b/clang/lib/Basic/DarwinSDKInfo.cpp index fe35f77782c9..64bcb45a4cd8 100644 --- a/clang/lib/Basic/DarwinSDKInfo.cpp +++ b/clang/lib/Basic/DarwinSDKInfo.cpp @@ -84,6 +84,25 @@ DarwinSDKInfo::parseDarwinSDKSettingsJSON(const llvm::json::Object *Obj) { llvm::DenseMap> VersionMappings; if (const auto *VM = Obj->getObject("VersionMap")) { + // FIXME: Generalize this out beyond iOS-deriving targets. + // Look for ios_ version mapping for targets that derive from ios. + for (const auto &KV : *VM) { + auto Pair = StringRef(KV.getFirst()).split("_"); + if (Pair.first.compare_insensitive("ios") == 0) { + llvm::Triple TT(llvm::Twine("--") + Pair.second.lower()); + if (TT.getOS() != llvm::Triple::UnknownOS) { + auto Mapping = RelatedTargetVersionMapping::parseJSON( + *KV.getSecond().getAsObject(), *MaximumDeploymentVersion); + if (Mapping) + VersionMappings[OSEnvPair(llvm::Triple::IOS, + llvm::Triple::UnknownEnvironment, + TT.getOS(), + llvm::Triple::UnknownEnvironment) + .Value] = std::move(Mapping); + } + } + } + if (const auto *Mapping = VM->getObject("macOS_iOSMac")) { auto VersionMap = RelatedTargetVersionMapping::parseJSON( *Mapping, *MaximumDeploymentVersion); diff --git a/clang/unittests/Basic/DarwinSDKInfoTest.cpp b/clang/unittests/Basic/DarwinSDKInfoTest.cpp index f845e1536da8..aa1feeb293c0 100644 --- a/clang/unittests/Basic/DarwinSDKInfoTest.cpp +++ b/clang/unittests/Basic/DarwinSDKInfoTest.cpp @@ -13,7 +13,68 @@ using namespace llvm; using namespace clang; -TEST(DarwinSDKInfoTest, ParseAndTestMapping) { +// Check the version mapping logic in DarwinSDKInfo. 
+TEST(DarwinSDKInfo, VersionMapping) { + llvm::json::Object Obj({{"3.0", "1.0"}, {"3.1", "1.2"}}); + Optional Mapping = + DarwinSDKInfo::RelatedTargetVersionMapping::parseJSON(Obj, + VersionTuple()); + EXPECT_TRUE(Mapping.hasValue()); + EXPECT_EQ(Mapping->getMinimumValue(), VersionTuple(1)); + + // Exact mapping. + EXPECT_EQ(Mapping->map(VersionTuple(3), VersionTuple(0, 1), None), + VersionTuple(1)); + EXPECT_EQ(Mapping->map(VersionTuple(3, 0), VersionTuple(0, 1), None), + VersionTuple(1)); + EXPECT_EQ(Mapping->map(VersionTuple(3, 0, 0), VersionTuple(0, 1), None), + VersionTuple(1)); + EXPECT_EQ(Mapping->map(VersionTuple(3, 1), VersionTuple(0, 1), None), + VersionTuple(1, 2)); + EXPECT_EQ(Mapping->map(VersionTuple(3, 1, 0), VersionTuple(0, 1), None), + VersionTuple(1, 2)); + + // Missing mapping - fallback to major. + EXPECT_EQ(Mapping->map(VersionTuple(3, 0, 1), VersionTuple(0, 1), None), + VersionTuple(1)); + + // Minimum + EXPECT_EQ(Mapping->map(VersionTuple(2), VersionTuple(0, 1), None), + VersionTuple(0, 1)); + + // Maximum + EXPECT_EQ( + Mapping->map(VersionTuple(4), VersionTuple(0, 1), VersionTuple(100)), + VersionTuple(100)); +} + +// Check the version mapping logic in DarwinSDKInfo. 
+TEST(DarwinSDKInfo, VersionMappingMissingKey) { + llvm::json::Object Obj({{"3.0", "1.0"}, {"5.0", "1.2"}}); + Optional Mapping = + DarwinSDKInfo::RelatedTargetVersionMapping::parseJSON(Obj, + VersionTuple()); + EXPECT_TRUE(Mapping.hasValue()); + EXPECT_EQ( + Mapping->map(VersionTuple(4), VersionTuple(0, 1), VersionTuple(100)), + None); +} + +TEST(DarwinSDKInfo, VersionMappingParseEmpty) { + llvm::json::Object Obj({}); + EXPECT_FALSE( + DarwinSDKInfo::RelatedTargetVersionMapping::parseJSON(Obj, VersionTuple()) + .hasValue()); +} + +TEST(DarwinSDKInfo, VersionMappingParseError) { + llvm::json::Object Obj({{"test", "1.2"}}); + EXPECT_FALSE( + DarwinSDKInfo::RelatedTargetVersionMapping::parseJSON(Obj, VersionTuple()) + .hasValue()); +} + +TEST(DarwinSDKInfoTest, ParseAndTestMappingMacCatalyst) { llvm::json::Object Obj; Obj["Version"] = "11.0"; Obj["MaximumDeploymentTarget"] = "11.99"; @@ -58,6 +119,51 @@ TEST(DarwinSDKInfoTest, ParseAndTestMapping) { VersionTuple(99, 99)); } +TEST(DarwinSDKInfoTest, ParseAndTestMappingIOSDerived) { + llvm::json::Object Obj; + Obj["Version"] = "15.0"; + Obj["MaximumDeploymentTarget"] = "15.0.99"; + llvm::json::Object VersionMap; + VersionMap["10.0"] = "10.0"; + VersionMap["10.3.1"] = "10.2"; + VersionMap["11.0"] = "11.0"; + llvm::json::Object IOSToTvOS; + IOSToTvOS["iOS_tvOS"] = std::move(VersionMap); + Obj["VersionMap"] = std::move(IOSToTvOS); + + auto SDKInfo = DarwinSDKInfo::parseDarwinSDKSettingsJSON(&Obj); + ASSERT_TRUE(SDKInfo); + EXPECT_EQ(SDKInfo->getVersion(), VersionTuple(15, 0)); + + // Verify that mapping is present for platforms that derive from iOS. + const auto *Mapping = SDKInfo->getVersionMapping(DarwinSDKInfo::OSEnvPair( + llvm::Triple::IOS, llvm::Triple::UnknownEnvironment, llvm::Triple::TvOS, + llvm::Triple::UnknownEnvironment)); + ASSERT_TRUE(Mapping); + + // Verify that the iOS versions that are present in the map are translated + // directly to their corresponding tvOS versions. 
+ EXPECT_EQ(*Mapping->map(VersionTuple(10, 0), VersionTuple(), None), + VersionTuple(10, 0)); + EXPECT_EQ(*Mapping->map(VersionTuple(10, 3, 1), VersionTuple(), None), + VersionTuple(10, 2)); + EXPECT_EQ(*Mapping->map(VersionTuple(11, 0), VersionTuple(), None), + VersionTuple(11, 0)); + + // Verify that an iOS version that's not present in the map is translated + // like the nearest major OS version. + EXPECT_EQ(*Mapping->map(VersionTuple(10, 1), VersionTuple(), None), + VersionTuple(10, 0)); + + // Verify that the iOS versions that are outside of the mapped version + // range map to the min/max values passed to the `map` call. + EXPECT_EQ(*Mapping->map(VersionTuple(9, 0), VersionTuple(99, 99), None), + VersionTuple(99, 99)); + EXPECT_EQ( + *Mapping->map(VersionTuple(13, 0), VersionTuple(), VersionTuple(99, 99)), + VersionTuple(99, 99)); +} + TEST(DarwinSDKInfoTest, MissingKeys) { llvm::json::Object Obj; ASSERT_FALSE(DarwinSDKInfo::parseDarwinSDKSettingsJSON(&Obj)); From 9584c6fa2fe216522b86ee5422147b511c73cb4a Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 5 Jan 2022 20:19:55 -0500 Subject: [PATCH 747/992] [OpenMP][Offloading] Fixed data race in libomptarget caused by async data movement The async data movement can cause data race if the target supports it. Details can be found in [1]. This patch tries to fix this problem by attaching an event to the entry of data mapping table. Here are the details. For each issued data movement, a new event is generated and returned to `libomptarget` by calling `createEvent`. The event will be attached to the corresponding mapping table entry. For each data mapping lookup, if there is no need for a data movement, the attached event has to be inserted into the queue to gaurantee that all following operations in the queue can only be executed if the event is fulfilled. This design is to avoid synchronization on host side. Note that we are using CUDA terminolofy here. 
Similar mechanism is assumped to be supported by another targets. Even if the target doesn't support it, it can be easily implemented in the following fall back way: - `Event` can be any kind of flag that has at least two status, 0 and 1. - `waitEvent` can directly busy loop if `Event` is still 0. My local test shows that `bug49334.cpp` can pass. Reference: [1] https://bugs.llvm.org/show_bug.cgi?id=49940 Reviewed By: grokos, JonChesterfield, ye-luo Differential Revision: https://reviews.llvm.org/D104418 --- openmp/libomptarget/include/device.h | 14 ++++++ openmp/libomptarget/src/device.cpp | 65 +++++++++++++++++++++++---- openmp/libomptarget/src/omptarget.cpp | 29 +++++++++--- 3 files changed, 95 insertions(+), 13 deletions(-) diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h index 75dde85a8806..dbaa1bd0b460 100644 --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -82,6 +82,14 @@ struct HostDataToTargetTy { /// movement has been issued. This mutex *must* be locked right before /// releasing the mapping table lock. std::mutex UpdateMtx; + /// Pointer to the event corresponding to the data update of this map. + /// Note: At present this event is created when the first data transfer from + /// host to device is issued, and only being used for H2D. It is not used + /// for data transfer in another direction (device to host). It is still + /// unclear whether we need it for D2H. If in the future we need similar + /// mechanism for D2H, and if the event cannot be shared between them, Event + /// should be written as void *Event[2]. + void *Event = nullptr; }; // When HostDataToTargetTy is used by std::set, std::set::iterator is const // use unique_ptr to make States mutable. @@ -115,6 +123,12 @@ struct HostDataToTargetTy { /// Get the hold reference count. uint64_t getHoldRefCount() const { return States->HoldRefCount; } + /// Get the event bound to this data map. 
+ void *getEvent() const { return States->Event; } + + /// Set the event bound to this data map. + void setEvent(void *Event) const { States->Event = Event; } + /// Reset the specified reference count unless it's infinity. Reset to 1 /// (even if currently 0) so it can be followed by a decrement. void resetRefCount(bool UseHoldRefCount) const { diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 90d994706820..75935b30520c 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -91,6 +91,9 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) { "count\n"); } else if (search->isDynRefCountInf()) { DP("Association found, removing it\n"); + void *Event = search->getEvent(); + if (Event) + destroyEvent(Event); HostDataToTargetMap.erase(search); DataMapMtx.unlock(); return OFFLOAD_SUCCESS; @@ -264,20 +267,62 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, DPxPTR(HstPtrBegin), DPxPTR(TargetPointer)); int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo); - - // Unlock the entry immediately after the data movement is issued. - Entry->unlock(); - if (Ret != OFFLOAD_SUCCESS) { + Entry->unlock(); REPORT("Copying data to device failed.\n"); // We will also return nullptr if the data movement fails because that // pointer points to a corrupted memory region so it doesn't make any // sense to continue to use it. TargetPointer = nullptr; } + + void *Event = Entry->getEvent(); + bool NeedNewEvent = Event == nullptr; + if (NeedNewEvent && createEvent(&Event) != OFFLOAD_SUCCESS) { + Entry->unlock(); + REPORT("Failed to create event\n"); + return {{false /* IsNewEntry */, false /* IsHostPointer */}, + {} /* MapTableEntry */, + nullptr /* TargetPointer */}; + } + // We cannot assume the event should not be nullptr because we don't + // know if the target support event. But if a target doesn't, + // recordEvent should always return success. 
+ Ret = recordEvent(Event, AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) { + Entry->unlock(); + REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event)); + return {{false /* IsNewEntry */, false /* IsHostPointer */}, + {} /* MapTableEntry */, + nullptr /* TargetPointer */}; + } + if (NeedNewEvent) + Entry->setEvent(Event); + // We're done with the entry. Release the entry. + Entry->unlock(); } else { // Release the mapping table lock directly. DataMapMtx.unlock(); + // If not a host pointer and no present modifier, we need to wait for the + // event if it exists. + if (!IsHostPtr && !HasPresentModifier) { + Entry->lock(); + void *Event = Entry->getEvent(); + if (Event) { + int Ret = waitEvent(Event, AsyncInfo); + Entry->unlock(); + if (Ret != OFFLOAD_SUCCESS) { + // If it fails to wait for the event, we need to return nullptr in + // case of any data race. + REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event)); + return {{false /* IsNewEntry */, false /* IsHostPointer */}, + {} /* MapTableEntry */, + nullptr /* TargetPointer */}; + } + } else { + Entry->unlock(); + } + } } return {{IsNew, IsHostPtr}, Entry, TargetPointer}; @@ -365,7 +410,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) { int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasHoldModifier) { // Check if the pointer is contained in any sub-nodes. - int rc; + int Ret = OFFLOAD_SUCCESS; DataMapMtx.lock(); LookupResult lr = lookupMapping(HstPtrBegin, Size); if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { @@ -380,18 +425,22 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size, (HT.HstPtrName) ? 
getNameFromMapping(HT.HstPtrName).c_str() : "unknown"); + void *Event = lr.Entry->getEvent(); HostDataToTargetMap.erase(lr.Entry); + if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) { + REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event)); + Ret = OFFLOAD_FAIL; + } } - rc = OFFLOAD_SUCCESS; } else { REPORT("Section to delete (hst addr " DPxMOD ") does not exist in the" " allocated memory\n", DPxPTR(HstPtrBegin)); - rc = OFFLOAD_FAIL; + Ret = OFFLOAD_FAIL; } DataMapMtx.unlock(); - return rc; + return Ret; } /// Init device, should not be called directly. diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 3e9f6427b472..dd3f97e12f72 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -581,14 +581,33 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation(); TgtPtrBase = ExpectedTgtPtrBase; - int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, - sizeof(void *), AsyncInfo); - Pointer_TPR.MapTableEntry->unlock(); - - if (rt != OFFLOAD_SUCCESS) { + int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, + sizeof(void *), AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) { + Pointer_TPR.MapTableEntry->unlock(); REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } + void *Event = Pointer_TPR.MapTableEntry->getEvent(); + bool NeedNewEvent = Event == nullptr; + if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { + Pointer_TPR.MapTableEntry->unlock(); + REPORT("Failed to create event.\n"); + return OFFLOAD_FAIL; + } + // We cannot assume the event should not be nullptr because we don't + // know if the target support event. But if a target doesn't, + // recordEvent should always return success. 
+ Ret = Device.recordEvent(Event, AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) { + Pointer_TPR.MapTableEntry->unlock(); + REPORT("Failed to set dependence on event " DPxMOD "\n", + DPxPTR(Event)); + return OFFLOAD_FAIL; + } + if (NeedNewEvent) + Pointer_TPR.MapTableEntry->setEvent(Event); + Pointer_TPR.MapTableEntry->unlock(); } else Device.ShadowMtx.unlock(); } From 74bb4ad5d4eb08f997a94b92d4e4eb27d48591b2 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Wed, 5 Jan 2022 17:21:46 -0800 Subject: [PATCH 748/992] Consider CMAKE_{t}_LINKER_FLAGS_ when passing -gcodeview-ghash CMake may add /Debug in the CONFIG-specific flag. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D116710 --- llvm/cmake/modules/HandleLLVMOptions.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 0280e1746ab0..3d3be961a675 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -488,8 +488,13 @@ if( MSVC ) endif() # Get all linker flags in upper case form so we can search them. - set(all_linker_flags_uppercase - "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}") + string(CONCAT all_linker_flags_uppercase + ${CMAKE_EXE_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} + ${CMAKE_EXE_LINKER_FLAGS} + ${CMAKE_MODULE_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} + ${CMAKE_MODULE_LINKER_FLAGS} + ${CMAKE_SHARED_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} + ${CMAKE_SHARED_LINKER_FLAGS}) string(TOUPPER "${all_linker_flags_uppercase}" all_linker_flags_uppercase) if (CLANG_CL AND LINKER_IS_LLD) From 0f5b718030e7112773ff4e88dd026204ba5b2890 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 6 Jan 2022 08:59:40 +0800 Subject: [PATCH 749/992] [LTO][codegen] Add TargetLibraryInfoWrapperPass initially Many codegen pass require this pass with useful triple info. 
Legacy pass manager need to add a TargetLibraryInfo with the module info before run passes. Or the TargetLibraryInfo will be initialized too conservative. Reviewed By: pengfei, aeubanks Differential Revision: https://reviews.llvm.org/D115850 --- llvm/lib/LTO/LTOBackend.cpp | 2 ++ llvm/test/LTO/X86/tli-sqrtf_finite.ll | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 llvm/test/LTO/X86/tli-sqrtf_finite.ll diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 855d0fc8a8be..308c60281737 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -415,6 +415,8 @@ static void codegen(const Config &Conf, TargetMachine *TM, TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; legacy::PassManager CodeGenPasses; + TargetLibraryInfoImpl TLII(Triple(Mod.getTargetTriple())); + CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); CodeGenPasses.add( createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); if (Conf.PreCodeGenPassesHook) diff --git a/llvm/test/LTO/X86/tli-sqrtf_finite.ll b/llvm/test/LTO/X86/tli-sqrtf_finite.ll new file mode 100644 index 000000000000..2d6c683b93be --- /dev/null +++ b/llvm/test/LTO/X86/tli-sqrtf_finite.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as < %s > %t +; RUN: llvm-lto %t -exported-symbol=foo -filetype=asm -o - | FileCheck %s + +; Check that sqrtf_finite is recognized as a libcall by SelectionDAGBuilder +; to enable sqrtss instruction to be used. 
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define float @foo(float %x) { +; CHECK: sqrtss + %call = tail call nnan ninf float @__sqrtf_finite(float %x) readnone + ret float %call +} + +declare float @__sqrtf_finite(float) readnone From b0774e5f500b5bb68451ee3f0590035d0f6e4e54 Mon Sep 17 00:00:00 2001 From: Mogball Date: Thu, 6 Jan 2022 01:42:12 +0000 Subject: [PATCH 750/992] [mlir][ods] ODS ops get an `extraClassDefinition` Extra definitions are placed in the generated source file for each op class. The substitution `$cppClass` is replaced by the op's C++ class name. This is useful when declaring but not defining methods in TableGen base classes: ``` class BaseOp : Op] { let extraClassDeclaration = [{ // ZOp is declared at at the bottom of the file and is incomplete here ZOp getParent(); }]; let extraClassDefinition = [{ int $cppClass::someInterfaceMethod() { return someUtilityFunction(*this); } ZOp $cppClass::getParent() { return dyn_cast(this->getParentOp()); } }]; } ``` Certain things may prevent defining these functions inline, in the declaration. In this example, `ZOp` in the same dialect is incomplete at the function declaration because ops classes are declared in alphabetical order. Alternatively, functions may be too big to be desired as inlined, or they may require dependencies that create cyclic includes, or they may be calling a templated utility function that one may not want to expose in a header. If the functions are not inlined, then inheriting from the base class N times means that each function will need to be defined N times. With `extraClassDefinitions`, they only need to be defined once. 
Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D115783 --- mlir/docs/OpDefinitions.md | 10 ++++++++++ mlir/include/mlir/IR/OpBase.td | 5 +++++ mlir/include/mlir/TableGen/Class.h | 18 ++++++++++++++---- mlir/include/mlir/TableGen/Operator.h | 3 +++ mlir/lib/TableGen/Class.cpp | 5 +++++ mlir/lib/TableGen/Operator.cpp | 7 +++++++ mlir/test/lib/Dialect/Test/TestOps.td | 5 ++++- mlir/tools/mlir-tblgen/OpClass.cpp | 6 ++++-- mlir/tools/mlir-tblgen/OpClass.h | 5 ++++- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 10 +++++++++- 10 files changed, 65 insertions(+), 9 deletions(-) diff --git a/mlir/docs/OpDefinitions.md b/mlir/docs/OpDefinitions.md index ec9e6fdc80dd..1e1abdc20d2f 100644 --- a/mlir/docs/OpDefinitions.md +++ b/mlir/docs/OpDefinitions.md @@ -964,6 +964,16 @@ Note that `extraClassDeclaration` is a mechanism intended for long-tail cases by power users; for not-yet-implemented widely-applicable cases, improving the infrastructure is preferable. +### Extra definitions + +When defining base op classes in TableGen that are inherited many times by +different ops, users may want to provide common definitions of utility and +interface functions. However, many of these definitions may not be desirable or +possible in `extraClassDeclaration`, which append them to the op's C++ class +declaration. In these cases, users can add an `extraClassDefinition` to define +code that is added to the generated source file inside the op's C++ namespace. +The substitution `$cppClass` is replaced by the op's C++ class name. + ### Generated C++ code [OpDefinitionsGen][OpDefinitionsGen] processes the op definition spec file and diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index f1a5446ad1f9..8e70f4844008 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -2445,6 +2445,11 @@ class Op props = []> { // Additional code that will be added to the public part of the generated // C++ code of the op declaration. 
code extraClassDeclaration = ?; + + // Additional code that will be added to the generated source file. The + // generated code is placed inside the op's C++ namespace. `$cppClass` is + // replaced by the op's C++ class name. + code extraClassDefinition = ?; } // Base class for ops with static/dynamic offset, sizes and strides diff --git a/mlir/include/mlir/TableGen/Class.h b/mlir/include/mlir/TableGen/Class.h index 1f310fe1d082..a8a710ff85fe 100644 --- a/mlir/include/mlir/TableGen/Class.h +++ b/mlir/include/mlir/TableGen/Class.h @@ -532,22 +532,32 @@ class VisibilityDeclaration Visibility visibility; }; -/// Unstructured extra class declarations, from TableGen definitions. The -/// default visibility of extra class declarations is up to the owning class. +/// Unstructured extra class declarations and definitions, from TableGen +/// definitions. The default visibility of extra class declarations is up to the +/// owning class. class ExtraClassDeclaration : public ClassDeclarationBase { public: /// Create an extra class declaration. - ExtraClassDeclaration(StringRef extraClassDeclaration) - : extraClassDeclaration(extraClassDeclaration) {} + ExtraClassDeclaration(StringRef extraClassDeclaration, + StringRef extraClassDefinition = "") + : extraClassDeclaration(extraClassDeclaration), + extraClassDefinition(extraClassDefinition) {} /// Write the extra class declarations. void writeDeclTo(raw_indented_ostream &os) const override; + /// Write the extra class definitions. + void writeDefTo(raw_indented_ostream &os, + StringRef namePrefix) const override; + private: /// The string of the extra class declarations. It is re-indented before /// printed. StringRef extraClassDeclaration; + /// The string of the extra class definitions. It is re-indented before + /// printed. + StringRef extraClassDefinition; }; /// A class used to emit C++ classes from Tablegen. 
Contains a list of public diff --git a/mlir/include/mlir/TableGen/Operator.h b/mlir/include/mlir/TableGen/Operator.h index 44f10440c1e3..ddfb7dd0178b 100644 --- a/mlir/include/mlir/TableGen/Operator.h +++ b/mlir/include/mlir/TableGen/Operator.h @@ -235,6 +235,9 @@ class Operator { // Returns this op's extra class declaration code. StringRef getExtraClassDeclaration() const; + // Returns this op's extra class definition code. + StringRef getExtraClassDefinition() const; + // Returns the Tablegen definition this operator was constructed from. // TODO: do not expose the TableGen record, this is a temporary solution to // OpEmitter requiring a Record because Operator does not provide enough diff --git a/mlir/lib/TableGen/Class.cpp b/mlir/lib/TableGen/Class.cpp index 9b7124e2e3a5..a7c02d3ae543 100644 --- a/mlir/lib/TableGen/Class.cpp +++ b/mlir/lib/TableGen/Class.cpp @@ -260,6 +260,11 @@ void ExtraClassDeclaration::writeDeclTo(raw_indented_ostream &os) const { os.printReindented(extraClassDeclaration); } +void ExtraClassDeclaration::writeDefTo(raw_indented_ostream &os, + StringRef namePrefix) const { + os.printReindented(extraClassDefinition); +} + //===----------------------------------------------------------------------===// // Class definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp index f1c1fe534666..cde617dcd30b 100644 --- a/mlir/lib/TableGen/Operator.cpp +++ b/mlir/lib/TableGen/Operator.cpp @@ -128,6 +128,13 @@ StringRef Operator::getExtraClassDeclaration() const { return def.getValueAsString(attr); } +StringRef Operator::getExtraClassDefinition() const { + constexpr auto attr = "extraClassDefinition"; + if (def.isValueUnset(attr)) + return {}; + return def.getValueAsString(attr); +} + const llvm::Record &Operator::getDef() const { return def; } bool Operator::skipDefaultBuilders() const { diff --git a/mlir/test/lib/Dialect/Test/TestOps.td 
b/mlir/test/lib/Dialect/Test/TestOps.td index 6fad11b85ad8..28dbc271d72d 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -382,7 +382,10 @@ def ConversionCallOp : TEST_Op<"conversion_call_op", let extraClassDeclaration = [{ /// Return the callee of this operation. - ::mlir::CallInterfaceCallable getCallableForCallee() { + ::mlir::CallInterfaceCallable getCallableForCallee(); + }]; + let extraClassDefinition = [{ + ::mlir::CallInterfaceCallable $cppClass::getCallableForCallee() { return (*this)->getAttrOfType<::mlir::SymbolRefAttr>("callee"); } }]; diff --git a/mlir/tools/mlir-tblgen/OpClass.cpp b/mlir/tools/mlir-tblgen/OpClass.cpp index 9524dc9210b8..3512212272f4 100644 --- a/mlir/tools/mlir-tblgen/OpClass.cpp +++ b/mlir/tools/mlir-tblgen/OpClass.cpp @@ -15,8 +15,10 @@ using namespace mlir::tblgen; // OpClass definitions //===----------------------------------------------------------------------===// -OpClass::OpClass(StringRef name, StringRef extraClassDeclaration) +OpClass::OpClass(StringRef name, StringRef extraClassDeclaration, + std::string extraClassDefinition) : Class(name.str()), extraClassDeclaration(extraClassDeclaration), + extraClassDefinition(std::move(extraClassDefinition)), parent(addParent("::mlir::Op")) { parent.addTemplateParam(getClassName().str()); declare(Visibility::Public); @@ -30,5 +32,5 @@ OpClass::OpClass(StringRef name, StringRef extraClassDeclaration) void OpClass::finalize() { Class::finalize(); declare(Visibility::Public); - declare(extraClassDeclaration); + declare(extraClassDeclaration, extraClassDefinition); } diff --git a/mlir/tools/mlir-tblgen/OpClass.h b/mlir/tools/mlir-tblgen/OpClass.h index b0558a0e5513..6b90dd2c3a3a 100644 --- a/mlir/tools/mlir-tblgen/OpClass.h +++ b/mlir/tools/mlir-tblgen/OpClass.h @@ -25,7 +25,8 @@ class OpClass : public Class { /// - inheritance of `print` /// - a type alias for the associated adaptor class /// - OpClass(StringRef name, StringRef 
extraClassDeclaration); + OpClass(StringRef name, StringRef extraClassDeclaration, + std::string extraClassDefinition); /// Add an op trait. void addTrait(Twine trait) { parent.addTemplateParam(trait.str()); } @@ -39,6 +40,8 @@ class OpClass : public Class { private: /// Hand-written extra class declarations. StringRef extraClassDeclaration; + /// Hand-written extra class definitions. + std::string extraClassDefinition; /// The parent class, which also contains the traits to be inherited. ParentClass &parent; }; diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index f024b90d3340..8511df9c54e6 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -557,10 +557,18 @@ static void genAttributeVerifier( } } +/// Op extra class definitions have a `$cppClass` substitution that is to be +/// replaced by the C++ class name. +static std::string formatExtraDefinitions(const Operator &op) { + FmtContext ctx = FmtContext().addSubst("cppClass", op.getCppClassName()); + return tgfmt(op.getExtraClassDefinition(), &ctx).str(); +} + OpEmitter::OpEmitter(const Operator &op, const StaticVerifierFunctionEmitter &staticVerifierEmitter) : def(op.getDef()), op(op), - opClass(op.getCppClassName(), op.getExtraClassDeclaration()), + opClass(op.getCppClassName(), op.getExtraClassDeclaration(), + formatExtraDefinitions(op)), staticVerifierEmitter(staticVerifierEmitter) { verifyCtx.withOp("(*this->getOperation())"); verifyCtx.addSubst("_ctxt", "this->getOperation()->getContext()"); From 75ea6b43197c3ece048c72c3553ce8219de90a2e Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 6 Jan 2022 09:35:23 +0800 Subject: [PATCH 751/992] [M68k][test][NFC] Fix no newline at end of file --- llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s | 2 +- llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s | 2 +- llvm/test/MC/M68k/Relocations/data-abs.s | 2 +- llvm/test/MC/M68k/Relocations/data-gotpcrel.s | 2 +- 
llvm/test/MC/M68k/Relocations/data-pc-rel.s | 2 +- llvm/test/MC/M68k/Relocations/text-plt.s | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s index 098b8cda558d..0e563c05e2eb 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s @@ -28,4 +28,4 @@ add.b #-1, (%a0) add.b #0, (%a3) ; CHECK: add.l #-1, (%a2) ; CHECK-SAME: encoding: [0x06,0x92,0xff,0xff,0xff,0xff] -add.l #-1, (%a2) \ No newline at end of file +add.l #-1, (%a2) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s index 071f37b1eb2a..aa832cf8a14f 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s @@ -17,4 +17,4 @@ add.l #131071, %d0 add.l #458752, %d7 ; CHECK: adda.l #0, %a0 ; CHECK-SAME: encoding: [0xd1,0xfc,0x00,0x00,0x00,0x00] -adda.l #0, %a0 \ No newline at end of file +adda.l #0, %a0 diff --git a/llvm/test/MC/M68k/Relocations/data-abs.s b/llvm/test/MC/M68k/Relocations/data-abs.s index 59e384b1cd57..b8e678404a5f 100644 --- a/llvm/test/MC/M68k/Relocations/data-abs.s +++ b/llvm/test/MC/M68k/Relocations/data-abs.s @@ -6,4 +6,4 @@ ; RELOC: R_68K_32 dst 0x0 ; INSTR: move.l dst, %d0 ; FIXUP: fixup A - offset: 2, value: dst, kind: FK_Data_4 -move.l dst, %d0 \ No newline at end of file +move.l dst, %d0 diff --git a/llvm/test/MC/M68k/Relocations/data-gotpcrel.s b/llvm/test/MC/M68k/Relocations/data-gotpcrel.s index e964f0209dfc..ac3ba96d1f09 100644 --- a/llvm/test/MC/M68k/Relocations/data-gotpcrel.s +++ b/llvm/test/MC/M68k/Relocations/data-gotpcrel.s @@ -11,4 +11,4 @@ move.l (dst1@GOTPCREL,%pc,%d0), %a0 ; RELOC: R_68K_GOTPCREL16 dst2 0x0 ; INSTR: move.l (dst2@GOTPCREL,%pc), %a0 ; FIXUP: fixup A - offset: 2, value: dst2@GOTPCREL, kind: FK_PCRel_2 -move.l (dst2@GOTPCREL,%pc), %a0 \ No newline at end of file 
+move.l (dst2@GOTPCREL,%pc), %a0 diff --git a/llvm/test/MC/M68k/Relocations/data-pc-rel.s b/llvm/test/MC/M68k/Relocations/data-pc-rel.s index 1a75c629fddc..909681da5a95 100644 --- a/llvm/test/MC/M68k/Relocations/data-pc-rel.s +++ b/llvm/test/MC/M68k/Relocations/data-pc-rel.s @@ -17,4 +17,4 @@ move.l (dst2,%pc), %a0 ; RELOC-NOT: R_68K_PC ; INSTR: move.l (0,%pc), %a0 ; FIXUP-NOT: fixup -move.l (0,%pc), %a0 \ No newline at end of file +move.l (0,%pc), %a0 diff --git a/llvm/test/MC/M68k/Relocations/text-plt.s b/llvm/test/MC/M68k/Relocations/text-plt.s index ea746f7d1da5..609908ce7c91 100644 --- a/llvm/test/MC/M68k/Relocations/text-plt.s +++ b/llvm/test/MC/M68k/Relocations/text-plt.s @@ -6,4 +6,4 @@ ; RELOC: R_68K_PLT16 target 0x0 ; INSTR: jsr (target@PLT,%pc) ; FIXUP: fixup A - offset: 2, value: target@PLT, kind: FK_PCRel_2 -jsr (target@PLT,%pc) \ No newline at end of file +jsr (target@PLT,%pc) From 358d020017c346466c3b3eda0b89c3517c642ae3 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Wed, 5 Jan 2022 20:34:01 -0500 Subject: [PATCH 752/992] [MLIR][LLVM] Add simple folders for bitcast/addrspacecast/gep Add 5 simple folders * bitcast(x : T0, T0) -> x * addrcast(x : T0, T0) -> x * bitcast(bitcast(x : T0, T1), T0) -> x * addrcast(addrcast(x : T0, T1), T0) -> x * gep %x:T, 0 -> %x:T Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116715 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 9 +++- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 43 ++++++++++++++++++ mlir/test/Dialect/LLVMIR/canonicalize.mlir | 49 +++++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 345d03e93940..95551730ddc8 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -327,6 +327,7 @@ def LLVM_GEPOp let assemblyFormat = [{ $base `[` $indices `]` attr-dict `:` functional-type(operands, 
results) }]; + let hasFolder = 1; } def LLVM_LoadOp : LLVM_Op<"load">, MemoryOpWithAlignmentAndAttributes { @@ -398,10 +399,14 @@ class LLVM_CastOpgetOperation(), p); }]; } def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "CreateBitCast", - LLVM_AnyNonAggregate, LLVM_AnyNonAggregate>; + LLVM_AnyNonAggregate, LLVM_AnyNonAggregate> { + let hasFolder = 1; +} def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "CreateAddrSpaceCast", LLVM_ScalarOrVectorOf, - LLVM_ScalarOrVectorOf>; + LLVM_ScalarOrVectorOf> { + let hasFolder = 1; +} def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "CreateIntToPtr", LLVM_ScalarOrVectorOf, LLVM_ScalarOrVectorOf>; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 77f436f109ca..e65c14e38268 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -19,6 +19,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Matchers.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/TypeSwitch.h" @@ -2259,6 +2260,48 @@ static LogicalResult verify(FenceOp &op) { return success(); } +//===----------------------------------------------------------------------===// +// Folder for LLVM::BitcastOp +//===----------------------------------------------------------------------===// + +OpFoldResult LLVM::BitcastOp::fold(ArrayRef operands) { + // bitcast(x : T0, T0) -> x + if (getArg().getType() == getType()) + return getArg(); + // bitcast(bitcast(x : T0, T1), T0) -> x + if (auto prev = getArg().getDefiningOp()) + if (prev.getArg().getType() == getType()) + return prev.getArg(); + return {}; +} + +//===----------------------------------------------------------------------===// +// Folder for LLVM::AddrSpaceCastOp +//===----------------------------------------------------------------------===// + +OpFoldResult LLVM::AddrSpaceCastOp::fold(ArrayRef operands) { + // 
addrcast(x : T0, T0) -> x + if (getArg().getType() == getType()) + return getArg(); + // addrcast(addrcast(x : T0, T1), T0) -> x + if (auto prev = getArg().getDefiningOp()) + if (prev.getArg().getType() == getType()) + return prev.getArg(); + return {}; +} + +//===----------------------------------------------------------------------===// +// Folder for LLVM::GEPOp +//===----------------------------------------------------------------------===// + +OpFoldResult LLVM::GEPOp::fold(ArrayRef operands) { + // gep %x:T, 0 -> %x + if (getBase().getType() == getType() && getIndices().size() == 1 && + matchPattern(getIndices()[0], m_Zero())) + return getBase(); + return {}; +} + //===----------------------------------------------------------------------===// // LLVMDialect initialization, type parsing, and registration. //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir index acbe0035e147..3b52bc1efb94 100644 --- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir +++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir @@ -38,3 +38,52 @@ llvm.func @no_fold_extractvalue(%arr: !llvm.array<4xf32>) -> f32 { llvm.return %3 : f32 } + +// ----- +// CHECK-LABEL: fold_bitcast +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// CHECK-LABEL: fold_bitcast2 +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + %d = llvm.bitcast %c : !llvm.ptr to !llvm.ptr + llvm.return %d : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_addrcast +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : 
!llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// CHECK-LABEL: fold_addrcast2 +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + %d = llvm.addrspacecast %c : !llvm.ptr to !llvm.ptr + llvm.return %d : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_gep +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { + %c0 = arith.constant 0 : i32 + %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) -> !llvm.ptr + llvm.return %c : !llvm.ptr +} + From cb0e12a144dea486b383876f138a8398e9c9386e Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 6 Jan 2022 10:14:46 +0800 Subject: [PATCH 753/992] [M68k][test][NFC] Add missing tests for MxCMP_RM Let each format of inst have two tests for it like other MxCMP testcases. --- llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s index 3cceb1fbba0b..772268e100e7 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s @@ -9,10 +9,19 @@ cmp.b (-1,%pc,%d1), %d0 ; CHECK: cmp.l (0,%pc,%d1), %d0 ; CHECK-SAME: encoding: [0xb0,0xbb,0x18,0x00] cmp.l (0,%pc,%d1), %d0 +; CHECK: cmp.l (-1,%pc,%d1), %d0 +; CHECK-SAME: encoding: [0xb0,0xbb,0x18,0xff] +cmp.l (-1,%pc,%d1), %d0 ; CHECK: cmp.b (0,%pc), %d0 ; CHECK-SAME: encoding: [0xb0,0x3a,0x00,0x00] cmp.b (0,%pc), %d0 +; CHECK: cmp.b (-1,%pc), %d0 +; CHECK-SAME: encoding: [0xb0,0x3a,0xff,0xff] +cmp.b (-1,%pc), %d0 +; CHECK: cmp.l (0,%pc), %d0 +; CHECK-SAME: encoding: [0xb0,0xba,0x00,0x00] +cmp.l (0,%pc), %d0 ; CHECK: cmp.l (-1,%pc), %d0 ; CHECK-SAME: encoding: [0xb0,0xba,0xff,0xff] cmp.l (-1,%pc), %d0 @@ -33,6 +42,12 @@ cmp.l (0,%a2,%a2), %d1 ; CHECK: cmp.b (0,%a0), %d0 ; CHECK-SAME: encoding: 
[0xb0,0x28,0x00,0x00] cmp.b (0,%a0), %d0 +; CHECK: cmp.b (-1,%a1), %d0 +; CHECK-SAME: encoding: [0xb0,0x29,0xff,0xff] +cmp.b (-1,%a1), %d0 +; CHECK: cmp.l (0,%a0), %d0 +; CHECK-SAME: encoding: [0xb0,0xa8,0x00,0x00] +cmp.l (0,%a0), %d0 ; CHECK: cmp.l (-1,%a1), %d0 ; CHECK-SAME: encoding: [0xb0,0xa9,0xff,0xff] cmp.l (-1,%a1), %d0 @@ -40,6 +55,12 @@ cmp.l (-1,%a1), %d0 ; CHECK: cmp.b (%a0), %d0 ; CHECK-SAME: encoding: [0xb0,0x10] cmp.b (%a0), %d0 +; CHECK: cmp.b (%a0), %d1 +; CHECK-SAME: encoding: [0xb2,0x10] +cmp.b (%a0), %d1 +; CHECK: cmp.l (%a1), %d2 +; CHECK-SAME: encoding: [0xb4,0x91] +cmp.l (%a1), %d2 ; CHECK: cmp.l (%a1), %d3 ; CHECK-SAME: encoding: [0xb6,0x91] cmp.l (%a1), %d3 From ef6817f9329ce16dae33e64e2534a52647f089b0 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Wed, 5 Jan 2022 14:22:33 -0800 Subject: [PATCH 754/992] [lldb] Break out long help for 'frame var' The current help for `frame variable` is somewhat long. Its length, combined with the few aliases (`var`, `v`, and `vo`) can make the output of `apropos` redundant and noisy. This separates out the details into a separate long help. Differential Revision: https://reviews.llvm.org/D116708 --- lldb/source/Commands/CommandObjectFrame.cpp | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 2b9f5316409f..9cfe997f9227 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -394,19 +394,7 @@ class CommandObjectFrameVariable : public CommandObjectParsed { interpreter, "frame variable", "Show variables for the current stack frame. Defaults to all " "arguments and local variables in scope. Names of argument, " - "local, file static and file global variables can be specified. " - "Children of aggregate variables can be specified such as " - "'var->child.x'. 
The -> and [] operators in 'frame variable' do " - "not invoke operator overloads if they exist, but directly access " - "the specified element. If you want to trigger operator overloads " - "use the expression command to print the variable instead." - "\nIt is worth noting that except for overloaded " - "operators, when printing local variables 'expr local_var' and " - "'frame var local_var' produce the same " - "results. However, 'frame variable' is more efficient, since it " - "uses debug information and memory reads directly, rather than " - "parsing and evaluating an expression, which may even involve " - "JITing and running code in the target program.", + "local, file static and file global variables can be specified.", nullptr, eCommandRequiresFrame | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | eCommandProcessMustBePaused | @@ -415,6 +403,18 @@ class CommandObjectFrameVariable : public CommandObjectParsed { m_option_variable( true), // Include the frame specific options by passing "true" m_option_format(eFormatDefault), m_varobj_options() { + SetHelpLong(R"( +Children of aggregate variables can be specified such as 'var->child.x'. In +'frame variable', the operators -> and [] do not invoke operator overloads if +they exist, but directly access the specified element. If you want to trigger +operator overloads use the expression command to print the variable instead. + +It is worth noting that except for overloaded operators, when printing local +variables 'expr local_var' and 'frame var local_var' produce the same results. 
+However, 'frame variable' is more efficient, since it uses debug information and +memory reads directly, rather than parsing and evaluating an expression, which +may even involve JITing and running code in the target program.)"); + CommandArgumentEntry arg; CommandArgumentData var_name_arg; From 9e2cfb061a8821236944b3c8f40641846ab6bc94 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 4 Jan 2022 10:21:44 +1100 Subject: [PATCH 755/992] [ORC] Make ExecutorAddrDiff an alias for uint64_t. We don't need to restrict operations on ExecutorAddrDiff as carefully as we do for ExecutorAddr. --- .../Orc/Shared/ExecutorAddress.h | 20 +++++-------------- .../Orc/Shared/TargetProcessControlTypes.h | 2 +- .../Orc/TargetProcess/JITLoaderGDB.cpp | 3 +-- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index 3c0b2b9edd52..2d316b9de007 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -21,17 +21,7 @@ namespace llvm { namespace orc { -/// Represents the difference between two addresses in the executor process. -class ExecutorAddrDiff { -public: - ExecutorAddrDiff() = default; - explicit ExecutorAddrDiff(uint64_t Value) : Value(Value) {} - - uint64_t getValue() const { return Value; } - -private: - int64_t Value = 0; -}; +using ExecutorAddrDiff = uint64_t; /// Represents an address in the executor process. 
class ExecutorAddr { @@ -99,12 +89,12 @@ class ExecutorAddr { ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); } ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) { - Addr += Delta.getValue(); + Addr += Delta; return *this; } ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) { - Addr -= Delta.getValue(); + Addr -= Delta; return *this; } @@ -121,13 +111,13 @@ inline ExecutorAddrDiff operator-(const ExecutorAddr &LHS, /// Adding an offset and an address yields an address. inline ExecutorAddr operator+(const ExecutorAddr &LHS, const ExecutorAddrDiff &RHS) { - return ExecutorAddr(LHS.getValue() + RHS.getValue()); + return ExecutorAddr(LHS.getValue() + RHS); } /// Adding an address and an offset yields an address. inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS, const ExecutorAddr &RHS) { - return ExecutorAddr(LHS.getValue() + RHS.getValue()); + return ExecutorAddr(LHS + RHS.getValue()); } /// Represents an address range in the exceutor process. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h index 0e8b7e7d345a..9e329594012e 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h @@ -85,7 +85,7 @@ struct WrapperFunctionCall { shared::CWrapperFunctionResult(const char *ArgData, size_t ArgSize); return shared::WrapperFunctionResult( Func.toPtr()(ArgData.Start.toPtr(), - static_cast(ArgData.size().getValue()))); + static_cast(ArgData.size()))); } /// Run call and deserialize result using SPS. 
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp index 4c15e25b1d89..1f9d7955acfa 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp @@ -120,8 +120,7 @@ llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size) { return WrapperFunction::handle( Data, Size, [](ExecutorAddrRange R) { - registerJITLoaderGDBImpl(R.Start.toPtr(), - R.size().getValue()); + registerJITLoaderGDBImpl(R.Start.toPtr(), R.size()); }) .release(); } From 133f86e95492b2a00b944e070878424cfa73f87c Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 5 Jan 2022 17:00:06 +1100 Subject: [PATCH 756/992] [JITLink] Update JITLink to use ExecutorAddr rather than JITTargetAddress. ExecutorAddr is the preferred representation for executor process addresses now. --- .../LLJITWithObjectLinkingLayerPlugin.cpp | 11 +- .../ExecutionEngine/JITLink/EHFrameSupport.h | 13 +- .../llvm/ExecutionEngine/JITLink/JITLink.h | 140 ++++++++++-------- .../JITLink/JITLinkMemoryManager.h | 40 ++--- .../llvm/ExecutionEngine/JITLink/x86_64.h | 22 ++- .../llvm/ExecutionEngine/Orc/ELFNixPlatform.h | 2 +- .../ExecutionEngine/Orc/EPCEHFrameRegistrar.h | 4 +- .../Orc/EPCGenericJITLinkMemoryManager.h | 2 +- .../llvm/ExecutionEngine/Orc/MachOPlatform.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- .../Orc/Shared/ExecutorAddress.h | 45 +++++- .../DefineExternalSectionStartAndEndSymbols.h | 4 +- .../JITLink/EHFrameSupport.cpp | 56 ++++--- .../JITLink/EHFrameSupportImpl.h | 11 +- .../JITLink/ELFLinkGraphBuilder.h | 14 +- .../ExecutionEngine/JITLink/ELF_aarch64.cpp | 8 +- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 27 ++-- .../ExecutionEngine/JITLink/ELF_x86_64.cpp | 11 +- llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 37 ++--- .../JITLink/JITLinkGeneric.cpp | 9 +- .../JITLink/JITLinkMemoryManager.cpp | 22 +-- 
.../JITLink/MachOLinkGraphBuilder.cpp | 82 +++++----- .../JITLink/MachOLinkGraphBuilder.h | 14 +- .../ExecutionEngine/JITLink/MachO_arm64.cpp | 44 +++--- .../ExecutionEngine/JITLink/MachO_x86_64.cpp | 22 +-- .../JITLink/PerGraphGOTAndPLTStubsBuilder.h | 8 +- llvm/lib/ExecutionEngine/JITLink/x86_64.cpp | 10 +- .../Orc/DebugObjectManagerPlugin.cpp | 6 +- .../Orc/DebuggerSupportPlugin.cpp | 24 +-- .../ExecutionEngine/Orc/ELFNixPlatform.cpp | 12 +- .../Orc/EPCEHFrameRegistrar.cpp | 10 +- .../Orc/EPCGenericJITLinkMemoryManager.cpp | 4 +- .../Orc/EPCIndirectionUtils.cpp | 23 +-- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 8 +- .../lib/ExecutionEngine/Orc/MachOPlatform.cpp | 38 +++-- .../Orc/ObjectLinkingLayer.cpp | 6 +- llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp | 19 +-- .../tools/llvm-jitlink/llvm-jitlink-macho.cpp | 19 +-- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 27 ++-- .../JITLink/LinkGraphTests.cpp | 87 +++++++---- .../Orc/ObjectLinkingLayerTest.cpp | 3 +- 41 files changed, 519 insertions(+), 429 deletions(-) diff --git a/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp b/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp index 2215e2507db3..5a41a7c1e8a6 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp @@ -100,14 +100,15 @@ class MyPlugin : public ObjectLinkingLayer::Plugin { return; } - JITTargetAddress InitAddr = B.getAddress() & ~(LineWidth - 1); - JITTargetAddress StartAddr = B.getAddress(); - JITTargetAddress EndAddr = B.getAddress() + B.getSize(); + ExecutorAddr InitAddr(B.getAddress().getValue() & ~(LineWidth - 1)); + ExecutorAddr StartAddr = B.getAddress(); + ExecutorAddr EndAddr = B.getAddress() + B.getSize(); auto *Data = reinterpret_cast(B.getContent().data()); - for 
(JITTargetAddress CurAddr = InitAddr; CurAddr != EndAddr; ++CurAddr) { + for (ExecutorAddr CurAddr = InitAddr; CurAddr != EndAddr; ++CurAddr) { if (CurAddr % LineWidth == 0) - outs() << " " << formatv("{0:x16}", CurAddr) << ": "; + outs() << " " << formatv("{0:x16}", CurAddr.getValue()) + << ": "; if (CurAddr < StartAddr) outs() << " "; else diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h index ec78d9db40b6..e834042f8bb2 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h @@ -25,25 +25,24 @@ namespace jitlink { class EHFrameRegistrar { public: virtual ~EHFrameRegistrar(); - virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + virtual Error registerEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) = 0; - virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + virtual Error deregisterEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) = 0; }; /// Registers / Deregisters EH-frames in the current process. class InProcessEHFrameRegistrar final : public EHFrameRegistrar { public: - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + Error registerEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; - Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + Error deregisterEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; }; -using StoreFrameRangeFunction = - std::function; +using StoreFrameRangeFunction = std::function; /// Creates a pass that records the address and size of the EH frame section. 
/// If no eh-frame section is found then the address and size will both be given diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 69106fcb4c28..d0d497b75d9d 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -104,10 +104,10 @@ class Addressable { friend class LinkGraph; protected: - Addressable(JITTargetAddress Address, bool IsDefined) + Addressable(orc::ExecutorAddr Address, bool IsDefined) : Address(Address), IsDefined(IsDefined), IsAbsolute(false) {} - Addressable(JITTargetAddress Address) + Addressable(orc::ExecutorAddr Address) : Address(Address), IsDefined(false), IsAbsolute(true) { assert(!(IsDefined && IsAbsolute) && "Block cannot be both defined and absolute"); @@ -119,8 +119,8 @@ class Addressable { Addressable(Addressable &&) = delete; Addressable &operator=(Addressable &&) = default; - JITTargetAddress getAddress() const { return Address; } - void setAddress(JITTargetAddress Address) { this->Address = Address; } + orc::ExecutorAddr getAddress() const { return Address; } + void setAddress(orc::ExecutorAddr Address) { this->Address = Address; } /// Returns true if this is a defined addressable, in which case you /// can downcast this to a Block. @@ -133,7 +133,7 @@ class Addressable { this->IsAbsolute = IsAbsolute; } - JITTargetAddress Address = 0; + orc::ExecutorAddr Address; uint64_t IsDefined : 1; uint64_t IsAbsolute : 1; @@ -152,7 +152,7 @@ class Block : public Addressable { private: /// Create a zero-fill defined addressable. 
- Block(Section &Parent, JITTargetAddress Size, JITTargetAddress Address, + Block(Section &Parent, orc::ExecutorAddrDiff Size, orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Size(Size) { assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); @@ -168,7 +168,7 @@ class Block : public Addressable { /// Create a defined addressable for the given content. /// The Content is assumed to be non-writable, and will be copied when /// mutations are required. - Block(Section &Parent, ArrayRef Content, JITTargetAddress Address, + Block(Section &Parent, ArrayRef Content, orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Data(Content.data()), Size(Content.size()) { @@ -188,7 +188,7 @@ class Block : public Addressable { /// The standard way to achieve this is to allocate it on the Graph's /// allocator. Block(Section &Parent, MutableArrayRef Content, - JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) + orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Data(Content.data()), Size(Content.size()) { assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); @@ -328,7 +328,7 @@ class Block : public Addressable { /// Returns the address of the fixup for the given edge, which is equal to /// this block's address plus the edge's offset. - JITTargetAddress getFixupAddress(const Edge &E) const { + orc::ExecutorAddr getFixupAddress(const Edge &E) const { return getAddress() + E.getOffset(); } @@ -343,12 +343,17 @@ class Block : public Addressable { std::vector Edges; }; -// Align a JITTargetAddress to conform with block alignment requirements. -inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { +// Align an address to conform with block alignment requirements. 
+inline uint64_t alignToBlock(uint64_t Addr, Block &B) { uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); return Addr + Delta; } +// Align a orc::ExecutorAddr to conform with block alignment requirements. +inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, Block &B) { + return orc::ExecutorAddr(alignToBlock(Addr.getValue(), B)); +} + /// Describes symbol linkage. This can be used to make resolve definition /// clashes. enum class Linkage : uint8_t { @@ -391,8 +396,8 @@ class Symbol { friend class LinkGraph; private: - Symbol(Addressable &Base, JITTargetAddress Offset, StringRef Name, - JITTargetAddress Size, Linkage L, Scope S, bool IsLive, + Symbol(Addressable &Base, orc::ExecutorAddrDiff Offset, StringRef Name, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive, bool IsCallable) : Name(Name), Base(&Base), Offset(Offset), Size(Size) { assert(Offset <= MaxOffset && "Offset out of range"); @@ -403,7 +408,8 @@ class Symbol { } static Symbol &constructCommon(void *SymStorage, Block &Base, StringRef Name, - JITTargetAddress Size, Scope S, bool IsLive) { + orc::ExecutorAddrDiff Size, Scope S, + bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert(!Name.empty() && "Common symbol name cannot be empty"); assert(Base.isDefined() && @@ -416,7 +422,7 @@ class Symbol { } static Symbol &constructExternal(void *SymStorage, Addressable &Base, - StringRef Name, JITTargetAddress Size, + StringRef Name, orc::ExecutorAddrDiff Size, Linkage L) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && @@ -428,7 +434,7 @@ class Symbol { } static Symbol &constructAbsolute(void *SymStorage, Addressable &Base, - StringRef Name, JITTargetAddress Size, + StringRef Name, orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && @@ -439,8 +445,8 @@ class Symbol { } static Symbol &constructAnonDef(void *SymStorage, Block &Base, - 
JITTargetAddress Offset, - JITTargetAddress Size, bool IsCallable, + orc::ExecutorAddrDiff Offset, + orc::ExecutorAddrDiff Size, bool IsCallable, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert((Offset + Size) <= Base.getSize() && @@ -452,9 +458,9 @@ class Symbol { } static Symbol &constructNamedDef(void *SymStorage, Block &Base, - JITTargetAddress Offset, StringRef Name, - JITTargetAddress Size, Linkage L, Scope S, - bool IsLive, bool IsCallable) { + orc::ExecutorAddrDiff Offset, StringRef Name, + orc::ExecutorAddrDiff Size, Linkage L, + Scope S, bool IsLive, bool IsCallable) { assert(SymStorage && "Storage cannot be null"); assert((Offset + Size) <= Base.getSize() && "Symbol extends past end of block"); @@ -552,16 +558,16 @@ class Symbol { } /// Returns the offset for this symbol within the underlying addressable. - JITTargetAddress getOffset() const { return Offset; } + orc::ExecutorAddrDiff getOffset() const { return Offset; } /// Returns the address of this symbol. - JITTargetAddress getAddress() const { return Base->getAddress() + Offset; } + orc::ExecutorAddr getAddress() const { return Base->getAddress() + Offset; } /// Returns the size of this symbol. - JITTargetAddress getSize() const { return Size; } + orc::ExecutorAddrDiff getSize() const { return Size; } /// Set the size of this symbol. 
- void setSize(JITTargetAddress Size) { + void setSize(orc::ExecutorAddrDiff Size) { assert(Base && "Cannot set size for null Symbol"); assert((Size == 0 || Base->isDefined()) && "Non-zero size can only be set for defined symbols"); @@ -622,7 +628,7 @@ class Symbol { void setBlock(Block &B) { Base = &B; } - void setOffset(uint64_t NewOffset) { + void setOffset(orc::ExecutorAddrDiff NewOffset) { assert(NewOffset <= MaxOffset && "Offset out of range"); Offset = NewOffset; } @@ -637,7 +643,7 @@ class Symbol { uint64_t S : 2; uint64_t IsLive : 1; uint64_t IsCallable : 1; - JITTargetAddress Size = 0; + orc::ExecutorAddrDiff Size = 0; }; raw_ostream &operator<<(raw_ostream &OS, const Symbol &A); @@ -783,13 +789,13 @@ class SectionRange { assert((First || !Last) && "Last can not be null if start is non-null"); return !First; } - JITTargetAddress getStart() const { - return First ? First->getAddress() : 0; + orc::ExecutorAddr getStart() const { + return First ? First->getAddress() : orc::ExecutorAddr(); } - JITTargetAddress getEnd() const { - return Last ? Last->getAddress() + Last->getSize() : 0; + orc::ExecutorAddr getEnd() const { + return Last ? Last->getAddress() + Last->getSize() : orc::ExecutorAddr(); } - uint64_t getSize() const { return getEnd() - getStart(); } + orc::ExecutorAddrDiff getSize() const { return getEnd() - getStart(); } private: Block *First = nullptr; @@ -995,7 +1001,7 @@ class LinkGraph { /// Create a content block. Block &createContentBlock(Section &Parent, ArrayRef Content, - uint64_t Address, uint64_t Alignment, + orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, Content, Address, Alignment, AlignmentOffset); } @@ -1003,15 +1009,17 @@ class LinkGraph { /// Create a content block with initially mutable data. 
Block &createMutableContentBlock(Section &Parent, MutableArrayRef MutableContent, - uint64_t Address, uint64_t Alignment, + orc::ExecutorAddr Address, + uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, MutableContent, Address, Alignment, AlignmentOffset); } /// Create a zero-fill block. - Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address, - uint64_t Alignment, uint64_t AlignmentOffset) { + Block &createZeroFillBlock(Section &Parent, orc::ExecutorAddrDiff Size, + orc::ExecutorAddr Address, uint64_t Alignment, + uint64_t AlignmentOffset) { return createBlock(Parent, Size, Address, Alignment, AlignmentOffset); } @@ -1061,22 +1069,24 @@ class LinkGraph { /// present during lookup: Externals with strong linkage must be found or /// an error will be emitted. Externals with weak linkage are permitted to /// be undefined, in which case they are assigned a value of 0. - Symbol &addExternalSymbol(StringRef Name, uint64_t Size, Linkage L) { + Symbol &addExternalSymbol(StringRef Name, orc::ExecutorAddrDiff Size, + Linkage L) { assert(llvm::count_if(ExternalSymbols, [&](const Symbol *Sym) { return Sym->getName() == Name; }) == 0 && "Duplicate external symbol"); - auto &Sym = - Symbol::constructExternal(Allocator.Allocate(), - createAddressable(0, false), Name, Size, L); + auto &Sym = Symbol::constructExternal( + Allocator.Allocate(), + createAddressable(orc::ExecutorAddr(), false), Name, Size, L); ExternalSymbols.insert(&Sym); return Sym; } /// Add an absolute symbol. - Symbol &addAbsoluteSymbol(StringRef Name, JITTargetAddress Address, - uint64_t Size, Linkage L, Scope S, bool IsLive) { + Symbol &addAbsoluteSymbol(StringRef Name, orc::ExecutorAddr Address, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, + bool IsLive) { assert(llvm::count_if(AbsoluteSymbols, [&](const Symbol *Sym) { return Sym->getName() == Name; @@ -1091,7 +1101,7 @@ class LinkGraph { /// Convenience method for adding a weak zero-fill symbol. 
Symbol &addCommonSymbol(StringRef Name, Scope S, Section &Section, - JITTargetAddress Address, uint64_t Size, + orc::ExecutorAddr Address, orc::ExecutorAddrDiff Size, uint64_t Alignment, bool IsLive) { assert(llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { @@ -1107,8 +1117,8 @@ class LinkGraph { } /// Add an anonymous symbol. - Symbol &addAnonymousSymbol(Block &Content, JITTargetAddress Offset, - JITTargetAddress Size, bool IsCallable, + Symbol &addAnonymousSymbol(Block &Content, orc::ExecutorAddrDiff Offset, + orc::ExecutorAddrDiff Size, bool IsCallable, bool IsLive) { auto &Sym = Symbol::constructAnonDef(Allocator.Allocate(), Content, Offset, Size, IsCallable, IsLive); @@ -1117,9 +1127,9 @@ class LinkGraph { } /// Add a named symbol. - Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, - StringRef Name, JITTargetAddress Size, Linkage L, - Scope S, bool IsCallable, bool IsLive) { + Symbol &addDefinedSymbol(Block &Content, orc::ExecutorAddrDiff Offset, + StringRef Name, orc::ExecutorAddrDiff Size, + Linkage L, Scope S, bool IsCallable, bool IsLive) { assert((S == Scope::Local || llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { return Sym->getName() == Name; @@ -1193,7 +1203,7 @@ class LinkGraph { assert(Sym.isDefined() && "Sym is not a defined symbol"); Section &Sec = Sym.getBlock().getSection(); Sec.removeSymbol(Sym); - Sym.makeExternal(createAddressable(0, false)); + Sym.makeExternal(createAddressable(orc::ExecutorAddr(), false)); } ExternalSymbols.insert(&Sym); } @@ -1203,7 +1213,7 @@ class LinkGraph { /// /// Symbol size, linkage, scope, and callability, and liveness will be left /// unchanged. Symbol offset will be reset to 0. 
- void makeAbsolute(Symbol &Sym, JITTargetAddress Address) { + void makeAbsolute(Symbol &Sym, orc::ExecutorAddr Address) { assert(!Sym.isAbsolute() && "Symbol is already absolute"); if (Sym.isExternal()) { assert(ExternalSymbols.count(&Sym) && @@ -1222,8 +1232,9 @@ class LinkGraph { /// Turn an absolute or external symbol into a defined one by attaching it to /// a block. Symbol must not already be defined. - void makeDefined(Symbol &Sym, Block &Content, JITTargetAddress Offset, - JITTargetAddress Size, Linkage L, Scope S, bool IsLive) { + void makeDefined(Symbol &Sym, Block &Content, orc::ExecutorAddrDiff Offset, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, + bool IsLive) { assert(!Sym.isDefined() && "Sym is already a defined symbol"); if (Sym.isAbsolute()) { assert(AbsoluteSymbols.count(&Sym) && @@ -1255,15 +1266,15 @@ class LinkGraph { /// /// All other symbol attributes are unchanged. void transferDefinedSymbol(Symbol &Sym, Block &DestBlock, - JITTargetAddress NewOffset, - Optional ExplicitNewSize) { + orc::ExecutorAddrDiff NewOffset, + Optional ExplicitNewSize) { auto &OldSection = Sym.getBlock().getSection(); Sym.setBlock(DestBlock); Sym.setOffset(NewOffset); if (ExplicitNewSize) Sym.setSize(*ExplicitNewSize); else { - JITTargetAddress RemainingBlockSize = DestBlock.getSize() - NewOffset; + auto RemainingBlockSize = DestBlock.getSize() - NewOffset; if (Sym.getSize() > RemainingBlockSize) Sym.setSize(RemainingBlockSize); } @@ -1407,14 +1418,14 @@ inline MutableArrayRef Block::getMutableContent(LinkGraph &G) { /// Enables easy lookup of blocks by addresses. class BlockAddressMap { public: - using AddrToBlockMap = std::map; + using AddrToBlockMap = std::map; using const_iterator = AddrToBlockMap::const_iterator; /// A block predicate that always adds all blocks. static bool includeAllBlocks(const Block &B) { return true; } /// A block predicate that always includes blocks with non-null addresses. 
- static bool includeNonNull(const Block &B) { return B.getAddress(); } + static bool includeNonNull(const Block &B) { return !!B.getAddress(); } BlockAddressMap() = default; @@ -1478,7 +1489,7 @@ class BlockAddressMap { /// Returns the block starting at the given address, or nullptr if no such /// block exists. - Block *getBlockAt(JITTargetAddress Addr) const { + Block *getBlockAt(orc::ExecutorAddr Addr) const { auto I = AddrToBlock.find(Addr); if (I == AddrToBlock.end()) return nullptr; @@ -1487,7 +1498,7 @@ class BlockAddressMap { /// Returns the block covering the given address, or nullptr if no such block /// exists. - Block *getBlockCovering(JITTargetAddress Addr) const { + Block *getBlockCovering(orc::ExecutorAddr Addr) const { auto I = AddrToBlock.upper_bound(Addr); if (I == AddrToBlock.begin()) return nullptr; @@ -1504,10 +1515,11 @@ class BlockAddressMap { ExistingBlock.getAddress() + ExistingBlock.getSize(); return make_error( "Block at " + - formatv("{0:x16} -- {1:x16}", NewBlock.getAddress(), NewBlockEnd) + + formatv("{0:x16} -- {1:x16}", NewBlock.getAddress().getValue(), + NewBlockEnd.getValue()) + " overlaps " + - formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress(), - ExistingBlockEnd)); + formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress().getValue(), + ExistingBlockEnd.getValue())); } AddrToBlockMap AddrToBlock; @@ -1532,7 +1544,7 @@ class SymbolAddressMap { /// Returns the list of symbols that start at the given address, or nullptr if /// no such symbols exist. - const SymbolVector *getSymbolsAt(JITTargetAddress Addr) const { + const SymbolVector *getSymbolsAt(orc::ExecutorAddr Addr) const { auto I = AddrToSymbols.find(Addr); if (I == AddrToSymbols.end()) return nullptr; @@ -1540,7 +1552,7 @@ class SymbolAddressMap { } private: - std::map AddrToSymbols; + std::map AddrToSymbols; }; /// A function for mutating LinkGraphs. 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 7dd382facde8..179a8b946cf3 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -13,9 +13,10 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" #include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" -#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/MSVCErrorWorkarounds.h" @@ -49,9 +50,9 @@ class Section; /// executor-side implementation code is responsible for freeing the error /// string). struct AllocActionCall { - JITTargetAddress FnAddr = 0; - JITTargetAddress CtxAddr = 0; - JITTargetAddress CtxSize = 0; + orc::ExecutorAddr FnAddr; + orc::ExecutorAddr CtxAddr; + orc::ExecutorAddrDiff CtxSize; }; /// A pair of AllocActionCalls, one to be run at finalization time, one to be @@ -93,47 +94,48 @@ class JITLinkMemoryManager { class FinalizedAlloc { friend class JITLinkMemoryManager; - public: - static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0); + static constexpr auto InvalidAddr = ~uint64_t(0); + public: FinalizedAlloc() = default; - explicit FinalizedAlloc(JITTargetAddress A) : A(A) { - assert(A != 0 && "Explicitly creating an invalid allocation?"); + explicit FinalizedAlloc(orc::ExecutorAddr A) : A(A) { + assert(A && "Explicitly creating an invalid allocation?"); } FinalizedAlloc(const FinalizedAlloc &) = delete; FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) { - Other.A = InvalidAddr; + Other.A.setValue(InvalidAddr); } FinalizedAlloc &operator=(const FinalizedAlloc &) = delete; 
FinalizedAlloc &operator=(FinalizedAlloc &&Other) { - assert(A == InvalidAddr && + assert(A.getValue() == InvalidAddr && "Cannot overwrite active finalized allocation"); std::swap(A, Other.A); return *this; } ~FinalizedAlloc() { - assert(A == InvalidAddr && "Finalized allocation was not deallocated"); + assert(A.getValue() == InvalidAddr && + "Finalized allocation was not deallocated"); } /// FinalizedAllocs convert to false for default-constructed, and /// true otherwise. Default-constructed allocs need not be deallocated. - explicit operator bool() const { return A != InvalidAddr; } + explicit operator bool() const { return A.getValue() != InvalidAddr; } /// Returns the address associated with this finalized allocation. /// The allocation is unmodified. - JITTargetAddress getAddress() const { return A; } + orc::ExecutorAddr getAddress() const { return A; } /// Returns the address associated with this finalized allocation and /// resets this object to the default state. /// This should only be used by allocators when deallocating memory. - JITTargetAddress release() { - JITTargetAddress Tmp = A; - A = InvalidAddr; + orc::ExecutorAddr release() { + orc::ExecutorAddr Tmp = A; + A.setValue(InvalidAddr); return Tmp; } private: - JITTargetAddress A = InvalidAddr; + orc::ExecutorAddr A{InvalidAddr}; }; /// Represents an allocation which has not been finalized yet. @@ -263,7 +265,7 @@ class BasicLayout { Align Alignment; size_t ContentSize; uint64_t ZeroFillSize; - JITTargetAddress Addr; + orc::ExecutorAddr Addr; char *WorkingMem = nullptr; private: @@ -341,7 +343,7 @@ class SimpleSegmentAlloc { /// Describes the segment working memory and executor address. 
struct SegmentInfo { - JITTargetAddress Addr = 0; + orc::ExecutorAddr Addr; MutableArrayRef WorkingMem; }; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 3130ea381534..4a4e8d15be66 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -368,18 +368,18 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + auto FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Pointer64: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); *(ulittle64_t *)FixupPtr = Value; break; } case Pointer32: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmU32(Value))) *(ulittle32_t *)FixupPtr = Value; else @@ -387,7 +387,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, break; } case Pointer32Signed: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmS32(Value))) *(little32_t *)FixupPtr = Value; else @@ -483,8 +483,8 @@ extern const char PointerJumpStubContent[6]; inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, Symbol *InitialTarget = nullptr, uint64_t InitialAddend = 0) { - auto &B = - G.createContentBlock(PointerSection, NullPointerContent, ~7ULL, 8, 0); + auto &B = G.createContentBlock(PointerSection, NullPointerContent, + orc::ExecutorAddr(~uint64_t(7)), 8, 0); if (InitialTarget) B.addEdge(Pointer64, 0, *InitialTarget, InitialAddend); return G.addAnonymousSymbol(B, 0, 8, 
false, false); @@ -498,8 +498,8 @@ inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, /// address: highest allowable: (~5U) inline Block &createPointerJumpStubBlock(LinkGraph &G, Section &StubSection, Symbol &PointerSymbol) { - auto &B = - G.createContentBlock(StubSection, PointerJumpStubContent, ~5ULL, 1, 0); + auto &B = G.createContentBlock(StubSection, PointerJumpStubContent, + orc::ExecutorAddr(~uint64_t(5)), 1, 0); B.addEdge(Delta32, 2, PointerSymbol, -4); return B; } @@ -552,8 +552,7 @@ class GOTTableManager : public TableManager { "Fell through switch, but no new kind to set"); DEBUG_WITH_TYPE("jitlink", { dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << formatv("{0:x}", B->getFixupAddress(E)) << " (" - << formatv("{0:x}", B->getAddress()) << " + " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); E.setKind(KindToSet); @@ -586,8 +585,7 @@ class PLTTableManager : public TableManager { if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) { DEBUG_WITH_TYPE("jitlink", { dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << formatv("{0:x}", B->getFixupAddress(E)) << " (" - << formatv("{0:x}", B->getAddress()) << " + " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h index 20da3e3b89eb..8f1bf854843f 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -236,7 +236,7 @@ class ELFNixPlatform : public Platform { DenseMap InitSeqs; std::vector BootstrapPOSRs; - DenseMap HandleAddrToJITDylib; + DenseMap HandleAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; diff --git 
a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h index 6d113a7bdf1a..3b34400894df 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -39,9 +39,9 @@ class EPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { : ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + Error registerEHFrames(ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; - Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + Error deregisterEHFrames(ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; private: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h index b9825f17ec17..18656d03e441 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h @@ -85,7 +85,7 @@ class SPSSerializationTraits::deserialize(IB, A)) return false; - FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue()); + FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A); return true; } }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index d7b5e2eda6ee..9482f20ecec6 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -239,7 +239,7 @@ class MachOPlatform : public Platform { std::mutex PlatformMutex; DenseMap InitSeqs; - DenseMap HeaderAddrToJITDylib; + DenseMap HeaderAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h 
b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 109922a46e26..e22d7f7de814 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -222,7 +222,7 @@ class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin { private: struct EHFrameRange { - JITTargetAddress Addr = 0; + orc::ExecutorAddr Addr; size_t Size; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index 2d316b9de007..1abc9508d93a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -13,7 +13,10 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H #define LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -29,7 +32,7 @@ class ExecutorAddr { ExecutorAddr() = default; /// Create an ExecutorAddr from the given value. - explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {} + explicit constexpr ExecutorAddr(uint64_t Addr) : Addr(Addr) {} /// Create an ExecutorAddr from the given pointer. /// Warning: This should only be used when JITing in-process. 
@@ -88,12 +91,12 @@ class ExecutorAddr { ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); } ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); } - ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator+=(const ExecutorAddrDiff &Delta) { Addr += Delta; return *this; } - ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator-=(const ExecutorAddrDiff &Delta) { Addr -= Delta; return *this; } @@ -120,6 +123,18 @@ inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS, return ExecutorAddr(LHS + RHS.getValue()); } +/// Subtracting an offset from an address yields an address. +inline ExecutorAddr operator-(const ExecutorAddr &LHS, + const ExecutorAddrDiff &RHS) { + return ExecutorAddr(LHS.getValue() - RHS); +} + +/// Taking the modulus of an address and a diff yields a diff. +inline ExecutorAddrDiff operator%(const ExecutorAddr &LHS, + const ExecutorAddrDiff &RHS) { + return ExecutorAddrDiff(LHS.getValue() % RHS); +} + /// Represents an address range in the exceutor process. struct ExecutorAddrRange { ExecutorAddrRange() = default; @@ -148,6 +163,10 @@ struct ExecutorAddrRange { ExecutorAddr End; }; +inline raw_ostream &operator<<(raw_ostream &OS, const ExecutorAddr &A) { + return OS << formatv("{0:x}", A.getValue()); +} + namespace shared { class SPSExecutorAddr {}; @@ -198,6 +217,26 @@ using SPSExecutorAddrRangeSequence = SPSSequence; } // End namespace shared. } // End namespace orc. + +// Provide DenseMapInfo for ExecutorAddrs. 
+template <> struct DenseMapInfo { + static inline orc::ExecutorAddr getEmptyKey() { + return orc::ExecutorAddr(DenseMapInfo::getEmptyKey()); + } + static inline orc::ExecutorAddr getTombstoneKey() { + return orc::ExecutorAddr(DenseMapInfo::getTombstoneKey()); + } + + static unsigned getHashValue(const orc::ExecutorAddr &Addr) { + return DenseMapInfo::getHashValue(Addr.getValue()); + } + + static bool isEqual(const orc::ExecutorAddr &LHS, + const orc::ExecutorAddr &RHS) { + return DenseMapInfo::isEqual(LHS.getValue(), RHS.getValue()); + } +}; + } // End namespace llvm. #endif // LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H diff --git a/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h b/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h index 8ae3bc2bf61d..159880e4b152 100644 --- a/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h +++ b/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h @@ -52,13 +52,13 @@ class DefineExternalSectionStartAndEndSymbols { auto &SR = getSectionRange(*D.Sec); if (D.IsStart) { if (SR.empty()) - G.makeAbsolute(*Sym, 0); + G.makeAbsolute(*Sym, orc::ExecutorAddr()); else G.makeDefined(*Sym, *SR.getFirstBlock(), 0, 0, Linkage::Strong, Scope::Local, false); } else { if (SR.empty()) - G.makeAbsolute(*Sym, 0); + G.makeAbsolute(*Sym, orc::ExecutorAddr()); else G.makeDefined(*Sym, *SR.getLastBlock(), SR.getLastBlock()->getSize(), 0, Linkage::Strong, diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 4d7d5ce26668..4d421b3e1ed3 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -65,10 +65,7 @@ Error EHFrameSplitter::operator()(LinkGraph &G) { Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache) { - LLVM_DEBUG({ - dbgs() << " Processing block at " << 
formatv("{0:x16}", B.getAddress()) - << "\n"; - }); + LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n"); // eh-frame should not contain zero-fill blocks. if (B.isZeroFill()) @@ -400,7 +397,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, BlockEdgeMap &BlockEdges) { LLVM_DEBUG(dbgs() << " Record is FDE\n"); - JITTargetAddress RecordAddress = B.getAddress() + RecordOffset; + orc::ExecutorAddr RecordAddress = B.getAddress() + RecordOffset; auto RecordContent = B.getContent().slice(RecordOffset, RecordLength); BinaryStreamReader RecordReader( @@ -418,8 +415,8 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, { // Process the CIE pointer field. auto CIEEdgeItr = BlockEdges.find(RecordOffset + CIEDeltaFieldOffset); - JITTargetAddress CIEAddress = - RecordAddress + CIEDeltaFieldOffset - CIEDelta; + orc::ExecutorAddr CIEAddress = + RecordAddress + orc::ExecutorAddrDiff(CIEDeltaFieldOffset - CIEDelta); if (CIEEdgeItr == BlockEdges.end()) { LLVM_DEBUG({ @@ -456,7 +453,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, { // Process the PC-Begin field. 
Block *PCBeginBlock = nullptr; - JITTargetAddress PCBeginFieldOffset = RecordReader.getOffset(); + orc::ExecutorAddrDiff PCBeginFieldOffset = RecordReader.getOffset(); auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset); if (PCEdgeItr == BlockEdges.end()) { auto PCBeginPtrInfo = @@ -464,12 +461,12 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, RecordAddress + PCBeginFieldOffset, RecordReader); if (!PCBeginPtrInfo) return PCBeginPtrInfo.takeError(); - JITTargetAddress PCBegin = PCBeginPtrInfo->first; + orc::ExecutorAddr PCBegin = PCBeginPtrInfo->first; Edge::Kind PCBeginEdgeKind = PCBeginPtrInfo->second; LLVM_DEBUG({ dbgs() << " Adding edge at " - << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset) - << " to PC at " << formatv("{0:x16}", PCBegin) << "\n"; + << (RecordAddress + PCBeginFieldOffset) << " to PC at " + << formatv("{0:x16}", PCBegin) << "\n"; }); auto PCBeginSym = getOrCreateSymbol(PC, PCBegin); if (!PCBeginSym) @@ -522,7 +519,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, if (auto Err = RecordReader.readULEB128(AugmentationDataSize)) return Err; - JITTargetAddress LSDAFieldOffset = RecordReader.getOffset(); + orc::ExecutorAddrDiff LSDAFieldOffset = RecordReader.getOffset(); auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset); if (LSDAEdgeItr == BlockEdges.end()) { auto LSDAPointerInfo = @@ -530,7 +527,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, RecordAddress + LSDAFieldOffset, RecordReader); if (!LSDAPointerInfo) return LSDAPointerInfo.takeError(); - JITTargetAddress LSDA = LSDAPointerInfo->first; + orc::ExecutorAddr LSDA = LSDAPointerInfo->first; Edge::Kind LSDAEdgeKind = LSDAPointerInfo->second; auto LSDASym = getOrCreateSymbol(PC, LSDA); if (!LSDASym) @@ -645,12 +642,10 @@ unsigned EHFrameEdgeFixer::getPointerEncodingDataSize(uint8_t PointerEncoding) { } } -Expected> +Expected> EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, - 
JITTargetAddress PointerFieldAddress, + orc::ExecutorAddr PointerFieldAddress, BinaryStreamReader &RecordReader) { - static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t), - "Result must be able to hold a uint64_t"); assert(isSupportedPointerEncoding(PointerEncoding) && "Unsupported pointer encoding"); @@ -663,7 +658,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, if (EffectiveType == DW_EH_PE_absptr) EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4; - JITTargetAddress Addr; + orc::ExecutorAddr Addr; Edge::Kind PointerEdgeKind = Edge::Invalid; switch (EffectiveType) { case DW_EH_PE_udata4: { @@ -709,7 +704,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, } Expected EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC, - JITTargetAddress Addr) { + orc::ExecutorAddr Addr) { Symbol *CanonicalSym = nullptr; auto UpdateCanonicalSym = [&](Symbol *Sym) { @@ -753,8 +748,9 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { << EHFrameSectionName << "\n"; }); - auto &NullTerminatorBlock = G.createContentBlock( - *EHFrame, NullTerminatorBlockContent, 0xfffffffffffffffc, 1, 0); + auto &NullTerminatorBlock = + G.createContentBlock(*EHFrame, NullTerminatorBlockContent, + orc::ExecutorAddr(~uint64_t(4)), 1, 0); G.addAnonymousSymbol(NullTerminatorBlock, 0, 4, false, true); return Error::success(); } @@ -762,17 +758,15 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { EHFrameRegistrar::~EHFrameRegistrar() {} Error InProcessEHFrameRegistrar::registerEHFrames( - JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { - return orc::registerEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr), - EHFrameSectionSize); + orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { + return orc::registerEHFrameSection(EHFrameSectionAddr.toPtr(), + EHFrameSectionSize); } Error InProcessEHFrameRegistrar::deregisterEHFrames( - JITTargetAddress EHFrameSectionAddr, size_t 
EHFrameSectionSize) { - return orc::deregisterEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr), - EHFrameSectionSize); + orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { + return orc::deregisterEHFrameSection(EHFrameSectionAddr.toPtr(), + EHFrameSectionSize); } LinkGraphPassFunction @@ -789,14 +783,14 @@ createEHFrameRecorderPass(const Triple &TT, StoreFrameRange = std::move(StoreRangeAddress)](LinkGraph &G) -> Error { // Search for a non-empty eh-frame and record the address of the first // symbol in it. - JITTargetAddress Addr = 0; + orc::ExecutorAddr Addr; size_t Size = 0; if (auto *S = G.findSectionByName(EHFrameSectionName)) { auto R = SectionRange(*S); Addr = R.getStart(); Size = R.getSize(); } - if (Addr == 0 && Size != 0) + if (!Addr && Size != 0) return make_error( StringRef(EHFrameSectionName) + " section can not have zero address with non-zero size"); diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h index b4c4b0f7b097..ef4b47b9aa28 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h @@ -71,12 +71,12 @@ class EHFrameEdgeFixer { }; using BlockEdgeMap = DenseMap; - using CIEInfosMap = DenseMap; + using CIEInfosMap = DenseMap; struct ParseContext { ParseContext(LinkGraph &G) : G(G) {} - Expected findCIEInfo(JITTargetAddress Address) { + Expected findCIEInfo(orc::ExecutorAddr Address) { auto I = CIEInfos.find(Address); if (I == CIEInfos.end()) return make_error("No CIE found at address " + @@ -102,12 +102,13 @@ class EHFrameEdgeFixer { static bool isSupportedPointerEncoding(uint8_t PointerEncoding); unsigned getPointerEncodingDataSize(uint8_t PointerEncoding); - Expected> + Expected> readEncodedPointer(uint8_t PointerEncoding, - JITTargetAddress PointerFieldAddress, + orc::ExecutorAddr PointerFieldAddress, BinaryStreamReader &RecordReader); - Expected 
getOrCreateSymbol(ParseContext &PC, JITTargetAddress Addr); + Expected getOrCreateSymbol(ParseContext &PC, + orc::ExecutorAddr Addr); StringRef EHFrameSectionName; unsigned PointerSize; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index f9101d71dfa8..23c8b77b913b 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -322,10 +322,12 @@ template Error ELFLinkGraphBuilder::graphifySections() { if (!Data) return Data.takeError(); - G->createContentBlock(GraphSec, *Data, Sec.sh_addr, Sec.sh_addralign, 0); + G->createContentBlock(GraphSec, *Data, orc::ExecutorAddr(Sec.sh_addr), + Sec.sh_addralign, 0); } else - G->createZeroFillBlock(GraphSec, Sec.sh_size, Sec.sh_addr, - Sec.sh_addralign, 0); + G->createZeroFillBlock(GraphSec, Sec.sh_size, + orc::ExecutorAddr(Sec.sh_addr), Sec.sh_addralign, + 0); setGraphSection(SecIndex, GraphSec); } @@ -393,9 +395,9 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { // Handle common symbols specially. 
if (Sym.isCommon()) { - Symbol &GSym = - G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, - Sym.st_size, Sym.getValue(), false); + Symbol &GSym = G->addCommonSymbol(*Name, Scope::Default, + getCommonSection(), orc::ExecutorAddr(), + Sym.st_size, Sym.getValue(), false); setGraphSymbol(SymIndex, GSym); continue; } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp index dc183dfddfae..35b70d533907 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -41,10 +41,11 @@ class ELFJITLinker_aarch64 : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + auto FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case aarch64::R_AARCH64_CALL26: { - assert((FixupAddress & 0x3) == 0 && "Call-inst is not 32-bit aligned"); + assert((FixupAddress.getValue() & 0x3) == 0 && + "Call-inst is not 32-bit aligned"); int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); if (static_cast(Value) & 0x3) @@ -124,7 +125,8 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { int64_t Addend = Rel.r_addend; Block *BlockToFix = *(GraphSection.blocks().begin()); - JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset; + orc::ExecutorAddr FixupAddress = + orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 26ec79ea50cf..a4d1cc8c6195 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -44,15 +44,16 @@ class PerGraphGOTAndPLTStubsBuilder_ELF_riscv bool 
isGOTEdgeToFix(Edge &E) const { return E.getKind() == R_RISCV_GOT_HI20; } Symbol &createGOTEntry(Symbol &Target) { - Block &GOTBlock = G.createContentBlock( - getGOTSection(), getGOTEntryBlockContent(), 0, G.getPointerSize(), 0); + Block &GOTBlock = + G.createContentBlock(getGOTSection(), getGOTEntryBlockContent(), + orc::ExecutorAddr(), G.getPointerSize(), 0); GOTBlock.addEdge(isRV64() ? R_RISCV_64 : R_RISCV_32, 0, Target, 0); return G.addAnonymousSymbol(GOTBlock, 0, G.getPointerSize(), false, false); } Symbol &createPLTStub(Symbol &Target) { - Block &StubContentBlock = - G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 4, 0); + Block &StubContentBlock = G.createContentBlock( + getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 4, 0); auto &GOTEntrySymbol = getGOTEntry(Target); StubContentBlock.addEdge(R_RISCV_CALL, 0, GOTEntrySymbol, 0); return G.addAnonymousSymbol(StubContentBlock, 0, StubEntrySize, true, @@ -134,13 +135,13 @@ static Expected getRISCVPCRelHi20(const Edge &E) { const Symbol &Sym = E.getTarget(); const Block &B = Sym.getBlock(); - JITTargetAddress Offset = Sym.getOffset(); + orc::ExecutorAddrDiff Offset = Sym.getOffset(); struct Comp { - bool operator()(const Edge &Lhs, JITTargetAddress Offset) { + bool operator()(const Edge &Lhs, orc::ExecutorAddrDiff Offset) { return Lhs.getOffset() < Offset; } - bool operator()(JITTargetAddress Offset, const Edge &Rhs) { + bool operator()(orc::ExecutorAddrDiff Offset, const Edge &Rhs) { return Offset < Rhs.getOffset(); } }; @@ -176,27 +177,27 @@ class ELFJITLinker_riscv : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case R_RISCV_32: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + 
E.getAddend()).getValue(); *(little32_t *)FixupPtr = static_cast(Value); break; } case R_RISCV_64: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); *(little64_t *)FixupPtr = static_cast(Value); break; } case R_RISCV_HI20: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); break; } case R_RISCV_LO12_I: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); int32_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -322,7 +323,7 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder { int64_t Addend = Rel.r_addend; Block *BlockToFix = *(GraphSection.blocks().begin()); - JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset; + auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 27d8833ae19e..cebe9e9dac78 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -59,8 +59,8 @@ class TLSInfoTableManager_ELF_x86_64 // the TLS Info entry's key value will be written by the fixTLVSectionByName // pass, so create mutable content. 
auto &TLSInfoEntry = G.createMutableContentBlock( - getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), 0, 8, - 0); + getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), + orc::ExecutorAddr(), 8, 0); TLSInfoEntry.addEdge(x86_64::Pointer64, 8, Target, 0); return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false); } @@ -249,7 +249,7 @@ class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { } Block *BlockToFix = *(GraphSection.blocks().begin()); - JITTargetAddress FixupAddress = FixupSection.sh_addr + Rel.r_offset; + auto FixupAddress = orc::ExecutorAddr(FixupSection.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ @@ -322,8 +322,9 @@ class ELFJITLinker_x86_64 : public JITLinker { // If there's no defined symbol then create one. SectionRange SR(*GOTSection); if (SR.empty()) - GOTSymbol = &G.addAbsoluteSymbol(ELFGOTSymbolName, 0, 0, - Linkage::Strong, Scope::Local, true); + GOTSymbol = + &G.addAbsoluteSymbol(ELFGOTSymbolName, orc::ExecutorAddr(), 0, + Linkage::Strong, Scope::Local, true); else GOTSymbol = &G.addDefinedSymbol(*SR.getFirstBlock(), 0, ELFGOTSymbolName, 0, diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 51dcc1c35fad..8c920c5fe2dd 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -90,8 +90,8 @@ const char *getScopeName(Scope S) { } raw_ostream &operator<<(raw_ostream &OS, const Block &B) { - return OS << formatv("{0:x16}", B.getAddress()) << " -- " - << formatv("{0:x8}", B.getAddress() + B.getSize()) << ": " + return OS << B.getAddress() << " -- " << (B.getAddress() + B.getSize()) + << ": " << "size = " << formatv("{0:x8}", B.getSize()) << ", " << (B.isZeroFill() ? 
"zero-fill" : "content") << ", align = " << B.getAlignment() @@ -100,9 +100,8 @@ raw_ostream &operator<<(raw_ostream &OS, const Block &B) { } raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { - OS << formatv("{0:x16}", Sym.getAddress()) << " (" - << (Sym.isDefined() ? "block" : "addressable") << " + " - << formatv("{0:x8}", Sym.getOffset()) + OS << Sym.getAddress() << " (" << (Sym.isDefined() ? "block" : "addressable") + << " + " << formatv("{0:x8}", Sym.getOffset()) << "): size: " << formatv("{0:x8}", Sym.getSize()) << ", linkage: " << formatv("{0:6}", getLinkageName(Sym.getLinkage())) << ", scope: " << formatv("{0:8}", getScopeName(Sym.getScope())) << ", " @@ -113,9 +112,9 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName) { - OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": " - << formatv("{0:x16}", B.getAddress()) << " + " - << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName << " -> "; + OS << "edge@" << B.getAddress() + E.getOffset() << ": " << B.getAddress() + << " + " << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName + << " -> "; auto &TargetSym = E.getTarget(); if (TargetSym.hasName()) @@ -123,17 +122,16 @@ void printEdge(raw_ostream &OS, const Block &B, const Edge &E, else { auto &TargetBlock = TargetSym.getBlock(); auto &TargetSec = TargetBlock.getSection(); - JITTargetAddress SecAddress = ~JITTargetAddress(0); + orc::ExecutorAddr SecAddress(~uint64_t(0)); for (auto *B : TargetSec.blocks()) if (B->getAddress() < SecAddress) SecAddress = B->getAddress(); - JITTargetAddress SecDelta = TargetSym.getAddress() - SecAddress; - OS << formatv("{0:x16}", TargetSym.getAddress()) << " (section " - << TargetSec.getName(); + orc::ExecutorAddrDiff SecDelta = TargetSym.getAddress() - SecAddress; + OS << TargetSym.getAddress() << " (section " << TargetSec.getName(); if (SecDelta) OS << " + " << formatv("{0:x}", 
SecDelta); - OS << " / block " << formatv("{0:x16}", TargetBlock.getAddress()); + OS << " / block " << TargetBlock.getAddress(); if (TargetSym.getOffset()) OS << " + " << formatv("{0:x}", TargetSym.getOffset()); OS << ")"; @@ -265,7 +263,7 @@ void LinkGraph::dump(raw_ostream &OS) { }); for (auto *B : SortedBlocks) { - OS << " block " << formatv("{0:x16}", B->getAddress()) + OS << " block " << B->getAddress() << " size = " << formatv("{0:x8}", B->getSize()) << ", align = " << B->getAlignment() << ", alignment-offset = " << B->getAlignmentOffset(); @@ -290,9 +288,8 @@ void LinkGraph::dump(raw_ostream &OS) { return LHS.getOffset() < RHS.getOffset(); }); for (auto &E : SortedEdges) { - OS << " " << formatv("{0:x16}", B->getFixupAddress(E)) - << " (block + " << formatv("{0:x8}", E.getOffset()) - << "), addend = "; + OS << " " << B->getFixupAddress(E) << " (block + " + << formatv("{0:x8}", E.getOffset()) << "), addend = "; if (E.getAddend() >= 0) OS << formatv("+{0:x8}", E.getAddend()); else @@ -315,16 +312,14 @@ void LinkGraph::dump(raw_ostream &OS) { OS << "Absolute symbols:\n"; if (!llvm::empty(absolute_symbols())) { for (auto *Sym : absolute_symbols()) - OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym - << "\n"; + OS << " " << Sym->getAddress() << ": " << *Sym << "\n"; } else OS << " none\n"; OS << "\nExternal symbols:\n"; if (!llvm::empty(external_symbols())) { for (auto *Sym : external_symbols()) - OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym - << "\n"; + OS << " " << Sym->getAddress() << ": " << *Sym << "\n"; } else OS << " none\n"; } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 706688aba4ec..35ee050c8566 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -192,7 +192,7 @@ JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const { // Identify 
unresolved external symbols. JITLinkContext::LookupMap UnresolvedExternals; for (auto *Sym : G->external_symbols()) { - assert(Sym->getAddress() == 0 && + assert(!Sym->getAddress() && "External has already been assigned an address"); assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); @@ -209,11 +209,12 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { for (auto *Sym : G->external_symbols()) { assert(Sym->getOffset() == 0 && "External symbol is not at the start of its addressable block"); - assert(Sym->getAddress() == 0 && "Symbol already resolved"); + assert(!Sym->getAddress() && "Symbol already resolved"); assert(!Sym->isDefined() && "Symbol being resolved is already defined"); auto ResultI = Result.find(Sym->getName()); if (ResultI != Result.end()) - Sym->getAddressable().setAddress(ResultI->second.getAddress()); + Sym->getAddressable().setAddress( + orc::ExecutorAddr(ResultI->second.getAddress())); else assert(Sym->getLinkage() == Linkage::Weak && "Failed to resolve non-weak reference"); @@ -223,7 +224,7 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { dbgs() << "Externals after applying lookup result:\n"; for (auto *Sym : G->external_symbols()) dbgs() << " " << Sym->getName() << ": " - << formatv("{0:x16}", Sym->getAddress()) << "\n"; + << formatv("{0:x16}", Sym->getAddress().getValue()) << "\n"; }); } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 67fe6287e388..164014612247 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -66,10 +66,10 @@ JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default; static Error runAllocAction(AllocActionCall &C) { using WrapperFnTy = CWrapperFunctionResult (*)(const void *, size_t); - auto *Fn = jitTargetAddressToPointer(C.FnAddr); + auto *Fn = C.FnAddr.toPtr(); - 
return toError(Fn(jitTargetAddressToPointer(C.CtxAddr), - static_cast(C.CtxSize))); + return toError( + Fn(C.CtxAddr.toPtr(), static_cast(C.CtxSize))); } BasicLayout::BasicLayout(LinkGraph &G) : G(G) { @@ -207,7 +207,7 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, std::make_unique("", Triple(), 0, support::native, nullptr); AllocGroupSmallMap ContentBlocks; - JITTargetAddress NextAddr = 0x100000; + orc::ExecutorAddr NextAddr(0x100000); for (auto &KV : Segments) { auto &AG = KV.first; auto &Seg = KV.second; @@ -220,7 +220,8 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, Sec.setMemDeallocPolicy(AG.getMemDeallocPolicy()); if (Seg.ContentSize != 0) { - NextAddr = alignTo(NextAddr, Seg.ContentAlign); + NextAddr = + orc::ExecutorAddr(alignTo(NextAddr.getValue(), Seg.ContentAlign)); auto &B = G->createMutableContentBlock(Sec, G->allocateBuffer(Seg.ContentSize), NextAddr, Seg.ContentAlign.value(), 0); @@ -426,8 +427,8 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, static_cast(SegsSizes->FinalizeSegs)}; } - auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegsMem.base()); - auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegsMem.base()); + auto NextStandardSegAddr = orc::ExecutorAddr::fromPtr(StandardSegsMem.base()); + auto NextFinalizeSegAddr = orc::ExecutorAddr::fromPtr(FinalizeSegsMem.base()); LLVM_DEBUG({ dbgs() << "InProcessMemoryManager allocated:\n"; @@ -454,7 +455,7 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, ? 
NextStandardSegAddr : NextFinalizeSegAddr; - Seg.WorkingMem = jitTargetAddressToPointer(SegAddr); + Seg.WorkingMem = SegAddr.toPtr(); Seg.Addr = SegAddr; SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); @@ -478,8 +479,7 @@ void InProcessMemoryManager::deallocate(std::vector Allocs, { std::lock_guard Lock(FinalizedAllocsMutex); for (auto &Alloc : Allocs) { - auto *FA = - jitTargetAddressToPointer(Alloc.release()); + auto *FA = Alloc.release().toPtr(); StandardSegmentsList.push_back(std::move(FA->StandardSegments)); if (!FA->DeallocActions.empty()) DeallocActionsList.push_back(std::move(FA->DeallocActions)); @@ -520,7 +520,7 @@ InProcessMemoryManager::createFinalizedAlloc( auto *FA = FinalizedAllocInfos.Allocate(); new (FA) FinalizedAllocInfo( {std::move(StandardSegments), std::move(DeallocActions)}); - return FinalizedAlloc(pointerToJITTargetAddress(FA)); + return FinalizedAlloc(orc::ExecutorAddr::fromPtr(FA)); } } // end namespace jitlink diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index d588b63d9e88..2fcf3e94b8b2 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -134,7 +134,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() { memcpy(&NSec.SegName, Sec64.segname, 16); NSec.SegName[16] = '\0'; - NSec.Address = Sec64.addr; + NSec.Address = orc::ExecutorAddr(Sec64.addr); NSec.Size = Sec64.size; NSec.Alignment = 1ULL << Sec64.align; NSec.Flags = Sec64.flags; @@ -147,7 +147,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() { memcpy(&NSec.SegName, Sec32.segname, 16); NSec.SegName[16] = '\0'; - NSec.Address = Sec32.addr; + NSec.Address = orc::ExecutorAddr(Sec32.addr); NSec.Size = Sec32.size; NSec.Alignment = 1ULL << Sec32.align; NSec.Flags = Sec32.flags; @@ -287,7 +287,8 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() { if (!NSec) return 
NSec.takeError(); - if (Value < NSec->Address || Value > NSec->Address + NSec->Size) + if (orc::ExecutorAddr(Value) < NSec->Address || + orc::ExecutorAddr(Value) > NSec->Address + NSec->Size) return make_error("Address " + formatv("{0:x}", Value) + " for symbol " + *Name + " does not fall within section"); @@ -311,8 +312,9 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() { } void MachOLinkGraphBuilder::addSectionStartSymAndBlock( - unsigned SecIndex, Section &GraphSec, uint64_t Address, const char *Data, - uint64_t Size, uint32_t Alignment, bool IsLive) { + unsigned SecIndex, Section &GraphSec, orc::ExecutorAddr Address, + const char *Data, orc::ExecutorAddrDiff Size, uint32_t Alignment, + bool IsLive) { Block &B = Data ? G->createContentBlock(GraphSec, ArrayRef(Data, Size), Address, Alignment, 0) @@ -346,7 +348,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous common symbol at index " + Twine(KV.first)); NSym.GraphSymbol = &G->addCommonSymbol( - *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value, + *NSym.Name, NSym.S, getCommonSection(), orc::ExecutorAddr(), + orc::ExecutorAddrDiff(NSym.Value), 1ull << MachO::GET_COMM_ALIGN(NSym.Desc), NSym.Desc & MachO::N_NO_DEAD_STRIP); } else { @@ -364,8 +367,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous absolute symbol at index " + Twine(KV.first)); NSym.GraphSymbol = &G->addAbsoluteSymbol( - *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default, - NSym.Desc & MachO::N_NO_DEAD_STRIP); + *NSym.Name, orc::ExecutorAddr(NSym.Value), 0, Linkage::Strong, + Scope::Default, NSym.Desc & MachO::N_NO_DEAD_STRIP); break; case MachO::N_SECT: SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); @@ -468,13 +471,13 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { // If the section is non-empty but there is no symbol covering the start // address then add an anonymous one. 
- if (SecNSymStack.back()->Value != NSec.Address) { - auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address; + if (orc::ExecutorAddr(SecNSymStack.back()->Value) != NSec.Address) { + auto AnonBlockSize = + orc::ExecutorAddr(SecNSymStack.back()->Value) - NSec.Address; LLVM_DEBUG({ dbgs() << " Section start not covered by symbol. " - << "Creating anonymous block to cover [ " - << formatv("{0:x16}", NSec.Address) << " -- " - << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n"; + << "Creating anonymous block to cover [ " << NSec.Address + << " -- " << (NSec.Address + AnonBlockSize) << " ]\n"; }); addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address, NSec.Data, AnonBlockSize, NSec.Alignment, @@ -496,12 +499,12 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { } // BlockNSyms now contains the block symbols in reverse canonical order. - JITTargetAddress BlockStart = BlockSyms.front()->Value; - JITTargetAddress BlockEnd = SecNSymStack.empty() - ? NSec.Address + NSec.Size - : SecNSymStack.back()->Value; - JITTargetAddress BlockOffset = BlockStart - NSec.Address; - JITTargetAddress BlockSize = BlockEnd - BlockStart; + auto BlockStart = orc::ExecutorAddr(BlockSyms.front()->Value); + orc::ExecutorAddr BlockEnd = + SecNSymStack.empty() ? 
NSec.Address + NSec.Size + : orc::ExecutorAddr(SecNSymStack.back()->Value); + orc::ExecutorAddrDiff BlockOffset = BlockStart - NSec.Address; + orc::ExecutorAddrDiff BlockSize = BlockEnd - BlockStart; LLVM_DEBUG({ dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart) @@ -521,8 +524,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { BlockStart, NSec.Alignment, BlockStart % NSec.Alignment); - Optional LastCanonicalAddr; - JITTargetAddress SymEnd = BlockEnd; + Optional LastCanonicalAddr; + auto SymEnd = BlockEnd; while (!BlockSyms.empty()) { auto &NSym = *BlockSyms.back(); BlockSyms.pop_back(); @@ -530,9 +533,9 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; - auto &Sym = createStandardGraphSymbol(NSym, B, SymEnd - NSym.Value, - SectionIsText, SymLive, - LastCanonicalAddr != NSym.Value); + auto &Sym = createStandardGraphSymbol( + NSym, B, SymEnd - orc::ExecutorAddr(NSym.Value), SectionIsText, + SymLive, LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)); if (LastCanonicalAddr != Sym.getAddress()) { if (LastCanonicalAddr) @@ -568,11 +571,12 @@ Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym, dbgs() << "\n"; }); - auto &Sym = NSym.Name ? G->addDefinedSymbol(B, NSym.Value - B.getAddress(), - *NSym.Name, Size, NSym.L, NSym.S, - IsText, IsNoDeadStrip) - : G->addAnonymousSymbol(B, NSym.Value - B.getAddress(), - Size, IsText, IsNoDeadStrip); + auto SymOffset = orc::ExecutorAddr(NSym.Value) - B.getAddress(); + auto &Sym = + NSym.Name + ? 
G->addDefinedSymbol(B, SymOffset, *NSym.Name, Size, NSym.L, NSym.S, + IsText, IsNoDeadStrip) + : G->addAnonymousSymbol(B, SymOffset, Size, IsText, IsNoDeadStrip); NSym.GraphSymbol = &Sym; if (IsCanonical) @@ -635,12 +639,12 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; - JITTargetAddress BlockStart = 0; + orc::ExecutorAddrDiff BlockStart = 0; // Scan section for null characters. for (size_t I = 0; I != NSec.Size; ++I) if (NSec.Data[I] == '\0') { - JITTargetAddress BlockEnd = I + 1; + orc::ExecutorAddrDiff BlockEnd = I + 1; size_t BlockSize = BlockEnd - BlockStart; // Create a block for this null terminated string. auto &B = G->createContentBlock(*NSec.GraphSection, @@ -654,7 +658,8 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( }); // If there's no symbol at the start of this block then create one. - if (NSyms.empty() || NSyms.back()->Value != B.getAddress()) { + if (NSyms.empty() || + orc::ExecutorAddr(NSyms.back()->Value) != B.getAddress()) { auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false); setCanonicalSymbol(NSec, S); LLVM_DEBUG({ @@ -666,18 +671,19 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( } // Process any remaining symbols that point into this block. 
- JITTargetAddress LastCanonicalAddr = B.getAddress() + BlockEnd; - while (!NSyms.empty() && - NSyms.back()->Value < (B.getAddress() + BlockSize)) { + auto LastCanonicalAddr = B.getAddress() + BlockEnd; + while (!NSyms.empty() && orc::ExecutorAddr(NSyms.back()->Value) < + B.getAddress() + BlockSize) { auto &NSym = *NSyms.back(); - size_t SymSize = (B.getAddress() + BlockSize) - NSyms.back()->Value; + size_t SymSize = (B.getAddress() + BlockSize) - + orc::ExecutorAddr(NSyms.back()->Value); bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; bool IsCanonical = false; - if (LastCanonicalAddr != NSym.Value) { + if (LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)) { IsCanonical = true; - LastCanonicalAddr = NSym.Value; + LastCanonicalAddr = orc::ExecutorAddr(NSym.Value); } createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, SymLive, diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h index d29732ebdba8..2951a8533098 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -71,13 +71,13 @@ class MachOLinkGraphBuilder { public: char SectName[17]; char SegName[17]; - uint64_t Address = 0; + orc::ExecutorAddr Address; uint64_t Size = 0; uint64_t Alignment = 0; uint32_t Flags = 0; const char *Data = nullptr; Section *GraphSection = nullptr; - std::map CanonicalSymbols; + std::map CanonicalSymbols; }; using SectionParserFunction = std::function; @@ -137,7 +137,7 @@ class MachOLinkGraphBuilder { /// Returns the symbol with the highest address not greater than the search /// address, or null if no such symbol exists. 
Symbol *getSymbolByAddress(NormalizedSection &NSec, - JITTargetAddress Address) { + orc::ExecutorAddr Address) { auto I = NSec.CanonicalSymbols.upper_bound(Address); if (I == NSec.CanonicalSymbols.begin()) return nullptr; @@ -147,7 +147,7 @@ class MachOLinkGraphBuilder { /// Returns the symbol with the highest address not greater than the search /// address, or an error if no such symbol exists. Expected findSymbolByAddress(NormalizedSection &NSec, - JITTargetAddress Address) { + orc::ExecutorAddr Address) { auto *Sym = getSymbolByAddress(NSec, Address); if (Sym) if (Address <= Sym->getAddress() + Sym->getSize()) @@ -193,9 +193,9 @@ class MachOLinkGraphBuilder { Section &getCommonSection(); void addSectionStartSymAndBlock(unsigned SecIndex, Section &GraphSec, - uint64_t Address, const char *Data, - uint64_t Size, uint32_t Alignment, - bool IsLive); + orc::ExecutorAddr Address, const char *Data, + orc::ExecutorAddrDiff Size, + uint32_t Alignment, bool IsLive); Error createNormalizedSections(); Error createNormalizedSymbols(); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index f2a029d35cd5..844e76ab0542 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -109,7 +109,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Expected parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, const MachO::relocation_info &SubRI, - JITTargetAddress FixupAddress, const char *FixupContent, + orc::ExecutorAddr FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, object::relocation_iterator &RelEnd) { using namespace support; @@ -162,7 +162,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return ToSymbolSec.takeError(); ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address); assert(ToSymbol && "No symbol for section"); - FixupValue -= 
ToSymbol->getAddress(); + FixupValue -= ToSymbol->getAddress().getValue(); } MachOARM64RelocationKind DeltaKind; @@ -195,7 +195,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { for (auto &S : Obj.sections()) { - JITTargetAddress SectionAddress = S.getAddress(); + orc::ExecutorAddr SectionAddress(S.getAddress()); // Skip relocations virtual sections. if (S.isVirtual()) { @@ -234,7 +234,8 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return Kind.takeError(); // Find the address of the value to fix up. - JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; + orc::ExecutorAddr FixupAddress = + SectionAddress + (uint32_t)RI.r_address; LLVM_DEBUG({ dbgs() << " " << NSec->SectName << " + " << formatv("{0:x8}", RI.r_address) << ":\n"; @@ -249,7 +250,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { BlockToFix = &SymbolToFixOrErr->getBlock(); } - if (FixupAddress + static_cast(1ULL << RI.r_length) > + if (FixupAddress + orc::ExecutorAddrDiff(1ULL << RI.r_length) > BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( "Relocation content extends past end of fixup block"); @@ -290,7 +291,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { }); // Find the address of the value to fix up. 
- JITTargetAddress PairedFixupAddress = + orc::ExecutorAddr PairedFixupAddress = SectionAddress + (uint32_t)RI.r_address; if (PairedFixupAddress != FixupAddress) return make_error("Paired relocation points at " @@ -324,7 +325,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Addend = *(const ulittle64_t *)FixupContent; break; case Pointer64Anon: { - JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; + orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -435,7 +436,7 @@ class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 Symbol &createGOTEntry(Symbol &Target) { auto &GOTEntryBlock = G.createContentBlock( - getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + getGOTSection(), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0); GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); } @@ -457,8 +458,8 @@ class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 } Symbol &createPLTStub(Symbol &Target) { - auto &StubContentBlock = - G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); + auto &StubContentBlock = G.createContentBlock( + getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 1, 0); // Re-use GOT entries for stub targets. 
auto &GOTEntrySymbol = getGOTEntry(Target); StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0); @@ -545,11 +546,12 @@ class MachOJITLinker_arm64 : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Branch26: { - assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned"); + assert((FixupAddress.getValue() & 0x3) == 0 && + "Branch-inst is not 32-bit aligned"); int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); @@ -569,7 +571,7 @@ class MachOJITLinker_arm64 : public JITLinker { break; } case Pointer32: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); if (Value > std::numeric_limits::max()) return makeTargetOutOfRangeError(G, B, E); *(ulittle32_t *)FixupPtr = Value; @@ -577,7 +579,7 @@ class MachOJITLinker_arm64 : public JITLinker { } case Pointer64: case Pointer64Anon: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); *(ulittle64_t *)FixupPtr = Value; break; } @@ -587,9 +589,10 @@ class MachOJITLinker_arm64 : public JITLinker { assert((E.getKind() != GOTPage21 || E.getAddend() == 0) && "GOTPAGE21 with non-zero addend"); uint64_t TargetPage = - (E.getTarget().getAddress() + E.getAddend()) & - ~static_cast(4096 - 1); - uint64_t PCPage = FixupAddress & ~static_cast(4096 - 1); + (E.getTarget().getAddress().getValue() + E.getAddend()) & + ~static_cast(4096 - 1); + uint64_t PCPage = + FixupAddress.getValue() & ~static_cast(4096 - 1); int64_t PageDelta = TargetPage - PCPage; if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1)) @@ -606,7 +609,7 @@ class MachOJITLinker_arm64 : public JITLinker { } case 
PageOffset12: { uint64_t TargetOffset = - (E.getTarget().getAddress() + E.getAddend()) & 0xfff; + (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff; uint32_t RawInstr = *(ulittle32_t *)FixupPtr; unsigned ImmShift = getPageOffset12Shift(RawInstr); @@ -627,7 +630,7 @@ class MachOJITLinker_arm64 : public JITLinker { assert((RawInstr & 0xfffffc00) == 0xf9400000 && "RawInstr isn't a 64-bit LDR immediate"); - uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff; + uint32_t TargetOffset = E.getTarget().getAddress().getValue() & 0xfff; assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned"); uint32_t EncodedImm = (TargetOffset >> 3) << 10; uint32_t FixedInstr = RawInstr | EncodedImm; @@ -635,7 +638,8 @@ class MachOJITLinker_arm64 : public JITLinker { break; } case LDRLiteral19: { - assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned"); + assert((FixupAddress.getValue() & 0x3) == 0 && + "LDR is not 32-bit aligned"); assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); uint32_t RawInstr = *(ulittle32_t *)FixupPtr; assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index a4fcd3b9a5f5..82afaa3aa3c5 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -119,7 +119,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { // returns the edge kind and addend to be used. 
Expected parsePairRelocation( Block &BlockToFix, MachONormalizedRelocationType SubtractorKind, - const MachO::relocation_info &SubRI, JITTargetAddress FixupAddress, + const MachO::relocation_info &SubRI, orc::ExecutorAddr FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, object::relocation_iterator &RelEnd) { using namespace support; @@ -172,7 +172,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { return ToSymbolSec.takeError(); ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address); assert(ToSymbol && "No symbol for section"); - FixupValue -= ToSymbol->getAddress(); + FixupValue -= ToSymbol->getAddress().getValue(); } Edge::Kind DeltaKind; @@ -206,7 +206,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { for (auto &S : Obj.sections()) { - JITTargetAddress SectionAddress = S.getAddress(); + orc::ExecutorAddr SectionAddress(S.getAddress()); // Skip relocations virtual sections. if (S.isVirtual()) { @@ -241,7 +241,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { MachO::relocation_info RI = getRelocationInfo(RelItr); // Find the address of the value to fix up. 
- JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; + auto FixupAddress = SectionAddress + (uint32_t)RI.r_address; LLVM_DEBUG({ dbgs() << " " << NSec->SectName << " + " @@ -257,7 +257,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { BlockToFix = &SymbolToFixOrErr->getBlock(); } - if (FixupAddress + static_cast(1ULL << RI.r_length) > + if (FixupAddress + orc::ExecutorAddrDiff(1ULL << RI.r_length) > BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( "Relocation extends past end of fixup block"); @@ -343,7 +343,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { Kind = x86_64::Pointer64; break; case MachOPointer64Anon: { - JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; + orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -367,8 +367,8 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { Kind = x86_64::Delta32; break; case MachOPCRel32Anon: { - JITTargetAddress TargetAddress = - FixupAddress + 4 + *(const little32_t *)FixupContent; + orc::ExecutorAddr TargetAddress(FixupAddress + 4 + + *(const little32_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -384,10 +384,10 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { case MachOPCRel32Minus1Anon: case MachOPCRel32Minus2Anon: case MachOPCRel32Minus4Anon: { - JITTargetAddress Delta = - 4 + static_cast( + orc::ExecutorAddrDiff Delta = + 4 + orc::ExecutorAddrDiff( 1ULL << (*MachORelocKind - MachOPCRel32Minus1Anon)); - JITTargetAddress TargetAddress = + orc::ExecutorAddr TargetAddress = FixupAddress + Delta + *(const little32_t *)FixupContent; auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) diff --git 
a/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h b/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h index 6e9df9c75a65..6e325f92bafb 100644 --- a/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h @@ -47,16 +47,16 @@ class PerGraphGOTAndPLTStubsBuilder { if (impl().isGOTEdgeToFix(E)) { LLVM_DEBUG({ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) - << " edge at " << formatv("{0:x}", B->getFixupAddress(E)) - << " (" << formatv("{0:x}", B->getAddress()) << " + " + << " edge at " << B->getFixupAddress(E) << " (" + << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); impl().fixGOTEdge(E, getGOTEntry(E.getTarget())); } else if (impl().isExternalBranchEdge(E)) { LLVM_DEBUG({ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) - << " edge at " << formatv("{0:x}", B->getFixupAddress(E)) - << " (" << formatv("{0:x}", B->getAddress()) << " + " + << " edge at " << B->getFixupAddress(E) << " (" + << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); impl().fixPLTEdge(E, getPLTStub(E.getTarget())); diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp index 48521280059d..df9979b47e88 100644 --- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp @@ -95,10 +95,10 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) { assert(GOTEntryBlock.edges_size() == 1 && "GOT entry should only have one outgoing edge"); auto &GOTTarget = GOTEntryBlock.edges().begin()->getTarget(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); - JITTargetAddress EdgeAddr = B->getFixupAddress(E); + orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); + orc::ExecutorAddr EdgeAddr = B->getFixupAddress(E); int64_t Displacement = TargetAddr - EdgeAddr + 4; - bool TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr); + bool 
TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr.getValue()); bool DisplacementInRangeForImmS32 = isInRangeForImmS32(Displacement); // If both of the Target and displacement is out of range, then @@ -165,8 +165,8 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) { "GOT block should only have one outgoing edge"); auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); + orc::ExecutorAddr EdgeAddr = B->getAddress() + E.getOffset(); + orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); int64_t Displacement = TargetAddr - EdgeAddr + 4; if (isInRangeForImmS32(Displacement)) { diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp index fcfe389f82a8..4ff6b7fd54df 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp @@ -67,9 +67,9 @@ class ELFDebugObjectSection : public DebugObjectSection { template void ELFDebugObjectSection::setTargetMemoryRange(SectionRange Range) { // Only patch load-addresses for executable and data sections. 
- if (isTextOrDataSection()) { - Header->sh_addr = static_cast(Range.getStart()); - } + if (isTextOrDataSection()) + Header->sh_addr = + static_cast(Range.getStart().getValue()); } template diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp index fe62138c790c..92657805efdd 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp @@ -129,8 +129,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { Section *Sec = nullptr; StringRef SegName; StringRef SecName; - JITTargetAddress Alignment = 0; - JITTargetAddress StartAddr = 0; + uint64_t Alignment = 0; + orc::ExecutorAddr StartAddr; uint64_t Size = 0; }; @@ -153,7 +153,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { return Error::success(); } DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos), - Sec.getName().substr(SepPos + 1), 0, 0}); + Sec.getName().substr(SepPos + 1), 0, + orc::ExecutorAddr(), 0}); } else { NonDebugSections.push_back(&Sec); @@ -182,11 +183,11 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { size_t ContainerBlockSize = sizeof(typename MachOTraits::Header) + SegmentLCSize; auto ContainerBlockContent = G.allocateBuffer(ContainerBlockSize); - MachOContainerBlock = - &G.createMutableContentBlock(SDOSec, ContainerBlockContent, 0, 8, 0); + MachOContainerBlock = &G.createMutableContentBlock( + SDOSec, ContainerBlockContent, orc::ExecutorAddr(), 8, 0); // Copy debug section blocks and symbols. 
- JITTargetAddress NextBlockAddr = MachOContainerBlock->getSize(); + orc::ExecutorAddr NextBlockAddr(MachOContainerBlock->getSize()); for (auto &SI : DebugSecInfos) { assert(!llvm::empty(SI.Sec->blocks()) && "Empty debug info section?"); @@ -219,7 +220,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { G.mergeSections(SDOSec, *SI.Sec); SI.Sec = nullptr; } - size_t DebugSectionsSize = NextBlockAddr - MachOContainerBlock->getSize(); + size_t DebugSectionsSize = + NextBlockAddr - orc::ExecutorAddr(MachOContainerBlock->getSize()); // Write MachO header and debug section load commands. MachOStructWriter Writer(MachOContainerBlock->getAlreadyMutableContent()); @@ -266,9 +268,9 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { memset(&Sec, 0, sizeof(Sec)); memcpy(Sec.sectname, SI.SecName.data(), SI.SecName.size()); memcpy(Sec.segname, SI.SegName.data(), SI.SegName.size()); - Sec.addr = SI.StartAddr; + Sec.addr = SI.StartAddr.getValue(); Sec.size = SI.Size; - Sec.offset = SI.StartAddr; + Sec.offset = SI.StartAddr.getValue(); Sec.align = SI.Alignment; Sec.reloff = 0; Sec.nreloc = 0; @@ -336,7 +338,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { memset(&SecCmd, 0, sizeof(SecCmd)); memcpy(SecCmd.sectname, SecName.data(), SecName.size()); memcpy(SecCmd.segname, SegName.data(), SegName.size()); - SecCmd.addr = R.getStart(); + SecCmd.addr = R.getStart().getValue(); SecCmd.size = R.getSize(); SecCmd.offset = 0; SecCmd.align = R.getFirstBlock()->getAlignment(); @@ -348,7 +350,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { SectionRange R(MachOContainerBlock->getSection()); G.allocActions().push_back( - {{RegisterActionAddr.getValue(), R.getStart(), R.getSize()}, {}}); + {{RegisterActionAddr, R.getStart(), R.getSize()}, {}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp 
b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index eded54f4bfb3..e25d7c4651a9 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -58,7 +58,8 @@ class DSOHandleMaterializationUnit : public MaterializationUnit { auto &DSOHandleSection = G->createSection(".data.__dso_handle", jitlink::MemProt::Read); auto &DSOHandleBlock = G->createContentBlock( - DSOHandleSection, getDSOHandleContent(PointerSize), 0, 8, 0); + DSOHandleSection, getDSOHandleContent(PointerSize), orc::ExecutorAddr(), + 8, 0); auto &DSOHandleSymbol = G->addDefinedSymbol( DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(), jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); @@ -375,7 +376,7 @@ void ELFNixPlatform::rt_getDeinitializers( { std::lock_guard Lock(PlatformMutex); - auto I = HandleAddrToJITDylib.find(Handle.getValue()); + auto I = HandleAddrToJITDylib.find(Handle); if (I != HandleAddrToJITDylib.end()) JD = I->second; } @@ -406,7 +407,7 @@ void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HandleAddrToJITDylib.find(Handle.getValue()); + auto I = HandleAddrToJITDylib.find(Handle); if (I != HandleAddrToJITDylib.end()) JD = I->second; } @@ -630,12 +631,11 @@ void ELFNixPlatform::ELFNixPlatformPlugin::addDSOHandleSupportPasses( assert(I != G.defined_symbols().end() && "Missing DSO handle symbol"); { std::lock_guard Lock(MP.PlatformMutex); - JITTargetAddress HandleAddr = (*I)->getAddress(); + auto HandleAddr = (*I)->getAddress(); MP.HandleAddrToJITDylib[HandleAddr] = &JD; assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); MP.InitSeqs.insert(std::make_pair( - &JD, - ELFNixJITDylibInitializers(JD.getName(), ExecutorAddr(HandleAddr)))); + &JD, ELFNixJITDylibInitializers(JD.getName(), HandleAddr))); } return Error::success(); }); diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp 
b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp index 4c0fab8aa9fa..99cacd1731a2 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp @@ -56,17 +56,17 @@ EPCEHFrameRegistrar::Create(ExecutionSession &ES) { ExecutorAddr(DeregisterEHFrameWrapperFnAddr)); } -Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr, +Error EPCEHFrameRegistrar::registerEHFrames(ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { return ES.callSPSWrapper( - RegisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr), + RegisterEHFrameWrapperFnAddr, EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } -Error EPCEHFrameRegistrar::deregisterEHFrames( - JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { +Error EPCEHFrameRegistrar::deregisterEHFrames(ExecutorAddr EHFrameSectionAddr, + size_t EHFrameSectionSize) { return ES.callSPSWrapper( - DeregisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr), + DeregisterEHFrameWrapperFnAddr, EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp index 9b712cb8f7ca..247be794ad56 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp @@ -80,7 +80,7 @@ class EPCGenericJITLinkMemoryManager::InFlightAlloc } else if (FinalizeErr) OnFinalize(std::move(FinalizeErr)); else - OnFinalize(FinalizedAlloc(AllocAddr.getValue())); + OnFinalize(FinalizedAlloc(AllocAddr)); }, Parent.SAs.Allocator, std::move(FR)); } @@ -161,7 +161,7 @@ void EPCGenericJITLinkMemoryManager::completeAllocation( const auto &AG = KV.first; auto &Seg = KV.second; - Seg.Addr = NextSegAddr.getValue(); + Seg.Addr = NextSegAddr; KV.second.WorkingMem = BL.getGraph().allocateBuffer(Seg.ContentSize).data(); NextSegAddr += 
ExecutorAddrDiff( alignTo(Seg.ContentSize + Seg.ZeroFillSize, EPC.getPageSize())); diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index 818b6b52ff83..b901a2d2da23 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -119,10 +119,12 @@ Error EPCTrampolinePool::grow() { unsigned NumTrampolines = TrampolinesPerPage; auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - EPCIU.getABISupport().writeTrampolines( - SegInfo.WorkingMem.data(), SegInfo.Addr, ResolverAddress, NumTrampolines); + EPCIU.getABISupport().writeTrampolines(SegInfo.WorkingMem.data(), + SegInfo.Addr.getValue(), + ResolverAddress, NumTrampolines); for (unsigned I = 0; I < NumTrampolines; ++I) - AvailableTrampolines.push_back(SegInfo.Addr + (I * TrampolineSize)); + AvailableTrampolines.push_back(SegInfo.Addr.getValue() + + (I * TrampolineSize)); auto FA = Alloc->finalize(); if (!FA) @@ -300,15 +302,15 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, return Alloc.takeError(); auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr, ReentryFnAddr, - ReentryCtxAddr); + ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr.getValue(), + ReentryFnAddr, ReentryCtxAddr); auto FA = Alloc->finalize(); if (!FA) return FA.takeError(); ResolverBlock = std::move(*FA); - return SegInfo.Addr; + return SegInfo.Addr.getValue(); } std::unique_ptr @@ -369,8 +371,9 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { auto StubSeg = Alloc->getSegInfo(StubProt); auto PtrSeg = Alloc->getSegInfo(PtrProt); - ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), StubSeg.Addr, - PtrSeg.Addr, NumStubsToAllocate); + ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), + StubSeg.Addr.getValue(), + PtrSeg.Addr.getValue(), NumStubsToAllocate); auto FA = 
Alloc->finalize(); if (!FA) @@ -381,8 +384,8 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { auto StubExecutorAddr = StubSeg.Addr; auto PtrExecutorAddr = PtrSeg.Addr; for (unsigned I = 0; I != NumStubsToAllocate; ++I) { - AvailableIndirectStubs.push_back( - IndirectStubInfo(StubExecutorAddr, PtrExecutorAddr)); + AvailableIndirectStubs.push_back(IndirectStubInfo( + StubExecutorAddr.getValue(), PtrExecutorAddr.getValue())); StubExecutorAddr += ABI->getStubSize(); PtrExecutorAddr += ABI->getPointerSize(); } diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index f427271bb45d..7a71d2f781d7 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -410,7 +410,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, while (I < Content.size()) { MCInst Instr; uint64_t InstrSize = 0; - uint64_t InstrStart = SymAddress + I; + uint64_t InstrStart = SymAddress.getValue() + I; auto DecodeStatus = Disassembler.getInstruction( Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream); if (DecodeStatus != MCDisassembler::Success) { @@ -426,7 +426,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, // Check for a PC-relative address equal to the symbol itself. 
auto PCRelAddr = MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize); - if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress) + if (!PCRelAddr || *PCRelAddr != SymAddress.getValue()) continue; auto RelocOffInInstr = @@ -438,8 +438,8 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, continue; } - auto RelocOffInBlock = - InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset(); + auto RelocOffInBlock = orc::ExecutorAddr(InstrStart) + *RelocOffInInstr - + SymAddress + Sym.getOffset(); if (ExistingRelocations.contains(RelocOffInBlock)) continue; diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index fb2e90e1c9c5..ab978ed3f3fc 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -106,7 +106,8 @@ class MachOHeaderMaterializationUnit : public MaterializationUnit { auto HeaderContent = G.allocateString( StringRef(reinterpret_cast(&Hdr), sizeof(Hdr))); - return G.createContentBlock(HeaderSection, HeaderContent, 0, 8, 0); + return G.createContentBlock(HeaderSection, HeaderContent, + orc::ExecutorAddr(), 8, 0); } static MaterializationUnit::Interface @@ -439,7 +440,7 @@ void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle.getValue()); + auto I = HeaderAddrToJITDylib.find(Handle); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } @@ -469,7 +470,7 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle.getValue()); + auto I = HeaderAddrToJITDylib.find(Handle); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } @@ -661,11 +662,11 @@ Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol( auto &JD = MR.getTargetJITDylib(); std::lock_guard Lock(MP.PlatformMutex); - 
JITTargetAddress HeaderAddr = (*I)->getAddress(); + auto HeaderAddr = (*I)->getAddress(); MP.HeaderAddrToJITDylib[HeaderAddr] = &JD; assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); - MP.InitSeqs.insert(std::make_pair( - &JD, MachOJITDylibInitializers(JD.getName(), ExecutorAddr(HeaderAddr)))); + MP.InitSeqs.insert( + std::make_pair(&JD, MachOJITDylibInitializers(JD.getName(), HeaderAddr))); return Error::success(); } @@ -792,7 +793,7 @@ Error MachOPlatform::MachOPlatformPlugin::registerInitSections( if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) { if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart()) - ObjCImageInfoAddr.setValue(Addr); + ObjCImageInfoAddr = Addr; } for (auto InitSectionName : InitSectionNames) @@ -879,11 +880,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) { jitlink::SectionRange R(*EHFrameSection); if (!R.empty()) - G.allocActions().push_back( - {{MP.orc_rt_macho_register_ehframe_section.getValue(), R.getStart(), - R.getSize()}, - {MP.orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(), - R.getSize()}}); + G.allocActions().push_back({{MP.orc_rt_macho_register_ehframe_section, + R.getStart(), R.getSize()}, + {MP.orc_rt_macho_deregister_ehframe_section, + R.getStart(), R.getSize()}}); } // Get a pointer to the thread data section if there is one. 
It will be used @@ -913,10 +913,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( inconvertibleErrorCode()); G.allocActions().push_back( - {{MP.orc_rt_macho_register_thread_data_section.getValue(), - R.getStart(), R.getSize()}, - {MP.orc_rt_macho_deregister_thread_data_section.getValue(), - R.getStart(), R.getSize()}}); + {{MP.orc_rt_macho_register_thread_data_section, R.getStart(), + R.getSize()}, + {MP.orc_rt_macho_deregister_thread_data_section, R.getStart(), + R.getSize()}}); } } return Error::success(); @@ -963,10 +963,8 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHSectionsPhase1( // Otherwise, add allocation actions to the graph to register eh-frames for // this object. G.allocActions().push_back( - {{orc_rt_macho_register_ehframe_section.getValue(), R.getStart(), - R.getSize()}, - {orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(), - R.getSize()}}); + {{orc_rt_macho_register_ehframe_section, R.getStart(), R.getSize()}, + {orc_rt_macho_deregister_ehframe_section, R.getStart(), R.getSize()}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 0d6a33c5685e..8b4347f5cf52 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -217,7 +217,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Flags |= JITSymbolFlags::Exported; InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -235,7 +235,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { if (Sym->getLinkage() == Linkage::Weak) Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = - 
JITEvaluatedSymbol(Sym->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -743,7 +743,7 @@ void EHFrameRegistrationPlugin::modifyPassConfig( PassConfiguration &PassConfig) { PassConfig.PostFixupPasses.push_back(createEHFrameRecorderPass( - G.getTargetTriple(), [this, &MR](JITTargetAddress Addr, size_t Size) { + G.getTargetTriple(), [this, &MR](ExecutorAddr Addr, size_t Size) { if (Addr) { std::lock_guard Lock(EHFramePluginMutex); assert(!InProcessLinks.count(&MR) && diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp index 5efdff65f566..d79dbc410e8e 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp @@ -120,8 +120,8 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { // then add it to the GOT entry info table. 
if (Sym->getSize() != 0) { if (auto TS = getELFGOTTarget(G, Sym->getBlock())) - FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + FileInfo.GOTEntryInfos[TS->getName()] = { + Sym->getSymbolContent(), Sym->getAddress().getValue()}; else return TS.takeError(); } @@ -133,7 +133,7 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (auto TS = getELFStubTarget(G, Sym->getBlock())) FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; else return TS.takeError(); SectionContainsContent = true; @@ -141,18 +141,19 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (Sym->hasName()) { if (Sym->isSymbolZeroFill()) { - S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; + S.SymbolInfos[Sym->getName()] = {Sym->getSize(), + Sym->getAddress().getValue()}; SectionContainsZeroFill = true; } else { S.SymbolInfos[Sym->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; SectionContainsContent = true; } } } - JITTargetAddress SecAddr = FirstSym->getAddress(); - uint64_t SecSize = + auto SecAddr = FirstSym->getAddress(); + auto SecSize = (LastSym->getBlock().getAddress() + LastSym->getBlock().getSize()) - SecAddr; @@ -161,11 +162,11 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { "supported yet", inconvertibleErrorCode()); if (SectionContainsZeroFill) - FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr}; + FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr.getValue()}; else FileInfo.SectionInfos[Sec.getName()] = { ArrayRef(FirstSym->getBlock().getContent().data(), SecSize), - SecAddr}; + SecAddr.getValue()}; } return Error::success(); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp index 7bd6bded5b7f..ed7fd1a57a72 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp @@ 
-118,8 +118,8 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { inconvertibleErrorCode()); if (auto TS = getMachOGOTTarget(G, Sym->getBlock())) - FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + FileInfo.GOTEntryInfos[TS->getName()] = { + Sym->getSymbolContent(), Sym->getAddress().getValue()}; else return TS.takeError(); SectionContainsContent = true; @@ -130,24 +130,25 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { if (auto TS = getMachOStubTarget(G, Sym->getBlock())) FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; else return TS.takeError(); SectionContainsContent = true; } else if (Sym->hasName()) { if (Sym->isSymbolZeroFill()) { - S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; + S.SymbolInfos[Sym->getName()] = {Sym->getSize(), + Sym->getAddress().getValue()}; SectionContainsZeroFill = true; } else { S.SymbolInfos[Sym->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; SectionContainsContent = true; } } } - JITTargetAddress SecAddr = FirstSym->getAddress(); - uint64_t SecSize = + auto SecAddr = FirstSym->getAddress(); + auto SecSize = (LastSym->getBlock().getAddress() + LastSym->getBlock().getSize()) - SecAddr; @@ -156,11 +157,11 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { "supported yet", inconvertibleErrorCode()); if (SectionContainsZeroFill) - FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr}; + FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr.getValue()}; else FileInfo.SectionInfos[Sec.getName()] = { ArrayRef(FirstSym->getBlock().getContent().data(), SecSize), - SecAddr}; + SecAddr.getValue()}; } return Error::success(); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index e6588090625e..7678a85b836f 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ 
b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -327,7 +327,7 @@ static uint64_t computeTotalBlockSizes(LinkGraph &G) { } static void dumpSectionContents(raw_ostream &OS, LinkGraph &G) { - constexpr JITTargetAddress DumpWidth = 16; + constexpr orc::ExecutorAddrDiff DumpWidth = 16; static_assert(isPowerOf2_64(DumpWidth), "DumpWidth must be a power of two"); // Put sections in address order. @@ -360,12 +360,13 @@ static void dumpSectionContents(raw_ostream &OS, LinkGraph &G) { return LHS->getAddress() < RHS->getAddress(); }); - JITTargetAddress NextAddr = Syms.front()->getAddress() & ~(DumpWidth - 1); + orc::ExecutorAddr NextAddr(Syms.front()->getAddress().getValue() & + ~(DumpWidth - 1)); for (auto *Sym : Syms) { bool IsZeroFill = Sym->getBlock().isZeroFill(); - JITTargetAddress SymStart = Sym->getAddress(); - JITTargetAddress SymSize = Sym->getSize(); - JITTargetAddress SymEnd = SymStart + SymSize; + auto SymStart = Sym->getAddress(); + auto SymSize = Sym->getSize(); + auto SymEnd = SymStart + SymSize; const uint8_t *SymData = IsZeroFill ? 
nullptr : reinterpret_cast( Sym->getSymbolContent().data()); @@ -433,8 +434,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { assert(BL.graphAllocActions().empty() && "Support function calls not supported yet"); - OnFinalized(FinalizedAlloc( - pointerToJITTargetAddress(new FinalizedAllocInfo()))); + OnFinalized( + FinalizedAlloc(ExecutorAddr::fromPtr(new FinalizedAllocInfo()))); } void abandon(OnAbandonedFunction OnAbandoned) override { @@ -500,8 +501,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { sys::MemoryBlock FinalizeSegs(AllocBase + SegsSizes->StandardSegs, SegsSizes->FinalizeSegs); - auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegs.base()); - auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegs.base()); + auto NextStandardSegAddr = ExecutorAddr::fromPtr(StandardSegs.base()); + auto NextFinalizeSegAddr = ExecutorAddr::fromPtr(FinalizeSegs.base()); LLVM_DEBUG({ dbgs() << "JITLinkSlabAllocator allocated:\n"; @@ -532,7 +533,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { dbgs() << " " << Group << " -> " << formatv("{0:x16}", SegAddr) << "\n"; }); - Seg.WorkingMem = jitTargetAddressToPointer(SegAddr); + Seg.WorkingMem = SegAddr.toPtr(); Seg.Addr = SegAddr + NextSlabDelta; SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); @@ -559,7 +560,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { Error Err = Error::success(); for (auto &FA : FinalizedAllocs) { std::unique_ptr FAI( - jitTargetAddressToPointer(FA.release())); + FA.release().toPtr()); // FIXME: Run dealloc actions. @@ -613,8 +614,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { // Calculate the target address delta to link as-if slab were at // SlabAddress. 
if (SlabAddress != ~0ULL) - NextSlabDelta = - SlabAddress - pointerToJITTargetAddress(SlabRemaining.base()); + NextSlabDelta = ExecutorAddr(SlabAddress) - + ExecutorAddr::fromPtr(SlabRemaining.base()); } Error freeBlock(sys::MemoryBlock MB) { diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 3cc6a8ad0fe6..fdc6fbdff19b 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -76,15 +76,16 @@ TEST(LinkGraphTest, AddressAccess) { getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", MemProt::Read | MemProt::Write); - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); auto &S1 = G.addDefinedSymbol(B1, 4, "S1", 4, Linkage::Strong, Scope::Default, false, false); B1.addEdge(Edge::FirstRelocation, 8, S1, 0); auto &E1 = *B1.edges().begin(); - EXPECT_EQ(B1.getAddress(), 0x1000U) << "Incorrect block address"; - EXPECT_EQ(S1.getAddress(), 0x1004U) << "Incorrect symbol address"; - EXPECT_EQ(B1.getFixupAddress(E1), 0x1008U) << "Incorrect fixup address"; + EXPECT_EQ(B1.getAddress(), B1Addr) << "Incorrect block address"; + EXPECT_EQ(S1.getAddress(), B1Addr + 4) << "Incorrect symbol address"; + EXPECT_EQ(B1.getFixupAddress(E1), B1Addr + 8) << "Incorrect fixup address"; } TEST(LinkGraphTest, BlockAndSymbolIteration) { @@ -92,16 +93,20 @@ TEST(LinkGraphTest, BlockAndSymbolIteration) { LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", MemProt::Read | MemProt::Write); - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec1, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); 
+ orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec1, BlockContent, B2Addr, 8, 0); auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, false, false); auto &S2 = G.addDefinedSymbol(B2, 4, "S2", 4, Linkage::Strong, Scope::Default, false, false); auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); - auto &B3 = G.createContentBlock(Sec2, BlockContent, 0x3000, 8, 0); - auto &B4 = G.createContentBlock(Sec2, BlockContent, 0x4000, 8, 0); + orc::ExecutorAddr B3Addr(0x3000); + auto &B3 = G.createContentBlock(Sec2, BlockContent, B3Addr, 8, 0); + orc::ExecutorAddr B4Addr(0x4000); + auto &B4 = G.createContentBlock(Sec2, BlockContent, B4Addr, 8, 0); auto &S3 = G.addDefinedSymbol(B3, 0, "S3", 4, Linkage::Strong, Scope::Default, false, false); auto &S4 = G.addDefinedSymbol(B4, 4, "S4", 4, Linkage::Strong, Scope::Default, @@ -141,7 +146,8 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. - auto &B = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr BAddr(0x1000); + auto &B = G.createContentBlock(Sec, BlockContent, BAddr, 8, 0); EXPECT_FALSE(B.isContentMutable()) << "Content unexpectedly mutable"; EXPECT_EQ(B.getContent().data(), BlockContent.data()) @@ -196,7 +202,8 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { << "Unexpected block content size"; // Create an initially mutable block. - auto &B2 = G.createMutableContentBlock(Sec, MutableContent, 0x10000, 8, 0); + auto &B2 = G.createMutableContentBlock(Sec, MutableContent, + orc::ExecutorAddr(0x10000), 8, 0); EXPECT_TRUE(B2.isContentMutable()) << "Expected B2 content to be mutable"; } @@ -208,7 +215,8 @@ TEST(LinkGraphTest, MakeExternal) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block.
- auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + auto &B1 = + G.createContentBlock(Sec, BlockContent, orc::ExecutorAddr(0x1000), 8, 0); // Add a symbol to the block. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, @@ -218,7 +226,8 @@ TEST(LinkGraphTest, MakeExternal) { EXPECT_FALSE(S1.isExternal()) << "Symbol should not be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; EXPECT_TRUE(&S1.getBlock()) << "Symbol should have a non-null block"; - EXPECT_EQ(S1.getAddress(), 0x1000U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr(0x1000)) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 1U) @@ -235,7 +244,8 @@ TEST(LinkGraphTest, MakeExternal) { EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined"; EXPECT_TRUE(S1.isExternal()) << "Symbol should be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; - EXPECT_EQ(S1.getAddress(), 0U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr()) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 0U) @@ -253,7 +263,8 @@ TEST(LinkGraphTest, MakeDefined) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. - auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); // Add an external symbol. 
auto &S1 = G.addExternalSymbol("S1", 4, Linkage::Strong); @@ -261,7 +272,8 @@ TEST(LinkGraphTest, MakeDefined) { EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined"; EXPECT_TRUE(S1.isExternal()) << "Symbol should be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; - EXPECT_EQ(S1.getAddress(), 0U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr()) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 0U) @@ -279,7 +291,8 @@ TEST(LinkGraphTest, MakeDefined) { EXPECT_FALSE(S1.isExternal()) << "Symbol should not be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; EXPECT_TRUE(&S1.getBlock()) << "Symbol should have a non-null block"; - EXPECT_EQ(S1.getAddress(), 0x1000U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr(0x1000U)) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 1U) @@ -296,10 +309,13 @@ TEST(LinkGraphTest, TransferDefinedSymbol) { getGenericEdgeKindName); auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); - // Create an initial block. - auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec, BlockContent, 0x2000, 8, 0); - auto &B3 = G.createContentBlock(Sec, BlockContent.slice(0, 32), 0x3000, 8, 0); + // Create initial blocks. + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec, BlockContent, B2Addr, 8, 0); + orc::ExecutorAddr B3Addr(0x3000); + auto &B3 = G.createContentBlock(Sec, BlockContent.slice(0, 32), B3Addr, 8, 0); // Add a symbol. 
auto &S1 = G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, @@ -329,8 +345,10 @@ TEST(LinkGraphTest, TransferDefinedSymbolAcrossSections) { auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); // Create blocks in each section. - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec2, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec2, BlockContent, B2Addr, 8, 0); // Add a symbol to section 1. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, @@ -359,8 +377,10 @@ TEST(LinkGraphTest, TransferBlock) { auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); // Create an initial block. - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec1, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec1, BlockContent, B2Addr, 8, 0); // Add some symbols on B1... G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, Scope::Default, @@ -404,9 +424,12 @@ TEST(LinkGraphTest, MergeSections) { auto &Sec3 = G.createSection("__data.3", MemProt::Read | MemProt::Write); // Create an initial block. 
- auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec2, BlockContent, 0x2000, 8, 0); - auto &B3 = G.createContentBlock(Sec3, BlockContent, 0x3000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec2, BlockContent, B2Addr, 8, 0); + orc::ExecutorAddr B3Addr(0x3000); + auto &B3 = G.createContentBlock(Sec3, BlockContent, B3Addr, 8, 0); // Add a symbols for each block. G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, Scope::Default, @@ -482,7 +505,8 @@ TEST(LinkGraphTest, SplitBlock) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create the block to split. - auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); // Add some symbols to the block. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, @@ -499,7 +523,8 @@ TEST(LinkGraphTest, SplitBlock) { // Add an extra block, EB, and target symbols, and use these to add edges // from B1 to EB. - auto &EB = G.createContentBlock(Sec, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr EBAddr(0x2000); + auto &EB = G.createContentBlock(Sec, BlockContent, EBAddr, 8, 0); auto &ES1 = G.addDefinedSymbol(EB, 0, "TS1", 4, Linkage::Strong, Scope::Default, false, false); auto &ES2 = G.addDefinedSymbol(EB, 4, "TS2", 4, Linkage::Strong, @@ -519,10 +544,10 @@ TEST(LinkGraphTest, SplitBlock) { auto &B2 = G.splitBlock(B1, 8); // Check that the block addresses and content matches what we would expect. 
- EXPECT_EQ(B1.getAddress(), 0x1008U); + EXPECT_EQ(B1.getAddress(), B1Addr + 8); EXPECT_EQ(B1.getContent(), BlockContent.slice(8)); - EXPECT_EQ(B2.getAddress(), 0x1000U); + EXPECT_EQ(B2.getAddress(), B1Addr); EXPECT_EQ(B2.getContent(), BlockContent.slice(0, 8)); // Check that symbols in B1 were transferred as expected: diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index 0181c558b60d..1f638f407c48 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -44,7 +44,8 @@ TEST_F(ObjectLinkingLayerTest, AddLinkGraph) { support::little, x86_64::getEdgeKindName); auto &Sec1 = G->createSection("__data", MemProt::Read | MemProt::Write); - auto &B1 = G->createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + auto &B1 = G->createContentBlock(Sec1, BlockContent, + orc::ExecutorAddr(0x1000), 8, 0); G->addDefinedSymbol(B1, 4, "_X", 4, Linkage::Strong, Scope::Default, false, false); From 3a1a0d4957ece964a88b9912f4a86f651f026392 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Wed, 5 Jan 2022 10:21:13 -0800 Subject: [PATCH 757/992] [lldb] Add StringList::AppendString(const Twine&) (NFC) Add a convenience for appending constructed string values. 
Differential Revision: https://reviews.llvm.org/D116682 --- lldb/include/lldb/Utility/StringList.h | 3 +++ lldb/source/Utility/StringList.cpp | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/lldb/include/lldb/Utility/StringList.h b/lldb/include/lldb/Utility/StringList.h index 70f4654a6ac9..1357cf17173a 100644 --- a/lldb/include/lldb/Utility/StringList.h +++ b/lldb/include/lldb/Utility/StringList.h @@ -10,6 +10,7 @@ #define LLDB_UTILITY_STRINGLIST_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include #include @@ -44,6 +45,8 @@ class StringList { void AppendString(llvm::StringRef str); + void AppendString(const llvm::Twine &str); + void AppendList(const char **strv, int strc); void AppendList(StringList strings); diff --git a/lldb/source/Utility/StringList.cpp b/lldb/source/Utility/StringList.cpp index f78681c05a3d..ee1f157f16f1 100644 --- a/lldb/source/Utility/StringList.cpp +++ b/lldb/source/Utility/StringList.cpp @@ -55,6 +55,10 @@ void StringList::AppendString(llvm::StringRef str) { m_strings.push_back(str.str()); } +void StringList::AppendString(const llvm::Twine &str) { + m_strings.push_back(str.str()); +} + void StringList::AppendList(const char **strv, int strc) { for (int i = 0; i < strc; ++i) { if (strv[i]) From 9eeb2c98f4528856c63618e3ef94529fa6f4ff11 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Wed, 5 Jan 2022 19:24:00 -0800 Subject: [PATCH 758/992] [msan] Check for AVX regs using offset glibc versions < 2.26 use different names for the fields. However the layout is unchanged, so using the offset should be a portable way to address this issue across platforms. 
Fixes: https://github.com/llvm/llvm-project/issues/53014 Patch By: paulkirth Differential Revision: https://reviews.llvm.org/D116695 --- .../sanitizer_platform_limits_posix.cpp | 10 +++++++--- compiler-rt/test/msan/Linux/signal_mcontext.cpp | 7 ++++++- compiler-rt/test/msan/Linux/signal_mcontext2.cpp | 7 ++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 0ffbb816bb88..349cdbcda6d9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -224,9 +224,13 @@ namespace __sanitizer { # if SANITIZER_LINUX && SANITIZER_X64 // See kernel arch/x86/kernel/fpu/signal.c for details. const auto *fpregs = static_cast(ctx)->uc_mcontext.fpregs; - if (fpregs->__glibc_reserved1[12] == FP_XSTATE_MAGIC1) - return reinterpret_cast(fpregs) + - fpregs->__glibc_reserved1[13] - static_cast(ctx); + // The member names differ across header versions, but the actual layout + // is always the same. So avoid using members, just use arithmetic. 
+ const uint32_t *after_xmm = + reinterpret_cast(fpregs + 1) - 24; + if (after_xmm[12] == FP_XSTATE_MAGIC1) + return reinterpret_cast(fpregs) + after_xmm[13] - + static_cast(ctx); # endif return sizeof(ucontext_t); } diff --git a/compiler-rt/test/msan/Linux/signal_mcontext.cpp b/compiler-rt/test/msan/Linux/signal_mcontext.cpp index f1184a4b6943..932b4b8c0977 100644 --- a/compiler-rt/test/msan/Linux/signal_mcontext.cpp +++ b/compiler-rt/test/msan/Linux/signal_mcontext.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,7 +13,11 @@ void handler(int sig, siginfo_t *info, void *uctx) { #if defined(__x86_64__) auto *mctx = &static_cast(uctx)->uc_mcontext; if (auto *fpregs = mctx->fpregs) { - if (fpregs->__glibc_reserved1[12] == FP_XSTATE_MAGIC1) { + // The member names differ across header versions, but the actual layout + // is always the same. So avoid using members, just use arithmetic. + const uint32_t *after_xmm = + reinterpret_cast(fpregs + 1) - 24; + if (after_xmm[12] == FP_XSTATE_MAGIC1) { auto *xstate = reinterpret_cast<_xstate *>(mctx->fpregs); __msan_check_mem_is_initialized(xstate, sizeof(*xstate)); } diff --git a/compiler-rt/test/msan/Linux/signal_mcontext2.cpp b/compiler-rt/test/msan/Linux/signal_mcontext2.cpp index ec75d2c94ac8..6bb6740c6fa7 100644 --- a/compiler-rt/test/msan/Linux/signal_mcontext2.cpp +++ b/compiler-rt/test/msan/Linux/signal_mcontext2.cpp @@ -4,13 +4,18 @@ #include #include +#include #include void handler(int sig, siginfo_t *info, void *uctx) { volatile int uninit; auto *mctx = &static_cast(uctx)->uc_mcontext; auto *fpregs = mctx->fpregs; - if (fpregs && fpregs->__glibc_reserved1[12] == FP_XSTATE_MAGIC1) + // The member names differ across header versions, but the actual layout + // is always the same. So avoid using members, just use arithmetic. 
+ const uint32_t *after_xmm = + reinterpret_cast(fpregs + 1) - 24; + if (after_xmm[12] == FP_XSTATE_MAGIC1) reinterpret_cast<_xstate *>(mctx->fpregs)->ymmh.ymmh_space[0] = uninit; else mctx->gregs[REG_RAX] = uninit; From 15702ff9ce28b3f4aafec13be561359d4c721595 Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Wed, 5 Jan 2022 22:17:03 -0500 Subject: [PATCH 759/992] [LoopInterchange] Remove a limitation in LoopInterchange legality There was a limitation in legality that in the original inner loop latch, no instruction was allowed between the induction variable increment and the branch instruction. This is because we used to split the inner latch at the induction variable increment instruction. Since now we have split at the inner latch branch instruction and have properly duplicated instructions over to the split block, we remove this limitation. Please refer to the test case updates to see how we now interchange loops where instructions exist between the induction variable increment and the branch instruction. Reviewed By: bmahjour Differential Revision: https://reviews.llvm.org/D115238 --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 72 ------------------- .../LoopInterchange/currentLimitation.ll | 9 +-- .../LoopInterchange/interchangeable.ll | 20 ++++-- .../loop-interchange-optimization-remarks.ll | 6 +- 4 files changed, 21 insertions(+), 86 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 75b52a431e32..f03862cdf421 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -888,78 +888,6 @@ bool LoopInterchangeLegality::currentLimitations() { return true; } - // TODO: Current limitation: Since we split the inner loop latch at the point - // were induction variable is incremented (induction.next); We cannot have - // more than 1 user of induction.next since it would result in broken code - // after split. - // e.g. 
- // for(i=0;igetIncomingBlock(0) == InnerLoopPreHeader) - InnerIndexVarInc = - dyn_cast(InnerInductionVar->getIncomingValue(1)); - else - InnerIndexVarInc = - dyn_cast(InnerInductionVar->getIncomingValue(0)); - - if (!InnerIndexVarInc) { - LLVM_DEBUG( - dbgs() << "Did not find an instruction to increment the induction " - << "variable.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "The inner loop does not increment the induction variable."; - }); - return true; - } - - // Since we split the inner loop latch on this induction variable. Make sure - // we do not have any instruction between the induction variable and branch - // instruction. - - bool FoundInduction = false; - for (const Instruction &I : - llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) { - if (isa(I) || isa(I) || isa(I) || - isa(I)) - continue; - - // We found an instruction. If this is not induction variable then it is not - // safe to split this loop latch. - if (!I.isIdenticalTo(InnerIndexVarInc)) { - LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction " - << "variable increment and branch.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed( - DEBUG_TYPE, "UnsupportedInsBetweenInduction", - InnerLoop->getStartLoc(), InnerLoop->getHeader()) - << "Found unsupported instruction between induction variable " - "increment and branch."; - }); - return true; - } - - FoundInduction = true; - break; - } - // The loop latch ended and we didn't find the induction variable return as - // current limitation. 
- if (!FoundInduction) { - LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Did not find the induction variable."; - }); - return true; - } return false; } diff --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll index 768dd3bb2e23..82c16555f44f 100644 --- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll +++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll @@ -15,19 +15,16 @@ target triple = "x86_64-unknown-linux-gnu" @C = common global [100 x [100 x i64]] zeroinitializer ;;--------------------------------------Test case 01------------------------------------ -;; [FIXME] This loop though valid is currently not interchanged due to the limitation that we cannot split the inner loop latch due to multiple use of inner induction -;; variable.(used to increment the loop counter and to access A[j+1][i+1] +;; This loop can be interchanged with -da-disable-delinearization-checks, otherwise it cannot +;; be interchanged due to dependence. ;; for(int i=0;i Date: Wed, 5 Jan 2022 20:03:52 -0800 Subject: [PATCH 760/992] [CMake] Disable LLVM_INCLUDE_GO_TESTS by default llvm/test/Bindings/Go is quite flaky in the past few months and nobody fixes it. See * https://lists.llvm.org/pipermail/llvm-dev/2021-December/154353.html "Suggestions on debugging pre-merge test failure that looks irrelevant." 
* https://github.com/llvm/llvm-project/issues/53017 Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D116698 --- llvm/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 672183f62da0..af32d35521a3 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -593,7 +593,7 @@ endif(LLVM_BUILD_EXAMPLES) option(LLVM_BUILD_TESTS "Build LLVM unit tests. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON) -option(LLVM_INCLUDE_GO_TESTS "Include the Go bindings tests in test build targets." ON) +option(LLVM_INCLUDE_GO_TESTS "Include the Go bindings tests in test build targets." OFF) option(LLVM_BUILD_BENCHMARKS "Add LLVM benchmark targets to the list of default targets. If OFF, benchmarks still could be built using Benchmarks target." OFF) From aab62aab043162a03e2693dca1be2194bccdeee4 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 5 Jan 2022 23:04:17 -0500 Subject: [PATCH 761/992] [OpenMP][Offloading] Fixed a crash caused by dereferencing nullptr In function `DeviceTy::getTargetPointer`, `Entry` could be `nullptr` because of zero length array section. We need to check if it is a valid iterator before using it. Reviewed By: ronlieb Differential Revision: https://reviews.llvm.org/D116716 --- openmp/libomptarget/src/device.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 75935b30520c..738284e82f20 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -305,7 +305,9 @@ DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, DataMapMtx.unlock(); // If not a host pointer and no present modifier, we need to wait for the // event if it exists. 
- if (!IsHostPtr && !HasPresentModifier) { + // Note: Entry might be nullptr because of zero length array section. + if (Entry != HostDataToTargetListTy::iterator() && !IsHostPtr && + !HasPresentModifier) { Entry->lock(); void *Event = Entry->getEvent(); if (Event) { From c5965a411c635106a47738b8d2e24db822b7416f Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 6 Jan 2022 15:18:40 +1100 Subject: [PATCH 762/992] Revert "[JITLink] Update JITLink to use ExecutorAddr rather than..." This reverts commit 133f86e95492b2a00b944e070878424cfa73f87c while I investigate the bot failures at https://lab.llvm.org/buildbot#builders/186/builds/3370. --- .../LLJITWithObjectLinkingLayerPlugin.cpp | 11 +- .../ExecutionEngine/JITLink/EHFrameSupport.h | 13 +- .../llvm/ExecutionEngine/JITLink/JITLink.h | 140 ++++++++---------- .../JITLink/JITLinkMemoryManager.h | 40 +++-- .../llvm/ExecutionEngine/JITLink/x86_64.h | 22 +-- .../llvm/ExecutionEngine/Orc/ELFNixPlatform.h | 2 +- .../ExecutionEngine/Orc/EPCEHFrameRegistrar.h | 4 +- .../Orc/EPCGenericJITLinkMemoryManager.h | 2 +- .../llvm/ExecutionEngine/Orc/MachOPlatform.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- .../Orc/Shared/ExecutorAddress.h | 45 +----- .../DefineExternalSectionStartAndEndSymbols.h | 4 +- .../JITLink/EHFrameSupport.cpp | 56 +++---- .../JITLink/EHFrameSupportImpl.h | 11 +- .../JITLink/ELFLinkGraphBuilder.h | 14 +- .../ExecutionEngine/JITLink/ELF_aarch64.cpp | 8 +- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 27 ++-- .../ExecutionEngine/JITLink/ELF_x86_64.cpp | 11 +- llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 37 +++-- .../JITLink/JITLinkGeneric.cpp | 9 +- .../JITLink/JITLinkMemoryManager.cpp | 22 +-- .../JITLink/MachOLinkGraphBuilder.cpp | 82 +++++----- .../JITLink/MachOLinkGraphBuilder.h | 14 +- .../ExecutionEngine/JITLink/MachO_arm64.cpp | 44 +++--- .../ExecutionEngine/JITLink/MachO_x86_64.cpp | 22 +-- .../JITLink/PerGraphGOTAndPLTStubsBuilder.h | 8 +- llvm/lib/ExecutionEngine/JITLink/x86_64.cpp 
| 10 +- .../Orc/DebugObjectManagerPlugin.cpp | 6 +- .../Orc/DebuggerSupportPlugin.cpp | 24 ++- .../ExecutionEngine/Orc/ELFNixPlatform.cpp | 12 +- .../Orc/EPCEHFrameRegistrar.cpp | 10 +- .../Orc/EPCGenericJITLinkMemoryManager.cpp | 4 +- .../Orc/EPCIndirectionUtils.cpp | 23 ++- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 8 +- .../lib/ExecutionEngine/Orc/MachOPlatform.cpp | 38 ++--- .../Orc/ObjectLinkingLayer.cpp | 6 +- llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp | 19 ++- .../tools/llvm-jitlink/llvm-jitlink-macho.cpp | 19 ++- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 27 ++-- .../JITLink/LinkGraphTests.cpp | 87 ++++------- .../Orc/ObjectLinkingLayerTest.cpp | 3 +- 41 files changed, 429 insertions(+), 519 deletions(-) diff --git a/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp b/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp index 5a41a7c1e8a6..2215e2507db3 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp @@ -100,15 +100,14 @@ class MyPlugin : public ObjectLinkingLayer::Plugin { return; } - ExecutorAddr InitAddr(B.getAddress().getValue() & ~(LineWidth - 1)); - ExecutorAddr StartAddr = B.getAddress(); - ExecutorAddr EndAddr = B.getAddress() + B.getSize(); + JITTargetAddress InitAddr = B.getAddress() & ~(LineWidth - 1); + JITTargetAddress StartAddr = B.getAddress(); + JITTargetAddress EndAddr = B.getAddress() + B.getSize(); auto *Data = reinterpret_cast(B.getContent().data()); - for (ExecutorAddr CurAddr = InitAddr; CurAddr != EndAddr; ++CurAddr) { + for (JITTargetAddress CurAddr = InitAddr; CurAddr != EndAddr; ++CurAddr) { if (CurAddr % LineWidth == 0) - outs() << " " << formatv("{0:x16}", CurAddr.getValue()) - << ": "; + outs() << " " << formatv("{0:x16}", CurAddr) << ": "; if (CurAddr < 
StartAddr) outs() << " "; else diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h index e834042f8bb2..ec78d9db40b6 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h @@ -25,24 +25,25 @@ namespace jitlink { class EHFrameRegistrar { public: virtual ~EHFrameRegistrar(); - virtual Error registerEHFrames(orc::ExecutorAddr EHFrameSectionAddr, + virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) = 0; - virtual Error deregisterEHFrames(orc::ExecutorAddr EHFrameSectionAddr, + virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) = 0; }; /// Registers / Deregisters EH-frames in the current process. class InProcessEHFrameRegistrar final : public EHFrameRegistrar { public: - Error registerEHFrames(orc::ExecutorAddr EHFrameSectionAddr, + Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) override; - Error deregisterEHFrames(orc::ExecutorAddr EHFrameSectionAddr, + Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) override; }; -using StoreFrameRangeFunction = std::function; +using StoreFrameRangeFunction = + std::function; /// Creates a pass that records the address and size of the EH frame section. 
/// If no eh-frame section is found then the address and size will both be given diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index d0d497b75d9d..69106fcb4c28 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -104,10 +104,10 @@ class Addressable { friend class LinkGraph; protected: - Addressable(orc::ExecutorAddr Address, bool IsDefined) + Addressable(JITTargetAddress Address, bool IsDefined) : Address(Address), IsDefined(IsDefined), IsAbsolute(false) {} - Addressable(orc::ExecutorAddr Address) + Addressable(JITTargetAddress Address) : Address(Address), IsDefined(false), IsAbsolute(true) { assert(!(IsDefined && IsAbsolute) && "Block cannot be both defined and absolute"); @@ -119,8 +119,8 @@ class Addressable { Addressable(Addressable &&) = delete; Addressable &operator=(Addressable &&) = default; - orc::ExecutorAddr getAddress() const { return Address; } - void setAddress(orc::ExecutorAddr Address) { this->Address = Address; } + JITTargetAddress getAddress() const { return Address; } + void setAddress(JITTargetAddress Address) { this->Address = Address; } /// Returns true if this is a defined addressable, in which case you /// can downcast this to a Block. @@ -133,7 +133,7 @@ class Addressable { this->IsAbsolute = IsAbsolute; } - orc::ExecutorAddr Address; + JITTargetAddress Address = 0; uint64_t IsDefined : 1; uint64_t IsAbsolute : 1; @@ -152,7 +152,7 @@ class Block : public Addressable { private: /// Create a zero-fill defined addressable. 
- Block(Section &Parent, orc::ExecutorAddrDiff Size, orc::ExecutorAddr Address, + Block(Section &Parent, JITTargetAddress Size, JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Size(Size) { assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); @@ -168,7 +168,7 @@ class Block : public Addressable { /// Create a defined addressable for the given content. /// The Content is assumed to be non-writable, and will be copied when /// mutations are required. - Block(Section &Parent, ArrayRef Content, orc::ExecutorAddr Address, + Block(Section &Parent, ArrayRef Content, JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Data(Content.data()), Size(Content.size()) { @@ -188,7 +188,7 @@ class Block : public Addressable { /// The standard way to achieve this is to allocate it on the Graph's /// allocator. Block(Section &Parent, MutableArrayRef Content, - orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) + JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Data(Content.data()), Size(Content.size()) { assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); @@ -328,7 +328,7 @@ class Block : public Addressable { /// Returns the address of the fixup for the given edge, which is equal to /// this block's address plus the edge's offset. - orc::ExecutorAddr getFixupAddress(const Edge &E) const { + JITTargetAddress getFixupAddress(const Edge &E) const { return getAddress() + E.getOffset(); } @@ -343,17 +343,12 @@ class Block : public Addressable { std::vector Edges; }; -// Align an address to conform with block alignment requirements. -inline uint64_t alignToBlock(uint64_t Addr, Block &B) { +// Align a JITTargetAddress to conform with block alignment requirements. 
+inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); return Addr + Delta; } -// Align a orc::ExecutorAddr to conform with block alignment requirements. -inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, Block &B) { - return orc::ExecutorAddr(alignToBlock(Addr.getValue(), B)); -} - /// Describes symbol linkage. This can be used to make resolve definition /// clashes. enum class Linkage : uint8_t { @@ -396,8 +391,8 @@ class Symbol { friend class LinkGraph; private: - Symbol(Addressable &Base, orc::ExecutorAddrDiff Offset, StringRef Name, - orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive, + Symbol(Addressable &Base, JITTargetAddress Offset, StringRef Name, + JITTargetAddress Size, Linkage L, Scope S, bool IsLive, bool IsCallable) : Name(Name), Base(&Base), Offset(Offset), Size(Size) { assert(Offset <= MaxOffset && "Offset out of range"); @@ -408,8 +403,7 @@ class Symbol { } static Symbol &constructCommon(void *SymStorage, Block &Base, StringRef Name, - orc::ExecutorAddrDiff Size, Scope S, - bool IsLive) { + JITTargetAddress Size, Scope S, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert(!Name.empty() && "Common symbol name cannot be empty"); assert(Base.isDefined() && @@ -422,7 +416,7 @@ class Symbol { } static Symbol &constructExternal(void *SymStorage, Addressable &Base, - StringRef Name, orc::ExecutorAddrDiff Size, + StringRef Name, JITTargetAddress Size, Linkage L) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && @@ -434,7 +428,7 @@ class Symbol { } static Symbol &constructAbsolute(void *SymStorage, Addressable &Base, - StringRef Name, orc::ExecutorAddrDiff Size, + StringRef Name, JITTargetAddress Size, Linkage L, Scope S, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && @@ -445,8 +439,8 @@ class Symbol { } static Symbol &constructAnonDef(void *SymStorage, Block 
&Base, - orc::ExecutorAddrDiff Offset, - orc::ExecutorAddrDiff Size, bool IsCallable, + JITTargetAddress Offset, + JITTargetAddress Size, bool IsCallable, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert((Offset + Size) <= Base.getSize() && @@ -458,9 +452,9 @@ class Symbol { } static Symbol &constructNamedDef(void *SymStorage, Block &Base, - orc::ExecutorAddrDiff Offset, StringRef Name, - orc::ExecutorAddrDiff Size, Linkage L, - Scope S, bool IsLive, bool IsCallable) { + JITTargetAddress Offset, StringRef Name, + JITTargetAddress Size, Linkage L, Scope S, + bool IsLive, bool IsCallable) { assert(SymStorage && "Storage cannot be null"); assert((Offset + Size) <= Base.getSize() && "Symbol extends past end of block"); @@ -558,16 +552,16 @@ class Symbol { } /// Returns the offset for this symbol within the underlying addressable. - orc::ExecutorAddrDiff getOffset() const { return Offset; } + JITTargetAddress getOffset() const { return Offset; } /// Returns the address of this symbol. - orc::ExecutorAddr getAddress() const { return Base->getAddress() + Offset; } + JITTargetAddress getAddress() const { return Base->getAddress() + Offset; } /// Returns the size of this symbol. - orc::ExecutorAddrDiff getSize() const { return Size; } + JITTargetAddress getSize() const { return Size; } /// Set the size of this symbol. 
- void setSize(orc::ExecutorAddrDiff Size) { + void setSize(JITTargetAddress Size) { assert(Base && "Cannot set size for null Symbol"); assert((Size == 0 || Base->isDefined()) && "Non-zero size can only be set for defined symbols"); @@ -628,7 +622,7 @@ class Symbol { void setBlock(Block &B) { Base = &B; } - void setOffset(orc::ExecutorAddrDiff NewOffset) { + void setOffset(uint64_t NewOffset) { assert(NewOffset <= MaxOffset && "Offset out of range"); Offset = NewOffset; } @@ -643,7 +637,7 @@ class Symbol { uint64_t S : 2; uint64_t IsLive : 1; uint64_t IsCallable : 1; - orc::ExecutorAddrDiff Size = 0; + JITTargetAddress Size = 0; }; raw_ostream &operator<<(raw_ostream &OS, const Symbol &A); @@ -789,13 +783,13 @@ class SectionRange { assert((First || !Last) && "Last can not be null if start is non-null"); return !First; } - orc::ExecutorAddr getStart() const { - return First ? First->getAddress() : orc::ExecutorAddr(); + JITTargetAddress getStart() const { + return First ? First->getAddress() : 0; } - orc::ExecutorAddr getEnd() const { - return Last ? Last->getAddress() + Last->getSize() : orc::ExecutorAddr(); + JITTargetAddress getEnd() const { + return Last ? Last->getAddress() + Last->getSize() : 0; } - orc::ExecutorAddrDiff getSize() const { return getEnd() - getStart(); } + uint64_t getSize() const { return getEnd() - getStart(); } private: Block *First = nullptr; @@ -1001,7 +995,7 @@ class LinkGraph { /// Create a content block. Block &createContentBlock(Section &Parent, ArrayRef Content, - orc::ExecutorAddr Address, uint64_t Alignment, + uint64_t Address, uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, Content, Address, Alignment, AlignmentOffset); } @@ -1009,17 +1003,15 @@ class LinkGraph { /// Create a content block with initially mutable data. 
Block &createMutableContentBlock(Section &Parent, MutableArrayRef MutableContent, - orc::ExecutorAddr Address, - uint64_t Alignment, + uint64_t Address, uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, MutableContent, Address, Alignment, AlignmentOffset); } /// Create a zero-fill block. - Block &createZeroFillBlock(Section &Parent, orc::ExecutorAddrDiff Size, - orc::ExecutorAddr Address, uint64_t Alignment, - uint64_t AlignmentOffset) { + Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address, + uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, Size, Address, Alignment, AlignmentOffset); } @@ -1069,24 +1061,22 @@ class LinkGraph { /// present during lookup: Externals with strong linkage must be found or /// an error will be emitted. Externals with weak linkage are permitted to /// be undefined, in which case they are assigned a value of 0. - Symbol &addExternalSymbol(StringRef Name, orc::ExecutorAddrDiff Size, - Linkage L) { + Symbol &addExternalSymbol(StringRef Name, uint64_t Size, Linkage L) { assert(llvm::count_if(ExternalSymbols, [&](const Symbol *Sym) { return Sym->getName() == Name; }) == 0 && "Duplicate external symbol"); - auto &Sym = Symbol::constructExternal( - Allocator.Allocate(), - createAddressable(orc::ExecutorAddr(), false), Name, Size, L); + auto &Sym = + Symbol::constructExternal(Allocator.Allocate(), + createAddressable(0, false), Name, Size, L); ExternalSymbols.insert(&Sym); return Sym; } /// Add an absolute symbol. - Symbol &addAbsoluteSymbol(StringRef Name, orc::ExecutorAddr Address, - orc::ExecutorAddrDiff Size, Linkage L, Scope S, - bool IsLive) { + Symbol &addAbsoluteSymbol(StringRef Name, JITTargetAddress Address, + uint64_t Size, Linkage L, Scope S, bool IsLive) { assert(llvm::count_if(AbsoluteSymbols, [&](const Symbol *Sym) { return Sym->getName() == Name; @@ -1101,7 +1091,7 @@ class LinkGraph { /// Convenience method for adding a weak zero-fill symbol. 
Symbol &addCommonSymbol(StringRef Name, Scope S, Section &Section, - orc::ExecutorAddr Address, orc::ExecutorAddrDiff Size, + JITTargetAddress Address, uint64_t Size, uint64_t Alignment, bool IsLive) { assert(llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { @@ -1117,8 +1107,8 @@ class LinkGraph { } /// Add an anonymous symbol. - Symbol &addAnonymousSymbol(Block &Content, orc::ExecutorAddrDiff Offset, - orc::ExecutorAddrDiff Size, bool IsCallable, + Symbol &addAnonymousSymbol(Block &Content, JITTargetAddress Offset, + JITTargetAddress Size, bool IsCallable, bool IsLive) { auto &Sym = Symbol::constructAnonDef(Allocator.Allocate(), Content, Offset, Size, IsCallable, IsLive); @@ -1127,9 +1117,9 @@ class LinkGraph { } /// Add a named symbol. - Symbol &addDefinedSymbol(Block &Content, orc::ExecutorAddrDiff Offset, - StringRef Name, orc::ExecutorAddrDiff Size, - Linkage L, Scope S, bool IsCallable, bool IsLive) { + Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, + StringRef Name, JITTargetAddress Size, Linkage L, + Scope S, bool IsCallable, bool IsLive) { assert((S == Scope::Local || llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { return Sym->getName() == Name; @@ -1203,7 +1193,7 @@ class LinkGraph { assert(Sym.isDefined() && "Sym is not a defined symbol"); Section &Sec = Sym.getBlock().getSection(); Sec.removeSymbol(Sym); - Sym.makeExternal(createAddressable(orc::ExecutorAddr(), false)); + Sym.makeExternal(createAddressable(0, false)); } ExternalSymbols.insert(&Sym); } @@ -1213,7 +1203,7 @@ class LinkGraph { /// /// Symbol size, linkage, scope, and callability, and liveness will be left /// unchanged. Symbol offset will be reset to 0. 
- void makeAbsolute(Symbol &Sym, orc::ExecutorAddr Address) { + void makeAbsolute(Symbol &Sym, JITTargetAddress Address) { assert(!Sym.isAbsolute() && "Symbol is already absolute"); if (Sym.isExternal()) { assert(ExternalSymbols.count(&Sym) && @@ -1232,9 +1222,8 @@ class LinkGraph { /// Turn an absolute or external symbol into a defined one by attaching it to /// a block. Symbol must not already be defined. - void makeDefined(Symbol &Sym, Block &Content, orc::ExecutorAddrDiff Offset, - orc::ExecutorAddrDiff Size, Linkage L, Scope S, - bool IsLive) { + void makeDefined(Symbol &Sym, Block &Content, JITTargetAddress Offset, + JITTargetAddress Size, Linkage L, Scope S, bool IsLive) { assert(!Sym.isDefined() && "Sym is already a defined symbol"); if (Sym.isAbsolute()) { assert(AbsoluteSymbols.count(&Sym) && @@ -1266,15 +1255,15 @@ class LinkGraph { /// /// All other symbol attributes are unchanged. void transferDefinedSymbol(Symbol &Sym, Block &DestBlock, - orc::ExecutorAddrDiff NewOffset, - Optional ExplicitNewSize) { + JITTargetAddress NewOffset, + Optional ExplicitNewSize) { auto &OldSection = Sym.getBlock().getSection(); Sym.setBlock(DestBlock); Sym.setOffset(NewOffset); if (ExplicitNewSize) Sym.setSize(*ExplicitNewSize); else { - auto RemainingBlockSize = DestBlock.getSize() - NewOffset; + JITTargetAddress RemainingBlockSize = DestBlock.getSize() - NewOffset; if (Sym.getSize() > RemainingBlockSize) Sym.setSize(RemainingBlockSize); } @@ -1418,14 +1407,14 @@ inline MutableArrayRef Block::getMutableContent(LinkGraph &G) { /// Enables easy lookup of blocks by addresses. class BlockAddressMap { public: - using AddrToBlockMap = std::map; + using AddrToBlockMap = std::map; using const_iterator = AddrToBlockMap::const_iterator; /// A block predicate that always adds all blocks. static bool includeAllBlocks(const Block &B) { return true; } /// A block predicate that always includes blocks with non-null addresses. 
- static bool includeNonNull(const Block &B) { return !!B.getAddress(); } + static bool includeNonNull(const Block &B) { return B.getAddress(); } BlockAddressMap() = default; @@ -1489,7 +1478,7 @@ class BlockAddressMap { /// Returns the block starting at the given address, or nullptr if no such /// block exists. - Block *getBlockAt(orc::ExecutorAddr Addr) const { + Block *getBlockAt(JITTargetAddress Addr) const { auto I = AddrToBlock.find(Addr); if (I == AddrToBlock.end()) return nullptr; @@ -1498,7 +1487,7 @@ class BlockAddressMap { /// Returns the block covering the given address, or nullptr if no such block /// exists. - Block *getBlockCovering(orc::ExecutorAddr Addr) const { + Block *getBlockCovering(JITTargetAddress Addr) const { auto I = AddrToBlock.upper_bound(Addr); if (I == AddrToBlock.begin()) return nullptr; @@ -1515,11 +1504,10 @@ class BlockAddressMap { ExistingBlock.getAddress() + ExistingBlock.getSize(); return make_error( "Block at " + - formatv("{0:x16} -- {1:x16}", NewBlock.getAddress().getValue(), - NewBlockEnd.getValue()) + + formatv("{0:x16} -- {1:x16}", NewBlock.getAddress(), NewBlockEnd) + " overlaps " + - formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress().getValue(), - ExistingBlockEnd.getValue())); + formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress(), + ExistingBlockEnd)); } AddrToBlockMap AddrToBlock; @@ -1544,7 +1532,7 @@ class SymbolAddressMap { /// Returns the list of symbols that start at the given address, or nullptr if /// no such symbols exist. - const SymbolVector *getSymbolsAt(orc::ExecutorAddr Addr) const { + const SymbolVector *getSymbolsAt(JITTargetAddress Addr) const { auto I = AddrToSymbols.find(Addr); if (I == AddrToSymbols.end()) return nullptr; @@ -1552,7 +1540,7 @@ class SymbolAddressMap { } private: - std::map AddrToSymbols; + std::map AddrToSymbols; }; /// A function for mutating LinkGraphs. 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 179a8b946cf3..7dd382facde8 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -13,10 +13,9 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H -#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" #include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" -#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/MSVCErrorWorkarounds.h" @@ -50,9 +49,9 @@ class Section; /// executor-side implementation code is responsible for freeing the error /// string). struct AllocActionCall { - orc::ExecutorAddr FnAddr; - orc::ExecutorAddr CtxAddr; - orc::ExecutorAddrDiff CtxSize; + JITTargetAddress FnAddr = 0; + JITTargetAddress CtxAddr = 0; + JITTargetAddress CtxSize = 0; }; /// A pair of AllocActionCalls, one to be run at finalization time, one to be @@ -94,48 +93,47 @@ class JITLinkMemoryManager { class FinalizedAlloc { friend class JITLinkMemoryManager; - static constexpr auto InvalidAddr = ~uint64_t(0); - public: + static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0); + FinalizedAlloc() = default; - explicit FinalizedAlloc(orc::ExecutorAddr A) : A(A) { - assert(A && "Explicitly creating an invalid allocation?"); + explicit FinalizedAlloc(JITTargetAddress A) : A(A) { + assert(A != 0 && "Explicitly creating an invalid allocation?"); } FinalizedAlloc(const FinalizedAlloc &) = delete; FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) { - Other.A.setValue(InvalidAddr); + Other.A = InvalidAddr; } FinalizedAlloc &operator=(const FinalizedAlloc &) = delete; FinalizedAlloc 
&operator=(FinalizedAlloc &&Other) { - assert(A.getValue() == InvalidAddr && + assert(A == InvalidAddr && "Cannot overwrite active finalized allocation"); std::swap(A, Other.A); return *this; } ~FinalizedAlloc() { - assert(A.getValue() == InvalidAddr && - "Finalized allocation was not deallocated"); + assert(A == InvalidAddr && "Finalized allocation was not deallocated"); } /// FinalizedAllocs convert to false for default-constructed, and /// true otherwise. Default-constructed allocs need not be deallocated. - explicit operator bool() const { return A.getValue() != InvalidAddr; } + explicit operator bool() const { return A != InvalidAddr; } /// Returns the address associated with this finalized allocation. /// The allocation is unmodified. - orc::ExecutorAddr getAddress() const { return A; } + JITTargetAddress getAddress() const { return A; } /// Returns the address associated with this finalized allocation and /// resets this object to the default state. /// This should only be used by allocators when deallocating memory. - orc::ExecutorAddr release() { - orc::ExecutorAddr Tmp = A; - A.setValue(InvalidAddr); + JITTargetAddress release() { + JITTargetAddress Tmp = A; + A = InvalidAddr; return Tmp; } private: - orc::ExecutorAddr A{InvalidAddr}; + JITTargetAddress A = InvalidAddr; }; /// Represents an allocation which has not been finalized yet. @@ -265,7 +263,7 @@ class BasicLayout { Align Alignment; size_t ContentSize; uint64_t ZeroFillSize; - orc::ExecutorAddr Addr; + JITTargetAddress Addr; char *WorkingMem = nullptr; private: @@ -343,7 +341,7 @@ class SimpleSegmentAlloc { /// Describes the segment working memory and executor address. 
struct SegmentInfo { - orc::ExecutorAddr Addr; + JITTargetAddress Addr = 0; MutableArrayRef WorkingMem; }; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 4a4e8d15be66..3130ea381534 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -368,18 +368,18 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - auto FixupAddress = B.getAddress() + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Pointer64: { - uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); *(ulittle64_t *)FixupPtr = Value; break; } case Pointer32: { - uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmU32(Value))) *(ulittle32_t *)FixupPtr = Value; else @@ -387,7 +387,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, break; } case Pointer32Signed: { - int64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + int64_t Value = E.getTarget().getAddress() + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmS32(Value))) *(little32_t *)FixupPtr = Value; else @@ -483,8 +483,8 @@ extern const char PointerJumpStubContent[6]; inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, Symbol *InitialTarget = nullptr, uint64_t InitialAddend = 0) { - auto &B = G.createContentBlock(PointerSection, NullPointerContent, - orc::ExecutorAddr(~uint64_t(7)), 8, 0); + auto &B = + G.createContentBlock(PointerSection, NullPointerContent, ~7ULL, 8, 0); if (InitialTarget) B.addEdge(Pointer64, 0, *InitialTarget, InitialAddend); return G.addAnonymousSymbol(B, 0, 8, 
false, false); @@ -498,8 +498,8 @@ inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, /// address: highest allowable: (~5U) inline Block &createPointerJumpStubBlock(LinkGraph &G, Section &StubSection, Symbol &PointerSymbol) { - auto &B = G.createContentBlock(StubSection, PointerJumpStubContent, - orc::ExecutorAddr(~uint64_t(5)), 1, 0); + auto &B = + G.createContentBlock(StubSection, PointerJumpStubContent, ~5ULL, 1, 0); B.addEdge(Delta32, 2, PointerSymbol, -4); return B; } @@ -552,7 +552,8 @@ class GOTTableManager : public TableManager { "Fell through switch, but no new kind to set"); DEBUG_WITH_TYPE("jitlink", { dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); E.setKind(KindToSet); @@ -585,7 +586,8 @@ class PLTTableManager : public TableManager { if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) { DEBUG_WITH_TYPE("jitlink", { dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h index 8f1bf854843f..20da3e3b89eb 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -236,7 +236,7 @@ class ELFNixPlatform : public Platform { DenseMap InitSeqs; std::vector BootstrapPOSRs; - DenseMap HandleAddrToJITDylib; + DenseMap HandleAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; diff --git 
a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h index 3b34400894df..6d113a7bdf1a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -39,9 +39,9 @@ class EPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { : ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} - Error registerEHFrames(ExecutorAddr EHFrameSectionAddr, + Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) override; - Error deregisterEHFrames(ExecutorAddr EHFrameSectionAddr, + Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) override; private: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h index 18656d03e441..b9825f17ec17 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h @@ -85,7 +85,7 @@ class SPSSerializationTraits::deserialize(IB, A)) return false; - FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A); + FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue()); return true; } }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index 9482f20ecec6..d7b5e2eda6ee 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -239,7 +239,7 @@ class MachOPlatform : public Platform { std::mutex PlatformMutex; DenseMap InitSeqs; - DenseMap HeaderAddrToJITDylib; + DenseMap HeaderAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h 
b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index e22d7f7de814..109922a46e26 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -222,7 +222,7 @@ class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin { private: struct EHFrameRange { - orc::ExecutorAddr Addr; + JITTargetAddress Addr = 0; size_t Size; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index 1abc9508d93a..2d316b9de007 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -13,10 +13,7 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H #define LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H -#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" #include #include @@ -32,7 +29,7 @@ class ExecutorAddr { ExecutorAddr() = default; /// Create an ExecutorAddr from the given value. - explicit constexpr ExecutorAddr(uint64_t Addr) : Addr(Addr) {} + explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {} /// Create an ExecutorAddr from the given pointer. /// Warning: This should only be used when JITing in-process. 
@@ -91,12 +88,12 @@ class ExecutorAddr { ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); } ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); } - ExecutorAddr &operator+=(const ExecutorAddrDiff &Delta) { + ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) { Addr += Delta; return *this; } - ExecutorAddr &operator-=(const ExecutorAddrDiff &Delta) { + ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) { Addr -= Delta; return *this; } @@ -123,18 +120,6 @@ inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS, return ExecutorAddr(LHS + RHS.getValue()); } -/// Subtracting an offset from an address yields an address. -inline ExecutorAddr operator-(const ExecutorAddr &LHS, - const ExecutorAddrDiff &RHS) { - return ExecutorAddr(LHS.getValue() - RHS); -} - -/// Taking the modulus of an address and a diff yields a diff. -inline ExecutorAddrDiff operator%(const ExecutorAddr &LHS, - const ExecutorAddrDiff &RHS) { - return ExecutorAddrDiff(LHS.getValue() % RHS); -} - /// Represents an address range in the exceutor process. struct ExecutorAddrRange { ExecutorAddrRange() = default; @@ -163,10 +148,6 @@ struct ExecutorAddrRange { ExecutorAddr End; }; -inline raw_ostream &operator<<(raw_ostream &OS, const ExecutorAddr &A) { - return OS << formatv("{0:x}", A.getValue()); -} - namespace shared { class SPSExecutorAddr {}; @@ -217,26 +198,6 @@ using SPSExecutorAddrRangeSequence = SPSSequence; } // End namespace shared. } // End namespace orc. - -// Provide DenseMapInfo for ExecutorAddrs. 
-template <> struct DenseMapInfo { - static inline orc::ExecutorAddr getEmptyKey() { - return orc::ExecutorAddr(DenseMapInfo::getEmptyKey()); - } - static inline orc::ExecutorAddr getTombstoneKey() { - return orc::ExecutorAddr(DenseMapInfo::getTombstoneKey()); - } - - static unsigned getHashValue(const orc::ExecutorAddr &Addr) { - return DenseMapInfo::getHashValue(Addr.getValue()); - } - - static bool isEqual(const orc::ExecutorAddr &LHS, - const orc::ExecutorAddr &RHS) { - return DenseMapInfo::isEqual(LHS.getValue(), RHS.getValue()); - } -}; - } // End namespace llvm. #endif // LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H diff --git a/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h b/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h index 159880e4b152..8ae3bc2bf61d 100644 --- a/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h +++ b/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h @@ -52,13 +52,13 @@ class DefineExternalSectionStartAndEndSymbols { auto &SR = getSectionRange(*D.Sec); if (D.IsStart) { if (SR.empty()) - G.makeAbsolute(*Sym, orc::ExecutorAddr()); + G.makeAbsolute(*Sym, 0); else G.makeDefined(*Sym, *SR.getFirstBlock(), 0, 0, Linkage::Strong, Scope::Local, false); } else { if (SR.empty()) - G.makeAbsolute(*Sym, orc::ExecutorAddr()); + G.makeAbsolute(*Sym, 0); else G.makeDefined(*Sym, *SR.getLastBlock(), SR.getLastBlock()->getSize(), 0, Linkage::Strong, diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 4d421b3e1ed3..4d7d5ce26668 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -65,7 +65,10 @@ Error EHFrameSplitter::operator()(LinkGraph &G) { Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache) { - LLVM_DEBUG(dbgs() << " Processing block at " << 
B.getAddress() << "\n"); + LLVM_DEBUG({ + dbgs() << " Processing block at " << formatv("{0:x16}", B.getAddress()) + << "\n"; + }); // eh-frame should not contain zero-fill blocks. if (B.isZeroFill()) @@ -397,7 +400,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, BlockEdgeMap &BlockEdges) { LLVM_DEBUG(dbgs() << " Record is FDE\n"); - orc::ExecutorAddr RecordAddress = B.getAddress() + RecordOffset; + JITTargetAddress RecordAddress = B.getAddress() + RecordOffset; auto RecordContent = B.getContent().slice(RecordOffset, RecordLength); BinaryStreamReader RecordReader( @@ -415,8 +418,8 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, { // Process the CIE pointer field. auto CIEEdgeItr = BlockEdges.find(RecordOffset + CIEDeltaFieldOffset); - orc::ExecutorAddr CIEAddress = - RecordAddress + orc::ExecutorAddrDiff(CIEDeltaFieldOffset - CIEDelta); + JITTargetAddress CIEAddress = + RecordAddress + CIEDeltaFieldOffset - CIEDelta; if (CIEEdgeItr == BlockEdges.end()) { LLVM_DEBUG({ @@ -453,7 +456,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, { // Process the PC-Begin field. 
Block *PCBeginBlock = nullptr; - orc::ExecutorAddrDiff PCBeginFieldOffset = RecordReader.getOffset(); + JITTargetAddress PCBeginFieldOffset = RecordReader.getOffset(); auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset); if (PCEdgeItr == BlockEdges.end()) { auto PCBeginPtrInfo = @@ -461,12 +464,12 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, RecordAddress + PCBeginFieldOffset, RecordReader); if (!PCBeginPtrInfo) return PCBeginPtrInfo.takeError(); - orc::ExecutorAddr PCBegin = PCBeginPtrInfo->first; + JITTargetAddress PCBegin = PCBeginPtrInfo->first; Edge::Kind PCBeginEdgeKind = PCBeginPtrInfo->second; LLVM_DEBUG({ dbgs() << " Adding edge at " - << (RecordAddress + PCBeginFieldOffset) << " to PC at " - << formatv("{0:x16}", PCBegin) << "\n"; + << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset) + << " to PC at " << formatv("{0:x16}", PCBegin) << "\n"; }); auto PCBeginSym = getOrCreateSymbol(PC, PCBegin); if (!PCBeginSym) @@ -519,7 +522,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, if (auto Err = RecordReader.readULEB128(AugmentationDataSize)) return Err; - orc::ExecutorAddrDiff LSDAFieldOffset = RecordReader.getOffset(); + JITTargetAddress LSDAFieldOffset = RecordReader.getOffset(); auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset); if (LSDAEdgeItr == BlockEdges.end()) { auto LSDAPointerInfo = @@ -527,7 +530,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, RecordAddress + LSDAFieldOffset, RecordReader); if (!LSDAPointerInfo) return LSDAPointerInfo.takeError(); - orc::ExecutorAddr LSDA = LSDAPointerInfo->first; + JITTargetAddress LSDA = LSDAPointerInfo->first; Edge::Kind LSDAEdgeKind = LSDAPointerInfo->second; auto LSDASym = getOrCreateSymbol(PC, LSDA); if (!LSDASym) @@ -642,10 +645,12 @@ unsigned EHFrameEdgeFixer::getPointerEncodingDataSize(uint8_t PointerEncoding) { } } -Expected> +Expected> EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, - 
orc::ExecutorAddr PointerFieldAddress, + JITTargetAddress PointerFieldAddress, BinaryStreamReader &RecordReader) { + static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t), + "Result must be able to hold a uint64_t"); assert(isSupportedPointerEncoding(PointerEncoding) && "Unsupported pointer encoding"); @@ -658,7 +663,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, if (EffectiveType == DW_EH_PE_absptr) EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4; - orc::ExecutorAddr Addr; + JITTargetAddress Addr; Edge::Kind PointerEdgeKind = Edge::Invalid; switch (EffectiveType) { case DW_EH_PE_udata4: { @@ -704,7 +709,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, } Expected EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC, - orc::ExecutorAddr Addr) { + JITTargetAddress Addr) { Symbol *CanonicalSym = nullptr; auto UpdateCanonicalSym = [&](Symbol *Sym) { @@ -748,9 +753,8 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { << EHFrameSectionName << "\n"; }); - auto &NullTerminatorBlock = - G.createContentBlock(*EHFrame, NullTerminatorBlockContent, - orc::ExecutorAddr(~uint64_t(4)), 1, 0); + auto &NullTerminatorBlock = G.createContentBlock( + *EHFrame, NullTerminatorBlockContent, 0xfffffffffffffffc, 1, 0); G.addAnonymousSymbol(NullTerminatorBlock, 0, 4, false, true); return Error::success(); } @@ -758,15 +762,17 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { EHFrameRegistrar::~EHFrameRegistrar() {} Error InProcessEHFrameRegistrar::registerEHFrames( - orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { - return orc::registerEHFrameSection(EHFrameSectionAddr.toPtr(), - EHFrameSectionSize); + JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { + return orc::registerEHFrameSection( + jitTargetAddressToPointer(EHFrameSectionAddr), + EHFrameSectionSize); } Error InProcessEHFrameRegistrar::deregisterEHFrames( - orc::ExecutorAddr EHFrameSectionAddr, size_t 
EHFrameSectionSize) { - return orc::deregisterEHFrameSection(EHFrameSectionAddr.toPtr(), - EHFrameSectionSize); + JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { + return orc::deregisterEHFrameSection( + jitTargetAddressToPointer(EHFrameSectionAddr), + EHFrameSectionSize); } LinkGraphPassFunction @@ -783,14 +789,14 @@ createEHFrameRecorderPass(const Triple &TT, StoreFrameRange = std::move(StoreRangeAddress)](LinkGraph &G) -> Error { // Search for a non-empty eh-frame and record the address of the first // symbol in it. - orc::ExecutorAddr Addr; + JITTargetAddress Addr = 0; size_t Size = 0; if (auto *S = G.findSectionByName(EHFrameSectionName)) { auto R = SectionRange(*S); Addr = R.getStart(); Size = R.getSize(); } - if (!Addr && Size != 0) + if (Addr == 0 && Size != 0) return make_error( StringRef(EHFrameSectionName) + " section can not have zero address with non-zero size"); diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h index ef4b47b9aa28..b4c4b0f7b097 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h @@ -71,12 +71,12 @@ class EHFrameEdgeFixer { }; using BlockEdgeMap = DenseMap; - using CIEInfosMap = DenseMap; + using CIEInfosMap = DenseMap; struct ParseContext { ParseContext(LinkGraph &G) : G(G) {} - Expected findCIEInfo(orc::ExecutorAddr Address) { + Expected findCIEInfo(JITTargetAddress Address) { auto I = CIEInfos.find(Address); if (I == CIEInfos.end()) return make_error("No CIE found at address " + @@ -102,13 +102,12 @@ class EHFrameEdgeFixer { static bool isSupportedPointerEncoding(uint8_t PointerEncoding); unsigned getPointerEncodingDataSize(uint8_t PointerEncoding); - Expected> + Expected> readEncodedPointer(uint8_t PointerEncoding, - orc::ExecutorAddr PointerFieldAddress, + JITTargetAddress PointerFieldAddress, BinaryStreamReader &RecordReader); - Expected getOrCreateSymbol(ParseContext 
&PC, - orc::ExecutorAddr Addr); + Expected getOrCreateSymbol(ParseContext &PC, JITTargetAddress Addr); StringRef EHFrameSectionName; unsigned PointerSize; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 23c8b77b913b..f9101d71dfa8 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -322,12 +322,10 @@ template Error ELFLinkGraphBuilder::graphifySections() { if (!Data) return Data.takeError(); - G->createContentBlock(GraphSec, *Data, orc::ExecutorAddr(Sec.sh_addr), - Sec.sh_addralign, 0); + G->createContentBlock(GraphSec, *Data, Sec.sh_addr, Sec.sh_addralign, 0); } else - G->createZeroFillBlock(GraphSec, Sec.sh_size, - orc::ExecutorAddr(Sec.sh_addr), Sec.sh_addralign, - 0); + G->createZeroFillBlock(GraphSec, Sec.sh_size, Sec.sh_addr, + Sec.sh_addralign, 0); setGraphSection(SecIndex, GraphSec); } @@ -395,9 +393,9 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { // Handle common symbols specially. 
if (Sym.isCommon()) { - Symbol &GSym = G->addCommonSymbol(*Name, Scope::Default, - getCommonSection(), orc::ExecutorAddr(), - Sym.st_size, Sym.getValue(), false); + Symbol &GSym = + G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, + Sym.st_size, Sym.getValue(), false); setGraphSymbol(SymIndex, GSym); continue; } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp index 35b70d533907..dc183dfddfae 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -41,11 +41,10 @@ class ELFJITLinker_aarch64 : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - auto FixupAddress = B.getAddress() + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case aarch64::R_AARCH64_CALL26: { - assert((FixupAddress.getValue() & 0x3) == 0 && - "Call-inst is not 32-bit aligned"); + assert((FixupAddress & 0x3) == 0 && "Call-inst is not 32-bit aligned"); int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); if (static_cast(Value) & 0x3) @@ -125,8 +124,7 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { int64_t Addend = Rel.r_addend; Block *BlockToFix = *(GraphSection.blocks().begin()); - orc::ExecutorAddr FixupAddress = - orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; + JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index a4d1cc8c6195..26ec79ea50cf 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -44,16 +44,15 @@ class PerGraphGOTAndPLTStubsBuilder_ELF_riscv bool 
isGOTEdgeToFix(Edge &E) const { return E.getKind() == R_RISCV_GOT_HI20; } Symbol &createGOTEntry(Symbol &Target) { - Block &GOTBlock = - G.createContentBlock(getGOTSection(), getGOTEntryBlockContent(), - orc::ExecutorAddr(), G.getPointerSize(), 0); + Block &GOTBlock = G.createContentBlock( + getGOTSection(), getGOTEntryBlockContent(), 0, G.getPointerSize(), 0); GOTBlock.addEdge(isRV64() ? R_RISCV_64 : R_RISCV_32, 0, Target, 0); return G.addAnonymousSymbol(GOTBlock, 0, G.getPointerSize(), false, false); } Symbol &createPLTStub(Symbol &Target) { - Block &StubContentBlock = G.createContentBlock( - getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 4, 0); + Block &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 4, 0); auto &GOTEntrySymbol = getGOTEntry(Target); StubContentBlock.addEdge(R_RISCV_CALL, 0, GOTEntrySymbol, 0); return G.addAnonymousSymbol(StubContentBlock, 0, StubEntrySize, true, @@ -135,13 +134,13 @@ static Expected getRISCVPCRelHi20(const Edge &E) { const Symbol &Sym = E.getTarget(); const Block &B = Sym.getBlock(); - orc::ExecutorAddrDiff Offset = Sym.getOffset(); + JITTargetAddress Offset = Sym.getOffset(); struct Comp { - bool operator()(const Edge &Lhs, orc::ExecutorAddrDiff Offset) { + bool operator()(const Edge &Lhs, JITTargetAddress Offset) { return Lhs.getOffset() < Offset; } - bool operator()(orc::ExecutorAddrDiff Offset, const Edge &Rhs) { + bool operator()(JITTargetAddress Offset, const Edge &Rhs) { return Offset < Rhs.getOffset(); } }; @@ -177,27 +176,27 @@ class ELFJITLinker_riscv : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case R_RISCV_32: { - int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + int64_t Value = 
E.getTarget().getAddress() + E.getAddend(); *(little32_t *)FixupPtr = static_cast(Value); break; } case R_RISCV_64: { - int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + int64_t Value = E.getTarget().getAddress() + E.getAddend(); *(little64_t *)FixupPtr = static_cast(Value); break; } case R_RISCV_HI20: { - int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + int64_t Value = E.getTarget().getAddress() + E.getAddend(); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); break; } case R_RISCV_LO12_I: { - int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + int64_t Value = E.getTarget().getAddress() + E.getAddend(); int32_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -323,7 +322,7 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder { int64_t Addend = Rel.r_addend; Block *BlockToFix = *(GraphSection.blocks().begin()); - auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; + JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index cebe9e9dac78..27d8833ae19e 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -59,8 +59,8 @@ class TLSInfoTableManager_ELF_x86_64 // the TLS Info entry's key value will be written by the fixTLVSectionByName // pass, so create mutable content. 
auto &TLSInfoEntry = G.createMutableContentBlock( - getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), - orc::ExecutorAddr(), 8, 0); + getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), 0, 8, + 0); TLSInfoEntry.addEdge(x86_64::Pointer64, 8, Target, 0); return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false); } @@ -249,7 +249,7 @@ class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { } Block *BlockToFix = *(GraphSection.blocks().begin()); - auto FixupAddress = orc::ExecutorAddr(FixupSection.sh_addr) + Rel.r_offset; + JITTargetAddress FixupAddress = FixupSection.sh_addr + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ @@ -322,9 +322,8 @@ class ELFJITLinker_x86_64 : public JITLinker { // If there's no defined symbol then create one. SectionRange SR(*GOTSection); if (SR.empty()) - GOTSymbol = - &G.addAbsoluteSymbol(ELFGOTSymbolName, orc::ExecutorAddr(), 0, - Linkage::Strong, Scope::Local, true); + GOTSymbol = &G.addAbsoluteSymbol(ELFGOTSymbolName, 0, 0, + Linkage::Strong, Scope::Local, true); else GOTSymbol = &G.addDefinedSymbol(*SR.getFirstBlock(), 0, ELFGOTSymbolName, 0, diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 8c920c5fe2dd..51dcc1c35fad 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -90,8 +90,8 @@ const char *getScopeName(Scope S) { } raw_ostream &operator<<(raw_ostream &OS, const Block &B) { - return OS << B.getAddress() << " -- " << (B.getAddress() + B.getSize()) - << ": " + return OS << formatv("{0:x16}", B.getAddress()) << " -- " + << formatv("{0:x8}", B.getAddress() + B.getSize()) << ": " << "size = " << formatv("{0:x8}", B.getSize()) << ", " << (B.isZeroFill() ? 
"zero-fill" : "content") << ", align = " << B.getAlignment() @@ -100,8 +100,9 @@ raw_ostream &operator<<(raw_ostream &OS, const Block &B) { } raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { - OS << Sym.getAddress() << " (" << (Sym.isDefined() ? "block" : "addressable") - << " + " << formatv("{0:x8}", Sym.getOffset()) + OS << formatv("{0:x16}", Sym.getAddress()) << " (" + << (Sym.isDefined() ? "block" : "addressable") << " + " + << formatv("{0:x8}", Sym.getOffset()) << "): size: " << formatv("{0:x8}", Sym.getSize()) << ", linkage: " << formatv("{0:6}", getLinkageName(Sym.getLinkage())) << ", scope: " << formatv("{0:8}", getScopeName(Sym.getScope())) << ", " @@ -112,9 +113,9 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName) { - OS << "edge@" << B.getAddress() + E.getOffset() << ": " << B.getAddress() - << " + " << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName - << " -> "; + OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": " + << formatv("{0:x16}", B.getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName << " -> "; auto &TargetSym = E.getTarget(); if (TargetSym.hasName()) @@ -122,16 +123,17 @@ void printEdge(raw_ostream &OS, const Block &B, const Edge &E, else { auto &TargetBlock = TargetSym.getBlock(); auto &TargetSec = TargetBlock.getSection(); - orc::ExecutorAddr SecAddress(~uint64_t(0)); + JITTargetAddress SecAddress = ~JITTargetAddress(0); for (auto *B : TargetSec.blocks()) if (B->getAddress() < SecAddress) SecAddress = B->getAddress(); - orc::ExecutorAddrDiff SecDelta = TargetSym.getAddress() - SecAddress; - OS << TargetSym.getAddress() << " (section " << TargetSec.getName(); + JITTargetAddress SecDelta = TargetSym.getAddress() - SecAddress; + OS << formatv("{0:x16}", TargetSym.getAddress()) << " (section " + << TargetSec.getName(); if (SecDelta) OS << " + " << formatv("{0:x}", 
SecDelta); - OS << " / block " << TargetBlock.getAddress(); + OS << " / block " << formatv("{0:x16}", TargetBlock.getAddress()); if (TargetSym.getOffset()) OS << " + " << formatv("{0:x}", TargetSym.getOffset()); OS << ")"; @@ -263,7 +265,7 @@ void LinkGraph::dump(raw_ostream &OS) { }); for (auto *B : SortedBlocks) { - OS << " block " << B->getAddress() + OS << " block " << formatv("{0:x16}", B->getAddress()) << " size = " << formatv("{0:x8}", B->getSize()) << ", align = " << B->getAlignment() << ", alignment-offset = " << B->getAlignmentOffset(); @@ -288,8 +290,9 @@ void LinkGraph::dump(raw_ostream &OS) { return LHS.getOffset() < RHS.getOffset(); }); for (auto &E : SortedEdges) { - OS << " " << B->getFixupAddress(E) << " (block + " - << formatv("{0:x8}", E.getOffset()) << "), addend = "; + OS << " " << formatv("{0:x16}", B->getFixupAddress(E)) + << " (block + " << formatv("{0:x8}", E.getOffset()) + << "), addend = "; if (E.getAddend() >= 0) OS << formatv("+{0:x8}", E.getAddend()); else @@ -312,14 +315,16 @@ void LinkGraph::dump(raw_ostream &OS) { OS << "Absolute symbols:\n"; if (!llvm::empty(absolute_symbols())) { for (auto *Sym : absolute_symbols()) - OS << " " << Sym->getAddress() << ": " << *Sym << "\n"; + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym + << "\n"; } else OS << " none\n"; OS << "\nExternal symbols:\n"; if (!llvm::empty(external_symbols())) { for (auto *Sym : external_symbols()) - OS << " " << Sym->getAddress() << ": " << *Sym << "\n"; + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym + << "\n"; } else OS << " none\n"; } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 35ee050c8566..706688aba4ec 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -192,7 +192,7 @@ JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const { // Identify 
unresolved external symbols. JITLinkContext::LookupMap UnresolvedExternals; for (auto *Sym : G->external_symbols()) { - assert(!Sym->getAddress() && + assert(Sym->getAddress() == 0 && "External has already been assigned an address"); assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); @@ -209,12 +209,11 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { for (auto *Sym : G->external_symbols()) { assert(Sym->getOffset() == 0 && "External symbol is not at the start of its addressable block"); - assert(!Sym->getAddress() && "Symbol already resolved"); + assert(Sym->getAddress() == 0 && "Symbol already resolved"); assert(!Sym->isDefined() && "Symbol being resolved is already defined"); auto ResultI = Result.find(Sym->getName()); if (ResultI != Result.end()) - Sym->getAddressable().setAddress( - orc::ExecutorAddr(ResultI->second.getAddress())); + Sym->getAddressable().setAddress(ResultI->second.getAddress()); else assert(Sym->getLinkage() == Linkage::Weak && "Failed to resolve non-weak reference"); @@ -224,7 +223,7 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { dbgs() << "Externals after applying lookup result:\n"; for (auto *Sym : G->external_symbols()) dbgs() << " " << Sym->getName() << ": " - << formatv("{0:x16}", Sym->getAddress().getValue()) << "\n"; + << formatv("{0:x16}", Sym->getAddress()) << "\n"; }); } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 164014612247..67fe6287e388 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -66,10 +66,10 @@ JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default; static Error runAllocAction(AllocActionCall &C) { using WrapperFnTy = CWrapperFunctionResult (*)(const void *, size_t); - auto *Fn = C.FnAddr.toPtr(); + auto *Fn = jitTargetAddressToPointer(C.FnAddr); - 
return toError( - Fn(C.CtxAddr.toPtr(), static_cast(C.CtxSize))); + return toError(Fn(jitTargetAddressToPointer(C.CtxAddr), + static_cast(C.CtxSize))); } BasicLayout::BasicLayout(LinkGraph &G) : G(G) { @@ -207,7 +207,7 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, std::make_unique("", Triple(), 0, support::native, nullptr); AllocGroupSmallMap ContentBlocks; - orc::ExecutorAddr NextAddr(0x100000); + JITTargetAddress NextAddr = 0x100000; for (auto &KV : Segments) { auto &AG = KV.first; auto &Seg = KV.second; @@ -220,8 +220,7 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, Sec.setMemDeallocPolicy(AG.getMemDeallocPolicy()); if (Seg.ContentSize != 0) { - NextAddr = - orc::ExecutorAddr(alignTo(NextAddr.getValue(), Seg.ContentAlign)); + NextAddr = alignTo(NextAddr, Seg.ContentAlign); auto &B = G->createMutableContentBlock(Sec, G->allocateBuffer(Seg.ContentSize), NextAddr, Seg.ContentAlign.value(), 0); @@ -427,8 +426,8 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, static_cast(SegsSizes->FinalizeSegs)}; } - auto NextStandardSegAddr = orc::ExecutorAddr::fromPtr(StandardSegsMem.base()); - auto NextFinalizeSegAddr = orc::ExecutorAddr::fromPtr(FinalizeSegsMem.base()); + auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegsMem.base()); + auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegsMem.base()); LLVM_DEBUG({ dbgs() << "InProcessMemoryManager allocated:\n"; @@ -455,7 +454,7 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, ? 
NextStandardSegAddr : NextFinalizeSegAddr; - Seg.WorkingMem = SegAddr.toPtr(); + Seg.WorkingMem = jitTargetAddressToPointer(SegAddr); Seg.Addr = SegAddr; SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); @@ -479,7 +478,8 @@ void InProcessMemoryManager::deallocate(std::vector Allocs, { std::lock_guard Lock(FinalizedAllocsMutex); for (auto &Alloc : Allocs) { - auto *FA = Alloc.release().toPtr(); + auto *FA = + jitTargetAddressToPointer(Alloc.release()); StandardSegmentsList.push_back(std::move(FA->StandardSegments)); if (!FA->DeallocActions.empty()) DeallocActionsList.push_back(std::move(FA->DeallocActions)); @@ -520,7 +520,7 @@ InProcessMemoryManager::createFinalizedAlloc( auto *FA = FinalizedAllocInfos.Allocate(); new (FA) FinalizedAllocInfo( {std::move(StandardSegments), std::move(DeallocActions)}); - return FinalizedAlloc(orc::ExecutorAddr::fromPtr(FA)); + return FinalizedAlloc(pointerToJITTargetAddress(FA)); } } // end namespace jitlink diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index 2fcf3e94b8b2..d588b63d9e88 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -134,7 +134,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() { memcpy(&NSec.SegName, Sec64.segname, 16); NSec.SegName[16] = '\0'; - NSec.Address = orc::ExecutorAddr(Sec64.addr); + NSec.Address = Sec64.addr; NSec.Size = Sec64.size; NSec.Alignment = 1ULL << Sec64.align; NSec.Flags = Sec64.flags; @@ -147,7 +147,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() { memcpy(&NSec.SegName, Sec32.segname, 16); NSec.SegName[16] = '\0'; - NSec.Address = orc::ExecutorAddr(Sec32.addr); + NSec.Address = Sec32.addr; NSec.Size = Sec32.size; NSec.Alignment = 1ULL << Sec32.align; NSec.Flags = Sec32.flags; @@ -287,8 +287,7 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() { if (!NSec) return 
NSec.takeError(); - if (orc::ExecutorAddr(Value) < NSec->Address || - orc::ExecutorAddr(Value) > NSec->Address + NSec->Size) + if (Value < NSec->Address || Value > NSec->Address + NSec->Size) return make_error("Address " + formatv("{0:x}", Value) + " for symbol " + *Name + " does not fall within section"); @@ -312,9 +311,8 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() { } void MachOLinkGraphBuilder::addSectionStartSymAndBlock( - unsigned SecIndex, Section &GraphSec, orc::ExecutorAddr Address, - const char *Data, orc::ExecutorAddrDiff Size, uint32_t Alignment, - bool IsLive) { + unsigned SecIndex, Section &GraphSec, uint64_t Address, const char *Data, + uint64_t Size, uint32_t Alignment, bool IsLive) { Block &B = Data ? G->createContentBlock(GraphSec, ArrayRef(Data, Size), Address, Alignment, 0) @@ -348,8 +346,7 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous common symbol at index " + Twine(KV.first)); NSym.GraphSymbol = &G->addCommonSymbol( - *NSym.Name, NSym.S, getCommonSection(), orc::ExecutorAddr(), - orc::ExecutorAddrDiff(NSym.Value), + *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value, 1ull << MachO::GET_COMM_ALIGN(NSym.Desc), NSym.Desc & MachO::N_NO_DEAD_STRIP); } else { @@ -367,8 +364,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous absolute symbol at index " + Twine(KV.first)); NSym.GraphSymbol = &G->addAbsoluteSymbol( - *NSym.Name, orc::ExecutorAddr(NSym.Value), 0, Linkage::Strong, - Scope::Default, NSym.Desc & MachO::N_NO_DEAD_STRIP); + *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default, + NSym.Desc & MachO::N_NO_DEAD_STRIP); break; case MachO::N_SECT: SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); @@ -471,13 +468,13 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { // If the section is non-empty but there is no symbol covering the start // address then add an anonymous one. 
- if (orc::ExecutorAddr(SecNSymStack.back()->Value) != NSec.Address) { - auto AnonBlockSize = - orc::ExecutorAddr(SecNSymStack.back()->Value) - NSec.Address; + if (SecNSymStack.back()->Value != NSec.Address) { + auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address; LLVM_DEBUG({ dbgs() << " Section start not covered by symbol. " - << "Creating anonymous block to cover [ " << NSec.Address - << " -- " << (NSec.Address + AnonBlockSize) << " ]\n"; + << "Creating anonymous block to cover [ " + << formatv("{0:x16}", NSec.Address) << " -- " + << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n"; }); addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address, NSec.Data, AnonBlockSize, NSec.Alignment, @@ -499,12 +496,12 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { } // BlockNSyms now contains the block symbols in reverse canonical order. - auto BlockStart = orc::ExecutorAddr(BlockSyms.front()->Value); - orc::ExecutorAddr BlockEnd = - SecNSymStack.empty() ? NSec.Address + NSec.Size - : orc::ExecutorAddr(SecNSymStack.back()->Value); - orc::ExecutorAddrDiff BlockOffset = BlockStart - NSec.Address; - orc::ExecutorAddrDiff BlockSize = BlockEnd - BlockStart; + JITTargetAddress BlockStart = BlockSyms.front()->Value; + JITTargetAddress BlockEnd = SecNSymStack.empty() + ? 
NSec.Address + NSec.Size + : SecNSymStack.back()->Value; + JITTargetAddress BlockOffset = BlockStart - NSec.Address; + JITTargetAddress BlockSize = BlockEnd - BlockStart; LLVM_DEBUG({ dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart) @@ -524,8 +521,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { BlockStart, NSec.Alignment, BlockStart % NSec.Alignment); - Optional LastCanonicalAddr; - auto SymEnd = BlockEnd; + Optional LastCanonicalAddr; + JITTargetAddress SymEnd = BlockEnd; while (!BlockSyms.empty()) { auto &NSym = *BlockSyms.back(); BlockSyms.pop_back(); @@ -533,9 +530,9 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; - auto &Sym = createStandardGraphSymbol( - NSym, B, SymEnd - orc::ExecutorAddr(NSym.Value), SectionIsText, - SymLive, LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)); + auto &Sym = createStandardGraphSymbol(NSym, B, SymEnd - NSym.Value, + SectionIsText, SymLive, + LastCanonicalAddr != NSym.Value); if (LastCanonicalAddr != Sym.getAddress()) { if (LastCanonicalAddr) @@ -571,12 +568,11 @@ Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym, dbgs() << "\n"; }); - auto SymOffset = orc::ExecutorAddr(NSym.Value) - B.getAddress(); - auto &Sym = - NSym.Name - ? G->addDefinedSymbol(B, SymOffset, *NSym.Name, Size, NSym.L, NSym.S, - IsText, IsNoDeadStrip) - : G->addAnonymousSymbol(B, SymOffset, Size, IsText, IsNoDeadStrip); + auto &Sym = NSym.Name ? 
G->addDefinedSymbol(B, NSym.Value - B.getAddress(), + *NSym.Name, Size, NSym.L, NSym.S, + IsText, IsNoDeadStrip) + : G->addAnonymousSymbol(B, NSym.Value - B.getAddress(), + Size, IsText, IsNoDeadStrip); NSym.GraphSymbol = &Sym; if (IsCanonical) @@ -639,12 +635,12 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; - orc::ExecutorAddrDiff BlockStart = 0; + JITTargetAddress BlockStart = 0; // Scan section for null characters. for (size_t I = 0; I != NSec.Size; ++I) if (NSec.Data[I] == '\0') { - orc::ExecutorAddrDiff BlockEnd = I + 1; + JITTargetAddress BlockEnd = I + 1; size_t BlockSize = BlockEnd - BlockStart; // Create a block for this null terminated string. auto &B = G->createContentBlock(*NSec.GraphSection, @@ -658,8 +654,7 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( }); // If there's no symbol at the start of this block then create one. - if (NSyms.empty() || - orc::ExecutorAddr(NSyms.back()->Value) != B.getAddress()) { + if (NSyms.empty() || NSyms.back()->Value != B.getAddress()) { auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false); setCanonicalSymbol(NSec, S); LLVM_DEBUG({ @@ -671,19 +666,18 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( } // Process any remaining symbols that point into this block. 
- auto LastCanonicalAddr = B.getAddress() + BlockEnd; - while (!NSyms.empty() && orc::ExecutorAddr(NSyms.back()->Value) < - B.getAddress() + BlockSize) { + JITTargetAddress LastCanonicalAddr = B.getAddress() + BlockEnd; + while (!NSyms.empty() && + NSyms.back()->Value < (B.getAddress() + BlockSize)) { auto &NSym = *NSyms.back(); - size_t SymSize = (B.getAddress() + BlockSize) - - orc::ExecutorAddr(NSyms.back()->Value); + size_t SymSize = (B.getAddress() + BlockSize) - NSyms.back()->Value; bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; bool IsCanonical = false; - if (LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)) { + if (LastCanonicalAddr != NSym.Value) { IsCanonical = true; - LastCanonicalAddr = orc::ExecutorAddr(NSym.Value); + LastCanonicalAddr = NSym.Value; } createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, SymLive, diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h index 2951a8533098..d29732ebdba8 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -71,13 +71,13 @@ class MachOLinkGraphBuilder { public: char SectName[17]; char SegName[17]; - orc::ExecutorAddr Address; + uint64_t Address = 0; uint64_t Size = 0; uint64_t Alignment = 0; uint32_t Flags = 0; const char *Data = nullptr; Section *GraphSection = nullptr; - std::map CanonicalSymbols; + std::map CanonicalSymbols; }; using SectionParserFunction = std::function; @@ -137,7 +137,7 @@ class MachOLinkGraphBuilder { /// Returns the symbol with the highest address not greater than the search /// address, or null if no such symbol exists. 
Symbol *getSymbolByAddress(NormalizedSection &NSec, - orc::ExecutorAddr Address) { + JITTargetAddress Address) { auto I = NSec.CanonicalSymbols.upper_bound(Address); if (I == NSec.CanonicalSymbols.begin()) return nullptr; @@ -147,7 +147,7 @@ class MachOLinkGraphBuilder { /// Returns the symbol with the highest address not greater than the search /// address, or an error if no such symbol exists. Expected findSymbolByAddress(NormalizedSection &NSec, - orc::ExecutorAddr Address) { + JITTargetAddress Address) { auto *Sym = getSymbolByAddress(NSec, Address); if (Sym) if (Address <= Sym->getAddress() + Sym->getSize()) @@ -193,9 +193,9 @@ class MachOLinkGraphBuilder { Section &getCommonSection(); void addSectionStartSymAndBlock(unsigned SecIndex, Section &GraphSec, - orc::ExecutorAddr Address, const char *Data, - orc::ExecutorAddrDiff Size, - uint32_t Alignment, bool IsLive); + uint64_t Address, const char *Data, + uint64_t Size, uint32_t Alignment, + bool IsLive); Error createNormalizedSections(); Error createNormalizedSymbols(); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index 844e76ab0542..f2a029d35cd5 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -109,7 +109,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Expected parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, const MachO::relocation_info &SubRI, - orc::ExecutorAddr FixupAddress, const char *FixupContent, + JITTargetAddress FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, object::relocation_iterator &RelEnd) { using namespace support; @@ -162,7 +162,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return ToSymbolSec.takeError(); ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address); assert(ToSymbol && "No symbol for section"); - FixupValue -= 
ToSymbol->getAddress().getValue(); + FixupValue -= ToSymbol->getAddress(); } MachOARM64RelocationKind DeltaKind; @@ -195,7 +195,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { for (auto &S : Obj.sections()) { - orc::ExecutorAddr SectionAddress(S.getAddress()); + JITTargetAddress SectionAddress = S.getAddress(); // Skip relocations virtual sections. if (S.isVirtual()) { @@ -234,8 +234,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return Kind.takeError(); // Find the address of the value to fix up. - orc::ExecutorAddr FixupAddress = - SectionAddress + (uint32_t)RI.r_address; + JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; LLVM_DEBUG({ dbgs() << " " << NSec->SectName << " + " << formatv("{0:x8}", RI.r_address) << ":\n"; @@ -250,7 +249,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { BlockToFix = &SymbolToFixOrErr->getBlock(); } - if (FixupAddress + orc::ExecutorAddrDiff(1ULL << RI.r_length) > + if (FixupAddress + static_cast(1ULL << RI.r_length) > BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( "Relocation content extends past end of fixup block"); @@ -291,7 +290,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { }); // Find the address of the value to fix up. 
- orc::ExecutorAddr PairedFixupAddress = + JITTargetAddress PairedFixupAddress = SectionAddress + (uint32_t)RI.r_address; if (PairedFixupAddress != FixupAddress) return make_error("Paired relocation points at " @@ -325,7 +324,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Addend = *(const ulittle64_t *)FixupContent; break; case Pointer64Anon: { - orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); + JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -436,7 +435,7 @@ class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 Symbol &createGOTEntry(Symbol &Target) { auto &GOTEntryBlock = G.createContentBlock( - getGOTSection(), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0); + getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); } @@ -458,8 +457,8 @@ class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 } Symbol &createPLTStub(Symbol &Target) { - auto &StubContentBlock = G.createContentBlock( - getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 1, 0); + auto &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); // Re-use GOT entries for stub targets. 
auto &GOTEntrySymbol = getGOTEntry(Target); StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0); @@ -546,12 +545,11 @@ class MachOJITLinker_arm64 : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Branch26: { - assert((FixupAddress.getValue() & 0x3) == 0 && - "Branch-inst is not 32-bit aligned"); + assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned"); int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); @@ -571,7 +569,7 @@ class MachOJITLinker_arm64 : public JITLinker { break; } case Pointer32: { - uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); if (Value > std::numeric_limits::max()) return makeTargetOutOfRangeError(G, B, E); *(ulittle32_t *)FixupPtr = Value; @@ -579,7 +577,7 @@ class MachOJITLinker_arm64 : public JITLinker { } case Pointer64: case Pointer64Anon: { - uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); *(ulittle64_t *)FixupPtr = Value; break; } @@ -589,10 +587,9 @@ class MachOJITLinker_arm64 : public JITLinker { assert((E.getKind() != GOTPage21 || E.getAddend() == 0) && "GOTPAGE21 with non-zero addend"); uint64_t TargetPage = - (E.getTarget().getAddress().getValue() + E.getAddend()) & - ~static_cast(4096 - 1); - uint64_t PCPage = - FixupAddress.getValue() & ~static_cast(4096 - 1); + (E.getTarget().getAddress() + E.getAddend()) & + ~static_cast(4096 - 1); + uint64_t PCPage = FixupAddress & ~static_cast(4096 - 1); int64_t PageDelta = TargetPage - PCPage; if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1)) @@ -609,7 +606,7 @@ class MachOJITLinker_arm64 : public JITLinker { } case 
PageOffset12: { uint64_t TargetOffset = - (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff; + (E.getTarget().getAddress() + E.getAddend()) & 0xfff; uint32_t RawInstr = *(ulittle32_t *)FixupPtr; unsigned ImmShift = getPageOffset12Shift(RawInstr); @@ -630,7 +627,7 @@ class MachOJITLinker_arm64 : public JITLinker { assert((RawInstr & 0xfffffc00) == 0xf9400000 && "RawInstr isn't a 64-bit LDR immediate"); - uint32_t TargetOffset = E.getTarget().getAddress().getValue() & 0xfff; + uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff; assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned"); uint32_t EncodedImm = (TargetOffset >> 3) << 10; uint32_t FixedInstr = RawInstr | EncodedImm; @@ -638,8 +635,7 @@ class MachOJITLinker_arm64 : public JITLinker { break; } case LDRLiteral19: { - assert((FixupAddress.getValue() & 0x3) == 0 && - "LDR is not 32-bit aligned"); + assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned"); assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); uint32_t RawInstr = *(ulittle32_t *)FixupPtr; assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 82afaa3aa3c5..a4fcd3b9a5f5 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -119,7 +119,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { // returns the edge kind and addend to be used. 
Expected parsePairRelocation( Block &BlockToFix, MachONormalizedRelocationType SubtractorKind, - const MachO::relocation_info &SubRI, orc::ExecutorAddr FixupAddress, + const MachO::relocation_info &SubRI, JITTargetAddress FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, object::relocation_iterator &RelEnd) { using namespace support; @@ -172,7 +172,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { return ToSymbolSec.takeError(); ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address); assert(ToSymbol && "No symbol for section"); - FixupValue -= ToSymbol->getAddress().getValue(); + FixupValue -= ToSymbol->getAddress(); } Edge::Kind DeltaKind; @@ -206,7 +206,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { for (auto &S : Obj.sections()) { - orc::ExecutorAddr SectionAddress(S.getAddress()); + JITTargetAddress SectionAddress = S.getAddress(); // Skip relocations virtual sections. if (S.isVirtual()) { @@ -241,7 +241,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { MachO::relocation_info RI = getRelocationInfo(RelItr); // Find the address of the value to fix up. 
- auto FixupAddress = SectionAddress + (uint32_t)RI.r_address; + JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; LLVM_DEBUG({ dbgs() << " " << NSec->SectName << " + " @@ -257,7 +257,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { BlockToFix = &SymbolToFixOrErr->getBlock(); } - if (FixupAddress + orc::ExecutorAddrDiff(1ULL << RI.r_length) > + if (FixupAddress + static_cast(1ULL << RI.r_length) > BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( "Relocation extends past end of fixup block"); @@ -343,7 +343,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { Kind = x86_64::Pointer64; break; case MachOPointer64Anon: { - orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); + JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -367,8 +367,8 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { Kind = x86_64::Delta32; break; case MachOPCRel32Anon: { - orc::ExecutorAddr TargetAddress(FixupAddress + 4 + - *(const little32_t *)FixupContent); + JITTargetAddress TargetAddress = + FixupAddress + 4 + *(const little32_t *)FixupContent; auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -384,10 +384,10 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { case MachOPCRel32Minus1Anon: case MachOPCRel32Minus2Anon: case MachOPCRel32Minus4Anon: { - orc::ExecutorAddrDiff Delta = - 4 + orc::ExecutorAddrDiff( + JITTargetAddress Delta = + 4 + static_cast( 1ULL << (*MachORelocKind - MachOPCRel32Minus1Anon)); - orc::ExecutorAddr TargetAddress = + JITTargetAddress TargetAddress = FixupAddress + Delta + *(const little32_t *)FixupContent; auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) diff --git 
a/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h b/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h index 6e325f92bafb..6e9df9c75a65 100644 --- a/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h @@ -47,16 +47,16 @@ class PerGraphGOTAndPLTStubsBuilder { if (impl().isGOTEdgeToFix(E)) { LLVM_DEBUG({ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) - << " edge at " << B->getFixupAddress(E) << " (" - << B->getAddress() << " + " + << " edge at " << formatv("{0:x}", B->getFixupAddress(E)) + << " (" << formatv("{0:x}", B->getAddress()) << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); impl().fixGOTEdge(E, getGOTEntry(E.getTarget())); } else if (impl().isExternalBranchEdge(E)) { LLVM_DEBUG({ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) - << " edge at " << B->getFixupAddress(E) << " (" - << B->getAddress() << " + " + << " edge at " << formatv("{0:x}", B->getFixupAddress(E)) + << " (" << formatv("{0:x}", B->getAddress()) << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); impl().fixPLTEdge(E, getPLTStub(E.getTarget())); diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp index df9979b47e88..48521280059d 100644 --- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp @@ -95,10 +95,10 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) { assert(GOTEntryBlock.edges_size() == 1 && "GOT entry should only have one outgoing edge"); auto &GOTTarget = GOTEntryBlock.edges().begin()->getTarget(); - orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); - orc::ExecutorAddr EdgeAddr = B->getFixupAddress(E); + JITTargetAddress TargetAddr = GOTTarget.getAddress(); + JITTargetAddress EdgeAddr = B->getFixupAddress(E); int64_t Displacement = TargetAddr - EdgeAddr + 4; - bool TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr.getValue()); + bool 
TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr); bool DisplacementInRangeForImmS32 = isInRangeForImmS32(Displacement); // If both of the Target and displacement is out of range, then @@ -165,8 +165,8 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) { "GOT block should only have one outgoing edge"); auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - orc::ExecutorAddr EdgeAddr = B->getAddress() + E.getOffset(); - orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); + JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); + JITTargetAddress TargetAddr = GOTTarget.getAddress(); int64_t Displacement = TargetAddr - EdgeAddr + 4; if (isInRangeForImmS32(Displacement)) { diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp index 4ff6b7fd54df..fcfe389f82a8 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp @@ -67,9 +67,9 @@ class ELFDebugObjectSection : public DebugObjectSection { template void ELFDebugObjectSection::setTargetMemoryRange(SectionRange Range) { // Only patch load-addresses for executable and data sections. 
- if (isTextOrDataSection()) - Header->sh_addr = - static_cast(Range.getStart().getValue()); + if (isTextOrDataSection()) { + Header->sh_addr = static_cast(Range.getStart()); + } } template diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp index 92657805efdd..fe62138c790c 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp @@ -129,8 +129,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { Section *Sec = nullptr; StringRef SegName; StringRef SecName; - uint64_t Alignment = 0; - orc::ExecutorAddr StartAddr; + JITTargetAddress Alignment = 0; + JITTargetAddress StartAddr = 0; uint64_t Size = 0; }; @@ -153,8 +153,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { return Error::success(); } DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos), - Sec.getName().substr(SepPos + 1), 0, - orc::ExecutorAddr(), 0}); + Sec.getName().substr(SepPos + 1), 0, 0}); } else { NonDebugSections.push_back(&Sec); @@ -183,11 +182,11 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { size_t ContainerBlockSize = sizeof(typename MachOTraits::Header) + SegmentLCSize; auto ContainerBlockContent = G.allocateBuffer(ContainerBlockSize); - MachOContainerBlock = &G.createMutableContentBlock( - SDOSec, ContainerBlockContent, orc::ExecutorAddr(), 8, 0); + MachOContainerBlock = + &G.createMutableContentBlock(SDOSec, ContainerBlockContent, 0, 8, 0); // Copy debug section blocks and symbols. 
- orc::ExecutorAddr NextBlockAddr(MachOContainerBlock->getSize()); + JITTargetAddress NextBlockAddr = MachOContainerBlock->getSize(); for (auto &SI : DebugSecInfos) { assert(!llvm::empty(SI.Sec->blocks()) && "Empty debug info section?"); @@ -220,8 +219,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { G.mergeSections(SDOSec, *SI.Sec); SI.Sec = nullptr; } - size_t DebugSectionsSize = - NextBlockAddr - orc::ExecutorAddr(MachOContainerBlock->getSize()); + size_t DebugSectionsSize = NextBlockAddr - MachOContainerBlock->getSize(); // Write MachO header and debug section load commands. MachOStructWriter Writer(MachOContainerBlock->getAlreadyMutableContent()); @@ -268,9 +266,9 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { memset(&Sec, 0, sizeof(Sec)); memcpy(Sec.sectname, SI.SecName.data(), SI.SecName.size()); memcpy(Sec.segname, SI.SegName.data(), SI.SegName.size()); - Sec.addr = SI.StartAddr.getValue(); + Sec.addr = SI.StartAddr; Sec.size = SI.Size; - Sec.offset = SI.StartAddr.getValue(); + Sec.offset = SI.StartAddr; Sec.align = SI.Alignment; Sec.reloff = 0; Sec.nreloc = 0; @@ -338,7 +336,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { memset(&SecCmd, 0, sizeof(SecCmd)); memcpy(SecCmd.sectname, SecName.data(), SecName.size()); memcpy(SecCmd.segname, SegName.data(), SegName.size()); - SecCmd.addr = R.getStart().getValue(); + SecCmd.addr = R.getStart(); SecCmd.size = R.getSize(); SecCmd.offset = 0; SecCmd.align = R.getFirstBlock()->getAlignment(); @@ -350,7 +348,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { SectionRange R(MachOContainerBlock->getSection()); G.allocActions().push_back( - {{RegisterActionAddr, R.getStart(), R.getSize()}, {}}); + {{RegisterActionAddr.getValue(), R.getStart(), R.getSize()}, {}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp 
b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index e25d7c4651a9..eded54f4bfb3 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -58,8 +58,7 @@ class DSOHandleMaterializationUnit : public MaterializationUnit { auto &DSOHandleSection = G->createSection(".data.__dso_handle", jitlink::MemProt::Read); auto &DSOHandleBlock = G->createContentBlock( - DSOHandleSection, getDSOHandleContent(PointerSize), orc::ExecutorAddr(), - 8, 0); + DSOHandleSection, getDSOHandleContent(PointerSize), 0, 8, 0); auto &DSOHandleSymbol = G->addDefinedSymbol( DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(), jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); @@ -376,7 +375,7 @@ void ELFNixPlatform::rt_getDeinitializers( { std::lock_guard Lock(PlatformMutex); - auto I = HandleAddrToJITDylib.find(Handle); + auto I = HandleAddrToJITDylib.find(Handle.getValue()); if (I != HandleAddrToJITDylib.end()) JD = I->second; } @@ -407,7 +406,7 @@ void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HandleAddrToJITDylib.find(Handle); + auto I = HandleAddrToJITDylib.find(Handle.getValue()); if (I != HandleAddrToJITDylib.end()) JD = I->second; } @@ -631,11 +630,12 @@ void ELFNixPlatform::ELFNixPlatformPlugin::addDSOHandleSupportPasses( assert(I != G.defined_symbols().end() && "Missing DSO handle symbol"); { std::lock_guard Lock(MP.PlatformMutex); - auto HandleAddr = (*I)->getAddress(); + JITTargetAddress HandleAddr = (*I)->getAddress(); MP.HandleAddrToJITDylib[HandleAddr] = &JD; assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); MP.InitSeqs.insert(std::make_pair( - &JD, ELFNixJITDylibInitializers(JD.getName(), HandleAddr))); + &JD, + ELFNixJITDylibInitializers(JD.getName(), ExecutorAddr(HandleAddr)))); } return Error::success(); }); diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp 
b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp index 99cacd1731a2..4c0fab8aa9fa 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp @@ -56,17 +56,17 @@ EPCEHFrameRegistrar::Create(ExecutionSession &ES) { ExecutorAddr(DeregisterEHFrameWrapperFnAddr)); } -Error EPCEHFrameRegistrar::registerEHFrames(ExecutorAddr EHFrameSectionAddr, +Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { return ES.callSPSWrapper( - RegisterEHFrameWrapperFnAddr, EHFrameSectionAddr, + RegisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr), static_cast(EHFrameSectionSize)); } -Error EPCEHFrameRegistrar::deregisterEHFrames(ExecutorAddr EHFrameSectionAddr, - size_t EHFrameSectionSize) { +Error EPCEHFrameRegistrar::deregisterEHFrames( + JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { return ES.callSPSWrapper( - DeregisterEHFrameWrapperFnAddr, EHFrameSectionAddr, + DeregisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr), static_cast(EHFrameSectionSize)); } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp index 247be794ad56..9b712cb8f7ca 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp @@ -80,7 +80,7 @@ class EPCGenericJITLinkMemoryManager::InFlightAlloc } else if (FinalizeErr) OnFinalize(std::move(FinalizeErr)); else - OnFinalize(FinalizedAlloc(AllocAddr)); + OnFinalize(FinalizedAlloc(AllocAddr.getValue())); }, Parent.SAs.Allocator, std::move(FR)); } @@ -161,7 +161,7 @@ void EPCGenericJITLinkMemoryManager::completeAllocation( const auto &AG = KV.first; auto &Seg = KV.second; - Seg.Addr = NextSegAddr; + Seg.Addr = NextSegAddr.getValue(); KV.second.WorkingMem = BL.getGraph().allocateBuffer(Seg.ContentSize).data(); NextSegAddr += 
ExecutorAddrDiff( alignTo(Seg.ContentSize + Seg.ZeroFillSize, EPC.getPageSize())); diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index b901a2d2da23..818b6b52ff83 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -119,12 +119,10 @@ Error EPCTrampolinePool::grow() { unsigned NumTrampolines = TrampolinesPerPage; auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - EPCIU.getABISupport().writeTrampolines(SegInfo.WorkingMem.data(), - SegInfo.Addr.getValue(), - ResolverAddress, NumTrampolines); + EPCIU.getABISupport().writeTrampolines( + SegInfo.WorkingMem.data(), SegInfo.Addr, ResolverAddress, NumTrampolines); for (unsigned I = 0; I < NumTrampolines; ++I) - AvailableTrampolines.push_back(SegInfo.Addr.getValue() + - (I * TrampolineSize)); + AvailableTrampolines.push_back(SegInfo.Addr + (I * TrampolineSize)); auto FA = Alloc->finalize(); if (!FA) @@ -302,15 +300,15 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, return Alloc.takeError(); auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr.getValue(), - ReentryFnAddr, ReentryCtxAddr); + ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr, ReentryFnAddr, + ReentryCtxAddr); auto FA = Alloc->finalize(); if (!FA) return FA.takeError(); ResolverBlock = std::move(*FA); - return SegInfo.Addr.getValue(); + return SegInfo.Addr; } std::unique_ptr @@ -371,9 +369,8 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { auto StubSeg = Alloc->getSegInfo(StubProt); auto PtrSeg = Alloc->getSegInfo(PtrProt); - ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), - StubSeg.Addr.getValue(), - PtrSeg.Addr.getValue(), NumStubsToAllocate); + ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), StubSeg.Addr, + PtrSeg.Addr, NumStubsToAllocate); auto FA = 
Alloc->finalize(); if (!FA) @@ -384,8 +381,8 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { auto StubExecutorAddr = StubSeg.Addr; auto PtrExecutorAddr = PtrSeg.Addr; for (unsigned I = 0; I != NumStubsToAllocate; ++I) { - AvailableIndirectStubs.push_back(IndirectStubInfo( - StubExecutorAddr.getValue(), PtrExecutorAddr.getValue())); + AvailableIndirectStubs.push_back( + IndirectStubInfo(StubExecutorAddr, PtrExecutorAddr)); StubExecutorAddr += ABI->getStubSize(); PtrExecutorAddr += ABI->getPointerSize(); } diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 7a71d2f781d7..f427271bb45d 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -410,7 +410,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, while (I < Content.size()) { MCInst Instr; uint64_t InstrSize = 0; - uint64_t InstrStart = SymAddress.getValue() + I; + uint64_t InstrStart = SymAddress + I; auto DecodeStatus = Disassembler.getInstruction( Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream); if (DecodeStatus != MCDisassembler::Success) { @@ -426,7 +426,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, // Check for a PC-relative address equal to the symbol itself. 
auto PCRelAddr = MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize); - if (!PCRelAddr || *PCRelAddr != SymAddress.getValue()) + if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress) continue; auto RelocOffInInstr = @@ -438,8 +438,8 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, continue; } - auto RelocOffInBlock = orc::ExecutorAddr(InstrStart) + *RelocOffInInstr - - SymAddress + Sym.getOffset(); + auto RelocOffInBlock = + InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset(); if (ExistingRelocations.contains(RelocOffInBlock)) continue; diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index ab978ed3f3fc..fb2e90e1c9c5 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -106,8 +106,7 @@ class MachOHeaderMaterializationUnit : public MaterializationUnit { auto HeaderContent = G.allocateString( StringRef(reinterpret_cast(&Hdr), sizeof(Hdr))); - return G.createContentBlock(HeaderSection, HeaderContent, - orc::ExecutorAddr(), 8, 0); + return G.createContentBlock(HeaderSection, HeaderContent, 0, 8, 0); } static MaterializationUnit::Interface @@ -440,7 +439,7 @@ void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle); + auto I = HeaderAddrToJITDylib.find(Handle.getValue()); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } @@ -470,7 +469,7 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle); + auto I = HeaderAddrToJITDylib.find(Handle.getValue()); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } @@ -662,11 +661,11 @@ Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol( auto &JD = MR.getTargetJITDylib(); std::lock_guard Lock(MP.PlatformMutex); - auto 
HeaderAddr = (*I)->getAddress(); + JITTargetAddress HeaderAddr = (*I)->getAddress(); MP.HeaderAddrToJITDylib[HeaderAddr] = &JD; assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); - MP.InitSeqs.insert( - std::make_pair(&JD, MachOJITDylibInitializers(JD.getName(), HeaderAddr))); + MP.InitSeqs.insert(std::make_pair( + &JD, MachOJITDylibInitializers(JD.getName(), ExecutorAddr(HeaderAddr)))); return Error::success(); } @@ -793,7 +792,7 @@ Error MachOPlatform::MachOPlatformPlugin::registerInitSections( if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) { if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart()) - ObjCImageInfoAddr = Addr; + ObjCImageInfoAddr.setValue(Addr); } for (auto InitSectionName : InitSectionNames) @@ -880,10 +879,11 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) { jitlink::SectionRange R(*EHFrameSection); if (!R.empty()) - G.allocActions().push_back({{MP.orc_rt_macho_register_ehframe_section, - R.getStart(), R.getSize()}, - {MP.orc_rt_macho_deregister_ehframe_section, - R.getStart(), R.getSize()}}); + G.allocActions().push_back( + {{MP.orc_rt_macho_register_ehframe_section.getValue(), R.getStart(), + R.getSize()}, + {MP.orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(), + R.getSize()}}); } // Get a pointer to the thread data section if there is one. 
It will be used @@ -913,10 +913,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( inconvertibleErrorCode()); G.allocActions().push_back( - {{MP.orc_rt_macho_register_thread_data_section, R.getStart(), - R.getSize()}, - {MP.orc_rt_macho_deregister_thread_data_section, R.getStart(), - R.getSize()}}); + {{MP.orc_rt_macho_register_thread_data_section.getValue(), + R.getStart(), R.getSize()}, + {MP.orc_rt_macho_deregister_thread_data_section.getValue(), + R.getStart(), R.getSize()}}); } } return Error::success(); @@ -963,8 +963,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHSectionsPhase1( // Otherwise, add allocation actions to the graph to register eh-frames for // this object. G.allocActions().push_back( - {{orc_rt_macho_register_ehframe_section, R.getStart(), R.getSize()}, - {orc_rt_macho_deregister_ehframe_section, R.getStart(), R.getSize()}}); + {{orc_rt_macho_register_ehframe_section.getValue(), R.getStart(), + R.getSize()}, + {orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(), + R.getSize()}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 8b4347f5cf52..0d6a33c5685e 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -217,7 +217,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Flags |= JITSymbolFlags::Exported; InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); + JITEvaluatedSymbol(Sym->getAddress(), Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -235,7 +235,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { if (Sym->getLinkage() == Linkage::Weak) Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = - 
JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); + JITEvaluatedSymbol(Sym->getAddress(), Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -743,7 +743,7 @@ void EHFrameRegistrationPlugin::modifyPassConfig( PassConfiguration &PassConfig) { PassConfig.PostFixupPasses.push_back(createEHFrameRecorderPass( - G.getTargetTriple(), [this, &MR](ExecutorAddr Addr, size_t Size) { + G.getTargetTriple(), [this, &MR](JITTargetAddress Addr, size_t Size) { if (Addr) { std::lock_guard Lock(EHFramePluginMutex); assert(!InProcessLinks.count(&MR) && diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp index d79dbc410e8e..5efdff65f566 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp @@ -120,8 +120,8 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { // then add it to the GOT entry info table. 
if (Sym->getSize() != 0) { if (auto TS = getELFGOTTarget(G, Sym->getBlock())) - FileInfo.GOTEntryInfos[TS->getName()] = { - Sym->getSymbolContent(), Sym->getAddress().getValue()}; + FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), + Sym->getAddress()}; else return TS.takeError(); } @@ -133,7 +133,7 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (auto TS = getELFStubTarget(G, Sym->getBlock())) FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress().getValue()}; + Sym->getAddress()}; else return TS.takeError(); SectionContainsContent = true; @@ -141,19 +141,18 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (Sym->hasName()) { if (Sym->isSymbolZeroFill()) { - S.SymbolInfos[Sym->getName()] = {Sym->getSize(), - Sym->getAddress().getValue()}; + S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; SectionContainsZeroFill = true; } else { S.SymbolInfos[Sym->getName()] = {Sym->getSymbolContent(), - Sym->getAddress().getValue()}; + Sym->getAddress()}; SectionContainsContent = true; } } } - auto SecAddr = FirstSym->getAddress(); - auto SecSize = + JITTargetAddress SecAddr = FirstSym->getAddress(); + uint64_t SecSize = (LastSym->getBlock().getAddress() + LastSym->getBlock().getSize()) - SecAddr; @@ -162,11 +161,11 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { "supported yet", inconvertibleErrorCode()); if (SectionContainsZeroFill) - FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr.getValue()}; + FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr}; else FileInfo.SectionInfos[Sec.getName()] = { ArrayRef(FirstSym->getBlock().getContent().data(), SecSize), - SecAddr.getValue()}; + SecAddr}; } return Error::success(); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp index ed7fd1a57a72..7bd6bded5b7f 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp @@ 
-118,8 +118,8 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { inconvertibleErrorCode()); if (auto TS = getMachOGOTTarget(G, Sym->getBlock())) - FileInfo.GOTEntryInfos[TS->getName()] = { - Sym->getSymbolContent(), Sym->getAddress().getValue()}; + FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), + Sym->getAddress()}; else return TS.takeError(); SectionContainsContent = true; @@ -130,25 +130,24 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { if (auto TS = getMachOStubTarget(G, Sym->getBlock())) FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress().getValue()}; + Sym->getAddress()}; else return TS.takeError(); SectionContainsContent = true; } else if (Sym->hasName()) { if (Sym->isSymbolZeroFill()) { - S.SymbolInfos[Sym->getName()] = {Sym->getSize(), - Sym->getAddress().getValue()}; + S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; SectionContainsZeroFill = true; } else { S.SymbolInfos[Sym->getName()] = {Sym->getSymbolContent(), - Sym->getAddress().getValue()}; + Sym->getAddress()}; SectionContainsContent = true; } } } - auto SecAddr = FirstSym->getAddress(); - auto SecSize = + JITTargetAddress SecAddr = FirstSym->getAddress(); + uint64_t SecSize = (LastSym->getBlock().getAddress() + LastSym->getBlock().getSize()) - SecAddr; @@ -157,11 +156,11 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { "supported yet", inconvertibleErrorCode()); if (SectionContainsZeroFill) - FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr.getValue()}; + FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr}; else FileInfo.SectionInfos[Sec.getName()] = { ArrayRef(FirstSym->getBlock().getContent().data(), SecSize), - SecAddr.getValue()}; + SecAddr}; } return Error::success(); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 7678a85b836f..e6588090625e 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ 
b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -327,7 +327,7 @@ static uint64_t computeTotalBlockSizes(LinkGraph &G) { } static void dumpSectionContents(raw_ostream &OS, LinkGraph &G) { - constexpr orc::ExecutorAddrDiff DumpWidth = 16; + constexpr JITTargetAddress DumpWidth = 16; static_assert(isPowerOf2_64(DumpWidth), "DumpWidth must be a power of two"); // Put sections in address order. @@ -360,13 +360,12 @@ static void dumpSectionContents(raw_ostream &OS, LinkGraph &G) { return LHS->getAddress() < RHS->getAddress(); }); - orc::ExecutorAddr NextAddr(Syms.front()->getAddress().getValue() & - ~(DumpWidth - 1)); + JITTargetAddress NextAddr = Syms.front()->getAddress() & ~(DumpWidth - 1); for (auto *Sym : Syms) { bool IsZeroFill = Sym->getBlock().isZeroFill(); - auto SymStart = Sym->getAddress(); - auto SymSize = Sym->getSize(); - auto SymEnd = SymStart + SymSize; + JITTargetAddress SymStart = Sym->getAddress(); + JITTargetAddress SymSize = Sym->getSize(); + JITTargetAddress SymEnd = SymStart + SymSize; const uint8_t *SymData = IsZeroFill ? 
nullptr : reinterpret_cast( Sym->getSymbolContent().data()); @@ -434,8 +433,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { assert(BL.graphAllocActions().empty() && "Support function calls not supported yet"); - OnFinalized( - FinalizedAlloc(ExecutorAddr::fromPtr(new FinalizedAllocInfo()))); + OnFinalized(FinalizedAlloc( + pointerToJITTargetAddress(new FinalizedAllocInfo()))); } void abandon(OnAbandonedFunction OnAbandoned) override { @@ -501,8 +500,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { sys::MemoryBlock FinalizeSegs(AllocBase + SegsSizes->StandardSegs, SegsSizes->FinalizeSegs); - auto NextStandardSegAddr = ExecutorAddr::fromPtr(StandardSegs.base()); - auto NextFinalizeSegAddr = ExecutorAddr::fromPtr(FinalizeSegs.base()); + auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegs.base()); + auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegs.base()); LLVM_DEBUG({ dbgs() << "JITLinkSlabAllocator allocated:\n"; @@ -533,7 +532,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { dbgs() << " " << Group << " -> " << formatv("{0:x16}", SegAddr) << "\n"; }); - Seg.WorkingMem = SegAddr.toPtr(); + Seg.WorkingMem = jitTargetAddressToPointer(SegAddr); Seg.Addr = SegAddr + NextSlabDelta; SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); @@ -560,7 +559,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { Error Err = Error::success(); for (auto &FA : FinalizedAllocs) { std::unique_ptr FAI( - FA.release().toPtr()); + jitTargetAddressToPointer(FA.release())); // FIXME: Run dealloc actions. @@ -614,8 +613,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { // Calculate the target address delta to link as-if slab were at // SlabAddress. 
if (SlabAddress != ~0ULL) - NextSlabDelta = ExecutorAddr(SlabAddress) - - ExecutorAddr::fromPtr(SlabRemaining.base()); + NextSlabDelta = + SlabAddress - pointerToJITTargetAddress(SlabRemaining.base()); } Error freeBlock(sys::MemoryBlock MB) { diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index fdc6fbdff19b..3cc6a8ad0fe6 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -76,16 +76,15 @@ TEST(LinkGraphTest, AddressAccess) { getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", MemProt::Read | MemProt::Write); - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); auto &S1 = G.addDefinedSymbol(B1, 4, "S1", 4, Linkage::Strong, Scope::Default, false, false); B1.addEdge(Edge::FirstRelocation, 8, S1, 0); auto &E1 = *B1.edges().begin(); - EXPECT_EQ(B1.getAddress(), B1Addr) << "Incorrect block address"; - EXPECT_EQ(S1.getAddress(), B1Addr + 4) << "Incorrect symbol address"; - EXPECT_EQ(B1.getFixupAddress(E1), B1Addr + 8) << "Incorrect fixup address"; + EXPECT_EQ(B1.getAddress(), 0x1000U) << "Incorrect block address"; + EXPECT_EQ(S1.getAddress(), 0x1004U) << "Incorrect symbol address"; + EXPECT_EQ(B1.getFixupAddress(E1), 0x1008U) << "Incorrect fixup address"; } TEST(LinkGraphTest, BlockAndSymbolIteration) { @@ -93,20 +92,16 @@ TEST(LinkGraphTest, BlockAndSymbolIteration) { LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", MemProt::Read | MemProt::Write); - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); - orc::ExecutorAddr B2Addr(0x1000); - auto &B2 = G.createContentBlock(Sec1, BlockContent, B2Addr, 8, 0); + auto &B1 = 
G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + auto &B2 = G.createContentBlock(Sec1, BlockContent, 0x2000, 8, 0); auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, false, false); auto &S2 = G.addDefinedSymbol(B2, 4, "S2", 4, Linkage::Strong, Scope::Default, false, false); auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); - orc::ExecutorAddr B3Addr(0x3000); - auto &B3 = G.createContentBlock(Sec2, BlockContent, B3Addr, 8, 0); - orc::ExecutorAddr B4Addr(0x4000); - auto &B4 = G.createContentBlock(Sec2, BlockContent, B4Addr, 8, 0); + auto &B3 = G.createContentBlock(Sec2, BlockContent, 0x3000, 8, 0); + auto &B4 = G.createContentBlock(Sec2, BlockContent, 0x4000, 8, 0); auto &S3 = G.addDefinedSymbol(B3, 0, "S3", 4, Linkage::Strong, Scope::Default, false, false); auto &S4 = G.addDefinedSymbol(B4, 4, "S4", 4, Linkage::Strong, Scope::Default, @@ -146,8 +141,7 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. - orc::ExecutorAddr BAddr(0x1000); - auto &B = G.createContentBlock(Sec, BlockContent, BAddr, 8, 0); + auto &B = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); EXPECT_FALSE(B.isContentMutable()) << "Content unexpectedly mutable"; EXPECT_EQ(B.getContent().data(), BlockContent.data()) @@ -202,8 +196,7 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { << "Unexpected block content size"; // Create an initially mutable block. - auto &B2 = G.createMutableContentBlock(Sec, MutableContent, - orc::ExecutorAddr(0x10000), 8, 0); + auto &B2 = G.createMutableContentBlock(Sec, MutableContent, 0x10000, 8, 0); EXPECT_TRUE(B2.isContentMutable()) << "Expected B2 content to be mutable"; } @@ -215,8 +208,7 @@ TEST(LinkGraphTest, MakeExternal) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. 
- auto &B1 = - G.createContentBlock(Sec, BlockContent, orc::ExecutorAddr(0x1000), 8, 0); + auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); // Add a symbol to the block. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, @@ -226,8 +218,7 @@ TEST(LinkGraphTest, MakeExternal) { EXPECT_FALSE(S1.isExternal()) << "Symbol should not be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; EXPECT_TRUE(&S1.getBlock()) << "Symbol should have a non-null block"; - EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr(0x1000)) - << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), 0x1000U) << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 1U) @@ -244,8 +235,7 @@ TEST(LinkGraphTest, MakeExternal) { EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined"; EXPECT_TRUE(S1.isExternal()) << "Symbol should be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; - EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr()) - << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), 0U) << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 0U) @@ -263,8 +253,7 @@ TEST(LinkGraphTest, MakeDefined) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); + auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); // Add an external symbol. 
auto &S1 = G.addExternalSymbol("S1", 4, Linkage::Strong); @@ -272,8 +261,7 @@ TEST(LinkGraphTest, MakeDefined) { EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined"; EXPECT_TRUE(S1.isExternal()) << "Symbol should be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; - EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr()) - << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), 0U) << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 0U) @@ -291,8 +279,7 @@ TEST(LinkGraphTest, MakeDefined) { EXPECT_FALSE(S1.isExternal()) << "Symbol should not be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; EXPECT_TRUE(&S1.getBlock()) << "Symbol should have a non-null block"; - EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr(0x1000U)) - << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), 0x1000U) << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 1U) @@ -309,13 +296,10 @@ TEST(LinkGraphTest, TransferDefinedSymbol) { getGenericEdgeKindName); auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); - // Create initial blocks. - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); - orc::ExecutorAddr B2Addr(0x2000); - auto &B2 = G.createContentBlock(Sec, BlockContent, B2Addr, 8, 0); - orc::ExecutorAddr B3Addr(0x3000); - auto &B3 = G.createContentBlock(Sec, BlockContent.slice(0, 32), B3Addr, 8, 0); + // Create an initial block. + auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + auto &B2 = G.createContentBlock(Sec, BlockContent, 0x2000, 8, 0); + auto &B3 = G.createContentBlock(Sec, BlockContent.slice(0, 32), 0x3000, 8, 0); // Add a symbol. 
auto &S1 = G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, @@ -345,10 +329,8 @@ TEST(LinkGraphTest, TransferDefinedSymbolAcrossSections) { auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); // Create blocks in each section. - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); - orc::ExecutorAddr B2Addr(0x2000); - auto &B2 = G.createContentBlock(Sec2, BlockContent, B2Addr, 8, 0); + auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + auto &B2 = G.createContentBlock(Sec2, BlockContent, 0x2000, 8, 0); // Add a symbol to section 1. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, @@ -377,10 +359,8 @@ TEST(LinkGraphTest, TransferBlock) { auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); // Create an initial block. - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); - orc::ExecutorAddr B2Addr(0x2000); - auto &B2 = G.createContentBlock(Sec1, BlockContent, B2Addr, 8, 0); + auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + auto &B2 = G.createContentBlock(Sec1, BlockContent, 0x2000, 8, 0); // Add some symbols on B1... G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, Scope::Default, @@ -424,12 +404,9 @@ TEST(LinkGraphTest, MergeSections) { auto &Sec3 = G.createSection("__data.3", MemProt::Read | MemProt::Write); // Create an initial block. 
- orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); - orc::ExecutorAddr B2Addr(0x2000); - auto &B2 = G.createContentBlock(Sec2, BlockContent, B2Addr, 8, 0); - orc::ExecutorAddr B3Addr(0x3000); - auto &B3 = G.createContentBlock(Sec3, BlockContent, B3Addr, 8, 0); + auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + auto &B2 = G.createContentBlock(Sec2, BlockContent, 0x2000, 8, 0); + auto &B3 = G.createContentBlock(Sec3, BlockContent, 0x3000, 8, 0); // Add a symbols for each block. G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, Scope::Default, @@ -505,8 +482,7 @@ TEST(LinkGraphTest, SplitBlock) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create the block to split. - orc::ExecutorAddr B1Addr(0x1000); - auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); + auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); // Add some symbols to the block. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, @@ -523,8 +499,7 @@ TEST(LinkGraphTest, SplitBlock) { // Add an extra block, EB, and target symbols, and use these to add edges // from B1 to EB. - orc::ExecutorAddr EBAddr(0x2000); - auto &EB = G.createContentBlock(Sec, BlockContent, EBAddr, 8, 0); + auto &EB = G.createContentBlock(Sec, BlockContent, 0x2000, 8, 0); auto &ES1 = G.addDefinedSymbol(EB, 0, "TS1", 4, Linkage::Strong, Scope::Default, false, false); auto &ES2 = G.addDefinedSymbol(EB, 4, "TS2", 4, Linkage::Strong, @@ -544,10 +519,10 @@ TEST(LinkGraphTest, SplitBlock) { auto &B2 = G.splitBlock(B1, 8); // Check that the block addresses and content matches what we would expect. 
- EXPECT_EQ(B1.getAddress(), B1Addr + 8); + EXPECT_EQ(B1.getAddress(), 0x1008U); EXPECT_EQ(B1.getContent(), BlockContent.slice(8)); - EXPECT_EQ(B2.getAddress(), B1Addr); + EXPECT_EQ(B2.getAddress(), 0x1000U); EXPECT_EQ(B2.getContent(), BlockContent.slice(0, 8)); // Check that symbols in B1 were transferred as expected: diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index 1f638f407c48..0181c558b60d 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -44,8 +44,7 @@ TEST_F(ObjectLinkingLayerTest, AddLinkGraph) { support::little, x86_64::getEdgeKindName); auto &Sec1 = G->createSection("__data", MemProt::Read | MemProt::Write); - auto &B1 = G->createContentBlock(Sec1, BlockContent, - orc::ExecutorAddr(0x1000), 8, 0); + auto &B1 = G->createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); G->addDefinedSymbol(B1, 4, "_X", 4, Linkage::Strong, Scope::Default, false, false); From 2c384c37727660f11f63fda461210d1a6f5d2afe Mon Sep 17 00:00:00 2001 From: Vaivaswatha Nagaraj Date: Wed, 5 Jan 2022 14:14:27 +0530 Subject: [PATCH 763/992] [MLIR][DataFlowAnalysis] Use a queue to maintain the worklist Since the analysis is described to be suitable for a forward data-flow analysis, maintaining the worklist as a queue mimics RPO ordering of block visits, thus reaching the fixpoint earlier. 
Differential Revision: https://reviews.llvm.org/D116393 --- mlir/lib/Analysis/DataFlowAnalysis.cpp | 30 ++++++++++++++++---------- mlir/lib/Transforms/SCCP.cpp | 6 ++++++ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Analysis/DataFlowAnalysis.cpp b/mlir/lib/Analysis/DataFlowAnalysis.cpp index ca17e953e990..b8e801fea6db 100644 --- a/mlir/lib/Analysis/DataFlowAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlowAnalysis.cpp @@ -12,6 +12,8 @@ #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "llvm/ADT/SmallPtrSet.h" +#include + using namespace mlir; using namespace mlir::detail; @@ -165,7 +167,7 @@ class ForwardDataFlowSolver { template void markAllPessimisticFixpoint(Operation *op, ValuesT values) { markAllPessimisticFixpoint(values); - opWorklist.push_back(op); + opWorklist.push(op); } template void markAllPessimisticFixpointAndVisitUsers(ValuesT values) { @@ -195,10 +197,10 @@ class ForwardDataFlowSolver { DenseSet> executableEdges; /// A worklist containing blocks that need to be processed. - SmallVector blockWorklist; + std::queue blockWorklist; /// A worklist of operations that need to be processed. - SmallVector opWorklist; + std::queue opWorklist; /// The callable operations that have their argument/result state tracked. DenseMap callableLatticeState; @@ -229,12 +231,18 @@ ForwardDataFlowSolver::ForwardDataFlowSolver( void ForwardDataFlowSolver::solve() { while (!blockWorklist.empty() || !opWorklist.empty()) { // Process any operations in the op worklist. - while (!opWorklist.empty()) - visitUsers(*opWorklist.pop_back_val()); + while (!opWorklist.empty()) { + Operation *nextOp = opWorklist.front(); + opWorklist.pop(); + visitUsers(*nextOp); + } // Process any blocks in the block worklist. 
- while (!blockWorklist.empty()) - visitBlock(blockWorklist.pop_back_val()); + while (!blockWorklist.empty()) { + Block *nextBlock = blockWorklist.front(); + blockWorklist.pop(); + visitBlock(nextBlock); + } } } @@ -368,7 +376,7 @@ void ForwardDataFlowSolver::visitOperation(Operation *op) { // Visit the current operation. if (analysis.visitOperation(op, operandLattices) == ChangeResult::Change) - opWorklist.push_back(op); + opWorklist.push(op); // `visitOperation` is required to define all of the result lattices. assert(llvm::none_of( @@ -477,7 +485,7 @@ void ForwardDataFlowSolver::visitRegionSuccessors( // region operation can provide information for certain results that // aren't part of the control flow. if (succArgs.size() != results.size()) { - opWorklist.push_back(parentOp); + opWorklist.push(parentOp); if (succArgs.empty()) { markAllPessimisticFixpoint(results); continue; @@ -713,7 +721,7 @@ ForwardDataFlowSolver::markEntryBlockExecutable(Region *region, ChangeResult ForwardDataFlowSolver::markBlockExecutable(Block *block) { bool marked = executableBlocks.insert(block).second; if (marked) - blockWorklist.push_back(block); + blockWorklist.push(block); return marked ? 
ChangeResult::Change : ChangeResult::NoChange; } @@ -749,7 +757,7 @@ bool ForwardDataFlowSolver::isAtFixpoint(Value value) const { void ForwardDataFlowSolver::join(Operation *owner, AbstractLatticeElement &to, const AbstractLatticeElement &from) { if (to.join(from) == ChangeResult::Change) - opWorklist.push_back(owner); + opWorklist.push(owner); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Transforms/SCCP.cpp b/mlir/lib/Transforms/SCCP.cpp index 7a2f9949a746..11d55e7454a0 100644 --- a/mlir/lib/Transforms/SCCP.cpp +++ b/mlir/lib/Transforms/SCCP.cpp @@ -23,6 +23,9 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/FoldUtils.h" #include "mlir/Transforms/Passes.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "sccp" using namespace mlir; @@ -70,6 +73,9 @@ struct SCCPAnalysis : public ForwardDataFlowAnalysis { ChangeResult visitOperation(Operation *op, ArrayRef *> operands) final { + + LLVM_DEBUG(llvm::dbgs() << "SCCP: Visiting operation: " << *op << "\n"); + // Don't try to simulate the results of a region operation as we can't // guarantee that folding will be out-of-place. We don't allow in-place // folds as the desire here is for simulated execution, and not general From 06c154602ed6f2619ab7cba8c4dfd190d7fa62f5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 5 Jan 2022 13:25:13 -0800 Subject: [PATCH 764/992] DebugInfo: Rebuild varargs function types correctly Improves llvm-dwarfdump output and for simplified template names roundtripping. 
--- .../simplified_template_names.cpp | 2 + llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 8 +- .../llvm-dwarfdump/X86/prettyprint_types.s | 696 +++++++++--------- 3 files changed, 372 insertions(+), 334 deletions(-) diff --git a/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp b/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp index 312229f2359c..61ac76db832c 100644 --- a/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp +++ b/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp @@ -217,6 +217,8 @@ int main() { f1(); t3 v1; f1(); + f1(); + f1(); f1(); f1(); f1(); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 49aa27998ace..08f6c1645760 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -611,7 +611,8 @@ struct DWARFTypePrinter { bool First = true; bool RealFirst = true; for (DWARFDie P : D) { - if (P.getTag() != DW_TAG_formal_parameter) + if (P.getTag() != DW_TAG_formal_parameter && + P.getTag() != DW_TAG_unspecified_parameters) return; DWARFDie T = resolveReferencedType(P); if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) { @@ -623,7 +624,10 @@ struct DWARFTypePrinter { OS << ", "; } First = false; - appendQualifiedName(T); + if (P.getTag() == DW_TAG_unspecified_parameters) + OS << "..."; + else + appendQualifiedName(T); } EndedWithTemplate = false; OS << ')'; diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s index 217f001a0008..f04e245a420b 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s @@ -43,8 +43,8 @@ # // arrays # int *const (&)[1], int *const[1], const int (&)[1], const int[1], # // subroutine types -# int(), void(int), void(int, int), void (*)(foo 
*, int), void (*const)(), -# void() const, void() volatile &&, void() const volatile &, +# int(), void(int), void(int, int), void(...), void (*)(foo *, int), +# void (*const)(), void() const, void() volatile &&, void() const volatile &, # void(const volatile foo *), void (*(int))(float), # // qualified types # ns::inner, ns::inner(), ns::inner[1], ns::inner *, ns::inner ns::inner::*, @@ -118,6 +118,7 @@ # CHECK: DW_AT_type{{.*}}"int ()" # CHECK: DW_AT_type{{.*}}"void (int)" # CHECK: DW_AT_type{{.*}}"void (int, int)" +# CHECK: DW_AT_type{{.*}}"void (...)" # CHECK: DW_AT_type{{.*}}"void (*)(foo *, int)" # CHECK: DW_AT_type{{.*}}"void (*const)()") # CHECK: DW_AT_type{{.*}}"void () const") @@ -151,12 +152,7 @@ # CHECK: DW_AT_type{{.*}}"tv") # CHECK: DW_AT_type{{.*}}"tv") -# int literals - these ones are a bit tricky since Clang is currently -# transforms integer type names (perhaps originally motivated to match GCC?) at -# the very end of `CGDebugInfo::CreateType(const BuiltinType *BT)`. The LLVM IR -# in this test is produced without those transformations. We should either add -# some code to libDebugInfoDWARF to reverse these transformations, or stop doing -# this transformation in clang if it's no longer needed. 
+# int literals # CHECK: DW_AT_type{{.*}}"tv" # CHECK: DW_AT_type{{.*}}"tv" # CHECK: DW_AT_type{{.*}}"tv" @@ -166,6 +162,32 @@ # CHECK: DW_AT_type{{.*}}"tv" # CHECK: DW_AT_type{{.*}}"tv" + .text + .file "test.cpp" + .file 1 "/usr/local/google/home/blaikie/dev/scratch" "test.cpp" + .globl _Z2f1v # -- Begin function _Z2f1v + .p2align 4, 0x90 + .type _Z2f1v,@function +_Z2f1v: # @_Z2f1v +.Lfunc_begin0: + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movabsq $v1, %rax + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Lfunc_end0: + .size _Z2f1v, .Lfunc_end0-_Z2f1v + .cfi_endproc + # -- End function + .type v1,@object # @v1 + .local v1 + .comm v1,1,1 .section .debug_abbrev,"",@progbits .byte 1 # Abbreviation Code .byte 17 # DW_TAG_compile_unit @@ -370,44 +392,49 @@ .byte 0 # EOM(1) .byte 0 # EOM(2) .byte 25 # Abbreviation Code - .byte 21 # DW_TAG_subroutine_type + .byte 24 # DW_TAG_unspecified_parameters .byte 0 # DW_CHILDREN_no .byte 0 # EOM(1) .byte 0 # EOM(2) .byte 26 # Abbreviation Code .byte 21 # DW_TAG_subroutine_type .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 27 # Abbreviation Code + .byte 21 # DW_TAG_subroutine_type + .byte 0 # DW_CHILDREN_no .byte 120 # DW_AT_rvalue_reference .byte 25 # DW_FORM_flag_present .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 27 # Abbreviation Code + .byte 28 # Abbreviation Code .byte 21 # DW_TAG_subroutine_type .byte 0 # DW_CHILDREN_no .byte 119 # DW_AT_reference .byte 25 # DW_FORM_flag_present .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 28 # Abbreviation Code + .byte 29 # Abbreviation Code .byte 21 # DW_TAG_subroutine_type .byte 1 # DW_CHILDREN_yes .byte 73 # DW_AT_type .byte 19 # DW_FORM_ref4 .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 29 # Abbreviation Code + .byte 30 # Abbreviation Code .byte 57 # DW_TAG_namespace .byte 1 # DW_CHILDREN_yes .byte 3 # DW_AT_name .byte 14 # DW_FORM_strp .byte 0 # EOM(1) .byte 0 # EOM(2) - 
.byte 30 # Abbreviation Code + .byte 31 # Abbreviation Code .byte 57 # DW_TAG_namespace .byte 1 # DW_CHILDREN_yes .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 31 # Abbreviation Code + .byte 32 # Abbreviation Code .byte 19 # DW_TAG_structure_type .byte 1 # DW_CHILDREN_yes .byte 3 # DW_AT_name @@ -416,7 +443,7 @@ .byte 25 # DW_FORM_flag_present .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 32 # Abbreviation Code + .byte 33 # Abbreviation Code .byte 47 # DW_TAG_template_type_parameter .byte 0 # DW_CHILDREN_no .byte 73 # DW_AT_type @@ -425,7 +452,7 @@ .byte 14 # DW_FORM_strp .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 33 # Abbreviation Code + .byte 34 # Abbreviation Code .byte 48 # DW_TAG_template_value_parameter .byte 0 # DW_CHILDREN_no .byte 73 # DW_AT_type @@ -436,7 +463,7 @@ .byte 15 # DW_FORM_udata .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 34 # Abbreviation Code + .byte 35 # Abbreviation Code .byte 4 # DW_TAG_enumeration_type .byte 1 # DW_CHILDREN_yes .byte 73 # DW_AT_type @@ -451,7 +478,7 @@ .byte 11 # DW_FORM_data1 .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 35 # Abbreviation Code + .byte 36 # Abbreviation Code .byte 40 # DW_TAG_enumerator .byte 0 # DW_CHILDREN_no .byte 3 # DW_AT_name @@ -460,7 +487,7 @@ .byte 15 # DW_FORM_udata .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 36 # Abbreviation Code + .byte 37 # Abbreviation Code .byte 48 # DW_TAG_template_value_parameter .byte 0 # DW_CHILDREN_no .byte 73 # DW_AT_type @@ -471,7 +498,7 @@ .byte 13 # DW_FORM_sdata .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 37 # Abbreviation Code + .byte 38 # Abbreviation Code .byte 4 # DW_TAG_enumeration_type .byte 1 # DW_CHILDREN_yes .byte 73 # DW_AT_type @@ -488,7 +515,7 @@ .byte 11 # DW_FORM_data1 .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 38 # Abbreviation Code + .byte 39 # Abbreviation Code .byte 40 # DW_TAG_enumerator .byte 0 # DW_CHILDREN_no .byte 3 # DW_AT_name @@ -497,7 +524,7 @@ .byte 13 # DW_FORM_sdata .byte 0 # EOM(1) .byte 0 # EOM(2) - .byte 39 # Abbreviation Code + .byte 40 # 
Abbreviation Code .byte 48 # DW_TAG_template_value_parameter .byte 0 # DW_CHILDREN_no .byte 73 # DW_AT_type @@ -514,7 +541,7 @@ .short 4 # DWARF version number .long .debug_abbrev # Offset Into Abbrev. Section .byte 8 # Address Size (in bytes) - .byte 1 # Abbrev [1] 0xb:0x4fd DW_TAG_compile_unit + .byte 1 # Abbrev [1] 0xb:0x505 DW_TAG_compile_unit .long .Linfo_string0 # DW_AT_producer .short 33 # DW_AT_language .long .Linfo_string1 # DW_AT_name @@ -528,570 +555,575 @@ .byte 9 # DW_AT_location .byte 3 .quad v1 - .byte 3 # Abbrev [3] 0x33:0x10f DW_TAG_structure_type + .byte 3 # Abbrev [3] 0x33:0x114 DW_TAG_structure_type .byte 5 # DW_AT_calling_convention .long .Linfo_string33 # DW_AT_name .byte 1 # DW_AT_byte_size .byte 1 # DW_AT_decl_file .byte 3 # DW_AT_decl_line - .byte 4 # Abbrev [4] 0x3c:0x105 DW_TAG_GNU_template_parameter_pack + .byte 4 # Abbrev [4] 0x3c:0x10a DW_TAG_GNU_template_parameter_pack .long .Linfo_string4 # DW_AT_name .byte 5 # Abbrev [5] 0x41:0x5 DW_TAG_template_type_parameter - .long 322 # DW_AT_type + .long 327 # DW_AT_type .byte 5 # Abbrev [5] 0x46:0x5 DW_TAG_template_type_parameter - .long 329 # DW_AT_type - .byte 5 # Abbrev [5] 0x4b:0x5 DW_TAG_template_type_parameter .long 334 # DW_AT_type - .byte 5 # Abbrev [5] 0x50:0x5 DW_TAG_template_type_parameter + .byte 5 # Abbrev [5] 0x4b:0x5 DW_TAG_template_type_parameter .long 339 # DW_AT_type - .byte 5 # Abbrev [5] 0x55:0x5 DW_TAG_template_type_parameter + .byte 5 # Abbrev [5] 0x50:0x5 DW_TAG_template_type_parameter .long 344 # DW_AT_type - .byte 5 # Abbrev [5] 0x5a:0x5 DW_TAG_template_type_parameter + .byte 5 # Abbrev [5] 0x55:0x5 DW_TAG_template_type_parameter .long 349 # DW_AT_type - .byte 5 # Abbrev [5] 0x5f:0x5 DW_TAG_template_type_parameter + .byte 5 # Abbrev [5] 0x5a:0x5 DW_TAG_template_type_parameter .long 354 # DW_AT_type + .byte 5 # Abbrev [5] 0x5f:0x5 DW_TAG_template_type_parameter + .long 359 # DW_AT_type .byte 5 # Abbrev [5] 0x64:0x5 DW_TAG_template_type_parameter - .long 365 # DW_AT_type 
- .byte 5 # Abbrev [5] 0x69:0x5 DW_TAG_template_type_parameter .long 370 # DW_AT_type - .byte 5 # Abbrev [5] 0x6e:0x5 DW_TAG_template_type_parameter + .byte 5 # Abbrev [5] 0x69:0x5 DW_TAG_template_type_parameter .long 375 # DW_AT_type + .byte 5 # Abbrev [5] 0x6e:0x5 DW_TAG_template_type_parameter + .long 380 # DW_AT_type .byte 5 # Abbrev [5] 0x73:0x5 DW_TAG_template_type_parameter - .long 381 # DW_AT_type + .long 386 # DW_AT_type .byte 5 # Abbrev [5] 0x78:0x5 DW_TAG_template_type_parameter - .long 395 # DW_AT_type + .long 400 # DW_AT_type .byte 5 # Abbrev [5] 0x7d:0x5 DW_TAG_template_type_parameter - .long 421 # DW_AT_type + .long 426 # DW_AT_type .byte 5 # Abbrev [5] 0x82:0x5 DW_TAG_template_type_parameter - .long 462 # DW_AT_type - .byte 5 # Abbrev [5] 0x87:0x5 DW_TAG_template_type_parameter .long 467 # DW_AT_type + .byte 5 # Abbrev [5] 0x87:0x5 DW_TAG_template_type_parameter + .long 472 # DW_AT_type .byte 5 # Abbrev [5] 0x8c:0x5 DW_TAG_template_type_parameter - .long 491 # DW_AT_type - .byte 5 # Abbrev [5] 0x91:0x5 DW_TAG_template_type_parameter .long 496 # DW_AT_type + .byte 5 # Abbrev [5] 0x91:0x5 DW_TAG_template_type_parameter + .long 501 # DW_AT_type .byte 5 # Abbrev [5] 0x96:0x5 DW_TAG_template_type_parameter - .long 513 # DW_AT_type - .byte 5 # Abbrev [5] 0x9b:0x5 DW_TAG_template_type_parameter .long 518 # DW_AT_type + .byte 5 # Abbrev [5] 0x9b:0x5 DW_TAG_template_type_parameter + .long 523 # DW_AT_type .byte 5 # Abbrev [5] 0xa0:0x5 DW_TAG_template_type_parameter - .long 525 # DW_AT_type + .long 530 # DW_AT_type .byte 5 # Abbrev [5] 0xa5:0x5 DW_TAG_template_type_parameter - .long 537 # DW_AT_type + .long 542 # DW_AT_type .byte 5 # Abbrev [5] 0xaa:0x5 DW_TAG_template_type_parameter - .long 559 # DW_AT_type + .long 545 # DW_AT_type .byte 5 # Abbrev [5] 0xaf:0x5 DW_TAG_template_type_parameter - .long 570 # DW_AT_type + .long 567 # DW_AT_type .byte 5 # Abbrev [5] 0xb4:0x5 DW_TAG_template_type_parameter - .long 575 # DW_AT_type + .long 578 # DW_AT_type .byte 5 
# Abbrev [5] 0xb9:0x5 DW_TAG_template_type_parameter - .long 581 # DW_AT_type + .long 583 # DW_AT_type .byte 5 # Abbrev [5] 0xbe:0x5 DW_TAG_template_type_parameter - .long 592 # DW_AT_type + .long 589 # DW_AT_type .byte 5 # Abbrev [5] 0xc3:0x5 DW_TAG_template_type_parameter - .long 604 # DW_AT_type + .long 600 # DW_AT_type .byte 5 # Abbrev [5] 0xc8:0x5 DW_TAG_template_type_parameter - .long 639 # DW_AT_type + .long 612 # DW_AT_type .byte 5 # Abbrev [5] 0xcd:0x5 DW_TAG_template_type_parameter - .long 645 # DW_AT_type + .long 647 # DW_AT_type .byte 5 # Abbrev [5] 0xd2:0x5 DW_TAG_template_type_parameter - .long 650 # DW_AT_type + .long 653 # DW_AT_type .byte 5 # Abbrev [5] 0xd7:0x5 DW_TAG_template_type_parameter - .long 662 # DW_AT_type + .long 658 # DW_AT_type .byte 5 # Abbrev [5] 0xdc:0x5 DW_TAG_template_type_parameter - .long 667 # DW_AT_type + .long 670 # DW_AT_type .byte 5 # Abbrev [5] 0xe1:0x5 DW_TAG_template_type_parameter - .long 676 # DW_AT_type + .long 675 # DW_AT_type .byte 5 # Abbrev [5] 0xe6:0x5 DW_TAG_template_type_parameter - .long 706 # DW_AT_type + .long 684 # DW_AT_type .byte 5 # Abbrev [5] 0xeb:0x5 DW_TAG_template_type_parameter - .long 712 # DW_AT_type + .long 714 # DW_AT_type .byte 5 # Abbrev [5] 0xf0:0x5 DW_TAG_template_type_parameter - .long 718 # DW_AT_type + .long 720 # DW_AT_type .byte 5 # Abbrev [5] 0xf5:0x5 DW_TAG_template_type_parameter - .long 748 # DW_AT_type + .long 726 # DW_AT_type .byte 5 # Abbrev [5] 0xfa:0x5 DW_TAG_template_type_parameter - .long 799 # DW_AT_type + .long 756 # DW_AT_type .byte 5 # Abbrev [5] 0xff:0x5 DW_TAG_template_type_parameter - .long 824 # DW_AT_type + .long 807 # DW_AT_type .byte 5 # Abbrev [5] 0x104:0x5 DW_TAG_template_type_parameter - .long 868 # DW_AT_type + .long 832 # DW_AT_type .byte 5 # Abbrev [5] 0x109:0x5 DW_TAG_template_type_parameter - .long 900 # DW_AT_type + .long 876 # DW_AT_type .byte 5 # Abbrev [5] 0x10e:0x5 DW_TAG_template_type_parameter - .long 988 # DW_AT_type + .long 908 # DW_AT_type .byte 
5 # Abbrev [5] 0x113:0x5 DW_TAG_template_type_parameter - .long 1020 # DW_AT_type + .long 996 # DW_AT_type .byte 5 # Abbrev [5] 0x118:0x5 DW_TAG_template_type_parameter - .long 1045 # DW_AT_type + .long 1028 # DW_AT_type .byte 5 # Abbrev [5] 0x11d:0x5 DW_TAG_template_type_parameter - .long 1077 # DW_AT_type + .long 1053 # DW_AT_type .byte 5 # Abbrev [5] 0x122:0x5 DW_TAG_template_type_parameter - .long 1109 # DW_AT_type + .long 1085 # DW_AT_type .byte 5 # Abbrev [5] 0x127:0x5 DW_TAG_template_type_parameter - .long 1134 # DW_AT_type + .long 1117 # DW_AT_type .byte 5 # Abbrev [5] 0x12c:0x5 DW_TAG_template_type_parameter - .long 1166 # DW_AT_type + .long 1142 # DW_AT_type .byte 5 # Abbrev [5] 0x131:0x5 DW_TAG_template_type_parameter - .long 1198 # DW_AT_type + .long 1174 # DW_AT_type .byte 5 # Abbrev [5] 0x136:0x5 DW_TAG_template_type_parameter - .long 1223 # DW_AT_type + .long 1206 # DW_AT_type .byte 5 # Abbrev [5] 0x13b:0x5 DW_TAG_template_type_parameter - .long 1255 # DW_AT_type + .long 1231 # DW_AT_type + .byte 5 # Abbrev [5] 0x140:0x5 DW_TAG_template_type_parameter + .long 1263 # DW_AT_type .byte 0 # End Of Children Mark .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x142:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x147:0x7 DW_TAG_base_type .long .Linfo_string5 # DW_AT_name .byte 5 # DW_AT_encoding .byte 4 # DW_AT_byte_size - .byte 7 # Abbrev [7] 0x149:0x5 DW_TAG_unspecified_type + .byte 7 # Abbrev [7] 0x14e:0x5 DW_TAG_unspecified_type .long .Linfo_string6 # DW_AT_name - .byte 8 # Abbrev [8] 0x14e:0x5 DW_TAG_reference_type - .long 322 # DW_AT_type - .byte 9 # Abbrev [9] 0x153:0x5 DW_TAG_rvalue_reference_type - .long 322 # DW_AT_type - .byte 10 # Abbrev [10] 0x158:0x5 DW_TAG_pointer_type - .long 322 # DW_AT_type + .byte 8 # Abbrev [8] 0x153:0x5 DW_TAG_reference_type + .long 327 # DW_AT_type + .byte 9 # Abbrev [9] 0x158:0x5 DW_TAG_rvalue_reference_type + .long 327 # DW_AT_type .byte 10 # Abbrev [10] 0x15d:0x5 DW_TAG_pointer_type - .long 354 # DW_AT_type - 
.byte 11 # Abbrev [11] 0x162:0x5 DW_TAG_const_type + .long 327 # DW_AT_type + .byte 10 # Abbrev [10] 0x162:0x5 DW_TAG_pointer_type .long 359 # DW_AT_type - .byte 10 # Abbrev [10] 0x167:0x5 DW_TAG_pointer_type + .byte 11 # Abbrev [11] 0x167:0x5 DW_TAG_const_type .long 364 # DW_AT_type - .byte 12 # Abbrev [12] 0x16c:0x1 DW_TAG_const_type - .byte 11 # Abbrev [11] 0x16d:0x5 DW_TAG_const_type - .long 370 # DW_AT_type - .byte 13 # Abbrev [13] 0x172:0x5 DW_TAG_volatile_type - .long 344 # DW_AT_type - .byte 11 # Abbrev [11] 0x177:0x5 DW_TAG_const_type - .long 380 # DW_AT_type - .byte 14 # Abbrev [14] 0x17c:0x1 DW_TAG_pointer_type - .byte 15 # Abbrev [15] 0x17d:0x9 DW_TAG_ptr_to_member_type - .long 322 # DW_AT_type - .long 390 # DW_AT_containing_type - .byte 16 # Abbrev [16] 0x186:0x5 DW_TAG_structure_type + .byte 10 # Abbrev [10] 0x16c:0x5 DW_TAG_pointer_type + .long 369 # DW_AT_type + .byte 12 # Abbrev [12] 0x171:0x1 DW_TAG_const_type + .byte 11 # Abbrev [11] 0x172:0x5 DW_TAG_const_type + .long 375 # DW_AT_type + .byte 13 # Abbrev [13] 0x177:0x5 DW_TAG_volatile_type + .long 349 # DW_AT_type + .byte 11 # Abbrev [11] 0x17c:0x5 DW_TAG_const_type + .long 385 # DW_AT_type + .byte 14 # Abbrev [14] 0x181:0x1 DW_TAG_pointer_type + .byte 15 # Abbrev [15] 0x182:0x9 DW_TAG_ptr_to_member_type + .long 327 # DW_AT_type + .long 395 # DW_AT_containing_type + .byte 16 # Abbrev [16] 0x18b:0x5 DW_TAG_structure_type .long .Linfo_string7 # DW_AT_name # DW_AT_declaration - .byte 15 # Abbrev [15] 0x18b:0x9 DW_TAG_ptr_to_member_type - .long 404 # DW_AT_type - .long 390 # DW_AT_containing_type - .byte 17 # Abbrev [17] 0x194:0xc DW_TAG_subroutine_type - .byte 18 # Abbrev [18] 0x195:0x5 DW_TAG_formal_parameter - .long 416 # DW_AT_type + .byte 15 # Abbrev [15] 0x190:0x9 DW_TAG_ptr_to_member_type + .long 409 # DW_AT_type + .long 395 # DW_AT_containing_type + .byte 17 # Abbrev [17] 0x199:0xc DW_TAG_subroutine_type + .byte 18 # Abbrev [18] 0x19a:0x5 DW_TAG_formal_parameter + .long 421 # DW_AT_type # 
DW_AT_artificial - .byte 19 # Abbrev [19] 0x19a:0x5 DW_TAG_formal_parameter - .long 322 # DW_AT_type + .byte 19 # Abbrev [19] 0x19f:0x5 DW_TAG_formal_parameter + .long 327 # DW_AT_type .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x1a0:0x5 DW_TAG_pointer_type - .long 390 # DW_AT_type - .byte 8 # Abbrev [8] 0x1a5:0x5 DW_TAG_reference_type - .long 426 # DW_AT_type - .byte 11 # Abbrev [11] 0x1aa:0x5 DW_TAG_const_type + .byte 10 # Abbrev [10] 0x1a5:0x5 DW_TAG_pointer_type + .long 395 # DW_AT_type + .byte 8 # Abbrev [8] 0x1aa:0x5 DW_TAG_reference_type .long 431 # DW_AT_type - .byte 15 # Abbrev [15] 0x1af:0x9 DW_TAG_ptr_to_member_type - .long 440 # DW_AT_type - .long 390 # DW_AT_containing_type - .byte 20 # Abbrev [20] 0x1b8:0x7 DW_TAG_subroutine_type + .byte 11 # Abbrev [11] 0x1af:0x5 DW_TAG_const_type + .long 436 # DW_AT_type + .byte 15 # Abbrev [15] 0x1b4:0x9 DW_TAG_ptr_to_member_type + .long 445 # DW_AT_type + .long 395 # DW_AT_containing_type + .byte 20 # Abbrev [20] 0x1bd:0x7 DW_TAG_subroutine_type # DW_AT_rvalue_reference - .byte 18 # Abbrev [18] 0x1b9:0x5 DW_TAG_formal_parameter - .long 447 # DW_AT_type + .byte 18 # Abbrev [18] 0x1be:0x5 DW_TAG_formal_parameter + .long 452 # DW_AT_type # DW_AT_artificial .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x1bf:0x5 DW_TAG_pointer_type - .long 452 # DW_AT_type - .byte 11 # Abbrev [11] 0x1c4:0x5 DW_TAG_const_type + .byte 10 # Abbrev [10] 0x1c4:0x5 DW_TAG_pointer_type .long 457 # DW_AT_type - .byte 13 # Abbrev [13] 0x1c9:0x5 DW_TAG_volatile_type - .long 390 # DW_AT_type - .byte 8 # Abbrev [8] 0x1ce:0x5 DW_TAG_reference_type - .long 467 # DW_AT_type - .byte 11 # Abbrev [11] 0x1d3:0x5 DW_TAG_const_type + .byte 11 # Abbrev [11] 0x1c9:0x5 DW_TAG_const_type + .long 462 # DW_AT_type + .byte 13 # Abbrev [13] 0x1ce:0x5 DW_TAG_volatile_type + .long 395 # DW_AT_type + .byte 8 # Abbrev [8] 0x1d3:0x5 DW_TAG_reference_type .long 472 # DW_AT_type - .byte 21 # Abbrev [21] 0x1d8:0xc DW_TAG_array_type - .long 344 # 
DW_AT_type - .byte 22 # Abbrev [22] 0x1dd:0x6 DW_TAG_subrange_type - .long 484 # DW_AT_type + .byte 11 # Abbrev [11] 0x1d8:0x5 DW_TAG_const_type + .long 477 # DW_AT_type + .byte 21 # Abbrev [21] 0x1dd:0xc DW_TAG_array_type + .long 349 # DW_AT_type + .byte 22 # Abbrev [22] 0x1e2:0x6 DW_TAG_subrange_type + .long 489 # DW_AT_type .byte 1 # DW_AT_count .byte 0 # End Of Children Mark - .byte 23 # Abbrev [23] 0x1e4:0x7 DW_TAG_base_type + .byte 23 # Abbrev [23] 0x1e9:0x7 DW_TAG_base_type .long .Linfo_string8 # DW_AT_name .byte 8 # DW_AT_byte_size .byte 7 # DW_AT_encoding - .byte 8 # Abbrev [8] 0x1eb:0x5 DW_TAG_reference_type - .long 496 # DW_AT_type - .byte 11 # Abbrev [11] 0x1f0:0x5 DW_TAG_const_type + .byte 8 # Abbrev [8] 0x1f0:0x5 DW_TAG_reference_type .long 501 # DW_AT_type - .byte 21 # Abbrev [21] 0x1f5:0xc DW_TAG_array_type - .long 322 # DW_AT_type - .byte 22 # Abbrev [22] 0x1fa:0x6 DW_TAG_subrange_type - .long 484 # DW_AT_type + .byte 11 # Abbrev [11] 0x1f5:0x5 DW_TAG_const_type + .long 506 # DW_AT_type + .byte 21 # Abbrev [21] 0x1fa:0xc DW_TAG_array_type + .long 327 # DW_AT_type + .byte 22 # Abbrev [22] 0x1ff:0x6 DW_TAG_subrange_type + .long 489 # DW_AT_type .byte 1 # DW_AT_count .byte 0 # End Of Children Mark - .byte 24 # Abbrev [24] 0x201:0x5 DW_TAG_subroutine_type - .long 322 # DW_AT_type - .byte 17 # Abbrev [17] 0x206:0x7 DW_TAG_subroutine_type - .byte 19 # Abbrev [19] 0x207:0x5 DW_TAG_formal_parameter - .long 322 # DW_AT_type + .byte 24 # Abbrev [24] 0x206:0x5 DW_TAG_subroutine_type + .long 327 # DW_AT_type + .byte 17 # Abbrev [17] 0x20b:0x7 DW_TAG_subroutine_type + .byte 19 # Abbrev [19] 0x20c:0x5 DW_TAG_formal_parameter + .long 327 # DW_AT_type .byte 0 # End Of Children Mark - .byte 17 # Abbrev [17] 0x20d:0xc DW_TAG_subroutine_type - .byte 19 # Abbrev [19] 0x20e:0x5 DW_TAG_formal_parameter - .long 322 # DW_AT_type + .byte 17 # Abbrev [17] 0x212:0xc DW_TAG_subroutine_type .byte 19 # Abbrev [19] 0x213:0x5 DW_TAG_formal_parameter - .long 322 # DW_AT_type + 
.long 327 # DW_AT_type + .byte 19 # Abbrev [19] 0x218:0x5 DW_TAG_formal_parameter + .long 327 # DW_AT_type .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x219:0x5 DW_TAG_pointer_type - .long 542 # DW_AT_type - .byte 17 # Abbrev [17] 0x21e:0xc DW_TAG_subroutine_type - .byte 19 # Abbrev [19] 0x21f:0x5 DW_TAG_formal_parameter - .long 554 # DW_AT_type - .byte 19 # Abbrev [19] 0x224:0x5 DW_TAG_formal_parameter - .long 322 # DW_AT_type + .byte 17 # Abbrev [17] 0x21e:0x3 DW_TAG_subroutine_type + .byte 25 # Abbrev [25] 0x21f:0x1 DW_TAG_unspecified_parameters + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x221:0x5 DW_TAG_pointer_type + .long 550 # DW_AT_type + .byte 17 # Abbrev [17] 0x226:0xc DW_TAG_subroutine_type + .byte 19 # Abbrev [19] 0x227:0x5 DW_TAG_formal_parameter + .long 562 # DW_AT_type + .byte 19 # Abbrev [19] 0x22c:0x5 DW_TAG_formal_parameter + .long 327 # DW_AT_type .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x22a:0x5 DW_TAG_pointer_type - .long 390 # DW_AT_type - .byte 11 # Abbrev [11] 0x22f:0x5 DW_TAG_const_type - .long 564 # DW_AT_type - .byte 10 # Abbrev [10] 0x234:0x5 DW_TAG_pointer_type - .long 569 # DW_AT_type - .byte 25 # Abbrev [25] 0x239:0x1 DW_TAG_subroutine_type - .byte 11 # Abbrev [11] 0x23a:0x5 DW_TAG_const_type - .long 569 # DW_AT_type - .byte 13 # Abbrev [13] 0x23f:0x5 DW_TAG_volatile_type - .long 580 # DW_AT_type - .byte 26 # Abbrev [26] 0x244:0x1 DW_TAG_subroutine_type + .byte 10 # Abbrev [10] 0x232:0x5 DW_TAG_pointer_type + .long 395 # DW_AT_type + .byte 11 # Abbrev [11] 0x237:0x5 DW_TAG_const_type + .long 572 # DW_AT_type + .byte 10 # Abbrev [10] 0x23c:0x5 DW_TAG_pointer_type + .long 577 # DW_AT_type + .byte 26 # Abbrev [26] 0x241:0x1 DW_TAG_subroutine_type + .byte 11 # Abbrev [11] 0x242:0x5 DW_TAG_const_type + .long 577 # DW_AT_type + .byte 13 # Abbrev [13] 0x247:0x5 DW_TAG_volatile_type + .long 588 # DW_AT_type + .byte 27 # Abbrev [27] 0x24c:0x1 DW_TAG_subroutine_type # DW_AT_rvalue_reference - .byte 11 
# Abbrev [11] 0x245:0x5 DW_TAG_const_type - .long 586 # DW_AT_type - .byte 13 # Abbrev [13] 0x24a:0x5 DW_TAG_volatile_type - .long 591 # DW_AT_type - .byte 27 # Abbrev [27] 0x24f:0x1 DW_TAG_subroutine_type - # DW_AT_reference - .byte 17 # Abbrev [17] 0x250:0x7 DW_TAG_subroutine_type - .byte 19 # Abbrev [19] 0x251:0x5 DW_TAG_formal_parameter + .byte 11 # Abbrev [11] 0x24d:0x5 DW_TAG_const_type + .long 594 # DW_AT_type + .byte 13 # Abbrev [13] 0x252:0x5 DW_TAG_volatile_type .long 599 # DW_AT_type + .byte 28 # Abbrev [28] 0x257:0x1 DW_TAG_subroutine_type + # DW_AT_reference + .byte 17 # Abbrev [17] 0x258:0x7 DW_TAG_subroutine_type + .byte 19 # Abbrev [19] 0x259:0x5 DW_TAG_formal_parameter + .long 607 # DW_AT_type .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x257:0x5 DW_TAG_pointer_type - .long 452 # DW_AT_type - .byte 28 # Abbrev [28] 0x25c:0xb DW_TAG_subroutine_type - .long 615 # DW_AT_type - .byte 19 # Abbrev [19] 0x261:0x5 DW_TAG_formal_parameter - .long 322 # DW_AT_type + .byte 10 # Abbrev [10] 0x25f:0x5 DW_TAG_pointer_type + .long 457 # DW_AT_type + .byte 29 # Abbrev [29] 0x264:0xb DW_TAG_subroutine_type + .long 623 # DW_AT_type + .byte 19 # Abbrev [19] 0x269:0x5 DW_TAG_formal_parameter + .long 327 # DW_AT_type .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x267:0x5 DW_TAG_pointer_type - .long 620 # DW_AT_type - .byte 17 # Abbrev [17] 0x26c:0x7 DW_TAG_subroutine_type - .byte 19 # Abbrev [19] 0x26d:0x5 DW_TAG_formal_parameter - .long 627 # DW_AT_type + .byte 10 # Abbrev [10] 0x26f:0x5 DW_TAG_pointer_type + .long 628 # DW_AT_type + .byte 17 # Abbrev [17] 0x274:0x7 DW_TAG_subroutine_type + .byte 19 # Abbrev [19] 0x275:0x5 DW_TAG_formal_parameter + .long 635 # DW_AT_type .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x273:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x27b:0x7 DW_TAG_base_type .long .Linfo_string9 # DW_AT_name .byte 4 # DW_AT_encoding .byte 4 # DW_AT_byte_size - .byte 29 # Abbrev [29] 0x27a:0xb DW_TAG_namespace + .byte 30 
# Abbrev [30] 0x282:0xb DW_TAG_namespace .long .Linfo_string10 # DW_AT_name - .byte 16 # Abbrev [16] 0x27f:0x5 DW_TAG_structure_type + .byte 16 # Abbrev [16] 0x287:0x5 DW_TAG_structure_type .long .Linfo_string11 # DW_AT_name # DW_AT_declaration .byte 0 # End Of Children Mark - .byte 24 # Abbrev [24] 0x285:0x5 DW_TAG_subroutine_type - .long 639 # DW_AT_type - .byte 21 # Abbrev [21] 0x28a:0xc DW_TAG_array_type - .long 639 # DW_AT_type - .byte 22 # Abbrev [22] 0x28f:0x6 DW_TAG_subrange_type - .long 484 # DW_AT_type + .byte 24 # Abbrev [24] 0x28d:0x5 DW_TAG_subroutine_type + .long 647 # DW_AT_type + .byte 21 # Abbrev [21] 0x292:0xc DW_TAG_array_type + .long 647 # DW_AT_type + .byte 22 # Abbrev [22] 0x297:0x6 DW_TAG_subrange_type + .long 489 # DW_AT_type .byte 1 # DW_AT_count .byte 0 # End Of Children Mark - .byte 10 # Abbrev [10] 0x296:0x5 DW_TAG_pointer_type - .long 639 # DW_AT_type - .byte 15 # Abbrev [15] 0x29b:0x9 DW_TAG_ptr_to_member_type - .long 639 # DW_AT_type - .long 639 # DW_AT_containing_type - .byte 15 # Abbrev [15] 0x2a4:0x9 DW_TAG_ptr_to_member_type - .long 685 # DW_AT_type - .long 639 # DW_AT_containing_type - .byte 28 # Abbrev [28] 0x2ad:0x10 DW_TAG_subroutine_type - .long 639 # DW_AT_type - .byte 18 # Abbrev [18] 0x2b2:0x5 DW_TAG_formal_parameter - .long 701 # DW_AT_type + .byte 10 # Abbrev [10] 0x29e:0x5 DW_TAG_pointer_type + .long 647 # DW_AT_type + .byte 15 # Abbrev [15] 0x2a3:0x9 DW_TAG_ptr_to_member_type + .long 647 # DW_AT_type + .long 647 # DW_AT_containing_type + .byte 15 # Abbrev [15] 0x2ac:0x9 DW_TAG_ptr_to_member_type + .long 693 # DW_AT_type + .long 647 # DW_AT_containing_type + .byte 29 # Abbrev [29] 0x2b5:0x10 DW_TAG_subroutine_type + .long 647 # DW_AT_type + .byte 18 # Abbrev [18] 0x2ba:0x5 DW_TAG_formal_parameter + .long 709 # DW_AT_type # DW_AT_artificial - .byte 19 # Abbrev [19] 0x2b7:0x5 DW_TAG_formal_parameter - .long 639 # DW_AT_type + .byte 19 # Abbrev [19] 0x2bf:0x5 DW_TAG_formal_parameter + .long 647 # DW_AT_type .byte 0 # End 
Of Children Mark - .byte 10 # Abbrev [10] 0x2bd:0x5 DW_TAG_pointer_type - .long 639 # DW_AT_type - .byte 11 # Abbrev [11] 0x2c2:0x5 DW_TAG_const_type - .long 639 # DW_AT_type - .byte 30 # Abbrev [30] 0x2c7:0x7 DW_TAG_namespace - .byte 16 # Abbrev [16] 0x2c8:0x5 DW_TAG_structure_type + .byte 10 # Abbrev [10] 0x2c5:0x5 DW_TAG_pointer_type + .long 647 # DW_AT_type + .byte 11 # Abbrev [11] 0x2ca:0x5 DW_TAG_const_type + .long 647 # DW_AT_type + .byte 31 # Abbrev [31] 0x2cf:0x7 DW_TAG_namespace + .byte 16 # Abbrev [16] 0x2d0:0x5 DW_TAG_structure_type .long .Linfo_string12 # DW_AT_name # DW_AT_declaration .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x2ce:0xf DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x2d6:0xf DW_TAG_structure_type .long .Linfo_string14 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x2d3:0x9 DW_TAG_template_type_parameter - .long 733 # DW_AT_type + .byte 33 # Abbrev [33] 0x2db:0x9 DW_TAG_template_type_parameter + .long 741 # DW_AT_type .long .Linfo_string13 # DW_AT_name .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x2dd:0xf DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x2e5:0xf DW_TAG_structure_type .long .Linfo_string14 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x2e2:0x9 DW_TAG_template_type_parameter - .long 322 # DW_AT_type + .byte 33 # Abbrev [33] 0x2ea:0x9 DW_TAG_template_type_parameter + .long 327 # DW_AT_type .long .Linfo_string13 # DW_AT_name .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x2ec:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x2f4:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x2f1:0x9 DW_TAG_template_type_parameter - .long 773 # DW_AT_type + .byte 33 # Abbrev [33] 0x2f9:0x9 DW_TAG_template_type_parameter + .long 781 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x2fa:0xa DW_TAG_template_value_parameter - .long 773 # DW_AT_type + .byte 34 # Abbrev [34] 0x302:0xa 
DW_TAG_template_value_parameter + .long 781 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 34 # Abbrev [34] 0x305:0x13 DW_TAG_enumeration_type - .long 792 # DW_AT_type + .byte 35 # Abbrev [35] 0x30d:0x13 DW_TAG_enumeration_type + .long 800 # DW_AT_type .long .Linfo_string17 # DW_AT_name .byte 4 # DW_AT_byte_size .byte 1 # DW_AT_decl_file .byte 12 # DW_AT_decl_line - .byte 35 # Abbrev [35] 0x311:0x6 DW_TAG_enumerator + .byte 36 # Abbrev [36] 0x319:0x6 DW_TAG_enumerator .long .Linfo_string16 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x318:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x320:0x7 DW_TAG_base_type .long .Linfo_string15 # DW_AT_name .byte 7 # DW_AT_encoding .byte 4 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x31f:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x327:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x324:0x9 DW_TAG_template_type_parameter - .long 773 # DW_AT_type + .byte 33 # Abbrev [33] 0x32c:0x9 DW_TAG_template_type_parameter + .long 781 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x32d:0xa DW_TAG_template_value_parameter - .long 773 # DW_AT_type + .byte 34 # Abbrev [34] 0x335:0xa DW_TAG_template_value_parameter + .long 781 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 1 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x338:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x340:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x33d:0x9 DW_TAG_template_type_parameter - .long 849 # DW_AT_type + .byte 33 # Abbrev [33] 0x345:0x9 DW_TAG_template_type_parameter + .long 857 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 36 # Abbrev [36] 0x346:0xa DW_TAG_template_value_parameter - .long 849 # DW_AT_type + .byte 37 # Abbrev [37] 
0x34e:0xa DW_TAG_template_value_parameter + .long 857 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 37 # Abbrev [37] 0x351:0x13 DW_TAG_enumeration_type - .long 322 # DW_AT_type + .byte 38 # Abbrev [38] 0x359:0x13 DW_TAG_enumeration_type + .long 327 # DW_AT_type # DW_AT_enum_class .long .Linfo_string21 # DW_AT_name .byte 4 # DW_AT_byte_size .byte 1 # DW_AT_decl_file .byte 15 # DW_AT_decl_line - .byte 38 # Abbrev [38] 0x35d:0x6 DW_TAG_enumerator + .byte 39 # Abbrev [39] 0x365:0x6 DW_TAG_enumerator .long .Linfo_string20 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x364:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x36c:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x369:0x9 DW_TAG_template_type_parameter - .long 893 # DW_AT_type + .byte 33 # Abbrev [33] 0x371:0x9 DW_TAG_template_type_parameter + .long 901 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x372:0xa DW_TAG_template_value_parameter - .long 893 # DW_AT_type + .byte 34 # Abbrev [34] 0x37a:0xa DW_TAG_template_value_parameter + .long 901 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 120 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x37d:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x385:0x7 DW_TAG_base_type .long .Linfo_string22 # DW_AT_name .byte 8 # DW_AT_encoding .byte 1 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x384:0x51 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x38c:0x51 DW_TAG_structure_type .long .Linfo_string25 # DW_AT_name # DW_AT_declaration - .byte 4 # Abbrev [4] 0x389:0x4b DW_TAG_GNU_template_parameter_pack + .byte 4 # Abbrev [4] 0x391:0x4b DW_TAG_GNU_template_parameter_pack .long .Linfo_string23 # DW_AT_name - .byte 39 # Abbrev [39] 0x38e:0x7 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x396:0x7 
DW_TAG_template_value_parameter + .long 989 # DW_AT_type .asciz "\370" # DW_AT_const_value - .byte 39 # Abbrev [39] 0x395:0x7 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x39d:0x7 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .asciz "\334" # DW_AT_const_value - .byte 39 # Abbrev [39] 0x39c:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3a4:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 39 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3a2:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3aa:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 7 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3a8:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3b0:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 8 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3ae:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3b6:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 12 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3b4:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3bc:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 10 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3ba:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3c2:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 13 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3c0:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3c8:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 9 # DW_AT_const_value - .byte 39 # Abbrev [39] 0x3c6:0x6 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3ce:0x6 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .byte 11 # 
DW_AT_const_value - .byte 39 # Abbrev [39] 0x3cc:0x7 DW_TAG_template_value_parameter - .long 981 # DW_AT_type + .byte 40 # Abbrev [40] 0x3d4:0x7 DW_TAG_template_value_parameter + .long 989 # DW_AT_type .asciz "\377" # DW_AT_const_value .byte 0 # End Of Children Mark .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x3d5:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x3dd:0x7 DW_TAG_base_type .long .Linfo_string24 # DW_AT_name .byte 6 # DW_AT_encoding .byte 1 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x3dc:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x3e4:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x3e1:0x9 DW_TAG_template_type_parameter - .long 1013 # DW_AT_type + .byte 33 # Abbrev [33] 0x3e9:0x9 DW_TAG_template_type_parameter + .long 1021 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x3ea:0xa DW_TAG_template_value_parameter - .long 1013 # DW_AT_type + .byte 34 # Abbrev [34] 0x3f2:0xa DW_TAG_template_value_parameter + .long 1021 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 1 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x3f5:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x3fd:0x7 DW_TAG_base_type .long .Linfo_string26 # DW_AT_name .byte 2 # DW_AT_encoding .byte 1 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x3fc:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x404:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x401:0x9 DW_TAG_template_type_parameter - .long 1013 # DW_AT_type + .byte 33 # Abbrev [33] 0x409:0x9 DW_TAG_template_type_parameter + .long 1021 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x40a:0xa DW_TAG_template_value_parameter - .long 1013 # DW_AT_type + .byte 34 # Abbrev [34] 0x412:0xa DW_TAG_template_value_parameter + .long 1021 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of 
Children Mark - .byte 31 # Abbrev [31] 0x415:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x41d:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x41a:0x9 DW_TAG_template_type_parameter - .long 1070 # DW_AT_type + .byte 33 # Abbrev [33] 0x422:0x9 DW_TAG_template_type_parameter + .long 1078 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 36 # Abbrev [36] 0x423:0xa DW_TAG_template_value_parameter - .long 1070 # DW_AT_type + .byte 37 # Abbrev [37] 0x42b:0xa DW_TAG_template_value_parameter + .long 1078 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x42e:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x436:0x7 DW_TAG_base_type .long .Linfo_string27 # DW_AT_name .byte 5 # DW_AT_encoding .byte 2 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x435:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x43d:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x43a:0x9 DW_TAG_template_type_parameter - .long 1102 # DW_AT_type + .byte 33 # Abbrev [33] 0x442:0x9 DW_TAG_template_type_parameter + .long 1110 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x443:0xa DW_TAG_template_value_parameter - .long 1102 # DW_AT_type + .byte 34 # Abbrev [34] 0x44b:0xa DW_TAG_template_value_parameter + .long 1110 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x44e:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x456:0x7 DW_TAG_base_type .long .Linfo_string28 # DW_AT_name .byte 7 # DW_AT_encoding .byte 2 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x455:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x45d:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x45a:0x9 DW_TAG_template_type_parameter - .long 322 # DW_AT_type + .byte 33 
# Abbrev [33] 0x462:0x9 DW_TAG_template_type_parameter + .long 327 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 36 # Abbrev [36] 0x463:0xa DW_TAG_template_value_parameter - .long 322 # DW_AT_type + .byte 37 # Abbrev [37] 0x46b:0xa DW_TAG_template_value_parameter + .long 327 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x46e:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x476:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x473:0x9 DW_TAG_template_type_parameter - .long 1159 # DW_AT_type + .byte 33 # Abbrev [33] 0x47b:0x9 DW_TAG_template_type_parameter + .long 1167 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 36 # Abbrev [36] 0x47c:0xa DW_TAG_template_value_parameter - .long 1159 # DW_AT_type + .byte 37 # Abbrev [37] 0x484:0xa DW_TAG_template_value_parameter + .long 1167 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x487:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x48f:0x7 DW_TAG_base_type .long .Linfo_string29 # DW_AT_name .byte 5 # DW_AT_encoding .byte 8 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x48e:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x496:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x493:0x9 DW_TAG_template_type_parameter - .long 1191 # DW_AT_type + .byte 33 # Abbrev [33] 0x49b:0x9 DW_TAG_template_type_parameter + .long 1199 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 36 # Abbrev [36] 0x49c:0xa DW_TAG_template_value_parameter - .long 1191 # DW_AT_type + .byte 37 # Abbrev [37] 0x4a4:0xa DW_TAG_template_value_parameter + .long 1199 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x4a7:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 
0x4af:0x7 DW_TAG_base_type .long .Linfo_string30 # DW_AT_name .byte 5 # DW_AT_encoding .byte 8 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x4ae:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x4b6:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x4b3:0x9 DW_TAG_template_type_parameter - .long 792 # DW_AT_type + .byte 33 # Abbrev [33] 0x4bb:0x9 DW_TAG_template_type_parameter + .long 800 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x4bc:0xa DW_TAG_template_value_parameter - .long 792 # DW_AT_type + .byte 34 # Abbrev [34] 0x4c4:0xa DW_TAG_template_value_parameter + .long 800 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 31 # Abbrev [31] 0x4c7:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x4cf:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x4cc:0x9 DW_TAG_template_type_parameter - .long 1248 # DW_AT_type + .byte 33 # Abbrev [33] 0x4d4:0x9 DW_TAG_template_type_parameter + .long 1256 # DW_AT_type .long .Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x4d5:0xa DW_TAG_template_value_parameter - .long 1248 # DW_AT_type + .byte 34 # Abbrev [34] 0x4dd:0xa DW_TAG_template_value_parameter + .long 1256 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x4e0:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x4e8:0x7 DW_TAG_base_type .long .Linfo_string31 # DW_AT_name .byte 7 # DW_AT_encoding .byte 8 # DW_AT_byte_size - .byte 31 # Abbrev [31] 0x4e7:0x19 DW_TAG_structure_type + .byte 32 # Abbrev [32] 0x4ef:0x19 DW_TAG_structure_type .long .Linfo_string19 # DW_AT_name # DW_AT_declaration - .byte 32 # Abbrev [32] 0x4ec:0x9 DW_TAG_template_type_parameter - .long 1280 # DW_AT_type + .byte 33 # Abbrev [33] 0x4f4:0x9 DW_TAG_template_type_parameter + .long 1288 # DW_AT_type .long 
.Linfo_string13 # DW_AT_name - .byte 33 # Abbrev [33] 0x4f5:0xa DW_TAG_template_value_parameter - .long 1280 # DW_AT_type + .byte 34 # Abbrev [34] 0x4fd:0xa DW_TAG_template_value_parameter + .long 1288 # DW_AT_type .long .Linfo_string18 # DW_AT_name .byte 0 # DW_AT_const_value .byte 0 # End Of Children Mark - .byte 6 # Abbrev [6] 0x500:0x7 DW_TAG_base_type + .byte 6 # Abbrev [6] 0x508:0x7 DW_TAG_base_type .long .Linfo_string32 # DW_AT_name .byte 7 # DW_AT_encoding .byte 8 # DW_AT_byte_size @@ -1099,7 +1131,7 @@ .Ldebug_info_end0: .section .debug_str,"MS",@progbits,1 .Linfo_string0: - .asciz "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e209925a875e1dfa15d5e4ddc3d00f2da4b42de1)" # string offset=0 + .asciz "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e1e74f6cd6ce41ce8303a5a91f29736808fccc36)" # string offset=0 .Linfo_string1: .asciz "test.cpp" # string offset=101 .Linfo_string2: @@ -1166,7 +1198,7 @@ .asciz "unsigned long long" # string offset=346 .Linfo_string33: .asciz "t1" # string offset=365 - .ident "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e209925a875e1dfa15d5e4ddc3d00f2da4b42de1)" + .ident "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e1e74f6cd6ce41ce8303a5a91f29736808fccc36)" .section ".note.GNU-stack","",@progbits .addrsig .addrsig_sym v1 From 3988a06d86e1a14dfd5f5fdae84ddbf928e85dab Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 5 Jan 2022 20:29:35 -0800 Subject: [PATCH 765/992] Remove unused variable (-Wunused) --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index f03862cdf421..ff054e722430 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -793,7 +793,6 @@ bool LoopInterchangeLegality::findInductionAndReductions( // This function indicates the current 
limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); // transform currently expects the loop latches to also be the exiting From 8ade3d43a3e48eb739c9db2f38b618fa213f0546 Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Wed, 5 Jan 2022 23:26:03 -0500 Subject: [PATCH 766/992] Revert "[LoopInterchange] Remove a limitation in LoopInterchange legality" This reverts commit 15702ff9ce28b3f4aafec13be561359d4c721595 while I investigate a ppc build bot failure at https://lab.llvm.org/buildbot#builders/36/builds/16051. --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 72 +++++++++++++++++++ .../LoopInterchange/currentLimitation.ll | 9 ++- .../LoopInterchange/interchangeable.ll | 20 ++---- .../loop-interchange-optimization-remarks.ll | 6 +- 4 files changed, 86 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index ff054e722430..08c66ee1b54a 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -887,6 +887,78 @@ bool LoopInterchangeLegality::currentLimitations() { return true; } + // TODO: Current limitation: Since we split the inner loop latch at the point + // were induction variable is incremented (induction.next); We cannot have + // more than 1 user of induction.next since it would result in broken code + // after split. + // e.g. 
+ // for(i=0;igetIncomingBlock(0) == InnerLoopPreHeader) + InnerIndexVarInc = + dyn_cast(InnerInductionVar->getIncomingValue(1)); + else + InnerIndexVarInc = + dyn_cast(InnerInductionVar->getIncomingValue(0)); + + if (!InnerIndexVarInc) { + LLVM_DEBUG( + dbgs() << "Did not find an instruction to increment the induction " + << "variable.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "The inner loop does not increment the induction variable."; + }); + return true; + } + + // Since we split the inner loop latch on this induction variable. Make sure + // we do not have any instruction between the induction variable and branch + // instruction. + + bool FoundInduction = false; + for (const Instruction &I : + llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) { + if (isa(I) || isa(I) || isa(I) || + isa(I)) + continue; + + // We found an instruction. If this is not induction variable then it is not + // safe to split this loop latch. + if (!I.isIdenticalTo(InnerIndexVarInc)) { + LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction " + << "variable increment and branch.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "UnsupportedInsBetweenInduction", + InnerLoop->getStartLoc(), InnerLoop->getHeader()) + << "Found unsupported instruction between induction variable " + "increment and branch."; + }); + return true; + } + + FoundInduction = true; + break; + } + // The loop latch ended and we didn't find the induction variable return as + // current limitation. 
+ if (!FoundInduction) { + LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Did not find the induction variable."; + }); + return true; + } return false; } diff --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll index 82c16555f44f..768dd3bb2e23 100644 --- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll +++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll @@ -15,16 +15,19 @@ target triple = "x86_64-unknown-linux-gnu" @C = common global [100 x [100 x i64]] zeroinitializer ;;--------------------------------------Test case 01------------------------------------ -;; This loop can be interchanged with -da-disable-delinearization-checks, otherwise it cannot -;; be interchanged due to dependence. +;; [FIXME] This loop though valid is currently not interchanged due to the limitation that we cannot split the inner loop latch due to multiple use of inner induction +;; variable.(used to increment the loop counter and to access A[j+1][i+1] ;; for(int i=0;i Date: Wed, 5 Jan 2022 20:35:08 -0800 Subject: [PATCH 767/992] llvm-dwarfdump --summarize-types: skip compilation units Important for DWARFv5 debug info which might contain type units in the debug_info section, which made summarize-types fairly ineffective/lost amongst the noise of CUs being dumped. 
--- llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp | 2 ++ llvm/test/DebugInfo/dwarfdump-type-units.test | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index 6e30309ae94a..d68ecd4f8a42 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,6 +15,8 @@ using namespace llvm; void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { + if (DumpOpts.SummarizeTypes) + return; int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(getFormat()); OS << format("0x%08" PRIx64, getOffset()) << ": Compile Unit:" << " length = " << format("0x%0*" PRIx64, OffsetDumpWidth, getLength()) diff --git a/llvm/test/DebugInfo/dwarfdump-type-units.test b/llvm/test/DebugInfo/dwarfdump-type-units.test index 5048ca097e8d..f34721de66d3 100644 --- a/llvm/test/DebugInfo/dwarfdump-type-units.test +++ b/llvm/test/DebugInfo/dwarfdump-type-units.test @@ -1,5 +1,5 @@ RUN: llvm-dwarfdump -v %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=INFO -check-prefix=CHECK -check-prefix=LONG %s -RUN: llvm-dwarfdump -debug-types %p/Inputs/dwarfdump-type-units.elf-x86-64 -summarize-types | FileCheck -implicit-check-not=DW_ -check-prefix=CHECK -check-prefix=SHORT %s +RUN: llvm-dwarfdump -debug-types -debug-info %p/Inputs/dwarfdump-type-units.elf-x86-64 -summarize-types | FileCheck -implicit-check-not=DW_ -check-prefix=CHECK -check-prefix=SHORT %s RUN: llvm-dwarfdump -v -debug-types %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=TYPES %s INFO: debug_info contents: From 811b60f0b99dad4b2989d21dde38d49155b0c4f9 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 5 Jan 2022 20:41:07 -0800 Subject: [PATCH 768/992] llvm-dwarfdump: Speed up type unit lookup using the TUIndex or a cache Use the TUIndex in a DWP file if present, otherwise (in .o, .dwo, and non-split linked executables) cache a 
DenseMap for lookup of type units. --- .../llvm/DebugInfo/DWARF/DWARFContext.h | 2 ++ llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 28 +++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 24714ac3d101..e82faf6eeb24 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -52,6 +52,7 @@ class raw_ostream; /// information parsing. The actual data is supplied through DWARFObj. class DWARFContext : public DIContext { DWARFUnitVector NormalUnits; + Optional> NormalTypeUnits; std::unique_ptr CUIndex; std::unique_ptr GdbIndex; std::unique_ptr TUIndex; @@ -70,6 +71,7 @@ class DWARFContext : public DIContext { std::unique_ptr AppleObjC; DWARFUnitVector DWOUnits; + Optional> DWOTypeUnits; std::unique_ptr AbbrevDWO; std::unique_ptr MacinfoDWO; std::unique_ptr MacroDWO; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 34c42025109d..ef50ad53650a 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -695,14 +695,30 @@ void DWARFContext::dump( DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint16_t Version, uint64_t Hash, bool IsDWO) { - // FIXME: Check for/use the tu_index here, if there is one. - for (const auto &U : IsDWO ? dwo_units() : normal_units()) { - if (DWARFTypeUnit *TU = dyn_cast(U.get())) { - if (TU->getTypeHash() == Hash) - return TU; + parseDWOUnits(LazyParse); + + if (const auto &TUI = getTUIndex()) { + if (const auto *R = TUI.getFromHash(Hash)) + return dyn_cast_or_null( + DWOUnits.getUnitForIndexEntry(*R)); + return nullptr; + } + + struct UnitContainers { + const DWARFUnitVector &Units; + Optional> ⤅ + }; + UnitContainers Units = IsDWO ? 
UnitContainers{DWOUnits, DWOTypeUnits} + : UnitContainers{NormalUnits, NormalTypeUnits}; + if (!Units.Map) { + Units.Map.emplace(); + for (const auto &U : IsDWO ? dwo_units() : normal_units()) { + if (DWARFTypeUnit *TU = dyn_cast(U.get())) + (*Units.Map)[TU->getTypeHash()] = TU; } } - return nullptr; + + return (*Units.Map)[Hash]; } DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { From 31b79b86ee3defa07f1aa4fa5a10d2389ec527dd Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 5 Jan 2022 20:43:30 -0800 Subject: [PATCH 769/992] Revert "Remove unused variable (-Wunused)" Patch that removed the use of this variable was reverted in 8ade3d43a3e48eb739c9db2f38b618fa213f0546 This reverts commit 3988a06d86e1a14dfd5f5fdae84ddbf928e85dab. --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 08c66ee1b54a..75b52a431e32 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -793,6 +793,7 @@ bool LoopInterchangeLegality::findInductionAndReductions( // This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); // transform currently expects the loop latches to also be the exiting From 50b5b367c1ae72be5265f81b4dba03b3deb0c4e4 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Fri, 24 Dec 2021 15:05:41 -0500 Subject: [PATCH 770/992] [AMDGPU] Iterate LoweredEndCf in the reverse order The function that optimally inserts the exec mask restore operations by combining the blocks currently visits the lowered END_CF pseudos in the forward direction as it iterates the setvector in the order the entries are inserted in it. 
Due to the absence of BranchFolding at -O0, the irregularly placed BBs cause the forward traversal to incorrectly place two unconditional branches in certain BBs while combining them, especially when an intervening block later gets optimized away in subsequent iterations. It is avoided by reverse iterating the setvector. The blocks at the bottom of a function will get optimized first before processing those at the top. Fixes: SWDEV-315215 Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D116273 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 2 +- llvm/test/CodeGen/AMDGPU/collapse-endcf.mir | 217 ++++++++++++++++++ 2 files changed, 218 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 3168bcd53eda..6ec37b32d0a6 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -582,7 +582,7 @@ void SILowerControlFlow::optimizeEndCf() { if (!RemoveRedundantEndcf) return; - for (MachineInstr *MI : LoweredEndCf) { + for (MachineInstr *MI : reverse(LoweredEndCf)) { MachineBasicBlock &MBB = *MI->getParent(); auto Next = skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator())); diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index fc1ce0064afb..a8b97c793258 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -805,5 +805,222 @@ body: | bb.6: S_BRANCH %bb.4 +... + +--- +# While collapsing inner endcf, certain blocks ended up getting two S_BRANCH instructions. +# It happens in the absence of BranchFolding (mostly at -O0) when the irregularly placed BBs are traversed +# in the forward direction and the intervening block between a predecessor and its successor gets optimized +# away in subsequent iterations, leaving 2 S_BRANCH instructions in the predecessor block. 
+# The issue was fixed by iterating the blocks from bottom-up to ensure all endcf pseudos at the bottom of the +# function are processed first. +# This test ensures there are no multiple S_BRANCH instructions inserted in any block. + +name: no_multiple_unconditional_branches +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: no_multiple_unconditional_branches + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.14(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF]], implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.14(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF1]], implicit $exec + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.7(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF2]], implicit $exec + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 
[[COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc + ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_2]] + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF3]], implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc + ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_3]] + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.4 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.7(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.7: + ; GCN-NEXT: successors: %bb.8(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN-NEXT: S_BRANCH %bb.8 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.8: + ; GCN-NEXT: successors: %bb.9(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.9 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.9: + ; GCN-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF4]], implicit $exec + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_4]] + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.12, implicit 
$exec + ; GCN-NEXT: S_BRANCH %bb.11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.10: + ; GCN-NEXT: successors: %bb.14(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.14 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.11: + ; GCN-NEXT: successors: %bb.12(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_BRANCH %bb.12 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.12: + ; GCN-NEXT: successors: %bb.10(0x40000000), %bb.14(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[S_XOR_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: [[S_AND_B64_5:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc + ; GCN-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_B64_5]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.14: + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.14 + + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_64 = V_CMP_EQ_U32_e64 0, killed %0:vgpr_32, implicit $exec + %2:sreg_64 = SI_IF %1:sreg_64, %bb.14, implicit-def $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + ; predecessors: %bb.0 + successors: %bb.2, %bb.6 + + %3:vgpr_32 = IMPLICIT_DEF + %4:sreg_64 = V_CMP_EQ_U32_e64 0, killed %3:vgpr_32, implicit $exec + %5:sreg_64 = SI_IF killed %4:sreg_64, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + ; predecessors: %bb.1 + successors: %bb.3, %bb.7 + + %6:vgpr_32 = IMPLICIT_DEF + %7:sreg_64 = V_CMP_EQ_U32_e64 0, killed %6:vgpr_32, implicit $exec + %8:sreg_64 = SI_IF killed %7:sreg_64, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.3: + ; predecessors: %bb.2 + successors: %bb.4, %bb.5 + + %9:vgpr_32 = IMPLICIT_DEF + %10:sreg_64 = V_CMP_EQ_U32_e64 0, killed %9:vgpr_32, implicit $exec + %11:sreg_64 = SI_IF 
killed %10:sreg_64, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.4 + + bb.4: + ; predecessors: %bb.3 + successors: %bb.5 + + S_BRANCH %bb.5 + + bb.5: + ; predecessors: %bb.3, %bb.4 + successors: %bb.7 + + SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.7 + + bb.6: + ; predecessors: %bb.1, %bb.13 + successors: %bb.14 + + SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.14 + + bb.7: + ; predecessors: %bb2, %bb.5 + successors: %bb.8 + + SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.8 + + bb.8: + ; predecessors: %bb.7 + successors: %bb.9 + + S_BRANCH %bb.9 + + bb.9: + ; predecessors: %bb.8 + successors: %bb.11, %bb.12 + + %12:vgpr_32 = IMPLICIT_DEF + %13:sreg_64 = V_CMP_EQ_U32_e64 0, killed %12:vgpr_32, implicit $exec + %14:sreg_64 = SI_IF killed %13:sreg_64, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.11 + + bb.10: + ; predecessors: %bb.12 + successors: %bb.13 + + S_BRANCH %bb.13 + + bb.11: + ; predecessors: %bb.9 + successors: %bb.12 + + S_BRANCH %bb.12 + + bb.12: + ; predecessors: %bb.9, %bb.11 + successors: %bb.10, %bb.13 + + %15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.10 + + bb.13: + ; predecessors: %bb.10, %bb.12 + successors: %bb.6 + + SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.6 + + bb.14: + ; predecessors: %bb.0, %bb.6 + + SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 ... From 6396a4436145930f1bf0171219214c9f202019be Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 5 Jan 2022 21:25:26 -0800 Subject: [PATCH 771/992] Revert "SIGSEGV in Sanitizer INTERCEPTOR of strstr function." Breaks Asan on Fuchsia's and ubsan with gcc. 
This reverts commit 685c94c6cbba4f2bf076b01fd3e0dcb4b1425b53. --- .../sanitizer_common/sanitizer_common_interceptors.inc | 10 ++++------ compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp | 4 +--- compiler-rt/test/sanitizer_common/TestCases/strstr.c | 4 ---- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 4cb4d4a59f69..b0ab08dff1db 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -575,12 +575,10 @@ INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T size) { #if SANITIZER_INTERCEPT_STRSTR || SANITIZER_INTERCEPT_STRCASESTR static inline void StrstrCheck(void *ctx, char *r, const char *s1, const char *s2) { - uptr len2 = internal_strlen(s2); - COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1); - if (len2 == 0 && !common_flags()->strict_string_checks) - return; - uptr len1 = internal_strlen(s1); - COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1); + uptr len1 = internal_strlen(s1); + uptr len2 = internal_strlen(s2); + COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1); + COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1); } #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp index d16e7bab69b5..d3076f0da489 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp @@ -217,10 +217,8 @@ uptr internal_strnlen(const char *s, uptr maxlen) { char *internal_strstr(const char *haystack, const char *needle) { // This is O(N^2), but we are not using it in hot places. 
- uptr len2 = internal_strlen(needle); - if (len2 == 0) - return const_cast(haystack); uptr len1 = internal_strlen(haystack); + uptr len2 = internal_strlen(needle); if (len1 < len2) return nullptr; for (uptr pos = 0; pos <= len1 - len2; pos++) { if (internal_memcmp(haystack + pos, needle, len2) == 0) diff --git a/compiler-rt/test/sanitizer_common/TestCases/strstr.c b/compiler-rt/test/sanitizer_common/TestCases/strstr.c index d6cff1b424fd..2089ac7b5fcb 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/strstr.c +++ b/compiler-rt/test/sanitizer_common/TestCases/strstr.c @@ -8,9 +8,5 @@ int main(int argc, char **argv) { char s2[] = "b"; r = strstr(s1, s2); assert(r == s1 + 1); - char *s3 = NULL; - char *s4 = ""; - char *p = strstr(s3, s4); - assert(p == NULL); return 0; } From 118e953b18ff07d00b8f822dfbf2991e41d6d791 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 6 Jan 2022 16:03:06 +1100 Subject: [PATCH 772/992] Re-apply "[JITLink] Update JITLink to use ExecutorAddr rather... " with fixes. This re-applies 133f86e95492b2a00b944e070878424cfa73f87c, which was reverted in c5965a411c635106a47738b8d2e24db822b7416f while I investigated bot failures. The original failure contained an arithmetic conversion think-o (on line 419 of EHFrameSupport.cpp) that could cause failures on 32-bit platforms. The issue should be fixed in this patch. 
--- .../LLJITWithObjectLinkingLayerPlugin.cpp | 11 +- .../ExecutionEngine/JITLink/EHFrameSupport.h | 13 +- .../llvm/ExecutionEngine/JITLink/JITLink.h | 140 ++++++++++-------- .../JITLink/JITLinkMemoryManager.h | 40 ++--- .../llvm/ExecutionEngine/JITLink/x86_64.h | 22 ++- .../llvm/ExecutionEngine/Orc/ELFNixPlatform.h | 2 +- .../ExecutionEngine/Orc/EPCEHFrameRegistrar.h | 4 +- .../Orc/EPCGenericJITLinkMemoryManager.h | 2 +- .../llvm/ExecutionEngine/Orc/MachOPlatform.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- .../Orc/Shared/ExecutorAddress.h | 45 +++++- .../DefineExternalSectionStartAndEndSymbols.h | 4 +- .../JITLink/EHFrameSupport.cpp | 57 ++++--- .../JITLink/EHFrameSupportImpl.h | 11 +- .../JITLink/ELFLinkGraphBuilder.h | 14 +- .../ExecutionEngine/JITLink/ELF_aarch64.cpp | 8 +- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 27 ++-- .../ExecutionEngine/JITLink/ELF_x86_64.cpp | 11 +- llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 37 ++--- .../JITLink/JITLinkGeneric.cpp | 9 +- .../JITLink/JITLinkMemoryManager.cpp | 22 +-- .../JITLink/MachOLinkGraphBuilder.cpp | 82 +++++----- .../JITLink/MachOLinkGraphBuilder.h | 14 +- .../ExecutionEngine/JITLink/MachO_arm64.cpp | 44 +++--- .../ExecutionEngine/JITLink/MachO_x86_64.cpp | 22 +-- .../JITLink/PerGraphGOTAndPLTStubsBuilder.h | 8 +- llvm/lib/ExecutionEngine/JITLink/x86_64.cpp | 10 +- .../Orc/DebugObjectManagerPlugin.cpp | 6 +- .../Orc/DebuggerSupportPlugin.cpp | 24 +-- .../ExecutionEngine/Orc/ELFNixPlatform.cpp | 12 +- .../Orc/EPCEHFrameRegistrar.cpp | 10 +- .../Orc/EPCGenericJITLinkMemoryManager.cpp | 4 +- .../Orc/EPCIndirectionUtils.cpp | 23 +-- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 8 +- .../lib/ExecutionEngine/Orc/MachOPlatform.cpp | 38 +++-- .../Orc/ObjectLinkingLayer.cpp | 6 +- llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp | 19 +-- .../tools/llvm-jitlink/llvm-jitlink-macho.cpp | 19 +-- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 27 ++-- .../JITLink/LinkGraphTests.cpp | 87 +++++++---- 
.../Orc/ObjectLinkingLayerTest.cpp | 3 +- 41 files changed, 520 insertions(+), 429 deletions(-) diff --git a/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp b/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp index 2215e2507db3..5a41a7c1e8a6 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin/LLJITWithObjectLinkingLayerPlugin.cpp @@ -100,14 +100,15 @@ class MyPlugin : public ObjectLinkingLayer::Plugin { return; } - JITTargetAddress InitAddr = B.getAddress() & ~(LineWidth - 1); - JITTargetAddress StartAddr = B.getAddress(); - JITTargetAddress EndAddr = B.getAddress() + B.getSize(); + ExecutorAddr InitAddr(B.getAddress().getValue() & ~(LineWidth - 1)); + ExecutorAddr StartAddr = B.getAddress(); + ExecutorAddr EndAddr = B.getAddress() + B.getSize(); auto *Data = reinterpret_cast(B.getContent().data()); - for (JITTargetAddress CurAddr = InitAddr; CurAddr != EndAddr; ++CurAddr) { + for (ExecutorAddr CurAddr = InitAddr; CurAddr != EndAddr; ++CurAddr) { if (CurAddr % LineWidth == 0) - outs() << " " << formatv("{0:x16}", CurAddr) << ": "; + outs() << " " << formatv("{0:x16}", CurAddr.getValue()) + << ": "; if (CurAddr < StartAddr) outs() << " "; else diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h index ec78d9db40b6..e834042f8bb2 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h @@ -25,25 +25,24 @@ namespace jitlink { class EHFrameRegistrar { public: virtual ~EHFrameRegistrar(); - virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + virtual Error registerEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) = 0; - virtual Error 
deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + virtual Error deregisterEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) = 0; }; /// Registers / Deregisters EH-frames in the current process. class InProcessEHFrameRegistrar final : public EHFrameRegistrar { public: - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + Error registerEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; - Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + Error deregisterEHFrames(orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; }; -using StoreFrameRangeFunction = - std::function; +using StoreFrameRangeFunction = std::function; /// Creates a pass that records the address and size of the EH frame section. /// If no eh-frame section is found then the address and size will both be given diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 69106fcb4c28..d0d497b75d9d 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -104,10 +104,10 @@ class Addressable { friend class LinkGraph; protected: - Addressable(JITTargetAddress Address, bool IsDefined) + Addressable(orc::ExecutorAddr Address, bool IsDefined) : Address(Address), IsDefined(IsDefined), IsAbsolute(false) {} - Addressable(JITTargetAddress Address) + Addressable(orc::ExecutorAddr Address) : Address(Address), IsDefined(false), IsAbsolute(true) { assert(!(IsDefined && IsAbsolute) && "Block cannot be both defined and absolute"); @@ -119,8 +119,8 @@ class Addressable { Addressable(Addressable &&) = delete; Addressable &operator=(Addressable &&) = default; - JITTargetAddress getAddress() const { return Address; } - void setAddress(JITTargetAddress Address) { this->Address = Address; } + orc::ExecutorAddr getAddress() const { return Address; } + void setAddress(orc::ExecutorAddr Address) 
{ this->Address = Address; } /// Returns true if this is a defined addressable, in which case you /// can downcast this to a Block. @@ -133,7 +133,7 @@ class Addressable { this->IsAbsolute = IsAbsolute; } - JITTargetAddress Address = 0; + orc::ExecutorAddr Address; uint64_t IsDefined : 1; uint64_t IsAbsolute : 1; @@ -152,7 +152,7 @@ class Block : public Addressable { private: /// Create a zero-fill defined addressable. - Block(Section &Parent, JITTargetAddress Size, JITTargetAddress Address, + Block(Section &Parent, orc::ExecutorAddrDiff Size, orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Size(Size) { assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); @@ -168,7 +168,7 @@ class Block : public Addressable { /// Create a defined addressable for the given content. /// The Content is assumed to be non-writable, and will be copied when /// mutations are required. - Block(Section &Parent, ArrayRef Content, JITTargetAddress Address, + Block(Section &Parent, ArrayRef Content, orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Data(Content.data()), Size(Content.size()) { @@ -188,7 +188,7 @@ class Block : public Addressable { /// The standard way to achieve this is to allocate it on the Graph's /// allocator. Block(Section &Parent, MutableArrayRef Content, - JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) + orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) : Addressable(Address, true), Parent(&Parent), Data(Content.data()), Size(Content.size()) { assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); @@ -328,7 +328,7 @@ class Block : public Addressable { /// Returns the address of the fixup for the given edge, which is equal to /// this block's address plus the edge's offset. 
- JITTargetAddress getFixupAddress(const Edge &E) const { + orc::ExecutorAddr getFixupAddress(const Edge &E) const { return getAddress() + E.getOffset(); } @@ -343,12 +343,17 @@ class Block : public Addressable { std::vector Edges; }; -// Align a JITTargetAddress to conform with block alignment requirements. -inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { +// Align an address to conform with block alignment requirements. +inline uint64_t alignToBlock(uint64_t Addr, Block &B) { uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); return Addr + Delta; } +// Align a orc::ExecutorAddr to conform with block alignment requirements. +inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, Block &B) { + return orc::ExecutorAddr(alignToBlock(Addr.getValue(), B)); +} + /// Describes symbol linkage. This can be used to make resolve definition /// clashes. enum class Linkage : uint8_t { @@ -391,8 +396,8 @@ class Symbol { friend class LinkGraph; private: - Symbol(Addressable &Base, JITTargetAddress Offset, StringRef Name, - JITTargetAddress Size, Linkage L, Scope S, bool IsLive, + Symbol(Addressable &Base, orc::ExecutorAddrDiff Offset, StringRef Name, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive, bool IsCallable) : Name(Name), Base(&Base), Offset(Offset), Size(Size) { assert(Offset <= MaxOffset && "Offset out of range"); @@ -403,7 +408,8 @@ class Symbol { } static Symbol &constructCommon(void *SymStorage, Block &Base, StringRef Name, - JITTargetAddress Size, Scope S, bool IsLive) { + orc::ExecutorAddrDiff Size, Scope S, + bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert(!Name.empty() && "Common symbol name cannot be empty"); assert(Base.isDefined() && @@ -416,7 +422,7 @@ class Symbol { } static Symbol &constructExternal(void *SymStorage, Addressable &Base, - StringRef Name, JITTargetAddress Size, + StringRef Name, orc::ExecutorAddrDiff Size, Linkage L) { assert(SymStorage && "Storage cannot be 
null"); assert(!Base.isDefined() && @@ -428,7 +434,7 @@ class Symbol { } static Symbol &constructAbsolute(void *SymStorage, Addressable &Base, - StringRef Name, JITTargetAddress Size, + StringRef Name, orc::ExecutorAddrDiff Size, Linkage L, Scope S, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && @@ -439,8 +445,8 @@ class Symbol { } static Symbol &constructAnonDef(void *SymStorage, Block &Base, - JITTargetAddress Offset, - JITTargetAddress Size, bool IsCallable, + orc::ExecutorAddrDiff Offset, + orc::ExecutorAddrDiff Size, bool IsCallable, bool IsLive) { assert(SymStorage && "Storage cannot be null"); assert((Offset + Size) <= Base.getSize() && @@ -452,9 +458,9 @@ class Symbol { } static Symbol &constructNamedDef(void *SymStorage, Block &Base, - JITTargetAddress Offset, StringRef Name, - JITTargetAddress Size, Linkage L, Scope S, - bool IsLive, bool IsCallable) { + orc::ExecutorAddrDiff Offset, StringRef Name, + orc::ExecutorAddrDiff Size, Linkage L, + Scope S, bool IsLive, bool IsCallable) { assert(SymStorage && "Storage cannot be null"); assert((Offset + Size) <= Base.getSize() && "Symbol extends past end of block"); @@ -552,16 +558,16 @@ class Symbol { } /// Returns the offset for this symbol within the underlying addressable. - JITTargetAddress getOffset() const { return Offset; } + orc::ExecutorAddrDiff getOffset() const { return Offset; } /// Returns the address of this symbol. - JITTargetAddress getAddress() const { return Base->getAddress() + Offset; } + orc::ExecutorAddr getAddress() const { return Base->getAddress() + Offset; } /// Returns the size of this symbol. - JITTargetAddress getSize() const { return Size; } + orc::ExecutorAddrDiff getSize() const { return Size; } /// Set the size of this symbol. 
- void setSize(JITTargetAddress Size) { + void setSize(orc::ExecutorAddrDiff Size) { assert(Base && "Cannot set size for null Symbol"); assert((Size == 0 || Base->isDefined()) && "Non-zero size can only be set for defined symbols"); @@ -622,7 +628,7 @@ class Symbol { void setBlock(Block &B) { Base = &B; } - void setOffset(uint64_t NewOffset) { + void setOffset(orc::ExecutorAddrDiff NewOffset) { assert(NewOffset <= MaxOffset && "Offset out of range"); Offset = NewOffset; } @@ -637,7 +643,7 @@ class Symbol { uint64_t S : 2; uint64_t IsLive : 1; uint64_t IsCallable : 1; - JITTargetAddress Size = 0; + orc::ExecutorAddrDiff Size = 0; }; raw_ostream &operator<<(raw_ostream &OS, const Symbol &A); @@ -783,13 +789,13 @@ class SectionRange { assert((First || !Last) && "Last can not be null if start is non-null"); return !First; } - JITTargetAddress getStart() const { - return First ? First->getAddress() : 0; + orc::ExecutorAddr getStart() const { + return First ? First->getAddress() : orc::ExecutorAddr(); } - JITTargetAddress getEnd() const { - return Last ? Last->getAddress() + Last->getSize() : 0; + orc::ExecutorAddr getEnd() const { + return Last ? Last->getAddress() + Last->getSize() : orc::ExecutorAddr(); } - uint64_t getSize() const { return getEnd() - getStart(); } + orc::ExecutorAddrDiff getSize() const { return getEnd() - getStart(); } private: Block *First = nullptr; @@ -995,7 +1001,7 @@ class LinkGraph { /// Create a content block. Block &createContentBlock(Section &Parent, ArrayRef Content, - uint64_t Address, uint64_t Alignment, + orc::ExecutorAddr Address, uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, Content, Address, Alignment, AlignmentOffset); } @@ -1003,15 +1009,17 @@ class LinkGraph { /// Create a content block with initially mutable data. 
Block &createMutableContentBlock(Section &Parent, MutableArrayRef MutableContent, - uint64_t Address, uint64_t Alignment, + orc::ExecutorAddr Address, + uint64_t Alignment, uint64_t AlignmentOffset) { return createBlock(Parent, MutableContent, Address, Alignment, AlignmentOffset); } /// Create a zero-fill block. - Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address, - uint64_t Alignment, uint64_t AlignmentOffset) { + Block &createZeroFillBlock(Section &Parent, orc::ExecutorAddrDiff Size, + orc::ExecutorAddr Address, uint64_t Alignment, + uint64_t AlignmentOffset) { return createBlock(Parent, Size, Address, Alignment, AlignmentOffset); } @@ -1061,22 +1069,24 @@ class LinkGraph { /// present during lookup: Externals with strong linkage must be found or /// an error will be emitted. Externals with weak linkage are permitted to /// be undefined, in which case they are assigned a value of 0. - Symbol &addExternalSymbol(StringRef Name, uint64_t Size, Linkage L) { + Symbol &addExternalSymbol(StringRef Name, orc::ExecutorAddrDiff Size, + Linkage L) { assert(llvm::count_if(ExternalSymbols, [&](const Symbol *Sym) { return Sym->getName() == Name; }) == 0 && "Duplicate external symbol"); - auto &Sym = - Symbol::constructExternal(Allocator.Allocate(), - createAddressable(0, false), Name, Size, L); + auto &Sym = Symbol::constructExternal( + Allocator.Allocate(), + createAddressable(orc::ExecutorAddr(), false), Name, Size, L); ExternalSymbols.insert(&Sym); return Sym; } /// Add an absolute symbol. - Symbol &addAbsoluteSymbol(StringRef Name, JITTargetAddress Address, - uint64_t Size, Linkage L, Scope S, bool IsLive) { + Symbol &addAbsoluteSymbol(StringRef Name, orc::ExecutorAddr Address, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, + bool IsLive) { assert(llvm::count_if(AbsoluteSymbols, [&](const Symbol *Sym) { return Sym->getName() == Name; @@ -1091,7 +1101,7 @@ class LinkGraph { /// Convenience method for adding a weak zero-fill symbol. 
Symbol &addCommonSymbol(StringRef Name, Scope S, Section &Section, - JITTargetAddress Address, uint64_t Size, + orc::ExecutorAddr Address, orc::ExecutorAddrDiff Size, uint64_t Alignment, bool IsLive) { assert(llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { @@ -1107,8 +1117,8 @@ class LinkGraph { } /// Add an anonymous symbol. - Symbol &addAnonymousSymbol(Block &Content, JITTargetAddress Offset, - JITTargetAddress Size, bool IsCallable, + Symbol &addAnonymousSymbol(Block &Content, orc::ExecutorAddrDiff Offset, + orc::ExecutorAddrDiff Size, bool IsCallable, bool IsLive) { auto &Sym = Symbol::constructAnonDef(Allocator.Allocate(), Content, Offset, Size, IsCallable, IsLive); @@ -1117,9 +1127,9 @@ class LinkGraph { } /// Add a named symbol. - Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, - StringRef Name, JITTargetAddress Size, Linkage L, - Scope S, bool IsCallable, bool IsLive) { + Symbol &addDefinedSymbol(Block &Content, orc::ExecutorAddrDiff Offset, + StringRef Name, orc::ExecutorAddrDiff Size, + Linkage L, Scope S, bool IsCallable, bool IsLive) { assert((S == Scope::Local || llvm::count_if(defined_symbols(), [&](const Symbol *Sym) { return Sym->getName() == Name; @@ -1193,7 +1203,7 @@ class LinkGraph { assert(Sym.isDefined() && "Sym is not a defined symbol"); Section &Sec = Sym.getBlock().getSection(); Sec.removeSymbol(Sym); - Sym.makeExternal(createAddressable(0, false)); + Sym.makeExternal(createAddressable(orc::ExecutorAddr(), false)); } ExternalSymbols.insert(&Sym); } @@ -1203,7 +1213,7 @@ class LinkGraph { /// /// Symbol size, linkage, scope, and callability, and liveness will be left /// unchanged. Symbol offset will be reset to 0. 
- void makeAbsolute(Symbol &Sym, JITTargetAddress Address) { + void makeAbsolute(Symbol &Sym, orc::ExecutorAddr Address) { assert(!Sym.isAbsolute() && "Symbol is already absolute"); if (Sym.isExternal()) { assert(ExternalSymbols.count(&Sym) && @@ -1222,8 +1232,9 @@ class LinkGraph { /// Turn an absolute or external symbol into a defined one by attaching it to /// a block. Symbol must not already be defined. - void makeDefined(Symbol &Sym, Block &Content, JITTargetAddress Offset, - JITTargetAddress Size, Linkage L, Scope S, bool IsLive) { + void makeDefined(Symbol &Sym, Block &Content, orc::ExecutorAddrDiff Offset, + orc::ExecutorAddrDiff Size, Linkage L, Scope S, + bool IsLive) { assert(!Sym.isDefined() && "Sym is already a defined symbol"); if (Sym.isAbsolute()) { assert(AbsoluteSymbols.count(&Sym) && @@ -1255,15 +1266,15 @@ class LinkGraph { /// /// All other symbol attributes are unchanged. void transferDefinedSymbol(Symbol &Sym, Block &DestBlock, - JITTargetAddress NewOffset, - Optional ExplicitNewSize) { + orc::ExecutorAddrDiff NewOffset, + Optional ExplicitNewSize) { auto &OldSection = Sym.getBlock().getSection(); Sym.setBlock(DestBlock); Sym.setOffset(NewOffset); if (ExplicitNewSize) Sym.setSize(*ExplicitNewSize); else { - JITTargetAddress RemainingBlockSize = DestBlock.getSize() - NewOffset; + auto RemainingBlockSize = DestBlock.getSize() - NewOffset; if (Sym.getSize() > RemainingBlockSize) Sym.setSize(RemainingBlockSize); } @@ -1407,14 +1418,14 @@ inline MutableArrayRef Block::getMutableContent(LinkGraph &G) { /// Enables easy lookup of blocks by addresses. class BlockAddressMap { public: - using AddrToBlockMap = std::map; + using AddrToBlockMap = std::map; using const_iterator = AddrToBlockMap::const_iterator; /// A block predicate that always adds all blocks. static bool includeAllBlocks(const Block &B) { return true; } /// A block predicate that always includes blocks with non-null addresses. 
- static bool includeNonNull(const Block &B) { return B.getAddress(); } + static bool includeNonNull(const Block &B) { return !!B.getAddress(); } BlockAddressMap() = default; @@ -1478,7 +1489,7 @@ class BlockAddressMap { /// Returns the block starting at the given address, or nullptr if no such /// block exists. - Block *getBlockAt(JITTargetAddress Addr) const { + Block *getBlockAt(orc::ExecutorAddr Addr) const { auto I = AddrToBlock.find(Addr); if (I == AddrToBlock.end()) return nullptr; @@ -1487,7 +1498,7 @@ class BlockAddressMap { /// Returns the block covering the given address, or nullptr if no such block /// exists. - Block *getBlockCovering(JITTargetAddress Addr) const { + Block *getBlockCovering(orc::ExecutorAddr Addr) const { auto I = AddrToBlock.upper_bound(Addr); if (I == AddrToBlock.begin()) return nullptr; @@ -1504,10 +1515,11 @@ class BlockAddressMap { ExistingBlock.getAddress() + ExistingBlock.getSize(); return make_error( "Block at " + - formatv("{0:x16} -- {1:x16}", NewBlock.getAddress(), NewBlockEnd) + + formatv("{0:x16} -- {1:x16}", NewBlock.getAddress().getValue(), + NewBlockEnd.getValue()) + " overlaps " + - formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress(), - ExistingBlockEnd)); + formatv("{0:x16} -- {1:x16}", ExistingBlock.getAddress().getValue(), + ExistingBlockEnd.getValue())); } AddrToBlockMap AddrToBlock; @@ -1532,7 +1544,7 @@ class SymbolAddressMap { /// Returns the list of symbols that start at the given address, or nullptr if /// no such symbols exist. - const SymbolVector *getSymbolsAt(JITTargetAddress Addr) const { + const SymbolVector *getSymbolsAt(orc::ExecutorAddr Addr) const { auto I = AddrToSymbols.find(Addr); if (I == AddrToSymbols.end()) return nullptr; @@ -1540,7 +1552,7 @@ class SymbolAddressMap { } private: - std::map AddrToSymbols; + std::map AddrToSymbols; }; /// A function for mutating LinkGraphs. 
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 7dd382facde8..179a8b946cf3 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -13,9 +13,10 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" #include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" -#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/MSVCErrorWorkarounds.h" @@ -49,9 +50,9 @@ class Section; /// executor-side implementation code is responsible for freeing the error /// string). struct AllocActionCall { - JITTargetAddress FnAddr = 0; - JITTargetAddress CtxAddr = 0; - JITTargetAddress CtxSize = 0; + orc::ExecutorAddr FnAddr; + orc::ExecutorAddr CtxAddr; + orc::ExecutorAddrDiff CtxSize; }; /// A pair of AllocActionCalls, one to be run at finalization time, one to be @@ -93,47 +94,48 @@ class JITLinkMemoryManager { class FinalizedAlloc { friend class JITLinkMemoryManager; - public: - static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0); + static constexpr auto InvalidAddr = ~uint64_t(0); + public: FinalizedAlloc() = default; - explicit FinalizedAlloc(JITTargetAddress A) : A(A) { - assert(A != 0 && "Explicitly creating an invalid allocation?"); + explicit FinalizedAlloc(orc::ExecutorAddr A) : A(A) { + assert(A && "Explicitly creating an invalid allocation?"); } FinalizedAlloc(const FinalizedAlloc &) = delete; FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) { - Other.A = InvalidAddr; + Other.A.setValue(InvalidAddr); } FinalizedAlloc &operator=(const FinalizedAlloc &) = delete; 
FinalizedAlloc &operator=(FinalizedAlloc &&Other) { - assert(A == InvalidAddr && + assert(A.getValue() == InvalidAddr && "Cannot overwrite active finalized allocation"); std::swap(A, Other.A); return *this; } ~FinalizedAlloc() { - assert(A == InvalidAddr && "Finalized allocation was not deallocated"); + assert(A.getValue() == InvalidAddr && + "Finalized allocation was not deallocated"); } /// FinalizedAllocs convert to false for default-constructed, and /// true otherwise. Default-constructed allocs need not be deallocated. - explicit operator bool() const { return A != InvalidAddr; } + explicit operator bool() const { return A.getValue() != InvalidAddr; } /// Returns the address associated with this finalized allocation. /// The allocation is unmodified. - JITTargetAddress getAddress() const { return A; } + orc::ExecutorAddr getAddress() const { return A; } /// Returns the address associated with this finalized allocation and /// resets this object to the default state. /// This should only be used by allocators when deallocating memory. - JITTargetAddress release() { - JITTargetAddress Tmp = A; - A = InvalidAddr; + orc::ExecutorAddr release() { + orc::ExecutorAddr Tmp = A; + A.setValue(InvalidAddr); return Tmp; } private: - JITTargetAddress A = InvalidAddr; + orc::ExecutorAddr A{InvalidAddr}; }; /// Represents an allocation which has not been finalized yet. @@ -263,7 +265,7 @@ class BasicLayout { Align Alignment; size_t ContentSize; uint64_t ZeroFillSize; - JITTargetAddress Addr; + orc::ExecutorAddr Addr; char *WorkingMem = nullptr; private: @@ -341,7 +343,7 @@ class SimpleSegmentAlloc { /// Describes the segment working memory and executor address. 
struct SegmentInfo { - JITTargetAddress Addr = 0; + orc::ExecutorAddr Addr; MutableArrayRef WorkingMem; }; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 3130ea381534..4a4e8d15be66 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -368,18 +368,18 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + auto FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Pointer64: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); *(ulittle64_t *)FixupPtr = Value; break; } case Pointer32: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmU32(Value))) *(ulittle32_t *)FixupPtr = Value; else @@ -387,7 +387,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, break; } case Pointer32Signed: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmS32(Value))) *(little32_t *)FixupPtr = Value; else @@ -483,8 +483,8 @@ extern const char PointerJumpStubContent[6]; inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, Symbol *InitialTarget = nullptr, uint64_t InitialAddend = 0) { - auto &B = - G.createContentBlock(PointerSection, NullPointerContent, ~7ULL, 8, 0); + auto &B = G.createContentBlock(PointerSection, NullPointerContent, + orc::ExecutorAddr(~uint64_t(7)), 8, 0); if (InitialTarget) B.addEdge(Pointer64, 0, *InitialTarget, InitialAddend); return G.addAnonymousSymbol(B, 0, 8, 
false, false); @@ -498,8 +498,8 @@ inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, /// address: highest allowable: (~5U) inline Block &createPointerJumpStubBlock(LinkGraph &G, Section &StubSection, Symbol &PointerSymbol) { - auto &B = - G.createContentBlock(StubSection, PointerJumpStubContent, ~5ULL, 1, 0); + auto &B = G.createContentBlock(StubSection, PointerJumpStubContent, + orc::ExecutorAddr(~uint64_t(5)), 1, 0); B.addEdge(Delta32, 2, PointerSymbol, -4); return B; } @@ -552,8 +552,7 @@ class GOTTableManager : public TableManager { "Fell through switch, but no new kind to set"); DEBUG_WITH_TYPE("jitlink", { dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << formatv("{0:x}", B->getFixupAddress(E)) << " (" - << formatv("{0:x}", B->getAddress()) << " + " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); E.setKind(KindToSet); @@ -586,8 +585,7 @@ class PLTTableManager : public TableManager { if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) { DEBUG_WITH_TYPE("jitlink", { dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << formatv("{0:x}", B->getFixupAddress(E)) << " (" - << formatv("{0:x}", B->getAddress()) << " + " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h index 20da3e3b89eb..8f1bf854843f 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -236,7 +236,7 @@ class ELFNixPlatform : public Platform { DenseMap InitSeqs; std::vector BootstrapPOSRs; - DenseMap HandleAddrToJITDylib; + DenseMap HandleAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; diff --git 
a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h index 6d113a7bdf1a..3b34400894df 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -39,9 +39,9 @@ class EPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { : ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + Error registerEHFrames(ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; - Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + Error deregisterEHFrames(ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) override; private: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h index b9825f17ec17..18656d03e441 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h @@ -85,7 +85,7 @@ class SPSSerializationTraits::deserialize(IB, A)) return false; - FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue()); + FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A); return true; } }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index d7b5e2eda6ee..9482f20ecec6 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -239,7 +239,7 @@ class MachOPlatform : public Platform { std::mutex PlatformMutex; DenseMap InitSeqs; - DenseMap HeaderAddrToJITDylib; + DenseMap HeaderAddrToJITDylib; DenseMap JITDylibToPThreadKey; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h 
b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 109922a46e26..e22d7f7de814 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -222,7 +222,7 @@ class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin { private: struct EHFrameRange { - JITTargetAddress Addr = 0; + orc::ExecutorAddr Addr; size_t Size; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index 2d316b9de007..1abc9508d93a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -13,7 +13,10 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H #define LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -29,7 +32,7 @@ class ExecutorAddr { ExecutorAddr() = default; /// Create an ExecutorAddr from the given value. - explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {} + explicit constexpr ExecutorAddr(uint64_t Addr) : Addr(Addr) {} /// Create an ExecutorAddr from the given pointer. /// Warning: This should only be used when JITing in-process. 
@@ -88,12 +91,12 @@ class ExecutorAddr { ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); } ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); } - ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator+=(const ExecutorAddrDiff &Delta) { Addr += Delta; return *this; } - ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator-=(const ExecutorAddrDiff &Delta) { Addr -= Delta; return *this; } @@ -120,6 +123,18 @@ inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS, return ExecutorAddr(LHS + RHS.getValue()); } +/// Subtracting an offset from an address yields an address. +inline ExecutorAddr operator-(const ExecutorAddr &LHS, + const ExecutorAddrDiff &RHS) { + return ExecutorAddr(LHS.getValue() - RHS); +} + +/// Taking the modulus of an address and a diff yields a diff. +inline ExecutorAddrDiff operator%(const ExecutorAddr &LHS, + const ExecutorAddrDiff &RHS) { + return ExecutorAddrDiff(LHS.getValue() % RHS); +} + /// Represents an address range in the exceutor process. struct ExecutorAddrRange { ExecutorAddrRange() = default; @@ -148,6 +163,10 @@ struct ExecutorAddrRange { ExecutorAddr End; }; +inline raw_ostream &operator<<(raw_ostream &OS, const ExecutorAddr &A) { + return OS << formatv("{0:x}", A.getValue()); +} + namespace shared { class SPSExecutorAddr {}; @@ -198,6 +217,26 @@ using SPSExecutorAddrRangeSequence = SPSSequence; } // End namespace shared. } // End namespace orc. + +// Provide DenseMapInfo for ExecutorAddrs. 
+template <> struct DenseMapInfo { + static inline orc::ExecutorAddr getEmptyKey() { + return orc::ExecutorAddr(DenseMapInfo::getEmptyKey()); + } + static inline orc::ExecutorAddr getTombstoneKey() { + return orc::ExecutorAddr(DenseMapInfo::getTombstoneKey()); + } + + static unsigned getHashValue(const orc::ExecutorAddr &Addr) { + return DenseMapInfo::getHashValue(Addr.getValue()); + } + + static bool isEqual(const orc::ExecutorAddr &LHS, + const orc::ExecutorAddr &RHS) { + return DenseMapInfo::isEqual(LHS.getValue(), RHS.getValue()); + } +}; + } // End namespace llvm. #endif // LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORADDRESS_H diff --git a/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h b/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h index 8ae3bc2bf61d..159880e4b152 100644 --- a/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h +++ b/llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h @@ -52,13 +52,13 @@ class DefineExternalSectionStartAndEndSymbols { auto &SR = getSectionRange(*D.Sec); if (D.IsStart) { if (SR.empty()) - G.makeAbsolute(*Sym, 0); + G.makeAbsolute(*Sym, orc::ExecutorAddr()); else G.makeDefined(*Sym, *SR.getFirstBlock(), 0, 0, Linkage::Strong, Scope::Local, false); } else { if (SR.empty()) - G.makeAbsolute(*Sym, 0); + G.makeAbsolute(*Sym, orc::ExecutorAddr()); else G.makeDefined(*Sym, *SR.getLastBlock(), SR.getLastBlock()->getSize(), 0, Linkage::Strong, diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 4d7d5ce26668..6a58358aa2d1 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -65,10 +65,7 @@ Error EHFrameSplitter::operator()(LinkGraph &G) { Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache) { - LLVM_DEBUG({ - dbgs() << " Processing block at " << 
formatv("{0:x16}", B.getAddress()) - << "\n"; - }); + LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n"); // eh-frame should not contain zero-fill blocks. if (B.isZeroFill()) @@ -400,7 +397,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, BlockEdgeMap &BlockEdges) { LLVM_DEBUG(dbgs() << " Record is FDE\n"); - JITTargetAddress RecordAddress = B.getAddress() + RecordOffset; + orc::ExecutorAddr RecordAddress = B.getAddress() + RecordOffset; auto RecordContent = B.getContent().slice(RecordOffset, RecordLength); BinaryStreamReader RecordReader( @@ -418,8 +415,9 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, { // Process the CIE pointer field. auto CIEEdgeItr = BlockEdges.find(RecordOffset + CIEDeltaFieldOffset); - JITTargetAddress CIEAddress = - RecordAddress + CIEDeltaFieldOffset - CIEDelta; + orc::ExecutorAddr CIEAddress = + RecordAddress + orc::ExecutorAddrDiff(CIEDeltaFieldOffset) - + orc::ExecutorAddrDiff(CIEDelta); if (CIEEdgeItr == BlockEdges.end()) { LLVM_DEBUG({ @@ -456,7 +454,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, { // Process the PC-Begin field. 
Block *PCBeginBlock = nullptr; - JITTargetAddress PCBeginFieldOffset = RecordReader.getOffset(); + orc::ExecutorAddrDiff PCBeginFieldOffset = RecordReader.getOffset(); auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset); if (PCEdgeItr == BlockEdges.end()) { auto PCBeginPtrInfo = @@ -464,12 +462,12 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, RecordAddress + PCBeginFieldOffset, RecordReader); if (!PCBeginPtrInfo) return PCBeginPtrInfo.takeError(); - JITTargetAddress PCBegin = PCBeginPtrInfo->first; + orc::ExecutorAddr PCBegin = PCBeginPtrInfo->first; Edge::Kind PCBeginEdgeKind = PCBeginPtrInfo->second; LLVM_DEBUG({ dbgs() << " Adding edge at " - << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset) - << " to PC at " << formatv("{0:x16}", PCBegin) << "\n"; + << (RecordAddress + PCBeginFieldOffset) << " to PC at " + << formatv("{0:x16}", PCBegin) << "\n"; }); auto PCBeginSym = getOrCreateSymbol(PC, PCBegin); if (!PCBeginSym) @@ -522,7 +520,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, if (auto Err = RecordReader.readULEB128(AugmentationDataSize)) return Err; - JITTargetAddress LSDAFieldOffset = RecordReader.getOffset(); + orc::ExecutorAddrDiff LSDAFieldOffset = RecordReader.getOffset(); auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset); if (LSDAEdgeItr == BlockEdges.end()) { auto LSDAPointerInfo = @@ -530,7 +528,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B, RecordAddress + LSDAFieldOffset, RecordReader); if (!LSDAPointerInfo) return LSDAPointerInfo.takeError(); - JITTargetAddress LSDA = LSDAPointerInfo->first; + orc::ExecutorAddr LSDA = LSDAPointerInfo->first; Edge::Kind LSDAEdgeKind = LSDAPointerInfo->second; auto LSDASym = getOrCreateSymbol(PC, LSDA); if (!LSDASym) @@ -645,12 +643,10 @@ unsigned EHFrameEdgeFixer::getPointerEncodingDataSize(uint8_t PointerEncoding) { } } -Expected> +Expected> EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, - 
JITTargetAddress PointerFieldAddress, + orc::ExecutorAddr PointerFieldAddress, BinaryStreamReader &RecordReader) { - static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t), - "Result must be able to hold a uint64_t"); assert(isSupportedPointerEncoding(PointerEncoding) && "Unsupported pointer encoding"); @@ -663,7 +659,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, if (EffectiveType == DW_EH_PE_absptr) EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4; - JITTargetAddress Addr; + orc::ExecutorAddr Addr; Edge::Kind PointerEdgeKind = Edge::Invalid; switch (EffectiveType) { case DW_EH_PE_udata4: { @@ -709,7 +705,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding, } Expected EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC, - JITTargetAddress Addr) { + orc::ExecutorAddr Addr) { Symbol *CanonicalSym = nullptr; auto UpdateCanonicalSym = [&](Symbol *Sym) { @@ -753,8 +749,9 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { << EHFrameSectionName << "\n"; }); - auto &NullTerminatorBlock = G.createContentBlock( - *EHFrame, NullTerminatorBlockContent, 0xfffffffffffffffc, 1, 0); + auto &NullTerminatorBlock = + G.createContentBlock(*EHFrame, NullTerminatorBlockContent, + orc::ExecutorAddr(~uint64_t(4)), 1, 0); G.addAnonymousSymbol(NullTerminatorBlock, 0, 4, false, true); return Error::success(); } @@ -762,17 +759,15 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) { EHFrameRegistrar::~EHFrameRegistrar() {} Error InProcessEHFrameRegistrar::registerEHFrames( - JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { - return orc::registerEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr), - EHFrameSectionSize); + orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { + return orc::registerEHFrameSection(EHFrameSectionAddr.toPtr(), + EHFrameSectionSize); } Error InProcessEHFrameRegistrar::deregisterEHFrames( - JITTargetAddress EHFrameSectionAddr, size_t 
EHFrameSectionSize) { - return orc::deregisterEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr), - EHFrameSectionSize); + orc::ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { + return orc::deregisterEHFrameSection(EHFrameSectionAddr.toPtr(), + EHFrameSectionSize); } LinkGraphPassFunction @@ -789,14 +784,14 @@ createEHFrameRecorderPass(const Triple &TT, StoreFrameRange = std::move(StoreRangeAddress)](LinkGraph &G) -> Error { // Search for a non-empty eh-frame and record the address of the first // symbol in it. - JITTargetAddress Addr = 0; + orc::ExecutorAddr Addr; size_t Size = 0; if (auto *S = G.findSectionByName(EHFrameSectionName)) { auto R = SectionRange(*S); Addr = R.getStart(); Size = R.getSize(); } - if (Addr == 0 && Size != 0) + if (!Addr && Size != 0) return make_error( StringRef(EHFrameSectionName) + " section can not have zero address with non-zero size"); diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h index b4c4b0f7b097..ef4b47b9aa28 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h @@ -71,12 +71,12 @@ class EHFrameEdgeFixer { }; using BlockEdgeMap = DenseMap; - using CIEInfosMap = DenseMap; + using CIEInfosMap = DenseMap; struct ParseContext { ParseContext(LinkGraph &G) : G(G) {} - Expected findCIEInfo(JITTargetAddress Address) { + Expected findCIEInfo(orc::ExecutorAddr Address) { auto I = CIEInfos.find(Address); if (I == CIEInfos.end()) return make_error("No CIE found at address " + @@ -102,12 +102,13 @@ class EHFrameEdgeFixer { static bool isSupportedPointerEncoding(uint8_t PointerEncoding); unsigned getPointerEncodingDataSize(uint8_t PointerEncoding); - Expected> + Expected> readEncodedPointer(uint8_t PointerEncoding, - JITTargetAddress PointerFieldAddress, + orc::ExecutorAddr PointerFieldAddress, BinaryStreamReader &RecordReader); - Expected 
getOrCreateSymbol(ParseContext &PC, JITTargetAddress Addr); + Expected getOrCreateSymbol(ParseContext &PC, + orc::ExecutorAddr Addr); StringRef EHFrameSectionName; unsigned PointerSize; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index f9101d71dfa8..23c8b77b913b 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -322,10 +322,12 @@ template Error ELFLinkGraphBuilder::graphifySections() { if (!Data) return Data.takeError(); - G->createContentBlock(GraphSec, *Data, Sec.sh_addr, Sec.sh_addralign, 0); + G->createContentBlock(GraphSec, *Data, orc::ExecutorAddr(Sec.sh_addr), + Sec.sh_addralign, 0); } else - G->createZeroFillBlock(GraphSec, Sec.sh_size, Sec.sh_addr, - Sec.sh_addralign, 0); + G->createZeroFillBlock(GraphSec, Sec.sh_size, + orc::ExecutorAddr(Sec.sh_addr), Sec.sh_addralign, + 0); setGraphSection(SecIndex, GraphSec); } @@ -393,9 +395,9 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { // Handle common symbols specially. 
if (Sym.isCommon()) { - Symbol &GSym = - G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, - Sym.st_size, Sym.getValue(), false); + Symbol &GSym = G->addCommonSymbol(*Name, Scope::Default, + getCommonSection(), orc::ExecutorAddr(), + Sym.st_size, Sym.getValue(), false); setGraphSymbol(SymIndex, GSym); continue; } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp index dc183dfddfae..35b70d533907 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -41,10 +41,11 @@ class ELFJITLinker_aarch64 : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + auto FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case aarch64::R_AARCH64_CALL26: { - assert((FixupAddress & 0x3) == 0 && "Call-inst is not 32-bit aligned"); + assert((FixupAddress.getValue() & 0x3) == 0 && + "Call-inst is not 32-bit aligned"); int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); if (static_cast(Value) & 0x3) @@ -124,7 +125,8 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { int64_t Addend = Rel.r_addend; Block *BlockToFix = *(GraphSection.blocks().begin()); - JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset; + orc::ExecutorAddr FixupAddress = + orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 26ec79ea50cf..a4d1cc8c6195 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -44,15 +44,16 @@ class PerGraphGOTAndPLTStubsBuilder_ELF_riscv bool 
isGOTEdgeToFix(Edge &E) const { return E.getKind() == R_RISCV_GOT_HI20; } Symbol &createGOTEntry(Symbol &Target) { - Block &GOTBlock = G.createContentBlock( - getGOTSection(), getGOTEntryBlockContent(), 0, G.getPointerSize(), 0); + Block &GOTBlock = + G.createContentBlock(getGOTSection(), getGOTEntryBlockContent(), + orc::ExecutorAddr(), G.getPointerSize(), 0); GOTBlock.addEdge(isRV64() ? R_RISCV_64 : R_RISCV_32, 0, Target, 0); return G.addAnonymousSymbol(GOTBlock, 0, G.getPointerSize(), false, false); } Symbol &createPLTStub(Symbol &Target) { - Block &StubContentBlock = - G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 4, 0); + Block &StubContentBlock = G.createContentBlock( + getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 4, 0); auto &GOTEntrySymbol = getGOTEntry(Target); StubContentBlock.addEdge(R_RISCV_CALL, 0, GOTEntrySymbol, 0); return G.addAnonymousSymbol(StubContentBlock, 0, StubEntrySize, true, @@ -134,13 +135,13 @@ static Expected getRISCVPCRelHi20(const Edge &E) { const Symbol &Sym = E.getTarget(); const Block &B = Sym.getBlock(); - JITTargetAddress Offset = Sym.getOffset(); + orc::ExecutorAddrDiff Offset = Sym.getOffset(); struct Comp { - bool operator()(const Edge &Lhs, JITTargetAddress Offset) { + bool operator()(const Edge &Lhs, orc::ExecutorAddrDiff Offset) { return Lhs.getOffset() < Offset; } - bool operator()(JITTargetAddress Offset, const Edge &Rhs) { + bool operator()(orc::ExecutorAddrDiff Offset, const Edge &Rhs) { return Offset < Rhs.getOffset(); } }; @@ -176,27 +177,27 @@ class ELFJITLinker_riscv : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case R_RISCV_32: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + 
E.getAddend()).getValue(); *(little32_t *)FixupPtr = static_cast(Value); break; } case R_RISCV_64: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); *(little64_t *)FixupPtr = static_cast(Value); break; } case R_RISCV_HI20: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); int32_t Hi = (Value + 0x800) & 0xFFFFF000; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = (RawInstr & 0xFFF) | static_cast(Hi); break; } case R_RISCV_LO12_I: { - int64_t Value = E.getTarget().getAddress() + E.getAddend(); + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); int32_t Lo = Value & 0xFFF; uint32_t RawInstr = *(little32_t *)FixupPtr; *(little32_t *)FixupPtr = @@ -322,7 +323,7 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder { int64_t Addend = Rel.r_addend; Block *BlockToFix = *(GraphSection.blocks().begin()); - JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset; + auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(*Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 27d8833ae19e..cebe9e9dac78 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -59,8 +59,8 @@ class TLSInfoTableManager_ELF_x86_64 // the TLS Info entry's key value will be written by the fixTLVSectionByName // pass, so create mutable content. 
auto &TLSInfoEntry = G.createMutableContentBlock( - getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), 0, 8, - 0); + getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), + orc::ExecutorAddr(), 8, 0); TLSInfoEntry.addEdge(x86_64::Pointer64, 8, Target, 0); return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false); } @@ -249,7 +249,7 @@ class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { } Block *BlockToFix = *(GraphSection.blocks().begin()); - JITTargetAddress FixupAddress = FixupSection.sh_addr + Rel.r_offset; + auto FixupAddress = orc::ExecutorAddr(FixupSection.sh_addr) + Rel.r_offset; Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress(); Edge GE(Kind, Offset, *GraphSymbol, Addend); LLVM_DEBUG({ @@ -322,8 +322,9 @@ class ELFJITLinker_x86_64 : public JITLinker { // If there's no defined symbol then create one. SectionRange SR(*GOTSection); if (SR.empty()) - GOTSymbol = &G.addAbsoluteSymbol(ELFGOTSymbolName, 0, 0, - Linkage::Strong, Scope::Local, true); + GOTSymbol = + &G.addAbsoluteSymbol(ELFGOTSymbolName, orc::ExecutorAddr(), 0, + Linkage::Strong, Scope::Local, true); else GOTSymbol = &G.addDefinedSymbol(*SR.getFirstBlock(), 0, ELFGOTSymbolName, 0, diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 51dcc1c35fad..8c920c5fe2dd 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -90,8 +90,8 @@ const char *getScopeName(Scope S) { } raw_ostream &operator<<(raw_ostream &OS, const Block &B) { - return OS << formatv("{0:x16}", B.getAddress()) << " -- " - << formatv("{0:x8}", B.getAddress() + B.getSize()) << ": " + return OS << B.getAddress() << " -- " << (B.getAddress() + B.getSize()) + << ": " << "size = " << formatv("{0:x8}", B.getSize()) << ", " << (B.isZeroFill() ? 
"zero-fill" : "content") << ", align = " << B.getAlignment() @@ -100,9 +100,8 @@ raw_ostream &operator<<(raw_ostream &OS, const Block &B) { } raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { - OS << formatv("{0:x16}", Sym.getAddress()) << " (" - << (Sym.isDefined() ? "block" : "addressable") << " + " - << formatv("{0:x8}", Sym.getOffset()) + OS << Sym.getAddress() << " (" << (Sym.isDefined() ? "block" : "addressable") + << " + " << formatv("{0:x8}", Sym.getOffset()) << "): size: " << formatv("{0:x8}", Sym.getSize()) << ", linkage: " << formatv("{0:6}", getLinkageName(Sym.getLinkage())) << ", scope: " << formatv("{0:8}", getScopeName(Sym.getScope())) << ", " @@ -113,9 +112,9 @@ raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName) { - OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": " - << formatv("{0:x16}", B.getAddress()) << " + " - << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName << " -> "; + OS << "edge@" << B.getAddress() + E.getOffset() << ": " << B.getAddress() + << " + " << formatv("{0:x}", E.getOffset()) << " -- " << EdgeKindName + << " -> "; auto &TargetSym = E.getTarget(); if (TargetSym.hasName()) @@ -123,17 +122,16 @@ void printEdge(raw_ostream &OS, const Block &B, const Edge &E, else { auto &TargetBlock = TargetSym.getBlock(); auto &TargetSec = TargetBlock.getSection(); - JITTargetAddress SecAddress = ~JITTargetAddress(0); + orc::ExecutorAddr SecAddress(~uint64_t(0)); for (auto *B : TargetSec.blocks()) if (B->getAddress() < SecAddress) SecAddress = B->getAddress(); - JITTargetAddress SecDelta = TargetSym.getAddress() - SecAddress; - OS << formatv("{0:x16}", TargetSym.getAddress()) << " (section " - << TargetSec.getName(); + orc::ExecutorAddrDiff SecDelta = TargetSym.getAddress() - SecAddress; + OS << TargetSym.getAddress() << " (section " << TargetSec.getName(); if (SecDelta) OS << " + " << formatv("{0:x}", 
SecDelta); - OS << " / block " << formatv("{0:x16}", TargetBlock.getAddress()); + OS << " / block " << TargetBlock.getAddress(); if (TargetSym.getOffset()) OS << " + " << formatv("{0:x}", TargetSym.getOffset()); OS << ")"; @@ -265,7 +263,7 @@ void LinkGraph::dump(raw_ostream &OS) { }); for (auto *B : SortedBlocks) { - OS << " block " << formatv("{0:x16}", B->getAddress()) + OS << " block " << B->getAddress() << " size = " << formatv("{0:x8}", B->getSize()) << ", align = " << B->getAlignment() << ", alignment-offset = " << B->getAlignmentOffset(); @@ -290,9 +288,8 @@ void LinkGraph::dump(raw_ostream &OS) { return LHS.getOffset() < RHS.getOffset(); }); for (auto &E : SortedEdges) { - OS << " " << formatv("{0:x16}", B->getFixupAddress(E)) - << " (block + " << formatv("{0:x8}", E.getOffset()) - << "), addend = "; + OS << " " << B->getFixupAddress(E) << " (block + " + << formatv("{0:x8}", E.getOffset()) << "), addend = "; if (E.getAddend() >= 0) OS << formatv("+{0:x8}", E.getAddend()); else @@ -315,16 +312,14 @@ void LinkGraph::dump(raw_ostream &OS) { OS << "Absolute symbols:\n"; if (!llvm::empty(absolute_symbols())) { for (auto *Sym : absolute_symbols()) - OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym - << "\n"; + OS << " " << Sym->getAddress() << ": " << *Sym << "\n"; } else OS << " none\n"; OS << "\nExternal symbols:\n"; if (!llvm::empty(external_symbols())) { for (auto *Sym : external_symbols()) - OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym - << "\n"; + OS << " " << Sym->getAddress() << ": " << *Sym << "\n"; } else OS << " none\n"; } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 706688aba4ec..35ee050c8566 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -192,7 +192,7 @@ JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const { // Identify 
unresolved external symbols. JITLinkContext::LookupMap UnresolvedExternals; for (auto *Sym : G->external_symbols()) { - assert(Sym->getAddress() == 0 && + assert(!Sym->getAddress() && "External has already been assigned an address"); assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); @@ -209,11 +209,12 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { for (auto *Sym : G->external_symbols()) { assert(Sym->getOffset() == 0 && "External symbol is not at the start of its addressable block"); - assert(Sym->getAddress() == 0 && "Symbol already resolved"); + assert(!Sym->getAddress() && "Symbol already resolved"); assert(!Sym->isDefined() && "Symbol being resolved is already defined"); auto ResultI = Result.find(Sym->getName()); if (ResultI != Result.end()) - Sym->getAddressable().setAddress(ResultI->second.getAddress()); + Sym->getAddressable().setAddress( + orc::ExecutorAddr(ResultI->second.getAddress())); else assert(Sym->getLinkage() == Linkage::Weak && "Failed to resolve non-weak reference"); @@ -223,7 +224,7 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { dbgs() << "Externals after applying lookup result:\n"; for (auto *Sym : G->external_symbols()) dbgs() << " " << Sym->getName() << ": " - << formatv("{0:x16}", Sym->getAddress()) << "\n"; + << formatv("{0:x16}", Sym->getAddress().getValue()) << "\n"; }); } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 67fe6287e388..164014612247 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -66,10 +66,10 @@ JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default; static Error runAllocAction(AllocActionCall &C) { using WrapperFnTy = CWrapperFunctionResult (*)(const void *, size_t); - auto *Fn = jitTargetAddressToPointer(C.FnAddr); + auto *Fn = C.FnAddr.toPtr(); - 
return toError(Fn(jitTargetAddressToPointer(C.CtxAddr), - static_cast(C.CtxSize))); + return toError( + Fn(C.CtxAddr.toPtr(), static_cast(C.CtxSize))); } BasicLayout::BasicLayout(LinkGraph &G) : G(G) { @@ -207,7 +207,7 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, std::make_unique("", Triple(), 0, support::native, nullptr); AllocGroupSmallMap ContentBlocks; - JITTargetAddress NextAddr = 0x100000; + orc::ExecutorAddr NextAddr(0x100000); for (auto &KV : Segments) { auto &AG = KV.first; auto &Seg = KV.second; @@ -220,7 +220,8 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, Sec.setMemDeallocPolicy(AG.getMemDeallocPolicy()); if (Seg.ContentSize != 0) { - NextAddr = alignTo(NextAddr, Seg.ContentAlign); + NextAddr = + orc::ExecutorAddr(alignTo(NextAddr.getValue(), Seg.ContentAlign)); auto &B = G->createMutableContentBlock(Sec, G->allocateBuffer(Seg.ContentSize), NextAddr, Seg.ContentAlign.value(), 0); @@ -426,8 +427,8 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, static_cast(SegsSizes->FinalizeSegs)}; } - auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegsMem.base()); - auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegsMem.base()); + auto NextStandardSegAddr = orc::ExecutorAddr::fromPtr(StandardSegsMem.base()); + auto NextFinalizeSegAddr = orc::ExecutorAddr::fromPtr(FinalizeSegsMem.base()); LLVM_DEBUG({ dbgs() << "InProcessMemoryManager allocated:\n"; @@ -454,7 +455,7 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, ? 
NextStandardSegAddr : NextFinalizeSegAddr; - Seg.WorkingMem = jitTargetAddressToPointer(SegAddr); + Seg.WorkingMem = SegAddr.toPtr(); Seg.Addr = SegAddr; SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); @@ -478,8 +479,7 @@ void InProcessMemoryManager::deallocate(std::vector Allocs, { std::lock_guard Lock(FinalizedAllocsMutex); for (auto &Alloc : Allocs) { - auto *FA = - jitTargetAddressToPointer(Alloc.release()); + auto *FA = Alloc.release().toPtr(); StandardSegmentsList.push_back(std::move(FA->StandardSegments)); if (!FA->DeallocActions.empty()) DeallocActionsList.push_back(std::move(FA->DeallocActions)); @@ -520,7 +520,7 @@ InProcessMemoryManager::createFinalizedAlloc( auto *FA = FinalizedAllocInfos.Allocate(); new (FA) FinalizedAllocInfo( {std::move(StandardSegments), std::move(DeallocActions)}); - return FinalizedAlloc(pointerToJITTargetAddress(FA)); + return FinalizedAlloc(orc::ExecutorAddr::fromPtr(FA)); } } // end namespace jitlink diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index d588b63d9e88..2fcf3e94b8b2 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -134,7 +134,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() { memcpy(&NSec.SegName, Sec64.segname, 16); NSec.SegName[16] = '\0'; - NSec.Address = Sec64.addr; + NSec.Address = orc::ExecutorAddr(Sec64.addr); NSec.Size = Sec64.size; NSec.Alignment = 1ULL << Sec64.align; NSec.Flags = Sec64.flags; @@ -147,7 +147,7 @@ Error MachOLinkGraphBuilder::createNormalizedSections() { memcpy(&NSec.SegName, Sec32.segname, 16); NSec.SegName[16] = '\0'; - NSec.Address = Sec32.addr; + NSec.Address = orc::ExecutorAddr(Sec32.addr); NSec.Size = Sec32.size; NSec.Alignment = 1ULL << Sec32.align; NSec.Flags = Sec32.flags; @@ -287,7 +287,8 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() { if (!NSec) return 
NSec.takeError(); - if (Value < NSec->Address || Value > NSec->Address + NSec->Size) + if (orc::ExecutorAddr(Value) < NSec->Address || + orc::ExecutorAddr(Value) > NSec->Address + NSec->Size) return make_error("Address " + formatv("{0:x}", Value) + " for symbol " + *Name + " does not fall within section"); @@ -311,8 +312,9 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() { } void MachOLinkGraphBuilder::addSectionStartSymAndBlock( - unsigned SecIndex, Section &GraphSec, uint64_t Address, const char *Data, - uint64_t Size, uint32_t Alignment, bool IsLive) { + unsigned SecIndex, Section &GraphSec, orc::ExecutorAddr Address, + const char *Data, orc::ExecutorAddrDiff Size, uint32_t Alignment, + bool IsLive) { Block &B = Data ? G->createContentBlock(GraphSec, ArrayRef(Data, Size), Address, Alignment, 0) @@ -346,7 +348,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous common symbol at index " + Twine(KV.first)); NSym.GraphSymbol = &G->addCommonSymbol( - *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value, + *NSym.Name, NSym.S, getCommonSection(), orc::ExecutorAddr(), + orc::ExecutorAddrDiff(NSym.Value), 1ull << MachO::GET_COMM_ALIGN(NSym.Desc), NSym.Desc & MachO::N_NO_DEAD_STRIP); } else { @@ -364,8 +367,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous absolute symbol at index " + Twine(KV.first)); NSym.GraphSymbol = &G->addAbsoluteSymbol( - *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default, - NSym.Desc & MachO::N_NO_DEAD_STRIP); + *NSym.Name, orc::ExecutorAddr(NSym.Value), 0, Linkage::Strong, + Scope::Default, NSym.Desc & MachO::N_NO_DEAD_STRIP); break; case MachO::N_SECT: SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); @@ -468,13 +471,13 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { // If the section is non-empty but there is no symbol covering the start // address then add an anonymous one. 
- if (SecNSymStack.back()->Value != NSec.Address) { - auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address; + if (orc::ExecutorAddr(SecNSymStack.back()->Value) != NSec.Address) { + auto AnonBlockSize = + orc::ExecutorAddr(SecNSymStack.back()->Value) - NSec.Address; LLVM_DEBUG({ dbgs() << " Section start not covered by symbol. " - << "Creating anonymous block to cover [ " - << formatv("{0:x16}", NSec.Address) << " -- " - << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n"; + << "Creating anonymous block to cover [ " << NSec.Address + << " -- " << (NSec.Address + AnonBlockSize) << " ]\n"; }); addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address, NSec.Data, AnonBlockSize, NSec.Alignment, @@ -496,12 +499,12 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { } // BlockNSyms now contains the block symbols in reverse canonical order. - JITTargetAddress BlockStart = BlockSyms.front()->Value; - JITTargetAddress BlockEnd = SecNSymStack.empty() - ? NSec.Address + NSec.Size - : SecNSymStack.back()->Value; - JITTargetAddress BlockOffset = BlockStart - NSec.Address; - JITTargetAddress BlockSize = BlockEnd - BlockStart; + auto BlockStart = orc::ExecutorAddr(BlockSyms.front()->Value); + orc::ExecutorAddr BlockEnd = + SecNSymStack.empty() ? 
NSec.Address + NSec.Size + : orc::ExecutorAddr(SecNSymStack.back()->Value); + orc::ExecutorAddrDiff BlockOffset = BlockStart - NSec.Address; + orc::ExecutorAddrDiff BlockSize = BlockEnd - BlockStart; LLVM_DEBUG({ dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart) @@ -521,8 +524,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { BlockStart, NSec.Alignment, BlockStart % NSec.Alignment); - Optional LastCanonicalAddr; - JITTargetAddress SymEnd = BlockEnd; + Optional LastCanonicalAddr; + auto SymEnd = BlockEnd; while (!BlockSyms.empty()) { auto &NSym = *BlockSyms.back(); BlockSyms.pop_back(); @@ -530,9 +533,9 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; - auto &Sym = createStandardGraphSymbol(NSym, B, SymEnd - NSym.Value, - SectionIsText, SymLive, - LastCanonicalAddr != NSym.Value); + auto &Sym = createStandardGraphSymbol( + NSym, B, SymEnd - orc::ExecutorAddr(NSym.Value), SectionIsText, + SymLive, LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)); if (LastCanonicalAddr != Sym.getAddress()) { if (LastCanonicalAddr) @@ -568,11 +571,12 @@ Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym, dbgs() << "\n"; }); - auto &Sym = NSym.Name ? G->addDefinedSymbol(B, NSym.Value - B.getAddress(), - *NSym.Name, Size, NSym.L, NSym.S, - IsText, IsNoDeadStrip) - : G->addAnonymousSymbol(B, NSym.Value - B.getAddress(), - Size, IsText, IsNoDeadStrip); + auto SymOffset = orc::ExecutorAddr(NSym.Value) - B.getAddress(); + auto &Sym = + NSym.Name + ? 
G->addDefinedSymbol(B, SymOffset, *NSym.Name, Size, NSym.L, NSym.S, + IsText, IsNoDeadStrip) + : G->addAnonymousSymbol(B, SymOffset, Size, IsText, IsNoDeadStrip); NSym.GraphSymbol = &Sym; if (IsCanonical) @@ -635,12 +639,12 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; - JITTargetAddress BlockStart = 0; + orc::ExecutorAddrDiff BlockStart = 0; // Scan section for null characters. for (size_t I = 0; I != NSec.Size; ++I) if (NSec.Data[I] == '\0') { - JITTargetAddress BlockEnd = I + 1; + orc::ExecutorAddrDiff BlockEnd = I + 1; size_t BlockSize = BlockEnd - BlockStart; // Create a block for this null terminated string. auto &B = G->createContentBlock(*NSec.GraphSection, @@ -654,7 +658,8 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( }); // If there's no symbol at the start of this block then create one. - if (NSyms.empty() || NSyms.back()->Value != B.getAddress()) { + if (NSyms.empty() || + orc::ExecutorAddr(NSyms.back()->Value) != B.getAddress()) { auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false); setCanonicalSymbol(NSec, S); LLVM_DEBUG({ @@ -666,18 +671,19 @@ Error MachOLinkGraphBuilder::graphifyCStringSection( } // Process any remaining symbols that point into this block. 
- JITTargetAddress LastCanonicalAddr = B.getAddress() + BlockEnd; - while (!NSyms.empty() && - NSyms.back()->Value < (B.getAddress() + BlockSize)) { + auto LastCanonicalAddr = B.getAddress() + BlockEnd; + while (!NSyms.empty() && orc::ExecutorAddr(NSyms.back()->Value) < + B.getAddress() + BlockSize) { auto &NSym = *NSyms.back(); - size_t SymSize = (B.getAddress() + BlockSize) - NSyms.back()->Value; + size_t SymSize = (B.getAddress() + BlockSize) - + orc::ExecutorAddr(NSyms.back()->Value); bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; bool IsCanonical = false; - if (LastCanonicalAddr != NSym.Value) { + if (LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)) { IsCanonical = true; - LastCanonicalAddr = NSym.Value; + LastCanonicalAddr = orc::ExecutorAddr(NSym.Value); } createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, SymLive, diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h index d29732ebdba8..2951a8533098 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -71,13 +71,13 @@ class MachOLinkGraphBuilder { public: char SectName[17]; char SegName[17]; - uint64_t Address = 0; + orc::ExecutorAddr Address; uint64_t Size = 0; uint64_t Alignment = 0; uint32_t Flags = 0; const char *Data = nullptr; Section *GraphSection = nullptr; - std::map CanonicalSymbols; + std::map CanonicalSymbols; }; using SectionParserFunction = std::function; @@ -137,7 +137,7 @@ class MachOLinkGraphBuilder { /// Returns the symbol with the highest address not greater than the search /// address, or null if no such symbol exists. 
Symbol *getSymbolByAddress(NormalizedSection &NSec, - JITTargetAddress Address) { + orc::ExecutorAddr Address) { auto I = NSec.CanonicalSymbols.upper_bound(Address); if (I == NSec.CanonicalSymbols.begin()) return nullptr; @@ -147,7 +147,7 @@ class MachOLinkGraphBuilder { /// Returns the symbol with the highest address not greater than the search /// address, or an error if no such symbol exists. Expected findSymbolByAddress(NormalizedSection &NSec, - JITTargetAddress Address) { + orc::ExecutorAddr Address) { auto *Sym = getSymbolByAddress(NSec, Address); if (Sym) if (Address <= Sym->getAddress() + Sym->getSize()) @@ -193,9 +193,9 @@ class MachOLinkGraphBuilder { Section &getCommonSection(); void addSectionStartSymAndBlock(unsigned SecIndex, Section &GraphSec, - uint64_t Address, const char *Data, - uint64_t Size, uint32_t Alignment, - bool IsLive); + orc::ExecutorAddr Address, const char *Data, + orc::ExecutorAddrDiff Size, + uint32_t Alignment, bool IsLive); Error createNormalizedSections(); Error createNormalizedSymbols(); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index f2a029d35cd5..844e76ab0542 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -109,7 +109,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Expected parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, const MachO::relocation_info &SubRI, - JITTargetAddress FixupAddress, const char *FixupContent, + orc::ExecutorAddr FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, object::relocation_iterator &RelEnd) { using namespace support; @@ -162,7 +162,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return ToSymbolSec.takeError(); ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address); assert(ToSymbol && "No symbol for section"); - FixupValue -= 
ToSymbol->getAddress(); + FixupValue -= ToSymbol->getAddress().getValue(); } MachOARM64RelocationKind DeltaKind; @@ -195,7 +195,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { for (auto &S : Obj.sections()) { - JITTargetAddress SectionAddress = S.getAddress(); + orc::ExecutorAddr SectionAddress(S.getAddress()); // Skip relocations virtual sections. if (S.isVirtual()) { @@ -234,7 +234,8 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return Kind.takeError(); // Find the address of the value to fix up. - JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; + orc::ExecutorAddr FixupAddress = + SectionAddress + (uint32_t)RI.r_address; LLVM_DEBUG({ dbgs() << " " << NSec->SectName << " + " << formatv("{0:x8}", RI.r_address) << ":\n"; @@ -249,7 +250,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { BlockToFix = &SymbolToFixOrErr->getBlock(); } - if (FixupAddress + static_cast(1ULL << RI.r_length) > + if (FixupAddress + orc::ExecutorAddrDiff(1ULL << RI.r_length) > BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( "Relocation content extends past end of fixup block"); @@ -290,7 +291,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { }); // Find the address of the value to fix up. 
- JITTargetAddress PairedFixupAddress = + orc::ExecutorAddr PairedFixupAddress = SectionAddress + (uint32_t)RI.r_address; if (PairedFixupAddress != FixupAddress) return make_error("Paired relocation points at " @@ -324,7 +325,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Addend = *(const ulittle64_t *)FixupContent; break; case Pointer64Anon: { - JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; + orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -435,7 +436,7 @@ class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 Symbol &createGOTEntry(Symbol &Target) { auto &GOTEntryBlock = G.createContentBlock( - getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + getGOTSection(), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0); GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); } @@ -457,8 +458,8 @@ class PerGraphGOTAndPLTStubsBuilder_MachO_arm64 } Symbol &createPLTStub(Symbol &Target) { - auto &StubContentBlock = - G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); + auto &StubContentBlock = G.createContentBlock( + getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 1, 0); // Re-use GOT entries for stub targets. 
auto &GOTEntrySymbol = getGOTEntry(Target); StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0); @@ -545,11 +546,12 @@ class MachOJITLinker_arm64 : public JITLinker { char *BlockWorkingMem = B.getAlreadyMutableContent().data(); char *FixupPtr = BlockWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Branch26: { - assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned"); + assert((FixupAddress.getValue() & 0x3) == 0 && + "Branch-inst is not 32-bit aligned"); int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); @@ -569,7 +571,7 @@ class MachOJITLinker_arm64 : public JITLinker { break; } case Pointer32: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); if (Value > std::numeric_limits::max()) return makeTargetOutOfRangeError(G, B, E); *(ulittle32_t *)FixupPtr = Value; @@ -577,7 +579,7 @@ class MachOJITLinker_arm64 : public JITLinker { } case Pointer64: case Pointer64Anon: { - uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend(); *(ulittle64_t *)FixupPtr = Value; break; } @@ -587,9 +589,10 @@ class MachOJITLinker_arm64 : public JITLinker { assert((E.getKind() != GOTPage21 || E.getAddend() == 0) && "GOTPAGE21 with non-zero addend"); uint64_t TargetPage = - (E.getTarget().getAddress() + E.getAddend()) & - ~static_cast(4096 - 1); - uint64_t PCPage = FixupAddress & ~static_cast(4096 - 1); + (E.getTarget().getAddress().getValue() + E.getAddend()) & + ~static_cast(4096 - 1); + uint64_t PCPage = + FixupAddress.getValue() & ~static_cast(4096 - 1); int64_t PageDelta = TargetPage - PCPage; if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1)) @@ -606,7 +609,7 @@ class MachOJITLinker_arm64 : public JITLinker { } case 
PageOffset12: { uint64_t TargetOffset = - (E.getTarget().getAddress() + E.getAddend()) & 0xfff; + (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff; uint32_t RawInstr = *(ulittle32_t *)FixupPtr; unsigned ImmShift = getPageOffset12Shift(RawInstr); @@ -627,7 +630,7 @@ class MachOJITLinker_arm64 : public JITLinker { assert((RawInstr & 0xfffffc00) == 0xf9400000 && "RawInstr isn't a 64-bit LDR immediate"); - uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff; + uint32_t TargetOffset = E.getTarget().getAddress().getValue() & 0xfff; assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned"); uint32_t EncodedImm = (TargetOffset >> 3) << 10; uint32_t FixedInstr = RawInstr | EncodedImm; @@ -635,7 +638,8 @@ class MachOJITLinker_arm64 : public JITLinker { break; } case LDRLiteral19: { - assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned"); + assert((FixupAddress.getValue() & 0x3) == 0 && + "LDR is not 32-bit aligned"); assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); uint32_t RawInstr = *(ulittle32_t *)FixupPtr; assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index a4fcd3b9a5f5..82afaa3aa3c5 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -119,7 +119,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { // returns the edge kind and addend to be used. 
Expected parsePairRelocation( Block &BlockToFix, MachONormalizedRelocationType SubtractorKind, - const MachO::relocation_info &SubRI, JITTargetAddress FixupAddress, + const MachO::relocation_info &SubRI, orc::ExecutorAddr FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, object::relocation_iterator &RelEnd) { using namespace support; @@ -172,7 +172,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { return ToSymbolSec.takeError(); ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address); assert(ToSymbol && "No symbol for section"); - FixupValue -= ToSymbol->getAddress(); + FixupValue -= ToSymbol->getAddress().getValue(); } Edge::Kind DeltaKind; @@ -206,7 +206,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { for (auto &S : Obj.sections()) { - JITTargetAddress SectionAddress = S.getAddress(); + orc::ExecutorAddr SectionAddress(S.getAddress()); // Skip relocations virtual sections. if (S.isVirtual()) { @@ -241,7 +241,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { MachO::relocation_info RI = getRelocationInfo(RelItr); // Find the address of the value to fix up. 
- JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; + auto FixupAddress = SectionAddress + (uint32_t)RI.r_address; LLVM_DEBUG({ dbgs() << " " << NSec->SectName << " + " @@ -257,7 +257,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { BlockToFix = &SymbolToFixOrErr->getBlock(); } - if (FixupAddress + static_cast(1ULL << RI.r_length) > + if (FixupAddress + orc::ExecutorAddrDiff(1ULL << RI.r_length) > BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( "Relocation extends past end of fixup block"); @@ -343,7 +343,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { Kind = x86_64::Pointer64; break; case MachOPointer64Anon: { - JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; + orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -367,8 +367,8 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { Kind = x86_64::Delta32; break; case MachOPCRel32Anon: { - JITTargetAddress TargetAddress = - FixupAddress + 4 + *(const little32_t *)FixupContent; + orc::ExecutorAddr TargetAddress(FixupAddress + 4 + + *(const little32_t *)FixupContent); auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) return TargetNSec.takeError(); @@ -384,10 +384,10 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { case MachOPCRel32Minus1Anon: case MachOPCRel32Minus2Anon: case MachOPCRel32Minus4Anon: { - JITTargetAddress Delta = - 4 + static_cast( + orc::ExecutorAddrDiff Delta = + 4 + orc::ExecutorAddrDiff( 1ULL << (*MachORelocKind - MachOPCRel32Minus1Anon)); - JITTargetAddress TargetAddress = + orc::ExecutorAddr TargetAddress = FixupAddress + Delta + *(const little32_t *)FixupContent; auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1); if (!TargetNSec) diff --git 
a/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h b/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h index 6e9df9c75a65..6e325f92bafb 100644 --- a/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/PerGraphGOTAndPLTStubsBuilder.h @@ -47,16 +47,16 @@ class PerGraphGOTAndPLTStubsBuilder { if (impl().isGOTEdgeToFix(E)) { LLVM_DEBUG({ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) - << " edge at " << formatv("{0:x}", B->getFixupAddress(E)) - << " (" << formatv("{0:x}", B->getAddress()) << " + " + << " edge at " << B->getFixupAddress(E) << " (" + << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); impl().fixGOTEdge(E, getGOTEntry(E.getTarget())); } else if (impl().isExternalBranchEdge(E)) { LLVM_DEBUG({ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) - << " edge at " << formatv("{0:x}", B->getFixupAddress(E)) - << " (" << formatv("{0:x}", B->getAddress()) << " + " + << " edge at " << B->getFixupAddress(E) << " (" + << B->getAddress() << " + " << formatv("{0:x}", E.getOffset()) << ")\n"; }); impl().fixPLTEdge(E, getPLTStub(E.getTarget())); diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp index 48521280059d..df9979b47e88 100644 --- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp @@ -95,10 +95,10 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) { assert(GOTEntryBlock.edges_size() == 1 && "GOT entry should only have one outgoing edge"); auto &GOTTarget = GOTEntryBlock.edges().begin()->getTarget(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); - JITTargetAddress EdgeAddr = B->getFixupAddress(E); + orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); + orc::ExecutorAddr EdgeAddr = B->getFixupAddress(E); int64_t Displacement = TargetAddr - EdgeAddr + 4; - bool TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr); + bool 
TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr.getValue()); bool DisplacementInRangeForImmS32 = isInRangeForImmS32(Displacement); // If both of the Target and displacement is out of range, then @@ -165,8 +165,8 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) { "GOT block should only have one outgoing edge"); auto &GOTTarget = GOTBlock.edges().begin()->getTarget(); - JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset(); - JITTargetAddress TargetAddr = GOTTarget.getAddress(); + orc::ExecutorAddr EdgeAddr = B->getAddress() + E.getOffset(); + orc::ExecutorAddr TargetAddr = GOTTarget.getAddress(); int64_t Displacement = TargetAddr - EdgeAddr + 4; if (isInRangeForImmS32(Displacement)) { diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp index fcfe389f82a8..4ff6b7fd54df 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp @@ -67,9 +67,9 @@ class ELFDebugObjectSection : public DebugObjectSection { template void ELFDebugObjectSection::setTargetMemoryRange(SectionRange Range) { // Only patch load-addresses for executable and data sections. 
- if (isTextOrDataSection()) { - Header->sh_addr = static_cast(Range.getStart()); - } + if (isTextOrDataSection()) + Header->sh_addr = + static_cast(Range.getStart().getValue()); } template diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp index fe62138c790c..92657805efdd 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp @@ -129,8 +129,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { Section *Sec = nullptr; StringRef SegName; StringRef SecName; - JITTargetAddress Alignment = 0; - JITTargetAddress StartAddr = 0; + uint64_t Alignment = 0; + orc::ExecutorAddr StartAddr; uint64_t Size = 0; }; @@ -153,7 +153,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { return Error::success(); } DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos), - Sec.getName().substr(SepPos + 1), 0, 0}); + Sec.getName().substr(SepPos + 1), 0, + orc::ExecutorAddr(), 0}); } else { NonDebugSections.push_back(&Sec); @@ -182,11 +183,11 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { size_t ContainerBlockSize = sizeof(typename MachOTraits::Header) + SegmentLCSize; auto ContainerBlockContent = G.allocateBuffer(ContainerBlockSize); - MachOContainerBlock = - &G.createMutableContentBlock(SDOSec, ContainerBlockContent, 0, 8, 0); + MachOContainerBlock = &G.createMutableContentBlock( + SDOSec, ContainerBlockContent, orc::ExecutorAddr(), 8, 0); // Copy debug section blocks and symbols. 
- JITTargetAddress NextBlockAddr = MachOContainerBlock->getSize(); + orc::ExecutorAddr NextBlockAddr(MachOContainerBlock->getSize()); for (auto &SI : DebugSecInfos) { assert(!llvm::empty(SI.Sec->blocks()) && "Empty debug info section?"); @@ -219,7 +220,8 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { G.mergeSections(SDOSec, *SI.Sec); SI.Sec = nullptr; } - size_t DebugSectionsSize = NextBlockAddr - MachOContainerBlock->getSize(); + size_t DebugSectionsSize = + NextBlockAddr - orc::ExecutorAddr(MachOContainerBlock->getSize()); // Write MachO header and debug section load commands. MachOStructWriter Writer(MachOContainerBlock->getAlreadyMutableContent()); @@ -266,9 +268,9 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { memset(&Sec, 0, sizeof(Sec)); memcpy(Sec.sectname, SI.SecName.data(), SI.SecName.size()); memcpy(Sec.segname, SI.SegName.data(), SI.SegName.size()); - Sec.addr = SI.StartAddr; + Sec.addr = SI.StartAddr.getValue(); Sec.size = SI.Size; - Sec.offset = SI.StartAddr; + Sec.offset = SI.StartAddr.getValue(); Sec.align = SI.Alignment; Sec.reloff = 0; Sec.nreloc = 0; @@ -336,7 +338,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { memset(&SecCmd, 0, sizeof(SecCmd)); memcpy(SecCmd.sectname, SecName.data(), SecName.size()); memcpy(SecCmd.segname, SegName.data(), SegName.size()); - SecCmd.addr = R.getStart(); + SecCmd.addr = R.getStart().getValue(); SecCmd.size = R.getSize(); SecCmd.offset = 0; SecCmd.align = R.getFirstBlock()->getAlignment(); @@ -348,7 +350,7 @@ class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase { SectionRange R(MachOContainerBlock->getSection()); G.allocActions().push_back( - {{RegisterActionAddr.getValue(), R.getStart(), R.getSize()}, {}}); + {{RegisterActionAddr, R.getStart(), R.getSize()}, {}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp 
b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp index eded54f4bfb3..e25d7c4651a9 100644 --- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp @@ -58,7 +58,8 @@ class DSOHandleMaterializationUnit : public MaterializationUnit { auto &DSOHandleSection = G->createSection(".data.__dso_handle", jitlink::MemProt::Read); auto &DSOHandleBlock = G->createContentBlock( - DSOHandleSection, getDSOHandleContent(PointerSize), 0, 8, 0); + DSOHandleSection, getDSOHandleContent(PointerSize), orc::ExecutorAddr(), + 8, 0); auto &DSOHandleSymbol = G->addDefinedSymbol( DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(), jitlink::Linkage::Strong, jitlink::Scope::Default, false, true); @@ -375,7 +376,7 @@ void ELFNixPlatform::rt_getDeinitializers( { std::lock_guard Lock(PlatformMutex); - auto I = HandleAddrToJITDylib.find(Handle.getValue()); + auto I = HandleAddrToJITDylib.find(Handle); if (I != HandleAddrToJITDylib.end()) JD = I->second; } @@ -406,7 +407,7 @@ void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HandleAddrToJITDylib.find(Handle.getValue()); + auto I = HandleAddrToJITDylib.find(Handle); if (I != HandleAddrToJITDylib.end()) JD = I->second; } @@ -630,12 +631,11 @@ void ELFNixPlatform::ELFNixPlatformPlugin::addDSOHandleSupportPasses( assert(I != G.defined_symbols().end() && "Missing DSO handle symbol"); { std::lock_guard Lock(MP.PlatformMutex); - JITTargetAddress HandleAddr = (*I)->getAddress(); + auto HandleAddr = (*I)->getAddress(); MP.HandleAddrToJITDylib[HandleAddr] = &JD; assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); MP.InitSeqs.insert(std::make_pair( - &JD, - ELFNixJITDylibInitializers(JD.getName(), ExecutorAddr(HandleAddr)))); + &JD, ELFNixJITDylibInitializers(JD.getName(), HandleAddr))); } return Error::success(); }); diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp 
b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp index 4c0fab8aa9fa..99cacd1731a2 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp @@ -56,17 +56,17 @@ EPCEHFrameRegistrar::Create(ExecutionSession &ES) { ExecutorAddr(DeregisterEHFrameWrapperFnAddr)); } -Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr, +Error EPCEHFrameRegistrar::registerEHFrames(ExecutorAddr EHFrameSectionAddr, size_t EHFrameSectionSize) { return ES.callSPSWrapper( - RegisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr), + RegisterEHFrameWrapperFnAddr, EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } -Error EPCEHFrameRegistrar::deregisterEHFrames( - JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { +Error EPCEHFrameRegistrar::deregisterEHFrames(ExecutorAddr EHFrameSectionAddr, + size_t EHFrameSectionSize) { return ES.callSPSWrapper( - DeregisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr), + DeregisterEHFrameWrapperFnAddr, EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp index 9b712cb8f7ca..247be794ad56 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp @@ -80,7 +80,7 @@ class EPCGenericJITLinkMemoryManager::InFlightAlloc } else if (FinalizeErr) OnFinalize(std::move(FinalizeErr)); else - OnFinalize(FinalizedAlloc(AllocAddr.getValue())); + OnFinalize(FinalizedAlloc(AllocAddr)); }, Parent.SAs.Allocator, std::move(FR)); } @@ -161,7 +161,7 @@ void EPCGenericJITLinkMemoryManager::completeAllocation( const auto &AG = KV.first; auto &Seg = KV.second; - Seg.Addr = NextSegAddr.getValue(); + Seg.Addr = NextSegAddr; KV.second.WorkingMem = BL.getGraph().allocateBuffer(Seg.ContentSize).data(); NextSegAddr += 
ExecutorAddrDiff( alignTo(Seg.ContentSize + Seg.ZeroFillSize, EPC.getPageSize())); diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index 818b6b52ff83..b901a2d2da23 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -119,10 +119,12 @@ Error EPCTrampolinePool::grow() { unsigned NumTrampolines = TrampolinesPerPage; auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - EPCIU.getABISupport().writeTrampolines( - SegInfo.WorkingMem.data(), SegInfo.Addr, ResolverAddress, NumTrampolines); + EPCIU.getABISupport().writeTrampolines(SegInfo.WorkingMem.data(), + SegInfo.Addr.getValue(), + ResolverAddress, NumTrampolines); for (unsigned I = 0; I < NumTrampolines; ++I) - AvailableTrampolines.push_back(SegInfo.Addr + (I * TrampolineSize)); + AvailableTrampolines.push_back(SegInfo.Addr.getValue() + + (I * TrampolineSize)); auto FA = Alloc->finalize(); if (!FA) @@ -300,15 +302,15 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, return Alloc.takeError(); auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec); - ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr, ReentryFnAddr, - ReentryCtxAddr); + ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr.getValue(), + ReentryFnAddr, ReentryCtxAddr); auto FA = Alloc->finalize(); if (!FA) return FA.takeError(); ResolverBlock = std::move(*FA); - return SegInfo.Addr; + return SegInfo.Addr.getValue(); } std::unique_ptr @@ -369,8 +371,9 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { auto StubSeg = Alloc->getSegInfo(StubProt); auto PtrSeg = Alloc->getSegInfo(PtrProt); - ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), StubSeg.Addr, - PtrSeg.Addr, NumStubsToAllocate); + ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), + StubSeg.Addr.getValue(), + PtrSeg.Addr.getValue(), NumStubsToAllocate); auto FA = 
Alloc->finalize(); if (!FA) @@ -381,8 +384,8 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { auto StubExecutorAddr = StubSeg.Addr; auto PtrExecutorAddr = PtrSeg.Addr; for (unsigned I = 0; I != NumStubsToAllocate; ++I) { - AvailableIndirectStubs.push_back( - IndirectStubInfo(StubExecutorAddr, PtrExecutorAddr)); + AvailableIndirectStubs.push_back(IndirectStubInfo( + StubExecutorAddr.getValue(), PtrExecutorAddr.getValue())); StubExecutorAddr += ABI->getStubSize(); PtrExecutorAddr += ABI->getPointerSize(); } diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index f427271bb45d..7a71d2f781d7 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -410,7 +410,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, while (I < Content.size()) { MCInst Instr; uint64_t InstrSize = 0; - uint64_t InstrStart = SymAddress + I; + uint64_t InstrStart = SymAddress.getValue() + I; auto DecodeStatus = Disassembler.getInstruction( Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream); if (DecodeStatus != MCDisassembler::Success) { @@ -426,7 +426,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, // Check for a PC-relative address equal to the symbol itself. 
auto PCRelAddr = MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize); - if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress) + if (!PCRelAddr || *PCRelAddr != SymAddress.getValue()) continue; auto RelocOffInInstr = @@ -438,8 +438,8 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, continue; } - auto RelocOffInBlock = - InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset(); + auto RelocOffInBlock = orc::ExecutorAddr(InstrStart) + *RelocOffInInstr - + SymAddress + Sym.getOffset(); if (ExistingRelocations.contains(RelocOffInBlock)) continue; diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index fb2e90e1c9c5..ab978ed3f3fc 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -106,7 +106,8 @@ class MachOHeaderMaterializationUnit : public MaterializationUnit { auto HeaderContent = G.allocateString( StringRef(reinterpret_cast(&Hdr), sizeof(Hdr))); - return G.createContentBlock(HeaderSection, HeaderContent, 0, 8, 0); + return G.createContentBlock(HeaderSection, HeaderContent, + orc::ExecutorAddr(), 8, 0); } static MaterializationUnit::Interface @@ -439,7 +440,7 @@ void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle.getValue()); + auto I = HeaderAddrToJITDylib.find(Handle); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } @@ -469,7 +470,7 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult, { std::lock_guard Lock(PlatformMutex); - auto I = HeaderAddrToJITDylib.find(Handle.getValue()); + auto I = HeaderAddrToJITDylib.find(Handle); if (I != HeaderAddrToJITDylib.end()) JD = I->second; } @@ -661,11 +662,11 @@ Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol( auto &JD = MR.getTargetJITDylib(); std::lock_guard Lock(MP.PlatformMutex); - 
JITTargetAddress HeaderAddr = (*I)->getAddress(); + auto HeaderAddr = (*I)->getAddress(); MP.HeaderAddrToJITDylib[HeaderAddr] = &JD; assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists"); - MP.InitSeqs.insert(std::make_pair( - &JD, MachOJITDylibInitializers(JD.getName(), ExecutorAddr(HeaderAddr)))); + MP.InitSeqs.insert( + std::make_pair(&JD, MachOJITDylibInitializers(JD.getName(), HeaderAddr))); return Error::success(); } @@ -792,7 +793,7 @@ Error MachOPlatform::MachOPlatformPlugin::registerInitSections( if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) { if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart()) - ObjCImageInfoAddr.setValue(Addr); + ObjCImageInfoAddr = Addr; } for (auto InitSectionName : InitSectionNames) @@ -879,11 +880,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) { jitlink::SectionRange R(*EHFrameSection); if (!R.empty()) - G.allocActions().push_back( - {{MP.orc_rt_macho_register_ehframe_section.getValue(), R.getStart(), - R.getSize()}, - {MP.orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(), - R.getSize()}}); + G.allocActions().push_back({{MP.orc_rt_macho_register_ehframe_section, + R.getStart(), R.getSize()}, + {MP.orc_rt_macho_deregister_ehframe_section, + R.getStart(), R.getSize()}}); } // Get a pointer to the thread data section if there is one. 
It will be used @@ -913,10 +913,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections( inconvertibleErrorCode()); G.allocActions().push_back( - {{MP.orc_rt_macho_register_thread_data_section.getValue(), - R.getStart(), R.getSize()}, - {MP.orc_rt_macho_deregister_thread_data_section.getValue(), - R.getStart(), R.getSize()}}); + {{MP.orc_rt_macho_register_thread_data_section, R.getStart(), + R.getSize()}, + {MP.orc_rt_macho_deregister_thread_data_section, R.getStart(), + R.getSize()}}); } } return Error::success(); @@ -963,10 +963,8 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHSectionsPhase1( // Otherwise, add allocation actions to the graph to register eh-frames for // this object. G.allocActions().push_back( - {{orc_rt_macho_register_ehframe_section.getValue(), R.getStart(), - R.getSize()}, - {orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(), - R.getSize()}}); + {{orc_rt_macho_register_ehframe_section, R.getStart(), R.getSize()}, + {orc_rt_macho_deregister_ehframe_section, R.getStart(), R.getSize()}}); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 0d6a33c5685e..8b4347f5cf52 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -217,7 +217,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { Flags |= JITSymbolFlags::Exported; InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -235,7 +235,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { if (Sym->getLinkage() == Linkage::Weak) Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = - 
JITEvaluatedSymbol(Sym->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -743,7 +743,7 @@ void EHFrameRegistrationPlugin::modifyPassConfig( PassConfiguration &PassConfig) { PassConfig.PostFixupPasses.push_back(createEHFrameRecorderPass( - G.getTargetTriple(), [this, &MR](JITTargetAddress Addr, size_t Size) { + G.getTargetTriple(), [this, &MR](ExecutorAddr Addr, size_t Size) { if (Addr) { std::lock_guard Lock(EHFramePluginMutex); assert(!InProcessLinks.count(&MR) && diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp index 5efdff65f566..d79dbc410e8e 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp @@ -120,8 +120,8 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { // then add it to the GOT entry info table. 
if (Sym->getSize() != 0) { if (auto TS = getELFGOTTarget(G, Sym->getBlock())) - FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + FileInfo.GOTEntryInfos[TS->getName()] = { + Sym->getSymbolContent(), Sym->getAddress().getValue()}; else return TS.takeError(); } @@ -133,7 +133,7 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (auto TS = getELFStubTarget(G, Sym->getBlock())) FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; else return TS.takeError(); SectionContainsContent = true; @@ -141,18 +141,19 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (Sym->hasName()) { if (Sym->isSymbolZeroFill()) { - S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; + S.SymbolInfos[Sym->getName()] = {Sym->getSize(), + Sym->getAddress().getValue()}; SectionContainsZeroFill = true; } else { S.SymbolInfos[Sym->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; SectionContainsContent = true; } } } - JITTargetAddress SecAddr = FirstSym->getAddress(); - uint64_t SecSize = + auto SecAddr = FirstSym->getAddress(); + auto SecSize = (LastSym->getBlock().getAddress() + LastSym->getBlock().getSize()) - SecAddr; @@ -161,11 +162,11 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { "supported yet", inconvertibleErrorCode()); if (SectionContainsZeroFill) - FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr}; + FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr.getValue()}; else FileInfo.SectionInfos[Sec.getName()] = { ArrayRef(FirstSym->getBlock().getContent().data(), SecSize), - SecAddr}; + SecAddr.getValue()}; } return Error::success(); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp index 7bd6bded5b7f..ed7fd1a57a72 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp @@ 
-118,8 +118,8 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { inconvertibleErrorCode()); if (auto TS = getMachOGOTTarget(G, Sym->getBlock())) - FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + FileInfo.GOTEntryInfos[TS->getName()] = { + Sym->getSymbolContent(), Sym->getAddress().getValue()}; else return TS.takeError(); SectionContainsContent = true; @@ -130,24 +130,25 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { if (auto TS = getMachOStubTarget(G, Sym->getBlock())) FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; else return TS.takeError(); SectionContainsContent = true; } else if (Sym->hasName()) { if (Sym->isSymbolZeroFill()) { - S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; + S.SymbolInfos[Sym->getName()] = {Sym->getSize(), + Sym->getAddress().getValue()}; SectionContainsZeroFill = true; } else { S.SymbolInfos[Sym->getName()] = {Sym->getSymbolContent(), - Sym->getAddress()}; + Sym->getAddress().getValue()}; SectionContainsContent = true; } } } - JITTargetAddress SecAddr = FirstSym->getAddress(); - uint64_t SecSize = + auto SecAddr = FirstSym->getAddress(); + auto SecSize = (LastSym->getBlock().getAddress() + LastSym->getBlock().getSize()) - SecAddr; @@ -156,11 +157,11 @@ Error registerMachOGraphInfo(Session &S, LinkGraph &G) { "supported yet", inconvertibleErrorCode()); if (SectionContainsZeroFill) - FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr}; + FileInfo.SectionInfos[Sec.getName()] = {SecSize, SecAddr.getValue()}; else FileInfo.SectionInfos[Sec.getName()] = { ArrayRef(FirstSym->getBlock().getContent().data(), SecSize), - SecAddr}; + SecAddr.getValue()}; } return Error::success(); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index e6588090625e..7678a85b836f 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ 
b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -327,7 +327,7 @@ static uint64_t computeTotalBlockSizes(LinkGraph &G) { } static void dumpSectionContents(raw_ostream &OS, LinkGraph &G) { - constexpr JITTargetAddress DumpWidth = 16; + constexpr orc::ExecutorAddrDiff DumpWidth = 16; static_assert(isPowerOf2_64(DumpWidth), "DumpWidth must be a power of two"); // Put sections in address order. @@ -360,12 +360,13 @@ static void dumpSectionContents(raw_ostream &OS, LinkGraph &G) { return LHS->getAddress() < RHS->getAddress(); }); - JITTargetAddress NextAddr = Syms.front()->getAddress() & ~(DumpWidth - 1); + orc::ExecutorAddr NextAddr(Syms.front()->getAddress().getValue() & + ~(DumpWidth - 1)); for (auto *Sym : Syms) { bool IsZeroFill = Sym->getBlock().isZeroFill(); - JITTargetAddress SymStart = Sym->getAddress(); - JITTargetAddress SymSize = Sym->getSize(); - JITTargetAddress SymEnd = SymStart + SymSize; + auto SymStart = Sym->getAddress(); + auto SymSize = Sym->getSize(); + auto SymEnd = SymStart + SymSize; const uint8_t *SymData = IsZeroFill ? 
nullptr : reinterpret_cast( Sym->getSymbolContent().data()); @@ -433,8 +434,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { assert(BL.graphAllocActions().empty() && "Support function calls not supported yet"); - OnFinalized(FinalizedAlloc( - pointerToJITTargetAddress(new FinalizedAllocInfo()))); + OnFinalized( + FinalizedAlloc(ExecutorAddr::fromPtr(new FinalizedAllocInfo()))); } void abandon(OnAbandonedFunction OnAbandoned) override { @@ -500,8 +501,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { sys::MemoryBlock FinalizeSegs(AllocBase + SegsSizes->StandardSegs, SegsSizes->FinalizeSegs); - auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegs.base()); - auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegs.base()); + auto NextStandardSegAddr = ExecutorAddr::fromPtr(StandardSegs.base()); + auto NextFinalizeSegAddr = ExecutorAddr::fromPtr(FinalizeSegs.base()); LLVM_DEBUG({ dbgs() << "JITLinkSlabAllocator allocated:\n"; @@ -532,7 +533,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { dbgs() << " " << Group << " -> " << formatv("{0:x16}", SegAddr) << "\n"; }); - Seg.WorkingMem = jitTargetAddressToPointer(SegAddr); + Seg.WorkingMem = SegAddr.toPtr(); Seg.Addr = SegAddr + NextSlabDelta; SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); @@ -559,7 +560,7 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { Error Err = Error::success(); for (auto &FA : FinalizedAllocs) { std::unique_ptr FAI( - jitTargetAddressToPointer(FA.release())); + FA.release().toPtr()); // FIXME: Run dealloc actions. @@ -613,8 +614,8 @@ class JITLinkSlabAllocator final : public JITLinkMemoryManager { // Calculate the target address delta to link as-if slab were at // SlabAddress. 
if (SlabAddress != ~0ULL) - NextSlabDelta = - SlabAddress - pointerToJITTargetAddress(SlabRemaining.base()); + NextSlabDelta = ExecutorAddr(SlabAddress) - + ExecutorAddr::fromPtr(SlabRemaining.base()); } Error freeBlock(sys::MemoryBlock MB) { diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 3cc6a8ad0fe6..fdc6fbdff19b 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -76,15 +76,16 @@ TEST(LinkGraphTest, AddressAccess) { getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", MemProt::Read | MemProt::Write); - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); auto &S1 = G.addDefinedSymbol(B1, 4, "S1", 4, Linkage::Strong, Scope::Default, false, false); B1.addEdge(Edge::FirstRelocation, 8, S1, 0); auto &E1 = *B1.edges().begin(); - EXPECT_EQ(B1.getAddress(), 0x1000U) << "Incorrect block address"; - EXPECT_EQ(S1.getAddress(), 0x1004U) << "Incorrect symbol address"; - EXPECT_EQ(B1.getFixupAddress(E1), 0x1008U) << "Incorrect fixup address"; + EXPECT_EQ(B1.getAddress(), B1Addr) << "Incorrect block address"; + EXPECT_EQ(S1.getAddress(), B1Addr + 4) << "Incorrect symbol address"; + EXPECT_EQ(B1.getFixupAddress(E1), B1Addr + 8) << "Incorrect fixup address"; } TEST(LinkGraphTest, BlockAndSymbolIteration) { @@ -92,16 +93,20 @@ TEST(LinkGraphTest, BlockAndSymbolIteration) { LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, getGenericEdgeKindName); auto &Sec1 = G.createSection("__data.1", MemProt::Read | MemProt::Write); - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec1, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); 
+ orc::ExecutorAddr B2Addr(0x1000); + auto &B2 = G.createContentBlock(Sec1, BlockContent, B2Addr, 8, 0); auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, false, false); auto &S2 = G.addDefinedSymbol(B2, 4, "S2", 4, Linkage::Strong, Scope::Default, false, false); auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); - auto &B3 = G.createContentBlock(Sec2, BlockContent, 0x3000, 8, 0); - auto &B4 = G.createContentBlock(Sec2, BlockContent, 0x4000, 8, 0); + orc::ExecutorAddr B3Addr(0x3000); + auto &B3 = G.createContentBlock(Sec2, BlockContent, B3Addr, 8, 0); + orc::ExecutorAddr B4Addr(0x4000); + auto &B4 = G.createContentBlock(Sec2, BlockContent, B4Addr, 8, 0); auto &S3 = G.addDefinedSymbol(B3, 0, "S3", 4, Linkage::Strong, Scope::Default, false, false); auto &S4 = G.addDefinedSymbol(B4, 4, "S4", 4, Linkage::Strong, Scope::Default, @@ -141,7 +146,8 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. - auto &B = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr BAddr(0x1000); + auto &B = G.createContentBlock(Sec, BlockContent, BAddr, 8, 0); EXPECT_FALSE(B.isContentMutable()) << "Content unexpectedly mutable"; EXPECT_EQ(B.getContent().data(), BlockContent.data()) @@ -196,7 +202,8 @@ TEST(LinkGraphTest, ContentAccessAndUpdate) { << "Unexpected block content size"; // Create an initially mutable block. - auto &B2 = G.createMutableContentBlock(Sec, MutableContent, 0x10000, 8, 0); + auto &B2 = G.createMutableContentBlock(Sec, MutableContent, + orc::ExecutorAddr(0x10000), 8, 0); EXPECT_TRUE(B2.isContentMutable()) << "Expected B2 content to be mutable"; } @@ -208,7 +215,8 @@ TEST(LinkGraphTest, MakeExternal) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. 
- auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + auto &B1 = + G.createContentBlock(Sec, BlockContent, orc::ExecutorAddr(0x1000), 8, 0); // Add a symbol to the block. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, @@ -218,7 +226,8 @@ TEST(LinkGraphTest, MakeExternal) { EXPECT_FALSE(S1.isExternal()) << "Symbol should not be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; EXPECT_TRUE(&S1.getBlock()) << "Symbol should have a non-null block"; - EXPECT_EQ(S1.getAddress(), 0x1000U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr(0x1000)) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 1U) @@ -235,7 +244,8 @@ TEST(LinkGraphTest, MakeExternal) { EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined"; EXPECT_TRUE(S1.isExternal()) << "Symbol should be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; - EXPECT_EQ(S1.getAddress(), 0U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr()) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 0U) @@ -253,7 +263,8 @@ TEST(LinkGraphTest, MakeDefined) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create an initial block. - auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); // Add an external symbol. 
auto &S1 = G.addExternalSymbol("S1", 4, Linkage::Strong); @@ -261,7 +272,8 @@ TEST(LinkGraphTest, MakeDefined) { EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined"; EXPECT_TRUE(S1.isExternal()) << "Symbol should be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; - EXPECT_EQ(S1.getAddress(), 0U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr()) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 0U) @@ -279,7 +291,8 @@ TEST(LinkGraphTest, MakeDefined) { EXPECT_FALSE(S1.isExternal()) << "Symbol should not be external"; EXPECT_FALSE(S1.isAbsolute()) << "Symbol should not be absolute"; EXPECT_TRUE(&S1.getBlock()) << "Symbol should have a non-null block"; - EXPECT_EQ(S1.getAddress(), 0x1000U) << "Unexpected symbol address"; + EXPECT_EQ(S1.getAddress(), orc::ExecutorAddr(0x1000U)) + << "Unexpected symbol address"; EXPECT_EQ( std::distance(G.defined_symbols().begin(), G.defined_symbols().end()), 1U) @@ -296,10 +309,13 @@ TEST(LinkGraphTest, TransferDefinedSymbol) { getGenericEdgeKindName); auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); - // Create an initial block. - auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec, BlockContent, 0x2000, 8, 0); - auto &B3 = G.createContentBlock(Sec, BlockContent.slice(0, 32), 0x3000, 8, 0); + // Create initial blocks. + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec, BlockContent, B2Addr, 8, 0); + orc::ExecutorAddr B3Addr(0x3000); + auto &B3 = G.createContentBlock(Sec, BlockContent.slice(0, 32), B3Addr, 8, 0); // Add a symbol. 
auto &S1 = G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, @@ -329,8 +345,10 @@ TEST(LinkGraphTest, TransferDefinedSymbolAcrossSections) { auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); // Create blocks in each section. - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec2, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec2, BlockContent, B2Addr, 8, 0); // Add a symbol to section 1. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, @@ -359,8 +377,10 @@ TEST(LinkGraphTest, TransferBlock) { auto &Sec2 = G.createSection("__data.2", MemProt::Read | MemProt::Write); // Create an initial block. - auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec1, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec1, BlockContent, B2Addr, 8, 0); // Add some symbols on B1... G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, Scope::Default, @@ -404,9 +424,12 @@ TEST(LinkGraphTest, MergeSections) { auto &Sec3 = G.createSection("__data.3", MemProt::Read | MemProt::Write); // Create an initial block. 
- auto &B1 = G.createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); - auto &B2 = G.createContentBlock(Sec2, BlockContent, 0x2000, 8, 0); - auto &B3 = G.createContentBlock(Sec3, BlockContent, 0x3000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec1, BlockContent, B1Addr, 8, 0); + orc::ExecutorAddr B2Addr(0x2000); + auto &B2 = G.createContentBlock(Sec2, BlockContent, B2Addr, 8, 0); + orc::ExecutorAddr B3Addr(0x3000); + auto &B3 = G.createContentBlock(Sec3, BlockContent, B3Addr, 8, 0); // Add a symbols for each block. G.addDefinedSymbol(B1, 0, "S1", B1.getSize(), Linkage::Strong, Scope::Default, @@ -482,7 +505,8 @@ TEST(LinkGraphTest, SplitBlock) { auto &Sec = G.createSection("__data", MemProt::Read | MemProt::Write); // Create the block to split. - auto &B1 = G.createContentBlock(Sec, BlockContent, 0x1000, 8, 0); + orc::ExecutorAddr B1Addr(0x1000); + auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); // Add some symbols to the block. auto &S1 = G.addDefinedSymbol(B1, 0, "S1", 4, Linkage::Strong, Scope::Default, @@ -499,7 +523,8 @@ TEST(LinkGraphTest, SplitBlock) { // Add an extra block, EB, and target symbols, and use these to add edges // from B1 to EB. - auto &EB = G.createContentBlock(Sec, BlockContent, 0x2000, 8, 0); + orc::ExecutorAddr EBAddr(0x2000); + auto &EB = G.createContentBlock(Sec, BlockContent, EBAddr, 8, 0); auto &ES1 = G.addDefinedSymbol(EB, 0, "TS1", 4, Linkage::Strong, Scope::Default, false, false); auto &ES2 = G.addDefinedSymbol(EB, 4, "TS2", 4, Linkage::Strong, @@ -519,10 +544,10 @@ TEST(LinkGraphTest, SplitBlock) { auto &B2 = G.splitBlock(B1, 8); // Check that the block addresses and content matches what we would expect. 
- EXPECT_EQ(B1.getAddress(), 0x1008U); + EXPECT_EQ(B1.getAddress(), B1Addr + 8); EXPECT_EQ(B1.getContent(), BlockContent.slice(8)); - EXPECT_EQ(B2.getAddress(), 0x1000U); + EXPECT_EQ(B2.getAddress(), B1Addr); EXPECT_EQ(B2.getContent(), BlockContent.slice(0, 8)); // Check that symbols in B1 were transferred as expected: diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index 0181c558b60d..1f638f407c48 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -44,7 +44,8 @@ TEST_F(ObjectLinkingLayerTest, AddLinkGraph) { support::little, x86_64::getEdgeKindName); auto &Sec1 = G->createSection("__data", MemProt::Read | MemProt::Write); - auto &B1 = G->createContentBlock(Sec1, BlockContent, 0x1000, 8, 0); + auto &B1 = G->createContentBlock(Sec1, BlockContent, + orc::ExecutorAddr(0x1000), 8, 0); G->addDefinedSymbol(B1, 4, "_X", 4, Linkage::Strong, Scope::Default, false, false); From 49d311874edc928831ccaddd621801a4dbee580d Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Thu, 6 Jan 2022 08:05:59 +0000 Subject: [PATCH 773/992] [clang-format] Missing space after cast in a macro https://github.com/llvm/llvm-project/issues/52979 Though SpaceAfterCStyleCast is set to true, clang-format 13 does not add a space after (void *) here: ``` ``` This patch addresses that Fixes: #52979 Reviewed By: curdeius, HazardyKnusperkeks, owenpan Differential Revision: https://reviews.llvm.org/D116592 --- clang/lib/Format/TokenAnnotator.cpp | 7 ++++--- clang/unittests/Format/FormatTest.cpp | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index ec9bfdb0b2a7..5241685630a5 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1883,9 +1883,10 @@ class AnnotatingParser 
{ FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); if (LeftOfParens) { - // If there is a closing parenthesis left of the current parentheses, - // look past it as these might be chained casts. - if (LeftOfParens->is(tok::r_paren)) { + // If there is a closing parenthesis left of the current + // parentheses, look past it as these might be chained casts. + if (LeftOfParens->is(tok::r_paren) && + LeftOfParens->isNot(TT_CastRParen)) { if (!LeftOfParens->MatchingParen || !LeftOfParens->MatchingParen->Previous) return false; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d90c3d3a291f..85ce3171bbc8 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -10150,6 +10150,15 @@ TEST_F(FormatTest, FormatsCasts) { " (aaaaaaaaaaaaaaaaaaaaaaaaaa *)(aaaaaaaaaaaaaaaaaaaaaa +\n" " bbbbbbbbbbbbbbbbbbbbbb);"); + verifyFormat("#define CONF_BOOL(x) (bool *)(void *)(x)"); + verifyFormat("#define CONF_BOOL(x) (bool *)(x)"); + verifyFormat("#define CONF_BOOL(x) (bool)(x)"); + verifyFormat("bool *y = (bool *)(void *)(x);"); + verifyFormat("#define CONF_BOOL(x) (bool *)(void *)(int)(x)"); + verifyFormat("bool *y = (bool *)(void *)(int)(x);"); + verifyFormat("#define CONF_BOOL(x) (bool *)(void *)(int)foo(x)"); + verifyFormat("bool *y = (bool *)(void *)(int)foo(x);"); + // These are not casts. 
verifyFormat("void f(int *) {}"); verifyFormat("f(foo)->b;"); @@ -14661,6 +14670,11 @@ TEST_F(FormatTest, ConfigurableSpacesInParentheses) { " break;\n" "}", Spaces); + verifyFormat("#define CONF_BOOL(x) ( bool * ) ( void * ) (x)", Spaces); + verifyFormat("#define CONF_BOOL(x) ( bool * ) (x)", Spaces); + verifyFormat("#define CONF_BOOL(x) ( bool ) (x)", Spaces); + verifyFormat("bool *y = ( bool * ) ( void * ) (x);", Spaces); + verifyFormat("bool *y = ( bool * ) (x);", Spaces); // Run subset of tests again with: Spaces.SpacesInCStyleCastParentheses = false; @@ -14680,6 +14694,11 @@ TEST_F(FormatTest, ConfigurableSpacesInParentheses) { verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); verifyFormat("size_t idx = (*(foo))(a - 1);", Spaces); + verifyFormat("#define CONF_BOOL(x) (bool *) (void *) (x)", Spaces); + verifyFormat("#define CONF_BOOL(x) (bool *) (void *) (int) (x)", Spaces); + verifyFormat("bool *y = (bool *) (void *) (x);", Spaces); + verifyFormat("bool *y = (bool *) (void *) (int) (x);", Spaces); + verifyFormat("bool *y = (bool *) (void *) (int) foo(x);", Spaces); Spaces.ColumnLimit = 80; Spaces.IndentWidth = 4; Spaces.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; From 32808cfb24b8d83a99223b7f797be1dbe5573c10 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 16 Dec 2021 10:48:40 +0100 Subject: [PATCH 774/992] [IR] Track users of comdats Track all GlobalObjects that reference a given comdat, which allows determining whether a function in a comdat is dead without scanning the whole module. In particular, this makes filterDeadComdatFunctions() have complexity O(#DeadFunctions) rather than O(#SymbolsInModule), which addresses half of the compile-time issue exposed by D115545. 
Differential Revision: https://reviews.llvm.org/D115864 --- llvm/include/llvm/IR/Comdat.h | 8 +++ llvm/include/llvm/IR/GlobalObject.h | 3 +- llvm/lib/IR/Comdat.cpp | 4 ++ llvm/lib/IR/Globals.cpp | 10 ++++ llvm/lib/Transforms/Utils/ModuleUtils.cpp | 72 ++++++----------------- llvm/unittests/IR/ConstantsTest.cpp | 27 +++++++++ 6 files changed, 69 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/IR/Comdat.h b/llvm/include/llvm/IR/Comdat.h index 01a047d36455..1701802e6977 100644 --- a/llvm/include/llvm/IR/Comdat.h +++ b/llvm/include/llvm/IR/Comdat.h @@ -16,10 +16,12 @@ #define LLVM_IR_COMDAT_H #include "llvm-c/Types.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CBindingWrapping.h" namespace llvm { +class GlobalObject; class raw_ostream; class StringRef; template class StringMapEntry; @@ -46,15 +48,21 @@ class Comdat { StringRef getName() const; void print(raw_ostream &OS, bool IsForDebug = false) const; void dump() const; + const SmallPtrSetImpl &getUsers() const { return Users; } private: friend class Module; + friend class GlobalObject; Comdat(); + void addUser(GlobalObject *GO); + void removeUser(GlobalObject *GO); // Points to the map in Module. StringMapEntry *Name = nullptr; SelectionKind SK = Any; + // Globals using this comdat. + SmallPtrSet Users; }; // Create wrappers for C Binding types (see CBindingWrapping.h). 
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index e15cf718bb10..1f73c8540a4a 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -48,6 +48,7 @@ class GlobalObject : public GlobalValue { ObjComdat(nullptr) { setGlobalValueSubClassData(0); } + ~GlobalObject(); Comdat *ObjComdat; enum { @@ -122,7 +123,7 @@ class GlobalObject : public GlobalValue { bool hasComdat() const { return getComdat() != nullptr; } const Comdat *getComdat() const { return ObjComdat; } Comdat *getComdat() { return ObjComdat; } - void setComdat(Comdat *C) { ObjComdat = C; } + void setComdat(Comdat *C); using Value::addMetadata; using Value::clearMetadata; diff --git a/llvm/lib/IR/Comdat.cpp b/llvm/lib/IR/Comdat.cpp index 1a5d38d17bc0..90d5c6e82e5c 100644 --- a/llvm/lib/IR/Comdat.cpp +++ b/llvm/lib/IR/Comdat.cpp @@ -25,6 +25,10 @@ Comdat::Comdat() = default; StringRef Comdat::getName() const { return Name->first(); } +void Comdat::addUser(GlobalObject *GO) { Users.insert(GO); } + +void Comdat::removeUser(GlobalObject *GO) { Users.erase(GO); } + LLVMComdatRef LLVMGetOrInsertComdat(LLVMModuleRef M, const char *Name) { return wrap(unwrap(M)->getOrInsertComdat(Name)); } diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index b6bd25aa1234..99affa8a84e6 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -95,6 +95,8 @@ void GlobalValue::eraseFromParent() { llvm_unreachable("not a global"); } +GlobalObject::~GlobalObject() { setComdat(nullptr); } + bool GlobalValue::isInterposable() const { if (isInterposableLinkage(getLinkage())) return true; @@ -182,6 +184,14 @@ const Comdat *GlobalValue::getComdat() const { return cast(this)->getComdat(); } +void GlobalObject::setComdat(Comdat *C) { + if (ObjComdat) + ObjComdat->removeUser(this); + ObjComdat = C; + if (C) + C->addUser(this); +} + StringRef GlobalValue::getPartition() const { if (!hasPartition()) return ""; diff --git 
a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index bb5ff59cba4b..c8b9af3fd6db 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -179,65 +179,29 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions( void llvm::filterDeadComdatFunctions( Module &M, SmallVectorImpl &DeadComdatFunctions) { - // Build a map from the comdat to the number of entries in that comdat we - // think are dead. If this fully covers the comdat group, then the entire - // group is dead. If we find another entry in the comdat group though, we'll - // have to preserve the whole group. - SmallDenseMap ComdatEntriesCovered; + SmallPtrSet MaybeDeadFunctions; + SmallPtrSet MaybeDeadComdats; for (Function *F : DeadComdatFunctions) { - Comdat *C = F->getComdat(); - assert(C && "Expected all input GVs to be in a comdat!"); - ComdatEntriesCovered[C] += 1; + MaybeDeadFunctions.insert(F); + if (Comdat *C = F->getComdat()) + MaybeDeadComdats.insert(C); } - auto CheckComdat = [&](Comdat &C) { - auto CI = ComdatEntriesCovered.find(&C); - if (CI == ComdatEntriesCovered.end()) - return; - - // If this could have been covered by a dead entry, just subtract one to - // account for it. - if (CI->second > 0) { - CI->second -= 1; - return; - } - - // If we've already accounted for all the entries that were dead, the - // entire comdat is alive so remove it from the map. 
- ComdatEntriesCovered.erase(CI); - }; - - auto CheckAllComdats = [&] { - for (Function &F : M.functions()) - if (Comdat *C = F.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - for (GlobalVariable &GV : M.globals()) - if (Comdat *C = GV.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - for (GlobalAlias &GA : M.aliases()) - if (Comdat *C = GA.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - }; - CheckAllComdats(); - - if (ComdatEntriesCovered.empty()) { - DeadComdatFunctions.clear(); - return; + // Find comdats for which all users are dead now. + SmallPtrSet DeadComdats; + for (Comdat *C : MaybeDeadComdats) { + auto IsUserDead = [&](GlobalObject *GO) { + auto *F = dyn_cast(GO); + return F && MaybeDeadFunctions.contains(F); + }; + if (all_of(C->getUsers(), IsUserDead)) + DeadComdats.insert(C); } - // Remove the entries that were not covering. - erase_if(DeadComdatFunctions, [&](GlobalValue *GV) { - return ComdatEntriesCovered.find(GV->getComdat()) == - ComdatEntriesCovered.end(); + // Only keep functions which have no comdat or a dead comdat. 
+ erase_if(DeadComdatFunctions, [&](Function *F) { + Comdat *C = F->getComdat(); + return C && !DeadComdats.contains(C); }); } diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp index 155383d5a0c6..faf8502b19df 100644 --- a/llvm/unittests/IR/ConstantsTest.cpp +++ b/llvm/unittests/IR/ConstantsTest.cpp @@ -763,5 +763,32 @@ TEST(ConstantsTest, GetSplatValueRoundTrip) { } } +TEST(ConstantsTest, ComdatUserTracking) { + LLVMContext Context; + Module M("MyModule", Context); + + Comdat *C = M.getOrInsertComdat("comdat"); + const SmallPtrSetImpl &Users = C->getUsers(); + EXPECT_TRUE(Users.size() == 0); + + Type *Ty = Type::getInt8Ty(Context); + GlobalVariable *GV1 = cast(M.getOrInsertGlobal("gv1", Ty)); + GV1->setComdat(C); + EXPECT_TRUE(Users.size() == 1); + EXPECT_TRUE(Users.contains(GV1)); + + GlobalVariable *GV2 = cast(M.getOrInsertGlobal("gv2", Ty)); + GV2->setComdat(C); + EXPECT_TRUE(Users.size() == 2); + EXPECT_TRUE(Users.contains(GV2)); + + GV1->eraseFromParent(); + EXPECT_TRUE(Users.size() == 1); + EXPECT_TRUE(Users.contains(GV2)); + + GV2->eraseFromParent(); + EXPECT_TRUE(Users.size() == 0); +} + } // end anonymous namespace } // end namespace llvm From 1919720fdd348ca568b235bf3f1357c198eccd15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 4 Jan 2022 23:14:30 +0200 Subject: [PATCH 775/992] [lldb] [debugserver] Simplify handling of arch specific files There are no duplicates among the include files, and all the source files are wrapped in architecture ifdefs, so there's no harm in including all of them, always. This fixes builds if TARGET_TRIPLE is set to something else than the build architecture. This also allows building for multiple architectures at once by setting CMAKE_OSX_ARCHITECTURES. 
Differential Revision: https://reviews.llvm.org/D116625 --- .../debugserver/source/MacOSX/CMakeLists.txt | 30 +++---------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt b/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt index ea4593fcf451..8f44d1bfbb43 100644 --- a/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt +++ b/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt @@ -1,30 +1,8 @@ -# The debugserver build needs to conditionally include files depending on the -# target architecture. -# -# Switch on the architecture specified by TARGET_TRIPLE, as -# the llvm and swift build systems use this variable to identify the -# target (through LLVM_HOST_TRIPLE). -# -# It would be possible to switch on CMAKE_OSX_ARCHITECTURES, but the swift -# build does not provide it, preferring instead to pass arch-specific -# CFLAGS etc explicitly. Switching on LLVM_HOST_TRIPLE is also an option, -# but it breaks down when cross-compiling. +list(APPEND SOURCES arm/DNBArchImpl.cpp arm64/DNBArchImplARM64.cpp) +include_directories(${CURRENT_SOURCE_DIR}/arm ${CURRENT_SOURCE_DIR}/arm64) -if(TARGET_TRIPLE) - string(REGEX MATCH "^[^-]*" LLDB_DEBUGSERVER_ARCH ${TARGET_TRIPLE}) -else() - set(LLDB_DEBUGSERVER_ARCH ${CMAKE_OSX_ARCHITECTURES}) -endif() - -if("${LLDB_DEBUGSERVER_ARCH}" MATCHES ".*arm.*") - list(APPEND SOURCES arm/DNBArchImpl.cpp arm64/DNBArchImplARM64.cpp) - include_directories(${CURRENT_SOURCE_DIR}/arm ${CURRENT_SOURCE_DIR}/arm64) -endif() - -if(NOT LLDB_DEBUGSERVER_ARCH OR "${LLDB_DEBUGSERVER_ARCH}" MATCHES ".*86.*") - list(APPEND SOURCES i386/DNBArchImplI386.cpp x86_64/DNBArchImplX86_64.cpp) - include_directories(${CURRENT_SOURCE_DIR}/i386 ${CURRENT_SOURCE_DIR}/x86_64) -endif() +list(APPEND SOURCES i386/DNBArchImplI386.cpp x86_64/DNBArchImplX86_64.cpp) +include_directories(${CURRENT_SOURCE_DIR}/i386 ${CURRENT_SOURCE_DIR}/x86_64) include_directories(..) 
From e8b98a5216dbfdaa31f7016955f9586cef94a626 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 5 Jan 2022 15:16:24 +0100 Subject: [PATCH 776/992] [CodeGen] Emit elementtype attributes for indirect inline asm constraints This implements the clang side of D116531. The elementtype attribute is added for all indirect constraints (*) and tests are updated accordingly. Differential Revision: https://reviews.llvm.org/D116666 --- clang/lib/CodeGen/CGBuiltin.cpp | 5 +- clang/lib/CodeGen/CGObjCMac.cpp | 10 +- clang/lib/CodeGen/CGStmt.cpp | 116 ++++++++++-------- clang/lib/CodeGen/CodeGenFunction.h | 15 +-- clang/test/CodeGen/RISCV/riscv-inline-asm.c | 2 +- .../test/CodeGen/SystemZ/systemz-inline-asm.c | 22 ++-- clang/test/CodeGen/aarch64-inline-asm.c | 4 +- clang/test/CodeGen/asm-inout.c | 2 +- clang/test/CodeGen/asm.c | 2 +- ...xlcompat-LoadReseve-StoreCond-64bit-only.c | 2 +- ...iltins-ppc-xlcompat-LoadReseve-StoreCond.c | 10 +- .../test/CodeGen/inline-asm-x86-flag-output.c | 56 ++++----- clang/test/CodeGen/matrix-type.c | 2 +- clang/test/CodeGen/mips-constraints-mem.c | 2 +- .../test/CodeGen/mips-inline-asm-modifiers.c | 4 +- clang/test/CodeGen/mips-inline-asm.c | 6 +- clang/test/CodeGen/mozilla-ms-inline-asm.c | 2 +- clang/test/CodeGen/ms-inline-asm-64.c | 6 +- .../CodeGen/ms-inline-asm-static-variable.c | 2 +- clang/test/CodeGen/ms-inline-asm.c | 98 +++++++-------- clang/test/CodeGen/ms-inline-asm.cpp | 6 +- clang/test/CodeGen/ms-intrinsics.c | 16 +-- clang/test/CodeGen/mult-alt-generic.c | 4 +- clang/test/CodeGen/mult-alt-x86.c | 68 +++++----- clang/test/CodeGen/ppc64-inline-asm.c | 4 +- .../test/CodeGenCXX/ms-inline-asm-fields.cpp | 4 +- clang/test/CodeGenObjC/exceptions.m | 4 +- clang/test/CodeGenObjC/synchronized.m | 2 +- 28 files changed, 250 insertions(+), 226 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c1541ff0c846..50f59a2abab8 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ 
b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1060,7 +1060,10 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); - return CGF.Builder.CreateCall(IA, {Addr}); + llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr}); + CI->addParamAttr( + 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType)); + return CI; } namespace { diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 425d1a793439..d769574c1f5e 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -4370,7 +4370,11 @@ FragileHazards::FragileHazards(CodeGenFunction &CGF) : CGF(CGF) { void FragileHazards::emitWriteHazard() { if (Locals.empty()) return; - CGF.EmitNounwindRuntimeCall(WriteHazard, Locals); + llvm::CallInst *Call = CGF.EmitNounwindRuntimeCall(WriteHazard, Locals); + for (auto Pair : llvm::enumerate(Locals)) + Call->addParamAttr(Pair.index(), llvm::Attribute::get( + CGF.getLLVMContext(), llvm::Attribute::ElementType, + cast(Pair.value())->getAllocatedType())); } void FragileHazards::emitReadHazard(CGBuilderTy &Builder) { @@ -4378,6 +4382,10 @@ void FragileHazards::emitReadHazard(CGBuilderTy &Builder) { llvm::CallInst *call = Builder.CreateCall(ReadHazard, Locals); call->setDoesNotThrow(); call->setCallingConv(CGF.getRuntimeCC()); + for (auto Pair : llvm::enumerate(Locals)) + call->addParamAttr(Pair.index(), llvm::Attribute::get( + Builder.getContext(), llvm::Attribute::ElementType, + cast(Pair.value())->getAllocatedType())); } /// Emit read hazards in all the protected blocks, i.e. all the blocks diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index ef0068cd3b0c..feff8a2c178a 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2109,42 +2109,35 @@ AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr, return (EarlyClobber ? 
"&{" : "{") + Register.str() + "}"; } -llvm::Value* -CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, - LValue InputValue, QualType InputType, - std::string &ConstraintStr, - SourceLocation Loc) { - llvm::Value *Arg; +std::pair CodeGenFunction::EmitAsmInputLValue( + const TargetInfo::ConstraintInfo &Info, LValue InputValue, + QualType InputType, std::string &ConstraintStr, SourceLocation Loc) { if (Info.allowsRegister() || !Info.allowsMemory()) { - if (CodeGenFunction::hasScalarEvaluationKind(InputType)) { - Arg = EmitLoadOfLValue(InputValue, Loc).getScalarVal(); - } else { - llvm::Type *Ty = ConvertType(InputType); - uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty); - if ((Size <= 64 && llvm::isPowerOf2_64(Size)) || - getTargetHooks().isScalarizableAsmOperand(*this, Ty)) { - Ty = llvm::IntegerType::get(getLLVMContext(), Size); - Ty = llvm::PointerType::getUnqual(Ty); - - Arg = Builder.CreateLoad( - Builder.CreateBitCast(InputValue.getAddress(*this), Ty)); - } else { - Arg = InputValue.getPointer(*this); - ConstraintStr += '*'; - } + if (CodeGenFunction::hasScalarEvaluationKind(InputType)) + return {EmitLoadOfLValue(InputValue, Loc).getScalarVal(), nullptr}; + + llvm::Type *Ty = ConvertType(InputType); + uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty); + if ((Size <= 64 && llvm::isPowerOf2_64(Size)) || + getTargetHooks().isScalarizableAsmOperand(*this, Ty)) { + Ty = llvm::IntegerType::get(getLLVMContext(), Size); + Ty = llvm::PointerType::getUnqual(Ty); + + return {Builder.CreateLoad( + Builder.CreateBitCast(InputValue.getAddress(*this), Ty)), + nullptr}; } - } else { - Arg = InputValue.getPointer(*this); - ConstraintStr += '*'; } - return Arg; + Address Addr = InputValue.getAddress(*this); + ConstraintStr += '*'; + return {Addr.getPointer(), Addr.getElementType()}; } -llvm::Value* CodeGenFunction::EmitAsmInput( - const TargetInfo::ConstraintInfo &Info, - const Expr *InputExpr, - std::string &ConstraintStr) { +std::pair 
+CodeGenFunction::EmitAsmInput(const TargetInfo::ConstraintInfo &Info, + const Expr *InputExpr, + std::string &ConstraintStr) { // If this can't be a register or memory, i.e., has to be a constant // (immediate or symbolic), try to emit it as such. if (!Info.allowsRegister() && !Info.allowsMemory()) { @@ -2155,19 +2148,20 @@ llvm::Value* CodeGenFunction::EmitAsmInput( llvm::APSInt IntResult; if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), getContext())) - return llvm::ConstantInt::get(getLLVMContext(), IntResult); + return {llvm::ConstantInt::get(getLLVMContext(), IntResult), nullptr}; } Expr::EvalResult Result; if (InputExpr->EvaluateAsInt(Result, getContext())) - return llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt()); + return {llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt()), + nullptr}; } if (Info.allowsRegister() || !Info.allowsMemory()) if (CodeGenFunction::hasScalarEvaluationKind(InputExpr->getType())) - return EmitScalarExpr(InputExpr); + return {EmitScalarExpr(InputExpr), nullptr}; if (InputExpr->getStmtClass() == Expr::CXXThisExprClass) - return EmitScalarExpr(InputExpr); + return {EmitScalarExpr(InputExpr), nullptr}; InputExpr = InputExpr->IgnoreParenNoopCasts(getContext()); LValue Dest = EmitLValue(InputExpr); return EmitAsmInputLValue(Info, Dest, InputExpr->getType(), ConstraintStr, @@ -2209,6 +2203,7 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, bool HasUnwindClobber, bool ReadOnly, bool ReadNone, bool NoMerge, const AsmStmt &S, const std::vector &ResultRegTypes, + const std::vector &ArgElemTypes, CodeGenFunction &CGF, std::vector &RegResults) { if (!HasUnwindClobber) @@ -2224,6 +2219,15 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, Result.addFnAttr(llvm::Attribute::ReadOnly); } + // Add elementtype attribute for indirect constraints. 
+ for (auto Pair : llvm::enumerate(ArgElemTypes)) { + if (Pair.value()) { + auto Attr = llvm::Attribute::get( + CGF.getLLVMContext(), llvm::Attribute::ElementType, Pair.value()); + Result.addParamAttr(Pair.index(), Attr); + } + } + // Slap the source location of the inline asm into a !srcloc metadata on the // call. if (const auto *gccAsmStmt = dyn_cast(&S)) @@ -2291,6 +2295,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector ResultRegTypes; std::vector ResultTruncRegTypes; std::vector ArgTypes; + std::vector ArgElemTypes; std::vector Args; llvm::BitVector ResultTypeRequiresCast; @@ -2298,6 +2303,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::string InOutConstraints; std::vector InOutArgs; std::vector InOutArgTypes; + std::vector InOutArgElemTypes; // Keep track of out constraints for tied input operand. std::vector OutputConstraints; @@ -2399,21 +2405,19 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); } else { - llvm::Type *DestAddrTy = Dest.getAddress(*this).getType(); - llvm::Value *DestPtr = Dest.getPointer(*this); + Address DestAddr = Dest.getAddress(*this); // Matrix types in memory are represented by arrays, but accessed through // vector pointers, with the alignment specified on the access operation. // For inline assembly, update pointer arguments to use vector pointers. // Otherwise there will be a mis-match if the matrix is also an // input-argument which is represented as vector. 
- if (isa(OutExpr->getType().getCanonicalType())) { - DestAddrTy = llvm::PointerType::get( - ConvertType(OutExpr->getType()), - cast(DestAddrTy)->getAddressSpace()); - DestPtr = Builder.CreateBitCast(DestPtr, DestAddrTy); - } - ArgTypes.push_back(DestAddrTy); - Args.push_back(DestPtr); + if (isa(OutExpr->getType().getCanonicalType())) + DestAddr = Builder.CreateElementBitCast( + DestAddr, ConvertType(OutExpr->getType())); + + ArgTypes.push_back(DestAddr.getType()); + ArgElemTypes.push_back(DestAddr.getElementType()); + Args.push_back(DestAddr.getPointer()); Constraints += "=*"; Constraints += OutputConstraint; ReadOnly = ReadNone = false; @@ -2423,9 +2427,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { InOutConstraints += ','; const Expr *InputExpr = S.getOutputExpr(i); - llvm::Value *Arg = EmitAsmInputLValue(Info, Dest, InputExpr->getType(), - InOutConstraints, - InputExpr->getExprLoc()); + llvm::Value *Arg; + llvm::Type *ArgElemType; + std::tie(Arg, ArgElemType) = EmitAsmInputLValue( + Info, Dest, InputExpr->getType(), InOutConstraints, + InputExpr->getExprLoc()); if (llvm::Type* AdjTy = getTargetHooks().adjustInlineAsmType(*this, OutputConstraint, @@ -2444,6 +2450,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { InOutConstraints += OutputConstraint; InOutArgTypes.push_back(Arg->getType()); + InOutArgElemTypes.push_back(ArgElemType); InOutArgs.push_back(Arg); } } @@ -2483,7 +2490,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { getTarget(), CGM, S, false /* No EarlyClobber */); std::string ReplaceConstraint (InputConstraint); - llvm::Value *Arg = EmitAsmInput(Info, InputExpr, Constraints); + llvm::Value *Arg; + llvm::Type *ArgElemType; + std::tie(Arg, ArgElemType) = EmitAsmInput(Info, InputExpr, Constraints); // If this input argument is tied to a larger output result, extend the // input to be the same size as the output. 
The LLVM backend wants to see @@ -2528,6 +2537,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { VT->getPrimitiveSizeInBits().getKnownMinSize()); ArgTypes.push_back(Arg->getType()); + ArgElemTypes.push_back(ArgElemType); Args.push_back(Arg); Constraints += InputConstraint; } @@ -2546,6 +2556,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::BlockAddress::get(CurFn, Dest.getBlock()); Args.push_back(BA); ArgTypes.push_back(BA->getType()); + ArgElemTypes.push_back(nullptr); if (!Constraints.empty()) Constraints += ','; Constraints += 'X'; @@ -2557,6 +2568,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Append the "input" part of inout constraints last. for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) { ArgTypes.push_back(InOutArgTypes[i]); + ArgElemTypes.push_back(InOutArgElemTypes[i]); Args.push_back(InOutArgs[i]); } Constraints += InOutConstraints; @@ -2647,18 +2659,18 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { EmitBlock(Fallthrough); UpdateAsmCallInst(cast(*Result), HasSideEffect, false, ReadOnly, ReadNone, InNoMergeAttributedStmt, S, - ResultRegTypes, *this, RegResults); + ResultRegTypes, ArgElemTypes, *this, RegResults); } else if (HasUnwindClobber) { llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, ""); UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, *this, - RegResults); + InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, + *this, RegResults); } else { llvm::CallInst *Result = Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); UpdateAsmCallInst(cast(*Result), HasSideEffect, false, ReadOnly, ReadNone, InNoMergeAttributedStmt, S, - ResultRegTypes, *this, RegResults); + ResultRegTypes, ArgElemTypes, *this, RegResults); } assert(RegResults.size() == ResultRegTypes.size()); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 17bdbc0bd334..b7011a08299a 100644 --- 
a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4677,13 +4677,14 @@ class CodeGenFunction : public CodeGenTypeCache { SmallVectorImpl &IRCallArgs, unsigned &IRCallArgPos); - llvm::Value* EmitAsmInput(const TargetInfo::ConstraintInfo &Info, - const Expr *InputExpr, std::string &ConstraintStr); - - llvm::Value* EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, - LValue InputValue, QualType InputType, - std::string &ConstraintStr, - SourceLocation Loc); + std::pair + EmitAsmInput(const TargetInfo::ConstraintInfo &Info, const Expr *InputExpr, + std::string &ConstraintStr); + + std::pair + EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, LValue InputValue, + QualType InputType, std::string &ConstraintStr, + SourceLocation Loc); /// Attempts to statically evaluate the object size of E. If that /// fails, emits code to figure the size of E out for us. This is diff --git a/clang/test/CodeGen/RISCV/riscv-inline-asm.c b/clang/test/CodeGen/RISCV/riscv-inline-asm.c index 230329387692..45ab3a6b5ae8 100644 --- a/clang/test/CodeGen/RISCV/riscv-inline-asm.c +++ b/clang/test/CodeGen/RISCV/riscv-inline-asm.c @@ -41,7 +41,7 @@ void test_f() { void test_A(int *p) { // CHECK-LABEL: define{{.*}} void @test_A(i32* %p) -// CHECK: call void asm sideeffect "", "*A"(i32* %p) +// CHECK: call void asm sideeffect "", "*A"(i32* elementtype(i32) %p) asm volatile("" :: "A"(*p)); } diff --git a/clang/test/CodeGen/SystemZ/systemz-inline-asm.c b/clang/test/CodeGen/SystemZ/systemz-inline-asm.c index 2a656eaf4a2a..78c5a4b45fda 100644 --- a/clang/test/CodeGen/SystemZ/systemz-inline-asm.c +++ b/clang/test/CodeGen/SystemZ/systemz-inline-asm.c @@ -6,31 +6,31 @@ unsigned long gl; void test_store_m(unsigned int i) { asm("st %1, %0" : "=m" (gi) : "r" (i)); // CHECK-LABEL: define{{.*}} void @test_store_m(i32 zeroext %i) -// CHECK: call void asm "st $1, $0", "=*m,r"(i32* nonnull @gi, i32 %i) +// CHECK: call void asm "st $1, $0", "=*m,r"(i32* nonnull 
elementtype(i32) @gi, i32 %i) } void test_store_Q(unsigned int i) { asm("st %1, %0" : "=Q" (gi) : "r" (i)); // CHECK-LABEL: define{{.*}} void @test_store_Q(i32 zeroext %i) -// CHECK: call void asm "st $1, $0", "=*Q,r"(i32* nonnull @gi, i32 %i) +// CHECK: call void asm "st $1, $0", "=*Q,r"(i32* nonnull elementtype(i32) @gi, i32 %i) } void test_store_R(unsigned int i) { asm("st %1, %0" : "=R" (gi) : "r" (i)); // CHECK-LABEL: define{{.*}} void @test_store_R(i32 zeroext %i) -// CHECK: call void asm "st $1, $0", "=*R,r"(i32* nonnull @gi, i32 %i) +// CHECK: call void asm "st $1, $0", "=*R,r"(i32* nonnull elementtype(i32) @gi, i32 %i) } void test_store_S(unsigned int i) { asm("st %1, %0" : "=S" (gi) : "r" (i)); // CHECK-LABEL: define{{.*}} void @test_store_S(i32 zeroext %i) -// CHECK: call void asm "st $1, $0", "=*S,r"(i32* nonnull @gi, i32 %i) +// CHECK: call void asm "st $1, $0", "=*S,r"(i32* nonnull elementtype(i32) @gi, i32 %i) } void test_store_T(unsigned int i) { asm("st %1, %0" : "=T" (gi) : "r" (i)); // CHECK-LABEL: define{{.*}} void @test_store_T(i32 zeroext %i) -// CHECK: call void asm "st $1, $0", "=*T,r"(i32* nonnull @gi, i32 %i) +// CHECK: call void asm "st $1, $0", "=*T,r"(i32* nonnull elementtype(i32) @gi, i32 %i) } int test_load_m() { @@ -38,7 +38,7 @@ int test_load_m() { asm("l %0, %1" : "=r" (i) : "m" (gi)); return i; // CHECK-LABEL: define{{.*}} signext i32 @test_load_m() -// CHECK: call i32 asm "l $0, $1", "=r,*m"(i32* nonnull @gi) +// CHECK: call i32 asm "l $0, $1", "=r,*m"(i32* nonnull elementtype(i32) @gi) } int test_load_Q() { @@ -46,7 +46,7 @@ int test_load_Q() { asm("l %0, %1" : "=r" (i) : "Q" (gi)); return i; // CHECK-LABEL: define{{.*}} signext i32 @test_load_Q() -// CHECK: call i32 asm "l $0, $1", "=r,*Q"(i32* nonnull @gi) +// CHECK: call i32 asm "l $0, $1", "=r,*Q"(i32* nonnull elementtype(i32) @gi) } int test_load_R() { @@ -54,7 +54,7 @@ int test_load_R() { asm("l %0, %1" : "=r" (i) : "R" (gi)); return i; // CHECK-LABEL: define{{.*}} signext 
i32 @test_load_R() -// CHECK: call i32 asm "l $0, $1", "=r,*R"(i32* nonnull @gi) +// CHECK: call i32 asm "l $0, $1", "=r,*R"(i32* nonnull elementtype(i32) @gi) } int test_load_S() { @@ -62,7 +62,7 @@ int test_load_S() { asm("l %0, %1" : "=r" (i) : "S" (gi)); return i; // CHECK-LABEL: define{{.*}} signext i32 @test_load_S() -// CHECK: call i32 asm "l $0, $1", "=r,*S"(i32* nonnull @gi) +// CHECK: call i32 asm "l $0, $1", "=r,*S"(i32* nonnull elementtype(i32) @gi) } int test_load_T() { @@ -70,13 +70,13 @@ int test_load_T() { asm("l %0, %1" : "=r" (i) : "T" (gi)); return i; // CHECK-LABEL: define{{.*}} signext i32 @test_load_T() -// CHECK: call i32 asm "l $0, $1", "=r,*T"(i32* nonnull @gi) +// CHECK: call i32 asm "l $0, $1", "=r,*T"(i32* nonnull elementtype(i32) @gi) } void test_mI(unsigned char *c) { asm volatile("cli %0, %1" :: "Q" (*c), "I" (100)); // CHECK-LABEL: define{{.*}} void @test_mI(i8* %c) -// CHECK: call void asm sideeffect "cli $0, $1", "*Q,I"(i8* %c, i32 100) +// CHECK: call void asm sideeffect "cli $0, $1", "*Q,I"(i8* elementtype(i8) %c, i32 100) } unsigned int test_dJa(unsigned int i, unsigned int j) { diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c index a6e8faef8b9e..4dfaff5679d0 100644 --- a/clang/test/CodeGen/aarch64-inline-asm.c +++ b/clang/test/CodeGen/aarch64-inline-asm.c @@ -17,7 +17,7 @@ void test_generic_constraints(int var32, long var64) { asm("ldr %0, %1" : "=r"(var32) : "m"(var)); asm("ldr %0, [%1]" : "=r"(var64) : "r"(&var)); -// CHECK: call i32 asm "ldr $0, $1", "=r,*m"(i64* @var) +// CHECK: call i32 asm "ldr $0, $1", "=r,*m"(i64* elementtype(i64) @var) // CHECK: call i64 asm "ldr $0, [$1]", "=r,r"(i64* @var) } @@ -52,7 +52,7 @@ void test_constraint_S(void) { void test_constraint_Q(void) { int val; asm("ldxr %0, %1" : "=r"(val) : "Q"(var)); -// CHECK: call i32 asm "ldxr $0, $1", "=r,*Q"(i64* @var) +// CHECK: call i32 asm "ldxr $0, $1", "=r,*Q"(i64* elementtype(i64) @var) } void 
test_gcc_registers(void) { diff --git a/clang/test/CodeGen/asm-inout.c b/clang/test/CodeGen/asm-inout.c index 411f6fadac10..68bdfe7f956f 100644 --- a/clang/test/CodeGen/asm-inout.c +++ b/clang/test/CodeGen/asm-inout.c @@ -5,7 +5,7 @@ int *foo(void); // CHECK: @test1 void test1() { // CHECK: [[REGCALLRESULT:%[a-zA-Z0-9\.]+]] = call i32* @foo() - // CHECK: call void asm "foobar", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* [[REGCALLRESULT]], i32* [[REGCALLRESULT]]) + // CHECK: call void asm "foobar", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) [[REGCALLRESULT]], i32* elementtype(i32) [[REGCALLRESULT]]) asm ("foobar" : "+m"(*foo())); } diff --git a/clang/test/CodeGen/asm.c b/clang/test/CodeGen/asm.c index 7de79639bfd7..0411daae0aeb 100644 --- a/clang/test/CodeGen/asm.c +++ b/clang/test/CodeGen/asm.c @@ -246,7 +246,7 @@ void t29(void) { : : "m"(t29_var)); // CHECK: @t29 - // CHECK: call void asm sideeffect "movl %eax, $0", "*m,~{dirflag},~{fpsr},~{flags}"([1 x i32]* @t29_var) + // CHECK: call void asm sideeffect "movl %eax, $0", "*m,~{dirflag},~{fpsr},~{flags}"([1 x i32]* elementtype([1 x i32]) @t29_var) } void t30(int len) { diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c b/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c index 81bf4d54db02..9766d1d2bde5 100644 --- a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c @@ -10,7 +10,7 @@ long test_ldarx(volatile long* a) { // CHECK64-LABEL: @test_ldarx - // CHECK64: %0 = tail call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* %a) + // CHECK64: %0 = tail call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* elementtype(i64) %a) // CHECK32-ERROR: error: this builtin is only available on 64-bit targets return __ldarx(a); } diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c 
b/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c index 0362ae389ba0..a7e36c6dc73e 100644 --- a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c @@ -13,20 +13,20 @@ int test_lwarx(volatile int* a) { // CHECK-LABEL: @test_lwarx - // CHECK: %0 = tail call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(i32* %a) + // CHECK: %0 = tail call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(i32* elementtype(i32) %a) return __lwarx(a); } short test_lharx(volatile short *a) { // CHECK-LABEL: @test_lharx - // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(i16* %a) + // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(i16* elementtype(i16) %a) // CHECK-NON-PWR8-ERR: error: this builtin is only valid on POWER8 or later CPUs return __lharx(a); } char test_lbarx(volatile char *a) { // CHECK-LABEL: @test_lbarx - // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(i8* %a) + // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(i8* elementtype(i8) %a) // CHECK-NON-PWR8-ERR: error: this builtin is only valid on POWER8 or later CPUs return __lbarx(a); } @@ -50,14 +50,14 @@ int test_sthcx(volatile short *a, short val) { // Extra test cases that previously caused error during usage. 
int test_lharx_intret(volatile short *a) { // CHECK-LABEL: @test_lharx_intret - // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(i16* %a) + // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(i16* elementtype(i16) %a) // CHECK-NON-PWR8-ERR: error: this builtin is only valid on POWER8 or later CPUs return __lharx(a); } int test_lbarx_intret(volatile char *a) { // CHECK-LABEL: @test_lbarx_intret - // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(i8* %a) + // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(i8* elementtype(i8) %a) // CHECK-NON-PWR8-ERR: error: this builtin is only valid on POWER8 or later CPUs return __lbarx(a); } diff --git a/clang/test/CodeGen/inline-asm-x86-flag-output.c b/clang/test/CodeGen/inline-asm-x86-flag-output.c index 74ad3a46e70c..36bffad7169c 100644 --- a/clang/test/CodeGen/inline-asm-x86-flag-output.c +++ b/clang/test/CodeGen/inline-asm-x86-flag-output.c @@ -2,7 +2,7 @@ int test_cca(long nr, volatile long *addr) { //CHECK-LABEL: @test_cca - //CHECK: = tail call i32 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@cca"(x), "=m"(*(volatile long *)(addr)) @@ -15,7 +15,7 @@ int test_cca(long nr, volatile long *addr) { int test_ccae(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccae - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccae"(x), "=m"(*(volatile long *)(addr)) @@ -28,7 +28,7 @@ int test_ccae(long nr, volatile long *addr) { int test_ccb(long nr, volatile long *addr) { //CHECK-LABEL: 
@test_ccb - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccb"(x), "=m"(*(volatile long *)(addr)) @@ -41,7 +41,7 @@ int test_ccb(long nr, volatile long *addr) { int test_ccbe(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccbe - //CHECK: tail call i32 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: tail call i32 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccbe"(x), "=m"(*(volatile long *)(addr)) @@ -54,7 +54,7 @@ int test_ccbe(long nr, volatile long *addr) { int test_ccc(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccc - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccc"(x), "=m"(*(volatile long *)(addr)) @@ -67,7 +67,7 @@ int test_ccc(long nr, volatile long *addr) { int test_cce(long nr, volatile long *addr) { //CHECK-LABEL: @test_cce - //CHECK: = tail call i32 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@cce"(x), "=m"(*(volatile long *)(addr)) @@ -80,7 +80,7 @@ int test_cce(long nr, volatile long *addr) { int test_ccz(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccz - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", 
"={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccz"(x), "=m"(*(volatile long *)(addr)) @@ -93,7 +93,7 @@ int test_ccz(long nr, volatile long *addr) { int test_ccg(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccg - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccg"(x), "=m"(*(volatile long *)(addr)) @@ -106,7 +106,7 @@ int test_ccg(long nr, volatile long *addr) { int test_ccge(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccge - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccge"(x), "=m"(*(volatile long *)(addr)) @@ -119,7 +119,7 @@ int test_ccge(long nr, volatile long *addr) { int test_ccl(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccl - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccl"(x), "=m"(*(volatile long *)(addr)) @@ -132,7 +132,7 @@ int test_ccl(long nr, volatile long *addr) { int test_ccle(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccle - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccle"(x), "=m"(*(volatile long *)(addr)) @@ -145,7 +145,7 @@ int 
test_ccle(long nr, volatile long *addr) { int test_ccna(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccna - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccna"(x), "=m"(*(volatile long *)(addr)) @@ -158,7 +158,7 @@ int test_ccna(long nr, volatile long *addr) { int test_ccnae(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnae - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnae"(x), "=m"(*(volatile long *)(addr)) @@ -171,7 +171,7 @@ int test_ccnae(long nr, volatile long *addr) { int test_ccnb(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnb - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnb"(x), "=m"(*(volatile long *)(addr)) @@ -184,7 +184,7 @@ int test_ccnb(long nr, volatile long *addr) { int test_ccnbe(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnbe - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnbe"(x), "=m"(*(volatile long *)(addr)) @@ -197,7 +197,7 @@ int test_ccnbe(long nr, volatile long *addr) { int test_ccnc(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnc - //CHECK: = tail call i32 asm "cmp $2,$1", 
"={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnc"(x), "=m"(*(volatile long *)(addr)) @@ -210,7 +210,7 @@ int test_ccnc(long nr, volatile long *addr) { int test_ccne(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccne - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccne"(x), "=m"(*(volatile long *)(addr)) @@ -223,7 +223,7 @@ int test_ccne(long nr, volatile long *addr) { int test_ccnz(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnz - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnz"(x), "=m"(*(volatile long *)(addr)) @@ -236,7 +236,7 @@ int test_ccnz(long nr, volatile long *addr) { int test_ccng(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccng - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccng"(x), "=m"(*(volatile long *)(addr)) @@ -249,7 +249,7 @@ int test_ccng(long nr, volatile long *addr) { int test_ccnge(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnge - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* 
elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnge"(x), "=m"(*(volatile long *)(addr)) @@ -262,7 +262,7 @@ int test_ccnge(long nr, volatile long *addr) { int test_ccnl(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnl - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnl"(x), "=m"(*(volatile long *)(addr)) @@ -275,7 +275,7 @@ int test_ccnl(long nr, volatile long *addr) { int test_ccnle(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnle - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnle"(x), "=m"(*(volatile long *)(addr)) @@ -288,7 +288,7 @@ int test_ccnle(long nr, volatile long *addr) { int test_ccno(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccno - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccno"(x), "=m"(*(volatile long *)(addr)) @@ -301,7 +301,7 @@ int test_ccno(long nr, volatile long *addr) { int test_ccnp(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnp - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnp"(x), "=m"(*(volatile long *)(addr)) @@ -314,7 +314,7 @@ int test_ccnp(long nr, volatile long *addr) { 
int test_ccns(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccns - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccns"(x), "=m"(*(volatile long *)(addr)) @@ -327,7 +327,7 @@ int test_ccns(long nr, volatile long *addr) { int test_cco(long nr, volatile long *addr) { //CHECK-LABEL: @test_cco - //CHECK: = tail call i32 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@cco"(x), "=m"(*(volatile long *)(addr)) @@ -340,7 +340,7 @@ int test_cco(long nr, volatile long *addr) { int test_ccp(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccp - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccp"(x), "=m"(*(volatile long *)(addr)) @@ -353,7 +353,7 @@ int test_ccp(long nr, volatile long *addr) { int test_ccs(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccs - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccs"(x), "=m"(*(volatile long *)(addr)) diff --git a/clang/test/CodeGen/matrix-type.c b/clang/test/CodeGen/matrix-type.c index bb0dc3dd1b86..e338e1eb5302 100644 --- a/clang/test/CodeGen/matrix-type.c +++ b/clang/test/CodeGen/matrix-type.c @@ -169,7 +169,7 @@ void 
matrix_inline_asm_memory_readwrite() { // CHECK-NEXT: [[PTR1:%.+]] = bitcast [16 x double]* [[ALLOCA]] to <16 x double>* // CHECK-NEXT: [[PTR2:%.+]] = bitcast [16 x double]* [[ALLOCA]] to <16 x double>* // CHECK-NEXT: [[VAL:%.+]] = load <16 x double>, <16 x double>* [[PTR2]], align 8 - // CHECK-NEXT: call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(<16 x double>* [[PTR1]], <16 x double> [[VAL]]) + // CHECK-NEXT: call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(<16 x double>* elementtype(<16 x double>) [[PTR1]], <16 x double> [[VAL]]) // CHECK-NEXT: ret void dx4x4_t m; diff --git a/clang/test/CodeGen/mips-constraints-mem.c b/clang/test/CodeGen/mips-constraints-mem.c index 295d67cadf0a..676ccc120c79 100644 --- a/clang/test/CodeGen/mips-constraints-mem.c +++ b/clang/test/CodeGen/mips-constraints-mem.c @@ -9,7 +9,7 @@ int foo() // 'R': An address that can be used in a non-macro load or stor' // This test will result in the higher and lower nibbles being // switched due to the lwl/lwr instruction pairs. 
- // CHECK: %{{[0-9]+}} = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R,~{$1}"(i32* %{{[0-9,a-f]+}}) #1, + // CHECK: %{{[0-9]+}} = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R,~{$1}"(i32* elementtype(i32) %{{[0-9,a-f]+}}) #1, int c = 0xffbbccdd; diff --git a/clang/test/CodeGen/mips-inline-asm-modifiers.c b/clang/test/CodeGen/mips-inline-asm-modifiers.c index 3116e764b213..413ba3bfa9c5 100644 --- a/clang/test/CodeGen/mips-inline-asm-modifiers.c +++ b/clang/test/CodeGen/mips-inline-asm-modifiers.c @@ -7,8 +7,8 @@ int printf(const char*, ...); typedef int v4i32 __attribute__((vector_size(16))); - // CHECK: %{{[0-9]+}} = call i32 asm ".set noreorder;\0Alw $0,$1;\0A.set reorder;\0A", "=r,*m,~{$1}"(i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, - // CHECK: %{{[0-9]+}} = call i32 asm "lw $0,${1:D};\0A", "=r,*m,~{$1}"(i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, + // CHECK: %{{[0-9]+}} = call i32 asm ".set noreorder;\0Alw $0,$1;\0A.set reorder;\0A", "=r,*m,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8 x i32], [8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, + // CHECK: %{{[0-9]+}} = call i32 asm "lw $0,${1:D};\0A", "=r,*m,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8 x i32], [8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, // CHECK: %{{[0-9]+}} = call <4 x i32> asm "ldi.w ${0:w},1", "=f,~{$1}" int b[8] = {0,1,2,3,4,5,6,7}; int main() diff --git a/clang/test/CodeGen/mips-inline-asm.c b/clang/test/CodeGen/mips-inline-asm.c index fa38663f387d..352b7ea25e52 100644 --- a/clang/test/CodeGen/mips-inline-asm.c +++ b/clang/test/CodeGen/mips-inline-asm.c @@ -5,17 +5,17 @@ int data; void m () { asm("lw $1, %0" :: "m"(data)); - // CHECK: call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* @data) + // CHECK: call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* elementtype(i32) @data) } void ZC () { asm("ll 
$1, %0" :: "ZC"(data)); - // CHECK: call void asm sideeffect "ll $$1, $0", "*^ZC,~{$1}"(i32* @data) + // CHECK: call void asm sideeffect "ll $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) @data) } void R () { asm("lw $1, %0" :: "R"(data)); - // CHECK: call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* @data) + // CHECK: call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* elementtype(i32) @data) } int additionalClobberedRegisters () { diff --git a/clang/test/CodeGen/mozilla-ms-inline-asm.c b/clang/test/CodeGen/mozilla-ms-inline-asm.c index 210c7f2b9c8e..30ad028ce59e 100644 --- a/clang/test/CodeGen/mozilla-ms-inline-asm.c +++ b/clang/test/CodeGen/mozilla-ms-inline-asm.c @@ -38,7 +38,7 @@ void invoke(void* that, unsigned methodIndex, // CHECK-SAME: pop ebp // CHECK-SAME: ret // CHECK: "=*m,*m,*m,*m,*m,~{eax},~{ebp},~{ecx},~{edx},~{flags},~{esp},~{dirflag},~{fpsr},~{flags}" -// CHECK: (i8** %8, i32* %7, void (...)* bitcast (void ()* @invoke_copy_to_stack to void (...)*), i8** %5, i32* %6) +// CHECK: (i8** elementtype(i8*) %8, i32* elementtype(i32) %7, void (...)* elementtype(void (...)) bitcast (void ()* @invoke_copy_to_stack to void (...)*), i8** elementtype(i8*) %5, i32* elementtype(i32) %6) // CHECK: ret void __asm { mov edx,paramCount diff --git a/clang/test/CodeGen/ms-inline-asm-64.c b/clang/test/CodeGen/ms-inline-asm-64.c index 20e8228a04b6..7fbafaee3fd6 100644 --- a/clang/test/CodeGen/ms-inline-asm-64.c +++ b/clang/test/CodeGen/ms-inline-asm-64.c @@ -36,7 +36,7 @@ int t3() { // CHECK-SAME: lea ebx, $0 // CHECK-SAME: mov eax, [ebx] // CHECK-SAME: mov [ebx + $$4], ecx -// CHECK-SAME: "*m,~{eax},~{ebx},~{dirflag},~{fpsr},~{flags}"(%struct.t3_type* %{{.*}}) +// CHECK-SAME: "*m,~{eax},~{ebx},~{dirflag},~{fpsr},~{flags}"(%struct.t3_type* elementtype(%struct.t3_type) %{{.*}}) } int t4() { @@ -56,7 +56,7 @@ int t4() { // CHECK-SAME: lea ebx, $0 // CHECK-SAME: mov eax, [ebx] // CHECK-SAME: mov [ebx + $$4], ecx -// CHECK-SAME: 
"*m,~{eax},~{ebx},~{dirflag},~{fpsr},~{flags}"(%struct.t3_type* %{{.*}}) +// CHECK-SAME: "*m,~{eax},~{ebx},~{dirflag},~{fpsr},~{flags}"(%struct.t3_type* elementtype(%struct.t3_type) %{{.*}}) } void bar() {} @@ -70,5 +70,5 @@ void t5() { // CHECK: call void asm sideeffect inteldialect // CHECK-SAME: call qword ptr ${0:P} // CHECK-SAME: jmp qword ptr ${1:P} - // CHECK-SAME: "*m,*m,~{dirflag},~{fpsr},~{flags}"(void (...)* bitcast (void ()* @bar to void (...)*), void (...)* bitcast (void ()* @bar to void (...)*)) + // CHECK-SAME: "*m,*m,~{dirflag},~{fpsr},~{flags}"(void (...)* elementtype(void (...)) bitcast (void ()* @bar to void (...)*), void (...)* elementtype(void (...)) bitcast (void ()* @bar to void (...)*)) } diff --git a/clang/test/CodeGen/ms-inline-asm-static-variable.c b/clang/test/CodeGen/ms-inline-asm-static-variable.c index 8099ea5ac8cf..67334cb5d0f7 100644 --- a/clang/test/CodeGen/ms-inline-asm-static-variable.c +++ b/clang/test/CodeGen/ms-inline-asm-static-variable.c @@ -5,6 +5,6 @@ static int arr[10]; void t1() { // CHECK: @arr = internal global [10 x i32] - // CHECK: call void asm sideeffect inteldialect "mov dword ptr arr[edx * $$4],edx", "=*m,{{.*}}([10 x i32]* @arr) + // CHECK: call void asm sideeffect inteldialect "mov dword ptr arr[edx * $$4],edx", "=*m,{{.*}}([10 x i32]* elementtype([10 x i32]) @arr) __asm mov dword ptr arr[edx*4],edx } diff --git a/clang/test/CodeGen/ms-inline-asm.c b/clang/test/CodeGen/ms-inline-asm.c index ef0345b2a867..985c2232ebf8 100644 --- a/clang/test/CodeGen/ms-inline-asm.c +++ b/clang/test/CodeGen/ms-inline-asm.c @@ -114,7 +114,7 @@ unsigned t10(void) { // CHECK: call i32 asm sideeffect inteldialect // CHECK-SAME: mov eax, $2 // CHECK-SAME: mov $0, eax -// CHECK-SAME: "=*m,=&{eax},*m,~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}) +// CHECK-SAME: "=*m,=&{eax},*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}) // CHECK: [[RET:%[a-zA-Z0-9]+]] = load i32, i32* [[J]], 
align 4 // CHECK: ret i32 [[RET]] } @@ -140,7 +140,7 @@ unsigned t12(void) { // CHECK-SAME: mov $0, eax // CHECK-SAME: mov eax, $4 // CHECK-SAME: mov $1, eax -// CHECK-SAME: "=*m,=*m,=&{eax},*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}) +// CHECK-SAME: "=*m,=*m,=&{eax},*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}) } void t13() { @@ -152,7 +152,7 @@ void t13() { // CHECK: call void asm sideeffect inteldialect // CHECK-SAME: movzx eax, byte ptr $0 // CHECK-SAME: movzx eax, word ptr $1 -// CHECK-SAME: "*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i8* %{{.*}}i, i16* %{{.*}}j) +// CHECK-SAME: "*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i8* elementtype(i8) %{{.*}}i, i16* elementtype(i16) %{{.*}}j) } void t13_brac() { @@ -164,7 +164,7 @@ void t13_brac() { // CHECK: call void asm sideeffect inteldialect // CHECK-SAME: movzx eax, byte ptr $0 // CHECK-SAME: movzx eax, word ptr $1 -// CHECK-SAME: "*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i8* %{{.*}}i, i16* %{{.*}}j) +// CHECK-SAME: "*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i8* elementtype(i8) %{{.*}}i, i16* elementtype(i16) %{{.*}}j) } void t14() { @@ -177,7 +177,7 @@ void t14() { .endif } // CHECK: t14 -// CHECK: call void asm sideeffect inteldialect ".if 1\0A\09mov eax, $0\0A\09.else\0A\09mov ebx, j\0A\09.endif", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect ".if 1\0A\09mov eax, $0\0A\09.else\0A\09mov ebx, j\0A\09.endif", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) } int gvar = 10; @@ -196,7 +196,7 @@ void t15() { // CHECK: mov eax, $4 + $$1 __asm mov eax, 1+offset gvar+1 ; eax = 2 + address of gvar // CHECK: mov eax, $5 + $$2 -// CHECK: "*m,r,i,i,i,i,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* @{{.*}}, i32* @{{.*}}, i32* @{{.*}}, i32* @{{.*}}) +// CHECK: 
"*m,r,i,i,i,i,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, i32* %{{.*}}, i32* @{{.*}}, i32* @{{.*}}, i32* @{{.*}}, i32* @{{.*}}) } void t16() { @@ -312,7 +312,7 @@ void t24_helper(void) {} void t24() { __asm call t24_helper // CHECK: t24 -// CHECK: call void asm sideeffect inteldialect "call dword ptr ${0:P}", "*m,~{dirflag},~{fpsr},~{flags}"(void ()* @t24_helper) +// CHECK: call void asm sideeffect inteldialect "call dword ptr ${0:P}", "*m,~{dirflag},~{fpsr},~{flags}"(void ()* elementtype(void ()) @t24_helper) } void t25() { @@ -376,7 +376,7 @@ void t29() { // CHECK: mov dword ptr $1, $$8 __asm mov otype, TYPE arr // CHECK: mov dword ptr $2, $$4 -// CHECK: "=*m,=*m,=*m,~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}) +// CHECK: "=*m,=*m,=*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}) } int results[2] = {13, 37}; @@ -389,7 +389,7 @@ int *t30() __asm mov res, edi // CHECK: mov $0, edi return res; -// CHECK: "=*m,={eax},*m,~{edi},~{dirflag},~{fpsr},~{flags}"(i32** %{{.*}}, [2 x i32]* @{{.*}}) +// CHECK: "=*m,={eax},*m,~{edi},~{dirflag},~{fpsr},~{flags}"(i32** elementtype(i32*) %{{.*}}, [2 x i32]* elementtype([2 x i32]) @{{.*}}) } void t31() { @@ -412,7 +412,7 @@ void t32() { // CHECK: mov ax, word ptr $2 __asm mov al, byte ptr i // CHECK: mov al, byte ptr $3 -// CHECK: "*m,*m,*m,*m,~{al},~{ax},~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}) +// CHECK: "*m,*m,*m,*m,~{al},~{ax},~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}) } void t33() { @@ -426,7 +426,7 @@ void t33() { // CHECK: mov ax, word ptr $2 __asm mov al, byte ptr [i] // CHECK: mov al, byte ptr $3 -// CHECK: "*m,*m,*m,*m,~{al},~{ax},~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}) +// CHECK: 
"*m,*m,*m,*m,~{al},~{ax},~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}, i32* elementtype(i32) %{{.*}}) } void t34() { @@ -452,31 +452,31 @@ void t36() { int arr[4]; // Work around PR20368: These should be single line blocks __asm { mov eax, 4[arr] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4[arr + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 8[arr + 4 + 32*2 - 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$72]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$72]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 12[4 + arr] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$16]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$16]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4[4 + arr + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$12]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$12]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4[64 + arr + (2*32)] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$132]", 
"*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$132]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4[64 + arr - 2*32] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, [arr + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, [arr + 4 + 32*2 - 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$64]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$64]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, [4 + arr] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, [4 + arr + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, [64 + arr + (2*32)] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$128]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect 
inteldialect "mov eax, $0[$$128]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, [64 + arr - 2*32] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) } void t37() { @@ -507,21 +507,21 @@ void t38() { int arr[4]; // Work around PR20368: These should be single line blocks __asm { mov eax, 4+4[arr] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, (4+4)[arr + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$12]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$12]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 8*2[arr + 4 + 32*2 - 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$80]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$80]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 12+20[4 + arr] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$36]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$36]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4*16+4[4 + arr + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$76]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: 
call void asm sideeffect inteldialect "mov eax, $0[$$76]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4*4[64 + arr + (2*32)] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$144]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$144]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 4*(4-2)[64 + arr - 2*32] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) __asm { mov eax, 32*(4-2)[arr - 2*32] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"([4 x i32]* elementtype([4 x i32]) %{{.*}}) } void cpuid() { @@ -577,7 +577,7 @@ void t40(float a) { // CHECK: fld dword ptr $1 __asm fistp i // CHECK: fistp dword ptr $0 -// CHECK: "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, float* %{{.*}}) +// CHECK: "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}, float* elementtype(float) %{{.*}}) } void t41(unsigned short a) { @@ -602,7 +602,7 @@ void t42() { int flags; __asm mov flags, eax // CHECK: mov $0, eax -// CHECK: "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %flags) +// CHECK: "=*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %flags) } void t42b() { @@ -610,7 +610,7 @@ void t42b() { int mxcsr; __asm mov mxcsr, eax // CHECK: mov $0, eax -// CHECK: "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %mxcsr) +// CHECK: "=*m,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %mxcsr) } void t43() { @@ -618,31 +618,31 @@ void t43() { C strct; // Work around PR20368: 
These should be single line blocks __asm { mov eax, 4[strct.c1] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, 4[strct.c3 + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, 8[strct.c2.a + 4 + 32*2 - 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$72]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$72]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, 12[4 + strct.c2.b] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$16]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$16]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, 4[4 + strct.c4.b2.b + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$12]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$12]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, 4[64 + strct.c1 + (2*32)] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$132]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$132]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, 4[64 + strct.c2.a - 2*32] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", 
"*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, [strct.c4.b1 + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, [strct.c4.b2.a + 4 + 32*2 - 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$64]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$64]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, [4 + strct.c1] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$4]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, [4 + strct.c2.b + 4] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$8]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, [64 + strct.c3 + (2*32)] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$128]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0[$$128]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %{{.*}}) __asm { mov eax, [64 + strct.c4.b2.b - 2*32] } -// CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}) +// CHECK: call void asm sideeffect inteldialect "mov eax, $0", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* 
elementtype(i32) %{{.*}}) } void t44() { @@ -684,7 +684,7 @@ void dot_operator(){ void call_clobber() { __asm call t41 // CHECK-LABEL: define{{.*}} void @call_clobber - // CHECK: call void asm sideeffect inteldialect "call dword ptr ${0:P}", "*m,~{dirflag},~{fpsr},~{flags}"(void (i16)* @t41) + // CHECK: call void asm sideeffect inteldialect "call dword ptr ${0:P}", "*m,~{dirflag},~{fpsr},~{flags}"(void (i16)* elementtype(void (i16)) @t41) } void xgetbv() { diff --git a/clang/test/CodeGen/ms-inline-asm.cpp b/clang/test/CodeGen/ms-inline-asm.cpp index 16d9d0f89794..8fc57953f422 100644 --- a/clang/test/CodeGen/ms-inline-asm.cpp +++ b/clang/test/CodeGen/ms-inline-asm.cpp @@ -23,7 +23,7 @@ void t1() { // CHECK-SAME: mov eax, $2 // CHECK-SAME: mov eax, dword ptr $3 // CHECK-SAME: mov eax, dword ptr $4 -// CHECK-SAME: "*m,*m,*m,*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32** @_ZN3Foo3ptrE, i32** @_ZN3Foo3Bar3ptrE, i32** @_ZN3Foo3ptrE, i32** @_ZN3Foo3ptrE, i32** @_ZN3Foo3ptrE) +// CHECK-SAME: "*m,*m,*m,*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32** elementtype(i32*) @_ZN3Foo3ptrE, i32** elementtype(i32*) @_ZN3Foo3Bar3ptrE, i32** elementtype(i32*) @_ZN3Foo3ptrE, i32** elementtype(i32*) @_ZN3Foo3ptrE, i32** elementtype(i32*) @_ZN3Foo3ptrE) __asm mov eax, Foo ::ptr __asm mov eax, Foo :: Bar :: ptr __asm mov eax, [Foo:: ptr] @@ -92,7 +92,7 @@ void T4::test() { // CHECK: call void asm sideeffect inteldialect // CHECK-SAME: mov eax, $1 // CHECK-SAME: mov $0, eax -// CHECK-SAME: "=*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* @_ZN2T41yE, i32* {{.*}}) +// CHECK-SAME: "=*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) @_ZN2T41yE, i32* elementtype(i32) {{.*}}) } template struct T5 { @@ -111,7 +111,7 @@ void test5() { // CHECK-SAME: push $0 // CHECK-SAME: call dword ptr ${2:P} // CHECK-SAME: mov $1, eax - // CHECK-SAME: "=*m,=*m,*m,~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %y, i32* %x, i32 (float)* @_ZN2T5IiE6createIfEEiT_) + // CHECK-SAME: 
"=*m,=*m,*m,~{esp},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %y, i32* elementtype(i32) %x, i32 (float)* elementtype(i32 (float)) @_ZN2T5IiE6createIfEEiT_) } // Just verify this doesn't emit an error. diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c index 5399e4e1cd59..0b5d9e19cbf3 100644 --- a/clang/test/CodeGen/ms-intrinsics.c +++ b/clang/test/CodeGen/ms-intrinsics.c @@ -626,48 +626,48 @@ __int64 test_InterlockedDecrement64(__int64 volatile *Addend) { #if defined(__i386__) || defined(__x86_64__) long test_InterlockedExchange_HLEAcquire(long volatile *Target, long Value) { // CHECK-INTEL: define{{.*}} i32 @test_InterlockedExchange_HLEAcquire(i32*{{[a-z_ ]*}}%Target, i32{{[a-z_ ]*}}%Value) -// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf2 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %Target, i32 %Value, i32* %Target) +// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf2 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %Target, i32 %Value, i32* elementtype(i32) %Target) return _InterlockedExchange_HLEAcquire(Target, Value); } long test_InterlockedExchange_HLERelease(long volatile *Target, long Value) { // CHECK-INTEL: define{{.*}} i32 @test_InterlockedExchange_HLERelease(i32*{{[a-z_ ]*}}%Target, i32{{[a-z_ ]*}}%Value) -// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf3 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %Target, i32 %Value, i32* %Target) +// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf3 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %Target, i32 %Value, i32* elementtype(i32) %Target) return _InterlockedExchange_HLERelease(Target, Value); } long test_InterlockedCompareExchange_HLEAcquire(long volatile *Destination, long Exchange, long Comparand) { // CHECK-INTEL: define{{.*}} i32 
@test_InterlockedCompareExchange_HLEAcquire(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comparand) -// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf2 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %Destination, i32 %Exchange, i32 %Comparand, i32* %Destination) +// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf2 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %Destination, i32 %Exchange, i32 %Comparand, i32* elementtype(i32) %Destination) return _InterlockedCompareExchange_HLEAcquire(Destination, Exchange, Comparand); } long test_InterlockedCompareExchange_HLERelease(long volatile *Destination, long Exchange, long Comparand) { // CHECK-INTEL: define{{.*}} i32 @test_InterlockedCompareExchange_HLERelease(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comparand) -// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf3 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %Destination, i32 %Exchange, i32 %Comparand, i32* %Destination) +// CHECK-INTEL: call i32 asm sideeffect ".byte 0xf3 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %Destination, i32 %Exchange, i32 %Comparand, i32* elementtype(i32) %Destination) return _InterlockedCompareExchange_HLERelease(Destination, Exchange, Comparand); } #endif #if defined(__x86_64__) __int64 test_InterlockedExchange64_HLEAcquire(__int64 volatile *Target, __int64 Value) { // CHECK-X64: define{{.*}} i64 @test_InterlockedExchange64_HLEAcquire(i64*{{[a-z_ ]*}}%Target, i64{{[a-z_ ]*}}%Value) -// CHECK-X64: call i64 asm sideeffect ".byte 0xf2 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %Target, i64 %Value, i64* %Target) +// CHECK-X64: call i64 asm sideeffect ".byte 0xf2 ; lock ; xchg $($0, $1$|$1, $0$)", 
"=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %Target, i64 %Value, i64* elementtype(i64) %Target) return _InterlockedExchange64_HLEAcquire(Target, Value); } __int64 test_InterlockedExchange64_HLERelease(__int64 volatile *Target, __int64 Value) { // CHECK-X64: define{{.*}} i64 @test_InterlockedExchange64_HLERelease(i64*{{[a-z_ ]*}}%Target, i64{{[a-z_ ]*}}%Value) -// CHECK-X64: call i64 asm sideeffect ".byte 0xf3 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %Target, i64 %Value, i64* %Target) +// CHECK-X64: call i64 asm sideeffect ".byte 0xf3 ; lock ; xchg $($0, $1$|$1, $0$)", "=r,=*m,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %Target, i64 %Value, i64* elementtype(i64) %Target) return _InterlockedExchange64_HLERelease(Target, Value); } __int64 test_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *Destination, __int64 Exchange, __int64 Comparand) { // CHECK-X64: define{{.*}} i64 @test_InterlockedCompareExchange64_HLEAcquire(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%Exchange, i64{{[a-z_ ]*}}%Comparand) -// CHECK-X64: call i64 asm sideeffect ".byte 0xf2 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %Destination, i64 %Exchange, i64 %Comparand, i64* %Destination) +// CHECK-X64: call i64 asm sideeffect ".byte 0xf2 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %Destination, i64 %Exchange, i64 %Comparand, i64* elementtype(i64) %Destination) return _InterlockedCompareExchange64_HLEAcquire(Destination, Exchange, Comparand); } __int64 test_InterlockedCompareExchange64_HLERelease(__int64 volatile *Destination, __int64 Exchange, __int64 Comparand) { // CHECK-X64: define{{.*}} i64 @test_InterlockedCompareExchange64_HLERelease(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%Exchange, i64{{[a-z_ ]*}}%Comparand) -// CHECK-X64: call i64 asm sideeffect 
".byte 0xf3 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %Destination, i64 %Exchange, i64 %Comparand, i64* %Destination) +// CHECK-X64: call i64 asm sideeffect ".byte 0xf3 ; lock ; cmpxchg $($2, $1$|$1, $2$)", "={ax},=*m,r,0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) %Destination, i64 %Exchange, i64 %Comparand, i64* elementtype(i64) %Destination) return _InterlockedCompareExchange64_HLERelease(Destination, Exchange, Comparand); } #endif diff --git a/clang/test/CodeGen/mult-alt-generic.c b/clang/test/CodeGen/mult-alt-generic.c index f5546d45ccbb..433f2763472a 100644 --- a/clang/test/CodeGen/mult-alt-generic.c +++ b/clang/test/CodeGen/mult-alt-generic.c @@ -17,7 +17,7 @@ int marray[2]; // CHECK: @single_m void single_m() { - // CHECK: call void asm "foo $1,$0", "=*m,*m[[CLOBBERS:[a-zA-Z0-9@%{},~_$ ]*\"]](i32* {{[a-zA-Z0-9@%]+}}, i32* {{[a-zA-Z0-9@%]+}}) + // CHECK: call void asm "foo $1,$0", "=*m,*m[[CLOBBERS:[a-zA-Z0-9@%{},~_$ ]*\"]](i32* elementtype(i32) {{[a-zA-Z0-9@%]+}}, i32* elementtype(i32) {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=m" (mout0) : "m" (min1)); } @@ -150,7 +150,7 @@ void single_p() // CHECK: @multi_m void multi_m() { - // CHECK: call void asm "foo $1,$0", "=*m|r,m|r[[CLOBBERS]](i32* {{[a-zA-Z0-9@%]+}}, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: call void asm "foo $1,$0", "=*m|r,m|r[[CLOBBERS]](i32* elementtype(i32) {{[a-zA-Z0-9@%]+}}, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=m,r" (mout0) : "m,r" (min1)); } diff --git a/clang/test/CodeGen/mult-alt-x86.c b/clang/test/CodeGen/mult-alt-x86.c index c74c2841b957..85e65bf88168 100644 --- a/clang/test/CodeGen/mult-alt-x86.c +++ b/clang/test/CodeGen/mult-alt-x86.c @@ -123,144 +123,144 @@ void single_Y() // CHECK: @single_I void single_I() { - // CHECK: asm "foo $1,$0", "=*m,I[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,I[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "I" (1)); } // 
CHECK: @single_J void single_J() { - // CHECK: asm "foo $1,$0", "=*m,J[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,J[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "J" (1)); } // CHECK: @single_K void single_K() { - // CHECK: asm "foo $1,$0", "=*m,K[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,K[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "K" (1)); } // CHECK: @single_L void single_L() { - // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 255) + // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 255) asm("foo %1,%0" : "=m" (mout0) : "L" (0xff)); - // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 65535) + // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 65535) asm("foo %1,%0" : "=m" (mout0) : "L" (0xffff)); - // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 -1) + // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 -1) asm("foo %1,%0" : "=m" (mout0) : "L" (0xffffffff)); } // CHECK: @single_M void single_M() { - // CHECK: asm "foo $1,$0", "=*m,M[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,M[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "M" (1)); } // CHECK: @single_N void single_N() { - // CHECK: asm "foo $1,$0", "=*m,N[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,N[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "N" (1)); } // CHECK: @single_G void single_G() { - // CHECK: asm "foo $1,$0", "=*m,G[[CLOBBERS]](i32* @mout0, double {{1.[0]+e[+]*[0]+}}) + // CHECK: asm "foo $1,$0", "=*m,G[[CLOBBERS]](i32* elementtype(i32) @mout0, double {{1.[0]+e[+]*[0]+}}) asm("foo %1,%0" : "=m" (mout0) : "G" (1.0)); } // CHECK: @single_C void single_C() { - // CHECK: asm "foo $1,$0", "=*m,C[[CLOBBERS]](i32* @mout0, double 
{{1.[0]+e[+]*[0]+}}) + // CHECK: asm "foo $1,$0", "=*m,C[[CLOBBERS]](i32* elementtype(i32) @mout0, double {{1.[0]+e[+]*[0]+}}) asm("foo %1,%0" : "=m" (mout0) : "C" (1.0)); } // CHECK: @single_e void single_e() { - // CHECK: asm "foo $1,$0", "=*m,e[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,e[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "e" (1)); } // CHECK: @single_Z void single_Z() { - // CHECK: asm "foo $1,$0", "=*m,Z[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*m,Z[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=m" (mout0) : "Z" (1)); } // CHECK: @multi_R void multi_R() { - // CHECK: asm "foo $1,$0", "=*r|R|m,r|R|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|R|m,r|R|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,R,m" (mout0) : "r,R,m" (min1)); } // CHECK: @multi_q void multi_q() { - // CHECK: asm "foo $1,$0", "=*r|q|m,r|q|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|q|m,r|q|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,q,m" (mout0) : "r,q,m" (min1)); } // CHECK: @multi_Q void multi_Q() { - // CHECK: asm "foo $1,$0", "=*r|Q|m,r|Q|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|Q|m,r|Q|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,Q,m" (mout0) : "r,Q,m" (min1)); } // CHECK: @multi_a void multi_a() { - // CHECK: asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,a,m" (mout0) : "r,a,m" (min1)); } // CHECK: @multi_b void multi_b() { - // CHECK: asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo 
$1,$0", "=*r|{bx}|m,r|{bx}|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,b,m" (mout0) : "r,b,m" (min1)); } // CHECK: @multi_c void multi_c() { - // CHECK: asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,c,m" (mout0) : "r,c,m" (min1)); } // CHECK: @multi_d void multi_d() { - // CHECK: asm "foo $1,$0", "=*r|{dx}|m,r|{dx}|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|{dx}|m,r|{dx}|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,d,m" (mout0) : "r,d,m" (min1)); } // CHECK: @multi_S void multi_S() { - // CHECK: asm "foo $1,$0", "=*r|{si}|m,r|{si}|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|{si}|m,r|{si}|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,S,m" (mout0) : "r,S,m" (min1)); } // CHECK: @multi_D void multi_D() { - // CHECK: asm "foo $1,$0", "=*r|{di}|m,r|{di}|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|{di}|m,r|{di}|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,D,m" (mout0) : "r,D,m" (min1)); } // CHECK: @multi_A void multi_A() { - // CHECK: asm "foo $1,$0", "=*r|A|m,r|A|m[[CLOBBERS]](i32* @mout0, i32 {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|A|m,r|A|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,A,m" (mout0) : "r,A,m" (min1)); } @@ -285,14 +285,14 @@ void multi_u() // CHECK: @multi_y void multi_y() { - // CHECK: asm "foo $1,$0", "=*r|y|m,r|y|m[[CLOBBERS]](double* @dout0, double {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|y|m,r|y|m[[CLOBBERS]](double* elementtype(double) @dout0, double {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,y,m" (dout0) 
: "r,y,m" (din1)); } // CHECK: @multi_x void multi_x() { - // CHECK: asm "foo $1,$0", "=*r|x|m,r|x|m[[CLOBBERS]](double* @dout0, double {{[a-zA-Z0-9@%]+}}) + // CHECK: asm "foo $1,$0", "=*r|x|m,r|x|m[[CLOBBERS]](double* elementtype(double) @dout0, double {{[a-zA-Z0-9@%]+}}) asm("foo %1,%0" : "=r,x,m" (dout0) : "r,x,m" (din1)); } @@ -310,69 +310,69 @@ void multi_Y0() // CHECK: @multi_I void multi_I() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|I|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|I|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,I,m" (1)); } // CHECK: @multi_J void multi_J() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|J|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|J|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,J,m" (1)); } // CHECK: @multi_K void multi_K() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|K|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|K|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,K,m" (1)); } // CHECK: @multi_L void multi_L() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|L|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|L|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,L,m" (1)); } // CHECK: @multi_M void multi_M() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|M|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|M|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,M,m" (1)); } // CHECK: @multi_N void multi_N() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|N|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|N|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,N,m" (1)); } // CHECK: @multi_G void multi_G() { - // CHECK: asm "foo $1,$0", 
"=*r|m|m,r|G|m[[CLOBBERS]](i32* @mout0, double {{1.[0]+e[+]*[0]+}}) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|G|m[[CLOBBERS]](i32* elementtype(i32) @mout0, double {{1.[0]+e[+]*[0]+}}) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,G,m" (1.0)); } // CHECK: @multi_C void multi_C() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|C|m[[CLOBBERS]](i32* @mout0, double {{1.[0]+e[+]*[0]+}}) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|C|m[[CLOBBERS]](i32* elementtype(i32) @mout0, double {{1.[0]+e[+]*[0]+}}) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,C,m" (1.0)); } // CHECK: @multi_e void multi_e() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|e|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|e|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,e,m" (1)); } // CHECK: @multi_Z void multi_Z() { - // CHECK: asm "foo $1,$0", "=*r|m|m,r|Z|m[[CLOBBERS]](i32* @mout0, i32 1) + // CHECK: asm "foo $1,$0", "=*r|m|m,r|Z|m[[CLOBBERS]](i32* elementtype(i32) @mout0, i32 1) asm("foo %1,%0" : "=r,m,m" (mout0) : "r,Z,m" (1)); } diff --git a/clang/test/CodeGen/ppc64-inline-asm.c b/clang/test/CodeGen/ppc64-inline-asm.c index 94c737a4e243..4f05f4533da6 100644 --- a/clang/test/CodeGen/ppc64-inline-asm.c +++ b/clang/test/CodeGen/ppc64-inline-asm.c @@ -41,12 +41,12 @@ double test_fmax(double x, double y) { void testZ(void *addr) { asm volatile ("dcbz %y0\n" :: "Z"(*(unsigned char *)addr) : "memory"); // CHECK-LABEL: void @testZ(i8* %addr) -// CHECK: call void asm sideeffect "dcbz ${0:y}\0A", "*Z,~{memory}"(i8* %addr) +// CHECK: call void asm sideeffect "dcbz ${0:y}\0A", "*Z,~{memory}"(i8* elementtype(i8) %addr) } void testZwOff(void *addr, long long off) { asm volatile ("dcbz %y0\n" :: "Z"(*(unsigned char *)(addr + off)) : "memory"); // CHECK-LABEL: void @testZwOff(i8* %addr, i64 %off) // CHECK: %[[VAL:[^ ]+]] = getelementptr i8, i8* %addr, i64 %off -// CHECK: call void asm sideeffect "dcbz ${0:y}\0A", "*Z,~{memory}"(i8* %[[VAL]]) +// CHECK: call void asm 
sideeffect "dcbz ${0:y}\0A", "*Z,~{memory}"(i8* elementtype(i8) %[[VAL]]) } diff --git a/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp b/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp index 0817f3e4ecc8..91b007e808ed 100644 --- a/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp +++ b/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp @@ -24,7 +24,7 @@ extern "C" int test_param_field(A p) { extern "C" int test_namespace_global() { // CHECK: define{{.*}} i32 @test_namespace_global() -// CHECK: call i32 asm sideeffect inteldialect "mov eax, $1", "{{.*}}"(i32* getelementptr inbounds (%struct.A, %struct.A* @_ZN4asdf8a_globalE, i32 0, i32 2, i32 1)) +// CHECK: call i32 asm sideeffect inteldialect "mov eax, $1", "{{.*}}"(i32* elementtype(i32) getelementptr inbounds (%struct.A, %struct.A* @_ZN4asdf8a_globalE, i32 0, i32 2, i32 1)) // CHECK: ret i32 __asm mov eax, asdf::a_global.a3.b2 } @@ -53,4 +53,4 @@ template void msvc_dcas_x86::store(); // CHECK: %[[P:.*]] = alloca %"struct.make_storage_type::type", align 4 // CHECK: %[[B:.*]] = getelementptr inbounds %"struct.make_storage_type::type", %"struct.make_storage_type::type"* %[[P]], i32 0, i32 0 // CHECK: %[[X:.*]] = getelementptr inbounds %"struct.make_storage_type::type::B", %"struct.make_storage_type::type::B"* %[[B]], i32 0, i32 1 -// CHECK: call void asm sideeffect inteldialect "mov edx, dword ptr $0", "*m,~{edx},~{dirflag},~{fpsr},~{flags}"(i32* %[[X]]) +// CHECK: call void asm sideeffect inteldialect "mov edx, dword ptr $0", "*m,~{edx},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %[[X]]) diff --git a/clang/test/CodeGenObjC/exceptions.m b/clang/test/CodeGenObjC/exceptions.m index 1db03be2516a..e330a652300d 100644 --- a/clang/test/CodeGenObjC/exceptions.m +++ b/clang/test/CodeGenObjC/exceptions.m @@ -54,14 +54,14 @@ int f2() { // CHECK-NEXT: br i1 [[CAUGHT]] @try { // Landing pad. Note that we elide the re-enter. 
- // CHECK: call void asm sideeffect "", "=*m,=*m"(i32* nonnull [[X]] + // CHECK: call void asm sideeffect "", "=*m,=*m"(i32* nonnull elementtype(i32) [[X]] // CHECK-NEXT: call i8* @objc_exception_extract // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[X]] // CHECK-NEXT: [[T2:%.*]] = add nsw i32 [[T1]], -1 // CHECK: store i32 6, i32* [[X]] x++; - // CHECK-NEXT: call void asm sideeffect "", "*m,*m"(i32* nonnull [[X]] + // CHECK-NEXT: call void asm sideeffect "", "*m,*m"(i32* nonnull elementtype(i32) [[X]] // CHECK-NEXT: call void @foo() // CHECK-NEXT: call void @objc_exception_try_exit // CHECK-NEXT: [[T:%.*]] = load i32, i32* [[X]] diff --git a/clang/test/CodeGenObjC/synchronized.m b/clang/test/CodeGenObjC/synchronized.m index 44f4826d19dc..1ab25fadf3c0 100644 --- a/clang/test/CodeGenObjC/synchronized.m +++ b/clang/test/CodeGenObjC/synchronized.m @@ -32,7 +32,7 @@ void foo(id a) { // CHECK: call i32 @_setjmp @synchronized(a) { // This is unreachable, but the optimizers can't know that. - // CHECK: call void asm sideeffect "", "=*m,=*m,=*m"(i8** nonnull [[A]], i8** nonnull [[SYNC]] + // CHECK: call void asm sideeffect "", "=*m,=*m,=*m"(i8** nonnull elementtype(i8*) [[A]], i8** nonnull elementtype(i8*) [[SYNC]] // CHECK: call i32 @objc_sync_exit // CHECK: call i8* @objc_exception_extract // CHECK: call void @objc_exception_throw From 0e5f258452b053cc3374754efaeabe3c30f42482 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 6 Jan 2022 17:34:01 +0900 Subject: [PATCH 777/992] [mlir][linalg][bufferize][NFC] Simplify InsertSliceOp bufferization No need to keep track of equivalent extract_slice / insert_slice tensors during bufferization. Just emit a copy, it will fold away. Note: The analysis still keeps track of equivalent tensors to make the correct inplace bufferization decisions. 
Differential Revision: https://reviews.llvm.org/D116684 --- .../TensorInterfaceImpl.h | 8 -- .../TensorInterfaceImpl.cpp | 85 ++++--------------- .../Transforms/ComprehensiveBufferizePass.cpp | 3 - .../Linalg/TestComprehensiveBufferize.cpp | 3 - 4 files changed, 16 insertions(+), 83 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h index ca620138d643..29355ef338f3 100644 --- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h @@ -9,8 +9,6 @@ #ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSOR_INTERFACE_IMPL_H #define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSOR_INTERFACE_IMPL_H -#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h" - namespace mlir { class DialectRegistry; @@ -19,12 +17,6 @@ namespace linalg { namespace comprehensive_bufferize { namespace tensor_ext { -struct InplaceInsertSliceOpAnalysis : public PostAnalysisStep { - LogicalResult run(Operation *op, BufferizationState &state, - BufferizationAliasInfo &aliasInfo, - SmallVector &newOps) override; -}; - void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); } // namespace tensor_ext diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp index 0f91e52a5227..9ee1d23d5d8a 100644 --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp @@ -23,20 +23,6 @@ namespace tensor_ext { using tensor::ExtractSliceOp; using tensor::InsertSliceOp; -namespace { -/// Extra bufferization state that is required for bufferization of tensor ops. 
-struct TensorBufferizationState : public DialectBufferizationState { - /// InsertSliceOps that bufferize inplace and do not require a copy. - DenseSet insertSliceOpsWithoutCopy; -}; -} // namespace - -static TensorBufferizationState & -getTensorBufferizationState(BufferizationState &state) { - return state.getDialectState( - tensor::TensorDialect::getDialectNamespace()); -} - struct CastOpInterface : public BufferizableOpInterface::ExternalModel { @@ -274,23 +260,6 @@ areEquivalentExtractSliceOps(const BufferizationAliasInfo &aliasInfo, return true; } -/// Return true if the source of a `insertSliceOp` bufferizes to an -/// equivalent ExtractSliceOp that bufferizes inplace. -static bool isSourceEquivalentToAMatchingInplaceExtractSliceOp( - const BufferizationAliasInfo &aliasInfo, InsertSliceOp insertSliceOp) { - bool foundOp = false; - aliasInfo.applyOnEquivalenceClass(insertSliceOp.source(), [&](Value value) { - auto extractSliceOp = value.getDefiningOp(); - if (extractSliceOp && - areEquivalentExtractSliceOps(aliasInfo, extractSliceOp, - insertSliceOp) && - aliasInfo.isInPlace(extractSliceOp->getResult(0))) { - foundOp = true; - } - }); - return foundOp; -} - /// Return true if `value` is originating from an ExtractSliceOp that matches /// the given InsertSliceOp. static bool hasMatchingExtractSliceOp(const BufferizationAliasInfo &aliasInfo, @@ -419,7 +388,6 @@ struct InsertSliceOpInterface // TODO: be very loud about it or even consider failing the pass. auto insertSliceOp = cast(op); Location loc = insertSliceOp.getLoc(); - TensorBufferizationState &tensorState = getTensorBufferizationState(state); // When bufferizing out-of-place, `getResultBuffer` allocates. Value dstMemref = @@ -427,24 +395,22 @@ struct InsertSliceOpInterface if (!dstMemref) return failure(); - bool needCopy = - !tensorState.insertSliceOpsWithoutCopy.contains(insertSliceOp); - if (needCopy) { - // Take a subview of the dst. 
- auto dstMemrefType = dstMemref.getType().cast(); - auto subviewMemRefType = - memref::SubViewOp::inferRankReducedResultType( - insertSliceOp.getSourceType().getRank(), dstMemrefType, - insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), - insertSliceOp.getMixedStrides()) - .cast(); - Value subView = rewriter.create( - loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(), - insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); - // Copy tensor. - Value srcMemref = state.lookupBuffer(rewriter, insertSliceOp.source()); - state.createMemCpy(rewriter, insertSliceOp.getLoc(), srcMemref, subView); - } + // Take a subview of the dst. + auto dstMemrefType = dstMemref.getType().cast(); + auto subviewMemRefType = + memref::SubViewOp::inferRankReducedResultType( + insertSliceOp.getSourceType().getRank(), dstMemrefType, + insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), + insertSliceOp.getMixedStrides()) + .cast(); + Value subView = rewriter.create( + loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); + + // Copy tensor. If this tensor.insert_slice has a matching + // tensor.extract_slice, the copy operation will eventually fold away. + Value srcMemref = state.lookupBuffer(rewriter, insertSliceOp.source()); + state.createMemCpy(rewriter, insertSliceOp.getLoc(), srcMemref, subView); state.replaceOp(rewriter, op, dstMemref); return success(); @@ -456,25 +422,6 @@ struct InsertSliceOpInterface } // namespace linalg } // namespace mlir -LogicalResult mlir::linalg::comprehensive_bufferize::tensor_ext:: - InplaceInsertSliceOpAnalysis::run(Operation *op, BufferizationState &state, - BufferizationAliasInfo &aliasInfo, - SmallVector &newOps) { - auto &tensorState = getTensorBufferizationState(state); - op->walk([&](InsertSliceOp insertSliceOp) { - // A copy of the source buffer is needed if either: - // - The producer of `source` is not inplace. 
This is the case where a - // slice is computed out of place into the inplace full tensor. - // - The result is not inplace. This is the case where the whole tensor is - // cloned and the clone needs to be updated. - if (isSourceEquivalentToAMatchingInplaceExtractSliceOp(aliasInfo, - insertSliceOp) && - state.isInPlace(insertSliceOp->getResult(0))) - tensorState.insertSliceOpsWithoutCopy.insert(insertSliceOp); - }); - return success(); -} - void mlir::linalg::comprehensive_bufferize::tensor_ext:: registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) { registry.addOpInterface(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index c5fdf402d941..13e18001d82e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -94,9 +94,6 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { // Enable InitTensorOp elimination. options->addPostAnalysisStep< linalg_ext::InsertSliceAnchoredInitTensorEliminationStep>(); - // TODO: Find a way to enable this step automatically when bufferizing tensor - // dialect ops. - options->addPostAnalysisStep(); if (!allowReturnMemref) options->addPostAnalysisStep(); diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp index f4a43aab1ebb..59e53eaba56a 100644 --- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp @@ -99,9 +99,6 @@ void TestComprehensiveFunctionBufferize::runOnFunction() { // Enable InitTensorOp elimination. options->addPostAnalysisStep< linalg_ext::InsertSliceAnchoredInitTensorEliminationStep>(); - // TODO: Find a way to enable this step automatically when bufferizing - // tensor dialect ops. 
- options->addPostAnalysisStep(); if (!allowReturnMemref) options->addPostAnalysisStep(); From bfc2f4b122a4b6c49129cd448e4c2fda306c9a52 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 6 Jan 2022 00:43:46 -0800 Subject: [PATCH 778/992] [ELF] Update help messages to prefer canonical name for some long options And improve the help message for --pop-state. --- lld/ELF/Options.td | 4 ++-- lld/ELF/Writer.cpp | 2 +- lld/docs/ld.lld.1 | 2 +- lld/test/ELF/execute-only-mixed-data.s | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index bddf13a3cb42..14b138e99da5 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -344,10 +344,10 @@ defm print_symbol_order: Eq<"print-symbol-order", "Print a symbol order specified by --call-graph-ordering-file into the specified file">; def pop_state: F<"pop-state">, - HelpText<"Undo the effect of -push-state">; + HelpText<"Restore the states saved by --push-state">; def push_state: F<"push-state">, - HelpText<"Save the current state of -as-needed, -static and -whole-archive">; + HelpText<"Save the current state of --as-needed, -static and --whole-archive">; def print_map: F<"print-map">, HelpText<"Print a link map to the standard output">; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 9db997cddfbe..d02584e9471b 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -2171,7 +2171,7 @@ template void Writer::checkExecuteOnly() { if (!(isec->flags & SHF_EXECINSTR)) error("cannot place " + toString(isec) + " into " + toString(osec->name) + - ": -execute-only does not support intermingling data and code"); + ": --execute-only does not support intermingling data and code"); } // The linker is expected to define SECNAME_start and SECNAME_end diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 04f0982b4ced..da43cf0ef7ab 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -486,7 +486,7 @@ Save the current state of and .Fl -whole-archive. 
.It Fl -pop-state -Undo the effect of +Restore the states saved by .Fl -push-state. .It Fl -relocatable , Fl r Create relocatable object file. diff --git a/lld/test/ELF/execute-only-mixed-data.s b/lld/test/ELF/execute-only-mixed-data.s index 5a03d730893e..e0a31afb9e90 100644 --- a/lld/test/ELF/execute-only-mixed-data.s +++ b/lld/test/ELF/execute-only-mixed-data.s @@ -16,7 +16,7 @@ // RUN: }" > %t.lds // RUN: ld.lld -T%t.lds %t.o -o %t -execute-only 2>&1 -// CHECK: cannot place {{.*}}:(.rodata.foo) into .text: -execute-only does not support intermingling data and code +// CHECK: cannot place {{.*}}:(.rodata.foo) into .text: --execute-only does not support intermingling data and code br lr From 9c0ac101d7d1019595b6d02c60eaaf45fa8345fa Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 6 Jan 2022 11:40:36 +0800 Subject: [PATCH 779/992] [M68k][test][NFC] Add missing tests for arith inst with size byte or word --- .../test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s | 9 ++++++ .../test/MC/M68k/Arith/Classes/MxBiArOp_FMR.s | 12 ++++++++ .../MC/M68k/Arith/Classes/MxBiArOp_RFRI.s | 6 ++++ .../MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s | 6 ++++ .../MC/M68k/Arith/Classes/MxBiArOp_RFRM.s | 21 +++++++++++++ .../MC/M68k/Arith/Classes/MxBiArOp_RFRRF.s | 6 ++++ .../MC/M68k/Arith/Classes/MxBiArOp_RFRR_EAd.s | 6 ++++ .../MC/M68k/Arith/Classes/MxBiArOp_RFRR_xEA.s | 6 ++++ llvm/test/MC/M68k/Arith/Classes/MxCMP_BI.s | 6 ++++ llvm/test/MC/M68k/Arith/Classes/MxCMP_MI.s | 30 +++++++++++++++++++ llvm/test/MC/M68k/Arith/Classes/MxCMP_RI.s | 6 ++++ llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s | 30 +++++++++++++++++++ llvm/test/MC/M68k/Arith/Classes/MxCMP_RR.s | 6 ++++ llvm/test/MC/M68k/Arith/Classes/MxNEG.s | 6 ++++ 14 files changed, 156 insertions(+) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s index 0e563c05e2eb..7a2bc96dade4 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMI.s 
@@ -6,6 +6,9 @@ add.b #-1, (0,%a0,%d0) ; CHECK: add.b #0, (-1,%a3,%a1) ; CHECK-SAME: encoding: [0x06,0x33,0x00,0x00,0x98,0xff] add.b #0, (-1,%a3,%a1) +; CHECK: add.w #-1, (7,%a2,%d0) +; CHECK-SAME: encoding: [0x06,0x72,0xff,0xff,0x08,0x07] +add.w #-1, (7,%a2,%d0) ; CHECK: add.l #-1, (13,%a2,%d1) ; CHECK-SAME: encoding: [0x06,0xb2,0xff,0xff,0xff,0xff,0x18,0x0d] add.l #-1, (13,%a2,%d1) @@ -16,6 +19,9 @@ add.b #-1, (0,%a0) ; CHECK: add.b #0, (-1,%a3) ; CHECK-SAME: encoding: [0x06,0x2b,0x00,0x00,0xff,0xff] add.b #0, (-1,%a3) +; CHECK: add.w #-1, (7,%a1) +; CHECK-SAME: encoding: [0x06,0x69,0xff,0xff,0x00,0x07] +add.w #-1, (7,%a1) ; CHECK: add.l #-1, (13,%a2) ; CHECK-SAME: encoding: [0x06,0xaa,0xff,0xff,0xff,0xff,0x00,0x0d] add.l #-1, (13,%a2) @@ -26,6 +32,9 @@ add.b #-1, (%a0) ; CHECK: add.b #0, (%a3) ; CHECK-SAME: encoding: [0x06,0x13,0x00,0x00] add.b #0, (%a3) +; CHECK: add.w #-1, (%a1) +; CHECK-SAME: encoding: [0x06,0x51,0xff,0xff] +add.w #-1, (%a1) ; CHECK: add.l #-1, (%a2) ; CHECK-SAME: encoding: [0x06,0x92,0xff,0xff,0xff,0xff] add.l #-1, (%a2) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMR.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMR.s index 7e3b9fcd055d..8a685c8fe621 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMR.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_FMR.s @@ -6,6 +6,12 @@ add.b %d0, (0,%a0,%d1) ; CHECK: add.b %d0, (-1,%a0,%d1) ; CHECK-SAME: encoding: [0xd1,0x30,0x18,0xff] add.b %d0, (-1,%a0,%d1) +; CHECK: add.w %d0, (0,%a1,%d1) +; CHECK-SAME: encoding: [0xd1,0x71,0x18,0x00] +add.w %d0, (0,%a1,%d1) +; CHECK: add.w %d1, (0,%a2,%a2) +; CHECK-SAME: encoding: [0xd3,0x72,0xa8,0x00] +add.w %d1, (0,%a2,%a2) ; CHECK: add.l %d0, (0,%a1,%d1) ; CHECK-SAME: encoding: [0xd1,0xb1,0x18,0x00] add.l %d0, (0,%a1,%d1) @@ -16,6 +22,9 @@ add.l %d1, (0,%a2,%a2) ; CHECK: add.b %d0, (0,%a0) ; CHECK-SAME: encoding: [0xd1,0x28,0x00,0x00] add.b %d0, (0,%a0) +; CHECK: add.w %d1, (1,%a0) +; CHECK-SAME: encoding: [0xd3,0x68,0x00,0x01] +add.w %d1, (1,%a0) ; 
CHECK: add.l %d0, (-1,%a1) ; CHECK-SAME: encoding: [0xd1,0xa9,0xff,0xff] add.l %d0, (-1,%a1) @@ -23,6 +32,9 @@ add.l %d0, (-1,%a1) ; CHECK: add.b %d0, (%a0) ; CHECK-SAME: encoding: [0xd1,0x10] add.b %d0, (%a0) +; CHECK: add.w %d0, (%a1) +; CHECK-SAME: encoding: [0xd1,0x51] +add.w %d0, (%a1) ; CHECK: add.l %d3, (%a1) ; CHECK-SAME: encoding: [0xd7,0x91] add.l %d3, (%a1) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI.s index b37196dd8455..b33a730dc14d 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI.s @@ -1,5 +1,11 @@ ; RUN: llvm-mc -triple=m68k -show-encoding %s | FileCheck %s +; CHECK: eori.b #0, %d1 +; CHECK-SAME: encoding: [0x0a,0x01,0x00,0x00] +eori.b #0, %d1 +; CHECK: eori.b #-1, %d2 +; CHECK-SAME: encoding: [0x0a,0x02,0x00,0xff] +eori.b #-1, %d2 ; CHECK: eori.w #0, %d0 ; CHECK-SAME: encoding: [0x0a,0x40,0x00,0x00] eori.w #0, %d0 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s index aa832cf8a14f..856ece6b569f 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRI_xEA.s @@ -1,5 +1,11 @@ ; RUN: llvm-mc -triple=m68k -show-encoding %s | FileCheck %s +; CHECK: add.b #0, %d1 +; CHECK-SAME: encoding: [0xd2,0x3c,0x00,0x00] +add.b #0, %d1 +; CHECK: add.b #-1, %d2 +; CHECK-SAME: encoding: [0xd4,0x3c,0x00,0xff] +add.b #-1, %d2 ; CHECK: add.w #0, %d0 ; CHECK-SAME: encoding: [0xd0,0x7c,0x00,0x00] add.w #0, %d0 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRM.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRM.s index f7141cf6fec5..15e596856124 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRM.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRM.s @@ -6,6 +6,12 @@ add.b (0,%pc,%d1), %d0 ; CHECK: add.b (-1,%pc,%d1), %d0 ; CHECK-SAME: encoding: [0xd0,0x3b,0x18,0xff] add.b (-1,%pc,%d1), %d0 +; CHECK: add.w 
(0,%pc,%d2), %d1 +; CHECK-SAME: encoding: [0xd2,0x7b,0x28,0x00] +add.w (0,%pc,%d2), %d1 +; CHECK: add.w (-1,%pc,%d2), %d2 +; CHECK-SAME: encoding: [0xd4,0x7b,0x28,0xff] +add.w (-1,%pc,%d2), %d2 ; CHECK: add.l (0,%pc,%d1), %d0 ; CHECK-SAME: encoding: [0xd0,0xbb,0x18,0x00] add.l (0,%pc,%d1), %d0 @@ -16,6 +22,9 @@ adda.l (0,%pc,%a2), %a1 ; CHECK: add.b (0,%pc), %d0 ; CHECK-SAME: encoding: [0xd0,0x3a,0x00,0x00] add.b (0,%pc), %d0 +; CHECK: add.w (1,%pc), %d1 +; CHECK-SAME: encoding: [0xd2,0x7a,0x00,0x01] +add.w (1,%pc), %d1 ; CHECK: add.l (-1,%pc), %d0 ; CHECK-SAME: encoding: [0xd0,0xba,0xff,0xff] add.l (-1,%pc), %d0 @@ -26,6 +35,12 @@ add.b (0,%a0,%d1), %d0 ; CHECK: add.b (-1,%a0,%d1), %d0 ; CHECK-SAME: encoding: [0xd0,0x30,0x18,0xff] add.b (-1,%a0,%d1), %d0 +; CHECK: add.w (0,%a1,%d1), %d0 +; CHECK-SAME: encoding: [0xd0,0x71,0x18,0x00] +add.w (0,%a1,%d1), %d0 +; CHECK: add.w (-1,%a1,%d2), %d0 +; CHECK-SAME: encoding: [0xd0,0x71,0x28,0xff] +add.w (-1,%a1,%d2), %d0 ; CHECK: add.l (0,%a1,%d1), %d0 ; CHECK-SAME: encoding: [0xd0,0xb1,0x18,0x00] add.l (0,%a1,%d1), %d0 @@ -36,6 +51,9 @@ adda.l (0,%a2,%a2), %a1 ; CHECK: add.b (0,%a0), %d0 ; CHECK-SAME: encoding: [0xd0,0x28,0x00,0x00] add.b (0,%a0), %d0 +; CHECK: add.w (1,%a2), %d0 +; CHECK-SAME: encoding: [0xd0,0x6a,0x00,0x01] +add.w (1,%a2), %d0 ; CHECK: add.l (-1,%a1), %d0 ; CHECK-SAME: encoding: [0xd0,0xa9,0xff,0xff] add.l (-1,%a1), %d0 @@ -43,6 +61,9 @@ add.l (-1,%a1), %d0 ; CHECK: add.b (%a0), %d0 ; CHECK-SAME: encoding: [0xd0,0x10] add.b (%a0), %d0 +; CHECK: add.w (%a2), %d0 +; CHECK-SAME: encoding: [0xd0,0x52] +add.w (%a2), %d0 ; CHECK: adda.l (%a1), %a3 ; CHECK-SAME: encoding: [0xd7,0xd1] adda.l (%a1), %a3 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRRF.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRRF.s index adb26d373db0..3eda00ee8990 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRRF.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRRF.s @@ -1,5 +1,11 @@ ; RUN: llvm-mc -triple=m68k 
-show-encoding %s | FileCheck %s +; CHECK: addx.b %d1, %d0 +; CHECK-SAME: encoding: [0xd1,0x01] +addx.b %d1, %d0 +; CHECK: addx.b %d4, %d5 +; CHECK-SAME: encoding: [0xdb,0x04] +addx.b %d4, %d5 ; CHECK: addx.w %d1, %d0 ; CHECK-SAME: encoding: [0xd1,0x41] addx.w %d1, %d0 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_EAd.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_EAd.s index ef4fc90c6993..9351a14ee3d6 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_EAd.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_EAd.s @@ -1,5 +1,11 @@ ; RUN: llvm-mc -triple=m68k -show-encoding %s | FileCheck %s +; CHECK: eor.b %d1, %d0 +; CHECK-SAME: encoding: [0xb3,0x00] +eor.b %d1, %d0 +; CHECK: eor.b %d4, %d5 +; CHECK-SAME: encoding: [0xb9,0x05] +eor.b %d4, %d5 ; CHECK: eor.w %d1, %d0 ; CHECK-SAME: encoding: [0xb3,0x40] eor.w %d1, %d0 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_xEA.s b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_xEA.s index 8e4c29c5726b..3096dedab884 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_xEA.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxBiArOp_RFRR_xEA.s @@ -1,5 +1,11 @@ ; RUN: llvm-mc -triple=m68k -show-encoding %s | FileCheck %s +; CHECK: add.b %d1, %d0 +; CHECK-SAME: encoding: [0xd0,0x01] +add.b %d1, %d0 +; CHECK: add.b %d4, %d5 +; CHECK-SAME: encoding: [0xda,0x04] +add.b %d4, %d5 ; CHECK: add.w %d1, %d0 ; CHECK-SAME: encoding: [0xd0,0x41] add.w %d1, %d0 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxCMP_BI.s b/llvm/test/MC/M68k/Arith/Classes/MxCMP_BI.s index c1248059a086..b1c442a4a5de 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxCMP_BI.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxCMP_BI.s @@ -6,6 +6,12 @@ cmpi.b #0, $ffffffffffffffff ; CHECK: cmpi.b #-1, $0 ; CHECK-SAME: encoding: [0x0c,0x39,0x00,0xff,0x00,0x00,0x00,0x00] cmpi.b #-1, $0 +; CHECK: cmpi.w #0, $0 +; CHECK-SAME: encoding: [0x0c,0x79,0x00,0x00,0x00,0x00,0x00,0x00] +cmpi.w #0, $0 +; CHECK: cmpi.w #37, $ffffffffffffffff +; CHECK-SAME: encoding: 
[0x0c,0x79,0x00,0x25,0xff,0xff,0xff,0xff] +cmpi.w #37, $ffffffffffffffff ; CHECK: cmpi.l #-1, $0 ; CHECK-SAME: encoding: [0x0c,0xb9,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00] cmpi.l #-1, $0 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxCMP_MI.s b/llvm/test/MC/M68k/Arith/Classes/MxCMP_MI.s index e53ce21e735b..01f2a4279af4 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxCMP_MI.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxCMP_MI.s @@ -6,6 +6,12 @@ cmpi.b #0, (-1,%pc,%d1) ; CHECK: cmpi.b #-1, (0,%pc,%d0) ; CHECK-SAME: encoding: [0x0c,0x3b,0x00,0xff,0x08,0x00] cmpi.b #-1, (0,%pc,%d0) +; CHECK: cmpi.w #0, (-1,%pc,%d3) +; CHECK-SAME: encoding: [0x0c,0x7b,0x00,0x00,0x38,0xff] +cmpi.w #0, (-1,%pc,%d3) +; CHECK: cmpi.w #37, (-2,%pc,%d2) +; CHECK-SAME: encoding: [0x0c,0x7b,0x00,0x25,0x28,0xfe] +cmpi.w #37, (-2,%pc,%d2) ; CHECK: cmpi.l #-1, (0,%pc,%d7) ; CHECK-SAME: encoding: [0x0c,0xbb,0xff,0xff,0xff,0xff,0x78,0x00] cmpi.l #-1, (0,%pc,%d7) @@ -19,6 +25,12 @@ cmpi.b #0, (0,%pc) ; CHECK: cmpi.b #-1, (-1,%pc) ; CHECK-SAME: encoding: [0x0c,0x3a,0x00,0xff,0xff,0xff] cmpi.b #-1, (-1,%pc) +; CHECK: cmpi.w #0, (-1,%pc) +; CHECK-SAME: encoding: [0x0c,0x7a,0x00,0x00,0xff,0xff] +cmpi.w #0, (-1,%pc) +; CHECK: cmpi.w #37, (-2,%pc) +; CHECK-SAME: encoding: [0x0c,0x7a,0x00,0x25,0xff,0xfe] +cmpi.w #37, (-2,%pc) ; CHECK: cmpi.l #-1, (0,%pc) ; CHECK-SAME: encoding: [0x0c,0xba,0xff,0xff,0xff,0xff,0x00,0x00] cmpi.l #-1, (0,%pc) @@ -32,6 +44,12 @@ cmpi.b #0, (-1,%a1,%a0) ; CHECK: cmpi.b #-1, (0,%a0,%a0) ; CHECK-SAME: encoding: [0x0c,0x30,0x00,0xff,0x88,0x00] cmpi.b #-1, (0,%a0,%a0) +; CHECK: cmpi.w #0, (-1,%a1,%a0) +; CHECK-SAME: encoding: [0x0c,0x71,0x00,0x00,0x88,0xff] +cmpi.w #0, (-1,%a1,%a0) +; CHECK: cmpi.w #37, (-2,%a0,%a0) +; CHECK-SAME: encoding: [0x0c,0x70,0x00,0x25,0x88,0xfe] +cmpi.w #37, (-2,%a0,%a0) ; CHECK: cmpi.l #-1, (0,%a6,%a0) ; CHECK-SAME: encoding: [0x0c,0xb6,0xff,0xff,0xff,0xff,0x88,0x00] cmpi.l #-1, (0,%a6,%a0) @@ -45,6 +63,12 @@ cmpi.b #0, (-1,%a1) ; CHECK: cmpi.b #-1, (0,%a0) ; 
CHECK-SAME: encoding: [0x0c,0x28,0x00,0xff,0x00,0x00] cmpi.b #-1, (0,%a0) +; CHECK: cmpi.w #0, (-1,%a1) +; CHECK-SAME: encoding: [0x0c,0x69,0x00,0x00,0xff,0xff] +cmpi.w #0, (-1,%a1) +; CHECK: cmpi.w #37, (-2,%a0) +; CHECK-SAME: encoding: [0x0c,0x68,0x00,0x25,0xff,0xfe] +cmpi.w #37, (-2,%a0) ; CHECK: cmpi.l #-1, (0,%a6) ; CHECK-SAME: encoding: [0x0c,0xae,0xff,0xff,0xff,0xff,0x00,0x00] cmpi.l #-1, (0,%a6) @@ -58,6 +82,12 @@ cmpi.b #0, (%a1) ; CHECK: cmpi.b #-1, (%a0) ; CHECK-SAME: encoding: [0x0c,0x10,0x00,0xff] cmpi.b #-1, (%a0) +; CHECK: cmpi.w #0, (%a1) +; CHECK-SAME: encoding: [0x0c,0x51,0x00,0x00] +cmpi.w #0, (%a1) +; CHECK: cmpi.w #37, (%a0) +; CHECK-SAME: encoding: [0x0c,0x50,0x00,0x25] +cmpi.w #37, (%a0) ; CHECK: cmpi.l #-1, (%a6) ; CHECK-SAME: encoding: [0x0c,0x96,0xff,0xff,0xff,0xff] cmpi.l #-1, (%a6) diff --git a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RI.s b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RI.s index 14b0735effa8..00048b1daf76 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RI.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RI.s @@ -6,6 +6,12 @@ cmpi.b #0, %d1 ; CHECK: cmpi.b #-1, %d0 ; CHECK-SAME: encoding: [0x0c,0x00,0x00,0xff] cmpi.b #-1, %d0 +; CHECK: cmpi.w #0, %d3 +; CHECK-SAME: encoding: [0x0c,0x43,0x00,0x00] +cmpi.w #0, %d3 +; CHECK: cmpi.w #37, %d2 +; CHECK-SAME: encoding: [0x0c,0x42,0x00,0x25] +cmpi.w #37, %d2 ; CHECK: cmpi.l #13, %d7 ; CHECK-SAME: encoding: [0x0c,0x87,0x00,0x00,0x00,0x0d] cmpi.l #13, %d7 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s index 772268e100e7..1cbe507d3e5c 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RM.s @@ -6,6 +6,12 @@ cmp.b (0,%pc,%d1), %d0 ; CHECK: cmp.b (-1,%pc,%d1), %d0 ; CHECK-SAME: encoding: [0xb0,0x3b,0x18,0xff] cmp.b (-1,%pc,%d1), %d0 +; CHECK: cmp.w (0,%pc,%d1), %d0 +; CHECK-SAME: encoding: [0xb0,0x7b,0x18,0x00] +cmp.w (0,%pc,%d1), %d0 +; CHECK: cmp.w (-1,%pc,%d1), %d0 +; CHECK-SAME: encoding: 
[0xb0,0x7b,0x18,0xff] +cmp.w (-1,%pc,%d1), %d0 ; CHECK: cmp.l (0,%pc,%d1), %d0 ; CHECK-SAME: encoding: [0xb0,0xbb,0x18,0x00] cmp.l (0,%pc,%d1), %d0 @@ -19,6 +25,12 @@ cmp.b (0,%pc), %d0 ; CHECK: cmp.b (-1,%pc), %d0 ; CHECK-SAME: encoding: [0xb0,0x3a,0xff,0xff] cmp.b (-1,%pc), %d0 +; CHECK: cmp.w (0,%pc), %d0 +; CHECK-SAME: encoding: [0xb0,0x7a,0x00,0x00] +cmp.w (0,%pc), %d0 +; CHECK: cmp.w (-1,%pc), %d0 +; CHECK-SAME: encoding: [0xb0,0x7a,0xff,0xff] +cmp.w (-1,%pc), %d0 ; CHECK: cmp.l (0,%pc), %d0 ; CHECK-SAME: encoding: [0xb0,0xba,0x00,0x00] cmp.l (0,%pc), %d0 @@ -32,6 +44,12 @@ cmp.b (0,%a0,%d1), %d0 ; CHECK: cmp.b (-1,%a0,%d1), %d0 ; CHECK-SAME: encoding: [0xb0,0x30,0x18,0xff] cmp.b (-1,%a0,%d1), %d0 +; CHECK: cmp.w (0,%a3,%d2), %d1 +; CHECK-SAME: encoding: [0xb2,0x73,0x28,0x00] +cmp.w (0,%a3,%d2), %d1 +; CHECK: cmp.w (-1,%a4,%d1), %d0 +; CHECK-SAME: encoding: [0xb0,0x74,0x18,0xff] +cmp.w (-1,%a4,%d1), %d0 ; CHECK: cmp.l (0,%a1,%d1), %d0 ; CHECK-SAME: encoding: [0xb0,0xb1,0x18,0x00] cmp.l (0,%a1,%d1), %d0 @@ -45,6 +63,12 @@ cmp.b (0,%a0), %d0 ; CHECK: cmp.b (-1,%a1), %d0 ; CHECK-SAME: encoding: [0xb0,0x29,0xff,0xff] cmp.b (-1,%a1), %d0 +; CHECK: cmp.w (0,%a0), %d0 +; CHECK-SAME: encoding: [0xb0,0x68,0x00,0x00] +cmp.w (0,%a0), %d0 +; CHECK: cmp.w (-1,%a1), %d0 +; CHECK-SAME: encoding: [0xb0,0x69,0xff,0xff] +cmp.w (-1,%a1), %d0 ; CHECK: cmp.l (0,%a0), %d0 ; CHECK-SAME: encoding: [0xb0,0xa8,0x00,0x00] cmp.l (0,%a0), %d0 @@ -58,6 +82,12 @@ cmp.b (%a0), %d0 ; CHECK: cmp.b (%a0), %d1 ; CHECK-SAME: encoding: [0xb2,0x10] cmp.b (%a0), %d1 +; CHECK: cmp.w (%a1), %d0 +; CHECK-SAME: encoding: [0xb0,0x51] +cmp.w (%a1), %d0 +; CHECK: cmp.w (%a1), %d1 +; CHECK-SAME: encoding: [0xb2,0x51] +cmp.w (%a1), %d1 ; CHECK: cmp.l (%a1), %d2 ; CHECK-SAME: encoding: [0xb4,0x91] cmp.l (%a1), %d2 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RR.s b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RR.s index 1eb4bdbdab4d..8045710b739d 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxCMP_RR.s +++ 
b/llvm/test/MC/M68k/Arith/Classes/MxCMP_RR.s @@ -6,6 +6,12 @@ cmp.b %d0, %d1 ; CHECK: cmp.b %d3, %d2 ; CHECK-SAME: encoding: [0xb4,0x03] cmp.b %d3, %d2 +; CHECK: cmp.w %d4, %d5 +; CHECK-SAME: encoding: [0xba,0x44] +cmp.w %d4, %d5 +; CHECK: cmp.w %d2, %d3 +; CHECK-SAME: encoding: [0xb6,0x42] +cmp.w %d2, %d3 ; CHECK: cmp.l %d0, %d1 ; CHECK-SAME: encoding: [0xb2,0x80] cmp.l %d0, %d1 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxNEG.s b/llvm/test/MC/M68k/Arith/Classes/MxNEG.s index 78286485cdbd..cf26646ad755 100644 --- a/llvm/test/MC/M68k/Arith/Classes/MxNEG.s +++ b/llvm/test/MC/M68k/Arith/Classes/MxNEG.s @@ -3,6 +3,9 @@ ; CHECK: neg.b %d0 ; CHECK-SAME: encoding: [0x44,0x00] neg.b %d0 +; CHECK: neg.w %d0 +; CHECK-SAME: encoding: [0x44,0x40] +neg.w %d0 ; CHECK: neg.l %d0 ; CHECK-SAME: encoding: [0x44,0x80] neg.l %d0 @@ -10,6 +13,9 @@ neg.l %d0 ; CHECK: negx.b %d0 ; CHECK-SAME: encoding: [0x40,0x00] negx.b %d0 +; CHECK: negx.w %d0 +; CHECK-SAME: encoding: [0x40,0x40] +negx.w %d0 ; CHECK: negx.l %d0 ; CHECK-SAME: encoding: [0x40,0x80] negx.l %d0 From 18e08fbd01bfc1efeccbdb0278660487c20eccba Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 6 Jan 2022 17:49:21 +0900 Subject: [PATCH 780/992] [mlir][linalg][bufferize] Fix tiled_loop bufferization Until now, bufferization assumed that the yielded tensor of a linalg.tiled_loop is an output tensor. This is not necessarily the case. 
Differential Revision: https://reviews.llvm.org/D116685 --- .../LinalgInterfaceImpl.cpp | 18 ++++++++-- .../comprehensive-module-bufferize.mlir | 35 +++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp index dd9f12311754..536664a6dfb7 100644 --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp @@ -323,9 +323,23 @@ struct TiledLoopOpInterface newBlockArgs); // Replace previous terminator with a new one that does not yield anything. - Operation *oldTerminator = newTiledLoopOp.getBody()->getTerminator(); + auto oldTerminator = + cast(newTiledLoopOp.getBody()->getTerminator()); rewriter.setInsertionPointToEnd(newTiledLoopOp.getBody()); - rewriter.create(oldTerminator->getLoc()); + auto newTerminator = + rewriter.create(oldTerminator->getLoc()); + + // Copy buffer of yielded tensor to output buffer. If everything bufferized + // inplace, this copy will fold away. + rewriter.setInsertionPoint(newTerminator); + for (auto it : llvm::zip(oldTerminator.values(), newOutputs)) { + Value output = std::get<1>(it); + Value toMemrefOp = rewriter.create( + newTerminator.getLoc(), output.getType(), std::get<0>(it)); + state.createMemCpy(rewriter, newTerminator.getLoc(), toMemrefOp, output); + } + + // Erase old terminator. rewriter.eraseOp(oldTerminator); // Replace results and delete old op. 
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir index 8f08e37c6774..30fad7a2b928 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -576,6 +576,7 @@ func @tiled_dot(%A: tensor, %B: tensor, %c: tensor {linalg.in %0 = tensor.dim %A, %c0 : tensor // CHECK: linalg.tiled_loop {{.*}} to (%[[M]]) {{.*}} %[[A]]{{.*}}%[[B]]{{.*}}outs{{.*}}%[[c]] + // CHECK-NOT: copy %1 = linalg.tiled_loop (%arg3) = (%c0) to (%0) step (%c3) ins (%arg4 = %A: tensor, %use = %effecting : memref, %arg5 = %B: tensor) outs (%arg6 = %c: tensor) @@ -655,6 +656,40 @@ func @tiled_fill(%A: tensor {linalg.inplaceable = true}) -> tensor // ----- +// CHECK: func @tiled_loop_yield_out_of_place( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref, +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref +func @tiled_loop_yield_out_of_place( + %A: tensor {linalg.inplaceable = true}, + %B: tensor {linalg.inplaceable = true}) + -> tensor +{ + %c3 = arith.constant 3 : index + %c0 = arith.constant 0 : index + %f0 = arith.constant 0.0 : f32 + + // CHECK: %[[M:.*]] = memref.dim %[[A]], {{.*}} : memref + %0 = tensor.dim %A, %c0 : tensor + + // CHECK: linalg.tiled_loop {{.*}} to (%[[M]]) {{.*}} outs{{.*}}%[[A]] + %1 = linalg.tiled_loop (%arg3) = (%c0) to (%0) step (%c3) + outs (%arg1 = %A: tensor) + iterators["parallel"] + { + // CHECK-NOT: alloc + // CHECK: linalg.copy(%[[B]], %[[A]]) + linalg.yield %B : tensor + // CHECK: linalg.yield + // CHECK-NOT: tensor + } + + // CHECK: return + // CHECK-NOT: tensor + return %1 : tensor +} + +// ----- + // CHECK: #[[$DYNAMIC:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // CHECK: func private @external_func(memref) From 37c9171764ee1a1ce34cd5cf984fee818cd617fb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 6 Jan 2022 09:55:56 +0100 Subject: [PATCH 781/992] [ConstantFold] Add test for invalid non-inbounds 
gep icmp fold The gep evaluated to null in this case, and as such is not ne null. --- .../Transforms/InstSimplify/ConstProp/icmp-global.ll | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index aeb6ab4e504f..bac75d3788d5 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -92,8 +92,16 @@ define i1 @global_sgt_null() { ret i1 %cmp } -define i1 @global_gep_ne_null() { -; CHECK-LABEL: @global_gep_ne_null( +define i1 @global_out_of_bounds_gep_ne_null() { +; CHECK-LABEL: @global_out_of_bounds_gep_ne_null( +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne i8* getelementptr (i8, i8* @g3, i64 sub (i64 0, i64 ptrtoint (i8* @g3 to i64))), null + ret i1 %cmp +} + +define i1 @global_inbounds_gep_ne_null() { +; CHECK-LABEL: @global_inbounds_gep_ne_null( ; CHECK-NEXT: ret i1 true ; %gep = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 1 From c41aa41957c102fdbe1e92c31fd1aec1c5fccbd5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 6 Jan 2022 09:58:31 +0100 Subject: [PATCH 782/992] [ConstFold] Add missing check for inbounds gep If the gep is not inbounds, then the gep might compute a null value even if the base pointer is non-null. 
--- llvm/lib/IR/ConstantFold.cpp | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 0d76dd732d61..9f1d76b0c768 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1537,7 +1537,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, if (const GlobalValue *GV = dyn_cast(CE1Op0)) { // If its not weak linkage, the GVal must have a non-zero address // so the result is greater-than - if (!GV->hasExternalWeakLinkage()) + if (!GV->hasExternalWeakLinkage() && CE1GEP->isInBounds()) return ICmpInst::ICMP_UGT; } } else if (const GlobalValue *GV2 = dyn_cast(V2)) { diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll index bac75d3788d5..67f007982a62 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/icmp-global.ll @@ -92,9 +92,10 @@ define i1 @global_sgt_null() { ret i1 %cmp } +; Should not fold to true, as the gep computes a null value. define i1 @global_out_of_bounds_gep_ne_null() { ; CHECK-LABEL: @global_out_of_bounds_gep_ne_null( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: ret i1 icmp ne (i8* getelementptr (i8, i8* @g3, i64 sub (i64 0, i64 ptrtoint (i8* @g3 to i64))), i8* null) ; %cmp = icmp ne i8* getelementptr (i8, i8* @g3, i64 sub (i64 0, i64 ptrtoint (i8* @g3 to i64))), null ret i1 %cmp From 6e4bbbfcc8327465440da38d86f1f4b37d452e43 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 6 Jan 2022 01:02:14 -0800 Subject: [PATCH 783/992] [ELF] Enforce double-dash form for --color-diagnostics/--rsp-quoting/--symbol-ordering-file They are LLD-specific and by convention we enforce the double-dash form to avoid collision with short options (e.g. weird `-c olor-diagnostics` interpretation in GNU ld). 
They are rarely used and to the best of my investigation the undesired single-dash forms are not used in the wild. --- lld/ELF/Options.td | 8 ++++---- lld/test/ELF/color-diagnostics.test | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 14b138e99da5..ca9fdcde791f 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -119,10 +119,10 @@ defm call_graph_profile_sort: BB<"call-graph-profile-sort", // --chroot doesn't have a help text because it is an internal option. def chroot: Separate<["--"], "chroot">; -defm color_diagnostics: B<"color-diagnostics", +defm color_diagnostics: BB<"color-diagnostics", "Alias for --color-diagnostics=always", "Alias for --color-diagnostics=never">; -def color_diagnostics_eq: J<"color-diagnostics=">, +def color_diagnostics_eq: JJ<"color-diagnostics=">, HelpText<"Use colors in diagnostics (default: auto)">, MetaVarName<"[auto,always,never]">; @@ -395,7 +395,7 @@ def strip_all: F<"strip-all">, HelpText<"Strip all symbols. 
Implies --strip-debu def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; defm symbol_ordering_file: - Eq<"symbol-ordering-file", "Layout sections to place symbols in the order specified by symbol ordering file">; + EEq<"symbol-ordering-file", "Layout sections to place symbols in the order specified by symbol ordering file">; defm sysroot: Eq<"sysroot", "Set the system root">; @@ -445,7 +445,7 @@ defm undefined_version: B<"undefined-version", "Allow unused version in version script (default)", "Report version scripts that refer undefined symbols">; -defm rsp_quoting: Eq<"rsp-quoting", "Quoting style for response files">, +defm rsp_quoting: EEq<"rsp-quoting", "Quoting style for response files">, MetaVarName<"[posix,windows]">; def v: Flag<["-"], "v">, HelpText<"Display the version number">; diff --git a/lld/test/ELF/color-diagnostics.test b/lld/test/ELF/color-diagnostics.test index 7f1e46c13492..6d87b1130bee 100644 --- a/lld/test/ELF/color-diagnostics.test +++ b/lld/test/ELF/color-diagnostics.test @@ -1,21 +1,21 @@ # Windows command prompt doesn't support ANSI escape sequences. 
# REQUIRES: shell -# RUN: not ld.lld -xyz -color-diagnostics /nosuchfile 2>&1 \ +# RUN: not ld.lld -xyz --color-diagnostics /nosuchfile 2>&1 \ # RUN: | FileCheck -check-prefix=COLOR %s -# RUN: not ld.lld -xyz -color-diagnostics=always /nosuchfile 2>&1 \ +# RUN: not ld.lld -xyz --color-diagnostics=always /nosuchfile 2>&1 \ # RUN: | FileCheck -check-prefix=COLOR %s # COLOR: {{ld.lld: .\[0;31merror: .\[0munknown argument '-xyz'}} # COLOR: {{ld.lld: .\[0;31merror: .\[0mcannot open /nosuchfile}} -# RUN: not ld.lld -color-diagnostics=foobar 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not ld.lld --color-diagnostics=foobar 2>&1 | FileCheck -check-prefix=ERR %s # ERR: unknown option: --color-diagnostics=foobar # RUN: not ld.lld /nosuchfile 2>&1 | FileCheck -check-prefix=NOCOLOR %s -# RUN: not ld.lld -color-diagnostics=never /nosuchfile 2>&1 \ +# RUN: not ld.lld --color-diagnostics=never /nosuchfile 2>&1 \ # RUN: | FileCheck -check-prefix=NOCOLOR %s -# RUN: not ld.lld -color-diagnostics=always -no-color-diagnostics \ +# RUN: not ld.lld --color-diagnostics=always --no-color-diagnostics \ # RUN: /nosuchfile 2>&1 | FileCheck -check-prefix=NOCOLOR %s # NOCOLOR: ld.lld: error: cannot open /nosuchfile From 95ddbed9b797e33d952e4ab9d33f08039c471788 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Wed, 5 Jan 2022 11:21:21 +0100 Subject: [PATCH 784/992] [mlir] Split out Python bindings for dialects into separate libs Historically, the bindings for the Linalg dialect were included into the "core" bindings library because they depended on the C++ implementation of the "core" bindings. The other dialects followed the pattern. Now that this dependency is gone, split out each dialect into a separate Python extension library. 
Depends On D116649, D116605 Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D116662 --- mlir/lib/Bindings/Python/DialectLinalg.cpp | 9 +- mlir/lib/Bindings/Python/DialectQuant.cpp | 16 +-- .../Bindings/Python/DialectSparseTensor.cpp | 14 +-- mlir/lib/Bindings/Python/Dialects.h | 26 ---- mlir/lib/Bindings/Python/MainModule.cpp | 10 -- mlir/python/CMakeLists.txt | 51 ++++++-- mlir/python/mlir/dialects/_linalg_ops_ext.py | 2 +- mlir/python/mlir/dialects/linalg/__init__.py | 25 ++-- .../dialects/linalg/opdsl/lang/emitter.py | 3 +- mlir/python/mlir/dialects/quant.py | 2 +- mlir/python/mlir/dialects/sparse_tensor.py | 2 +- .../llvm-project-overlay/mlir/BUILD.bazel | 112 +++++++++++------- 12 files changed, 154 insertions(+), 118 deletions(-) delete mode 100644 mlir/lib/Bindings/Python/Dialects.h diff --git a/mlir/lib/Bindings/Python/DialectLinalg.cpp b/mlir/lib/Bindings/Python/DialectLinalg.cpp index a168256159ed..2e54ebeb61fb 100644 --- a/mlir/lib/Bindings/Python/DialectLinalg.cpp +++ b/mlir/lib/Bindings/Python/DialectLinalg.cpp @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "Dialects.h" #include "mlir-c/Dialect/Linalg.h" #include "mlir-c/IR.h" #include "mlir/Bindings/Python/PybindAdaptors.h" namespace py = pybind11; -void mlir::python::populateDialectLinalgSubmodule(py::module m) { +static void populateDialectLinalgSubmodule(py::module m) { m.def( "fill_builtin_region", [](MlirOperation op) { mlirLinalgFillBuiltinNamedOpRegion(op); }, @@ -21,3 +20,9 @@ void mlir::python::populateDialectLinalgSubmodule(py::module m) { "Fill the region for `op`, which is assumed to be a builtin named Linalg " "op."); } + +PYBIND11_MODULE(_mlirDialectsLinalg, m) { + m.doc() = "MLIR Linalg dialect."; + + populateDialectLinalgSubmodule(m); +} diff --git a/mlir/lib/Bindings/Python/DialectQuant.cpp b/mlir/lib/Bindings/Python/DialectQuant.cpp index f2fad706afa3..844cbec4eadd 100644 --- 
a/mlir/lib/Bindings/Python/DialectQuant.cpp +++ b/mlir/lib/Bindings/Python/DialectQuant.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "Dialects.h" #include "mlir-c/Dialect/Quant.h" #include "mlir-c/IR.h" #include "mlir/Bindings/Python/PybindAdaptors.h" @@ -16,16 +15,13 @@ using namespace llvm; using namespace mlir; using namespace mlir::python::adaptors; -void mlir::python::populateDialectQuantSubmodule(const py::module &m, - const py::module &irModule) { - auto typeClass = irModule.attr("Type"); - +static void populateDialectQuantSubmodule(const py::module &m) { //===-------------------------------------------------------------------===// // QuantizedType //===-------------------------------------------------------------------===// - auto quantizedType = mlir_type_subclass(m, "QuantizedType", - mlirTypeIsAQuantizedType, typeClass); + auto quantizedType = + mlir_type_subclass(m, "QuantizedType", mlirTypeIsAQuantizedType); quantizedType.def_staticmethod( "default_minimum_for_integer", [](bool isSigned, unsigned integralWidth) { @@ -305,3 +301,9 @@ void mlir::python::populateDialectQuantSubmodule(const py::module &m, return mlirCalibratedQuantizedTypeGetMax(type); }); } + +PYBIND11_MODULE(_mlirDialectsQuant, m) { + m.doc() = "MLIR Quantization dialect"; + + populateDialectQuantSubmodule(m); +} \ No newline at end of file diff --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp index c9e3cb6394bb..b24d024d19f6 100644 --- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp +++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "Dialects.h" #include "mlir-c/Dialect/SparseTensor.h" #include "mlir-c/IR.h" #include "mlir/Bindings/Python/PybindAdaptors.h" @@ -16,18 +15,14 @@ using namespace llvm; using namespace mlir; using namespace 
mlir::python::adaptors; -void mlir::python::populateDialectSparseTensorSubmodule( - const py::module &m, const py::module &irModule) { - auto attributeClass = irModule.attr("Attribute"); - +static void populateDialectSparseTensorSubmodule(const py::module &m) { py::enum_(m, "DimLevelType", py::module_local()) .value("dense", MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE) .value("compressed", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED) .value("singleton", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON); mlir_attribute_subclass(m, "EncodingAttr", - mlirAttributeIsASparseTensorEncodingAttr, - attributeClass) + mlirAttributeIsASparseTensorEncodingAttr) .def_classmethod( "get", [](py::object cls, @@ -72,3 +67,8 @@ void mlir::python::populateDialectSparseTensorSubmodule( return mlirSparseTensorEncodingAttrGetIndexBitWidth(self); }); } + +PYBIND11_MODULE(_mlirDialectsSparseTensor, m) { + m.doc() = "MLIR SparseTensor dialect."; + populateDialectSparseTensorSubmodule(m); +} diff --git a/mlir/lib/Bindings/Python/Dialects.h b/mlir/lib/Bindings/Python/Dialects.h deleted file mode 100644 index a130903c6c8c..000000000000 --- a/mlir/lib/Bindings/Python/Dialects.h +++ /dev/null @@ -1,26 +0,0 @@ -//===- Dialects.h - Declaration for dialect submodule factories -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_BINDINGS_PYTHON_DIALECTS_H -#define MLIR_BINDINGS_PYTHON_DIALECTS_H - -#include - -namespace mlir { -namespace python { - -void populateDialectLinalgSubmodule(pybind11::module m); -void populateDialectSparseTensorSubmodule(const pybind11::module &m, - const pybind11::module &irModule); -void populateDialectQuantSubmodule(const pybind11::module &m, - const pybind11::module &irModule); - -} // namespace python -} // namespace mlir - -#endif // MLIR_BINDINGS_PYTHON_DIALECTS_H diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp index f55482676255..1d6d8fa01d3b 100644 --- a/mlir/lib/Bindings/Python/MainModule.cpp +++ b/mlir/lib/Bindings/Python/MainModule.cpp @@ -10,7 +10,6 @@ #include "PybindUtils.h" -#include "Dialects.h" #include "Globals.h" #include "IRModule.h" #include "Pass.h" @@ -100,13 +99,4 @@ PYBIND11_MODULE(_mlir, m) { auto passModule = m.def_submodule("passmanager", "MLIR Pass Management Bindings"); populatePassManagerSubmodule(passModule); - - // Define and populate dialect submodules. - auto dialectsModule = m.def_submodule("dialects"); - auto linalgModule = dialectsModule.def_submodule("linalg"); - populateDialectLinalgSubmodule(linalgModule); - populateDialectSparseTensorSubmodule( - dialectsModule.def_submodule("sparse_tensor"), irModule); - populateDialectQuantSubmodule(dialectsModule.def_submodule("quant"), - irModule); } diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 1fb98c54079d..60d60d4aff71 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -25,8 +25,6 @@ declare_mlir_python_sources(MLIRPythonSources.Core _mlir_libs/_mlir/__init__.pyi _mlir_libs/_mlir/ir.pyi _mlir_libs/_mlir/passmanager.pyi - # TODO: this should be split out into a separate library. 
- _mlir_libs/_mlir/dialects/quant.pyi ) declare_mlir_python_sources(MLIRPythonSources.ExecutionEngine @@ -122,7 +120,8 @@ declare_mlir_python_sources( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" SOURCES - dialects/quant.py) + dialects/quant.py + _mlir_libs/_mlir/dialects/quant.pyi) declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects @@ -191,9 +190,6 @@ declare_mlir_python_extension(MLIRPythonExtension.Core ADD_TO_PARENT MLIRPythonSources.Core ROOT_DIR "${PYTHON_SOURCE_DIR}" SOURCES - DialectLinalg.cpp # TODO: Break this out. - DialectSparseTensor.cpp # TODO: Break this out. - DialectQuant.cpp # TODO: Break this out. MainModule.cpp IRAffine.cpp IRAttributes.cpp @@ -205,7 +201,6 @@ declare_mlir_python_extension(MLIRPythonExtension.Core Pass.cpp # Headers must be included explicitly so they are installed. - Dialects.h Globals.h IRModule.h Pass.h @@ -219,10 +214,46 @@ declare_mlir_python_extension(MLIRPythonExtension.Core MLIRCAPIRegistration # TODO: See about dis-aggregating # Dialects - MLIRCAPILinalg # TODO: Remove when above is removed. - MLIRCAPISparseTensor # TODO: Remove when above is removed. MLIRCAPIStandard - MLIRCAPIQuant # TODO: Remove when above is removed. 
+) + +declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Pybind + MODULE_NAME _mlirDialectsLinalg + ADD_TO_PARENT MLIRPythonSources.Dialects.linalg + ROOT_DIR "${PYTHON_SOURCE_DIR}" + SOURCES + DialectLinalg.cpp + PRIVATE_LINK_LIBS + LLVMSupport + EMBED_CAPI_LINK_LIBS + MLIRCAPIIR + MLIRCAPILinalg +) + +declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind + MODULE_NAME _mlirDialectsQuant + ADD_TO_PARENT MLIRPythonSources.Dialects.quant + ROOT_DIR "${PYTHON_SOURCE_DIR}" + SOURCES + DialectQuant.cpp + PRIVATE_LINK_LIBS + LLVMSupport + EMBED_CAPI_LINK_LIBS + MLIRCAPIIR + MLIRCAPIQuant +) + +declare_mlir_python_extension(MLIRPythonExtension.Dialects.SparseTensor.Pybind + MODULE_NAME _mlirDialectsSparseTensor + ADD_TO_PARENT MLIRPythonSources.Dialects.sparse_tensor + ROOT_DIR "${PYTHON_SOURCE_DIR}" + SOURCES + DialectSparseTensor.cpp + PRIVATE_LINK_LIBS + LLVMSupport + EMBED_CAPI_LINK_LIBS + MLIRCAPIIR + MLIRCAPISparseTensor ) declare_mlir_python_extension(MLIRPythonExtension.AllPassesRegistration diff --git a/mlir/python/mlir/dialects/_linalg_ops_ext.py b/mlir/python/mlir/dialects/_linalg_ops_ext.py index 90f922724899..167a9232d136 100644 --- a/mlir/python/mlir/dialects/_linalg_ops_ext.py +++ b/mlir/python/mlir/dialects/_linalg_ops_ext.py @@ -6,7 +6,7 @@ from typing import Optional, Sequence, Union from ..ir import * from ._ods_common import get_default_loc_context - from .._mlir_libs._mlir.dialects.linalg import fill_builtin_region + from .._mlir_libs._mlirDialectsLinalg import fill_builtin_region except ImportError as e: raise RuntimeError("Error loading imports from extension module") from e diff --git a/mlir/python/mlir/dialects/linalg/__init__.py b/mlir/python/mlir/dialects/linalg/__init__.py index 976718337111..eadb8420c06a 100644 --- a/mlir/python/mlir/dialects/linalg/__init__.py +++ b/mlir/python/mlir/dialects/linalg/__init__.py @@ -2,6 +2,9 @@ # See https://llvm.org/LICENSE.txt for license information. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Re-export the objects provided by pybind. +from ..._mlir_libs._mlirDialectsLinalg import * + # These are the backing OpView classes generated from the linalg tablegen # definitions following these steps: # DSL -> YAML -> tblgen -> pytblgen -> build/.../_linalg_ops_gen.py. @@ -15,39 +18,39 @@ # C=TensorDef(U, S.M, S.N, output=True)): # ``` # using the linalg-py eDSL. -# The linalg-py eDSL builds a python representation (PyRepr) that is +# The linalg-py eDSL builds a python representation (PyRepr) that is # used in following ways: # 1. PyRepr -> YAML to generate the C++ and Python .td files. These # then turn into the core C++ Op classes and Python OpView classes -# respectively (made available in _linalg_ops_gen). The generic OpView class +# respectively (made available in _linalg_ops_gen). The generic OpView class # mechanism makes the C++ classes available to python through the CAPI. # PyRepr -> YAML currently occurs before compiler compile time. # The other steps in this category occur at compiler compile time. -# 2. PyRepr -> linalg.core_named_ops calls: piggybacks on the +# 2. PyRepr -> linalg.core_named_ops calls: piggybacks on the # _linalg_ops_gen classes and the OpView mechanism to build IR at # runtime in python: # a. by default, the Named Op Form is emitted, e.g.: # `linalg.matmul(lhs, rhs, outs=[out])` creates the following IR: # ``` -# %1 = linalg.matmul ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) +# %1 = linalg.matmul ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) # outs(%0 : tensor<4x8xf32>) -# -> tensor<4x8xf32> +# -> tensor<4x8xf32> # ``` # b. 
by setting emit_generic=True, the Generic Op Form is emitted, e.g.: # `linalg.matmul(lhs, rhs, outs=[out], emit_generic=True)` creates the following IR: # ``` -# %1 = linalg.generic {indexing_maps = [...], iterator_types = [...]} -# ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) +# %1 = linalg.generic {indexing_maps = [...], iterator_types = [...]} +# ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) # outs(%0 : tensor<4x8xf32>) { -# ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): +# ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): # ... # linalg.yield %3 : f32 -# } -> tensor<4x8xf32> +# } -> tensor<4x8xf32> # ``` # 3. PyRepr -> Runtime Custom Op definitions: directly generates a # linalg.generic form like in 2.b. -# !!!WARNING!!!: if one creates a runtime custom op with the same name +# !!!WARNING!!!: if one creates a runtime custom op with the same name # as an existing core named op, step 2. will likely take precedence. -# TODO: guard against surprises and fail create Runtime Custom Ops with +# TODO: guard against surprises and fail create Runtime Custom Ops with # the same name as existing Core Named Ops. from .opdsl.ops.core_named_ops import * diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py index c3cfdfac95dc..aa44194b5152 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py @@ -5,7 +5,6 @@ from typing import Dict, List, Sequence, Tuple, Union from .....ir import * -from ....._mlir_libs._mlir.dialects.linalg import fill_builtin_region from .... import linalg from .... 
import std @@ -173,7 +172,7 @@ def emit_named_structured_op(op_config: LinalgStructuredOpConfig, op_name: str, f"Unknown named op_name / op_class_name: {op_name} / {op_class_name}") named_op = getattr(linalg, op_class_name)(ins, outs, result_types) - fill_builtin_region(named_op.operation) + linalg.fill_builtin_region(named_op.operation) # Note: mlir-linalg-ods-yaml-gen.cpp uses a special linalg.memoized_indexing_maps # attribute that the non-yaml path does not. The non-yaml path hardcodes the # indexing_maps in C++ directly. diff --git a/mlir/python/mlir/dialects/quant.py b/mlir/python/mlir/dialects/quant.py index 92990b1c5cd6..bf1fc5f2de37 100644 --- a/mlir/python/mlir/dialects/quant.py +++ b/mlir/python/mlir/dialects/quant.py @@ -2,4 +2,4 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from .._mlir_libs._mlir.dialects.quant import * +from .._mlir_libs._mlirDialectsQuant import * diff --git a/mlir/python/mlir/dialects/sparse_tensor.py b/mlir/python/mlir/dialects/sparse_tensor.py index 4f6b675ec9e5..769418e04969 100644 --- a/mlir/python/mlir/dialects/sparse_tensor.py +++ b/mlir/python/mlir/dialects/sparse_tensor.py @@ -3,5 +3,5 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from ._sparse_tensor_ops_gen import * -from .._mlir_libs._mlir.dialects.sparse_tensor import * +from .._mlir_libs._mlirDialectsSparseTensor import * from .._mlir_libs import _mlirSparseTensorPasses as _cextSparseTensorPasses diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 109336b972db..938c717a7700 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -647,10 +647,18 @@ cc_library( ], ) +# These flags are needed for pybind11 to work. +PYBIND11_COPTS = [ + "-fexceptions", + "-frtti", +] + +PYBIND11_FEATURES = [ + # Cannot use header_modules (parse_headers feature fails). 
+ "-use_header_modules", +] + MLIR_PYTHON_BINDINGS_SOURCES = [ - "lib/Bindings/Python/DialectLinalg.cpp", - "lib/Bindings/Python/DialectSparseTensor.cpp", - "lib/Bindings/Python/DialectQuant.cpp", "lib/Bindings/Python/IRAffine.cpp", "lib/Bindings/Python/IRAttributes.cpp", "lib/Bindings/Python/IRCore.cpp", @@ -664,15 +672,8 @@ MLIR_PYTHON_BINDINGS_SOURCES = [ cc_library( name = "MLIRBindingsPythonCore", srcs = MLIR_PYTHON_BINDINGS_SOURCES, - # These flags are needed for pybind11 to work. - copts = [ - "-fexceptions", - "-frtti", - ], - features = [ - # Cannot use header_modules (parse_headers feature fails). - "-use_header_modules", - ], + copts = PYBIND11_COPTS, + features = PYBIND11_FEATURES, tags = [ "manual", # External dependency "nobuildkite", # TODO(gcmn): Add support for this target @@ -683,10 +684,7 @@ cc_library( ":CAPIGPU", ":CAPIIR", ":CAPIInterfaces", - ":CAPILinalg", - ":CAPIQuant", ":CAPIRegistration", - ":CAPISparseTensor", ":MLIRBindingsPythonHeadersAndDeps", "//llvm:Support", "@pybind11", @@ -697,15 +695,8 @@ cc_library( cc_library( name = "MLIRBindingsPythonCoreNoCAPI", srcs = MLIR_PYTHON_BINDINGS_SOURCES, - # These flags are needed for pybind11 to work. - copts = [ - "-fexceptions", - "-frtti", - ], - features = [ - # Cannot use header_modules (parse_headers feature fails). 
- "-use_header_modules", - ], + copts = PYBIND11_COPTS, + features = PYBIND11_FEATURES, tags = [ "manual", # External dependency "nobuildkite", # TODO(gcmn): Add support for this target @@ -715,10 +706,7 @@ cc_library( ":CAPIDebugHeaders", ":CAPIGPUHeaders", ":CAPIIRHeaders", - ":CAPILinalgHeaders", - ":CAPIQuantHeaders", ":CAPIRegistrationHeaders", - ":CAPISparseTensorHeaders", ":MLIRBindingsPythonHeaders", "//llvm:Support", "@pybind11", @@ -740,23 +728,10 @@ cc_library( ":CAPIGPUObjects", ":CAPIIRObjects", ":CAPIInterfacesObjects", - ":CAPILinalgObjects", - ":CAPIQuantObjects", ":CAPIRegistrationObjects", - ":CAPISparseTensorObjects", ], ) -PYBIND11_COPTS = [ - "-fexceptions", - "-frtti", -] - -PYBIND11_FEATURES = [ - # Cannot use header_modules (parse_headers feature fails). - "-use_header_modules", -] - # Dynamic library with the MLIR Python extension. cc_binary( name = "_mlir.so", @@ -776,6 +751,63 @@ cc_binary( ], ) +cc_binary( + name = "_mlirDialectsLinalg.so", + srcs = ["lib/Bindings/Python/DialectLinalg.cpp"], + copts = PYBIND11_COPTS, + features = PYBIND11_FEATURES, + linkshared = 1, + linkstatic = 0, + tags = [ + "manual", # External dependency + "nobuildkite", # TODO(gcmn): Add support for this target + ], + deps = [ + ":CAPIIR", + ":CAPILinalg", + ":CAPIRegistration", + ":MLIRBindingsPythonHeadersAndDeps", + ], +) + +cc_binary( + name = "_mlirDialectsQuant.so", + srcs = ["lib/Bindings/Python/DialectQuant.cpp"], + copts = PYBIND11_COPTS, + features = PYBIND11_FEATURES, + linkshared = 1, + linkstatic = 0, + tags = [ + "manual", # External dependency + "nobuildkite", # TODO(gcmn): Add support for this target + ], + deps = [ + ":CAPIIR", + ":CAPIQuant", + ":CAPIRegistration", + ":MLIRBindingsPythonHeadersAndDeps", + ], +) + +cc_binary( + name = "_mlirDialectsSparseTensor.so", + srcs = ["lib/Bindings/Python/DialectSparseTensor.cpp"], + copts = PYBIND11_COPTS, + features = PYBIND11_FEATURES, + linkshared = 1, + linkstatic = 0, + tags = [ + "manual", # 
External dependency + "nobuildkite", # TODO(gcmn): Add support for this target + ], + deps = [ + ":CAPIIR", + ":CAPISparseTensor", + ":CAPIRegistration", + ":MLIRBindingsPythonHeadersAndDeps", + ], +) + # Dynamic library with the MLIR Conversions Python extension. cc_binary( name = "_mlirConversions.so", From bbce75e352be0637305a1b59ac5eca7175bceece Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 6 Jan 2022 17:33:25 +0800 Subject: [PATCH 785/992] Update Bug report URL to Github Issues Although we moved to Github Issues. The bug report message refers to Bugzilla still. This patch tries to update these URLs. Reviewed By: MaskRay, Quuxplusone, jhenderson, libunwind, libc++ Differential Revision: https://reviews.llvm.org/D116351 --- clang-tools-extra/docs/clang-doc.rst | 2 +- clang/docs/CommandGuide/clang.rst | 2 +- clang/www/c_status.html | 4 ++-- clang/www/cxx_status.html | 6 +++--- clang/www/get_involved.html | 2 +- clang/www/get_started.html | 2 +- clang/www/menu.html.incl | 2 +- libcxx/docs/index.rst | 2 +- libunwind/docs/index.rst | 2 +- lldb/docs/index.rst | 2 +- llvm/CMakeLists.txt | 2 +- llvm/docs/CommandGuide/llvm-install-name-tool.rst | 2 +- llvm/docs/CommandGuide/llvm-libtool-darwin.rst | 2 +- llvm/docs/CommandGuide/llvm-lipo.rst | 2 +- llvm/docs/CommandGuide/llvm-objcopy.rst | 2 +- llvm/docs/CommandGuide/llvm-objdump.rst | 2 +- llvm/docs/CommandGuide/llvm-otool.rst | 2 +- llvm/docs/CommandGuide/llvm-size.rst | 2 +- llvm/docs/CommandGuide/llvm-strings.rst | 2 +- llvm/docs/CommandGuide/llvm-strip.rst | 2 +- llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 4 ++-- .../clang/include/clang/Config/config.h | 2 +- .../llvm-project-overlay/llvm/include/llvm/Config/config.h | 4 ++-- 24 files changed, 29 insertions(+), 29 deletions(-) diff --git a/clang-tools-extra/docs/clang-doc.rst b/clang-tools-extra/docs/clang-doc.rst index 9be8a8dc31d4..2db3e914ac8a 100644 --- 
a/clang-tools-extra/docs/clang-doc.rst +++ b/clang-tools-extra/docs/clang-doc.rst @@ -12,7 +12,7 @@ source code and comments. The tool is in a very early development stage, so you might encounter bugs and crashes. Submitting reports with information about how to reproduce the issue -to `the LLVM bugtracker `_ will definitely help the +to `the LLVM bug tracker `_ will definitely help the project. If you have any ideas or suggestions, please to put a feature request there. diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst index a24e138e86a7..6797020d1b68 100644 --- a/clang/docs/CommandGuide/clang.rst +++ b/clang/docs/CommandGuide/clang.rst @@ -662,7 +662,7 @@ ENVIRONMENT BUGS ---- -To report bugs, please visit . Most bug reports should +To report bugs, please visit . Most bug reports should include preprocessed source files (use the :option:`-E` option) and the full output of the compiler, along with information to reproduce. diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 561f061c6449..42bc57d969ad 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -72,8 +72,8 @@

C Support in Clang

reports, but we do not currently track our DR status (help with tracking DR status is appreciated).

-

The LLVM bug tracker contains a -Clang C component that tracks known bugs with Clang's language +

The LLVM bug tracker uses +the "c", "c11", "c18", and "c2x" labels to track known bugs with Clang's language conformance.

C89 implementation status

diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 3cf12ff47721..5005ec6c08e4 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -76,9 +76,9 @@

C++ Support in Clang

Specifications that will help drive the future of the C++ programming language.

-

The LLVM bug tracker contains Clang -C++ components that track known bugs with Clang's language conformance in -each language mode.

+

The LLVM bug tracker uses +the "c++" label, as well as mode-specific labels such as "c++11", "c++14", +and so on, to track known bugs with Clang's language conformance.

C++98 implementation status

diff --git a/clang/www/get_involved.html b/clang/www/get_involved.html index 96784108ce6e..d6eddb8227a5 100755 --- a/clang/www/get_involved.html +++ b/clang/www/get_involved.html @@ -65,7 +65,7 @@

Follow what's going on

If you're looking for something to work on, check out our Open Projects page or look through the Bugzilla bug database.

+href="https://github.com/llvm/llvm-project/issues/">LLVM bug tracker.

Contributing Extensions to Clang

diff --git a/clang/www/get_started.html b/clang/www/get_started.html index bc9629d7e2ff..ab5f7fac6a6c 100755 --- a/clang/www/get_started.html +++ b/clang/www/get_started.html @@ -19,7 +19,7 @@

Getting Started: Building and Running Clang

options. This should get you up and running with the minimum of muss and fuss. If you like what you see, please consider getting involved with the Clang community. If you run into problems, please file -bugs in LLVM Bugzilla.

+bugs on the LLVM bug tracker.

Release Clang Versions

diff --git a/clang/www/menu.html.incl b/clang/www/menu.html.incl index 3a360dc3af85..72c483d27345 100755 --- a/clang/www/menu.html.incl +++ b/clang/www/menu.html.incl @@ -36,7 +36,7 @@ cfe-users List cfe-dev List cfe-commits List - Bug Reports + Bug Reports Planet Clang IRC: irc.oftc.net#llvm diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index 6ed811fee1a8..817d6b42a5bc 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -217,7 +217,7 @@ Quick Links =========== * `LLVM Homepage `_ * `libc++abi Homepage `_ -* `LLVM Bugzilla `_ +* `LLVM bug tracker `_ * `libcxx-commits Mailing List`_ * `libcxx-dev Mailing List`_ * `Browse libc++ Sources `_ diff --git a/libunwind/docs/index.rst b/libunwind/docs/index.rst index f7ff29d095bc..3597e757f59b 100644 --- a/libunwind/docs/index.rst +++ b/libunwind/docs/index.rst @@ -98,7 +98,7 @@ Please include [libunwind] in the subject. Quick Links =========== * `LLVM Homepage `_ -* `LLVM Bugzilla `_ +* `LLVM bug tracker `_ * `cfe-commits Mailing List`_ * `cfe-dev Mailing List`_ * `Browse libunwind Sources `_ diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst index 0ca444f31ed1..24643ce8bc02 100644 --- a/lldb/docs/index.rst +++ b/lldb/docs/index.rst @@ -178,4 +178,4 @@ interesting areas to contribute to lldb. 
Source Code Code Reviews - Bug Reports + Bug Reports diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index af32d35521a3..edc2c8cded9c 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -234,7 +234,7 @@ option(LLVM_APPEND_VC_REV set(PACKAGE_NAME LLVM) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") -set(PACKAGE_BUGREPORT "https://bugs.llvm.org/") +set(PACKAGE_BUGREPORT "https://github.com/llvm/llvm-project/issues/") set(BUG_REPORT_URL "${PACKAGE_BUGREPORT}" CACHE STRING "Default URL where bug reports are to be submitted.") diff --git a/llvm/docs/CommandGuide/llvm-install-name-tool.rst b/llvm/docs/CommandGuide/llvm-install-name-tool.rst index 9309215cd6ab..ff42741b0352 100644 --- a/llvm/docs/CommandGuide/llvm-install-name-tool.rst +++ b/llvm/docs/CommandGuide/llvm-install-name-tool.rst @@ -76,7 +76,7 @@ Otherwise, it exits with code 0. BUGS ---- -To report bugs, please visit . +To report bugs, please visit . SEE ALSO -------- diff --git a/llvm/docs/CommandGuide/llvm-libtool-darwin.rst b/llvm/docs/CommandGuide/llvm-libtool-darwin.rst index ccdf464301ed..0fdd51da3092 100644 --- a/llvm/docs/CommandGuide/llvm-libtool-darwin.rst +++ b/llvm/docs/CommandGuide/llvm-libtool-darwin.rst @@ -97,7 +97,7 @@ Otherwise, it exits with code 0. BUGS ---- -To report bugs, please visit . +To report bugs, please visit . SEE ALSO -------- diff --git a/llvm/docs/CommandGuide/llvm-lipo.rst b/llvm/docs/CommandGuide/llvm-lipo.rst index 20b2984fc9b2..aeaaea9d6cda 100644 --- a/llvm/docs/CommandGuide/llvm-lipo.rst +++ b/llvm/docs/CommandGuide/llvm-lipo.rst @@ -70,4 +70,4 @@ COMMANDS BUGS ---- -To report bugs, please visit . +To report bugs, please visit . diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst index 5f3aa88405e2..21f1a53e593c 100644 --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -536,7 +536,7 @@ Otherwise, it exits with code 0. 
BUGS ---- -To report bugs, please visit . +To report bugs, please visit . There is a known issue with :option:`--input-target` and :option:`--target` causing only ``binary`` and ``ihex`` formats to have any effect. Other values diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index bd4b77e52f24..cbc525fba8a6 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -397,7 +397,7 @@ XCOFF ONLY OPTIONS AND COMMANDS BUGS ---- -To report bugs, please visit . +To report bugs, please visit . SEE ALSO -------- diff --git a/llvm/docs/CommandGuide/llvm-otool.rst b/llvm/docs/CommandGuide/llvm-otool.rst index 3f6624e47462..c4673502d324 100644 --- a/llvm/docs/CommandGuide/llvm-otool.rst +++ b/llvm/docs/CommandGuide/llvm-otool.rst @@ -132,7 +132,7 @@ Otherwise, it exits with code 0. BUGS ---- -To report bugs, please visit . +To report bugs, please visit . SEE ALSO -------- diff --git a/llvm/docs/CommandGuide/llvm-size.rst b/llvm/docs/CommandGuide/llvm-size.rst index 3feed2810dfd..f244769545b3 100644 --- a/llvm/docs/CommandGuide/llvm-size.rst +++ b/llvm/docs/CommandGuide/llvm-size.rst @@ -191,4 +191,4 @@ Otherwise, it exits with code 0. BUGS ---- -To report bugs, please visit . +To report bugs, please visit . diff --git a/llvm/docs/CommandGuide/llvm-strings.rst b/llvm/docs/CommandGuide/llvm-strings.rst index f66b22ec8df0..0a9ab5dca0c1 100644 --- a/llvm/docs/CommandGuide/llvm-strings.rst +++ b/llvm/docs/CommandGuide/llvm-strings.rst @@ -123,4 +123,4 @@ Otherwise, it exits with code 0. BUGS ---- -To report bugs, please visit . +To report bugs, please visit . diff --git a/llvm/docs/CommandGuide/llvm-strip.rst b/llvm/docs/CommandGuide/llvm-strip.rst index 461fb828a2bb..ce5c824ecd5d 100644 --- a/llvm/docs/CommandGuide/llvm-strip.rst +++ b/llvm/docs/CommandGuide/llvm-strip.rst @@ -194,7 +194,7 @@ Otherwise, it exits with code 0. BUGS ---- -To report bugs, please visit . 
+To report bugs, please visit . SEE ALSO -------- diff --git a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn index 01c2d4e4d596..3182c4c1c026 100644 --- a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn @@ -8,7 +8,7 @@ write_cmake_config("Config") { input = "config.h.cmake" output = "$target_gen_dir/config.h" values = [ - "BUG_REPORT_URL=https://bugs.llvm.org/", + "BUG_REPORT_URL=https://github.com/llvm/llvm-project/issues/", "CLANG_DEFAULT_PIE_ON_LINUX=", "CLANG_DEFAULT_LINKER=", "CLANG_DEFAULT_STD_C=", diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 8e22f8984cf9..489d5abec9e2 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -72,7 +72,7 @@ write_cmake_config("config") { input = "config.h.cmake" output = "$target_gen_dir/config.h" values = [ - "BUG_REPORT_URL=https://bugs.llvm.org/", + "BUG_REPORT_URL=https://github.com/llvm/llvm-project/issues/", "ENABLE_BACKTRACES=1", "ENABLE_CRASH_OVERRIDES=1", "BACKTRACE_HEADER=execinfo.h", @@ -120,7 +120,7 @@ write_cmake_config("config") { "LLVM_VERSION_INFO=", "LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO=1", "LLVM_WINDOWS_PREFER_FORWARD_SLASH=", - "PACKAGE_BUGREPORT=https://bugs.llvm.org/", + "PACKAGE_BUGREPORT=https://github.com/llvm/llvm-project/issues/", "PACKAGE_NAME=LLVM", "PACKAGE_STRING=LLVM ${llvm_version}git", "PACKAGE_VERSION=${llvm_version}git", diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index ee4c6277d2ae..ba28d8606265 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ 
b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -20,7 +20,7 @@ #define CLANG_CONFIG_H /* Bug report URL. */ -#define BUG_REPORT_URL "https://bugs.llvm.org/" +#define BUG_REPORT_URL "https://github.com/llvm/llvm-project/issues/" /* Default to -fPIE and -pie on Linux. */ #define CLANG_DEFAULT_PIE_ON_LINUX 0 diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h index 404501139494..45945510d02b 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h @@ -24,7 +24,7 @@ #include "llvm/Config/llvm-config.h" /* Bug report URL. */ -#define BUG_REPORT_URL "https://bugs.llvm.org/" +#define BUG_REPORT_URL "https://github.com/llvm/llvm-project/issues/" /* Define to 1 to enable backtraces, and to 0 otherwise. */ #define ENABLE_BACKTRACES 1 @@ -332,7 +332,7 @@ /* LTDL_SHLIB_EXT defined in Bazel */ /* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "https://bugs.llvm.org/" +#define PACKAGE_BUGREPORT "https://github.com/llvm/llvm-project/issues/" /* Define to the full name of this package. */ #define PACKAGE_NAME "LLVM" From 560972052a25ada0efd47ddaf21ece1cd286ae65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Thu, 6 Jan 2022 00:41:35 +0100 Subject: [PATCH 786/992] [mlir][LLVM] Implement mapping of phi source values of `llvm.invoke` This patch allows the usage of the normalDestOperands and unwindDestOperands operands of llvm.invoke and have them be correctly mapped to phis in the successor when exported to LLVM IR. 
Differential Revision: https://reviews.llvm.org/D116706 --- .../LLVMIR/LLVMToLLVMIRTranslation.cpp | 5 ++-- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 11 ++++++-- mlir/test/Target/LLVMIR/llvmir.mlir | 28 +++++++++++++++++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 3254aed6c341..e4932e84cd28 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -340,9 +340,9 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, } if (auto invOp = dyn_cast(opInst)) { - auto operands = moduleTranslation.lookupValues(opInst.getOperands()); + auto operands = moduleTranslation.lookupValues(invOp.getCalleeOperands()); ArrayRef operandsRef(operands); - llvm::Value *result; + llvm::Instruction *result; if (auto attr = opInst.getAttrOfType("callee")) { result = builder.CreateInvoke( moduleTranslation.lookupFunction(attr.getValue()), @@ -359,6 +359,7 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, moduleTranslation.lookupBlock(invOp.getSuccessor(1)), operandsRef.drop_front()); } + moduleTranslation.mapBranch(invOp, result); // InvokeOp can only have 0 or 1 result if (invOp->getNumResults() != 0) { moduleTranslation.mapValue(opInst.getResult(0), result); diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 404018bebe93..1ca3fb2fc664 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -386,8 +386,15 @@ static Value getPHISourceValue(Block *current, Block *pred, return switchOp.getCaseOperands(i.index())[index]; } - llvm_unreachable("only branch or switch operations can be terminators of a " - "block that has successors"); + if (auto invokeOp = dyn_cast(terminator)) { + 
return invokeOp.getNormalDest() == current + ? invokeOp.getNormalDestOperands()[index] + : invokeOp.getUnwindDestOperands()[index]; + } + + llvm_unreachable( + "only branch, switch or invoke operations can be terminators " + "of a block that has successors"); } /// Connect the PHI nodes to the results of preceding blocks. diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 6741d51b5f39..54dfd519d81c 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1326,6 +1326,34 @@ llvm.func @invoke_result(%arg0 : !llvm.ptr) attributes { personality = @__gx // ----- +llvm.func @foo() +llvm.func @__gxx_personality_v0(...) -> i32 + +// CHECK-LABEL: @invoke_phis +llvm.func @invoke_phis() -> i32 attributes { personality = @__gxx_personality_v0 } { +// CHECK: invoke void @foo() +// CHECK-NEXT: to label %[[normal:[0-9]+]] unwind label %[[unwind:[0-9]+]] + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.invoke @foo() to ^bb1(%0 : i32) unwind ^bb2 : () -> () + +// CHECK: [[normal]]: +// CHECK-NEXT: %[[a1:[0-9]+]] = phi i32 [ 1, %[[unwind]] ], [ 0, %0 ] +// CHECK-NEXT: ret i32 %[[a1]] +^bb1(%1 : i32): + llvm.return %1 : i32 + +// CHECK: [[unwind]]: +// CHECK-NEXT: landingpad { i8*, i32 } +// CHECK-NEXT: cleanup +// CHECK-NEXT: br label %[[normal]] +^bb2: + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + %3 = llvm.mlir.constant(1 : i32) : i32 + llvm.br ^bb1(%3 : i32) +} + +// ----- + // CHECK-LABEL: @callFreezeOp llvm.func @callFreezeOp(%x : i32) { // CHECK: freeze i32 %{{[0-9]+}} From 0fa174398bfd7040128340bae03228838ee6082e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 6 Jan 2022 10:26:16 +0100 Subject: [PATCH 787/992] [LICM] Add test for noalias call (NFC) Add a test with a noalias call that is not a known allocation function. 
--- llvm/test/Transforms/LICM/promote-tls.ll | 208 +++++++++++++++++++---- 1 file changed, 175 insertions(+), 33 deletions(-) diff --git a/llvm/test/Transforms/LICM/promote-tls.ll b/llvm/test/Transforms/LICM/promote-tls.ll index abb9d96c3ea0..a0c966eee79a 100644 --- a/llvm/test/Transforms/LICM/promote-tls.ll +++ b/llvm/test/Transforms/LICM/promote-tls.ll @@ -1,5 +1,6 @@ -; RUN: opt -tbaa -basic-aa -licm -S < %s | FileCheck %s -; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='require,require,require,require,loop-mssa(licm)' -S %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S < %s | FileCheck %s +; RUN: opt -passes='loop-mssa(licm)' -S %s | FileCheck %s ; If we can prove a local is thread local, we can insert stores during ; promotion which wouldn't be legal otherwise. @@ -12,8 +13,36 @@ target triple = "x86_64-linux-generic" declare i8* @malloc(i64) ; Exercise the TLS case -; CHECK-LABEL: @test define i32* @test(i32 %n) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEM:%.*]] = call noalias dereferenceable(16) i8* @malloc(i64 16) +; CHECK-NEXT: [[ADDR:%.*]] = bitcast i8* [[MEM]] to i32* +; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[ADDR_PROMOTED:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: [[NEW1:%.*]] = phi i32 [ [[ADDR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[NEW:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[GUARD:%.*]] = load atomic i8*, i8** @p monotonic, align 8 +; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq i8* [[GUARD]], null +; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]] +; CHECK: early-exit: +; CHECK-NEXT: [[NEW1_LCSSA:%.*]] = phi i32 [ [[NEW1]], [[FOR_HEADER]] ] +; CHECK-NEXT: store i32 [[NEW1_LCSSA]], i32* [[ADDR]], align 4 +; CHECK-NEXT: ret 
i32* null +; CHECK: for.body: +; CHECK-NEXT: [[NEW]] = add i32 [[NEW1]], 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[NEW_LCSSA:%.*]] = phi i32 [ [[NEW]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SPLIT:%.*]] = phi i32* [ [[ADDR]], [[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[NEW_LCSSA]], i32* [[ADDR]], align 4 +; CHECK-NEXT: ret i32* null +; entry: ;; ignore the required null check for simplicity %mem = call dereferenceable(16) noalias i8* @malloc(i64 16) @@ -21,8 +50,6 @@ entry: br label %for.body.lr.ph for.body.lr.ph: ; preds = %entry -; CHECK-LABEL: for.body.lr.ph: -; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4 br label %for.header for.header: @@ -34,8 +61,6 @@ for.header: br i1 %exitcmp, label %for.body, label %early-exit early-exit: -; CHECK-LABEL: early-exit: -; CHECK: store i32 %new1.lcssa, i32* %addr, align 4 ret i32* null for.body: @@ -46,23 +71,47 @@ for.body: br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge for.cond.for.end_crit_edge: ; preds = %for.body -; CHECK-LABEL: for.cond.for.end_crit_edge: -; CHECK: store i32 %new.lcssa, i32* %addr, align 4 %split = phi i32* [ %addr, %for.body ] ret i32* null } ; Stack allocations can also be thread-local -; CHECK-LABEL: @test2 define i32* @test2(i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEM:%.*]] = alloca i8, i32 16, align 1 +; CHECK-NEXT: [[ADDR:%.*]] = bitcast i8* [[MEM]] to i32* +; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[ADDR_PROMOTED:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: [[NEW1:%.*]] = phi i32 [ [[ADDR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[NEW:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, 
[[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[GUARD:%.*]] = load atomic i8*, i8** @p monotonic, align 8 +; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq i8* [[GUARD]], null +; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]] +; CHECK: early-exit: +; CHECK-NEXT: [[NEW1_LCSSA:%.*]] = phi i32 [ [[NEW1]], [[FOR_HEADER]] ] +; CHECK-NEXT: store i32 [[NEW1_LCSSA]], i32* [[ADDR]], align 4 +; CHECK-NEXT: ret i32* null +; CHECK: for.body: +; CHECK-NEXT: [[NEW]] = add i32 [[NEW1]], 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[NEW_LCSSA:%.*]] = phi i32 [ [[NEW]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SPLIT:%.*]] = phi i32* [ [[ADDR]], [[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[NEW_LCSSA]], i32* [[ADDR]], align 4 +; CHECK-NEXT: ret i32* null +; entry: %mem = alloca i8, i32 16 %addr = bitcast i8* %mem to i32* br label %for.body.lr.ph for.body.lr.ph: ; preds = %entry -; CHECK-LABEL: for.body.lr.ph: -; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4 br label %for.header for.header: @@ -74,8 +123,6 @@ for.header: br i1 %exitcmp, label %for.body, label %early-exit early-exit: -; CHECK-LABEL: early-exit: -; CHECK: store i32 %new1.lcssa, i32* %addr, align 4 ret i32* null for.body: @@ -86,20 +133,103 @@ for.body: br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge for.cond.for.end_crit_edge: ; preds = %for.body -; CHECK-LABEL: for.cond.for.end_crit_edge: -; CHECK: store i32 %new.lcssa, i32* %addr, align 4 + %split = phi i32* [ %addr, %for.body ] + ret i32* null +} + +declare noalias i8* @custom_malloc(i64) + +; Custom allocation function marked via noalias. 
+define i32* @test_custom_malloc(i32 %n) { +; CHECK-LABEL: @test_custom_malloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEM:%.*]] = call noalias dereferenceable(16) i8* @custom_malloc(i64 16) +; CHECK-NEXT: [[ADDR:%.*]] = bitcast i8* [[MEM]] to i32* +; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[ADDR_PROMOTED:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: [[NEW1:%.*]] = phi i32 [ [[ADDR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[NEW:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[GUARD:%.*]] = load volatile i8*, i8** @p, align 8 +; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq i8* [[GUARD]], null +; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]] +; CHECK: early-exit: +; CHECK-NEXT: ret i32* null +; CHECK: for.body: +; CHECK-NEXT: [[NEW]] = add i32 [[NEW1]], 1 +; CHECK-NEXT: store i32 [[NEW]], i32* [[ADDR]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i32* [ [[ADDR]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i32* null +; +entry: + %mem = call dereferenceable(16) noalias i8* @custom_malloc(i64 16) + %addr = bitcast i8* %mem to i32* + br label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.header + +for.header: + %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %old = load i32, i32* %addr, align 4 + ; deliberate impossible to analyze branch + %guard = load volatile i8*, i8** @p + %exitcmp = icmp eq i8* %guard, null + br i1 %exitcmp, label %for.body, label %early-exit + +early-exit: + ret i32* null + +for.body: + %new = add i32 %old, 1 + store i32 %new, i32* %addr, align 4 + %inc = add nsw 
i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body %split = phi i32* [ %addr, %for.body ] ret i32* null } declare i8* @not_malloc(i64) -; Negative test - not TLS -; CHECK-LABEL: @test_neg -define i32* @test_neg(i32 %n) { +; Negative test - not an allocation function. +define i32* @test_neg_not_malloc(i32 %n) { +; CHECK-LABEL: @test_neg_not_malloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEM:%.*]] = call dereferenceable(16) i8* @not_malloc(i64 16) +; CHECK-NEXT: [[ADDR:%.*]] = bitcast i8* [[MEM]] to i32* +; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[OLD:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[GUARD:%.*]] = load volatile i8*, i8** @p, align 8 +; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq i8* [[GUARD]], null +; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]] +; CHECK: early-exit: +; CHECK-NEXT: ret i32* null +; CHECK: for.body: +; CHECK-NEXT: [[NEW:%.*]] = add i32 [[OLD]], 1 +; CHECK-NEXT: store i32 [[NEW]], i32* [[ADDR]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i32* [ [[ADDR]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i32* null +; entry: ;; ignore the required null check for simplicity - %mem = call dereferenceable(16) noalias i8* @not_malloc(i64 16) + %mem = call dereferenceable(16) i8* @not_malloc(i64 16) %addr = bitcast i8* %mem to i32* br label %for.body.lr.ph @@ -115,13 +245,9 @@ for.header: br i1 %exitcmp, label %for.body, label %early-exit early-exit: -; CHECK-LABEL: 
early-exit: -; CHECK-NOT: store ret i32* null for.body: -; CHECK-LABEL: for.body: -; CHECK: store i32 %new, i32* %addr, align 4 %new = add i32 %old, 1 store i32 %new, i32* %addr, align 4 %inc = add nsw i32 %i.02, 1 @@ -129,16 +255,38 @@ for.body: br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge for.cond.for.end_crit_edge: ; preds = %for.body -; CHECK-LABEL: for.cond.for.end_crit_edge: -; CHECK-NOT: store %split = phi i32* [ %addr, %for.body ] ret i32* null } ; Negative test - can't speculate load since branch ; may control alignment -; CHECK-LABEL: @test_neg2 define i32* @test_neg2(i32 %n) { +; CHECK-LABEL: @test_neg2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEM:%.*]] = call noalias dereferenceable(16) i8* @malloc(i64 16) +; CHECK-NEXT: [[ADDR:%.*]] = bitcast i8* [[MEM]] to i32* +; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[GUARD:%.*]] = load volatile i8*, i8** @p, align 8 +; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq i8* [[GUARD]], null +; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]] +; CHECK: early-exit: +; CHECK-NEXT: ret i32* null +; CHECK: for.body: +; CHECK-NEXT: [[OLD:%.*]] = load i32, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEW:%.*]] = add i32 [[OLD]], 1 +; CHECK-NEXT: store i32 [[NEW]], i32* [[ADDR]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i32* [ [[ADDR]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i32* null +; entry: ;; ignore the required null check for simplicity %mem = call dereferenceable(16) noalias i8* @malloc(i64 16) @@ -156,13 +304,9 @@ for.header: br i1 %exitcmp, label 
%for.body, label %early-exit early-exit: -; CHECK-LABEL: early-exit: -; CHECK-NOT: store ret i32* null for.body: -; CHECK-LABEL: for.body: -; CHECK: store i32 %new, i32* %addr, align 4 %old = load i32, i32* %addr, align 4 %new = add i32 %old, 1 store i32 %new, i32* %addr, align 4 @@ -171,8 +315,6 @@ for.body: br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge for.cond.for.end_crit_edge: ; preds = %for.body -; CHECK-LABEL: for.cond.for.end_crit_edge: -; CHECK-NOT: store %split = phi i32* [ %addr, %for.body ] ret i32* null } From df29318e667c718859947569c0eda9563ab13bc1 Mon Sep 17 00:00:00 2001 From: Prashant Kumar Date: Thu, 6 Jan 2022 16:12:41 +0530 Subject: [PATCH 788/992] [MLIR] Add division normalization by GCD in `getDivRepr` fn. This commits adds division normalization in the `getDivRepr` function which extracts the gcd from the dividend and divisor and normalizes them. Signed-off-by: Prashant Kumar Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D115595 --- mlir/lib/Analysis/Presburger/Utils.cpp | 33 +++++++- .../Analysis/AffineStructuresTest.cpp | 82 +++++++++++++++++-- 2 files changed, 109 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp index 8fb9390a440e..840e91c8329b 100644 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -13,9 +13,38 @@ #include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Analysis/Presburger/IntegerPolyhedron.h" #include "mlir/Support/LogicalResult.h" +#include "mlir/Support/MathExtras.h" using namespace mlir; +/// Normalize a division's `dividend` and the `divisor` by their GCD. For +/// example: if the dividend and divisor are [2,0,4] and 4 respectively, +/// they get normalized to [1,0,2] and 2. 
+static void normalizeDivisionByGCD(SmallVectorImpl<int64_t> &dividend, + unsigned &divisor) { + if (divisor == 0 || dividend.empty()) + return; + int64_t gcd = llvm::greatestCommonDivisor(dividend.front(), int64_t(divisor)); + + // The reason for ignoring the constant term is as follows. + // For a division: + // floor((a + m.f(x))/(m.d)) + // It can be replaced by: + // floor((floor(a/m) + f(x))/d) + // Since `{a/m}/d` in the dividend satisfies 0 <= {a/m}/d < 1/d, it will not + // influence the result of the floor division and thus, can be ignored. + for (size_t i = 1, m = dividend.size() - 1; i < m; i++) { + gcd = llvm::greatestCommonDivisor(dividend[i], gcd); + if (gcd == 1) + return; + } + + // Normalize the dividend and the denominator. + std::transform(dividend.begin(), dividend.end(), dividend.begin(), + [gcd](int64_t &n) { return floor(n / gcd); }); + divisor /= gcd; +} + /// Check if the pos^th identifier can be represented as a division using upper /// bound inequality at position `ubIneq` and lower bound inequality at position /// `lbIneq`. @@ -52,7 +81,8 @@ using namespace mlir; /// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' /// /// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. +/// set to the denominator of the division. The final division expression is +/// normalized by GCD. static LogicalResult getDivRepr(const IntegerPolyhedron &cst, unsigned pos, unsigned ubIneq, unsigned lbIneq, SmallVector<int64_t, 8> &expr, @@ -101,6 +131,7 @@ static LogicalResult getDivRepr(const IntegerPolyhedron &cst, unsigned pos, // constant term of `expr`, minus `c`. From this, // constant term of `expr` = constant term of upper bound + `c`.
expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + normalizeDivisionByGCD(expr, divisor); return success(); } diff --git a/mlir/unittests/Analysis/AffineStructuresTest.cpp b/mlir/unittests/Analysis/AffineStructuresTest.cpp index b83feddbcaaf..0d1d642cbf82 100644 --- a/mlir/unittests/Analysis/AffineStructuresTest.cpp +++ b/mlir/unittests/Analysis/AffineStructuresTest.cpp @@ -592,12 +592,12 @@ TEST(FlatAffineConstraintsTest, computeLocalReprConstantFloorDiv) { fac.addInequality({1, 2, -8, 1, 10}); fac.addEquality({1, 2, -4, 1, 10}); - fac.addLocalFloorDiv({0, 0, 0, 0, 10}, 30); - fac.addLocalFloorDiv({0, 0, 0, 0, 0, 99}, 101); + fac.addLocalFloorDiv({0, 0, 0, 0, 100}, 30); + fac.addLocalFloorDiv({0, 0, 0, 0, 0, 206}, 101); - std::vector<SmallVector<int64_t, 8>> divisions = {{0, 0, 0, 0, 0, 0, 10}, - {0, 0, 0, 0, 0, 0, 99}}; - SmallVector<unsigned, 8> denoms = {30, 101}; + std::vector<SmallVector<int64_t, 8>> divisions = {{0, 0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 0, 2}}; + SmallVector<unsigned, 8> denoms = {1, 1}; // Check if floordivs with constant numerator can be computed. checkDivisionRepresentation(fac, divisions, denoms); @@ -750,6 +750,31 @@ TEST(FlatAffineConstraintsTest, mergeDivisionsSimple) { EXPECT_EQ(fac1.getNumLocalIds(), 2u); EXPECT_EQ(fac2.getNumLocalIds(), 2u); } + + { + // Division Normalization test. + // (x) : (exists z, y = [x / 2] : x = 3y and x + z + 1 >= 0). + FlatAffineConstraints fac1(1, 0, 1); + // This division would be normalized. + fac1.addLocalFloorDiv({3, 0, 0}, 6); // y = [3x / 6] -> [x/2]. + fac1.addEquality({1, 0, -3, 0}); // x = 3z. + fac1.addInequality({1, 1, 0, 1}); // x + y + 1 >= 0. + + // (x) : (exists y = [x / 2], z : x = 5y). + FlatAffineConstraints fac2(1); + fac2.addLocalFloorDiv({1, 0}, 2); // y = [x / 2]. + fac2.addEquality({1, -5, 0}); // x = 5y. + fac2.appendLocalId(); // Add local id z. + + fac1.mergeLocalIds(fac2); + + // Local space should be same. + EXPECT_EQ(fac1.getNumLocalIds(), fac2.getNumLocalIds()); + + // One division should be matched + 2 unmatched local ids.
+ EXPECT_EQ(fac1.getNumLocalIds(), 3u); + EXPECT_EQ(fac2.getNumLocalIds(), 3u); + } } TEST(FlatAffineConstraintsTest, mergeDivisionsNestedDivsions) { @@ -800,6 +825,29 @@ TEST(FlatAffineConstraintsTest, mergeDivisionsNestedDivsions) { EXPECT_EQ(fac1.getNumLocalIds(), 3u); EXPECT_EQ(fac2.getNumLocalIds(), 3u); } + { + // (x) : (exists y = [x / 2], z = [x + y / 3]: y + z >= x). + FlatAffineConstraints fac1(1); + fac1.addLocalFloorDiv({2, 0}, 4); // y = [2x / 4] -> [x / 2]. + fac1.addLocalFloorDiv({1, 1, 0}, 3); // z = [x + y / 3]. + fac1.addInequality({-1, 1, 1, 0}); // y + z >= x. + + // (x) : (exists y = [x / 2], z = [x + y / 3]: y + z <= x). + FlatAffineConstraints fac2(1); + fac2.addLocalFloorDiv({1, 0}, 2); // y = [x / 2]. + // This division would be normalized. + fac2.addLocalFloorDiv({3, 3, 0}, 9); // z = [3x + 3y / 9] -> [x + y / 3]. + fac2.addInequality({1, -1, -1, 0}); // y + z <= x. + + fac1.mergeLocalIds(fac2); + + // Local space should be same. + EXPECT_EQ(fac1.getNumLocalIds(), fac2.getNumLocalIds()); + + // 2 divisions should be matched. + EXPECT_EQ(fac1.getNumLocalIds(), 2u); + EXPECT_EQ(fac2.getNumLocalIds(), 2u); + } } TEST(FlatAffineConstraintsTest, mergeDivisionsConstants) { @@ -821,6 +869,30 @@ TEST(FlatAffineConstraintsTest, mergeDivisionsConstants) { // Local space should be same. EXPECT_EQ(fac1.getNumLocalIds(), fac2.getNumLocalIds()); + // 2 divisions should be matched. + EXPECT_EQ(fac1.getNumLocalIds(), 2u); + EXPECT_EQ(fac2.getNumLocalIds(), 2u); + } + { + // (x) : (exists y = [x + 1 / 3], z = [x + 2 / 3]: y + z >= x). + FlatAffineConstraints fac1(1); + fac1.addLocalFloorDiv({1, 1}, 2); // y = [x + 1 / 2]. + // Normalization test. + fac1.addLocalFloorDiv({3, 0, 6}, 9); // z = [3x + 6 / 9] -> [x + 2 / 3]. + fac1.addInequality({-1, 1, 1, 0}); // y + z >= x. + + // (x) : (exists y = [x + 1 / 3], z = [x + 2 / 3]: y + z <= x). + FlatAffineConstraints fac2(1); + // Normalization test. 
+ fac2.addLocalFloorDiv({2, 2}, 4); // y = [2x + 2 / 4] -> [x + 1 / 2]. + fac2.addLocalFloorDiv({1, 0, 2}, 3); // z = [x + 2 / 3]. + fac2.addInequality({1, -1, -1, 0}); // y + z <= x. + + fac1.mergeLocalIds(fac2); + + // Local space should be same. + EXPECT_EQ(fac1.getNumLocalIds(), fac2.getNumLocalIds()); + // 2 divisions should be matched. EXPECT_EQ(fac1.getNumLocalIds(), 2u); EXPECT_EQ(fac2.getNumLocalIds(), 2u); From ba927f66c0214f1353fc76b8f2aa8374c48bb13d Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 6 Jan 2022 11:02:14 +0000 Subject: [PATCH 789/992] [AArch64] Regenerate arith overflow test, and add a few more select tests. NFC --- llvm/test/CodeGen/AArch64/arm64-xaluo.ll | 2280 ++++++++++++++++++++-- 1 file changed, 2073 insertions(+), 207 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll index d8f5db89954f..0ce5b8ab8e40 100644 --- a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll @@ -1,15 +1,35 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -disable-post-ra -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-atomic-cfg-tidy=0 -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -disable-post-ra -verify-machineinstrs | FileCheck %s --check-prefixes=GISEL,FALLBACK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,SDAG +; RUN: llc < %s -mtriple=arm64-eabi -fast-isel -fast-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,FAST +; RUN: llc < %s -mtriple=arm64-eabi -global-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,GISEL ; ; Get the actual value of the overflow 
bit. ; define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: saddo1.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, w1 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo1.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, w1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo1.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, w1 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo1.i32 -; CHECK: adds {{w[0-9]+}}, w0, w1 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -19,10 +39,29 @@ entry: ; Test the immediate version. define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) { +; SDAG-LABEL: saddo2.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, #4 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo2.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, #4 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo2.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, #4 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo2.i32 -; CHECK: adds {{w[0-9]+}}, w0, #4 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -32,10 +71,29 @@ entry: ; Test negative immediates. 
define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) { +; SDAG-LABEL: saddo3.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs w8, w0, #4 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo3.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs w8, w0, #4 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo3.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs w8, w0, #4 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo3.i32 -; CHECK: subs {{w[0-9]+}}, w0, #4 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -45,10 +103,32 @@ entry: ; Test immediates that are too large to be encoded. define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) { +; SDAG-LABEL: saddo4.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #16777215 +; SDAG-NEXT: adds w8, w0, w8 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo4.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mov w8, #16777215 +; FAST-NEXT: adds w8, w0, w8 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo4.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: mov w8, #16777215 +; GISEL-NEXT: adds w8, w0, w8 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo4.i32 -; CHECK: adds {{w[0-9]+}}, w0, {{w[0-9]+}} -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -58,10 +138,29 @@ entry: ; Test shift folding. 
define zeroext i1 @saddo5.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: saddo5.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, w1, lsl #16 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo5.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, w1, lsl #16 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo5.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, w1, lsl #16 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo5.i32 -; CHECK: adds {{w[0-9]+}}, w0, w1 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %lsl = shl i32 %v2, 16 %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %lsl) %val = extractvalue {i32, i1} %t, 0 @@ -71,10 +170,29 @@ entry: } define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) { +; SDAG-LABEL: saddo1.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, x1 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo1.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, x1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo1.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, x1 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo1.i64 -; CHECK: adds {{x[0-9]+}}, x0, x1 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -83,10 +201,29 @@ entry: } define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) { +; SDAG-LABEL: saddo2.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, #4 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str x8, [x1] +; SDAG-NEXT: ret +; +; 
FAST-LABEL: saddo2.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, #4 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo2.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, #4 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo2.i64 -; CHECK: adds {{x[0-9]+}}, x0, #4 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -95,10 +232,29 @@ entry: } define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) { +; SDAG-LABEL: saddo3.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs x8, x0, #4 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str x8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo3.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs x8, x0, #4 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo3.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs x8, x0, #4 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo3.i64 -; CHECK: subs {{x[0-9]+}}, x0, #4 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -106,15 +262,30 @@ entry: ret i1 %obit } -; FALLBACK-NOT: remark{{.*}}uaddo.i32 define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: uaddo.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, w1 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, w1 +; FAST-NEXT: cset w9, hs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x2] +; FAST-NEXT: ret +; +; 
GISEL-LABEL: uaddo.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, w1 +; GISEL-NEXT: cset w9, hs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.i32 -; CHECK: adds {{w[0-9]+}}, w0, w1 -; CHECK-NEXT: cset {{w[0-9]+}}, hs -; GISEL-LABEL: uaddo.i32 -; GISEL: adds {{w[0-9]+}}, w0, w1 -; GISEL-NEXT: cset {{w[0-9]+}}, hs %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -122,15 +293,30 @@ entry: ret i1 %obit } -; FALLBACK-NOT: remark{{.*}}uaddo.i64 define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) { +; SDAG-LABEL: uaddo.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, x1 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, x1 +; FAST-NEXT: cset w9, hs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, x1 +; GISEL-NEXT: cset w9, hs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.i64 -; CHECK: adds {{x[0-9]+}}, x0, x1 -; CHECK-NEXT: cset {{w[0-9]+}}, hs -; GISEL-LABEL: uaddo.i64 -; GISEL: adds {{x[0-9]+}}, x0, x1 -; GISEL-NEXT: cset {{w[0-9]+}}, hs %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -139,10 +325,29 @@ entry: } define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: ssubo1.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs w8, w0, w1 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo1.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs w8, w0, w1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x2] +; 
FAST-NEXT: ret +; +; GISEL-LABEL: ssubo1.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs w8, w0, w1 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo1.i32 -; CHECK: subs {{w[0-9]+}}, w0, w1 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -151,10 +356,29 @@ entry: } define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) { +; SDAG-LABEL: ssubo2.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, #4 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str w8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo2.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, #4 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo2.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, #4 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo2.i32 -; CHECK: adds {{w[0-9]+}}, w0, #4 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -163,10 +387,29 @@ entry: } define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) { +; SDAG-LABEL: ssubo.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs x8, x0, x1 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs x8, x0, x1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs x8, x0, x1 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: 
ssubo.i64 -; CHECK: subs {{x[0-9]+}}, x0, x1 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -175,10 +418,29 @@ entry: } define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: usubo.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs w8, w0, w1 +; SDAG-NEXT: cset w0, lo +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs w8, w0, w1 +; FAST-NEXT: cset w9, lo +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs w8, w0, w1 +; GISEL-NEXT: cset w9, lo +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.i32 -; CHECK: subs {{w[0-9]+}}, w0, w1 -; CHECK-NEXT: cset {{w[0-9]+}}, lo %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -187,10 +449,29 @@ entry: } define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) { +; SDAG-LABEL: usubo.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs x8, x0, x1 +; SDAG-NEXT: cset w0, lo +; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs x8, x0, x1 +; FAST-NEXT: cset w9, lo +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs x8, x0, x1 +; GISEL-NEXT: cset w9, lo +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.i64 -; CHECK: subs {{x[0-9]+}}, x0, x1 -; CHECK-NEXT: cset {{w[0-9]+}}, lo %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -199,11 
+480,33 @@ entry: } define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: smulo.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: smull x8, w0, w1 +; SDAG-NEXT: cmp x8, w8, sxtw +; SDAG-NEXT: cset w0, ne +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: smull x8, w0, w1 +; FAST-NEXT: cmp x8, w8, sxtw +; FAST-NEXT: cset w9, ne +; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smull x8, w0, w1 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: cmp w8, w9, asr #31 +; GISEL-NEXT: cset w0, ne +; GISEL-NEXT: str w9, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.i32 -; CHECK: smull x[[MREG:[0-9]+]], w0, w1 -; CHECK-NEXT: cmp x[[MREG]], w[[MREG]], sxtw -; CHECK-NEXT: cset {{w[0-9]+}}, ne %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -212,12 +515,34 @@ entry: } define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) { +; SDAG-LABEL: smulo.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mul x8, x0, x1 +; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: cmp x9, x8, asr #63 +; SDAG-NEXT: cset w0, ne +; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: cmp x9, x8, asr #63 +; FAST-NEXT: cset w9, ne +; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: mul x8, x0, x1 +; GISEL-NEXT: smulh x9, x0, x1 +; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: cset w0, ne +; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.i64 -; CHECK: mul [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp [[HREG]], 
[[MREG]], asr #63 -; CHECK-NEXT: cset {{w[0-9]+}}, ne %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -226,10 +551,29 @@ entry: } define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) { +; SDAG-LABEL: smulo2.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, x0 +; SDAG-NEXT: cset w0, vs +; SDAG-NEXT: str x8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo2.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, x0 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo2.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, x0 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo2.i64 -; CHECK: adds [[MREG:x[0-9]+]], x0, x0 -; CHECK-NEXT: cset {{w[0-9]+}}, vs %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -238,11 +582,34 @@ entry: } define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) { +; SDAG-LABEL: umulo.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umull x8, w0, w1 +; SDAG-NEXT: tst x8, #0xffffffff00000000 +; SDAG-NEXT: cset w0, ne +; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umull x8, w0, w1 +; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: cset w9, ne +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str w8, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umull x8, w0, w1 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: lsr x8, x8, #32 +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: str w9, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.i32 -; CHECK: umull [[MREG:x[0-9]+]], w0, w1 -; CHECK-NEXT: tst [[MREG]], #0xffffffff00000000 
-; CHECK-NEXT: cset {{w[0-9]+}}, ne %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -251,11 +618,37 @@ entry: } define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) { +; SDAG-LABEL: umulo.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umulh x8, x0, x1 +; SDAG-NEXT: mul x9, x0, x1 +; SDAG-NEXT: cmp xzr, x8 +; SDAG-NEXT: cset w8, ne +; SDAG-NEXT: mov w0, w8 +; SDAG-NEXT: str x9, [x2] +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umulh x8, x0, x1 +; FAST-NEXT: mul x9, x0, x1 +; FAST-NEXT: cmp xzr, x8 +; FAST-NEXT: cset w8, ne +; FAST-NEXT: and w8, w8, #0x1 +; FAST-NEXT: mov w0, w8 +; FAST-NEXT: str x9, [x2] +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: str x9, [x2] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.i64 -; CHECK: umulh [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp xzr, [[MREG]] -; CHECK-NEXT: cset {{w[0-9]+}}, ne %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -264,10 +657,29 @@ entry: } define zeroext i1 @umulo2.i64(i64 %v1, i64* %res) { +; SDAG-LABEL: umulo2.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, x0 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: str x8, [x1] +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo2.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, x0 +; FAST-NEXT: cset w9, hs +; FAST-NEXT: and w0, w9, #0x1 +; FAST-NEXT: str x8, [x1] +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo2.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, x0 +; GISEL-NEXT: cset w9, hs +; GISEL-NEXT: ubfx w0, w9, #0, #1 +; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo2.i64 -; CHECK: adds 
[[MREG:x[0-9]+]], x0, x0 -; CHECK-NEXT: cset {{w[0-9]+}}, hs %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -280,10 +692,26 @@ entry: ; Check the use of the overflow bit in combination with a select instruction. ; define i32 @saddo.select.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: saddo.select.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn w0, w1 +; SDAG-NEXT: csel w0, w0, w1, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.select.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn w0, w1 +; FAST-NEXT: csel w0, w0, w1, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.select.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn w0, w1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo.select.i32 -; CHECK: cmn w0, w1 -; CHECK-NEXT: csel w0, w0, w1, vs %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = select i1 %obit, i32 %v1, i32 %v2 @@ -291,10 +719,26 @@ entry: } define i1 @saddo.not.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: saddo.not.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn w0, w1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.not.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn w0, w1 +; FAST-NEXT: cset w0, vc +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.not.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn w0, w1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo.not.i32 -; CHECK: cmn w0, w1 -; CHECK-NEXT: cset w0, vc %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = xor i1 %obit, true @@ -302,10 +746,26 @@ entry: } define i64 @saddo.select.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: saddo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x1 
+; SDAG-NEXT: csel x0, x0, x1, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x1 +; FAST-NEXT: csel x0, x0, x1, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo.select.i64 -; CHECK: cmn x0, x1 -; CHECK-NEXT: csel x0, x0, x1, vs %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = select i1 %obit, i64 %v1, i64 %v2 @@ -313,10 +773,26 @@ entry: } define i1 @saddo.not.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: saddo.not.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.not.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x1 +; FAST-NEXT: cset w0, vc +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.not.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo.not.i64 -; CHECK: cmn x0, x1 -; CHECK-NEXT: cset w0, vc %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = xor i1 %obit, true @@ -324,10 +800,26 @@ entry: } define i32 @uaddo.select.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: uaddo.select.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn w0, w1 +; SDAG-NEXT: csel w0, w0, w1, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.select.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn w0, w1 +; FAST-NEXT: csel w0, w0, w1, hs +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.select.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn w0, w1 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.select.i32 -; CHECK: 
cmn w0, w1 -; CHECK-NEXT: csel w0, w0, w1, hs %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = select i1 %obit, i32 %v1, i32 %v2 @@ -335,10 +827,26 @@ entry: } define i1 @uaddo.not.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: uaddo.not.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn w0, w1 +; SDAG-NEXT: cset w0, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.not.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn w0, w1 +; FAST-NEXT: cset w0, lo +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.not.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn w0, w1 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.not.i32 -; CHECK: cmn w0, w1 -; CHECK-NEXT: cset w0, lo %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = xor i1 %obit, true @@ -346,10 +854,26 @@ entry: } define i64 @uaddo.select.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: uaddo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x1 +; SDAG-NEXT: csel x0, x0, x1, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x1 +; FAST-NEXT: csel x0, x0, x1, hs +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x1 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.select.i64 -; CHECK: cmn x0, x1 -; CHECK-NEXT: csel x0, x0, x1, hs %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = select i1 %obit, i64 %v1, i64 %v2 @@ -357,10 +881,26 @@ entry: } define i1 @uaddo.not.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: uaddo.not.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x1 +; SDAG-NEXT: cset w0, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.not.i64: +; FAST: // 
%bb.0: // %entry +; FAST-NEXT: cmn x0, x1 +; FAST-NEXT: cset w0, lo +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.not.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x1 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.not.i64 -; CHECK: cmn x0, x1 -; CHECK-NEXT: cset w0, lo %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = xor i1 %obit, true @@ -368,10 +908,26 @@ entry: } define i32 @ssubo.select.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: ssubo.select.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp w0, w1 +; SDAG-NEXT: csel w0, w0, w1, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.select.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp w0, w1 +; FAST-NEXT: csel w0, w0, w1, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.select.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo.select.i32 -; CHECK: cmp w0, w1 -; CHECK-NEXT: csel w0, w0, w1, vs %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = select i1 %obit, i32 %v1, i32 %v2 @@ -379,10 +935,26 @@ entry: } define i1 @ssubo.not.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: ssubo.not.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp w0, w1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.not.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp w0, w1 +; FAST-NEXT: cset w0, vc +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.not.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo.not.i32 -; CHECK: cmp w0, w1 -; CHECK-NEXT: cset w0, vc %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = 
extractvalue {i32, i1} %t, 1 %ret = xor i1 %obit, true @@ -390,10 +962,26 @@ entry: } define i64 @ssubo.select.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: ssubo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp x0, x1 +; SDAG-NEXT: csel x0, x0, x1, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp x0, x1 +; FAST-NEXT: csel x0, x0, x1, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp x0, x1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo.select.i64 -; CHECK: cmp x0, x1 -; CHECK-NEXT: csel x0, x0, x1, vs %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = select i1 %obit, i64 %v1, i64 %v2 @@ -401,10 +989,26 @@ entry: } define i1 @ssub.not.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: ssub.not.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp x0, x1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: ssub.not.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp x0, x1 +; FAST-NEXT: cset w0, vc +; FAST-NEXT: ret +; +; GISEL-LABEL: ssub.not.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp x0, x1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssub.not.i64 -; CHECK: cmp x0, x1 -; CHECK-NEXT: cset w0, vc %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = xor i1 %obit, true @@ -412,10 +1016,26 @@ entry: } define i32 @usubo.select.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: usubo.select.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp w0, w1 +; SDAG-NEXT: csel w0, w0, w1, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.select.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp w0, w1 +; FAST-NEXT: csel w0, w0, w1, lo +; FAST-NEXT: ret +; +; GISEL-LABEL: 
usubo.select.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, lo +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.select.i32 -; CHECK: cmp w0, w1 -; CHECK-NEXT: csel w0, w0, w1, lo %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = select i1 %obit, i32 %v1, i32 %v2 @@ -423,10 +1043,26 @@ entry: } define i1 @usubo.not.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: usubo.not.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp w0, w1 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.not.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp w0, w1 +; FAST-NEXT: cset w0, hs +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.not.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, lo +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.not.i32 -; CHECK: cmp w0, w1 -; CHECK-NEXT: cset w0, hs %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = xor i1 %obit, true @@ -434,10 +1070,26 @@ entry: } define i64 @usubo.select.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: usubo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp x0, x1 +; SDAG-NEXT: csel x0, x0, x1, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp x0, x1 +; FAST-NEXT: csel x0, x0, x1, lo +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp x0, x1 +; GISEL-NEXT: cset w8, lo +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.select.i64 -; CHECK: cmp x0, x1 -; CHECK-NEXT: csel x0, x0, x1, lo %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = select i1 %obit, i64 %v1, i64 %v2 @@ -445,10 +1097,26 
@@ entry: } define i1 @usubo.not.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: usubo.not.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp x0, x1 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.not.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp x0, x1 +; FAST-NEXT: cset w0, hs +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.not.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp x0, x1 +; GISEL-NEXT: cset w8, lo +; GISEL-NEXT: eor w8, w8, #0x1 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.not.i64 -; CHECK: cmp x0, x1 -; CHECK-NEXT: cset w0, hs %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = xor i1 %obit, true @@ -456,11 +1124,29 @@ entry: } define i32 @smulo.select.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: smulo.select.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: smull x8, w0, w1 +; SDAG-NEXT: cmp x8, w8, sxtw +; SDAG-NEXT: csel w0, w0, w1, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.select.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: smull x8, w0, w1 +; FAST-NEXT: cmp x8, w8, sxtw +; FAST-NEXT: csel w0, w0, w1, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.select.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smull x8, w0, w1 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: cmp w8, w9, asr #31 +; GISEL-NEXT: csel w0, w0, w1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.select.i32 -; CHECK: smull x[[MREG:[0-9]+]], w0, w1 -; CHECK-NEXT: cmp x[[MREG]], w[[MREG]], sxtw -; CHECK-NEXT: csel w0, w0, w1, ne %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = select i1 %obit, i32 %v1, i32 %v2 @@ -468,11 +1154,30 @@ entry: } define i1 @smulo.not.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: smulo.not.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: smull x8, w0, w1 +; SDAG-NEXT: cmp x8, w8, sxtw +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.not.i32: 
+; FAST: // %bb.0: // %entry +; FAST-NEXT: smull x8, w0, w1 +; FAST-NEXT: cmp x8, w8, sxtw +; FAST-NEXT: cset w0, eq +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.not.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smull x8, w0, w1 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: cmp w8, w9, asr #31 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.not.i32 -; CHECK: smull x[[MREG:[0-9]+]], w0, w1 -; CHECK-NEXT: cmp x[[MREG]], w[[MREG]], sxtw -; CHECK-NEXT: cset w0, eq %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = xor i1 %obit, true @@ -480,12 +1185,14 @@ entry: } define i64 @smulo.select.i64(i64 %v1, i64 %v2) { +; CHECK-LABEL: smulo.select.i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x0, x1 +; CHECK-NEXT: smulh x9, x0, x1 +; CHECK-NEXT: cmp x9, x8, asr #63 +; CHECK-NEXT: csel x0, x0, x1, ne +; CHECK-NEXT: ret entry: -; CHECK-LABEL: smulo.select.i64 -; CHECK: mul [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63 -; CHECK-NEXT: csel x0, x0, x1, ne %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = select i1 %obit, i64 %v1, i64 %v2 @@ -493,12 +1200,31 @@ entry: } define i1 @smulo.not.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: smulo.not.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mul x8, x0, x1 +; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: cmp x9, x8, asr #63 +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.not.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: cmp x9, x8, asr #63 +; FAST-NEXT: cset w0, eq +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.not.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: mul x8, x0, x1 +; GISEL-NEXT: smulh x9, x0, x1 +; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: cset w8, ne +; 
GISEL-NEXT: eor w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.not.i64 -; CHECK: mul [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63 -; CHECK-NEXT: cset w0, eq %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = xor i1 %obit, true @@ -506,11 +1232,28 @@ entry: } define i32 @umulo.select.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: umulo.select.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umull x8, w0, w1 +; SDAG-NEXT: tst x8, #0xffffffff00000000 +; SDAG-NEXT: csel w0, w0, w1, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.select.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umull x8, w0, w1 +; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: csel w0, w0, w1, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.select.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umull x8, w0, w1 +; GISEL-NEXT: lsr x8, x8, #32 +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: csel w0, w0, w1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.select.i32 -; CHECK: umull [[MREG:x[0-9]+]], w0, w1 -; CHECK-NEXT: tst [[MREG]], #0xffffffff00000000 -; CHECK-NEXT: csel w0, w0, w1, ne %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = select i1 %obit, i32 %v1, i32 %v2 @@ -518,11 +1261,29 @@ entry: } define i1 @umulo.not.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: umulo.not.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umull x8, w0, w1 +; SDAG-NEXT: tst x8, #0xffffffff00000000 +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.not.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umull x8, w0, w1 +; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: cset w0, eq +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.not.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umull x8, w0, w1 +; GISEL-NEXT: lsr x8, x8, #32 +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 +; 
GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.not.i32 -; CHECK: umull [[MREG:x[0-9]+]], w0, w1 -; CHECK-NEXT: tst [[MREG]], #0xffffffff00000000 -; CHECK-NEXT: cset w0, eq %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 %ret = xor i1 %obit, true @@ -530,11 +1291,27 @@ entry: } define i64 @umulo.select.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: umulo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umulh x8, x0, x1 +; SDAG-NEXT: cmp xzr, x8 +; SDAG-NEXT: csel x0, x0, x1, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umulh x8, x0, x1 +; FAST-NEXT: cmp xzr, x8 +; FAST-NEXT: csel x0, x0, x1, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umulh x8, x0, x1 +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.select.i64 -; CHECK: umulh [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp xzr, [[MREG]] -; CHECK-NEXT: csel x0, x0, x1, ne %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 %ret = select i1 %obit, i64 %v1, i64 %v2 @@ -542,11 +1319,28 @@ entry: } define i1 @umulo.not.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: umulo.not.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umulh x8, x0, x1 +; SDAG-NEXT: cmp xzr, x8 +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.not.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umulh x8, x0, x1 +; FAST-NEXT: cmp xzr, x8 +; FAST-NEXT: cset w0, eq +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.not.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umulh x8, x0, x1 +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: eor w0, w8, #0x1 +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.not.i64 -; CHECK: umulh [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp xzr, [[MREG]] -; CHECK-NEXT: cset w0, eq %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 
%v2) %obit = extractvalue {i64, i1} %t, 1 %ret = xor i1 %obit, true @@ -554,14 +1348,821 @@ entry: } +define i8 @uaddo.selectboth.i8(i8 %a, i8 %b) { +; SDAG-LABEL: uaddo.selectboth.i8: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: and w8, w0, #0xff +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: add w8, w8, w1, uxtb +; SDAG-NEXT: tst w8, #0x100 +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.selectboth.i8: +; FAST: // %bb.0: // %entry +; FAST-NEXT: and w8, w0, #0xff +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: add w8, w8, w1, uxtb +; FAST-NEXT: tst w8, #0x100 +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.selectboth.i8: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w1, #0xff +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: add w8, w8, w0, uxtb +; GISEL-NEXT: cmp w8, w8, uxtb +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) + %m1 = extractvalue { i8, i1 } %m, 0 + %m2 = extractvalue { i8, i1 } %m, 1 + %r = select i1 %m2, i8 %m1, i8 10 + ret i8 %r +} + +define i8 @saddo.selectboth.i8(i8 %a, i8 %b) { +; SDAG-LABEL: saddo.selectboth.i8: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: sxtb w8, w0 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: add w8, w8, w1, sxtb +; SDAG-NEXT: cmp w8, w8, sxtb +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.selectboth.i8: +; FAST: // %bb.0: // %entry +; FAST-NEXT: sxtb w8, w0 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: add w8, w8, w1, sxtb +; FAST-NEXT: cmp w8, w8, sxtb +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.selectboth.i8: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: sxtb w8, w1 +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: add w8, w8, w0, sxtb +; GISEL-NEXT: cmp w8, w8, sxtb +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) + %m1 = extractvalue { i8, i1 } %m, 0 + %m2 = extractvalue { i8, i1 } %m, 1 
+ %r = select i1 %m2, i8 %m1, i8 10 + ret i8 %r +} + +define i16 @uaddo.selectboth.i16(i16 %a, i16 %b) { +; SDAG-LABEL: uaddo.selectboth.i16: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: and w8, w0, #0xffff +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: add w8, w8, w1, uxth +; SDAG-NEXT: tst w8, #0x10000 +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.selectboth.i16: +; FAST: // %bb.0: // %entry +; FAST-NEXT: and w8, w0, #0xffff +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: add w8, w8, w1, uxth +; FAST-NEXT: tst w8, #0x10000 +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.selectboth.i16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w1, #0xffff +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: add w8, w8, w0, uxth +; GISEL-NEXT: cmp w8, w8, uxth +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) + %m1 = extractvalue { i16, i1 } %m, 0 + %m2 = extractvalue { i16, i1 } %m, 1 + %r = select i1 %m2, i16 %m1, i16 10 + ret i16 %r +} + +define i16 @saddo.selectboth.i16(i16 %a, i16 %b) { +; SDAG-LABEL: saddo.selectboth.i16: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: sxth w8, w0 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: add w8, w8, w1, sxth +; SDAG-NEXT: cmp w8, w8, sxth +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.selectboth.i16: +; FAST: // %bb.0: // %entry +; FAST-NEXT: sxth w8, w0 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: add w8, w8, w1, sxth +; FAST-NEXT: cmp w8, w8, sxth +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.selectboth.i16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: sxth w8, w1 +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: add w8, w8, w0, sxth +; GISEL-NEXT: cmp w8, w8, sxth +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %a, i16 %b) + %m1 = extractvalue { i16, i1 } %m, 0 + %m2 = extractvalue { i16, i1 } %m, 1 + %r = 
select i1 %m2, i16 %m1, i16 10 + ret i16 %r +} + +define i32 @uaddo.selectboth.i32(i32 %a, i32 %b) { +; SDAG-LABEL: uaddo.selectboth.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, w1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel w0, w8, w9, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.selectboth.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, w1 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: csel w0, w8, w9, hs +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.selectboth.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, w1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, hs +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: ret +entry: + %m = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %m1 = extractvalue { i32, i1 } %m, 0 + %m2 = extractvalue { i32, i1 } %m, 1 + %r = select i1 %m2, i32 %m1, i32 10 + ret i32 %r +} + +define i32 @saddo.selectboth.i32(i32 %a, i32 %b) { +; SDAG-LABEL: saddo.selectboth.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds w8, w0, w1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel w0, w8, w9, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.selectboth.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds w8, w0, w1 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: csel w0, w8, w9, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.selectboth.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds w8, w0, w1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: ret +entry: + %m = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %m1 = extractvalue { i32, i1 } %m, 0 + %m2 = extractvalue { i32, i1 } %m, 1 + %r = select i1 %m2, i32 %m1, i32 10 + ret i32 %r +} + +define i64 @uaddo.selectboth.i64(i64 %a, i64 %b) { +; SDAG-LABEL: uaddo.selectboth.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, x1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel x0, x8, x9, hs +; SDAG-NEXT: ret +; +; 
FAST-LABEL: uaddo.selectboth.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, x1 +; FAST-NEXT: mov x9, #10 +; FAST-NEXT: csel x0, x8, x9, hs +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.selectboth.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, x1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, hs +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: ret +entry: + %m = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %m1 = extractvalue { i64, i1 } %m, 0 + %m2 = extractvalue { i64, i1 } %m, 1 + %r = select i1 %m2, i64 %m1, i64 10 + ret i64 %r +} + +define i64 @saddo.selectboth.i64(i64 %a, i64 %b) { +; SDAG-LABEL: saddo.selectboth.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: adds x8, x0, x1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel x0, x8, x9, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.selectboth.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: adds x8, x0, x1 +; FAST-NEXT: mov x9, #10 +; FAST-NEXT: csel x0, x8, x9, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.selectboth.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: adds x8, x0, x1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: ret +entry: + %m = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %m1 = extractvalue { i64, i1 } %m, 0 + %m2 = extractvalue { i64, i1 } %m, 1 + %r = select i1 %m2, i64 %m1, i64 10 + ret i64 %r +} + +define i8 @usubo.selectboth.i8(i8 %a, i8 %b) { +; SDAG-LABEL: usubo.selectboth.i8: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: and w8, w0, #0xff +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: sub w8, w8, w1, uxtb +; SDAG-NEXT: tst w8, #0xffffff00 +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.selectboth.i8: +; FAST: // %bb.0: // %entry +; FAST-NEXT: and w8, w0, #0xff +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: sub w8, w8, w1, uxtb +; FAST-NEXT: tst w8, #0xffffff00 +; FAST-NEXT: csel w0, w8, w9, ne +; 
FAST-NEXT: ret +; +; GISEL-LABEL: usubo.selectboth.i8: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xff +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: sub w8, w8, w1, uxtb +; GISEL-NEXT: cmp w8, w8, uxtb +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 %b) + %m1 = extractvalue { i8, i1 } %m, 0 + %m2 = extractvalue { i8, i1 } %m, 1 + %r = select i1 %m2, i8 %m1, i8 10 + ret i8 %r +} + +define i8 @ssubo.selectboth.i8(i8 %a, i8 %b) { +; CHECK-LABEL: ssubo.selectboth.i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, w8, sxtb +; CHECK-NEXT: csel w0, w8, w9, ne +; CHECK-NEXT: ret +entry: + %m = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 %b) + %m1 = extractvalue { i8, i1 } %m, 0 + %m2 = extractvalue { i8, i1 } %m, 1 + %r = select i1 %m2, i8 %m1, i8 10 + ret i8 %r +} + +define i16 @usubo.selectboth.i16(i16 %a, i16 %b) { +; SDAG-LABEL: usubo.selectboth.i16: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: and w8, w0, #0xffff +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: sub w8, w8, w1, uxth +; SDAG-NEXT: tst w8, #0xffff0000 +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.selectboth.i16: +; FAST: // %bb.0: // %entry +; FAST-NEXT: and w8, w0, #0xffff +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: sub w8, w8, w1, uxth +; FAST-NEXT: tst w8, #0xffff0000 +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.selectboth.i16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xffff +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: sub w8, w8, w1, uxth +; GISEL-NEXT: cmp w8, w8, uxth +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %a, i16 %b) + %m1 = extractvalue { i16, i1 } %m, 0 + %m2 = extractvalue { i16, i1 } %m, 1 + %r = select i1 %m2, i16 %m1, i16 10 + ret i16 %r +} + +define i16 
@ssubo.selectboth.i16(i16 %a, i16 %b) { +; CHECK-LABEL: ssubo.selectboth.i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, w8, sxth +; CHECK-NEXT: csel w0, w8, w9, ne +; CHECK-NEXT: ret +entry: + %m = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b) + %m1 = extractvalue { i16, i1 } %m, 0 + %m2 = extractvalue { i16, i1 } %m, 1 + %r = select i1 %m2, i16 %m1, i16 10 + ret i16 %r +} + +define i32 @usubo.selectboth.i32(i32 %a, i32 %b) { +; SDAG-LABEL: usubo.selectboth.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs w8, w0, w1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel w0, w8, w9, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.selectboth.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs w8, w0, w1 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: csel w0, w8, w9, lo +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.selectboth.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs w8, w0, w1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, lo +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: ret +entry: + %m = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %m1 = extractvalue { i32, i1 } %m, 0 + %m2 = extractvalue { i32, i1 } %m, 1 + %r = select i1 %m2, i32 %m1, i32 10 + ret i32 %r +} + +define i32 @ssubo.selectboth.i32(i32 %a, i32 %b) { +; SDAG-LABEL: ssubo.selectboth.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs w8, w0, w1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel w0, w8, w9, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.selectboth.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs w8, w0, w1 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: csel w0, w8, w9, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.selectboth.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs w8, w0, w1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: ret +entry: 
+ %m = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %m1 = extractvalue { i32, i1 } %m, 0 + %m2 = extractvalue { i32, i1 } %m, 1 + %r = select i1 %m2, i32 %m1, i32 10 + ret i32 %r +} + +define i64 @usubo.selectboth.i64(i64 %a, i64 %b) { +; SDAG-LABEL: usubo.selectboth.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs x8, x0, x1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel x0, x8, x9, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.selectboth.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs x8, x0, x1 +; FAST-NEXT: mov x9, #10 +; FAST-NEXT: csel x0, x8, x9, lo +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.selectboth.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs x8, x0, x1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, lo +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: ret +entry: + %m = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %m1 = extractvalue { i64, i1 } %m, 0 + %m2 = extractvalue { i64, i1 } %m, 1 + %r = select i1 %m2, i64 %m1, i64 10 + ret i64 %r +} + +define i64 @ssubo.selectboth.i64(i64 %a, i64 %b) { +; SDAG-LABEL: ssubo.selectboth.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: subs x8, x0, x1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel x0, x8, x9, vs +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.selectboth.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: subs x8, x0, x1 +; FAST-NEXT: mov x9, #10 +; FAST-NEXT: csel x0, x8, x9, vs +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.selectboth.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: subs x8, x0, x1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: tst w9, #0x1 +; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: ret +entry: + %m = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %m1 = extractvalue { i64, i1 } %m, 0 + %m2 = extractvalue { i64, i1 } %m, 1 + %r = select i1 %m2, i64 %m1, i64 10 + ret i64 %r +} + + +define i8 @umulo.selectboth.i8(i8 %a, i8 %b) { +; SDAG-LABEL: umulo.selectboth.i8: +; 
SDAG: // %bb.0: // %entry +; SDAG-NEXT: and w8, w1, #0xff +; SDAG-NEXT: and w9, w0, #0xff +; SDAG-NEXT: mul w8, w9, w8 +; SDAG-NEXT: lsr w9, w8, #8 +; SDAG-NEXT: cmp w9, #0 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.selectboth.i8: +; FAST: // %bb.0: // %entry +; FAST-NEXT: and w8, w1, #0xff +; FAST-NEXT: and w9, w0, #0xff +; FAST-NEXT: mul w8, w9, w8 +; FAST-NEXT: lsr w9, w8, #8 +; FAST-NEXT: cmp w9, #0 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.selectboth.i8: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xff +; GISEL-NEXT: and w9, w1, #0xff +; GISEL-NEXT: mul w8, w8, w9 +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: cmp w8, w8, uxtb +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b) + %m1 = extractvalue { i8, i1 } %m, 0 + %m2 = extractvalue { i8, i1 } %m, 1 + %r = select i1 %m2, i8 %m1, i8 10 + ret i8 %r +} + +define i8 @smulo.selectboth.i8(i8 %a, i8 %b) { +; SDAG-LABEL: smulo.selectboth.i8: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: sxtb w8, w1 +; SDAG-NEXT: sxtb w9, w0 +; SDAG-NEXT: mul w8, w9, w8 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: cmp w8, w8, sxtb +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.selectboth.i8: +; FAST: // %bb.0: // %entry +; FAST-NEXT: sxtb w8, w1 +; FAST-NEXT: sxtb w9, w0 +; FAST-NEXT: mul w8, w9, w8 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: cmp w8, w8, sxtb +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.selectboth.i8: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: sxtb w8, w0 +; GISEL-NEXT: sxtb w9, w1 +; GISEL-NEXT: mul w8, w8, w9 +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: cmp w8, w8, sxtb +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b) + %m1 = extractvalue { i8, i1 } %m, 0 + %m2 = extractvalue { i8, i1 } %m, 
1 + %r = select i1 %m2, i8 %m1, i8 10 + ret i8 %r +} + +define i16 @umulo.selectboth.i16(i16 %a, i16 %b) { +; SDAG-LABEL: umulo.selectboth.i16: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: and w8, w1, #0xffff +; SDAG-NEXT: and w9, w0, #0xffff +; SDAG-NEXT: mul w8, w9, w8 +; SDAG-NEXT: lsr w9, w8, #16 +; SDAG-NEXT: cmp w9, #0 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.selectboth.i16: +; FAST: // %bb.0: // %entry +; FAST-NEXT: and w8, w1, #0xffff +; FAST-NEXT: and w9, w0, #0xffff +; FAST-NEXT: mul w8, w9, w8 +; FAST-NEXT: lsr w9, w8, #16 +; FAST-NEXT: cmp w9, #0 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.selectboth.i16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xffff +; GISEL-NEXT: and w9, w1, #0xffff +; GISEL-NEXT: mul w8, w8, w9 +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: cmp w8, w8, uxth +; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b) + %m1 = extractvalue { i16, i1 } %m, 0 + %m2 = extractvalue { i16, i1 } %m, 1 + %r = select i1 %m2, i16 %m1, i16 10 + ret i16 %r +} + +define i16 @smulo.selectboth.i16(i16 %a, i16 %b) { +; SDAG-LABEL: smulo.selectboth.i16: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: sxth w8, w1 +; SDAG-NEXT: sxth w9, w0 +; SDAG-NEXT: mul w8, w9, w8 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: cmp w8, w8, sxth +; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.selectboth.i16: +; FAST: // %bb.0: // %entry +; FAST-NEXT: sxth w8, w1 +; FAST-NEXT: sxth w9, w0 +; FAST-NEXT: mul w8, w9, w8 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: cmp w8, w8, sxth +; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.selectboth.i16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: sxth w8, w0 +; GISEL-NEXT: sxth w9, w1 +; GISEL-NEXT: mul w8, w8, w9 +; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: cmp w8, w8, sxth +; GISEL-NEXT: csel 
w0, w8, w9, ne +; GISEL-NEXT: ret +entry: + %m = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b) + %m1 = extractvalue { i16, i1 } %m, 0 + %m2 = extractvalue { i16, i1 } %m, 1 + %r = select i1 %m2, i16 %m1, i16 10 + ret i16 %r +} + +define i32 @umulo.selectboth.i32(i32 %a, i32 %b) { +; SDAG-LABEL: umulo.selectboth.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umull x9, w0, w1 +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: tst x9, #0xffffffff00000000 +; SDAG-NEXT: csel w0, w9, w8, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.selectboth.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umull x9, w0, w1 +; FAST-NEXT: mov w8, #10 +; FAST-NEXT: tst x9, #0xffffffff00000000 +; FAST-NEXT: csel w0, w9, w8, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.selectboth.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umull x9, w0, w1 +; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mul w10, w0, w1 +; GISEL-NEXT: lsr x9, x9, #32 +; GISEL-NEXT: cmp w9, #0 +; GISEL-NEXT: csel w0, w10, w8, ne +; GISEL-NEXT: ret +entry: + %m = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) + %m1 = extractvalue { i32, i1 } %m, 0 + %m2 = extractvalue { i32, i1 } %m, 1 + %r = select i1 %m2, i32 %m1, i32 10 + ret i32 %r +} + +define i32 @smulo.selectboth.i32(i32 %a, i32 %b) { +; SDAG-LABEL: smulo.selectboth.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: smull x9, w0, w1 +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: cmp x9, w9, sxtw +; SDAG-NEXT: csel w0, w9, w8, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.selectboth.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: smull x9, w0, w1 +; FAST-NEXT: mov w8, #10 +; FAST-NEXT: cmp x9, w9, sxtw +; FAST-NEXT: csel w0, w9, w8, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.selectboth.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smull x9, w0, w1 +; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mul w10, w0, w1 +; GISEL-NEXT: asr x9, x9, #32 +; GISEL-NEXT: cmp w9, w10, asr #31 +; GISEL-NEXT: csel w0, w10, w8, ne +; GISEL-NEXT: ret +entry: + %m = call { i32, i1 } 
@llvm.smul.with.overflow.i32(i32 %a, i32 %b) + %m1 = extractvalue { i32, i1 } %m, 0 + %m2 = extractvalue { i32, i1 } %m, 1 + %r = select i1 %m2, i32 %m1, i32 10 + ret i32 %r +} + +define i64 @umulo.selectboth.i64(i64 %a, i64 %b) { +; SDAG-LABEL: umulo.selectboth.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umulh x9, x0, x1 +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mul x10, x0, x1 +; SDAG-NEXT: cmp xzr, x9 +; SDAG-NEXT: csel x0, x10, x8, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.selectboth.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umulh x9, x0, x1 +; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mul x10, x0, x1 +; FAST-NEXT: cmp xzr, x9 +; FAST-NEXT: csel x0, x10, x8, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.selectboth.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umulh x9, x0, x1 +; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mul x10, x0, x1 +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: csel x0, x10, x8, ne +; GISEL-NEXT: ret +entry: + %m = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %m1 = extractvalue { i64, i1 } %m, 0 + %m2 = extractvalue { i64, i1 } %m, 1 + %r = select i1 %m2, i64 %m1, i64 10 + ret i64 %r +} + +define i64 @smulo.selectboth.i64(i64 %a, i64 %b) { +; SDAG-LABEL: smulo.selectboth.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mul x9, x0, x1 +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: smulh x10, x0, x1 +; SDAG-NEXT: cmp x10, x9, asr #63 +; SDAG-NEXT: csel x0, x9, x8, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.selectboth.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mul x9, x0, x1 +; FAST-NEXT: mov x8, #10 +; FAST-NEXT: smulh x10, x0, x1 +; FAST-NEXT: cmp x10, x9, asr #63 +; FAST-NEXT: csel x0, x9, x8, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.selectboth.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: smulh x10, x0, x1 +; GISEL-NEXT: cmp x10, x9, asr #63 +; GISEL-NEXT: csel x0, x9, x8, ne +; GISEL-NEXT: ret +entry: + %m = call { i64, i1 } 
@llvm.smul.with.overflow.i64(i64 %a, i64 %b) + %m1 = extractvalue { i64, i1 } %m, 0 + %m2 = extractvalue { i64, i1 } %m, 1 + %r = select i1 %m2, i64 %m1, i64 10 + ret i64 %r +} + + ; ; Check the use of the overflow bit in combination with a branch instruction. ; define zeroext i1 @saddo.br.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: saddo.br.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn w0, w1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.br.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn w0, w1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, vs +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.br.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn w0, w1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo.br.i32 -; CHECK: cmn w0, w1 -; CHECK-NEXT: b.vc %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -575,10 +2176,29 @@ continue: } define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: saddo.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: saddo.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, vs +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: saddo.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: saddo.br.i64 -; CHECK: cmn x0, x1 -; CHECK-NEXT: b.vc %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -592,10 +2212,29 @@ continue: } define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) { +; 
SDAG-LABEL: uaddo.br.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn w0, w1 +; SDAG-NEXT: cset w0, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.br.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn w0, w1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, hs +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.br.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn w0, w1 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.br.i32 -; CHECK: cmn w0, w1 -; CHECK-NEXT: b.lo %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -609,10 +2248,29 @@ continue: } define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: uaddo.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x1 +; SDAG-NEXT: cset w0, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: uaddo.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, hs +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: uaddo.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x1 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: uaddo.br.i64 -; CHECK: cmn x0, x1 -; CHECK-NEXT: b.lo %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -626,10 +2284,29 @@ continue: } define zeroext i1 @ssubo.br.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: ssubo.br.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp w0, w1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.br.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp w0, w1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, vs +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; 
FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.br.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo.br.i32 -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.vc %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -643,10 +2320,29 @@ continue: } define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: ssubo.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp x0, x1 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: ssubo.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp x0, x1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, vs +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: ssubo.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp x0, x1 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: ssubo.br.i64 -; CHECK: cmp x0, x1 -; CHECK-NEXT: b.vc %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -660,10 +2356,29 @@ continue: } define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: usubo.br.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp w0, w1 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.br.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp w0, w1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, lo +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.br.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, lo +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.br.i32 -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.hs %t = call {i32, i1} 
@llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -677,10 +2392,29 @@ continue: } define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: usubo.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmp x0, x1 +; SDAG-NEXT: cset w0, hs +; SDAG-NEXT: ret +; +; FAST-LABEL: usubo.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmp x0, x1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, lo +; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: usubo.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmp x0, x1 +; GISEL-NEXT: cset w8, lo +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: usubo.br.i64 -; CHECK: cmp x0, x1 -; CHECK-NEXT: b.hs %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -694,11 +2428,32 @@ continue: } define zeroext i1 @smulo.br.i32(i32 %v1, i32 %v2) { +; SDAG-LABEL: smulo.br.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: smull x8, w0, w1 +; SDAG-NEXT: cmp x8, w8, sxtw +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.br.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: smull x9, w0, w1 +; FAST-NEXT: mov w8, #1 +; FAST-NEXT: cmp x9, w9, sxtw +; FAST-NEXT: cset w9, ne +; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.br.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smull x8, w0, w1 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: cmp w8, w9, asr #31 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.br.i32 -; CHECK: smull x[[MREG:[0-9]+]], w0, w1 -; CHECK-NEXT: cmp x[[MREG]], w[[MREG]], sxtw -; CHECK-NEXT: b.eq %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -712,12 +2467,33 
@@ continue: } define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: smulo.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mul x8, x0, x1 +; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: cmp x9, x8, asr #63 +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mul x9, x0, x1 +; FAST-NEXT: mov w8, #1 +; FAST-NEXT: smulh x10, x0, x1 +; FAST-NEXT: cmp x10, x9, asr #63 +; FAST-NEXT: cset w9, ne +; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: mul x8, x0, x1 +; GISEL-NEXT: smulh x9, x0, x1 +; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo.br.i64 -; CHECK: mul [[MREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1 -; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63 -; CHECK-NEXT: b.eq %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -731,10 +2507,29 @@ continue: } define zeroext i1 @smulo2.br.i64(i64 %v1) { +; SDAG-LABEL: smulo2.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x0 +; SDAG-NEXT: cset w0, vc +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo2.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x0 +; FAST-NEXT: mov w8, #1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo2.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x0 +; GISEL-NEXT: cset w8, vs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: smulo2.br.i64 -; CHECK: cmn x0, x0 -; CHECK-NEXT: b.vc %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -748,11 +2543,31 @@ continue: } define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) { 
+; SDAG-LABEL: umulo.br.i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umull x8, w0, w1 +; SDAG-NEXT: tst x8, #0xffffffff00000000 +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.br.i32: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umull x9, w0, w1 +; FAST-NEXT: mov w8, #1 +; FAST-NEXT: tst x9, #0xffffffff00000000 +; FAST-NEXT: cset w9, ne +; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.br.i32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umull x8, w0, w1 +; GISEL-NEXT: lsr x8, x8, #32 +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.br.i32 -; CHECK: umull [[MREG:x[0-9]+]], w0, w1 -; CHECK-NEXT: tst [[MREG]], #0xffffffff00000000 -; CHECK-NEXT: b.eq %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 %obit = extractvalue {i32, i1} %t, 1 @@ -766,10 +2581,30 @@ continue: } define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) { +; SDAG-LABEL: umulo.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: umulh x8, x0, x1 +; SDAG-NEXT: cmp xzr, x8 +; SDAG-NEXT: cset w0, eq +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: umulh x9, x0, x1 +; FAST-NEXT: mov w8, #1 +; FAST-NEXT: cmp xzr, x9 +; FAST-NEXT: cset w9, ne +; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: umulh x8, x0, x1 +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo.br.i64 -; CHECK: umulh [[REG:x[0-9]+]], x0, x1 -; CHECK-NEXT: {{cbz|cmp}} %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -783,10 +2618,29 @@ continue: } define zeroext i1 @umulo2.br.i64(i64 %v1) { +; SDAG-LABEL: umulo2.br.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: cmn x0, x0 +; 
SDAG-NEXT: cset w0, lo +; SDAG-NEXT: ret +; +; FAST-LABEL: umulo2.br.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: cmn x0, x0 +; FAST-NEXT: mov w8, #1 +; FAST-NEXT: cset w9, hs +; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: and w0, w8, #0x1 +; FAST-NEXT: ret +; +; GISEL-LABEL: umulo2.br.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: cmn x0, x0 +; GISEL-NEXT: cset w8, hs +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: cset w0, eq +; GISEL-NEXT: ret entry: -; CHECK-LABEL: umulo2.br.i64 -; CHECK: cmn x0, x0 -; CHECK-NEXT: b.lo %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2) %val = extractvalue {i64, i1} %t, 0 %obit = extractvalue {i64, i1} %t, 1 @@ -799,16 +2653,28 @@ continue: ret i1 true } +declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone +declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone +declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) nounwind readnone declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) nounwind readnone declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone +declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8) nounwind readnone +declare {i16, i1} 
@llvm.smul.with.overflow.i16(i16, i16) nounwind readnone declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone +declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone +declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone From d4d9de362b6ac2aac67e557f819c57dcfe79e2fe Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Tue, 14 Dec 2021 14:34:44 +0000 Subject: [PATCH 790/992] [CMake] Support passing arguments to build tool for external projects Add CMake variable LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS to allow arguments to be passed to the native tool used in CMake --build invocations for external projects. Can be used to pass extra arguments for enhanced versions of build tools, e.g. distributed build options. Differential Revision: https://reviews.llvm.org/D115815 --- llvm/CMakeLists.txt | 4 ++++ llvm/cmake/modules/LLVMExternalProjectUtils.cmake | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index edc2c8cded9c..9548dfff0e2a 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -176,6 +176,10 @@ if(LLVM_CCACHE_BUILD) endif() endif() +set(LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS "" CACHE STRING + "Optional arguments for the native tool used in CMake --build invocations for external projects.") +mark_as_advanced(LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS) + option(LLVM_DEPENDENCY_DEBUGGING "Dependency debugging mode to verify correctly expressed library dependencies (Darwin only)" OFF) # Some features of the LLVM build may be disallowed when dependency debugging is diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake index 5f19098614db..7c417b41cd34 100644 --- 
a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake @@ -11,8 +11,14 @@ function(llvm_ExternalProject_BuildCmd out_var target bin_dir) # Use special command for Makefiles to support parallelism. set(${out_var} "$(MAKE)" "-C" "${bin_dir}" "${target}" PARENT_SCOPE) else() + set(tool_args "${LLVM_EXTERNAL_PROJECT_BUILD_TOOL_ARGS}") + if(NOT tool_args STREQUAL "") + string(CONFIGURE "${tool_args}" tool_args @ONLY) + string(PREPEND tool_args "-- ") + separate_arguments(tool_args UNIX_COMMAND "${tool_args}") + endif() set(${out_var} ${CMAKE_COMMAND} --build ${bin_dir} --target ${target} - --config ${ARG_CONFIGURATION} PARENT_SCOPE) + --config ${ARG_CONFIGURATION} ${tool_args} PARENT_SCOPE) endif() endfunction() From 86d113a8b8aec1092d51115a8ff3e7e6682d1931 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 6 Jan 2022 11:52:19 +0000 Subject: [PATCH 791/992] [SCEVExpand] Do not create redundant 'or false' for pred expansion. This patch updates SCEVExpander::expandUnionPredicate to not create redundant 'or false, x' instructions. While those are trivially foldable, they can be easily avoided and hinder code that checks the size/cost of the generated checks before further folds. I am planning on look into a few other similar improvements to code generated by SCEVExpander. I remember a while ago @lebedev.ri working on doing some trivial folds like that in IRBuilder itself, but there where concerns that such changes may subtly break existing code. 
Reviewed By: reames, lebedev.ri Differential Revision: https://reviews.llvm.org/D116696 --- .../Utils/ScalarEvolutionExpander.cpp | 11 +++-- .../scev-inserted-runtime-check.ll | 6 +-- .../ARM/mve-gather-scatter-tailpred.ll | 3 +- .../PowerPC/optimal-epilog-vectorization.ll | 6 +-- .../illegal-parallel-loop-uniform-write.ll | 3 +- .../Transforms/LoopVectorize/X86/pr35432.ll | 3 +- .../first-order-recurrence-complex.ll | 6 +-- .../Transforms/LoopVectorize/induction.ll | 42 ++++++++----------- .../optimal-epilog-vectorization.ll | 3 +- .../pr30654-phiscev-sext-trunc.ll | 9 ++-- llvm/test/Transforms/LoopVectorize/pr45259.ll | 3 +- .../runtime-check-needed-but-empty.ll | 3 +- .../runtime-check-small-clamped-bounds.ll | 9 ++-- .../wrapping-pointer-versioning.ll | 15 +++---- 14 files changed, 45 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index c840ee85795f..1f12ece7cc12 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2578,17 +2578,16 @@ Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, Instruction *IP) { - auto *BoolType = IntegerType::get(IP->getContext(), 1); - Value *Check = ConstantInt::getNullValue(BoolType); - // Loop over all checks in this set. 
+ SmallVector Checks; for (auto Pred : Union->getPredicates()) { - auto *NextCheck = expandCodeForPredicate(Pred, IP); + Checks.push_back(expandCodeForPredicate(Pred, IP)); Builder.SetInsertPoint(IP); - Check = Builder.CreateOr(Check, NextCheck); } - return Check; + if (Checks.empty()) + return ConstantInt::getFalse(IP->getContext()); + return Builder.CreateOr(Checks); } Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll index 5dd6c28a34c8..2fc49f25789c 100644 --- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -25,7 +25,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias % ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -36,7 +35,7 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias % ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] ; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] @@ -174,7 +173,6 @@ define void 
@f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -185,7 +183,7 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3 ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*) ; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index 82e36d9af912..d610ef3aa124 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -348,8 +348,7 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STRIDE:%.*]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = or i1 false, [[IDENT_CHECK]] -; CHECK-NEXT: br i1 [[TMP0]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; 
CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index d734763802f5..7c3937408052 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -549,8 +549,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 ; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; VF-TWO-CHECK: vector.main.loop.iter.check: ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -780,8 +779,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 ; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; VF-FOUR-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; 
VF-FOUR-CHECK: vector.main.loop.iter.check: ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index c5cfc54c1529..21538398456a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -67,8 +67,7 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = or i1 false, [[TMP14]] -; CHECK-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index c3c613bec944..f53e8e6e1d5e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -60,8 +60,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255 ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP19:%.*]] = or i1 false, [[TMP18]] -; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: 
[[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP6]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 92168421e382..e2ea0afc9d18 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -650,8 +650,7 @@ define void @sink_dominance(i32* %ptr, i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] @@ -739,8 +738,7 @@ define void @sink_dominance_2(i32* %ptr, i32 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 701303d813ad..68b9ce0f7353 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -3563,7 +3563,6 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { ; CHECK-NEXT: [[TMP6:%.*]] = select 
i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] @@ -3572,7 +3571,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { ; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 @@ -3791,7 +3790,6 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] @@ -3800,7 +3798,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 
@@ -3992,7 +3990,6 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { ; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] @@ -4001,7 +3998,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { ; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 @@ -4230,7 +4227,6 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] @@ -4239,7 +4235,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label 
[[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 @@ -4644,8 +4640,7 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] @@ -4688,9 +4683,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; IND: vector.scevcheck: -; IND-NEXT: [[TMP0:%.*]] = add i64 [[K]], -2147483649 -; IND-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -2147483648 -; IND-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; IND-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 +; IND-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648 +; IND-NEXT: br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2 ; IND-NEXT: br label [[VECTOR_BODY:%.*]] @@ -4730,9 +4725,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4 ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; UNROLL: vector.scevcheck: -; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[K]], -2147483649 -; UNROLL-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -2147483648 -; UNROLL-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: [[TMP0:%.*]] = add i64 
[[K]], -1 +; UNROLL-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648 +; UNROLL-NEXT: br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -4785,8 +4780,7 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] @@ -4837,9 +4831,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8 ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; INTERLEAVE: vector.scevcheck: -; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[K]], -2147483649 -; INTERLEAVE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -2147483648 -; INTERLEAVE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648 +; INTERLEAVE-NEXT: br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -6563,10 +6557,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] ; CHECK-NEXT: 
[[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[IDENT_CHECK]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 @@ -6795,10 +6788,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sext i8 [[TMP1]] to i32 ; UNROLL-NO-IC-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[IDENT_CHECK]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 5b249f0dc24c..d191d15f393f 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -179,8 +179,7 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] -; CHECK-NEXT: br 
i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index e0d0a4221673..2e7ad450a4c5 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -61,10 +61,9 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[IDENT_CHECK]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 @@ -188,10 +187,9 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[IDENT_CHECK]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP15]], 
[[IDENT_CHECK]] ; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 @@ -389,8 +387,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP15:%.*]] = or i1 false, [[TMP14]] -; CHECK-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 088695c4e609..a5e8d3bb8953 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -31,8 +31,7 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255 ; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index c3c073628754..46c383bcce33 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -7,8 +7,7 
@@ define void @test(float* %A, i32 %x) { ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = or i1 false, [[IDENT_CHECK]] -; CHECK-NEXT: br i1 [[TMP0]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index 64ab374b64fb..b29d29b427d0 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -27,8 +27,7 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 @@ -118,8 +117,7 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: 
[[TMP10:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 @@ -288,8 +286,7 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll index 84a6b67548d1..503e7ca513a5 100644 --- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll +++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll @@ -42,7 +42,6 @@ define void @f1(i16* noalias %a, ; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -53,7 +52,7 @@ define void @f1(i16* noalias %a, ; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] ; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] ; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] +; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: 
for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] @@ -169,7 +168,6 @@ define void @f2(i16* noalias %a, ; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] ; LV-NEXT: [[TMP12:%.*]] = trunc i64 [[N]] to i31 ; LV-NEXT: [[TMP13:%.*]] = zext i31 [[TMP12]] to i64 ; LV-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1 @@ -185,7 +183,7 @@ define void @f2(i16* noalias %a, ; LV-NEXT: [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]] ; LV-NEXT: [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]] ; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP11]], [[TMP21]] +; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP10]], [[TMP21]] ; LV-NEXT: br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] @@ -285,7 +283,6 @@ define void @f3(i16* noalias %a, ; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -296,7 +293,7 @@ define void @f3(i16* noalias %a, ; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]] ; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]] ; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]] +; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]] ; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: 
for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] @@ -388,7 +385,6 @@ define void @f4(i16* noalias %a, ; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] ; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64 ; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) @@ -402,7 +398,7 @@ define void @f4(i16* noalias %a, ; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]] ; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]] ; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP11]], [[TMP19]] +; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]] ; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] @@ -502,7 +498,6 @@ define void @f5(i16* noalias %a, ; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; LV-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]] ; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64 ; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]] ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) @@ -516,7 +511,7 @@ define void @f5(i16* noalias %a, ; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]] ; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]] ; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP11]], [[TMP19]] +; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]] ; LV-NEXT: br i1 [[TMP20]], label 
[[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] From 8eb74626fa454d33edc1eb86f0c9ce3a006c7dfd Mon Sep 17 00:00:00 2001 From: Peixin-Qiao Date: Thu, 6 Jan 2022 20:00:16 +0800 Subject: [PATCH 792/992] [flang][OpenMP] Add some semantic checks for threadprivate and declare target directives This supports the following checks for THREADPRIVATE Directive: ``` [5.1] 2.21.2 THREADPRIVATE Directive A threadprivate variable must not appear in any clause except the copyin, copyprivate, schedule, num_threads, thread_limit, and if clauses. ``` This supports the following checks for DECLARE TARGET Directive: ``` [5.1] 2.14.7 Declare Target Directive A threadprivate variable cannot appear in the directive. ``` Besides, procedure name and the entity with PARAMETER attribute cannot be in the threadprivate directive. The main program name and module name cannot be in the threadprivate directive and declare target directive. There is no clear description or restriction about the entity with PARAMETER attribute in OpenMP 5.1 Specification, and a warning is given. 
Reviewed By: kiranchandramohan, shraiysh, NimishMishra Differential Revision: https://reviews.llvm.org/D114941 --- flang/lib/Semantics/check-omp-structure.cpp | 92 +++++++++++++++++-- .../Semantics/omp-declarative-directive.f90 | 5 +- flang/test/Semantics/omp-declare-target02.f90 | 24 ----- flang/test/Semantics/omp-declare-target03.f90 | 17 ++++ flang/test/Semantics/omp-declare-target04.f90 | 16 ++++ flang/test/Semantics/omp-threadprivate02.f90 | 8 -- flang/test/Semantics/omp-threadprivate03.f90 | 27 ++++++ flang/test/Semantics/omp-threadprivate04.f90 | 48 ++++++++++ 8 files changed, 196 insertions(+), 41 deletions(-) create mode 100644 flang/test/Semantics/omp-declare-target03.f90 create mode 100644 flang/test/Semantics/omp-declare-target04.f90 create mode 100644 flang/test/Semantics/omp-threadprivate03.f90 create mode 100644 flang/test/Semantics/omp-threadprivate04.f90 diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 16efa1edf8f9..db7dd2e9670f 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -844,26 +844,63 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar( common::visitors{ [&](const parser::Designator &) { if (const auto *name{parser::Unwrap(ompObject)}) { - const auto &scope{context_.FindScope(name->symbol->name())}; - if (FindCommonBlockContaining(*name->symbol)) { + const auto &declScope{ + GetProgramUnitContaining(name->symbol->GetUltimate())}; + const auto *sym = + declScope.parent().FindSymbol(name->symbol->name()); + if (sym && + (sym->has() || + sym->has())) { + context_.Say(name->source, + "The module name or main program name cannot be in a %s " + "directive"_err_en_US, + ContextDirectiveAsFortran()); + } else if (name->symbol->GetUltimate().IsSubprogram()) { + if (GetContext().directive == + llvm::omp::Directive::OMPD_threadprivate) + context_.Say(name->source, + "The procedure name cannot be in a %s " + 
"directive"_err_en_US, + ContextDirectiveAsFortran()); + // TODO: Check for procedure name in declare target directive. + } else if (name->symbol->attrs().test(Attr::PARAMETER)) { + if (GetContext().directive == + llvm::omp::Directive::OMPD_threadprivate) + context_.Say(name->source, + "The entity with PARAMETER attribute cannot be in a %s " + "directive"_err_en_US, + ContextDirectiveAsFortran()); + else if (GetContext().directive == + llvm::omp::Directive::OMPD_declare_target) + context_.Say(name->source, + "The entity with PARAMETER attribute is used in a %s " + "directive"_en_US, + ContextDirectiveAsFortran()); + } else if (FindCommonBlockContaining(*name->symbol)) { context_.Say(name->source, "A variable in a %s directive cannot be an element of a " "common block"_err_en_US, ContextDirectiveAsFortran()); } else if (!IsSave(*name->symbol) && - scope.kind() != Scope::Kind::MainProgram && - scope.kind() != Scope::Kind::Module) { + declScope.kind() != Scope::Kind::MainProgram && + declScope.kind() != Scope::Kind::Module) { context_.Say(name->source, "A variable that appears in a %s directive must be " "declared in the scope of a module or have the SAVE " "attribute, either explicitly or implicitly"_err_en_US, ContextDirectiveAsFortran()); - } - if (FindEquivalenceSet(*name->symbol)) { + } else if (FindEquivalenceSet(*name->symbol)) { context_.Say(name->source, "A variable in a %s directive cannot appear in an " "EQUIVALENCE statement"_err_en_US, ContextDirectiveAsFortran()); + } else if (name->symbol->test(Symbol::Flag::OmpThreadprivate) && + GetContext().directive == + llvm::omp::Directive::OMPD_declare_target) { + context_.Say(name->source, + "A THREADPRIVATE variable cannot appear in a %s " + "directive"_err_en_US, + ContextDirectiveAsFortran()); } } }, @@ -1407,6 +1444,49 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) { llvm::omp::Clause::OMPC_copyprivate, {llvm::omp::Clause::OMPC_nowait}); } + auto testThreadprivateVarErr = [&](Symbol sym, 
parser::Name name, + llvmOmpClause clauseTy) { + if (sym.test(Symbol::Flag::OmpThreadprivate)) + context_.Say(name.source, + "A THREADPRIVATE variable cannot be in %s clause"_err_en_US, + parser::ToUpperCaseLetters(getClauseName(clauseTy).str())); + }; + + // [5.1] 2.21.2 Threadprivate Directive Restriction + OmpClauseSet threadprivateAllowedSet{llvm::omp::Clause::OMPC_copyin, + llvm::omp::Clause::OMPC_copyprivate, llvm::omp::Clause::OMPC_schedule, + llvm::omp::Clause::OMPC_num_threads, llvm::omp::Clause::OMPC_thread_limit, + llvm::omp::Clause::OMPC_if}; + for (auto it : GetContext().clauseInfo) { + llvmOmpClause type = it.first; + const auto *clause = it.second; + if (!threadprivateAllowedSet.test(type)) { + if (const auto *objList{GetOmpObjectList(*clause)}) { + for (const auto &ompObject : objList->v) { + std::visit( + common::visitors{ + [&](const parser::Designator &) { + if (const auto *name{ + parser::Unwrap(ompObject)}) + testThreadprivateVarErr( + name->symbol->GetUltimate(), *name, type); + }, + [&](const parser::Name &name) { + if (name.symbol) { + for (const auto &mem : + name.symbol->get().objects()) { + testThreadprivateVarErr(mem->GetUltimate(), name, type); + break; + } + } + }, + }, + ompObject.u); + } + } + } + } + CheckRequireAtLeastOneOf(); } diff --git a/flang/test/Semantics/omp-declarative-directive.f90 b/flang/test/Semantics/omp-declarative-directive.f90 index b9b39a309687..f53d73a742bd 100644 --- a/flang/test/Semantics/omp-declarative-directive.f90 +++ b/flang/test/Semantics/omp-declarative-directive.f90 @@ -44,9 +44,8 @@ module m2 contains subroutine foo !$omp declare target - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !ERROR: A variable that appears in a 
DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !WARNING: The entity with PARAMETER attribute is used in a DECLARE TARGET directive + !WARNING: The entity with PARAMETER attribute is used in a DECLARE TARGET directive !$omp declare target (foo, N, M) !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly diff --git a/flang/test/Semantics/omp-declare-target02.f90 b/flang/test/Semantics/omp-declare-target02.f90 index 2ef5df51c672..2e4e5ee64028 100644 --- a/flang/test/Semantics/omp-declare-target02.f90 +++ b/flang/test/Semantics/omp-declare-target02.f90 @@ -63,17 +63,9 @@ subroutine func() !$omp declare target (arr3) - !ERROR: Implicitly typed local entity 'blk2' not allowed in specification expression - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target (blk2) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a2) - !ERROR: Implicitly typed local entity 'blk3' not allowed in specification expression - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target (blk3) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a3) @@ -82,17 +74,9 @@ subroutine func() !$omp declare target to (arr3_to) - !ERROR: Implicitly typed local entity 'blk2_to' not allowed in specification expression - !ERROR: A variable that appears in a DECLARE TARGET directive 
must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target to (blk2_to) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a2_to) - !ERROR: Implicitly typed local entity 'blk3_to' not allowed in specification expression - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target to (blk3_to) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a3_to) @@ -101,17 +85,9 @@ subroutine func() !$omp declare target link (arr3_link) - !ERROR: Implicitly typed local entity 'blk2_link' not allowed in specification expression - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target link (blk2_link) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target link (a2_link) - !ERROR: Implicitly typed local entity 'blk3_link' not allowed in specification expression - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target link (blk3_link) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target link (a3_link) end diff --git a/flang/test/Semantics/omp-declare-target03.f90 b/flang/test/Semantics/omp-declare-target03.f90 new file mode 100644 index 000000000000..8b291f22f7fa --- /dev/null +++ b/flang/test/Semantics/omp-declare-target03.f90 @@ -0,0 +1,17 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 
2.14.7 Declare Target Directive + +module mod1 +end + +program main + use mod1 + + !ERROR: The module name or main program name cannot be in a DECLARE TARGET directive + !$omp declare target (mod1) + + !ERROR: The module name or main program name cannot be in a DECLARE TARGET directive + !$omp declare target (main) +end diff --git a/flang/test/Semantics/omp-declare-target04.f90 b/flang/test/Semantics/omp-declare-target04.f90 new file mode 100644 index 000000000000..9f3b7757bc9b --- /dev/null +++ b/flang/test/Semantics/omp-declare-target04.f90 @@ -0,0 +1,16 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 2.14.7 Declare Target Directive + +program main + integer, save :: x, y + + !$omp threadprivate(x) + + !ERROR: A THREADPRIVATE variable cannot appear in a DECLARE TARGET directive + !ERROR: A THREADPRIVATE variable cannot appear in a DECLARE TARGET directive + !$omp declare target (x, y) + + !$omp threadprivate(y) +end diff --git a/flang/test/Semantics/omp-threadprivate02.f90 b/flang/test/Semantics/omp-threadprivate02.f90 index 4a4034908f40..5978a8e832fe 100644 --- a/flang/test/Semantics/omp-threadprivate02.f90 +++ b/flang/test/Semantics/omp-threadprivate02.f90 @@ -40,19 +40,11 @@ subroutine func() !$omp threadprivate(/blk2/) - !ERROR: Implicitly typed local entity 'blk2' not allowed in specification expression - !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp threadprivate(blk2) - !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block !$omp threadprivate(a2) !$omp threadprivate(/blk3/) - !ERROR: Implicitly typed local entity 'blk3' not allowed in specification expression - !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either 
explicitly or implicitly - !$omp threadprivate(blk3) - !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block !$omp threadprivate(a3) end diff --git a/flang/test/Semantics/omp-threadprivate03.f90 b/flang/test/Semantics/omp-threadprivate03.f90 new file mode 100644 index 000000000000..5a9e0dbaad98 --- /dev/null +++ b/flang/test/Semantics/omp-threadprivate03.f90 @@ -0,0 +1,27 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 2.21.2 Threadprivate Directive + +module mod1 +end + +program main + use mod1 + integer, parameter :: i = 1 + + !ERROR: The module name or main program name cannot be in a THREADPRIVATE directive + !$omp threadprivate(mod1) + + !ERROR: The module name or main program name cannot be in a THREADPRIVATE directive + !$omp threadprivate(main) + + !ERROR: The entity with PARAMETER attribute cannot be in a THREADPRIVATE directive + !$omp threadprivate(i) + +contains + subroutine sub() + !ERROR: The procedure name cannot be in a THREADPRIVATE directive + !$omp threadprivate(sub) + end +end diff --git a/flang/test/Semantics/omp-threadprivate04.f90 b/flang/test/Semantics/omp-threadprivate04.f90 new file mode 100644 index 000000000000..45684fcb2baa --- /dev/null +++ b/flang/test/Semantics/omp-threadprivate04.f90 @@ -0,0 +1,48 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 
2.21.2 Threadprivate Directive + +program main + integer :: i, N = 10 + integer, save :: x + common /blk/ y + + !$omp threadprivate(x, /blk/) + + !$omp parallel num_threads(x) + !$omp end parallel + + !$omp single copyprivate(x, /blk/) + !$omp end single + + !$omp do schedule(static, x) + do i = 1, N + y = x + end do + !$omp end do + + !$omp parallel copyin(x, /blk/) + !$omp end parallel + + !$omp parallel if(x > 1) + !$omp end parallel + + !$omp teams thread_limit(x) + !$omp end teams + + !ERROR: A THREADPRIVATE variable cannot be in PRIVATE clause + !ERROR: A THREADPRIVATE variable cannot be in PRIVATE clause + !$omp parallel private(x, /blk/) + !$omp end parallel + + !ERROR: A THREADPRIVATE variable cannot be in FIRSTPRIVATE clause + !ERROR: A THREADPRIVATE variable cannot be in FIRSTPRIVATE clause + !$omp parallel firstprivate(x, /blk/) + !$omp end parallel + + !ERROR: A THREADPRIVATE variable cannot be in SHARED clause + !ERROR: A THREADPRIVATE variable cannot be in SHARED clause + !$omp parallel shared(x, /blk/) + !$omp end parallel +end From 9cbe000df269117905ee68a87800f1d96cfb9885 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 6 Jan 2022 10:53:59 +0000 Subject: [PATCH 793/992] [LV] Load/store/reduction type must be sized, assert it. This addresses a suggestion by @nikic on D115356. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 30d34083959d..a61efbf2460d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5990,8 +5990,8 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() { if (auto *ST = dyn_cast(&I)) T = ST->getValueOperand()->getType(); - if (!T->isSized()) - continue; + assert(T->isSized() && + "Expected the load/store/recurrence type to be sized"); ElementTypesInLoop.insert(T); } From f430c1eb6443282481e72e4fd209c9ada61e7cf1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 6 Jan 2022 12:10:58 +0100 Subject: [PATCH 794/992] [Tests] Add elementtype attribute to indirect inline asm operands (NFC) This updates LLVM tests for D116531 by adding elementtype attributes to operands that correspond to indirect asm constraints. 
--- llvm/test/Analysis/BasicAA/pr52735.ll | 2 +- .../AArch64/GlobalISel/arm64-fallback.ll | 2 +- .../GlobalISel/irtranslator-inline-asm.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-inline-asm.ll | 8 +- llvm/test/CodeGen/AArch64/arm64_32.ll | 4 +- .../CodeGen/AArch64/inlineasm-X-constraint.ll | 8 +- .../GlobalISel/irtranslator-inline-asm.ll | 2 +- .../ARM/2007-05-14-RegScavengerAssert.ll | 4 +- .../CodeGen/ARM/2009-05-18-InlineAsmMem.ll | 2 +- .../ARM/2013-11-08-inline-asm-neon-array.ll | 2 +- llvm/test/CodeGen/ARM/arm-modifier.ll | 8 +- llvm/test/CodeGen/ARM/indirect-reg-input.ll | 2 +- llvm/test/CodeGen/ARM/inline-diagnostics.ll | 2 +- llvm/test/CodeGen/ARM/inlineasm-64bit.ll | 6 +- .../CodeGen/ARM/inlineasm-X-constraint.ll | 8 +- llvm/test/CodeGen/ARM/inlineasm3.ll | 10 +- llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll | 4 +- llvm/test/CodeGen/ARM/pr25317.ll | 2 +- .../AVR/inline-asm/inline-asm-invalid.ll | 2 +- llvm/test/CodeGen/BPF/inline_asm.ll | 4 +- .../Generic/2007-04-27-InlineAsm-X-Dest.ll | 2 +- .../Generic/2007-04-27-LargeMemObject.ll | 4 +- .../CodeGen/Generic/2007-12-17-InvokeAsm.ll | 2 +- .../CodeGen/Generic/2008-02-20-MatchingMem.ll | 2 +- llvm/test/CodeGen/Hexagon/inline-asm-error.ll | 2 +- .../CodeGen/Hexagon/inline-asm-hexagon.ll | 2 +- llvm/test/CodeGen/Hexagon/jump-prob.ll | 4 +- .../CodeGen/Hexagon/rdf-inline-asm-fixed.ll | 2 +- llvm/test/CodeGen/Hexagon/regp-underflow.ll | 20 ++-- .../MSP430/inline-asm-absolute-addressing.ll | 2 +- .../CodeGen/MSP430/mult-alt-generic-msp430.ll | 4 +- llvm/test/CodeGen/Mips/constraint-empty.ll | 2 +- .../CodeGen/Mips/inlineasm-constraint-R.ll | 8 +- .../CodeGen/Mips/inlineasm-constraint-ZC-1.ll | 18 +-- .../CodeGen/Mips/inlineasm-constraint-ZC-2.ll | 2 +- .../CodeGen/Mips/inlineasm-constraint-m-1.ll | 8 +- .../CodeGen/Mips/inlineasm-constraint-m-2.ll | 12 +- .../CodeGen/Mips/inlineasm-constraint-o.ll | 8 +- .../2007-04-30-InlineAsmEarlyClobber.ll | 2 +- .../2007-05-14-InlineAsmSelectCrash.ll | 2 +- 
.../PowerPC/2007-09-11-RegCoalescerAssert.ll | 2 +- .../2007-10-16-InlineAsmFrameOffset.ll | 2 +- ...009-08-17-inline-asm-addr-mode-breakage.ll | 2 +- llvm/test/CodeGen/PowerPC/PR3488.ll | 2 +- llvm/test/CodeGen/PowerPC/asm-constraints.ll | 2 +- ...compat-LoadReserve-StoreCond-64bit-only.ll | 2 +- ...tins-ppc-xlcompat-LoadReserve-StoreCond.ll | 6 +- .../builtins-ppc-xlcompat-check-ldarx-opt.ll | 2 +- llvm/test/CodeGen/PowerPC/ia-mem-r0.ll | 36 +++--- llvm/test/CodeGen/PowerPC/ia-neg-const.ll | 2 +- .../PowerPC/inlineasm-output-template.ll | 2 +- .../PowerPC/mult-alt-generic-powerpc.ll | 4 +- .../PowerPC/mult-alt-generic-powerpc64.ll | 4 +- llvm/test/CodeGen/PowerPC/subreg-postra-2.ll | 2 +- llvm/test/CodeGen/PowerPC/subreg-postra.ll | 2 +- .../CodeGen/PowerPC/xray-ret-is-terminator.ll | 4 +- llvm/test/CodeGen/RISCV/inline-asm.ll | 8 +- .../2008-10-10-InlineAsmMemoryOperand.ll | 4 +- llvm/test/CodeGen/SPARC/2011-01-11-CC.ll | 2 +- llvm/test/CodeGen/SPARC/inlineasm.ll | 2 +- .../CodeGen/SPARC/mult-alt-generic-sparc.ll | 4 +- llvm/test/CodeGen/SystemZ/asm-01.ll | 10 +- llvm/test/CodeGen/SystemZ/asm-02.ll | 14 +-- llvm/test/CodeGen/SystemZ/asm-03.ll | 8 +- llvm/test/CodeGen/SystemZ/asm-04.ll | 12 +- llvm/test/CodeGen/SystemZ/asm-05.ll | 4 +- llvm/test/CodeGen/SystemZ/frame-25.ll | 2 +- llvm/test/CodeGen/WebAssembly/inline-asm.ll | 4 +- llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll | 4 +- .../CodeGen/X86/2006-12-16-InlineAsmCrash.ll | 2 +- .../CodeGen/X86/2007-04-08-InlineAsmCrash.ll | 2 +- .../X86/2007-10-28-inlineasm-q-modifier.ll | 2 +- .../X86/2007-11-03-x86-64-q-constraint.ll | 2 +- .../X86/2007-11-04-LiveVariablesBug.ll | 3 +- .../X86/2008-02-22-LocalRegAllocBug.ll | 2 +- .../CodeGen/X86/2008-02-25-InlineAsmBug.ll | 2 +- .../CodeGen/X86/2008-09-17-inline-asm-1.ll | 4 +- .../CodeGen/X86/2008-09-18-inline-asm-2.ll | 2 +- .../CodeGen/X86/2009-04-13-2AddrAssert-2.ll | 2 +- .../X86/2009-04-29-IndirectDestOperands.ll | 2 +- .../X86/2009-07-19-AsmExtraOperands.ll | 2 +- 
.../X86/2009-10-14-LiveVariablesBug.ll | 2 +- .../X86/2010-06-28-matched-g-constraint.ll | 2 +- .../X86/2010-07-13-indirectXconstraint.ll | 2 +- llvm/test/CodeGen/X86/2010-09-16-asmcrash.ll | 2 +- llvm/test/CodeGen/X86/9601.ll | 2 +- llvm/test/CodeGen/X86/asm-indirect-mem.ll | 2 +- .../X86/asm-reg-type-mismatch-avx512.ll | 2 +- .../test/CodeGen/X86/asm-reg-type-mismatch.ll | 2 +- .../X86/callbr-asm-instr-scheduling.ll | 4 +- llvm/test/CodeGen/X86/callbr-asm-kill.mir | 2 +- .../CodeGen/X86/callbr-asm-phi-placement.ll | 2 +- llvm/test/CodeGen/X86/callbr-asm-sink.ll | 2 +- llvm/test/CodeGen/X86/cas.ll | 4 +- llvm/test/CodeGen/X86/complex-asm.ll | 2 +- llvm/test/CodeGen/X86/crash.ll | 4 +- .../CodeGen/X86/inline-asm-A-constraint.ll | 2 +- .../CodeGen/X86/inline-asm-R-constraint.ll | 2 +- .../X86/inline-asm-duplicated-constraint.ll | 2 +- .../CodeGen/X86/inline-asm-flag-output.ll | 112 +++++++++--------- llvm/test/CodeGen/X86/inline-asm-fpstack.ll | 6 +- llvm/test/CodeGen/X86/inline-asm-h.ll | 2 +- llvm/test/CodeGen/X86/inline-asm-pic.ll | 2 +- llvm/test/CodeGen/X86/inline-asm-ptr-cast.ll | 2 +- .../X86/inline-asm-sp-clobber-memcpy.ll | 2 +- .../CodeGen/X86/inline-asm-stack-realign3.ll | 2 +- llvm/test/CodeGen/X86/inline-asm.ll | 2 +- llvm/test/CodeGen/X86/leaf-fp-elim.ll | 2 +- .../test/CodeGen/X86/ms-inline-asm-PR44272.ll | 2 +- llvm/test/CodeGen/X86/ms-inline-asm-array.ll | 2 +- llvm/test/CodeGen/X86/ms-inline-asm-avx512.ll | 2 +- .../X86/ms-inline-asm-redundant-clobber.ll | 2 +- llvm/test/CodeGen/X86/ms-inline-asm.ll | 12 +- .../test/CodeGen/X86/mult-alt-generic-i686.ll | 4 +- .../CodeGen/X86/mult-alt-generic-x86_64.ll | 4 +- llvm/test/CodeGen/X86/mult-alt-x86.ll | 64 +++++----- .../CodeGen/X86/multiple-loop-post-inc.ll | 6 +- .../CodeGen/X86/phys-reg-local-regalloc.ll | 6 +- llvm/test/CodeGen/X86/pr3154.ll | 6 +- .../X86/regalloc-advanced-split-cost.ll | 4 +- .../CodeGen/X86/semantic-interposition-asm.ll | 4 +- ...ative-execution-side-effect-suppression.ll | 2 +- 
llvm/test/CodeGen/X86/win64_regcall.ll | 2 +- llvm/test/CodeGen/XCore/inline-asm.ll | 4 +- .../AddressSanitizer/X86/asm_cpuid.ll | 2 +- .../X86/asm_more_registers_than_available.ll | 2 +- .../AddressSanitizer/X86/bug_11395.ll | 2 +- .../AddressSanitizer/localescape.ll | 2 +- .../MemorySanitizer/msan_asm_conservative.ll | 20 ++-- .../MemorySanitizer/msan_x86_bts_asm.ll | 2 +- .../Instrumentation/SanitizerCoverage/seh.ll | 2 +- llvm/test/Linker/inlineasm.ll | 2 +- llvm/test/MC/AsmParser/pr28805.ll | 2 +- .../FunctionImport/Inputs/inlineasm.ll | 2 +- .../Transforms/Inline/2007-04-15-InlineEH.ll | 2 +- llvm/test/Transforms/Inline/devirtualize-4.ll | 2 +- .../Transforms/InstCombine/getelementptr.ll | 5 +- .../Verifier/inline-asm-indirect-operand.ll | 4 +- 138 files changed, 361 insertions(+), 363 deletions(-) diff --git a/llvm/test/Analysis/BasicAA/pr52735.ll b/llvm/test/Analysis/BasicAA/pr52735.ll index 5b78ab595c50..ba0c8bf97d0b 100644 --- a/llvm/test/Analysis/BasicAA/pr52735.ll +++ b/llvm/test/Analysis/BasicAA/pr52735.ll @@ -17,7 +17,7 @@ define dso_local i32 @foo() { entry: %v = alloca i32, align 4 %0 = bitcast i32* %v to i8* - callbr void asm "movl $$1, $0", "=*m,X,~{dirflag},~{fpsr},~{flags}"(i32* nonnull %v, i8* blockaddress(@foo, %out)) + callbr void asm "movl $$1, $0", "=*m,X,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) nonnull %v, i8* blockaddress(@foo, %out)) to label %asm.fallthrough [label %out] asm.fallthrough: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 13a6fe72c2f0..501ea11dd054 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -100,7 +100,7 @@ define void @asm_indirect_output() { entry: %ap = alloca i8*, align 8 %0 = load i8*, i8** %ap, align 8 - call void asm sideeffect "", "=*r|m,0,~{memory}"(i8** %ap, i8* %0) + call void asm sideeffect "", "=*r|m,0,~{memory}"(i8** 
elementtype(i8*) %ap, i8* %0) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll index b44a6e5dbd67..8aedbad87195 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -208,7 +208,7 @@ define i32 @test_memory_constraint(i32* %a) nounwind { ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %1 = tail call i32 asm "ldr $0, $1", "=r,*m"(i32* %a) + %1 = tail call i32 asm "ldr $0, $1", "=r,*m"(i32* elementtype(i32) %a) ret i32 %1 } diff --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll index e556ec7a3e0e..86e2d2eb607d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -97,7 +97,7 @@ define void @t7(i8* %f, i32 %g) nounwind { entry: %f.addr = alloca i8*, align 8 store i8* %f, i8** %f.addr, align 8 - call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind + call void asm "str ${1:w}, $0", "=*Q,r"(i8** elementtype(i8*) %f.addr, i32 %g) nounwind ret void } @@ -464,7 +464,7 @@ define void @test_zero_address() { ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ret entry: - tail call i32 asm sideeffect "ldr $0, $1 \0A", "=r,*Q"(i32* null) + tail call i32 asm sideeffect "ldr $0, $1 \0A", "=r,*Q"(i32* elementtype(i32) null) ret void } @@ -499,7 +499,7 @@ entry: %m.addr = alloca <9 x float>, align 16 %m = load <9 x float>, <9 x float>* %0, align 16 store <9 x float> %m, <9 x float>* %m.addr, align 16 - call void asm sideeffect "", "=*r|m,0,~{memory}"(<9 x float>* nonnull %m.addr, <9 x float> %m) + call void asm sideeffect "", "=*r|m,0,~{memory}"(<9 x float>* elementtype(<9 x float>) nonnull %m.addr, <9 x float> %m) ret void } @@ -515,6 +515,6 @@ define void @test_o_output_constraint() { ; 
CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %b = alloca i8, align 1 - call void asm "mov $0, 7", "=*o"(i8* %b) + call void asm "mov $0, 7", "=*o"(i8* elementtype(i8) %b) ret void } diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll index b5c2c6ebb81d..b452a9ee419f 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -596,7 +596,7 @@ define void @test_asm_memory(i32* %base.addr) { ; CHECK: add w[[ADDR:[0-9]+]], w0, #4 ; CHECK: str wzr, [x[[ADDR]] %addr = getelementptr i32, i32* %base.addr, i32 1 - call void asm sideeffect "str wzr, $0", "*m"(i32* %addr) + call void asm sideeffect "str wzr, $0", "*m"(i32* elementtype(i32) %addr) ret void } @@ -606,7 +606,7 @@ define void @test_unsafe_asm_memory(i64 %val) { ; CHECK: str wzr, [x[[ADDR]]] %addr_int = trunc i64 %val to i32 %addr = inttoptr i32 %addr_int to i32* - call void asm sideeffect "str wzr, $0", "*m"(i32* %addr) + call void asm sideeffect "str wzr, $0", "*m"(i32* elementtype(i32) %addr) ret void } diff --git a/llvm/test/CodeGen/AArch64/inlineasm-X-constraint.ll b/llvm/test/CodeGen/AArch64/inlineasm-X-constraint.ll index 77652cc071ef..4a226e6d5828 100644 --- a/llvm/test/CodeGen/AArch64/inlineasm-X-constraint.ll +++ b/llvm/test/CodeGen/AArch64/inlineasm-X-constraint.ll @@ -19,7 +19,7 @@ define double @f1(double %f, i32 %pscr_value) { entry: %f.addr = alloca double, align 8 store double %f, double* %f.addr, align 8 - call void asm sideeffect "msr fpsr,$1", "=*X,r"(double* nonnull %f.addr, i32 %pscr_value) nounwind + call void asm sideeffect "msr fpsr,$1", "=*X,r"(double* elementtype(double) nonnull %f.addr, i32 %pscr_value) nounwind %0 = load double, double* %f.addr, align 8 %add = fadd double %0, %0 ret double %add @@ -37,7 +37,7 @@ define i32 @f2(i32 %f, i32 %pscr_value) { entry: %f.addr = alloca i32, align 4 store i32 %f, i32* %f.addr, align 4 - call void asm sideeffect "msr fpsr,$1", "=*X,r"(i32* nonnull %f.addr, i32 
%pscr_value) nounwind + call void asm sideeffect "msr fpsr,$1", "=*X,r"(i32* elementtype(i32) nonnull %f.addr, i32 %pscr_value) nounwind %0 = load i32, i32* %f.addr, align 4 %mul = mul i32 %0, %0 ret i32 %mul @@ -60,7 +60,7 @@ define <8 x i8> @f3() { entry: %vector_res_int8x8 = alloca <8 x i8>, align 8 %0 = getelementptr inbounds <8 x i8>, <8 x i8>* %vector_res_int8x8, i32 0, i32 0 - call void asm sideeffect "msr fpsr,$1", "=*X,r"(<8 x i8>* nonnull %vector_res_int8x8, i32 undef) nounwind + call void asm sideeffect "msr fpsr,$1", "=*X,r"(<8 x i8>* elementtype(<8 x i8>) nonnull %vector_res_int8x8, i32 undef) nounwind %1 = load <8 x i8>, <8 x i8>* %vector_res_int8x8, align 8 %mul = mul <8 x i8> %1, %1 ret <8 x i8> %mul @@ -147,6 +147,6 @@ bb: ; CHECK: str [[Dest]], [x0] define void @f8(i64 *%x) { entry: - tail call void asm sideeffect "add $0, x0, x0", "=*X"(i64 *%x) + tail call void asm sideeffect "add $0, x0, x0", "=*X"(i64* elementtype(i64) %x) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index d31e73e91b60..038b0082c2b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -239,7 +239,7 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 - %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* %a) + %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* elementtype(i32) %a) ret i32 %1 } diff --git a/llvm/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll index 5ace8992102a..78e132e1ecce 100644 --- a/llvm/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll +++ 
b/llvm/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll @@ -21,8 +21,8 @@ bb: ; preds = %bb, %entry bb59: ; preds = %bb %tmp68 = sdiv i64 0, 0 ; [#uses=1] %tmp6869 = trunc i64 %tmp68 to i32 ; [#uses=2] - %tmp81 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* null, i32 %tmp6869, i32 13316085, i32 23, i32 9 ) ; [#uses=0] - %tmp90 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* null, i32 %tmp6869, i32 10568984, i32 23, i32 9 ) ; [#uses=0] + %tmp81 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* elementtype( i32) null, i32 %tmp6869, i32 13316085, i32 23, i32 9 ) ; [#uses=0] + %tmp90 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* elementtype( i32) null, i32 %tmp6869, i32 10568984, i32 23, i32 9 ) ; [#uses=0] unreachable cond_next789: ; preds = %entry diff --git a/llvm/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/llvm/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll index e5c2fb4d67a1..2672aa317ba1 100644 --- a/llvm/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll +++ b/llvm/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll @@ -3,6 +3,6 @@ define void @foo(i32 %i, i32* %p) nounwind { ;CHECK: swp r2, r0, [r1] - %asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind + %asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* elementtype(i32) %p, i32 %i, i32* elementtype(i32) %p) nounwind ret void } diff --git a/llvm/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll b/llvm/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll index 5a864772faef..2c5f00e15ba0 100644 --- a/llvm/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll +++ b/llvm/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll @@ -9,7 +9,7 @@ 
target triple = "armv7--" define void @foo() #0 { %vsrc = alloca %struct.uint8x8x4_t, align 8 %ptr = alloca i8; - %1 = call i8* asm sideeffect "vld4.u8 ${0:h}, [$1], $2", "=*w,=r,r,1"(%struct.uint8x8x4_t* %vsrc, i32 0, i8* %ptr) + %1 = call i8* asm sideeffect "vld4.u8 ${0:h}, [$1], $2", "=*w,=r,r,1"(%struct.uint8x8x4_t* elementtype(%struct.uint8x8x4_t) %vsrc, i32 0, i8* %ptr) ret void } diff --git a/llvm/test/CodeGen/ARM/arm-modifier.ll b/llvm/test/CodeGen/ARM/arm-modifier.ll index 67d468e8abd2..93a16fa26249 100644 --- a/llvm/test/CodeGen/ARM/arm-modifier.ll +++ b/llvm/test/CodeGen/ARM/arm-modifier.ll @@ -35,7 +35,7 @@ define void @f2() nounwind { entry: ; CHECK: f2 ; CHECK: ldr r0, [r{{[0-9]+}}] -call void asm sideeffect "ldr r0, [${0:m}]\0A\09", "*m,~{r0}"(i32** @f2_ptr) nounwind +call void asm sideeffect "ldr r0, [${0:m}]\0A\09", "*m,~{r0}"(i32** elementtype(i32*) @f2_ptr) nounwind ret void } @@ -51,9 +51,9 @@ entry: ; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}} %tmp = load i64, i64* @f3_var, align 4 %tmp1 = load i64, i64* @f3_var2, align 4 -%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind +%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** elementtype(i64*) @f3_ptr, i64 %tmp, i64 %tmp1) nounwind store i64 %0, i64* @f3_var, align 4 -%1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind +%1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** elementtype(i64*) @f3_ptr) nounwind store i64 %1, i64* @f3_var, align 4 ret void } @@ -62,7 +62,7 @@ define i64 @f4(i64* %val) nounwind { entry: ;CHECK-LABEL: f4: ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}] - %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind + %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* 
elementtype(i64) %val) nounwind ret i64 %0 } diff --git a/llvm/test/CodeGen/ARM/indirect-reg-input.ll b/llvm/test/CodeGen/ARM/indirect-reg-input.ll index e82e1dee9428..4c2c77d6bcf4 100644 --- a/llvm/test/CodeGen/ARM/indirect-reg-input.ll +++ b/llvm/test/CodeGen/ARM/indirect-reg-input.ll @@ -9,6 +9,6 @@ define void @switch_to_stack(%struct.my_stack* %stack) nounwind { entry: %regs = getelementptr inbounds %struct.my_stack, %struct.my_stack* %stack, i32 0, i32 0 - tail call void asm "\0A", "=*r,*0"(%struct.myjmp_buf* %regs, %struct.myjmp_buf* %regs) + tail call void asm "\0A", "=*r,*0"(%struct.myjmp_buf* elementtype(%struct.myjmp_buf) %regs, %struct.myjmp_buf* elementtype(%struct.myjmp_buf) %regs) ret void } diff --git a/llvm/test/CodeGen/ARM/inline-diagnostics.ll b/llvm/test/CodeGen/ARM/inline-diagnostics.ll index 3f5b73c5a211..036053c580ee 100644 --- a/llvm/test/CodeGen/ARM/inline-diagnostics.ll +++ b/llvm/test/CodeGen/ARM/inline-diagnostics.ll @@ -7,7 +7,7 @@ define float @inline_func(float %f1, float %f2) #0 { %c1 = alloca %struct.float4, align 4 %c2 = alloca %struct.float4, align 4 %c3 = alloca %struct.float4, align 4 - call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* %c1, %struct.float4* %c2, %struct.float4* %c3) #1, !srcloc !1 + call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* elementtype(%struct.float4) %c1, %struct.float4* elementtype(%struct.float4) %c2, %struct.float4* elementtype(%struct.float4) %c3) #1, !srcloc !1 %x = getelementptr inbounds %struct.float4, %struct.float4* %c3, i32 0, i32 0 %1 = load float, float* %x, align 4 ret float %1 diff --git a/llvm/test/CodeGen/ARM/inlineasm-64bit.ll b/llvm/test/CodeGen/ARM/inlineasm-64bit.ll index 62c71ab375c8..8b68cad32847 100644 --- a/llvm/test/CodeGen/ARM/inlineasm-64bit.ll +++ b/llvm/test/CodeGen/ARM/inlineasm-64bit.ll @@ -5,7 +5,7 @@ define void @i64_write(i64* %p, i64 %val) nounwind { ; CHECK-LABEL: i64_write: ; CHECK: ldrexd 
[[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}] ; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} - %1 = tail call i64 asm sideeffect "1: ldrexd $0, ${0:H}, [$2]\0A strexd $0, $3, ${3:H}, [$2]\0A teq $0, #0\0A bne 1b", "=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %val) nounwind + %1 = tail call i64 asm sideeffect "1: ldrexd $0, ${0:H}, [$2]\0A strexd $0, $3, ${3:H}, [$2]\0A teq $0, #0\0A bne 1b", "=&r,=*Qo,r,r,~{cc}"(i64* elementtype(i64) %p, i64* %p, i64 %val) nounwind ret void } @@ -49,7 +49,7 @@ define void @foo(i64* %p, i64 %i) nounwind { ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}] ; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: {{pop|pop.w}} {{{r[4-9]|r10|r11}} - %1 = tail call { i64, i64 } asm sideeffect "@ atomic64_set\0A1: ldrexd $0, ${0:H}, [$3]\0Aldrexd $1, ${1:H}, [$3]\0A strexd $0, $4, ${4:H}, [$3]\0A teq $0, #0\0A bne 1b", "=&r,=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %i) nounwind + %1 = tail call { i64, i64 } asm sideeffect "@ atomic64_set\0A1: ldrexd $0, ${0:H}, [$3]\0Aldrexd $1, ${1:H}, [$3]\0A strexd $0, $4, ${4:H}, [$3]\0A teq $0, #0\0A bne 1b", "=&r,=&r,=*Qo,r,r,~{cc}"(i64* elementtype(i64) %p, i64* %p, i64 %i) nounwind ret void } @@ -91,7 +91,7 @@ define i64 @tied_64bit_test(i64 %in) nounwind { ; CHECK-LABEL: tied_64bit_test: ; CHECK: OUT([[OUTREG:r[0-9]+]]), IN([[OUTREG]]) %addr = alloca i64 - call void asm "OUT($0), IN($1)", "=*rm,0"(i64* %addr, i64 %in) + call void asm "OUT($0), IN($1)", "=*rm,0"(i64* elementtype(i64) %addr, i64 %in) ret i64 %in } diff --git a/llvm/test/CodeGen/ARM/inlineasm-X-constraint.ll b/llvm/test/CodeGen/ARM/inlineasm-X-constraint.ll index d3d53df11b56..40d8062f2124 100644 --- a/llvm/test/CodeGen/ARM/inlineasm-X-constraint.ll +++ b/llvm/test/CodeGen/ARM/inlineasm-X-constraint.ll @@ -19,7 +19,7 @@ define arm_aapcs_vfpcc double @f1(double %f, i32 %pscr_value) { entry: %f.addr = alloca double, align 8 store double %f, double* %f.addr, 
align 8 - call void asm sideeffect "vmsr fpscr,$1", "=*X,r"(double* nonnull %f.addr, i32 %pscr_value) nounwind + call void asm sideeffect "vmsr fpscr,$1", "=*X,r"(double* elementtype(double) nonnull %f.addr, i32 %pscr_value) nounwind %0 = load double, double* %f.addr, align 8 %add = fadd double %0, %0 ret double %add @@ -37,7 +37,7 @@ define arm_aapcs_vfpcc i32 @f2(i32 %f, i32 %pscr_value) { entry: %f.addr = alloca i32, align 4 store i32 %f, i32* %f.addr, align 4 - call void asm sideeffect "vmsr fpscr,$1", "=*X,r"(i32* nonnull %f.addr, i32 %pscr_value) nounwind + call void asm sideeffect "vmsr fpscr,$1", "=*X,r"(i32* elementtype(i32) nonnull %f.addr, i32 %pscr_value) nounwind %0 = load i32, i32* %f.addr, align 4 %mul = mul i32 %0, %0 ret i32 %mul @@ -66,7 +66,7 @@ define arm_aapcs_vfpcc <8 x i8> @f3() { entry: %vector_res_int8x8 = alloca <8 x i8>, align 8 %0 = getelementptr inbounds <8 x i8>, <8 x i8>* %vector_res_int8x8, i32 0, i32 0 - call void asm sideeffect "vmsr fpscr,$1", "=*X,r"(<8 x i8>* nonnull %vector_res_int8x8, i32 undef) nounwind + call void asm sideeffect "vmsr fpscr,$1", "=*X,r"(<8 x i8>* elementtype(<8 x i8>) nonnull %vector_res_int8x8, i32 undef) nounwind %1 = load <8 x i8>, <8 x i8>* %vector_res_int8x8, align 8 %mul = mul <8 x i8> %1, %1 ret <8 x i8> %mul @@ -152,6 +152,6 @@ bb: ; CHECK: str r{{.*}}, [r0] define void @f8(i32 *%x) { entry: - tail call void asm sideeffect "add $0, r0, r0", "=*X"(i32 *%x) + tail call void asm sideeffect "add $0, r0, r0", "=*X"(i32* elementtype(i32) %x) ret void } diff --git a/llvm/test/CodeGen/ARM/inlineasm3.ll b/llvm/test/CodeGen/ARM/inlineasm3.ll index 59706c4e4180..c318cdfca36f 100644 --- a/llvm/test/CodeGen/ARM/inlineasm3.ll +++ b/llvm/test/CodeGen/ARM/inlineasm3.ll @@ -10,7 +10,7 @@ entry: ; CHECK: vmov.32 d30[0], ; CHECK: vmov q8, q15 %tmp = alloca %struct.int32x4_t, align 16 - call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, 
i32 8192) nounwind + call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* elementtype(%struct.int32x4_t) %tmp, i32 8192) nounwind ret void } @@ -48,7 +48,7 @@ ret i32 0 @k.2126 = internal unnamed_addr global float 1.000000e+00 define i32 @t4() nounwind { entry: -call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"(float* @k.2126) nounwind +call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"(float* elementtype(float) @k.2126) nounwind ret i32 0 } @@ -56,7 +56,7 @@ ret i32 0 define i32 @t5() nounwind { entry: -call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* @k.2126) nounwind +call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* elementtype(float) @k.2126) nounwind ret i32 0 } @@ -108,7 +108,7 @@ entry: ; CHECK: str r1, [r0] %f.addr = alloca i8*, align 4 store i8* %f, i8** %f.addr, align 4 - call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind + call void asm "str $1, $0", "=*Q,r"(i8** elementtype(i8*) %f.addr, i32 %g) nounwind ret void } @@ -129,6 +129,6 @@ define i32 @fn1() local_unnamed_addr nounwind { entry: ; CHECK: mov [[addr:r[0-9]+]], #5 ; CHECK: ldrh {{.*}}[[addr]] - %0 = tail call i32 asm "ldrh $0, $1", "=r,*Q"(i8* inttoptr (i32 5 to i8*)) nounwind + %0 = tail call i32 asm "ldrh $0, $1", "=r,*Q"(i8* elementtype(i8) inttoptr (i32 5 to i8*)) nounwind ret i32 %0 } diff --git a/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll b/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll index 6ee114d4d4a1..2ac2b8eefff9 100644 --- a/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll +++ b/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll @@ -9,7 +9,7 @@ target triple = "arm--" define arm_aapcscc void @single_m() nounwind { entry: - call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + call void asm "foo $1,$0", "=*m,*m"(i32* elementtype(i32) @mout0, i32* elementtype(i32) @min1) nounwind ret void } @@ -167,7 +167,7 @@ entry: define 
arm_aapcscc void @multi_m() nounwind { entry: %tmp = load i32, i32* @min1, align 4 - call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + call void asm "foo $1,$0", "=*m|r,m|r"(i32* elementtype(i32) @mout0, i32 %tmp) nounwind ret void } diff --git a/llvm/test/CodeGen/ARM/pr25317.ll b/llvm/test/CodeGen/ARM/pr25317.ll index 679b5a0299af..ca29185672bf 100644 --- a/llvm/test/CodeGen/ARM/pr25317.ll +++ b/llvm/test/CodeGen/ARM/pr25317.ll @@ -6,6 +6,6 @@ target triple = "armv7--linux-gnueabihf" ; CHECK-LABEL: f: ; CHECK: str lr, [r0] define void @f(i32* %p) { - call void asm sideeffect "str lr, $0", "=*o"(i32* %p) + call void asm sideeffect "str lr, $0", "=*o"(i32* elementtype(i32) %p) ret void } diff --git a/llvm/test/CodeGen/AVR/inline-asm/inline-asm-invalid.ll b/llvm/test/CodeGen/AVR/inline-asm/inline-asm-invalid.ll index 81b0f6e9b283..8a3dcfc90ab8 100644 --- a/llvm/test/CodeGen/AVR/inline-asm/inline-asm-invalid.ll +++ b/llvm/test/CodeGen/AVR/inline-asm/inline-asm-invalid.ll @@ -3,7 +3,7 @@ define void @foo(i16 %a) { ; CHECK: error: invalid operand in inline asm: 'jl ${0:l}' %i.addr = alloca i32, align 4 - call void asm sideeffect "jl ${0:l}", "*m"(i32* %i.addr) + call void asm sideeffect "jl ${0:l}", "*m"(i32* elementtype(i32) %i.addr) ret void } diff --git a/llvm/test/CodeGen/BPF/inline_asm.ll b/llvm/test/CodeGen/BPF/inline_asm.ll index 7822ac49ed89..138e02db6235 100644 --- a/llvm/test/CodeGen/BPF/inline_asm.ll +++ b/llvm/test/CodeGen/BPF/inline_asm.ll @@ -34,9 +34,9 @@ entry: ; CHECK: r1 = 4 %2 = tail call i32 asm sideeffect "$0 = $1 ll", "=r,i"(i64 333333333333) #2 ; CHECK: r1 = 333333333333 ll - %3 = call i32 asm sideeffect "$0 = *(u16 *) $1", "=r,*m"(i32* nonnull %a) #2 + %3 = call i32 asm sideeffect "$0 = *(u16 *) $1", "=r,*m"(i32* elementtype(i32) nonnull %a) #2 ; CHECK: r1 = *(u16 *) (r10 - 4) - %4 = call i32 asm sideeffect "$0 = *(u32 *) $1", "=r,*m"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 1)) #2 + %4 = call i32 
asm sideeffect "$0 = *(u32 *) $1", "=r,*m"(i32* elementtype(i32) getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 1)) #2 ; CHECK: r1 = g ll ; CHECK: r0 = *(u32 *) (r1 + 4) call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2 diff --git a/llvm/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll b/llvm/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll index 0f82ba61b288..5c0770f1f324 100644 --- a/llvm/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll +++ b/llvm/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll @@ -3,6 +3,6 @@ ; Test that we can have an "X" output constraint. define void @test(i16 * %t) { - call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"( i16* %t ) + call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"( i16* elementtype( i16) %t ) ret void } diff --git a/llvm/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll b/llvm/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll index 05989a0836cf..c8cce5c1894e 100644 --- a/llvm/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll +++ b/llvm/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll @@ -6,8 +6,8 @@ define void @test() { entry: %currfpu = alloca %struct..0anon, align 16 ; <%struct..0anon*> [#uses=2] %mxcsr = alloca %struct..0anon, align 16 ; <%struct..0anon*> [#uses=1] - call void asm sideeffect "fnstenv $0", "=*m,~{dirflag},~{fpsr},~{flags}"( %struct..0anon* %currfpu ) - call void asm sideeffect "$0 $1", "=*m,*m,~{dirflag},~{fpsr},~{flags}"( %struct..0anon* %mxcsr, %struct..0anon* %currfpu ) + call void asm sideeffect "fnstenv $0", "=*m,~{dirflag},~{fpsr},~{flags}"( %struct..0anon* elementtype( %struct..0anon) %currfpu ) + call void asm sideeffect "$0 $1", "=*m,*m,~{dirflag},~{fpsr},~{flags}"( %struct..0anon* elementtype( %struct..0anon) %mxcsr, %struct..0anon* elementtype(%struct..0anon) %currfpu ) ret void } diff --git a/llvm/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/llvm/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll index 
a9d68ae5444c..3b13077b37d8 100644 --- a/llvm/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll +++ b/llvm/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll @@ -2,7 +2,7 @@ define fastcc void @bc__support__high_resolution_time__initialize_clock_rate() personality i32 (...)* @__gxx_personality_v0 { entry: - invoke void asm "rdtsc\0A\09movl %eax, $0\0A\09movl %edx, $1", "=*imr,=*imr,~{dirflag},~{fpsr},~{flags},~{dx},~{ax}"( i32* null, i32* null ) + invoke void asm "rdtsc\0A\09movl %eax, $0\0A\09movl %edx, $1", "=*imr,=*imr,~{dirflag},~{fpsr},~{flags},~{dx},~{ax}"( i32* elementtype( i32) null, i32* elementtype(i32) null ) to label %.noexc unwind label %cleanup144 .noexc: ; preds = %entry diff --git a/llvm/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/llvm/test/CodeGen/Generic/2008-02-20-MatchingMem.ll index 20f3dcc2971d..12362f1a1ea4 100644 --- a/llvm/test/CodeGen/Generic/2008-02-20-MatchingMem.ll +++ b/llvm/test/CodeGen/Generic/2008-02-20-MatchingMem.ll @@ -3,7 +3,7 @@ define void @test(i32* %X) nounwind { entry: %tmp1 = getelementptr i32, i32* %X, i32 10 ; [#uses=2] - tail call void asm sideeffect " $0 $1 ", "=*im,*im,~{memory}"( i32* %tmp1, i32* %tmp1 ) nounwind + tail call void asm sideeffect " $0 $1 ", "=*im,*im,~{memory}"( i32* elementtype( i32) %tmp1, i32* elementtype(i32) %tmp1 ) nounwind ret void } diff --git a/llvm/test/CodeGen/Hexagon/inline-asm-error.ll b/llvm/test/CodeGen/Hexagon/inline-asm-error.ll index 0a1e70830f58..0254836127b7 100644 --- a/llvm/test/CodeGen/Hexagon/inline-asm-error.ll +++ b/llvm/test/CodeGen/Hexagon/inline-asm-error.ll @@ -8,7 +8,7 @@ define void @f0(%s.0* byval(%s.0) align 8 %a0) { b0: - call void asm sideeffect ".weak OFFSET_0;jump ##(OFFSET_0 + 0x14c15f0)", "*r"(%s.0* nonnull %a0), !srcloc !0 + call void asm sideeffect ".weak OFFSET_0;jump ##(OFFSET_0 + 0x14c15f0)", "*r"(%s.0* elementtype(%s.0) nonnull %a0), !srcloc !0 ret void } diff --git a/llvm/test/CodeGen/Hexagon/inline-asm-hexagon.ll b/llvm/test/CodeGen/Hexagon/inline-asm-hexagon.ll 
index 302096d49b3e..76d7ae6ce52e 100644 --- a/llvm/test/CodeGen/Hexagon/inline-asm-hexagon.ll +++ b/llvm/test/CodeGen/Hexagon/inline-asm-hexagon.ll @@ -10,7 +10,7 @@ entry: %free_list_ptr.addr = alloca i64*, align 4 store i64* %free_list_ptr, i64** %free_list_ptr.addr, align 4 %0 = load i32*, i32** %item_ptr, align 4 - %1 = call { i64, i32 } asm sideeffect "1: $0 = memd_locked($5)\0A\09 $1 = HIGH(${0:H}) \0A\09 $1 = add($1,#1) \0A\09 memw($6) = LOW(${0:L}) \0A\09 $0 = combine($7,$1) \0A\09 memd_locked($5,p0) = $0 \0A\09 if !p0 jump 1b\0A\09", "=&r,=&r,=*m,=*m,r,r,r,r,*m,*m,~{p0}"(i64** %free_list_ptr.addr, i8** %free_item_ptr, i64 0, i64* %free_list_ptr, i8** %free_item_ptr, i32* %0, i64** %free_list_ptr.addr, i8** %free_item_ptr) nounwind + %1 = call { i64, i32 } asm sideeffect "1: $0 = memd_locked($5)\0A\09 $1 = HIGH(${0:H}) \0A\09 $1 = add($1,#1) \0A\09 memw($6) = LOW(${0:L}) \0A\09 $0 = combine($7,$1) \0A\09 memd_locked($5,p0) = $0 \0A\09 if !p0 jump 1b\0A\09", "=&r,=&r,=*m,=*m,r,r,r,r,*m,*m,~{p0}"(i64** elementtype(i64*) %free_list_ptr.addr, i8** elementtype(i8*) %free_item_ptr, i64 0, i64* %free_list_ptr, i8** %free_item_ptr, i32* %0, i64** elementtype(i64*) %free_list_ptr.addr, i8** elementtype(i8*) %free_item_ptr) nounwind %asmresult1 = extractvalue { i64, i32 } %1, 1 ret i32 %asmresult1 } diff --git a/llvm/test/CodeGen/Hexagon/jump-prob.ll b/llvm/test/CodeGen/Hexagon/jump-prob.ll index a5f420df0df5..a5805e723cad 100644 --- a/llvm/test/CodeGen/Hexagon/jump-prob.ll +++ b/llvm/test/CodeGen/Hexagon/jump-prob.ll @@ -70,7 +70,7 @@ b4: ; preds = %b2 b5: ; preds = %b4 store i8 0, i8* %a2, align 1, !tbaa !0 %v17 = getelementptr inbounds [2 x %s.0], [2 x %s.0]* @g0, i32 0, i32 0, i32 3, i32 %v2 - %v18 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* %v17, i32* %v17, i32 1, i32* %v17) #0, !srcloc !5 + %v18 = tail call i32 asm sideeffect "1: $0 = 
memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) %v17, i32* %v17, i32 1, i32* elementtype(i32) %v17) #0, !srcloc !5 %v19 = load i32, i32* %v17, align 4, !tbaa !3 %v20 = icmp eq i32 %v19, 255 br i1 %v20, label %b6, label %b7 @@ -114,7 +114,7 @@ b8: ; preds = %b4 b9: ; preds = %b8 store i8 1, i8* %a2, align 1, !tbaa !0 %v42 = getelementptr inbounds [2 x %s.0], [2 x %s.0]* @g0, i32 0, i32 1, i32 3, i32 %v2 - %v43 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* %v42, i32* %v42, i32 1, i32* %v42) #0, !srcloc !5 + %v43 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) %v42, i32* %v42, i32 1, i32* elementtype(i32) %v42) #0, !srcloc !5 %v44 = load i32, i32* %v42, align 4, !tbaa !3 %v45 = icmp eq i32 %v44, 255 br i1 %v45, label %b10, label %b11 diff --git a/llvm/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll b/llvm/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll index bbd05ae0b500..33e148d3244f 100644 --- a/llvm/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll +++ b/llvm/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll @@ -15,7 +15,7 @@ entry: %0 = bitcast i32* %arg1 to i8* call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2 store i32 %status, i32* %arg1, align 4, !tbaa !1 - %1 = call i32 asm sideeffect "r0 = #$1\0Ar1 = $2\0Ar2 = $4\0Atrap0 (#0)\0A$0 = r0", "=r,i,r,*m,r,~{r0},~{r1},~{r2}"(i32 24, i32* nonnull %arg1, i32* nonnull %arg1, i32 %status) #2, !srcloc !5 + %1 = call i32 asm sideeffect "r0 = #$1\0Ar1 = $2\0Ar2 = $4\0Atrap0 (#0)\0A$0 = r0", "=r,i,r,*m,r,~{r0},~{r1},~{r2}"(i32 24, i32* nonnull %arg1, i32* elementtype(i32) nonnull %arg1, i32 %status) #2, !srcloc !5 call void @llvm.lifetime.end.p0i8(i64 4, i8* %0) #2 ret i32 %1 } diff --git 
a/llvm/test/CodeGen/Hexagon/regp-underflow.ll b/llvm/test/CodeGen/Hexagon/regp-underflow.ll index 748f98d744f2..a880eab948e1 100644 --- a/llvm/test/CodeGen/Hexagon/regp-underflow.ll +++ b/llvm/test/CodeGen/Hexagon/regp-underflow.ll @@ -47,34 +47,34 @@ b4: ; preds = %b3 b5: ; preds = %b5, %b4 %v6 = phi i32* [ %v5, %b4 ], [ %v29, %b5 ] %v7 = phi i32 [ 0, %b4 ], [ %v27, %b5 ] - %v8 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v8 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v8, i32* %v6, align 4, !tbaa !0 %v9 = getelementptr i32, i32* %v6, i32 1 - %v10 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v10 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v10, i32* %v9, align 4, !tbaa !0 %v11 = getelementptr i32, i32* %v6, i32 2 - %v12 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v12 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v12, i32* %v11, align 4, !tbaa !0 %v13 = getelementptr i32, i32* %v6, i32 3 - %v14 = tail call i32 asm 
sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v14 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v14, i32* %v13, align 4, !tbaa !0 %v15 = getelementptr i32, i32* %v6, i32 4 - %v16 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v16 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v16, i32* %v15, align 4, !tbaa !0 %v17 = getelementptr i32, i32* %v6, i32 5 - %v18 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v18 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v18, i32* %v17, align 4, !tbaa !0 %v19 = getelementptr i32, i32* %v6, i32 6 - %v20 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v20 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* 
elementtype(i32) @g3), !srcloc !4 store i32 %v20, i32* %v19, align 4, !tbaa !0 %v21 = getelementptr i32, i32* %v6, i32 7 - %v22 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v22 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v22, i32* %v21, align 4, !tbaa !0 %v23 = getelementptr i32, i32* %v6, i32 8 - %v24 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v24 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v24, i32* %v23, align 4, !tbaa !0 %v25 = getelementptr i32, i32* %v6, i32 9 - %v26 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* @g3, i32* @g3, i32 1, i32* @g3), !srcloc !4 + %v26 = tail call i32 asm sideeffect "1: $0 = memw_locked($2)\0A $0 = add($0, $3)\0A memw_locked($2, p0) = $0\0A if !p0 jump 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* elementtype(i32) @g3, i32* @g3, i32 1, i32* elementtype(i32) @g3), !srcloc !4 store i32 %v26, i32* %v25, align 4, !tbaa !0 %v27 = add nsw i32 %v7, 10 %v28 = icmp eq i32 %v27, 100 diff --git a/llvm/test/CodeGen/MSP430/inline-asm-absolute-addressing.ll b/llvm/test/CodeGen/MSP430/inline-asm-absolute-addressing.ll index 91505dca48dd..ee730a128b08 100644 --- a/llvm/test/CodeGen/MSP430/inline-asm-absolute-addressing.ll +++ 
b/llvm/test/CodeGen/MSP430/inline-asm-absolute-addressing.ll @@ -10,6 +10,6 @@ target triple = "msp430-elf" define void @f() { entry: ; CHECK: mov r1, &256 - call void asm sideeffect "mov r1, $0", "*m"(i8* inttoptr (i16 256 to i8*)) + call void asm sideeffect "mov r1, $0", "*m"(i8* elementtype(i8) inttoptr (i16 256 to i8*)) ret void } diff --git a/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll b/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll index f8ae49e197b7..86809c5bd3d0 100644 --- a/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll +++ b/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll @@ -9,7 +9,7 @@ target triple = "msp430" define void @single_m() nounwind { entry: - call void asm "foo $1,$0", "=*m,*m"(i16* @mout0, i16* @min1) nounwind + call void asm "foo $1,$0", "=*m,*m"(i16* elementtype(i16) @mout0, i16* elementtype(i16) @min1) nounwind ret void } @@ -167,7 +167,7 @@ entry: define void @multi_m() nounwind { entry: %tmp = load i16, i16* @min1, align 2 - call void asm "foo $1,$0", "=*m|r,m|r"(i16* @mout0, i16 %tmp) nounwind + call void asm "foo $1,$0", "=*m|r,m|r"(i16* elementtype(i16) @mout0, i16 %tmp) nounwind ret void } diff --git a/llvm/test/CodeGen/Mips/constraint-empty.ll b/llvm/test/CodeGen/Mips/constraint-empty.ll index 65b5d436457b..849320f61a15 100644 --- a/llvm/test/CodeGen/Mips/constraint-empty.ll +++ b/llvm/test/CodeGen/Mips/constraint-empty.ll @@ -5,7 +5,7 @@ define void @foo() { entry: %s = alloca i32, align 4 %x = alloca i32, align 4 - call void asm "", "=*imr,=*m,0,*m,~{$1}"(i32* %x, i32* %s, i32* %x, i32* %s) + call void asm "", "=*imr,=*m,0,*m,~{$1}"(i32* elementtype(i32) %x, i32* elementtype(i32) %s, i32* %x, i32* elementtype(i32) %s) ; CHECK: #APP ; CHECK: #NO_APP diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll index 9c7611ba81d5..2cd2be128db1 100644 --- a/llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll +++ b/llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll 
@@ -6,7 +6,7 @@ define void @R(i32 *%p) nounwind { entry: ; CHECK-LABEL: R: - call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) + call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -20,7 +20,7 @@ define void @R_offset_4(i32 *%p) nounwind { entry: ; CHECK-LABEL: R_offset_4: - call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) + call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -34,7 +34,7 @@ define void @R_offset_254(i32 *%p) nounwind { entry: ; CHECK-LABEL: R_offset_254: - call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63)) + call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63)) ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -48,7 +48,7 @@ define void @R_offset_256(i32 *%p) nounwind { entry: ; CHECK-LABEL: R_offset_256: - call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 64)) + call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 64)) ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 256 diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll index 59778df3b423..956f3c5288b9 100644 --- a/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll +++ 
b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll @@ -8,7 +8,7 @@ define void @ZC(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( ; ALL: #APP @@ -22,7 +22,7 @@ define void @ZC_offset_n4(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_n4: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 -1)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 -1)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( ; ALL: #APP @@ -36,7 +36,7 @@ define void @ZC_offset_4(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_4: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( ; ALL: #APP @@ -50,7 +50,7 @@ define void @ZC_offset_252(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_252: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( ; ALL: #APP @@ -64,7 +64,7 @@ define void @ZC_offset_256(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_256: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, 
i32 0, i32 64)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 64)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( @@ -85,7 +85,7 @@ define void @ZC_offset_2044(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_2044: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 511)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 511)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( @@ -106,7 +106,7 @@ define void @ZC_offset_2048(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_2048: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 512)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 512)) ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)( @@ -128,7 +128,7 @@ define void @ZC_offset_32764(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_32764: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) ; ALL-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( @@ -150,7 +150,7 @@ define void @ZC_offset_32768(i32 *%p) nounwind { entry: ; ALL-LABEL: ZC_offset_32768: - call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) + call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) ; ALL-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; ALL-DAG: ori $[[T0:[0-9]+]], $zero, 
32768 diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll index 2a0904c54c9a..c9c94deec6e4 100644 --- a/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll +++ b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll @@ -27,7 +27,7 @@ entry: ; ALL: #NO_APP - %1 = call { i32, i32 } asm sideeffect ".set push\0A.set noreorder\0A1:\0All $0, $2\0Aaddu $1, $0, $3\0Asc $1, $2\0Abeqz $1, 1b\0Aaddu $1, $0, $3\0A.set pop\0A", "=&r,=&r,=*^ZC,Ir,*^ZC,~{memory},~{$1}"(i32* %count, i32 10, i32* %count) + %1 = call { i32, i32 } asm sideeffect ".set push\0A.set noreorder\0A1:\0All $0, $2\0Aaddu $1, $0, $3\0Asc $1, $2\0Abeqz $1, 1b\0Aaddu $1, $0, $3\0A.set pop\0A", "=&r,=&r,=*^ZC,Ir,*^ZC,~{memory},~{$1}"(i32* elementtype(i32) %count, i32 10, i32* elementtype(i32) %count) %asmresult1.i = extractvalue { i32, i32 } %1, 1 %cmp = icmp ne i32 %asmresult1.i, 10 %conv = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll index 11ef8341cbdb..d48caaabdbc0 100644 --- a/llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll +++ b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll @@ -6,7 +6,7 @@ define void @m(i32 *%p) nounwind { entry: ; CHECK-LABEL: m: - call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) + call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -20,7 +20,7 @@ define void @m_offset_4(i32 *%p) nounwind { entry: ; CHECK-LABEL: m_offset_4: - call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) + call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 
1)) ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -34,7 +34,7 @@ define void @m_offset_32764(i32 *%p) nounwind { entry: ; CHECK-LABEL: m_offset_32764: - call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) + call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -48,7 +48,7 @@ define void @m_offset_32768(i32 *%p) nounwind { entry: ; CHECK-LABEL: m_offset_32768: - call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) + call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK-DAG: ori $[[T0:[0-9]+]], $zero, 32768 diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll index caf17f9dbf7d..0a6994a715bf 100644 --- a/llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll +++ b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll @@ -19,8 +19,8 @@ entry: ; CHECK: sw $[[T3]], 0($[[T1]]) %l1 = alloca i32, align 4 - call void asm "sw $1, $0", "=*m,r"(i32* %l1, i32 %x) nounwind - %0 = call i32 asm "lw $0, $1", "=r,*m"(i32* %l1) nounwind + call void asm "sw $1, $0", "=*m,r"(i32* elementtype(i32) %l1, i32 %x) nounwind + %0 = call i32 asm "lw $0, $1", "=r,*m"(i32* elementtype(i32) %l1) nounwind store i32 %0, i32* @g1, align 4 ret i32 %0 } @@ -55,13 +55,13 @@ entry: define void @main() { entry: ; Second word: - tail call void asm sideeffect " lw $0, ${1:D}", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) + tail call void asm sideeffect " lw $0, ${1:D}", "r,*m,~{$11}"(i32 undef, i32* elementtype(i32) 
getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) ; First word. Notice, no 'D': - tail call void asm sideeffect " lw $0, ${1}", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) + tail call void asm sideeffect " lw $0, ${1}", "r,*m,~{$11}"(i32 undef, i32* elementtype(i32) getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) ; High-order part. - tail call void asm sideeffect " lw $0, ${1:M}", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) + tail call void asm sideeffect " lw $0, ${1:M}", "r,*m,~{$11}"(i32 undef, i32* elementtype(i32) getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) ; Low-order part. - tail call void asm sideeffect " lw $0, ${1:L}", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) + tail call void asm sideeffect " lw $0, ${1:L}", "r,*m,~{$11}"(i32 undef, i32* elementtype(i32) getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3)) ret void } diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll index de677cbcc681..157bf6875a73 100644 --- a/llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll +++ b/llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll @@ -6,7 +6,7 @@ define void @o(i32 *%p) nounwind { entry: ; CHECK-LABEL: o: - call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -20,7 +20,7 @@ define void @o_offset_4(i32 *%p) nounwind { entry: ; CHECK-LABEL: o_offset_4: - call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) + call void asm 
sideeffect "lw $$1, $0", "*o,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -34,7 +34,7 @@ define void @o_offset_32764(i32 *%p) nounwind { entry: ; CHECK-LABEL: o_offset_32764: - call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK: #APP @@ -48,7 +48,7 @@ define void @o_offset_32768(i32 *%p) nounwind { entry: ; CHECK-LABEL: o_offset_32768: - call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* elementtype(i32) getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( ; CHECK-DAG: ori $[[T0:[0-9]+]], $zero, 32768 diff --git a/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll index 937a64d31072..f80ca2b96316 100644 --- a/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll +++ b/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll @@ -22,7 +22,7 @@ target triple = "powerpc-unknown-linux-gnu" define i64 @test(i32 %A, i32 %B, i32 %C) nounwind { entry: %Y = alloca i32, align 4 ; [#uses=2] - %tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* %Y, i32 %A, i32 %B, i32 %C ) ; [#uses=1] + %tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* elementtype( i32) %Y, i32 %A, i32 %B, i32 %C ) ; [#uses=1] %tmp5 = load i32, i32* %Y ; [#uses=1] %tmp56 = zext i32 %tmp5 to i64 ; [#uses=1] %tmp7 = shl i64 %tmp56, 32 ; [#uses=1] 
diff --git a/llvm/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/llvm/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll index c5da2baab643..d86fc8528cb9 100644 --- a/llvm/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll +++ b/llvm/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll @@ -15,7 +15,7 @@ bb: ; preds = %bb, %entry %tmp8 = getelementptr float, float* %tmp56, i32 %i.035.0 ; [#uses=2] %tmp101112 = bitcast float* %tmp8 to i8* ; [#uses=1] %tmp1617 = bitcast float* %tmp8 to i32* ; [#uses=1] - %tmp21 = tail call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* %tmp101112, i32 0, i32* %tmp1617 ) ; [#uses=0] + %tmp21 = tail call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"(i8* %tmp101112, i32 0, i32* elementtype(i32) %tmp1617 ) ; [#uses=0] %indvar.next = add i32 %i.035.0, 1 ; [#uses=2] %exitcond = icmp eq i32 %indvar.next, 4 ; [#uses=1] br i1 %exitcond, label %return, label %bb diff --git a/llvm/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll b/llvm/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll index 53552323b72c..b3369ca1b75b 100644 --- a/llvm/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll +++ b/llvm/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll @@ -4,6 +4,6 @@ define void @_ZN17TCMalloc_SpinLock4LockEv(%struct.TCMalloc_SpinLock* %this) { entry: - %tmp3 = call i32 asm sideeffect "1: lwarx $0, 0, $1\0A\09stwcx. $2, 0, $1\0A\09bne- 1b\0A\09isync", "=&r,=*r,r,1,~{dirflag},~{fpsr},~{flags},~{memory}"( i32** null, i32 1, i32* null ) ; [#uses=0] + %tmp3 = call i32 asm sideeffect "1: lwarx $0, 0, $1\0A\09stwcx. 
$2, 0, $1\0A\09bne- 1b\0A\09isync", "=&r,=*r,r,1,~{dirflag},~{fpsr},~{flags},~{memory}"(i32** elementtype(i32*) null, i32 1, i32* null) ; [#uses=0] unreachable } diff --git a/llvm/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll b/llvm/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll index a69e145f3adf..b2d65314d395 100644 --- a/llvm/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll +++ b/llvm/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll @@ -8,7 +8,7 @@ define i32 @test() { entry: %data = alloca i32 ; [#uses=1] %compressedPage = alloca %struct._StorePageMax ; <%struct._StorePageMax*> [#uses=0] - %tmp107 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* null, i32 0, i32* %data ) ; [#uses=0] + %tmp107 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* null, i32 0, i32* elementtype(i32) %data ) ; [#uses=0] unreachable } diff --git a/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll index 0209808f2f19..3c5ca4000bc6 100644 --- a/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll +++ b/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll @@ -17,7 +17,7 @@ entry: store i32 %y, i32* %y_addr %0 = load i32, i32* %y_addr, align 4 ; [#uses=1] %1 = getelementptr inbounds [0 x i32], [0 x i32]* @x, i32 0, i32 %0 ; [#uses=1] - call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind + call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* elementtype(i32) %1, i32 0) nounwind br label %return return: ; preds = %entry diff --git a/llvm/test/CodeGen/PowerPC/PR3488.ll b/llvm/test/CodeGen/PowerPC/PR3488.ll index 69c375a149f3..068b43fd3b0e 100644 --- a/llvm/test/CodeGen/PowerPC/PR3488.ll +++ b/llvm/test/CodeGen/PowerPC/PR3488.ll @@ -98,7 +98,7 @@ module asm "\09.previous\09\09\09\09\09" ; Function Attrs: nounwind define void @__alloc_pages_nodemask() 
#0 { entry: - %0 = call i64 asm sideeffect "ld${1:U}${1:X} $0,$1", "=r,*m"(i64* undef) + %0 = call i64 asm sideeffect "ld${1:U}${1:X} $0,$1", "=r,*m"(i64* elementtype(i64) undef) br i1 undef, label %do.body.lr.ph.i.i.i, label %zone_page_state_snapshot.exit.i.i ; CHECK: ld 3, 0(3) diff --git a/llvm/test/CodeGen/PowerPC/asm-constraints.ll b/llvm/test/CodeGen/PowerPC/asm-constraints.ll index da77d1a16979..014b5bc2711d 100644 --- a/llvm/test/CodeGen/PowerPC/asm-constraints.ll +++ b/llvm/test/CodeGen/PowerPC/asm-constraints.ll @@ -32,7 +32,7 @@ entry: store i32 %result, i32* %result.addr, align 4 store i8* %addr, i8** %addr.addr, align 8 %0 = load i8*, i8** %addr.addr, align 8 - %1 = call i32 asm sideeffect "ld${1:U}${1:X} $0,$1\0Acmpw $0,$0\0Abne- 1f\0A1: isync\0A", "=r,*m,~{memory},~{cr0}"(i8* %0) #1, !srcloc !0 + %1 = call i32 asm sideeffect "ld${1:U}${1:X} $0,$1\0Acmpw $0,$0\0Abne- 1f\0A1: isync\0A", "=r,*m,~{memory},~{cr0}"(i8* elementtype(i8) %0) #1, !srcloc !0 store i32 %1, i32* %result.addr, align 4 ret void } diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll index d00901f3ace2..a07cf46ce752 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll @@ -15,7 +15,7 @@ define dso_local i64 @test_ldarx(i64* readnone %a) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: blr entry: - %0 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* %a) + %0 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* elementtype(i64) %a) ret i64 %0 } diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll index 4db143b17269..a04ef4b73ccf 100644 --- 
a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll @@ -25,7 +25,7 @@ define dso_local signext i32 @test_lwarx(i32* readnone %a) { ; CHECK-32-NEXT: #NO_APP ; CHECK-32-NEXT: blr entry: - %0 = call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(i32* %a) + %0 = call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(i32* elementtype(i32) %a) ret i32 %0 } @@ -113,7 +113,7 @@ define dso_local signext i16 @test_lharx(i16* %a) { ; CHECK-32-NEXT: extsh 3, 3 ; CHECK-32-NEXT: blr entry: - %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(i16* %a) + %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(i16* elementtype(i16) %a) ret i16 %0 } @@ -135,6 +135,6 @@ define dso_local zeroext i8 @test_lbarx(i8* %a) { ; CHECK-32-NEXT: clrlwi 3, 3, 24 ; CHECK-32-NEXT: blr entry: - %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(i8* %a) + %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(i8* elementtype(i8) %a) ret i8 %0 } diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll index ed9bee2003b7..7aeac798f53c 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll @@ -65,7 +65,7 @@ entry: br label %do.body do.body: ; preds = %do.body, %entry - %1 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* nonnull %x64) + %1 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* elementtype(i64) nonnull %x64) %2 = call i32 @llvm.ppc.stdcx(i8* nonnull %0, i64 0) %tobool.not = icmp eq i32 %2, 0 br i1 %tobool.not, label %do.body, label %do.end diff --git a/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll b/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll index 6928667c6db7..ca981f3f986c 100644 --- 
a/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll +++ b/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll @@ -14,75 +14,75 @@ define void @test1({ i8*, void (i8*, i8*)* } %fn_arg) { %regs = alloca [18 x i64], align 8 store { i8*, void (i8*, i8*)* } %fn_arg, { i8*, void (i8*, i8*)* }* %fn %1 = bitcast [18 x i64]* %regs to i64* - call void asm sideeffect "std 14, $0", "=*m"(i64* %1) + call void asm sideeffect "std 14, $0", "=*m"(i64* elementtype(i64) %1) %2 = bitcast [18 x i64]* %regs to i8* %3 = getelementptr i8, i8* %2, i32 8 %4 = bitcast i8* %3 to i64* - call void asm sideeffect "std 15, $0", "=*m"(i64* %4) + call void asm sideeffect "std 15, $0", "=*m"(i64* elementtype(i64) %4) %5 = bitcast [18 x i64]* %regs to i8* %6 = getelementptr i8, i8* %5, i32 16 %7 = bitcast i8* %6 to i64* - call void asm sideeffect "std 16, $0", "=*m"(i64* %7) + call void asm sideeffect "std 16, $0", "=*m"(i64* elementtype(i64) %7) %8 = bitcast [18 x i64]* %regs to i8* %9 = getelementptr i8, i8* %8, i32 24 %10 = bitcast i8* %9 to i64* - call void asm sideeffect "std 17, $0", "=*m"(i64* %10) + call void asm sideeffect "std 17, $0", "=*m"(i64* elementtype(i64) %10) %11 = bitcast [18 x i64]* %regs to i8* %12 = getelementptr i8, i8* %11, i32 32 %13 = bitcast i8* %12 to i64* - call void asm sideeffect "std 18, $0", "=*m"(i64* %13) + call void asm sideeffect "std 18, $0", "=*m"(i64* elementtype(i64) %13) %14 = bitcast [18 x i64]* %regs to i8* %15 = getelementptr i8, i8* %14, i32 40 %16 = bitcast i8* %15 to i64* - call void asm sideeffect "std 19, $0", "=*m"(i64* %16) + call void asm sideeffect "std 19, $0", "=*m"(i64* elementtype(i64) %16) %17 = bitcast [18 x i64]* %regs to i8* %18 = getelementptr i8, i8* %17, i32 48 %19 = bitcast i8* %18 to i64* - call void asm sideeffect "std 20, $0", "=*m"(i64* %19) + call void asm sideeffect "std 20, $0", "=*m"(i64* elementtype(i64) %19) %20 = bitcast [18 x i64]* %regs to i8* %21 = getelementptr i8, i8* %20, i32 56 %22 = bitcast i8* %21 to i64* - call void asm sideeffect "std 
21, $0", "=*m"(i64* %22) + call void asm sideeffect "std 21, $0", "=*m"(i64* elementtype(i64) %22) %23 = bitcast [18 x i64]* %regs to i8* %24 = getelementptr i8, i8* %23, i32 64 %25 = bitcast i8* %24 to i64* - call void asm sideeffect "std 22, $0", "=*m"(i64* %25) + call void asm sideeffect "std 22, $0", "=*m"(i64* elementtype(i64) %25) %26 = bitcast [18 x i64]* %regs to i8* %27 = getelementptr i8, i8* %26, i32 72 %28 = bitcast i8* %27 to i64* - call void asm sideeffect "std 23, $0", "=*m"(i64* %28) + call void asm sideeffect "std 23, $0", "=*m"(i64* elementtype(i64) %28) %29 = bitcast [18 x i64]* %regs to i8* %30 = getelementptr i8, i8* %29, i32 80 %31 = bitcast i8* %30 to i64* - call void asm sideeffect "std 24, $0", "=*m"(i64* %31) + call void asm sideeffect "std 24, $0", "=*m"(i64* elementtype(i64) %31) %32 = bitcast [18 x i64]* %regs to i8* %33 = getelementptr i8, i8* %32, i32 88 %34 = bitcast i8* %33 to i64* - call void asm sideeffect "std 25, $0", "=*m"(i64* %34) + call void asm sideeffect "std 25, $0", "=*m"(i64* elementtype(i64) %34) %35 = bitcast [18 x i64]* %regs to i8* %36 = getelementptr i8, i8* %35, i32 96 %37 = bitcast i8* %36 to i64* - call void asm sideeffect "std 26, $0", "=*m"(i64* %37) + call void asm sideeffect "std 26, $0", "=*m"(i64* elementtype(i64) %37) %38 = bitcast [18 x i64]* %regs to i8* %39 = getelementptr i8, i8* %38, i32 104 %40 = bitcast i8* %39 to i64* - call void asm sideeffect "std 27, $0", "=*m"(i64* %40) + call void asm sideeffect "std 27, $0", "=*m"(i64* elementtype(i64) %40) %41 = bitcast [18 x i64]* %regs to i8* %42 = getelementptr i8, i8* %41, i32 112 %43 = bitcast i8* %42 to i64* - call void asm sideeffect "std 28, $0", "=*m"(i64* %43) + call void asm sideeffect "std 28, $0", "=*m"(i64* elementtype(i64) %43) %44 = bitcast [18 x i64]* %regs to i8* %45 = getelementptr i8, i8* %44, i32 120 %46 = bitcast i8* %45 to i64* - call void asm sideeffect "std 29, $0", "=*m"(i64* %46) + call void asm sideeffect "std 29, $0", "=*m"(i64* 
elementtype(i64) %46) %47 = bitcast [18 x i64]* %regs to i8* %48 = getelementptr i8, i8* %47, i32 128 %49 = bitcast i8* %48 to i64* - call void asm sideeffect "std 30, $0", "=*m"(i64* %49) + call void asm sideeffect "std 30, $0", "=*m"(i64* elementtype(i64) %49) %50 = bitcast [18 x i64]* %regs to i8* %51 = getelementptr i8, i8* %50, i32 136 %52 = bitcast i8* %51 to i64* - call void asm sideeffect "std 31, $0", "=*m"(i64* %52) + call void asm sideeffect "std 31, $0", "=*m"(i64* elementtype(i64) %52) %53 = getelementptr { i8*, void (i8*, i8*)* }, { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1 %.funcptr = load void (i8*, i8*)*, void (i8*, i8*)** %53 %54 = getelementptr { i8*, void (i8*, i8*)* }, { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/ia-neg-const.ll b/llvm/test/CodeGen/PowerPC/ia-neg-const.ll index cbb605965e3d..5f788016bea4 100644 --- a/llvm/test/CodeGen/PowerPC/ia-neg-const.ll +++ b/llvm/test/CodeGen/PowerPC/ia-neg-const.ll @@ -9,7 +9,7 @@ define i64 @main() #0 { entry: %x = alloca i64, align 8 store i64 0, i64* %x, align 8 - %0 = call i64 asm sideeffect "ld $0,$1\0A\09add${2:I} $0,$0,$2", "=&r,*m,Ir"(i64* %x, i64 -1) #0 + %0 = call i64 asm sideeffect "ld $0,$1\0A\09add${2:I} $0,$0,$2", "=&r,*m,Ir"(i64* elementtype(i64) %x, i64 -1) #0 ret i64 %0 } diff --git a/llvm/test/CodeGen/PowerPC/inlineasm-output-template.ll b/llvm/test/CodeGen/PowerPC/inlineasm-output-template.ll index d56d77f26585..fbfaf16385ff 100644 --- a/llvm/test/CodeGen/PowerPC/inlineasm-output-template.ll +++ b/llvm/test/CodeGen/PowerPC/inlineasm-output-template.ll @@ -32,6 +32,6 @@ define dso_local i32 @test_inlineasm_c_output_template2() { ; PPC64-LABEL: test_inlineasm_L_output_template ; PPC64: # 8(4) define dso_local void @test_inlineasm_L_output_template(i64 %0, i64* %1) { - tail call void asm sideeffect "# ${0:L}", "*m"(i64* %1) + tail call void asm sideeffect "# ${0:L}", "*m"(i64* elementtype(i64) %1) ret void } diff --git 
a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll index 9bbec5b24188..658376c4a3b2 100644 --- a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll +++ b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll @@ -9,7 +9,7 @@ target triple = "powerpc--" define void @single_m() nounwind { entry: - call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + call void asm "foo $1,$0", "=*m,*m"(i32* elementtype(i32) @mout0, i32* elementtype(i32) @min1) nounwind ret void } @@ -166,7 +166,7 @@ entry: define void @multi_m() nounwind { entry: %tmp = load i32, i32* @min1, align 4 - call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + call void asm "foo $1,$0", "=*m|r,m|r"(i32* elementtype(i32) @mout0, i32 %tmp) nounwind ret void } diff --git a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll index 1fa9c0d3c130..d834b9175091 100644 --- a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll +++ b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll @@ -9,7 +9,7 @@ target triple = "powerpc64--" define void @single_m() nounwind { entry: - call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + call void asm "foo $1,$0", "=*m,*m"(i32* elementtype(i32) @mout0, i32* elementtype(i32) @min1) nounwind ret void } @@ -166,7 +166,7 @@ entry: define void @multi_m() nounwind { entry: %tmp = load i32, i32* @min1, align 4 - call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + call void asm "foo $1,$0", "=*m|r,m|r"(i32* elementtype(i32) @mout0, i32 %tmp) nounwind ret void } diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll b/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll index 794c9c190d1c..cfef020c2d35 100644 --- a/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll +++ b/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll @@ -21,7 +21,7 @@ wait_on_buffer.exit1319: ; preds = %while.body392 
%conv.i.i1322 = and i64 %1, 1 %lnot404 = icmp eq i64 %conv.i.i1322, 0 %.err.4 = select i1 %lnot404, i32 -5, i32 %input1 - %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #0 + %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* elementtype(i64) %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* elementtype(i64) %b_state.i.i1314) #0 store i8* %0, i8** %input4, align 8 %cmp.i1312 = icmp eq i32* %input2, %input3 br i1 %cmp.i1312, label %while.end418, label %while.body392 diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra.ll b/llvm/test/CodeGen/PowerPC/subreg-postra.ll index 38e27c73c907..9f5f9e70808d 100644 --- a/llvm/test/CodeGen/PowerPC/subreg-postra.ll +++ b/llvm/test/CodeGen/PowerPC/subreg-postra.ll @@ -138,7 +138,7 @@ wait_on_buffer.exit1319: ; preds = %while.body392 %conv.i.i1322 = and i64 %1, 1 %lnot404 = icmp eq i64 %conv.i.i1322, 0 %.err.4 = select i1 %lnot404, i32 -5, i32 %inp2 - %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1 + %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. 
$0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* elementtype(i64) %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* elementtype(i64) %b_state.i.i1314) #1 %prev.i.i.i1325 = getelementptr inbounds i8, i8* %0, i64 8 %3 = load i32*, i32** %inp4, align 8 store i32* %3, i32** %inp5, align 8 diff --git a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll index 9418ce58a49a..1f176f6f3667 100644 --- a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll +++ b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll @@ -9,12 +9,12 @@ bb: br i1 undef, label %bb1, label %bb8 bb1: - %tmp = tail call i64 asm sideeffect "", "=&r,=*m,b,r,*m,~{cc}"(i64* nonnull undef, i64* nonnull undef, i64 1, i64* nonnull undef) + %tmp = tail call i64 asm sideeffect "", "=&r,=*m,b,r,*m,~{cc}"(i64* elementtype(i64) nonnull undef, i64* nonnull undef, i64 1, i64* elementtype(i64) nonnull undef) %tmp2 = icmp eq i64 %tmp, 0 br i1 %tmp2, label %bb3, label %bb8 bb3: - %tmp4 = tail call i64 asm sideeffect "", "=&r,=*m,b,r,r,*m,~{cc}"(i64* undef, i64* undef, i64 0, i64 undef, i64* undef) + %tmp4 = tail call i64 asm sideeffect "", "=&r,=*m,b,r,r,*m,~{cc}"(i64* elementtype(i64) undef, i64* undef, i64 0, i64 undef, i64* elementtype(i64) undef) %tmp5 = icmp eq i64 0, %tmp4 br i1 %tmp5, label %bb6, label %bb3 diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll index de5d9a5f22a8..4a96f1dbbd27 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm.ll @@ -60,7 +60,7 @@ define void @constraint_m(i32* %a) nounwind { ; RV64I-NEXT: #APP ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret - call void asm sideeffect "", "=*m"(i32* %a) + call void asm sideeffect "", "=*m"(i32* elementtype(i32) %a) ret void } @@ -78,7 +78,7 @@ define i32 @constraint_m2(i32* %a) nounwind { ; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret - %1 = tail call i32 asm "lw $0, $1", "=r,*m"(i32* 
%a) + %1 = tail call i32 asm "lw $0, $1", "=r,*m"(i32* elementtype(i32) %a) ret i32 %1 } @@ -170,8 +170,8 @@ define void @constraint_A(i8* %a) nounwind { ; RV64I-NEXT: lb s1, 0(a0) ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret - tail call void asm sideeffect "sb s0, $0", "*A"(i8* %a) - tail call void asm sideeffect "lb s1, $0", "*A"(i8* %a) + tail call void asm sideeffect "sb s0, $0", "*A"(i8* elementtype(i8) %a) + tail call void asm sideeffect "lb s1, $0", "*A"(i8* elementtype(i8) %a) ret void } diff --git a/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll index 32ebc02ce201..e6712b5fc9a9 100644 --- a/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll +++ b/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll @@ -7,10 +7,10 @@ target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 define internal void @set_fast_math() nounwind { entry: %fsr = alloca i32 ; [#uses=4] - call void asm "st %fsr, $0", "=*m"(i32* %fsr) nounwind + call void asm "st %fsr, $0", "=*m"(i32* elementtype(i32) %fsr) nounwind %0 = load i32, i32* %fsr, align 4 ; [#uses=1] %1 = or i32 %0, 4194304 ; [#uses=1] store i32 %1, i32* %fsr, align 4 - call void asm sideeffect "ld $0, %fsr", "*m"(i32* %fsr) nounwind + call void asm sideeffect "ld $0, %fsr", "*m"(i32* elementtype(i32) %fsr) nounwind ret void } diff --git a/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll b/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll index 6b738e386c3a..a779ebb7ec92 100644 --- a/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll +++ b/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll @@ -182,7 +182,7 @@ entry: %4 = add i128 %2, %3 %5 = bitcast i8* %sum to i128* store i128 %4, i128* %5 - tail call void asm sideeffect "", "=*m,*m"(i128 *%0, i128* %5) nounwind + tail call void asm sideeffect "", "=*m,*m"(i128* elementtype(i128) %0, i128* elementtype(i128) %5) nounwind %6 = load i128, i128* %0 %7 = sub i128 %2, %6 %8 = bitcast i8* %diff to i128* 
diff --git a/llvm/test/CodeGen/SPARC/inlineasm.ll b/llvm/test/CodeGen/SPARC/inlineasm.ll index ca68a5a2317c..eb95be2519b4 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm.ll @@ -117,7 +117,7 @@ entry: ; CHECK: std %l0, [%o0] define void @test_addressing_mode_i64(i64* %out) { entry: - call void asm "std %l0, $0", "=*m,r"(i64* nonnull %out, i64 0) + call void asm "std %l0, $0", "=*m,r"(i64* elementtype(i64) nonnull %out, i64 0) ret void } diff --git a/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll index 49496c304d23..8ee5e409f3cb 100644 --- a/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll +++ b/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll @@ -9,7 +9,7 @@ target triple = "sparc" define void @single_m() nounwind { entry: - call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + call void asm "foo $1,$0", "=*m,*m"(i32* elementtype(i32) @mout0, i32* elementtype(i32) @min1) nounwind ret void } @@ -167,7 +167,7 @@ entry: define void @multi_m() nounwind { entry: %tmp = load i32, i32* @min1, align 4 - call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + call void asm "foo $1,$0", "=*m|r,m|r"(i32* elementtype(i32) @mout0, i32 %tmp) nounwind ret void } diff --git a/llvm/test/CodeGen/SystemZ/asm-01.ll b/llvm/test/CodeGen/SystemZ/asm-01.ll index 3dbc8ac268b7..f003eb2cca13 100644 --- a/llvm/test/CodeGen/SystemZ/asm-01.ll +++ b/llvm/test/CodeGen/SystemZ/asm-01.ll @@ -9,7 +9,7 @@ define void @f1(i64 %base) { ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * - call void asm "blah $0", "=*Q" (i64 *%addr) + call void asm "blah $0", "=*Q" (i64* elementtype(i64) %addr) ret void } @@ -21,7 +21,7 @@ define void @f2(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, -1 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*Q" (i64 *%addr) + call void asm "blah $0", "=*Q" (i64* elementtype(i64) %addr) ret void } @@ 
-32,7 +32,7 @@ define void @f3(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 4095 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*Q" (i64 *%addr) + call void asm "blah $0", "=*Q" (i64* elementtype(i64) %addr) ret void } @@ -44,7 +44,7 @@ define void @f4(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 4096 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*Q" (i64 *%addr) + call void asm "blah $0", "=*Q" (i64* elementtype(i64) %addr) ret void } @@ -56,6 +56,6 @@ define void @f5(i64 %base, i64 %index) { ; CHECK: br %r14 %add = add i64 %base, %index %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*Q" (i64 *%addr) + call void asm "blah $0", "=*Q" (i64* elementtype(i64) %addr) ret void } diff --git a/llvm/test/CodeGen/SystemZ/asm-02.ll b/llvm/test/CodeGen/SystemZ/asm-02.ll index c916d827cde6..dd92e6b06c28 100644 --- a/llvm/test/CodeGen/SystemZ/asm-02.ll +++ b/llvm/test/CodeGen/SystemZ/asm-02.ll @@ -9,7 +9,7 @@ define void @f1(i64 %base) { ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } @@ -21,7 +21,7 @@ define void @f2(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, -1 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } @@ -32,7 +32,7 @@ define void @f3(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 4095 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } @@ -44,7 +44,7 @@ define void @f4(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 4096 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } @@ -55,7 +55,7 @@ define void @f5(i64 %base, i64 %index) { ; 
CHECK: br %r14 %add = add i64 %base, %index %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } @@ -67,7 +67,7 @@ define void @f6(i64 %base, i64 %index) { %add = add i64 %base, 4095 %addi = add i64 %add, %index %addr = inttoptr i64 %addi to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } @@ -80,6 +80,6 @@ define void @f7(i64 %base, i64 %index) { %add = add i64 %base, 4096 %addi = add i64 %add, %index %addr = inttoptr i64 %addi to i64 * - call void asm "blah $0", "=*R" (i64 *%addr) + call void asm "blah $0", "=*R" (i64* elementtype(i64) %addr) ret void } diff --git a/llvm/test/CodeGen/SystemZ/asm-03.ll b/llvm/test/CodeGen/SystemZ/asm-03.ll index d4fd564ce193..eebab2643f28 100644 --- a/llvm/test/CodeGen/SystemZ/asm-03.ll +++ b/llvm/test/CodeGen/SystemZ/asm-03.ll @@ -10,7 +10,7 @@ define void @f1(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, -524288 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*S" (i64 *%addr) + call void asm "blah $0", "=*S" (i64* elementtype(i64) %addr) ret void } @@ -22,7 +22,7 @@ define void @f2(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, -524289 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*S" (i64 *%addr) + call void asm "blah $0", "=*S" (i64* elementtype(i64) %addr) ret void } @@ -33,7 +33,7 @@ define void @f3(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 524287 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*S" (i64 *%addr) + call void asm "blah $0", "=*S" (i64* elementtype(i64) %addr) ret void } @@ -45,6 +45,6 @@ define void @f4(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 524288 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*S" (i64 *%addr) + call void asm "blah $0", "=*S" (i64* elementtype(i64) %addr) ret void } diff --git a/llvm/test/CodeGen/SystemZ/asm-04.ll 
b/llvm/test/CodeGen/SystemZ/asm-04.ll index eb91bef83769..0322fe700060 100644 --- a/llvm/test/CodeGen/SystemZ/asm-04.ll +++ b/llvm/test/CodeGen/SystemZ/asm-04.ll @@ -10,7 +10,7 @@ define void @f1(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, -524288 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*T" (i64 *%addr) + call void asm "blah $0", "=*T" (i64* elementtype(i64) %addr) ret void } @@ -22,7 +22,7 @@ define void @f2(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, -524289 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*T" (i64 *%addr) + call void asm "blah $0", "=*T" (i64* elementtype(i64) %addr) ret void } @@ -33,7 +33,7 @@ define void @f3(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 524287 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*T" (i64 *%addr) + call void asm "blah $0", "=*T" (i64* elementtype(i64) %addr) ret void } @@ -45,7 +45,7 @@ define void @f4(i64 %base) { ; CHECK: br %r14 %add = add i64 %base, 524288 %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*T" (i64 *%addr) + call void asm "blah $0", "=*T" (i64* elementtype(i64) %addr) ret void } @@ -56,7 +56,7 @@ define void @f5(i64 %base, i64 %index) { ; CHECK: br %r14 %add = add i64 %base, %index %addr = inttoptr i64 %add to i64 * - call void asm "blah $0", "=*T" (i64 *%addr) + call void asm "blah $0", "=*T" (i64* elementtype(i64) %addr) ret void } @@ -68,6 +68,6 @@ define void @f6(i64 %base, i64 %index) { %add = add i64 %base, 524287 %addi = add i64 %add, %index %addr = inttoptr i64 %addi to i64 * - call void asm "blah $0", "=*T" (i64 *%addr) + call void asm "blah $0", "=*T" (i64* elementtype(i64) %addr) ret void } diff --git a/llvm/test/CodeGen/SystemZ/asm-05.ll b/llvm/test/CodeGen/SystemZ/asm-05.ll index 832ae2fba420..6b8556832cfe 100644 --- a/llvm/test/CodeGen/SystemZ/asm-05.ll +++ b/llvm/test/CodeGen/SystemZ/asm-05.ll @@ -8,7 +8,7 @@ define void @f1(i64 %base) { ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr 
i64 %base to i64 * - call void asm "blah $0", "=*m" (i64 *%addr) + call void asm "blah $0", "=*m" (i64* elementtype(i64) %addr) ret void } @@ -17,6 +17,6 @@ define void @f2(i64 %base) { ; CHECK: blah 0(%r2) ; CHECK: br %r14 %addr = inttoptr i64 %base to i64 * - call void asm "blah $0", "=*o" (i64 *%addr) + call void asm "blah $0", "=*o" (i64* elementtype(i64) %addr) ret void } diff --git a/llvm/test/CodeGen/SystemZ/frame-25.ll b/llvm/test/CodeGen/SystemZ/frame-25.ll index 64c175bd4eca..f6b54c0b2507 100644 --- a/llvm/test/CodeGen/SystemZ/frame-25.ll +++ b/llvm/test/CodeGen/SystemZ/frame-25.ll @@ -17,7 +17,7 @@ define void @fun0() #0 { entry: %b = alloca [16 x i8], align 1 %0 = getelementptr inbounds [16 x i8], [16 x i8]* %b, i64 0, i64 0 - call void asm "stcke $0", "=*Q"([16 x i8]* nonnull %b) #2 + call void asm "stcke $0", "=*Q"([16 x i8]* elementtype([16 x i8]) nonnull %b) #2 ret void } diff --git a/llvm/test/CodeGen/WebAssembly/inline-asm.ll b/llvm/test/CodeGen/WebAssembly/inline-asm.ll index 7a219febb59d..038a03a71ddc 100644 --- a/llvm/test/CodeGen/WebAssembly/inline-asm.ll +++ b/llvm/test/CodeGen/WebAssembly/inline-asm.ll @@ -77,7 +77,7 @@ entry: ; CHECK-NEXT: local.get $push[[S1:[0-9]+]]=, 1{{$}} ; CHECK-NEXT: i32.store16 0($pop[[S0]]), $pop[[S1]]{{$}} define void @X_i16(i16 * %t) { - call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16* %t) + call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16* elementtype(i16) %t) ret void } @@ -87,7 +87,7 @@ define void @X_i16(i16 * %t) { ; CHECK-NEXT: local.get $push[[S1:[0-9]+]]=, 1{{$}} ; CHECK-NEXT: i32.store 0($pop[[S0]]), $pop[[S1]]{{$}} define void @X_ptr(i16 ** %t) { - call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16** %t) + call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16** elementtype(i16*) %t) ret void } diff --git a/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll 
b/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll index 944fae68abc4..727407897fed 100644 --- a/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll +++ b/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll @@ -7,7 +7,7 @@ define i32 @foo(i32 %X) { entry: %X_addr = alloca i32 ; [#uses=3] store i32 %X, i32* %X_addr - call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,m,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32* @G, i32 %X ) + call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,m,1,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) @G, i32* elementtype(i32) %X_addr, i32* @G, i32 %X ) %tmp1 = load i32, i32* %X_addr ; [#uses=1] ret i32 %tmp1 } @@ -16,7 +16,7 @@ define i32 @foo2(i32 %X) { entry: %X_addr = alloca i32 ; [#uses=3] store i32 %X, i32* %X_addr - call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32 %X ) + call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,1,~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) @G, i32* elementtype(i32) %X_addr, i32 %X ) %tmp1 = load i32, i32* %X_addr ; [#uses=1] ret i32 %tmp1 } diff --git a/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll b/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll index 60bffdd908c2..b08b0e573afc 100644 --- a/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll +++ b/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll @@ -23,7 +23,7 @@ define i1 @_ZNK12QImageWriter8canWriteEv() { %tmp62 = load %struct.QImageWriterPrivate*, %struct.QImageWriterPrivate** null ; <%struct.QImageWriterPrivate*> [#uses=1] %tmp = getelementptr %struct.QImageWriterPrivate, %struct.QImageWriterPrivate* %tmp62, i32 0, i32 9 ; <%struct.QString*> [#uses=1] %tmp75 = call %struct.QString* @_ZN7QStringaSERKS_( %struct.QString* %tmp, %struct.QString* null ) ; <%struct.QString*> [#uses=0] - call void asm sideeffect "lock\0Adecl $0\0Asetne 1", "=*m"( i32* null ) + call void asm sideeffect "lock\0Adecl $0\0Asetne 1", "=*m"( i32* elementtype( i32) null 
) ret i1 false } diff --git a/llvm/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/llvm/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll index 176b566fe0ae..0b55af9c5ed9 100644 --- a/llvm/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll +++ b/llvm/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll @@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu" define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind { entry: - %tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount, %struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; [#uses=0] + %tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) getelementptr (%struct.CycleCount, %struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; [#uses=0] %tmp221 = sdiv i32 10, 0 ; [#uses=1] tail call fastcc void @_one_mult( i8* null, i32 0, i32 %tmp221, i8* null ) ret i32 0 diff --git a/llvm/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll b/llvm/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll index d02346d103c1..21872289dfad 100644 --- a/llvm/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll +++ b/llvm/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll @@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu" define i32 @kernel_init(i8* %unused) { entry: - call void asm sideeffect "foo ${0:q}", "=*imr"( i64* null ) + call void asm sideeffect "foo ${0:q}", "=*imr"( i64* elementtype( i64) null ) ret i32 0 } diff --git a/llvm/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll b/llvm/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll index 27ec8260d06b..a81e2701e2e9 100644 --- a/llvm/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll +++ b/llvm/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll @@ -4,6 +4,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target 
triple = "x86_64-unknown-linux-gnu" define void @yield() { - %tmp9 = call i64 asm sideeffect "xchgb ${0:b},$1", "=q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"( i64* null, i64 0 ) ; + %tmp9 = call i64 asm sideeffect "xchgb ${0:b},$1", "=q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"( i64* elementtype( i64) null, i64 0 ) ; ret void } diff --git a/llvm/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll b/llvm/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll index ec3bce9c666a..8331c34bc32a 100644 --- a/llvm/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll +++ b/llvm/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll @@ -10,7 +10,6 @@ entry: %tmp12 = trunc i64 %tmp1 to i32 ; [#uses=2] store i32 %tmp12, i32* %lines, align 4 %tmp6 = call i64* asm sideeffect "foo", -"=r,=*r,=*r,r,0,1,2,~{dirflag},~{fpsr},~{flags},~{memory}"( i64** %p2_addr, -i32* %lines, i64 256, i64* %p1, i64* %p2, i32 %tmp12 ) ; [#uses=0] +"=r,=*r,=*r,r,0,1,2,~{dirflag},~{fpsr},~{flags},~{memory}"(i64** elementtype(i64*) %p2_addr, i32* elementtype(i32) %lines, i64 256, i64* %p1, i64* %p2, i32 %tmp12 ) ; [#uses=0] ret void } diff --git a/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll index c4572d3f64b4..bac724d9edea 100644 --- a/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll @@ -90,7 +90,7 @@ entry: %tmp32 = load i8*, i8** %src_addr, align 4 ; [#uses=1] %tmp33 = getelementptr i8, i8* %tmp32, i32 %tmp31 ; [#uses=1] %tmp3334 = bitcast i8* %tmp33 to i32* ; [#uses=1] - call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( 
i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind + call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* elementtype( i32) %tmp12, i32* elementtype(i32) %tmp56, i32* elementtype(i32) %tmp1011, i32* elementtype(i32) %tmp1617, i32* elementtype(i32) %tmp1920, i32* elementtype(i32) %tmp2324, i32* elementtype(i32) %tmp2829, i32* elementtype(i32) %tmp3334 ) nounwind br label %return return: ; preds = %entry diff --git a/llvm/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/llvm/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll index 1251e3fda8c7..822b79d97efe 100644 --- a/llvm/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll @@ -25,7 +25,7 @@ bb32: ; preds = %entry %pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum ; [#uses=1] %tmp191 = getelementptr i8, i8* %pix, i32 %pix_addr.0327.sum337 ; [#uses=1] %tmp191192 = bitcast i8* %tmp191 to i32* ; [#uses=1] - call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind + call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, 
%mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* elementtype( i32) null, i32* elementtype(i32) %tmp164, i32* elementtype(i32) %tmp169, i32* elementtype(i32) %tmp174, i32* elementtype(i32) %tmp177178, i32* elementtype(i32) %tmp181182, i32* elementtype(i32) %tmp186187, i32* elementtype(i32) %tmp191192 ) nounwind unreachable bb292: ; preds = %entry diff --git a/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll index 1ba17254c3c3..e77e8456746b 100644 --- a/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll +++ b/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll @@ -20,8 +20,8 @@ target triple = "i386-apple-darwin8" define i32 @aci(i32* %pw) nounwind { entry: %0 = load i32, i32* @x, align 4 - %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind - %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind + %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* elementtype(i32) %pw, i32* elementtype(i32) %pw) nounwind + %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test 
%edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* elementtype(i32) %pw, i32* elementtype(i32) %pw) nounwind %asmresult2 = extractvalue { i32, i32 } %asmtmp, 0 %asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0 %1 = add i32 %asmresult2, %asmresult3 diff --git a/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll index 51f2dfbfdb58..552adfda42d7 100644 --- a/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll +++ b/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll @@ -38,7 +38,7 @@ entry: %3 = load i32, i32* %0, align 4 ; [#uses=1] %4 = load i32, i32* %1, align 4 ; [#uses=1] %5 = load i8, i8* %state, align 1 ; [#uses=1] - %asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind ; <{ i32, i32, i32, i32 }> [#uses=3] + %asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** elementtype(i8*) %2, i8* elementtype(i8) %state, i32 %3, i32 %4, i8** elementtype(i8*) %2, i8 %5) nounwind ; <{ i32, i32, i32, i32 }> [#uses=3] %asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0 ; [#uses=1] %asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1 ; [#uses=1] store i32 %asmresult1, i32* %0 diff --git a/llvm/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/llvm/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll index bd1b47a588ef..f0e922d87c4b 100644 --- a/llvm/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll +++ b/llvm/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll @@ -6,7 +6,7 @@ define void @bn_sqr_comba8(i32* nocapture %r, i32* %a) nounwind { entry: - %asmtmp23 = 
tail call %0 asm "mulq $3", "={ax},={dx},{ax},*m,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32* %a) nounwind ; <%0> [#uses=1] + %asmtmp23 = tail call %0 asm "mulq $3", "={ax},={dx},{ax},*m,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32* elementtype(i32) %a) nounwind ; <%0> [#uses=1] %asmresult25 = extractvalue %0 %asmtmp23, 1 ; [#uses=1] %asmtmp26 = tail call %0 asm "addq $0,$0; adcq $2,$1", "={dx},=r,imr,0,1,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32 %asmresult25, i32 0) nounwind ; <%0> [#uses=1] %asmresult27 = extractvalue %0 %asmtmp26, 0 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll index 2615164c6d4b..b4edc115ec01 100644 --- a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll +++ b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll @@ -36,7 +36,7 @@ entry: %tmp15 = load i32, i32* %arrayidx14 ; [#uses=1] %arrayidx17 = getelementptr i32, i32* %data, i32 8 ; [#uses=1] %tmp18 = load i32, i32* %arrayidx17 ; [#uses=1] - %0 = call i32 asm "cpuid", "={ax},=*{bx},=*{cx},=*{dx},{ax},{bx},{cx},{dx},~{dirflag},~{fpsr},~{flags}"(i32* %arrayidx2, i32* %arrayidx4, i32* %arrayidx6, i32 %tmp9, i32 %tmp12, i32 %tmp15, i32 %tmp18) nounwind ; [#uses=1] + %0 = call i32 asm "cpuid", "={ax},=*{bx},=*{cx},=*{dx},{ax},{bx},{cx},{dx},~{dirflag},~{fpsr},~{flags}"(i32* elementtype(i32) %arrayidx2, i32* elementtype(i32) %arrayidx4, i32* elementtype(i32) %arrayidx6, i32 %tmp9, i32 %tmp12, i32 %tmp15, i32 %tmp18) nounwind ; [#uses=1] store i32 %0, i32* %arrayidx ret void } diff --git a/llvm/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll b/llvm/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll index 2f5c898ce221..3faf9b2afabd 100644 --- a/llvm/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll +++ b/llvm/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll @@ -3,7 +3,7 @@ define i32 @atomic_cmpset_long(i64* %dst, i64 %exp, i64 %src) nounwind ssp noredzone noimplicitfloat { entry: - %0 = call 
i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* undef, i64 undef, i64 undef, i64* undef) nounwind ; [#uses=0] + %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* elementtype(i64) undef, i64 undef, i64 undef, i64* elementtype(i64) undef) nounwind ; [#uses=0] br label %1 ;